- update to work with newer Ruby versions
- switch from using xhdr to xover for additional speed in reading article headers
- implement a MAXAGE option to prevent downloading articles older than the given number of days
This commit is contained in:
parent
bb314ebce1
commit
f75852302f
3 changed files with 139 additions and 104 deletions
|
|
@ -17,9 +17,9 @@
|
|||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
require 'set/intspan'
|
||||
require 'net/nntp'
|
||||
require 'news/newsrc'
|
||||
require Pathname.new(__FILE__).dirname + '../set/intspan'
|
||||
require Pathname.new(__FILE__).dirname + '../net/nntp'
|
||||
require Pathname.new(__FILE__).dirname + '../news/newsrc'
|
||||
require 'tempfile'
|
||||
require 'timeout'
|
||||
#require 'yaml'
|
||||
|
|
@ -98,11 +98,11 @@ end # module Net
|
|||
|
||||
class Article
|
||||
|
||||
Debuglevel = 1
|
||||
Debuglevel = 0
|
||||
|
||||
Message = Struct.new(:messid, :id, :date, :from, :server, :subject)
|
||||
|
||||
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
|
||||
def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
|
||||
@messageinfo = []
|
||||
|
||||
@grouped = false
|
||||
|
|
@ -113,6 +113,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc")
|
|||
@cache_buf = {}
|
||||
@serverlist = []
|
||||
@serverpasses = {}
|
||||
@maxage = maxage.to_i
|
||||
|
||||
tmplist = nntpservers.split('|')
|
||||
tmplist.each{ |server|
|
||||
|
|
@ -262,7 +263,9 @@ def get_articles(cachedir=false)
|
|||
end
|
||||
if first.to_i <= last.to_i
|
||||
# available articles on server
|
||||
# oudste
|
||||
@connections[server]["first"] = first ? first.to_i : 0
|
||||
# nieuwste
|
||||
@connections[server]["last"] = last ? last.to_i : 0
|
||||
if Debuglevel > 0
|
||||
puts " Server: #{server} First: #{first} Last: #{last}"
|
||||
|
|
@ -283,15 +286,14 @@ def get_articles(cachedir=false)
|
|||
# wil wel wat ophalen aangezien logging aantoont dat er wel oudere articles gedownload worden
|
||||
@connections.keys.each{|server|
|
||||
if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"]
|
||||
articles = @connections[server]["last"] - @connections[server]["first"]
|
||||
if articles > 10000
|
||||
fillerend = (@connections[server]["skip_ids"].max - (articles/5)).to_i
|
||||
cnt_articles = @connections[server]["last"] - @connections[server]["first"]
|
||||
if cnt_articles > 10000
|
||||
fillerend = (@connections[server]["skip_ids"].max - (cnt_articles/5)).to_i
|
||||
else
|
||||
fillerend = @connections[server]["skip_ids"].max - 2000
|
||||
end
|
||||
if @connections[server]["skip_ids"].min && fillerend > @connections[server]["skip_ids"].min
|
||||
@connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}")
|
||||
# p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}"
|
||||
end
|
||||
end
|
||||
}
|
||||
|
|
@ -307,46 +309,45 @@ def get_articles(cachedir=false)
|
|||
puts "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}" if Debuglevel > 2
|
||||
begin
|
||||
unless rangelist == nil or rangelist =~ /^$/
|
||||
rangelist.split(',').each{|i|
|
||||
lastdate = DateTime.now
|
||||
# nieuwste eerst. Dat maakt het mogelijk om op te houden bij te oude datum.
|
||||
rangelist.split(',').reverse.each{|i|
|
||||
puts "i: #{i}" if Debuglevel > 2
|
||||
begin
|
||||
# resp, xover_lines = get_xover(server, i)
|
||||
resp, date_lines = get_xhdr(server, i, "date")
|
||||
resp, subj_lines = get_xhdr(server, i, "subject")
|
||||
resp, messid_lines = get_xhdr(server, i, "message-id")
|
||||
resp, from_lines = get_xhdr(server, i, "from")
|
||||
rescue TempError
|
||||
resp, xover_lines = get_xover(server, i)
|
||||
rescue TempError, EOFError
|
||||
printerr(server)
|
||||
next
|
||||
end
|
||||
|
||||
art = {}
|
||||
date_lines.collect{|x|
|
||||
art[x[0]] = {} unless art.has_key?(x[0])
|
||||
xover_lines.collect{|x|
|
||||
id = x[0]
|
||||
subj = x[1]
|
||||
auth = x[2]
|
||||
date = x[3]
|
||||
messid = x[4]
|
||||
|
||||
art[id] = {} unless art.has_key?(id)
|
||||
|
||||
begin
|
||||
art[x[0]]["date"] = DateTime.parse(x[1]).strftime('%Y%m%d')
|
||||
lastdate = art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d')
|
||||
rescue
|
||||
puts $!.message
|
||||
puts x[1]
|
||||
art[x[0]]["date"] = Time.now.strftime('%Y%m%d')
|
||||
puts id
|
||||
art[id]["date"] = Time.now.strftime('%Y%m%d')
|
||||
end
|
||||
puts "art id: #{x[0]} date: #{x[1]}" if Debuglevel > 2
|
||||
}
|
||||
subj_lines.collect{|x|
|
||||
art[x[0]] = {} unless art.has_key?(x[0])
|
||||
art[x[0]]["subject"] = x[1]
|
||||
puts "art id: #{x[0]} subj: #{x[1]}" if Debuglevel > 2
|
||||
}
|
||||
messid_lines.collect{|x|
|
||||
art[x[0]] = {} unless art.has_key?(x[0])
|
||||
art[x[0]]["messid"] = x[1]
|
||||
puts "art id: #{x[0]} messid: #{x[1]}" if Debuglevel > 2
|
||||
}
|
||||
from_lines.collect{|x|
|
||||
art[x[0]] = {} unless art.has_key?(x[0])
|
||||
art[x[0]]["from"] = x[1]
|
||||
puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2
|
||||
}
|
||||
art[id]["subject"] = x[1]
|
||||
art[id]["messid"] = messid
|
||||
art[id]["from"] = auth
|
||||
|
||||
puts "art id: #{id} subj: #{subj}" if Debuglevel > 2
|
||||
puts "art id: #{id} from: #{auth}" if Debuglevel > 2
|
||||
puts "art id: #{id} date: #{date}" if Debuglevel > 2
|
||||
puts "art id: #{id} messid: #{messid}" if Debuglevel > 2
|
||||
|
||||
} # xover_lines.collect
|
||||
|
||||
art.keys.each{|id|
|
||||
if art[id].has_key?("date") and art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
|
||||
puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
|
||||
|
|
@ -359,6 +360,12 @@ def get_articles(cachedir=false)
|
|||
cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
|
||||
end
|
||||
}
|
||||
if @maxage and @maxage > 0
|
||||
if DateTime.parse(lastdate) < ( DateTime.now - @maxage )
|
||||
puts "Skipping articles older than #{DateTime.now - @maxage}"
|
||||
break
|
||||
end
|
||||
end
|
||||
}
|
||||
end
|
||||
rescue PermError
|
||||
|
|
@ -407,6 +414,7 @@ end
|
|||
|
||||
def get_xhdr(server, range, header)
|
||||
timedout = 0
|
||||
attempts = 0
|
||||
resp = ""
|
||||
lines = []
|
||||
begin
|
||||
|
|
@ -432,11 +440,18 @@ def get_xhdr(server, range, header)
|
|||
else
|
||||
puts "Won't handle this... yet :("
|
||||
end
|
||||
rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
|
||||
#rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
|
||||
rescue Errno::EPIPE, Errno::ECONNRESET
|
||||
printerr(server)
|
||||
reconnect(server)
|
||||
get_group_info(server)
|
||||
attempts += 1
|
||||
if attempts < 2
|
||||
retry
|
||||
else
|
||||
printerr "giving up"
|
||||
return
|
||||
end
|
||||
end
|
||||
end
|
||||
return resp, lines
|
||||
|
|
@ -486,6 +501,11 @@ def get_xover(server, range)
|
|||
reconnect(server)
|
||||
get_group_info(server)
|
||||
retry
|
||||
rescue Net::NNTPDataError
|
||||
printerr(server)
|
||||
reconnect(server)
|
||||
get_group_info(server)
|
||||
retry
|
||||
end
|
||||
end
|
||||
return resp, lines
|
||||
|
|
@ -518,7 +538,7 @@ def get_body(server, message)
|
|||
resp, id, messid, list = @connections[server]["nntp"].body(message)
|
||||
rescue Net::NNTPReplyError
|
||||
a = ''
|
||||
a += $!
|
||||
a += $!.to_s
|
||||
printerr(server)
|
||||
if retries == 0 && (a =~ /^503/ || a =~ /^400/)
|
||||
reconnect(server)
|
||||
|
|
@ -826,7 +846,6 @@ def cache_read(cachedir)
|
|||
puts "#{Time.now} Reading & scrubbing caches"
|
||||
filename = "#{cachedir}/#{@group}.ripnewscache"
|
||||
excludes = {}
|
||||
regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)')
|
||||
@connections.keys.each{|server|
|
||||
first = @connections[server]["first"]
|
||||
last = @connections[server]["last"]
|
||||
|
|
@ -836,13 +855,24 @@ puts " #{Time.now} Reading cache for #{server}"
|
|||
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
|
||||
if FileTest.directory?(cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" )
|
||||
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
|
||||
File.new( "#{filename}.#{server}" ).each{ |line|
|
||||
line =~ regexp
|
||||
id_i = $1.to_i
|
||||
messid = $2
|
||||
date = $3
|
||||
from = $4
|
||||
subject = $5
|
||||
cachefile = File.new( "#{filename}.#{server}" )
|
||||
begin
|
||||
while true do
|
||||
# using each on a big cachefile leads to out of memory conditions
|
||||
line = cachefile.readline
|
||||
begin
|
||||
line.encode!("US-ASCII")
|
||||
rescue
|
||||
# If it gets here, the original encoding is unknown
|
||||
# gracefully give up and go to the next line
|
||||
puts "String#encode couldn't handle: '#{line}'"
|
||||
next
|
||||
# so the following probably won't help
|
||||
#line.gsub!(/\\/, "")
|
||||
end
|
||||
|
||||
id_i, messid, date, from, subject = line.split('|', 5)
|
||||
id_i = id_i.to_i
|
||||
if first <= id_i and id_i <= last
|
||||
if ! excludes[server].has_key?(id_i)
|
||||
outfile.puts(line)
|
||||
|
|
@ -853,8 +883,10 @@ puts " #{Time.now} Reading cache for #{server}"
|
|||
@connections[server]["skip_ids"].insert!(id_i)
|
||||
end
|
||||
end
|
||||
}
|
||||
if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
|
||||
end
|
||||
rescue EOFError
|
||||
end
|
||||
if ( FileUtils.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
|
||||
puts " #{Time.now} Cache scrubbed for #{server}"
|
||||
else
|
||||
puts "Couldn't scrub #{server} cache"
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
require "set/intspan"
|
||||
require Pathname.new(__FILE__).dirname + "../set/intspan"
|
||||
#require "thread"
|
||||
|
||||
module News
|
||||
|
|
@ -93,7 +93,7 @@ def save_as(file)
|
|||
# @@save_lock.synchronize{
|
||||
if FileTest.exists?("#{file}")
|
||||
begin
|
||||
File.rename(file, "#{file}.bak")
|
||||
FileUtils.mv(file, "#{file}.bak")
|
||||
rescue
|
||||
puts "Can't rename #{file}, #{file}.bak: #{$!}"
|
||||
exit
|
||||
|
|
@ -142,7 +142,9 @@ def save_group_as(file, group)
|
|||
#p Time.now
|
||||
#p "copy file"
|
||||
if FileTest.exists?("#{file}")
|
||||
if ( ! File.copy(file, "#{file}.bak") )
|
||||
begin
|
||||
FileUtils.copy(file, "#{file}.bak")
|
||||
rescue
|
||||
puts "Can't copy #{file} to #{file}.bak: #{$!}"
|
||||
end
|
||||
end
|
||||
|
|
@ -176,7 +178,9 @@ def save_group_as(file, group)
|
|||
puts "Newsrc.parse: Bad newsrc line: #{line}"
|
||||
# restore backup on failure, it'll contain the flaw too, but it'll
|
||||
# be complete
|
||||
if ( ! File.copy("#{file}.bak", file) )
|
||||
begin
|
||||
FileUtils.copy("#{file}.bak", file)
|
||||
rescue
|
||||
puts "Can't copy #{file}.bak to #{file}: #{$!}"
|
||||
end
|
||||
exit
|
||||
|
|
|
|||
|
|
@ -23,13 +23,14 @@ require 'date'
|
|||
require 'ftools'
|
||||
require 'time'
|
||||
require 'getoptlong'
|
||||
require 'news/article'
|
||||
require 'news/newsrc'
|
||||
require 'pathname'
|
||||
require Pathname.new(__FILE__).dirname + 'news/article'
|
||||
require Pathname.new(__FILE__).dirname + 'news/newsrc'
|
||||
require 'tempfile'
|
||||
require 'thread'
|
||||
require 'thwait'
|
||||
require 'encode/uuencode'
|
||||
require 'encode/yenc'
|
||||
require Pathname.new(__FILE__).dirname + 'encode/uuencode'
|
||||
require Pathname.new(__FILE__).dirname + 'encode/yenc'
|
||||
|
||||
###########################################################################
|
||||
|
||||
|
|
@ -100,9 +101,10 @@ def save_file(dir, name, data)
|
|||
case data.class.to_s
|
||||
when "String"
|
||||
puts " Moving #{data} to #{dir}/#{newname}"
|
||||
if File.move(data, "#{dir}/#{newname}")
|
||||
begin
|
||||
FileUtils.mv(data, "#{dir}/#{newname}")
|
||||
puts " Saved as: '#{newname}'"
|
||||
else
|
||||
rescue
|
||||
puts "couldn't rename tempfile"
|
||||
return false
|
||||
end
|
||||
|
|
@ -117,9 +119,10 @@ def save_file(dir, name, data)
|
|||
end
|
||||
when "Tempfile"
|
||||
puts " Moving #{data} to #{dir}/#{newname}"
|
||||
if File.move(data.path, "#{dir}/#{newname}")
|
||||
begin
|
||||
FileUtils.mv(data.path, "#{dir}/#{newname}")
|
||||
puts " Saved as: '#{newname}'"
|
||||
else
|
||||
rescue
|
||||
puts "couldn't rename tempfile #{data.path}"
|
||||
return false
|
||||
end
|
||||
|
|
@ -145,25 +148,22 @@ def filename_sanitize(nname)
|
|||
# XXX this is a mac problem, should be configurable!!!!
|
||||
group = @config.keys[0]
|
||||
if @config[group].has_key?("ASCIIFILENAMES") && @config[group]["ASCIIFILENAMES"]
|
||||
require 'iconv'
|
||||
begin
|
||||
# hmmm, misschien is de begin waarde wel gewoon geen UTF-8, maar 8-bit ASCII
|
||||
# wat de mac filesystems nog slechter maakt overigens
|
||||
nname = Iconv.conv("ASCII//TRANSLIT", "UTF-8", nname)
|
||||
rescue Iconv::IllegalSequence
|
||||
begin
|
||||
nname = Iconv.conv("ASCII//TRANSLIT", "ISO-8859-1", nname)
|
||||
rescue Iconv::IllegalSequence
|
||||
#nname = Iconv.conv("ASCII//TRANSLIT", "UTF-8", nname)
|
||||
nname.encode!("US-ASCII")
|
||||
rescue
|
||||
# If it gets here, the original encoding is not UTF-8 of ISO-8859-1
|
||||
# so the following probably won't help
|
||||
puts "Iconv couldn't handle: '#{nname}'"
|
||||
puts "String#encode couldn't handle: '#{nname}'"
|
||||
nname.gsub!(/\\\d+/, "#")
|
||||
nname.gsub!(/ä/, "\"a")
|
||||
nname.gsub!(/ë/, "\"e")
|
||||
nname.gsub!(/ï/, "\"i")
|
||||
nname.gsub!(/ö/, "\"o")
|
||||
nname.gsub!(/ü/, "\"u")
|
||||
nname.gsub!(/ñ/, "~n")
|
||||
#nname.gsub!(/ä/, "\"a")
|
||||
#nname.gsub!(/ë/, "\"e")
|
||||
#nname.gsub!(/ï/, "\"i")
|
||||
#nname.gsub!(/ö/, "\"o")
|
||||
#nname.gsub!(/ü/, "\"u")
|
||||
#nname.gsub!(/ñ/, "~n")
|
||||
nname.gsub!(/#{86.chr}/, "U")
|
||||
nname.gsub!(/#{151.chr}/, " ")
|
||||
nname.gsub!(/#{161.chr}/, "i")
|
||||
|
|
@ -178,7 +178,6 @@ def filename_sanitize(nname)
|
|||
nname.gsub!(/#{243.chr}/, "o")
|
||||
nname.gsub!(/#{247.chr}/, "S")
|
||||
end
|
||||
end
|
||||
# shit that gets through...
|
||||
nname.gsub!('#{146.chr}', "")
|
||||
end
|
||||
|
|
@ -429,7 +428,7 @@ def lock
|
|||
exit
|
||||
rescue Errno::ESRCH
|
||||
puts "Stale lock found... removing..."
|
||||
File.unlink(@config[group]["LOCKFILE"])
|
||||
FileUtils.rm(@config[group]["LOCKFILE"])
|
||||
end
|
||||
else
|
||||
puts "Empty lockfile found... removing..."
|
||||
|
|
@ -568,7 +567,7 @@ def get_multi(subj, group)
|
|||
while FileTest.exists?("#{tbodybase}-#{i}")
|
||||
i += 1
|
||||
end
|
||||
File.move(tbody, "#{tbodybase}-#{i}")
|
||||
FileUtils.mv(tbody, "#{tbodybase}-#{i}")
|
||||
tbody = "#{tbodybase}-#{i}"
|
||||
tfile.close
|
||||
tfileout.close(false)
|
||||
|
|
@ -666,7 +665,7 @@ def get_max_file_length(tempdir=".")
|
|||
name = "#$$#{name}"
|
||||
begin
|
||||
file = File.new("#{tempdir}/#{name}", "w", 0644).close
|
||||
File.delete("#{tempdir}/#{name}")
|
||||
FileUtils.rm("#{tempdir}/#{name}")
|
||||
rescue Errno::ENAMETOOLONG
|
||||
name = name[0...-1]
|
||||
retry
|
||||
|
|
@ -753,7 +752,7 @@ def main
|
|||
@newsrc_lock = Mutex.new
|
||||
profile_mem("#{group} start")
|
||||
puts "\nGetting articles for #{group}"
|
||||
@articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"])
|
||||
@articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"], @config[group]["MAXAGE"])
|
||||
fill_preselector(group)
|
||||
puts "initialized"
|
||||
@articles.get_articles(@config[group]["CACHEDIR"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue