use xover & fix stupid cache reading bug

This commit is contained in:
Ward Wouts 2005-02-06 13:42:03 +00:00
parent 7a4b33bbda
commit 9895c5765d

View file

@ -1,4 +1,4 @@
# $Dwarf: article.rb,v 1.106 2005/02/05 10:48:31 ward Exp $ # $Dwarf: article.rb,v 1.107 2005/02/05 12:35:50 ward Exp $
# $Source$ # $Source$
# #
@ -126,7 +126,12 @@ def set_preselect_pattern(regexp)
end end
def preselect(subject) def preselect(subject)
return ( subject =~ @preselectpattern ) if subject =~ @preselectpattern
return true
else
return false
end
# return ( subject =~ @preselectpattern )
end end
def add(id, messid, subject, server) def add(id, messid, subject, server)
@ -172,25 +177,23 @@ def get_articles(cachedir=false)
end end
end end
cache_read(cachedir) cache_read(cachedir)
# for server in @connections.keys
# print "############################################################\n"
# print "skip_ids #{server}: #{@connections[server]["skip_ids"].run_list}\n"
# end
# spul dat echt te oud is gaat nooit gevuld worden, dus doe ook geen poging het op te halen # spul dat echt te oud is gaat nooit gevuld worden, dus doe ook geen poging het op te halen
# wil wel wat ophalen aangezien logging aantoont dat er wel oudere articles gedownload worden # wil wel wat ophalen aangezien logging aantoont dat er wel oudere articles gedownload worden
for server in @connections.keys # for server in @connections.keys
articles = @connections[server]["last"] - @connections[server]["first"] # if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max
if articles > 10000 # articles = @connections[server]["last"] - @connections[server]["first"]
fillerend = (@connections[server]["last"] - (articles/10)).to_i # if articles > 10000
else # fillerend = (@connections[server]["skip_ids"].max - (articles/10)).to_i
fillerend = @connections[server]["last"] - 1000 # else
end # fillerend = @connections[server]["skip_ids"].max - 1000
if fillerend > @connections[server]["skip_ids"].min # end
@connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}") # if @connections[server]["skip_ids"].min && fillerend > @connections[server]["skip_ids"].min
# p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}" # @connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}")
end # # p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}"
end # end
# end
# end
for server in @connections.keys for server in @connections.keys
print " reading articles from server: #{server}\n" print " reading articles from server: #{server}\n"
@ -207,24 +210,32 @@ def get_articles(cachedir=false)
for i in rangelist.split(',') for i in rangelist.split(',')
print "i: #{i}\n" if Debuglevel > 2 print "i: #{i}\n" if Debuglevel > 2
begin begin
resp, subj_lines = get_xhdr(server, i, "subject") resp, xover_lines = get_xover(server, i)
resp, messid_lines = get_xhdr(server, i, "message-id") # resp, subj_lines = get_xhdr(server, i, "subject")
# resp, messid_lines = get_xhdr(server, i, "message-id")
rescue TempError rescue TempError
printerr(server) printerr(server)
next next
end end
art = {} art = {}
subj_lines.collect{|x| xover_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["subject"] = x[1] art[x[0]]["subject"] = x[1]
art[x[0]]["messid"] = x[4]
print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2 print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2
} }
messid_lines.collect{|x| # subj_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0]) # art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["messid"] = x[1] # art[x[0]]["subject"] = x[1]
print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2 # print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
} # }
# messid_lines.collect{|x|
# art[x[0]] = {} unless art.has_key?(x[0])
# art[x[0]]["messid"] = x[1]
# print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2
# }
for id in art.keys for id in art.keys
if art[id].has_key?("subject") and art[id].has_key?("messid") if art[id].has_key?("subject") and art[id].has_key?("messid")
print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2 print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2
@ -238,10 +249,8 @@ def get_articles(cachedir=false)
cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server) cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server)
end end
end end
headerlines += subj_lines.length headerlines += xover_lines.length
# p "subj_lines.length #{subj_lines.length}" if headerlines >= 500
# p "headerlines #{headerlines}"
if headerlines >= 500 # hmmm, dit lijkt niet te werken...
cache_save(cachedir, server) cache_save(cachedir, server)
headerlines = 0 headerlines = 0
end end
@ -329,7 +338,56 @@ def get_xhdr(server, range, header)
rescue TimeoutError rescue TimeoutError
print "Time out, reconnecting to server (get_xhdr)\n" print "Time out, reconnecting to server (get_xhdr)\n"
timedout += 1 timedout += 1
raise PermError, "Too many timeouts! (get_xhrd)" if timedout > 1 raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
reconnect(server)
get_group_info(server)
retry
end
end
def get_xover(server, range)
timedout = 0
resp = ""
lines = []
start, ed = range.split("-")
unless ed
ed = start
end
begin
timeout(180) do
begin
p Time.now if Debuglevel > 1
print "getting headers: #{range}\n" if Debuglevel > 1
resp, lines = @connections[server]["nntp"].xover(start, ed)
if resp.to_i == 500
print "xover not implemented\n"
print "Error: #{$!}\n"
end
unless resp.to_i >= 200 and resp.to_i < 300
print "got response #{resp} while reading group #{@group} from #{server}\n"
raise TempError
end
rescue Net::NNTPReplyError
printerr(server)
if ( $!.to_s =~ /^503|^400/ )
reconnect(server)
get_group_info(server)
retry
else
print "Won't handle this... yet :(\n"
end
rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
printerr(server)
reconnect(server)
get_group_info(server)
retry
end
end
return resp, lines
rescue TimeoutError
print "Time out, reconnecting to server (get_xover)\n"
timedout += 1
raise PermError, "Too many timeouts! (get_xover)" if timedout > 1
reconnect(server) reconnect(server)
get_group_info(server) get_group_info(server)
retry retry
@ -613,7 +671,7 @@ p Time.now
unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or
id.to_i < @connections[server]["first"] or id.to_i < @connections[server]["first"] or
id.to_i > @connections[server]["last"] id.to_i > @connections[server]["last"]
if preselect($3) if preselect(subject)
add(id, messid, subject, server) add(id, messid, subject, server)
end end
@connections[server]["skip_ids"].insert(id.to_i) @connections[server]["skip_ids"].insert(id.to_i)