From 9895c5765d8eb84710d054611f6a5c60ba209fdc Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Sun, 6 Feb 2005 13:42:03 +0000 Subject: [PATCH] use xover & fix stupid cache reading bug --- trunk/ripnews/news/article.rb | 122 +++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 32 deletions(-) diff --git a/trunk/ripnews/news/article.rb b/trunk/ripnews/news/article.rb index 0c22c8f..3306af2 100644 --- a/trunk/ripnews/news/article.rb +++ b/trunk/ripnews/news/article.rb @@ -1,4 +1,4 @@ -# $Dwarf: article.rb,v 1.106 2005/02/05 10:48:31 ward Exp $ +# $Dwarf: article.rb,v 1.107 2005/02/05 12:35:50 ward Exp $ # $Source$ # @@ -126,7 +126,12 @@ def set_preselect_pattern(regexp) end def preselect(subject) - return ( subject =~ @preselectpattern ) + if subject =~ @preselectpattern + return true + else + return false + end +# return ( subject =~ @preselectpattern ) end def add(id, messid, subject, server) @@ -172,25 +177,23 @@ def get_articles(cachedir=false) end end cache_read(cachedir) -# for server in @connections.keys -# print "############################################################\n" -# print "skip_ids #{server}: #{@connections[server]["skip_ids"].run_list}\n" -# end # spul dat echt te oud is gaat nooit gevuld worden, dus doe ook geen poging het op te halen # wil wel wat ophalen aangezien logging aantoont dat er wel oudere articles gedownload worden - for server in @connections.keys - articles = @connections[server]["last"] - @connections[server]["first"] - if articles > 10000 - fillerend = (@connections[server]["last"] - (articles/10)).to_i - else - fillerend = @connections[server]["last"] - 1000 - end - if fillerend > @connections[server]["skip_ids"].min - @connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}") - # p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}" - end - end +# for server in @connections.keys +# if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max +# articles = @connections[server]["last"] - @connections[server]["first"] +# if articles > 10000 +# fillerend = (@connections[server]["skip_ids"].max - (articles/10)).to_i +# else +# fillerend = @connections[server]["skip_ids"].max - 1000 +# end +# if @connections[server]["skip_ids"].min && fillerend > @connections[server]["skip_ids"].min +# @connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}") +# # p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}" +# end +# end +# end for server in @connections.keys print " reading articles from server: #{server}\n" @@ -207,24 +210,32 @@ def get_articles(cachedir=false) for i in rangelist.split(',') print "i: #{i}\n" if Debuglevel > 2 begin - resp, subj_lines = get_xhdr(server, i, "subject") - resp, messid_lines = get_xhdr(server, i, "message-id") + resp, xover_lines = get_xover(server, i) +# resp, subj_lines = get_xhdr(server, i, "subject") +# resp, messid_lines = get_xhdr(server, i, "message-id") rescue TempError printerr(server) next end art = {} - subj_lines.collect{|x| + xover_lines.collect{|x| art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]]["subject"] = x[1] + art[x[0]]["messid"] = x[4] print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2 + print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2 } - messid_lines.collect{|x| - art[x[0]] = {} unless art.has_key?(x[0]) - art[x[0]]["messid"] = x[1] - print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2 - } +# subj_lines.collect{|x| +# art[x[0]] = {} unless art.has_key?(x[0]) +# art[x[0]]["subject"] = x[1] +# print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2 +# } +# messid_lines.collect{|x| +# art[x[0]] = {} unless art.has_key?(x[0]) +# art[x[0]]["messid"] = x[1] +# print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2 +# } for id in art.keys if art[id].has_key?("subject") and art[id].has_key?("messid") print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2 @@ -238,10 +249,8 @@ def get_articles(cachedir=false) cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server) end end - headerlines += subj_lines.length -# p "subj_lines.length #{subj_lines.length}" -# p "headerlines #{headerlines}" - if headerlines >= 500 # hmmm, dit lijkt niet te werken... + headerlines += xover_lines.length + if headerlines >= 500 cache_save(cachedir, server) headerlines = 0 end @@ -329,7 +338,56 @@ def get_xhdr(server, range, header) rescue TimeoutError print "Time out, reconnecting to server (get_xhdr)\n" timedout += 1 - raise PermError, "Too many timeouts! (get_xhrd)" if timedout > 1 + raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1 + reconnect(server) + get_group_info(server) + retry + end +end + +def get_xover(server, range) + timedout = 0 + resp = "" + lines = [] + start, ed = range.split("-") + unless ed + ed = start + end + begin + timeout(180) do + begin + p Time.now if Debuglevel > 1 + print "getting headers: #{range}\n" if Debuglevel > 1 + resp, lines = @connections[server]["nntp"].xover(start, ed) + if resp.to_i == 500 + print "xover not implemented\n" + print "Error: #{$!}\n" + end + unless resp.to_i >= 200 and resp.to_i < 300 + print "got response #{resp} while reading group #{@group} from #{server}\n" + raise TempError + end + rescue Net::NNTPReplyError + printerr(server) + if ( $!.to_s =~ /^503|^400/ ) + reconnect(server) + get_group_info(server) + retry + else + print "Won't handle this... yet :(\n" + end + rescue Errno::EPIPE, Errno::ECONNRESET, EOFError + printerr(server) + reconnect(server) + get_group_info(server) + retry + end + end + return resp, lines + rescue TimeoutError + print "Time out, reconnecting to server (get_xover)\n" + timedout += 1 + raise PermError, "Too many timeouts! (get_xover)" if timedout > 1 reconnect(server) get_group_info(server) retry @@ -613,7 +671,7 @@ p Time.now unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or id.to_i < @connections[server]["first"] or id.to_i > @connections[server]["last"] - if preselect($3) + if preselect(subject) add(id, messid, subject, server) end @connections[server]["skip_ids"].insert(id.to_i)