From 8b0eb20bdc476e8c10a8660442dd9f15020c8cad Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Sun, 13 Jul 2003 09:52:53 +0000 Subject: [PATCH] change some debuglevels, fix rechunk_runlist, use temporary file writing cache --- trunk/ripnews/news/article.rb | 72 ++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/trunk/ripnews/news/article.rb b/trunk/ripnews/news/article.rb index d1502cc..c270cd9 100644 --- a/trunk/ripnews/news/article.rb +++ b/trunk/ripnews/news/article.rb @@ -1,6 +1,6 @@ ################################# # -# $Dwarf: article.rb,v 1.69 2003/07/03 14:03:11 ward Exp $ +# $Dwarf: article.rb,v 1.70 2003/07/06 08:14:05 ward Exp $ # $Source$ # # article.rb @@ -21,7 +21,7 @@ class PermError < ArticleError; end class Article -Debuglevel = 0 +Debuglevel = 2 def initialize(nntpservers, groupname, newsrc="~/.newsrc") @messids = [] @@ -44,7 +44,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc") @connections[server]["skip_ids"] = Set::IntSpan.new() @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}") set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group)) - rescue SocketError, Errno::EINVAL + rescue SocketError, Errno::EINVAL, EOFError print "Connection to #{server} failed: #{$!}\n" del_server(server) end @@ -54,7 +54,7 @@ end def reconnect(server) begin @connections[server]["nntp"] = Net::NNTP.new(server) - rescue SocketError, Errno::EINVAL + rescue SocketError, Errno::EINVAL, EOFError print "Reconnect to #{server} failed: #{$!}\n" del_server(server) raise PermError, "Couldn't connect to #{server}" @@ -93,9 +93,10 @@ def get_articles(cachedir=false) del_server(server) next end - if first <= last - @connections[server]["first"] = first ? first : 0 - @connections[server]["last"] = last ? last : 0 + if first.to_i <= last.to_i + # available articles on server + @connections[server]["first"] = first ? first.to_i : 0 + @connections[server]["last"] = last ? 
last.to_i : 0 if Debuglevel > 0 print " Server: #{server}\n" print " First: #{first}\n" @@ -112,13 +113,17 @@ def get_articles(cachedir=false) end end read_cache(cachedir) + for server in @connections.keys + print "############################################################\n" + print "skip_ids #{server}: #{@connections[server]["skip_ids"].run_list}\n" + end for server in @connections.keys print " reading articles from server: #{server}\n" range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}") rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list) print "rangelist: #{rangelist}\n" if Debuglevel > 2 print "rangelist: #{rangelist.type.to_s}\n" if Debuglevel > 2 - print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel >2 + print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2 begin unless rangelist == nil or rangelist =~ /^$/ for i in rangelist.split(',') @@ -135,16 +140,16 @@ def get_articles(cachedir=false) subj_lines.collect{|x| art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]]["subject"] = x[1] - print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1 + print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2 } messid_lines.collect{|x| art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]]["messid"] = x[1] - print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1 + print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2 } for id in art.keys if art[id].has_key?("subject") and art[id].has_key?("messid") - print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1 + print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2 add(art[id]["messid"], id, server, art[id]["subject"]) end end @@ -195,6 +200,7 @@ def get_xhdr(server, range, header) begin timeout(180) do begin + print "getting headers: #{header}, #{range}\n" if 
Debuglevel > 1 resp, lines = @connections[server]["nntp"].xhdr(header, range) if resp.to_i == 500 print "xhdr not implemented\n" @@ -212,7 +218,7 @@ def get_xhdr(server, range, header) get_group_info(server) retry else - print "Won't handdle this... yet :(\n" + print "Won't handle this... yet :(\n" end rescue Errno::EPIPE, Errno::ECONNRESET, EOFError print "Caught #{$!.type} reading from server #{server} (get_xhdr)\n" @@ -285,7 +291,7 @@ def get_body(server, message) retry end return false - rescue EOFError + rescue EOFError, NameError print "Caught #{$!.type} reading article #{message} from #{server} (get_body)\n" print "Error: #{$!}\n" return false @@ -453,7 +459,7 @@ end def group_subjects @groups = {} for i in (0...@subjects.length) - print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1 + print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 3 if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/ j = "#{$1}#{$4} (#{$3})" number = $2 @@ -490,8 +496,11 @@ def set_skip_ids(server, ids) end def group_update_newsrc(subject) + print "running group_update_newsrc\n"; for i in (0...@groups[subject]["messages"].length) - @connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i]) + if @connections[@groups[subject]["servers"][i]] + @connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i]) + end end end @@ -536,7 +545,7 @@ end def save_cache(cachedir) filename = "#{cachedir}/#{@group}.ripnewscache" if FileTest.directory?( cachedir ) - file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n" + file = File.new( "#{filename}.new", "w" ) or print "couldn't open cachefile for writing\n" cache = [] for i in (0...@subjects.length) cache.push("#{@ids[i]}|#{@messids[i]}|#{@servers[i]}|#{@subjects[i]}\n") @@ -544,6 +553,11 @@ def save_cache(cachedir) cache.sort! 
file.print cache file.close + if ( File.move("#{filename}.new", filename) ) + print "Cache updated\n" + else + print "Couldn't update cache\n" + end end end @@ -585,6 +599,13 @@ def group_subject_sort(subj) end sort_arr.sort!{|a,b| r = ward_sort(a[0], b[0]) + if serverhash[a[3]] == nil or serverhash[b[3]] == nil + print "serverhash[a[3]]: #{serverhash[a[3]]}\n" + print "serverhash[b[3]]: #{serverhash[b[3]]}\n" + print "a[3]: #{a[3]}\n" + print "b[3]: #{b[3]}\n" + print "strange things going on here...\n" + end if r == 0 r = serverhash[a[3]] <=> serverhash[b[3]] end @@ -628,23 +649,30 @@ end def rechunk_runlist(runlist) return nil if runlist == nil + chunksize = 500 blalist = runlist.split(',') + + # hmm, if the number of articles that lie between the commas is < roughly 3 + # then I think it is well worth simply fetching those 3 as well + # and making fewer network requests... + # the way to do that would be something like removing that comma and + # one of the 2 numbers... + blalist.collect!{|x| result = "" if x =~ /(.*)-(.*)/ a = $1 - while ($2.to_i - a.to_i) > 200 - result << "#{a}-#{a.to_i+199}," - a = a.to_i + 200 + while ($2.to_i - a.to_i) > chunksize + result << "#{a}-#{a.to_i+(chunksize-1)}," + a = a.to_i + chunksize end result << "#{a}-#{$2}" else x end - blup = blalist.join(",") - return blup } - return + blup = blalist.join(",") + return blup end def quit