diff --git a/trunk/ripnews/news/article.rb b/trunk/ripnews/news/article.rb index ea8ae20..a18658c 100644 --- a/trunk/ripnews/news/article.rb +++ b/trunk/ripnews/news/article.rb @@ -1,4 +1,4 @@ -# $Dwarf: article.rb,v 1.92 2004/10/14 21:54:51 ward Exp $ +# $Dwarf: article.rb,v 1.93 2004/10/15 13:48:04 ward Exp $ # $Source$ # @@ -42,13 +42,13 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc") @gotten = {} @group = groupname @preselectpatterns = [] - @newids = {} + @cache_buf = {} @serverlist = nntpservers.split('|') @connections = {} @serverlist.collect{|server| @connections[server] = {} - @newids[server] = {} + @cache_buf[server] = [] begin p server p Time.now @@ -120,7 +120,7 @@ def memusage end def add_preselect_pattern(regexp) - @preselectpatterns.push(regexp) + @preselectpatterns.push(Regexp.new(regexp)) end def preselect(subject) @@ -132,7 +132,7 @@ def preselect(subject) return false end -def add(messid, id, server, subject) +def add(id, messid, subject, server) @messageinfo.push(Message.new(messid, id.to_i, server, subject)) @grouped = false end @@ -145,7 +145,7 @@ end def get_articles(cachedir=false) if cachedir != false - check_cache(cachedir) + cache_check(cachedir) end for server in @connections.keys begin @@ -174,7 +174,7 @@ def get_articles(cachedir=false) del_server(server) end end - read_cache(cachedir) + cache_read(cachedir) # for server in @connections.keys # print "############################################################\n" # print "skip_ids #{server}: #{@connections[server]["skip_ids"].run_list}\n" @@ -213,13 +213,20 @@ def get_articles(cachedir=false) for id in art.keys if art[id].has_key?("subject") and art[id].has_key?("messid") print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2 - @newids[server][id.to_i] = true - add(art[id]["messid"], id, server, art[id]["subject"]) +# @newids[server][id.to_i] = true + # dit wellicht alleen doen indien preselector hem uitkiest + # en anders een leuk regeltje aan de cache toevoegen, + # maar niet in het geheugen houden + if preselect(art[id]["subject"]) + add(id, art[id]["messid"], art[id]["subject"], server) + end + cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server) end end headerlines += subj_lines.length - if headerlines >= 10000 # hmmm, dit lijkt niet te werken... - save_cache(cachedir, server) + #if headerlines >= 10000 # hmmm, dit lijkt niet te werken... + if headerlines >= 1000 # hmmm, dit lijkt niet te werken... + cache_save(cachedir, server) headerlines = 0 end end @@ -228,7 +235,7 @@ def get_articles(cachedir=false) del_server(server) next end - save_cache(cachedir, server) + cache_save(cachedir, server) end GC.start end @@ -352,7 +359,7 @@ p "get_body" end end return resp, id, messid, list - rescue TimeoutError + rescue TimeoutError, Errno::ETIMEDOUT print "Time out, reconnecting to server (get_body)\n" timedout += 1 raise PermError, "Too many timeouts! (get_body)" if timedout > 1 @@ -558,13 +565,24 @@ def save_newsrc() end end -def check_cache(cachedir) +def cache_add(cachedir, id, messid, subject, server) + if @cache_buf.has_key?(server) + @cache_buf[server].push("#{id}|#{messid}|#{subject}\n") + else + @cache_buf[server] = [ "#{id}|#{messid}|#{subject}\n" ] + end + if @cache_buf[server].length > 100 + cache_save(cachedir, server) + end +end + +def cache_check(cachedir) if ! FileTest.exists?(cachedir) print "Cachedir '#{cachedir}' doesn't exists, performance will suffer\n" end end -def read_cache(cachedir) +def cache_read(cachedir) p "reading cache" p Time.now filename = "#{cachedir}/#{@group}.ripnewscache" @@ -572,64 +590,49 @@ p Time.now # id | messageid | subject lineregexp = Regexp.new('^(\d+)\|(.*?)\|(.*)$') for server in @connections.keys + cache_scrub(cachedir, server) excludes[server] = {} @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" ) file = File.new( "#{filename}.#{server}" ) lines = file.readlines - #file.each{|line| lines.collect{|line| - # id | messageid | subject - #if line =~ /^(\d+)\|(.*?)\|(.*)$/ if line =~ lineregexp - #if lineregexp.match(line) != nil unless excludes.has_key?(server) and excludes[server].has_key?($1.to_i) or $1.to_i < @connections[server]["first"] or $1.to_i > @connections[server]["last"] if preselect($3) - add($2, $1, server, $3) + add($1, $2, $3, server) end @connections[server]["skip_ids"].insert($1.to_i) end end } file.close + lines = [] end end p Time.now memusage end -def save_cache(cachedir, server) -p "writing cache" -p Time.now +def cache_save(cachedir, server) +#p "writing cache" +#p Time.now filename = "#{cachedir}/#{@group}.ripnewscache" if FileTest.directory?( cachedir ) - if ! File.copy("#{filename}.#{server}","#{filename}.#{server}.new") - puts "Couldn't renew cache" - end - file = File.new( "#{filename}.#{server}.new", "a+" ) or print "couldn't open cachefile for writing\n" - print "Updating cache...\n" - cache = [] - for i in (0...@messageinfo.length) - if @newids[server].has_key?(@messageinfo[i][:id]) - cache.push("#{@messageinfo[i][:id]}|#{@messageinfo[i][:messid]}|#{@messageinfo[i][:subject]}\n") if @messageinfo[i][:server] == server - end - end - cache.sort! - file.print cache + file = File.new( "#{filename}.#{server}", "a+" ) or print "couldn't open cachefile for writing\n" +# print "Updating cache...\n" + @cache_buf[server].sort! + file.print @cache_buf[server] file.close - if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") ) - print "Cache updated for #{server}\n" - else - print "Couldn't update #{server} cache\n" - end + @cache_buf[server] = [] +# print "Cache updated for #{server}\n" end -p Time.now - scrub_cache(cachedir, server) +#p Time.now end -def scrub_cache(cachedir, server) +def cache_scrub(cachedir, server) # XXX this could and probably should be done in a separate thread... # XXX but it'll work for now # XXX also read articles aren't removed right now @@ -637,21 +640,23 @@ def scrub_cache(cachedir, server) p "scrubbing cache" p Time.now filename = "#{cachedir}/#{@group}.ripnewscache" - regexp = Regexp.new('^(\d+)\|') - infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading" - outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing" - infile.each{ |line| - if line =~ regexp - if $1.to_i >= @connections[server]["first"] and - $1.to_i <= @connections[server]["last"] - outfile.puts(line) + if File.exists?("#{filename}.#{server}") + regexp = Regexp.new('^(\d+)\|') + infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading" + outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing" + infile.each{ |line| + if line =~ regexp + if $1.to_i >= @connections[server]["first"] and + $1.to_i <= @connections[server]["last"] + outfile.puts(line) + end end + } + if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") ) + print "Cache scrubbed for #{server}\n" + else + print "Couldn't scrub #{server} cache\n" end - } - if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") ) - print "Cache scrubbed for #{server}\n" - else - print "Couldn't scrub #{server} cache\n" end p Time.now end @@ -692,24 +697,28 @@ p "pre sort length: #{@groups[subj]['messageinfo'].length}" @groups[subj]["messageinfo"][i].dup ) if serverhash[@groups[subj]["messageinfo"][i][:server]] != nil end - sort_arr.sort!{|a,b| - r = ward_sort(a[:subject], b[:subject]) - if serverhash[a[:server]] == nil or serverhash[b[:server]] == nil - print "serverhash[a[:server]]: #{serverhash[a[:server]]}\n" - print "serverhash[b[:server]]: #{serverhash[b[:server]]}\n" - print "a[:server]: #{a[:server]}\n" - print "b[:server]: #{a[:server]}\n" - print "strange things going on here...\n" - end - if r == 0 - r = serverhash[a[:server]] <=> serverhash[b[:server]] - end - r - } + +p "sort_arr length pre sort: #{sort_arr.length}" + if sort_arr.length != 0 + sort_arr.sort!{|a,b| + r = ward_sort(a[:subject], b[:subject]) + if serverhash[a[:server]] == nil or serverhash[b[:server]] == nil + print "serverhash[a[:server]]: #{serverhash[a[:server]]}\n" + print "serverhash[b[:server]]: #{serverhash[b[:server]]}\n" + print "a[:server]: #{a[:server]}\n" + print "b[:server]: #{a[:server]}\n" + print "strange things going on here...\n" + end + if r == 0 + r = serverhash[a[:server]] <=> serverhash[b[:server]] + end + r + } + end @groups[subj].clear @groups[subj]["total"] = total -p "sort_arr length: #{sort_arr.length}" +p "sort_arr length post sort: #{sort_arr.length}" sort_arr.collect{|i| if @groups[subj].has_key?("messageinfo") @groups[subj]["messageinfo"].push(i) @@ -719,7 +728,9 @@ p "sort_arr length: #{sort_arr.length}" print "subject sort: #{i[:subject]}\n" if Debuglevel > 2 print "server: #{i[:server]}\n" if Debuglevel > 2 } -p "post sort length: #{@groups[subj]['messageinfo'].length}" +if ! @groups[subj]['messageinfo'].nil? + p "post sort length: #{@groups[subj]['messageinfo'].length}" +end #print "Done sorting\n" end