# $Dwarf: article.rb,v 1.114 2005/05/12 07:39:53 ward Exp $
# $Source$
#
# Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

require 'pathname'   # FIX: Pathname is used below but was never required
require Pathname.new(__FILE__).dirname + '../set/intspan'
require Pathname.new(__FILE__).dirname + '../net/nntp'
require Pathname.new(__FILE__).dirname + '../news/newsrc'
require 'tempfile'
require 'timeout'
require 'fileutils'  # FIX: FileUtils.move is used in cache_read but was never required
require 'date'       # FIX: DateTime is used in get_articles but was never required
#require 'yaml'
require 'profiler'

# Error hierarchy: TempError means "retry/skip this item",
# PermError means "give up on this server".
class ArticleError < RuntimeError; end
class TempError < ArticleError; end
class PermError < ArticleError; end

module Net
  # NNTP connection that keeps itself alive: a background thread sends a
  # cheap DATE command whenever the link has been idle for @resettime
  # seconds.  All commands are serialized through a mutex so the
  # keep-alive thread cannot interleave with a real command.
  class KANNTP < Net::NNTP
    def initialize(host, port=nil, user=nil, password=nil, readermode=nil)
      @host = host
      @semaphore = Mutex.new
      @resettime = 60
      @timecounter = @resettime
      @thr = Thread.new{
        Thread.pass
        while true
          # puts "timecounter #{@timecounter} #{@host}"
          if @timecounter > 0
            @timecounter -= 5
            sleep 5
          else
            sendka
            sleep 5
          end
        end
      }
      super
    end

    # Every outgoing protocol line resets the idle counter.
    def putline(line)
      # puts "timerreset #{@host}"
      @timecounter = @resettime
      super
    end

    def longcmd(line)
      @semaphore.synchronize{
        return super
      }
    end

    def shortcmd(line)
      @semaphore.synchronize{
        return super
      }
    end

    def setresettime(time)
      @resettime = time
    end

    # Send the keep-alive; DATE is cheap and has no server-side effects.
    def sendka
      # puts "SENDING KEEP ALIVE TO #{@host}"
      res = shortcmd("DATE")
      # puts res
    end

    # Kill the keep-alive thread before closing; the connection may
    # already be dead, so swallow the usual hangup errors.
    def quit
      @thr.exit
      begin
        super
      rescue EOFError, Errno::EPIPE, IOError
      end
    end

    private :sendka
  end # class KANNTP
end # module Net

############################################################

# Fetches and groups Usenet articles from one or more NNTP servers,
# with a per-server newsrc, an on-disk xover cache, and multipart
# subject grouping ("foo (1/3)" etc.).
class Article
  Debuglevel = 0
  Message = Struct.new(:messid, :id, :date, :from, :server, :subject)

  # nntpservers: '|'-separated list; each entry may be "user:pass@host",
  # "user@host" or plain "host".  newsrc is a per-server file prefix,
  # maxage (days) limits how far back get_articles reads.
  def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
    @messageinfo = []
    @grouped = false
    @groups = {}
    @gotten = {}
    @group = groupname
    @preselectpattern = Regexp.new('^')
    @cache_buf = {}
    @serverlist = []
    @serverpasses = {}
    @maxage = maxage.to_i
    tmplist = nntpservers.split('|')
    tmplist.each{|server|
      if server.match(/(.*)@([^@]*)$/)
        userpass = $1
        server = $2
        @serverlist.push(server)
        @serverpasses[server] = {}
        if userpass.match(/([^:]*):(.*)/)
          @serverpasses[server]['user'] = $1
          @serverpasses[server]['pass'] = $2
        else
          @serverpasses[server]['user'] = userpass
          @serverpasses[server]['pass'] = nil
        end
      else
        @serverlist.push(server)
        @serverpasses[server] = {}
        @serverpasses[server]['user'] = nil
        @serverpasses[server]['pass'] = nil
      end
    }
    #p @serverlist
    #p @serverpasses
    #@serverlist = nntpservers.split('|')
    @connections = {}
    @serverlist.collect{|server|
      @connections[server] = {}
      @cache_buf[server] = []
      begin
        # p server
        # p Time.now
        begin
          timeout(60) do
            #p "connecting"
            @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
          end
          resp = @connections[server]["nntp"].mode_reader
          #p resp
        rescue TimeoutError, Errno::ECONNRESET
          sleep 3
          retry
        end
        # p Time.now
        @connections[server]["skip_ids"] = Set::IntSpan.new()
        @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
        set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
      rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT
        puts "Connection to #{server} failed: #{$!}"
        if ! @connections[server]["nntp"].nil?
          @connections[server]["nntp"].quit
        end
        del_server(server)
      end
    }
  end

  # Tear down the (possibly dead) connection and build a fresh one.
  # Gives up permanently (PermError + server removal) after 2 retries.
  def reconnect(server)
    retries = 0
    begin
      puts "Trying to kill old connection #{Time.now}"
      timeout(10) do
        @connections[server]["nntp"].quit
      end
      puts "Killed old connection #{Time.now}"
    rescue TimeoutError
      puts "Timeout killing old connection"
    rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, Errno::ETIMEDOUT
    end
    begin
      puts "Trying to reconnect #{Time.now}"
      sleep 3
      #timeout(180) do
      timeout(60) do
        @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
      end
      resp = @connections[server]["nntp"].mode_reader
    rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT, TimeoutError, Errno::ECONNREFUSED
      puts "Reconnect to #{server} failed: #{$!}"
      if retries > 1
        del_server(server)
        raise PermError, "Couldn't connect to #{server}"
      else
        retries += 1
        retry
      end
    end
    puts "Succesfully reconnected to #{server}"
  end

  # Crude memory profiler: dump the size of every instance variable.
  def memusage
    puts "memprof:"
    puts "global:"
    # for i in global_variables
    #   print "#{i}\n"
    # end
    # print "local:\n"
    # for i in local_variables
    #   print "#{i}\n"
    # end
    self.instance_variables.each{|i|
      puts i
      print "X: "
      begin
        puts self.instance_eval(i).size
      rescue NoMethodError
      end
    }
  end

  # Only subjects matching this pattern are kept in memory (see add/preselect).
  def set_preselect_pattern(regexp)
    @preselectpattern = Regexp.new(regexp)
  end

  def preselect(subject)
    if subject =~ @preselectpattern
      return true
    else
      return false
    end
    # return ( subject =~ @preselectpattern )
  end

  # Record one article header in memory; invalidates the grouping.
  def add(id, messid, date, from, subject, server)
    @messageinfo.push(Message.new(messid, id.to_i, date.to_i, from, server, subject))
    @grouped = false
  end

  def del_server(server)
    puts "Removing server #{server} from list"
    @connections.delete(server)
    @serverlist.delete(server)
  end

  # Main driver: determine per-server article ranges, read the on-disk
  # cache, then fetch xover data for everything not cached/marked read.
  def get_articles(cachedir=false)
    if cachedir != false
      cache_check(cachedir)
    end
    @connections.keys.each{|server|
      begin
        first, last = get_group_info(server)
      rescue PermError
        puts "Error: #{$!}"
        del_server(server)
        next
      end
      if first.to_i <= last.to_i
        # available articles on server
        # oldest
        @connections[server]["first"] = first ? first.to_i : 0
        # newest
        @connections[server]["last"] = last ? last.to_i : 0
        if Debuglevel > 0
          puts " Server: #{server} First: #{first} Last: #{last}"
        end
        # clean up old newsrc entries
        if @connections[server]["first"] > 0
          @connections[server]["newsrc"].unmark_range(@group, 0, (@connections[server]["first"] - 1).to_s)
          @connections[server]["newsrc"].save_group(@group)
        end
      else
        puts " First article has higher number than last article on server #{server}."
        del_server(server)
      end
    }
    cache_read(cachedir)
    # Articles that are really too old will never be completed, so don't even
    # try to fetch them.  Do fetch some older ones though, since logging shows
    # that older articles do still get downloaded.
    @connections.keys.each{|server|
      if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"]
        cnt_articles = @connections[server]["last"] - @connections[server]["first"]
        if cnt_articles > 10000
          fillerend = (@connections[server]["skip_ids"].max - (cnt_articles/5)).to_i
        else
          fillerend = @connections[server]["skip_ids"].max - 2000
        end
        if @connections[server]["skip_ids"].min && fillerend > @connections[server]["skip_ids"].min
          @connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}")
        end
      end
    }
    @connections.keys.each{|server|
      puts " reading articles from server: #{server}"
      range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
      rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)
      puts "rangelist: #{rangelist}" if Debuglevel > 2
      puts "rangelist: #{rangelist.class.to_s}" if Debuglevel > 2
      puts "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}" if Debuglevel > 2
      begin
        unless rangelist == nil or rangelist =~ /^$/
          lastdate = DateTime.now
          # Newest first: makes it possible to stop once we hit a too-old date.
          rangelist.split(',').reverse.each{|i|
            puts "i: #{i}" if Debuglevel > 2
            begin
              resp, xover_lines = get_xover(server, i)
            rescue TempError, EOFError
              printerr(server)
              next
            end
            art = {}
            xover_lines.collect{|x|
              id = x[0]
              subj = x[1]
              auth = x[2]
              date = x[3]
              messid = x[4]
              art[id] = {} unless art.has_key?(id)
              begin
                lastdate = DateTime.parse(date)
                art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d')
              rescue
                puts $!.message
                puts id
                art[id]["date"] = Time.now.strftime('%Y%m%d')
              end
              art[id]["subject"] = x[1]
              art[id]["messid"] = messid
              art[id]["from"] = auth
              puts "art id: #{id} subj: #{subj}" if Debuglevel > 2
              puts "art id: #{id} from: #{auth}" if Debuglevel > 2
              puts "art id: #{id} date: #{date}" if Debuglevel > 2
              puts "art id: #{id} messid: #{messid}" if Debuglevel > 2
            } # xover_lines.collect
            art.keys.each{|id|
              if art[id].has_key?("date") and art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
                puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
                # Maybe only do this when the preselector picks it, and
                # otherwise just append a line to the cache without keeping
                # it in memory.
                if preselect(art[id]["subject"])
                  add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
                end
                cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
              end
            }
            if @maxage and @maxage > 0
              if lastdate < ( DateTime.now - @maxage )
                puts "Skipping articles older than #{DateTime.now - @maxage}"
                break
              end
            end
          }
        end
      rescue PermError
        del_server(server)
        next
      end
      cache_save(cachedir, server)
    }
    GC.start
  end

  # Issue GROUP and return [first, last] article numbers for @group.
  def get_group_info(server)
    timedout = 0
    errs = 0
    resp = ""
    first = ""
    last = ""
    begin
      timeout(30) do
        begin
          resp, count, first, last, name = @connections[server]["nntp"].group(@group)
        rescue Net::NNTPReplyError
          printerr(server)
          if ( $!.to_s =~ /^503|^400/ )
            reconnect(server)
            retry
          else
            raise PermError, "#{$!}"
          end
        rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Errno::EINVAL
          printerr(server)
          # FIX: errs was never incremented, so the "Too many errors" guard
          # was unreachable and a persistently broken link retried forever.
          errs += 1
          raise PermError, "Too many errors! (get_group_info)" if errs > 3
          reconnect(server)
          retry
        end
      end
    rescue TimeoutError
      timedout += 1
      raise PermError, "Too many timeouts! (get_group_info)" if timedout > 1
      puts "Time out, reconnecting to server... (get_group_info)"
      reconnect(server)
      retry
    end
    return first, last
  end

  # XHDR a header for a range; returns [resp, lines] or nil on give-up.
  def get_xhdr(server, range, header)
    timedout = 0
    attempts = 0
    resp = ""
    lines = []
    begin
      timeout(180) do
        begin
          p Time.now if Debuglevel > 1
          puts "getting headers: #{header}, #{range}" if Debuglevel > 1
          resp, lines = @connections[server]["nntp"].xhdr(header, range)
          if resp.to_i == 500
            puts "xhdr not implemented"
            puts "Error: #{$!}"
          end
          unless resp.to_i >= 200 and resp.to_i < 300
            puts "got response #{resp} while reading group #{@group} from #{server}"
            raise TempError
          end
        rescue Net::NNTPReplyError
          printerr(server)
          if ( $!.to_s =~ /^503|^400/ )
            reconnect(server)
            get_group_info(server)
            retry
          else
            puts "Won't handle this... yet :("
          end
        #rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        rescue Errno::EPIPE, Errno::ECONNRESET
          printerr(server)
          reconnect(server)
          get_group_info(server)
          attempts += 1
          if attempts < 2
            retry
          else
            printerr "giving up"
            return
          end
        end
      end
      return resp, lines
    rescue TimeoutError
      puts "Time out, reconnecting to server (get_xhdr)"
      timedout += 1
      raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
      reconnect(server)
      get_group_info(server)
      retry
    end
  end

  # XOVER a "start-end" range; returns [resp, overview_lines].
  def get_xover(server, range)
    timedout = 0
    resp = ""
    lines = []
    start, ed = range.split("-")
    unless ed
      ed = start
    end
    begin
      timeout(180) do
        begin
          p Time.now if Debuglevel > 1
          puts "getting headers: #{range}" if Debuglevel > 1
          resp, lines = @connections[server]["nntp"].xover(start, ed)
          if resp.to_i == 500
            puts "xover not implemented"
            puts "Error: #{$!}"
          end
          unless resp.to_i >= 200 and resp.to_i < 300
            puts "got response #{resp} while reading group #{@group} from #{server}"
            raise TempError
          end
        rescue Net::NNTPReplyError
          printerr(server)
          if ( $!.to_s =~ /^503|^400/ )
            reconnect(server)
            get_group_info(server)
            retry
          else
            puts "Won't handle this... yet :("
          end
        rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
          printerr(server)
          reconnect(server)
          get_group_info(server)
          retry
        rescue Net::NNTPDataError
          printerr(server)
          reconnect(server)
          get_group_info(server)
          retry
        end
      end
      return resp, lines
    rescue TimeoutError
      puts "Time out, reconnecting to server (get_xover)"
      timedout += 1
      raise PermError, "Too many timeouts! (get_xover)" if timedout > 1
      reconnect(server)
      get_group_info(server)
      retry
    end
  end

  def get_groupname
    return @group
  end

  # BODY one message; returns [resp, id, messid, lines] or false when the
  # article is not available on this server.
  def get_body(server, message)
    #p "get_body"
    timedout = 0
    retries = 0
    resp = ""
    id = ""
    messid = ""
    list = []
    begin
      timeout(180) do
        begin
          list = []
          resp, id, messid, list = @connections[server]["nntp"].body(message)
        rescue Net::NNTPReplyError
          a = ''
          a += $!.to_s
          printerr(server)
          if retries == 0 && (a =~ /^503/ || a =~ /^400/)
            reconnect(server)
            get_group_info(server)
            retries = 1
            retry
          end
          return false
        rescue EOFError, NameError
          printerr(server)
          return false
        rescue Errno::EPIPE, Errno::ECONNRESET
          printerr(server)
          reconnect(server)
          get_group_info(server)
          retry
        end
      end
      return resp, id, messid, list
    rescue TimeoutError, Errno::ETIMEDOUT
      puts "Time out, reconnecting to server (get_body)"
      timedout += 1
      raise PermError, "Too many timeouts! (get_body)" if timedout > 1
      reconnect(server)
      get_group_info(server)
      retry
    end
  end

  # Fetch the bodies of all parts of a subject group, falling back to the
  # next server when the same message-id is available there.
  # NOTE(review): result is overwritten (not concatenated) per part, so only
  # the last fetched body is returned — preserved as-is.
  def get_group_body(subj)
    #p "get_group_body"
    result = []
    group_subject_sort(subj)
    # puts @groups[subj].to_yaml
    return false if @groups[subj]["messageinfo"] == nil
    (0...@groups[subj]["messageinfo"].length).each{|i|
      unless @gotten.has_key?(@groups[subj]["messageinfo"][i][:messid])
        puts "getting article: #{i}" if Debuglevel > 1
        puts "getting article: #{subj}" if Debuglevel > 1
        puts "full subject: #{@groups[subj]["messageinfo"][i][:subject]}" if Debuglevel > 0
        puts "message id: #{@groups[subj]["messageinfo"][i][:messid]}" if Debuglevel > 1
        puts "id: #{@groups[subj]["messageinfo"][i][:id]}" if Debuglevel > 1
        puts "from: #{@groups[subj]["messageinfo"][i][:from]}" if Debuglevel > 1
        puts "server: #{@groups[subj]["messageinfo"][i][:server]}" if Debuglevel > 0
        resp = false
        while resp == false
          if @serverlist.include?(@groups[subj]["messageinfo"][i][:server])
            resp, id, messid, list = get_body(@groups[subj]["messageinfo"][i][:server], @groups[subj]["messageinfo"][i][:messid])
          else
            resp = false
          end
          if resp == false
            if Debuglevel > 1
              puts "mess-id i: #{@groups[subj]["messageinfo"][i][:messid]}"
              # XXX this could be done more cleanly
              puts "mess-id i+1: #{@groups[subj]["messageinfo"][i+1][:messid]}" if @groups[subj]["messageinfo"][i+1] != nil
            end
            if (i+1 < @groups[subj]["messageinfo"].length) and (@groups[subj]["messageinfo"][i][:messid] == @groups[subj]["messageinfo"][i+1][:messid])
              puts " Trying next server..."
              i += 1
            else
              raise TempError, " Message-id not on another server"
            end
          end
        end
        @gotten[ @groups[subj]["messageinfo"][i][:messid] ] = true
        result = list
      end
    }
    return result
  end

  # Fetch only the first part of a subject group (with server fallback).
  def get_group_body_first(subj)
    #p "get_group_body_first"
    group_subject_sort(subj)
    i = 0
    unless @groups[subj]["messageinfo"] != nil && @groups[subj]["messageinfo"][0][:messid]
      p "ieks komt niet door lame check heen"
      return false
    end
    # p "komt wel door lame check heen"
    while @gotten.has_key?(@groups[subj]["messageinfo"][0][:messid]) == false
      puts "getting article: #{subj}" if Debuglevel > 0
      puts "full subject: #{@groups[subj]['messageinfo'][0][:subject]}" if Debuglevel > 0
      puts "message id: #{@groups[subj]['messageinfo'][i][:messid]}" if Debuglevel > 1
      puts "id: #{@groups[subj]['messageinfo'][i][:id]}" if Debuglevel > 1
      puts "from: #{@groups[subj]['messageinfo'][i][:from]}" if Debuglevel > 1
      puts "server: #{@groups[subj]['messageinfo'][0][:server]}" if Debuglevel > 0
      resp = false
      while resp == false
        resp, id, messid, list = get_body(@groups[subj]["messageinfo"][i][:server], @groups[subj]["messageinfo"][i][:messid])
        if resp == false
          puts "mess-id i: #{@groups[subj]['messageinfo'][i][:messid]}"
          # XXX this could be done more cleanly
          puts "mess-id i+1: #{@groups[subj]['messageinfo'][i+1][:messid]}" if @groups[subj]["messageinfo"][i+1] != nil
          if (i+1 < @groups[subj]["messageinfo"].length) and (@groups[subj]["messageinfo"][i][:messid] == @groups[subj]["messageinfo"][i+1][:messid])
            puts "Trying next server..."
            i += 1
          else
            raise TempError, "Message-id not on another server"
          end
        end
      end
      @gotten[@groups[subj]["messageinfo"][i][:messid]] = true
    end
    return list
  end

  # Fetch all parts after the first; stream to `file` when given,
  # otherwise accumulate and return the lines.
  def get_group_body_rest(subj, file=nil)
    #p "get_group_body_rest"
    result = []
    (1...@groups[subj]["messageinfo"].length).each{|i|
      unless @gotten.has_key?(@groups[subj]["messageinfo"][i][:messid])
        puts "getting article: #{i}" if Debuglevel > 1
        puts "getting article: #{subj}" if Debuglevel > 1
        puts "full subject: #{@groups[subj]['messageinfo'][i][:subject]}" if Debuglevel > 0
        puts "message id: #{@groups[subj]['messageinfo'][i][:messid]}" if Debuglevel > 1
        puts "id: #{@groups[subj]['messageinfo'][i][:id]}" if Debuglevel > 1
        puts "from: #{@groups[subj]["messageinfo"][i][:from]}" if Debuglevel > 1
        puts "server: #{@groups[subj]['messageinfo'][i][:server]}" if Debuglevel > 0
        resp = false
        while resp == false
          resp, id, messid, list = get_body(@groups[subj]["messageinfo"][i][:server], @groups[subj]["messageinfo"][i][:messid])
          if resp == false
            puts "mess-id i: #{@groups[subj]["messageinfo"][i][:messid]}"
            # print "mess-id i+1: #{@groups[subj]["messageinfo"][i+1][:messid]}\n"
            # XXX this could be done more cleanly
            puts "mess-id i+1: #{@groups[subj]["messageinfo"][i+1][:messid]}" if @groups[subj]["messageinfo"][i+1] != nil
            if (i+1 < @groups[subj]["messageinfo"].length) and (@groups[subj]["messageinfo"][i][:messid] == @groups[subj]["messageinfo"][i+1][:messid])
              puts "Trying next server..."
              i += 1
            else
              raise TempError, "Message-id not on another server"
            end
          end
        end
        @gotten[ @groups[subj]["messageinfo"][i][:messid] ] = true
        if file
          list.collect{|line| file.print "#{line}\n"}
        else
          result.concat(list)
        end
      end
    }
    return result
  end

  def get_group_subjects
    group_subjects unless @grouped
    return @groups.keys
  end

  def get_group_poster(subj)
    group_subject_sort(subj)
    unless @groups[subj]["messageinfo"] != nil && @groups[subj]["messageinfo"][0][:from]
      p "ieks komt niet door lame check heen"
      return false
    end
    return @groups[subj]["messageinfo"][0][:from]
  end

  def get_group_date(subj)
    group_subject_sort(subj)
    unless @groups[subj]["messageinfo"] != nil && @groups[subj]["messageinfo"][0][:date]
      p "ieks komt niet door lame check heen"
      return false
    end
    return @groups[subj]["messageinfo"][0][:date]
  end

  # A group is complete when the number of distinct message-ids reaches
  # the part total parsed from the subject.
  def group_is_complete(subj)
    group_subjects unless @grouped
    #print "Subject: #{subj}\n"
    messids = []
    @groups[subj]["messageinfo"].each {|x|
      messids.push(x[:messid])
    }
    #p "group complete?: #{messids}"
    umessids = messids.uniq
    if (umessids.length ) >= @groups[subj]["total"].to_i
      return true
    else
      return false
    end
  end

  # Percentage of a group's parts available on the primary (first) server.
  def group_percentage_primary(subj)
    group_subjects unless @grouped
    groupsize = @groups[subj]["messageinfo"].length
    primarycount = 0
    @groups[subj]["messageinfo"].each {|x|
      if x[:server] == @serverlist[0]
        primarycount += 1
      end
    }
    percentage = ((100.0/groupsize)*primarycount).to_i
    return percentage
  end

  # Percentage of a group's parts available ONLY on the fallback (last) server.
  def group_percentage_fallback(subj)
    group_subjects unless @grouped
    groupsize = @groups[subj]["messageinfo"].length
    fallbackcount = 0
    if @serverlist[-1] == @serverlist[0]
      return 0
    end
    onmain = {}
    @groups[subj]["messageinfo"].each {|x|
      if x[:server] != @serverlist[-1] && onmain[x[:subject]].nil?
        onmain[x[:subject]] = 1
      end
    }
    @groups[subj]["messageinfo"].each {|x|
      if x[:server] == @serverlist[-1] && onmain[x[:subject]].nil?
        fallbackcount += 1
      end
    }
    percentage = ((100.0/groupsize)*fallbackcount).to_i
    return percentage
  end

  def group_is_singlepart(subj)
    @groups[subj]["total"].to_i == 1
  end

  def group_is_multipart(subj)
    @groups[subj]["total"].to_i > 1
  end

  # Build @groups: collapse "subject (n/m)" / "subject [n/m]" variants
  # into one key "subject (m)"; parts numbered 0 are dropped.
  def group_subjects
    @groups = {}
    (0...@messageinfo.length).each{|i|
      puts "group subjects: #{i} #{@messageinfo[i][:subject]}" if Debuglevel > 3
      # Maybe it matters that there is an order of preference in this
      # match... no idea.
      if @messageinfo[i][:subject] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @messageinfo[i][:subject] =~ /(.*)\[(\d+)\/(\d+)\](.*)/
        #if @messageinfo[i][:subject] =~ /(.*)[\(\[](\d+)\/(\d+)[\)\]](.*)/
        j = "#{$1}#{$4} (#{$3})"
        number = $2
        total = $3
      else
        j = @messageinfo[i][:subject]
        number = 1
        total = 1
      end
      if @groups.has_key?(j) and number.to_i != 0
        @groups[j]["messageinfo"].push(@messageinfo[i])
      elsif number.to_i != 0
        @groups[j] = {}
        @groups[j]["total"] = total
        @groups[j]["messageinfo"] = [ (@messageinfo[i]) ]
      end
    }
    @grouped = true
  end

  # Replace a server's skip-set; rejects infinite or negative spans.
  def set_skip_ids(server, ids)
    set = Set::IntSpan.new(ids)
    set.finite? or return false
    min = set.min
    min != nil and min < 0 and return false
    @connections[server]["skip_ids"] = set
    return true
  end

  # Mark every part of a subject group as read in its server's newsrc.
  def group_update_newsrc(subject)
    (0...@groups[subject]["messageinfo"].length).each{|i|
      if @connections[@groups[subject]["messageinfo"][i][:server]]
        @connections[@groups[subject]["messageinfo"][i][:server]]["newsrc"].mark(@group, @groups[subject]["messageinfo"][i][:id])
        #p @group
        #p @groups[subject]["messageinfo"][i][:id]
      end
    }
  end

  def save_newsrc()
    @connections.keys.each{|server|
      #@connections[server]["newsrc"].save
      @connections[server]["newsrc"].save_group(@group)
    }
  end

  # Buffer one cache line; flush to disk after 100 lines.
  def cache_add(cachedir, id, messid, date, from, subject, server)
    if @cache_buf.has_key?(server)
      @cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n")
    else
      @cache_buf[server] = [ "#{id}|#{messid}|#{date}|#{from}|#{subject}\n" ]
    end
    if @cache_buf[server].length > 100
      cache_save(cachedir, server)
    end
  end

  def cache_check(cachedir)
    if ! FileTest.exists?(cachedir)
      puts "Cachedir '#{cachedir}' doesn't exists, performance will suffer"
    end
  end

  # Read each server's cache file, dropping entries outside the server's
  # current article range and entries already marked read, and rewrite
  # the scrubbed file in place.
  def cache_read(cachedir)
    # Profiler__::start_profile
    puts "#{Time.now} Reading & scrubbing caches"
    filename = "#{cachedir}/#{@group}.ripnewscache"
    excludes = {}
    @connections.keys.each{|server|
      first = @connections[server]["first"]
      last = @connections[server]["last"]
      #cache_scrub(cachedir, server)
      puts " #{Time.now} Reading cache for #{server}"
      excludes[server] = {}
      @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
      # FIX: the cache path interpolation was corrupted to a literal
      # "#(unknown)" — restored "#{filename}" (the local defined above,
      # previously unused) throughout this method.
      if FileTest.directory?(cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" )
        outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
        cachefile = File.new( "#{filename}.#{server}" )
        begin
          while true do
            # using each on a big cachefile leads to out of memory conditions
            line = cachefile.readline
            begin
              line.encode!("US-ASCII")
            rescue
              # If it gets here, the original encoding is unknown;
              # gracefully give up and go to the next line
              puts "String#encode couldn't handle: '#{line}'"
              next
              # so the following probably won't help
              #line.gsub!(/\\/, "")
            end
            id_i, messid, date, from, subject = line.split('|', 5)
            id_i = id_i.to_i
            if first <= id_i and id_i <= last
              if ! excludes[server].has_key?(id_i)
                outfile.puts(line)
                if preselect(subject)
                  add(id_i, messid, date, from, subject, server)
                end
                # XXX all the slowness of cache_read is in this line:
                @connections[server]["skip_ids"].insert!(id_i)
              end
            end
          end
        rescue EOFError
        end
        if ( FileUtils.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
          puts " #{Time.now} Cache scrubbed for #{server}"
        else
          puts "Couldn't scrub #{server} cache"
        end
      end
    }
    puts "#{Time.now} Caches read"
    # Profiler__::stop_profile
    # Profiler__::print_profile($stderr)
    #memusage
  end

  # Append the buffered cache lines for one server and clear the buffer.
  def cache_save(cachedir, server)
    #p "writing cache"
    #p Time.now
    filename = "#{cachedir}/#{@group}.ripnewscache"
    if FileTest.directory?( cachedir )
      # FIX: restored "#{filename}" (was corrupted to a literal "#(unknown)").
      file = File.new( "#{filename}.#{server}", "a+" ) or puts "couldn't open cachefile for writing"
      # print "Updating cache...\n"
      @cache_buf[server].sort!
      @cache_buf[server].each{|line|
        begin
          file.puts line
        rescue Encoding::UndefinedConversionError
          next
        end
      }
      file.close
      @cache_buf[server] = []
      # print "Cache updated for #{server}\n"
    end
    #p Time.now
  end

  def cache_scrub(cachedir, server)
    # XXX this could and probably should be done in a separate thread...
    # XXX but it'll work for now
    # XXX also read articles aren't removed right now
    # XXX this could be done, but I don't know if I want to pay the overhead
    p "scrubbing cache"
    p Time.now
    filename = "#{cachedir}/#{@group}.ripnewscache"
    # FIX: restored "#{filename}" (was corrupted to a literal "#(unknown)").
    if File.exists?("#{filename}.#{server}")
      # regexp = Regexp.new('^(\d+)\|')
      infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading"
      outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
      infile.each{ |line|
        id, messid, date, subject = line.split("|", 3)
        if id.to_i >= @connections[server]["first"] and id.to_i <= @connections[server]["last"]
          outfile.puts(line)
        end
      }
    end
    p Time.now
  end

  ###############################################################
  # a base64 decoder...
  # NOTE(review): the character classes omit '/', so standard base64 data
  # containing '/' is decoded incorrectly — confirm whether that is
  # deliberate for the feeds this handles.
  def decode64(str)
    string = ''
    str.split("\n").each{|line|
      line.delete!('^A-Za-z0-9+')   # remove non-base64 chars
      line.tr!('A-Za-z0-9+', ' -_') # convert to uuencoded format
      len = ["#{32 + line.length * 3 / 4}"].pack("c") # compute length byte
      # FIX: unpack("u") returns an Array; appending it to a String raised
      # TypeError on every call.  Take the decoded element.
      string += "#{len}#{line}".unpack("u")[0] # uudecode and concatenate
    }
    return string
  end

  ###############################################################

  # Sort the parts of one subject group: natural subject order first,
  # then by server preference (@serverlist order) as tie-breaker.
  def group_subject_sort(subj)
    # XXX Why am I using sort_arr here instead of in-place sorting?
    #print "Sorting articles\n"
    serverhash = {}
    (0...@serverlist.length).each{|i|
      serverhash[@serverlist[i]] = i
    }
    total = @groups[subj]["total"]
    sort_arr = []
    #p "pre sort length: #{@groups[subj]['messageinfo'].length}"
    (0...@groups[subj]["messageinfo"].length).each{|i|
      puts "subj sort #{@groups[subj]['messageinfo'][i][:subject]}" if Debuglevel > 2
      puts "subj sort #{@groups[subj]['messageinfo'][i][:messid]}" if Debuglevel > 2
      puts "subj sort #{@groups[subj]['messageinfo'][i][:id]}" if Debuglevel > 2
      puts "subj sort #{@groups[subj]['messageinfo'][i][:server]}" if Debuglevel > 2
      sort_arr.push( @groups[subj]["messageinfo"][i].dup ) if serverhash[@groups[subj]["messageinfo"][i][:server]] != nil
    }
    #p "sort_arr length pre sort: #{sort_arr.length}"
    if sort_arr.length != 0
      sort_arr.sort!{|a,b|
        r = ward_sort(a[:subject], b[:subject])
        if serverhash[a[:server]] == nil or serverhash[b[:server]] == nil
          puts "serverhash[a[:server]]: #{serverhash[a[:server]]}"
          puts "serverhash[b[:server]]: #{serverhash[b[:server]]}"
          puts "a[:server]: #{a[:server]}"
          # FIX: debug line printed a[:server] under the b[:server] label.
          puts "b[:server]: #{b[:server]}"
          puts "strange things going on here..."
        end
        if r == 0
          r = serverhash[a[:server]] <=> serverhash[b[:server]]
        end
        r
      }
    end
    @groups[subj].clear
    @groups[subj]["total"] = total
    #p "sort_arr length post sort: #{sort_arr.length}"
    sort_arr.collect{|i|
      if @groups[subj].has_key?("messageinfo")
        @groups[subj]["messageinfo"].push(i)
      else
        @groups[subj]["messageinfo"] = [ i ]
      end
      puts "subject sort: #{i[:subject]}" if Debuglevel > 2
      puts "server: #{i[:server]}" if Debuglevel > 2
    }
    #if ! @groups[subj]['messageinfo'].nil?
    #  p "post sort length: #{@groups[subj]['messageinfo'].length}"
    #end
    #print "Done sorting\n"
  end

  # Natural ("version-style") comparison: numeric runs compare as
  # integers, everything else as strings.
  def ward_sort(a, b)
    c = a.to_s.split(/([0-9]+)/)
    d = b.to_s.split(/([0-9]+)/)
    c.collect{|x|
      y = d.shift
      r = ((x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)) ? (x.to_i <=> y.to_i) : (x.to_s <=> y.to_s)
      if r != 0
        return r
      end
    }
    return -1 if (d != [])
    return 0
  end

  # Split large "a-b" runs into chunks of at most `chunksize` articles so
  # individual XOVER requests stay bounded.
  def rechunk_runlist(runlist)
    return nil if runlist == nil
    chunksize = 500
    blalist = runlist.split(',')
    # hmm, if the number of articles between two commas is — say — < 3,
    # then it's probably very worthwhile to just fetch those 3 as well
    # and do fewer network requests...
    # the way to do that would be something like removing the comma and
    # one of the 2 numbers...
    blalist.collect!{|x|
      result = ""
      if x =~ /(.*)-(.*)/
        a = $1
        while ($2.to_i - a.to_i) > chunksize
          result << "#{a}-#{a.to_i+(chunksize-1)},"
          a = a.to_i + chunksize
        end
        result << "#{a}-#{$2}"
      else
        x
      end
    }
    blup = blalist.join(",")
    return blup
  end

  def printerr(server)
    puts "Caught #{$!.class} reading from server #{server} (#{caller[0]})"
    puts "Error: #{$!}"
  end

  def disconnect
    @connections.keys.each{|server|
      begin
        @connections[server]["nntp"].quit
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, IOError
      end
    }
  end

  def quit
    # just testing if these should be reset...
    @messageinfo = []
    disconnect
  end

  private :ward_sort
end # class