diff --git a/trunk/ripnews/news/article.rb b/trunk/ripnews/news/article.rb index bea8fc1..0e37d7b 100644 --- a/trunk/ripnews/news/article.rb +++ b/trunk/ripnews/news/article.rb @@ -5,58 +5,128 @@ require 'set/intspan' require 'net/nntp' +require 'news/newsrc' require 'tempfile' class Article -Debuglevel = 0 +Debuglevel = 1 -def initialize(server) +def initialize(nntpservers, groupname, newsrc="~/.newsrc") @messids = [] @ids = [] + @servers = [] @subjects = [] + @sorted = false @grouped = false + #@skip_ids = Set::IntSpan.new() @groups = {} - @nntp = Net::NNTP.new(server) - @skip_ids = Set::IntSpan.new() + @gotten = {} + @group = groupname + + @serverlist = nntpservers.split('|') + @connections = {} + @serverlist.collect{|server| + @connections[server] = {} + @connections[server]["nntp"] = Net::NNTP.new(server) + @connections[server]["skip_ids"] = Set::IntSpan.new() + @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}") + set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group)) + } + #@nntp = Net::NNTP.new(nntpservers) end -def add(messid, id, subject) +def add(messid, id, server, subject) +# print "Messid: #{messid}\n" +# print "Id: #{id}\n" +# print "Server: #{server}\n" +# print "Subject: #{subject}\n" @messids += [messid] @ids += [id.to_i] + @servers += [server] @subjects += [subject] @sorted = false @grouped = false end -def get_articles(group, cachedir=false) - begin - resp, count, first, last, name = @nntp.group(group) - rescue Net::NNTP::RuntimeError - print "Couldn't open group: #{group}\n" - return false - end - read_cache(group, cachedir, first, last) - range = Set::IntSpan.new("#{first}-#{last}") - for i in (range.diff(@skip_ids).elements) +def get_articles(cachedir=false) + for server in @connections.keys begin - @nntp.stat(i) - resp, id, messid, list = @nntp.head(i) - for j in list - if j =~ /Subject: (.*)/ - subj=$1 - end - end - print "get_articles messid: #{messid}\n" if Debuglevel > 1 - print "get_articles id: #{id}\n" if Debuglevel > 1 - print "get_articles subject: #{subj}\n" if Debuglevel > 1 - add(messid, id, subj) + resp, count, first, last, name = @connections[server]["nntp"].group(@group) + @connections[server]["first"] = first ? first : 0 + @connections[server]["last"] = last ? last : 0 rescue Net::NNTP::RuntimeError - print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1 + print "Couldn't open group: #{@group}\n" + return false end end - save_cache(group, cachedir) + read_cache(cachedir) + for server in @connections.keys + print "reading articles from server: #{server}\n" + range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}") + rangelist = range.diff(@connections[server]["skip_ids"]).run_list + print "rangelist: #{rangelist}\n" if Debuglevel >1 + print "rangelist: #{rangelist.type.to_s}\n" if Debuglevel >1 + print "rangelsit elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel >1 + unless rangelist == nil or rangelist =~ /^$/ + for i in rangelist.split(',') + print "i: #{i}\n" if Debuglevel > 1 + begin + resp, subj_lines = @connections[server]["nntp"].xhdr("subject", i) + unless resp.to_i >= 200 and resp.to_i < 300 + print "got response #{resp} while reading group #{@group} from #{server}\n" + return false + end + resp, messid_lines = @connections[server]["nntp"].xhdr("message-id", i) + unless resp.to_i >=200 and resp.to_i < 300 + print "got response #{resp} while reading group #{@group} from #{server}\n" + return false + end + art = {} + subj_lines.collect{|x| + art[x[0]] = {} unless art.has_key?(x[0]) + art[x[0]]["subject"] = x[1] + print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1 + } + messid_lines.collect{|x| + art[x[0]] = {} unless art.has_key?(x[0]) + art[x[0]]["messid"] = x[1] + print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1 + } + for id in art.keys + if art[id].has_key?("subject") and art[id].has_key?("messid") + print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1 + add(art[id]["messid"], id, server, art[id]["subject"]) + end + end + rescue Net::NNTP::RuntimeError + end + end + end + +# if xhdr doesn't work, this should be used +# for i in (range.diff(@connections[server]["skip_ids"]).elements) +# begin +# @connections[server]["nntp"].stat(i) +# resp, id, messid, list = @connections[server]["nntp"].head(i) +# for j in list +# if j =~ /Subject: (.*)/ +# subj=$1 +# end +# end +# print "get_articles messid: #{messid}\n" if Debuglevel > 1 +# print "get_articles id: #{id}\n" if Debuglevel > 1 +# print "get_articles server: #{server}\n" if Debuglevel > 1 +# print "get_articles subject: #{subj}\n" if Debuglevel > 1 +# add(messid, id, server, subj) +# rescue Net::NNTP::RuntimeError +# print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1 +# end +# end + end + subject_sort unless @sorted # store cache sorted + save_cache(cachedir) end def get_groups @@ -66,40 +136,60 @@ end def get_group_body(subj) result = [] - for i in @groups[subj]["messages"][0..@groups[subj]["messages"].length] - begin - resp, id, messid, list = @nntp.body(i) - rescue Net::NNTPReplyError - print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" + for i in (0...@groups[subj]["messages"].length) + unless @gotten.has_key?(@groups[subj]["messages"][i]) + print "getting article: #{i}\n" if Debuglevel > 0 + print "#{@groups[subj]}\n" + begin + print "Server: #{@groups[subj]["servers"][i]}\n" + print "Messid: #{@groups[subj]["messages"][i]}\n" + resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i]) + rescue Net::NNTPReplyError + print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" + end + result = list end - result = list end return result end def get_group_body_first(subj) begin - resp, id, messid, list = @nntp.body(@groups[subj]["messages"][0]) + resp, id, messid, list = @connections[@groups[subj]["servers"][0]]["nntp"].body(@groups[subj]["messages"][0]) rescue Net::NNTPReplyError print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" return false end - print "getting article: #{subj}\n" if Debuglevel > 0 - print "message id: #{messid}\n" if Debuglevel > 0 - print "id: #{id}\n" if Debuglevel > 0 + print "getting article: #{subj}\n" if Debuglevel > 0 + print "message id: #{messid}\n" if Debuglevel > 0 + print "id: #{id}\n" if Debuglevel > 0 + print "server: #{@groups[subj]["servers"][0]}\n" if Debuglevel > 0 + print "full subject: #{@groups[subj]["subject"][0]}\n" if Debuglevel > 0 + @gotten[messid] = true return list end def get_group_body_rest(subj, file=nil) result = [] - for i in @groups[subj]["messages"][1..@groups[subj]["messages"].length] - print "getting article: #{i}\n" if Debuglevel > 0 - begin - resp, id, messid, list = @nntp.body(i) - rescue Net::NNTPReplyError - print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" - return false + for i in (1...@groups[subj]["messages"].length) + unless @gotten.has_key?(@groups[subj]["messages"][i]) + print "getting article: #{i}\n" if Debuglevel > 0 + begin + print "Server: #{@groups[subj]["servers"][i]}\n" + print "Messid: #{@groups[subj]["messages"][i]}\n" + resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i]) + + rescue Net::NNTPReplyError + print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" + return false + end end + print "getting article: #{subj}\n" if Debuglevel > 0 + print "message id: #{messid}\n" if Debuglevel > 0 + print "id: #{id}\n" if Debuglevel > 0 + print "server: #{@groups[subj]["servers"][i]}\n" if Debuglevel > 0 + print "full subject: #{@groups[subj]["subject"][i]}\n" if Debuglevel > 0 + @gotten[ @groups[subj]["messages"][i] ] = true if file list.collect{|line| file.print "#{line}\n"} else @@ -114,26 +204,27 @@ def get_group_subjects return @groups.keys end -def get_group_ids(subject) +def get_group_messids(subject) group_subjects unless @grouped - return @groups[subject]["ids"] + return @groups[subject]["messages"] end -def group_complete(subj) +def group_is_complete(subj) group_subjects unless @grouped print "length: #{@groups[subj]["messages"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1 - if (@groups[subj]["messages"].length ) >= @groups[subj]["total"].to_i + umessids = @groups[subj]["messages"].uniq + if (umessids.length ) >= @groups[subj]["total"].to_i return true else return false end end -def group_singlepart(subj) +def group_is_singlepart(subj) @groups[subj]["total"].to_i == 1 end -def group_multipart(subj) +def group_is_multipart(subj) @groups[subj]["total"].to_i > 1 end @@ -149,7 +240,7 @@ def group_subjects @groups = {} subject_sort unless @sorted prev_subj = "" - for i in (0..@subjects.length) + for i in (0...@subjects.length) print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1 if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/ j = "#{$1}#{$4}" @@ -162,60 +253,84 @@ def group_subjects end if j == prev_subj and number.to_i !=0 @groups[j]["messages"] += [ @messids[i] ] - @groups[j]["ids"] += [ @ids[i].to_i ] + @groups[j]["ids"] += [ @ids[i].to_i ] + @groups[j]["servers"] += [ @servers[i] ] + @groups[j]["subject"] += [ @subjects[i] ] else unless number.to_i == 0 prev_subj = j @groups[j] = {} - @groups[j]["total"] = total + @groups[j]["total"] = total @groups[j]["messages"] = [ @messids[i] ] - @groups[j]["ids"] = [ @ids[i].to_i ] + @groups[j]["ids"] = [ @ids[i].to_i ] + @groups[j]["servers"] = [ @servers[i] ] + @groups[j]["subject"] = [ @subjects[i] ] end end end @grouped = true end -def set_skip_ids(ids) +def set_skip_ids(server, ids) set = Set::IntSpan.new(ids) set.finite or return false min = set.min min != nil and min < 0 and return false - @skip_ids = set + @connections[server]["skip_ids"] = set return true end -def read_cache(group, cachedir, first=0, last=0) - filename = "#{cachedir}/#{group}.ripnewscache" +def group_update_newsrc(subject) + for i in (0...@groups[subject]["messages"].length) + @connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i]) + end +end + +def save_newsrc() + for server in @connections.keys + @connections[server]["newsrc"].save + end +end + +def read_cache(cachedir) + filename = "#{cachedir}/#{@group}.ripnewscache" excludes = {} - @skip_ids.elements.collect!{|x| excludes[x]=true} + for server in @connections.keys + excludes[server] = {} + @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} + end if FileTest.directory?( cachedir) and FileTest.file?( filename ) and FileTest.readable?( filename ) file = File.new( filename ) lines = file.readlines for line in lines #print "line: #{line}\n" - if line =~ /^(.*?)\|(\d+)\|(.*)$/ + if line =~ /^(.*?)\|(\d+)\|(.*?)\|(.*)$/ #print "messid: #{$1}\n" #print "id: #{$2}\n" - #print "subject: #{$3}\n" - unless excludes.has_key?($2.to_i) or - $2.to_i < first.to_i or - $2.to_i > last.to_i - add($1, $2, $3) - @skip_ids.insert($2.to_i) + #print "server: #{$3}\n" + #print "subject: #{$4}\n" + #print "First: #{@connections[$3]["first"].to_i}\n"; + #print "Last: #{@connections[$3]["last"].to_i}\n"; + if @connections.has_key?($3) + unless excludes.has_key?($3) and excludes[$3].has_key?($2.to_i) or + $2.to_i < @connections[$3]["first"].to_i or + $2.to_i > @connections[$3]["last"].to_i + add($1, $2, $3, $4) + @connections[$3]["skip_ids"].insert($2.to_i) + end end end end end end -def save_cache(group, cachedir) - filename = "#{cachedir}/#{group}.ripnewscache" +def save_cache(cachedir) + filename = "#{cachedir}/#{@group}.ripnewscache" if FileTest.directory?( cachedir ) file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n" for i in (0...@subjects.length) - file.print("#{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n") - #print "writing: #{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n" + file.print("#{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n") + #print "writing: #{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n" end end end @@ -227,13 +342,15 @@ def uudecode(data, outfile=nil) when "Array" print "Calling _uudecode_array\n" if Debuglevel>0 mode, file, body = _uudecode_array(data) - when "File" + when "File", "Tempfile" unless outfile print "uudecode: need outfile\n" exit end print "Calling _uudecode_file\n" if Debuglevel>0 mode, file, body = _uudecode_file(data, outfile) + else + print "Funny stuff in uudecode. Data of type \"#{data.type.to_s}\"\n" end return mode, file, body end @@ -396,26 +513,29 @@ def subject_sort print "subj sort #{@subjects[i]}\n" if Debuglevel >2 print "subj sort #{@messids[i]}\n" if Debuglevel >2 print "subj sort #{@ids[i]}\n" if Debuglevel >2 - sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]}"] + print "subj sort #{@servers[i]}\n" if Debuglevel >2 + sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]} #{@servers[i]}"] end sort_arr.sort!{|a,b| ward_sort(a, b)} @messids = [] @ids = [] @subjects = [] - for i in sort_arr - i =~ /^(.*) (<[^<]*>) (\d+)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+)$/ + @servers = [] + sort_arr.collect{|i| + i =~ /^(.*) (<[^<]*>) (\d+) (\S*)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+) (\S*)$/ @messids += [$2] @ids += [$3] @subjects += [$1] + @servers += [$4] print "subject sort: #{$1}\n" if Debuglevel >2 - end + } @sorted = true end def ward_sort(a, b) - a =~ /^(.*) (<[^<]*> \d+)$/ + a =~ /^(.*) (<[^<]*> \d+ \S*)$/ c = $1.to_s.split(/([0-9]+)/) - b =~ /^(.*) (<[^<]*> \d+)$/ + b =~ /^(.*) (<[^<]*> \d+ \S*)$/ d = $1.to_s.split(/([0-9]+)/) for x in c @@ -432,7 +552,9 @@ def ward_sort(a, b) end def quit - @nntp.quit + for server in @connections.keys + @connections[server]["nntp"].quit + end end private :ward_sort