- fix sorting

- allow multiple servers
- use xhdr for faster header collection
- save cache sorted
This commit is contained in:
Ward Wouts 2002-05-05 20:07:03 +00:00
parent 3b1f9f304f
commit 5ca723bfc0

View file

@ -5,58 +5,128 @@
require 'set/intspan' require 'set/intspan'
require 'net/nntp' require 'net/nntp'
require 'news/newsrc'
require 'tempfile' require 'tempfile'
class Article class Article
Debuglevel = 0 Debuglevel = 1
def initialize(server) def initialize(nntpservers, groupname, newsrc="~/.newsrc")
@messids = [] @messids = []
@ids = [] @ids = []
@servers = []
@subjects = [] @subjects = []
@sorted = false @sorted = false
@grouped = false @grouped = false
#@skip_ids = Set::IntSpan.new()
@groups = {} @groups = {}
@nntp = Net::NNTP.new(server) @gotten = {}
@skip_ids = Set::IntSpan.new() @group = groupname
@serverlist = nntpservers.split('|')
@connections = {}
@serverlist.collect{|server|
@connections[server] = {}
@connections[server]["nntp"] = Net::NNTP.new(server)
@connections[server]["skip_ids"] = Set::IntSpan.new()
@connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
}
#@nntp = Net::NNTP.new(nntpservers)
end end
def add(messid, id, subject) def add(messid, id, server, subject)
# print "Messid: #{messid}\n"
# print "Id: #{id}\n"
# print "Server: #{server}\n"
# print "Subject: #{subject}\n"
@messids += [messid] @messids += [messid]
@ids += [id.to_i] @ids += [id.to_i]
@servers += [server]
@subjects += [subject] @subjects += [subject]
@sorted = false @sorted = false
@grouped = false @grouped = false
end end
def get_articles(group, cachedir=false) def get_articles(cachedir=false)
for server in @connections.keys
begin begin
resp, count, first, last, name = @nntp.group(group) resp, count, first, last, name = @connections[server]["nntp"].group(@group)
@connections[server]["first"] = first ? first : 0
@connections[server]["last"] = last ? last : 0
rescue Net::NNTP::RuntimeError rescue Net::NNTP::RuntimeError
print "Couldn't open group: #{group}\n" print "Couldn't open group: #{@group}\n"
return false return false
end end
read_cache(group, cachedir, first, last) end
range = Set::IntSpan.new("#{first}-#{last}") read_cache(cachedir)
for i in (range.diff(@skip_ids).elements) for server in @connections.keys
print "reading articles from server: #{server}\n"
range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
rangelist = range.diff(@connections[server]["skip_ids"]).run_list
print "rangelist: #{rangelist}\n" if Debuglevel >1
print "rangelist: #{rangelist.type.to_s}\n" if Debuglevel >1
print "rangelsit elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel >1
unless rangelist == nil or rangelist =~ /^$/
for i in rangelist.split(',')
print "i: #{i}\n" if Debuglevel > 1
begin begin
@nntp.stat(i) resp, subj_lines = @connections[server]["nntp"].xhdr("subject", i)
resp, id, messid, list = @nntp.head(i) unless resp.to_i >= 200 and resp.to_i < 300
for j in list print "got response #{resp} while reading group #{@group} from #{server}\n"
if j =~ /Subject: (.*)/ return false
subj=$1 end
resp, messid_lines = @connections[server]["nntp"].xhdr("message-id", i)
unless resp.to_i >=200 and resp.to_i < 300
print "got response #{resp} while reading group #{@group} from #{server}\n"
return false
end
art = {}
subj_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["subject"] = x[1]
print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1
}
messid_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["messid"] = x[1]
print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1
}
for id in art.keys
if art[id].has_key?("subject") and art[id].has_key?("messid")
print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1
add(art[id]["messid"], id, server, art[id]["subject"])
end end
end end
print "get_articles messid: #{messid}\n" if Debuglevel > 1
print "get_articles id: #{id}\n" if Debuglevel > 1
print "get_articles subject: #{subj}\n" if Debuglevel > 1
add(messid, id, subj)
rescue Net::NNTP::RuntimeError rescue Net::NNTP::RuntimeError
print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
end end
end end
save_cache(group, cachedir) end
# if xhdr doesn't work, this should be used
# for i in (range.diff(@connections[server]["skip_ids"]).elements)
# begin
# @connections[server]["nntp"].stat(i)
# resp, id, messid, list = @connections[server]["nntp"].head(i)
# for j in list
# if j =~ /Subject: (.*)/
# subj=$1
# end
# end
# print "get_articles messid: #{messid}\n" if Debuglevel > 1
# print "get_articles id: #{id}\n" if Debuglevel > 1
# print "get_articles server: #{server}\n" if Debuglevel > 1
# print "get_articles subject: #{subj}\n" if Debuglevel > 1
# add(messid, id, server, subj)
# rescue Net::NNTP::RuntimeError
# print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
# end
# end
end
subject_sort unless @sorted # store cache sorted
save_cache(cachedir)
end end
def get_groups def get_groups
@ -66,20 +136,26 @@ end
def get_group_body(subj) def get_group_body(subj)
result = [] result = []
for i in @groups[subj]["messages"][0..@groups[subj]["messages"].length] for i in (0...@groups[subj]["messages"].length)
unless @gotten.has_key?(@groups[subj]["messages"][i])
print "getting article: #{i}\n" if Debuglevel > 0
print "#{@groups[subj]}\n"
begin begin
resp, id, messid, list = @nntp.body(i) print "Server: #{@groups[subj]["servers"][i]}\n"
print "Messid: #{@groups[subj]["messages"][i]}\n"
resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i])
rescue Net::NNTPReplyError rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
end end
result = list result = list
end end
end
return result return result
end end
def get_group_body_first(subj) def get_group_body_first(subj)
begin begin
resp, id, messid, list = @nntp.body(@groups[subj]["messages"][0]) resp, id, messid, list = @connections[@groups[subj]["servers"][0]]["nntp"].body(@groups[subj]["messages"][0])
rescue Net::NNTPReplyError rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
return false return false
@ -87,19 +163,33 @@ def get_group_body_first(subj)
print "getting article: #{subj}\n" if Debuglevel > 0 print "getting article: #{subj}\n" if Debuglevel > 0
print "message id: #{messid}\n" if Debuglevel > 0 print "message id: #{messid}\n" if Debuglevel > 0
print "id: #{id}\n" if Debuglevel > 0 print "id: #{id}\n" if Debuglevel > 0
print "server: #{@groups[subj]["servers"][0]}\n" if Debuglevel > 0
print "full subject: #{@groups[subj]["subject"][0]}\n" if Debuglevel > 0
@gotten[messid] = true
return list return list
end end
def get_group_body_rest(subj, file=nil) def get_group_body_rest(subj, file=nil)
result = [] result = []
for i in @groups[subj]["messages"][1..@groups[subj]["messages"].length] for i in (1...@groups[subj]["messages"].length)
unless @gotten.has_key?(@groups[subj]["messages"][i])
print "getting article: #{i}\n" if Debuglevel > 0 print "getting article: #{i}\n" if Debuglevel > 0
begin begin
resp, id, messid, list = @nntp.body(i) print "Server: #{@groups[subj]["servers"][i]}\n"
print "Messid: #{@groups[subj]["messages"][i]}\n"
resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i])
rescue Net::NNTPReplyError rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n" print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
return false return false
end end
end
print "getting article: #{subj}\n" if Debuglevel > 0
print "message id: #{messid}\n" if Debuglevel > 0
print "id: #{id}\n" if Debuglevel > 0
print "server: #{@groups[subj]["servers"][i]}\n" if Debuglevel > 0
print "full subject: #{@groups[subj]["subject"][i]}\n" if Debuglevel > 0
@gotten[ @groups[subj]["messages"][i] ] = true
if file if file
list.collect{|line| file.print "#{line}\n"} list.collect{|line| file.print "#{line}\n"}
else else
@ -114,26 +204,27 @@ def get_group_subjects
return @groups.keys return @groups.keys
end end
def get_group_ids(subject) def get_group_messids(subject)
group_subjects unless @grouped group_subjects unless @grouped
return @groups[subject]["ids"] return @groups[subject]["messages"]
end end
def group_complete(subj) def group_is_complete(subj)
group_subjects unless @grouped group_subjects unless @grouped
print "length: #{@groups[subj]["messages"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1 print "length: #{@groups[subj]["messages"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1
if (@groups[subj]["messages"].length ) >= @groups[subj]["total"].to_i umessids = @groups[subj]["messages"].uniq
if (umessids.length ) >= @groups[subj]["total"].to_i
return true return true
else else
return false return false
end end
end end
def group_singlepart(subj) def group_is_singlepart(subj)
@groups[subj]["total"].to_i == 1 @groups[subj]["total"].to_i == 1
end end
def group_multipart(subj) def group_is_multipart(subj)
@groups[subj]["total"].to_i > 1 @groups[subj]["total"].to_i > 1
end end
@ -149,7 +240,7 @@ def group_subjects
@groups = {} @groups = {}
subject_sort unless @sorted subject_sort unless @sorted
prev_subj = "" prev_subj = ""
for i in (0..@subjects.length) for i in (0...@subjects.length)
print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1 print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1
if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/ if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/
j = "#{$1}#{$4}" j = "#{$1}#{$4}"
@ -163,6 +254,8 @@ def group_subjects
if j == prev_subj and number.to_i !=0 if j == prev_subj and number.to_i !=0
@groups[j]["messages"] += [ @messids[i] ] @groups[j]["messages"] += [ @messids[i] ]
@groups[j]["ids"] += [ @ids[i].to_i ] @groups[j]["ids"] += [ @ids[i].to_i ]
@groups[j]["servers"] += [ @servers[i] ]
@groups[j]["subject"] += [ @subjects[i] ]
else else
unless number.to_i == 0 unless number.to_i == 0
prev_subj = j prev_subj = j
@ -170,52 +263,74 @@ def group_subjects
@groups[j]["total"] = total @groups[j]["total"] = total
@groups[j]["messages"] = [ @messids[i] ] @groups[j]["messages"] = [ @messids[i] ]
@groups[j]["ids"] = [ @ids[i].to_i ] @groups[j]["ids"] = [ @ids[i].to_i ]
@groups[j]["servers"] = [ @servers[i] ]
@groups[j]["subject"] = [ @subjects[i] ]
end end
end end
end end
@grouped = true @grouped = true
end end
def set_skip_ids(ids) def set_skip_ids(server, ids)
set = Set::IntSpan.new(ids) set = Set::IntSpan.new(ids)
set.finite or return false set.finite or return false
min = set.min min = set.min
min != nil and min < 0 and return false min != nil and min < 0 and return false
@skip_ids = set @connections[server]["skip_ids"] = set
return true return true
end end
def read_cache(group, cachedir, first=0, last=0) def group_update_newsrc(subject)
filename = "#{cachedir}/#{group}.ripnewscache" for i in (0...@groups[subject]["messages"].length)
@connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i])
end
end
def save_newsrc()
for server in @connections.keys
@connections[server]["newsrc"].save
end
end
def read_cache(cachedir)
filename = "#{cachedir}/#{@group}.ripnewscache"
excludes = {} excludes = {}
@skip_ids.elements.collect!{|x| excludes[x]=true} for server in @connections.keys
excludes[server] = {}
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
end
if FileTest.directory?( cachedir) and FileTest.file?( filename ) and FileTest.readable?( filename ) if FileTest.directory?( cachedir) and FileTest.file?( filename ) and FileTest.readable?( filename )
file = File.new( filename ) file = File.new( filename )
lines = file.readlines lines = file.readlines
for line in lines for line in lines
#print "line: #{line}\n" #print "line: #{line}\n"
if line =~ /^(.*?)\|(\d+)\|(.*)$/ if line =~ /^(.*?)\|(\d+)\|(.*?)\|(.*)$/
#print "messid: #{$1}\n" #print "messid: #{$1}\n"
#print "id: #{$2}\n" #print "id: #{$2}\n"
#print "subject: #{$3}\n" #print "server: #{$3}\n"
unless excludes.has_key?($2.to_i) or #print "subject: #{$4}\n"
$2.to_i < first.to_i or #print "First: #{@connections[$3]["first"].to_i}\n";
$2.to_i > last.to_i #print "Last: #{@connections[$3]["last"].to_i}\n";
add($1, $2, $3) if @connections.has_key?($3)
@skip_ids.insert($2.to_i) unless excludes.has_key?($3) and excludes[$3].has_key?($2.to_i) or
$2.to_i < @connections[$3]["first"].to_i or
$2.to_i > @connections[$3]["last"].to_i
add($1, $2, $3, $4)
@connections[$3]["skip_ids"].insert($2.to_i)
end
end end
end end
end end
end end
end end
def save_cache(group, cachedir) def save_cache(cachedir)
filename = "#{cachedir}/#{group}.ripnewscache" filename = "#{cachedir}/#{@group}.ripnewscache"
if FileTest.directory?( cachedir ) if FileTest.directory?( cachedir )
file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n" file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n"
for i in (0...@subjects.length) for i in (0...@subjects.length)
file.print("#{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n") file.print("#{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n")
#print "writing: #{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n" #print "writing: #{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n"
end end
end end
end end
@ -227,13 +342,15 @@ def uudecode(data, outfile=nil)
when "Array" when "Array"
print "Calling _uudecode_array\n" if Debuglevel>0 print "Calling _uudecode_array\n" if Debuglevel>0
mode, file, body = _uudecode_array(data) mode, file, body = _uudecode_array(data)
when "File" when "File", "Tempfile"
unless outfile unless outfile
print "uudecode: need outfile\n" print "uudecode: need outfile\n"
exit exit
end end
print "Calling _uudecode_file\n" if Debuglevel>0 print "Calling _uudecode_file\n" if Debuglevel>0
mode, file, body = _uudecode_file(data, outfile) mode, file, body = _uudecode_file(data, outfile)
else
print "Funny stuff in uudecode. Data of type \"#{data.type.to_s}\"\n"
end end
return mode, file, body return mode, file, body
end end
@ -396,26 +513,29 @@ def subject_sort
print "subj sort #{@subjects[i]}\n" if Debuglevel >2 print "subj sort #{@subjects[i]}\n" if Debuglevel >2
print "subj sort #{@messids[i]}\n" if Debuglevel >2 print "subj sort #{@messids[i]}\n" if Debuglevel >2
print "subj sort #{@ids[i]}\n" if Debuglevel >2 print "subj sort #{@ids[i]}\n" if Debuglevel >2
sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]}"] print "subj sort #{@servers[i]}\n" if Debuglevel >2
sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]} #{@servers[i]}"]
end end
sort_arr.sort!{|a,b| ward_sort(a, b)} sort_arr.sort!{|a,b| ward_sort(a, b)}
@messids = [] @messids = []
@ids = [] @ids = []
@subjects = [] @subjects = []
for i in sort_arr @servers = []
i =~ /^(.*) (<[^<]*>) (\d+)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+)$/ sort_arr.collect{|i|
i =~ /^(.*) (<[^<]*>) (\d+) (\S*)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+) (\S*)$/
@messids += [$2] @messids += [$2]
@ids += [$3] @ids += [$3]
@subjects += [$1] @subjects += [$1]
@servers += [$4]
print "subject sort: #{$1}\n" if Debuglevel >2 print "subject sort: #{$1}\n" if Debuglevel >2
end }
@sorted = true @sorted = true
end end
def ward_sort(a, b) def ward_sort(a, b)
a =~ /^(.*) (<[^<]*> \d+)$/ a =~ /^(.*) (<[^<]*> \d+ \S*)$/
c = $1.to_s.split(/([0-9]+)/) c = $1.to_s.split(/([0-9]+)/)
b =~ /^(.*) (<[^<]*> \d+)$/ b =~ /^(.*) (<[^<]*> \d+ \S*)$/
d = $1.to_s.split(/([0-9]+)/) d = $1.to_s.split(/([0-9]+)/)
for x in c for x in c
@ -432,7 +552,9 @@ def ward_sort(a, b)
end end
def quit def quit
@nntp.quit for server in @connections.keys
@connections[server]["nntp"].quit
end
end end
private :ward_sort private :ward_sort