- fix sorting

- allow multiple servers
- use xhdr for faster header collection
- save cache sorted
This commit is contained in:
Ward Wouts 2002-05-05 20:07:03 +00:00
parent 3b1f9f304f
commit 5ca723bfc0

View file

@ -5,58 +5,128 @@
require 'set/intspan'
require 'net/nntp'
require 'news/newsrc'
require 'tempfile'
class Article
Debuglevel = 0
Debuglevel = 1
# NOTE(review): this text is a rendered diff without +/- markers, so removed and
# added lines are interleaved. The one-argument `def` line and the bare
# `@nntp` / `@skip_ids` assignments look like the removed version -- confirm
# against the repository before treating this as source.
#
# Builds the empty article state and one connection record per server.
# nntpservers: server names separated by '|'.
# groupname:   stored in @group and used when asking each newsrc for marked articles.
# newsrc:      base path; each server gets its own file named <newsrc>.<server>.
def initialize(server)
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
# Parallel arrays: index n of each array describes the same article.
@messids = []
@ids = []
@servers = []
@subjects = []
@sorted = false
@grouped = false
#@skip_ids = Set::IntSpan.new()
@groups = {}
@nntp = Net::NNTP.new(server)
@skip_ids = Set::IntSpan.new()
# Message ids used as keys once a body has been fetched (see the get_group_body* methods).
@gotten = {}
@group = groupname
@serverlist = nntpservers.split('|')
@connections = {}
# One hash per server holding its "nntp" object, "skip_ids" set and "newsrc" object.
@serverlist.collect{|server|
@connections[server] = {}
@connections[server]["nntp"] = Net::NNTP.new(server)
@connections[server]["skip_ids"] = Set::IntSpan.new()
@connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
# Replace the empty skip set with the articles the newsrc reports as marked for @group.
set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
}
#@nntp = Net::NNTP.new(nntpservers)
end
# NOTE(review): rendered diff without +/- markers; the three-argument `def`
# line appears to be the removed signature -- confirm against the repository.
#
# Appends one article to the parallel arrays @messids, @ids, @servers and
# @subjects. The id is converted with to_i before it is stored.
def add(messid, id, subject)
def add(messid, id, server, subject)
# print "Messid: #{messid}\n"
# print "Id: #{id}\n"
# print "Server: #{server}\n"
# print "Subject: #{subject}\n"
@messids += [messid]
@ids += [id.to_i]
@servers += [server]
@subjects += [subject]
# New data invalidates any earlier sort or grouping.
@sorted = false
@grouped = false
end
# NOTE(review): rendered diff without +/- markers. Two versions of this method
# are interleaved below: one that takes (group, cachedir) and talks to a single
# @nntp, and one that takes (cachedir) and walks @connections. Which lines
# belong to which version is inferred, not marked -- confirm against the repository.
#
# As far as the @connections-based lines show, the method:
#   1. selects @group on every server and records the "first"/"last" article
#      numbers (0 when the server returned nil); prints a message and returns
#      false on Net::NNTP::RuntimeError,
#   2. calls read_cache(cachedir),
#   3. per server, takes the first..last range minus that server's "skip_ids"
#      and issues XHDR requests for "subject" and "message-id" per run,
#   4. calls add for every article id that has both a subject and a message id,
#   5. sorts if needed and calls save_cache(cachedir).
# Returns false on a group failure or a non-2xx XHDR response; otherwise the
# value of the final save_cache call.
def get_articles(group, cachedir=false)
begin
resp, count, first, last, name = @nntp.group(group)
rescue Net::NNTP::RuntimeError
print "Couldn't open group: #{group}\n"
return false
end
read_cache(group, cachedir, first, last)
range = Set::IntSpan.new("#{first}-#{last}")
for i in (range.diff(@skip_ids).elements)
def get_articles(cachedir=false)
for server in @connections.keys
begin
@nntp.stat(i)
resp, id, messid, list = @nntp.head(i)
for j in list
if j =~ /Subject: (.*)/
subj=$1
end
end
print "get_articles messid: #{messid}\n" if Debuglevel > 1
print "get_articles id: #{id}\n" if Debuglevel > 1
print "get_articles subject: #{subj}\n" if Debuglevel > 1
add(messid, id, subj)
resp, count, first, last, name = @connections[server]["nntp"].group(@group)
@connections[server]["first"] = first ? first : 0
@connections[server]["last"] = last ? last : 0
rescue Net::NNTP::RuntimeError
print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
print "Couldn't open group: #{@group}\n"
return false
end
end
save_cache(group, cachedir)
read_cache(cachedir)
for server in @connections.keys
print "reading articles from server: #{server}\n"
range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
# presumably run_list is a comma-separated string of runs (it is matched against a regex and split on ',' below) -- TODO confirm
rangelist = range.diff(@connections[server]["skip_ids"]).run_list
print "rangelist: #{rangelist}\n" if Debuglevel >1
print "rangelist: #{rangelist.type.to_s}\n" if Debuglevel >1
print "rangelsit elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel >1
# Nothing to fetch when the run list is nil or empty.
unless rangelist == nil or rangelist =~ /^$/
for i in rangelist.split(',')
print "i: #{i}\n" if Debuglevel > 1
begin
resp, subj_lines = @connections[server]["nntp"].xhdr("subject", i)
# Any response code outside 200..299 aborts the whole method.
unless resp.to_i >= 200 and resp.to_i < 300
print "got response #{resp} while reading group #{@group} from #{server}\n"
return false
end
resp, messid_lines = @connections[server]["nntp"].xhdr("message-id", i)
unless resp.to_i >=200 and resp.to_i < 300
print "got response #{resp} while reading group #{@group} from #{server}\n"
return false
end
# Merge the two XHDR results into one hash keyed by x[0]; x[1] is stored as the header value.
art = {}
subj_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["subject"] = x[1]
print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1
}
messid_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["messid"] = x[1]
print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1
}
# Only articles for which both headers came back are recorded.
for id in art.keys
if art[id].has_key?("subject") and art[id].has_key?("messid")
print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1
add(art[id]["messid"], id, server, art[id]["subject"])
end
end
# The rescue body is empty: a Net::NNTP::RuntimeError for this run is dropped without any message.
rescue Net::NNTP::RuntimeError
end
end
end
# if xhdr doesn't work, this should be used
# for i in (range.diff(@connections[server]["skip_ids"]).elements)
# begin
# @connections[server]["nntp"].stat(i)
# resp, id, messid, list = @connections[server]["nntp"].head(i)
# for j in list
# if j =~ /Subject: (.*)/
# subj=$1
# end
# end
# print "get_articles messid: #{messid}\n" if Debuglevel > 1
# print "get_articles id: #{id}\n" if Debuglevel > 1
# print "get_articles server: #{server}\n" if Debuglevel > 1
# print "get_articles subject: #{subj}\n" if Debuglevel > 1
# add(messid, id, server, subj)
# rescue Net::NNTP::RuntimeError
# print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
# end
# end
end
subject_sort unless @sorted # store cache sorted
save_cache(cachedir)
end
def get_groups
@ -66,40 +136,60 @@ end
# NOTE(review): rendered diff without +/- markers; the loop over the message
# slice using @nntp and the index loop using @connections are two interleaved
# versions -- confirm against the repository.
#
# Requests the body of the articles listed in @groups[subj]["messages"].
# In the index-based lines, an entry is skipped when its message id is already
# a key of @gotten; otherwise the body is requested from the connection of the
# server stored at the same index in @groups[subj]["servers"].
# A Net::NNTPReplyError is reported on stdout and the loop continues. The
# message always prints messages[0], not the entry that failed.
# Returns `result`, which is reassigned to the fetched `list` on each pass
# rather than appended to -- NOTE(review): confirm that only the last body is wanted.
def get_group_body(subj)
result = []
for i in @groups[subj]["messages"][0..@groups[subj]["messages"].length]
begin
resp, id, messid, list = @nntp.body(i)
rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
for i in (0...@groups[subj]["messages"].length)
unless @gotten.has_key?(@groups[subj]["messages"][i])
print "getting article: #{i}\n" if Debuglevel > 0
print "#{@groups[subj]}\n"
begin
print "Server: #{@groups[subj]["servers"][i]}\n"
print "Messid: #{@groups[subj]["messages"][i]}\n"
resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i])
rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
end
result = list
end
result = list
end
return result
end
# NOTE(review): rendered diff without +/- markers; the @nntp.body line and the
# first set of three debug prints appear to be the removed version -- confirm
# against the repository.
#
# Requests the body of the first article of group `subj` (index 0 of
# @groups[subj]["messages"]) from the server stored at index 0 of
# @groups[subj]["servers"].
# Returns the list from the body call, or false after printing a message when
# Net::NNTPReplyError is raised.
def get_group_body_first(subj)
begin
resp, id, messid, list = @nntp.body(@groups[subj]["messages"][0])
resp, id, messid, list = @connections[@groups[subj]["servers"][0]]["nntp"].body(@groups[subj]["messages"][0])
rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
return false
end
print "getting article: #{subj}\n" if Debuglevel > 0
print "message id: #{messid}\n" if Debuglevel > 0
print "id: #{id}\n" if Debuglevel > 0
print "getting article: #{subj}\n" if Debuglevel > 0
print "message id: #{messid}\n" if Debuglevel > 0
print "id: #{id}\n" if Debuglevel > 0
print "server: #{@groups[subj]["servers"][0]}\n" if Debuglevel > 0
print "full subject: #{@groups[subj]["subject"][0]}\n" if Debuglevel > 0
# Record the message id returned by the server so later fetches can skip this article.
@gotten[messid] = true
return list
end
def get_group_body_rest(subj, file=nil)
result = []
for i in @groups[subj]["messages"][1..@groups[subj]["messages"].length]
print "getting article: #{i}\n" if Debuglevel > 0
begin
resp, id, messid, list = @nntp.body(i)
rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
return false
for i in (1...@groups[subj]["messages"].length)
unless @gotten.has_key?(@groups[subj]["messages"][i])
print "getting article: #{i}\n" if Debuglevel > 0
begin
print "Server: #{@groups[subj]["servers"][i]}\n"
print "Messid: #{@groups[subj]["messages"][i]}\n"
resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i])
rescue Net::NNTPReplyError
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
return false
end
end
print "getting article: #{subj}\n" if Debuglevel > 0
print "message id: #{messid}\n" if Debuglevel > 0
print "id: #{id}\n" if Debuglevel > 0
print "server: #{@groups[subj]["servers"][i]}\n" if Debuglevel > 0
print "full subject: #{@groups[subj]["subject"][i]}\n" if Debuglevel > 0
@gotten[ @groups[subj]["messages"][i] ] = true
if file
list.collect{|line| file.print "#{line}\n"}
else
@ -114,26 +204,27 @@ def get_group_subjects
return @groups.keys
end
# NOTE(review): rendered diff without +/- markers; this looks like a rename from
# get_group_ids (returning the "ids" list) to get_group_messids (returning the
# "messages" list) -- confirm against the repository.
#
# Runs group_subjects first when @grouped is false, then returns the list
# stored for `subject` in @groups.
def get_group_ids(subject)
def get_group_messids(subject)
group_subjects unless @grouped
return @groups[subject]["ids"]
return @groups[subject]["messages"]
end
# NOTE(review): rendered diff without +/- markers; this looks like a rename from
# group_complete to group_is_complete, with the raw-length comparison replaced
# by one on unique message ids -- confirm against the repository.
#
# Returns true when the number of message ids collected for `subj` is at least
# the group's "total" (converted with to_i), false otherwise.
# Runs group_subjects first when @grouped is false.
def group_complete(subj)
def group_is_complete(subj)
group_subjects unless @grouped
print "length: #{@groups[subj]["messages"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1
if (@groups[subj]["messages"].length ) >= @groups[subj]["total"].to_i
# uniq drops repeated message ids before counting.
umessids = @groups[subj]["messages"].uniq
if (umessids.length ) >= @groups[subj]["total"].to_i
return true
else
return false
end
end
# NOTE(review): rendered diff without +/- markers; this looks like a rename from
# group_singlepart to group_is_singlepart -- confirm against the repository.
#
# True when the "total" recorded for `subj` converts (to_i) to exactly 1.
# Unlike the neighbouring accessors, it does not call group_subjects first.
def group_singlepart(subj)
def group_is_singlepart(subj)
@groups[subj]["total"].to_i == 1
end
# NOTE(review): rendered diff without +/- markers; this looks like a rename from
# group_multipart to group_is_multipart -- confirm against the repository.
#
# True when the "total" recorded for `subj` converts (to_i) to more than 1.
# It does not call group_subjects first.
def group_multipart(subj)
def group_is_multipart(subj)
@groups[subj]["total"].to_i > 1
end
@ -149,7 +240,7 @@ def group_subjects
@groups = {}
subject_sort unless @sorted
prev_subj = ""
for i in (0..@subjects.length)
for i in (0...@subjects.length)
print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1
if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/
j = "#{$1}#{$4}"
@ -162,60 +253,84 @@ def group_subjects
end
if j == prev_subj and number.to_i !=0
@groups[j]["messages"] += [ @messids[i] ]
@groups[j]["ids"] += [ @ids[i].to_i ]
@groups[j]["ids"] += [ @ids[i].to_i ]
@groups[j]["servers"] += [ @servers[i] ]
@groups[j]["subject"] += [ @subjects[i] ]
else
unless number.to_i == 0
prev_subj = j
@groups[j] = {}
@groups[j]["total"] = total
@groups[j]["total"] = total
@groups[j]["messages"] = [ @messids[i] ]
@groups[j]["ids"] = [ @ids[i].to_i ]
@groups[j]["ids"] = [ @ids[i].to_i ]
@groups[j]["servers"] = [ @servers[i] ]
@groups[j]["subject"] = [ @subjects[i] ]
end
end
end
@grouped = true
end
# NOTE(review): rendered diff without +/- markers; the one-argument `def` line
# and the `@skip_ids = set` assignment appear to be the removed version --
# confirm against the repository.
#
# Builds a Set::IntSpan from `ids` and stores it as the skip set of `server`.
# Returns false, storing nothing, when the set is not finite or its minimum is
# negative; returns true after storing the set.
def set_skip_ids(ids)
def set_skip_ids(server, ids)
set = Set::IntSpan.new(ids)
set.finite or return false
min = set.min
# min is nil when the set has no minimum to report; only a negative minimum is rejected.
min != nil and min < 0 and return false
@skip_ids = set
@connections[server]["skip_ids"] = set
return true
end
def read_cache(group, cachedir, first=0, last=0)
filename = "#{cachedir}/#{group}.ripnewscache"
# For every article of group `subject`, calls mark(@group, id) on the newsrc
# object of the server that article is recorded under. The "servers" and "ids"
# lists are read at the same index as "messages".
# Only mark is called here; save_newsrc is the method that calls save.
def group_update_newsrc(subject)
for i in (0...@groups[subject]["messages"].length)
@connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i])
end
end
# Calls save on the newsrc object of every server in @connections.
def save_newsrc()
for server in @connections.keys
@connections[server]["newsrc"].save
end
end
# NOTE(review): rendered diff without +/- markers. The three-field regex, the
# `@skip_ids` lines and the three-argument add call appear to be the removed
# single-server version interleaved with the per-server one -- confirm against
# the repository.
#
# Loads previously cached headers from <cachedir>/<@group>.ripnewscache.
# The four-field regex reads each line as: message id | article id | server | subject.
# In the per-server lines, a cached entry is added only when its server is a
# key of @connections, the article id is not in that server's skip set, and the
# id lies between that server's "first" and "last". Each added id is then
# inserted into the server's skip set.
# Does nothing when cachedir is not a directory or the file is missing or unreadable.
# NOTE(review): no close call on the File object is visible in this block.
def read_cache(cachedir)
filename = "#{cachedir}/#{@group}.ripnewscache"
excludes = {}
@skip_ids.elements.collect!{|x| excludes[x]=true}
# Build a per-server hash of skipped ids so each membership test below is a hash lookup.
for server in @connections.keys
excludes[server] = {}
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
end
if FileTest.directory?( cachedir) and FileTest.file?( filename ) and FileTest.readable?( filename )
file = File.new( filename )
lines = file.readlines
for line in lines
#print "line: #{line}\n"
if line =~ /^(.*?)\|(\d+)\|(.*)$/
if line =~ /^(.*?)\|(\d+)\|(.*?)\|(.*)$/
#print "messid: #{$1}\n"
#print "id: #{$2}\n"
#print "subject: #{$3}\n"
unless excludes.has_key?($2.to_i) or
$2.to_i < first.to_i or
$2.to_i > last.to_i
add($1, $2, $3)
@skip_ids.insert($2.to_i)
#print "server: #{$3}\n"
#print "subject: #{$4}\n"
#print "First: #{@connections[$3]["first"].to_i}\n";
#print "Last: #{@connections[$3]["last"].to_i}\n";
if @connections.has_key?($3)
# `and` and `or` share one precedence level and associate left, so the condition
# below reads: (excluded) or (below first) or (above last).
unless excludes.has_key?($3) and excludes[$3].has_key?($2.to_i) or
$2.to_i < @connections[$3]["first"].to_i or
$2.to_i > @connections[$3]["last"].to_i
add($1, $2, $3, $4)
@connections[$3]["skip_ids"].insert($2.to_i)
end
end
end
end
end
end
# NOTE(review): rendered diff without +/- markers; the two-argument `def` line,
# the `#{group}` filename and the three-field print appear to be the removed
# version -- confirm against the repository.
#
# Writes one line per known article to <cachedir>/<@group>.ripnewscache, with
# the fields separated by '|' (message id, article id, server, subject in the
# four-field line). Articles are written in the current order of the parallel arrays.
# Does nothing when cachedir is not a directory.
# NOTE(review): no close or flush call on the File object is visible in this block.
# NOTE(review): File.new raises on failure rather than returning nil, so the
# `or print` branch is presumably never taken -- confirm.
def save_cache(group, cachedir)
filename = "#{cachedir}/#{group}.ripnewscache"
def save_cache(cachedir)
filename = "#{cachedir}/#{@group}.ripnewscache"
if FileTest.directory?( cachedir )
file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n"
for i in (0...@subjects.length)
file.print("#{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n")
#print "writing: #{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n"
file.print("#{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n")
#print "writing: #{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n"
end
end
end
@ -227,13 +342,15 @@ def uudecode(data, outfile=nil)
when "Array"
print "Calling _uudecode_array\n" if Debuglevel>0
mode, file, body = _uudecode_array(data)
when "File"
when "File", "Tempfile"
unless outfile
print "uudecode: need outfile\n"
exit
end
print "Calling _uudecode_file\n" if Debuglevel>0
mode, file, body = _uudecode_file(data, outfile)
else
print "Funny stuff in uudecode. Data of type \"#{data.type.to_s}\"\n"
end
return mode, file, body
end
@ -396,26 +513,29 @@ def subject_sort
print "subj sort #{@subjects[i]}\n" if Debuglevel >2
print "subj sort #{@messids[i]}\n" if Debuglevel >2
print "subj sort #{@ids[i]}\n" if Debuglevel >2
sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]}"]
print "subj sort #{@servers[i]}\n" if Debuglevel >2
sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]} #{@servers[i]}"]
end
sort_arr.sort!{|a,b| ward_sort(a, b)}
@messids = []
@ids = []
@subjects = []
for i in sort_arr
i =~ /^(.*) (<[^<]*>) (\d+)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+)$/
@servers = []
sort_arr.collect{|i|
i =~ /^(.*) (<[^<]*>) (\d+) (\S*)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+) (\S*)$/
@messids += [$2]
@ids += [$3]
@subjects += [$1]
@servers += [$4]
print "subject sort: #{$1}\n" if Debuglevel >2
end
}
@sorted = true
end
def ward_sort(a, b)
a =~ /^(.*) (<[^<]*> \d+)$/
a =~ /^(.*) (<[^<]*> \d+ \S*)$/
c = $1.to_s.split(/([0-9]+)/)
b =~ /^(.*) (<[^<]*> \d+)$/
b =~ /^(.*) (<[^<]*> \d+ \S*)$/
d = $1.to_s.split(/([0-9]+)/)
for x in c
@ -432,7 +552,9 @@ def ward_sort(a, b)
end
# NOTE(review): rendered diff without +/- markers; the bare `@nntp.quit` line
# appears to be the removed single-server version -- confirm against the repository.
#
# Calls quit on the "nntp" object of every server in @connections.
def quit
@nntp.quit
for server in @connections.keys
@connections[server]["nntp"].quit
end
end
private :ward_sort