- fix sorting
- allow multiple servers
- use xhdr for faster header collection
- save cache sorted
This commit is contained in:
parent
3b1f9f304f
commit
5ca723bfc0
1 changed file with 201 additions and 79 deletions
|
|
@ -5,58 +5,128 @@
|
|||
|
||||
require 'set/intspan'
|
||||
require 'net/nntp'
|
||||
require 'news/newsrc'
|
||||
require 'tempfile'
|
||||
|
||||
class Article
|
||||
|
||||
Debuglevel = 0
|
||||
Debuglevel = 1
|
||||
|
||||
def initialize(server)
|
||||
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
|
||||
@messids = []
|
||||
@ids = []
|
||||
@servers = []
|
||||
@subjects = []
|
||||
|
||||
@sorted = false
|
||||
@grouped = false
|
||||
#@skip_ids = Set::IntSpan.new()
|
||||
@groups = {}
|
||||
@nntp = Net::NNTP.new(server)
|
||||
@skip_ids = Set::IntSpan.new()
|
||||
@gotten = {}
|
||||
@group = groupname
|
||||
|
||||
@serverlist = nntpservers.split('|')
|
||||
@connections = {}
|
||||
@serverlist.collect{|server|
|
||||
@connections[server] = {}
|
||||
@connections[server]["nntp"] = Net::NNTP.new(server)
|
||||
@connections[server]["skip_ids"] = Set::IntSpan.new()
|
||||
@connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
|
||||
set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
|
||||
}
|
||||
#@nntp = Net::NNTP.new(nntpservers)
|
||||
end
|
||||
|
||||
def add(messid, id, subject)
|
||||
def add(messid, id, server, subject)
|
||||
# print "Messid: #{messid}\n"
|
||||
# print "Id: #{id}\n"
|
||||
# print "Server: #{server}\n"
|
||||
# print "Subject: #{subject}\n"
|
||||
@messids += [messid]
|
||||
@ids += [id.to_i]
|
||||
@servers += [server]
|
||||
@subjects += [subject]
|
||||
@sorted = false
|
||||
@grouped = false
|
||||
end
|
||||
|
||||
def get_articles(group, cachedir=false)
|
||||
def get_articles(cachedir=false)
|
||||
for server in @connections.keys
|
||||
begin
|
||||
resp, count, first, last, name = @nntp.group(group)
|
||||
resp, count, first, last, name = @connections[server]["nntp"].group(@group)
|
||||
@connections[server]["first"] = first ? first : 0
|
||||
@connections[server]["last"] = last ? last : 0
|
||||
rescue Net::NNTP::RuntimeError
|
||||
print "Couldn't open group: #{group}\n"
|
||||
print "Couldn't open group: #{@group}\n"
|
||||
return false
|
||||
end
|
||||
read_cache(group, cachedir, first, last)
|
||||
range = Set::IntSpan.new("#{first}-#{last}")
|
||||
for i in (range.diff(@skip_ids).elements)
|
||||
end
|
||||
read_cache(cachedir)
|
||||
for server in @connections.keys
|
||||
print "reading articles from server: #{server}\n"
|
||||
range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
|
||||
rangelist = range.diff(@connections[server]["skip_ids"]).run_list
|
||||
print "rangelist: #{rangelist}\n" if Debuglevel >1
|
||||
print "rangelist: #{rangelist.type.to_s}\n" if Debuglevel >1
|
||||
print "rangelsit elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel >1
|
||||
unless rangelist == nil or rangelist =~ /^$/
|
||||
for i in rangelist.split(',')
|
||||
print "i: #{i}\n" if Debuglevel > 1
|
||||
begin
|
||||
@nntp.stat(i)
|
||||
resp, id, messid, list = @nntp.head(i)
|
||||
for j in list
|
||||
if j =~ /Subject: (.*)/
|
||||
subj=$1
|
||||
resp, subj_lines = @connections[server]["nntp"].xhdr("subject", i)
|
||||
unless resp.to_i >= 200 and resp.to_i < 300
|
||||
print "got response #{resp} while reading group #{@group} from #{server}\n"
|
||||
return false
|
||||
end
|
||||
resp, messid_lines = @connections[server]["nntp"].xhdr("message-id", i)
|
||||
unless resp.to_i >=200 and resp.to_i < 300
|
||||
print "got response #{resp} while reading group #{@group} from #{server}\n"
|
||||
return false
|
||||
end
|
||||
art = {}
|
||||
subj_lines.collect{|x|
|
||||
art[x[0]] = {} unless art.has_key?(x[0])
|
||||
art[x[0]]["subject"] = x[1]
|
||||
print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1
|
||||
}
|
||||
messid_lines.collect{|x|
|
||||
art[x[0]] = {} unless art.has_key?(x[0])
|
||||
art[x[0]]["messid"] = x[1]
|
||||
print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1
|
||||
}
|
||||
for id in art.keys
|
||||
if art[id].has_key?("subject") and art[id].has_key?("messid")
|
||||
print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1
|
||||
add(art[id]["messid"], id, server, art[id]["subject"])
|
||||
end
|
||||
end
|
||||
print "get_articles messid: #{messid}\n" if Debuglevel > 1
|
||||
print "get_articles id: #{id}\n" if Debuglevel > 1
|
||||
print "get_articles subject: #{subj}\n" if Debuglevel > 1
|
||||
add(messid, id, subj)
|
||||
rescue Net::NNTP::RuntimeError
|
||||
print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
|
||||
end
|
||||
end
|
||||
save_cache(group, cachedir)
|
||||
end
|
||||
|
||||
# if xhdr doesn't work, this should be used
|
||||
# for i in (range.diff(@connections[server]["skip_ids"]).elements)
|
||||
# begin
|
||||
# @connections[server]["nntp"].stat(i)
|
||||
# resp, id, messid, list = @connections[server]["nntp"].head(i)
|
||||
# for j in list
|
||||
# if j =~ /Subject: (.*)/
|
||||
# subj=$1
|
||||
# end
|
||||
# end
|
||||
# print "get_articles messid: #{messid}\n" if Debuglevel > 1
|
||||
# print "get_articles id: #{id}\n" if Debuglevel > 1
|
||||
# print "get_articles server: #{server}\n" if Debuglevel > 1
|
||||
# print "get_articles subject: #{subj}\n" if Debuglevel > 1
|
||||
# add(messid, id, server, subj)
|
||||
# rescue Net::NNTP::RuntimeError
|
||||
# print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
|
||||
# end
|
||||
# end
|
||||
end
|
||||
subject_sort unless @sorted # store cache sorted
|
||||
save_cache(cachedir)
|
||||
end
|
||||
|
||||
def get_groups
|
||||
|
|
@ -66,20 +136,26 @@ end
|
|||
|
||||
def get_group_body(subj)
|
||||
result = []
|
||||
for i in @groups[subj]["messages"][0..@groups[subj]["messages"].length]
|
||||
for i in (0...@groups[subj]["messages"].length)
|
||||
unless @gotten.has_key?(@groups[subj]["messages"][i])
|
||||
print "getting article: #{i}\n" if Debuglevel > 0
|
||||
print "#{@groups[subj]}\n"
|
||||
begin
|
||||
resp, id, messid, list = @nntp.body(i)
|
||||
print "Server: #{@groups[subj]["servers"][i]}\n"
|
||||
print "Messid: #{@groups[subj]["messages"][i]}\n"
|
||||
resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i])
|
||||
rescue Net::NNTPReplyError
|
||||
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
|
||||
end
|
||||
result = list
|
||||
end
|
||||
end
|
||||
return result
|
||||
end
|
||||
|
||||
def get_group_body_first(subj)
|
||||
begin
|
||||
resp, id, messid, list = @nntp.body(@groups[subj]["messages"][0])
|
||||
resp, id, messid, list = @connections[@groups[subj]["servers"][0]]["nntp"].body(@groups[subj]["messages"][0])
|
||||
rescue Net::NNTPReplyError
|
||||
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
|
||||
return false
|
||||
|
|
@ -87,19 +163,33 @@ def get_group_body_first(subj)
|
|||
print "getting article: #{subj}\n" if Debuglevel > 0
|
||||
print "message id: #{messid}\n" if Debuglevel > 0
|
||||
print "id: #{id}\n" if Debuglevel > 0
|
||||
print "server: #{@groups[subj]["servers"][0]}\n" if Debuglevel > 0
|
||||
print "full subject: #{@groups[subj]["subject"][0]}\n" if Debuglevel > 0
|
||||
@gotten[messid] = true
|
||||
return list
|
||||
end
|
||||
|
||||
def get_group_body_rest(subj, file=nil)
|
||||
result = []
|
||||
for i in @groups[subj]["messages"][1..@groups[subj]["messages"].length]
|
||||
for i in (1...@groups[subj]["messages"].length)
|
||||
unless @gotten.has_key?(@groups[subj]["messages"][i])
|
||||
print "getting article: #{i}\n" if Debuglevel > 0
|
||||
begin
|
||||
resp, id, messid, list = @nntp.body(i)
|
||||
print "Server: #{@groups[subj]["servers"][i]}\n"
|
||||
print "Messid: #{@groups[subj]["messages"][i]}\n"
|
||||
resp, id, messid, list = @connections[@groups[subj]["servers"][i]]["nntp"].body(@groups[subj]["messages"][i])
|
||||
|
||||
rescue Net::NNTPReplyError
|
||||
print "Caught Net::NNTPReplyError reading article #{@groups[subj]["messages"][0]}\n"
|
||||
return false
|
||||
end
|
||||
end
|
||||
print "getting article: #{subj}\n" if Debuglevel > 0
|
||||
print "message id: #{messid}\n" if Debuglevel > 0
|
||||
print "id: #{id}\n" if Debuglevel > 0
|
||||
print "server: #{@groups[subj]["servers"][i]}\n" if Debuglevel > 0
|
||||
print "full subject: #{@groups[subj]["subject"][i]}\n" if Debuglevel > 0
|
||||
@gotten[ @groups[subj]["messages"][i] ] = true
|
||||
if file
|
||||
list.collect{|line| file.print "#{line}\n"}
|
||||
else
|
||||
|
|
@ -114,26 +204,27 @@ def get_group_subjects
|
|||
return @groups.keys
|
||||
end
|
||||
|
||||
def get_group_ids(subject)
|
||||
def get_group_messids(subject)
|
||||
group_subjects unless @grouped
|
||||
return @groups[subject]["ids"]
|
||||
return @groups[subject]["messages"]
|
||||
end
|
||||
|
||||
def group_complete(subj)
|
||||
def group_is_complete(subj)
|
||||
group_subjects unless @grouped
|
||||
print "length: #{@groups[subj]["messages"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1
|
||||
if (@groups[subj]["messages"].length ) >= @groups[subj]["total"].to_i
|
||||
umessids = @groups[subj]["messages"].uniq
|
||||
if (umessids.length ) >= @groups[subj]["total"].to_i
|
||||
return true
|
||||
else
|
||||
return false
|
||||
end
|
||||
end
|
||||
|
||||
def group_singlepart(subj)
|
||||
def group_is_singlepart(subj)
|
||||
@groups[subj]["total"].to_i == 1
|
||||
end
|
||||
|
||||
def group_multipart(subj)
|
||||
def group_is_multipart(subj)
|
||||
@groups[subj]["total"].to_i > 1
|
||||
end
|
||||
|
||||
|
|
@ -149,7 +240,7 @@ def group_subjects
|
|||
@groups = {}
|
||||
subject_sort unless @sorted
|
||||
prev_subj = ""
|
||||
for i in (0..@subjects.length)
|
||||
for i in (0...@subjects.length)
|
||||
print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1
|
||||
if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/
|
||||
j = "#{$1}#{$4}"
|
||||
|
|
@ -163,6 +254,8 @@ def group_subjects
|
|||
if j == prev_subj and number.to_i !=0
|
||||
@groups[j]["messages"] += [ @messids[i] ]
|
||||
@groups[j]["ids"] += [ @ids[i].to_i ]
|
||||
@groups[j]["servers"] += [ @servers[i] ]
|
||||
@groups[j]["subject"] += [ @subjects[i] ]
|
||||
else
|
||||
unless number.to_i == 0
|
||||
prev_subj = j
|
||||
|
|
@ -170,52 +263,74 @@ def group_subjects
|
|||
@groups[j]["total"] = total
|
||||
@groups[j]["messages"] = [ @messids[i] ]
|
||||
@groups[j]["ids"] = [ @ids[i].to_i ]
|
||||
@groups[j]["servers"] = [ @servers[i] ]
|
||||
@groups[j]["subject"] = [ @subjects[i] ]
|
||||
end
|
||||
end
|
||||
end
|
||||
@grouped = true
|
||||
end
|
||||
|
||||
def set_skip_ids(ids)
|
||||
def set_skip_ids(server, ids)
|
||||
set = Set::IntSpan.new(ids)
|
||||
set.finite or return false
|
||||
min = set.min
|
||||
min != nil and min < 0 and return false
|
||||
@skip_ids = set
|
||||
@connections[server]["skip_ids"] = set
|
||||
return true
|
||||
end
|
||||
|
||||
def read_cache(group, cachedir, first=0, last=0)
|
||||
filename = "#{cachedir}/#{group}.ripnewscache"
|
||||
def group_update_newsrc(subject)
|
||||
for i in (0...@groups[subject]["messages"].length)
|
||||
@connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i])
|
||||
end
|
||||
end
|
||||
|
||||
def save_newsrc()
|
||||
for server in @connections.keys
|
||||
@connections[server]["newsrc"].save
|
||||
end
|
||||
end
|
||||
|
||||
def read_cache(cachedir)
|
||||
filename = "#{cachedir}/#{@group}.ripnewscache"
|
||||
excludes = {}
|
||||
@skip_ids.elements.collect!{|x| excludes[x]=true}
|
||||
for server in @connections.keys
|
||||
excludes[server] = {}
|
||||
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
|
||||
end
|
||||
if FileTest.directory?( cachedir) and FileTest.file?( filename ) and FileTest.readable?( filename )
|
||||
file = File.new( filename )
|
||||
lines = file.readlines
|
||||
for line in lines
|
||||
#print "line: #{line}\n"
|
||||
if line =~ /^(.*?)\|(\d+)\|(.*)$/
|
||||
if line =~ /^(.*?)\|(\d+)\|(.*?)\|(.*)$/
|
||||
#print "messid: #{$1}\n"
|
||||
#print "id: #{$2}\n"
|
||||
#print "subject: #{$3}\n"
|
||||
unless excludes.has_key?($2.to_i) or
|
||||
$2.to_i < first.to_i or
|
||||
$2.to_i > last.to_i
|
||||
add($1, $2, $3)
|
||||
@skip_ids.insert($2.to_i)
|
||||
#print "server: #{$3}\n"
|
||||
#print "subject: #{$4}\n"
|
||||
#print "First: #{@connections[$3]["first"].to_i}\n";
|
||||
#print "Last: #{@connections[$3]["last"].to_i}\n";
|
||||
if @connections.has_key?($3)
|
||||
unless excludes.has_key?($3) and excludes[$3].has_key?($2.to_i) or
|
||||
$2.to_i < @connections[$3]["first"].to_i or
|
||||
$2.to_i > @connections[$3]["last"].to_i
|
||||
add($1, $2, $3, $4)
|
||||
@connections[$3]["skip_ids"].insert($2.to_i)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def save_cache(group, cachedir)
|
||||
filename = "#{cachedir}/#{group}.ripnewscache"
|
||||
def save_cache(cachedir)
|
||||
filename = "#{cachedir}/#{@group}.ripnewscache"
|
||||
if FileTest.directory?( cachedir )
|
||||
file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n"
|
||||
for i in (0...@subjects.length)
|
||||
file.print("#{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n")
|
||||
#print "writing: #{@messids[i]}|#{@ids[i]}|#{@subjects[i]}\n"
|
||||
file.print("#{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n")
|
||||
#print "writing: #{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -227,13 +342,15 @@ def uudecode(data, outfile=nil)
|
|||
when "Array"
|
||||
print "Calling _uudecode_array\n" if Debuglevel>0
|
||||
mode, file, body = _uudecode_array(data)
|
||||
when "File"
|
||||
when "File", "Tempfile"
|
||||
unless outfile
|
||||
print "uudecode: need outfile\n"
|
||||
exit
|
||||
end
|
||||
print "Calling _uudecode_file\n" if Debuglevel>0
|
||||
mode, file, body = _uudecode_file(data, outfile)
|
||||
else
|
||||
print "Funny stuff in uudecode. Data of type \"#{data.type.to_s}\"\n"
|
||||
end
|
||||
return mode, file, body
|
||||
end
|
||||
|
|
@ -396,26 +513,29 @@ def subject_sort
|
|||
print "subj sort #{@subjects[i]}\n" if Debuglevel >2
|
||||
print "subj sort #{@messids[i]}\n" if Debuglevel >2
|
||||
print "subj sort #{@ids[i]}\n" if Debuglevel >2
|
||||
sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]}"]
|
||||
print "subj sort #{@servers[i]}\n" if Debuglevel >2
|
||||
sort_arr += ["#{@subjects[i]} #{@messids[i]} #{@ids[i]} #{@servers[i]}"]
|
||||
end
|
||||
sort_arr.sort!{|a,b| ward_sort(a, b)}
|
||||
@messids = []
|
||||
@ids = []
|
||||
@subjects = []
|
||||
for i in sort_arr
|
||||
i =~ /^(.*) (<[^<]*>) (\d+)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+)$/
|
||||
@servers = []
|
||||
sort_arr.collect{|i|
|
||||
i =~ /^(.*) (<[^<]*>) (\d+) (\S*)$/ || i =~ /^(.*) \[<[^<]*>\] (\d+) (\S*)$/
|
||||
@messids += [$2]
|
||||
@ids += [$3]
|
||||
@subjects += [$1]
|
||||
@servers += [$4]
|
||||
print "subject sort: #{$1}\n" if Debuglevel >2
|
||||
end
|
||||
}
|
||||
@sorted = true
|
||||
end
|
||||
|
||||
def ward_sort(a, b)
|
||||
a =~ /^(.*) (<[^<]*> \d+)$/
|
||||
a =~ /^(.*) (<[^<]*> \d+ \S*)$/
|
||||
c = $1.to_s.split(/([0-9]+)/)
|
||||
b =~ /^(.*) (<[^<]*> \d+)$/
|
||||
b =~ /^(.*) (<[^<]*> \d+ \S*)$/
|
||||
d = $1.to_s.split(/([0-9]+)/)
|
||||
|
||||
for x in c
|
||||
|
|
@ -432,7 +552,9 @@ def ward_sort(a, b)
|
|||
end
|
||||
|
||||
def quit
|
||||
@nntp.quit
|
||||
for server in @connections.keys
|
||||
@connections[server]["nntp"].quit
|
||||
end
|
||||
end
|
||||
|
||||
private :ward_sort
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue