ripnews/trunk/ripnews/news/article.rb

596 lines
16 KiB
Ruby
Raw Normal View History

2002-04-28 16:29:56 +00:00
#!/usr/local/bin/ruby
# $Id$
# $Source$
2002-04-28 22:06:03 +00:00
require 'set/intspan'
2002-04-28 16:29:56 +00:00
require 'net/nntp'
require 'news/newsrc'
require 'tempfile'
2002-04-28 16:29:56 +00:00
class Article
# Verbosity threshold for the diagnostic prints in this class. The methods
# below compare it with "> 0", "> 1" and "> 2", so a larger value prints more.
Debuglevel = 1
2002-04-28 16:29:56 +00:00
# Sets up empty article tables and one connection record per news server.
#
# nntpservers - server names separated by '|'
# groupname   - newsgroup this object works on
# newsrc      - base path of the newsrc files; ".<server>" is appended to it
#               for each server
#
# Each record in @connections holds the NNTP handle ("nntp"), the set of
# article numbers to skip ("skip_ids") and the per-server newsrc ("newsrc").
# The skip set is seeded from the articles the newsrc reports as marked for
# the group.
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
  @group = groupname

  # Parallel arrays: index i in each describes the same article.
  @messids = []
  @ids = []
  @servers = []
  @subjects = []

  @sorted = false
  @grouped = false
  @groups = {}
  @gotten = {}

  @serverlist = nntpservers.split('|')
  @connections = {}
  @serverlist.each do |host|
    entry = {}
    @connections[host] = entry
    entry["nntp"] = Net::NNTP.new(host)
    entry["skip_ids"] = Set::IntSpan.new()
    entry["newsrc"] = News::Newsrc.new("#{newsrc}.#{host}")
    set_skip_ids(host, entry["newsrc"].marked_articles(@group))
  end
end
# Records one article in the parallel article tables.
#
# messid  - message-id of the article
# id      - article number on the server (converted with to_i)
# server  - name of the server the article was seen on
# subject - subject line of the article
#
# Appends in place instead of rebuilding each array with "+=", which copied
# all four tables on every call. Any array previously handed out by the
# getters therefore sees the new entry as well.
# Invalidates the sorted/grouped state; returns false.
def add(messid, id, server, subject)
  @messids << messid
  @ids << id.to_i
  @servers << server
  @subjects << subject
  @sorted = false
  @grouped = false
end
# Collects subject and message-id headers for every article of @group that
# is not in a server's skip set, and registers each article through add.
#
# cachedir - directory of the header cache, or false/nil for no cache. When
#            given, the cache is read before the servers are queried and
#            rewritten afterwards.
#
# Returns false when the group cannot be selected on a server or an XHDR
# request is answered with a status outside 200-299. Otherwise returns the
# result of save_cache, or true when no cache directory was given.
def get_articles(cachedir=false)
  # Select the group everywhere first, so a missing group aborts before any
  # cache or header work is done.
  @connections.keys.each do |server|
    begin
      resp, count, first, last, name = @connections[server]["nntp"].group(@group)
      @connections[server]["first"] = first || 0
      @connections[server]["last"] = last || 0
    rescue Net::NNTP::RuntimeError
      print "Couldn't open group: #{@group}\n"
      return false
    end
  end

  # Skipped without a cache directory: the cache helpers test the directory
  # with FileTest, which does not accept false.
  read_cache(cachedir) if cachedir

  @connections.keys.each do |server|
    print "reading articles from server: #{server}\n"
    conn = @connections[server]
    range = Set::IntSpan.new("#{conn["first"]}-#{conn["last"]}")
    wanted = range.diff(conn["skip_ids"])
    rangelist = wanted.run_list
    if Debuglevel > 1
      print "rangelist: #{rangelist}\n"
      print "rangelist: #{rangelist.class.to_s}\n"
      print "rangelsit elements: #{wanted.elements}\n"
    end
    next if rangelist.nil? || rangelist =~ /^$/

    rangelist.split(',').each do |span|
      print "i: #{span}\n" if Debuglevel > 1
      begin
        # art maps article number => {"subject" => ..., "messid" => ...}
        art = {}
        [["subject", "subject", "subj"],
         ["message-id", "messid", "messid"]].each do |header, key, label|
          resp, lines = conn["nntp"].xhdr(header, span)
          unless resp.to_i >= 200 && resp.to_i < 300
            print "got response #{resp} while reading group #{@group} from #{server}\n"
            return false
          end
          lines.each do |x|
            art[x[0]] = {} unless art.has_key?(x[0])
            art[x[0]][key] = x[1]
            print "art id: #{x[0]} #{label}: #{x[1]}\n" if Debuglevel > 1
          end
        end
        # Only articles for which both headers came back are usable.
        art.keys.each do |id|
          next unless art[id].has_key?("subject") && art[id].has_key?("messid")
          print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1
          add(art[id]["messid"], id, server, art[id]["subject"])
        end
      rescue Net::NNTP::RuntimeError
        # Best effort: give up on this range only, but say so.
        print "skipping range #{span} on #{server}: #{$!}\n" if Debuglevel > 0
      end
    end
    # if xhdr doesn't work, this should be used
    # for i in (range.diff(@connections[server]["skip_ids"]).elements)
    #   begin
    #     @connections[server]["nntp"].stat(i)
    #     resp, id, messid, list = @connections[server]["nntp"].head(i)
    #     for j in list
    #       if j =~ /Subject: (.*)/
    #         subj=$1
    #       end
    #     end
    #     print "get_articles messid: #{messid}\n" if Debuglevel > 1
    #     print "get_articles id: #{id}\n" if Debuglevel > 1
    #     print "get_articles server: #{server}\n" if Debuglevel > 1
    #     print "get_articles subject: #{subj}\n" if Debuglevel > 1
    #     add(messid, id, server, subj)
    #   rescue Net::NNTP::RuntimeError
    #     print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
    #   end
    # end
  end
  # subject_sort unless @sorted # store cache sorted
  cachedir ? save_cache(cachedir) : true
end
# Returns the hash of grouped articles, building it first via
# group_subjects when the grouping is not current. (The stray
# revision-timestamp lines that sat inside the body were not Ruby and have
# been removed.)
def get_groups
  group_subjects unless @grouped
  @groups
end
2002-05-06 11:46:56 +00:00
# Returns the name of the newsgroup this object was created for.
def get_groupname
  @group
end
2002-04-28 16:29:56 +00:00
# Fetches the bodies of all not-yet-fetched articles of one subject group
# and returns their lines concatenated in sorted part order.
#
# subj - key of the group in @groups
#
# Articles already present in @gotten are skipped. An article whose fetch
# raises Net::NNTPReplyError is reported and skipped; the remaining parts
# are still fetched. Returns an Array of body lines (empty if nothing was
# fetched).
def get_group_body(subj)
  result = []
  group_subject_sort(subj)
  group = @groups[subj]
  group["messages"].each_with_index do |wanted, i|
    next if @gotten.has_key?(wanted)
    print "getting article: #{i}\n" if Debuglevel > 0
    server = group["servers"][i]
    begin
      print "Server: #{server}\n"
      print "Messid: #{wanted}\n"
      resp, id, messid, list = @connections[server]["nntp"].body(wanted)
    rescue Net::NNTPReplyError
      # Name the article that actually failed, not the first of the group.
      print "Caught Net::NNTPReplyError reading article #{wanted}\n"
      next
    end
    # Accumulate every part; assigning would keep only the last one.
    result.concat(list) if list
  end
  return result
end
# Fetches the body of the first article (after sorting) of a subject group.
#
# subj - key of the group in @groups
#
# Returns the Array of body lines, or false when the server answers with
# Net::NNTPReplyError. On success the article is recorded in @gotten under
# the message-id reported by the server and under the message-id that was
# requested; the other fetch methods look articles up by the requested id.
def get_group_body_first(subj)
  group_subject_sort(subj)
  group = @groups[subj]
  wanted = group["messages"][0]
  server = group["servers"][0]
  begin
    resp, id, messid, list = @connections[server]["nntp"].body(wanted)
  rescue Net::NNTPReplyError
    print "Caught Net::NNTPReplyError reading article #{wanted}\n"
    return false
  end
  if Debuglevel > 0
    print "getting article: #{subj}\n"
    print "message id: #{messid}\n"
    print "id: #{id}\n"
    print "server: #{server}\n"
    print "full subject: #{group["subject"][0]}\n"
  end
  @gotten[messid] = true
  @gotten[wanted] = true
  return list
end
# Fetches the bodies of articles 1..n of a subject group (everything after
# the first article), skipping those already recorded in @gotten.
#
# subj - key of the group in @groups
# file - optional IO; when given, each body line is written to it followed
#        by "\n" and nothing is accumulated in memory
#
# Returns false as soon as a fetch reports failure (a false response or
# Net::NNTPReplyError). Otherwise returns the collected lines, which is an
# empty Array when file is given. Each fetched article is recorded in
# @gotten under its requested message-id.
def get_group_body_rest(subj, file=nil)
  result = []
  group = @groups[subj]
  (1...group["messages"].length).each do |i|
    wanted = group["messages"][i]
    next if @gotten.has_key?(wanted)
    server = group["servers"][i]
    print "getting article: #{i}\n" if Debuglevel > 0
    begin
      print "Server: #{server}\n"
      print "Messid: #{wanted}\n"
      resp, id, messid, list = @connections[server]["nntp"].body(wanted)
      return false if resp == false
    rescue Net::NNTPReplyError
      # Name the article that actually failed, not the first of the group.
      print "Caught Net::NNTPReplyError reading article #{wanted}\n"
      return false
    end
    if Debuglevel > 0
      print "getting article: #{subj}\n"
      print "message id: #{messid}\n"
      print "id: #{id}\n"
      print "server: #{server}\n"
      print "full subject: #{group["subject"][i]}\n"
    end
    @gotten[wanted] = true
    if file
      list.each { |line| file.print "#{line}\n" }
    else
      result.concat(list)
    end
  end
  return result
end
# Returns the keys of @groups (one per subject group), building the groups
# first via group_subjects when the grouping is not current. (The stray
# revision-timestamp line that sat inside the body was not Ruby and has been
# removed.)
def get_group_subjects
  group_subjects unless @grouped
  @groups.keys
end
# Returns the message-ids stored for one subject group.
#
# subject - key of the group in @groups
#
# Builds the groups first via group_subjects when the grouping is not
# current. (The stray revision-timestamp lines that sat inside the body were
# not Ruby and have been removed.)
def get_group_messids(subject)
  group_subjects unless @grouped
  @groups[subject]["messages"]
end
# Tells whether a subject group holds at least as many distinct message-ids
# as its recorded total.
#
# subj - key of the group in @groups
#
# Duplicate message-ids are counted once. Builds the groups first via
# group_subjects when the grouping is not current. Returns true or false.
# (The stray revision-timestamp lines that sat inside the body were not Ruby
# and have been removed.)
def group_is_complete(subj)
  group_subjects unless @grouped
  group = @groups[subj]
  print "length: #{group["messages"].length} total: #{group["total"].to_i}\n" if Debuglevel > 1
  group["messages"].uniq.length >= group["total"].to_i
end
# True when the recorded total of the subject group, converted with to_i,
# is exactly 1.
def group_is_singlepart(subj)
  parts = @groups[subj]["total"].to_i
  parts == 1
end
# True when the recorded total of the subject group, converted with to_i,
# is greater than 1.
def group_is_multipart(subj)
  parts = @groups[subj]["total"].to_i
  parts > 1
end
2002-04-28 22:06:03 +00:00
# Returns the array of message-ids of all recorded articles. (The stray
# revision-timestamp line that sat inside the body was not Ruby and has been
# removed.)
def get_messids
  @messids
end
# Returns the array of subject lines of all recorded articles.
def get_subjects
  @subjects
end
# Rebuilds @groups from the article tables, putting the parts of a
# multi-part posting under one key.
#
# A subject containing "(n/m)" or "[n/m]" is keyed by the subject with that
# marker removed; n is the part number and m the total. Any other subject is
# its own key with part number 1 and total 1. Articles whose part number
# converts to 0 are left out.
#
# Each group is a hash with "total" (taken from the first article seen for
# the key) and the parallel arrays "messages", "ids", "servers" and
# "subject". Sets @grouped to true.
def group_subjects
  @groups = {}
  @subjects.each_with_index do |subject, i|
    print "group subjects: #{i} #{subject}\n" if Debuglevel > 1
    if subject =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || subject =~ /(.*)\[(\d+)\/(\d+)\](.*)/
      key = "#{$1}#{$4}"
      number = $2
      total = $3
    else
      key = subject
      number = 1
      total = 1
    end
    next if number.to_i == 0

    unless @groups.has_key?(key)
      @groups[key] = {
        "total" => total,
        "messages" => [],
        "ids" => [],
        "servers" => [],
        "subject" => []
      }
    end
    entry = @groups[key]
    entry["messages"] << @messids[i]
    entry["ids"] << @ids[i].to_i
    entry["servers"] << @servers[i]
    entry["subject"] << @subjects[i]
  end
  @grouped = true
end
# Replaces the set of article numbers to skip for one server.
#
# server - key into @connections
# ids    - anything Set::IntSpan.new accepts
#
# The new set is rejected (false is returned and the old set kept) when it
# is not finite or when its minimum is negative. Returns true after storing
# the set.
def set_skip_ids(server, ids)
  set = Set::IntSpan.new(ids)
  return false unless set.finite
  lowest = set.min
  return false if lowest != nil && lowest < 0
  @connections[server]["skip_ids"] = set
  return true
end
# Marks every article of a subject group in the newsrc of the server the
# article was seen on.
#
# subject - key of the group in @groups
#
# (The stray revision-timestamp line that sat inside the body was not Ruby
# and has been removed.)
def group_update_newsrc(subject)
  group = @groups[subject]
  (0...group["messages"].length).each do |i|
    @connections[group["servers"][i]]["newsrc"].mark(@group, group["ids"][i])
  end
end
# Calls save on the newsrc object of every server in @connections.
def save_newsrc
  @connections.keys.each do |host|
    @connections[host]["newsrc"].save
  end
end
# Loads previously cached article headers for @group from
# "<cachedir>/<group>.ripnewscache".
#
# cachedir - cache directory; false/nil means "no cache" and nothing is done
#
# Each cache line has the form "messid|id|server|subject". A line is used
# only when its server is one of @connections, its id was not in that
# server's skip set when reading started, and the id lies within the
# server's "first".."last" range. Every accepted article is registered
# through add and its id is inserted into the server's skip set.
#
# Does nothing when the directory or a readable cache file is missing. The
# file is read with File.foreach so the handle is closed afterwards.
def read_cache(cachedir)
  return nil unless cachedir
  filename = "#{cachedir}/#{@group}.ripnewscache"
  return nil unless FileTest.directory?(cachedir) &&
                    FileTest.file?(filename) &&
                    FileTest.readable?(filename)

  # Snapshot of the ids to skip, taken before any cached id is inserted.
  excludes = {}
  @connections.keys.each do |server|
    excludes[server] = {}
    @connections[server]["skip_ids"].elements.each { |x| excludes[server][x] = true }
  end

  File.foreach(filename) do |line|
    next unless line =~ /^(.*?)\|(\d+)\|(.*?)\|(.*)$/
    messid, id_text, server, subject = $1, $2, $3, $4
    next unless @connections.has_key?(server)
    conn = @connections[server]
    id = id_text.to_i
    next if excludes[server].has_key?(id)
    next if id < conn["first"].to_i || id > conn["last"].to_i
    add(messid, id_text, server, subject)
    conn["skip_ids"].insert(id)
  end
  nil
end
# Writes all recorded articles to "<cachedir>/<group>.ripnewscache", one
# "messid|id|server|subject" line per article.
#
# cachedir - cache directory; nothing is written when it is false/nil or not
#            an existing directory
#
# The file is written inside a File.open block so it is flushed and closed
# before returning. File.new raises on failure rather than returning nil, so
# the failure message is printed from a rescue.
#
# Returns true when the cache was written, false when the file could not be
# written, nil when there was no usable cache directory.
def save_cache(cachedir)
  return nil unless cachedir && FileTest.directory?(cachedir)
  filename = "#{cachedir}/#{@group}.ripnewscache"
  begin
    File.open(filename, "w") do |file|
      @subjects.each_index do |i|
        file.print("#{@messids[i]}|#{@ids[i]}|#{@servers[i]}|#{@subjects[i]}\n")
      end
    end
  rescue SystemCallError
    print "couldn't open cachefile for writing\n"
    return false
  end
  return true
end
#######################################################################
2002-04-28 16:29:56 +00:00
# Dispatches uudecoding according to the kind of input.
#
# data    - an Array of lines, or a File/Tempfile holding the encoded text
# outfile - IO receiving the decoded bytes; required for File/Tempfile input
#
# Returns mode, file, body as produced by _uudecode_array or _uudecode_file.
# For any other kind of input a message is printed and three nils are
# returned. NOTE: a File/Tempfile input without outfile terminates the
# program through exit.
#
# The input kind is matched on its class; Object#type is not available on
# current Ruby versions.
def uudecode(data, outfile=nil)
  mode = file = body = nil
  case data
  when Array
    print "Calling _uudecode_array\n" if Debuglevel>0
    mode, file, body = _uudecode_array(data)
  when File, Tempfile
    unless outfile
      print "uudecode: need outfile\n"
      exit
    end
    print "Calling _uudecode_file\n" if Debuglevel>0
    mode, file, body = _uudecode_file(data, outfile)
  else
    print "Funny stuff in uudecode. Data of type \"#{data.class.to_s}\"\n"
  end
  return mode, file, body
end
# Decodes the first uuencoded block found in an IO and writes the decoded
# bytes to outfile.
#
# file    - IO positioned at the end of the encoded text; its position is
#           taken as the size for the progress display and it is then
#           rewound to the start
# outfile - IO receiving the decoded bytes
#
# Returns [mode, filename] when the closing "end" line is found, and
# [mode, filename, outfile] when the input ends without one. mode and
# filename come from the "begin" line (as Strings); where the line does not
# carry them the defaults 0600 and "unknown" are kept.
def _uudecode_file(file, outfile)
  mode = 0600
  filename = "unknown"
  c = 0
  lines = file.pos
  percent = 0
  mark = lines/100
  file.pos = 0

  # Skip everything up to and including the "begin" line.
  until file.eof
    line = file.gets
    print "line: #{line}" if Debuglevel > 0
    next unless line =~ /^begin(.*)/
    m = $1
    print "beginning matched; rest: #{m}\n" if Debuglevel > 0
    if m =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/
      # Both parts are optional in the pattern; keep the default for a
      # missing one instead of overwriting it with nil.
      mode = $2 || mode
      filename = $4 || filename
      print "found beginning\n" if Debuglevel > 0
    else
      print "mode, file set to defaults: #{m}\n"
    end
    break
  end
  print "not uuencoded!\n" if file.eof
  print "c: #{c} mark: #{mark} lines: #{lines}\n" if Debuglevel > 1
  print " UUdecoding...\n"

  until file.eof
    if Debuglevel > 1
      c = file.pos
      if c > mark
        print "#{percent}%\n"
        print "c: #{c} mark: #{mark} lines: #{lines}\n" if Debuglevel > 1
        percent += 1
        mark = (lines/100)*(percent+1)
      end
    end
    line = file.gets
    print "line: #{line}" if Debuglevel > 1
    return mode, filename if line =~ /^end/
    # The uuencode alphabet has no lower-case letters, so such lines are
    # not data.
    next if line == nil || line =~ /[a-z]/
    # Read the length character as a byte value; this does not depend on
    # what String#[] returns.
    count = line.unpack("C")[0]
    next if count == nil
    # The length character must agree with the number of 4-character groups.
    next unless (((count - 32) & 077) + 2) / 3 == line.length / 4
    # unpack returns a one-element Array; write the decoded String itself.
    outfile.print line.unpack("u")[0]
  end
  print "No \"end\" found!!!\n"
  return mode, filename, outfile
end
# Original author's note (translated from Dutch): I don't think this carries
# on when there are several uuencoded blocks... it will then have to be
# called several times, grumble... it's getting a mess as we speak... should
# really make a separate class out of this some day...
#
# Decodes the first uuencoded block found in an Array of lines.
#
# data - Array of Strings, one per line
#
# Returns [mode, file, decode]: mode and file come from the "begin" line (as
# Strings), and where the line does not carry them the defaults 0600 and
# "unknown" are kept. decode is an Array with one decoded String per
# accepted data line. Decoding stops at the first line starting with "end";
# without one, whatever was decoded is returned after a warning.
def _uudecode_array(data)
  decode = []
  mode = 0600
  file = "unknown"
  c = 0
  lines = data.length
  percent = 0
  mark = lines/100

  # Find the "begin" line. i is left pointing at it; the decode loop below
  # skips it because it contains lower-case letters.
  i = 0
  while i < data.length
    if data[i] =~ /^begin(.*)/
      m = $1
      print "beginning matched; rest: #{m}\n" if Debuglevel > 0
      if m =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/
        # Both parts are optional in the pattern; keep the default for a
        # missing one instead of overwriting it with nil.
        mode = $2 || mode
        file = $4 || file
        print "found beginning\n" if Debuglevel > 0
      else
        print "mode, file set to defaults: #{m}\n"
      end
      break
    end
    i += 1
  end
  print "not uuencoded!\n" unless i < data.length
  print "UUdecoding...\n"

  while i < data.length
    if Debuglevel > 1
      if c > mark
        print "#{percent}%\n"
        print "c: #{c} mark: #{mark} lines: #{lines} i: #{i}\n" if Debuglevel > 1
        percent += 1
        mark = (lines/100)*(percent+1)
      end
      c += 1
    end
    line = data[i]
    i += 1
    return mode, file, decode if line =~ /^end/
    # The uuencode alphabet has no lower-case letters, so such lines are
    # not data.
    next if line == nil || line =~ /[a-z]/
    # Read the length character as a byte value; nil for an empty line,
    # which is skipped.
    count = line.unpack("C")[0]
    next if count == nil
    # The length character must agree with the number of 4-character groups.
    next unless (((count - 32) & 077) + 2) / 3 == line.length / 4
    decode.concat(line.unpack("u"))
  end
  print "No \"end\" found!!!\n"
  return mode, file, decode
end
#def uudecode_group(subj, file=nil, outfile=nil)
# Fetches all articles of a subject group and uudecodes them.
#
# subj    - key of the group in @groups
# tempdir - optional directory for temporary files. When given, the article
#           text is spooled to a temporary file there and the decoded data
#           is written to a second temporary file instead of being held in
#           memory.
#
# Returns false when the first article does not contain "begin" or when
# fetching the remaining articles fails. Otherwise returns
# mode, filename, result as delivered by uudecode. With tempdir, result is
# the Tempfile holding the decoded data, rewound to its start; closing and
# removing it is up to the caller.
def uudecode_group(subj, tempdir=nil)
  group_subjects unless @grouped
  body = get_group_body_first(subj)
  unless body.to_s =~ /begin/
    print "Not uuencoded!\n" if Debuglevel > 0
    return false
  end
  print "uuencoded!\n" if Debuglevel > 0

  if tempdir != nil
    # Tempfile.new takes the directory as its second argument, not as part
    # of the basename.
    spool = Tempfile.new("riptmp", tempdir)
    outfile = Tempfile.new("ripout", tempdir)
    begin
      body.each { |line| spool.print "#{line}\n" }
      if get_group_body_rest(subj, spool) == false
        outfile.close(true)
        return false
      end
      mode, filename, result = uudecode(spool, outfile)
    ensure
      # The spooled article text is not needed once decoding is over.
      spool.close(true)
    end
    outfile.flush
    outfile.rewind
    return mode, filename, outfile
  else
    rest = get_group_body_rest(subj)
    return false if rest == false
    mode, filename, result = uudecode(body + rest)
    return mode, filename, result
  end
end
# Reports whether the string form of data contains a uuencode header, i.e.
# "begin", whitespace, digits, whitespace and then the rest of the text up
# to its end. Returns true or false.
def is_uuencoded(data)
  position = (data.to_s =~ /begin\s+\d+?\s+.*?\S?\s*\Z/)
  position ? true : false
end
2002-04-28 16:29:56 +00:00
###############################################################
# Sorts the article tables (@subjects, @messids, @ids, @servers) together,
# ordered by ward_sort on the key "subject messid id server".
#
# Only the key strings go through the comparator; the tables are then
# reordered by index. This keeps every field unchanged (ids stay Integers,
# message-ids need no particular form) and touches exactly the existing
# articles. Sets @sorted to true.
def subject_sort
  print "Sorting articles\n"
  keyed = []
  @subjects.each_index do |i|
    if Debuglevel > 2
      print "subj sort #{@subjects[i]}\n"
      print "subj sort #{@messids[i]}\n"
      print "subj sort #{@ids[i]}\n"
      print "subj sort #{@servers[i]}\n"
    end
    keyed << ["#{@subjects[i]} #{@messids[i]} #{@ids[i]} #{@servers[i]}", i]
  end
  keyed.sort! { |a, b| ward_sort(a[0], b[0]) }
  order = keyed.collect { |pair| pair[1] }

  @messids = order.collect { |i| @messids[i] }
  @ids = order.collect { |i| @ids[i] }
  @servers = order.collect { |i| @servers[i] }
  @subjects = order.collect { |i| @subjects[i] }
  @subjects.each { |subject| print "subject sort: #{subject}\n" } if Debuglevel > 2
  @sorted = true
end
2002-05-06 11:46:56 +00:00
# Sorts the articles of one subject group, ordered by ward_sort on the key
# "subject messid id server".
#
# subj - key of the group in @groups
#
# The "messages", "ids", "subject" and "servers" arrays are reordered
# together by index. Every other entry of the group, notably "total", is
# left in place, and message-ids need no particular form to survive the
# sort.
def group_subject_sort(subj)
  print "Sorting articles\n"
  group = @groups[subj]
  keyed = []
  group["subject"].each_index do |i|
    if Debuglevel > 2
      print "subj sort #{group["subject"][i]}\n"
      print "subj sort #{group["messages"][i]}\n"
      print "subj sort #{group["ids"][i]}\n"
      print "subj sort #{group["servers"][i]}\n"
    end
    keyed << ["#{group["subject"][i]} #{group["messages"][i]} #{group["ids"][i]} #{group["servers"][i]}", i]
  end
  keyed.sort! { |a, b| ward_sort(a[0], b[0]) }
  order = keyed.collect { |pair| pair[1] }

  ["messages", "ids", "subject", "servers"].each do |field|
    column = group[field]
    group[field] = order.collect { |i| column[i] }
  end
  # Article numbers are kept as Integers.
  group["ids"] = group["ids"].collect { |n| n.to_i }
  group["subject"].each { |subject| print "subject sort: #{subject}\n" } if Debuglevel > 2
end
2002-04-28 16:29:56 +00:00
# Comparator for sort keys of the form "subject <messid> id server".
#
# Only the subject part is compared. It is split into alternating text and
# digit runs; digit runs are compared as numbers, everything else as text,
# so "part 2" orders before "part 10". A key that does not have the expected
# form is treated as an empty subject.
#
# Returns -1, 0 or 1; 0 when both subjects are the same.
def ward_sort(a, b)
  a =~ /^(.*) (<[^<]*> \d+ \S*)$/
  left = $1.to_s.split(/([0-9]+)/)
  b =~ /^(.*) (<[^<]*> \d+ \S*)$/
  right = $1.to_s.split(/([0-9]+)/)

  left.each do |x|
    y = right.shift
    if (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
      r = x.to_i <=> y.to_i
    else
      r = x.to_s <=> y.to_s
    end
    return r if r != 0
  end
  # Leftover pieces on the right mean a is a proper prefix of b. An Array is
  # always truthy, so emptiness has to be tested explicitly.
  return -1 unless right.empty?
  return 0
end
# Calls quit on the NNTP handle of every server in @connections. (The stray
# revision-timestamp line that sat inside the body was not Ruby and has been
# removed.)
def quit
  @connections.keys.each do |server|
    @connections[server]["nntp"].quit
  end
end
private :ward_sort
end # class