ripnews/trunk/ripnews/news/article.rb

850 lines
21 KiB
Ruby

#!/usr/local/bin/ruby
# $Id$
# $Source$
require 'set/intspan'
require 'net/nntp'
require 'news/newsrc'
require 'tempfile'
class Article
Debuglevel = 1
# Build the per-server connection table for one newsgroup.
#
# nntpservers - String of server names separated by '|'.
# groupname   - name of the newsgroup this object works on.
# newsrc      - base path of the newsrc file; ".<server>" is appended to
#               obtain the file used for each server.
#
# For every server an NNTP connection, an (initially empty) skip set and a
# newsrc object are created, and the skip set is then seeded with the
# articles the newsrc reports as marked for the group.
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
  @group = groupname
  @serverlist = nntpservers.split('|')

  # Parallel arrays: index n of each describes the same article.
  @messids = []
  @ids = []
  @servers = []
  @subjects = []

  @sorted = false
  @grouped = false
  @groups = {}
  @gotten = {}

  @connections = {}
  @serverlist.each do |host|
    conn = {}
    # Register the entry first: set_skip_ids below looks it up by name.
    @connections[host] = conn
    conn["nntp"] = Net::NNTP.new(host)
    conn["skip_ids"] = Set::IntSpan.new()
    conn["newsrc"] = News::Newsrc.new("#{newsrc}.#{host}")
    set_skip_ids(host, conn["newsrc"].marked_articles(@group))
  end
end
# Record one article in the parallel bookkeeping arrays.
#
# messid  - the article's message-id.
# id      - the article number on the server (converted with to_i).
# server  - name of the server the article was seen on.
# subject - the article's subject line.
#
# Appends in place (the previous `+=` copied every array on every call,
# which is quadratic over a large group) and invalidates the sorted/grouped
# state so the groups are rebuilt on next use.
def add(messid, id, server, subject)
  @messids << messid
  @ids << id.to_i
  @servers << server
  @subjects << subject
  @sorted = false
  @grouped = false
end
# Collect the subject and message-id of every not-yet-skipped article of
# @group from all configured servers.
#
# cachedir - directory holding the "<group>.ripnewscache" file; it is handed
#            unchanged to read_cache before and save_cache after the fetch.
#
# Returns false when the group cannot be selected on a server or when an
# XHDR request yields a response outside 2xx; otherwise returns whatever
# save_cache returns.
def get_articles(cachedir=false)
  # Select the group everywhere first so the first/last article numbers are
  # known before the cache is consulted.
  @connections.keys.each do |server|
    begin
      resp, count, first, last, name = @connections[server]["nntp"].group(@group)
      @connections[server]["first"] = first ? first : 0
      @connections[server]["last"] = last ? last : 0
    rescue Net::NNTP::RuntimeError
      print "Couldn't open group: #{@group}\n"
      return false
    end
  end
  read_cache(cachedir)
  @connections.keys.each do |server|
    conn = @connections[server]
    print "reading articles from server: #{server}\n"
    range = Set::IntSpan.new("#{conn["first"]}-#{conn["last"]}")
    wanted = range.diff(conn["skip_ids"])
    rangelist = rechunk_runlist(wanted.run_list)
    if Debuglevel > 1
      print "rangelist: #{rangelist}\n"
      # Object#type no longer exists in current Ruby; #class is equivalent.
      print "rangelist: #{rangelist.class.to_s}\n"
      print "rangelsit elements: #{wanted.elements}\n"
    end
    next if rangelist == nil or rangelist =~ /^$/
    rangelist.split(',').each do |chunk|
      print "i: #{chunk}\n" if Debuglevel > 1
      begin
        resp, subj_lines = conn["nntp"].xhdr("subject", chunk)
        unless resp.to_i >= 200 and resp.to_i < 300
          print "got response #{resp} while reading group #{@group} from #{server}\n"
          return false
        end
        resp, messid_lines = conn["nntp"].xhdr("message-id", chunk)
        unless resp.to_i >= 200 and resp.to_i < 300
          print "got response #{resp} while reading group #{@group} from #{server}\n"
          return false
        end
        # Join the two header listings on their first column (x[0]).
        art = {}
        subj_lines.each do |x|
          (art[x[0]] ||= {})["subject"] = x[1]
          print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1
        end
        messid_lines.each do |x|
          (art[x[0]] ||= {})["messid"] = x[1]
          print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1
        end
        art.keys.each do |id|
          # Only articles for which both headers were returned are usable.
          if art[id].has_key?("subject") and art[id].has_key?("messid")
            print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1
            add(art[id]["messid"], id, server, art[id]["subject"])
          end
        end
      rescue Net::NNTP::RuntimeError => e
        # Best effort: a failing range is skipped, but no longer silently
        # when debugging is turned up.
        print "xhdr failed for #{chunk} on #{server}: #{e.message}\n" if Debuglevel > 1
      end
    end
    # if xhdr doesn't work, this should be used
    # for i in (range.diff(@connections[server]["skip_ids"]).elements)
    #   begin
    #     @connections[server]["nntp"].stat(i)
    #     resp, id, messid, list = @connections[server]["nntp"].head(i)
    #     for j in list
    #       if j =~ /Subject: (.*)/
    #         subj=$1
    #       end
    #     end
    #     print "get_articles messid: #{messid}\n" if Debuglevel > 1
    #     print "get_articles id: #{id}\n" if Debuglevel > 1
    #     print "get_articles server: #{server}\n" if Debuglevel > 1
    #     print "get_articles subject: #{subj}\n" if Debuglevel > 1
    #     add(messid, id, server, subj)
    #   rescue Net::NNTP::RuntimeError
    #     print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1
    #   end
    # end
  end
  save_cache(cachedir)
end
# Returns the hash of subject groups, building it first when the current
# article list has not been grouped yet.
def get_groups
  group_subjects if !@grouped
  @groups
end
# Returns the newsgroup name this object was created for.
def get_groupname
  @group
end
# Fetch the bodies of all not-yet-fetched articles of one subject group.
#
# subj - key into @groups.
#
# The group is sorted first. Returns one Array with the body lines of every
# fetched article, in group order. An article whose fetch raises
# Net::NNTPReplyError is reported and skipped.
#
# Previously each article's lines replaced the result instead of being
# appended, and a failed fetch left nil or a stale list in the result.
def get_group_body(subj)
  result = []
  group_subject_sort(subj)
  group = @groups[subj]
  group["messages"].each_with_index do |message, i|
    next if @gotten.has_key?(message)
    server = group["servers"][i]
    print "getting article: #{i}\n" if Debuglevel > 0
    begin
      print "Server: #{server}\n"
      print "Messid: #{message}\n"
      resp, id, messid, list = @connections[server]["nntp"].body(message)
    rescue Net::NNTPReplyError
      # Name the article that actually failed (was always index 0).
      print "Caught Net::NNTPReplyError in get_group_body reading article #{message}\n"
      next
    end
    result.concat(list) if list
  end
  return result
end
# Fetch the body of the first article of a subject group (after sorting the
# group) and remember the returned message-id in @gotten.
#
# subj - key into @groups.
#
# Returns the body lines, or false when the server answers with a
# Net::NNTPReplyError.
def get_group_body_first(subj)
  group_subject_sort(subj)
  begin
    head_server = @groups[subj]["servers"][0]
    head_message = @groups[subj]["messages"][0]
    resp, id, messid, list = @connections[head_server]["nntp"].body(head_message)
  rescue Net::NNTPReplyError
    print "Caught Net::NNTPReplyError in get_group_body_first reading article #{@groups[subj]["messages"][0]}\n"
    return false
  end
  if Debuglevel > 0
    print "getting article: #{subj}\n"
    print "message id: #{messid}\n"
    print "id: #{id}\n"
    print "server: #{@groups[subj]["servers"][0]}\n"
    print "full subject: #{@groups[subj]["subject"][0]}\n"
  end
  @gotten[messid] = true
  list
end
# Fetch the bodies of every article of a group except the first one.
#
# subj - key into @groups.
# file - optional IO-like object; when given, each body line is written to
#        it followed by "\n" instead of being collected.
#
# Articles already present in @gotten are skipped; fetched ones are added.
# Returns the collected lines (an empty Array when +file+ is used), or false
# when a fetch reports resp == false or raises Net::NNTPReplyError.
def get_group_body_rest(subj, file=nil)
  result = []
  group = @groups[subj]
  (1...group["messages"].length).each do |i|
    message = group["messages"][i]
    next if @gotten.has_key?(message)
    server = group["servers"][i]
    print "getting article: #{i}\n" if Debuglevel > 0
    begin
      print "Server: #{server}\n"
      print "Messid: #{message}\n"
      resp, id, messid, list = @connections[server]["nntp"].body(message)
      return false if resp == false
    rescue Net::NNTPReplyError
      # Name the article that actually failed (was always index 0).
      print "Caught Net::NNTPReplyError in get_group_body_rest reading article #{message}\n"
      return false
    end
    if Debuglevel > 0
      print "getting article: #{subj}\n"
      print "message id: #{messid}\n"
      print "id: #{id}\n"
      print "server: #{server}\n"
      print "full subject: #{group["subject"][i]}\n"
    end
    @gotten[message] = true
    if file
      list.each { |line| file.print "#{line}\n" }
    else
      result.concat(list)
    end
  end
  return result
end
# Returns the keys of the subject groups, grouping the articles first when
# that has not happened yet.
def get_group_subjects
  group_subjects if !@grouped
  @groups.keys
end
# Returns the message-id list stored for one subject group, grouping the
# articles first when that has not happened yet.
def get_group_messids(subject)
  group_subjects if !@grouped
  @groups[subject]["messages"]
end
# True when the group holds at least as many distinct message-ids as its
# "total" (converted with to_i) says it should have.
def group_is_complete(subj)
  group_subjects if !@grouped
  entry = @groups[subj]
  if Debuglevel > 1
    print "length: #{entry["messages"].length} total: #{entry["total"].to_i}\n"
  end
  distinct = entry["messages"].uniq
  distinct.length >= entry["total"].to_i
end
# True when the group's "total" (converted with to_i) is exactly 1.
#
# Builds the groups first when needed, like the other group accessors do;
# without that guard a call before grouping failed on a missing entry.
def group_is_singlepart(subj)
  group_subjects unless @grouped
  @groups[subj]["total"].to_i == 1
end
# True when the group's "total" (converted with to_i) is greater than 1.
#
# Builds the groups first when needed, like the other group accessors do;
# without that guard a call before grouping failed on a missing entry.
def group_is_multipart(subj)
  group_subjects unless @grouped
  @groups[subj]["total"].to_i > 1
end
# Returns the list of all recorded message-ids.
def get_messids
  @messids
end
# Returns the list of all recorded subject lines.
def get_subjects
  @subjects
end
# Rebuild @groups from the recorded articles.
#
# A subject containing "(n/m)" or "[n/m]" is keyed by the subject with that
# marker removed, and m is stored as the group's "total"; any other subject
# forms its own group with total 1. Articles whose part number converts to
# 0 are left out. Each group keeps parallel "messages", "ids", "servers"
# and "subject" arrays.
def group_subjects
  @groups = {}
  @subjects.each_index do |idx|
    subject = @subjects[idx]
    print "group subjects: #{idx} #{subject}\n" if Debuglevel > 1
    if subject =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || subject =~ /(.*)\[(\d+)\/(\d+)\](.*)/
      key, number, total = "#{$1}#{$4}", $2, $3
    else
      key, number, total = subject, 1, 1
    end
    next if number.to_i == 0
    entry = @groups[key]
    if entry
      entry["messages"] << @messids[idx]
      entry["ids"] << @ids[idx].to_i
      entry["servers"] << @servers[idx]
      entry["subject"] << subject
    else
      @groups[key] = {
        "total" => total,
        "messages" => [ @messids[idx] ],
        "ids" => [ @ids[idx].to_i ],
        "servers" => [ @servers[idx] ],
        "subject" => [ subject ]
      }
    end
  end
  @grouped = true
end
# Replace the skip set of one server.
#
# server - key into @connections.
# ids    - whatever Set::IntSpan.new accepts as a description of the set.
#
# Returns false (leaving the old set in place) when the new set is not
# finite or its minimum is negative; otherwise stores it and returns true.
def set_skip_ids(server, ids)
  span = Set::IntSpan.new(ids)
  return false unless span.finite
  lowest = span.min
  return false if lowest != nil and lowest < 0
  @connections[server]["skip_ids"] = span
  true
end
# Mark every article of a subject group in the newsrc object of the server
# it was seen on.
def group_update_newsrc(subject)
  entry = @groups[subject]
  (0...entry["messages"].length).each do |n|
    rc = @connections[entry["servers"][n]]["newsrc"]
    rc.mark(@group, entry["ids"][n])
  end
end
# Ask the newsrc object of every server to save itself.
def save_newsrc()
  @connections.keys.each do |host|
    @connections[host]["newsrc"].save
  end
end
# Load previously cached article headers for @group.
#
# cachedir - directory containing "<group>.ripnewscache"; nil/false (the
#            default passed down by get_articles) means "no cache".
#
# Each cache line has the form "id|message-id|server|subject". A line is
# added (via add) and its id inserted into the server's skip set unless the
# server is unknown, the id was already in the skip set when this method
# started, or the id lies outside the server's first..last range.
#
# Returns nil. Does nothing when the directory or a readable cache file is
# missing.
def read_cache(cachedir)
  # FileTest.directory?(false) raises TypeError, so bail out early.
  return nil unless cachedir
  filename = "#{cachedir}/#{@group}.ripnewscache"
  unless FileTest.directory?(cachedir) and FileTest.file?(filename) and FileTest.readable?(filename)
    return nil
  end

  # Snapshot of the ids to skip, taken before this method starts inserting.
  excludes = {}
  @connections.keys.each do |server|
    excludes[server] = {}
    @connections[server]["skip_ids"].elements.each { |x| excludes[server][x] = true }
  end

  # Block form closes the file even when a line raises.
  File.open(filename) do |file|
    file.each_line do |line|
      next unless line =~ /^(\d+)\|(.*?)\|(.*?)\|(.*)$/
      id, messid, server, subject = $1.to_i, $2, $3, $4
      conn = @connections[server]
      next unless conn
      next if excludes[server].has_key?(id)
      next if id < conn["first"].to_i or id > conn["last"].to_i
      add(messid, id, server, subject)
      conn["skip_ids"].insert(id)
    end
  end
  nil
end
# Write all recorded articles to "<cachedir>/<group>.ripnewscache".
#
# cachedir - target directory; nil/false or a non-directory means nothing
#            is written.
#
# One line per article in the form "id|message-id|server|subject", sorted
# as strings. Returns nil.
#
# The lines are joined explicitly: printing the Array itself writes its
# inspect form on Ruby 1.9 and later, which read_cache cannot parse. A
# failure to open the file is now actually reported (File.new raises, so
# the old `or print` could never run).
def save_cache(cachedir)
  return nil unless cachedir and FileTest.directory?(cachedir)
  filename = "#{cachedir}/#{@group}.ripnewscache"
  cache = []
  @subjects.each_index do |i|
    cache << "#{@ids[i]}|#{@messids[i]}|#{@servers[i]}|#{@subjects[i]}\n"
  end
  cache.sort!
  begin
    File.open(filename, "w") { |file| file.print cache.join }
  rescue SystemCallError
    print "couldn't open cachefile for writing\n"
  end
  nil
end
#######################################################################
# Dispatch uudecoding according to the kind of input.
#
# data    - an Array of lines (decoded in memory) or a File/Tempfile.
# outfile - IO to write decoded data to; required for File/Tempfile input
#           (the process exits when it is missing).
#
# Returns [mode, file, body] as produced by the chosen helper; all three
# are nil for any other kind of input.
#
# Dispatches on the class itself: Object#type, used before, no longer
# exists in current Ruby.
def uudecode(data, outfile=nil)
  mode = file = body = nil
  case data
  when Array
    print "Calling _uudecode_array\n" if Debuglevel>0
    mode, file, body = _uudecode_array(data)
  when File, Tempfile
    unless outfile
      print "uudecode: need outfile\n"
      exit
    end
    print "Calling _uudecode_file\n" if Debuglevel>0
    mode, file, body = _uudecode_file(data, outfile)
  else
    print "Funny stuff in uudecode. Data of type \"#{data.class.to_s}\"\n"
  end
  return mode, file, body
end
# uudecode the contents of an IO into another IO.
#
# file    - readable, seekable IO whose current position is taken as the
#           total size (used only for progress output) before rewinding.
# outfile - IO receiving the decoded bytes.
#
# Returns [mode, filename] when an "end" line is reached, or
# [mode, filename, outfile] when the input runs out first. mode and
# filename come from the "begin" line (as strings) and keep their defaults
# (0600 / "unknown") for whichever part that line does not supply.
def _uudecode_file(file, outfile)
  mode = 0600
  filename = "unknown"
  c = 0
  lines = file.pos
  percent = 0
  mark = lines/100
  file.pos=0
  while (! file.eof)
    line = file.gets
    print "line: #{line}" if Debuglevel > 0
    if line =~ /^begin(.*)/
      m = $1
      print "beginning matched; rest: #{m}\n" if Debuglevel > 0
      if m =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/
        # Both parts are optional in the pattern; keep the defaults for
        # whatever is absent instead of overwriting them with nil.
        mode = $2 if $2
        filename = $4 if $4
        print "found beginning\n" if Debuglevel > 0
      else
        print "mode, file set to defaults: #{m}\n"
      end
      break
    end
  end
  print "not uuencoded!\n" if file.eof
  print "c: #{c} mark: #{mark} lines: #{lines}\n" if Debuglevel > 1
  print " UUdecoding...\n"
  while (! file.eof)
    if Debuglevel > 1
      c = file.pos
      if c > mark
        print "#{percent}%\n"
        print "c: #{c} mark: #{mark} lines: #{lines}\n" if Debuglevel > 1
        percent += 1
        mark = (lines/100)*(percent+1)
      end
    end
    line = file.gets
    print "line: #{line}" if Debuglevel > 1
    next if line == nil
    return mode, filename if line =~ /^end/
    # Lines containing lower-case letters are skipped as non-data.
    next if line =~ /[a-z]/
    # The first character encodes the payload length; skip lines whose
    # physical length does not agree with it. unpack("C") yields the byte
    # value on every Ruby version (String#[] returns a String since 1.9).
    length_byte = line.unpack("C")[0]
    next unless ((((length_byte - 32) & 077) + 2) / 3).to_i ==
      (line.length/4).to_i
    # unpack returns a one-element Array; print its content, not the Array.
    outfile.print line.unpack("u")[0]
  end
  print "No \"end\" found!!!\n"
  return mode, filename, outfile
end
# I don't think this carries on when there are several uuencoded blocks...
# it will then have to be called several times, grmbl...
# tis getting a mess as we speak...
# should really turn this into a separate class some day...
# uudecode an Array of lines in memory.
#
# data - Array of Strings; everything before the first line starting with
#        "begin" is ignored.
#
# Returns [mode, file, decode], where decode is an Array of decoded chunks
# (one per accepted data line). mode and file come from the "begin" line
# (as strings) and keep their defaults (0600 / "unknown") for whichever
# part that line does not supply. Decoding stops at the first "end" line.
def _uudecode_array(data)
  decode = []
  mode = 0600
  file = "unknown"
  c = 0
  lines = data.length
  percent = 0
  mark = lines/100
  i = 0
  while (i < data.length)
    if data[i] =~ /^begin(.*)/
      m = $1
      print "beginning matched; rest: #{m}\n" if Debuglevel > 0
      if m =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/
        # Keep the defaults for whatever the begin line leaves out.
        mode = $2 if $2
        file = $4 if $4
        print "found beginning\n" if Debuglevel > 0
      else
        print "mode, file set to defaults: #{m}\n"
      end
      break
    end
    i += 1
  end
  unless (i < data.length)
    print "not uuencoded!\n"
  end
  print "UUdecoding...\n"
  while (i < data.length)
    if Debuglevel > 1
      if c > mark
        print "#{percent}%\n"
        print "c: #{c} mark: #{mark} lines: #{lines} i: #{i}\n" if Debuglevel > 1
        percent += 1
        mark = (lines/100)*(percent+1)
      end
      c += 1
    end
    line = data[i]
    i += 1
    # nil and empty lines carry no length byte and used to crash below.
    next if line == nil or line.empty?
    return mode, file, decode if line =~ /^end/
    # Lines containing lower-case letters (including the begin line itself)
    # are skipped as non-data.
    next if line =~ /[a-z]/
    # unpack("C") yields the byte value on every Ruby version
    # (String#[] returns a String since 1.9).
    length_byte = line.unpack("C")[0]
    next unless ((((length_byte - 32) & 077) + 2) / 3).to_i ==
      (line.length/4).to_i
    decode.concat(line.unpack("u"))
  end
  print "No \"end\" found!!!\n"
  return mode, file, decode
end
#def uudecode_group(subj, file=nil, outfile=nil)
# Fetch all articles of a subject group and uudecode them.
#
# subj    - key into @groups.
# tempdir - when given, the raw article text is spooled to a temporary file
#           in this directory and decoded into a second temporary file;
#           otherwise everything is done in memory.
#
# Returns false when the first article does not contain "begin" or when
# fetching the remaining articles fails. Otherwise returns
# [mode, filename, result]: in memory mode result is what uudecode returns,
# in tempdir mode it is the Tempfile holding the decoded data (the caller
# is responsible for it).
#
# NOTE(review): the tempdir branch previously referenced an undefined
# `outfile` and could not run; handing back the output Tempfile as the
# third element is the assumed intent - confirm against callers.
def uudecode_group(subj, tempdir=nil)
  group_subjects unless @grouped
  body = get_group_body_first(subj)
  unless body.to_s =~ /begin/
    print "Not uuencoded!\n" if Debuglevel > 0
    return false
  end
  print "uuencoded!\n" if Debuglevel > 0
  if (tempdir != nil)
    # Tempfile.new takes the basename and the directory separately.
    file = Tempfile.new("riptmp", tempdir)
    begin
      body.each { |i| file.print "#{i}\n" }
      return false if get_group_body_rest(subj, file) == false
      outfile = Tempfile.new("ripout", tempdir)
      mode, filename, result = uudecode(file, outfile)
      return mode, filename, outfile
    ensure
      # The spool file is only needed while decoding.
      file.close!
    end
  else
    rest = get_group_body_rest(subj)
    return false if rest == false
    mode, filename, result = uudecode(body + rest)
    return mode, filename, result
  end
end
# True when the string form of +data+ ends in something that looks like a
# uuencode "begin <mode> <name>" header line.
def is_uuencoded(data)
  (data.to_s =~ /begin\s+\d+?\s+.*?\S?\s*\Z/) ? true : false
end
#######################################################################
# Dispatch yEnc decoding according to the kind of input.
#
# data    - an Array of lines (decoded in memory) or a File/Tempfile.
# outfile - IO to write decoded data to; required for File/Tempfile input
#           (the process exits when it is missing).
#
# Returns [mode, file, body] as produced by the chosen helper; all three
# are nil for any other kind of input.
#
# Dispatches on the class itself: Object#type, used before, no longer
# exists in current Ruby.
def ydecode(data, outfile=nil)
  mode = file = body = nil
  case data
  when Array
    print "Calling _ydecode_array\n" if Debuglevel>0
    mode, file, body = _ydecode_array(data)
  when File, Tempfile
    unless outfile
      print "ydecode: need outfile\n"
      exit
    end
    print "Calling _ydecode_file\n" if Debuglevel>0
    mode, file, body = _ydecode_file(data, outfile)
  else
    print "Funny stuff in ydecode. Data of type \"#{data.class.to_s}\"\n"
  end
  return mode, file, body
end
# Decode yEnc data from one IO into another, following multi-part posts.
#
# file    - readable, seekable IO whose current position is taken as the
#           total size (used only for debug output) before rewinding.
# outfile - IO receiving the decoded bytes (written with putc).
#
# Returns [mode, filename] once a single-part "=yend" is seen or the
# accumulated part sizes reach the size announced in "=ybegin"; returns
# [mode, filename, outfile] when the input ends before that. mode is always
# the default 0600.
#
# Fixes relative to the previous version: the header message referenced an
# undefined `size`; `part` was always truthy because to_i never yields nil;
# "=yend size=N" with nothing after it was not parsed; a "total=" field in
# "=ybegin" was rejected; and the no-"=yend" return carried the input IO in
# the filename slot.
def _ydecode_file(file, outfile)
  header = /^\s*(part\=(\d+)\s+)?(?:total\=\d+\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*))\Z/
  mode = 0600
  filename = "unknown"
  lines = file.pos
  file.pos = 0
  bytes = 0
  total = 0
  totalsize = 0
  while (! file.eof)
    line = file.gets
    # Payload is arbitrary bytes; tag lines as binary so the regexps below
    # do not raise on invalid multibyte sequences (no-op on Ruby 1.8).
    line.force_encoding("BINARY") if line.respond_to?(:force_encoding)
    print "line: #{line}" if Debuglevel > 0
    if line =~ /^\=ybegin\s+(.*line\=.*)/
      m = $1
      print "ybegin match; rest: #{m}\n" if Debuglevel > 0
      if m =~ header
        part = $2 && $2.to_i
        linesize = $4.to_i
        totalsize = $6.to_i
        filename = $8
        print "found beginning"
        print " of part #{part}" if part
        print ", linesize = #{linesize}, size = #{totalsize}, filename = #{filename}\n"
        break
      else
        print "not a valid yenc begin line\n"
      end
    end
  end
  print "not yencoded!\n" if file.eof
  print " ydecoding...\n"
  # True while skipping text between one part's =yend and the next =ybegin.
  search_begin = false
  while (! file.eof)
    print "at #{file.pos} need to go to #{lines}\n" if Debuglevel > 1
    line = file.gets
    next if line == nil
    line.force_encoding("BINARY") if line.respond_to?(:force_encoding)
    # Drop only the line terminator; a final unterminated line keeps its
    # last data byte.
    line = line.sub(/\n\z/, "")
    if line =~ /^=yend\s+(.*)\Z/
      m = $1
      m =~ /size=(\d+)(?:\s+part=(\d+))?(?:\s+p?crc32=(\S+))?/
      size = $1.to_i
      part = $2 && $2.to_i
      if size != bytes
        print "part size mismatch, is #{bytes}, should be #{size}\n"
      end
      # No part number: single-part post, we are done.
      return mode, filename unless part
      total += bytes
      if total >= totalsize
        if total != totalsize
          print "total size mismatch, is #{total}, should be #{totalsize}\n"
        end
        return mode, filename
      end
      search_begin = true
      bytes = 0
      next
    end
    # NOTE(review): assumes encoded data never starts a line with
    # "=ybegin", so such a line is always a header - confirm against the
    # yEnc specification.
    if line =~ /^\=ybegin\s+(.*)\Z/
      search_begin = false
      if $1 =~ header
        part = $2 && $2.to_i
        linesize = $4.to_i
        totalsize = $6.to_i
        filename = $8
        print "found beginning of part #{part}, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}\n" if Debuglevel > 0
      end
      next
    end
    next if search_begin
    if line =~ /^=ypart\s+(\s*begin=(\d+))(\s+end=(\d+))/
      print "next part begin #{$2}, end #{$4}\n"
      next
    end
    # This seems to be a common 'error' - maybe I misunderstand the spec or
    # something
    # if line.length != linesize
    #   print "linesize mismatch, was #{line.length}, should be #{linesize}...\n"
    # end
    escaped = false
    line.each_byte do |b|
      if escaped
        # Byte after '=': undo the extra offset of 64 first.
        escaped = false
        b = (b - 64) % 256
      elsif b == 0x3d
        escaped = true
        next
      end
      outfile.putc((b - 42) % 256)
      bytes += 1
    end
  end
  print "No \"=yend\" found!!!\n"
  return mode, filename, outfile
end
# should really turn this into a separate class some day... that goes for this drivel too
# Decode yEnc data held in an Array of lines.
#
# data - Array of Strings (without line terminators); everything before the
#        first valid "=ybegin" line is ignored.
#
# Returns [mode, filename, [decoded]], where decoded is one String with all
# decoded bytes. mode is always the default 0600. Decoding stops at the
# first "=yend" line; "=ypart" lines are ignored.
#
# NOTE(review): because it returns at the first "=yend", only the first
# part of a multi-part post is decoded here.
def _ydecode_array(data)
  # Payload is arbitrary bytes; work on binary-tagged copies so the regexps
  # do not raise on invalid multibyte sequences (no-op on Ruby 1.8).
  raw = lambda { |s| s.respond_to?(:force_encoding) ? s.dup.force_encoding("BINARY") : s }
  decode = String.new
  decode.force_encoding("BINARY") if decode.respond_to?(:force_encoding)
  mode = 0600
  filename = "unknown"
  i = 0
  while (i < data.length)
    line = raw.call(data[i])
    if line =~ /^\=ybegin\s+(.*line\=.*)/
      m = $1
      print "ybegin match; rest: #{m}\n" if Debuglevel > 0
      if m =~ /^(\s*(part\=(\d+)\s+)?line\=(\d+))(\s*size\=(\d+))(\s*name=(.*))\Z/
        linesize = $4.to_i
        size = $6.to_i
        filename = $8
        print "found beginning, linesize = #{linesize}, size = #{size}, filename = #{filename}\n" if Debuglevel > 0
        i += 1
        break
      else
        print "not a valid yenc begin line\n"
      end
    end
    i += 1
  end
  unless (i < data.length)
    print "not yencoded!\n"
  end
  print "ydecoding...\n"
  while (i < data.length)
    print "at #{i} need to go to #{data.length}\r" if Debuglevel > 1
    line = raw.call(data[i])
    i += 1
    next if line == nil
    if line =~ /^\=yend(\s+size=(\d+))(\s+crc32=(\S+))?/
      size = $2.to_i
      if size != decode.length
        print "size mismatch, was #{decode.length}, should be #{size}\n"
      end
      return mode, filename, [ decode ]
    end
    if line =~ /^\=ypart.*\Z/
      # ignore for now
      next
    end
    # This seems to be a common 'error' - maybe I misunderstand the spec or
    # something
    # if line.length != linesize
    #   print "#{i}: linesize mismatch, was #{line.length}, should be #{linesize}...\n"
    # end
    escaped = false
    line.each_byte do |b|
      if escaped
        # Byte after '=': undo the extra offset of 64 first.
        escaped = false
        b = (b - 64) % 256
      elsif b == 0x3d
        escaped = true
        next
      end
      decode << ((b - 42) % 256).chr
    end
  end
  # Was "${i}", which Ruby prints literally.
  print "#{i}: no \"=yend\" found!!!\n"
  return mode, filename, [ decode ]
end
# Fetch all articles of a subject group and yEnc-decode them.
#
# subj    - key into @groups.
# tempdir - when given, the raw article text is spooled to a temporary file
#           in this directory and decoded into a second temporary file;
#           otherwise everything is done in memory.
#
# Returns false when the first article does not contain "=ybegin" or when
# fetching the remaining articles fails. Otherwise returns
# [mode, filename, result]: in memory mode result is what ydecode returns,
# in tempdir mode it is the Tempfile holding the decoded data (the caller
# is responsible for it).
#
# NOTE(review): the tempdir branch previously referenced an undefined
# `outfile` and could not run; handing back the output Tempfile as the
# third element is the assumed intent - confirm against callers.
def ydecode_group(subj, tempdir=nil)
  group_subjects unless @grouped
  body = get_group_body_first(subj)
  unless body.to_s =~ /=ybegin/
    print "Not yencoded!\n" if Debuglevel > 0
    return false
  end
  print "yencoded!\n" if Debuglevel > 0
  if (tempdir != nil)
    # Tempfile.new takes the basename and the directory separately.
    file = Tempfile.new("riptmp", tempdir)
    begin
      body.each { |i| file.print "#{i}\n" }
      return false if get_group_body_rest(subj, file) == false
      outfile = Tempfile.new("ripout", tempdir)
      mode, filename, result = ydecode(file, outfile)
      return mode, filename, outfile
    ensure
      # The spool file is only needed while decoding.
      file.close!
    end
  else
    rest = get_group_body_rest(subj)
    return false if rest == false
    mode, filename, result = ydecode(body + rest)
    return mode, filename, result
  end
end
# True when the string form of +data+ contains a "=ybegin" marker.
def is_yencoded(data)
  (data.to_s =~ /=ybegin/) ? true : false
end
###############################################################
# Sort the articles of one subject group in place.
#
# subj - key into @groups.
#
# Articles are ordered by subject using ward_sort; articles with equal
# subjects are ordered by the position of their server in @serverlist. The
# "subject", "messages", "ids" and "servers" arrays are replaced by their
# sorted versions; every other key of the group (notably "total") is left
# alone.
#
# The previous version cleared the whole group hash, which dropped "total",
# and called dup on the integer ids, which raises before Ruby 2.4.
def group_subject_sort(subj)
  rank = {}
  @serverlist.each_with_index { |server, idx| rank[server] = idx }

  group = @groups[subj]
  rows = []
  group["subject"].each_index do |i|
    if Debuglevel > 2
      print "subj sort #{group["subject"][i]}\n"
      print "subj sort #{group["messages"][i]}\n"
      print "subj sort #{group["ids"][i]}\n"
      print "subj sort #{group["servers"][i]}\n"
    end
    rows << [ group["subject"][i], group["messages"][i], group["ids"][i], group["servers"][i] ]
  end

  rows.sort! do |a, b|
    order = ward_sort(a[0], b[0])
    order = rank[a[3]] <=> rank[b[3]] if order == 0
    order
  end

  if Debuglevel > 2
    rows.each do |row|
      print "subject sort: #{row[0]}\n"
      print "server: #{row[3]}\n"
    end
  end

  group["subject"] = rows.collect { |row| row[0] }
  group["messages"] = rows.collect { |row| row[1] }
  group["ids"] = rows.collect { |row| row[2] }
  group["servers"] = rows.collect { |row| row[3] }
end
# Comparator that orders strings piece by piece, comparing runs of digits
# numerically and everything else as text ("a2" sorts before "a10").
#
# Returns -1, 0 or 1. When +a+ runs out of pieces while +b+ still has some,
# +a+ sorts first.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)
  left.each_with_index do |x, idx|
    y = right[idx]
    both_numeric = (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
    cmp = both_numeric ? (x.to_i <=> y.to_i) : (x.to_s <=> y.to_s)
    return cmp unless cmp == 0
  end
  return -1 if right.length > left.length
  0
end
# Split the ranges of a run list so that none is longer than the chunking
# loop allows.
#
# runlist - String such as "1-500,600,700-710", or nil.
#
# A range "a-b" whose span (b - a) exceeds 200 is cut into consecutive
# pieces of 200 numbers each, followed by the remainder; single numbers and
# short ranges pass through unchanged. Returns the new comma-separated
# String, or nil when +runlist+ is nil or empty.
#
# The previous version returned from inside the collect! block on the very
# first element, so the list always came back unchanged.
def rechunk_runlist(runlist)
  return nil if runlist == nil
  chunks = runlist.split(',').collect do |run|
    if run =~ /(.*)-(.*)/
      first = $1
      last = $2
      pieces = []
      while (last.to_i - first.to_i) > 200
        pieces << "#{first}-#{first.to_i + 199}"
        first = first.to_i + 200
      end
      pieces << "#{first}-#{last}"
      pieces.join(",")
    else
      run
    end
  end
  return nil if chunks.empty?
  chunks.join(",")
end
# Send quit to the NNTP connection of every configured server.
def quit
  @connections.keys.each do |host|
    @connections[host]["nntp"].quit
  end
end
private :ward_sort
end # class