934 lines
26 KiB
Ruby
934 lines
26 KiB
Ruby
# $Dwarf: article.rb,v 1.109 2005/02/07 08:53:43 ward Exp $
|
|
# $Source$
|
|
|
|
#
|
|
# Copyright (c) 2002, 2003, 2004, 2005 Ward Wouts <ward@wouts.nl>
|
|
#
|
|
# Permission to use, copy, modify, and distribute this software for any
|
|
# purpose with or without fee is hereby granted, provided that the above
|
|
# copyright notice and this permission notice appear in all copies.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
#
|
|
|
|
require 'set/intspan'
|
|
require 'net/nntp'
|
|
require 'news/newsrc'
|
|
require 'tempfile'
|
|
require 'timeout'
|
|
#require 'yaml'
|
|
|
|
# Root of the error hierarchy used by Article.
class ArticleError < RuntimeError
end

# Raised in this file for failures scoped to one request, e.g. a non-2xx
# reply to a header request or a message-id that no other server carries.
class TempError < ArticleError
end

# Raised in this file when a server is given up on, e.g. after repeated
# timeouts or a failed reconnect.
class PermError < ArticleError
end
|
|
|
|
module Net
  # Net::NNTP subclass that runs a background keep-alive thread: when no
  # line has been sent for roughly @resettime seconds it issues "DATE".
  # longcmd and shortcmd are serialised through one mutex so the keep-alive
  # command cannot interleave with a command issued by the caller.
  class KANNTP < Net::NNTP

    # Sets up the idle timer and its thread, then lets Net::NNTP#initialize
    # run with the same arguments (bare +super+).
    def initialize(host, port=nil, user=nil, password=nil, readermode=nil)
      @host = host
      @semaphore = Mutex.new
      @resettime = 60
      @timecounter = @resettime
      @thr = Thread.new do
        Thread.pass
        loop do
          # The counter is decreased in 5 second ticks; once it reaches
          # zero a keep-alive is sent on every tick until putline resets it.
          if @timecounter > 0
            @timecounter -= 5
          else
            sendka
          end
          sleep 5
        end
      end
      super
    end

    # Every line written to the server counts as activity and restarts the
    # idle countdown.
    def putline(line)
      @timecounter = @resettime
      super
    end

    # Runs the inherited longcmd while holding the connection mutex.
    def longcmd(line)
      @semaphore.synchronize { super }
    end

    # Runs the inherited shortcmd while holding the connection mutex.
    def shortcmd(line)
      @semaphore.synchronize { super }
    end

    # Changes the idle period (in seconds) used the next time the countdown
    # is restarted.
    def setresettime(time)
      @resettime = time
    end

    # Stops the keep-alive thread before closing the connection.
    def quit
      @thr.exit
      super
    end

    private

    # Sends the keep-alive command and returns the server's reply.
    def sendka
      shortcmd("DATE")
    end

  end # class KANNTP
end # module Net
|
|
|
|
class Article
|
|
|
|
# Verbosity of the diagnostic output. Call sites in this class print when
# Debuglevel exceeds their own threshold (0, 1, 2 or 3).
Debuglevel = 1
|
|
|
|
# One article header as seen on one server: message-id, the id reported by
# that server (stored as an Integer by #add), server name and subject.
Message = Struct.new(:messid, :id, :server, :subject)
|
|
|
|
# Connects to every server named in +nntpservers+ and loads the per-server
# newsrc state for +groupname+.
#
# nntpservers:: server names separated by "|"
# groupname::   newsgroup this object works on
# newsrc::      base name of the newsrc files; the file used for a server is
#               "<newsrc>.<server>"
#
# A server whose connection attempt fails with SocketError, Errno::EINVAL,
# EOFError or Errno::ETIMEDOUT is reported and dropped through del_server.
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
  @messageinfo = []

  @grouped = false
  @groups = {}
  @gotten = {}
  @group = groupname
  @preselectpattern = Regexp.new('^')
  @cache_buf = {}

  @serverlist = nntpservers.split('|')
  @connections = {}
  # Walk a copy of the list: del_server removes entries from @serverlist,
  # and deleting from an array while iterating it skips the next element.
  @serverlist.dup.each do |server|
    @connections[server] = {}
    @cache_buf[server] = []
    begin
      p server
      p Time.now
      begin
        Timeout.timeout(60) do
          @connections[server]["nntp"] = Net::KANNTP.new(server)
        end
        @connections[server]["nntp"].mode_reader
      rescue Timeout::Error, Errno::ECONNRESET
        # NOTE(review): this retry is unbounded, as in the original; a server
        # that never answers keeps the constructor looping here.
        sleep 3
        retry
      end
      p Time.now
      @connections[server]["skip_ids"] = Set::IntSpan.new()
      @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
      set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
    rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT
      print "Connection to #{server} failed: #{$!}\n"
      del_server(server)
    end
  end
end
|
|
|
|
# Drops the current connection to +server+ and opens a new one.
#
# The old connection is closed on a best-effort basis. Opening the new one is
# attempted up to three times (3 second pause before each attempt, 60 second
# limit per attempt). When all attempts fail the server is removed through
# del_server and PermError is raised.
def reconnect(server)
  retries = 0
  begin
    @connections[server]["nntp"].quit
    # Dropping the old object and forcing a GC run was tried here to reduce
    # memory usage; it did not seem to help.
    #@connections[server].delete("nntp")
    #GC.start
  rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, IOError
    # The old connection is already gone (IOError is what a closed stream
    # raises, and is also what disconnect tolerates); nothing to clean up.
  end
  begin
    sleep 3
    Timeout.timeout(60) do
      @connections[server]["nntp"] = Net::KANNTP.new(server)
    end
    @connections[server]["nntp"].mode_reader
  rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT, Timeout::Error
    print "Reconnect to #{server} failed: #{$!}\n"
    if retries > 1
      del_server(server)
      raise PermError, "Couldn't connect to #{server}"
    else
      retries += 1
      retry
    end
  end
  print "Succesfully reconnected to #{server}\n"
end
|
|
|
|
# Debug helper: prints the name of every instance variable of this object
# followed by "X: " and, when the value responds to +size+, that size.
def memusage
  print "memprof:\n"
  print "global:\n"
  instance_variables.each do |name|
    puts name
    print "X: "
    # instance_variable_get accepts both the String names returned by old
    # Ruby versions and the Symbol names returned by current ones.
    value = instance_variable_get(name)
    puts value.size if value.respond_to?(:size)
  end
end
|
|
|
|
# Compiles +regexp+ and installs it as the pattern used by #preselect.
def set_preselect_pattern(regexp)
  pattern = Regexp.new(regexp)
  @preselectpattern = pattern
end
|
|
|
|
# Returns true when +subject+ matches the preselect pattern, false otherwise.
def preselect(subject)
  (subject =~ @preselectpattern) ? true : false
end
|
|
|
|
# Records one article header and marks the subject grouping as stale so it
# is rebuilt on next use. +id+ is stored as an Integer.
def add(id, messid, subject, server)
  entry = Message.new(messid, id.to_i, server, subject)
  @messageinfo.push(entry)
  @grouped = false
end
|
|
|
|
# Announces the removal and forgets +server+: its connection record and its
# entry in the server list are both deleted.
def del_server(server)
  print "Removing server #{server} from list\n"
  [@connections, @serverlist].each { |store| store.delete(server) }.last
  @serverlist.delete(server)
end
|
|
|
|
# Collects the subject and message-id headers of all not-yet-known articles
# of the group from every connected server.
#
# cachedir:: directory holding the header cache files, or false
#
# The work is done in four steps:
# 1. ask every server for its first/last article number and drop newsrc
#    entries below the first number;
# 2. load the header cache (cache_read);
# 3. widen each server's skip set so that very old gaps are not requested;
# 4. request the remaining ranges in chunks, registering each header through
#    add (when it passes preselect) and cache_add.
# Servers that raise PermError along the way are removed with del_server.
def get_articles(cachedir=false)
  cache_check(cachedir) unless cachedir == false

  # Step 1: article ranges per server.
  @connections.keys.each do |server|
    begin
      first, last = get_group_info(server)
    rescue PermError
      print "Error: #{$!}\n"
      del_server(server)
      next
    end

    if first.to_i > last.to_i
      print " First article has higher number than last article on server #{server}.\n"
      del_server(server)
      next
    end

    # available articles on server
    conn = @connections[server]
    conn["first"] = first ? first.to_i : 0
    conn["last"] = last ? last.to_i : 0
    if Debuglevel > 0
      print " Server: #{server}\n"
      print " First: #{first}\n"
      print " Last: #{last}\n"
    end

    # clean up old newsrc entries
    next unless conn["first"] > 0
    conn["newsrc"].unmark_range(@group, 0, (conn["first"] - 1).to_s)
    conn["newsrc"].save
  end

  # Step 2: headers already known from the cache.
  cache_read(cachedir)

  # Step 3: articles that are really too old will never get filled in, so
  # do not try to fetch them. Some older ones are still fetched, because
  # logging shows that older articles do get downloaded.
  @connections.keys.each do |server|
    conn = @connections[server]
    highest = conn["skip_ids"].max
    next unless highest

    span = conn["last"] - conn["first"]
    fillerend = span > 10000 ? (highest - (span/10)).to_i : highest - 1000
    lowest = conn["skip_ids"].min
    if lowest && fillerend > lowest
      conn["skip_ids"] = conn["skip_ids"].union("#{lowest}-#{fillerend}")
    end
  end

  # Step 4: fetch what is still missing.
  @connections.keys.each do |server|
    print " reading articles from server: #{server}\n"
    conn = @connections[server]
    range = Set::IntSpan.new("#{conn["first"]}-#{conn["last"]}")

    rangelist = rechunk_runlist(range.diff(conn["skip_ids"]).run_list)

    print "rangelist: #{rangelist}\n" if Debuglevel > 2
    print "rangelist: #{rangelist.class.to_s}\n" if Debuglevel > 2
    print "rangelist elements: #{range.diff(conn["skip_ids"]).elements}\n" if Debuglevel > 2
    begin
      unless rangelist.nil? || rangelist =~ /^$/
        headerlines = 0
        rangelist.split(',').each do |chunk|
          print "i: #{chunk}\n" if Debuglevel > 2
          begin
            # (get_xover would deliver both headers in a single request.)
            _resp, subj_lines = get_xhdr(server, chunk, "subject")
            _resp, messid_lines = get_xhdr(server, chunk, "message-id")
          rescue TempError
            printerr(server)
            next
          end

          # Merge the two header lists by article id.
          art = {}
          subj_lines.each do |x|
            (art[x[0]] ||= {})["subject"] = x[1]
            print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
          end
          messid_lines.each do |x|
            (art[x[0]] ||= {})["messid"] = x[1]
            print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2
          end

          art.each do |id, fields|
            next unless fields.has_key?("subject") && fields.has_key?("messid")
            print "adding: #{fields["messid"]}, #{id}, #{server}, #{fields["subject"]}\n" if Debuglevel > 2
            # Only headers picked by the preselector are kept in memory;
            # every header still gets a line in the cache.
            add(id, fields["messid"], fields["subject"], server) if preselect(fields["subject"])
            cache_add(cachedir, id, fields["messid"], fields["subject"], server)
          end

          headerlines += subj_lines.length
          if headerlines >= 500
            cache_save(cachedir, server)
            headerlines = 0
          end
        end
      end
    rescue PermError
      del_server(server)
      next
    end
    cache_save(cachedir, server)
  end
  GC.start
end
|
|
|
|
# Selects the group on +server+ and returns [first, last] as reported by the
# server's +group+ command.
#
# Connection errors and 503/400 replies trigger a reconnect and a retry, at
# most three times per call; other NNTP reply errors raise PermError at once.
# A 30 second timeout is tolerated once (reconnect and retry).
#
# Raises PermError on too many errors, too many timeouts, an unrecoverable
# reply, or when reconnect itself gives up.
def get_group_info(server)
  timedout = 0
  errs = 0
  first = ""
  last = ""
  begin
    Timeout.timeout(30) do
      begin
        _resp, _count, first, last, _name = @connections[server]["nntp"].group(@group)
      rescue Net::NNTPReplyError
        printerr(server)
        raise PermError, "#{$!}" unless $!.to_s =~ /^503|^400/
        # Count the failure so a server that keeps answering 503/400 cannot
        # keep this loop alive forever.
        errs += 1
        raise PermError, "Too many errors! (get_group_info)" if errs > 3
        reconnect(server)
        retry
      rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Errno::EINVAL
        printerr(server)
        # The counter used to stay at 0, which made the limit below dead code.
        errs += 1
        raise PermError, "Too many errors! (get_group_info)" if errs > 3
        reconnect(server)
        retry
      end
    end
  rescue Timeout::Error
    timedout += 1
    raise PermError, "Too many timeouts! (get_group_info)" if timedout > 1
    print "Time out, reconnecting to server... (get_group_info)\n"
    reconnect(server)
    retry
  end
  return first, last
end
|
|
|
|
# Requests one header (+header+) for the article range +range+ from +server+
# and returns [response, lines] as delivered by the connection's +xhdr+.
#
# Raises TempError when the response code is not 2xx. Connection errors and
# 503/400 replies cause a reconnect (plus re-selecting the group) and a
# retry; other NNTP reply errors are only reported, in which case the
# initial empty response and line list are returned. One 180 second timeout
# is tolerated; a second one raises PermError.
def get_xhdr(server, range, header)
  timedout = 0
  resp = ""
  lines = []
  begin
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        print "getting headers: #{header}, #{range}\n" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xhdr(header, range)
        if resp.to_i == 500
          print "xhdr not implemented\n"
          print "Error: #{$!}\n"
        end
        unless resp.to_i >= 200 and resp.to_i < 300
          print "got response #{resp} while reading group #{@group} from #{server}\n"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          get_group_info(server)
          retry
        else
          print "Won't handle this... yet :(\n"
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, lines
  rescue Timeout::Error
    print "Time out, reconnecting to server (get_xhdr)\n"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
|
|
|
|
# Requests the overview data for +range+ ("n" or "n-m") from +server+ and
# returns [response, lines] as delivered by the connection's +xover+.
# A single number is used as both start and end of the range.
#
# Raises TempError when the response code is not 2xx. Connection errors and
# 503/400 replies cause a reconnect (plus re-selecting the group) and a
# retry; other NNTP reply errors are only reported. One 180 second timeout
# is tolerated; a second one raises PermError.
def get_xover(server, range)
  timedout = 0
  resp = ""
  lines = []
  start, ed = range.split("-")
  ed = start unless ed
  begin
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        print "getting headers: #{range}\n" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xover(start, ed)
        if resp.to_i == 500
          print "xover not implemented\n"
          print "Error: #{$!}\n"
        end
        unless resp.to_i >= 200 and resp.to_i < 300
          print "got response #{resp} while reading group #{@group} from #{server}\n"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          get_group_info(server)
          retry
        else
          print "Won't handle this... yet :(\n"
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, lines
  rescue Timeout::Error
    print "Time out, reconnecting to server (get_xover)\n"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xover)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
|
|
|
|
# Name of the newsgroup this object was created for.
def get_groupname
  @group
end
|
|
|
|
# Fetches the body of +message+ from +server+.
#
# Returns [response, id, message-id, lines] as delivered by the connection's
# +body+, or false when the article cannot be delivered (NNTP reply error,
# EOFError or NameError). A 503/400 reply is retried once after a reconnect;
# EPIPE/ECONNRESET always reconnect and retry. One 180 second timeout is
# tolerated; a second one raises PermError.
def get_body(server, message)
  timedout = 0
  retries = 0
  resp = ""
  id = ""
  messid = ""
  list = []
  begin
    Timeout.timeout(180) do
      begin
        list = []
        resp, id, messid, list = @connections[server]["nntp"].body(message)
      rescue Net::NNTPReplyError
        # Take the reply text explicitly: adding the exception object to a
        # String only worked while exceptions converted implicitly.
        reply = $!.to_s
        printerr(server)
        if retries == 0 && (reply =~ /^503/ || reply =~ /^400/)
          reconnect(server)
          get_group_info(server)
          retries = 1
          retry
        end
        return false
      rescue EOFError, NameError
        printerr(server)
        return false
      rescue Errno::EPIPE, Errno::ECONNRESET
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, id, messid, list
  rescue Timeout::Error, Errno::ETIMEDOUT
    print "Time out, reconnecting to server (get_body)\n"
    timedout += 1
    raise PermError, "Too many timeouts! (get_body)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
|
|
|
|
# Fetches every not-yet-fetched article of the subject group +subj+.
#
# The group is sorted first (group_subject_sort). For each article the body
# is requested from its server; on failure the next entry is tried when it
# carries the same message-id (i.e. the same article on another server),
# otherwise TempError is raised. Fetched message-ids are remembered in
# @gotten.
#
# Returns false when the group has no message list, otherwise the line list
# of the last body fetched by this call ([] when nothing was fetched).
def get_group_body(subj)
  result = []
  group_subject_sort(subj)
  infos = @groups[subj]["messageinfo"]
  return false if infos.nil?

  (0...infos.length).each do |i|
    next if @gotten.has_key?(infos[i][:messid])

    print "getting article: #{i}\n" if Debuglevel > 1
    print "getting article: #{subj}\n" if Debuglevel > 1
    print "full subject: #{infos[i][:subject]}\n" if Debuglevel > 0
    print "message id: #{infos[i][:messid]}\n" if Debuglevel > 1
    print "id: #{infos[i][:id]}\n" if Debuglevel > 1
    print "server: #{infos[i][:server]}\n" if Debuglevel > 0

    resp = false
    list = nil
    while resp == false
      if @serverlist.include?(infos[i][:server])
        resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      else
        resp = false
      end
      next unless resp == false

      if Debuglevel > 1
        print "mess-id i: #{infos[i][:messid]}\n"
        # XXX this could be done more neatly
        print "mess-id i+1: #{infos[i+1][:messid]}\n" unless infos[i+1].nil?
      end
      unless (i+1 < infos.length) && (infos[i][:messid] == infos[i+1][:messid])
        raise TempError, " Message-id not on another server"
      end
      print " Trying next server...\n"
      i += 1
    end
    @gotten[infos[i][:messid]] = true
    result = list
  end
  result
end
|
|
|
|
# Fetches the first article of the subject group +subj+ (after sorting the
# group with group_subject_sort).
#
# On failure the following entries are tried as long as they carry the same
# message-id; otherwise TempError is raised. The fetched message-id is
# remembered in @gotten.
#
# Returns false when the group has no usable first entry, nil when the first
# article had already been fetched, otherwise the body's line list.
def get_group_body_first(subj)
  group_subject_sort(subj)
  i = 0
  list = nil
  infos = @groups[subj]["messageinfo"]
  if infos.nil? || !infos[0][:messid]
    p "ieks komt niet door lame check heen"
    return false
  end
  p "komt wel door lame check heen"

  until @gotten.has_key?(infos[0][:messid])
    print "getting article: #{subj}\n" if Debuglevel > 0
    print "full subject: #{infos[0][:subject]}\n" if Debuglevel > 0
    print "message id: #{infos[i][:messid]}\n" if Debuglevel > 1
    print "id: #{infos[i][:id]}\n" if Debuglevel > 1
    print "server: #{infos[0][:server]}\n" if Debuglevel > 0

    resp = false
    while resp == false
      resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      next unless resp == false

      print "mess-id i: #{infos[i][:messid]}\n"
      # XXX this could be done more neatly
      print "mess-id i+1: #{infos[i+1][:messid]}\n" unless infos[i+1].nil?
      unless (i+1 < infos.length) && (infos[i][:messid] == infos[i+1][:messid])
        raise TempError, "Message-id not on another server"
      end
      print "Trying next server...\n"
      i += 1
    end
    @gotten[infos[i][:messid]] = true
  end
  list
end
|
|
|
|
# Fetches all not-yet-fetched articles of the subject group +subj+ except the
# first entry of its message list.
#
# subj:: key of the group in @groups
# file:: optional IO-like object; when given, every body line is written to
#        it followed by a newline instead of being collected
#
# On failure the next entry is tried when it carries the same message-id,
# otherwise TempError is raised. Returns the collected lines ([] when +file+
# is given).
def get_group_body_rest(subj, file=nil)
  result = []
  infos = @groups[subj]["messageinfo"]
  (1...infos.length).each do |i|
    next if @gotten.has_key?(infos[i][:messid])

    print "getting article: #{i}\n" if Debuglevel > 1
    print "getting article: #{subj}\n" if Debuglevel > 1
    print "full subject: #{infos[i][:subject]}\n" if Debuglevel > 0
    print "message id: #{infos[i][:messid]}\n" if Debuglevel > 1
    print "id: #{infos[i][:id]}\n" if Debuglevel > 1
    print "server: #{infos[i][:server]}\n" if Debuglevel > 0

    resp = false
    list = nil
    while resp == false
      resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      next unless resp == false

      print "mess-id i: #{infos[i][:messid]}\n"
      # XXX this could be done more neatly
      print "mess-id i+1: #{infos[i+1][:messid]}\n" unless infos[i+1].nil?
      unless (i+1 < infos.length) && (infos[i][:messid] == infos[i+1][:messid])
        raise TempError, "Message-id not on another server"
      end
      print "Trying next server...\n"
      i += 1
    end
    @gotten[infos[i][:messid]] = true

    if file
      list.each { |line| file.print "#{line}\n" }
    else
      result.concat(list)
    end
  end
  result
end
|
|
|
|
# Returns the keys of all subject groups, rebuilding the grouping first when
# headers were added since it was last built.
def get_group_subjects
  group_subjects if !@grouped
  @groups.keys
end
|
|
|
|
# True when the group +subj+ holds at least as many distinct message-ids as
# its recorded part total. The grouping is rebuilt first when it is stale.
def group_is_complete(subj)
  group_subjects unless @grouped
  entry = @groups[subj]
  print "length: #{entry["messageinfo"].length} total: #{entry["total"].to_i}\n" if Debuglevel > 1
  distinct = entry["messageinfo"].map { |msg| msg[:messid] }.uniq
  distinct.length >= entry["total"].to_i
end
|
|
|
|
# True when the recorded part total of group +subj+ is exactly one.
def group_is_singlepart(subj)
  parts = @groups[subj]["total"].to_i
  parts == 1
end
|
|
|
|
# True when the recorded part total of group +subj+ is greater than one.
def group_is_multipart(subj)
  parts = @groups[subj]["total"].to_i
  parts > 1
end
|
|
|
|
# Rebuilds @groups from @messageinfo.
#
# A subject containing "(n/m)" or "[n/m]" is grouped under the subject with
# that marker removed and " (m)" appended; its total is m. Any other subject
# forms its own group with total 1. Entries whose part number is 0 are left
# out. Each group is a hash with the keys "total" and "messageinfo".
def group_subjects
  @groups = {}
  @messageinfo.each_with_index do |msg, idx|
    subject = msg[:subject]
    print "group subjects: #{idx} #{subject}\n" if Debuglevel > 3

    if subject =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || subject =~ /(.*)\[(\d+)\/(\d+)\](.*)/
      parts = Regexp.last_match
      key = "#{parts[1]}#{parts[4]} (#{parts[3]})"
      number = parts[2]
      total = parts[3]
    else
      key = subject
      number = 1
      total = 1
    end

    next if number.to_i == 0
    if @groups.has_key?(key)
      @groups[key]["messageinfo"].push(msg)
    else
      @groups[key] = {}
      @groups[key]["total"] = total
      @groups[key]["messageinfo"] = [msg]
    end
  end
  @grouped = true
end
|
|
|
|
# Builds an integer span from +ids+ and installs it as the skip set of
# +server+. Returns false, leaving the current skip set untouched, when the
# span is not finite or contains a negative number; returns true otherwise.
def set_skip_ids(server, ids)
  span = Set::IntSpan.new(ids)
  return false unless span.finite

  lowest = span.min
  return false if !lowest.nil? && lowest < 0

  @connections[server]["skip_ids"] = span
  true
end
|
|
|
|
# Marks every article of the subject group +subject+ in the newsrc of the
# server it was seen on. Articles of servers that are no longer connected
# are skipped.
def group_update_newsrc(subject)
  print "running group_update_newsrc\n"
  infos = @groups[subject]["messageinfo"]
  (0...infos.length).each do |idx|
    msg = infos[idx]
    conn = @connections[msg[:server]]
    conn["newsrc"].mark(@group, msg[:id]) if conn
  end
end
|
|
|
|
# Saves the newsrc of every connected server.
def save_newsrc()
  @connections.keys.each do |server|
    rc = @connections[server]["newsrc"]
    rc.save
  end
end
|
|
|
|
# Queues one "id|message-id|subject" line for the cache file of +server+ and
# flushes the queue through cache_save once it holds more than 100 lines.
def cache_add(cachedir, id, messid, subject, server)
  entry = "#{id}|#{messid}|#{subject}\n"
  @cache_buf[server] = [] unless @cache_buf.has_key?(server)
  @cache_buf[server].push(entry)
  cache_save(cachedir, server) if @cache_buf[server].length > 100
end
|
|
|
|
# Prints a warning when +cachedir+ does not exist.
def cache_check(cachedir)
  # exist? is available on old and current Ruby versions alike; the plural
  # exists? has been removed from current versions.
  unless FileTest.exist?(cachedir)
    print "Cachedir '#{cachedir}' doesn't exists, performance will suffer\n"
  end
end
|
|
|
|
# Loads the cached headers of every connected server.
#
# For each server the cache file "<cachedir>/<group>.ripnewscache.<server>"
# is first scrubbed (cache_scrub) and then read line by line. A line is used
# when its id is not in the server's skip set as it was before reading and
# lies within the server's first..last range; such headers are registered
# through add (when they pass preselect) and their id is inserted into the
# skip set so they are not requested from the server again.
#
# cachedir:: cache directory; nothing is read when it is false/nil or not a
#            directory
def cache_read(cachedir)
  p "reading cache"
  p Time.now
  filename = "#{cachedir}/#{@group}.ripnewscache"
  excludes = {}
  @connections.keys.each do |server|
    cache_scrub(cachedir, server)
    conn = @connections[server]
    excludes[server] = {}
    # Plain iteration: the element list is only read here, never rewritten.
    conn["skip_ids"].elements.each { |x| excludes[server][x] = true }

    cachefile = "#{filename}.#{server}"
    # Guard against the default cachedir (false), which FileTest rejects.
    next unless cachedir && FileTest.directory?(cachedir) &&
                FileTest.file?(cachefile) && FileTest.readable?(cachefile)

    # Block form closes the file even when a line raises.
    File.open(cachefile) do |file|
      file.each do |line|
        # chomp: the subject is the last field and would otherwise keep the
        # line terminator, unlike subjects received from the server.
        id, messid, subject = line.chomp.split("|", 3)
        number = id.to_i
        next if excludes[server].has_key?(number) ||
                number < conn["first"] || number > conn["last"]

        add(id, messid, subject, server) if preselect(subject)
        conn["skip_ids"].insert(number)
      end
    end
  end
  p Time.now
end
|
|
|
|
# Appends the queued cache lines of +server+ (sorted) to
# "<cachedir>/<group>.ripnewscache.<server>" and empties the queue.
#
# Nothing happens, and the queue is kept, when +cachedir+ is false/nil or is
# not a directory. Errors from opening or writing the file propagate to the
# caller.
def cache_save(cachedir, server)
  # Guard against the default cachedir (false), which FileTest rejects.
  return unless cachedir && FileTest.directory?(cachedir)

  filename = "#{cachedir}/#{@group}.ripnewscache"
  pending = (@cache_buf[server] ||= [])
  pending.sort!
  # join: printing the array itself writes its inspect form on current Ruby
  # versions instead of the lines. The block form closes the file on errors.
  File.open("#{filename}.#{server}", "a+") { |file| file.print pending.join }
  @cache_buf[server] = []
end
|
|
|
|
# Rewrites the cache file of +server+ so that it only keeps lines whose
# leading id lies within the server's current first..last range. Lines that
# do not start with "<digits>|" are dropped as well.
#
# The filtered copy is written to "<cachefile>.new" and then renamed over the
# cache file. Nothing happens when the cache file does not exist.
#
# XXX this could and probably should be done in a separate thread...
# XXX but it'll work for now
# XXX also read articles aren't removed right now
# XXX this could be done, but I don't know if I want to pay the overhead
def cache_scrub(cachedir, server)
  p "scrubbing cache"
  p Time.now
  cachefile = "#{cachedir}/#{@group}.ripnewscache.#{server}"
  if File.exist?(cachefile)
    first = @connections[server]["first"]
    last = @connections[server]["last"]
    scratch = "#{cachefile}.new"
    begin
      # Both files are closed (and the copy flushed) before the rename, so
      # the new cache file is never a truncated copy.
      File.open(cachefile) do |infile|
        File.open(scratch, "w") do |outfile|
          infile.each do |line|
            next unless line =~ /^(\d+)\|/
            number = Regexp.last_match(1).to_i
            outfile.puts(line) if number >= first and number <= last
          end
        end
      end
      # File.rename is core Ruby; it raises instead of returning false.
      File.rename(scratch, cachefile)
      print "Cache scrubbed for #{server}\n"
    rescue SystemCallError
      print "Couldn't scrub #{server} cache\n"
    end
  end
  p Time.now
end
|
|
|
|
###############################################################
|
|
|
|
# Base64 decoder.
#
# str:: base64 text; it may span several lines and contain characters that
#       are not part of the base64 alphabet, which are ignored
#
# Returns the decoded bytes as one String.
def decode64(str)
  # Keep only the base64 alphabet (including "/" and the "=" padding) and
  # let the built-in "m" unpack directive do the decoding.
  str.to_s.delete("^A-Za-z0-9+/=").unpack("m").first
end
|
|
|
|
###############################################################
|
|
|
|
# Sorts the message list of the subject group +subj+ and drops entries of
# servers that are no longer in @serverlist.
#
# Entries are ordered by subject (ward_sort) and, for equal subjects, by the
# position of their server in @serverlist. The group hash is rebuilt with
# its "total" and, when at least one entry is left, its "messageinfo"; when
# no entry is left the "messageinfo" key is absent.
#
# XXX Why is a separate sort_arr used here instead of sorting in place?
def group_subject_sort(subj)
  serverhash = {}
  @serverlist.each_with_index { |name, rank| serverhash[name] = rank }

  total = @groups[subj]["total"]
  sort_arr = []
  # A group emptied by an earlier call has no "messageinfo" key any more;
  # treat it as an empty list instead of failing on nil.
  (@groups[subj]["messageinfo"] || []).each do |msg|
    print "subj sort #{msg[:subject]}\n" if Debuglevel > 2
    print "subj sort #{msg[:messid]}\n" if Debuglevel > 2
    print "subj sort #{msg[:id]}\n" if Debuglevel > 2
    print "subj sort #{msg[:server]}\n" if Debuglevel > 2
    sort_arr.push(msg.dup) unless serverhash[msg[:server]].nil?
  end

  # Every entry left has a rank in serverhash, so the comparison below never
  # sees nil (the old "strange things" diagnostic could not be reached).
  sort_arr.sort! do |a, b|
    r = ward_sort(a[:subject], b[:subject])
    r = serverhash[a[:server]] <=> serverhash[b[:server]] if r == 0
    r
  end

  @groups[subj].clear
  @groups[subj]["total"] = total
  @groups[subj]["messageinfo"] = sort_arr unless sort_arr.empty?
  sort_arr.each do |msg|
    print "subject sort: #{msg[:subject]}\n" if Debuglevel > 2
    print "server: #{msg[:server]}\n" if Debuglevel > 2
  end
end
|
|
|
|
# Comparison function for subjects that orders embedded numbers by value.
#
# Both arguments are split into alternating text and digit runs, which are
# compared pairwise: two digit runs numerically, anything else as strings.
# The first difference decides. When +a+ runs out first, -1 is returned.
# Returns -1, 0 or 1.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)
  digits = /^[0-9]+$/

  left.each_with_index do |chunk, pos|
    other = right[pos]
    order = if chunk.to_s =~ digits && other.to_s =~ digits
              chunk.to_i <=> other.to_i
            else
              chunk.to_s <=> other.to_s
            end
    return order unless order == 0
  end

  return -1 if right.length > left.length
  0
end
|
|
|
|
# Splits the ranges of a run list ("1-1200,1500,...") into pieces of 500
# article numbers so that no single request covers a huge range. The last
# piece of a range takes whatever is left (up to 501 numbers). Single
# numbers are passed through. Returns nil for a nil run list.
#
# Idea for later: when only a few articles separate two runs it is probably
# cheaper to fetch those as well and merge the runs, saving a request.
def rechunk_runlist(runlist)
  return nil if runlist.nil?
  chunksize = 500

  pieces = runlist.split(',').map do |run|
    bounds = /(.*)-(.*)/.match(run)
    if bounds.nil?
      run
    else
      lower = bounds[1]
      upper = bounds[2]
      out = ""
      while (upper.to_i - lower.to_i) > chunksize
        out << "#{lower}-#{lower.to_i+(chunksize-1)},"
        lower = lower.to_i + chunksize
      end
      out << "#{lower}-#{upper}"
    end
  end
  pieces.join(",")
end
|
|
|
|
# Reports the exception currently being handled ($!): its class, the server
# it occurred on, the location this method was called from, and its message.
def printerr(server)
  origin = caller[0]
  print "Caught #{$!.class} reading from server #{server} (#{origin})\n"
  print "Error: #{$!}\n"
end
|
|
|
|
# Closes the connection to every server. Connections that are already broken
# or closed (EPIPE, ECONNRESET, EOFError, IOError) are ignored.
def disconnect
  @connections.keys.each do |server|
    link = @connections[server]["nntp"]
    begin
      link.quit
    rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, IOError
      # already gone; nothing left to close
    end
  end
end
|
|
|
|
# Forgets all collected headers and closes every server connection.
def quit
  # Experimental: it is not settled whether this list should be reset here.
  @messageinfo = []
  disconnect
end
|
|
|
|
# ward_sort is an internal comparison helper used by group_subject_sort.
private :ward_sort
|
|
|
|
end # class
|