1088 lines
30 KiB
Ruby
1088 lines
30 KiB
Ruby
# $Dwarf: article.rb,v 1.114 2005/05/12 07:39:53 ward Exp $
|
|
# $Source$
|
|
|
|
#
|
|
# Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts <ward@wouts.nl>
|
|
#
|
|
# Permission to use, copy, modify, and distribute this software for any
|
|
# purpose with or without fee is hereby granted, provided that the above
|
|
# copyright notice and this permission notice appear in all copies.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
#
|
|
|
|
# Standard-library modules this file uses (Pathname in the requires below,
# DateTime in get_articles, FileUtils in cache_read).
require 'pathname'
require 'date'
require 'fileutils'

require Pathname.new(__FILE__).dirname + '../set/intspan'
require Pathname.new(__FILE__).dirname + '../net/nntp'
require Pathname.new(__FILE__).dirname + '../news/newsrc'
require 'tempfile'
require 'timeout'
#require 'yaml'
require 'profiler'
|
|
|
|
# Root of the error hierarchy raised by the article fetching code.
class ArticleError < RuntimeError
end

# Raised for failures that only affect the current request, e.g. a non-2xx
# server response or a message that is not available from another server.
class TempError < ArticleError
end

# Raised when a server has to be given up on, e.g. after repeated timeouts
# or when reconnecting fails.
class PermError < ArticleError
end
|
|
|
|
module Net
  # NNTP client that keeps an idle connection alive.
  #
  # A background thread counts down in 5 second steps; every line written
  # to the server resets the countdown.  Once the countdown reaches zero a
  # DATE command is sent.  Commands are serialised through a mutex so the
  # keep-alive cannot interleave with a command issued by the caller.
  class KANNTP < Net::NNTP

    # Starts the keep-alive thread, then hands all arguments to Net::NNTP.
    def initialize(host, port=nil, user=nil, password=nil, readermode=nil)
      @host = host
      @semaphore = Mutex.new
      @resettime = 60
      @timecounter = @resettime
      @thr = Thread.new do
        Thread.pass
        loop do
          # puts "timecounter #{@timecounter} #{@host}"
          if @timecounter > 0
            @timecounter -= 5
          else
            sendka
          end
          sleep 5
        end
      end
      super
    end

    # Any traffic towards the server restarts the idle countdown.
    def putline(line)
      # puts "timerreset #{@host}"
      @timecounter = @resettime
      super
    end

    # Multi-line command, guarded by the connection mutex.
    def longcmd(line)
      @semaphore.synchronize { super }
    end

    # Single-line command, guarded by the connection mutex.
    def shortcmd(line)
      @semaphore.synchronize { super }
    end

    # Sets the idle time (in seconds) after which a keep-alive is sent.
    def setresettime(time)
      @resettime = time
    end

    # Stops the keep-alive thread and closes the connection; errors caused
    # by an already broken connection are ignored.
    def quit
      @thr.exit
      super
    rescue EOFError, Errno::EPIPE, IOError
      nil
    end

    private

    # Sends the keep-alive command.
    def sendka
      # puts "SENDING KEEP ALIVE TO #{@host}"
      res = shortcmd("DATE")
      # puts res
    end

  end # class KANNTP
end # module Net
|
|
|
|
############################################################
|
|
|
|
class Article
|
|
|
|
Debuglevel = 0
|
|
|
|
Message = Struct.new(:messid, :id, :date, :from, :server, :subject)
|
|
|
|
# Sets up state for one newsgroup and connects to every listed server.
#
# nntpservers:: '|'-separated list of servers, each written as
#               "host", "user@host" or "user:pass@host".
# groupname::   name of the newsgroup to read.
# newsrc::      base path of the newsrc files; the file used for a server
#               is "#{newsrc}.#{server}".
# maxage::      maximum article age, converted with to_i (0 disables the
#               age check in get_articles).
#
# Servers that cannot be reached are reported and removed from the list.
def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
  @messageinfo = []

  @grouped = false
  @groups = {}
  @gotten = {}
  @group = groupname
  @preselectpattern = Regexp.new('^')
  @cache_buf = {}
  @serverlist = []
  @serverpasses = {}
  @maxage = maxage.to_i

  nntpservers.split('|').each do |spec|
    if spec =~ /(.*)@([^@]*)$/
      userpass = $1
      server = $2
      if userpass =~ /([^:]*):(.*)/
        user, pass = $1, $2
      else
        user, pass = userpass, nil
      end
    else
      server, user, pass = spec, nil, nil
    end
    @serverlist.push(server)
    @serverpasses[server] = { 'user' => user, 'pass' => pass }
  end

  @connections = {}
  # Walk a snapshot of the list: del_server removes entries from
  # @serverlist, and deleting from an array while iterating over it makes
  # the iteration skip the element that follows the deleted one.
  @serverlist.dup.each do |server|
    @connections[server] = {}
    @cache_buf[server] = []
    begin
      begin
        Timeout.timeout(60) do
          @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
        end
        @connections[server]["nntp"].mode_reader
      rescue Timeout::Error, Errno::ECONNRESET
        # NOTE(review): this retry has no upper bound, as in the original.
        sleep 3
        retry
      end
      @connections[server]["skip_ids"] = Set::IntSpan.new()
      @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
      set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
    rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT
      puts "Connection to #{server} failed: #{$!}"
      @connections[server]["nntp"].quit unless @connections[server]["nntp"].nil?
      del_server(server)
    end
  end
end
|
|
|
|
# Drops the current connection to +server+ and opens a new one.
#
# Closing the old connection is best effort (10 second limit, connection
# errors ignored).  Connecting is attempted up to three times; when the
# last attempt fails the server is removed and PermError is raised.
#
# Uses Timeout.timeout / Timeout::Error; the Kernel#timeout method and the
# top-level TimeoutError constant no longer exist in current Ruby versions.
def reconnect(server)
  retries = 0
  begin
    puts "Trying to kill old connection #{Time.now}"
    Timeout.timeout(10) do
      @connections[server]["nntp"].quit
    end
    puts "Killed old connection #{Time.now}"
  rescue Timeout::Error
    puts "Timeout killing old connection"
  rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, Errno::ETIMEDOUT
    # the old connection was already dead; nothing to clean up
  end
  begin
    puts "Trying to reconnect #{Time.now}"
    sleep 3
    Timeout.timeout(60) do
      @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
    end
    @connections[server]["nntp"].mode_reader
  rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT, Timeout::Error, Errno::ECONNREFUSED
    puts "Reconnect to #{server} failed: #{$!}"
    if retries > 1
      del_server(server)
      raise PermError, "Couldn't connect to #{server}"
    else
      retries += 1
      retry
    end
  end
  puts "Succesfully reconnected to #{server}"
end
|
|
|
|
# Debug helper: prints every instance variable name followed by "X: " and
# the result of calling +size+ on its value (an empty value when the
# object has no +size+ method).
#
# Values are read with instance_variable_get.  The former
# instance_eval(name) raises TypeError on Ruby 1.9+, where
# instance_variables returns symbols instead of strings.
def memusage
  puts "memprof:"
  puts "global:"
  instance_variables.each do |name|
    puts name
    print "X: "
    value = instance_variable_get(name)
    if value.respond_to?(:size)
      puts value.size
    else
      puts
    end
  end
end
|
|
|
|
# Compiles +regexp+ and stores it as the pattern used by preselect.
# Returns the compiled Regexp.
def set_preselect_pattern(regexp)
  pattern = Regexp.new(regexp)
  @preselectpattern = pattern
end
|
|
|
|
# Returns true when +subject+ matches the preselect pattern, false
# otherwise (always a real boolean, never a match position).
def preselect(subject)
  !(subject =~ @preselectpattern).nil?
end
|
|
|
|
# Records one article.  +id+ and +date+ are stored as integers.  Adding an
# article invalidates the current subject grouping.
def add(id, messid, date, from, subject, server)
  entry = Message.new(messid, id.to_i, date.to_i, from, server, subject)
  @messageinfo << entry
  @grouped = false
end
|
|
|
|
# Announces the removal of +server+ and forgets both its connection data
# and its entry in the server list.
def del_server(server)
  notice = "Removing server #{server} from list"
  puts notice
  @connections.delete(server)
  @serverlist.delete(server)
end
|
|
|
|
# Collects article overview data for the group from every connected server.
#
# Steps:
# 1. ask each server for the first/last article number (servers that fail
#    permanently or report first > last are removed) and drop newsrc marks
#    below the first available article;
# 2. read the on-disk caches (cache_read);
# 3. widen the per-server skip set so that very old, never fetched ranges
#    are not requested;
# 4. fetch overview data for the remaining ranges, newest chunk first,
#    adding preselected articles to memory and every article to the cache.
#
# cachedir:: directory holding the cache files, or false.
def get_articles(cachedir=false)
  cache_check(cachedir) if cachedir != false

  @connections.keys.each do |server|
    begin
      first, last = get_group_info(server)
    rescue PermError
      puts "Error: #{$!}"
      del_server(server)
      next
    end
    if first.to_i <= last.to_i
      # articles available on the server: oldest ...
      @connections[server]["first"] = first ? first.to_i : 0
      # ... and newest
      @connections[server]["last"] = last ? last.to_i : 0
      if Debuglevel > 0
        puts " Server: #{server} First: #{first} Last: #{last}"
      end
      # clean up old newsrc entries
      if @connections[server]["first"] > 0
        @connections[server]["newsrc"].unmark_range(@group, 0, (@connections[server]["first"] - 1).to_s)
        @connections[server]["newsrc"].save_group(@group)
      end
    else
      puts " First article has higher number than last article on server #{server}."
      del_server(server)
    end
  end
  cache_read(cachedir)

  # Ranges that are really too old will never be completed, so do not try
  # to fetch them.  Some older articles are still wanted, because logging
  # shows that older articles do get downloaded.
  @connections.keys.each do |server|
    conn = @connections[server]
    if conn["skip_ids"].max && conn["skip_ids"].max < conn["last"]
      cnt_articles = conn["last"] - conn["first"]
      if cnt_articles > 10000
        fillerend = (conn["skip_ids"].max - (cnt_articles/5)).to_i
      else
        fillerend = conn["skip_ids"].max - 2000
      end
      if conn["skip_ids"].min && fillerend > conn["skip_ids"].min
        conn["skip_ids"] = conn["skip_ids"].union("#{conn["skip_ids"].min}-#{fillerend}")
      end
    end
  end

  @connections.keys.each do |server|
    puts " reading articles from server: #{server}"
    range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")

    rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)

    puts "rangelist: #{rangelist}" if Debuglevel > 2
    puts "rangelist: #{rangelist.class.to_s}" if Debuglevel > 2
    puts "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}" if Debuglevel > 2
    begin
      unless rangelist == nil or rangelist =~ /^$/
        lastdate = DateTime.now
        # Newest first: that makes it possible to stop once the articles
        # get too old.
        rangelist.split(',').reverse.each do |i|
          puts "i: #{i}" if Debuglevel > 2
          begin
            resp, xover_lines = get_xover(server, i)
          rescue TempError, EOFError
            printerr(server)
            next
          end

          art = {}
          xover_lines.each do |x|
            id = x[0]
            subj = x[1]
            auth = x[2]
            date = x[3]
            messid = x[4]

            art[id] = {} unless art.has_key?(id)

            begin
              # parse once; the result feeds both the age check and the
              # stored date
              parsed = DateTime.parse(date)
              lastdate = parsed
              art[id]["date"] = parsed.strftime('%Y%m%d')
            rescue
              puts $!.message
              puts id
              art[id]["date"] = Time.now.strftime('%Y%m%d')
            end
            art[id]["subject"] = subj
            art[id]["messid"] = messid
            art[id]["from"] = auth

            puts "art id: #{id} subj: #{subj}" if Debuglevel > 2
            puts "art id: #{id} from: #{auth}" if Debuglevel > 2
            puts "art id: #{id} date: #{date}" if Debuglevel > 2
            puts "art id: #{id} messid: #{messid}" if Debuglevel > 2
          end

          art.keys.each do |id|
            if art[id].has_key?("date") and art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
              puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
              # Only articles picked by the preselector are kept in memory;
              # everything is still written to the cache.
              if preselect(art[id]["subject"])
                add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
              end
              cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
            end
          end
          if @maxage and @maxage > 0
            if lastdate < ( DateTime.now - @maxage )
              puts "Skipping articles older than #{DateTime.now - @maxage}"
              break
            end
          end
        end
      end
    rescue PermError
      del_server(server)
      next
    end
    cache_save(cachedir, server)
  end
  GC.start
end
|
|
|
|
# Selects the group on +server+ and returns [first, last] as reported by
# the server.
#
# A 503/400 reply triggers a reconnect and another try; any other reply
# error raises PermError.  Connection errors trigger a reconnect and are
# retried until the fourth failure, which raises PermError.  One timeout
# (30 seconds) is tolerated, the second raises PermError.
#
# The error counter is now actually incremented (it used to stay at zero,
# so the "Too many errors" limit could never trigger), and the call uses
# Timeout.timeout / Timeout::Error instead of the removed Kernel#timeout /
# TimeoutError.
def get_group_info(server)
  timedout = 0
  errs = 0
  first = ""
  last = ""
  begin
    Timeout.timeout(30) do
      begin
        _resp, _count, first, last, _name = @connections[server]["nntp"].group(@group)
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          retry
        else
          raise PermError, "#{$!}"
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Errno::EINVAL
        printerr(server)
        errs += 1
        raise PermError, "Too many errors! (get_group_info)" if errs > 3
        reconnect(server)
        retry
      end
    end
  rescue Timeout::Error
    timedout += 1
    raise PermError, "Too many timeouts! (get_group_info)" if timedout > 1
    puts "Time out, reconnecting to server... (get_group_info)"
    reconnect(server)
    retry
  end
  return first, last
end
|
|
|
|
# Fetches one header (+header+) for the articles in +range+ from +server+.
#
# Returns [response, lines].  Returns nil when the connection broke twice
# in a row.  Raises TempError for a non-2xx response and PermError after
# the second timeout (180 seconds each).
#
# The give-up path used to call printerr with the text "giving up" as its
# server argument; it now prints a plain message.  Timeouts use
# Timeout.timeout / Timeout::Error instead of the removed Kernel#timeout /
# TimeoutError.
def get_xhdr(server, range, header)
  timedout = 0
  attempts = 0
  resp = ""
  lines = []
  begin
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        puts "getting headers: #{header}, #{range}" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xhdr(header, range)
        if resp.to_i == 500
          puts "xhdr not implemented"
          puts "Error: #{$!}"
        end
        unless resp.to_i >= 200 and resp.to_i < 300
          puts "got response #{resp} while reading group #{@group} from #{server}"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          get_group_info(server)
          retry
        else
          puts "Won't handle this... yet :("
        end
      rescue Errno::EPIPE, Errno::ECONNRESET
        printerr(server)
        reconnect(server)
        get_group_info(server)
        attempts += 1
        if attempts < 2
          retry
        else
          puts "Giving up on #{server} (get_xhdr)"
          return
        end
      end
    end
    return resp, lines
  rescue Timeout::Error
    puts "Time out, reconnecting to server (get_xhdr)"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
|
|
|
|
# Fetches overview data for +range+ ("start-end" or a single number) from
# +server+ and returns [response, lines].
#
# Raises TempError for a non-2xx response and PermError after the second
# timeout (180 seconds each).  Broken connections and data errors lead to
# a reconnect followed by another try.
#
# Uses Timeout.timeout / Timeout::Error instead of the removed
# Kernel#timeout / TimeoutError.
def get_xover(server, range)
  timedout = 0
  resp = ""
  lines = []
  start, ed = range.split("-")
  ed ||= start
  begin
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        puts "getting headers: #{range}" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xover(start, ed)
        if resp.to_i == 500
          puts "xover not implemented"
          puts "Error: #{$!}"
        end
        unless resp.to_i >= 200 and resp.to_i < 300
          puts "got response #{resp} while reading group #{@group} from #{server}"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          get_group_info(server)
          retry
        else
          puts "Won't handle this... yet :("
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      rescue Net::NNTPDataError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, lines
  rescue Timeout::Error
    puts "Time out, reconnecting to server (get_xover)"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xover)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
|
|
|
|
# Name of the newsgroup this object was created for.
def get_groupname
  @group
end
|
|
|
|
# Fetches the body of +message+ from +server+.
#
# Returns [response, id, message-id, lines] on success and false when the
# server does not deliver the article.  A 503/400 reply is retried once
# after reconnecting; broken connections are retried after reconnecting.
# Raises PermError after the second timeout (180 seconds each).
#
# Uses Timeout.timeout / Timeout::Error instead of the removed
# Kernel#timeout / TimeoutError.
def get_body(server, message)
  timedout = 0
  retries = 0
  resp = ""
  id = ""
  messid = ""
  list = []
  begin
    Timeout.timeout(180) do
      begin
        list = []
        resp, id, messid, list = @connections[server]["nntp"].body(message)
      rescue Net::NNTPReplyError
        reply = $!.to_s
        printerr(server)
        if retries == 0 && (reply =~ /^503/ || reply =~ /^400/)
          reconnect(server)
          get_group_info(server)
          retries = 1
          retry
        end
        return false
      rescue EOFError, NameError
        printerr(server)
        return false
      rescue Errno::EPIPE, Errno::ECONNRESET
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, id, messid, list
  rescue Timeout::Error, Errno::ETIMEDOUT
    puts "Time out, reconnecting to server (get_body)"
    timedout += 1
    raise PermError, "Too many timeouts! (get_body)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
|
|
|
|
# Fetches the bodies of all not yet fetched articles of group +subj+.
#
# When a server cannot deliver an article and the next entry carries the
# same message-id, that entry (another server) is tried; otherwise
# TempError is raised.  Returns false when the group has no articles left
# after sorting, otherwise the lines of the body fetched last ([] when
# nothing was fetched).
def get_group_body(subj)
  result = []
  group_subject_sort(subj)
  infos = @groups[subj]["messageinfo"]
  return false if infos == nil
  (0...infos.length).each do |i|
    next if @gotten.has_key?(infos[i][:messid])

    puts "getting article: #{i}" if Debuglevel > 1
    puts "getting article: #{subj}" if Debuglevel > 1
    puts "full subject: #{infos[i][:subject]}" if Debuglevel > 0
    puts "message id: #{infos[i][:messid]}" if Debuglevel > 1
    puts "id: #{infos[i][:id]}" if Debuglevel > 1
    puts "from: #{infos[i][:from]}" if Debuglevel > 1
    puts "server: #{infos[i][:server]}" if Debuglevel > 0

    resp = false
    list = nil
    while resp == false
      current = infos[i]
      if @serverlist.include?(current[:server])
        resp, _id, _messid, list = get_body(current[:server], current[:messid])
      else
        resp = false
      end
      if resp == false
        if Debuglevel > 1
          puts "mess-id i: #{current[:messid]}"
          # XXX this could be done more neatly
          puts "mess-id i+1: #{infos[i+1][:messid]}" if infos[i+1] != nil
        end
        if (i+1 < infos.length) && (current[:messid] == infos[i+1][:messid])
          puts " Trying next server..."
          i += 1
        else
          raise TempError, " Message-id not on another server"
        end
      end
    end
    @gotten[ infos[i][:messid] ] = true
    result = list
  end
  return result
end
|
|
|
|
# Fetches the body of the first article of group +subj+.
#
# Returns false when the group has no first article with a message-id,
# nil when that article was fetched before, and otherwise the body lines.
# Entries with the same message-id further down the list are tried when a
# server cannot deliver; TempError is raised when none is left.
def get_group_body_first(subj)
  group_subject_sort(subj)
  infos = @groups[subj]["messageinfo"]
  if infos.nil? || !infos[0][:messid]
    p "ieks komt niet door lame check heen"
    return false
  end

  i = 0
  list = nil
  until @gotten.has_key?(infos[0][:messid])
    puts "getting article: #{subj}" if Debuglevel > 0
    puts "full subject: #{infos[0][:subject]}" if Debuglevel > 0
    puts "message id: #{infos[i][:messid]}" if Debuglevel > 1
    puts "id: #{infos[i][:id]}" if Debuglevel > 1
    puts "from: #{infos[i][:from]}" if Debuglevel > 1
    puts "server: #{infos[0][:server]}" if Debuglevel > 0

    resp = false
    while resp == false
      resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      next unless resp == false

      puts "mess-id i: #{infos[i][:messid]}"
      # XXX this could be done more neatly
      puts "mess-id i+1: #{infos[i+1][:messid]}" if infos[i+1] != nil
      if (i+1 < infos.length) && (infos[i][:messid] == infos[i+1][:messid])
        puts "Trying next server..."
        i += 1
      else
        raise TempError, "Message-id not on another server"
      end
    end
    @gotten[infos[i][:messid]] = true
  end
  return list
end
|
|
|
|
# Fetches the bodies of every article of group +subj+ except the first.
#
# With +file+ given, each body line is written to it followed by a newline
# and [] is returned; without it the lines of all fetched bodies are
# returned.  Entries with the same message-id further down the list are
# tried when a server cannot deliver; TempError is raised when none is
# left.
def get_group_body_rest(subj, file=nil)
  result = []
  infos = @groups[subj]["messageinfo"]
  (1...infos.length).each do |i|
    next if @gotten.has_key?(infos[i][:messid])

    puts "getting article: #{i}" if Debuglevel > 1
    puts "getting article: #{subj}" if Debuglevel > 1
    puts "full subject: #{infos[i][:subject]}" if Debuglevel > 0
    puts "message id: #{infos[i][:messid]}" if Debuglevel > 1
    puts "id: #{infos[i][:id]}" if Debuglevel > 1
    puts "from: #{infos[i][:from]}" if Debuglevel > 1
    puts "server: #{infos[i][:server]}" if Debuglevel > 0

    resp = false
    list = nil
    while resp == false
      resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      next unless resp == false

      puts "mess-id i: #{infos[i][:messid]}"
      # XXX this could be done more neatly
      puts "mess-id i+1: #{infos[i+1][:messid]}" if infos[i+1] != nil
      if (i+1 < infos.length) && (infos[i][:messid] == infos[i+1][:messid])
        puts "Trying next server..."
        i += 1
      else
        raise TempError, "Message-id not on another server"
      end
    end
    @gotten[ infos[i][:messid] ] = true
    if file
      list.each { |line| file.print "#{line}\n" }
    else
      result.concat(list)
    end
  end
  return result
end
|
|
|
|
# Returns the keys of all subject groups, building the grouping first if
# it is out of date.
def get_group_subjects
  group_subjects if !@grouped
  @groups.keys
end
|
|
|
|
# Returns the poster of the first article of group +subj+ (after sorting),
# or false when the group has no articles or the first one has no poster.
def get_group_poster(subj)
  group_subject_sort(subj)
  infos = @groups[subj]["messageinfo"]
  poster = infos.nil? ? nil : infos[0][:from]
  unless poster
    p "ieks komt niet door lame check heen"
    return false
  end
  poster
end
|
|
|
|
# Returns the date of the first article of group +subj+ (after sorting),
# or false when the group has no articles or the first one has no date.
def get_group_date(subj)
  group_subject_sort(subj)
  infos = @groups[subj]["messageinfo"]
  date = infos.nil? ? nil : infos[0][:date]
  unless date
    p "ieks komt niet door lame check heen"
    return false
  end
  date
end
|
|
|
|
# True when the number of distinct message-ids in group +subj+ is at least
# the announced number of parts.
def group_is_complete(subj)
  group_subjects unless @grouped
  distinct = @groups[subj]["messageinfo"].map { |x| x[:messid] }.uniq
  distinct.length >= @groups[subj]["total"].to_i
end
|
|
|
|
# Percentage (truncated to an integer) of the entries of group +subj+ that
# come from the first server in the server list.
def group_percentage_primary(subj)
  group_subjects unless @grouped
  infos = @groups[subj]["messageinfo"]
  groupsize = infos.length
  primarycount = infos.count { |x| x[:server] == @serverlist[0] }
  ((100.0/groupsize)*primarycount).to_i
end
|
|
|
|
# Percentage (truncated to an integer) of the entries of group +subj+ that
# come from the last server in the list and whose subject is not present
# on any other server.  Returns 0 when the last server is also the first.
def group_percentage_fallback(subj)
  group_subjects unless @grouped
  infos = @groups[subj]["messageinfo"]
  groupsize = infos.length
  fallback = @serverlist[-1]
  return 0 if fallback == @serverlist[0]

  # subjects that are available from a server other than the fallback
  onmain = {}
  infos.each { |x| onmain[x[:subject]] = 1 if x[:server] != fallback }

  fallbackcount = infos.count { |x| x[:server] == fallback && onmain[x[:subject]].nil? }
  ((100.0/groupsize)*fallbackcount).to_i
end
|
|
|
|
# True when group +subj+ announces exactly one part.
def group_is_singlepart(subj)
  parts = @groups[subj]["total"].to_i
  parts == 1
end
|
|
|
|
# True when group +subj+ announces more than one part.
def group_is_multipart(subj)
  parts = @groups[subj]["total"].to_i
  parts > 1
end
|
|
|
|
# Rebuilds @groups from @messageinfo.
#
# Subjects containing "(n/m)" or "[n/m]" are grouped under the subject
# with that marker removed and " (m)" appended; all other subjects form
# their own single-part group.  Articles whose part number is 0 are left
# out.  Marks the grouping as up to date.
def group_subjects
  @groups = {}
  @messageinfo.each_with_index do |msg, i|
    subject = msg[:subject]
    puts "group subjects: #{i} #{subject}" if Debuglevel > 3
    # the order of these two patterns may matter... no idea
    if subject =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || subject =~ /(.*)\[(\d+)\/(\d+)\](.*)/
      parts = Regexp.last_match
      key = "#{parts[1]}#{parts[4]} (#{parts[3]})"
      number = parts[2]
      total = parts[3]
    else
      key = subject
      number = 1
      total = 1
    end
    next if number.to_i == 0

    unless @groups.has_key?(key)
      @groups[key] = {}
      @groups[key]["total"] = total
      @groups[key]["messageinfo"] = []
    end
    @groups[key]["messageinfo"].push(msg)
  end
  @grouped = true
end
|
|
|
|
# Stores +ids+ as the set of article numbers to skip for +server+.
# Returns false (and stores nothing) when the set is not finite or
# contains a negative number, true otherwise.
def set_skip_ids(server, ids)
  set = Set::IntSpan.new(ids)
  return false unless set.finite?

  lowest = set.min
  return false if !lowest.nil? && lowest < 0

  @connections[server]["skip_ids"] = set
  true
end
|
|
|
|
# Marks every article of group +subject+ in the newsrc of the server it
# came from.  Articles from servers without a connection are ignored.
def group_update_newsrc(subject)
  infos = @groups[subject]["messageinfo"]
  (0...infos.length).each do |i|
    conn = @connections[infos[i][:server]]
    conn["newsrc"].mark(@group, infos[i][:id]) if conn
  end
end
|
|
|
|
# Writes the newsrc entry of the current group for every connected server.
def save_newsrc()
  @connections.keys.each do |server|
    rc = @connections[server]["newsrc"]
    rc.save_group(@group)
  end
end
|
|
|
|
# Queues one cache line ("id|messid|date|from|subject") for +server+ and
# flushes the queue to disk once it holds more than 100 lines.
def cache_add(cachedir, id, messid, date, from, subject, server)
  entry = "#{id}|#{messid}|#{date}|#{from}|#{subject}\n"
  @cache_buf[server] = [] unless @cache_buf.has_key?(server)
  @cache_buf[server].push(entry)
  cache_save(cachedir, server) if @cache_buf[server].length > 100
end
|
|
|
|
# Warns when the cache directory does not exist.
#
# Uses File.exist?; FileTest.exists? was removed in Ruby 3.2.
def cache_check(cachedir)
  unless File.exist?(cachedir)
    puts "Cachedir '#{cachedir}' doesn't exists, performance will suffer"
  end
end
|
|
|
|
# Reads the cache file of every connected server and rewrites it without
# stale entries.
#
# A cached line is kept when its article number lies between the server's
# first and last article and is not in the server's skip set.  Kept lines
# are written to the new cache file, added to memory when the subject
# passes preselect, and their numbers are added to the skip set so they
# are not fetched again.  Lines that cannot be converted to US-ASCII are
# reported and dropped.
#
# cachedir:: cache directory; false/nil or a missing directory means no
#            cache files are touched.
#
# Changes compared to the previous version:
# * both files are closed before the new file replaces the old one (they
#   were never closed, so the rename could happen before buffered output
#   was written);
# * a false/nil cachedir no longer raises TypeError;
# * the trailing newline is removed before splitting, so a cached subject
#   equals the subject as it was received from the server.
def cache_read(cachedir)
  puts "#{Time.now} Reading & scrubbing caches"
  filename = "#{cachedir}/#{@group}.ripnewscache"
  usable_dir = cachedir && FileTest.directory?(cachedir)
  excludes = {}
  @connections.keys.each do |server|
    first = @connections[server]["first"]
    last = @connections[server]["last"]
    puts " #{Time.now} Reading cache for #{server}"
    excludes[server] = {}
    @connections[server]["skip_ids"].elements.each { |x| excludes[server][x] = true }

    cachepath = "#{filename}.#{server}"
    next unless usable_dir && FileTest.file?(cachepath) && FileTest.readable?(cachepath)

    File.open("#{cachepath}.new", "w") do |outfile|
      File.open(cachepath) do |cachefile|
        # read line by line; the file can be too big to hold in memory
        while (line = cachefile.gets)
          begin
            line.encode!("US-ASCII")
          rescue
            # the original encoding is unknown: give up on this line
            puts "String#encode couldn't handle: '#{line}'"
            next
          end

          id_i, messid, date, from, subject = line.chomp.split('|', 5)
          id_i = id_i.to_i
          next unless first <= id_i && id_i <= last
          next if excludes[server].has_key?(id_i)

          outfile.puts(line)
          add(id_i, messid, date, from, subject, server) if preselect(subject)
          # XXX all the slowness of cache_read is in this line:
          @connections[server]["skip_ids"].insert!(id_i)
        end
      end
    end

    begin
      FileUtils.move("#{cachepath}.new", cachepath)
      puts " #{Time.now} Cache scrubbed for #{server}"
    rescue SystemCallError
      puts "Couldn't scrub #{server} cache"
    end
  end
  puts "#{Time.now} Caches read"
end
|
|
|
|
# Appends the queued cache lines of +server+ (sorted) to its cache file
# and empties the queue.  Nothing happens when +cachedir+ is false/nil or
# not a directory; the queue is kept in that case.
#
# The file is opened in block form so it is closed even when a write
# fails, and a false/nil cachedir no longer raises TypeError.
def cache_save(cachedir, server)
  return unless cachedir && FileTest.directory?(cachedir)

  filename = "#{cachedir}/#{@group}.ripnewscache"
  File.open("#{filename}.#{server}", "a+") do |file|
    @cache_buf[server].sort!
    @cache_buf[server].each do |line|
      begin
        file.puts line
      rescue Encoding::UndefinedConversionError
        # a line that cannot be written in the file's encoding is skipped
        next
      end
    end
  end
  @cache_buf[server] = []
end
|
|
|
|
# Copies the cache lines of +server+ whose article number lies between
# the server's first and last article to "<cachefile>.new".
#
# XXX this could and probably should be done in a separate thread...
# XXX but it'll work for now
# XXX also read articles aren't removed right now
# XXX this could be done, but I don't know if I want to pay the overhead
#
# The new file is not moved over the old one here.  Uses File.exist?
# (File.exists? was removed in Ruby 3.2) and closes both files when done.
def cache_scrub(cachedir, server)
  p "scrubbing cache"
  p Time.now
  filename = "#{cachedir}/#{@group}.ripnewscache"
  if File.exist?("#{filename}.#{server}")
    File.open("#{filename}.#{server}") do |infile|
      File.open("#{filename}.#{server}.new", "w") do |outfile|
        infile.each do |line|
          # only the leading article number is needed
          id = line.split("|", 2)[0]
          if id.to_i >= @connections[server]["first"] and
             id.to_i <= @connections[server]["last"]
            outfile.puts(line)
          end
        end
      end
    end
  end
  p Time.now
end
|
|
|
|
###############################################################
|
|
|
|
# a base64 decoder...
|
|
# Decodes base64 text and returns the decoded bytes as one string.
#
# Each line of +str+ is decoded on its own and the results are joined.
#
# The previous implementation converted each line to uuencode format by
# hand and raised TypeError on current Ruby: pack("c") was given a String,
# and the Array returned by unpack was added to a String.  It also removed
# "/" from the input although it is part of the base64 alphabet.  The "m"
# directive of String#unpack performs the decoding directly.
def decode64(str)
  str.split("\n").map { |line| line.unpack("m").first }.join
end
|
|
|
|
###############################################################
|
|
|
|
# Sorts the articles of group +subj+ and drops those whose server is no
# longer in the server list.
#
# Articles are ordered by subject (ward_sort); articles with equal
# subjects are ordered by the position of their server in the server
# list.  The group hash is rebuilt with copies of the kept articles; when
# none is left it only holds "total" and has no "messageinfo" key.
#
# Calling this again on a group without "messageinfo" is now harmless (it
# used to raise NoMethodError on nil).  The "strange things" diagnostic
# was removed: it could not trigger, because articles of unknown servers
# are filtered out before sorting, and it printed a's server for b.
def group_subject_sort(subj)
  serverhash = {}
  @serverlist.each_with_index { |server, rank| serverhash[server] = rank }

  total = @groups[subj]["total"]
  sort_arr = []
  (@groups[subj]["messageinfo"] || []).each do |msg|
    puts "subj sort #{msg[:subject]}" if Debuglevel > 2
    puts "subj sort #{msg[:messid]}" if Debuglevel > 2
    puts "subj sort #{msg[:id]}" if Debuglevel > 2
    puts "subj sort #{msg[:server]}" if Debuglevel > 2
    sort_arr.push(msg.dup) if serverhash.has_key?(msg[:server])
  end

  sort_arr.sort! do |a, b|
    r = ward_sort(a[:subject], b[:subject])
    r = serverhash[a[:server]] <=> serverhash[b[:server]] if r == 0
    r
  end

  @groups[subj].clear
  @groups[subj]["total"] = total
  @groups[subj]["messageinfo"] = sort_arr unless sort_arr.empty?

  if Debuglevel > 2
    sort_arr.each do |msg|
      puts "subject sort: #{msg[:subject]}"
      puts "server: #{msg[:server]}"
    end
  end
  nil
end
|
|
|
|
# Comparator that orders strings "naturally": runs of digits are compared
# as numbers, everything else as text.  Returns the first non-zero
# comparison result, -1 when +b+ has pieces left after all pieces of +a+
# compared equal, and 0 otherwise.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)

  left.each_with_index do |x, idx|
    y = right[idx]
    numeric = (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
    r = if numeric
          x.to_i <=> y.to_i
        else
          x.to_s <=> y.to_s
        end
    return r unless r == 0
  end
  right.length > left.length ? -1 : 0
end
|
|
|
|
# Splits the ranges of a comma separated run list ("1-1200,1500") into
# pieces of 500 articles, so that one request never covers a huge range.
# A range is only split while its end is more than 500 above its start.
# Single numbers are passed through.  Returns nil for nil.
#
# Idea for later: when the gap between two runs is only a few articles it
# is probably worth fetching those too and saving a network request.
def rechunk_runlist(runlist)
  return nil if runlist == nil
  chunksize = 500

  pieces = runlist.split(',').map do |run|
    bounds = /(.*)-(.*)/.match(run)
    if bounds
      low = bounds[1]
      high = bounds[2]
      chunks = ""
      while (high.to_i - low.to_i) > chunksize
        chunks << "#{low}-#{low.to_i+(chunksize-1)},"
        low = low.to_i + chunksize
      end
      chunks << "#{low}-#{high}"
    else
      run
    end
  end
  pieces.join(",")
end
|
|
|
|
# Reports the exception currently being handled ($!) together with the
# server it happened on and the location printerr was called from.
def printerr(server)
  err = $!
  origin = caller[0]
  puts "Caught #{err.class} reading from server #{server} (#{origin})"
  puts "Error: #{err}"
end
|
|
|
|
# Closes the connection to every server; errors caused by connections
# that are already gone are ignored.
def disconnect
  @connections.keys.each do |server|
    begin
      @connections[server]["nntp"].quit
    rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, IOError
      # connection was already closed
    end
  end
end
|
|
|
|
# Forgets all collected article data and closes every server connection.
def quit
  # just testing if these should be reset...
  @messageinfo = Array.new
  disconnect
end
|
|
|
|
private :ward_sort
|
|
|
|
end # class
|