ripnews/trunk/ripnews/news/article.rb

1029 lines
28 KiB
Ruby

# $Dwarf: article.rb,v 1.114 2005/05/12 07:39:53 ward Exp $
# $Source$
#
# Copyright (c) 2002, 2003, 2004, 2005 Ward Wouts <ward@wouts.nl>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
require 'set/intspan'
require 'net/nntp'
require 'news/newsrc'
require 'tempfile'
require 'timeout'
#require 'yaml'
# Root of the exception hierarchy used by the Article class.
class ArticleError < RuntimeError
end

# Recoverable failure (for example a header chunk that could not be read or
# a body that is not available on any remaining server).
class TempError < ArticleError
end

# Unrecoverable failure (for example repeated timeouts or a server that can
# no longer be reached).
class PermError < ArticleError
end
module Net
  # Net::NNTP connection with a keep-alive thread.
  #
  # A background thread counts @timecounter down by 5 every 5 seconds.
  # Every line written through putline resets the counter to @resettime.
  # Once the counter is no longer positive the thread issues a "DATE"
  # command every 5 seconds. longcmd and shortcmd are serialised with a
  # mutex so the keep-alive command and regular commands do not interleave.
  class KANNTP < Net::NNTP
    # Same arguments as the superclass constructor; they are forwarded
    # unchanged once the keep-alive bookkeeping has been set up.
    def initialize(host, port=nil, user=nil, password=nil, readermode=nil)
      @host = host
      @semaphore = Mutex.new
      @resettime = 60
      @timecounter = @resettime
      @thr = Thread.new do
        Thread.pass
        loop do
          if @timecounter > 0
            @timecounter -= 5
          else
            sendka
          end
          sleep 5
        end
      end
      super
    end

    # Any outgoing line counts as activity: restart the idle countdown.
    def putline(line)
      @timecounter = @resettime
      super
    end

    # Multi-line command, guarded by the connection mutex.
    def longcmd(line)
      @semaphore.synchronize do
        return super
      end
    end

    # Single-line command, guarded by the connection mutex.
    def shortcmd(line)
      @semaphore.synchronize do
        return super
      end
    end

    # Change the idle time (in seconds) after which keep-alives start.
    def setresettime(time)
      @resettime = time
    end

    # Stop the keep-alive thread, then close the connection. A connection
    # that is already gone (EOFError / EPIPE) is silently accepted.
    def quit
      @thr.exit
      begin
        super
      rescue EOFError, Errno::EPIPE
      end
    end

    private

    # Send the keep-alive command.
    def sendka
      shortcmd("DATE")
    end
  end # class KANNTP
end # module Net
############################################################
class Article
Debuglevel = 1
Message = Struct.new(:messid, :id, :from, :server, :subject)
# Parse the server specification, connect to every server and load the
# per-server newsrc for the group.
#
# nntpservers:: '|'-separated list; each entry is "host" or
#               "user[:password]@host"
# groupname::   newsgroup this object works on
# newsrc::      base path; each server uses "#{newsrc}.#{server}"
#
# Servers whose connection attempt fails with a socket-level error are
# reported and removed from the server list. Timeouts and connection
# resets are retried every 3 seconds without limit (as before).
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
  @messageinfo = []
  @grouped = false
  @groups = {}
  @gotten = {}
  @group = groupname
  @preselectpattern = Regexp.new('^')
  @cache_buf = {}
  @serverlist = []
  @serverpasses = {}
  nntpservers.split('|').each do |spec|
    server = spec
    user = nil
    pass = nil
    if (parts = spec.match(/(.*)@([^@]*)$/))
      userpass = parts[1]
      server = parts[2]
      if (cred = userpass.match(/([^:]*):(.*)/))
        user = cred[1]
        pass = cred[2]
      else
        user = userpass
      end
    end
    @serverlist.push(server)
    @serverpasses[server] = { 'user' => user, 'pass' => pass }
  end
  @connections = {}
  # Walk a copy of the list: del_server deletes from @serverlist, and
  # deleting from an array while iterating over it skips the next entry.
  @serverlist.dup.each do |server|
    @connections[server] = {}
    @cache_buf[server] = []
    begin
      begin
        # Timeout.timeout / Timeout::Error replace the bare Kernel#timeout
        # and ::TimeoutError, which current Ruby versions no longer define.
        Timeout.timeout(60) do
          @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
        end
        @connections[server]["nntp"].mode_reader
      rescue Timeout::Error, Errno::ECONNRESET
        sleep 3
        retry
      end
      @connections[server]["skip_ids"] = Set::IntSpan.new()
      @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
      set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
    rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT
      puts "Connection to #{server} failed: #{$!}"
      # The connection object does not exist when the constructor itself
      # failed, so only close it when there is one.
      nntp = @connections[server]["nntp"]
      nntp.quit if nntp
      del_server(server)
    end
  end
end
# Drop the current connection to +server+ and open a fresh one.
#
# Closing the old connection is best effort: it gets 10 seconds, and
# timeouts or errors from an already dead socket are ignored. Opening the
# new connection is attempted up to three times; after that the server is
# removed and PermError is raised.
def reconnect(server)
  retries = 0
  begin
    puts "Trying to kill old connection #{Time.now}"
    # Timeout.timeout / Timeout::Error replace the bare Kernel#timeout and
    # ::TimeoutError, which current Ruby versions no longer define.
    Timeout.timeout(10) do
      @connections[server]["nntp"].quit
    end
    puts "Killed old connection #{Time.now}"
  rescue Timeout::Error
    puts "Timeout killing old connection"
  rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, Errno::ETIMEDOUT
    # the old connection was already gone; nothing left to close
  end
  begin
    puts "Trying to reconnect #{Time.now}"
    sleep 3
    Timeout.timeout(60) do
      @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
    end
    @connections[server]["nntp"].mode_reader
  rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT, Timeout::Error, Errno::ECONNREFUSED
    puts "Reconnect to #{server} failed: #{$!}"
    if retries > 1
      del_server(server)
      raise PermError, "Couldn't connect to #{server}"
    end
    retries += 1
    retry
  end
  puts "Succesfully reconnected to #{server}"
end
# Debug helper: print every instance variable of this object followed by
# "X: " and, when the value responds to #size, its size.
#
# The value is fetched with instance_variable_get instead of
# instance_eval(name): instance_variables yields Symbols on current Ruby
# versions and instance_eval does not accept a Symbol as source.
def memusage
  puts "memprof:"
  puts "global:"
  instance_variables.each do |name|
    puts name
    print "X: "
    value = instance_variable_get(name)
    puts value.size if value.respond_to?(:size)
  end
end
# Compile +regexp+ and use it as the subject filter applied by preselect.
def set_preselect_pattern(regexp)
  compiled = Regexp.new(regexp)
  @preselectpattern = compiled
end
# Tell whether +subject+ matches the current preselect pattern.
# Always returns true or false.
def preselect(subject)
  return false unless subject =~ @preselectpattern
  true
end
# Record one article and invalidate the subject grouping so that it is
# rebuilt on next use.
def add(id, messid, from, subject, server)
  record = Message.new(messid, id, from, server, subject)
  @messageinfo << record
  @grouped = false
end
# Forget +server+: announce the removal, then drop its connection record
# and its entry in the server list.
def del_server(server)
  notice = "Removing server #{server} from list"
  puts notice
  @connections.delete(server)
  @serverlist.delete(server)
end
# Collect the article headers of the group from every connected server.
#
# cachedir:: directory holding the per-server header caches, or false
#
# Steps, in order:
# 1. ask each server for the first/last article number of the group;
#    servers that raise PermError or report first > last are removed
# 2. read the header caches (cache_read)
# 3. extend each server's skip set so that very old, never-read articles
#    are not requested
# 4. fetch subject, message-id and from headers (XHDR) in chunks for every
#    article number that is not in the skip set, register the articles that
#    pass preselect, and append all of them to the cache buffer
def get_articles(cachedir=false)
if cachedir != false
cache_check(cachedir)
end
# iterate over a snapshot of the keys: del_server removes entries
@connections.keys.each{|server|
begin
first, last = get_group_info(server)
rescue PermError
puts "Error: #{$!}"
del_server(server)
next
end
if first.to_i <= last.to_i
# available articles on server
@connections[server]["first"] = first ? first.to_i : 0
@connections[server]["last"] = last ? last.to_i : 0
if Debuglevel > 0
puts " Server: #{server} First: #{first} Last: #{last}"
end
# clean up old newsrc entries
if @connections[server]["first"] > 0
@connections[server]["newsrc"].unmark_range(@group, 0, (@connections[server]["first"] - 1).to_s)
@connections[server]["newsrc"].save_group(@group)
end
else
puts " First article has higher number than last article on server #{server}."
del_server(server)
end
}
cache_read(cachedir)
# stuff that is really too old will never get filled in, so do not even try to fetch it
# do want to fetch some of it, since logging shows that older articles do get downloaded
@connections.keys.each{|server|
if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"]
articles = @connections[server]["last"] - @connections[server]["first"]
# keep a window below the highest skipped id fetchable: a third of the
# group size for groups over 10000 articles, otherwise 2000 ids
if articles > 10000
fillerend = (@connections[server]["skip_ids"].max - (articles/3)).to_i
else
fillerend = @connections[server]["skip_ids"].max - 2000
end
if @connections[server]["skip_ids"].min && fillerend > @connections[server]["skip_ids"].min
@connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}")
# p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}"
end
end
}
@connections.keys.each{|server|
puts " reading articles from server: #{server}"
range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
# article numbers still to fetch, split into chunks by rechunk_runlist
rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)
puts "rangelist: #{rangelist}" if Debuglevel > 2
puts "rangelist: #{rangelist.class.to_s}" if Debuglevel > 2
puts "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}" if Debuglevel > 2
begin
unless rangelist == nil or rangelist =~ /^$/
rangelist.split(',').each{|i|
puts "i: #{i}" if Debuglevel > 2
begin
# resp, xover_lines = get_xover(server, i)
resp, subj_lines = get_xhdr(server, i, "subject")
resp, messid_lines = get_xhdr(server, i, "message-id")
resp, from_lines = get_xhdr(server, i, "from")
rescue TempError
# skip this chunk, carry on with the next one
printerr(server)
next
end
# art maps article id => {"subject", "messid", "from"}
art = {}
# xover_lines.collect{|x|
# art[x[0]] = {} unless art.has_key?(x[0])
# art[x[0]]["subject"] = x[1]
# art[x[0]]["messid"] = x[4]
# print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
# print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2
# }
subj_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["subject"] = x[1]
puts "art id: #{x[0]} subj: #{x[1]}" if Debuglevel > 2
}
messid_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["messid"] = x[1]
puts "art id: #{x[0]} messid: #{x[1]}" if Debuglevel > 2
}
from_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["from"] = x[1]
puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2
}
art.keys.each{|id|
# only articles for which all three headers were returned are used
if art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
# @newids[server][id.to_i] = true
# perhaps only do this when the preselector picks the article,
# and otherwise just add a line to the cache
# without keeping it in memory
if preselect(art[id]["subject"])
add(id.to_i, art[id]["messid"], art[id]["from"], art[id]["subject"], server)
end
cache_add(cachedir, id, art[id]["messid"], art[id]["from"], art[id]["subject"], server)
end
}
}
end
rescue PermError
del_server(server)
next
end
# write out whatever is still buffered for this server
cache_save(cachedir, server)
}
GC.start
end
# Select the group on +server+ and return its article number bounds.
#
# Returns [first, last] as reported by the server's GROUP reply.
#
# Raises PermError when
# * the server answers with a reply error other than 400/503,
# * more than three reconnect-worthy errors occur (reply 400/503 or a
#   broken connection), or
# * the whole exchange times out (30 seconds) more than once.
def get_group_info(server)
  timedout = 0
  errs = 0
  first = ""
  last = ""
  begin
    # Timeout.timeout / Timeout::Error replace the bare Kernel#timeout and
    # ::TimeoutError, which current Ruby versions no longer define.
    Timeout.timeout(30) do
      begin
        _resp, _count, first, last, _name = @connections[server]["nntp"].group(@group)
      rescue Net::NNTPReplyError
        printerr(server)
        raise PermError, "#{$!}" unless $!.to_s =~ /^503|^400/
        errs += 1
        raise PermError, "Too many errors! (get_group_info)" if errs > 3
        reconnect(server)
        retry
      rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Errno::EINVAL
        printerr(server)
        # The counter was never incremented before, which left the
        # "Too many errors" guard below without any effect.
        errs += 1
        raise PermError, "Too many errors! (get_group_info)" if errs > 3
        reconnect(server)
        retry
      end
    end
  rescue Timeout::Error
    timedout += 1
    raise PermError, "Too many timeouts! (get_group_info)" if timedout > 1
    puts "Time out, reconnecting to server... (get_group_info)"
    reconnect(server)
    retry
  end
  return first, last
end
# Fetch one header for a range of articles with XHDR.
#
# server:: server name (key into @connections)
# range::  article number or "first-last" range, passed to xhdr as is
# header:: header name, e.g. "subject"
#
# Returns [response, lines] as delivered by the connection's xhdr call.
# Raises TempError when the response code is not 2xx and PermError when the
# exchange (180 seconds) times out more than once. A reply error of 400/503
# or a broken connection triggers reconnect + group re-selection + retry.
def get_xhdr(server, range, header)
  timedout = 0
  resp = ""
  lines = []
  begin
    # Timeout.timeout / Timeout::Error replace the bare Kernel#timeout and
    # ::TimeoutError, which current Ruby versions no longer define.
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        puts "getting headers: #{header}, #{range}" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xhdr(header, range)
        code = resp.to_i
        if code == 500
          puts "xhdr not implemented"
          puts "Error: #{$!}"
        end
        unless code >= 200 and code < 300
          puts "got response #{resp} while reading group #{@group} from #{server}"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if $!.to_s =~ /^503|^400/
          reconnect(server)
          get_group_info(server)
          retry
        else
          # other reply errors are only reported; whatever is in
          # resp/lines at this point is returned to the caller
          puts "Won't handle this... yet :("
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, lines
  rescue Timeout::Error
    puts "Time out, reconnecting to server (get_xhdr)"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
# Fetch overview data for a range of articles with XOVER.
#
# server:: server name (key into @connections)
# range::  article number or "first-last"; a single number is used as both
#          ends of the range
#
# Returns [response, lines] as delivered by the connection's xover call.
# Raises TempError when the response code is not 2xx and PermError when the
# exchange (180 seconds) times out more than once. A reply error of 400/503
# or a broken connection triggers reconnect + group re-selection + retry.
def get_xover(server, range)
  timedout = 0
  resp = ""
  lines = []
  start, ed = range.split("-")
  ed ||= start
  begin
    # Timeout.timeout / Timeout::Error replace the bare Kernel#timeout and
    # ::TimeoutError, which current Ruby versions no longer define.
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        puts "getting headers: #{range}" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xover(start, ed)
        code = resp.to_i
        if code == 500
          puts "xover not implemented"
          puts "Error: #{$!}"
        end
        unless code >= 200 and code < 300
          puts "got response #{resp} while reading group #{@group} from #{server}"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if $!.to_s =~ /^503|^400/
          reconnect(server)
          get_group_info(server)
          retry
        else
          # other reply errors are only reported; whatever is in
          # resp/lines at this point is returned to the caller
          puts "Won't handle this... yet :("
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, lines
  rescue Timeout::Error
    puts "Time out, reconnecting to server (get_xover)"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xover)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
# Name of the newsgroup this object works on.
def get_groupname
  @group
end
# Fetch the body of +message+ from +server+.
#
# Returns [response, id, message-id, lines] on success and false when the
# server reports that it cannot deliver the article (reply error, EOF).
# A 400/503 reply is retried once after reconnecting; a broken pipe or
# connection reset is retried after reconnecting. Raises PermError when the
# exchange (180 seconds) times out more than once.
def get_body(server, message)
  timedout = 0
  retries = 0
  resp = ""
  id = ""
  messid = ""
  list = []
  begin
    # Timeout.timeout / Timeout::Error replace the bare Kernel#timeout and
    # ::TimeoutError, which current Ruby versions no longer define.
    Timeout.timeout(180) do
      begin
        list = []
        resp, id, messid, list = @connections[server]["nntp"].body(message)
      rescue Net::NNTPReplyError
        # Take the message text explicitly: appending the exception object
        # to a String only works where Exception#to_str exists.
        reply = $!.to_s
        printerr(server)
        if retries == 0 && (reply =~ /^503/ || reply =~ /^400/)
          reconnect(server)
          get_group_info(server)
          retries = 1
          retry
        end
        return false
      rescue EOFError, NameError
        # NOTE(review): NameError also covers NoMethodError, so programming
        # errors inside the body call are reported as "no body" - confirm
        # this is intended.
        printerr(server)
        return false
      rescue Errno::EPIPE, Errno::ECONNRESET
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, id, messid, list
  rescue Timeout::Error, Errno::ETIMEDOUT
    puts "Time out, reconnecting to server (get_body)"
    timedout += 1
    raise PermError, "Too many timeouts! (get_body)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
# Fetch every not yet fetched article of the group +subj+.
#
# The group is sorted first (group_subject_sort). For each article the
# body is requested from the article's server; when that fails and the
# next entry carries the same message-id, that entry (another server) is
# tried instead. Raises TempError when no further copy exists.
#
# Returns false when the group has no message list, otherwise the body
# lines of the article fetched last ([] when nothing was fetched).
def get_group_body(subj)
  result = []
  group_subject_sort(subj)
  return false if @groups[subj]["messageinfo"] == nil
  parts = @groups[subj]["messageinfo"]
  (0...parts.length).each do |idx|
    next if @gotten.has_key?(parts[idx][:messid])
    puts "getting article: #{idx}" if Debuglevel > 1
    puts "getting article: #{subj}" if Debuglevel > 1
    puts "full subject: #{parts[idx][:subject]}" if Debuglevel > 0
    puts "message id: #{parts[idx][:messid]}" if Debuglevel > 1
    puts "id: #{parts[idx][:id]}" if Debuglevel > 1
    puts "from: #{parts[idx][:from]}" if Debuglevel > 1
    puts "server: #{parts[idx][:server]}" if Debuglevel > 0
    resp = false
    while resp == false
      # entries on servers that were removed count as failed fetches
      if @serverlist.include?(parts[idx][:server])
        resp, _id, _messid, list = get_body(parts[idx][:server], parts[idx][:messid])
      else
        resp = false
      end
      if resp == false
        if Debuglevel > 1
          puts "mess-id i: #{parts[idx][:messid]}"
          # XXX this should be doable more neatly
          puts "mess-id i+1: #{parts[idx+1][:messid]}" if parts[idx+1] != nil
        end
        same_article_follows = (idx+1 < parts.length) && (parts[idx][:messid] == parts[idx+1][:messid])
        unless same_article_follows
          raise TempError, " Message-id not on another server"
        end
        puts " Trying next server..."
        idx += 1
      end
    end
    @gotten[ parts[idx][:messid] ] = true
    result = list
  end
  return result
end
# Fetch the first article of the group +subj+.
#
# The group is sorted first (group_subject_sort). When the fetch from one
# server fails and the next entry carries the same message-id, that entry
# is tried instead; TempError is raised when no further copy exists.
#
# Returns false when the group has no usable first entry, the body lines
# of the first article when it was fetched by this call, and nil when the
# first article had already been fetched earlier.
def get_group_body_first(subj)
  group_subject_sort(subj)
  idx = 0
  list = nil
  parts = @groups[subj]["messageinfo"]
  if parts == nil || !parts[0][:messid]
    p "ieks komt niet door lame check heen"
    return false
  end
  p "komt wel door lame check heen"
  until @gotten.has_key?(parts[0][:messid])
    puts "getting article: #{subj}" if Debuglevel > 0
    puts "full subject: #{parts[0][:subject]}" if Debuglevel > 0
    puts "message id: #{parts[idx][:messid]}" if Debuglevel > 1
    puts "id: #{parts[idx][:id]}" if Debuglevel > 1
    puts "from: #{parts[idx][:from]}" if Debuglevel > 1
    puts "server: #{parts[0][:server]}" if Debuglevel > 0
    resp = false
    while resp == false
      resp, _id, _messid, list = get_body(parts[idx][:server], parts[idx][:messid])
      if resp == false
        puts "mess-id i: #{parts[idx][:messid]}"
        # XXX this should be doable more neatly
        puts "mess-id i+1: #{parts[idx+1][:messid]}" if parts[idx+1] != nil
        same_article_follows = (idx+1 < parts.length) && (parts[idx][:messid] == parts[idx+1][:messid])
        unless same_article_follows
          raise TempError, "Message-id not on another server"
        end
        puts "Trying next server..."
        idx += 1
      end
    end
    # idx only advances across entries with the same message-id, so this
    # marks the first article as fetched and ends the outer loop
    @gotten[parts[idx][:messid]] = true
  end
  return list
end
# Fetch every not yet fetched article of the group +subj+ except the first
# entry of its message list. The list is used in its current order; this
# method does not sort it.
#
# file:: optional IO-like object; when given, each body line is printed to
#        it followed by a newline instead of being collected
#
# When the fetch from one server fails and the next entry carries the same
# message-id, that entry is tried instead; TempError is raised when no
# further copy exists.
#
# Returns the collected body lines ([] when +file+ was given).
def get_group_body_rest(subj, file=nil)
  result = []
  parts = @groups[subj]["messageinfo"]
  (1...parts.length).each do |idx|
    next if @gotten.has_key?(parts[idx][:messid])
    puts "getting article: #{idx}" if Debuglevel > 1
    puts "getting article: #{subj}" if Debuglevel > 1
    puts "full subject: #{parts[idx][:subject]}" if Debuglevel > 0
    puts "message id: #{parts[idx][:messid]}" if Debuglevel > 1
    puts "id: #{parts[idx][:id]}" if Debuglevel > 1
    puts "from: #{parts[idx][:from]}" if Debuglevel > 1
    puts "server: #{parts[idx][:server]}" if Debuglevel > 0
    resp = false
    while resp == false
      resp, _id, _messid, list = get_body(parts[idx][:server], parts[idx][:messid])
      if resp == false
        puts "mess-id i: #{parts[idx][:messid]}"
        # XXX this should be doable more neatly
        puts "mess-id i+1: #{parts[idx+1][:messid]}" if parts[idx+1] != nil
        same_article_follows = (idx+1 < parts.length) && (parts[idx][:messid] == parts[idx+1][:messid])
        unless same_article_follows
          raise TempError, "Message-id not on another server"
        end
        puts "Trying next server..."
        idx += 1
      end
    end
    @gotten[ parts[idx][:messid] ] = true
    if file
      list.each { |line| file.print "#{line}\n" }
    else
      result.concat(list)
    end
  end
  return result
end
# Keys (normalised subjects) of all article groups; the grouping is rebuilt
# first when articles were added since the last grouping.
def get_group_subjects
  group_subjects if !@grouped
  @groups.keys
end
# Poster (from header) of the first article of the group +subj+ after
# sorting; false when the group has no message list or no poster.
def get_group_poster(subj)
  group_subject_sort(subj)
  parts = @groups[subj]["messageinfo"]
  if parts == nil || !parts[0][:from]
    p "ieks komt niet door lame check heen"
    return false
  end
  parts[0][:from]
end
# True when the group +subj+ holds at least as many distinct message-ids as
# the total number of parts announced in its subject.
def group_is_complete(subj)
  group_subjects unless @grouped
  distinct_ids = @groups[subj]["messageinfo"].map { |msg| msg[:messid] }.uniq
  distinct_ids.length >= @groups[subj]["total"].to_i
end
# Percentage (truncated to an integer) of the entries of group +subj+ that
# live on the first server of the server list.
def group_percentage_primary(subj)
  group_subjects unless @grouped
  parts = @groups[subj]["messageinfo"]
  groupsize = parts.length
  primary = @serverlist[0]
  primarycount = parts.select { |msg| msg[:server] == primary }.length
  ((100.0/groupsize)*primarycount).to_i
end
# Percentage (truncated to an integer) of the entries of group +subj+ that
# are only available on the last server of the server list, i.e. entries on
# that server whose subject does not occur on any other server.
# Returns 0 when the first and the last server are the same.
def group_percentage_fallback(subj)
  group_subjects unless @grouped
  parts = @groups[subj]["messageinfo"]
  groupsize = parts.length
  fallback = @serverlist[-1]
  return 0 if fallback == @serverlist[0]
  # subjects seen on any server other than the fallback server
  onmain = {}
  parts.each do |msg|
    next if msg[:server] == fallback
    onmain[msg[:subject]] = 1 if onmain[msg[:subject]].nil?
  end
  fallbackcount = parts.select { |msg|
    msg[:server] == fallback && onmain[msg[:subject]].nil?
  }.length
  ((100.0/groupsize)*fallbackcount).to_i
end
# True when the group +subj+ announces exactly one part.
def group_is_singlepart(subj)
  parts_announced = @groups[subj]["total"].to_i
  parts_announced == 1
end
# True when the group +subj+ announces more than one part.
def group_is_multipart(subj)
  parts_announced = @groups[subj]["total"].to_i
  parts_announced > 1
end
# Rebuild @groups from @messageinfo.
#
# Subjects containing "(n/m)" or "[n/m]" are grouped under the subject with
# that marker replaced by " (m)" appended at the end; every other subject
# forms its own group with a total of 1. Articles whose part number is 0
# are left out. Each group is a hash with the keys "total" and
# "messageinfo".
def group_subjects
  @groups = {}
  @messageinfo.each_with_index do |msg, idx|
    puts "group subjects: #{idx} #{msg[:subject]}" if Debuglevel > 3
    subject = msg[:subject]
    # maybe it matters that this match has an order of preference...
    # no idea
    if subject =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || subject =~ /(.*)\[(\d+)\/(\d+)\](.*)/
      found = Regexp.last_match
      key = "#{found[1]}#{found[4]} (#{found[3]})"
      number = found[2]
      total = found[3]
    else
      key = subject
      number = 1
      total = 1
    end
    next if number.to_i == 0
    if @groups.has_key?(key)
      @groups[key]["messageinfo"].push(msg)
    else
      @groups[key] = {}
      @groups[key]["total"] = total
      @groups[key]["messageinfo"] = [ msg ]
    end
  end
  @grouped = true
end
# Install +ids+ as the set of article numbers to skip on +server+.
# Returns false (leaving the current set untouched) when the set is not
# finite or contains a negative number, true otherwise.
def set_skip_ids(server, ids)
  span = Set::IntSpan.new(ids)
  return false unless span.finite?
  lowest = span.min
  return false if lowest != nil && lowest < 0
  @connections[server]["skip_ids"] = span
  true
end
# Mark every article of the group +subject+ in the newsrc of the server it
# came from. Articles from servers without a connection record are skipped.
def group_update_newsrc(subject)
  parts = @groups[subject]["messageinfo"]
  (0...parts.length).each do |idx|
    conn = @connections[parts[idx][:server]]
    conn["newsrc"].mark(@group, parts[idx][:id]) if conn
  end
end
# Save the group's entry in the newsrc of every connected server.
def save_newsrc()
  servers = @connections.keys
  servers.each do |server|
    newsrc = @connections[server]["newsrc"]
    newsrc.save_group(@group)
  end
end
# Queue one "id|message-id|from|subject" line for the cache of +server+ and
# flush the queue through cache_save once it holds more than 100 lines.
def cache_add(cachedir, id, messid, from, subject, server)
  entry = "#{id}|#{messid}|#{from}|#{subject}\n"
  @cache_buf[server] = [] unless @cache_buf.has_key?(server)
  @cache_buf[server].push(entry)
  cache_save(cachedir, server) if @cache_buf[server].length > 100
end
# Warn when the cache directory does not exist.
#
# Uses FileTest.exist?: the plural alias FileTest.exists? is no longer
# available on current Ruby versions.
def cache_check(cachedir)
  unless FileTest.exist?(cachedir)
    puts "Cachedir '#{cachedir}' doesn't exists, performance will suffer"
  end
end
# Load the header caches of all connected servers and rewrite ("scrub")
# each cache file so that it only keeps usable lines.
#
# cachedir:: cache directory, or false/nil when caching is disabled
#
# A cache line has the form "id|message-id|from|subject". A line is kept
# when its id lies within the server's first..last range and was not in the
# server's skip set when reading started. Kept lines are registered through
# add (when preselect accepts the subject) and their id is added to the
# skip set, so the article is not requested from the server again.
# Lines that do not have the expected form are dropped.
def cache_read(cachedir)
  puts "#{Time.now} Reading & scrubbing caches"
  filename = "#{cachedir}/#{@group}.ripnewscache"
  # FileTest raises on false/nil, which is what get_articles passes when no
  # cache directory is configured.
  have_cachedir = cachedir && FileTest.directory?(cachedir)
  line_format = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)')
  @connections.keys.each do |server|
    first = @connections[server]["first"]
    last = @connections[server]["last"]
    puts " #{Time.now} Reading cache for #{server}"
    # snapshot of the ids that were to be skipped before reading
    excludes = {}
    @connections[server]["skip_ids"].elements.each { |x| excludes[x] = true }
    cachefile = "#{filename}.#{server}"
    next unless have_cachedir && FileTest.file?(cachefile) && FileTest.readable?(cachefile)
    # Block form: both files are closed (and the new file flushed) before
    # the new file replaces the old one.
    File.open("#{cachefile}.new", "w") do |outfile|
      File.open(cachefile) do |infile|
        infile.each do |line|
          fields = line_format.match(line)
          next unless fields
          id_i = fields[1].to_i
          messid = fields[2]
          from = fields[3] # third field; the second one is the message-id
          subject = fields[4]
          next unless first <= id_i && id_i <= last
          next if excludes.has_key?(id_i)
          outfile.puts(line)
          add(id_i, messid, from, subject, server) if preselect(subject)
          # XXX all of the slowness of cache_read is in this line:
          @connections[server]["skip_ids"].insert!(id_i)
        end
      end
    end
    begin
      File.rename("#{cachefile}.new", cachefile)
      puts " #{Time.now} Cache scrubbed for #{server}"
    rescue SystemCallError
      puts "Couldn't scrub #{server} cache"
    end
  end
  puts "#{Time.now} Caches read"
end
# Append the buffered cache lines of +server+ (sorted) to its cache file
# and empty the buffer. Nothing happens, and the buffer is kept, when
# +cachedir+ is false/nil or not a directory.
def cache_save(cachedir, server)
  # FileTest raises on false/nil, which is what callers pass when no cache
  # directory is configured.
  return unless cachedir && FileTest.directory?(cachedir)
  filename = "#{cachedir}/#{@group}.ripnewscache"
  @cache_buf[server].sort!
  File.open("#{filename}.#{server}", "a+") do |file|
    # join explicitly: printing the array itself writes its inspect form
    # on current Ruby versions
    file.print @cache_buf[server].join
  end
  @cache_buf[server] = []
end
# Write a copy of the cache file of +server+ that only contains the lines
# whose id lies within the server's first..last range. The copy is written
# to "<cachefile>.new".
#
# NOTE(review): the copy is never moved over the original cache file here
# (cache_read does that for its own rewrite) - confirm whether that step is
# missing or intentionally left to the caller.
#
# XXX this could and probably should be done in a separate thread...
# XXX but it'll work for now
# XXX also read articles aren't removed right now
# XXX this could be done, but I don't know if I want to pay the overhead
def cache_scrub(cachedir, server)
  p "scrubbing cache"
  p Time.now
  filename = "#{cachedir}/#{@group}.ripnewscache"
  # File.exist?: the plural alias File.exists? is no longer available on
  # current Ruby versions.
  if File.exist?("#{filename}.#{server}")
    # Block form closes both files, which also flushes the copy to disk.
    File.open("#{filename}.#{server}") do |infile|
      File.open("#{filename}.#{server}.new", "w") do |outfile|
        infile.each do |line|
          id = line.split("|", 2).first.to_i
          if id >= @connections[server]["first"] and
             id <= @connections[server]["last"]
            outfile.puts(line)
          end
        end
      end
    end
  end
  p Time.now
end
###############################################################
# a base64 decoder...
# Decode base64 text. Every line is decoded on its own and the results are
# concatenated; characters outside the base64 alphabet are ignored.
#
# The previous hand-rolled conversion to uuencoding dropped "/" from the
# alphabet, packed a String with the "c" directive and appended the Array
# returned by unpack to a String. String#unpack("m") does the decoding
# directly.
def decode64(str)
  str.split("\n").map { |line|
    line.delete('^A-Za-z0-9+/=').unpack('m').first
  }.join
end
###############################################################
# Sort the message list of the group +subj+ by subject (ward_sort) and,
# for equal subjects, by the position of the server in the server list.
#
# Entries whose server is no longer in the server list are dropped, which
# is why the list is rebuilt from copies instead of being sorted in place.
# When no entry is left the group has no "messageinfo" key afterwards;
# callers check for that.
#
# A group without a message list (the state left behind by an earlier call
# that dropped every entry) is treated as empty instead of raising.
def group_subject_sort(subj)
  serverhash = {}
  @serverlist.each_with_index { |server, rank| serverhash[server] = rank }
  total = @groups[subj]["total"]
  sort_arr = []
  (@groups[subj]["messageinfo"] || []).each do |info|
    puts "subj sort #{info[:subject]}" if Debuglevel > 2
    puts "subj sort #{info[:messid]}" if Debuglevel > 2
    puts "subj sort #{info[:id]}" if Debuglevel > 2
    puts "subj sort #{info[:server]}" if Debuglevel > 2
    sort_arr.push(info.dup) if serverhash[info[:server]] != nil
  end
  # Every remaining entry has a known server, so the server ranks compared
  # below are never nil.
  sort_arr.sort! do |a, b|
    r = ward_sort(a[:subject], b[:subject])
    r = serverhash[a[:server]] <=> serverhash[b[:server]] if r == 0
    r
  end
  @groups[subj].clear
  @groups[subj]["total"] = total
  @groups[subj]["messageinfo"] = sort_arr unless sort_arr.empty?
  sort_arr.each do |info|
    puts "subject sort: #{info[:subject]}" if Debuglevel > 2
    puts "server: #{info[:server]}" if Debuglevel > 2
  end
end
# Compare two subjects piece by piece, where a piece is either a run of
# digits or the text between two such runs. Two digit runs are compared as
# numbers, everything else as strings. Returns -1, 0 or 1.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)
  left.each_with_index do |x, idx|
    y = right[idx] # nil once +b+ has run out of pieces
    both_numeric = (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
    order = both_numeric ? (x.to_i <=> y.to_i) : (x.to_s <=> y.to_s)
    return order if order != 0
  end
  # equal so far: the subject with pieces left over sorts last
  return -1 if right.length > left.length
  0
end
# Split the ranges of a run list ("1-1200,1500,1600-1610") into pieces that
# span at most 500 numbers beyond their start, so that headers are requested
# in bounded chunks. Single numbers are passed through. Returns nil for nil.
#
# Idea for later: when only a few (say fewer than 3) articles lie between
# two runs it is probably worth fetching them as well and saving network
# requests; that would mean merging the two runs by dropping the comma and
# one of the two numbers.
def rechunk_runlist(runlist)
  return nil if runlist == nil
  chunksize = 500
  pieces = runlist.split(',').map do |run|
    span = /(.*)-(.*)/.match(run)
    if span
      lower = span[1]
      upper = span[2]
      chunks = []
      while (upper.to_i - lower.to_i) > chunksize
        chunks << "#{lower}-#{lower.to_i+(chunksize-1)}"
        lower = lower.to_i + chunksize
      end
      chunks << "#{lower}-#{upper}"
      chunks.join(",")
    else
      run
    end
  end
  pieces.join(",")
end
# Report the exception currently being handled ($!) for +server+, together
# with the location this method was called from.
def printerr(server)
  err = $!
  origin = caller[0]
  puts "Caught #{err.class} reading from server #{server} (#{origin})"
  puts "Error: #{err}"
end
# Close the connection to every server. Errors from connections that are
# already gone (EPIPE, ECONNRESET, EOFError, IOError) are ignored.
def disconnect
  servers = @connections.keys
  servers.each do |server|
    conn = @connections[server]
    begin
      conn["nntp"].quit
    rescue Errno::EPIPE, Errno::ECONNRESET, EOFError, IOError
    end
  end
end
# Drop the collected article list and close all server connections.
def quit
  # just testing if these should be reset...
  @messageinfo = Array.new
  disconnect
end
private :ward_sort
end # class