ripnews/trunk/ripnews/news/article.rb

778 lines
22 KiB
Ruby
Raw Normal View History

# $Dwarf: article.rb,v 1.90 2004/10/14 11:46:31 ward Exp $
2002-04-28 16:29:56 +00:00
# $Source$
2003-07-20 20:32:24 +00:00
#
2004-05-19 09:55:12 +00:00
# Copyright (c) 2002, 2003, 2004 Ward Wouts <ward@wouts.nl>
#
2003-07-20 20:32:24 +00:00
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
2003-07-20 20:32:24 +00:00
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
2002-04-28 16:29:56 +00:00
2002-04-28 22:06:03 +00:00
require 'fileutils'
require 'tempfile'
require 'timeout'
require 'set/intspan'
require 'net/nntp'
require 'news/newsrc'
#require 'yaml'
2002-04-28 16:29:56 +00:00
2002-07-03 22:18:40 +00:00
# Common ancestor of the errors raised by Article.
class ArticleError < RuntimeError; end
# Raised for failures of a single request, e.g. a non-2xx XHDR response or a
# message that could not be fetched from any server.
class TempError < ArticleError; end
# Raised when a server is given up on: too many timeouts, or reconnecting
# failed repeatedly.
class PermError < ArticleError; end
2002-07-03 22:18:40 +00:00
2002-04-28 16:29:56 +00:00
class Article
Debuglevel = 1
Message = Struct.new(:messid, :id, :server, :subject)
2002-04-28 16:29:56 +00:00
# Connects to every server named in +nntpservers+ and loads its newsrc.
#
# nntpservers:: server names separated by '|'
# groupname::   newsgroup that all later operations work on
# newsrc::      base path of the newsrc files; the file used for a server is
#               "#{newsrc}.#{server}"
#
# A server whose setup fails with a socket-level error is reported and removed
# through del_server. A connection attempt that takes longer than 60 seconds
# is retried (without limit, as before) after a 3 second pause.
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
  @messageinfo = []
  @grouped = false
  @groups = {}
  @gotten = {}
  @group = groupname
  @preselectpatterns = []
  @newids = {}
  @serverlist = nntpservers.split('|')
  @connections = {}
  # Walk a copy of the list: del_server deletes from @serverlist, and
  # deleting from an array while iterating it skips the following element.
  @serverlist.dup.each do |server|
    @connections[server] = {}
    @newids[server] = {}
    begin
      p server
      p Time.now
      begin
        Timeout.timeout(60) do
          @connections[server]["nntp"] = Net::NNTP.new(server)
        end
      rescue Timeout::Error
        sleep 3
        retry
      end
      p Time.now
      @connections[server]["skip_ids"] = Set::IntSpan.new()
      @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
      set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
    rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT
      print "Connection to #{server} failed: #{$!}\n"
      del_server(server)
    end
  end
end
2002-05-08 22:08:32 +00:00
# Closes the current connection to +server+ and opens a fresh one.
#
# Each attempt is preceded by a 3 second pause and limited to 60 seconds.
# After three failed attempts the server is removed with del_server and
# PermError is raised.
def reconnect(server)
  retries = 0
  begin
    @connections[server]["nntp"].quit
    # Does this help with memory usage? I don't think so:
    #@connections[server].delete("nntp")
    #GC.start
  rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
    # The old connection is already broken; there is nothing left to close.
  end
  begin
    sleep 3
    #timeout(180) do
    Timeout.timeout(60) do
      @connections[server]["nntp"] = Net::NNTP.new(server)
    end
  rescue SocketError, Errno::EINVAL, EOFError, Errno::ETIMEDOUT, Timeout::Error
    print "Reconnect to #{server} failed: #{$!}\n"
    if retries > 1
      del_server(server)
      raise PermError, "Couldn't connect to #{server}"
    else
      retries += 1
      retry
    end
  end
  print "Succesfully reconnected to #{server}\n"
end
# Debug aid: prints the name of every instance variable of this object and,
# for values that respond to +size+, that size.
def memusage
  print "memprof:\n"
  print "global:\n"
  # for i in global_variables
  #   print "#{i}\n"
  # end
  # print "local:\n"
  # for i in local_variables
  #   print "#{i}\n"
  # end
  instance_variables.each do |name|
    puts name
    print "X: "
    # instance_variable_get accepts both the Strings returned by Ruby 1.8 and
    # the Symbols returned by later versions; instance_eval(name) raises
    # TypeError when handed a Symbol.
    value = instance_variable_get(name)
    puts value.size if value.respond_to?(:size)
  end
end
# Registers +regexp+ as one more pattern consulted by preselect.
def add_preselect_pattern(regexp)
  @preselectpatterns << regexp
end
# Returns true when +subject+ matches at least one registered preselect
# pattern, false otherwise.
def preselect(subject)
  @preselectpatterns.any? { |pattern| !pattern.match(subject).nil? }
end
# Records one article and invalidates the current subject grouping.
#
# messid::  message-id of the article
# id::      article number on +server+ (stored as an Integer)
# server::  server the article was seen on
# subject:: subject line of the article
def add(messid, id, server, subject)
  @messageinfo.push(Message.new(messid, id.to_i, server, subject))
  @grouped = false
end
2002-07-03 22:18:40 +00:00
# Forgets +server+: drops its connection record and takes it out of the
# server list. Returns whatever the server-list deletion returns.
def del_server(server)
  $stdout.print("Removing server #{server} from list\n")
  @connections.delete(server)
  @serverlist.delete(server)
end
# Collects subject and message-id headers of all not-yet-seen articles of the
# group from every connected server.
#
# cachedir:: directory holding the per-server header caches, or false (the
#            default) to work without a cache. Without a cache directory the
#            cache is neither read nor written; previously that path ran into
#            FileTest.directory?(false).
#
# Servers that raise PermError, or that report a first article number higher
# than the last one, are removed with del_server.
def get_articles(cachedir=false)
  check_cache(cachedir) if cachedir
  @connections.keys.each do |server|
    begin
      first, last = get_group_info(server)
    rescue PermError
      print "Error: #{$!}\n"
      del_server(server)
      next
    end
    if first.to_i <= last.to_i
      # available articles on server
      @connections[server]["first"] = first ? first.to_i : 0
      @connections[server]["last"] = last ? last.to_i : 0
      if Debuglevel > 0
        print " Server: #{server}\n"
        print " First: #{first}\n"
        print " Last: #{last}\n"
      end
      # clean up old newsrc entries
      if @connections[server]["first"] > 0
        @connections[server]["newsrc"].unmark_range(@group, 0, (@connections[server]["first"] - 1).to_s)
        @connections[server]["newsrc"].save
      end
    else
      print " First article has higher number than last article on server #{server}.\n"
      del_server(server)
    end
  end
  read_cache(cachedir) if cachedir
  # @connections.keys.each do |server|
  #   print "############################################################\n"
  #   print "skip_ids #{server}: #{@connections[server]["skip_ids"].run_list}\n"
  # end
  @connections.keys.each do |server|
    print " reading articles from server: #{server}\n"
    range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
    # Everything on the server that is not already known, cut into chunks.
    rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)
    print "rangelist: #{rangelist}\n" if Debuglevel > 2
    print "rangelist: #{rangelist.class.to_s}\n" if Debuglevel > 2
    print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2
    begin
      unless rangelist == nil or rangelist =~ /^$/
        headerlines = 0
        rangelist.split(',').each do |i|
          print "i: #{i}\n" if Debuglevel > 2
          begin
            _resp, subj_lines = get_xhdr(server, i, "subject")
            _resp, messid_lines = get_xhdr(server, i, "message-id")
          rescue TempError
            # This chunk could not be read; carry on with the next one.
            printerr(server)
            next
          end
          # Join the two header listings on the article number.
          art = {}
          subj_lines.each do |x|
            art[x[0]] = {} unless art.has_key?(x[0])
            art[x[0]]["subject"] = x[1]
            print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
          end
          messid_lines.each do |x|
            art[x[0]] = {} unless art.has_key?(x[0])
            art[x[0]]["messid"] = x[1]
            print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2
          end
          art.keys.each do |id|
            if art[id].has_key?("subject") and art[id].has_key?("messid")
              print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2
              @newids[server][id.to_i] = true
              add(art[id]["messid"], id, server, art[id]["subject"])
            end
          end
          headerlines += subj_lines.length
          if headerlines >= 10000 # hmm, this doesn't seem to work...
            save_cache(cachedir, server) if cachedir
            headerlines = 0
          end
        end
      end
    rescue PermError
      del_server(server)
      next
    end
    save_cache(cachedir, server) if cachedir
  end
  GC.start
end
2002-05-19 15:06:53 +00:00
# Selects the group on +server+ and returns the first and last article
# numbers from the GROUP response as a two-element array.
#
# Raises PermError when
# * the server answers with a reply error other than 503/400,
# * more than three connection-level errors occur in a row, or
# * the request times out (30 seconds) twice.
def get_group_info(server)
  timedout = 0
  errs = 0
  first = ""
  last = ""
  begin
    Timeout.timeout(30) do
      begin
        _resp, _count, first, last, _name = @connections[server]["nntp"].group(@group)
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          retry
        else
          raise PermError, "#{$!}"
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Errno::EINVAL
        printerr(server)
        # The counter was never incremented before, which left the limit
        # below unreachable.
        errs += 1
        raise PermError, "Too many errors! (get_group_info)" if errs > 3
        reconnect(server)
        retry
      end
    end
  rescue Timeout::Error
    timedout += 1
    raise PermError, "Too many timeouts! (get_group_info)" if timedout > 1
    print "Time out, reconnecting to server... (get_group_info)\n"
    reconnect(server)
    retry
  end
  return first, last
end
# Issues XHDR +header+ for the article +range+ on +server+.
#
# Returns the response and the header lines as returned by the connection's
# xhdr call. Raises TempError when the response code is not 2xx and PermError
# when the request times out (180 seconds) twice. Broken connections and
# 503/400 replies lead to a reconnect, a re-select of the group and a retry.
def get_xhdr(server, range, header)
  timedout = 0
  resp = ""
  lines = []
  begin
    Timeout.timeout(180) do
      begin
        p Time.now if Debuglevel > 1
        print "getting headers: #{header}, #{range}\n" if Debuglevel > 1
        resp, lines = @connections[server]["nntp"].xhdr(header, range)
        if resp.to_i == 500
          print "xhdr not implemented\n"
          print "Error: #{$!}\n"
        end
        unless resp.to_i >= 200 and resp.to_i < 300
          print "got response #{resp} while reading group #{@group} from #{server}\n"
          raise TempError
        end
      rescue Net::NNTPReplyError
        printerr(server)
        if ( $!.to_s =~ /^503|^400/ )
          reconnect(server)
          get_group_info(server)
          retry
        else
          # Any other reply error is only reported; whatever resp and lines
          # hold at this point is returned.
          print "Won't handle this... yet :(\n"
        end
      rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, lines
  rescue Timeout::Error
    print "Time out, reconnecting to server (get_xhdr)\n"
    timedout += 1
    raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
2002-05-06 11:46:56 +00:00
# Name of the newsgroup this object works on.
def get_groupname
  @group
end
# Fetches the body of +message+ from +server+.
#
# Returns [resp, id, messid, list] as delivered by the connection's body
# call, or false when the article could not be fetched: a reply error (after
# one reconnect-and-retry for 503/400 replies), an EOFError or a NameError.
# NoMethodError is a NameError, so a server that is no longer present in
# @connections also yields false.
# Raises PermError when the request times out (180 seconds) twice.
def get_body(server, message)
  timedout = 0
  retries = 0
  resp = ""
  id = ""
  messid = ""
  list = []
  begin
    Timeout.timeout(180) do
      begin
        list = []
        resp, id, messid, list = @connections[server]["nntp"].body(message)
      rescue Net::NNTPReplyError
        # Take the reply text explicitly; String + Exception only worked
        # while Exception still had an implicit string conversion.
        reply = $!.to_s
        printerr(server)
        if retries == 0 && (reply =~ /^503/ || reply =~ /^400/)
          reconnect(server)
          get_group_info(server)
          retries = 1
          retry
        end
        return false
      rescue EOFError, NameError
        printerr(server)
        return false
      rescue Errno::EPIPE, Errno::ECONNRESET
        printerr(server)
        reconnect(server)
        get_group_info(server)
        retry
      end
    end
    return resp, id, messid, list
  rescue Timeout::Error
    print "Time out, reconnecting to server (get_body)\n"
    timedout += 1
    raise PermError, "Too many timeouts! (get_body)" if timedout > 1
    reconnect(server)
    get_group_info(server)
    retry
  end
end
2002-04-28 16:29:56 +00:00
# Fetches the bodies of all not-yet-fetched articles grouped under +subj+.
#
# The group is sorted first. When fetching an article fails and the next
# entry carries the same message-id, that entry (another server) is tried.
# Raises TempError when no further entry with the same message-id exists.
# Returns false when the group has no message list after sorting.
#
# NOTE(review): the result is replaced by each fetched body rather than
# accumulated (get_group_body_rest uses concat) - confirm this is intended.
def get_group_body(subj)
  result = []
  group_subject_sort(subj)
  # puts @groups[subj].to_yaml
  infos = @groups[subj]["messageinfo"]
  return false if infos == nil
  (0...infos.length).each do |i|
    next if @gotten.has_key?(infos[i][:messid])
    print "getting article: #{i}\n" if Debuglevel > 1
    print "getting article: #{subj}\n" if Debuglevel > 1
    print "full subject: #{infos[i][:subject]}\n" if Debuglevel > 0
    print "message id: #{infos[i][:messid]}\n" if Debuglevel > 1
    print "id: #{infos[i][:id]}\n" if Debuglevel > 1
    print "server: #{infos[i][:server]}\n" if Debuglevel > 0
    resp = false
    list = nil
    while resp == false
      # Entries of servers that have been removed count as failures.
      if @serverlist.include?(infos[i][:server])
        resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      end
      if resp == false
        if Debuglevel > 1
          # This used to index the non-existent "messages" key.
          print "mess-id i: #{infos[i][:messid]}\n"
          # XXX this should be possible in a neater way
          print "mess-id i+1: #{infos[i+1][:messid]}\n" if infos[i+1] != nil
        end
        if (i+1 < infos.length) and (infos[i][:messid] == infos[i+1][:messid])
          print " Trying next server...\n"
          # Only affects the rest of this iteration; the entry reached this
          # way is skipped later because its message-id is marked as gotten.
          i += 1
        else
          raise TempError, " Message-id not on another server"
        end
      end
    end
    @gotten[infos[i][:messid]] = true
    result = list
  end
  return result
end
# Fetches the body of the first article of the group +subj+.
#
# The group is sorted first. When fetching fails and the next entry carries
# the same message-id, that entry (another server) is tried; otherwise
# TempError is raised.
# Returns the body lines, nil when the first article was fetched earlier, or
# false when the group has no usable first entry.
def get_group_body_first(subj)
  group_subject_sort(subj)
  infos = @groups[subj]["messageinfo"]
  return false unless infos != nil && infos[0][:messid]
  i = 0
  list = nil
  while @gotten.has_key?(infos[0][:messid]) == false
    print "getting article: #{subj}\n" if Debuglevel > 0
    print "full subject: #{infos[0][:subject]}\n" if Debuglevel > 0
    print "message id: #{infos[i][:messid]}\n" if Debuglevel > 1
    print "id: #{infos[i][:id]}\n" if Debuglevel > 1
    print "server: #{infos[0][:server]}\n" if Debuglevel > 0
    resp = false
    while resp == false
      resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      if resp == false
        print "mess-id i: #{infos[i][:messid]}\n"
        # XXX this should be possible in a neater way
        print "mess-id i+1: #{infos[i+1][:messid]}\n" if infos[i+1] != nil
        if (i+1 < infos.length) and (infos[i][:messid] == infos[i+1][:messid])
          print "Trying next server...\n"
          i += 1
        else
          raise TempError, "Message-id not on another server"
        end
      end
    end
    # i only ever advances to entries with the same message-id, so this also
    # satisfies the loop condition on entry 0.
    @gotten[infos[i][:messid]] = true
  end
  return list
end
# Fetches the bodies of every article of the group +subj+ except the first.
#
# subj:: group key
# file:: optional IO; when given, each body line is written to it followed by
#        a newline instead of being collected
#
# Returns the collected body lines (empty when +file+ is given). When
# fetching fails and the next entry carries the same message-id, that entry
# (another server) is tried; otherwise TempError is raised.
def get_group_body_rest(subj, file=nil)
  result = []
  infos = @groups[subj]["messageinfo"]
  (1...infos.length).each do |i|
    next if @gotten.has_key?(infos[i][:messid])
    print "getting article: #{i}\n" if Debuglevel > 1
    print "getting article: #{subj}\n" if Debuglevel > 1
    print "full subject: #{infos[i][:subject]}\n" if Debuglevel > 0
    print "message id: #{infos[i][:messid]}\n" if Debuglevel > 1
    print "id: #{infos[i][:id]}\n" if Debuglevel > 1
    print "server: #{infos[i][:server]}\n" if Debuglevel > 0
    resp = false
    list = nil
    while resp == false
      # The message-id used to be read from the non-existent "messages" key,
      # which raised NoMethodError on every call.
      resp, _id, _messid, list = get_body(infos[i][:server], infos[i][:messid])
      if resp == false
        print "mess-id i: #{infos[i][:messid]}\n"
        # XXX this should be possible in a neater way
        print "mess-id i+1: #{infos[i+1][:messid]}\n" if infos[i+1] != nil
        if (i+1 < infos.length) and (infos[i][:messid] == infos[i+1][:messid])
          print "Trying next server...\n"
          # Only affects the rest of this iteration; the entry reached this
          # way is skipped later because its message-id is marked as gotten.
          i += 1
        else
          raise TempError, "Message-id not on another server"
        end
      end
    end
    @gotten[infos[i][:messid]] = true
    if file
      list.each { |line| file.print "#{line}\n" }
    else
      result.concat(list)
    end
  end
  return result
end
# Returns the keys of all subject groups, (re)building the grouping first
# when articles were added since it was last computed.
def get_group_subjects
  group_subjects unless @grouped
  return @groups.keys
end
# Returns true when the group +subj+ holds at least as many distinct
# message-ids as its recorded total number of parts.
def group_is_complete(subj)
  group_subjects unless @grouped
  #print "Subject: #{subj}\n"
  print "length: #{@groups[subj]["messageinfo"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1
  # The same message-id may be listed once per server; count it only once.
  distinct = @groups[subj]["messageinfo"].map { |info| info[:messid] }.uniq
  distinct.length >= @groups[subj]["total"].to_i
end
# True when the recorded total number of parts of the group +subj+ is 1.
def group_is_singlepart(subj)
  parts = @groups[subj]["total"].to_i
  parts == 1
end
# True when the recorded total number of parts of the group +subj+ exceeds 1.
def group_is_multipart(subj)
  parts = @groups[subj]["total"].to_i
  parts > 1
end
2002-04-28 16:29:56 +00:00
# Rebuilds @groups from @messageinfo.
#
# A subject containing "(n/m)" or "[n/m]" is grouped under the subject with
# that marker removed plus " (m)"; any other subject forms its own group with
# a total of 1. Articles whose part number is 0 are left out.
# Each group is a hash with the keys "total" and "messageinfo".
def group_subjects
  @groups = {}
  @messageinfo.each_with_index do |info, i|
    print "group subjects: #{i} #{info[:subject]}\n" if Debuglevel > 3
    if info[:subject] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || info[:subject] =~ /(.*)\[(\d+)\/(\d+)\](.*)/
      key = "#{$1}#{$4} (#{$3})"
      number = $2
      total = $3
    else
      key = info[:subject]
      number = 1
      total = 1
    end
    next if number.to_i == 0
    if @groups.has_key?(key)
      @groups[key]["messageinfo"].push(info)
    else
      @groups[key] = {}
      @groups[key]["total"] = total
      @groups[key]["messageinfo"] = [ info ]
    end
  end
  @grouped = true
end
# Replaces the set of article numbers to skip on +server+ with +ids+.
#
# Returns false, leaving the current set untouched, when the resulting set is
# not finite or contains a negative number; returns true otherwise.
def set_skip_ids(server, ids)
  set = Set::IntSpan.new(ids)
  return false unless set.finite
  min = set.min
  return false if min != nil and min < 0
  @connections[server]["skip_ids"] = set
  return true
end
# Marks every article of the group +subject+ in the newsrc of the server it
# was seen on. Articles of servers without a connection record are ignored.
def group_update_newsrc(subject)
  print "running group_update_newsrc\n"
  entries = @groups[subject]["messageinfo"]
  (0...entries.length).each do |idx|
    info = entries[idx]
    connection = @connections[info[:server]]
    next unless connection
    connection["newsrc"].mark(@group, info[:id])
  end
end
# Saves the newsrc of every server that still has a connection record.
def save_newsrc()
  @connections.keys.each do |name|
    @connections[name]["newsrc"].save
  end
end
2003-07-03 14:03:11 +00:00
# Prints a warning when +cachedir+ does not exist.
def check_cache(cachedir)
  # FileTest.exist? is available in every Ruby version; the plural spelling
  # exists? has been removed from current ones.
  unless FileTest.exist?(cachedir)
    print "Cachedir '#{cachedir}' doesn't exists, performance will suffer\n"
  end
end
# Loads the per-server header cache files found in +cachedir+.
#
# A cache file is named "<cachedir>/<group>.ripnewscache.<server>" and holds
# lines of the form "id|messageid|subject". A line is used when its id is not
# already in the server's skip set and lies within the server's first..last
# range: it is added via add when its subject passes preselect, and its id is
# put into the skip set either way.
# Without a usable cache directory (including cachedir == false) no file is
# read. memusage is called at the end.
def read_cache(cachedir)
  p "reading cache"
  p Time.now
  filename = "#{cachedir}/#{@group}.ripnewscache"
  excludes = {}
  # id | messageid | subject
  lineregexp = Regexp.new('^(\d+)\|(.*?)\|(.*)$')
  have_dir = cachedir && FileTest.directory?(cachedir)
  @connections.keys.each do |server|
    # Snapshot of the ids that were to be skipped before the cache is read.
    # each is used rather than collect! so the array handed out by elements
    # is not modified.
    excludes[server] = {}
    @connections[server]["skip_ids"].elements.each { |x| excludes[server][x] = true }
    cachefile = "#{filename}.#{server}"
    next unless have_dir && FileTest.file?(cachefile) && FileTest.readable?(cachefile)
    # The block form closes the file even when a line raises.
    File.open(cachefile) do |file|
      file.each do |line|
        next unless line =~ lineregexp
        id, messid, subject = $1, $2, $3
        next if excludes[server].has_key?(id.to_i) or
                id.to_i < @connections[server]["first"] or
                id.to_i > @connections[server]["last"]
        add(messid, id, server, subject) if preselect(subject)
        @connections[server]["skip_ids"].insert(id.to_i)
      end
    end
  end
  p Time.now
  memusage
end
# Appends the headers of the articles newly seen on +server+ to its cache
# file "<cachedir>/<group>.ripnewscache.<server>" and then scrubs the cache.
#
# The new content is assembled in "<cache file>.new" (a copy of the current
# cache plus the new lines, which are sorted as strings) and moved over the
# cache file afterwards. Nothing is written when +cachedir+ is not a
# directory (including cachedir == false).
#
# NOTE(review): @newids[server] is not emptied here, so ids saved by an
# earlier call are appended again by the next one - confirm whether that is
# intended.
def save_cache(cachedir, server)
  p "writing cache"
  p Time.now
  filename = "#{cachedir}/#{@group}.ripnewscache"
  if cachedir && FileTest.directory?(cachedir)
    current = "#{filename}.#{server}"
    pending = "#{current}.new"
    begin
      if FileTest.file?(current)
        FileUtils.cp(current, pending)
      else
        # First save for this server: make sure no stale work file is
        # appended to.
        FileUtils.rm_f(pending)
      end
    rescue SystemCallError
      puts "Couldn't renew cache"
    end
    print "Updating cache...\n"
    cache = []
    @messageinfo.each do |info|
      next unless @newids[server].has_key?(info[:id])
      cache.push("#{info[:id]}|#{info[:messid]}|#{info[:subject]}\n") if info[:server] == server
    end
    cache.sort!
    # join is required: printing the Array itself writes its inspect form on
    # current Ruby versions.
    File.open(pending, "a+") { |file| file.print cache.join }
    begin
      FileUtils.mv(pending, current)
      print "Cache updated for #{server}\n"
    rescue SystemCallError
      print "Couldn't update #{server} cache\n"
    end
    p Time.now
    scrub_cache(cachedir, server) if FileTest.file?(current)
  else
    p Time.now
  end
end
# Writes the lines of the cache file of +server+ whose article number lies
# within the server's first..last range to "<cache file>.new".
#
# The scrubbed copy is not moved over the cache file; that step is commented
# out below. Both files are closed when the method returns. Raises the usual
# system call error when a file cannot be opened.
def scrub_cache(cachedir, server)
  # XXX this could and probably should be done in a separate thread...
  # XXX but it'll work for now
  p "scrubbing cache"
  p Time.now
  filename = "#{cachedir}/#{@group}.ripnewscache"
  regexp = Regexp.new('^(\d+)\|')
  File.open("#{filename}.#{server}") do |infile|
    File.open("#{filename}.#{server}.new", "w") do |outfile|
      infile.each do |line|
        next unless line =~ regexp
        id = $1.to_i
        if id >= @connections[server]["first"] and id <= @connections[server]["last"]
          outfile.puts(line)
        end
      end
    end
  end
  #if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
  #  print "Cache scrubbed for #{server}\n"
  #else
  #  print "Couldn't scrub #{server} cache\n"
  #end
  p Time.now
end
2002-04-28 16:29:56 +00:00
###############################################################
# a base64 decoder...
# Decodes base64 text and returns the decoded bytes as one string.
#
# str:: base64 text, possibly spanning several lines; every line is decoded
#       on its own and characters outside the base64 alphabet are ignored
#
# The previous hand-rolled conversion to uuencoding dropped '/' from the
# alphabet and tried to append an Array to a String.
def decode64(str)
  str.split("\n").map { |line|
    line.delete('^A-Za-z0-9+/=').unpack("m")[0]
  }.join
end
###############################################################
2002-05-06 11:46:56 +00:00
# Sorts the articles of the group +subj+ by subject (using ward_sort) and,
# for equal subjects, by the position of their server in @serverlist.
#
# Articles of servers that are no longer in @serverlist are dropped. When no
# article remains, the "messageinfo" key is removed from the group. The
# group's "total" is kept.
#
# The sorted list used to be rebuilt entry by entry behind a check for the
# key "messages", which never exists, so only the last entry survived; the
# rebuild also wiped "total".
def group_subject_sort(subj)
  #print "Sorting articles\n"
  serverhash = {}
  @serverlist.each_with_index { |server, position| serverhash[server] = position }
  sort_arr = []
  (@groups[subj]["messageinfo"] || []).each do |info|
    print "subj sort #{info[:subject]}\n" if Debuglevel > 2
    print "subj sort #{info[:messid]}\n" if Debuglevel > 2
    print "subj sort #{info[:id]}\n" if Debuglevel > 2
    print "subj sort #{info[:server]}\n" if Debuglevel > 2
    sort_arr.push(info.dup) if serverhash[info[:server]] != nil
  end
  # Every entry left has a known server, so the server positions compared
  # below are never nil.
  sort_arr.sort! do |a, b|
    r = ward_sort(a[:subject], b[:subject])
    r = serverhash[a[:server]] <=> serverhash[b[:server]] if r == 0
    r
  end
  if sort_arr.empty?
    @groups[subj].delete("messageinfo")
  else
    @groups[subj]["messageinfo"] = sort_arr
  end
  sort_arr.each do |info|
    print "subject sort: #{info[:subject]}\n" if Debuglevel > 2
    print "server: #{info[:server]}\n" if Debuglevel > 2
  end
  #print "Done sorting\n"
end
2002-04-28 16:29:56 +00:00
# Comparison function that orders embedded numbers by value, so that "a2"
# sorts before "a10".
#
# Both arguments are split into runs of digits and runs of other characters.
# Corresponding runs are compared as integers when both are numeric and as
# strings otherwise. Returns a negative number, zero or a positive number in
# the manner of <=>.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)
  left.each do |x|
    y = right.shift
    r = if (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
          x.to_i <=> y.to_i
        else
          x.to_s <=> y.to_s
        end
    return r if r != 0
  end
  # a is a proper prefix of b.
  return -1 if right != []
  return 0
end
# Splits the ranges of a run list into pieces of at most about +chunksize+
# articles.
#
# runlist::   comma separated list of numbers and "a-b" ranges, or nil
# chunksize:: size of the pieces a long range is cut into (default 1000);
#             a range is only cut while its remaining span exceeds this value
#
# Returns the new run list as a string, or nil when +runlist+ is nil.
# Raises ArgumentError when +chunksize+ is smaller than 1.
def rechunk_runlist(runlist, chunksize = 1000)
  return nil if runlist == nil
  raise ArgumentError, "chunksize must be at least 1" if chunksize < 1
  # Idea: when the gap between two runs is small (say fewer than 3 articles)
  # it is probably worth fetching the gap as well and saving network
  # requests, by dropping the comma and one of the two numbers.
  pieces = runlist.split(',').map do |run|
    if run =~ /(.*)-(.*)/
      start = $1
      stop = $2
      chunks = []
      while (stop.to_i - start.to_i) > chunksize
        chunks << "#{start}-#{start.to_i + (chunksize - 1)}"
        start = start.to_i + chunksize
      end
      chunks << "#{start}-#{stop}"
      chunks.join(",")
    else
      run
    end
  end
  pieces.join(",")
end
# Reports the exception currently being handled ($!) together with the
# server it occurred on and the location printerr was called from.
def printerr(server)
  print "Caught #{$!.class} reading from server #{server} (#{caller[0]})\n"
  print "Error: #{$!}\n"
end
2002-04-28 16:29:56 +00:00
# Drops the collected article list and closes the connection to every server.
# Connections that are already broken are ignored.
def quit
  # just testing if these should be reset...
  @messageinfo = []
  @connections.keys.each do |server|
    begin
      @connections[server]["nntp"].quit
    rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
      # Already disconnected.
    end
  end
end
private :ward_sort
end # class