use xover & fix stupid cache reading bug

This commit is contained in:
Ward Wouts 2005-02-06 13:42:03 +00:00
parent 7a4b33bbda
commit 9895c5765d

View file

@@ -1,4 +1,4 @@
# $Dwarf: article.rb,v 1.106 2005/02/05 10:48:31 ward Exp $
# $Dwarf: article.rb,v 1.107 2005/02/05 12:35:50 ward Exp $
# $Source$
#
@@ -126,7 +126,12 @@ def set_preselect_pattern(regexp)
end
def preselect(subject)
return ( subject =~ @preselectpattern )
if subject =~ @preselectpattern
return true
else
return false
end
# return ( subject =~ @preselectpattern )
end
def add(id, messid, subject, server)
@@ -172,25 +177,23 @@ def get_articles(cachedir=false)
end
end
cache_read(cachedir)
# for server in @connections.keys
# print "############################################################\n"
# print "skip_ids #{server}: #{@connections[server]["skip_ids"].run_list}\n"
# end
# spul dat echt te oud is gaat nooit gevuld worden, dus doe ook geen poging het op te halen
# wil wel wat ophalen aangezien logging aantoont dat er wel oudere articles gedownload worden
for server in @connections.keys
articles = @connections[server]["last"] - @connections[server]["first"]
if articles > 10000
fillerend = (@connections[server]["last"] - (articles/10)).to_i
else
fillerend = @connections[server]["last"] - 1000
end
if fillerend > @connections[server]["skip_ids"].min
@connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}")
# p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}"
end
end
# for server in @connections.keys
# if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max
# articles = @connections[server]["last"] - @connections[server]["first"]
# if articles > 10000
# fillerend = (@connections[server]["skip_ids"].max - (articles/10)).to_i
# else
# fillerend = @connections[server]["skip_ids"].max - 1000
# end
# if @connections[server]["skip_ids"].min && fillerend > @connections[server]["skip_ids"].min
# @connections[server]["skip_ids"] = @connections[server]["skip_ids"].union("#{@connections[server]["skip_ids"].min}-#{fillerend}")
# # p "filling #{@connections[server]["skip_ids"].min}-#{fillerend}"
# end
# end
# end
for server in @connections.keys
print " reading articles from server: #{server}\n"
@@ -207,24 +210,32 @@ def get_articles(cachedir=false)
for i in rangelist.split(',')
print "i: #{i}\n" if Debuglevel > 2
begin
resp, subj_lines = get_xhdr(server, i, "subject")
resp, messid_lines = get_xhdr(server, i, "message-id")
resp, xover_lines = get_xover(server, i)
# resp, subj_lines = get_xhdr(server, i, "subject")
# resp, messid_lines = get_xhdr(server, i, "message-id")
rescue TempError
printerr(server)
next
end
art = {}
subj_lines.collect{|x|
xover_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["subject"] = x[1]
art[x[0]]["messid"] = x[4]
print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2
}
messid_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["messid"] = x[1]
print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2
}
# subj_lines.collect{|x|
# art[x[0]] = {} unless art.has_key?(x[0])
# art[x[0]]["subject"] = x[1]
# print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
# }
# messid_lines.collect{|x|
# art[x[0]] = {} unless art.has_key?(x[0])
# art[x[0]]["messid"] = x[1]
# print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 2
# }
for id in art.keys
if art[id].has_key?("subject") and art[id].has_key?("messid")
print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 2
@@ -238,10 +249,8 @@ def get_articles(cachedir=false)
cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server)
end
end
headerlines += subj_lines.length
# p "subj_lines.length #{subj_lines.length}"
# p "headerlines #{headerlines}"
if headerlines >= 500 # hmmm, dit lijkt niet te werken...
headerlines += xover_lines.length
if headerlines >= 500
cache_save(cachedir, server)
headerlines = 0
end
@@ -329,7 +338,56 @@ def get_xhdr(server, range, header)
rescue TimeoutError
print "Time out, reconnecting to server (get_xhdr)\n"
timedout += 1
raise PermError, "Too many timeouts! (get_xhrd)" if timedout > 1
raise PermError, "Too many timeouts! (get_xhdr)" if timedout > 1
reconnect(server)
get_group_info(server)
retry
end
end
def get_xover(server, range)
timedout = 0
resp = ""
lines = []
start, ed = range.split("-")
unless ed
ed = start
end
begin
timeout(180) do
begin
p Time.now if Debuglevel > 1
print "getting headers: #{range}\n" if Debuglevel > 1
resp, lines = @connections[server]["nntp"].xover(start, ed)
if resp.to_i == 500
print "xover not implemented\n"
print "Error: #{$!}\n"
end
unless resp.to_i >= 200 and resp.to_i < 300
print "got response #{resp} while reading group #{@group} from #{server}\n"
raise TempError
end
rescue Net::NNTPReplyError
printerr(server)
if ( $!.to_s =~ /^503|^400/ )
reconnect(server)
get_group_info(server)
retry
else
print "Won't handle this... yet :(\n"
end
rescue Errno::EPIPE, Errno::ECONNRESET, EOFError
printerr(server)
reconnect(server)
get_group_info(server)
retry
end
end
return resp, lines
rescue TimeoutError
print "Time out, reconnecting to server (get_xover)\n"
timedout += 1
raise PermError, "Too many timeouts! (get_xover)" if timedout > 1
reconnect(server)
get_group_info(server)
retry
@@ -613,7 +671,7 @@ p Time.now
unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or
id.to_i < @connections[server]["first"] or
id.to_i > @connections[server]["last"]
if preselect($3)
if preselect(subject)
add(id, messid, subject, server)
end
@connections[server]["skip_ids"].insert(id.to_i)