Make cache reading a bit faster

This commit is contained in:
Ward Wouts 2005-03-06 11:14:28 +00:00
parent a145079d95
commit 760c879fc4

View file

@ -1,4 +1,4 @@
# $Dwarf: article.rb,v 1.110 2005/03/01 19:55:32 ward Exp $ # $Dwarf: article.rb,v 1.111 2005/03/06 07:01:08 ward Exp $
# $Source$ # $Source$
# #
@ -112,8 +112,8 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc")
@connections[server] = {} @connections[server] = {}
@cache_buf[server] = [] @cache_buf[server] = []
begin begin
p server # p server
p Time.now # p Time.now
begin begin
timeout(60) do timeout(60) do
@connections[server]["nntp"] = Net::KANNTP.new(server) @connections[server]["nntp"] = Net::KANNTP.new(server)
@ -123,7 +123,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc")
sleep 3 sleep 3
retry retry
end end
p Time.now # p Time.now
@connections[server]["skip_ids"] = Set::IntSpan.new() @connections[server]["skip_ids"] = Set::IntSpan.new()
@connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}") @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}")
set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group)) set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group))
@ -268,7 +268,6 @@ def get_articles(cachedir=false)
print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2 print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2
begin begin
unless rangelist == nil or rangelist =~ /^$/ unless rangelist == nil or rangelist =~ /^$/
headerlines = 0
for i in rangelist.split(',') for i in rangelist.split(',')
print "i: #{i}\n" if Debuglevel > 2 print "i: #{i}\n" if Debuglevel > 2
begin begin
@ -311,12 +310,6 @@ def get_articles(cachedir=false)
cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server) cache_add(cachedir, id, art[id]["messid"], art[id]["subject"], server)
end end
end end
# headerlines += xover_lines.length
headerlines += subj_lines.length
if headerlines >= 500
cache_save(cachedir, server)
headerlines = 0
end
end end
end end
rescue PermError rescue PermError
@ -720,28 +713,40 @@ def cache_check(cachedir)
end end
def cache_read(cachedir) def cache_read(cachedir)
p "reading cache" p "reading & scrubbing cache"
p Time.now p Time.now
filename = "#{cachedir}/#{@group}.ripnewscache" filename = "#{cachedir}/#{@group}.ripnewscache"
excludes = {} excludes = {}
for server in @connections.keys for server in @connections.keys
cache_scrub(cachedir, server) #cache_scrub(cachedir, server)
puts " reading cache for #{server}"
puts " #{Time.now}"
excludes[server] = {} excludes[server] = {}
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" ) if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" )
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
File.new( "#{filename}.#{server}" ).each{ |line| File.new( "#{filename}.#{server}" ).each{ |line|
id, messid, subject = line.split("|", 3) id, messid, subject = line.split("|", 3)
unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or id_i = id.to_i
id.to_i < @connections[server]["first"] or if id_i >= @connections[server]["first"] and
id.to_i > @connections[server]["last"] id_i <= @connections[server]["last"]
outfile.puts(line)
unless excludes[server].has_key?(id_i)
if preselect(subject) if preselect(subject)
add(id, messid, subject, server) add(id_i, messid, subject, server)
end
@connections[server]["skip_ids"].insert(id_i)
end end
@connections[server]["skip_ids"].insert(id.to_i)
end end
} }
if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
print "Cache scrubbed for #{server}\n"
else
print "Couldn't scrub #{server} cache\n"
end end
end end
end
p "caches read"
p Time.now p Time.now
#memusage #memusage
end end
@ -771,22 +776,16 @@ p "scrubbing cache"
p Time.now p Time.now
filename = "#{cachedir}/#{@group}.ripnewscache" filename = "#{cachedir}/#{@group}.ripnewscache"
if File.exists?("#{filename}.#{server}") if File.exists?("#{filename}.#{server}")
regexp = Regexp.new('^(\d+)\|') # regexp = Regexp.new('^(\d+)\|')
infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading" infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading"
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing" outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
infile.each{ |line| infile.each{ |line|
if line =~ regexp id, messid, subject = line.split("|", 3)
if $1.to_i >= @connections[server]["first"] and if id.to_i >= @connections[server]["first"] and
$1.to_i <= @connections[server]["last"] id.to_i <= @connections[server]["last"]
outfile.puts(line) outfile.puts(line)
end end
end
} }
if ( File.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
print "Cache scrubbed for #{server}\n"
else
print "Couldn't scrub #{server} cache\n"
end
end end
p Time.now p Time.now
end end