MAJOR memory usage fix & some speedups

This commit is contained in:
Ward Wouts 2005-02-05 08:26:44 +00:00
parent 882380beca
commit 249656452f

View file

@@ -1,4 +1,4 @@
# $Dwarf: article.rb,v 1.103 2005/02/01 20:58:40 ward Exp $ # $Dwarf: article.rb,v 1.104 2005/02/01 22:09:22 ward Exp $
# $Source$ # $Source$
# #
@@ -41,7 +41,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc")
@groups = {} @groups = {}
@gotten = {} @gotten = {}
@group = groupname @group = groupname
@preselectpatterns = [] @preselectpattern = Regexp.new('^')
@cache_buf = {} @cache_buf = {}
@serverlist = nntpservers.split('|') @serverlist = nntpservers.split('|')
@@ -121,17 +121,12 @@ def memusage
end end
end end
def add_preselect_pattern(regexp) def set_preselect_pattern(regexp)
@preselectpatterns.push(Regexp.new(regexp)) @preselectpattern = Regexp.new(regexp)
end end
def preselect(subject) def preselect(subject)
@preselectpatterns.collect{|regexp| return ( subject =~ @preselectpattern )
if subject =~ regexp
return true
end
}
return false
end end
def add(id, messid, subject, server) def add(id, messid, subject, server)
@@ -183,8 +178,18 @@ def get_articles(cachedir=false)
# end # end
for server in @connections.keys for server in @connections.keys
print " reading articles from server: #{server}\n" print " reading articles from server: #{server}\n"
range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}") # range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list) # is dit wel handig? ik denk dat het eigenlijk beter is om alleen de articles op te halen
# die nieuwe zijn dan de laatste die je al hebt
# al de gaten krijg je toch niet gevuld en duren kei lang
# rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)
# dat idee maar eens testen dan:
if @connections[server]["skip_ids"].max < @connections[server]["last"]
rangelist = Set::IntSpan.new("#{@connections[server]["skip_ids"].max}-#{@connections[server]["last"]}").run_list
else
rangelist = ""
end
print "rangelist: #{rangelist}\n" if Debuglevel > 2 print "rangelist: #{rangelist}\n" if Debuglevel > 2
print "rangelist: #{rangelist.class.to_s}\n" if Debuglevel > 2 print "rangelist: #{rangelist.class.to_s}\n" if Debuglevel > 2
print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2 print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2
@@ -226,8 +231,9 @@ def get_articles(cachedir=false)
end end
end end
headerlines += subj_lines.length headerlines += subj_lines.length
#if headerlines >= 10000 # hmmm, dit lijkt niet te werken... # p "subj_lines.length #{subj_lines.length}"
if headerlines >= 1000 # hmmm, dit lijkt niet te werken... # p "headerlines #{headerlines}"
if headerlines >= 500 # hmmm, dit lijkt niet te werken...
cache_save(cachedir, server) cache_save(cachedir, server)
headerlines = 0 headerlines = 0
end end
@@ -589,29 +595,22 @@ p "reading cache"
p Time.now p Time.now
filename = "#{cachedir}/#{@group}.ripnewscache" filename = "#{cachedir}/#{@group}.ripnewscache"
excludes = {} excludes = {}
# id | messageid | subject
lineregexp = Regexp.new('^(\d+)\|(.*?)\|(.*)$')
for server in @connections.keys for server in @connections.keys
cache_scrub(cachedir, server) cache_scrub(cachedir, server)
excludes[server] = {} excludes[server] = {}
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" ) if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" )
file = File.new( "#{filename}.#{server}" ) File.new( "#{filename}.#{server}" ).each{ |line|
lines = file.readlines id, messid, subject = line.split("|", 3)
lines.collect{|line| unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or
if line =~ lineregexp id.to_i < @connections[server]["first"] or
unless excludes.has_key?(server) and excludes[server].has_key?($1.to_i) or id.to_i > @connections[server]["last"]
$1.to_i < @connections[server]["first"] or
$1.to_i > @connections[server]["last"]
if preselect($3) if preselect($3)
add($1, $2, $3, server) add(id, messid, subject, server)
end
@connections[server]["skip_ids"].insert($1.to_i)
end end
@connections[server]["skip_ids"].insert(id.to_i)
end end
} }
file.close
lines = []
end end
end end
p Time.now p Time.now