MAJOR memory usage fix & some speedups
parent 882380beca
commit 249656452f

1 changed file with 30 additions and 31 deletions
--- a/article.rb
+++ b/article.rb
@@ -1,4 +1,4 @@
-# $Dwarf: article.rb,v 1.103 2005/02/01 20:58:40 ward Exp $
+# $Dwarf: article.rb,v 1.104 2005/02/01 22:09:22 ward Exp $
 # $Source$
 
 #
@@ -41,7 +41,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc")
   @groups = {}
   @gotten = {}
   @group = groupname
-  @preselectpatterns = []
+  @preselectpattern = Regexp.new('^')
   @cache_buf = {}
 
   @serverlist = nntpservers.split('|')
@@ -121,17 +121,12 @@ def memusage
   end
 end
 
-def add_preselect_pattern(regexp)
-  @preselectpatterns.push(Regexp.new(regexp))
+def set_preselect_pattern(regexp)
+  @preselectpattern = Regexp.new(regexp)
 end
 
 def preselect(subject)
-  @preselectpatterns.collect{|regexp|
-    if subject =~ regexp
-      return true
-    end
-  }
-  return false
+  return ( subject =~ @preselectpattern )
 end
 
 def add(id, messid, subject, server)
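The hunk above drops the array of preselect patterns in favour of a single regexp (initialised to '^' in initialize, so every subject matches by default), turning preselect into one match instead of a Ruby-level loop. Callers that relied on add_preselect_pattern accumulating several patterns can get the same effect by folding them into one alternation before calling the new setter; a minimal sketch, with made-up pattern strings:

    # Hypothetical pattern strings; Regexp.union builds the alternation.
    patterns = ['^Re:', 'mp3', 'divx']
    combined = Regexp.union(patterns.map { |p| Regexp.new(p) })

    # One match against the alternation behaves like the old
    # "return true as soon as any pattern matches" loop.
    p !!('Re: hello' =~ combined)   # => true  ('^Re:' matches)
    p !!('ogg talk'  =~ combined)   # => false (no alternative matches)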
@@ -183,8 +178,18 @@ def get_articles(cachedir=false)
 # end
 for server in @connections.keys
   print " reading articles from server: #{server}\n"
-  range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
-  rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)
+  # range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}")
+  # is this actually useful? I think it is better to fetch only the new
+  # articles, i.e. everything after the last one you already have
+  # you never get all the gaps filled anyway, and fetching them takes ages
+  # rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list)
+
+  # let's test that idea then:
+  if @connections[server]["skip_ids"].max < @connections[server]["last"]
+    rangelist = Set::IntSpan.new("#{@connections[server]["skip_ids"].max}-#{@connections[server]["last"]}").run_list
+  else
+    rangelist = ""
+  end
   print "rangelist: #{rangelist}\n" if Debuglevel > 2
   print "rangelist: #{rangelist.class.to_s}\n" if Debuglevel > 2
   print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2
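The dropped Set::IntSpan diff materialised the whole first-last span and subtracted every already-seen id, only to rediscover old gaps that rarely get filled; the replacement asks the server for a single run above the highest id already processed. The same logic in plain Ruby, with hypothetical numbers and no Set::IntSpan dependency:

    seen_max    = 14_200   # hypothetical: highest article id already fetched
    server_last = 14_750   # hypothetical: highest article id on the server

    # One contiguous run covers every new article; gaps below seen_max
    # are deliberately ignored, which is where the speedup comes from.
    rangelist = seen_max < server_last ? "#{seen_max}-#{server_last}" : ""
    p rangelist            # => "14200-14750"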
@@ -226,8 +231,9 @@ def get_articles(cachedir=false)
       end
     end
     headerlines += subj_lines.length
-    #if headerlines >= 10000 # hmmm, this does not seem to work...
-    if headerlines >= 1000 # hmmm, this does not seem to work...
+    # p "subj_lines.length #{subj_lines.length}"
+    # p "headerlines #{headerlines}"
+    if headerlines >= 500 # hmmm, this does not seem to work...
       cache_save(cachedir, server)
       headerlines = 0
     end
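Lowering the flush threshold from 1000 to 500 header lines halves how much header text can pile up before cache_save writes it out, trading a few extra disk writes for a lower memory ceiling. A self-contained sketch of that buffered-flush pattern (append_to_cache and the fake input lines are made up, standing in for cache_save and the NNTP responses):

    # Hypothetical stand-in for cache_save: append buffered lines to disk.
    def append_to_cache(lines, path = '/tmp/headers.cache')
      File.open(path, 'a') { |f| lines.each { |l| f.puts l } }
    end

    FLUSH_EVERY = 500                   # the commit lowers this from 1000

    buffer = []
    1.upto(1200) do |n|                 # fake header lines for illustration
      buffer << "#{n}|<msg#{n}@example>|subject #{n}"
      if buffer.length >= FLUSH_EVERY   # mirrors "if headerlines >= 500"
        append_to_cache(buffer)
        buffer.clear                    # drop flushed lines: bounded memory
      end
    end
    append_to_cache(buffer) unless buffer.empty?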
@@ -589,29 +595,22 @@ p "reading cache"
 p Time.now
 filename = "#{cachedir}/#{@group}.ripnewscache"
 excludes = {}
-# id | messageid | subject
-lineregexp = Regexp.new('^(\d+)\|(.*?)\|(.*)$')
 for server in @connections.keys
   cache_scrub(cachedir, server)
   excludes[server] = {}
   @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
   if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" )
-    file = File.new( "#{filename}.#{server}" )
-    lines = file.readlines
-    lines.collect{|line|
-      if line =~ lineregexp
-        unless excludes.has_key?(server) and excludes[server].has_key?($1.to_i) or
-          $1.to_i < @connections[server]["first"] or
-          $1.to_i > @connections[server]["last"]
-          if preselect($3)
-            add($1, $2, $3, server)
-          end
-          @connections[server]["skip_ids"].insert($1.to_i)
-        end
-      end
-    }
-    file.close
-    lines = []
+    File.new( "#{filename}.#{server}" ).each{ |line|
+      id, messid, subject = line.split("|", 3)
+      unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or
+        id.to_i < @connections[server]["first"] or
+        id.to_i > @connections[server]["last"]
+        if preselect(subject)
+          add(id, messid, subject, server)
+        end
+        @connections[server]["skip_ids"].insert(id.to_i)
+      end
+    }
   end
 end
 p Time.now
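This last hunk is the main memory fix: readlines slurped the entire cache file into an array before a single record was filtered, while iterating line by line keeps only the current line alive, and split("|", 3) on the id|messageid|subject records avoids running a capturing regexp per line. A minimal sketch of the streaming shape (hypothetical path; File.foreach is used here instead of File.new so the handle is closed automatically):

    path = '/tmp/example.ripnewscache'            # hypothetical cache file

    # Old shape, whole file in memory at once:
    #   lines = File.new(path).readlines
    #   lines.collect { |line| ... }

    # New shape: memory use stays flat no matter how large the cache grows.
    File.foreach(path) do |line|
      id, messid, subject = line.split('|', 3)   # "id|messageid|subject"
      next unless id =~ /\A\d+\z/                 # skip malformed lines
      # filtering and add(id, messid, subject, server) would go here
    end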