diff --git a/trunk/ripnews/news/article.rb b/trunk/ripnews/news/article.rb index b8a0e49..9f3e583 100644 --- a/trunk/ripnews/news/article.rb +++ b/trunk/ripnews/news/article.rb @@ -1,4 +1,4 @@ -# $Dwarf: article.rb,v 1.103 2005/02/01 20:58:40 ward Exp $ +# $Dwarf: article.rb,v 1.104 2005/02/01 22:09:22 ward Exp $ # $Source$ # @@ -41,7 +41,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc") @groups = {} @gotten = {} @group = groupname - @preselectpatterns = [] + @preselectpattern = Regexp.new('^') @cache_buf = {} @serverlist = nntpservers.split('|') @@ -121,17 +121,12 @@ def memusage end end -def add_preselect_pattern(regexp) - @preselectpatterns.push(Regexp.new(regexp)) +def set_preselect_pattern(regexp) + @preselectpattern = Regexp.new(regexp) end def preselect(subject) - @preselectpatterns.collect{|regexp| - if subject =~ regexp - return true - end - } - return false + return ( subject =~ @preselectpattern ) end def add(id, messid, subject, server) @@ -183,8 +178,18 @@ def get_articles(cachedir=false) # end for server in @connections.keys print " reading articles from server: #{server}\n" - range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}") - rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list) +# range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}") + # is dit wel handig? ik denk dat het eigenlijk beter is om alleen de articles op te halen + # die nieuwe zijn dan de laatste die je al hebt + # al de gaten krijg je toch niet gevuld en duren kei lang +# rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list) + + # dat idee maar eens testen dan: + if @connections[server]["skip_ids"].max < @connections[server]["last"] + rangelist = Set::IntSpan.new("#{@connections[server]["skip_ids"].max}-#{@connections[server]["last"]}").run_list + else + rangelist = "" + end print "rangelist: #{rangelist}\n" if Debuglevel > 2 print "rangelist: #{rangelist.class.to_s}\n" if Debuglevel > 2 print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel > 2 @@ -194,8 +199,8 @@ def get_articles(cachedir=false) for i in rangelist.split(',') print "i: #{i}\n" if Debuglevel > 2 begin - resp, subj_lines = get_xhdr(server, i, "subject") - resp, messid_lines = get_xhdr(server, i, "message-id") + resp, subj_lines = get_xhdr(server, i, "subject") + resp, messid_lines = get_xhdr(server, i, "message-id") rescue TempError printerr(server) next @@ -226,8 +231,9 @@ def get_articles(cachedir=false) end end headerlines += subj_lines.length - #if headerlines >= 10000 # hmmm, dit lijkt niet te werken... - if headerlines >= 1000 # hmmm, dit lijkt niet te werken... +# p "subj_lines.length #{subj_lines.length}" +# p "headerlines #{headerlines}" + if headerlines >= 500 # hmmm, dit lijkt niet te werken... cache_save(cachedir, server) headerlines = 0 end @@ -589,29 +595,22 @@ p "reading cache" p Time.now filename = "#{cachedir}/#{@group}.ripnewscache" excludes = {} - # id | messageid | subject - lineregexp = Regexp.new('^(\d+)\|(.*?)\|(.*)$') for server in @connections.keys cache_scrub(cachedir, server) excludes[server] = {} @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} if FileTest.directory?( cachedir) and FileTest.file?( "#{filename}.#{server}" ) and FileTest.readable?( "#{filename}.#{server}" ) - file = File.new( "#{filename}.#{server}" ) - lines = file.readlines - lines.collect{|line| - if line =~ lineregexp - unless excludes.has_key?(server) and excludes[server].has_key?($1.to_i) or - $1.to_i < @connections[server]["first"] or - $1.to_i > @connections[server]["last"] - if preselect($3) - add($1, $2, $3, server) - end - @connections[server]["skip_ids"].insert($1.to_i) + File.new( "#{filename}.#{server}" ).each{ |line| + id, messid, subject = line.split("|", 3) + unless excludes.has_key?(server) and excludes[server].has_key?(id.to_i) or + id.to_i < @connections[server]["first"] or + id.to_i > @connections[server]["last"] + if preselect($3) + add(id, messid, subject, server) end + @connections[server]["skip_ids"].insert(id.to_i) end } - file.close - lines = [] end end p Time.now