From 872158c6865c726130deb9bd41d573f6f30421c1 Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Sun, 24 Feb 2008 18:20:50 +0000 Subject: [PATCH] add date to caches --- trunk/ripnews/news/article.rb | 55 ++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/trunk/ripnews/news/article.rb b/trunk/ripnews/news/article.rb index 7d26432..d5cfc37 100644 --- a/trunk/ripnews/news/article.rb +++ b/trunk/ripnews/news/article.rb @@ -23,6 +23,7 @@ require 'news/newsrc' require 'tempfile' require 'timeout' #require 'yaml' +require 'profiler' class ArticleError < RuntimeError; end class TempError < ArticleError; end @@ -99,7 +100,7 @@ class Article Debuglevel = 1 -Message = Struct.new(:messid, :id, :from, :server, :subject) +Message = Struct.new(:messid, :id, :date, :from, :server, :subject) def initialize(nntpservers, groupname, newsrc="~/.newsrc") @messageinfo = [] @@ -234,8 +235,8 @@ def preselect(subject) # return ( subject =~ @preselectpattern ) end -def add(id, messid, from, subject, server) - @messageinfo.push(Message.new(messid, id, from, server, subject)) +def add(id, messid, date, from, subject, server) + @messageinfo.push(Message.new(messid, id, date, from, server, subject)) @grouped = false end @@ -282,7 +283,7 @@ def get_articles(cachedir=false) if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"] articles = @connections[server]["last"] - @connections[server]["first"] if articles > 10000 - fillerend = (@connections[server]["skip_ids"].max - (articles/3)).to_i + fillerend = (@connections[server]["skip_ids"].max - (articles/5)).to_i else fillerend = @connections[server]["skip_ids"].max - 2000 end @@ -308,6 +309,7 @@ def get_articles(cachedir=false) puts "i: #{i}" if Debuglevel > 2 begin # resp, xover_lines = get_xover(server, i) + resp, date_lines = get_xhdr(server, i, "date") resp, subj_lines = get_xhdr(server, i, "subject") resp, messid_lines = get_xhdr(server, i, 
"message-id") resp, from_lines = get_xhdr(server, i, "from") @@ -317,13 +319,17 @@ def get_articles(cachedir=false) end art = {} -# xover_lines.collect{|x| -# art[x[0]] = {} unless art.has_key?(x[0]) -# art[x[0]]["subject"] = x[1] -# art[x[0]]["messid"] = x[4] -# print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2 -# print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2 -# } + date_lines.collect{|x| + art[x[0]] = {} unless art.has_key?(x[0]) + begin + art[x[0]]["date"] = DateTime.parse(x[1]).strftime('%Y%m%d') + rescue + puts $!.message + puts x[1] + art[x[0]]["date"] = Time.now.strftime('%Y%m%d') + end + puts "art id: #{x[0]} date: #{x[1]}" if Debuglevel > 2 + } subj_lines.collect{|x| art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]]["subject"] = x[1] @@ -340,16 +346,15 @@ def get_articles(cachedir=false) puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2 } art.keys.each{|id| - if art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from") + if art[id].has_key?("date") and art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from") puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2 -# @newids[server][id.to_i] = true # dit wellicht alleen doen indien preselector hem uitkiest # en anders een leuk regeltje aan de cache toevoegen, # maar niet in het geheugen houden if preselect(art[id]["subject"]) - add(id.to_i, art[id]["messid"], art[id]["from"], art[id]["subject"], server) + add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server) end - cache_add(cachedir, id, art[id]["messid"], art[id]["from"], art[id]["subject"], server) + cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server) end } } @@ -788,11 +793,11 @@ def save_newsrc() } end -def cache_add(cachedir, id, messid, from, subject, server) +def cache_add(cachedir, id, messid, date, from, 
subject, server) if @cache_buf.has_key?(server) - @cache_buf[server].push("#{id}|#{messid}|#{from}|#{subject}\n") + @cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n") else - @cache_buf[server] = [ "#{id}|#{messid}|#{from}|#{subject}\n" ] + @cache_buf[server] = [ "#{id}|#{messid}|#{date}|#{from}|#{subject}\n" ] end if @cache_buf[server].length > 100 cache_save(cachedir, server) @@ -806,10 +811,11 @@ def cache_check(cachedir) end def cache_read(cachedir) +# Profiler__::start_profile puts "#{Time.now} Reading & scrubbing caches" filename = "#{cachedir}/#{@group}.ripnewscache" excludes = {} - regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)') + regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)') @connections.keys.each{|server| first = @connections[server]["first"] last = @connections[server]["last"] @@ -823,13 +829,14 @@ puts " #{Time.now} Reading cache for #{server}" line =~ regexp id_i = $1.to_i messid = $2 - from = $2 - subject = $4 + date = $3 + from = $4 + subject = $5 if first <= id_i and id_i <= last unless excludes[server].has_key?(id_i) outfile.puts(line) if preselect(subject) - add(id_i, messid, from, subject, server) + add(id_i, messid, date, from, subject, server) end # XXX alle traagheid van de cache_read zit in deze regel: @connections[server]["skip_ids"].insert!(id_i) @@ -844,6 +851,8 @@ puts " #{Time.now} Reading cache for #{server}" end } puts "#{Time.now} Caches read" +# Profiler__::stop_profile +# Profiler__::print_profile($stderr) #memusage end @@ -876,7 +885,7 @@ p Time.now infile = File.new("#(unknown).#{server}") or puts "Couldn't open cachefile for reading" outfile = File.new("#(unknown).#{server}.new", "w") or puts "Couldn't open cachefile for writing" infile.each{ |line| - id, messid, date, subject = line.split("|", 4) + if id.to_i >= @connections[server]["first"] and id.to_i <= @connections[server]["last"] outfile.puts(line)