add date to caches

This commit is contained in:
Ward Wouts 2008-02-24 18:20:50 +00:00
parent 64d3e59cbb
commit 872158c686

View file

@ -23,6 +23,7 @@ require 'news/newsrc'
require 'tempfile' require 'tempfile'
require 'timeout' require 'timeout'
#require 'yaml' #require 'yaml'
require 'profiler'
class ArticleError < RuntimeError; end class ArticleError < RuntimeError; end
class TempError < ArticleError; end class TempError < ArticleError; end
@ -99,7 +100,7 @@ class Article
Debuglevel = 1 Debuglevel = 1
Message = Struct.new(:messid, :id, :from, :server, :subject) Message = Struct.new(:messid, :id, :date, :from, :server, :subject)
def initialize(nntpservers, groupname, newsrc="~/.newsrc") def initialize(nntpservers, groupname, newsrc="~/.newsrc")
@messageinfo = [] @messageinfo = []
@ -234,8 +235,8 @@ def preselect(subject)
# return ( subject =~ @preselectpattern ) # return ( subject =~ @preselectpattern )
end end
def add(id, messid, from, subject, server) def add(id, messid, date, from, subject, server)
@messageinfo.push(Message.new(messid, id, from, server, subject)) @messageinfo.push(Message.new(messid, id, date, from, server, subject))
@grouped = false @grouped = false
end end
@ -282,7 +283,7 @@ def get_articles(cachedir=false)
if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"] if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"]
articles = @connections[server]["last"] - @connections[server]["first"] articles = @connections[server]["last"] - @connections[server]["first"]
if articles > 10000 if articles > 10000
fillerend = (@connections[server]["skip_ids"].max - (articles/3)).to_i fillerend = (@connections[server]["skip_ids"].max - (articles/5)).to_i
else else
fillerend = @connections[server]["skip_ids"].max - 2000 fillerend = @connections[server]["skip_ids"].max - 2000
end end
@ -308,6 +309,7 @@ def get_articles(cachedir=false)
puts "i: #{i}" if Debuglevel > 2 puts "i: #{i}" if Debuglevel > 2
begin begin
# resp, xover_lines = get_xover(server, i) # resp, xover_lines = get_xover(server, i)
resp, date_lines = get_xhdr(server, i, "date")
resp, subj_lines = get_xhdr(server, i, "subject") resp, subj_lines = get_xhdr(server, i, "subject")
resp, messid_lines = get_xhdr(server, i, "message-id") resp, messid_lines = get_xhdr(server, i, "message-id")
resp, from_lines = get_xhdr(server, i, "from") resp, from_lines = get_xhdr(server, i, "from")
@ -317,13 +319,17 @@ def get_articles(cachedir=false)
end end
art = {} art = {}
# xover_lines.collect{|x| date_lines.collect{|x|
# art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]] = {} unless art.has_key?(x[0])
# art[x[0]]["subject"] = x[1] begin
# art[x[0]]["messid"] = x[4] art[x[0]]["date"] = DateTime.parse(x[1]).strftime('%Y%m%d')
# print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2 rescue
# print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2 puts $!.message
# } puts x[1]
art[x[0]]["date"] = Time.now.strftime('%Y%m%d')
end
puts "art id: #{x[0]} date: #{x[1]}" if Debuglevel > 2
}
subj_lines.collect{|x| subj_lines.collect{|x|
art[x[0]] = {} unless art.has_key?(x[0]) art[x[0]] = {} unless art.has_key?(x[0])
art[x[0]]["subject"] = x[1] art[x[0]]["subject"] = x[1]
@ -340,16 +346,15 @@ def get_articles(cachedir=false)
puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2 puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2
} }
art.keys.each{|id| art.keys.each{|id|
if art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from") if art[id].has_key?("date") and art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2 puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
# @newids[server][id.to_i] = true
# perhaps only do this if the preselector picks it # perhaps only do this if the preselector picks it
# and otherwise add a nice little line to the cache, # and otherwise add a nice little line to the cache,
# but do not keep it in memory # but do not keep it in memory
if preselect(art[id]["subject"]) if preselect(art[id]["subject"])
add(id.to_i, art[id]["messid"], art[id]["from"], art[id]["subject"], server) add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
end end
cache_add(cachedir, id, art[id]["messid"], art[id]["from"], art[id]["subject"], server) cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
end end
} }
} }
@ -788,11 +793,11 @@ def save_newsrc()
} }
end end
def cache_add(cachedir, id, messid, from, subject, server) def cache_add(cachedir, id, messid, date, from, subject, server)
if @cache_buf.has_key?(server) if @cache_buf.has_key?(server)
@cache_buf[server].push("#{id}|#{messid}|#{from}|#{subject}\n") @cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n")
else else
@cache_buf[server] = [ "#{id}|#{messid}|#{from}|#{subject}\n" ] @cache_buf[server] = [ "#{id}|#{messid}|#{date}|#{from}|#{subject}\n" ]
end end
if @cache_buf[server].length > 100 if @cache_buf[server].length > 100
cache_save(cachedir, server) cache_save(cachedir, server)
@ -806,10 +811,11 @@ def cache_check(cachedir)
end end
def cache_read(cachedir) def cache_read(cachedir)
# Profiler__::start_profile
puts "#{Time.now} Reading & scrubbing caches" puts "#{Time.now} Reading & scrubbing caches"
filename = "#{cachedir}/#{@group}.ripnewscache" filename = "#{cachedir}/#{@group}.ripnewscache"
excludes = {} excludes = {}
regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)') regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)')
@connections.keys.each{|server| @connections.keys.each{|server|
first = @connections[server]["first"] first = @connections[server]["first"]
last = @connections[server]["last"] last = @connections[server]["last"]
@ -823,13 +829,14 @@ puts " #{Time.now} Reading cache for #{server}"
line =~ regexp line =~ regexp
id_i = $1.to_i id_i = $1.to_i
messid = $2 messid = $2
from = $2 date = $3
subject = $4 from = $4
subject = $5
if first <= id_i and id_i <= last if first <= id_i and id_i <= last
unless excludes[server].has_key?(id_i) unless excludes[server].has_key?(id_i)
outfile.puts(line) outfile.puts(line)
if preselect(subject) if preselect(subject)
add(id_i, messid, from, subject, server) add(id_i, messid, date, from, subject, server)
end end
# XXX all of the slowness of cache_read is in this line: # XXX all of the slowness of cache_read is in this line:
@connections[server]["skip_ids"].insert!(id_i) @connections[server]["skip_ids"].insert!(id_i)
@ -844,6 +851,8 @@ puts " #{Time.now} Reading cache for #{server}"
end end
} }
puts "#{Time.now} Caches read" puts "#{Time.now} Caches read"
# Profiler__::stop_profile
# Profiler__::print_profile($stderr)
#memusage #memusage
end end
@ -876,7 +885,7 @@ p Time.now
infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading" infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading"
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing" outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
infile.each{ |line| infile.each{ |line|
id, messid, subject = line.split("|", 3) id, messid, date, subject = line.split("|", 3)
if id.to_i >= @connections[server]["first"] and if id.to_i >= @connections[server]["first"] and
id.to_i <= @connections[server]["last"] id.to_i <= @connections[server]["last"]
outfile.puts(line) outfile.puts(line)