add date to caches
This commit is contained in:
parent
64d3e59cbb
commit
872158c686
1 changed file with 32 additions and 23 deletions
|
|
@ -23,6 +23,7 @@ require 'news/newsrc'
|
||||||
require 'tempfile'
|
require 'tempfile'
|
||||||
require 'timeout'
|
require 'timeout'
|
||||||
#require 'yaml'
|
#require 'yaml'
|
||||||
|
require 'profiler'
|
||||||
|
|
||||||
class ArticleError < RuntimeError; end
|
class ArticleError < RuntimeError; end
|
||||||
class TempError < ArticleError; end
|
class TempError < ArticleError; end
|
||||||
|
|
@ -99,7 +100,7 @@ class Article
|
||||||
|
|
||||||
Debuglevel = 1
|
Debuglevel = 1
|
||||||
|
|
||||||
Message = Struct.new(:messid, :id, :from, :server, :subject)
|
Message = Struct.new(:messid, :id, :date, :from, :server, :subject)
|
||||||
|
|
||||||
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
|
def initialize(nntpservers, groupname, newsrc="~/.newsrc")
|
||||||
@messageinfo = []
|
@messageinfo = []
|
||||||
|
|
@ -234,8 +235,8 @@ def preselect(subject)
|
||||||
# return ( subject =~ @preselectpattern )
|
# return ( subject =~ @preselectpattern )
|
||||||
end
|
end
|
||||||
|
|
||||||
def add(id, messid, from, subject, server)
|
def add(id, messid, date, from, subject, server)
|
||||||
@messageinfo.push(Message.new(messid, id, from, server, subject))
|
@messageinfo.push(Message.new(messid, id, date, from, server, subject))
|
||||||
@grouped = false
|
@grouped = false
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -282,7 +283,7 @@ def get_articles(cachedir=false)
|
||||||
if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"]
|
if @connections[server]["skip_ids"].max && @connections[server]["skip_ids"].max < @connections[server]["last"]
|
||||||
articles = @connections[server]["last"] - @connections[server]["first"]
|
articles = @connections[server]["last"] - @connections[server]["first"]
|
||||||
if articles > 10000
|
if articles > 10000
|
||||||
fillerend = (@connections[server]["skip_ids"].max - (articles/3)).to_i
|
fillerend = (@connections[server]["skip_ids"].max - (articles/5)).to_i
|
||||||
else
|
else
|
||||||
fillerend = @connections[server]["skip_ids"].max - 2000
|
fillerend = @connections[server]["skip_ids"].max - 2000
|
||||||
end
|
end
|
||||||
|
|
@ -308,6 +309,7 @@ def get_articles(cachedir=false)
|
||||||
puts "i: #{i}" if Debuglevel > 2
|
puts "i: #{i}" if Debuglevel > 2
|
||||||
begin
|
begin
|
||||||
# resp, xover_lines = get_xover(server, i)
|
# resp, xover_lines = get_xover(server, i)
|
||||||
|
resp, date_lines = get_xhdr(server, i, "date")
|
||||||
resp, subj_lines = get_xhdr(server, i, "subject")
|
resp, subj_lines = get_xhdr(server, i, "subject")
|
||||||
resp, messid_lines = get_xhdr(server, i, "message-id")
|
resp, messid_lines = get_xhdr(server, i, "message-id")
|
||||||
resp, from_lines = get_xhdr(server, i, "from")
|
resp, from_lines = get_xhdr(server, i, "from")
|
||||||
|
|
@ -317,13 +319,17 @@ def get_articles(cachedir=false)
|
||||||
end
|
end
|
||||||
|
|
||||||
art = {}
|
art = {}
|
||||||
# xover_lines.collect{|x|
|
date_lines.collect{|x|
|
||||||
# art[x[0]] = {} unless art.has_key?(x[0])
|
art[x[0]] = {} unless art.has_key?(x[0])
|
||||||
# art[x[0]]["subject"] = x[1]
|
begin
|
||||||
# art[x[0]]["messid"] = x[4]
|
art[x[0]]["date"] = DateTime.parse(x[1]).strftime('%Y%m%d')
|
||||||
# print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 2
|
rescue
|
||||||
# print "art id: #{x[0]} messid: #{x[4]}\n" if Debuglevel > 2
|
puts $!.message
|
||||||
# }
|
puts x[1]
|
||||||
|
art[x[0]]["date"] = Time.now.strftime('%Y%m%d')
|
||||||
|
end
|
||||||
|
puts "art id: #{x[0]} date: #{x[1]}" if Debuglevel > 2
|
||||||
|
}
|
||||||
subj_lines.collect{|x|
|
subj_lines.collect{|x|
|
||||||
art[x[0]] = {} unless art.has_key?(x[0])
|
art[x[0]] = {} unless art.has_key?(x[0])
|
||||||
art[x[0]]["subject"] = x[1]
|
art[x[0]]["subject"] = x[1]
|
||||||
|
|
@ -340,16 +346,15 @@ def get_articles(cachedir=false)
|
||||||
puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2
|
puts "art id: #{x[0]} from: #{x[1]}" if Debuglevel > 2
|
||||||
}
|
}
|
||||||
art.keys.each{|id|
|
art.keys.each{|id|
|
||||||
if art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
|
if art[id].has_key?("date") and art[id].has_key?("subject") and art[id].has_key?("messid") and art[id].has_key?("from")
|
||||||
puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
|
puts "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["from"]}, #{art[id]["subject"]}" if Debuglevel > 2
|
||||||
# @newids[server][id.to_i] = true
|
|
||||||
# dit wellicht alleen doen indien preselector hem uitkiest
|
# dit wellicht alleen doen indien preselector hem uitkiest
|
||||||
# en anders een leuk regeltje aan de cache toevoegen,
|
# en anders een leuk regeltje aan de cache toevoegen,
|
||||||
# maar niet in het geheugen houden
|
# maar niet in het geheugen houden
|
||||||
if preselect(art[id]["subject"])
|
if preselect(art[id]["subject"])
|
||||||
add(id.to_i, art[id]["messid"], art[id]["from"], art[id]["subject"], server)
|
add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
|
||||||
end
|
end
|
||||||
cache_add(cachedir, id, art[id]["messid"], art[id]["from"], art[id]["subject"], server)
|
cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
|
||||||
end
|
end
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -788,11 +793,11 @@ def save_newsrc()
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def cache_add(cachedir, id, messid, from, subject, server)
|
def cache_add(cachedir, id, messid, date, from, subject, server)
|
||||||
if @cache_buf.has_key?(server)
|
if @cache_buf.has_key?(server)
|
||||||
@cache_buf[server].push("#{id}|#{messid}|#{from}|#{subject}\n")
|
@cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n")
|
||||||
else
|
else
|
||||||
@cache_buf[server] = [ "#{id}|#{messid}|#{from}|#{subject}\n" ]
|
@cache_buf[server] = [ "#{id}|#{messid}|#{date}|#{from}|#{subject}\n" ]
|
||||||
end
|
end
|
||||||
if @cache_buf[server].length > 100
|
if @cache_buf[server].length > 100
|
||||||
cache_save(cachedir, server)
|
cache_save(cachedir, server)
|
||||||
|
|
@ -806,10 +811,11 @@ def cache_check(cachedir)
|
||||||
end
|
end
|
||||||
|
|
||||||
def cache_read(cachedir)
|
def cache_read(cachedir)
|
||||||
|
# Profiler__::start_profile
|
||||||
puts "#{Time.now} Reading & scrubbing caches"
|
puts "#{Time.now} Reading & scrubbing caches"
|
||||||
filename = "#{cachedir}/#{@group}.ripnewscache"
|
filename = "#{cachedir}/#{@group}.ripnewscache"
|
||||||
excludes = {}
|
excludes = {}
|
||||||
regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)')
|
regexp = Regexp.new('^([^\|]*)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|(.*)')
|
||||||
@connections.keys.each{|server|
|
@connections.keys.each{|server|
|
||||||
first = @connections[server]["first"]
|
first = @connections[server]["first"]
|
||||||
last = @connections[server]["last"]
|
last = @connections[server]["last"]
|
||||||
|
|
@ -823,13 +829,14 @@ puts " #{Time.now} Reading cache for #{server}"
|
||||||
line =~ regexp
|
line =~ regexp
|
||||||
id_i = $1.to_i
|
id_i = $1.to_i
|
||||||
messid = $2
|
messid = $2
|
||||||
from = $2
|
date = $3
|
||||||
subject = $4
|
from = $4
|
||||||
|
subject = $5
|
||||||
if first <= id_i and id_i <= last
|
if first <= id_i and id_i <= last
|
||||||
unless excludes[server].has_key?(id_i)
|
unless excludes[server].has_key?(id_i)
|
||||||
outfile.puts(line)
|
outfile.puts(line)
|
||||||
if preselect(subject)
|
if preselect(subject)
|
||||||
add(id_i, messid, from, subject, server)
|
add(id_i, messid, date, from, subject, server)
|
||||||
end
|
end
|
||||||
# XXX alle traagheid van de cache_read zit in deze regel:
|
# XXX alle traagheid van de cache_read zit in deze regel:
|
||||||
@connections[server]["skip_ids"].insert!(id_i)
|
@connections[server]["skip_ids"].insert!(id_i)
|
||||||
|
|
@ -844,6 +851,8 @@ puts " #{Time.now} Reading cache for #{server}"
|
||||||
end
|
end
|
||||||
}
|
}
|
||||||
puts "#{Time.now} Caches read"
|
puts "#{Time.now} Caches read"
|
||||||
|
# Profiler__::stop_profile
|
||||||
|
# Profiler__::print_profile($stderr)
|
||||||
#memusage
|
#memusage
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -876,7 +885,7 @@ p Time.now
|
||||||
infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading"
|
infile = File.new("#{filename}.#{server}") or puts "Couldn't open cachefile for reading"
|
||||||
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
|
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
|
||||||
infile.each{ |line|
|
infile.each{ |line|
|
||||||
id, messid, subject = line.split("|", 3)
|
id, messid, date, subject = line.split("|", 3)
|
||||||
if id.to_i >= @connections[server]["first"] and
|
if id.to_i >= @connections[server]["first"] and
|
||||||
id.to_i <= @connections[server]["last"]
|
id.to_i <= @connections[server]["last"]
|
||||||
outfile.puts(line)
|
outfile.puts(line)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue