Compare commits

...

8 commits

Author SHA1 Message Date
cb321d68ed some fixes 2025-01-09 07:45:12 +01:00
Ward Wouts
8c20f8435f be much more aggressive in scrubbing caches. 2021-04-01 14:35:36 +02:00
Ward Wouts
cde8e59f3e debugging crap 2021-03-12 13:06:07 +01:00
Ward Wouts
30e9890eda Merge branch 'master' of gogs.wizeazz.nl:ward/ripnews 2020-10-18 16:08:35 +02:00
Ward Wouts
666a480c3f be stricter on type 2020-10-18 16:08:17 +02:00
92d374d607 Update 'CHANGELOG.md' 2020-03-12 09:06:30 +00:00
Ward Wouts
908288f566 Merge branch 'master' of gogs.wizeazz.nl:ward/ripnews 2020-03-12 10:03:00 +01:00
Ward Wouts
35aa9e9e86 allow for different ordering of items in ybegin/part lines.
TODO generalise, this is ugly
2020-03-12 10:02:02 +01:00
5 changed files with 92 additions and 59 deletions

View file

@ -1,3 +1,6 @@
ripnews-20200312
- allow for different yenc line ordering
from 0.5.4 to ripnews-20200311
- add option to only rip one group
- add option to list configured groups

View file

@ -90,7 +90,8 @@ def _uudecode_file(file, outfile)
next if line =~ /[a-z]/
next if line == nil
next unless ((((line[0].ord - 32) & 077) + 2) / 3).to_i == (line.length/4).to_i
line.unpack("u").each{|x| outfile.print x}
#line.unpack("u").each{|x| outfile.print x}
line.unpack("u").each{|x| outfile.write x}
end
puts "No \"end\" found!!!"

View file

@ -97,12 +97,14 @@ def _ydecode_file(file, outfile)
if line.match(/^\=ybegin\s+(.*line\=.*)/)
m = $1
puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0
if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/)
part = $2.to_i
total = $4.to_i
linesize = $6.to_i
totalsize = $8.to_i
filename = $10
print "full line: #{line}" if Debuglevel > 0
if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or
matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/)
part = matchdata[:part].to_i
total = matchdata[:total].to_i
linesize = matchdata[:linesize].to_i
totalsize = matchdata[:totalsize].to_i
filename = matchdata[:filename]
if Debuglevel > 0
print "found beginning"
if part != nil
@ -161,12 +163,13 @@ def _ydecode_file(file, outfile)
closure = false
m = $1
search_begin = false
if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/)
part = $2.to_i
total = $4.to_i
linesize = $6.to_i
totalsize = $8.to_i
filename = $10
if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or
matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/)
part = matchdata[:part].to_i
total = matchdata[:total].to_i
linesize = matchdata[:linesize].to_i
totalsize = matchdata[:totalsize].to_i
filename = matchdata[:filename]
puts "found beginning of part #{part}, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0
end
next
@ -230,13 +233,14 @@ def _ydecode_array(data)
if data[i].match(/^\=ybegin\s+(.*line\=.*)/)
m = $1
puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0
if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/)
part = $2.to_i
total = $4.to_i
linesize = $6.to_i
size = $8.to_i
filename = $10
puts " #{Thread.current.inspect} found beginning, linesize = #{linesize}, size = #{size}, filename = #{filename}" if Debuglevel > 0
if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or
matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/)
part = matchdata[:part].to_i
total = matchdata[:total].to_i
linesize = matchdata[:linesize].to_i
totalsize = matchdata[:totalsize].to_i
filename = matchdata[:filename]
puts " #{Thread.current.inspect} found beginning, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0
i += 1
break
else
@ -317,8 +321,9 @@ def get_filename(data)
i = 0
while i < data.length
line = data[i]
if line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/m)
return $10
if matchdata = line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(?<filename>.*?\S))\s*$/m) or
matchdata = line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(size\=(\d+))(\s*line\=(\d+))(\s*name=(?<filename>.*?\S))\s*$/m)
return matchdata[:filename]
end
i += 1
end

View file

@ -1,6 +1,3 @@
# $Dwarf: article.rb,v 1.114 2005/05/12 07:39:53 ward Exp $
# $Source$
#
# Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts <ward@wouts.nl>
#
@ -23,7 +20,7 @@ require Pathname.new(__FILE__).dirname + '../news/newsrc'
require 'tempfile'
require 'timeout'
#require 'yaml'
require 'profiler'
#require 'profiler' # removed from ruby?
class ArticleError < RuntimeError; end
class TempError < ArticleError; end
@ -114,6 +111,10 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
@serverlist = []
@serverpasses = {}
@maxage = maxage.to_i
@oldestallowed = 0
if @maxage != 0
@oldestallowed = (DateTime.now - @maxage).strftime('%Y%m%d').to_i
end
tmplist = nntpservers.split('|')
tmplist.each{ |server|
@ -148,7 +149,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
# p server
# p Time.now
begin
timeout(60) do
Timeout.timeout(60) do
#p "connecting"
@connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
end
@ -176,7 +177,7 @@ def reconnect(server)
retries = 0
begin
puts "Trying to kill old connection #{Time.now}"
timeout(10) do
Timeout.timeout(10) do
@connections[server]["nntp"].quit
end
puts "Killed old connection #{Time.now}"
@ -188,7 +189,7 @@ def reconnect(server)
puts "Trying to reconnect #{Time.now}"
sleep 3
#timeout(180) do
timeout(60) do
Timeout.timeout(60) do
@connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
end
resp = @connections[server]["nntp"].mode_reader
@ -331,7 +332,8 @@ def get_articles(cachedir=false)
art[id] = {} unless art.has_key?(id)
begin
lastdate = art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d')
lastdate = DateTime.parse(date)
art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d')
rescue
puts $!.message
puts id
@ -354,14 +356,16 @@ def get_articles(cachedir=false)
# perhaps only do this if the preselector picks it,
# and otherwise add a nice little line to the cache,
# but do not keep it in memory
if preselect(art[id]["subject"])
if art[id]["date"].to_i >= @oldestallowed && preselect(art[id]["subject"])
add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
end
if art[id]["date"].to_i >= @oldestallowed
cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
end
end
}
if @maxage and @maxage > 0
if DateTime.parse(lastdate) < ( DateTime.now - @maxage )
if lastdate < ( DateTime.now - @maxage )
puts "Skipping articles older than #{DateTime.now - @maxage}"
break
end
@ -384,7 +388,7 @@ def get_group_info(server)
first = ""
last = ""
begin
timeout(30) do
Timeout.timeout(30) do
begin
resp, count, first, last, name = @connections[server]["nntp"].group(@group)
rescue Net::NNTPReplyError
@ -418,7 +422,7 @@ def get_xhdr(server, range, header)
resp = ""
lines = []
begin
timeout(180) do
Timeout.timeout(180) do
begin
p Time.now if Debuglevel > 1
puts "getting headers: #{header}, #{range}" if Debuglevel > 1
@ -474,7 +478,7 @@ def get_xover(server, range)
ed = start
end
begin
timeout(180) do
Timeout.timeout(180) do
begin
p Time.now if Debuglevel > 1
puts "getting headers: #{range}" if Debuglevel > 1
@ -532,7 +536,7 @@ def get_body(server, message)
messid = ""
list = []
begin
timeout(180) do
Timeout.timeout(180) do
begin
list = []
resp, id, messid, list = @connections[server]["nntp"].body(message)
@ -825,6 +829,7 @@ def save_newsrc()
end
def cache_add(cachedir, id, messid, date, from, subject, server)
# also add to skip stuff
if @cache_buf.has_key?(server)
@cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n")
else
@ -849,7 +854,7 @@ puts "#{Time.now} Reading & scrubbing caches"
@connections.keys.each{|server|
first = @connections[server]["first"]
last = @connections[server]["last"]
#cache_scrub(cachedir, server)
cache_scrub(cachedir, server)
puts " #{Time.now} Reading cache for #{server}"
excludes[server] = {}
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
@ -876,14 +881,16 @@ puts " #{Time.now} Reading cache for #{server}"
if first <= id_i and id_i <= last
if ! excludes[server].has_key?(id_i)
outfile.puts(line)
if preselect(subject)
if date.to_i >= @oldestallowed && preselect(subject)
add(id_i, messid, date, from, subject, server)
end
# XXX all the slowness of cache_read is in this line:
if date.to_i < @oldestallowed
@connections[server]["skip_ids"].insert!(id_i)
end
end
end
end
rescue EOFError
end
if ( FileUtils.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
@ -935,11 +942,19 @@ p Time.now
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
infile.each{ |line|
id, messid, date, subject = line.split("|", 3)
#puts "#{date.to_i} #{@oldestallowed}"
# XXX maybe also add to skipids ??
next if date.to_i < @oldestallowed
if id.to_i >= @connections[server]["first"] and
id.to_i <= @connections[server]["last"]
outfile.puts(line)
end
}
if ( FileUtils.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
puts " #{Time.now} Cache scrubbed for #{server}"
else
puts "Couldn't scrub #{server} cache"
end
end
p Time.now
end

View file

@ -547,11 +547,17 @@ def get_multi(subj, group)
# puts "inside thread post pass\n"
if UUEncode.is_uuencoded(tbody)
puts " UUDecoding..."
begin
if tfile
tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout)
else
tmode, tfilename, tbody = UUEncode.uudecode(tbody)
end
rescue Encoding::UndefinedConversionError
puts "#{$!}"
puts " Skipping article..."
Thread.current.exit
end
elsif YEnc.is_yencoded(tbody)
puts " YDecoding..."
begin
@ -683,7 +689,7 @@ def get_max_file_length(tempdir=".")
name = "a"*500
name = "#$$#{name}"
begin
file = File.new("#{tempdir}/#{name}", "w", 0644).close
File.new("#{tempdir}/#{name}", "w", 0644).close
FileUtils.rm("#{tempdir}/#{name}")
rescue Errno::ENAMETOOLONG
name = name[0...-1]
@ -774,6 +780,9 @@ def main
@decode_file_lock = Mutex.new
profile_mem("#{group} start")
puts "\nGetting articles for #{group}"
puts "nntpserver #{@config[group]["NNTPSERVER"]}"
puts "newsrcname #{@config[group]["NEWSRCNAME"]}"
puts "maxage #{@config[group]["MAXAGE"]}"
@articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"], @config[group]["MAXAGE"])
fill_preselector(group)
puts "initialized"