Compare commits

..

No commits in common. "master" and "ripnews-20200311" have entirely different histories.

5 changed files with 59 additions and 92 deletions

View file

@ -1,6 +1,3 @@
ripnews-20200312
- allow for different yenc line ordering
from 0.5.4 to ripnews-20200311 from 0.5.4 to ripnews-20200311
- add option to only rip one group - add option to only rip one group
- add option to list configured groups - add option to list configured groups

View file

@ -90,8 +90,7 @@ def _uudecode_file(file, outfile)
next if line =~ /[a-z]/ next if line =~ /[a-z]/
next if line == nil next if line == nil
next unless ((((line[0].ord - 32) & 077) + 2) / 3).to_i == (line.length/4).to_i next unless ((((line[0].ord - 32) & 077) + 2) / 3).to_i == (line.length/4).to_i
#line.unpack("u").each{|x| outfile.print x} line.unpack("u").each{|x| outfile.print x}
line.unpack("u").each{|x| outfile.write x}
end end
puts "No \"end\" found!!!" puts "No \"end\" found!!!"

View file

@ -97,14 +97,12 @@ def _ydecode_file(file, outfile)
if line.match(/^\=ybegin\s+(.*line\=.*)/) if line.match(/^\=ybegin\s+(.*line\=.*)/)
m = $1 m = $1
puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0 puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0
print "full line: #{line}" if Debuglevel > 0 if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/)
if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or part = $2.to_i
matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) total = $4.to_i
part = matchdata[:part].to_i linesize = $6.to_i
total = matchdata[:total].to_i totalsize = $8.to_i
linesize = matchdata[:linesize].to_i filename = $10
totalsize = matchdata[:totalsize].to_i
filename = matchdata[:filename]
if Debuglevel > 0 if Debuglevel > 0
print "found beginning" print "found beginning"
if part != nil if part != nil
@ -163,13 +161,12 @@ def _ydecode_file(file, outfile)
closure = false closure = false
m = $1 m = $1
search_begin = false search_begin = false
if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/)
matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) part = $2.to_i
part = matchdata[:part].to_i total = $4.to_i
total = matchdata[:total].to_i linesize = $6.to_i
linesize = matchdata[:linesize].to_i totalsize = $8.to_i
totalsize = matchdata[:totalsize].to_i filename = $10
filename = matchdata[:filename]
puts "found beginning of part #{part}, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0 puts "found beginning of part #{part}, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0
end end
next next
@ -233,14 +230,13 @@ def _ydecode_array(data)
if data[i].match(/^\=ybegin\s+(.*line\=.*)/) if data[i].match(/^\=ybegin\s+(.*line\=.*)/)
m = $1 m = $1
puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0 puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0
if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/)
matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) part = $2.to_i
part = matchdata[:part].to_i total = $4.to_i
total = matchdata[:total].to_i linesize = $6.to_i
linesize = matchdata[:linesize].to_i size = $8.to_i
totalsize = matchdata[:totalsize].to_i filename = $10
filename = matchdata[:filename] puts " #{Thread.current.inspect} found beginning, linesize = #{linesize}, size = #{size}, filename = #{filename}" if Debuglevel > 0
puts " #{Thread.current.inspect} found beginning, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0
i += 1 i += 1
break break
else else
@ -321,9 +317,8 @@ def get_filename(data)
i = 0 i = 0
while i < data.length while i < data.length
line = data[i] line = data[i]
if matchdata = line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(?<filename>.*?\S))\s*$/m) or if line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/m)
matchdata = line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(size\=(\d+))(\s*line\=(\d+))(\s*name=(?<filename>.*?\S))\s*$/m) return $10
return matchdata[:filename]
end end
i += 1 i += 1
end end

View file

@ -1,3 +1,6 @@
# $Dwarf: article.rb,v 1.114 2005/05/12 07:39:53 ward Exp $
# $Source$
# #
# Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts <ward@wouts.nl> # Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts <ward@wouts.nl>
# #
@ -20,7 +23,7 @@ require Pathname.new(__FILE__).dirname + '../news/newsrc'
require 'tempfile' require 'tempfile'
require 'timeout' require 'timeout'
#require 'yaml' #require 'yaml'
#require 'profiler' # removed from ruby? require 'profiler'
class ArticleError < RuntimeError; end class ArticleError < RuntimeError; end
class TempError < ArticleError; end class TempError < ArticleError; end
@ -111,10 +114,6 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
@serverlist = [] @serverlist = []
@serverpasses = {} @serverpasses = {}
@maxage = maxage.to_i @maxage = maxage.to_i
@oldestallowed = 0
if @maxage != 0
@oldestallowed = (DateTime.now - @maxage).strftime('%Y%m%d').to_i
end
tmplist = nntpservers.split('|') tmplist = nntpservers.split('|')
tmplist.each{ |server| tmplist.each{ |server|
@ -149,7 +148,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0)
# p server # p server
# p Time.now # p Time.now
begin begin
Timeout.timeout(60) do timeout(60) do
#p "connecting" #p "connecting"
@connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass']) @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
end end
@ -177,7 +176,7 @@ def reconnect(server)
retries = 0 retries = 0
begin begin
puts "Trying to kill old connection #{Time.now}" puts "Trying to kill old connection #{Time.now}"
Timeout.timeout(10) do timeout(10) do
@connections[server]["nntp"].quit @connections[server]["nntp"].quit
end end
puts "Killed old connection #{Time.now}" puts "Killed old connection #{Time.now}"
@ -189,7 +188,7 @@ def reconnect(server)
puts "Trying to reconnect #{Time.now}" puts "Trying to reconnect #{Time.now}"
sleep 3 sleep 3
#timeout(180) do #timeout(180) do
Timeout.timeout(60) do timeout(60) do
@connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass']) @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass'])
end end
resp = @connections[server]["nntp"].mode_reader resp = @connections[server]["nntp"].mode_reader
@ -332,8 +331,7 @@ def get_articles(cachedir=false)
art[id] = {} unless art.has_key?(id) art[id] = {} unless art.has_key?(id)
begin begin
lastdate = DateTime.parse(date) lastdate = art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d')
art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d')
rescue rescue
puts $!.message puts $!.message
puts id puts id
@ -356,16 +354,14 @@ def get_articles(cachedir=false)
# dit wellicht alleen doen indien preselector hem uitkiest # dit wellicht alleen doen indien preselector hem uitkiest
# en anders een leuk regeltje aan de cache toevoegen, # en anders een leuk regeltje aan de cache toevoegen,
# maar niet in het geheugen houden # maar niet in het geheugen houden
if art[id]["date"].to_i >= @oldestallowed && preselect(art[id]["subject"]) if preselect(art[id]["subject"])
add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server) add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
end end
if art[id]["date"].to_i >= @oldestallowed cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server)
end
end end
} }
if @maxage and @maxage > 0 if @maxage and @maxage > 0
if lastdate < ( DateTime.now - @maxage ) if DateTime.parse(lastdate) < ( DateTime.now - @maxage )
puts "Skipping articles older than #{DateTime.now - @maxage}" puts "Skipping articles older than #{DateTime.now - @maxage}"
break break
end end
@ -388,7 +384,7 @@ def get_group_info(server)
first = "" first = ""
last = "" last = ""
begin begin
Timeout.timeout(30) do timeout(30) do
begin begin
resp, count, first, last, name = @connections[server]["nntp"].group(@group) resp, count, first, last, name = @connections[server]["nntp"].group(@group)
rescue Net::NNTPReplyError rescue Net::NNTPReplyError
@ -422,7 +418,7 @@ def get_xhdr(server, range, header)
resp = "" resp = ""
lines = [] lines = []
begin begin
Timeout.timeout(180) do timeout(180) do
begin begin
p Time.now if Debuglevel > 1 p Time.now if Debuglevel > 1
puts "getting headers: #{header}, #{range}" if Debuglevel > 1 puts "getting headers: #{header}, #{range}" if Debuglevel > 1
@ -478,7 +474,7 @@ def get_xover(server, range)
ed = start ed = start
end end
begin begin
Timeout.timeout(180) do timeout(180) do
begin begin
p Time.now if Debuglevel > 1 p Time.now if Debuglevel > 1
puts "getting headers: #{range}" if Debuglevel > 1 puts "getting headers: #{range}" if Debuglevel > 1
@ -536,7 +532,7 @@ def get_body(server, message)
messid = "" messid = ""
list = [] list = []
begin begin
Timeout.timeout(180) do timeout(180) do
begin begin
list = [] list = []
resp, id, messid, list = @connections[server]["nntp"].body(message) resp, id, messid, list = @connections[server]["nntp"].body(message)
@ -829,7 +825,6 @@ def save_newsrc()
end end
def cache_add(cachedir, id, messid, date, from, subject, server) def cache_add(cachedir, id, messid, date, from, subject, server)
# also add to skip stuff
if @cache_buf.has_key?(server) if @cache_buf.has_key?(server)
@cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n") @cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n")
else else
@ -854,7 +849,7 @@ puts "#{Time.now} Reading & scrubbing caches"
@connections.keys.each{|server| @connections.keys.each{|server|
first = @connections[server]["first"] first = @connections[server]["first"]
last = @connections[server]["last"] last = @connections[server]["last"]
cache_scrub(cachedir, server) #cache_scrub(cachedir, server)
puts " #{Time.now} Reading cache for #{server}" puts " #{Time.now} Reading cache for #{server}"
excludes[server] = {} excludes[server] = {}
@connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true}
@ -881,13 +876,11 @@ puts " #{Time.now} Reading cache for #{server}"
if first <= id_i and id_i <= last if first <= id_i and id_i <= last
if ! excludes[server].has_key?(id_i) if ! excludes[server].has_key?(id_i)
outfile.puts(line) outfile.puts(line)
if date.to_i >= @oldestallowed && preselect(subject) if preselect(subject)
add(id_i, messid, date, from, subject, server) add(id_i, messid, date, from, subject, server)
end end
# XXX alle traagheid van de cache_read zit in deze regel: # XXX alle traagheid van de cache_read zit in deze regel:
if date.to_i < @oldestallowed @connections[server]["skip_ids"].insert!(id_i)
@connections[server]["skip_ids"].insert!(id_i)
end
end end
end end
end end
@ -942,19 +935,11 @@ p Time.now
outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing" outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing"
infile.each{ |line| infile.each{ |line|
id, messid, date, subject = line.split("|", 3) id, messid, date, subject = line.split("|", 3)
#puts "#{date.to_i} #{@oldestallowed}"
# XXX maybe also add to skipids ??
next if date.to_i < @oldestallowed
if id.to_i >= @connections[server]["first"] and if id.to_i >= @connections[server]["first"] and
id.to_i <= @connections[server]["last"] id.to_i <= @connections[server]["last"]
outfile.puts(line) outfile.puts(line)
end end
} }
if ( FileUtils.move("#{filename}.#{server}.new", "#{filename}.#{server}") )
puts " #{Time.now} Cache scrubbed for #{server}"
else
puts "Couldn't scrub #{server} cache"
end
end end
p Time.now p Time.now
end end

View file

@ -50,20 +50,20 @@ def aprofile_mem(group)
groups = {} groups = {}
ObjectSpace.each_object { |x| ObjectSpace.each_object { |x|
if not [Array,Hash].include? x.class if not [Array,Hash].include? x.class
e = nil e = nil
begin begin
e = MEntry.new( x.class, Marshal::dump(x).size ) e = MEntry.new( x.class, Marshal::dump(x).size )
rescue TypeError # undumpable rescue TypeError # undumpable
e = MEntry.new( x.class, 0 ) e = MEntry.new( x.class, 0 )
end
if groups.has_key? e.c
groups[e.c].mem += e.mem
groups[e.c].total += 1
else
groups[e.c] = GroupEntry.new( e.c, e.mem, 1 )
end
end end
} if groups.has_key? e.c
groups[e.c].mem += e.mem
groups[e.c].total += 1
else
groups[e.c] = GroupEntry.new( e.c, e.mem, 1 )
end
end
}
File.open( "mem_log", "a+" ) { |file| File.open( "mem_log", "a+" ) { |file|
file << "Group #{group}\n" file << "Group #{group}\n"
total = 0 total = 0
@ -547,17 +547,11 @@ def get_multi(subj, group)
# puts "inside thread post pass\n" # puts "inside thread post pass\n"
if UUEncode.is_uuencoded(tbody) if UUEncode.is_uuencoded(tbody)
puts " UUDecoding..." puts " UUDecoding..."
begin if tfile
if tfile tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout)
tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout) else
else tmode, tfilename, tbody = UUEncode.uudecode(tbody)
tmode, tfilename, tbody = UUEncode.uudecode(tbody) end
end
rescue Encoding::UndefinedConversionError
puts "#{$!}"
puts " Skipping article..."
Thread.current.exit
end
elsif YEnc.is_yencoded(tbody) elsif YEnc.is_yencoded(tbody)
puts " YDecoding..." puts " YDecoding..."
begin begin
@ -689,7 +683,7 @@ def get_max_file_length(tempdir=".")
name = "a"*500 name = "a"*500
name = "#$$#{name}" name = "#$$#{name}"
begin begin
File.new("#{tempdir}/#{name}", "w", 0644).close file = File.new("#{tempdir}/#{name}", "w", 0644).close
FileUtils.rm("#{tempdir}/#{name}") FileUtils.rm("#{tempdir}/#{name}")
rescue Errno::ENAMETOOLONG rescue Errno::ENAMETOOLONG
name = name[0...-1] name = name[0...-1]
@ -780,9 +774,6 @@ def main
@decode_file_lock = Mutex.new @decode_file_lock = Mutex.new
profile_mem("#{group} start") profile_mem("#{group} start")
puts "\nGetting articles for #{group}" puts "\nGetting articles for #{group}"
puts "nntpserver #{@config[group]["NNTPSERVER"]}"
puts "newsrcname #{@config[group]["NEWSRCNAME"]}"
puts "maxage #{@config[group]["MAXAGE"]}"
@articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"], @config[group]["MAXAGE"]) @articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"], @config[group]["MAXAGE"])
fill_preselector(group) fill_preselector(group)
puts "initialized" puts "initialized"