From 35aa9e9e861dea76cf9e3ca88e52a8033edf172f Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Thu, 12 Mar 2020 10:02:02 +0100 Subject: [PATCH 1/6] allow for different ordering of items in ybegin/part lines. TODO generalise, this is ugly --- encode/yenc.rb | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/encode/yenc.rb b/encode/yenc.rb index b039b57..8424a9e 100644 --- a/encode/yenc.rb +++ b/encode/yenc.rb @@ -97,12 +97,14 @@ def _ydecode_file(file, outfile) if line.match(/^\=ybegin\s+(.*line\=.*)/) m = $1 puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0 - if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/) - part = $2.to_i - total = $4.to_i - linesize = $6.to_i - totalsize = $8.to_i - filename = $10 + print "full line: #{line}" if Debuglevel > 0 + if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or + matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) + part = matchdata[:part].to_i + total = matchdata[:total].to_i + linesize = matchdata[:linesize].to_i + totalsize = matchdata[:totalsize].to_i + filename = matchdata[:filename] if Debuglevel > 0 print "found beginning" if part != nil @@ -161,12 +163,13 @@ def _ydecode_file(file, outfile) closure = false m = $1 search_begin = false - if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/) - part = $2.to_i - total = $4.to_i - linesize = $6.to_i - totalsize = $8.to_i - filename = $10 + if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or + matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) + part = matchdata[:part].to_i + total = matchdata[:total].to_i + 
linesize = matchdata[:linesize].to_i + totalsize = matchdata[:totalsize].to_i + filename = matchdata[:filename] puts "found beginning of part #{part}, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0 end next @@ -230,13 +233,14 @@ def _ydecode_array(data) if data[i].match(/^\=ybegin\s+(.*line\=.*)/) m = $1 puts " #{Thread.current.inspect} ybegin match; rest: #{m}" if Debuglevel > 0 - if m.match(/^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/) - part = $2.to_i - total = $4.to_i - linesize = $6.to_i - size = $8.to_i - filename = $10 - puts " #{Thread.current.inspect} found beginning, linesize = #{linesize}, size = #{size}, filename = #{filename}" if Debuglevel > 0 + if matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(line\=(?<linesize>\d+))(\s*size\=(?<totalsize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) or + matchdata = m.match(/^\s*(part\=(?<part>\d+)\s+)?(total\=(?<total>\d+)\s+)?(size\=(?<totalsize>\d+))(\s*line\=(?<linesize>\d+))(\s*name=(?<filename>.*?\S))\s*$/) + part = matchdata[:part].to_i + total = matchdata[:total].to_i + linesize = matchdata[:linesize].to_i + totalsize = matchdata[:totalsize].to_i + filename = matchdata[:filename] + puts " #{Thread.current.inspect} found beginning, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}" if Debuglevel > 0 i += 1 break else @@ -317,8 +321,9 @@ def get_filename(data) i = 0 while i < data.length line = data[i] - if line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*?\S))\s*$/m) - return $10 + if matchdata = line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(?<filename>.*?\S))\s*$/m) or + matchdata = line.match(/=ybegin\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(size\=(\d+))(\s*line\=(\d+))(\s*name=(?<filename>.*?\S))\s*$/m) + return matchdata[:filename] end i += 1 end From 92d374d607f074b6221e00a2b8d2487ffb14336d Mon Sep 17 00:00:00 2001 From: ward Date: Thu, 12 Mar 2020 09:06:30 +0000 Subject: [PATCH 2/6] Update 
'CHANGELOG.md' --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc72a..ec24c1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +ripnews-20200312 +- allow for different yenc line ordering + from 0.5.4 to ripnews-20200311 - add option to only rip one group - add option to list configured groups From 666a480c3f0483fb34d25096d3a58d68cbf0e46e Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Sun, 18 Oct 2020 16:08:17 +0200 Subject: [PATCH 3/6] be stricter on type --- news/article.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/news/article.rb b/news/article.rb index cbfe52a..5df84ef 100644 --- a/news/article.rb +++ b/news/article.rb @@ -331,7 +331,8 @@ def get_articles(cachedir=false) art[id] = {} unless art.has_key?(id) begin - lastdate = art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d') + lastdate = DateTime.parse(date) + art[id]["date"] = DateTime.parse(date).strftime('%Y%m%d') rescue puts $!.message puts id @@ -361,7 +362,7 @@ def get_articles(cachedir=false) end } if @maxage and @maxage > 0 - if DateTime.parse(lastdate) < ( DateTime.now - @maxage ) + if lastdate < ( DateTime.now - @maxage ) puts "Skipping articles older than #{DateTime.now - @maxage}" break end From cde8e59f3ed13450d7cd44e26dbe6935fef4ffbc Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Fri, 12 Mar 2021 13:06:07 +0100 Subject: [PATCH 4/6] debugging crap --- ripnews.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ripnews.rb b/ripnews.rb index 1f81e12..035ba55 100755 --- a/ripnews.rb +++ b/ripnews.rb @@ -774,6 +774,9 @@ def main @decode_file_lock = Mutex.new profile_mem("#{group} start") puts "\nGetting articles for #{group}" +puts "nntpserver #{@config[group]["NNTPSERVER"]}" +puts "newsrcname #{@config[group]["NEWSRCNAME"]}" +puts "maxage #{@config[group]["MAXAGE"]}" @articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"], @config[group]["MAXAGE"]) 
fill_preselector(group) puts "initialized" From 8c20f8435f4a583f856d86941e66909eed91f1bb Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Thu, 1 Apr 2021 14:35:36 +0200 Subject: [PATCH 5/6] be much more aggressive in scrubbing caches. --- news/article.rb | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/news/article.rb b/news/article.rb index 5df84ef..baded62 100644 --- a/news/article.rb +++ b/news/article.rb @@ -114,6 +114,10 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0) @serverlist = [] @serverpasses = {} @maxage = maxage.to_i + @oldestallowed = 0 + if @maxage != 0 + @oldestallowed = (DateTime.now - @maxage).strftime('%Y%m%d').to_i + end tmplist = nntpservers.split('|') tmplist.each{ |server| @@ -355,10 +359,12 @@ def get_articles(cachedir=false) # dit wellicht alleen doen indien preselector hem uitkiest # en anders een leuk regeltje aan de cache toevoegen, # maar niet in het geheugen houden - if preselect(art[id]["subject"]) + if art[id]["date"].to_i >= @oldestallowed && preselect(art[id]["subject"]) add(id.to_i, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server) end - cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server) + if art[id]["date"].to_i >= @oldestallowed + cache_add(cachedir, id, art[id]["messid"], art[id]["date"], art[id]["from"], art[id]["subject"], server) + end end } if @maxage and @maxage > 0 @@ -826,6 +832,7 @@ def save_newsrc() end def cache_add(cachedir, id, messid, date, from, subject, server) + # also add to skip stuff if @cache_buf.has_key?(server) @cache_buf[server].push("#{id}|#{messid}|#{date}|#{from}|#{subject}\n") else @@ -850,7 +857,7 @@ puts "#{Time.now} Reading & scrubbing caches" @connections.keys.each{|server| first = @connections[server]["first"] last = @connections[server]["last"] - #cache_scrub(cachedir, server) + cache_scrub(cachedir, server) puts " #{Time.now} Reading cache for 
#{server}" excludes[server] = {} @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} @@ -877,11 +884,13 @@ puts " #{Time.now} Reading cache for #{server}" if first <= id_i and id_i <= last if ! excludes[server].has_key?(id_i) outfile.puts(line) - if preselect(subject) + if date.to_i >= @oldestallowed && preselect(subject) add(id_i, messid, date, from, subject, server) end # XXX alle traagheid van de cache_read zit in deze regel: - @connections[server]["skip_ids"].insert!(id_i) + if date.to_i < @oldestallowed + @connections[server]["skip_ids"].insert!(id_i) + end end end end @@ -936,11 +945,19 @@ p Time.now outfile = File.new("#{filename}.#{server}.new", "w") or puts "Couldn't open cachefile for writing" infile.each{ |line| id, messid, date, subject = line.split("|", 3) + #puts "#{date.to_i} #{@oldestallowed}" + # XXX maybe also add to skipids ?? + next if date.to_i < @oldestallowed if id.to_i >= @connections[server]["first"] and id.to_i <= @connections[server]["last"] outfile.puts(line) end } + if ( FileUtils.move("#{filename}.#{server}.new", "#{filename}.#{server}") ) + puts " #{Time.now} Cache scrubbed for #{server}" + else + puts "Couldn't scrub #{server} cache" + end end p Time.now end From cb321d68eda51b1cf4c525e6192e4aaf4e66d806 Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Thu, 9 Jan 2025 07:45:12 +0100 Subject: [PATCH 6/6] some fixes --- encode/uuencode.rb | 3 ++- news/article.rb | 19 ++++++++----------- ripnews.rb | 44 +++++++++++++++++++++++++------------------- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/encode/uuencode.rb b/encode/uuencode.rb index 6058f65..77c4f67 100644 --- a/encode/uuencode.rb +++ b/encode/uuencode.rb @@ -90,7 +90,8 @@ def _uudecode_file(file, outfile) next if line =~ /[a-z]/ next if line == nil next unless ((((line[0].ord - 32) & 077) + 2) / 3).to_i == (line.length/4).to_i - line.unpack("u").each{|x| outfile.print x} + #line.unpack("u").each{|x| outfile.print x} + 
line.unpack("u").each{|x| outfile.write x} end puts "No \"end\" found!!!" diff --git a/news/article.rb b/news/article.rb index baded62..8118343 100644 --- a/news/article.rb +++ b/news/article.rb @@ -1,6 +1,3 @@ -# $Dwarf: article.rb,v 1.114 2005/05/12 07:39:53 ward Exp $ -# $Source$ - # # Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts # @@ -23,7 +20,7 @@ require Pathname.new(__FILE__).dirname + '../news/newsrc' require 'tempfile' require 'timeout' #require 'yaml' -require 'profiler' +#require 'profiler' # removed from ruby? class ArticleError < RuntimeError; end class TempError < ArticleError; end @@ -152,7 +149,7 @@ def initialize(nntpservers, groupname, newsrc="~/.newsrc", maxage=0) # p server # p Time.now begin - timeout(60) do + Timeout.timeout(60) do #p "connecting" @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass']) end @@ -180,7 +177,7 @@ def reconnect(server) retries = 0 begin puts "Trying to kill old connection #{Time.now}" - timeout(10) do + Timeout.timeout(10) do @connections[server]["nntp"].quit end puts "Killed old connection #{Time.now}" @@ -192,7 +189,7 @@ def reconnect(server) puts "Trying to reconnect #{Time.now}" sleep 3 #timeout(180) do - timeout(60) do + Timeout.timeout(60) do @connections[server]["nntp"] = Net::KANNTP.new(server, 119, @serverpasses[server]['user'], @serverpasses[server]['pass']) end resp = @connections[server]["nntp"].mode_reader @@ -391,7 +388,7 @@ def get_group_info(server) first = "" last = "" begin - timeout(30) do + Timeout.timeout(30) do begin resp, count, first, last, name = @connections[server]["nntp"].group(@group) rescue Net::NNTPReplyError @@ -425,7 +422,7 @@ def get_xhdr(server, range, header) resp = "" lines = [] begin - timeout(180) do + Timeout.timeout(180) do begin p Time.now if Debuglevel > 1 puts "getting headers: #{header}, #{range}" if Debuglevel > 1 @@ -481,7 +478,7 @@ def get_xover(server, range) ed = start end begin - 
timeout(180) do + Timeout.timeout(180) do begin p Time.now if Debuglevel > 1 puts "getting headers: #{range}" if Debuglevel > 1 @@ -539,7 +536,7 @@ def get_body(server, message) messid = "" list = [] begin - timeout(180) do + Timeout.timeout(180) do begin list = [] resp, id, messid, list = @connections[server]["nntp"].body(message) diff --git a/ripnews.rb b/ripnews.rb index 035ba55..2af18c7 100755 --- a/ripnews.rb +++ b/ripnews.rb @@ -50,20 +50,20 @@ def aprofile_mem(group) groups = {} ObjectSpace.each_object { |x| if not [Array,Hash].include? x.class - e = nil - begin - e = MEntry.new( x.class, Marshal::dump(x).size ) - rescue TypeError # undumpable - e = MEntry.new( x.class, 0 ) + e = nil + begin + e = MEntry.new( x.class, Marshal::dump(x).size ) + rescue TypeError # undumpable + e = MEntry.new( x.class, 0 ) + end + if groups.has_key? e.c + groups[e.c].mem += e.mem + groups[e.c].total += 1 + else + groups[e.c] = GroupEntry.new( e.c, e.mem, 1 ) + end end - if groups.has_key? e.c - groups[e.c].mem += e.mem - groups[e.c].total += 1 - else - groups[e.c] = GroupEntry.new( e.c, e.mem, 1 ) - end - end - } + } File.open( "mem_log", "a+" ) { |file| file << "Group #{group}\n" total = 0 @@ -547,11 +547,17 @@ def get_multi(subj, group) # puts "inside thread post pass\n" if UUEncode.is_uuencoded(tbody) puts " UUDecoding..." - if tfile - tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout) - else - tmode, tfilename, tbody = UUEncode.uudecode(tbody) - end + begin + if tfile + tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout) + else + tmode, tfilename, tbody = UUEncode.uudecode(tbody) + end + rescue Encoding::UndefinedConversionError + puts "#{$!}" + puts " Skipping article..." + Thread.current.exit + end elsif YEnc.is_yencoded(tbody) puts " YDecoding..." 
begin @@ -683,7 +689,7 @@ def get_max_file_length(tempdir=".") name = "a"*500 name = "#$$#{name}" begin - file = File.new("#{tempdir}/#{name}", "w", 0644).close + File.new("#{tempdir}/#{name}", "w", 0644).close FileUtils.rm("#{tempdir}/#{name}") rescue Errno::ENAMETOOLONG name = name[0...-1]