#!/usr/bin/env ruby

# $Dwarf: ripnews.rb,v 1.107 2005/06/06 12:53:10 ward Exp $
# $Source$
#
# Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ward Wouts <ward@wouts.nl>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

require 'date'
begin
  require 'ftools' # Ruby 1.8 only; the library no longer exists in 1.9+
rescue LoadError
  # FileUtils (required below) provides everything this script uses.
end
require 'fileutils'
require 'time'
require 'getoptlong'
require 'pathname'
require Pathname.new(__FILE__).dirname + 'news/article'
require Pathname.new(__FILE__).dirname + 'news/newsrc'
require 'tempfile'
require 'thread'
require 'thwait'
require Pathname.new(__FILE__).dirname + 'encode/uuencode'
require Pathname.new(__FILE__).dirname + 'encode/yenc'

###########################################################################

###########################################################################
# Memory profiling helpers (debugging aid)
# One measured object: its class (+c+) and its marshalled size in bytes (+mem+).
MEntry = Struct.new( "MEntry", :c, :mem ) do
  def to_s
    "#{c} : #{mem}"
  end
end

# Per-class aggregate: class (+c+), summed size (+mem+) and instance count (+total+).
GroupEntry = Struct.new( "GroupEntry", :c, :mem, :total ) do
  def to_s
    "#{mem}\t\t#{c} x#{total}"
  end
end
# No-op profiling hook. The call sites pass a label describing the current
# phase; the body is intentionally empty so the calls cost nothing.
# (aprofile_mem holds the real implementation.)
def profile_mem(group); end
# Real memory profiler (not wired in; profile_mem is the active stub).
#
# In a background thread, walks every live object except Arrays and Hashes,
# sums the Marshal-dumped size per class and appends a report, labelled with
# +group+, to the file "mem_log" in the current directory. The calling thread
# sleeps for 10 seconds and then joins the worker.
def aprofile_mem(group)
  worker = Thread.new do
    per_class = {}
    ObjectSpace.each_object do |obj|
      next if [Array,Hash].include? obj.class
      entry = nil
      begin
        entry = MEntry.new( obj.class, Marshal::dump(obj).size )
      rescue TypeError # undumpable
        entry = MEntry.new( obj.class, 0 )
      end
      if per_class.has_key? entry.c
        per_class[entry.c].mem += entry.mem
        per_class[entry.c].total += 1
      else
        per_class[entry.c] = GroupEntry.new( entry.c, entry.mem, 1 )
      end
    end
    File.open( "mem_log", "a+" ) do |log|
      log << "Group #{group}\n"
      total = 0
      log << "bytes/class/count\n"
      # smallest consumers first
      per_class.to_a.sort_by { |pair| pair[1].mem }.each do |pair|
        log << "#{pair[1]}\n"
        total += pair[1].mem
      end
      log << "TOTAL == #{total}\n\n"
    end
  end
  sleep 10
  worker.join
end
###########################################################################
# Verbosity of diagnostic output; the code checks "> 0", "> 1" and "> 2".
Debuglevel = 1
# Moment the script was loaded; used for the start banner and the
# running-time report printed by `ending`.
@tstart = Time.now

# Store +data+ under a sanitized version of +name+ inside +dir+.
#
# dir::  target directory (must already exist)
# name:: wanted file name; passed through filename_sanitize and cut to
#        @maxfilelength characters
# data:: a String (path of a file to move into place), an Array (chunks
#        that are written out in order) or a Tempfile (moved into place)
#
# If the target name is taken, "-<date.count>" is appended to the (truncated)
# base name until a free name is found. Returns true on success and false on
# any handled failure.
def save_file(dir, name, data)
  puts "savename: #{name}" if Debuglevel > 1

  nname = filename_sanitize(name)
  # all the recodings could have made the filename too long
  base = nname[0...@maxfilelength]
  newname = base
  count = 1
  d = Date.today
  date = "#{d.year}#{d.month}#{d.mday}"
  while File.exist?("#{dir}/#{newname}")
    # Build the alternative from the truncated base so that the suffix fits
    # in the room get_max_file_length reserves for it.
    newname = "#{base}-<#{date}.#{count}>"
    count += 1
  end
  puts "name: #{newname}" if Debuglevel > 1

  target = "#{dir}/#{newname}"
  begin
    case data
    when String
      puts " Moving #{data} to #{dir}/#{newname}"
      begin
        FileUtils.mv(data, target)
        puts " Saved as: '#{newname}'"
      rescue
        puts "couldn't rename tempfile"
        return false
      end
    when Array
      puts " Saving to #{dir}/#{newname}"
      # Block form guarantees the handle is flushed and closed.
      File.open(target, "w:ascii-8bit", 0644) do |file|
        data.each { |chunk| file.write chunk }
      end
      puts " Saved as: '#{newname}'"
    when Tempfile
      puts " Moving #{data} to #{dir}/#{newname}"
      begin
        FileUtils.mv(data.path, target)
        puts " Saved as: '#{newname}'"
      rescue
        puts "couldn't rename tempfile #{data.path}"
        return false
      end
    else
      puts "EEEEPS Can't save data of class: #{data.class.to_s}"
      return false
    end
  rescue Errno::ENOENT, Errno::EINVAL
    puts "Caught #{$!.class} (save_file)"
    puts "Error: #{$!}"
    puts "What the *beep* happened?"
    puts "(if Errno::EINVAL probably evil chars in filename)"
    if data.is_a?(Tempfile)
      puts "keeping #{data.path}"
    end
    return false
  end
  return true
end
# Turn +nname+ into something that is safe to use as a file name.
#
# When the first configured group sets ASCIIFILENAMES the name is converted
# to US-ASCII first (this was added for Mac filesystems). Independent of that
# setting, "/" becomes "-", "<" and ">" are removed, trailing whitespace is
# dropped and leading whitespace, dots and dashes are dropped.
#
# The argument itself is never modified; a new String is returned.
def filename_sanitize(nname)
  nname = nname.dup
  # XXX this is a mac problem, should be configurable per group
  group = @config.keys[0]
  if @config[group].has_key?("ASCIIFILENAMES") && @config[group]["ASCIIFILENAMES"]
    begin
      nname = nname.encode("US-ASCII")
    rescue
      # The name is not convertible (non-ASCII characters or an invalid byte
      # sequence); fall back to a byte-wise translation.
      puts "String#encode couldn't handle: '#{nname}'"
      nname = _ascii_fallback(nname)
    end
  end
  # dangerous chars
  nname = nname.gsub(/\//, "-")
  nname.gsub!(/>/, "")
  nname.gsub!(/</, "")
  # just annoying
  nname.sub!(/\s*\z/, "")
  nname.sub!(/\A[\s\.-]*/, "")
  puts "nname: #{nname}" if Debuglevel > 1
  return nname
end

# Byte-wise ASCII approximation of +name+, used when String#encode gives up.
# Known 8-bit codes get a hand-picked replacement, every other non-ASCII
# byte becomes "#", so the result is always a valid US-ASCII String.
def _ascii_fallback(name)
  replacements = {
    146 => "",  151 => " ", 161 => "i", 164 => "",  171 => " ",
    180 => " ", 183 => " ", 187 => " ", 227 => "~a", 229 => "a",
    231 => "c", 243 => "o", 247 => "S"
  }
  # Work on raw bytes: matching 8-bit patterns against a UTF-8 string raises
  # an encoding compatibility error.
  raw = name.dup.force_encoding("ASCII-8BIT")
  raw.gsub!(/\\\d+/, "#")
  # NOTE(review): the original table maps byte 86 to "U", but 86 is plain
  # "V"; probably a different code was meant. Kept as-is, please confirm.
  raw.gsub!("V", "U")
  raw.gsub!(/[\x80-\xFF]/n) { |byte| replacements.fetch(byte.ord, "#") }
  raw.force_encoding("US-ASCII")
end
# Parse the command line (ARGV) into +options+.
#
# Every recognised switch is stored under its short name ("-g", "-T", ...)
# with its argument as value (an empty String for switches without one).
# Any command line error (unknown switch, missing argument, ...) prints the
# error followed by the usage text; so does -h.
#
# Returns the +options+ hash that was passed in, with the parsed switches
# added.
def parse_options(options)
  begin
    opts = GetoptLong.new(
      [ "-I", "--include",      GetoptLong::REQUIRED_ARGUMENT ],
      [ "-c", "--configfile",   GetoptLong::REQUIRED_ARGUMENT ],
      [ "-g", "--group",        GetoptLong::REQUIRED_ARGUMENT ],
      [ "-h", "--help",         GetoptLong::NO_ARGUMENT ],
      [ "-l", "--list",         GetoptLong::NO_ARGUMENT ],
      [ "-L", "--longname",     GetoptLong::NO_ARGUMENT ],
      [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
      [ "-M", "--multipart",    GetoptLong::NO_ARGUMENT ],
      [ "-s",                   GetoptLong::NO_ARGUMENT ],
      [ "-S", "--singlepart",   GetoptLong::NO_ARGUMENT ],
      [ "-T", "--test",         GetoptLong::NO_ARGUMENT ],
      [ "-X", "--exclude",      GetoptLong::REQUIRED_ARGUMENT ]
    )
    # we print the error ourselves
    opts.quiet = true

    opts.each do |opt, arg|
      options[opt] = arg
    end
  rescue GetoptLong::Error
    # Base class of InvalidOption, MissingArgument, NeedlessArgument and
    # AmbiguousOption: all of them are user errors, not crashes.
    puts "#{$!}"
    usage
  end
  options["-h"] && usage
  return options
end
# Print the command line synopsis on standard output and terminate the
# program.
def usage
  puts <<EOT

Usage:

ripnews.rb [-I <pattern>] [-c <file>] [-g <group>] [-h] [-l] [-L] [-C] [-M] [-s] [-S] [-T] [-X <pattern>]

-I <pattern>, --include <pattern>  specify an include pattern
-c <file>, --configfile <file>     specify an alternate configfile
-g <group>, --group <group>        only rip specified group
-h, --help                         display this help and exit
-l, --list                         list configured groups and exit
-L, --longname                     use subject as filename
-C, --combinedname                 use combined filenames
-M, --multipart                    get multipart articles
-s                                 exit silently if already running
-S, --singlepart                   get singlepart articles
-T, --test                         test mode, don't update newsrc file
-X <pattern>, --exclude <pattern>  specify an exclude pattern

EOT
  exit
end
# Read the configuration file named by default['-c'] and build the per-group
# configuration.
#
# default:: hash of global settings (normally the parsed command line
#           options). It serves as the starting point for the global section
#           of the file and is NOT modified, so the same hash can be parsed
#           again (HUP reload) without "+=" values piling up.
#
# File format, as implemented below:
# * "#" starts a comment; leading and trailing whitespace is ignored
# * "${HOME}" is replaced by the HOME environment variable
# * "INCLUDEFILE=<path>" is replaced by the lines of that file
# * a trailing "\" joins the line with the next one
# * "OPT_x=value" is shorthand for "-x=value"
# * "key = value" sets and "key += value" appends
# * "name1|name2 {" opens a section for one or more groups, "}" closes it
#   and fills in every global setting the groups did not set themselves
#
# Returns a hash { group => { key => value } }, or false after printing a
# message when the file cannot be parsed.
def parse_config(default = {})
  if ! default.has_key?('-s')
    puts "Parsing config"
    puts "#{default['-c']}"
  end

  # Private copy: the global section below writes and appends to it.
  globals = {}
  default.each { |k, v| globals[k] = v.is_a?(String) ? v.dup : v }
  default = globals

  home = ENV['HOME'].to_s
  cfgfile = "#{default['-c']}"
  lines = FileTest.readable?(cfgfile) ? File.readlines(cfgfile) : []

  # Pass 1: expand include files (one level deep).
  expanded = []
  lines.each do |raw|
    raw = raw.gsub(/\$\{HOME\}/) { home }
    if raw =~ /^\s*INCLUDEFILE=(.*?)\s*$/i
      incfile = $1
      unless FileTest.readable?(incfile)
        puts "Error parsing config: can't read include file '#{incfile}'"
        return false
      end
      expanded.concat(File.readlines(incfile))
    else
      expanded << raw
    end
  end

  # Pass 2: strip indentation, comments and trailing whitespace.
  lines = expanded.collect do |raw|
    raw.sub(/^\s*/, "").sub(/\#.*$/, "").sub(/\s*$/, "").gsub(/\$\{HOME\}/) { home }.chomp
  end

  i = 0
  group = ""
  grouparr = []
  config = {}

  while i < lines.length
    line = lines[i].dup
    # Join continuation lines.
    while line.sub!(/\s*\\$/, "") != nil
      # A backslash on the very last line has nothing left to join.
      break if i + 1 >= lines.length
      line << lines[i+1]
      i += 1
    end
    line.sub!(/\s*$/, "")
    i += 1
    if line =~ /^OPT_(.*?)=(.*)/
      line = "-#{$1}=#{$2}"
    end
    puts "#{i}: #{line}" if Debuglevel > 1

    if line =~ /(.*?)\s*\+=\s*(.*)/
      key, value = $1, $2
      if group == ""
        if default.has_key?(key)
          default[key] << value
        else
          default[key] = value
        end
      else
        grouparr.each do |g|
          if config[g].has_key?(key)
            config[g][key] << value
          elsif default.has_key?(key)
            config[g][key] = default[key] + value
          else
            # dup: groups sharing a section must not share one String,
            # a later "+=" would otherwise change all of them
            config[g][key] = value.dup
          end
        end
      end
    elsif line =~ /(.*?)\s*=\s*(.*)/
      key, value = $1, $2
      if group == ""
        default[key] = value
      else
        grouparr.each { |g| config[g][key] = value.dup }
      end
    elsif line =~ /(.*?)\s*\{/
      group = $1
      grouparr = group.split('|')
      grouparr.each do |g|
        config[g] = {} unless config.has_key?(g)
      end
    elsif line =~ /^\}$/
      # Section done: inherit every global the groups did not override.
      default.each_key do |x|
        grouparr.each do |g|
          config[g][x] = default[x].dup unless config[g].has_key?(x)
        end
      end
      group = ""
      grouparr = []
    elsif line =~ /^$/
      next
    else
      puts "Error parsing config on line: #{i}"
      return false
    end
  end

  if group != ""
    puts "Error parsing config: group not terminated on line #{i}"
    return false
  end

  if Debuglevel > 2
    config.each_key do |x|
      puts "Group: #{x}"
      config[x].each_key do |y|
        puts "Key: '#{y}' => Value: '#{config[x][y]}'"
      end
    end
  end
  return config
end
# Validate @config and fill in derived settings for every group.
#
# * exits when no group is configured at all
# * warns about groups without an include pattern ("-I")
# * DATADIR defaults to "." and PERMISSION to "0755"
# * EXTENSIONS sets both "-S" and "-M"; DELEXT sets both "-SD" and "-MD"
# * when only one of "-S"/"-M" is given, the other one is set to a pattern
#   that can never match
def check_config
  if @config.length == 0
    puts "No configuration, nothing to do"
    exit
  end
  @config.each_key do |i|
    conf = @config[i]
    unless conf.has_key?("-I")
      puts "No inclusions given for group #{i}. Won't match anything."
    end
    conf["DATADIR"] = "." unless conf.has_key?("DATADIR")
    conf["PERMISSION"] = "0755" unless conf.has_key?("PERMISSION")
    if conf.has_key?("EXTENSIONS")
      conf["-S"] = conf["EXTENSIONS"]
      conf["-M"] = conf["EXTENSIONS"]
    end
    if conf.has_key?("DELEXT")
      conf["-SD"] = conf["DELEXT"]
      conf["-MD"] = conf["DELEXT"]
    end
    # "(?!.*)" is a negative lookahead that always fails: matches nothing.
    conf["-M"] = "(?!.*)" if conf.has_key?("-S") && !conf.has_key?("-M")
    conf["-S"] = "(?!.*)" if conf.has_key?("-M") && !conf.has_key?("-S")
  end
end
# Take the run lock, if the first configured group defines LOCKFILE.
#
# The lock file holds the PID of the owning process. If it names a process
# that still exists, the program exits (silently when "-s" is set). A lock
# file that is empty, unparsable or names a dead process is considered
# stale and replaced by a fresh one containing our own PID.
def lock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  lockfile = @config[group]["LOCKFILE"]

  if File.exist?(lockfile)
    pid = File.open(lockfile, "r") { |f| f.gets }
    if pid
      pid.chomp!
      if _lock_holder_alive?(pid.to_i)
        if ! @config[group].has_key?('-s')
          puts "Already running, exiting..."
        end
        exit
      end
      puts "Stale lock found... removing..."
      File.unlink(lockfile)
    else
      puts "Empty lockfile found... removing..."
      File.unlink(lockfile)
    end
  end

  File.open(lockfile, "w") { |f| f.print "#{Process.pid}\n" }
end

# True when +pid+ names an existing process.
def _lock_holder_alive?(pid)
  # 0 (e.g. from unparsable content) and negative numbers would signal whole
  # process groups, ourselves included, and always look "alive".
  return false if pid <= 0
  Process.kill(0, pid)
  true
rescue Errno::ESRCH
  false
rescue Errno::EPERM
  # The process exists but belongs to somebody else.
  true
end
# Release the run lock taken by `lock`.
#
# Does nothing when the first configured group has no LOCKFILE (the lock is
# optional) or when the file has already disappeared.
def unlock
  group = @config.keys[0]
  lockfile = @config[group]["LOCKFILE"]
  return unless lockfile
  File.unlink(lockfile)
rescue Errno::ENOENT
  # somebody removed it for us; nothing left to do
  nil
end
# Remove leftover "riptmp*" temp files from every group's TEMPDIR.
#
# Groups without a TEMPDIR are skipped; without that check the glob would
# expand to "/riptmp*" and delete matching files from the filesystem root.
# Files that cannot be removed are reported and skipped.
def cleantmp
  puts "Removing old tmp files"
  @config.each_key do |group|
    tempdir = @config[group]["TEMPDIR"]
    next if tempdir.nil? || tempdir.to_s.empty?
    Dir.glob("#{tempdir}/riptmp*") do |file|
      begin
        File.delete(file)
      rescue
        puts "Couldn't remove #{file}"
        next
      end
    end
  end
end

# Apply the NICE setting of the first configured group, if there is one, as
# the scheduling priority of this process.
def renice
  first = @config.keys[0]
  return unless @config[first].has_key?("NICE")
  niceness = @config[first]["NICE"].to_i
  Process.setpriority(Process::PRIO_PROCESS, 0, niceness)
end
# Fetch, decode and save a single-part article.
#
# subj::  subject identifying the article within the current group
# group:: group name, used to look up the extension filters
#
# The body is fetched through @articles, recognised as uuencoded or
# yEnc-encoded, filtered on file extension (check_ext) and handed to
# output_data. Returns true when the article was decoded and passed on,
# false when it was skipped.
def get_single(subj, group)
  puts "Fetching singlepart article: #{subj}"
  body = @articles.get_group_body(subj)

  if UUEncode.is_uuencoded(body)
    filename = UUEncode.get_filename(body)
    puts " filename #{filename}"
    unless check_ext(group, filename, "s", subj)
      puts " Skipping article..."
      return false
    end
    puts " UUDecoding..."
    mode, filename, body = UUEncode.uudecode(body)
  elsif YEnc.is_yencoded(body)
    filename = YEnc.get_filename(body)
    unless check_ext(group, filename, "s", subj)
      puts " Skipping article..."
      return false
    end
    puts " YDecoding..."
    mode, filename, body = YEnc.ydecode(body)
  else
    puts " Unknown encoding (not UU, not yEnc), skipping..."
    return false
  end

  # the decoders signal failure through a false mode
  if mode == false
    puts " Decoding failed skipping article..."
    return false
  end

  output_data(subj, mode, filename, body)
  return true
end
# Fetch a multi-part article and decode it in a background thread.
#
# subj::  subject identifying the article within the current group
# group:: group name, used for the extension filters and TEMPDIR
#
# Only the first part is fetched up front, to find out the encoding and the
# file name. When the extension filter accepts it, the remaining parts are
# fetched either into memory (no TEMPDIR configured) or into a temp file.
# Decoding and saving then happen in a new thread that is appended to
# @decode_threads, so fetching can continue meanwhile.
#
# Returns true when a decode thread was started, false when the article was
# skipped.
def get_multi(subj, group)
  puts "Fetching multipart article: #{subj}"
  body = @articles.get_group_body_first(subj)

  if UUEncode.is_uuencoded(body)
    filename = UUEncode.get_filename(body)
    puts " filename #{filename}"
  elsif YEnc.is_yencoded(body)
    puts "yenc"
    filename = YEnc.get_filename(body)
    puts "filename #{filename}"
  else
    puts " Unknown encoding (not UU, not yEnc), skipping..."
    return false
  end
  unless check_ext(group, filename, "m", subj)
    puts " Skipping article..."
    return false
  end

  file = nil     # encoded input, only used in temp file mode
  fileout = nil  # decoded output, only used in temp file mode
  tempdir = @config[group]["TEMPDIR"]
  if tempdir == nil or tempdir == ""
    bodyrest = @articles.get_group_body_rest(subj)
    unless bodyrest
      puts " Skipping article..."
      return false
    end
    body.concat(bodyrest)
  else
    file = Tempfile.new("riptmp", tempdir, :encoding => 'ascii-8bit')
    body.each do |x|
      file.write x
      file.write "\n"
    end

    unless @articles.get_group_body_rest(subj, file)
      puts " Skipping article..."
      # don't leave the partial download on disk
      file.close(true)
      return false
    end
    fileout = Tempfile.new("riptmp", tempdir)
  end

  @decode_threads << Thread.new(body, file, fileout, subj) do |tbody, tfile, tfileout, tsubj|
    Thread.pass
    if UUEncode.is_uuencoded(tbody)
      puts " UUDecoding..."
      if tfile
        tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout)
      else
        tmode, tfilename, tbody = UUEncode.uudecode(tbody)
      end
    elsif YEnc.is_yencoded(tbody)
      puts " YDecoding..."
      begin
        if tfile
          tmode, tfilename, tbody = YEnc.ydecode(tfile, tfileout)
        else
          tmode, tfilename, tbody = YEnc.ydecode(tbody)
        end
      rescue YencError
        # XXX if there is a yenc problem I want the data so I can research it
        output_data(tsubj, 0600, "YencProblem", tbody)
        # XXX return succes even though it's not true
        Thread.current.exit
      rescue PermError
        puts "#{$!}"
        puts " Skipping article..."
        Thread.current.exit
      end
    end
    if tmode == false
      puts " Decoding failed skipping article..."
      Thread.current.exit
    end
    if tfile
      # Serialized so two threads cannot pick the same free name.
      @decode_file_lock.synchronize {
        # horrible cheat to not lose the outputted file: move it to a name
        # of our own and hand that path to output_data
        tbody = tfileout.path
        tbodybase = tbody.sub(/\/[^\/]*$/, "/ripnewsdecode")
        i = 1
        while File.exist?("#{tbodybase}-#{i}")
          i += 1
        end
        FileUtils.mv(tbody, "#{tbodybase}-#{i}")
        tbody = "#{tbodybase}-#{i}"
        tfile.close
        tfileout.close(false)
      }
    end
    output_data(tsubj, tmode, tfilename, tbody)
  end # thread end

  return true
end
# Hand the group's include pattern ("-I") to @articles as preselect pattern.
#
# Nothing is set when the group has no "-I", or when "-MRR" (mark remaining
# read) is active for it.
def fill_preselector(group)
  conf = @config[group]
  return unless conf.has_key?("-I")
  return if conf.has_key?("-MRR") && conf["-MRR"]
  @articles.set_preselect_pattern(Regexp.new(conf["-I"]))
end
# Choose the output file name for a decoded article and save it.
#
# subject::  article subject; also the key used to mark the article read
# mode::     file mode reported by the decoder (only printed here)
# filename:: file name found in the encoded data
# body::     decoded data in a form save_file accepts
#
# Naming, depending on the group's settings:
# * "-L":  the cleaned-up subject
# * "-C":  "<subject> [<filename>]"
# * "-CP": "<subject> [<poster>] [<filename>]"
# * otherwise just the file name
# All variants are limited to @maxfilelength characters; the combined forms
# fall back to the plain file name when they do not fit.
#
# After a successful save the article is marked read and the newsrc is
# written, both under @newsrc_lock.
def output_data(subject, mode, filename="", body="")
  group = @articles.get_groupname
  puts " mode: #{mode}" if Debuglevel > 0
  puts " Filename: '#{filename}'" if Debuglevel > 0

  # de-crap subject...
  sub = subject.sub(/\s*$/, "") # strip trailing spaces
  # strip leading spaces, dots, exclamation points, double quotes, hashes
  # and dashes (the dash is last in the class so it is taken literally)
  sub.sub!(/^[\s\.!"#-]*/, "")

  # decide on a filename
  conf = @config[group]
  if conf.has_key?("-L") && conf["-L"]
    puts "longname" if Debuglevel > 1
    outfile = sub[0...@maxfilelength]
  elsif conf.has_key?("-C") && conf["-C"]
    puts "combinedname" if Debuglevel > 1
    outfile = sub[0...@maxfilelength-filename.length-3]
    outfile = "#{outfile} [#{filename}]"
    if outfile.length > @maxfilelength
      outfile = filename[0...@maxfilelength]
    end
  elsif conf.has_key?("-CP") && conf["-CP"]
    puts "combinedname" if Debuglevel > 1
    poster = @articles.get_group_poster(subject)
    outfile = sub[0...@maxfilelength-poster.length-filename.length-6]
    outfile = "#{outfile} [#{poster}] [#{filename}]"
    if outfile.length > @maxfilelength
      outfile = filename[0...@maxfilelength]
    end
  else
    puts "shortname" if Debuglevel > 1
    outfile = filename[0...@maxfilelength]
  end

  # do the actual saving
  if save_file("#{conf["DATADIR"]}/#{group}", outfile, body)
    @newsrc_lock.synchronize {
      @articles.group_update_newsrc(subject)
      _save_newsrc(group)
    }
  end
end
# Decide from its extension whether +filename+ should be downloaded.
#
# group::    group whose filters apply
# filename:: file name taken from the encoded article
# mode::     "s" for single-part articles (filters "-S"/"-SD"),
#            "m" for multi-part articles (filters "-M"/"-MD")
# subject::  article subject, used to mark unwanted articles as read
#
# A name matching the delete filter ("-SD"/"-MD") marks the article read and
# yields false. Otherwise the result is the match against the wanted filter
# ("-S"/"-M": a match position or nil), or true when no such filter exists.
# Any other +mode+ is a programming error and terminates the program.
def check_ext(group, filename, mode, subject)
  case mode
  when "s"
    wanted_key, delete_key = "-S", "-SD"
  when "m"
    wanted_key, delete_key = "-M", "-MD"
  else
    puts "Illegal mode \"#{mode}\" in check_ext"
    exit
  end

  conf = @config[group]
  if conf.has_key?(delete_key) && ( filename =~ /\.(#{conf[delete_key]})$/ )
    puts "Marking '#{subject}' as read"
    @articles.group_update_newsrc(subject)
    return false
  end
  return conf.has_key?(wanted_key) ? ( filename =~ /\.(#{conf[wanted_key]})$/ ) : true
end
# Find out how long a file name may be in +tempdir+.
#
# A probe name (our PID followed by 500 "a"s) is shortened one character at
# a time until the filesystem accepts it. The probe is created exclusively,
# so a file that happens to exist already is never truncated or removed.
#
# tempdir:: directory to probe; nil or "" means the current directory
#
# Returns the accepted length minus 14, the room save_file may need for its
# "-<date.count>" suffix. Terminates the program when +tempdir+ is not a
# writable directory or keeps disappearing while probing.
def get_max_file_length(tempdir=".")
  tempdir = "." if tempdir.nil? || tempdir.to_s.empty?
  if ! FileTest.directory?("#{tempdir}") || ! FileTest.writable?("#{tempdir}")
    puts "Tempdir '#{tempdir}' is not a writable directory"
    exit
  end

  name = "#{Process.pid}#{"a" * 500}"
  enoent_left = 3
  until name.empty?
    path = "#{tempdir}/#{name}"
    begin
      File.open(path, File::WRONLY | File::CREAT | File::EXCL, 0644) { }
      File.delete(path)
      break
    rescue Errno::EEXIST
      # Somebody owns a file of this name already, so the length is fine.
      break
    rescue Errno::ENAMETOOLONG
      name = name[0...-1]
    rescue Errno::ENOENT
      puts "#{$!}"
      puts "strange..."
      enoent_left -= 1
      # give up instead of spinning forever when the directory is gone
      exit if enoent_left <= 0
    end
  end

  # this is how many characters are still likely to be appended
  # if the filename already exists: '-<#{date}.#{count}>' in save_file
  return name.length - 14
end

# Comparator for "natural" ordering: both arguments are cut into runs of
# digits and runs of other characters; digit runs are compared as numbers,
# everything else as text. Returns -1, 0 or 1, usable as a sort block.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)
  left.each do |lpart|
    rpart = right.shift
    both_numeric = (lpart.to_s =~ /^[0-9]+$/) && (rpart.to_s =~ /^[0-9]+$/)
    order = if both_numeric
              lpart.to_i <=> rpart.to_i
            else
              lpart.to_s <=> rpart.to_s
            end
    return order unless order == 0
  end
  # a is a prefix of b when b still has pieces left
  right.empty? ? 0 : -1
end
# Prepare a run: parse the command line and the config file, validate the
# configuration, take the lock, set the priority, install the HUP handler
# (re-reads the config), clean old temp files and determine the maximum file
# name length.
#
# With "-l" the configured groups are listed and the program exits. This is
# done before locking and cleaning, so listing neither disturbs temp files
# nor leaves a lock file behind.
def startup
  $stdout.sync = true # line buffered output
  @defaults = {'-c' => "#{ENV['HOME']}/.ripnewsrc"}
  @defaults = parse_options(@defaults)
  @config = parse_config(@defaults)
  exit if @config == false
  check_config

  if @defaults["-l"]
    puts "The following groups have been configured:\n\n"
    @config.keys.sort.each do |group|
      puts group
    end
    exit
  end

  lock
  renice

  trap("HUP") {
    puts "Rereading config..."
    config = parse_config(@defaults)
    if config != false
      @config = config
      check_config
      puts "Done reading config"
    else
      puts "Keeping old config due to errors"
    end
  }

  cleantmp
  @maxfilelength = get_max_file_length(@config[@config.keys[0]]["TEMPDIR"])

  puts "Starting: #{@tstart}"

  if Debuglevel > 2
    @config.each_key do |i|
      puts "Group: #{i}"
      @config[i].each_key do |j|
        puts "Opt: #{j} val: #{@config[i][j]}"
      end
    end
  end
end
# Process every configured group (or only the one given with "-g").
#
# Per group: load the article list, make sure the output directory exists
# and walk the subjects in natural order. For each subject the first
# matching rule wins:
# 1. "-MR"  subject pattern: mark read
# 2. "-MRF" poster pattern:  mark read
# 3. "-MRO" age in days:     mark read when older
# 4. subject matches "-I" and not "-X": download
# 5. poster matches "-IF" and not "-XF": download
# 6. otherwise: _mark_remaining decides
# Afterwards the decode threads are awaited and the newsrc is saved.
#
# A group without "-I" never matches rule 4; interpolating the missing
# pattern would give an empty regexp, which matches every subject.
def main
  profile_mem("out side of loop still")
  @config.keys.sort.each do |group|
    next if ! @defaults["-g"].nil? && group != @defaults["-g"]

    @decode_threads = []
    @newsrc_lock = Mutex.new
    @decode_file_lock = Mutex.new
    profile_mem("#{group} start")
    puts "\nGetting articles for #{group}"
    @articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"], @config[group]["MAXAGE"])
    fill_preselector(group)
    puts "initialized"
    @articles.get_articles(@config[group]["CACHEDIR"])

    profile_mem("#{group} articles read")
    _create_group_dir(group)

    subjcount = 0
    @articles.get_group_subjects.sort { |a, b| ward_sort(a, b) }.each do |subj|
      puts "#{subj}" if Debuglevel > 2
      poster = @articles.get_group_poster(subj)
      postdate = @articles.get_group_date(subj)

      # @config is looked up afresh each time: a HUP may have replaced it
      if @config[group].has_key?("-MR") && subj =~ /#{@config[group]["-MR"]}/
        # explicitly mark as read
        puts "Marking '#{subj}' as read"
        subjcount += 1
        _mark_read(subj)
      elsif @config[group].has_key?("-MRF") && poster =~ /#{@config[group]["-MRF"]}/
        puts "Marking poster '#{poster}' as read (subj: '#{subj}')"
        subjcount += 1
        _mark_read(subj)
      elsif @config[group].has_key?("-MRO") &&
            postdate < (Time.now-(@config[group]["-MRO"].to_i*24*60*60)).strftime('%Y%m%d').to_i
        # mark read old: dates are compared as YYYYMMDD integers
        puts "Marking date '#{postdate}' as read (subj: '#{subj}')"
        subjcount += 1
        _mark_read(subj)
      elsif !(@config[group].has_key?("-X") && subj =~ /#{@config[group]["-X"]}/) &&
            @config[group].has_key?("-I") && subj =~ /#{@config[group]["-I"]}/
        # get the juicy bits
        puts "Match: #{subj}" if Debuglevel > 0
        _get_article(subj, group)
      elsif !(@config[group].has_key?("-XF") && poster =~ /#{@config[group]["-XF"]}/) &&
            @config[group].has_key?("-IF") && poster =~ /#{@config[group]["-IF"]}/
        puts "Poster match: #{poster}" if Debuglevel > 0
        _get_article(subj, group)
      else
        subjcount += 1
        _mark_remaining(subj, group)
      end

      # prevent having to start all over in case of a crash
      # the number of subjects on which to sync should be balanced
      # between the time it takes to sync and the time it takes to read them
      # this is just an initial guess
      # the counter is only used for the mark read stuff, as the get article
      # stuff saves the newsrc anyway
      if subjcount >= 150
        _save_newsrc(group)
        subjcount = 0
      end
    end

    _wait_for_threads
    _save_newsrc(group)

    @articles.quit
    @articles = nil
    profile_mem("#{group} pre-GC")
    GC.start
    profile_mem("#{group} end")
  end
end
# Print the finish time and the total running time (HH:MM:SS, measured from
# @tstart) and release the run lock.
def ending
  tend = Time.now
  puts "\nFinished: #{tend}"
  runtime = (tend - @tstart).to_i
  hours, rest = runtime.divmod(3600)
  minutes, seconds = rest.divmod(60)
  printf("Running time: %02d:%02d:%02d\n", hours, minutes, seconds)
  unlock
end
# Make sure the output directory "<DATADIR>/<group>" exists, creating it
# with the group's PERMISSION (an octal string) when necessary. Terminates
# the program with a message when the directory cannot be created.
def _create_group_dir(group)
  dir = "#{@config[group]["DATADIR"]}/#{group}"
  return if FileTest.directory?(dir)
  begin
    Dir.mkdir(dir, @config[group]["PERMISSION"].oct)
  rescue SystemCallError
    # Dir.mkdir reports failure by raising, not through its return value
    puts "eeeps, couldn't create dir"
    puts "Error: #{$!}"
    exit
  end
end
# True when the group sets PRIMARYTHRES and the percentage that @articles
# reports for +subj+ on the primary server is below it; a message is printed
# in that case. False otherwise.
def _primary_thres_skip(subj, group)
  return false unless @config[group].has_key?("PRIMARYTHRES")
  percentage = @articles.group_percentage_primary(subj)
  return false unless percentage < @config[group]["PRIMARYTHRES"].to_i
  puts "Only #{percentage}% on primary, skipping: #{subj}"
  return true
end
# True when the group sets FALLBACKTHRES and the percentage that @articles
# reports for +subj+ as fallback-only is above it; a message is printed in
# that case. False otherwise.
def _fallback_thres_skip(subj, group)
  return false unless @config[group].has_key?("FALLBACKTHRES")
  percentage = @articles.group_percentage_fallback(subj)
  return false unless percentage > @config[group]["FALLBACKTHRES"].to_i
  puts "#{percentage}% only on fallback, skipping: #{subj}"
  return true
end
# Record +subj+ as read by passing it to @articles.group_update_newsrc.
def _mark_read(subj)
  return @articles.group_update_newsrc(subj)
end
# Download one article if it is complete and passes the server thresholds.
#
# Incomplete articles are only reported. Both threshold checks are always
# evaluated, so each can print its message. TempError, PermError and
# YencError raised while fetching are reported and the article is skipped.
def _get_article(subj, group)
  unless @articles.group_is_complete(subj)
    puts "Not complete: #{subj}"
    return
  end

  below_primary = _primary_thres_skip(subj, group)
  above_fallback = _fallback_thres_skip(subj, group)
  return if below_primary || above_fallback

  begin
    if @articles.group_is_singlepart(subj)
      get_single(subj, group)
    elsif @articles.group_is_multipart(subj)
      get_multi(subj, group)
    end
  rescue TempError, PermError, YencError
    puts "#{$!}"
    puts " Skipping article..."
  end
end
# Mark +subj+ read when "mark remaining read" ("-MRR") is active for the
# group and the subject matches neither the exclude pattern ("-X") nor the
# include pattern ("-I").
def _mark_remaining(subj, group)
  conf = @config[group]
  return unless conf.has_key?("-MRR") && conf["-MRR"]
  return if conf.has_key?("-X") && subj =~ /#{conf["-X"]}/
  return if subj =~ /#{conf["-I"]}/
  puts "Marking remaining '#{subj}' as read"
  @articles.group_update_newsrc(subj)
end
# Block until every thread in @decode_threads has finished, reporting each
# one as it terminates. @articles is disconnected before waiting. Does
# nothing when no decode thread was started.
def _wait_for_threads
  return if @decode_threads.empty?

  @articles.disconnect
  puts "Waiting for decode threads..."
  ThreadsWait.all_waits(@decode_threads) do |finished|
    puts "Thread #{finished} has terminated"
  end
  puts "Decode threads all done"
end
# Write the newsrc through @articles, except in test mode ("-T" set for the
# group), where the newsrc must stay untouched.
def _save_newsrc(group)
  test_mode = @config[group].has_key?("-T") && @config[group]["-T"]
  @articles.save_newsrc unless test_mode
end
# Script body: set everything up, process the groups, then report the
# running time and release the lock.
startup
main
ending