ripnews/trunk/ripnews/ripnews.rb

716 lines
18 KiB
Ruby
Executable file

#!/usr/local/bin/ruby -w
# $Dwarf: ripnews.rb,v 1.102 2005/03/01 09:18:50 ward Exp $
# $Source$
#
# Copyright (c) 2002, 2003, 2004, 2005 Ward Wouts <ward@wouts.nl>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
require 'date'
require 'ftools'
require 'time'
require 'getoptlong'
require 'news/article'
require 'news/newsrc'
require 'tempfile'
require 'thread'
require 'thwait'
require 'encode/uuencode'
require 'encode/yenc'
###########################################################################
###########################################################################
# memory profiling stuff
# Memory-profiling sample for a single object: its class (c) and a byte
# count (mem).
MEntry = Struct.new("MEntry", :c, :mem)
class MEntry
  # Renders as "<class> : <bytes>".
  def to_s
    "#{c} : #{mem}"
  end
end

# Per-class aggregate: the class (c), the summed byte count (mem) and the
# number of objects counted (total).
GroupEntry = Struct.new("GroupEntry", :c, :mem, :total)
class GroupEntry
  # Renders as "<bytes>\t\t<class> x<count>".
  def to_s
    "#{mem}\t\t#{c} x#{total}"
  end
end
# No-op memory profiling hook: accepts a label (group) and does nothing.
# A working implementation with the same signature exists in this file as
# aprofile_mem; presumably the two names are swapped when profiling is
# wanted (TODO confirm).
def profile_mem(group)
end
# Memory profiler: in a separate thread, walk every live object (Arrays and
# Hashes excluded), measure it by the size of its Marshal dump (0 when it
# cannot be dumped), total the sizes per class and append a report labelled
# with +group+ to the file "mem_log". The caller sleeps 10 seconds and then
# waits for the thread to finish.
def aprofile_mem(group)
  worker = Thread.new do
    per_class = {}
    ObjectSpace.each_object do |obj|
      next if [Array, Hash].include?(obj.class)
      sample = begin
        MEntry.new(obj.class, Marshal::dump(obj).size)
      rescue TypeError # undumpable
        MEntry.new(obj.class, 0)
      end
      bucket = per_class[sample.c]
      if bucket
        bucket.mem += sample.mem
        bucket.total += 1
      else
        per_class[sample.c] = GroupEntry.new(sample.c, sample.mem, 1)
      end
    end
    File.open("mem_log", "a+") do |log|
      log << "Group #{group}\n"
      sum = 0
      log << "bytes/class/count\n"
      # smallest consumers first, biggest last
      per_class.to_a.sort_by { |pair| pair[1].mem }.each do |pair|
        log << "#{pair[1]}\n"
        sum += pair[1].mem
      end
      log << "TOTAL == #{sum}\n\n"
    end
  end
  sleep 10
  worker.join
end
###########################################################################
# Verbosity of diagnostic output; code in this file tests it against
# > 0, > 1 and > 2 to print progressively more detail.
Debuglevel = 0
# Wall-clock start of this run; printed by startup and used by ending to
# compute the running time.
@tstart = Time.now
# Store decoded data as a file called +name+ inside +dir+.
#
# name is sanitised first: "/" becomes "-", trailing whitespace and leading
# whitespace/dots/dashes are removed. If a file with that name already exists
# a "-<YYYYMMDD.n>" suffix is appended, with n counting up until the name is
# free. The date is zero padded so it is unambiguous and has the fixed width
# that get_max_file_length reserves for this suffix.
#
# data may be:
#   String   - path of an existing file, which is moved into place
#   Tempfile - the file at its path is moved into place
#   Array    - every element is written to a new file, in order
#
# Returns true when the data was saved, false otherwise.
def save_file(dir, name, data)
  print "savename: #{name}\n" if Debuglevel > 1
  nname = name.gsub(/\//, "-")
  nname.sub!(/\s*$/, "")
  nname.sub!(/^[\s\.-]*/, "")
  print "nname: #{nname}\n" if Debuglevel > 1
  newname = nname
  count = 1
  date = Date.today.strftime("%Y%m%d")
  while File.exist?("#{dir}/#{newname}")
    newname = "#{nname}-<#{date}.#{count}>"
    count += 1
  end
  print "name: #{newname}\n" if Debuglevel > 1
  target = "#{dir}/#{newname}"
  case data
  when String, Tempfile
    source = data.is_a?(String) ? data : data.path
    begin
      if File.move(source, target)
        print " Saving as: '#{newname}'\n"
      else
        print "couldn't rename tempfile\n"
        return false
      end
    rescue Errno::ENOENT
      print "Caught Errno::ENOENT (save_file)\n"
      print "Error: #{$!}\n"
      print "What the *beep* happened?\n"
      return false
    end
  when Array
    begin
      file = File.new(target, "w", 0644)
    rescue SystemCallError
      # File.new raises rather than returning nil, so the failure has to be
      # reported from here.
      print "couldn't open file for writeing\n"
      print "Error: #{$!}\n"
      return false
    end
    begin
      print " Saving as: '#{newname}'\n"
      data.each { |chunk| file.print "#{chunk}" }
    ensure
      # always close, so the data is flushed and the descriptor released
      file.close
    end
  else
    print "EEEEPS Can't save data of class: #{data.class.to_s}\n"
    return false
  end
  return true
end
# Parse the command line (ARGV) with GetoptLong and store every option found
# in the +options+ hash, keyed by its short name ("-I", "-c", ...). Options
# without an argument are stored with an empty string as value.
#
# Any GetoptLong error (unknown option, missing argument, ...) prints the
# error message and calls usage.
#
# Returns the same +options+ hash that was passed in.
def parse_options(options)
  begin
    opts = GetoptLong.new(
      [ "-I", "--include", GetoptLong::REQUIRED_ARGUMENT ],
      [ "-c", "--configfile", GetoptLong::REQUIRED_ARGUMENT ],
      [ "-L", "--longname", GetoptLong::NO_ARGUMENT ],
      [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
      [ "-M", "--multipart", GetoptLong::NO_ARGUMENT ],
      [ "-s", GetoptLong::NO_ARGUMENT ],
      [ "-S", "--singlepart", GetoptLong::NO_ARGUMENT ],
      [ "-T", "--test", GetoptLong::NO_ARGUMENT ],
      [ "-X", "--exclude", GetoptLong::REQUIRED_ARGUMENT ]
    )
    # we print the error ourselves, below
    opts.quiet = true
    opts.each do |opt, arg|
      options[opt] = arg
    end
  rescue GetoptLong::Error
    # GetoptLong::Error is the parent of InvalidOption, MissingArgument and
    # the other parse errors; rescuing only InvalidOption let e.g. a bare
    # "-I" escape as an uncaught exception.
    print "#{$!}\n"
    usage
  end
  return options
end
# Print the command line help text and terminate the program.
def usage
  print <<EOT

Usage:

ripnews.rb [-I <pattern>] [-c <file>] [-L] [-C] [-M] [-S] [-T] [-X <pattern>]

-I <pattern> specify an include pattern
-c <file> specify an alternate configfile
-L use subject as filename
-C use combined filenames
-M get multipart articles
-s exit silently if already running
-S get singlepart articles
-T test mode, don't update newsrc file
-X <pattern> specify an exclude pattern
EOT
  exit
end
# Read the configuration file named by default['-c'] and return a hash of
# per-group settings: { groupname => { key => value, ... }, ... }.
#
# Syntax handled, one statement per line:
#   # ...              comment, stripped
#   INCLUDEFILE=path   replaced by the lines of that file (one level only)
#   ${HOME}            replaced by the HOME environment variable
#   trailing \         the next line is appended to this one
#   OPT_x=value        shorthand for -x=value
#   key = value        set a value
#   key += value       append to the value already set (group, else default)
#   a|b {              open a block that applies to groups a and b
#   }                  close it; keys not set in the block get the defaults
# Statements outside a block set defaults for the blocks that follow.
#
# +default+ carries the command line options. It is not modified: parsing
# works on a private copy and never appends to a string in place, so parsing
# again (startup does so on SIGHUP) starts from the same defaults instead of
# accumulating "+=" values, and defaults are never altered through a group
# that inherited them.
#
# Returns the config hash, or false after printing a message when the file
# cannot be parsed. An unreadable or missing file yields an empty hash.
def parse_config(default = {})
  default = default.dup
  unless default.has_key?('-s')
    print "Parsing config\n"
    print "#{default['-c']}\n"
  end
  configfile = "#{default['-c']}"
  lines = FileTest.readable?(configfile) ? File.readlines(configfile) : []
  # first pass: expand ${HOME} and pull in included files
  lines.collect! { |x|
    x.gsub!(/\$\{HOME\}/, "#{ENV['HOME']}")
    (x =~ /^\s*INCLUDEFILE=(.*?)\s*$/i) ? File.readlines($1) : x
  }
  lines.flatten!
  # second pass: strip indentation, comments and trailing whitespace
  lines.collect! { |x|
    x.sub!(/^\s*/, "")
    x.sub!(/\#.*$/, "")
    x.sub!(/\s*$/, "")
    x.gsub!(/\$\{HOME\}/, "#{ENV['HOME']}")
    x.chomp
  }
  i = 0
  group = ""
  grouparr = []
  config = {}
  while i < lines.length
    line = lines[i]
    # join continuation lines
    while line.sub!(/\s*\\$/, "")
      if lines[i + 1].nil?
        print "Error parsing config: line continuation on last line #{i + 1}\n"
        return false
      end
      line << lines[i + 1]
      i += 1
    end
    line.sub!(/\s*$/, "")
    i += 1
    line = "-#{$1}=#{$2}" if line =~ /^OPT_(.*?)=(.*)/
    print "#{i}: #{line}\n" if Debuglevel > 1
    if line =~ /(.*?)\s*\+=\s*(.*)/
      key, value = $1, $2
      if group == ""
        default[key] = default.has_key?(key) ? default[key] + value : value
      else
        grouparr.each { |g|
          if config[g].has_key?(key)
            config[g][key] = config[g][key] + value
          elsif default.has_key?(key)
            config[g][key] = default[key] + value
          else
            config[g][key] = value.dup
          end
        }
      end
    elsif line =~ /(.*?)\s*=\s*(.*)/
      key, value = $1, $2
      if group == ""
        default[key] = value
      else
        grouparr.each { |g| config[g][key] = value.dup }
      end
    elsif line =~ /(.*?)\s*\{/
      group = $1
      grouparr = group.split('|')
      grouparr.each { |g|
        config[g] = {} unless config.has_key?(g)
      }
    elsif line =~ /^\}$/
      # fill in everything the block did not set itself
      default.each_key { |x|
        grouparr.each { |g|
          config[g][x] = default[x] unless config[g].has_key?(x)
        }
      }
      group = ""
      grouparr = []
    elsif line =~ /^$/
      next
    else
      print "Error parsing config on line: #{i}\n"
      return false
    end
  end
  if group != ""
    print "Error parsing config: group not terminated on line #{i}\n"
    return false
  end
  if Debuglevel > 2
    config.each_key { |x|
      print "Group: #{x}\n"
      config[x].each_key { |y|
        print "Key: '#{y}' => Value: '#{config[x][y]}'\n"
      }
    }
  end
  return config
end
# Sanity-check @config and fill in derived settings for every group.
#
# Exits when no group is configured at all. Per group it
#   * warns when no "-I" include pattern is present,
#   * defaults DATADIR to "." and PERMISSION to "0755",
#   * copies EXTENSIONS into both "-S" and "-M", and DELEXT into both "-SD"
#     and "-MD",
#   * when only one of "-S" / "-M" is set, gives the other one the pattern
#     "(?!.*)".
def check_config
  if @config.empty?
    print "No configuration, nothing to do\n"
    exit
  end
  @config.each do |name, opts|
    print "No inclusions given for group #{name}. Won't match anything.\n" unless opts.has_key?("-I")
    { "DATADIR" => ".", "PERMISSION" => "0755" }.each do |key, fallback|
      opts[key] = fallback unless opts.has_key?(key)
    end
    %w[-S -M].each { |key| opts[key] = opts["EXTENSIONS"] } if opts.has_key?("EXTENSIONS")
    %w[-SD -MD].each { |key| opts[key] = opts["DELEXT"] } if opts.has_key?("DELEXT")
    single = opts.has_key?("-S")
    multi = opts.has_key?("-M")
    if single && !multi
      opts["-M"] = "(?!.*)"
    elsif multi && !single
      opts["-S"] = "(?!.*)"
    end
  end
end
# Take the run lock, if the first configured group has a LOCKFILE setting.
#
# An existing lockfile is expected to hold the pid of the process that owns
# it. When that process is still there the program exits (with a message,
# unless "-s" was given). A lockfile that is empty, holds no usable pid, or
# names a process that no longer exists is removed. Finally our own pid is
# written to the lockfile.
def lock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  lockfile = @config[group]["LOCKFILE"]
  if File.exist?(lockfile)
    pid = File.open(lockfile, "r") { |f| f.gets }
    if pid
      owner = pid.chomp.to_i
      alive = false
      # A pid <= 0 would signal a process group rather than one process, so
      # it can never identify a lock owner: treat it as stale.
      if owner > 0
        begin
          Process.kill(0, owner)
          alive = true
        rescue Errno::EPERM
          # the process exists, we are just not allowed to signal it
          alive = true
        rescue Errno::ESRCH
          alive = false
        end
      end
      if alive
        print "Already running, exiting...\n" unless @config[group].has_key?('-s')
        exit
      end
      print "Stale lock found... removing...\n"
    else
      print "Empty lockfile found... removing...\n"
    end
    File.unlink(lockfile)
  end
  File.open(lockfile, "w") { |f| f.print "#{Process.pid}\n" }
end
# Remove the lockfile written by lock. Like lock, this only does something
# when the first configured group has a LOCKFILE setting; without one there
# is nothing to remove and nil is returned.
def unlock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  File.unlink(@config[group]["LOCKFILE"])
end
# Apply the NICE setting of the first configured group, if there is one, as
# the scheduling priority of this process.
def renice
  settings = @config[@config.keys[0]]
  return nil unless settings.has_key?("NICE")
  Process.setpriority(Process::PRIO_PROCESS, 0, settings["NICE"].to_i)
end
# Fetch, decode and save a singlepart article.
#
# subj  - subject identifying the article within @articles
# group - newsgroup name, used to look up the extension filters
#
# The body is fetched in one go, recognised as either uuencoded or yEnc,
# checked against the singlepart extension filters (check_ext, mode "s"),
# decoded and handed to output_data.
#
# Returns true when the article was passed to output_data, false when it was
# skipped (unknown encoding, filtered out, or decoding failed).
def get_single(subj, group)
  print "Fetching singlepart article: #{subj}\n"
  body = @articles.get_group_body(subj)
  uu = UUEncode.is_uuencoded(body)
  unless uu || YEnc.is_yencoded(body)
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end
  filename = uu ? UUEncode.get_filename(body) : YEnc.get_filename(body)
  print " filename #{filename}\n" if uu
  unless check_ext(group, filename, "s", subj)
    print " Skipping article...\n"
    return false
  end
  if uu
    print " UUDecoding...\n"
    mode, filename, body = UUEncode.uudecode(body)
  else
    print " YDecoding...\n"
    mode, filename, body = YEnc.ydecode(body)
  end
  if mode == false
    print " Decoding failed skipping article...\n"
    return false
  end
  output_data(subj, mode, filename, body)
  true
end
# Fetch a multipart article and decode it in a background thread.
#
# subj  - subject identifying the article within @articles
# group - newsgroup name, used to look up TEMPDIR and the extension filters
#
# Only the first part is fetched up front; it is used to recognise the
# encoding (uuencode or yEnc) and to run the multipart extension filter
# (check_ext, mode "m"). The remaining parts are then fetched either into
# memory (no TEMPDIR configured) or via a Tempfile in TEMPDIR. Decoding and
# saving happen in a new thread that is appended to @decode_threads.
#
# Returns true as soon as the decode thread has been started (the decode
# itself may still fail later), false when the article was skipped.
def get_multi(subj, group)
print "Fetching multipart article: #{subj}\n"
body = @articles.get_group_body_first(subj)
if UUEncode.is_uuencoded(body) or YEnc.is_yencoded(body)
# Determine the filename from the first part and apply the extension filter
# before fetching the rest.
if UUEncode.is_uuencoded(body)
filename = UUEncode.get_filename(body)
print " filename #{filename}\n"
unless check_ext(group, filename, "m", subj)
print " Skipping article...\n"
return false
end
elsif YEnc.is_yencoded(body)
print "yencc\n"
filename = YEnc.get_filename(body)
print "filename #{filename}\n"
unless check_ext(group, filename, "m", subj)
print " Skipping article...\n"
return false
end
end
if @config[group]["TEMPDIR"] == nil or @config[group]["TEMPDIR"] == ""
# In-memory mode: append the remaining parts to the first part. The locals
# file and fileout are never assigned on this path, so they are nil below.
bodyrest = @articles.get_group_body_rest(subj)
unless bodyrest
print " Skipping article...\n"
return false
end
body.concat(bodyrest)
else
# Tempfile mode: write the first part to a tempfile and pass that file to
# get_group_body_rest (presumably it appends the remaining parts there --
# TODO confirm); fileout is a second tempfile handed to the decoder.
file = Tempfile.new("riptmp", @config[group]["TEMPDIR"])
body.collect{|x| file.print "#{x}\n"}
# NOTE(review): on this early return the tempfile is not explicitly closed.
unless @articles.get_group_body_rest(subj, file)
print " Skipping article...\n"
return false
end
fileout = Tempfile.new("riptmp", @config[group]["TEMPDIR"])
end
# Decode and save in a separate thread; main waits for all threads in
# @decode_threads once it has gone through the subjects of the group.
@decode_threads << Thread.new(body, file, fileout, subj) do |tbody, tfile, tfileout, tsubj|
puts "inside thread pre pass\n"
Thread.pass
puts "inside thread post pass\n"
# tbody still holds the first part, so it can be used to pick the decoder;
# with a tempfile the decoder is given the input and output files instead.
if UUEncode.is_uuencoded(tbody)
print " UUDecoding...\n"
if tfile
tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout)
else
tmode, tfilename, tbody = UUEncode.uudecode(tbody)
end
elsif YEnc.is_yencoded(tbody)
print " YDecoding...\n"
begin
if tfile
tmode, tfilename, tbody = YEnc.ydecode(tfile, tfileout)
else
tmode, tfilename, tbody = YEnc.ydecode(tbody)
end
rescue YencError
# XXX if there is a yEnc problem, keep the data (saved as "YencProblem",
# mode 0600) so it can be investigated
output_data(tsubj, 0600, "YencProblem", tbody)
# XXX get_multi has already returned success, even though decoding failed
Thread.current.exit
rescue PermError
print "#{$!}\n"
print " Skipping article...\n"
Thread.current.exit
end
end
if tmode == false
print " Decoding failed skipping article...\n"
Thread.current.exit
end
if tfile
# Horrible cheat to not lose the decoder output: move the output tempfile to
# the first free "ripnewsdecode-<n>" name in the same directory and hand that
# path (a String) to output_data, presumably so that Tempfile clean-up cannot
# remove the data -- TODO confirm.
tbody = tfileout.path
tbodybase = tbody.sub(/\/[^\/]*$/, "/ripnewsdecode")
i = 1
while FileTest.exists?("#{tbodybase}-#{i}")
i += 1
end
File.move(tbody, "#{tbodybase}-#{i}")
tbody = "#{tbodybase}-#{i}"
tfile.close
tfileout.close(false)
end
output_data(tsubj, tmode, tfilename, tbody)
end # thread end
puts "ouside thread\n"
return true
else
print " Unknown encoding (not UU, not yEnc), skipping...\n"
return false
end
end
# Hand the group's "-I" include pattern, compiled to a Regexp, to @articles
# as its preselect pattern. Does nothing when the group has no "-I" setting.
def fill_preselector(group)
  settings = @config[group]
  return nil unless settings.has_key?("-I")
  @articles.set_preselect_pattern(Regexp.new(settings["-I"]))
end
def output_data(subject, mode, filename="", body="")
group = @articles.get_groupname
print " mode: #{mode}\n" if Debuglevel > 0
print " Filename: '#{filename}'\n" if Debuglevel > 0
# de-crap subject...
sub = subject.sub(/\s*$/, "") # strip trailing spaces
sub.sub!(/^[\s\.!-#]*/, "") # strip leading spaces, dots, exclamation points, dashes and hashes
# decide on a filename
if @config[group].has_key?("-L") and @config[group]["-L"]
print "longname\n" if Debuglevel > 1
outfile = sub[0...@maxfilelength]
elsif @config[group].has_key?("-C") and @config[group]["-C"]
print "combinedname\n" if Debuglevel > 1
outfile = sub[0...@maxfilelength-filename.length-3]
outfile = "#{outfile} [#{filename}]"
if outfile.length > @maxfilelength
outfile = filename[0...@maxfilelength]
end
else
print "shortname\n" if Debuglevel > 1
outfile = filename[0...@maxfilelength]
end
# do the actual saving
if save_file("#{@config[group]["DATADIR"]}/#{group}", outfile, body)
@newsrc_lock.synchronize {
@articles.group_update_newsrc(subject)
@articles.save_newsrc unless @config[group].has_key?("-T") and @config[group]["-T"]
}
end
end
# Decide from its extension whether a file should be downloaded.
#
# group    - newsgroup whose settings apply
# filename - name of the encoded file
# mode     - "s" for singlepart or "m" for multipart articles; any other
#            value prints an error and exits
# subject  - article subject, used when the article is marked as read
#
# With mode "s" the settings -SD / -S are used, with mode "m" -MD / -M; each
# holds a regexp source that is matched against the extension.
# When the delete pattern matches, the article is marked as read and false is
# returned. Otherwise the result is true when no accept pattern is
# configured, else the result of matching the accept pattern (the match
# position, or nil when it does not match).
def check_ext(group, filename, mode, subject)
  keys = { "s" => ["-SD", "-S"], "m" => ["-MD", "-M"] }[mode]
  if keys.nil?
    print "Illegal mode \"#{mode}\" in check_ext\n"
    exit
  end
  discard_key, accept_key = keys
  settings = @config[group]
  if settings.has_key?(discard_key) && ( filename =~ /\.(#{settings[discard_key]})$/ )
    print "Marking '#{subject}' as read\n"
    @articles.group_update_newsrc(subject)
    return false
  end
  return true unless settings.has_key?(accept_key)
  filename =~ /\.(#{settings[accept_key]})$/
end
# Find out how long a filename may be in +tempdir+ by trying to create files
# with ever shorter names, starting at the process id followed by 500 "a"s.
#
# tempdir - directory to probe; nil or "" means the current directory. The
#           caller passes the TEMPDIR setting, which get_multi treats as
#           optional, so a missing setting must not be fatal here.
#
# Exits when tempdir is not a writable directory, or when creating the probe
# file keeps failing with ENOENT.
#
# Returns the longest usable name length minus 14, the room kept free for the
# '-<#{date}.#{count}>' suffix that save_file may append.
def get_max_file_length(tempdir=".")
  tempdir = "." if tempdir.nil? || tempdir == ""
  if ! FileTest.directory?("#{tempdir}") || ! FileTest.writable?("#{tempdir}")
    print "Tempdir '#{tempdir}' is not a writable directory\n"
    exit
  end
  # this is quite stupid, there is no guarantee at all the generated file names
  # don't already exist
  name = "#{$$}#{'a' * 500}"
  enoent_tries = 0
  begin
    File.new("#{tempdir}/#{name}", "w", 0644).close
    File.delete("#{tempdir}/#{name}")
  rescue Errno::ENAMETOOLONG
    # terminates: the name gets one character shorter every time
    name = name[0...-1]
    retry
  rescue Errno::ENOENT
    print "#{$!}\n"
    print "strange...\n"
    # Retrying with the very same name cannot go on forever; give up after a
    # few attempts instead of looping endlessly.
    enoent_tries += 1
    retry if enoent_tries < 3
    print "Tempdir '#{tempdir}' is not a writable directory\n"
    exit
  end
  # this is how many characters are still likely to be appended
  # if the filename already exists: '-<#{date}.#{count}>' in save_file
  # this could be brought back to 5 '-<#{count}>' ...
  return name.length - 14
end
# Comparison function for sorting subjects "naturally": both arguments are
# converted to strings and split into alternating non-digit and digit runs.
# The runs are compared pairwise, as integers when both are digit runs and as
# strings otherwise; the first difference decides. When one argument runs out
# of pieces first, the shorter one sorts first.
#
# Returns -1, 0 or 1, like <=>.
def ward_sort(a, b)
  digits = /([0-9]+)/
  left = a.to_s.split(digits)
  right = b.to_s.split(digits)
  left.each_with_index do |lpart, idx|
    # a missing piece on the right compares as the empty string
    rpart = right[idx].to_s
    order = if (lpart =~ /^[0-9]+$/) && (rpart =~ /^[0-9]+$/)
      lpart.to_i <=> rpart.to_i
    else
      lpart <=> rpart
    end
    return order unless order == 0
  end
  right.length > left.length ? -1 : 0
end
# Program initialisation: make output unbuffered, read the command line and
# the config file (default ~/.ripnewsrc), validate the config, take the lock,
# apply the nice level, install a HUP handler that re-reads the config,
# determine the maximum filename length and print the start banner.
# Exits when the config file cannot be parsed.
def startup
  $stdout.sync = true # line buffered output
  @defaults = parse_options({'-c' => "#{ENV['HOME']}/.ripnewsrc"})
  @config = parse_config(@defaults)
  exit if @config == false
  check_config
  lock
  renice
  # on SIGHUP: re-read the config, keep the current one when it has errors
  trap("HUP") do
    print "Rereading config...\n"
    reloaded = parse_config(@defaults)
    if reloaded == false
      print "Keeping old config due to errors\n"
    else
      @config = reloaded
      check_config
      print "Done reading config\n"
    end
  end
  first_group = @config.keys[0]
  @maxfilelength = get_max_file_length(@config[first_group]["TEMPDIR"])
  print "\n$Id$\n"
  print "Starting: #{@tstart}\n"
  if Debuglevel > 2
    @config.each do |name, opts|
      print "Group: #{name}\n"
      opts.each do |key, val|
        print "Opt: #{key} val: #{val}\n"
      end
    end
  end
end
# Main loop: process every configured group, in sorted order.
#
# Per group: connect (Article.new with NNTPSERVER, the group name and
# NEWSRCNAME), set the preselect pattern, read the article list (using
# CACHEDIR), make sure DATADIR/<group> exists, then go through all subjects
# in ward_sort order and download the ones that are selected and complete.
# Multipart articles are decoded in background threads (see get_multi), which
# are waited for before the connection is closed and the next group starts.
def main
profile_mem("out side of loop still")
for group in @config.keys.sort
# fresh per-group state: the decode threads started by get_multi and the
# mutex output_data uses around newsrc updates
@decode_threads = []
@newsrc_lock = Mutex.new
profile_mem("#{group} start")
# puts "object count:"
# puts ObjectSpace.each_object(){}
print "\nGetting articles for #{group}\n"
@articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"])
fill_preselector(group)
print "initialized\n"
@articles.get_articles(@config[group]["CACHEDIR"])
profile_mem("#{group} articles read")
# Create DATADIR/<group> if needed; PERMISSION is read as an octal string.
# NOTE(review): Dir.mkdir normally raises on failure rather than returning a
# false value, so the "eeeps" branch is probably never reached -- confirm.
unless FileTest.directory?("#{@config[group]["DATADIR"]}/#{group}") or
Dir.mkdir("#{@config[group]["DATADIR"]}/#{group}", @config[group]["PERMISSION"].oct)
print "eeeps, couldn't create dir\n"
exit
end
for i in @articles.get_group_subjects.sort{|a, b| ward_sort(a, b)}
print "#{i}\n" if Debuglevel > 2
# subjects matching the -MR pattern are only marked as read, never fetched
if @config[group].has_key?("-MR") and i =~ /#{@config[group]["-MR"]}/
print "Marking '#{i}' as read\n"
@articles.group_update_newsrc(i)
next
end
# fetch when the subject is not excluded by -X and matches the -I pattern
if !(@config[group].has_key?("-X") and i =~ /#{@config[group]["-X"]}/) and
i =~ /#{@config[group]["-I"]}/
print "Match: #{i}\n" if Debuglevel > 0
if @articles.group_is_complete(i)
begin
if @articles.group_is_singlepart(i)
get_single(i, group)
elsif @articles.group_is_multipart(i)
get_multi(i, group)
end
#rescue Article::TempError, Article::PermError
rescue TempError, PermError
print "#{$!}\n"
print " Skipping article...\n"
#print "Caught #{$!.class}\n"
#print "Error: #{$!}\n"
next
end
else
print "Not complete: #{i}\n"
end
end
end
# wait here for any decode threads that are still running...
if ! @decode_threads.empty?
@articles.disconnect
puts "Waiting for decode threads..."
ThreadsWait.all_waits(@decode_threads){ |t|
p "Thread #{t} has terminated"
}
puts "Decode threads all done"
end
@articles.quit
# drop the article data before collecting garbage; the profile_mem calls
# record memory use before and after
@articles = nil
profile_mem("#{group} pre-GC")
GC.start
profile_mem("#{group} end")
end
end
# Print the finish time and the total running time since @tstart as
# HH:MM:SS, then release the lock.
def ending
  finished = Time.now
  print "\nFinished: #{finished}\n"
  elapsed = (finished - @tstart).to_i
  hours, remainder = elapsed.divmod(3600)
  minutes, seconds = remainder.divmod(60)
  printf("Running time: %02d:%02d:%02d\n", hours, minutes, seconds)
  unlock
end
# Script entry point: initialise, process all configured groups, then print
# the running time and release the lock.
startup
main
ending