#!/usr/local/bin/ruby -w

# $Dwarf: ripnews.rb,v 1.82 2004/06/16 08:17:48 ward Exp $
# $Source$

#
# Copyright (c) 2002, 2003, 2004 Ward Wouts <ward@wouts.nl>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
|
|
|
|
# Standard library
require 'date'
require 'fileutils'
require 'ftools'
require 'getoptlong'
require 'tempfile'
require 'time'

# Project libraries
require 'news/article'
require 'news/newsrc'
require 'encode/uuencode'
require 'encode/yenc'
|
|
|
|
|
|
###########################################################################
|
|
|
|
# Verbosity of diagnostic output. The code in this file prints extra detail
# when this is greater than 0, greater than 1 and greater than 2.
Debuglevel = 0
# Moment the run started; `ending` uses it to report the total running time.
@tstart = Time.now
|
|
|
|
# Store +data+ in directory +dir+ under a cleaned-up version of +name+.
#
# The name is sanitised first: every "/" becomes "-", trailing whitespace is
# removed and leading whitespace, dots and dashes are removed.  When a file of
# that name already exists, "-<YYYYMMDD.N>" is appended with the lowest N that
# is still free.
#
# dir::  target directory (must already exist)
# name:: wanted file name
# data:: an Array whose elements are written out one after another, a String
#        holding the path of an existing file that is moved into place, or a
#        Tempfile whose underlying file is moved into place
#
# Returns true when the data was stored and false otherwise (unsupported data
# class, or the file could not be written or moved).
def save_file(dir, name, data)
  print "savename: #{name}\n" if Debuglevel > 1
  nname = name.gsub(/\//, "-")
  nname.sub!(/\s*$/, "")
  nname.sub!(/^[\s\.-]*/, "")
  print "nname: #{nname}\n" if Debuglevel > 1

  # Find a name that is not taken yet.  The date is zero padded so that the
  # suffix has a fixed width and is unambiguous (1 November vs. 11 January).
  newname = nname
  count = 1
  date = Date.today.strftime("%Y%m%d")
  while File.exist?("#{dir}/#{newname}")
    newname = "#{nname}-<#{date}.#{count}>"
    count += 1
  end
  print "name: #{newname}\n" if Debuglevel > 1
  target = "#{dir}/#{newname}"

  case data
  when Array
    begin
      # The block form closes (and thereby flushes) the file, also on errors.
      File.open(target, "w", 0644) { |file|
        print " Saving as: '#{newname}'\n"
        data.each { |chunk| file.print "#{chunk}" }
      }
    rescue SystemCallError
      print "couldn't open file for writeing\n"
      print "Error: #{$!}\n"
      return false
    end
  when String, Tempfile
    source = data.is_a?(String) ? data : data.path
    begin
      # FileUtils.mv falls back to copy + delete when source and target live
      # on different file systems; it raises on failure.
      FileUtils.mv(source, target)
      print " Saving as: '#{newname}'\n"
    rescue Errno::ENOENT
      print "Caught Errno::ENOENT (save_file)\n"
      print "Error: #{$!}\n"
      print "What the *beep* happened?\n"
      return false
    rescue SystemCallError
      print "couldn't rename tempfile\n"
      print "Error: #{$!}\n"
      return false
    end
  else
    print "EEEEPS Can't save data of class: #{data.class.to_s}\n"
    return false
  end
  return true
end
|
|
|
|
# Merge the command line switches found in ARGV into +options+.
#
# Every recognised switch is stored under its short name ("-I", "-c", ...)
# with its argument as value; switches that take no argument are stored with
# an empty string.  The hash passed in is updated in place and also returned.
#
# Any command line error reported by GetoptLong (unknown switch, missing or
# superfluous argument, ambiguous abbreviation) prints the error message and
# calls +usage+, which terminates the program.
def parse_options(options)
  parser = GetoptLong.new(
    [ "-I", "--include", GetoptLong::REQUIRED_ARGUMENT ],
    [ "-c", "--configfile", GetoptLong::REQUIRED_ARGUMENT ],
    [ "-L", "--longname", GetoptLong::NO_ARGUMENT ],
    [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
    [ "-M", "--multipart", GetoptLong::NO_ARGUMENT ],
    [ "-S", "--singlepart", GetoptLong::NO_ARGUMENT ],
    [ "-T", "--test", GetoptLong::NO_ARGUMENT ],
    [ "-X", "--exclude", GetoptLong::REQUIRED_ARGUMENT ]
  )
  # Errors are reported below instead of by GetoptLong itself.
  parser.quiet = true

  begin
    parser.each do |opt, arg|
      options[opt] = arg
    end
  rescue GetoptLong::Error
    # GetoptLong::Error is the common parent of InvalidOption,
    # MissingArgument, NeedlessArgument and AmbiguousOption.
    print "#{$!}\n"
    usage
  end

  return options
end
|
|
|
|
# Print the command line summary on standard output and terminate the
# program.
def usage
  help = [
    "\nUsage:\n\n",
    "ripnews.rb [-I <pattern>] [-c <file>] [-L] [-C] [-M] [-S] [-T] [-X <pattern>]\n\n",
    "-I <pattern> specify an include pattern\n",
    "-c <file> specify an alternate configfile\n",
    "-L use subject as filename\n",
    "-C use combined filenames\n",
    "-M get multipart articles\n",
    "-S get singlepart articles\n",
    "-T test mode, don't update newsrc file\n",
    "-X <pattern> specify an exclude pattern\n"
  ]
  help.each { |text| print text }
  exit
end
|
|
|
|
# Return a private copy of a configuration value.  Strings are duplicated so
# that a later in-place append ("+=") on one copy cannot change another one.
def dup_config_value(value)
  value.is_a?(String) ? value.dup : value
end

# Read the configuration file named by default['-c'] and build the per-group
# configuration.
#
# File syntax, one statement per line (a trailing "\" continues the statement
# on the next line, "#" starts a comment, "${HOME}" is replaced by the HOME
# environment variable):
#
#   KEY = value      set KEY
#   KEY += value     append to KEY
#   OPT_x = value    same as "-x = value" (likewise with "+=")
#   grp1|grp2 {      start settings for the listed groups
#   }                end of the group settings
#
# Statements outside a group set global defaults.  When a group is closed it
# inherits every global default it did not set itself.
#
# default:: hash of start values (the command line options).  It is only
#           read: the parser works on a private copy, so parsing again with
#           the same hash (config reload) gives the same result.
#
# Returns a hash mapping group name to a hash of settings, or false when the
# file contains a syntax error.  An unreadable or missing file yields an
# empty hash.
def parse_config(default = {})
  print "Parsing config\n"
  print "#{default['-c']}\n"

  path = "#{default['-c']}"
  # File.readlines opens and closes the file itself.
  lines = FileTest.readable?(path) ? File.readlines(path) : []

  # Work on a copy so neither the caller's hash nor its strings are modified.
  globals = {}
  default.each { |key, value| globals[key] = dup_config_value(value) }

  i = 0
  group = ""
  grouparr = []
  config = {}

  # Strip indentation, comments and trailing blanks; expand ${HOME}.
  lines.collect! { |x|
    x.sub!(/^\s*/, "")
    x.sub!(/\#.*$/, "")
    x.sub!(/\s*$/, "")
    x.gsub!(/\$\{HOME\}/, "#{ENV['HOME']}")
    x.chomp
  }

  while i < lines.length
    line = lines[i]
    # Join continuation lines.  A "\" on the very last line has nothing to
    # continue with and is simply dropped.
    while line.sub!(/\s*\\$/, "") != nil
      break if i + 1 >= lines.length
      i += 1
      line << lines[i]
    end
    line.sub!(/\s*$/, "")
    i += 1
    if line =~ /^OPT_(.*?)=(.*)/
      line = "-#{$1}=#{$2}"
    end
    print "#{i}: #{line}\n" if Debuglevel > 1

    if line =~ /(.*?)\s*\+=\s*(.*)/
      key, value = $1, $2
      if group == ""
        if globals.has_key?(key)
          globals[key] << value
        else
          globals[key] = value
        end
      else
        grouparr.each { |g|
          if config[g].has_key?(key)
            config[g][key] << value
          elsif globals.has_key?(key)
            config[g][key] = globals[key] + value
          else
            # every group gets its own string object
            config[g][key] = value.dup
          end
        }
      end
    elsif line =~ /(.*?)\s*=\s*(.*)/
      key, value = $1, $2
      if group == ""
        globals[key] = value
      else
        grouparr.each { |g|
          config[g][key] = value.dup
        }
      end
    elsif line =~ /(.*?)\s*\{/
      group = $1
      grouparr = group.split('|')
      grouparr.each { |g|
        config[g] = {} unless config.has_key?(g)
      }
    elsif line =~ /^}$/
      # Inherit the global defaults the group did not override.  The values
      # are copied so that appending to one group's setting later on does not
      # change the other groups or the defaults.
      globals.each_key { |x|
        grouparr.each { |g|
          config[g][x] = dup_config_value(globals[x]) unless config[g].has_key?(x)
        }
      }
      group = ""
      grouparr = []
    elsif line =~ /^$/
      next
    else
      print "Error parsing config on line: #{i}\n"
      return false
    end
  end

  if group != ""
    print "Error parsing config: group not terminated on line #{i}\n"
    return false
  end

  if Debuglevel > 2
    config.each_key { |x|
      print "Group: #{x}\n"
      config[x].each_key { |y|
        print "Key: '#{y}' => Value: '#{config[x][y]}'\n"
      }
    }
  end
  return config
end
|
|
|
|
# Validate @config and fill in derived and default settings for every group.
#
# Terminates the program when there is no configuration at all.  For each
# group: warns when no "-I" include pattern is set, defaults DATADIR to "."
# and PERMISSION to "0755", copies EXTENSIONS to "-S" and "-M" and DELEXT to
# "-SD" and "-MD", and, when only one of "-S"/"-M" is present, sets the other
# one to a pattern that never matches.
def check_config
  if @config.empty?
    print "No configuration, nothing to do\n"
    exit
  end

  @config.each do |name, settings|
    unless settings.has_key?("-I")
      print "No inclusions given for group #{name}. Won't match anything.\n"
    end

    settings["DATADIR"] = "." unless settings.has_key?("DATADIR")
    settings["PERMISSION"] = "0755" unless settings.has_key?("PERMISSION")

    if settings.has_key?("EXTENSIONS")
      wanted = settings["EXTENSIONS"]
      settings["-S"] = wanted
      settings["-M"] = wanted
    end
    if settings.has_key?("DELEXT")
      unwanted = settings["DELEXT"]
      settings["-SD"] = unwanted
      settings["-MD"] = unwanted
    end

    # "(?!.*)" is a negative look-ahead that can never succeed.
    has_single = settings.has_key?("-S")
    has_multi = settings.has_key?("-M")
    settings["-M"] = "(?!.*)" if has_single && !has_multi
    settings["-S"] = "(?!.*)" if has_multi && !has_single
  end
end
|
|
|
|
# Tell whether the process whose id is written in +pid_text+ exists.
# Anything that is not a positive decimal number counts as "not running".
def lock_holder_alive?(pid_text)
  return false unless pid_text =~ /\A\d+\z/
  pid = pid_text.to_i
  return false if pid <= 0
  begin
    # Signal 0 delivers nothing; it only checks whether the process exists.
    Process.kill(0, pid)
    true
  rescue Errno::EPERM
    # The process exists but belongs to another user.
    true
  rescue Errno::ESRCH, RangeError
    false
  end
end

# Take the lock described by the LOCKFILE setting of the first configured
# group; does nothing when LOCKFILE is not set.
#
# When the lock file already names a process that still exists the program
# terminates with "Already running".  A lock file that is empty or names a
# process that is gone is removed.  Afterwards the lock file is (re)written
# with the id of this process.
def lock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  lockfile = @config[group]["LOCKFILE"]

  if File.exist?(lockfile)
    line = File.open(lockfile, "r") { |f| f.gets }
    if line
      if lock_holder_alive?(line.chomp)
        print "Already running, exiting...\n"
        exit
      else
        print "Stale lock found... removing...\n"
        File.unlink(lockfile)
      end
    else
      print "Empty lockfile found... removing...\n"
      File.unlink(lockfile)
    end
  end

  File.open(lockfile, "w") { |f| f.print "#{Process.pid}\n" }
end
|
|
|
|
# Release the lock taken by +lock+: remove the file named by the LOCKFILE
# setting of the first configured group.  Does nothing when LOCKFILE is not
# set (in which case +lock+ never created a file either).
def unlock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  begin
    File.unlink(@config[group]["LOCKFILE"])
  rescue Errno::ENOENT
    # The lock file is already gone, so there is nothing left to release.
  end
end
|
|
|
|
# Set the scheduling priority of this process to the NICE setting of the
# first configured group; does nothing when NICE is not set.
def renice
  settings = @config[@config.keys[0]]
  return unless settings.has_key?("NICE")
  Process.setpriority(Process::PRIO_PROCESS, 0, settings["NICE"].to_i)
end
|
|
|
|
# Fetch the single-part article with subject +subj+ from newsgroup +group+,
# decode it and hand the result to +output_data+.
#
# The body is fetched through @articles.  UUEncode is tried first, then YEnc;
# the file name found in the encoded data must pass +check_ext+ (mode "s")
# before the data is decoded.
#
# Returns true when the decoded data was passed on to +output_data+ and false
# when the article was skipped (unknown encoding, rejected file name, or a
# decoder reporting mode == false).
def get_single(subj, group)
  print "Fetching singlepart article: #{subj}\n"
  article = @articles.get_group_body(subj)

  if UUEncode.is_uuencoded(article)
    probe = UUEncode.get_filename(article)
    print " filename #{probe}\n"
    return false unless check_ext(group, probe, "s", subj)
    print " UUDecoding...\n"
    decoded = UUEncode.uudecode(article)
  elsif YEnc.is_yencoded(article)
    probe = YEnc.get_filename(article)
    return false unless check_ext(group, probe, "s", subj)
    print " YDecoding...\n"
    decoded = YEnc.ydecode(article)
  else
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end

  mode, filename, payload = decoded
  return false if mode == false

  output_data(subj, mode, filename, payload)
  true
end
|
|
|
|
# Fetch the multi-part article with subject +subj+ from newsgroup +group+,
# decode it and hand the result to +output_data+.
#
# Without a TEMPDIR setting for the group the whole article is fetched and
# decoded in memory.  With TEMPDIR only the first part is fetched into
# memory; after its file name passed +check_ext+ (mode "m") the article is
# spooled to a temporary file in TEMPDIR, decoded into a second temporary
# file, and the decoded file is renamed to "ripnewsdecode-N" in the same
# directory before its path is passed to +output_data+.
#
# Returns true when decoded data was passed on to +output_data+ and false
# when the article was skipped (unknown encoding, rejected file name, failed
# download of the remaining parts, or a decoder reporting mode == false).
def get_multi(subj, group)
  print "Fetching multipart article: #{subj}\n"
  tempdir = @config[group]["TEMPDIR"]

  if tempdir == nil or tempdir == ""
    body = @articles.get_group_body(subj)
    if UUEncode.is_uuencoded(body)
      filename = UUEncode.get_filename(body)
      print " filename #{filename}\n"
      return false unless check_ext(group, filename, "m", subj)
      print " UUDecoding...\n"
      mode, filename, body = UUEncode.uudecode(body)
    elsif YEnc.is_yencoded(body)
      print "yencc\n"
      filename = YEnc.get_filename(body)
      print "filename #{filename}\n"
      return false unless check_ext(group, filename, "m", subj)
      print " YDecoding...\n"
      mode, filename, body = YEnc.ydecode(body)
    else
      print " Unknown encoding (not UU, not yEnc), skipping...\n"
      return false
    end
    return false if mode == false
    output_data(subj, mode, filename, body)
    return true
  end

  body = @articles.get_group_body_first(subj)
  # The first part is not modified below, so the encoding is determined once.
  uu = UUEncode.is_uuencoded(body)
  yenc = !uu && YEnc.is_yencoded(body)
  unless uu or yenc
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end

  if uu
    filename = UUEncode.get_filename(body)
    print " filename #{filename}\n"
  else
    print "yencc\n"
    filename = YEnc.get_filename(body)
    print "filename #{filename}\n"
  end
  return false unless check_ext(group, filename, "m", subj)

  file = Tempfile.new("riptmp", tempdir)
  fileout = nil
  begin
    body.each { |x| file.print "#{x}\n" }
    return false unless @articles.get_group_body_rest(subj, file)
    fileout = Tempfile.new("riptmp", tempdir)

    # I think a thread should start about here

    if uu
      print " UUDecoding...\n"
      mode, filename, body = UUEncode.uudecode(file, fileout)
    else
      print " YDecoding...\n"
      mode, filename, body = YEnc.ydecode(file, fileout)
    end
    return false if mode == false

    # horrible cheat to not lose the outputted file: give it a name the
    # Tempfile clean-up does not know about
    decoded = fileout.path
    bodybase = decoded.sub(/\/[^\/]*$/, "/ripnewsdecode")
    i = 1
    i += 1 while File.exist?("#{bodybase}-#{i}")
    FileUtils.mv(decoded, "#{bodybase}-#{i}")
    body = "#{bodybase}-#{i}"
  ensure
    # Runs on every way out (success, skip, exception), so no spool file is
    # left behind in TEMPDIR.  The decoded file has been renamed by then.
    file.close(true)
    fileout.close(true) if fileout
  end

  output_data(subj, mode, filename, body)

  # thread could end here

  return true
end
|
|
|
|
# Save decoded article data under a file name derived from the group's naming
# settings and, on success, mark the article as read.
#
# subject::  subject of the article
# mode::     file mode reported by the decoder (only shown in debug output)
# filename:: file name found in the encoded data
# body::     the data, in any form +save_file+ accepts
#
# Naming: with "-L" the cleaned-up subject is used, with "-C" the subject
# followed by " [filename]" (falling back to the bare file name when that is
# too long), otherwise the file name; all cut to @maxfilelength characters.
# The file goes to "<DATADIR>/<group>", the group being the one @articles
# currently works on.  After a successful save the newsrc is updated and,
# unless "-T" is set, written to disk.
def output_data(subject, mode, filename="", body="")
  group = @articles.get_groupname
  print " mode: #{mode}\n" if Debuglevel > 0
  print " Filename: '#{filename}'\n" if Debuglevel > 0

  # de-crap subject...
  sub = subject.sub(/\s*$/, "") # strip trailing spaces
  # strip leading spaces, dots, exclamation points, double quotes, hashes and
  # dashes.  The dash is escaped: a bare "!-#" would be the range from "!" to
  # "#" and leave dashes in place.
  sub.sub!(/^[\s\.!"#\-]*/, "")

  # decide on a filename
  if @config[group].has_key?("-L") and @config[group]["-L"]
    print "longname\n" if Debuglevel > 1
    outfile = sub[0...@maxfilelength]
  elsif @config[group].has_key?("-C") and @config[group]["-C"]
    print "combinedname\n" if Debuglevel > 1
    # 3 = the space and the two brackets around the file name
    outfile = sub[0...@maxfilelength-filename.length-3]
    outfile = "#{outfile} [#{filename}]"
    if outfile.length > @maxfilelength
      outfile = filename[0...@maxfilelength]
    end
  else
    print "shortname\n" if Debuglevel > 1
    outfile = filename[0...@maxfilelength]
  end

  # do the actual saving
  if save_file("#{@config[group]["DATADIR"]}/#{group}", outfile, body)
    @articles.group_update_newsrc(subject)
    @articles.save_newsrc unless @config[group].has_key?("-T") and @config[group]["-T"]
  end
end
|
|
|
|
# Decide whether an attachment called +filename+ should be fetched.
#
# mode is "s" for single-part or "m" for multi-part articles and selects the
# pair of settings that is consulted: "-SD"/"-S" or "-MD"/"-M".  Each setting
# is a regexp fragment matched against the file extension.
#
# * extension matches the delete pattern: the article is marked as read in
#   the newsrc and false is returned
# * no keep pattern configured: true is returned
# * otherwise the result of matching the keep pattern is returned (the match
#   position, or nil when it does not match)
#
# Any other mode is a programming error and terminates the program.
def check_ext(group, filename, mode, subject)
  keys = { "s" => ["-SD", "-S"], "m" => ["-MD", "-M"] }[mode]
  if keys.nil?
    print "Illegal mode \"#{mode}\" in check_ext\n"
    exit
  end

  delete_key, keep_key = keys
  settings = @config[group]

  if settings.has_key?(delete_key) && ( filename =~ /\.(#{settings[delete_key]})$/ )
    print "Marking '#{subject}' as read\n"
    @articles.group_update_newsrc(subject)
    return false
  end

  return true unless settings.has_key?(keep_key)
  filename =~ /\.(#{settings[keep_key]})$/
end
|
|
|
|
# Find out how long a file name may be in directory +tempdir+ and return the
# number of characters that is safe to use for a name chosen by this program.
#
# The limit is probed by creating (and deleting again) a file whose name
# starts with the process id followed by "a"s, shortening the name by one
# character for as long as the system answers "name too long".
#
# tempdir:: directory to probe; nil or "" means the current directory, just
#           like leaving the argument out
#
# Returns the longest accepted name length minus 14.  Terminates the program
# when +tempdir+ is not a writable directory.  Raises Errno::ENOENT when the
# probe file keeps failing with that error.
def get_max_file_length(tempdir=".")
  # A group may be configured without TEMPDIR; probe the current directory
  # in that case instead of giving up.
  tempdir = "." if tempdir.nil? or "#{tempdir}" == ""
  if ! FileTest.directory?("#{tempdir}") || ! FileTest.writable?("#{tempdir}")
    print "Tempdir '#{tempdir}' is not a writable directory\n"
    exit
  end

  # this is quite stupid, there is no guarantee at all the generated file names
  # don't already exist
  name = "#{Process.pid}#{"a" * 500}"
  enoent_failures = 0
  begin
    File.new("#{tempdir}/#{name}", "w", 0644).close
    File.delete("#{tempdir}/#{name}")
  rescue Errno::ENAMETOOLONG
    name = name[0...-1]
    retry
  rescue Errno::ENOENT
    print "#{$!}\n"
    print "raar hoor\n"
    # Try a few more times, but do not loop forever when the directory has
    # disappeared for good.
    enoent_failures += 1
    raise if enoent_failures > 3
    retry
  end

  # this is how many characters are still likely to be appended
  # if the filename already exists '-<#{date}.#{count}>' in save_file
  # this could be brought back to 5 '-<#{count}>' ...
  return name.length - 14
end
|
|
|
|
# Comparison function for sorting names "naturally": runs of digits are
# compared by numeric value, everything else as plain strings, so that
# "part2" sorts before "part10".
#
# Both arguments are converted with to_s and cut into alternating non-digit
# and digit pieces, which are compared pairwise from the left.  When all
# pieces of +a+ are equal to the corresponding pieces of +b+ and +b+ has
# pieces left over, +a+ sorts first.
#
# Returns -1, 0 or 1 like <=>.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)

  left.each_with_index do |x, idx|
    y = right[idx]   # nil once +b+ has run out of pieces
    both_numeric = (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
    r = both_numeric ? (x.to_i <=> y.to_i) : (x.to_s <=> y.to_s)
    return r if r != 0
  end

  return -1 if right.length > left.length
  return 0
end
|
|
|
|
# Prepare a run: read the command line and the configuration file, take the
# lock, adjust the process priority, install the reload handler and determine
# the maximum file name length.
#
# Sets @defaults, @config and @maxfilelength.  Terminates the program when
# the configuration cannot be parsed.  After start-up a HUP signal makes the
# program read the configuration again; a configuration with errors is
# rejected and the old one stays in effect.
def startup
  $stdout.sync = true # line buffered output

  @defaults = parse_options({'-c' => "#{ENV['HOME']}/.ripnewsrc"})
  @config = parse_config(@defaults)
  exit if @config == false
  check_config
  lock
  renice

  trap("HUP") do
    print "Rereading config...\n"
    reloaded = parse_config(@defaults)
    if reloaded == false
      print "Keeping old config due to errors\n"
    else
      @config = reloaded
      check_config
      print "Done reading config\n"
    end
  end

  # The TEMPDIR of the first group is the directory that gets probed.
  first_group = @config.keys[0]
  @maxfilelength = get_max_file_length(@config[first_group]["TEMPDIR"])

  print "\n$Id$\n"
  print "Starting: #{@tstart}\n"

  if Debuglevel > 2
    @config.each do |name, settings|
      print "Group: #{name}\n"
      settings.each do |opt, val|
        print "Opt: #{opt} val: #{val}\n"
      end
    end
  end
end
|
|
|
|
# Process every configured newsgroup in alphabetical order.
#
# For each group the article overview is loaded, the directory
# "<DATADIR>/<group>" is created when it is missing, and the subjects are
# walked in natural order (see +ward_sort+):
#
# * a subject matching "-MR" is marked as read and skipped
# * a subject is fetched only when the group has an include pattern "-I"
#   that matches it and no exclude pattern "-X" that matches it; a group
#   without "-I" fetches nothing, as +check_config+ announces
# * complete articles are fetched with +get_single+ or +get_multi+;
#   incomplete ones are only reported
#
# Article::TempError and Article::PermError raised while fetching are
# reported and the article is skipped.  Terminates the program when the data
# directory cannot be created.
def main
  @config.keys.sort.each do |group|
    print "\nGetting articles for #{group}\n"
    @articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"])
    @articles.get_articles(@config[group]["CACHEDIR"])

    datadir = "#{@config[group]["DATADIR"]}/#{group}"
    unless FileTest.directory?(datadir)
      begin
        # Dir.mkdir raises on failure; it never returns a false value.
        Dir.mkdir(datadir, @config[group]["PERMISSION"].oct)
      rescue SystemCallError
        print "eeeps, couldn't create dir\n"
        print "Error: #{$!}\n"
        exit
      end
    end

    @articles.get_group_subjects.sort{|a, b| ward_sort(a, b)}.each do |i|
      print "#{i}\n" if Debuglevel > 2
      if @config[group].has_key?("-MR") and i =~ /#{@config[group]["-MR"]}/
        print "Marking '#{i}' as read\n"
        @articles.group_update_newsrc(i)
        next
      end

      # Without an include pattern nothing is selected (an empty regexp
      # would match every subject).
      next unless @config[group].has_key?("-I")
      next if (@config[group].has_key?("-X") && i =~ /#{@config[group]["-X"]}/)
      next unless i =~ /#{@config[group]["-I"]}/

      print "Match: #{i}\n" if Debuglevel > 0
      unless @articles.group_is_complete(i)
        print "Not complete: #{i}\n"
        next
      end

      begin
        # Start from nil so the result of a previous article is never reused.
        succes = nil
        if @articles.group_is_singlepart(i)
          succes = get_single(i, group)
        elsif @articles.group_is_multipart(i)
          succes = get_multi(i, group)
        end
        if succes == false
          print " Skipping article...\n"
        end
      rescue Article::TempError, Article::PermError
        print "#{$!}\n"
        print " Skipping article...\n"
      end
    end
    @articles.quit
  end
end
|
|
|
|
# Finish a run: print the end time and the total running time as HH:MM:SS
# (measured from @tstart), then release the lock.
def ending
  finished = Time.now
  print "\nFinished: #{finished}\n"

  elapsed = (finished - @tstart).to_i
  hours, remainder = elapsed.divmod(3600)
  minutes, seconds = remainder.divmod(60)
  printf("Running time: %02d:%02d:%02d\n", hours, minutes, seconds)

  unlock
end
|
|
|
|
# Program flow: set everything up, process all configured groups, then
# report the running time and release the lock.
startup
main
ending
|