ripnews/trunk/ripnews/ripnews.rb

466 lines
13 KiB
Ruby
Raw Normal View History

2002-08-19 13:57:48 +00:00
#!/usr/local/bin/ruby -w
2002-04-27 20:31:59 +00:00
2003-04-28 09:57:30 +00:00
# $Dwarf: ripnews.rb,v 1.54 2003/04/28 09:07:17 ward Exp $
2002-04-27 20:34:15 +00:00
# $Source$
2002-04-27 20:31:59 +00:00
require 'date'
2003-04-28 09:57:30 +00:00
require 'time'
2002-04-27 20:31:59 +00:00
require 'getoptlong'
2002-04-28 16:29:56 +00:00
require 'news/article'
2002-04-28 22:06:41 +00:00
require 'news/newsrc'
require 'tempfile'
2003-04-20 16:34:40 +00:00
require 'encode/uuencode'
require 'encode/yenc'
2002-04-27 20:31:59 +00:00
###########################################################################
# Diagnostic verbosity for the whole script.  The code compares it against
# 0, 1 and 2 ("Debuglevel > N") to decide which extra messages to print;
# with the value 0 none of them are printed.
Debuglevel = 0
2002-04-27 20:31:59 +00:00
# Store data under the given name inside directory dir.
#
# The name is cleaned up first: every "/" becomes "-", trailing whitespace
# is removed and leading whitespace and dots are removed.  If a file with
# that name already exists, "-<DATE.COUNT>" is appended, COUNT being raised
# until an unused name is found.
#
# data may be
#   * a String   - path of an existing file that is renamed into place,
#   * a Tempfile - its backing file is renamed into place,
#   * an Array   - every element is written, in order, to a new file.
#
# Returns true when the file was stored and false otherwise (a message
# explaining the failure is printed).
def save_file(dir, name, data)
  print "savename: #{name}\n" if Debuglevel > 1
  nname = name.gsub(/\//, "-")
  nname.sub!(/\s*$/, "")
  nname.sub!(/^[\s\.]*/, "")
  print "nname: #{nname}\n" if Debuglevel > 1

  # Find a name that is not taken yet.
  newname = nname
  count = 1
  d = Date.today
  date = "#{d.year}#{d.month}#{d.mday}"
  while File.exist?("#{dir}/#{newname}")
    newname = "#{nname}-<#{date}.#{count}>"
    count += 1
  end
  print "name: #{newname}\n" if Debuglevel > 1
  target = "#{dir}/#{newname}"

  case data
  when String, Tempfile
    source = data.is_a?(Tempfile) ? data.path : data
    begin
      # File.rename raises on failure, it never returns a false value.
      File.rename(source, target)
      print " Saving as: '#{newname}'\n"
    rescue Errno::ENOENT
      print "Caught Errno::ENOENT (save_file)\n"
      print "Error: #{$!}\n"
      print "What the \#@$$ happended?\n"
      return false
    rescue SystemCallError
      # e.g. source and target live on different filesystems
      print "couldn't rename tempfile\n"
      print "Error: #{$!}\n"
      return false
    end
  when Array
    begin
      # Block form so the handle is always closed; the mode must be numeric.
      File.open(target, "w", 0644) do |file|
        print " Saving as: '#{newname}'\n"
        data.each { |chunk| file.print chunk.to_s }
      end
    rescue SystemCallError
      print "couldn't open file for writeing\n"
      print "Error: #{$!}\n"
      return false
    end
  else
    print "EEEEPS Can't save data of type: #{data.class}\n"
    return false
  end
  return true
end
2002-04-30 14:08:14 +00:00
# Parse the command line (ARGV) and merge the result into options.
#
# options is a Hash keyed by the short option name ("-I", "-c", ...); every
# option found on the command line is stored under that key, overriding an
# existing entry.  Options without an argument get "" as their value.
#
# Any command line error (unknown option, missing argument, ...) prints the
# error message and calls usage.
#
# Returns the updated options Hash.
def parse_options(options)
  begin
    opts = GetoptLong.new(
      [ "-I", "--include",      GetoptLong::REQUIRED_ARGUMENT ],
      [ "-c", "--configfile",   GetoptLong::REQUIRED_ARGUMENT ],
      [ "-L", "--longname",     GetoptLong::NO_ARGUMENT ],
      [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
      [ "-M", "--multipart",    GetoptLong::NO_ARGUMENT ],
      [ "-S", "--singlepart",   GetoptLong::NO_ARGUMENT ],
      [ "-T", "--test",         GetoptLong::NO_ARGUMENT ],
      [ "-X", "--exclude",      GetoptLong::REQUIRED_ARGUMENT ]
    )
    # we report errors ourselves
    opts.quiet = true

    opts.each { |opt, arg| options[opt] = arg }
  rescue GetoptLong::Error
    # GetoptLong::Error is the parent of InvalidOption, MissingArgument,
    # NeedlessArgument and AmbiguousOption, so all of them end up here.
    print "#{$!}\n"
    usage
  end
  return options
end
2002-07-04 22:28:26 +00:00
# Print the command line synopsis to standard output and terminate the
# program via exit.  Never returns.
def usage
  help = [
    "\nUsage:\n\n",
    "ripnews.rb [-I <pattern>] [-c <file>] [-L] [-C] [-M] [-S] [-T] [-X <pattern>]\n\n",
    "-I <pattern> specify an include pattern\n",
    "-c <file> specify an alternate configfile\n",
    "-L use subject as filename\n",
    "-C use combined filenames\n",
    "-M get multipart articles\n",
    "-S get singlepart articles\n",
    "-T test mode, don't update newsrc file\n",
    "-X <pattern> specify an exclude pattern\n"
  ]
  help.each { |text| print text }
  exit
end
2002-04-30 14:08:14 +00:00
# Read the configuration file named by default["-c"] and build @config.
#
# File format, as implemented below:
#   * "#" starts a comment; surrounding whitespace is ignored
#   * a line ending in "\" is joined with the following line
#   * "OPT_X=value" is shorthand for "-X=value"
#   * "key = value" sets a value, "key += value" appends to it (plain
#     string concatenation)
#   * "name1|name2 {" opens a section for one or more groups, "}" closes it
#
# Assignments outside a section are stored in default (the Hash passed in
# is modified).  When a section is closed, every key of default that the
# group does not define itself is copied into the group.
#
# default - Hash of default settings, normally the parsed command line.
#
# Sets @config to { groupname => { key => value } }.  A file that is missing
# or unreadable is treated as empty.  Prints a message and exits on a syntax
# error or an unterminated section.  Returns true.
def parse_config(default = {})
  rcfile = default["-c"].to_s
  lines = FileTest.readable?(rcfile) ? File.readlines(rcfile) : []

  i = 0
  group = ""
  grouparr = []
  @config = {}

  # strip leading whitespace, comments, trailing whitespace and the newline
  lines.collect! { |raw|
    raw.sub(/^\s*/, "").sub(/\#.*$/, "").sub(/\s*$/, "").chomp
  }

  while i < lines.length
    line = lines[i]
    # join continuation lines
    while line.sub!(/\s*\\$/, "")
      # a backslash on the very last line has nothing to be joined with
      break if i + 1 >= lines.length
      i += 1
      line << lines[i]
    end
    line.sub!(/\s*$/, "")
    i += 1
    line = "-#{$1}=#{$2}" if line =~ /^OPT_(.*?)=(.*)/
    print "#{i}: #{line}\n" if Debuglevel > 1

    if line =~ /(.*?)\s*\+=\s*(.*)/
      key, value = $1, $2
      # Always build a new String here: the stored values are shared between
      # default and every group that inherited them, so appending in place
      # would silently change the other groups as well.
      if group == ""
        default[key] = default.key?(key) ? default[key] + value : value
      else
        grouparr.each { |g|
          section = @config[g]
          if section.key?(key)
            section[key] = section[key] + value
          elsif default.key?(key)
            section[key] = default[key] + value
          else
            section[key] = value.dup
          end
        }
      end
    elsif line =~ /(.*?)\s*=\s*(.*)/
      key, value = $1, $2
      if group == ""
        default[key] = value
      else
        grouparr.each { |g| @config[g][key] = value.dup }
      end
    elsif line =~ /(.*?)\s*\{/
      group = $1
      grouparr = group.split('|')
      grouparr.each { |g| @config[g] ||= {} }
    elsif line =~ /^\}$/
      # fill in everything the section did not set itself
      default.each_key { |k|
        grouparr.each { |g|
          @config[g][k] = default[k] unless @config[g].key?(k)
        }
      }
      group = ""
      grouparr = []
    elsif line =~ /^$/
      next
    else
      print "Error parsing config on line: #{i}\n"
      exit
    end
  end

  if group != ""
    print "Error parsing config: group not terminated on line #{i}\n"
    exit
  end

  if Debuglevel > 2
    @config.each_key { |g|
      print "Group: #{g}\n"
      @config[g].each_key { |k|
        print "Key: '#{k}' => Value: '#{@config[g][k]}'\n"
      }
    }
  end
  return true
end
2002-05-05 22:18:11 +00:00
# Validate @config and fill in derived settings for every group.
#
# Exits (after printing a message) when there is no group at all or when a
# group has no include pattern ("-I").
#
# For every group:
#   * DATADIR defaults to "." and PERMISSION to "0755"
#   * EXTENSIONS is copied to both "-S" and "-M"
#   * DELEXT is copied to both "-SD" and "-MD"
#   * when only one of "-S"/"-M" is set, the other one is set to "(?!.*)"
def check_config
  if @config.empty?
    print "No configuration, nothing to do\n"
    exit
  end

  @config.each_key { |name|
    settings = @config[name]
    unless settings.key?("-I")
      print "No inclusions given for group #{name}. Won't match anything.\n"
      exit
    end

    settings["DATADIR"] = "." unless settings.key?("DATADIR")
    settings["PERMISSION"] = "0755" unless settings.key?("PERMISSION")

    if settings.key?("EXTENSIONS")
      settings["-S"] = settings["EXTENSIONS"]
      settings["-M"] = settings["EXTENSIONS"]
    end
    if settings.key?("DELEXT")
      settings["-SD"] = settings["DELEXT"]
      settings["-MD"] = settings["DELEXT"]
    end

    settings["-M"] = "(?!.*)" if settings.key?("-S") && !settings.key?("-M")
    settings["-S"] = "(?!.*)" if settings.key?("-M") && !settings.key?("-S")
  }
end
# Fetch and decode a single-part article.
#
# subj  - subject identifying the article within @articles
# group - group name, passed on to check_ext
#
# The body is fetched with @articles.get_group_body and decoded with
# UUEncode or YEnc, whichever recognises it.  The filename found in the
# encoded data is run through check_ext (mode "s") first.
#
# Returns mode, filename, body as handed back by the decoder, or false when
# the encoding is unknown or check_ext rejects the filename.
def get_single(subj, group)
  print "Fetching singlepart article: #{subj}\n"
  body = @articles.get_group_body(subj)

  if UUEncode.is_uuencoded(body)
    filename = UUEncode.get_filename(body)
    print " filename #{filename}\n"
    return false unless check_ext(group, filename, "s", subj)
    print " UUDecoding...\n"
    mode, filename, body = UUEncode.uudecode(body)
  elsif YEnc.is_yencoded(body)
    filename = YEnc.get_filename(body)
    return false unless check_ext(group, filename, "s", subj)
    print " YDecoding...\n"
    mode, filename, body = YEnc.ydecode(body)
  else
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end

  return mode, filename, body
end
2002-05-06 11:46:13 +00:00
# Fetch and decode a multi-part article.
#
# subj  - subject identifying the article within @articles
# group - group name; selects the settings in @config and is passed on to
#         check_ext
#
# Without a TEMPDIR setting (nil or "") the whole body is fetched with
# @articles.get_group_body and decoded directly.
#
# With a TEMPDIR the first part is fetched to determine encoding and
# filename, then the body is collected in a temporary file in TEMPDIR and
# decoded into a second temporary file, which is renamed to
# "<TEMPDIR>/ripnewsdecode".  In that case the returned body is the path of
# that file.
#
# Returns mode, filename, body, or false when the encoding is unknown,
# check_ext (mode "m") rejects the filename or fetching the remaining parts
# fails.
def get_multi(subj, group)
  print "Fetching multipart article: #{subj}\n"
  tempdir = @config[group]["TEMPDIR"]

  if tempdir.nil? || tempdir == ""
    body = @articles.get_group_body(subj)
    if UUEncode.is_uuencoded(body)
      filename = UUEncode.get_filename(body)
      print " filename #{filename}\n"
      return false unless check_ext(group, filename, "m", subj)
      print " UUDecoding...\n"
      mode, filename, body = UUEncode.uudecode(body)
    elsif YEnc.is_yencoded(body)
      print "yencc\n"
      filename = YEnc.get_filename(body)
      print "filename #{filename}\n"
      return false unless check_ext(group, filename, "m", subj)
      print " YDecoding...\n"
      mode, filename, body = YEnc.ydecode(body)
    else
      print " Unknown encoding (not UU, not yEnc), skipping...\n"
      return false
    end
    return mode, filename, body
  end

  body = @articles.get_group_body_first(subj)
  # Determine the encoding once, from the first part.
  if UUEncode.is_uuencoded(body)
    uuencoded = true
    filename = UUEncode.get_filename(body)
    print " filename #{filename}\n"
  elsif YEnc.is_yencoded(body)
    uuencoded = false
    print "yencc\n"
    filename = YEnc.get_filename(body)
    print "filename #{filename}\n"
  else
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end
  return false unless check_ext(group, filename, "m", subj)

  encoded = Tempfile.new("riptmp", tempdir)
  begin
    body.each { |x| encoded.print "#{x}\n" }
    return false unless @articles.get_group_body_rest(subj, encoded)

    fileout = Tempfile.new("riptmp", tempdir)
    if uuencoded
      print " UUDecoding...\n"
      mode, filename, body = UUEncode.uudecode(encoded, fileout)
    else
      print " YDecoding...\n"
      mode, filename, body = YEnc.ydecode(encoded, fileout)
    end

    # Move the decoded data to a fixed name next to the temporary file.
    body = fileout.path
    bodybase = body.sub(/\/[^\/]*$/, "")
    File.rename(body, "#{bodybase}/ripnewsdecode")
    body = "#{bodybase}/ripnewsdecode"
    fileout.close(false)
    return mode, filename, body
  ensure
    # The encoded copy is of no use once we leave, whatever the outcome;
    # close and unlink it right away instead of leaving it to the GC.
    encoded.close(true)
  end
end
2002-05-06 11:46:13 +00:00
# Store a decoded article and mark it as read.
#
# subject  - subject of the article
# mode     - mode reported by the decoder (only shown in debug output)
# filename - filename reported by the decoder
# body     - decoded data in any form save_file accepts
#
# The name of the output file depends on the settings of the current group
# (@articles.get_groupname):
#   "-L"  the cleaned-up subject
#   "-C"  "<subject> [<filename>]", or just the filename when that does not
#         fit into @maxfilelength
#   else  the filename
# All names are cut off at @maxfilelength characters.
#
# The file is saved in "<DATADIR>/<group>".  Only when saving succeeded the
# subject is marked as read, and the newsrc is written unless "-T" is set.
def output_data(subject, mode, filename="", body="")
  group = @articles.get_groupname
  settings = @config[group]
  print " mode: #{mode}\n" if Debuglevel > 0
  print " Filename: '#{filename}'\n" if Debuglevel > 0

  # de-crap subject...
  title = subject.sub(/\s*$/, "")  # strip trailing spaces
  title.sub!(/^[\s\.!-]*/, "")     # strip leading spaces, dots, exclamation points and dashes

  # decide on a filename
  if settings["-L"]
    print "longname\n" if Debuglevel > 1
    outfile = title[0...@maxfilelength]
  elsif settings["-C"]
    print "combinedname\n" if Debuglevel > 1
    # 3 characters are needed for " [" and "]"
    outfile = "#{title[0...@maxfilelength-filename.length-3]} [#{filename}]"
    outfile = filename[0...@maxfilelength] if outfile.length > @maxfilelength
  else
    print "shortname\n" if Debuglevel > 1
    outfile = filename[0...@maxfilelength]
  end

  # do the actual saving
  if save_file("#{settings["DATADIR"]}/#{group}", outfile, body)
    @articles.group_update_newsrc(subject)
    @articles.save_newsrc unless settings["-T"]
  end
end
# Decide whether a file should be fetched, judging by its extension.
#
# group    - group name selecting the settings in @config
# filename - filename found in the article
# mode     - "s" for single-part, "m" for multi-part articles
# subject  - subject of the article
#
# The settings used are "-S"/"-SD" for mode "s" and "-M"/"-MD" for mode "m";
# each is a regular expression fragment matched against the extension.
#
#   * extension matches the "D" setting: the subject is marked as read and
#     false is returned
#   * no "-S"/"-M" setting: true is returned
#   * otherwise the result of the match is returned (an index or nil)
#
# Any other mode prints a message and exits.
def check_ext(group, filename, mode, subject)
  case mode
  when "s"
    wanted, unwanted = "-S", "-SD"
  when "m"
    wanted, unwanted = "-M", "-MD"
  else
    print "Illegal mode \"#{mode}\" in check_ext\n"
    exit
  end

  settings = @config[group]
  if settings.key?(unwanted) && filename =~ /\.(#{settings[unwanted]})$/
    print " Marking '#{subject}' matches as read\n"
    @articles.group_update_newsrc(subject)
    return false
  end

  return true unless settings.key?(wanted)
  return filename =~ /\.(#{settings[wanted]})$/
end
# Find out how long a filename may be in the given directory.
#
# tempdir - directory used for probing; nil and "" mean the current
#           directory, just like the default
#
# Probes by creating (and deleting again) a file with a 500 character name,
# shortening the name by one character whenever the system answers with
# ENAMETOOLONG.
#
# Returns the longest accepted name length minus 14.  Prints a message and
# exits when tempdir is not a writable directory.
def get_max_file_length(tempdir=".")
  tempdir = "." if tempdir.nil? || tempdir.to_s.empty?
  unless FileTest.directory?("#{tempdir}") && FileTest.writable?("#{tempdir}")
    print "Tempdir '#{tempdir}' is not a writable directory\n"
    exit
  end

  name = "a" * 500
  begin
    probe = "#{tempdir}/#{name}"
    # the permission has to be a number, not a string
    File.new(probe, "w", 0644).close
    File.delete(probe)
  rescue Errno::ENAMETOOLONG
    name = name[0...-1]
    # nothing left to shorten: the directory path itself is too long
    raise if name.empty?
    retry
  end

  # 14 is how many characters are still likely to be appended
  # if the filename already exists: '-<#{date}.#{count}>' in save_file
  # this could be brought back to 5 '-<#{count}>' ...
  return name.length - 14
end
2002-05-05 22:18:11 +00:00
#############################################################################################
# Main program: read the options and the configuration, then for every
# configured group fetch, decode and store all complete articles whose
# subject matches the include pattern.

$stdout.sync = true # line buffered output

print "\nStarting: #{Time.now}\n\n"

defaults = {'-c' => "#{ENV['HOME']}/.ripnewsrc"}
defaults = parse_options(defaults)
parse_config(defaults)
check_config

# The filename length is probed in the TEMPDIR of the first group; groups
# without a TEMPDIR are valid (see get_multi), use the current directory then.
probedir = @config[@config.keys[0]]["TEMPDIR"]
probedir = "." if probedir.nil? || probedir == ""
@maxfilelength = get_max_file_length(probedir)
print "$Id$\n\n"

if Debuglevel > 2
  @config.each_key { |g|
    print "Group: #{g}\n"
    @config[g].each_key { |opt|
      print "Opt: #{opt} val: #{@config[g][opt]}\n"
    }
  }
end

@config.keys.sort.each do |group|
  settings = @config[group]
  print "Getting articles for #{group}\n"
  @articles = Article.new(settings["NNTPSERVER"], group, settings["NEWSRCNAME"])
  # NOTE: errors raised by get_articles are not rescued here
  @articles.get_articles(settings["CACHEDIR"])

  datadir = "#{settings["DATADIR"]}/#{group}"
  unless FileTest.directory?(datadir)
    begin
      # Dir.mkdir raises on failure, it never returns a false value
      Dir.mkdir(datadir, settings["PERMISSION"].oct)
    rescue SystemCallError
      print "eeeps, couldn't create dir\n"
      exit
    end
  end

  # the patterns are the same for every subject of this group
  include_re  = /#{settings["-I"]}/
  exclude_re  = settings.key?("-X")  ? /#{settings["-X"]}/  : nil
  markread_re = settings.key?("-MR") ? /#{settings["-MR"]}/ : nil

  @articles.get_group_subjects.each do |subject|
    print "#{subject}\n" if Debuglevel > 2

    if markread_re && subject =~ markread_re
      print " Marking '#{subject}' matches as read\n"
      @articles.group_update_newsrc(subject)
      next
    end

    next if exclude_re && subject =~ exclude_re
    next unless subject =~ include_re
    print "Match: #{subject}\n" if Debuglevel > 0

    unless @articles.group_is_complete(subject)
      print "Not complete: #{subject}\n"
      next
    end

    begin
      if @articles.group_is_singlepart(subject)
        fetched = get_single(subject, group)
      elsif @articles.group_is_multipart(subject)
        fetched = get_multi(subject, group)
      else
        # neither kind: skip instead of reusing the previous article's data
        fetched = false
      end
      mode, filename, body = fetched

      if mode == false
        print " Skipping article...\n"
      else
        output_data(subject, mode, filename, body)
      end
    rescue Article::TempError, Article::PermError
      print "#{$!}\n"
      print " Skipping article...\n"
    end
  end

  @articles.quit
end

print "\nFinished: #{Time.now}\n"