856 lines
22 KiB
Ruby
Executable file
856 lines
22 KiB
Ruby
Executable file
#!/usr/bin/ruby -w
|
||
|
||
# $Dwarf: ripnews.rb,v 1.107 2005/06/06 12:53:10 ward Exp $
|
||
# $Source$
|
||
|
||
#
|
||
# Copyright (c) 2002, 2003, 2004, 2005 Ward Wouts <ward@wouts.nl>
|
||
#
|
||
# Permission to use, copy, modify, and distribute this software for any
|
||
# purpose with or without fee is hereby granted, provided that the above
|
||
# copyright notice and this permission notice appear in all copies.
|
||
#
|
||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||
#
|
||
|
||
require 'date'
require 'fileutils'
require 'ftools'
require 'getoptlong'
require 'tempfile'
require 'thread'
require 'thwait'
require 'time'

require 'encode/uuencode'
require 'encode/yenc'
require 'news/article'
require 'news/newsrc'
|
||
|
||
###########################################################################
|
||
|
||
###########################################################################
|
||
|
||
# memory profiling stuff
|
||
# One sampled object: its class (c) and its marshalled size in bytes (mem).
MEntry = Struct.new("MEntry", :c, :mem)
class MEntry
  # Renders as "<class> : <bytes>".
  def to_s
    "#{c} : #{mem}"
  end
end

# Per-class aggregate: class (c), summed bytes (mem) and object count (total).
GroupEntry = Struct.new("GroupEntry", :c, :mem, :total)
class GroupEntry
  # Renders as "<bytes><TAB><TAB><class> x<count>".
  def to_s
    "#{mem}\t\t#{c} x#{total}"
  end
end
|
||
|
||
# Memory-profiling hook invoked from main at several checkpoints.
# Deliberately a no-op; aprofile_mem has the same signature and holds a real
# implementation.
#
# group:: label for the checkpoint (ignored)
def profile_mem(group)
  # intentionally empty
end
|
||
# Heap profiler. Walks every live object except plain Arrays and Hashes, sums
# the marshalled size per class and appends a report, smallest total first, to
# the file "mem_log" in the current directory. Objects that cannot be
# marshalled are counted with a size of 0.
#
# group:: label written at the top of the report section
#
# Sleeps 10 seconds unconditionally and then waits for the sampling thread.
def aprofile_mem(group)
  sampler = Thread.new do
    per_class = {}

    ObjectSpace.each_object do |obj|
      next if [Array, Hash].include?(obj.class)

      sample = begin
        MEntry.new(obj.class, Marshal::dump(obj).size)
      rescue TypeError # undumpable
        MEntry.new(obj.class, 0)
      end

      bucket = per_class[sample.c]
      if bucket
        bucket.mem += sample.mem
        bucket.total += 1
      else
        per_class[sample.c] = GroupEntry.new(sample.c, sample.mem, 1)
      end
    end

    File.open("mem_log", "a+") do |log|
      log << "Group #{group}\n"
      log << "bytes/class/count\n"
      grand_total = 0
      per_class.values.sort_by { |entry| entry.mem }.each do |entry|
        log << "#{entry}\n"
        grand_total += entry.mem
      end
      log << "TOTAL == #{grand_total}\n\n"
    end
  end

  sleep 10
  sampler.join
end
|
||
###########################################################################
|
||
|
||
# Verbosity of diagnostic output: the code checks "> 0", "> 1" and "> 2".
Debuglevel = 0

# Wall-clock start of the run; ending() uses it to report the running time.
@tstart = Time.now
|
||
|
||
# Stores decoded article data in +dir+ under a sanitised version of +name+.
#
# dir::  destination directory (must exist)
# name:: proposed file name; slashes become dashes, angle brackets are
#        removed, trailing whitespace and leading whitespace/dots/dashes are
#        stripped, and the result is cut to @maxfilelength characters
# data:: a String (path of a file to move into place), an Array (chunks that
#        are written out in order) or a Tempfile (its file is moved into place)
#
# If the target already exists, "-<YYYYMD.N>" is appended to the truncated
# name until an unused name is found.
#
# Returns true on success; false when +data+ has an unsupported class or when
# Errno::ENOENT / Errno::EINVAL is raised while saving. Other errors propagate.
def save_file(dir, name, data)
  puts "savename: #{name}" if Debuglevel > 1

  nname = name.gsub(/\//, "-")
  nname.gsub!(/>/, "")
  nname.gsub!(/</, "")
  # NOTE(review): every /<EFBFBD>/ pattern in this method is what is left of a
  # non-ASCII literal after an encoding mishap; as written they can never
  # match. They are kept byte-for-byte so the intended replacement targets are
  # not lost -- restore the real characters from version control.
  nname.gsub!(/<EFBFBD>/, "(c)")
  nname.gsub!(/<EFBFBD>/, "(r)")
  # XXX I really have to find something a bit more elegant for this
  # XXX this is a mac problem, should be configurable!!!!
  group = @config.keys[0]
  if @config[group].has_key?("ASCIIFILENAMES") && @config[group]["ASCIIFILENAMES"]
    require 'iconv'
    begin
      nname = Iconv.conv("ASCII//TRANSLIT", "UTF-8", nname)
    rescue Iconv::IllegalSequence
      puts "Iconv couldn't handle: '#{nname}'"
      nname.gsub!(/\\\d+/, "#")
      nname.gsub!(/<EFBFBD>/, "\"a")
      nname.gsub!(/<EFBFBD>/, "\"e")
      nname.gsub!(/<EFBFBD>/, "\"i")
      nname.gsub!(/<EFBFBD>/, "\"o")
      nname.gsub!(/<EFBFBD>/, "\"u")
      nname.gsub!(/<EFBFBD>/, "~n")
      # NOTE(review): 86.chr is "V", so this turns every "V" into "U" -- confirm intent
      nname.gsub!(/#{86.chr}/, "U")
      nname.gsub!(/#{151.chr}/, " ")
      nname.gsub!(/#{243.chr}/, "L")
      nname.gsub!(/#{247.chr}/, "S")
    end
    # Drop byte 146 if it got through. The pattern used to be the single-quoted
    # string '#{146.chr}', which is never interpolated and so matched nothing.
    nname.gsub!(146.chr, "")
  end
  nname.gsub!(/<EFBFBD>/, "A"); nname.gsub!(/<EFBFBD>/, "a")
  nname.gsub!(/<EFBFBD>/, "A"); nname.gsub!(/<EFBFBD>/, "a")
  nname.gsub!(/<EFBFBD>/, "A"); nname.gsub!(/<EFBFBD>/, "a")
  nname.gsub!(/<EFBFBD>/, "A"); nname.gsub!(/<EFBFBD>/, "a")

  nname.gsub!(/<EFBFBD>/, "E"); nname.gsub!(/<EFBFBD>/, "e")
  nname.gsub!(/<EFBFBD>/, "E"); nname.gsub!(/<EFBFBD>/, "e")
  nname.gsub!(/<EFBFBD>/, "E"); nname.gsub!(/<EFBFBD>/, "e")
  nname.gsub!(/<EFBFBD>/, "E"); nname.gsub!(/<EFBFBD>/, "e")

  nname.gsub!(/<EFBFBD>/, "I"); nname.gsub!(/<EFBFBD>/, "i")
  nname.gsub!(/<EFBFBD>/, "I"); nname.gsub!(/<EFBFBD>/, "i")
  nname.gsub!(/<EFBFBD>/, "I"); nname.gsub!(/<EFBFBD>/, "i")
  nname.gsub!(/<EFBFBD>/, "I"); nname.gsub!(/<EFBFBD>/, "i")

  nname.gsub!(/<EFBFBD>/, "O"); nname.gsub!(/<EFBFBD>/, "o")
  nname.gsub!(/<EFBFBD>/, "O"); nname.gsub!(/<EFBFBD>/, "o")
  nname.gsub!(/<EFBFBD>/, "O"); nname.gsub!(/<EFBFBD>/, "o")
  nname.gsub!(/<EFBFBD>/, "O"); nname.gsub!(/<EFBFBD>/, "o")

  nname.gsub!(/<EFBFBD>/, "U"); nname.gsub!(/<EFBFBD>/, "u")
  nname.gsub!(/<EFBFBD>/, "U"); nname.gsub!(/<EFBFBD>/, "u")
  nname.gsub!(/<EFBFBD>/, "U"); nname.gsub!(/<EFBFBD>/, "u")
  nname.gsub!(/<EFBFBD>/, "U"); nname.gsub!(/<EFBFBD>/, "u")

  nname.gsub!(/<EFBFBD>/, "S"); nname.gsub!(/<EFBFBD>/, "L")
  nname.gsub!(/<EFBFBD>/, "ss")

  ### nname.gsub!('#{160.chr}', "")
  ### if nname.match(/([#{128.chr}-#{255.chr}])/)
  ### puts "replacing #{$1] with '*' in '#{nname}'"
  ### nname.tr!('#{128.chr}-#{255.chr}', "*") # catch all the other stupid shit
  ### end

  nname.sub!(/\s*$/, "")
  nname.sub!(/^[\s\.-]*/, "")
  puts "nname: #{nname}" if Debuglevel > 1

  # All the recodings could have made the filename too long. The truncated
  # form is also the base for collision suffixes: get_max_file_length only
  # reserves room for the suffix on top of @maxfilelength characters.
  base = nname[0...@maxfilelength]
  newname = base
  count = 1
  d = Date.today
  date = "#{d.year}#{d.month}#{d.mday}"
  while File.exist?("#{dir}/#{newname}")
    newname = "#{base}-<#{date}.#{count}>"
    count += 1
  end
  puts "name: #{newname}" if Debuglevel > 1

  target = "#{dir}/#{newname}"
  begin
    case data
    when String
      # data is the path of an already decoded file
      FileUtils.mv(data, target)
    when Array
      # block form closes (and flushes) the handle even if a write fails
      File.open(target, "w", 0644) do |file|
        data.each { |chunk| file.print "#{chunk}" }
      end
    when Tempfile
      FileUtils.mv(data.path, target)
    else
      puts "EEEEPS Can't save data of class: #{data.class.to_s}"
      return false
    end
    puts " Saving as: '#{newname}'"
  rescue Errno::ENOENT, Errno::EINVAL
    puts "Caught #{$!.class} (save_file)"
    puts "Error: #{$!}"
    puts "What the *beep* happened?"
    puts "(if Errno::EINVAL probably evil chars in filename)"
    puts "keeping #{data.path}" if data.is_a?(Tempfile)
    return false
  end
  return true
end
|
||
|
||
# Parses the command line (ARGV) into +options+.
#
# options:: Hash that receives one entry per option found, keyed by the
#           short option name (e.g. "-c"); flags without an argument are
#           stored with the empty string GetoptLong supplies
#
# Returns the same Hash. Any GetoptLong error (unknown option, missing
# argument, ...) prints the message and calls usage, which exits.
def parse_options(options)
  begin
    opts = GetoptLong.new(
      [ "-I", "--include", GetoptLong::REQUIRED_ARGUMENT ],
      [ "-c", "--configfile", GetoptLong::REQUIRED_ARGUMENT ],
      [ "-L", "--longname", GetoptLong::NO_ARGUMENT ],
      [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
      [ "-M", "--multipart", GetoptLong::NO_ARGUMENT ],
      [ "-s", GetoptLong::NO_ARGUMENT ],
      [ "-S", "--singlepart", GetoptLong::NO_ARGUMENT ],
      [ "-T", "--test", GetoptLong::NO_ARGUMENT ],
      [ "-X", "--exclude", GetoptLong::REQUIRED_ARGUMENT ]
    )
    # we print the error ourselves in the rescue below
    opts.quiet = true

    opts.each do |opt, arg|
      options[opt] = arg
    end
  rescue GetoptLong::Error
    # GetoptLong::Error is the parent of InvalidOption, MissingArgument, etc.;
    # rescuing only InvalidOption let "-c" without a file name crash with a
    # backtrace instead of showing the usage text.
    puts "#{$!}"
    usage
  end
  return options
end
|
||
|
||
# Prints the command-line help to stdout and terminates the program.
# The synopsis now lists -s as well; it is accepted by parse_options and
# described in the option list but was missing from the summary line.
def usage
  puts <<EOT

Usage:

ripnews.rb [-I <pattern>] [-c <file>] [-L] [-C] [-M] [-s] [-S] [-T] [-X <pattern>]

-I <pattern> specify an include pattern
-c <file> specify an alternate configfile
-L use subject as filename
-C use combined filenames
-M get multipart articles
-s exit silently if already running
-S get singlepart articles
-T test mode, don't update newsrc file
-X <pattern> specify an exclude pattern
EOT
  exit
end
|
||
|
||
# Reads the configuration file named by default['-c'] and returns a Hash
# mapping each group name to its settings Hash.
#
# default:: global settings (normally the parsed command-line options).
#           The Hash is NOT modified: parsing works on a private copy, so
#           re-reading the config (HUP) no longer appends top-level "+="
#           values a second time.
#
# File format as implemented below:
# * "#" starts a comment; leading/trailing whitespace is ignored
# * "${HOME}" is replaced by ENV['HOME']
# * "INCLUDEFILE=<path>" splices in the lines of another file (one level)
# * a trailing "\" joins the next line onto the current one
# * "OPT_x=..." is shorthand for the setting "-x"
# * "key = value" sets, "key += value" appends
# * "name1|name2 {" opens a section for one or more groups, "}" closes it and
#   fills in every global setting the section did not set itself
#
# Returns the config Hash, or false after printing a message when a line
# cannot be parsed or a section is left open. An unreadable or missing file
# yields an empty Hash.
def parse_config(default = {})
  unless default.has_key?('-s')
    puts "Parsing config"
    puts "#{default['-c']}"
  end

  # private, string-wise copy of the caller's defaults (see above)
  settings = {}
  default.each { |key, value| settings[key] = value.is_a?(String) ? value.dup : value }

  configfile = "#{default['-c']}"
  # File.readlines closes the handle; File.new(...).readlines left it open
  lines = FileTest.readable?(configfile) ? File.readlines(configfile) : []

  i = 0
  group = ""
  grouparr = []
  config = {}

  lines.collect!{|x|
    x.gsub!(/\$\{HOME\}/, "#{ENV['HOME']}")
    if x =~ /^\s*INCLUDEFILE=(.*?)\s*$/i
      x = File.readlines($1)
    end
    x
  }

  lines.flatten!

  lines.collect!{|x|
    x.sub!(/^\s*/, "")
    x.sub!(/\#.*$/, "")
    x.sub!(/\s*$/, "")
    x.gsub!(/\$\{HOME\}/, "#{ENV['HOME']}")
    x.chomp
  }

  while i < lines.length
    line = lines[i]
    while line.sub!(/\s*\\$/, "") != nil
      # a continuation marker on the very last line has nothing to join
      break if i + 1 >= lines.length
      line << lines[i+1]
      i += 1
    end
    line.sub!(/\s*$/, "")
    i += 1
    if line =~ /^OPT_(.*?)=(.*)/
      line = "-#{$1}=#{$2}"
    end
    puts "#{i}: #{line}" if Debuglevel > 1
    if line =~ /(.*?)\s*\+=\s*(.*)/
      if group == ""
        if settings.has_key?($1)
          settings[$1] << $2
        else
          settings[$1] = $2
        end
      else
        grouparr.each{|g|
          if config[g].has_key?($1)
            config[g][$1] << $2
          elsif settings.has_key?($1)
            config[g][$1] = settings[$1] + $2
          else
            config[g][$1] = $2
          end
        }
      end
    elsif line =~ /(.*?)\s*=\s*(.*)/
      if group == ""
        settings[$1] = $2
      else
        grouparr.each{|g|
          config[g][$1] = $2
        }
      end
    elsif line =~ /(.*?)\s*\{/
      group = $1
      grouparr = group.split('|')
      grouparr.each{|g|
        config[g] = {} unless config.has_key?(g)
      }
    elsif line =~ /^\}$/
      settings.each_key{|x|
        grouparr.each{|g|
          config[g][x] = settings[x].dup unless config[g].has_key?(x)
        }
      }
      group = ""
      grouparr = []
    elsif line =~ /^$/
      next
    else
      puts "Error parsing config on line: #{i}"
      return false
    end
  end

  if group != ""
    puts "Error parsing config: group not terminated on line #{i}"
    return false
  end

  if Debuglevel > 2
    config.each_key{|x|
      puts "Group: #{x}"
      config[x].each_key{|y|
        puts "Key: '#{y}' => Value: '#{config[x][y]}'"
      }
    }
  end
  return config
end
|
||
|
||
# Validates @config and fills in derived settings for every group:
# * exits when no group is configured at all
# * warns when a group has no "-I" include pattern
# * defaults DATADIR to "." and PERMISSION to "0755"
# * EXTENSIONS sets both "-S" and "-M"; DELEXT sets both "-SD" and "-MD"
# * when only one of "-S"/"-M" is present, the other becomes "(?!.*)",
#   a pattern that cannot match
def check_config
  if @config.empty?
    puts "No configuration, nothing to do"
    exit
  end

  @config.each do |name, opts|
    puts "No inclusions given for group #{name}. Won't match anything." unless opts.has_key?("-I")

    opts["DATADIR"] = "." unless opts.has_key?("DATADIR")
    opts["PERMISSION"] = "0755" unless opts.has_key?("PERMISSION")

    if opts.has_key?("EXTENSIONS")
      opts["-S"] = opts["EXTENSIONS"]
      opts["-M"] = opts["EXTENSIONS"]
    end
    if opts.has_key?("DELEXT")
      opts["-SD"] = opts["DELEXT"]
      opts["-MD"] = opts["DELEXT"]
    end

    opts["-M"] = "(?!.*)" if opts.has_key?("-S") && !opts.has_key?("-M")
    opts["-S"] = "(?!.*)" if opts.has_key?("-M") && !opts.has_key?("-S")
  end
end
|
||
|
||
# Reports whether the process recorded in a lock file still exists.
# pid:: Integer read from the lock file
def _lock_holder_alive?(pid)
  # 0 or a negative number (e.g. from a non-numeric lock file) would make
  # kill address a process group instead of one process
  return false if pid <= 0
  Process.kill(0, pid) # signal 0 only tests for existence
  true
rescue Errno::ESRCH
  false
rescue Errno::EPERM
  # the process exists but belongs to somebody else
  true
end

# Takes the run lock if the first configured group defines LOCKFILE;
# does nothing otherwise.
#
# An existing lock file whose pid is still alive ends the program (quietly
# when "-s" is set). Empty or stale lock files are removed and replaced by a
# new one holding this process's pid.
def lock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  lockfile = @config[group]["LOCKFILE"]

  if File.exist?(lockfile)
    pid = File.open(lockfile, "r") { |f| f.gets }
    if pid.nil?
      puts "Empty lockfile found... removing..."
      File.unlink(lockfile)
    elsif _lock_holder_alive?(pid.chomp.to_i)
      puts "Already running, exiting..." unless @config[group].has_key?('-s')
      exit
    else
      puts "Stale lock found... removing..."
      File.unlink(lockfile)
    end
  end

  File.open(lockfile, "w") { |f| f.print "#{Process.pid}\n" }
end
|
||
|
||
# Releases the run lock taken by lock().
# Does nothing when the first configured group has no LOCKFILE (lock() skips
# locking in that case, so there is nothing to remove) or when the file is
# already gone.
def unlock
  group = @config.keys[0]
  lockfile = @config[group]["LOCKFILE"]
  return if lockfile.nil?
  File.unlink(lockfile) if File.exist?(lockfile)
end
|
||
|
||
# Applies the NICE setting of the first configured group to this process.
# Without a NICE setting the priority is left alone.
def renice
  first_group = @config.keys[0]
  return unless @config[first_group].has_key?("NICE")

  niceness = @config[first_group]["NICE"].to_i
  Process.setpriority(Process::PRIO_PROCESS, 0, niceness)
end
|
||
|
||
# Fetches, decodes and stores a single-part article.
#
# subj::  subject identifying the article within @articles
# group:: newsgroup whose configuration applies
#
# Returns true once the decoded data has been handed to output_data; false
# when the body is neither uuencoded nor yEnc, when check_ext rejects the
# file name, or when the decoder reports false as mode.
def get_single(subj, group)
  puts "Fetching singlepart article: #{subj}"
  body = @articles.get_group_body(subj)

  uuencoded = UUEncode.is_uuencoded(body)
  unless uuencoded || YEnc.is_yencoded(body)
    puts " Unknown encoding (not UU, not yEnc), skipping..."
    return false
  end

  if uuencoded
    filename = UUEncode.get_filename(body)
    puts " filename #{filename}"
  else
    filename = YEnc.get_filename(body)
  end

  unless check_ext(group, filename, "s", subj)
    puts " Skipping article..."
    return false
  end

  if uuencoded
    puts " UUDecoding..."
    mode, filename, body = UUEncode.uudecode(body)
  else
    puts " YDecoding..."
    mode, filename, body = YEnc.ydecode(body)
  end

  if mode == false
    puts " Decoding failed skipping article..."
    return false
  end

  output_data(subj, mode, filename, body)
  true
end
|
||
|
||
# Fetches a multi-part article and decodes it in a background thread.
#
# subj::  subject identifying the article within @articles
# group:: newsgroup whose configuration applies
#
# Only the first part is fetched up front to detect the encoding and to let
# check_ext judge the file name. Without a TEMPDIR setting the remaining parts
# are appended to the in-memory body; with TEMPDIR they are spooled into a
# Tempfile there and decoded into a second Tempfile, which is then renamed to
# "ripnewsdecode-<n>" in the same directory and passed on by path.
#
# The decode thread is appended to @decode_threads and calls output_data when
# it is done.
#
# Returns true when a decode thread was started; false when the encoding is
# unknown, the file name is rejected or the remaining parts cannot be fetched.
def get_multi(subj, group)
  puts "Fetching multipart article: #{subj}"
  body = @articles.get_group_body_first(subj)

  unless UUEncode.is_uuencoded(body) or YEnc.is_yencoded(body)
    puts " Unknown encoding (not UU, not yEnc), skipping..."
    return false
  end

  if UUEncode.is_uuencoded(body)
    filename = UUEncode.get_filename(body)
    puts " filename #{filename}"
  else
    puts "yenc"
    filename = YEnc.get_filename(body)
    puts "filename #{filename}"
  end
  unless check_ext(group, filename, "m", subj)
    puts " Skipping article..."
    return false
  end

  file = nil    # spool of the encoded article (TEMPDIR mode only)
  fileout = nil # receives the decoded data (TEMPDIR mode only)
  tempdir = @config[group]["TEMPDIR"]
  if tempdir == nil or tempdir == ""
    bodyrest = @articles.get_group_body_rest(subj)
    unless bodyrest
      puts " Skipping article..."
      return false
    end
    body.concat(bodyrest)
  else
    file = Tempfile.new("riptmp", tempdir)
    body.each{|x| file.print "#{x}\n"}

    unless @articles.get_group_body_rest(subj, file)
      puts " Skipping article..."
      # don't leave the half-filled spool open until the GC gets to it
      file.close(true)
      return false
    end
    fileout = Tempfile.new("riptmp", tempdir)
  end

  @decode_threads << Thread.new(body, file, fileout, subj) do |tbody, tfile, tfileout, tsubj|
    # puts "inside thread pre pass\n"
    Thread.pass
    # puts "inside thread post pass\n"
    if UUEncode.is_uuencoded(tbody)
      puts " UUDecoding..."
      if tfile
        tmode, tfilename, tbody = UUEncode.uudecode(tfile, tfileout)
      else
        tmode, tfilename, tbody = UUEncode.uudecode(tbody)
      end
    elsif YEnc.is_yencoded(tbody)
      puts " YDecoding..."
      begin
        if tfile
          tmode, tfilename, tbody = YEnc.ydecode(tfile, tfileout)
        else
          tmode, tfilename, tbody = YEnc.ydecode(tbody)
        end
      rescue YencError
        # XXX if there is a yenc problem I want the data so I can research it
        output_data(tsubj, 0600, "YencProblem", tbody)
        # XXX return succes even though it's not true
        Thread.current.exit
      rescue PermError
        puts "#{$!}"
        puts " Skipping article..."
        Thread.current.exit
      end
    end
    if tmode == false
      puts " Decoding failed skipping article..."
      Thread.current.exit
    end

    if tfile
      # horrible cheat to not lose the outputted file: move it to a name the
      # Tempfile machinery will not delete
      # NOTE(review): the free-name probe below is not synchronised between
      # decode threads -- confirm whether two threads can pick the same name
      tbody = tfileout.path
      tbodybase = tbody.sub(/\/[^\/]*$/, "/ripnewsdecode")
      i = 1
      while File.exist?("#{tbodybase}-#{i}")
        i += 1
      end
      FileUtils.mv(tbody, "#{tbodybase}-#{i}")
      tbody = "#{tbodybase}-#{i}"
      tfile.close
      tfileout.close(false)
    end
    output_data(tsubj, tmode, tfilename, tbody)
  end # thread end

  # puts "ouside thread\n"

  return true
end
|
||
|
||
# Hands the group's "-I" include pattern to @articles as preselect pattern.
# Skipped when the group has no "-I" setting, or when "-MRR" is set (then all
# subjects are needed so the remaining ones can be marked as read).
def fill_preselector(group)
  opts = @config[group]
  return unless opts.has_key?("-I")
  return if opts.has_key?("-MRR") && opts["-MRR"]

  @articles.set_preselect_pattern(Regexp.new(opts["-I"]))
end
|
||
|
||
# Chooses the output file name for a decoded article, saves it below
# "<DATADIR>/<group>" and, if that worked, marks the article as read and
# writes the newsrc (both under @newsrc_lock, since decode threads call this
# concurrently).
#
# subject::  article subject
# mode::     file mode reported by the decoder (only printed for debugging)
# filename:: file name found in the encoded data
# body::     decoded data in any form save_file accepts
#
# Naming, first matching setting wins:
# "-L"::  the cleaned-up subject
# "-C"::  "<subject> [<filename>]"
# "-CP":: "<subject> [<poster>] [<filename>]"
# else::  the file name
# Combined names that would exceed @maxfilelength fall back to the file name.
def output_data(subject, mode, filename="", body="")
  group = @articles.get_groupname
  puts " mode: #{mode}" if Debuglevel > 0
  puts " Filename: '#{filename}'" if Debuglevel > 0

  # de-crap subject...
  sub = subject.sub(/\s*$/, "") # strip trailing spaces
  # Strip leading spaces, dots, exclamation points, dashes and hashes.
  # The class used to be [\s\.!-#]: there "!-#" is the range ! " # and the
  # dash was never matched. The double quote stays in so nothing that was
  # stripped before is kept now.
  sub.sub!(/^[\s.!"#-]*/, "")

  # decide on a filename
  if @config[group].has_key?("-L") and @config[group]["-L"]
    puts "longname" if Debuglevel > 1
    outfile = sub[0...@maxfilelength]
  elsif @config[group].has_key?("-C") and @config[group]["-C"]
    puts "combinedname" if Debuglevel > 1
    # leave room for " [" + filename + "]"
    outfile = sub[0...@maxfilelength-filename.length-3]
    outfile = "#{outfile} [#{filename}]"
    if outfile.length > @maxfilelength
      outfile = filename[0...@maxfilelength]
    end
  elsif @config[group].has_key?("-CP") and @config[group]["-CP"]
    puts "combinedname" if Debuglevel > 1
    poster = @articles.get_group_poster(subject)
    # leave room for " [" + poster + "] [" + filename + "]"
    outfile = sub[0...@maxfilelength-poster.length-filename.length-6]
    outfile = "#{outfile} [#{poster}] [#{filename}]"
    if outfile.length > @maxfilelength
      outfile = filename[0...@maxfilelength]
    end
  else
    puts "shortname" if Debuglevel > 1
    outfile = filename[0...@maxfilelength]
  end

  # do the actual saving
  if save_file("#{@config[group]["DATADIR"]}/#{group}", outfile, body)
    @newsrc_lock.synchronize {
      @articles.group_update_newsrc(subject)
      _save_newsrc(group)
    }
  end
end
|
||
|
||
# Decides from the file extension whether an article should be fetched.
#
# group::    newsgroup whose configuration applies
# filename:: file name found in the article
# mode::     "s" for single-part (settings "-S"/"-SD") or "m" for multi-part
#            (settings "-M"/"-MD"); anything else ends the program
# subject::  article subject, used when marking the article as read
#
# When the extension matches the delete pattern ("-SD"/"-MD") the article is
# marked as read and false is returned. Otherwise the result is the match
# against the wanted pattern ("-S"/"-M"): the match position (truthy) or nil,
# or true when no such pattern is configured.
def check_ext(group, filename, mode, subject)
  wanted_key, delete_key =
    case mode
    when "s" then ["-S", "-SD"]
    when "m" then ["-M", "-MD"]
    else
      puts "Illegal mode \"#{mode}\" in check_ext"
      exit
    end

  opts = @config[group]
  if opts.has_key?(delete_key) && ( filename =~ /\.(#{opts[delete_key]})$/ )
    puts "Marking '#{subject}' as read"
    @articles.group_update_newsrc(subject)
    return false
  end

  return true unless opts.has_key?(wanted_key)
  filename =~ /\.(#{opts[wanted_key]})$/
end
|
||
|
||
# Probes how long a file name may be in +tempdir+ by creating (and deleting)
# a scratch file, shortening its name by one character for every
# Errno::ENAMETOOLONG.
#
# tempdir:: directory to probe; nil and "" mean the current directory, the
#           same as leaving the argument out (startup passes the TEMPDIR
#           setting straight through, which may be unset)
#
# Returns the usable name length minus 14, the room kept for the
# '-<#{date}.#{count}>' suffix save_file appends on name collisions.
# Ends the program when +tempdir+ is not a writable directory.
# Raises Errno::ENOENT when the probe keeps failing with that error.
def get_max_file_length(tempdir=".")
  tempdir = "." if tempdir.nil? || "#{tempdir}".empty?
  if ! FileTest.directory?("#{tempdir}") || ! FileTest.writable?("#{tempdir}")
    puts "Tempdir '#{tempdir}' is not a writable directory"
    exit
  end
  # this is quite stupid, there is no guarantee at all the generated file names
  # don't already exist
  name = "a"*500
  name = "#$$#{name}"
  enoent_failures = 0
  begin
    File.new("#{tempdir}/#{name}", "w", 0644).close
    File.delete("#{tempdir}/#{name}")
  rescue Errno::ENAMETOOLONG
    name = name[0...-1]
    retry
  rescue Errno::ENOENT
    puts "#{$!}"
    puts "strange..."
    # retry a few times only; this used to loop forever
    enoent_failures += 1
    raise if enoent_failures > 3
    retry
  end
  # this is how many characters are still likely to be appended
  # if the filename already exists '-<#{date}.#{count}>' in save_file
  # this could be brought back to 5 '-<#{count}>' ...
  return name.length - 14
end
|
||
|
||
# Comparator for sorting with embedded numbers in numeric order
# ("part 2" before "part 10").
#
# Both arguments are converted with to_s and split into alternating
# non-digit and digit runs. Runs are compared pairwise: two digit runs by
# integer value, anything else as strings. The first difference decides.
#
# Returns the result of that comparison, -1 when +a+ runs out of pieces
# before +b+ does, and 0 otherwise.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)

  left.each_with_index do |x, idx|
    y = right[idx] # nil once b has no pieces left
    both_numeric = (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
    order = both_numeric ? (x.to_i <=> y.to_i) : (x.to_s <=> y.to_s)
    return order if order != 0
  end

  right.length > left.length ? -1 : 0
end
|
||
|
||
# Program initialisation: parses the command line and the config file
# (default "$HOME/.ripnewsrc"), validates the result, takes the run lock,
# adjusts the priority, installs a HUP handler that re-reads the config, and
# measures the maximum file name length in the first group's TEMPDIR.
# Exits when the config cannot be parsed.
def startup
  $stdout.sync = true # line buffered output

  @defaults = parse_options({'-c' => "#{ENV['HOME']}/.ripnewsrc"})
  @config = parse_config(@defaults)
  exit if @config == false
  check_config
  lock
  renice

  trap("HUP") do
    puts "Rereading config..."
    reloaded = parse_config(@defaults)
    if reloaded == false
      puts "Keeping old config due to errors"
    else
      @config = reloaded
      check_config
      puts "Done reading config"
    end
  end

  first_group = @config.keys[0]
  @maxfilelength = get_max_file_length(@config[first_group]["TEMPDIR"])

  puts "\n$Id$"
  puts "Starting: #{@tstart}"

  if Debuglevel > 2
    @config.each_key do |name|
      puts "Group: #{name}"
      @config[name].each_key do |opt|
        puts "Opt: #{opt} val: #{@config[name][opt]}"
      end
    end
  end
end
|
||
|
||
# Main loop: processes every configured group in alphabetical order.
#
# Per group: connect (Article.new), load the article list, make sure the
# output directory exists, then walk the subjects in ward_sort order and
# * mark as read what matches "-MR" (subject) or "-MRF" (poster),
# * fetch what matches "-I" but not "-X" (subject),
# * fetch what matches "-IF" but not "-XF" (poster),
# * pass everything else to _mark_remaining.
# Afterwards wait for the decode threads, save the newsrc and disconnect.
def main
  profile_mem("out side of loop still")

  @config.keys.sort.each do |group|
    @decode_threads = []
    @newsrc_lock = Mutex.new
    profile_mem("#{group} start")
    puts "\nGetting articles for #{group}"
    @articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"])
    fill_preselector(group)
    puts "initialized"
    @articles.get_articles(@config[group]["CACHEDIR"])

    profile_mem("#{group} articles read")

    _create_group_dir(group)

    subjects = @articles.get_group_subjects.sort { |a, b| ward_sort(a, b) }
    subjects.each do |subj|
      puts "#{subj}" if Debuglevel > 2
      poster = @articles.get_group_poster(subj)

      if @config[group].has_key?("-MR") && subj =~ /#{@config[group]["-MR"]}/
        # explicitly mark as read
        puts "Marking '#{subj}' as read"
        _mark_read(subj)
      elsif @config[group].has_key?("-MRF") && poster =~ /#{@config[group]["-MRF"]}/
        puts "Marking poster '#{poster}' as read (subj: '#{subj}')"
        _mark_read(subj)
      elsif !(@config[group].has_key?("-X") && subj =~ /#{@config[group]["-X"]}/) &&
            subj =~ /#{@config[group]["-I"]}/
        # get the juicy bits
        puts "Match: #{subj}" if Debuglevel > 0
        _get_article(subj, group)
      elsif !(@config[group].has_key?("-XF") && poster =~ /#{@config[group]["-XF"]}/) &&
            @config[group].has_key?("-IF") && poster =~ /#{@config[group]["-IF"]}/
        puts "Poster match: #{poster}" if Debuglevel > 0
        _get_article(subj, group)
      else
        _mark_remaining(subj, group)
      end
    end

    _wait_for_threads
    _save_newsrc(group)

    @articles.quit
    @articles = nil
    profile_mem("#{group} pre-GC")
    GC.start
    profile_mem("#{group} end")
  end
end
|
||
|
||
# Prints the finish time and the total running time (hh:mm:ss, measured from
# @tstart), then releases the run lock.
def ending
  tend = Time.now
  puts "\nFinished: #{tend}"

  elapsed = (tend - @tstart).to_i
  hours, remainder = elapsed.divmod(3600)
  minutes, seconds = remainder.divmod(60)
  printf("Running time: %02d:%02d:%02d\n", hours, minutes, seconds)

  unlock
end
|
||
|
||
# Makes sure the output directory "<DATADIR>/<group>" exists, creating it
# with the group's PERMISSION setting (an octal string) when necessary.
# Prints a message and ends the program when the directory cannot be created.
# Dir.mkdir signals failure by raising, so the old "unless ... or Dir.mkdir"
# test could never reach its error branch.
def _create_group_dir(group)
  path = "#{@config[group]["DATADIR"]}/#{group}"
  return if FileTest.directory?(path)

  begin
    Dir.mkdir(path, @config[group]["PERMISSION"].oct)
  rescue SystemCallError
    puts "eeeps, couldn't create dir"
    puts "Error: #{$!}"
    exit
  end
end
|
||
|
||
# Tells whether an article should be skipped because the percentage reported
# by @articles.group_percentage_primary is below the group's PRIMARYTHRES
# setting. Prints the reason when it is.
#
# Returns true to skip; false otherwise or when PRIMARYTHRES is not set.
def _primary_thres_skip(subj, group)
  return false unless @config[group].has_key?("PRIMARYTHRES")
  return false unless @articles.group_percentage_primary(subj) < @config[group]["PRIMARYTHRES"].to_i

  puts "Only #{@articles.group_percentage_primary(subj)}% on primary, skipping: #{subj}"
  true
end
|
||
|
||
# Tells whether an article should be skipped because the percentage reported
# by @articles.group_percentage_fallback is above the group's FALLBACKTHRES
# setting. Prints the reason when it is.
#
# Returns true to skip; false otherwise or when FALLBACKTHRES is not set.
def _fallback_thres_skip(subj, group)
  return false unless @config[group].has_key?("FALLBACKTHRES")
  return false unless @articles.group_percentage_fallback(subj) > @config[group]["FALLBACKTHRES"].to_i

  puts "#{@articles.group_percentage_fallback(subj)}% only on fallback, skipping: #{subj}"
  true
end
|
||
|
||
# Marks the article with subject +subj+ as read by passing it to
# @articles.group_update_newsrc; nothing is fetched.
def _mark_read(subj)
  @articles.group_update_newsrc(subj)
end
|
||
|
||
# Fetches one matched article unless it is incomplete or one of the server
# threshold checks says to skip it. Both threshold checks always run, so each
# can print its reason.
#
# Single-part articles go to get_single, multi-part ones to get_multi.
# TempError, PermError and YencError raised while fetching are printed and
# the article is skipped.
def _get_article(subj, group)
  unless @articles.group_is_complete(subj)
    puts "Not complete: #{subj}"
    return
  end

  too_little_primary = _primary_thres_skip(subj, group)
  too_much_fallback = _fallback_thres_skip(subj, group)
  return if too_little_primary || too_much_fallback

  begin
    if @articles.group_is_singlepart(subj)
      get_single(subj, group)
    elsif @articles.group_is_multipart(subj)
      get_multi(subj, group)
    end
  rescue TempError, PermError, YencError
    puts "#{$!}"
    puts " Skipping article..."
    #puts "Caught #{$!.class}"
    #puts "Error: #{$!}"
  end
end
|
||
|
||
# Marks a subject that matched nothing as read, provided "Mark Remaining
# Read" ("-MRR") is set for the group and the subject matches neither the
# exclude pattern "-X" nor the include pattern "-I".
#
# The former inner check for a "-I" match was removed: it sat inside a
# condition that already requires the subject not to match "-I", so it could
# never trigger.
def _mark_remaining(subj, group)
  opts = @config[group]
  return unless opts.has_key?("-MRR") && opts["-MRR"]
  return if opts.has_key?("-X") && subj =~ /#{opts["-X"]}/
  return if subj =~ /#{opts["-I"]}/

  puts "Marking remaining '#{subj}' as read"
  @articles.group_update_newsrc(subj)
end
|
||
|
||
# Blocks until every decode thread in @decode_threads has finished, reporting
# each one as it terminates. The server connection is closed first
# (@articles.disconnect). Does nothing when no thread was started.
def _wait_for_threads
  return if @decode_threads.empty?

  @articles.disconnect
  puts "Waiting for decode threads..."
  ThreadsWait.all_waits(@decode_threads) do |finished|
    puts "Thread #{finished} has terminated"
  end
  puts "Decode threads all done"
end
|
||
|
||
# Writes the newsrc through @articles.save_newsrc, except in test mode
# (the group's "-T" setting is present and truthy).
def _save_newsrc(group)
  test_mode = @config[group].has_key?("-T") && @config[group]["-T"]
  @articles.save_newsrc unless test_mode
end
|
||
|
||
# Script entry point: initialise, process all groups, then report the running
# time and release the lock.
startup
main
ending
|