#!/usr/local/bin/ruby -w

# $Dwarf: ripnews.rb,v 1.80 2004/05/19 09:25:40 ward Exp $
# $Source$

#
# Copyright (c) 2002, 2003, 2004 Ward Wouts
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

require 'date'
begin
  # ftools only ships with Ruby 1.8; it is kept (guarded) in case one of the
  # project libraries loaded below still expects File.move and friends.
  require 'ftools'
rescue LoadError
  # Not available on newer Rubies; this script itself only needs FileUtils.
end
require 'fileutils'
require 'time'
require 'getoptlong'
require 'news/article'
require 'news/newsrc'
require 'tempfile'
require 'encode/uuencode'
require 'encode/yenc'

###########################################################################

# Debug verbosity. Diagnostic prints throughout the script are guarded by
# "Debuglevel > N"; 0 keeps them all silent.
Debuglevel = 0

# Start time of the run; read by startup and ending.
@tstart = Time.now

# Stores +data+ in directory +dir+ under a name derived from +name+.
#
# Slashes in +name+ become dashes, and leading whitespace/dots/dashes and
# trailing whitespace are stripped. If the resulting name already exists in
# +dir+, "-<YYYYMMDD.count>" is appended with the first free count.
#
# +data+ may be:
# * a String   - treated as the path of an existing file, which is moved,
# * an Array   - every element is written, in order, to a new file,
# * a Tempfile - its backing file is moved.
#
# Returns true when the file was stored, false otherwise (a message is
# printed in that case).
def save_file(dir, name, data)
  print "savename: #{name}\n" if Debuglevel > 1
  nname = name.gsub(/\//, "-")
  nname.sub!(/\s*$/, "")
  nname.sub!(/^[\s\.-]*/, "")
  print "nname: #{nname}\n" if Debuglevel > 1

  newname = nname
  count = 1
  # Zero-padded so the stamp is unambiguous and always 8 characters long,
  # which is what the reserve in get_max_file_length accounts for.
  date = Date.today.strftime("%Y%m%d")
  while File.exist?("#{dir}/#{newname}")
    newname = "#{nname}-<#{date}.#{count}>"
    count += 1
  end
  print "name: #{newname}\n" if Debuglevel > 1
  target = "#{dir}/#{newname}"

  case data
  when String
    return save_file_move(data, target, newname)
  when Array
    begin
      # Block form closes the handle even when a write fails; "wb" keeps
      # decoded binary data untouched on platforms with a text mode.
      File.open(target, "wb", 0644) do |file|
        data.each { |chunk| file.print chunk.to_s }
      end
    rescue SystemCallError
      print "couldn't open file for writeing\n"
      print "Error: #{$!}\n"
      return false
    end
    print " Saving as: '#{newname}'\n"
  when Tempfile
    return save_file_move(data.path, target, newname)
  else
    print "EEEEPS Can't save data of type: #{data.class}\n"
    return false
  end
  true
end

# Helper for save_file: moves the file at +source+ to +target+ and reports
# the outcome. Returns true on success, false when the move failed.
def save_file_move(source, target, newname)
  FileUtils.mv(source, target)
  print " Saving as: '#{newname}'\n"
  true
rescue Errno::ENOENT
  print "Caught Errno::ENOENT (save_file)\n"
  print "Error: #{$!}\n"
  print "What the *beep* happened?\n"
  false
rescue SystemCallError
  print "couldn't rename tempfile\n"
  print "Error: #{$!}\n"
  false
end

# Parses the command line into +options+ (a Hash keyed by the short option
# name, e.g. "-I"); options without an argument are stored with the value
# GetoptLong yields for them. On any option error the message and the usage
# text are printed and the program exits.
#
# Returns +options+.
def parse_options(options)
  begin
    opts = GetoptLong.new(
      [ "-I", "--include",      GetoptLong::REQUIRED_ARGUMENT ],
      [ "-c", "--configfile",   GetoptLong::REQUIRED_ARGUMENT ],
      [ "-L", "--longname",     GetoptLong::NO_ARGUMENT ],
      [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
      [ "-M", "--multipart",    GetoptLong::NO_ARGUMENT ],
      [ "-S", "--singlepart",   GetoptLong::NO_ARGUMENT ],
      [ "-T", "--test",         GetoptLong::NO_ARGUMENT ],
      [ "-X", "--exclude",      GetoptLong::REQUIRED_ARGUMENT ]
    )
    opts.quiet = true
    opts.each do |opt, arg|
      options[opt] = arg
    end
  rescue GetoptLong::Error
    # Covers invalid options as well as missing/needless arguments.
    print "#{$!}\n"
    usage
  end
  options
end

# Prints the command line synopsis and exits.
def usage
  print "\nUsage:\n\n"
  print "ripnews.rb [-I <pattern>] [-c <file>] [-L] [-C] [-M] [-S] [-T] [-X <pattern>]\n\n"
  print "-I specify an include pattern\n"
  print "-c specify an alternate configfile\n"
  print "-L use subject as filename\n"
  print "-C use combined filenames\n"
  print "-M get multipart articles\n"
  print "-S get singlepart articles\n"
  print "-T test mode, don't update newsrc file\n"
  print "-X specify an exclude pattern\n"
  exit
end

# Reads the config file named by default['-c'] and returns a Hash that maps
# each group name to a Hash of settings, or false on a syntax error.
#
# Recognised line forms (after stripping "#" comments and surrounding
# whitespace, expanding ${HOME}, and joining lines that end in "\"):
#
#   KEY=VALUE       set a value (a default outside a group, else per group)
#   KEY+=VALUE      append to the group's value, or to the default
#   OPT_X=VALUE     shorthand for -X=VALUE
#   name1|name2 {   open a section for one or more groups
#   }               close the section; missing keys inherit the defaults
#
# +default+ supplies the initial defaults (typically the command line
# options). It is not modified, so the same Hash can be passed again when
# the config is reread.
def parse_config(default = {})
  print "Parsing config\n"
  print "#{default['-c']}\n"

  # Work on a private copy: defaults read from the file must not leak into
  # the caller's Hash, or "+=" lines would pile up on every reread.
  defaults = {}
  default.each do |key, value|
    defaults[key] = value.is_a?(String) ? value.dup : value
  end

  path = "#{default['-c']}"
  lines = FileTest.readable?(path) ? File.readlines(path) : []
  lines = lines.collect do |raw|
    raw.sub(/^\s*/, "").sub(/\#.*$/, "").sub(/\s*$/, "").
      gsub(/\$\{HOME\}/, "#{ENV['HOME']}").chomp
  end

  i = 0
  group = ""
  grouparr = []
  config = {}

  while i < lines.length
    line = lines[i].dup
    # Join continuation lines (trailing backslash).
    while line.sub!(/\s*\\$/, "")
      i += 1
      break if i >= lines.length # dangling "\" on the very last line
      line << lines[i]
    end
    line.sub!(/\s*$/, "")
    i += 1

    line = "-#{$1}=#{$2}" if line =~ /^OPT_(.*?)=(.*)/
    print "#{i}: #{line}\n" if Debuglevel > 1

    if line =~ /(.*?)\s*\+=\s*(.*)/
      key, value = $1, $2
      if group == ""
        defaults[key] = defaults.has_key?(key) ? defaults[key] + value : value
      else
        grouparr.each do |g|
          # Build a new string instead of appending in place, so values
          # shared with the defaults or other groups stay untouched.
          if config[g].has_key?(key)
            config[g][key] = config[g][key] + value
          elsif defaults.has_key?(key)
            config[g][key] = defaults[key] + value
          else
            config[g][key] = value.dup
          end
        end
      end
    elsif line =~ /(.*?)\s*=\s*(.*)/
      key, value = $1, $2
      if group == ""
        defaults[key] = value
      else
        grouparr.each { |g| config[g][key] = value.dup }
      end
    elsif line =~ /(.*?)\s*\{/
      group = $1
      grouparr = group.split('|')
      grouparr.each do |g|
        config[g] = {} unless config.has_key?(g)
      end
    elsif line =~ /^\}$/
      # Closing a section: fill in every default the groups did not set.
      defaults.each_key do |key|
        grouparr.each do |g|
          next if config[g].has_key?(key)
          inherited = defaults[key]
          config[g][key] = inherited.is_a?(String) ? inherited.dup : inherited
        end
      end
      group = ""
      grouparr = []
    elsif line =~ /^$/
      next
    else
      print "Error parsing config on line: #{i}\n"
      return false
    end
  end

  if group != ""
    print "Error parsing config: group not terminated on line #{i}\n"
    return false
  end

  if Debuglevel > 2
    config.each_key do |x|
      print "Group: #{x}\n"
      config[x].each_key do |y|
        print "Key: '#{y}' => Value: '#{config[x][y]}'\n"
      end
    end
  end
  config
end

# Validates @config and fills in derived settings for every group:
# * exits when no group is configured at all,
# * warns when a group has no include pattern ("-I"),
# * defaults DATADIR to "." and PERMISSION to "0755",
# * copies EXTENSIONS to "-S"/"-M" and DELEXT to "-SD"/"-MD",
# * when only one of "-S"/"-M" is set, gives the other a pattern that can
#   never match.
def check_config
  if @config.length == 0
    print "No configuration, nothing to do\n"
    exit
  end

  @config.each_key do |group|
    settings = @config[group]
    unless settings.has_key?("-I")
      print "No inclusions given for group #{group}. Won't match anything.\n"
    end
    settings["DATADIR"] = "." unless settings.has_key?("DATADIR")
    settings["PERMISSION"] = "0755" unless settings.has_key?("PERMISSION")
    if settings.has_key?("EXTENSIONS")
      settings["-S"] = settings["EXTENSIONS"]
      settings["-M"] = settings["EXTENSIONS"]
    end
    if settings.has_key?("DELEXT")
      settings["-SD"] = settings["DELEXT"]
      settings["-MD"] = settings["DELEXT"]
    end
    # "(?!.*)" is a negative lookahead that always fails: matches nothing.
    settings["-M"] = "(?!.*)" if settings.has_key?("-S") && !settings.has_key?("-M")
    settings["-S"] = "(?!.*)" if settings.has_key?("-M") && !settings.has_key?("-S")
  end
end

# Takes the lock named by LOCKFILE (read from the first group in @config),
# if one is configured.
#
# An existing lockfile holds the pid of its owner. When `ps auxw` still
# lists that pid the program exits; otherwise the lock is considered stale
# and is replaced by one holding our own pid.
def lock
  group = @config.keys[0]
  return unless @config[group].has_key?("LOCKFILE")
  lockfile = @config[group]["LOCKFILE"]

  if File.exist?(lockfile)
    pid = File.open(lockfile, "r") { |f| f.gets }.to_s.strip
    if pid.empty?
      # Covers a zero-length file as well as one holding only whitespace;
      # an empty pid would otherwise match nearly every line of ps output.
      print "Empty lockfile found... removing...\n"
      File.unlink(lockfile)
    elsif /^\S+\s+#{Regexp.escape(pid)}\s+/.match(`ps auxw`)
      print "Already running, exiting...\n"
      exit
    else
      print "Stale lock found... removing...\n"
      File.unlink(lockfile)
    end
  end

  File.open(lockfile, "w") { |f| f.print "#{Process.pid}\n" }
end

# Removes the lockfile taken by lock. Does nothing when no LOCKFILE is
# configured or the file is already gone.
def unlock
  group = @config.keys[0]
  lockfile = @config[group]["LOCKFILE"]
  File.unlink(lockfile) if lockfile && File.exist?(lockfile)
end

# Applies the NICE setting (read from the first group in @config) to the
# current process, if one is configured.
def renice
  group = @config.keys[0]
  if @config[group].has_key?("NICE")
    Process.setpriority(Process::PRIO_PROCESS, 0, @config[group]["NICE"].to_i)
  end
end

# Fetches, decodes and stores the single-part article with subject +subj+.
#
# Returns false when the encoding is unknown, the filename is rejected by
# check_ext, or the decoder reports failure (mode == false); true otherwise.
# NOTE(review): the result of output_data is not checked, so true is also
# returned when saving failed.
def get_single(subj, group)
  print "Fetching singlepart article: #{subj}\n"
  body = @articles.get_group_body(subj)

  if UUEncode.is_uuencoded(body)
    filename = UUEncode.get_filename(body)
    print " filename #{filename}\n"
    return false unless check_ext(group, filename, "s", subj)
    print " UUDecoding...\n"
    mode, filename, body = UUEncode.uudecode(body)
  elsif YEnc.is_yencoded(body)
    filename = YEnc.get_filename(body)
    return false unless check_ext(group, filename, "s", subj)
    print " YDecoding...\n"
    mode, filename, body = YEnc.ydecode(body)
  else
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end

  return false if mode == false
  output_data(subj, mode, filename, body)
  true
end

# Fetches, decodes and stores the multi-part article with subject +subj+.
#
# Without a TEMPDIR setting the whole article is decoded in memory; with
# one, the parts are spooled to temp files in that directory first.
# Returns false when the article was skipped, true otherwise (see
# get_single for the caveat about save failures).
def get_multi(subj, group)
  print "Fetching multipart article: #{subj}\n"
  tempdir = @config[group]["TEMPDIR"]
  if tempdir == nil || tempdir == ""
    get_multi_in_memory(subj, group)
  else
    get_multi_via_tempdir(subj, group)
  end
end

# Helper for get_multi: fetches the complete article body and decodes it
# in memory.
def get_multi_in_memory(subj, group)
  body = @articles.get_group_body(subj)

  if UUEncode.is_uuencoded(body)
    filename = UUEncode.get_filename(body)
    print " filename #{filename}\n"
    return false unless check_ext(group, filename, "m", subj)
    print " UUDecoding...\n"
    mode, filename, body = UUEncode.uudecode(body)
  elsif YEnc.is_yencoded(body)
    print "yencc\n"
    filename = YEnc.get_filename(body)
    print "filename #{filename}\n"
    return false unless check_ext(group, filename, "m", subj)
    print " YDecoding...\n"
    mode, filename, body = YEnc.ydecode(body)
  else
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end

  return false if mode == false
  output_data(subj, mode, filename, body)
  true
end

# Helper for get_multi: inspects the first part to learn the encoding and
# filename, spools all parts to a temp file in TEMPDIR, decodes into a
# second temp file and hands that file's path to output_data.
def get_multi_via_tempdir(subj, group)
  tempdir = @config[group]["TEMPDIR"]
  body = @articles.get_group_body_first(subj)

  # The encoding is probed once and reused below.
  # NOTE(review): assumes the is_*encoded predicates have no side effects.
  uuencoded = UUEncode.is_uuencoded(body)
  yencoded = !uuencoded && YEnc.is_yencoded(body)
  unless uuencoded || yencoded
    print " Unknown encoding (not UU, not yEnc), skipping...\n"
    return false
  end

  if uuencoded
    filename = UUEncode.get_filename(body)
    print " filename #{filename}\n"
  else
    print "yencc\n"
    filename = YEnc.get_filename(body)
    print "filename #{filename}\n"
  end
  return false unless check_ext(group, filename, "m", subj)

  file = Tempfile.new("riptmp", tempdir)
  fileout = nil
  begin
    body.each { |x| file.print "#{x}\n" }
    return false unless @articles.get_group_body_rest(subj, file)
    fileout = Tempfile.new("riptmp", tempdir)

    # I think a thread should start about here
    if uuencoded
      print " UUDecoding...\n"
      mode, filename, _decoded = UUEncode.uudecode(file, fileout)
    else
      print " YDecoding...\n"
      mode, filename, _decoded = YEnc.ydecode(file, fileout)
    end
    return false if mode == false

    # horrible cheat to not lose the outputted file: rename it so that the
    # Tempfile clean-up no longer knows about it
    outpath = fileout.path
    bodybase = outpath.sub(/\/[^\/]*$/, "/ripnewsdecode")
    i = 1
    i += 1 while File.exist?("#{bodybase}-#{i}")
    decoded_path = "#{bodybase}-#{i}"
    FileUtils.mv(outpath, decoded_path)

    # Close (and thereby flush) before output_data moves the file again.
    file.close
    fileout.close
    output_data(subj, mode, filename, decoded_path)
    # thread could end here
    true
  ensure
    # Remove the spool files on every exit path, including early returns.
    file.close!
    fileout.close! if fileout
  end
end

# Chooses the output filename for a decoded article and stores it in
# "<DATADIR>/<group>" via save_file.
#
# Naming depends on the group's settings: "-L" uses the (cleaned) subject,
# "-C" uses "subject [filename]" (falling back to the filename when that
# gets too long), otherwise the decoded +filename+ is used. Every variant is
# truncated to @maxfilelength.
#
# When saving succeeds the article is marked as read, and the newsrc is
# written unless test mode ("-T") is set.
def output_data(subject, mode, filename="", body="")
  group = @articles.get_groupname
  print " mode: #{mode}\n" if Debuglevel > 0
  print " Filename: '#{filename}'\n" if Debuglevel > 0
  settings = @config[group]

  # de-crap subject...
  sub = subject.sub(/\s*$/, "") # strip trailing spaces
  # strip leading spaces, dots, exclamation points, dashes and hashes
  # (the dash goes last in the class so it is a literal, not a range)
  sub.sub!(/^[\s.!#-]*/, "")

  # decide on a filename
  if settings["-L"]
    print "longname\n" if Debuglevel > 1
    outfile = sub[0...@maxfilelength]
  elsif settings["-C"]
    print "combinedname\n" if Debuglevel > 1
    outfile = sub[0...@maxfilelength - filename.length - 3]
    outfile = "#{outfile} [#{filename}]"
    if outfile.length > @maxfilelength
      outfile = filename[0...@maxfilelength]
    end
  else
    print "shortname\n" if Debuglevel > 1
    outfile = filename[0...@maxfilelength]
  end

  # do the actual saving
  if save_file("#{settings["DATADIR"]}/#{group}", outfile, body)
    @articles.group_update_newsrc(subject)
    @articles.save_newsrc unless settings["-T"]
  end
end

# Decides whether an attachment called +filename+ should be fetched.
#
# +mode+ is "s" (single-part) or "m" (multi-part) and selects the settings
# "-S"/"-SD" or "-M"/"-MD"; any other value prints an error and exits.
#
# * If the extension matches the delete pattern ("-SD"/"-MD") the article
#   is marked as read and false is returned.
# * If an extension pattern ("-S"/"-M") is configured, the result is truthy
#   only when the extension matches it.
# * Without an extension pattern the result is true.
def check_ext(group, filename, mode, subject)
  unless mode == "s" || mode == "m"
    print "Illegal mode \"#{mode}\" in check_ext\n"
    exit
  end

  settings = @config[group]
  wanted_key = "-#{mode.upcase}"  # "-S" or "-M"
  delete_key = "#{wanted_key}D"   # "-SD" or "-MD"

  if settings.has_key?(delete_key) && (filename =~ /\.(#{settings[delete_key]})$/)
    print "Marking '#{subject}' as read\n"
    @articles.group_update_newsrc(subject)
    return false
  end

  return true unless settings.has_key?(wanted_key)
  filename =~ /\.(#{settings[wanted_key]})$/
end

# Probes how long a filename in +tempdir+ may be by creating files with
# ever shorter names until one is accepted, and returns that length minus
# the room save_file may need for its uniqueness suffix.
#
# A nil or empty +tempdir+ means "current directory". Exits when the
# directory is not a writable directory.
def get_max_file_length(tempdir=".")
  # Callers pass the TEMPDIR setting straight through, which is nil when it
  # is not configured; the parameter default does not apply in that case.
  tempdir = "." if tempdir.nil? || tempdir.to_s.empty?

  if !FileTest.directory?("#{tempdir}") || !FileTest.writable?("#{tempdir}")
    print "Tempdir '#{tempdir}' is not a writable directory\n"
    exit
  end

  # this is quite stupid, there is no guarantee at all the generated file names
  # don't already exist
  name = "#{Process.pid}#{'a' * 500}"
  enoent_failures = 0
  begin
    File.new("#{tempdir}/#{name}", "w", 0644).close
    File.delete("#{tempdir}/#{name}")
  rescue Errno::ENAMETOOLONG
    # Terminates on its own: the name gets one character shorter each time.
    name = name[0...-1]
    retry
  rescue Errno::ENOENT
    print "#{$!}\n"
    print "raar hoor\n"
    enoent_failures += 1
    retry if enoent_failures < 5
    # Give up instead of spinning forever, e.g. when the directory vanished.
    print "Tempdir '#{tempdir}' is not a writable directory\n"
    exit
  end

  # this is how many characters are still likely to be appended
  # if the filename already exists '-<#{date}.#{count}>' in save_file
  # this could be brought back to 5 '-<#{count}>' ...
  name.length - 14
end

# Sort comparator that orders embedded runs of digits numerically and
# everything else as plain strings, so "part 9" sorts before "part 10".
# Returns a negative number, 0 or a positive number like <=> does.
def ward_sort(a, b)
  left = a.to_s.split(/([0-9]+)/)
  right = b.to_s.split(/([0-9]+)/)

  left.each do |x|
    y = right.shift
    both_numeric = (x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)
    r = both_numeric ? (x.to_i <=> y.to_i) : (x.to_s <=> y.to_s)
    return r if r != 0
  end

  # Equal so far; +b+ still has pieces left, so +a+ sorts first.
  return -1 unless right.empty?
  0
end

# Prepares a run: parses the command line and config file, validates the
# config, takes the lock, renices the process, installs a HUP handler that
# rereads the config, and determines the maximum filename length.
def startup
  $stdout.sync = true # line buffered output
  @defaults = {'-c' => "#{ENV['HOME']}/.ripnewsrc"}
  @defaults = parse_options(@defaults)
  @config = parse_config(@defaults)
  exit if @config == false
  check_config
  lock
  renice

  trap("HUP") {
    print "Rereading config...\n"
    config = parse_config(@defaults)
    if config != false
      @config = config
      check_config
      print "Done reading config\n"
    else
      print "Keeping old config due to errors\n"
    end
  }

  @maxfilelength = get_max_file_length(@config[@config.keys[0]]["TEMPDIR"])

  print "\n$Id$\n"
  print "Starting: #{@tstart}\n"

  if Debuglevel > 2
    @config.each_key do |i|
      print "Group: #{i}\n"
      @config[i].each_key do |j|
        print "Opt: #{j} val: #{@config[i][j]}\n"
      end
    end
  end
end

# Processes every configured group in alphabetical order: connects, makes
# sure the group's data directory exists, and walks the subjects in
# ward_sort order. Subjects matching "-MR" are only marked as read;
# subjects matching "-I" (and not "-X") are fetched when complete.
def main
  @config.keys.sort.each do |group|
    print "\nGetting articles for #{group}\n"
    @articles = Article.new(@config[group]["NNTPSERVER"], group,
                            @config[group]["NEWSRCNAME"])
    @articles.get_articles(@config[group]["CACHEDIR"])

    datadir = "#{@config[group]["DATADIR"]}/#{group}"
    unless FileTest.directory?(datadir)
      begin
        Dir.mkdir(datadir, @config[group]["PERMISSION"].oct)
      rescue SystemCallError
        # Dir.mkdir raises on failure rather than returning a falsy value.
        print "eeeps, couldn't create dir\n"
        exit
      end
    end

    @articles.get_group_subjects.sort { |a, b| ward_sort(a, b) }.each do |subj|
      print "#{subj}\n" if Debuglevel > 2
      # Looked up per subject so a config reread (HUP) takes effect at once.
      settings = @config[group]

      if settings.has_key?("-MR") && subj =~ /#{settings["-MR"]}/
        print "Marking '#{subj}' as read\n"
        @articles.group_update_newsrc(subj)
        next
      end

      next if settings.has_key?("-X") && subj =~ /#{settings["-X"]}/
      # No include pattern means no match (as check_config announces); an
      # absent pattern must not degrade into the match-everything regexp //.
      next unless settings.has_key?("-I") && subj =~ /#{settings["-I"]}/

      print "Match: #{subj}\n" if Debuglevel > 0
      unless @articles.group_is_complete(subj)
        print "Not complete: #{subj}\n"
        next
      end

      begin
        success = nil
        if @articles.group_is_singlepart(subj)
          success = get_single(subj, group)
        elsif @articles.group_is_multipart(subj)
          success = get_multi(subj, group)
        end
        print " Skipping article...\n" if success == false
      rescue Article::TempError, Article::PermError
        print "#{$!}\n"
        print " Skipping article...\n"
      end
    end

    @articles.quit
  end
end

# Prints the finish time and total running time, then releases the lock.
def ending
  tend = Time.now
  print "\nFinished: #{tend}\n"
  runtime = (tend - @tstart).to_i
  h = runtime / 3600
  m = runtime % 3600
  s = m % 60
  m = m / 60
  printf("Running time: %02d:%02d:%02d\n", h, m, s)
  unlock
end

startup
main
ending