From ad42b063296d7424a815f51cf26b86cc939e4dd5 Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Thu, 1 Aug 2002 13:16:55 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create tag 'ripnews-release-0_0_9'. --- tags/ripnews-release-0_0_9/ripnews/CHANGELOG | 45 + tags/ripnews-release-0_0_9/ripnews/INSTALL | 8 + tags/ripnews-release-0_0_9/ripnews/README | 151 +++ tags/ripnews-release-0_0_9/ripnews/TODO | 18 + .../ripnews-release-0_0_9/ripnews/net/nntp.rb | 319 +++++ .../ripnews/news/article.rb | 1030 +++++++++++++++++ .../ripnews/news/newsrc.rb | 415 +++++++ tags/ripnews-release-0_0_9/ripnews/ripnews.rb | 386 ++++++ .../ripnews/set/intspan.rb | 928 +++++++++++++++ 9 files changed, 3300 insertions(+) create mode 100644 tags/ripnews-release-0_0_9/ripnews/CHANGELOG create mode 100644 tags/ripnews-release-0_0_9/ripnews/INSTALL create mode 100644 tags/ripnews-release-0_0_9/ripnews/README create mode 100644 tags/ripnews-release-0_0_9/ripnews/TODO create mode 100644 tags/ripnews-release-0_0_9/ripnews/net/nntp.rb create mode 100644 tags/ripnews-release-0_0_9/ripnews/news/article.rb create mode 100644 tags/ripnews-release-0_0_9/ripnews/news/newsrc.rb create mode 100755 tags/ripnews-release-0_0_9/ripnews/ripnews.rb create mode 100644 tags/ripnews-release-0_0_9/ripnews/set/intspan.rb diff --git a/tags/ripnews-release-0_0_9/ripnews/CHANGELOG b/tags/ripnews-release-0_0_9/ripnews/CHANGELOG new file mode 100644 index 0000000..be43a0b --- /dev/null +++ b/tags/ripnews-release-0_0_9/ripnews/CHANGELOG @@ -0,0 +1,45 @@ +# $Id$ +# $Source$ + +from 0.0.8 to 0.0.9 + - maxfilelength check + - improved subject checking + - linebuffered stdout + - always use push when adding stuff to an array, this is way more + efficient than += + - always use << when adding stuff to a string, this is way more + efficient than += + +from 0.0.7 to 0.0.8 + - more and simpler exceptions + - better argument checking + - more helpful help + +from 0.0.6 to 0.0.7 + - use exceptions for a lot of 
problems + - code cleanups + +from 0.0.5 to 0.0.6 + - new option -C for combined filenames eg. "subject-[filename]" + - prevent reconnect loops + - be more paranoid with decoding yEnc-encoded articles + - more/better timeouts + +from 0.0.4 to 0.0.5 + - implement timeouts on article fetching + (no more "hangs", hopefully) + - remove servers from list on connection failure + - much more robust + +from 0.0.3 to 0.0.4 + - server reconnects now work + +from 0.0.2 to 0.0.3 + - filtering on file extensions + - multiple servers are now tried in order + +from 0.01 to 0.02 + - yEnc support by Stijn Hoop. Thanks. + - change cache file format + - sort cache file + - minor bugs diff --git a/tags/ripnews-release-0_0_9/ripnews/INSTALL b/tags/ripnews-release-0_0_9/ripnews/INSTALL new file mode 100644 index 0000000..51f706e --- /dev/null +++ b/tags/ripnews-release-0_0_9/ripnews/INSTALL @@ -0,0 +1,8 @@ +# $Id$ +# $Source$ + +For now the easiest way to install this is just extract the tarball in +its own directory and run ./ripnews.rb from there. Before running you +should make your own .ripnewsrc configuration file which is described in +the README file. You may have to change the first line in ripnews.rb to +point to your ruby executable. diff --git a/tags/ripnews-release-0_0_9/ripnews/README b/tags/ripnews-release-0_0_9/ripnews/README new file mode 100644 index 0000000..7602f22 --- /dev/null +++ b/tags/ripnews-release-0_0_9/ripnews/README @@ -0,0 +1,151 @@ +# $Id$ +# $Source$ + +Ripnews is a bulk downloader for usenet. It's quite flexible in terms of +configuration. Some of its features are: + +- basic support for multiple servers per group +- caching of article headers to speed up reading of newsgroups +- newsrc file support (one newsrc file per server) +- flexible but simple configuration + +Configuration: +============== + +I'll just give a commented example config, it should be pretty clear, +after that I'll list the possible options. 
+ +<== cut here ==> +# Set the default NNTPSERVER to localhost +NNTPSERVER=localhost + +# Set the cachedir, this is where the subject caches are stored +# without this ripnews will be much slower (but should still work) +CACHEDIR=/mnt/newspace/News/.ripnews_caches + +# Set the datadir, this is where a subdir for each group will be made to +# store the ripped articles +DATADIR=/mnt/newspace/News + +# Set the tempdir, used to store the undecoded data. Without this ripnews +# uses a lot more memory +TEMPDIR=/mnt/newspace/News/ripnews_temp + +# Set include pattern to a case insensitive "bad religion" +OPT_I=(?i)bad religion + +# Set the base newsrc name. The server name will be appended. +NEWSRCNAME=/ward/src/ruby/ripnews/.newsrc + +# Set the permission to create subdirs with +PERMISSION=0700 + +# For alt.binaries.e-book.technical change from defaults... +alt.binaries.e-book.technical { + # Set another include pattern + OPT_I=(?i)reilly +} + +alt.binaries.e-book.flood { + # Add to default pattern, this will not be case insensitive + # anymore, cause that's how ruby patterns work + OPT_I+=|douglas adams +} + +# For both alt.binaries.e-book.technical and alt.binaries.e-book.flood +# change some value +alt.binaries.e-book.technical| \ +alt.binaries.e-book.flood { + # Sets long filenames. If this is set the subject will be used + # as a filename instead of the name specified in the encoding. 
+ OPT_L = true +} + +# Change default server to news.tilbu1.nb.nl.home.com, since the config +# is parsed in order this will be used from here on down +NNTPSERVER=news.tilbu1.nb.nl.home.com + +alt.binaries.sounds.mp3.heavy-metal| \ +alt.binaries.sounds.mp3.1980s { + # Add news4.euro.net as a second server for + # alt.binaries.sounds.mp3.heavy-metal and + # alt.binaries.sounds.mp3.1980s + NNTPSERVER+=|news4.euro.net +} + +alt.binaries.sounds.mp3.gothic-industrial +alt.binaries.sounds.mp3.heavy-metal| \ +alt.binaries.sounds.mp3.1980s { + OPT_L=true + OPT_I=(?i)( \ + bauhaus| \ + big black \ + ) +} +<== cut here ==> + +Supported commandline options: +------------------------------ + +"-I", "--include" Set include pattern. +"-c", "--configfile" Specify a different config file. Default + .ripnewsrc +"-L", "--longname" Sets long filenames. +"-C", "--combinedname" Sets combined filenames. +"-X", "--exclude" Set exclude pattern. +"-T", "--test" Set test mode. Newsrc files will not be written + to. + +Supported config options: +------------------------- + +OPT_I= Set include pattern. +OPT_L= Set long filenames. +OPT_C= Sets combined filenames. +OPT_X= Set exclude pattern. +OPT_T= Set test mode. Newsrc files will not be written + to. +TEMPDIR= Set tempdir location. +NNTPSERVER=[|server] Set NNTPSERVER names +CACHEDIR= Set cachedir location. +DATADIR= Set output dir location. +NEWSRCNAME= Specify newsrc basename. Server names + will be appended. +PERMISSION= Set permission bits for directory + creation. Standard unix style, eg. 0755. + +Ruby patterns: +-------------- + +Ruby patterns are a lot like perl patterns, but there are some +differences. (?i) is the modifier to turn on case insensitivity, unlike +perl this modifier only works on the following block. Luckily you can +group multiple blocks into one by enclosing them with ()'s. 
So while +'OPT_I=(?i)foo|bar' would match 'foo' case insensitive and 'bar' case +sensitive, 'OPT_I=(?i)(foo|bar)' will match both 'foo' and 'bar' case +insensitively. + +Where can I find newsservers: +============================= +freenews.maxbaud.net +www.newzbot.com +www.gj.net/~bhkraft + +Known bugs: +=========== + +There are a lot of known bugs at this time. Basically the error handling +is still almost nonexistent. Well, if it breaks you get to keep +_both_ pieces. That's what you get with pre-alpha software ;) + +Credits: +======== +- Stijn Hoop for adding yEnc support + +Contact info: +============= + +New problems can be reported directly to me at . Patches +welcome ;) + +Ward Wouts diff --git a/tags/ripnews-release-0_0_9/ripnews/TODO b/tags/ripnews-release-0_0_9/ripnews/TODO new file mode 100644 index 0000000..8320c71 --- /dev/null +++ b/tags/ripnews-release-0_0_9/ripnews/TODO @@ -0,0 +1,18 @@ +# $Id$ +# $Source$ + +[ ] support mime encoding +[x] support yEnc encoding +[x] matching on encoded file extensions +[ ] documentation +[ ] code cleanup +[ ] finish intspan +[ ] profiling/speed ups +[ ] improve error handling +[ ] use exceptions for error handling +[x] server reconnects +[ ] check if xhdr implemented +[ ] write man page +[x] use preferred server order +[x] check MAX_PATH_LEN while writing files +[ ] split decoding stuff from article class diff --git a/tags/ripnews-release-0_0_9/ripnews/net/nntp.rb b/tags/ripnews-release-0_0_9/ripnews/net/nntp.rb new file mode 100644 index 0000000..13d49e6 --- /dev/null +++ b/tags/ripnews-release-0_0_9/ripnews/net/nntp.rb @@ -0,0 +1,319 @@ +################################# +# +# nntp.rb - an NNTP client implementing RFC 977 +# ported from the Python code by Jefferson Heard +# this software is released under the terms of the GNU Library General Public License +# (C) 2001, Jefferson Heard +# +# Contributors: Jefferson Heard, Ward Wouts +# +# Release History +# 0.1: 11.7.2001 - Initial revision. 
+# 0.2: 11-9-2001 - fixed regexp bugs, +# fixed XHDR bugs, +# made internal methods private, +# changed constructor default arg +# 0.3: 11-14-2001 - Fixed numerous bugs and made things a little cleaner +# as per the suggestions of Ward Wouts +# 0.4: 11-15-2001 - Fixed statcmd bug - Ward Wouts +# 0.5: 12-06-2001 - Fixed post buf - Ozawa, Sakuro +################################# + +require 'socket' +require 'net/protocol' + +module Net + +# Exceptions raised by NNTP + +class NNTPError < RuntimeError; end +class NNTPReplyError < NNTPError; end +class NNTPTemporaryError < NNTPError; end +class NNTPPermanentError < NNTPError; end +class NNTPDataError < NNTPError; end + +class NNTP + NNTP_PORT = 119 + LONGRESP = ['100', '215', '220', '221', '222', '224', '230', '231', '282'] + CRLF = "\r\n" + + def initialize(host, port=NNTP_PORT, user=nil, password=nil, readermode=nil) + @debuglevel = 0 + @host = host + if port then @port = port else @port = NNTP_PORT end + @socket = TCPSocket.new @host, @port + @welcome = getresp + readermode_afterauth = false + + if readermode + begin + @welcome = shortcmd('mode reader') + rescue NNTPPermanentError + rescue NNTPTemporaryError + if user and $!.response[0...3] == '480' + readermode_afterauth = true + else + raise + end + end + end + + if user + resp = shortcmd "authinfo user #{user}" + if resp[0...3] == '381' # then we need a password + raise NNTPReplyError, resp, caller unless password + resp = shortcmd "authinfo pass #{password}" + raise NNTPPermanentError, resp, caller unless resp[0...3] == '281' + end + end + + if readermode_afterauth + begin + @welcome = shortcmd('mode reader') + rescue NNTPPermanentError + end + end + end + + def welcome + puts "*welcome*, #{@welcome}" if @debuglevel > 0 + return @welcome + end + + attr_writer :debuglevel + + def putline(line) + puts '*put* '+line+'\r\n' if @debuglevel > 1 + @socket.send "#{line}\r\n", 0 + end + + def putcmd(cmd) + puts "*cmd* #{cmd}" if @debuglevel > 0 + putline cmd + end + + 
def getline + line = '' + line.concat @socket.recv 1 until line.length > 2 and line[-1] == "\n" or line[-2..-1] == "\r\n" + puts '*getline* '+line if @debuglevel > 0 + line = line[0...-2] if line[-2..-1] == "\r\n" + line = line[0...-1] if "\r\n".include? line[-1].to_s + return line + end + + def getresp + resp = getline + puts "*getresp* #{resp}" if @debuglevel > 0 + c = resp[0] + case c # NOTE(review): each `when` below evaluates to true/false, which never === c, so nothing is raised here; callers check reply codes themselves + when c == '4' then raise NNTPTemporaryError, resp, caller + when c == '5' then raise NNTPPermanentError, resp, caller + when '123'.include?(c) then raise NNTPProtocolError, resp, caller + end + return resp + end + + def getlongresp + resp = getresp + raise NNTPReplyError, resp, caller unless LONGRESP.include? resp[0...3] + list = [] + while true + line = getline + break if line == '.' + line = line[1..-1] if line.to_s[0...2] == '..' + list << line + end + return resp, list + end + + def shortcmd(line) + putcmd line + return getresp + end + + def longcmd(line) + putcmd line + return getlongresp + end + + def newgroups(date, time) + return longcmd "NEWGROUPS #{date.to_s} #{time.to_s}" + end + + def newnews(group, date, time) + return longcmd "NEWNEWS #{group} #{date.to_s} #{time.to_s}" + end + + def list + resp, list = longcmd "LIST" + list.each_index {|ix| + list[ix] = list[ix].split " " + } + return resp, list + end + + def group(name) + resp = shortcmd "GROUP #{name}" + raise NNTPReplyError, resp, caller unless resp[0...3] == '211' + words = resp.split " " + count = first = last = 0 # "count, first, last = 0" only set count and left first/last nil + n = words.length + if n>1 + count = words[1] + if n>2 + first = words[2] + if n>3 + last = words[3] + if n>4 + name = words[4].downcase + end + end + end + end + return resp, count, first, last, name + end + + def help + return longcmd "HELP" + end + + def statparse(resp) + raise NNTPReplyError, resp, caller unless resp[0...2] == '22' + words = resp.split " " + nr = 0 + id = '' + n = words.length + if n>1 + nr = words[1] + if n>2 + id = words[2] + end + end + return resp, nr, id + end + + def 
statcmd(line) + resp = shortcmd line + return statparse(resp) + end + + def stat(id) + return statcmd "STAT #{id}" + end + + def next + return statcmd "NEXT" + end + + def last + return statcmd "LAST" + end + + def articlecmd(line) + resp, list = longcmd line + resp, nr, id = statparse(resp) + return resp, nr, id, list + end + + def head(id) + return articlecmd "HEAD #{id}" + end + + def body(id) + return articlecmd "BODY #{id}" + end + + def article(id) + return articlecmd "ARTICLE #{id}" + end + + def slave(id) + return shortcmd "SLAVE" + end + + def xhdr(hdr, str) + pat = Regexp.new '^([0-9]+) ?(.*)\n?' + resp, lines = longcmd "XHDR #{hdr} #{str}" + lines.each_index {|ix| + line = lines[ix] + m = pat.match line + lines[ix] = m[1..2] if m + } + return resp, lines + end + + def xover(start, ed) + begin + resp, lines = longcmd "XOVER #{start}-#{ed}" + xover_lines = [] + lines.each {|line| + elements = line.split "\t" + elements[5] = elements[5].to_s.split " " # references field as a list; String has no split! + 0.upto(7) {|ix| xover_lines << elements[ix]} + } + return resp, xover_lines + rescue RuntimeError + raise NNTPDataError, $!.message, caller # block-local `line` is not visible here + end + end + + def xgtitle(group) + line_pat = Regexp.new "^([^\t]+)[\t]+(.*)$" + resp, raw_lines = longcmd "XGTITLE #{group}" + lines = [] + raw_lines.each {|line| + match = line_pat.match line.strip + lines << match[1..2] if match + } + return resp, lines + end + + def date + resp = shortcmd "DATE" + raise NNTPReplyError unless resp[0...3] == '111' + elem = resp.split " " # String has no split!; keep resp as the raw reply string + raise NNTPDataError unless elem.length == 2 + date = elem[1][2...8].to_s + time = elem[1][-6..-1].to_s + raise NNTPDataError, resp, caller unless date.length == 6 and time.length == 6 + return resp, date, time + end + + def post(f) + resp = shortcmd "POST" + raise NNTPReplyError unless resp =~ /^3/ #[0] == 3 + lines = f.readlines + lines.each {|line| + line = line.chomp # chop! would eat a real character when the last line has no newline + line = '.' + line if line[0, 1] == '.' # dot-stuff; [0, 1] yields a String on every Ruby version + putline line + } + putline '.' 
+ return getresp + end + + def quit + resp = shortcmd "QUIT" + @socket.close_read + @socket.close_write + return resp + end + + private :statparse, :getline, :putline, :articlecmd, :statcmd + protected :getresp, :getlongresp +end + +end + +if __FILE__ == $0 + s = Net::NNTP.new('news') + resp, count, first, last, name = s.group('comp.lang.ruby') + puts resp + puts "group #{name} has #{count} articles, range #{first} to #{last}" + resp, subs = s.xhdr('subject', "#{first}-#{last}") + puts resp + subs.each do |sub| puts sub end + resp = s.quit + puts resp +end + diff --git a/tags/ripnews-release-0_0_9/ripnews/news/article.rb b/tags/ripnews-release-0_0_9/ripnews/news/article.rb new file mode 100644 index 0000000..925965f --- /dev/null +++ b/tags/ripnews-release-0_0_9/ripnews/news/article.rb @@ -0,0 +1,1030 @@ +################################# +# +# $Id$ +# $Source$ +# +# article.rb +# +# (C) 2002, Ward Wouts +# +################################# + +require 'set/intspan' +require 'net/nntp' +require 'news/newsrc' +require 'tempfile' +require 'timeout' + +class ArticleError < RuntimeError; end +class TempError < ArticleError; end +class PermError < ArticleError; end + +class Article + +Debuglevel = 0 + +def initialize(nntpservers, groupname, newsrc="~/.newsrc") + @messids = [] + @ids = [] + @servers = [] + @subjects = [] + + @sorted = false + @grouped = false + @groups = {} + @gotten = {} + @group = groupname + + @serverlist = nntpservers.split('|') + @connections = {} + @serverlist.collect{|server| + @connections[server] = {} + begin + @connections[server]["nntp"] = Net::NNTP.new(server) + @connections[server]["skip_ids"] = Set::IntSpan.new() + @connections[server]["newsrc"] = News::Newsrc.new("#{newsrc}.#{server}") + set_skip_ids(server, @connections[server]["newsrc"].marked_articles(@group)) + rescue SocketError, Errno::EINVAL + print "Connection to #{server} failed\n" + del_server(server) + end + } +end + +def reconnect(server) + begin + @connections[server]["nntp"] 
= Net::NNTP.new(server) + rescue SocketError + print "Reconnect to #{server} failed\n" + del_server(server) + raise PermError, "Couldn't connect to #{server}" + end + print "Succesfully reconnected to #{server}\n" +end + +def add(messid, id, server, subject) +# print "Messid: #{messid}\n" +# print "Id: #{id}\n" +# print "Server: #{server}\n" +# print "Subject: #{subject}\n" + @messids.push(messid) + @ids.push(id.to_i) + @servers.push(server) + @subjects.push(subject) + @sorted = false + @grouped = false +end + +def del_server(server) + print "Removing server #{server} from list\n" + @connections.delete(server) + @serverlist.delete(server) +end + +def get_articles(cachedir=false) + for server in @connections.keys + begin + first, last = get_group_info(server) + rescue PermError + print "#{$!}\n" + del_server(server) + next + end + if first <= last + @connections[server]["first"] = first ? first : 0 + @connections[server]["last"] = last ? last : 0 + else + print " First article has higher number than last article on server #{server}.\n" + del_server(server) + end + end + read_cache(cachedir) + for server in @connections.keys + print " reading articles from server: #{server}\n" + range = Set::IntSpan.new("#{@connections[server]["first"]}-#{@connections[server]["last"]}") + rangelist = rechunk_runlist(range.diff(@connections[server]["skip_ids"]).run_list) + print "rangelist: #{rangelist}\n" if Debuglevel >1 + print "rangelist: #{rangelist.type.to_s}\n" if Debuglevel >1 + print "rangelist elements: #{range.diff(@connections[server]["skip_ids"]).elements}\n" if Debuglevel >1 + begin + unless rangelist == nil or rangelist =~ /^$/ + for i in rangelist.split(',') + print "i: #{i}\n" if Debuglevel > 1 + begin + resp, subj_lines = get_xhdr(server, i, "subject") + resp, messid_lines = get_xhdr(server, i, "message-id") + rescue TempError + print "Caught: #{$!} reading from #{server}\n" + next + end + + art = {} + subj_lines.collect{|x| + art[x[0]] = {} unless art.has_key?(x[0]) 
+ art[x[0]]["subject"] = x[1] + print "art id: #{x[0]} subj: #{x[1]}\n" if Debuglevel > 1 + } + messid_lines.collect{|x| + art[x[0]] = {} unless art.has_key?(x[0]) + art[x[0]]["messid"] = x[1] + print "art id: #{x[0]} messid: #{x[1]}\n" if Debuglevel > 1 + } + for id in art.keys + if art[id].has_key?("subject") and art[id].has_key?("messid") + print "adding: #{art[id]["messid"]}, #{id}, #{server}, #{art[id]["subject"]}\n" if Debuglevel > 1 + add(art[id]["messid"], id, server, art[id]["subject"]) + end + end + end + end + rescue PermError + del_server(server) + next + end + end + save_cache(cachedir) +end + +def get_group_info(server) + timedout = 0 + resp = "" + first = "" + last = "" + begin + timeout(30) do + begin + resp, count, first, last, name = @connections[server]["nntp"].group(@group) + rescue Net::NNTP::RuntimeError + print "Got error \"#{$!}\" from #{server}\n" + raise PermError, "#{$!}" + rescue Errno::EPIPE, Errno::ECONNRESET + print "Caught Errno::EPIPE reading from server #{server}\n" + print "Error: #{$!}\n" + reconnect(server) + retry + end + end + rescue TimeoutError + timedout += 1 + raise PermError, "Too many timeouts! 
(get_group_info)" if timedout > 1 + print "Time out, reconnecting to server...\n" + reconnect(server) + retry + end + return first, last +end + +def get_xhdr(server, range, header) + timedout = 0 + resp = "" + lines = [] + begin + timeout(180) do + begin + resp, lines = @connections[server]["nntp"].xhdr(header, range) + if resp.to_i == 500 + print "xhdr not implemented\n" + print "Error: #{$!}\n" + end + unless resp.to_i >= 200 and resp.to_i < 300 + print "got response #{resp} while reading group #{@group} from #{server}\n" + raise TempError + end + rescue Net::NNTP::RuntimeError + print "Caught Net::NNTP::RuntimeError reading from server #{server}\n" + print "Error: #{$!}\n" + rescue Errno::EPIPE, Errno::ECONNRESET + print "Caught Errno::EPIPE reading from server #{server}\n" + print "Error: #{$!}\n" + reconnect(server) + get_group_info(server) + retry + end + end + return resp, lines + rescue TimeoutError + print "Time out, reconnecting to server\n" + timedout += 1 + raise PermError, "Too many timeouts! 
(get_xhrd)" if timedout > 1 + reconnect(server) + get_group_info(server) + retry + end +end + +# if xhdr doesn't work, this should be used +# for i in (range.diff(@connections[server]["skip_ids"]).elements) +# begin +# @connections[server]["nntp"].stat(i) +# resp, id, messid, list = @connections[server]["nntp"].head(i) +# for j in list +# if j =~ /Subject: (.*)/ +# subj=$1 +# end +# end +# print "get_articles messid: #{messid}\n" if Debuglevel > 1 +# print "get_articles id: #{id}\n" if Debuglevel > 1 +# print "get_articles server: #{server}\n" if Debuglevel > 1 +# print "get_articles subject: #{subj}\n" if Debuglevel > 1 +# add(messid, id, server, subj) +# rescue Net::NNTP::RuntimeError +# print "whoopsie couldn't stat #{i}\n" if Debuglevel > 1 +# end +# end + +def get_groups + group_subjects unless @grouped + return @groups +end + +def get_groupname + return @group +end + +def get_body(server, message) + timedout = 0 + resp = "" + id = "" + messid = "" + list = [] + begin + timeout(180) do + begin + resp, id, messid, list = @connections[server]["nntp"].body(message) + rescue Net::NNTPReplyError + print "Caught Net::NNTPReplyError reading article #{message} from #{server}\n" + print "Error: #{$!}\n" + return false + rescue Errno::EPIPE, Errno::ECONNRESET + print "Caught Errno::EPIPE reading from server #{server}\n" + print "Error: #{$!}\n" + reconnect(server) + get_group_info(server) + retry + end + end + return resp, id, messid, list + rescue TimeoutError + print "Time out, reconnecting to server\n" + timedout += 1 + raise PermError, "Too many timeouts! 
(get_body)" if timedout > 1 + reconnect(server) + get_group_info(server) + retry + end +end + +def get_group_body(subj) + result = [] + group_subject_sort(subj) + for i in (0...@groups[subj]["messages"].length) + unless @gotten.has_key?(@groups[subj]["messages"][i]) + print "getting article: #{i}\n" if Debuglevel > 1 + print "getting article: #{subj}\n" if Debuglevel > 1 + print "full subject: #{@groups[subj]["subject"][i]}\n" if Debuglevel > 0 + print "message id: #{@groups[subj]["messages"][i]}\n" if Debuglevel > 1 + print "id: #{@groups[subj]["ids"][i]}\n" if Debuglevel > 1 + print "server: #{@groups[subj]["servers"][i]}\n" if Debuglevel > 0 + begin + resp, id, messid, list = get_body(@groups[subj]["servers"][i], @groups[subj]["messages"][i]) + if resp == false + print "mess-id i: #{@groups[subj]["messages"][i]}\n" + print "mess-id i+1: #{@groups[subj]["messages"][i+1]}\n" + if (i+1 < @groups[subj]["messages"].length) and + (@groups[subj]["messages"][i] == @groups[subj]["messages"][i+1]) + print "Trying next server...\n" + i += 1 + retry + else + raise TempError, "Message-id not on another server" + end + end + end + @gotten[ @groups[subj]["messages"][i] ] = true + result = list + end + end + return result +end + +def get_group_body_first(subj) + group_subject_sort(subj) + i = 0 + while @gotten.has_key?(@groups[subj]["messages"][0]) == false + print "getting article: #{subj}\n" if Debuglevel > 0 + print "full subject: #{@groups[subj]["subject"][0]}\n" if Debuglevel > 0 + print "message id: #{@groups[subj]["messages"][i]}\n" if Debuglevel > 1 + print "id: #{@groups[subj]["ids"][i]}\n" if Debuglevel > 1 + print "server: #{@groups[subj]["servers"][0]}\n" if Debuglevel > 0 + begin + resp, id, messid, list = get_body(@groups[subj]["servers"][i], @groups[subj]["messages"][i]) + if resp == false + print "mess-id i: #{@groups[subj]["messages"][i]}\n" + print "mess-id i+1: #{@groups[subj]["messages"][i+1]}\n" + if (i+1 < @groups[subj]["messages"].length) and + 
(@groups[subj]["messages"][i] == @groups[subj]["messages"][i+1]) + print "Trying next server...\n" + i += 1 + retry + else + raise TempError, "Message-id not on another server" + end + end + end + @gotten[@groups[subj]["messages"][i]] = true + end + return list +end + +def get_group_body_rest(subj, file=nil) + result = [] + for i in (1...@groups[subj]["messages"].length) + unless @gotten.has_key?(@groups[subj]["messages"][i]) + print "getting article: #{i}\n" if Debuglevel > 1 + print "getting article: #{subj}\n" if Debuglevel > 1 + print "full subject: #{@groups[subj]["subject"][i]}\n" if Debuglevel > 0 + print "message id: #{@groups[subj]["messages"][i]}\n" if Debuglevel > 1 + print "id: #{@groups[subj]["ids"][i]}\n" if Debuglevel > 1 + print "server: #{@groups[subj]["servers"][i]}\n" if Debuglevel > 0 + begin + resp, id, messid, list = get_body(@groups[subj]["servers"][i], @groups[subj]["messages"][i]) + if resp == false + print "mess-id i: #{@groups[subj]["messages"][i]}\n" + print "mess-id i+1: #{@groups[subj]["messages"][i+1]}\n" + if (i+1 < @groups[subj]["messages"].length) and + (@groups[subj]["messages"][i] == @groups[subj]["messages"][i+1]) + print "Trying next server...\n" + i += 1 + retry + else + raise TempError, "Message-id not on another server" + end + end + end + @gotten[ @groups[subj]["messages"][i] ] = true + if file + list.collect{|line| file.print "#{line}\n"} + else + result.concat(list) + end + end + end + return result +end + +def get_group_subjects + group_subjects unless @grouped + return @groups.keys +end + +def get_group_messids(subject) + group_subjects unless @grouped + return @groups[subject]["messages"] +end + +def group_is_complete(subj) + group_subjects unless @grouped + #print "Subject: #{subj}\n" + print "length: #{@groups[subj]["messages"].length} total: #{@groups[subj]["total"].to_i}\n" if Debuglevel > 1 + umessids = @groups[subj]["messages"].uniq + if (umessids.length ) >= @groups[subj]["total"].to_i + return true + else + 
return false + end +end + +def group_is_singlepart(subj) + @groups[subj]["total"].to_i == 1 +end + +def group_is_multipart(subj) + @groups[subj]["total"].to_i > 1 +end + +def get_messids + return @messids +end + +def get_subjects + return @subjects +end + +def group_subjects + @groups = {} + for i in (0...@subjects.length) + print "group subjects: #{i} #{@subjects[i]}\n" if Debuglevel > 1 + if @subjects[i] =~ /(.*)\((\d+)\/(\d+)\)(.*)/ || @subjects[i] =~ /(.*)\[(\d+)\/(\d+)\](.*)/ + j = "#{$1}#{$4} (#{$3})" + number = $2 + total = $3 + else + j = @subjects[i] + number = 1 + total = 1 + end + if @groups.has_key?(j) and number.to_i != 0 + @groups[j]["messages"].push(@messids[i]) + @groups[j]["ids"].push(@ids[i].to_i) + @groups[j]["servers"].push(@servers[i]) + @groups[j]["subject"].push(@subjects[i]) + elsif number.to_i != 0 + @groups[j] = {} + @groups[j]["total"] = total + @groups[j]["messages"] = [ @messids[i] ] + @groups[j]["ids"] = [ @ids[i].to_i ] + @groups[j]["servers"] = [ @servers[i] ] + @groups[j]["subject"] = [ @subjects[i] ] + end + end + @grouped = true +end + +def set_skip_ids(server, ids) + set = Set::IntSpan.new(ids) + set.finite or return false + min = set.min + min != nil and min < 0 and return false + @connections[server]["skip_ids"] = set + return true +end + +def group_update_newsrc(subject) + for i in (0...@groups[subject]["messages"].length) + @connections[@groups[subject]["servers"][i]]["newsrc"].mark(@group, @groups[subject]["ids"][i]) + end +end + +def save_newsrc() + for server in @connections.keys + @connections[server]["newsrc"].save + end +end + +def read_cache(cachedir) + filename = "#{cachedir}/#{@group}.ripnewscache" + excludes = {} + for server in @connections.keys + excludes[server] = {} + @connections[server]["skip_ids"].elements.collect!{|x| excludes[server][x]=true} + end + if FileTest.directory?( cachedir) and FileTest.file?( filename ) and FileTest.readable?( filename ) + file = File.new( filename ) + lines = file.readlines + 
lines.collect{|line| + if line =~ /^(\d+)\|(.*?)\|(.*?)\|(.*)$/ + if @connections.has_key?($3) + unless excludes.has_key?($3) and excludes[$3].has_key?($1.to_i) or + $1.to_i < @connections[$3]["first"].to_i or + $1.to_i > @connections[$3]["last"].to_i + add($2, $1, $3, $4) + @connections[$3]["skip_ids"].insert($1.to_i) + end + end + end + } + file.close + end +end + +def save_cache(cachedir) + filename = "#{cachedir}/#{@group}.ripnewscache" + if FileTest.directory?( cachedir ) + file = File.new( filename, "w" ) or print "couldn't open cachefile for writing\n" + cache = [] + for i in (0...@subjects.length) + cache.push("#{@ids[i]}|#{@messids[i]}|#{@servers[i]}|#{@subjects[i]}\n") + end + cache.sort! + file.print cache + file.close + end +end + +####################################################################### + +def uudecode(data, outfile=nil) + case data.type.to_s + when "Array" + print "Calling _uudecode_array\n" if Debuglevel>0 + mode, filename, body = _uudecode_array(data) + when "File", "Tempfile" + unless outfile + print "uudecode: need outfile\n" + exit + end + print "Calling _uudecode_file\n" if Debuglevel>0 + mode, filename, body = _uudecode_file(data, outfile) + else + print "Funny stuff in uudecode. Data of type \"#{data.type.to_s}\"\n" + end + return mode, filename, body +end + +def _uudecode_file(file, outfile) + mode = 0600 + filename = "unknown" + c = 0 + lines = file.pos + percent = 0 + mark = lines/100 + file.pos=0 + + while (! 
file.eof) + line = file.gets + print "line: #{line}" if Debuglevel > 0 + if line =~ /^begin(.*)/ + m = $1 + print "beginning matched; rest: #{m}\n" if Debuglevel > 0 + if m =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/ + mode = $2 + filename = $4 + print "found beginning\n" if Debuglevel > 0 + else + print "mode, file set to defaults: #{m}\n" + end + break + end + end + + if file.eof + print "Not UUencoded!\n" + return false + end + print "c: #{c} mark: #{mark} lines: #{lines}\n" if Debuglevel > 1 + + print " UUdecoding...\n" + + while (! file.eof) + if Debuglevel > 1 + c = file.pos + if c > mark + print "#{percent}%\n" + print "c: #{c} mark: #{mark} lines: #{lines}\n" if Debuglevel > 1 + percent += 1 + mark = (lines/100)*(percent+1) + end + end + line = file.gets + print "line: #{line}" if Debuglevel > 1 + return mode, filename if line =~ /^end/ + next if line =~ /[a-z]/ + next if line == nil + next unless ((((line[0] - 32) & 077) + 2) / 3).to_i == + (line.length/4).to_i + outfile.print line.unpack("u") + end + + print "No \"end\" found!!!\n" + #return mode, file, outfile + return false +end + +# gaat volgens mij niet verder als er meerdere uuencoded blocks zijn... +# zal dan meerdere keren aangeroepen moeten worden, grmbl... +# tis getting a mess as we speak... +# toch maar een keer aparte class van maken... 
+# Decode a uuencoded article given as an array of lines.
+#
+# data - array of body lines; a "begin [mode] [filename]" line is searched
+#        for first, everything before it is ignored.
+#
+# Returns [mode, filename, decode] when an "end" line is reached, where
+# decode is an array of decoded strings, or false when no "begin" line or
+# no "end" line is found.
+def _uudecode_array(data)
+  decode = []
+  mode = 0600
+  filename = "unknown"
+  c = 0
+  lines = data.length
+  percent = 0
+  mark = lines/100
+
+  # Locate the header line.
+  i = 0
+  while (i < data.length)
+    if data[i] =~ /^begin(.*)/
+      m = $1
+      print "beginning matched; rest: #{m}\n" if Debuglevel > 0
+      if m =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/
+        found_mode = $2
+        found_name = $4
+        # Both fields are optional in the pattern; keep the defaults for
+        # a missing field instead of overwriting them with nil.
+        mode = found_mode if found_mode
+        filename = found_name if found_name
+        print "found beginning\n" if Debuglevel > 0
+      else
+        print "mode, filename set to defaults: #{m}\n"
+      end
+      break
+    end
+    i += 1
+  end
+
+  unless (i < data.length)
+    print "Not UUencoded!\n"
+    return false
+  end
+
+  print "UUdecoding...\n"
+
+  # i still points at the "begin" line; it is skipped below because it
+  # contains lowercase letters.
+  while (i < data.length)
+    if Debuglevel > 1
+      if c > mark
+        print "#{percent}%\n"
+        print "c: #{c} mark: #{mark} lines: #{lines} i: #{i}\n" if Debuglevel > 1
+        percent += 1
+        mark = (lines/100)*(percent+1)
+      end
+      c += 1
+    end
+    line = data[i]
+    i += 1
+    return mode, filename, decode if line =~ /^end/
+    next if line =~ /[a-z]/
+    next if line == nil
+    # Skip lines whose length byte does not agree with the line length.
+    next unless ((((line[0] - 32) & 077) + 2) / 3).to_i ==
+                (line.length/4).to_i
+    decode.concat(line.unpack("u"))
+  end
+
+  print "No \"end\" found!!!\n"
+  return false
+end
+
+#def uudecode_group(subj, file=nil, outfile=nil)
+# Fetch all parts of the subject group subj and uudecode them.
+#
+# tempdir - when given, the article body is spooled to a temporary file in
+#           that directory and decoded file-to-file; otherwise everything
+#           is done in memory.
+#
+# Returns whatever uudecode returns (mode, filename, result), or false
+# when the first part does not contain "begin".
+def uudecode_group(subj, tempdir=nil)
+  group_subjects unless @grouped
+
+  body = get_group_body_first(subj)
+  if body.to_s =~ /begin/
+    print "uuencoded!\n" if Debuglevel > 0
+    if (tempdir != nil)
+      # Tempfile.new takes (basename, directory).  The output file used to
+      # be an undefined local, which raised NameError on this branch.
+      file = Tempfile.new("riptmp", tempdir)
+      outfile = Tempfile.new("riptmp", tempdir)
+      body.collect{|i| file.print "#{i}\n"}
+      get_group_body_rest(subj, file)
+      # NOTE(review): what "result" holds for the file variant depends on
+      # uudecode, which is defined outside this part of the file.
+      mode, filename, result = uudecode(file, outfile)
+    else
+      body.concat(get_group_body_rest(subj))
+      mode, filename, result = uudecode(body)
+    end
+    return mode, filename, result
+  else
+    print "Not uuencoded!\n" if Debuglevel > 0
+    return false
+  end
+end
+
+# Returns true when data (converted with to_s) contains something that
+# looks like a uuencode "begin <mode> <name>" header, false otherwise.
+def is_uuencoded(data)
+  if data.to_s =~ /begin\s+\d+?\s+.*?\S?\s*\Z/
+    return true
+  else
+    return false
+  end
+end
+
+#######################################################################
+
+# Decode yEnc data, dispatching on the type of data.
+#
+# data    - an Array of lines, or a File/Tempfile holding the article.
+# outfile - required for File/Tempfile input; decoded bytes are written
+#           to it.  The program exits when it is missing.
+#
+# Returns mode, filename, body as produced by the selected decoder; for an
+# unsupported data type a message is printed and all three are nil.
+def ydecode(data, outfile=nil)
+  case data.type.to_s
+  when "Array"
+    print "Calling _ydecode_array\n" if Debuglevel>0
+    mode, filename, body = _ydecode_array(data)
+  when "File", "Tempfile"
+    unless outfile
+      print "ydecode: need outfile\n"
+      exit
+    end
+    print "Calling _ydecode_file\n" if Debuglevel>0
+    mode, filename, body = _ydecode_file(data, outfile)
+  else
+    print "Funny stuff in ydecode. Data of type \"#{data.type.to_s}\"\n"
+  end
+  return mode, filename, body
+end
+
+# Decode a (possibly multi-part) yEnc article from file into outfile.
+#
+# file    - IO positioned at the end of the spooled article; it is rewound
+#           before decoding.
+# outfile - IO that receives the decoded bytes (one putc per byte).
+#
+# Returns [mode, filename] when the final "=yend" is reached, false when
+# no valid "=ybegin" line is found, and [mode, filename, outfile] when the
+# input ends without a final "=yend".
+# Raises PermError when "=ypart" ranges are not contiguous.
+def _ydecode_file(file, outfile)
+  mode = 0600
+  filename = "unknown"
+  lines = file.pos
+  file.pos = 0
+  bytes = 0             # decoded bytes in the current part
+  decoded = 0           # decoded bytes over all parts so far
+  total = nil           # "total=" header field (number of parts), if any
+  totalsize = 0         # "size=" header field (size of the whole file)
+  oldpartend = 0
+  search_begin = false  # true while skipping text between two parts
+
+  while (! file.eof)
+    line = file.gets
+    print "line: #{line}" if Debuglevel > 0
+    if line =~ /^\=ybegin\s+(.*line\=.*)/
+      m = $1
+      print "ybegin match; rest: #{m}\n" if Debuglevel > 0
+      if m =~ /^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*))\Z/
+        # to_i never returns nil, so test the captures themselves to find
+        # out whether the optional fields were present.
+        part = $2 ? $2.to_i : nil
+        total = $4 ? $4.to_i : nil
+        linesize = $6.to_i
+        totalsize = $8.to_i
+        filename = $10
+        print "found beginning"
+        if part != nil
+          print " of part #{part}"
+        end
+        if total != nil
+          print " of #{total}"
+        end
+        print ", linesize = #{linesize}, size = #{totalsize}, filename = #{filename}\n"
+        break
+      else
+        print "not a valid yenc begin line\n"
+      end
+    end
+  end
+
+  if file.eof
+    print "Not yencoded!\n"
+    return false
+  end
+
+  print " ydecoding...\n"
+
+  while (! file.eof)
+    print "at #{file.pos} need to go to #{lines}\n" if Debuglevel > 1
+    line = file.gets
+    # Strip the line terminator only; a final line without a newline must
+    # not lose its last data byte.
+    line = line.chomp("\n")
+
+    if line =~ /^=yend\s+(.*)\Z/
+      m = $1
+      # "part=" is optional and "size=N" may be the last field on the line.
+      if m =~ /size=(\d+)(\s+part=(\d+))?/
+        size = $1.to_i
+        part = $3 ? $3.to_i : nil
+      else
+        size = 0
+        part = nil
+      end
+      if size != bytes
+        print "part size mismatch, is #{bytes}, should be #{size}\n"
+      end
+      if part == nil
+        # single-part article: this =yend finishes the file
+        return mode, filename
+      end
+      # Keep the running byte count separate from the "total=" header
+      # field, which is re-read on every =ybegin line.
+      decoded += bytes
+      if decoded >= totalsize
+        if decoded != totalsize
+          print "total size mismatch, is #{decoded}, should be #{totalsize}\n"
+        end
+        return mode, filename
+      end
+      search_begin = true
+      bytes = 0
+      next
+    end
+    if search_begin && line =~ /^\=ybegin\s+(.*)\Z/
+      m = $1
+      search_begin = false
+      if m =~ /^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*))\Z/
+        part = $2 ? $2.to_i : nil
+        total = $4 ? $4.to_i : nil
+        linesize = $6.to_i
+        totalsize = $8.to_i
+        filename = $10
+        print "found beginning of part #{part}, linesize = #{linesize}, size = #{totalsize}, filename = #{filename}\n" if Debuglevel > 0
+      end
+      next
+    end
+    if search_begin
+      # article text between the =yend of one part and the next =ybegin
+      next
+    end
+    if line =~ /^=ypart\s+(\s*begin=(\d+))(\s+end=(\d+))/
+      b = $2
+      e = $4
+      print " next part begin #{b}, end #{e}\n"
+      if b.to_i == oldpartend + 1
+        oldpartend = e.to_i
+      else
+        raise PermError, "Parts not continuous! last end #{oldpartend}, begin #{b}"
+      end
+      next
+    end
+
+# This seems to be a common 'error' - maybe I misunderstand the spec or
+# something
+#    if line.length != linesize
+#      print "linesize mismatch, was #{line.length}, should be #{linesize}...\n"
+#    end
+    # yEnc decoding: "=" (0x3d) escapes the next byte, which is shifted by
+    # an extra 64; every byte is shifted by 42, modulo 256.
+    special = 0
+    line.each_byte { |b|
+      if special == 0
+        if b == 0x3d
+          special = 1
+          next
+        end
+      else
+        special = 0
+        b = (b - 64) % 256
+      end
+      outfile.putc((b - 42) % 256)
+      bytes += 1
+    }
+  end
+
+  print "No \"=yend\" found!!!\n"
+  return mode, filename, outfile
+end
+
+# TODO: this should really be turned into a separate class some day...
+# (continued from the previous comment) ...the same goes for the code below.
+
+# Decode a single-part yEnc article given as an array of lines.
+#
+# Returns [mode, filename, [decoded_string]].  When no valid "=ybegin"
+# line is found the result is false; when the data ends without "=yend" a
+# warning is printed and whatever was decoded so far is returned.
+def _ydecode_array(data)
+  decode = ""
+  mode = 0600
+  filename = "unknown"
+
+  i = 0
+  while (i < data.length)
+    if data[i] =~ /^\=ybegin\s+(.*line\=.*)/
+      m = $1
+      print "ybegin match; rest: #{m}\n" if Debuglevel > 0
+      if m =~ /^\s*(part\=(\d+)\s+)?(total\=(\d+)\s+)?(line\=(\d+))(\s*size\=(\d+))(\s*name=(.*))\Z/
+        linesize = $6.to_i
+        size = $8.to_i
+        filename = $10
+        print "found beginning, linesize = #{linesize}, size = #{size}, filename = #{filename}\n" if Debuglevel > 0
+        i += 1
+        break
+      else
+        print "not a valid yenc begin line\n"
+      end
+    end
+    i += 1
+  end
+
+  unless (i < data.length)
+    print "Not yencoded!\n"
+    return false
+  end
+
+  print "ydecoding...\n"
+
+  while (i < data.length)
+    print "at #{i} need to go to #{data.length}\r" if Debuglevel > 1
+    line = data[i]
+    i += 1
+    if line =~ /^\=yend(\s+size=(\d+))(\s+crc32=(\S+))?/
+      size = $2.to_i
+      if size != decode.length
+        print "size mismatch, was #{decode.length}, should be #{size}\n"
+      end
+      dec = [ decode ]
+      return mode, filename, dec
+    end
+    if line =~ /^\=ypart.*\Z/
+      # ignore for now
+      next
+    end
+
+# This seems to be a common 'error' - maybe I misunderstand the spec or
+# something
+#    if line.length != linesize
+#      print "#{i}: linesize mismatch, was #{line.length}, should be #{linesize}...\n"
+#    end
+
+    # "=" (0x3d) escapes the next byte (shifted by an extra 64); every
+    # byte is shifted by 42, modulo 256.
+    special = 0
+    str = ""
+    line.each_byte { |b|
+      if special == 0
+        if b == 0x3d
+          special = 1
+          next
+        end
+      else
+        special = 0
+        b = (b - 64) % 256
+      end
+      str << ((b - 42) % 256).chr
+    }
+    decode << str
+  end
+
+  # was "${i}", which is Perl syntax and printed literally
+  print "#{i}: no \"=yend\" found!!!\n"
+  dec = [ decode ]
+  return mode, filename, dec
+end
+
+# Fetch all parts of the subject group subj and ydecode them.
+#
+# tempdir - when given, the article is spooled to a temporary file in that
+#           directory and decoded file-to-file; otherwise in memory.
+#
+# Returns mode, filename, result as produced by ydecode, or false when the
+# first part does not contain "=ybegin".
+def ydecode_group(subj, tempdir=nil)
+  group_subjects unless @grouped
+
+  body = get_group_body_first(subj)
+  if body.to_s =~ /=ybegin/
+    print "yencoded!\n" if Debuglevel > 0
+    #if (file and outfile)
+    if (tempdir != nil)
+      # Tempfile.new takes (basename, directory).  The output file used to
+      # be an undefined local, which raised NameError on this branch.
+      file = Tempfile.new("riptmp", tempdir)
+      outfile = Tempfile.new("riptmp", tempdir)
+      body.collect{|i| file.print "#{i}\n"}
+      get_group_body_rest(subj, file)
+      mode, filename, result = ydecode(file, outfile)
+    else
+      body.concat(get_group_body_rest(subj))
+      mode, filename, result = ydecode(body)
+    end
+    return mode, filename, result
+  else
+    print "Not yencoded!\n" if Debuglevel > 0
+    return false
+  end
+end
+
+# Returns true when data (converted with to_s) contains "=ybegin".
+def is_yencoded(data)
+  if data.to_s =~ /=ybegin/
+    return true
+  else
+    return false
+  end
+end
+
+###############################################################
+
+# Sort the articles of subject group subj in place: first by subject
+# (numeric-aware, see ward_sort), then by the position of their server in
+# @serverlist.
+def group_subject_sort(subj)
+  #print "Sorting articles\n"
+  serverhash = {}
+  for i in (0...@serverlist.length)
+    serverhash[@serverlist[i]] = i
+  end
+  sort_arr = []
+  for i in (0...@groups[subj]["subject"].length)
+    print "subj sort #{@groups[subj]["subject"][i]}\n" if Debuglevel > 2
+    print "subj sort #{@groups[subj]["messages"][i]}\n" if Debuglevel > 2
+    print "subj sort #{@groups[subj]["ids"][i]}\n" if Debuglevel > 2
+    print "subj sort #{@groups[subj]["servers"][i]}\n" if Debuglevel > 2
+    sort_arr.push( [
+      @groups[subj]["subject"][i].dup,
+      @groups[subj]["messages"][i].dup,
+      @groups[subj]["ids"][i].dup,
+      @groups[subj]["servers"][i].dup
+    ] )
+  end
+  sort_arr.sort!{|a,b|
+    r = ward_sort(a[0], b[0])
+    if r == 0
+      r = serverhash[a[3]] <=> serverhash[b[3]]
+    end
+    r
+  }
+  # Rebuild the four parallel arrays in sorted order.
+  @groups[subj].clear
+  sort_arr.collect{|i|
+    if @groups[subj].has_key?("messages")
+      @groups[subj]["subject"].push(i[0])
+      @groups[subj]["messages"].push(i[1])
+      @groups[subj]["ids"].push(i[2])
+      @groups[subj]["servers"].push(i[3])
+    else
+      @groups[subj]["subject"] = [i[0]]
+      @groups[subj]["messages"] = [i[1]]
+      @groups[subj]["ids"] = [i[2]]
+      @groups[subj]["servers"] = [i[3]]
+    end
+    print "subject sort: #{i[0]}\n" if Debuglevel > 2
+    print "server: #{i[3]}\n" if Debuglevel > 2
+  }
+  #print "Done sorting\n"
+end
+
+# Comparison function that compares runs of digits numerically and
+# everything else as strings, so that "part 2" sorts before "part 10".
+# Returns the first non-zero chunk comparison, -1 when b has chunks left
+# over, 0 otherwise.
+def ward_sort(a, b)
+  c = a.to_s.split(/([0-9]+)/)
+  d = b.to_s.split(/([0-9]+)/)
+
+  c.collect{|x|
+    y = d.shift
+    r = ((x.to_s =~ /^[0-9]+$/) && (y.to_s =~ /^[0-9]+$/)) ?
+      (x.to_i <=> y.to_i) :
+      (x.to_s <=> y.to_s)
+    if r != 0
+      return r
+    end
+  }
+  return -1 if (d != [])
+  return 0
+end
+
+# Split the ranges of a run list ("1-500,600") into ranges spanning at
+# most 200 articles ("1-200,201-400,401-500,600").
+#
+# Returns the re-chunked run list, or nil for a nil or empty run list.
+def rechunk_runlist(runlist)
+  return nil if runlist == nil
+  # The join/return used to sit inside the block, so the method returned
+  # on the first element and nothing was ever re-chunked.
+  chunks = runlist.split(',').collect{|x|
+    if x =~ /(.*)-(.*)/
+      a = $1
+      b = $2
+      result = ""
+      while (b.to_i - a.to_i) > 200
+        result << "#{a}-#{a.to_i+199},"
+        a = a.to_i + 200
+      end
+      result << "#{a}-#{b}"
+      result
+    else
+      x
+    end
+  }
+  return nil if chunks.empty?
+  return chunks.join(",")
+end
+
+# Send QUIT on every open NNTP connection; a connection that is already
+# gone (EPIPE/ECONNRESET) is silently skipped.
+def quit
+  for server in @connections.keys
+    begin
+      @connections[server]["nntp"].quit
+    rescue Errno::EPIPE, Errno::ECONNRESET
+    end
+  end
+end
+
+  private :ward_sort
+
+end # class
diff --git a/tags/ripnews-release-0_0_9/ripnews/news/newsrc.rb b/tags/ripnews-release-0_0_9/ripnews/news/newsrc.rb
new file mode 100644
index 0000000..71c7c8a
--- /dev/null
+++ b/tags/ripnews-release-0_0_9/ripnews/news/newsrc.rb
@@ -0,0 +1,415 @@
+#################################
+#
+# $Id$
+# $Source$
+#
+# newsrc.rb
+# ported from Perl code by Ward Wouts
+#
+# (C) 2001, Ward Wouts
+#
+#################################
+
+require "set/intspan"
+
+module News
+
+# In-memory representation of a newsrc file.  Groups are kept both in a
+# hash by name (@newsrc["group"]) and in an ordered list (@newsrc["list"]);
+# each group is a hash with the keys "name", "subscribed" and "articles"
+# (a Set::IntSpan).
+class Newsrc
+
+# Create an empty newsrc; when file is given it is loaded immediately.
+def initialize(file=nil)
+  @newsrc = { "group" => Hash.new, "list" => Array.new }
+  if file
+    unless load(file)
+      print "Can't load #{file}\n"
+      exit
+    end
+  end
+end
+
+# Load file (default: $HOME/.newsrc), replacing the current contents.
+# A missing or unreadable file yields an empty newsrc.  Always returns true.
+def load(file=nil)
+  file = "#{ENV['HOME']}/.newsrc" unless file
+  @newsrc["file"] = file
+  @newsrc["group"] = {}
+  @newsrc["list"] = []
+
+  if FileTest.file?( "#{file}" ) and FileTest.readable?( "#{file}" )
+    lines = IO.readlines("#{file}")
+    import_rc(lines)
+  end
+
+  return true
+end
+
+# Replace the current contents with the groups parsed from lines.
+def import_rc(lines)
+  @newsrc["group"] = {}
+  @newsrc["list"] = []
+  for line in lines
+    parse(line)
+  end
+end
+
+# Parse one newsrc line ("name: 1-10,12" or "name! ...") and add the
+# group.  Exits on a malformed line; a bad article list is only reported.
+def parse(line)
+  unless line =~ /^([^!:]+)([!:])\s(.*)$/x
+    print "Newsrc.parse: Bad newsrc line: #{line}\n"
+    exit
+  end
+
+  name = $1
+  mark = $2
+  articles = $3
+
+  unless Set::IntSpan.valid(articles)
+    print "Newsrc.parse: Bad article list: #{line}\n"
+  end
+
+
+  group = { "name" => name, "subscribed" => (mark == ":"),
+            "articles" => Set::IntSpan.new(articles)}
+
+  @newsrc["group"][name] = group
+  @newsrc["list"].push(group)
+end
+
+# Save to the file this newsrc was loaded from (default: $HOME/.newsrc).
+def save
+  unless @newsrc.has_key?("file")
+    # was $ENV, an unset global; the environment is the constant ENV
+    @newsrc["file"] = "#{ENV['HOME']}/.newsrc"
+  end
+  save_as(@newsrc["file"])
+end
+
+# Write all groups to file, keeping an existing file as "<file>.bak".
+# Exits when the backup rename or the open fails.
+def save_as(file)
+  if FileTest.exists?("#{file}")
+    begin
+      File.rename(file, "#{file}.bak")
+    rescue
+      print "Can't rename #{file}, #{file}.bak: #{$!}\n"
+      exit
+    end
+  end
+  begin
+    newsrc = File.new(file, "w")
+  rescue
+    print "Can't open #{file}: #{$!}\n"
+    exit
+  end
+  begin
+    @newsrc["file"] = file
+    for group in @newsrc["list"]
+      newsrc.print format(group)
+    end
+  ensure
+    # close the handle even when formatting or writing fails
+    newsrc.close
+  end
+end
+
+# Format one group hash as a newsrc line (with trailing newline).
+def format(group)
+  name = group["name"]
+  sub = group["subscribed"] ? ':' : '!'
+  articles = group["articles"].run_list
+  #space = articles ? ' ' : ''
+  #return "#{name}#{sub}#{space}#{articles}\n"
+  return "#{name}#{sub} #{articles}\n"
+end
+
+# Returns the newsrc contents as an array of lines, in list order.
+def export_rc
+  lines = @newsrc["list"].collect{ |group|
+    name = group["name"]
+    sub = group["subscribed"] ? ':' : '!'
+    articles = group["articles"].run_list
+    space = articles ? ' ' : ''
+    "#{name}#{sub}#{space}#{articles}\n" }
+  return lines
+end
+
+# Add a new, subscribed, empty group.  An existing group is only replaced
+# when options has the key "replace"; otherwise false is returned.
+# options["where"] selects the position, see _insert.
+def add_group(name, options)
+
+  if @newsrc["group"].has_key?(name)
+    options.has_key?("replace") or return false
+    del_group(name)
+  end
+  group = {"name" => name,
+           "subscribed" => true,
+           "articles" => Set::IntSpan.new }
+
+  @newsrc["group"][name] = group
+  _insert(group, options)
+
+  return true
+end
+
+# Move an existing group to the position given by options["where"].
+# Returns false when the group does not exist.
+def move_group(name, options)
+  if @newsrc["group"].has_key?(name)
+    group = @newsrc["group"][name]
+  else
+    return false
+  end
+
+  @newsrc["list"] = @newsrc["list"].delete_if{|x| x["name"] == name}
+
+  _insert(group, options)
+  return true
+end
+
+# Insert group into the ordered list.  options["where"] is one of
+# "first", "last", "" (same as "last"), "alpha", or a two-element array
+# ["before", name], ["after", name] or ["number", index].
+def _insert(group, options)
+  where = ""
+  arg = ""
+  if options.has_key?("where")
+    where = options["where"]
+  end
+  if where.kind_of?(Array)
+    # Read the pair without modifying the caller's options (slice! used to
+    # strip the argument, so the same options could not be used twice).
+    arg = where[1]
+    where = where[0]
+  end
+
+  case where.to_s
+  when "first"
+    @newsrc["list"].unshift(group)
+  when "last"
+    @newsrc["list"].push(group)
+  when ""
+    @newsrc["list"].push(group) # default
+  when "alpha"
+    alpha(group)
+  when "before"
+    before(group, arg)
+  when "after"
+    after(group, arg)
+  when "number"
+    number(group, arg)
+  end
+end
+
+# Insert group before the first list entry whose name sorts after it;
+# append when there is none.
+def alpha(group)
+  name = group["name"]
+  for i in (0...@newsrc["list"].length)
+    if ((name <=> @newsrc["list"][i]["name"]) == -1)
+      upper = @newsrc["list"].slice!(i..@newsrc["list"].length)
+      @newsrc["list"].push(group)
+      # concat, not push: push nested the whole tail as one array element
+      @newsrc["list"].concat(upper)
+      return;
+    end
+  end
+  @newsrc["list"].push(group)
+end
+
+# Insert group directly before the group named before; append when that
+# group is not in the list.
+def before(group, before)
+  for i in (0...@newsrc["list"].length)
+    if (@newsrc["list"][i]["name"] == before.to_s)
+      upper = @newsrc["list"].slice!(i..@newsrc["list"].length)
+      @newsrc["list"].push(group)
+      @newsrc["list"].concat(upper)
+
+      return;
+    end
+  end
+
+  @newsrc["list"].push(group)
+end
+
+# Insert group directly after the group named after; append when that
+# group is not in the list.
+def after(group, after)
+  for i in (0...@newsrc["list"].length)
+    if (@newsrc["list"][i]["name"] == after.to_s)
+      upper = @newsrc["list"].slice!((i+1)..@newsrc["list"].length)
+      @newsrc["list"].push(group)
+      @newsrc["list"].concat(upper)
+      return;
+    end
+  end
+
+  @newsrc["list"].push(group)
+end
+
+# Insert group at list index offset; an offset past the end appends.
+def number(group, offset)
+  # offset[0] on an Integer is a bit test, not the value itself
+  offset = offset.to_i
+  offset = @newsrc["list"].length if offset > @newsrc["list"].length
+  upper = @newsrc["list"].slice!(offset..@newsrc["list"].length)
+  @newsrc["list"].push(group)
+  @newsrc["list"].concat(upper) if upper
+end
+
+
+# Remove a group.  Returns false when it does not exist, true otherwise.
+def del_group(name)
+  unless @newsrc["group"].has_key?(name)
+    return false
+  end
+
+  @newsrc["group"].delete(name)
+  @newsrc["list"] = @newsrc["list"].delete_if{|x| x["name"] == name}
+
+  return true
+end
+
+# The methods below create the group (at options["where"]) when it does
+# not exist yet.
+
+# Mark group name as subscribed.
+def subscribe(name, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  @newsrc["group"][name]["subscribed"] = true
+end
+
+# Mark group name as unsubscribed.
+def unsubscribe(name, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  @newsrc["group"][name]["subscribed"] = false
+end
+
+# Add one article number to the group's article set.
+def mark(name, article, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  @newsrc["group"][name]["articles"].insert(article)
+end
+
+# Add a set specification (anything Set::IntSpan#union accepts).
+def mark_list(name, list, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  articles = @newsrc["group"][name]["articles"].union(list)
+  @newsrc["group"][name]["articles"] = articles
+end
+
+# Add the articles from..to (inclusive).
+def mark_range(name, from, to, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  range = Set::IntSpan.new("#{from}-#{to}")
+  articles = @newsrc["group"][name]["articles"].union(range)
+  @newsrc["group"][name]["articles"] = articles
+end
+
+# Remove one article number from the group's article set.
+def unmark(name, article, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  @newsrc["group"][name]["articles"].remove(article)
+end
+
+# Remove a set specification (anything Set::IntSpan#diff accepts).
+def unmark_list(name, list, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  articles = @newsrc["group"][name]["articles"].diff(list)
+  @newsrc["group"][name]["articles"] = articles
+end
+
+# Remove the articles from..to (inclusive).
+def unmark_range(name, from, to, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  range = Set::IntSpan.new("#{from}-#{to}")
+  articles = @newsrc["group"][name]["articles"].diff(range)
+  @newsrc["group"][name]["articles"] = articles
+end
+
+# True when the group is present in this newsrc.
+def exists(name)
+  return @newsrc["group"].has_key?(name) ? true : false
+end
+
+# True when the group exists and is subscribed.
+def subscribed(name)
+  exists(name) and @newsrc["group"][name]["subscribed"]
+end
+
+# Result of the article set's member test when the group exists, false
+# otherwise.
+def marked(name, article)
+  exists(name) and @newsrc["group"][name]["articles"].member(article)
+end
+
+# Number of groups in the list.
+def num_groups
+  return @newsrc["list"].length
+end
+
+# Names of all groups, in list order.
+def groups
+  @newsrc["list"].collect{|x| x["name"]}
+end
+
+# Names of the subscribed groups, in list order.
+def sub_groups
+  # compact, not compact!: the latter returns nil when nothing is removed
+  @newsrc["list"].collect{|x| x["subscribed"] ? x["name"] : nil}.compact
+end
+
+# Names of the unsubscribed groups, in list order.
+def unsub_groups
+  @newsrc["list"].collect{|x| x["subscribed"] ? nil : x["name"]}.compact
+end
+
+# Article numbers in the group's article set, as returned by
+# Set::IntSpan#elements.
+def marked_articles(name, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  return @newsrc["group"][name]["articles"].elements
+end
+
+# Article numbers in from..to that are not in the group's article set.
+def unmarked_articles(name, from, to, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  range = Set::IntSpan.new("#{from}-#{to}")
+  return range.diff(@newsrc["group"][name]["articles"]).elements
+end
+
+# The group's article set as a run list string.
+def get_articles(name, options = {"where" => ""})
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  @newsrc["group"][name]["articles"].run_list
+end
+
+# Replace the group's article set with the run list articles.  Returns
+# false (and changes nothing) for an invalid or infinite run list or one
+# containing negative numbers, true otherwise.
+def set_articles(name, articles, options = {"where" => ""})
+  Set::IntSpan.valid(articles) or return false
+  set = Set::IntSpan.new(articles)
+  set.finite or return false
+  min = set.min
+  min != nil and min < 0 and return false
+  unless @newsrc["group"].has_key?(name)
+    add_group(name, options)
+  end
+  @newsrc["group"][name]["articles"] = set
+  return true
+end
+
+end # class
+
+end # module
+
+
+# TODO
+# Do not kill an item until it's tested!
+
+# [x] new
+# [x] load
+# [ ] _scan # Initializes a Newsrc object from a string. Used for testing.
+# [x] import_rc
+# [x] parse # parses a single line from a newsrc file
+# [x] save
+# [x] save_as
+# [x] format
+# [x] export_rc
+# [ ] _dump # Formats a Newsrc object to a string.
+#           Used for testing
+# [x] add_group
+# [x] move_group
+# [x] Splice(\@$$@) # is now called number and is simpler
+# [x] _insert
+# [x] Alpha
+# [x] Before
+# [x] After
+# [x] del_group
+# [x] subscribe
+# [x] unsubscribe
+# [x] mark
+# [x] mark_list
+# [x] mark_range
+# [x] unmark
+# [x] unmark_list
+# [x] unmark_range
+# [x] exists
+# [x] subscribed
+# [x] marked
+# [x] num_groups
+# [x] groups
+# [x] sub_groups
+# [x] unsub_groups
+# [x] marked_articles
+# [x] unmarked_articles
+# [x] get_articles
+# [x] set_articles
diff --git a/tags/ripnews-release-0_0_9/ripnews/ripnews.rb b/tags/ripnews-release-0_0_9/ripnews/ripnews.rb
new file mode 100755
index 0000000..e310b61
--- /dev/null
+++ b/tags/ripnews-release-0_0_9/ripnews/ripnews.rb
@@ -0,0 +1,386 @@
+#!/usr/local/bin/ruby
+
+# $Id$
+# $Source$
+
+require 'date'
+require 'getoptlong'
+require 'news/article'
+require 'news/newsrc'
+require 'tempfile'
+
+
+###########################################################################
+
+Debuglevel = 0
+
+# Store decoded data as dir/name without overwriting an existing file.
+#
+# name - wanted file name; "/" is replaced by "-" and surrounding
+#        whitespace is stripped.  When the name is taken, a
+#        "-<date.count>" suffix is appended.
+# data - either a String (path of a file that is renamed into place) or
+#        an Array of strings that are written out.
+#
+# Returns true on success, false when the file could not be stored.
+def save_file(dir, name, data)
+  print "savename: #{name}\n" if Debuglevel > 1
+  nname = name.gsub(/\//, "-")
+  print "nname: #{nname}\n" if Debuglevel > 1
+  nname.sub!(/\s*$/, "")
+  nname.sub!(/^\s*/, "")
+  newname = nname
+  count = 1
+  d = Date.today
+  date = "#{d.year}#{d.month}#{d.mday}"
+  while FileTest.exists?("#{dir}/#{newname}")
+    newname = "#{nname}-<#{date}.#{count}>"
+    count += 1
+  end
+  print "name: #{newname}\n" if Debuglevel > 1
+
+  case data.type.to_s
+  when "String"
+    # File.rename raises on failure instead of returning false.
+    begin
+      File.rename(data, "#{dir}/#{newname}")
+      print " Saving as: '#{newname}'\n"
+    rescue SystemCallError
+      print "couldn't rename tempfile\n"
+      return false
+    end
+  when "Array"
+    # The permission must be a number; the block form closes (and thereby
+    # flushes) the file.
+    begin
+      File.open("#{dir}/#{newname}", "w", 0644) {|file|
+        print " Saving as: '#{newname}'\n"
+        data.each{|i| file.print "#{i}"}
+      }
+    rescue SystemCallError
+      print "couldn't open file for writeing\n"
+      return false
+    end
+  else
+    print "EEEEPS Can't save data of type: #{data.type.to_s}\n"
+    return false
+  end
+  return true
+end
+
+# Merge the command line options into the hash options (keyed by the
+# short option name, e.g. "-I") and return it.  Prints the usage text and
+# exits on an unknown option or a missing option argument.
+def parse_options(options)
+  begin
+    opts = GetoptLong.new(
+      [ "-I", "--include", GetoptLong::REQUIRED_ARGUMENT ],
+      [ "-c", "--configfile", GetoptLong::REQUIRED_ARGUMENT ],
+      [ "-L", "--longname", GetoptLong::NO_ARGUMENT ],
+      [ "-C", "--combinedname", GetoptLong::NO_ARGUMENT ],
+      [ "-M", "--multipart", GetoptLong::NO_ARGUMENT ],
+      [ "-S", "--singlepart", GetoptLong::NO_ARGUMENT ],
+      [ "-T", "--test", GetoptLong::NO_ARGUMENT ],
+      [ "-X", "--exclude", GetoptLong::REQUIRED_ARGUMENT ]
+    )
+    opts.quiet=true
+
+    opts.each do |opt, arg|
+      options[opt] = arg
+    end
+  rescue GetoptLong::InvalidOption, GetoptLong::MissingArgument
+    print "#{$!}\n"
+    usage
+  end
+
+  return options
+end
+
+# Print the command line synopsis and exit.
+def usage
+  print "\nUsage:\n\n"
+  print "ripnews.rb [-I pattern] [-c configfile] [-L] [-C] [-M] [-S] [-T] [-X pattern]\n\n"
+  print "-I specify an include pattern\n"
+  print "-c specify an alternate configfile\n"
+  print "-L use subject as filename\n"
+  print "-C use combined filenames\n"
+  print "-M get multipart articles\n"
+  print "-S get singlepart articles\n"
+  print "-T test mode, don't update newsrc file\n"
+  print "-X specify an exclude pattern\n"
+  exit
+end
+
+# Read the config file named by default["-c"] into @config, a hash of
+# per-group option hashes.
+#
+# Syntax handled here: "#" comments, "\" line continuation, "KEY=value",
+# "KEY+=value" (append), "OPT_x=value" (alias for "-x=value") and
+# "group1|group2 {" ... "}" blocks.  Settings outside a block go into
+# default and are inherited by every group block closed after them.
+#
+# Exits on a syntax error or an unterminated group.  Returns true.
+def parse_config(default = {})
+  # IO.readlines closes the file again
+  lines = IO.readlines(default["-c"])
+
+  i = 0
+  group = ""
+  grouparr = []
+  @config = {}
+
+  lines.collect!{|x|
+    x.sub!(/^\s*/, "")
+    x.sub!(/\#.*$/, "")
+    x.chomp
+  }
+  while i < lines.length
+    line = lines[i]
+    while line.sub!(/\s*\\$/, "") != nil
+      # a continuation mark on the very last line has nothing to join
+      break if lines[i+1] == nil
+      line << lines[i+1]
+      i += 1
+    end
+    line.sub!(/\s*$/, "")
+    i += 1
+    if line =~ /^OPT_(.*?)=(.*)/
+      line = "-#{$1}=#{$2}"
+    end
+    print "#{i}: #{line}\n" if Debuglevel > 1
+    if line =~ /(.*?)\s*\+=\s*(.*)/
+      if group == ""
+        if default.has_key?($1)
+          default[$1] << $2
+        else
+          default[$1] = $2
+        end
+      else
+        grouparr.collect{|g|
+          if @config[g].has_key?($1)
+            @config[g][$1] << $2
+          elsif default.has_key?($1)
+            @config[g][$1] = default[$1] + $2
+          else
+            @config[g][$1] = $2
+          end
+        }
+      end
+    elsif line =~ /(.*?)\s*=\s*(.*)/
+      if group == ""
+        default[$1] = $2
+      else
+        grouparr.collect{|g|
+          @config[g][$1] = $2
+        }
+      end
+    elsif line =~ /(.*?)\s*\{/
+      group = $1
+      grouparr = group.split('|')
+      grouparr.collect{|g|
+        @config[g] = {} unless @config.has_key?(g)
+      }
+    elsif line =~ /^}$/
+      default.each_key{|x|
+        grouparr.collect{|g|
+          # dup: otherwise all groups share one string with the defaults
+          # and a later "+=" (<<) would change every one of them
+          @config[g][x] = default[x].dup unless @config[g].has_key?(x)
+        }
+      }
+      group = ""
+      grouparr = []
+    elsif line =~ /^$/
+      next
+    else
+      print "Error parsing config on line: #{i}\n"
+      exit
+    end
+  end
+
+  if group != ""
+    print "Error parsing config: group not terminated on line #{i}\n"
+    exit
+  end
+
+  if Debuglevel > 2
+    @config.each_key{|x|
+      print "Group: #{x}\n"
+      @config[x].each_key{|y|
+        print "Key: '#{y}' => Value: '#{@config[x][y]}'\n"
+      }
+    }
+  end
+  return true
+end
+
+# Validate @config and fill in per-group defaults.  Exits when a group has
+# no include pattern ("-I").  EXTENSIONS is copied to both "-S" and "-M";
+# when only one of these is set, the other gets a pattern that never
+# matches.
+def check_config
+  @config.each_key {|i|
+    unless @config[i].has_key?("-I")
+      print "No inclusions given for group #{i}. Won't match anything.\n"
+      exit
+    end
+    @config[i]["DATADIR"] ="." unless @config[i].has_key?("DATADIR")
+    @config[i]["PERMISSION"] = "0755" unless @config[i].has_key?("PERMISSION")
+    if @config[i].has_key?("EXTENSIONS")
+      @config[i]["-S"] = @config[i]["EXTENSIONS"]
+      @config[i]["-M"] = @config[i]["EXTENSIONS"]
+    end
+    @config[i]["-M"] = "(?!.*)" if @config[i].has_key?("-S") and ! @config[i].has_key?("-M")
+    @config[i]["-S"] = "(?!.*)" if @config[i].has_key?("-M") and ! @config[i].has_key?("-S")
+  }
+end
+
+# Fetch and decode the single-part article with subject subj.
+# Returns mode, filename, body, or false when the encoding is unknown or
+# the file name is rejected by check_ext.
+def get_single(subj)
+  print "Fetching singlepart article: #{subj}\n"
+  body = @articles.get_group_body(subj)
+  if @articles.is_uuencoded(body)
+    mode, filename, body = @articles.uudecode(body)
+    return false unless check_ext(filename, "s")
+    return mode, filename, body
+  end
+  if @articles.is_yencoded(body)
+    mode, filename, body = @articles.ydecode(body)
+    return false unless check_ext(filename, "m") if false
+    return false unless check_ext(filename, "s")
+    return mode, filename, body
+  end
+  print " Unknown encoding (not UU, not yEnc), skipping...\n"
+  return false
+end
+
+# Fetch and decode the multi-part article with subject subj.
+#
+# Without TEMPDIR in the group's config everything happens in memory and
+# body is the decoded data.  With TEMPDIR the parts are spooled to one
+# temporary file and decoded into a second one; body is then the path of
+# that second file.
+#
+# Returns mode, filename, body, or false on failure.
+def get_multi(subj, group)
+  print "Fetching multipart article: #{subj}\n"
+  if @config[group]["TEMPDIR"] == nil or @config[group]["TEMPDIR"] == ""
+    body = @articles.get_group_body(subj)
+    if @articles.is_uuencoded(body)
+      mode, filename, body = @articles.uudecode(body)
+      return false unless check_ext(filename, "m")
+      return mode, filename, body
+    elsif @articles.is_yencoded(body)
+      mode, filename, body = @articles.ydecode(body)
+      return false unless check_ext(filename, "m")
+      return mode, filename, body
+    end
+    print " Unknown encoding (not UU, not yEnc), skipping...\n"
+    return false
+  else
+    body = @articles.get_group_body_first(subj)
+    if @articles.is_uuencoded(body) or @articles.is_yencoded(body)
+      file = Tempfile.new("riptmp", @config[group]["TEMPDIR"])
+      body.collect{|x| file.print "#{x}\n"}
+      return false unless @articles.get_group_body_rest(subj, file)
+      fileout = Tempfile.new("riptmp", @config[group]["TEMPDIR"])
+      if @articles.is_uuencoded(body)
+        mode, filename, body = @articles.uudecode(file, fileout)
+      elsif @articles.is_yencoded(body)
+        mode, filename, body = @articles.ydecode(file, fileout)
+      end
+      return false unless check_ext(filename, "m")
+      body = fileout.path
+      file.close
+      fileout.close
+      return mode, filename, body
+    else
+      print " Unknown encoding (not UU, not yEnc), skipping...\n"
+      return false
+    end
+  end
+end
+
+# Save a decoded article and, when that succeeds, mark it in the newsrc.
+#
+# The output name depends on the group's options: "-L" uses the subject,
+# "-C" uses "subject [filename]", otherwise the decoded filename is used.
+# Names are cut down to @maxfilelength characters.  The newsrc file is not
+# written in test mode ("-T").
+def output_data(subject, mode, filename="", body="")
+  group = @articles.get_groupname
+  print " mode: #{mode}\n" if Debuglevel > 0
+  print " Filename: '#{filename}'\n" if Debuglevel > 0
+  if @config[group].has_key?("-L") and @config[group]["-L"]
+    print "longname\n" if Debuglevel > 1
+    outfile = subject
+    while outfile.length > @maxfilelength
+      outfile = outfile[0...-1]
+    end
+  elsif @config[group].has_key?("-C") and @config[group]["-C"]
+    print "combinedname\n" if Debuglevel > 1
+    outfile = "#{subject} [#{filename}]"
+    sub2 = subject
+    while outfile.length > @maxfilelength
+      lastlength = outfile.length
+      sub2 = sub2[0...-1]
+      outfile = "#{sub2} [#{filename}]" # this is going to loop if the #{filename} is too long :(
+      if outfile.length == lastlength
+        # the subject is used up: fall back to the (truncated) filename
+        outfile = filename
+        while outfile.length > @maxfilelength
+          outfile = outfile[0...-1]
+        end
+      end
+    end
+  else
+    print "shortname\n" if Debuglevel > 1
+    outfile = filename
+    while outfile.length > @maxfilelength
+      outfile = outfile[0...-1]
+    end
+  end
+  if save_file("#{@config[group]["DATADIR"]}/#{group}", outfile, body)
+    @articles.group_update_newsrc(subject)
+    @articles.save_newsrc unless @config[group].has_key?("-T") and @config[group]["-T"]
+  end
+end
+
+# Check the extension of filename against the current group's pattern for
+# single-part ("s", option "-S") or multi-part ("m", option "-M")
+# articles.
+#
+# Returns true when no pattern is configured or the name matches, false
+# otherwise.  Exits on an unknown mode.
+def check_ext(filename, mode)
+  case mode
+  when "s"
+    key = "-S"
+  when "m"
+    key = "-M"
+  else
+    print "Illegal mode \"#{mode}\" in check_ext\n"
+    exit
+  end
+  # @config is keyed by group name, so the pattern has to be looked up in
+  # the current group's options; looking for "-S"/"-M" in @config itself
+  # never found anything and accepted every file.
+  opts = @config[@articles.get_groupname] || {}
+  pattern = opts[key]
+  # NOTE(review): an empty pattern (bare -S/-M flag) is taken to mean "no
+  # extension filter" - confirm that this is the intended meaning.
+  return true if pattern == nil or pattern == ""
+  return (filename =~ /\.(#{pattern})$/) ? true : false
+end
+
+# Determine the longest file name that can be created in tempdir by
+# trying to create files with shorter and shorter names, starting at 500
+# characters.  The result leaves room for the suffix save_file may append.
+def get_max_file_length(tempdir=".")
+  # the caller passes nil when TEMPDIR is not configured
+  tempdir = "." if tempdir == nil or tempdir == ""
+  i = 500
+  name = "a"*i
+  begin
+    probe = "#{tempdir}/#{name}"
+    File.new(probe, "w", 0644).close
+    # do not leave the probe file behind
+    File.delete(probe)
+  rescue Errno::ENAMETOOLONG
+    i -= 1
+    raise if i < 1 # not a name length problem; don't retry forever
+    name = "a"*i
+    retry
+  end
+  i -= 14 # this is how many characters are still likely to be appended
+          # if the filename already exists '-<#{date}.#{count}>' in save_file
+          # this could be brought back to 5 '-<#{count}>' ...
+  return i
+end
+
+#############################################################################################
+
+$stdout.sync=true # line buffered output
+defaults = {'-c' => "#{ENV['HOME']}/.ripnewsrc"}
+defaults = parse_options(defaults)
+parse_config(defaults)
+check_config
+
+@maxfilelength = get_max_file_length(@config[@config.keys[0]]["TEMPDIR"])
+
+print "$Id$\n"
+
+if Debuglevel > 2
+  @config.each_key{|i|
+    print "Group: #{i}\n"
+    @config[i].each_key{|j|
+      print "Opt: #{j} val: #{@config[i][j]}\n"
+    }
+  }
+end
+
+# Main loop: for every configured group fetch the article list and save
+# every complete article whose subject matches -I and does not match -X.
+for group in @config.keys.sort
+  print "Getting articles for #{group}\n"
+  @articles = Article.new(@config[group]["NNTPSERVER"], group, @config[group]["NEWSRCNAME"])
+#  begin
+  @articles.get_articles(@config[group]["CACHEDIR"])
+#  rescue Article::
+#    print "Caught something: #{$!}\n"
+#    @articles.quit
+#    next
+#  end
+
+  unless FileTest.directory?("#{@config[group]["DATADIR"]}/#{group}") or
+         Dir.mkdir("#{@config[group]["DATADIR"]}/#{group}", @config[group]["PERMISSION"].oct)
+    print "eeeps, couldn't create dir\n"
+    exit
+  end
+  for i in @articles.get_group_subjects
+    print "#{i}\n" if Debuglevel > 2
+    if !(@config[group].has_key?("-X") and i =~ /#{@config[group]["-X"]}/) and
+       i =~ /#{@config[group]["-I"]}/
+      print "Match: #{i}\n" if Debuglevel > 0
+      if @articles.group_is_complete(i)
+        begin
+          # start from "nothing fetched" so that a subject which is
+          # neither single- nor multi-part does not reuse the result of
+          # the previous subject
+          mode = false
+          filename = nil
+          body = nil
+          if @articles.group_is_singlepart(i)
+            mode, filename, body = get_single(i)
+          elsif @articles.group_is_multipart(i)
+            mode, filename, body = get_multi(i, group)
+          end
+          output_data(i, mode, filename, body) if mode != false
+        rescue Article::TempError, Article::PermError
+          print "#{$!}\n"
+          print "Skipping article...\n"
+          next
+        end
+      else
+        print " Not complete: #{i}\n"
+      end
+    end
+  end
+  @articles.quit
+end
diff --git a/tags/ripnews-release-0_0_9/ripnews/set/intspan.rb b/tags/ripnews-release-0_0_9/ripnews/set/intspan.rb
new file mode 100644
index 0000000..9bd36a6
--- /dev/null
+++ b/tags/ripnews-release-0_0_9/ripnews/set/intspan.rb
@@ -0,0 +1,928 @@
+################################# +# +# $Id$ +# $Source$ +# +# intspan.rb +# ported from Perl code by Ward Wouts +# +# (C) 2001, Ward Wouts +# +################################# + +module Set + +class IntSpan + +Empty_String = '-' +Debuglevel = 0 + +def initialize(setspec=nil) + @set = { "empty_string" => Empty_String } + print "initialize: Calling copy\n" if Debuglevel > 0 + copy(setspec) +end + +def IntSpan.valid(run_list) + testset = new + begin + testset._copy_run_list(run_list) + rescue SystemExit + return false + end + return true +end + +def copy(set_spec) + print "Copy #{set_spec.type.to_s}\n" if Debuglevel > 0 + case set_spec.type.to_s + when "NilClass" + print "copy: Calling _copy_empty\n" if Debuglevel > 0 + _copy_empty + when "String" + print "copy: Calling _copy_run_list\n" if Debuglevel > 0 + _copy_run_list(set_spec) + when "Array" + print "copy: Calling _copy_array\n" if Debuglevel > 0 + _copy_array(set_spec) + when "Set::Intspan" + print "copy: Calling _copy_set\n" + _copy_set(set_spec) + when "Hash" + print "copy: Calling _copy_set\n" + _copy_set(set_spec) + else + print "eeps\n" + end +end + +def _copy_empty # makes @set the empty set + @set = { "negInf" => false } + @set["posInf"] = false + @set["edges"] = [] + @set["run"] = [] +end + +def _copy_array(array) # copies an array into @set + @set["negInf"] = false + @set["posInf"] = false + + #print "scary thingy gets called!!!\n" + edges = [] + for element in array.sort + next if (edges.length > 0) and (edges[-1] == element) # skip duplicates + + if (edges.length > 0) and (edges[-1] == element-1) + edges[-1] = element + else + edges.push(element-1, element) + end + end + + @set["edges"] = edges + @set["run"] = [] +end + +def _copy_set(src) # copies one set to another + @set["negInf"] = src.neg_inf + @set["posInf"] = src.pos_inf + @set["edges"] = src.edges + @set["run"] = [] +end + +def _copy_run_list(runlist) + + _copy_empty + + runlist.gsub!(/\s|_/, '') + return true if runlist == "" + + + print 
"copy run list...\n" if Debuglevel > 0 + + first = true + last = false + + edges = [] + + for i in runlist.split(/,/) + print "#{i}\n" if Debuglevel > 0 + begin + if i =~ /^(-?\d+)$/x + edges.push(($1.to_i-1), $1.to_i) + next + end + + if i =~ /^ (-?\d+) - (-?\d+) $/x + if $1.to_i > $2.to_i + print "match rule 1 #{$1} > #{$2}\n" + print "Set::IntSpan::_copy_run_list: Bad order: #{runlist}\n" + exit + else + edges.push(($1.to_i-1), $2.to_i) + next + end + end + + if i =~ /^\(-(-?\d+)$/x + unless first + print "match rule 2\n" + print "Set::IntSpan::_copy_run_list: Bad order: #{runlist}\n" + exit + end + @set = {"negInf" => true} + edges.push($1.to_i) + next + end + + if i =~ /^(-?\d+)-\)$/x + print "match rule 3\n" + edges.push(($1.to_i-1)) + @set = {"posInf" => true} + last = true + next + end + + if i =~ /^\(-\)$/x + unless first + print "match rule 4\n" + print "Set::IntSpan::_copy_run_list: Bad order: #{runlist}\n" + exit + end + @set = {"negInf" => true} + @set = {"posInf" => true} + last = true + next + end + + print "no match! 
\"#{i}\"\n" + print "Set::IntSpan::_copy_run_list: Bad syntax: #{runlist}\n" + end + first = false + end + + @set["edges"] = edges + @set["run"] = [] + + return true +end + +# check for overlapping runs +# delete duplicate edges +def _cleanup + edges = @set["edges"] + + for i in (0..(edges.length-1)) + cmp = edges[i] <=> edges[i+1]; + begin + case cmp + when -1 + i = i + 1 + break + when 0 + edges.slice!(i..(i+1)) + break + when 1 + return 0 + end + end + end + + 1 +end + +#def splice(array, offset, length=nil, list=[]) +# if offset >= 0 +# length = array.length-offset unless length +# leftarray = array.slice(0, offset) +# rightarray = array.slice(offset+length, (array.length - offset)) +# else +# length = array.length+offset unless length +# leftarray = array.slice(0, (array.length+offset)) +# rightarray = array.slice(array.length+length+offset, array.length+offset) +# end +# +# array = leftarray +# array += list +# array += rightarray if rightarray +# +# return array +#end + +def run_list + if empty + return @set["empty_string"] + end + + print "edges leng: ", @set["edges"].length, "\n" if Debuglevel > 0 + edges = [] + edges = @set["edges"] + runs = [] + + if edges.length > 0 + edges = ['(', edges] if @set["negInf"] + edges.push(')') if @set["posInf"] + + print edges.join("/"),"\n" if Debuglevel > 0 + + while(edges.length>0) + print "edges leng: ", @set["edges"].length, "\n" if Debuglevel > 0 + lower = edges[0] + upper = edges[1] + print "Lower: \"#{lower}\" Upper: \"#{upper}\"\n" if Debuglevel > 0 + edges = edges.slice(2..edges.length) + + if ((lower.to_s <=> '(')!=0 and + (upper.to_s <=> ')')!=0 and + ((lower+1) == upper)) + print "#{upper}\n" if Debuglevel > 0 + runs.push("#{upper}") + else + lower += 1 if (lower.to_s <=> "(")!=0 + print "#{lower}-#{upper}\n" if Debuglevel > 0 + runs.push("#{lower}-#{upper}") + end + end + end + + print "edges leng: ", @set["edges"].length, "\n" if Debuglevel > 0 + + return runs.join(',') +end + +def elements + if 
(@set["negInf"] == true or @set["posInf"] == true) + print "Set::IntSpan::elements: infinite set\n" + exit + end + + elements = [] + edges = @set["edges"].dup + while (edges.length>0) + lower, upper = edges.slice!(0..1) + elements += (lower+1 .. upper).to_a + end + + return elements +end + +def _real_set(set_spec=nil) # converts a set specification into a set + (set_spec != nil and set_spec.type.to_s == "Set::IntSpan") ? + set_spec : + IntSpan.new(set_spec) +end + +def union(set_spec) + b = _real_set(set_spec) + s = IntSpan.new + + s.set_neg_inf(@set["negInf"] || b.neg_inf) + + eA = @set["edges"] + eB = b.edges + eS = s.edges + + inA = @set["negInf"] + inB = b.neg_inf + + iA = 0 + iB = 0 + + while (iA < eA.length and iB < eB.length) + xA = eA[iA] + xB = eB[iB] + + if (xA < xB) + iA += 1 + inA = ! inA + not inB and eS.push(xA) + elsif (xB < xA) + iB += 1 + inB = ! inB + not inA and eS.push(xB) + else + iA += 1 + iB += 1 + inA = ! inA + inB = ! inB + inA == inB and eS.push(xA) + end + end + + iA < eA.length and (! inB) and eS.concat(eA[iA..eA.length]) + iB < eB.length and (! inA) and eS.concat(eB[iB..eB.length]) + + s.set_pos_inf(@set["posInf"] || b.pos_inf) + s.set_edges(eS) + + return s +end + +def intersect(set_spec) + b = _real_set(set_spec) + s = IntSpan.new + + s.set_neg_inf(@set["negInf"] && b.neg_inf) + + eA = @set["edges"] + eB = b.edges + eS = s.edges + + inA = @set["negInf"] + inB = b.neg_inf + + iA = 0 + iB = 0 + + while (iA < eA.length and iB < eB.length) + xA = eA[iA] + xB = eB[iB] + + if (xA < xB) + iA += 1 + inA = ! inA + inB and eS.push(xA) + elsif (xB < xA) + iB += 1 + inB = ! inB + inA and eS.push(xB) + else + iA += 1 + iB += 1 + inA = ! inA + inB = ! 
inB + inA == inB and eS.push(xA) + end + end + + iA < eA.length and inB and eS.concat(eA[iA..eA.length]) + iB < eB.length and inA and eS.concat(eB[iB..eB.length]) + + s.set_neg_inf(@set["posInf"] && b.pos_inf) + s.set_edges(eS) + return s +end + +def diff (set_spec) + b = _real_set(set_spec) + s = IntSpan.new + + s.set_neg_inf(@set["negInf"] && ! b.neg_inf) + + eA = @set["edges"] + eB = b.edges + eS = s.edges + + inA = @set["negInf"] + inB = b.neg_inf + + iA = 0 + iB = 0 + + while (iA < eA.length and iB < eB.length) + xA = eA[iA] + xB = eB[iB] + + if (xA < xB) + iA += 1 + inA = ! inA + not inB and eS.push(xA) + elsif (xB < xA) + iB += 1 + inB = ! inB + inA and eS.push(xB) + else + iA += 1 + iB += 1 + inA = ! inA + inB = ! inB + inA != inB and eS.push(xA) + end + end + + iA < eA.length and not inB and eS.concat(eA[iA..eA.length]) + iB < eB.length and inA and eS.concat(eB[iB..eB.length]) + + s.set_edges(eS) + + s.set_pos_inf(@set["posInf"] && ! b.pos_inf) + return s +end + +def xor(set_spec) + b = _real_set(set_spec) + s = IntSpan.new + + s.set_neg_inf(@set["negInf"] ^ b.neg_inf) + + eA = @set["edges"] + eB = b.edges + eS = s.edges + + iA = 0 + iB = 0 + + while (iA < eA.length and iB < eB.length) + xA = eA[iA] + xB = eB[iB] + + if (xA < xB) + iA += 1 + eS.push(xA) + elsif (xB < xA) + iB += 1 + eS.push(xB) + else + iA += 1 + iB += 1 + end + end + + iA < eA.length and eS.concat(eA[iA..eA.length]) + iB < eB.length and eS.concat(eB[iB..eB.length]) + + s.set_pos_inf(@set["posInf"] ^ b.pos_inf) + s.set_edges(eS) + return s +end + +def complement +# complement is inverse set; dit klopt hier dus niet + a = first + b = last + + print "first #{a} last #{b}\n" if Debuglevel > 0 + if a!=b + s = IntSpan.new("#{a}-#{b}") + comp = xor(s) + else + comp = IntSpan.new("#{a}") + end + + if Debuglevel > 0 + while i = comp.next + print "#{i}\n" + end + end + + comp.set_neg_inf(! comp.neg_inf) + comp.set_pos_inf(! 
comp.pos_inf) + return comp +end + + +def superset(set_spec) + b = _real_set(set_spec) + +# $b->diff($a)->empty + s = b.diff(self) + return s.empty +end + + +def subset(set_spec) + b = _real_set(set_spec) + +# $a->diff($b)->empty + s = diff(b) + return s.empty +end + + +def equal(set_spec) + b = _real_set(set_spec) + +print "a\n" + @set["negInf"] == b.neg_inf or return false +print "b\n" + @set["posInf"] == b.pos_inf or return false + + aEdge = @set["edges"] + bEdge = b.edges + print "aEdge #{aEdge.length} bEdge #{bEdge.length}\n" + aEdge.length == bEdge.length or return false +print "c\n" + + for i in (0...aEdge.length) + aEdge[i] == bEdge[i] or return false + end + + return true +end + +def equivalent(set_spec) + b = _real_set(set_spec) + + cardinality == b.cardinality +end + + +def cardinality + (@set["negInf"] or @set["posInf"]) and return -1 + + car = 0 + edges = @set["edges"] + i=0 + while (i < edges.length) + lower = edges[i] + upper = edges[i+1] + car += upper - lower + i += 2 + end + + return car +end + +def empty + if @set["negInf"] == false and @set["edges"].length > 0 and + @set["posInf"] == false + return false + end + return true +end + +def finite + if @set["negInf"] == false and @set["posInf"] == false + return true + end + return false +end + +def edges + return @set["edges"] +end + +def set_edges(edges) + @set["edges"] = edges +end + +def neg_inf + return @set["negInf"] +end + +def set_neg_inf(negInf) + @set["negInf"] = negInf +end + +def pos_inf + return @set["posInf"] +end + +def set_pos_inf(posInf) + @set["posInf"] = posInf +end + +def infinite + @set["negInf"] or @set["posInf"] +end + +def universal + @set["negInf"] and not @set["edges"].length > 0 and @set["posInf"] +end + +def member(n) + inSet = @set["negInf"] + edge = @set["edges"] + + for i in (0...edge.length) + if inSet + return true if n <= edge[i] + inSet = false + else + return false if n <= edge[i] + inSet = true + end + end + + inSet +end + +def insert(n) + inSet = @set["negInf"] + 
edge = @set["edges"] + + if (edge.length == 0) + @set["edges"] = [n-1, n] + return + end + + if n > edge[-1]+1 + @set["edges"].push(n-1, n) + return + elsif n > edge[-1] + @set["edges"][-1] += 1 + return + end + + for i in (0...edge.length) + if (inSet) + n <= edge[i] and return + inSet = false + else + n <= edge[i] and break + inSet = true + end + end + + inSet and return + + lGap = i == 0 || n-1 - edge[i-1] + lGap = false if lGap == 0 + + rGap = i == edge.length-1 ? i : edge[i] - n + rGap = false if rGap == 0 + + if ( lGap and rGap) + lower = edge[0...i] + upper = edge[i...edge.length] + edge = lower + edge.push(n-1, n) + edge.concat(upper) + elsif (not lGap and rGap) + edge[i-1] += 1 + elsif ( lGap and not rGap) + edge[i] -= 1 + else + edge.delete_at(i-1) + edge.delete_at(i-1) + end + + @set["edges"] = edge +end + +def remove(n) + n or return + + inSet = @set["negInf"] + edge = @set["edges"] + + for i in (0...edge.length) + if (inSet) + break if n <= edge[i] + inSet = false + else + return if n <= edge[i] + inSet = true + end + end + + return unless inSet + + for i in (0...edge.length) + if edge[i] == n-1 and edge[i+1] == n + lower = edge[0...i] + upper = edge[i+2..edge.length] + edge = lower + upper + break + elsif edge[i] == n-1 + edge[i] += 1 + break + elsif edge[i] == n + edge[i] += 1 + break + elsif edge[i+1] == n + edge[i+1] -= 1 + break + elsif edge[i]n + lower = edge[0..i] + upper = edge[i+1..edge.length] + edge = lower + [n-1, n] +upper + break + end + i += 1 + end + + @set["edges"] = edge +end + +def min + empty and return nil + neg_inf and return nil + @set["edges"][0]+1 +end + + +def max + empty and return nil + pos_inf and return nil + @set["edges"][-1] +end + +def grep_set(block) + return nil if @set["negInf"] or @set["posInf"] + + edges = @set["edges"] + sub_edges = [] + + while (edges.length > 0) + lower = edges[0] + upper = edges[1] + edges = edges.slice(2..edges.length) + + for i in (lower+1..upper) +# local $_ = i +# &$block() or next # 
definately wrong, must eval block + + if (sub_edges.length > 0 and sub_edges[-1] == i-1) + sub_edges[-1] = i + else + sub_edges += [ i-1, i ] + end + end + end + + sub_set = new + sub_set["edges"] = sub_edges + sub_set +end + +def map_set(block) + return nil if @set["negInf"] or @set["posInf"] + + map_set = new + + edges = @set["edges"] + while (edges.length > 0) + lower = edges[0] + upper = edges[1] + edges = edges.slice(2..edges.length) + + for domain in (lower+1..upper) + local $_ = domain; + +# for range (&$block()) # definately wrong, must eval block +# map_set.insert(range) +# end + end + end + + map_set +end + +def first + @set["iterator"] = min + @set["run"] = [] + @set["run"][0] = 0 + @set["run"][1] = @set["edges"].length > 0 ? 1 : nil + + @set["iterator"] +end + + +def last + lastEdge = @set["edges"].length - 1 + @set["iterator"] = max + @set["run"][0] = lastEdge > 0 ? lastEdge-1 : nil + @set["run"][1] = lastEdge + + @set["iterator"] +end + +def start(startval) + set["iterator"] = nil + startval or return nil + + inSet = @set["negInf"] + edges = @set["edges"] + + for i in (0...edges.length) + if (inSet) + if (startval <= edges[i]) + @set["iterator"] = startval + @set["run"][0] = i ? i-1 : nil + @set["run"][1] = i + return $startval + end + inSet = false + else + if (startval <= edges[i]) + return nil + end + inSet = true + end + end + + if (inSet) + @set["iterator"] = startval + @set["run"][0] = edges.length > 0 ? 
edges.length: nil + @set["run"][1] = nil + end + + @set["iterator"] +end + +def current + @set["iterator"] +end + + +def next + @set["iterator"] or return first + + run1 = @set["run"][1] + run1 or return ++@set["iterator"] + + edges = @set["edges"] + if (@set["iterator"] < edges[run1]) + @set["iterator"] += 1 + return @set["iterator"] + end + + if (run1 < edges.length-2) + run0 = run1 + 1 + @set["run"] = [run0, run0+1] + @set["iterator"] = edges[run0]+1 + elsif (run1 < edges.length-1) + run0 = run1 + 1 + @set["run"] = [run0, nil] + @set["iterator"] = edges[run0]+1 + else + @set["iterator"] = nil + end + + @set["iterator"] +end + +def prev + @set["iterator"] or return last + + run0 = @set["run"][0] + run0 or return --@set["iterator"] + + edges = @set["edges"] + + if (@set["iterator"] > edges[run0]+1) + @set["iterator"] -= 1 + return @set["iterator"] + end + + if (run0 > 1) + run1 = run0 - 1 + @set["run"] = [run1-1, run1] + @set["iterator"] = edges[run1] + elsif (run0 > 0) + run1 = run0 - 1 + @set["run"] = [nil, run1] + @set["iterator"] = edges[run1] + else + @set["iterator"] = nil + end + + @set["iterator"] +end + + +end # class + +end # module + + +# TODO +# Do not kill an item until it's tested! 
+ +# [x] new +# [x] valid +# [ ] copy +# [ ] _copy_empty # makes $set the empty set +# [x] _copy_array # copies an array into a set +# [ ] _copy_set # copies one set to another +# [ ] _copy_run_list # parses a run list +# [ ] _cleanup +# [x] run_list +# [x] elements +# [x] _real_set # converts a set specification into a set +# [x] union +# [x] intersect +# [x] diff +# [x] xor +# [ ] complement +# [x] superset +# [x] subset +# [x] equal +# [x] equivalent +# [x] cardinality +# [x] empty +# [x] finite +# [x] neg_inf { shift->{negInf} } +# [x] pos_inf { shift->{posInf} } +# [x] infinite +# [ ] universal +# [x] member +# [x] insert # way too much code, I think +# [x] remove +# [x] min +# [x] max +# [ ] grep_set(&$) +# [ ] map_set(&$) +# [x] first($) +# [x] last($) +# [ ] start($$) +# [x] current($) { shift->{iterator} } +# [x] next($) +# [x] prev($) + +# New methods +# [x] set_neg_inf +# [x] set_pos_inf +# [x] set_edges +# [x] edges