From e7f301c90d77491872264877e87d166c714b6159 Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Tue, 12 Feb 2008 15:19:13 +0000 Subject: [PATCH] fix save_group and save_group_as --- trunk/ripnews/news/newsrc.rb | 87 ++++++++++++++++++++++++++++-------- trunk/ripnews/ripnews.rb | 16 ++++++- 2 files changed, 84 insertions(+), 19 deletions(-) diff --git a/trunk/ripnews/news/newsrc.rb b/trunk/ripnews/news/newsrc.rb index 5633444..17117d5 100644 --- a/trunk/ripnews/news/newsrc.rb +++ b/trunk/ripnews/news/newsrc.rb @@ -18,16 +18,19 @@ # require "set/intspan" +#require "thread" module News class Newsrc +#@@save_lock = Mutex.new + def initialize(file=nil) @newsrc = { "group" => Hash.new, "list" => Array.new } if file unless load(file) - print "Can't load #{file}\n" + puts "Can't load #{file}" exit end end @@ -58,7 +61,7 @@ end def parse(line) unless line =~ /^([^!:]+)([!:])\s(.*)$/x - print "Newsrc.parse: Bad newsrc line: #{line}\n" + puts "Newsrc.parse: Bad newsrc line: #{line}" exit end @@ -67,7 +70,7 @@ def parse(line) articles = $3 unless Set::IntSpan.valid(articles) - print "Newsrc.parse: Bad article list: #{line}\n" + puts "Newsrc.parse: Bad article list: #{line}" end @@ -85,49 +88,75 @@ def save save_as(@newsrc["file"]) end -# this is not thread safe! +# this is not thread safe! (well, it should be now) def save_as(file) +# @@save_lock.synchronize{ if FileTest.exists?("#{file}") begin File.rename(file, "#{file}.bak") rescue - print "Can't rename #{file}, #{file}.bak: #{$!}\n" + puts "Can't rename #{file}, #{file}.bak: #{$!}" exit end end begin newsrc = File.new(file, "w") + newsrc.flock(File::LOCK_EX) rescue - print "Can't open #{file}: #{$!}\n" + puts "Can't open #{file}: #{$!}" exit end @newsrc["file"] = file @newsrc["list"].each{|group| newsrc.print format(group) } + newsrc.sync + newsrc.flock(File::LOCK_UN) # what's the right order here? newsrc.close +# } end +# Here 'group' is a group structure. It'd probably be much more useful if +# it could just be a group_name_; which it can now. def save_group(group) unless @newsrc.has_key?("file") @newsrc["file"] = "#{$ENV['HOME']}/.newsrc" end - save_as(@newsrc["file"], group) + if group.class.to_s == "String" + groupname = group.dup + @newsrc["list"].each{|g| + if g["name"] == groupname + group = g.dup + break + end + } + + end + save_group_as(@newsrc["file"], group) end -# this should be thread safe +# This should be thread safe, but may not be. It needs testing! +# If not, mutexes are needed. def save_group_as(file, group) +# @@save_lock.synchronize{ +p Time.now +p "copy file" if FileTest.exists?("#{file}") if ( ! File.copy(file, "#{file}.bak") ) - print "Can't copy #{file} to #{file}.bak: #{$!}\n" + puts "Can't copy #{file} to #{file}.bak: #{$!}" end end +p Time.now +p "open & lock file" begin - newsrc = File.new(file, "r+").flock(File::LOCK_EX) + newsrc = File.new(file, "r+") + newsrc.flock(File::LOCK_EX) rescue - print "Can't open ${file}: #{$!}\n" + puts "Can't open ${file}: #{$!}" exit end +p Time.now +p "opened & locked" # read file lines = newsrc.readlines @@ -137,23 +166,46 @@ def save_group_as(file, group) # write read stuff & replace group lines.each{|line| - if line =~ /^#{group}(:|!)/ - newsrc.print line - else + # same parsing as the parse method uses + unless line =~ /^([^!:]+)([!:])\s(.*)$/x + puts "Newsrc.parse: Bad newsrc line: #{line}" + # restore backup on failure, it'll contain the flaw too, but it'll + # be complete + if ( ! File.copy("#{file}.bak", file) ) + puts "Can't copy #{file}.bak to #{file}: #{$!}" + end + exit + end + linegroup = $1 + if linegroup == group["name"] newsrc.print format(group) + else + newsrc.print line end } +p Time.now +p "truncate, sync, unlock & close file" + # sometimes the file grows and then shrinks + # this is because a 'read' line van become shorter when more + # articles have been read (1,3,5 vs 1-5) + # when this happens the file needs to be truncated + pos = newsrc.pos + newsrc.truncate(pos) + newsrc.sync newsrc.flock(File::LOCK_UN) # what's the right order here? newsrc.close +p Time.now +p "garbage collect" +p Time.now + GC.start +# } end def format(group) name = group["name"] sub = group["subscribed"] ? ':' : '!' articles = group["articles"].run_list - #space = articles ? ' ' : '' - #return "#{name}#{sub}#{space}#{articles}\n" return "#{name}#{sub} #{articles}\n" end @@ -274,7 +326,6 @@ def number(group, offset) @newsrc["list"].push(upper) end - def del_group(name) if @newsrc["group"].has_key?(name) group = @newsrc["group"][name] @@ -306,7 +357,7 @@ def mark(name, article, options = {"where" => ""}) unless @newsrc["group"].has_key?(name) add_group(name, options) end - @newsrc["group"][name]["articles"].insert(article) + @newsrc["group"][name]["articles"].insert!(article) end def mark_list(name, list, options = {"where" => ""}) diff --git a/trunk/ripnews/ripnews.rb b/trunk/ripnews/ripnews.rb index 4bc09cc..aa84d19 100755 --- a/trunk/ripnews/ripnews.rb +++ b/trunk/ripnews/ripnews.rb @@ -737,17 +737,20 @@ def main _create_group_dir(group) + subjcount = 0 for subj in @articles.get_group_subjects.sort{|a, b| ward_sort(a, b)} puts "#{subj}" if Debuglevel > 2 poster = @articles.get_group_poster(subj) # explicitly mark as read if @config[group].has_key?("-MR") and subj =~ /#{@config[group]["-MR"]}/ puts "Marking '#{subj}' as read" + subjcount += 1 _mark_read(subj) - # get the juicy bits elsif @config[group].has_key?("-MRF") and poster =~ /#{@config[group]["-MRF"]}/ puts "Marking poster '#{poster}' as read (subj: '#{subj}')" + subjcount += 1 _mark_read(subj) + # get the juicy bits elsif !(@config[group].has_key?("-X") and subj =~ /#{@config[group]["-X"]}/) and subj =~ /#{@config[group]["-I"]}/ puts "Match: #{subj}" if Debuglevel > 0 @@ -757,8 +760,19 @@ def main puts "Poster match: #{poster}" if Debuglevel > 0 _get_article(subj, group) else + subjcount += 1 _mark_remaining(subj, group) end + # prevent haveing to start all over in case of a crash + # the number of subjects on which to sync should be balanced + # between the time it takes to sync and the time it takes to read them + # this is just an initial guess + # the counter is only used for the mark read stuff, as the get article + # stuff saves the newsrc anyway + if subjcount >= 150 + _save_newsrc(group) + subjcount = 0 + end end _wait_for_threads