#!/usr/bin/env ruby
# $Id$
# $URL$

require 'net/https'
require 'uri'
require 'rexml/document'
require 'date'
require 'getoptlong'

# URI.escape was removed in Ruby 3.0. The RFC 2396 parser still provides the
# same escaping rules, so request paths are escaped exactly as before.
URI_ESCAPER = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI::DEFAULT_PARSER

# Minimal reader for Apple .DS_Store files: extracts the file names recorded
# in the store.
#
# Information on the format can be found at
# https://wiki.mozilla.org/DS_Store_File_Format; somebody also once wrote a
# 'dsdump.c' program that contains some information.
class DSParse
  # Every .DS_Store file starts with 00 00 00 01 42 75 64 31 ("\0\0\0\1Bud1").
  MAGIC = [0x00, 0x00, 0x00, 0x01, 0x42, 0x75, 0x64, 0x31].freeze

  def initialize
    @store = []
  end

  # Decodes the first four bytes of +arr+ as a big-endian 32-bit integer.
  def arr2long(arr)
    (arr[0] << 24) + (arr[1] << 16) + (arr[2] << 8) + arr[3]
  end

  # Decodes an array of bytes holding big-endian UTF-16 text into a UTF-8
  # string. Code units above 255 used to raise RangeError (Integer#chr);
  # undecodable input is now replaced instead of raising.
  def arr2string(arr)
    arr.pack('C*').force_encoding(Encoding::UTF_16BE)
       .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
  end

  # Loads the bytes of the file +filename+ into the store.
  def readfile(filename)
    @store = File.binread(filename).bytes
  end

  # Loads the bytes of +string+ into the store.
  def readstring(string)
    @store = string.bytes
  end

  # True when the loaded data starts with the .DS_Store magic bytes.
  def isds?
    @store[0, 8] == MAGIC
  end

  # Returns the unique file names found in the record list located through
  # the offset stored at 0x14. Parsing stops (returning what was collected so
  # far) when the data is truncated or a record has an unknown type, because
  # the position of the next record is then unknown.
  def getfilenames
    filenames = []
    offset = long_at(0x14)
    return filenames if offset.nil?

    offset &= ~15 # clear the low four bits
    structcount = long_at(offset + 8)
    return filenames if structcount.nil?

    pointer = offset + 12
    structcount.times do
      objnamelength = long_at(pointer)
      break if objnamelength.nil?

      pointer += 4
      objnamebuf = @store[pointer, objnamelength * 2]
      break if objnamebuf.nil? || objnamebuf.length < objnamelength * 2

      pointer += objnamelength * 2
      filenames.push arr2string(objnamebuf)

      pointer += 4 # four-character record tag; not needed for the names
      objtype = (@store[pointer, 4] || []).pack('C*')
      pointer += 4

      case objtype
      when 'bool'
        datasize = 1
      when 'shor', 'long', 'type'
        datasize = 4
      when 'comp', 'dutc'
        datasize = 8
      when 'ustr', 'blob'
        length = long_at(pointer)
        break if length.nil?

        pointer += 4
        # 'ustr' lengths count UTF-16 code units, 'blob' lengths count bytes.
        datasize = objtype == 'ustr' ? length * 2 : length
      else
        puts "Unrecognized data type"
        break
      end
      pointer += datasize
    end
    filenames.uniq
  end

  private

  # Big-endian 32-bit integer at +pos+, or nil when fewer than four bytes
  # are available there.
  def long_at(pos)
    bytes = @store[pos, 4]
    arr2long(bytes) if bytes && bytes.length == 4
  end
end # class DSParse

# Minimal reader for Git index files: extracts the path of every entry.
#
# Information on Git index files can be found at
# http://git.rsbx.net/Documents/Git_Data_Formats.txt
#
# Predictable files in a .git directory:
#
#   FETCH_HEAD
#   HEAD
#   ORIG_HEAD
#   branches/
#   config
#   description
#   hooks/
#   hooks/applypatch-msg
#   hooks/commit-msg
#   hooks/post-commit
#   hooks/post-receive
#   hooks/post-update
#   hooks/pre-applypatch
#   hooks/pre-commit
#   hooks/pre-rebase
#   hooks/prepare-commit-msg
#   hooks/update
#   index
#   info/
#   info/exclude
#   logs/
#   logs/HEAD
#   logs/heads/
#   logs/heads/master
#   logs/remotes/
#   logs/remotes/origin/
#   logs/remotes/origin/master
#   objects/          # contents can largely (if not entirely) be derived
#                     # from 'index' (see the notes in statinfo)
#   objects/info/
#   objects/pack/     # contents can in turn be derived from something else
#   packed-refs
#   refs/
#   refs/heads/
#   refs/heads/master
#   refs/remotes/
#   refs/remotes/origin/
#   refs/remotes/origin/HEAD
#   refs/remotes/origin/master
#   refs/tags/        # contents can in turn be derived from something else
class GitParse
  # A Git index starts with the signature 'DIRC': 44 49 52 43.
  MAGIC = [0x44, 0x49, 0x52, 0x43].freeze

  def initialize
    @store = []
  end

  # Decodes the first four bytes of +arr+ as a big-endian 32-bit integer.
  def arr2long(arr)
    (arr[0] << 24) + (arr[1] << 16) + (arr[2] << 8) + arr[3]
  end

  # Converts an array of bytes into a string, one character per byte.
  def arr2string(arr)
    arr.pack('C*')
  end

  # Converts an array of bytes into a lowercase hexadecimal string.
  def arr2hexstring(arr)
    arr.map { |byte| "%02x" % byte }.join
  end

  # Loads the bytes of the file +filename+ into the store.
  def readfile(filename)
    @store = File.binread(filename).bytes
  end

  # Loads the bytes of +string+ into the store.
  def readstring(string)
    @store = string.bytes
  end

  # True when the loaded data starts with the 'DIRC' signature.
  def isgit?
    @store[0, 4] == MAGIC
  end

  # Index format version, read from bytes 4..7.
  def gitversion
    arr2long(@store[4, 4])
  end

  # Number of entries, read from bytes 8..11.
  def entrycount
    arr2long(@store[8, 4])
  end

  # Returns the path of every index entry. Exactly +entrycount+ entries are
  # read (the previous inclusive range read one entry too many and ran into
  # the data that follows the entry list). Reading stops early when the data
  # is too short to hold another entry.
  def entries
    filenames = []
    # the first entry always starts at offset 12
    offset = 12
    entrycount.times do
      break if offset + 62 > @store.length

      name, offset = statinfo(offset)
      filenames.push name
    end
    filenames
  end

  # Reads the entry starting at +offset+ and returns [name, next_offset].
  # As a side effect the entry's object id and name are printed with +p+.
  #
  # Layout of the fixed part of an entry (only the object id is used here):
  #   ctime  = offset,      8 bytes
  #   mtime  = offset + 8,  8 bytes
  #   dev    = offset + 16, 4 bytes
  #   inode  = offset + 20, 4 bytes
  #   mode   = offset + 24, 4 bytes
  #   uid    = offset + 28, 4 bytes
  #   gid    = offset + 32, 4 bytes
  #   size   = offset + 36, 4 bytes
  #   id     = offset + 40, 20 bytes
  #   flags  = offset + 60, 2 bytes
  #   name   = offset + 62, NUL terminated
  def statinfo(offset)
    # How large is such an object ID? The object ID, or "name", of an object
    # is its SHA-1 digest, e.g. "42796666f0c5748b943956f367907ce6d9f42654".
    # Wikipedia says 160 bits, i.e. 20 bytes.
    entryid = arr2hexstring(@store[offset + 40, 20])
    p entryid
    # Noteworthy: file names under .git/objects/??/* are 38 characters long.
    # The directory indeed appears to be the first 2 characters of this
    # entry id and the file name the rest. But it does not exist for all
    # entries!

    name_start = offset + 62
    i = name_start
    # Stop at the NUL terminator, or at the end of the data when the
    # terminator is missing (truncated file).
    i += 1 while @store[i] && @store[i] != 0x00
    name = (@store[name_start...i] || []).pack('C*').force_encoding(Encoding::UTF_8)

    # Entry length including the terminating NUL, rounded up to a multiple
    # of eight bytes.
    entry_length = (i + 1) - offset
    nextoffset = offset + ((entry_length + 7) & ~7)
    p name
    return name, nextoffset
  end
end # class GitParse

# Prints the command line help and exits.
def usage
  puts <<EOT
Usage: #{$PROGRAM_NAME} <options>
    -h, --help  show this message
    -H          harvest subversion repositories.
                Tip: svn revert $(svn st|sed 's/^!//')
    -m <type>   check for one of subversion, CVS, git or .DS_Store (default: all)
                <type> is one of: svn, cvs, ds, git
    -u <url>    set baseurl
    -s          use ssl
    --user      basic authentication user
    --pass      basic authentication password
EOT
  exit
end

# Parses the command line and returns a hash mapping option name to its
# argument. Prints the usage text (and exits) on invalid options or -h.
# Also sets @harvest from -H.
def cmdline
  options = {}
  begin
    opts = GetoptLong.new(
      ["-h", "--help", GetoptLong::NO_ARGUMENT],
      ["-H", GetoptLong::NO_ARGUMENT],
      ["-m", GetoptLong::REQUIRED_ARGUMENT],
      ["-u", GetoptLong::REQUIRED_ARGUMENT],
      ["-s", GetoptLong::NO_ARGUMENT],
      ["--user", GetoptLong::REQUIRED_ARGUMENT],
      ["--pass", GetoptLong::REQUIRED_ARGUMENT]
    )
    opts.quiet = true
    opts.each do |opt, arg|
      options[opt] = arg
    end
  rescue GetoptLong::Error => e
    print "#{e}\n"
    usage
  end
  usage if options["-h"]
  @harvest = options["-H"]
  options
end

# Retrieves +uri_str+ and returns the body as a string.
#
# file:// URLs are read from the local file system; everything else is
# fetched over HTTP(S), following at most +limit+ redirects.
#
# Returns nil (after printing a message) for 401 and 403 responses.
# Raises ArgumentError when the redirect limit is exhausted, RuntimeError
# ("Not found") for a missing local file, and whatever Net::HTTPResponse#error!
# raises for any other unsuccessful response (404 included).
def fetch(uri_str, limit = 10)
  # You should choose better exception.
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  uri = URI.parse(uri_str)
  if uri.scheme.to_s == "file"
    raise "Not found" unless File.exist?(uri.path)

    return File.binread(uri.path)
  end

  user = ""
  pass = ""
  user, pass = uri.userinfo.split(':', 2) if uri.userinfo
  # credentials given on the command line win over those in the URL
  user = @user if @user
  pass = @pass if @pass

  http = Net::HTTP.new(uri.host, uri.port)
  # Honour -s, but also speak TLS when the URL itself (or a redirect
  # target) is https.
  http.use_ssl = @use_ssl || uri.scheme.to_s == "https"

  path = uri.path.to_s.empty? ? "/" : uri.path
  target = URI_ESCAPER.escape(path)
  target += "?#{URI_ESCAPER.escape(uri.query)}" if uri.query
  req = Net::HTTP::Get.new(target)
  # Only send an Authorization header when there is something to send.
  req.basic_auth(user, pass) unless user.to_s.empty? && pass.to_s.empty?

  response = http.request(req)
  case response
  when Net::HTTPSuccess
    response.body
  when Net::HTTPRedirection
    location = response['location']
    response.error! if location.nil?
    # Location may be relative; resolve it against the current URL.
    fetch(URI.join(uri_str, location).to_s, limit - 1)
  when Net::HTTPUnauthorized
    puts "401 Authorization Required #{uri_str}"
    nil
  when Net::HTTPForbidden
    puts "403 Forbidden #{uri_str}"
    nil
  else
    # 404 and friends end up here on purpose: callers rescue the exception.
    response.error!
  end
end

# True when +name+, which comes from the remote server, would escape the
# current directory if used as a local path component.
def unsafe_name?(name)
  name == ".." || name.include?("/") || name.include?("\\") || name.include?("\0")
end

# Creates the local .svn directory skeleton for directory +name+ and
# downloads its administrative files from "#{url}/#{name}/.svn/".
def svnharvestdir(url, name)
  if unsafe_name?(name)
    $stderr.puts "Skipping unsafe directory name #{name.inspect}"
    return
  end

  [
    "#{name}",
    "#{name}/.svn",
    "#{name}/.svn/prop-base",
    "#{name}/.svn/props",
    "#{name}/.svn/text-base",
    "#{name}/.svn/tmp",
    "#{name}/.svn/tmp/prop-base",
    "#{name}/.svn/tmp/props",
    "#{name}/.svn/tmp/text-base",
    "#{name}/.svn/tmp/wcprops",
    "#{name}/.svn/wcprops"
  ].each do |dir|
    begin
      Dir.mkdir(dir)
    rescue Errno::EEXIST
      # already there, fine
    end
  end

  ["README.txt", "all-wcprops", "empty-file", "entries", "format"].each do |file|
    begin
      body = fetch("#{url}/#{name}/.svn/#{file}")
    rescue Net::ProtoServerError
      # 4xx response: this file is simply not available
      next
    end
    next if body.nil?

    begin
      File.open("#{name}/.svn/#{file}", "wb") { |f| f.print body }
    rescue StandardError => e
      p e
    end
  end
end

# Downloads the pristine copies and property files of the file +lastline+
# from "#{url}/.svn/" into the .svn directory below the current directory.
def svnharvestfile(url, lastline)
  if unsafe_name?(lastline)
    $stderr.puts "Skipping unsafe file name #{lastline.inspect}"
    return
  end

  [
    "prop-base", "props", "text-base",
    "tmp/prop-base", "tmp/props", "tmp/text-base", "tmp/wcprops",
    "wcprops"
  ].each do |subdir|
    # directories whose name ends in 's' (props, wcprops) hold .svn-work files
    ext = subdir.match(/s$/) ? ".svn-work" : ".svn-base"
    begin
      body = fetch("#{url}/.svn/#{subdir}/#{lastline}#{ext}")
    rescue Net::ProtoServerError
      next
    end
    next if body.nil?

    begin
      File.open("#{Dir.getwd}/.svn/#{subdir}/#{lastline}#{ext}", "wb") { |f| f.print body }
    rescue StandardError => e
      p "rescue File.open() in svnharvestfile(url, lastline)"
      p e
    end
  end
end

# Lists the entries recorded in "#{url}/.svn/entries" (XML or line based
# format), harvesting them when @harvest is set, and recurses into every
# directory found.
def svnparse(url)
  puts "\n#{url}"
  body = begin
    fetch("#{url}/.svn/entries")
  rescue StandardError
    nil
  end
  return if body.nil? || body.empty?

  dirs = []
  case body[0].chr
  when '<'
    xmldoc = REXML::Document.new(body)
    xmldoc.elements.each("wc-entries/entry") do |item|
      name = item.attribute("name").to_s
      case item.attribute("kind").to_s
      when "dir"
        next if name == ""

        puts "#{name}/"
        dirs.push(name)
        svnharvestdir(url, name) if @harvest
      when "file"
        puts "#{name} #{item.attribute("last-author")} #{item.attribute("committed-date")}"
        svnharvestfile(url, name) if @harvest
      else
        puts " Strange kind #{item.attribute("kind")}"
      end
    end
  when '8', '9', '1'
    # NOTE(review): a 'file'/'dir' line is reported with the most recently
    # seen date/author lines; if the kind line precedes the entry's own date
    # lines these belong to the previous entry - confirm against the
    # entries format before relying on the author/date columns.
    lastline = ""
    commitdate = ""
    author = ""
    bodyarr = body.lines
    bodyarr.each_with_index do |line, count|
      line.chomp!
      if line.match(/\d\d\d\d-\d\d-\d\dT/)
        commitdate = line
        author = bodyarr[count + 2]
      end
      case line
      when 'dir'
        next if lastline == ""

        puts "#{lastline}/"
        dirs.push lastline
        svnharvestdir(url, lastline) if @harvest
      when 'file'
        puts "#{lastline} #{author} #{commitdate}"
        svnharvestfile(url, lastline) if @harvest
      else
        lastline = line
      end
    end
  else
    $stderr.puts "Unknown SVN entries format found."
  end

  dirs.each do |dir|
    if @harvest
      # svnharvestdir refused to create these, so there is nothing to enter
      next if unsafe_name?(dir)

      # block form restores the working directory even when parsing raises
      Dir.chdir(dir) { svnparse("#{url}/#{dir}") }
    else
      svnparse("#{url}/#{dir}")
    end
  end
end

# Lists the entries recorded in "#{url}/CVS/Entries" and recurses into
# every directory found.
def cvsparse(url)
  puts "\n#{url}"
  body = begin
    fetch("#{url}/CVS/Entries")
  rescue StandardError
    nil
  end
  return if body.nil?

  dirs = []
  # String#each no longer exists (Ruby 1.9+); iterate line by line.
  body.each_line do |line|
    case line
    when /^D\/?(.*?)\/.*/
      name = $1
      # an empty name would make the recursion below fetch the same URL forever
      next if name.empty?

      puts "#{name}/"
      dirs.push(name)
    when /^\/(.*?)\/(.*?)\/(.*?)\/.*/
      puts "#{$1} #{$2} #{$3}"
    end
  end
  dirs.each { |dir| cvsparse("#{url}/#{dir}") }
end

# Lists the names recorded in "#{url}/.DS_Store". Every name that itself
# serves a valid .DS_Store is treated as a directory (printed with a
# trailing slash) and recursed into.
def dsparse(url)
  puts "\n#{url}"
  body = begin
    fetch("#{url}/.DS_Store")
  rescue StandardError
    nil
  end
  return if body.nil?

  ds = DSParse.new
  ds.readstring(body)
  unless ds.isds?
    $stderr.puts "Not a .DS_Store format file"
    return
  end

  dirs = []
  entries = ds.getfilenames.map do |entry|
    next entry if entry == '.' || entry == '..'

    probe = begin
      fetch("#{url}/#{entry}/.DS_Store")
    rescue StandardError
      nil
    end
    next entry if probe.nil?

    dscheck = DSParse.new
    dscheck.readstring(probe)
    next entry unless dscheck.isds?

    dirs.push entry
    "#{entry}/"
  end
  puts entries
  dirs.each { |dir| dsparse("#{url}/#{dir}") }
end

# Lists the paths recorded in the Git index at "#{url}/.git/index".
def gitparse(url)
  puts "\n#{url}"
  body = begin
    fetch("#{url}/.git/index")
  rescue StandardError
    nil
  end
  return if body.nil?

  git = GitParse.new
  git.readstring(body)
  # the index starts with 'DIRC'
  unless git.isgit?
    $stderr.puts "Not a git index format file"
    return
  end
  if git.gitversion != 2
    p "Weird git version detected, this'll probably end up in tears"
  end
  puts
  git.entries.each { |name| puts name }
end

# Raises RuntimeError unless "#{url}/#{path}" can be fetched. A nil result
# (401/403) counts as not found too.
def probe_url(url, path)
  raise "Not found" if fetch("#{url}/#{path}").nil?
end

# Reports whether Subversion metadata is exposed below +url+ and lists it.
def checksvn(url)
  puts "==================================================="
  probe_url(url, ".svn/entries")
  puts "Subversion info found:"
  svnharvestdir(url, ".") if @harvest
  svnparse(url)
rescue StandardError
  puts "No subversion info found"
end

# Reports whether CVS metadata is exposed below +url+ and lists it.
def checkcvs(url)
  puts "==================================================="
  probe_url(url, "CVS/Entries")
  puts "CVS info found:"
  cvsparse(url)
rescue StandardError
  puts "No CVS info found"
end

# Reports whether a .DS_Store file is exposed below +url+ and lists it.
def checkds(url)
  puts "==================================================="
  probe_url(url, ".DS_Store")
  puts ".DS_Store file found:"
  dsparse(url)
rescue StandardError
  puts "No .DS_store file found"
end

# Reports whether a Git index is exposed below +url+ and lists it.
def checkgit(url)
  puts "==================================================="
  probe_url(url, ".git/index")
  puts "Git info found:"
  gitparse(url)
rescue StandardError => e
  puts "#{e}"
  puts "No Git info found"
end

# Command line entry point: parses the options and runs the requested checks.
def main
  options = cmdline
  usage if options["-u"].nil?

  # Only a real scheme prefix counts; a bare host such as "httpbin.org"
  # still needs "http://" in front of it.
  unless options["-u"].match(%r{\A(https?|file)://})
    options["-u"] = "http://#{options["-u"]}"
  end

  @use_ssl = options["-s"] ? true : false
  @user = options["--user"] if options["--user"]
  @pass = options["--pass"] if options["--pass"]

  # Ugly hack: once other formats can be harvested as well this has to be
  # done differently. For now, well, it works.
  if options["-H"]
    # Check whether there already is a .svn dir at this location: entries
    # and other files are read-only, so the rest would fail on it. Besides,
    # you very probably do not want that. Really not.
    if File.exist?(".svn")
      STDERR.puts ".svn dir found at current location"
      STDERR.puts "please use a different location"
      exit
    end
  end

  url = options["-u"]
  case options["-m"]
  when nil
    checksvn(url)
    checkcvs(url)
    checkds(url)
    checkgit(url)
  when "svn"
    checksvn(url)
  when "cvs"
    checkcvs(url)
  when "ds"
    checkds(url)
  when "git"
    checkgit(url)
  else
    STDERR.puts "Unknown check type #{options["-m"]}"
    usage
  end
end

main if __FILE__ == $PROGRAM_NAME