publicscripts/listversioned/listversioned.rb
2011-05-20 13:43:46 +00:00

670 lines
14 KiB
Ruby
Executable file

#!/usr/bin/env ruby
# $Id$
# $URL$
require 'net/https'
require 'uri'
require 'rexml/document'
require 'date'
require 'getoptlong'
class DSParse
  # Minimal reader for Finder .DS_Store files: keeps the raw bytes in @store
  # and lists the names found in the record block referenced from the header.
  #
  # Info about the DS_Store format can be found at
  # https://wiki.mozilla.org/DS_Store_File_Format
  # Someone also once wrote a 'dsdump.c' program that contains some info.

  # First eight bytes of a .DS_Store file: 00 00 00 01 42 75 64 31.
  MAGIC = [0x00, 0x00, 0x00, 0x01, 0x42, 0x75, 0x64, 0x31].freeze

  def initialize
    @store = []
  end

  # Combines the first four bytes of +arr+ into one big-endian integer.
  def arr2long(arr)
    (arr[0] << 24) + (arr[1] << 16) + (arr[2] << 8) + arr[3]
  end

  # Decodes +arr+ (bytes forming big-endian 16-bit code units) into a UTF-8
  # string. Going through the encoding machinery instead of Integer#chr keeps
  # names with characters above 0xFF from raising RangeError; undecodable
  # input is replaced rather than aborting the whole listing.
  def arr2string(arr)
    arr.pack('C*').force_encoding('UTF-16BE').encode('UTF-8', invalid: :replace, undef: :replace)
  end

  # Loads the bytes of the file +filename+ into the store.
  def readfile(filename)
    # binread closes the handle again and never applies text-mode translation.
    @store = File.binread(filename).bytes
  end

  # Loads the bytes of +string+ into the store.
  def readstring(string)
    @store = string.bytes
  end

  # True when the store starts with the .DS_Store magic bytes.
  def isds?
    @store[0, 8] == MAGIC
  end

  # Returns the unique object names of the records in the block whose offset
  # is stored at header position 0x14, in order of first appearance.
  # Only names are collected; each record's payload is skipped.
  def getfilenames
    filenames = []
    offset = arr2long(@store[0x14, 4])
    offset &= ~15 # clears the lowest four bits of the offset
    structcount = arr2long(@store[offset + 8, 4])
    pointer = offset + 12
    structcount.times do
      # Name: 4-byte length in 16-bit units, followed by the name itself.
      objnamelength = arr2long(@store[pointer, 4])
      pointer += 4
      filenames.push arr2string(@store[pointer, objnamelength * 2])
      pointer += objnamelength * 2
      pointer += 4 # four-character record tag, not needed for listing
      objtype = @store[pointer, 4].pack('C*')
      pointer += 4
      case objtype
      when 'bool'
        datasize = 1
      when 'shor', 'long', 'type'
        datasize = 4
      when 'comp', 'dutc'
        # 8-byte integer / timestamp payloads per the format description.
        datasize = 8
      when 'ustr'
        # 4-byte length in 16-bit units, then the string.
        datasize = arr2long(@store[pointer, 4]) * 2
        pointer += 4
      when 'blob'
        # 4-byte length in bytes, then the data.
        datasize = arr2long(@store[pointer, 4])
        pointer += 4
      else
        puts "Unrecognized data type"
        # The payload size is unknown, so every following record would be
        # read from the wrong position; stop with what we have so far.
        break
      end
      pointer += datasize
    end
    filenames.uniq
  end
end # class DSParse
class GitParse
  # Minimal reader for git index ('DIRC') files: keeps the raw bytes in @store
  # and lists the path names of the index entries.
  #
  # Info about Git index files can be found at
  # http://git.rsbx.net/Documents/Git_Data_Formats.txt
  # Predictable files:
  #
  # FETCH_HEAD
  # HEAD
  # ORIG_HEAD
  # branches/
  # config
  # description
  # hooks/
  # hooks/applypatch-msg
  # hooks/commit-msg
  # hooks/post-commit
  # hooks/post-receive
  # hooks/post-update
  # hooks/pre-applypatch
  # hooks/pre-commit
  # hooks/pre-rebase
  # hooks/prepare-commit-msg
  # hooks/update
  # index
  # info/
  # info/exclude
  # logs/
  # logs/HEAD
  # logs/heads/
  # logs/heads/master
  # logs/remotes/
  # logs/remotes/origin/
  # logs/remotes/origin/master
  # objects/ # contents largely (if not entirely) derivable from 'index' (see statinfo() notes)
  # objects/info/
  # objects/pack/ # contents likewise derivable from something else
  # packed-refs
  # refs/
  # refs/heads/
  # refs/heads/master
  # refs/remotes/
  # refs/remotes/origin/
  # refs/remotes/origin/HEAD
  # refs/remotes/origin/master
  # refs/tags/ # contents likewise derivable from something else

  def initialize
    @store = []
  end

  # Combines the first four bytes of +arr+ into one big-endian integer.
  def arr2long(arr)
    (arr[0] << 24) + (arr[1] << 16) + (arr[2] << 8) + arr[3]
  end

  # Turns an array of byte values into a string, one character per byte.
  def arr2string(arr)
    arr.map(&:chr).join
  end

  # Renders an array of byte values as lowercase hex, two digits per byte.
  def arr2hexstring(arr)
    arr.map { |byte| "%02x" % byte }.join
  end

  # Loads the bytes of the file +filename+ into the store.
  def readfile(filename)
    # binread closes the handle again and never applies text-mode translation.
    @store = File.binread(filename).bytes
  end

  # Loads the bytes of +string+ into the store.
  def readstring(string)
    @store = string.bytes
  end

  # True when the store starts with the index signature 44 49 52 43 ('DIRC').
  def isgit?
    @store[0, 4] == [0x44, 0x49, 0x52, 0x43]
  end

  # Index format version, read from bytes 4..7.
  def gitversion
    arr2long(@store[4, 4])
  end

  # Number of entries announced in the header, read from bytes 8..11.
  def entrycount
    arr2long(@store[8, 4])
  end

  # Returns the path names of all index entries, in file order.
  def entries
    # The first entry always starts at offset 12, right after the header.
    offset = 12
    # Exactly entrycount entries exist; reading one more (as an inclusive
    # 0..entrycount range would) runs past the last entry.
    Array.new(entrycount) do
      name, offset = statinfo(offset)
      name
    end
  end

  # Reads the entry starting at +offset+ and returns [name, nextoffset].
  # Raises ArgumentError when the name is not NUL-terminated inside the store.
  def statinfo(offset)
    # Fixed-size fields in front of the name (currently unused):
    # ctime = arr2long(@store[offset, 8])
    # mtime = arr2long(@store[offset+8, 8])
    # dev = arr2long(@store[offset+16, 4])
    # inode = arr2long(@store[offset+20, 4])
    # mode = arr2long(@store[offset+24, 4])
    # uid = arr2long(@store[offset+28, 4])
    # gid = arr2long(@store[offset+32, 4])
    # size = arr2long(@store[offset+36, 4])
    #
    # The object ID, or "name", of an object is
    # _sha-1_digest_( <OBJECT_HEADER> <object_CONTENTS> ).
    # Random sha1 example: "42796666f0c5748b943956f367907ce6d9f42654"
    # Wikipedia says 160 bits, i.e. 20 bytes.
    entryid = arr2hexstring(@store[offset + 40, 20])
    p entryid # diagnostic output of the object id
    # Notable: file names under .git/objects/??/* are 38 characters long; the
    # directory indeed appears to be the first 2 characters of this entry id
    # and the file name the rest. But it does not exist for all entries!
    # entryflags = arr2long(@store[offset+60, 2])
    name_start = offset + 62
    name_end = name_start
    name_end += 1 while @store[name_end] && @store[name_end] != 0x00
    if @store[name_end].nil?
      raise ArgumentError, "truncated git index entry at offset #{offset}"
    end
    name = @store[name_start...name_end].pack('C*').force_encoding('UTF-8')
    # Entry length including the terminating NUL, rounded up to the next
    # multiple of 8 to find where the following entry starts.
    length = name_end + 1 - offset
    nextoffset = offset + ((length + 7) / 8) * 8
    p name # diagnostic output of the path name
    [name, nextoffset]
  end
end # class GitParse
# Prints the command line help to standard output and terminates the script.
def usage
  # Only show the script's own name, without any leading directories.
  script = $PROGRAM_NAME.sub(%r{.*/}, "")
  puts <<EOT
Usage: #{script} [options] -u <baseurl>
-h, --help show this message
-H harvest subversion repositories. Tip: svn revert $(svn st|sed 's/^!//')
-m <svn|cvs|git|ds> check for one of subversion, CVS, git or .DS_Store (default: all)
-u <baseurl> set baseurl
-s use ssl
EOT
  exit
end
# Parses the command line (ARGV) and returns a hash mapping each option flag
# that was given to its argument. Also stores the -H value in @harvest.
# Prints the parser error followed by the usage text when parsing fails, and
# the usage text when -h/--help was given.
def cmdline
  parsed = {}
  begin
    parser = GetoptLong.new(
      ["-h", "--help", GetoptLong::NO_ARGUMENT],
      ["-H", GetoptLong::NO_ARGUMENT],
      ["-m", GetoptLong::REQUIRED_ARGUMENT],
      ["-u", GetoptLong::REQUIRED_ARGUMENT],
      ["-s", GetoptLong::NO_ARGUMENT]
    )
    # Errors are reported by the rescue below instead of by GetoptLong itself.
    parser.quiet = true
    parser.each { |flag, value| parsed[flag] = value }
  rescue
    print "#{$!}\n"
    usage
  end
  usage if parsed["-h"]
  @harvest = parsed["-H"]
  parsed
end
# Retrieves the resource at +uri_str+ and returns its content as a string.
#
# file: URIs are read from the local file system; everything else is
# requested over HTTP, following at most +limit+ redirects. TLS is used when
# @use_ssl is set or the URI scheme is https.
#
# Returns nil (after printing a notice) for 401 and 403 responses.
# Raises ArgumentError when the redirect limit is exhausted, RuntimeError
# ("Not found") for a missing local file, and whatever Net::HTTPResponse#error!
# raises for any other non-success response.
def fetch(uri_str, limit = 10)
  # You should choose better exception.
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  uri = URI.parse(uri_str)
  if uri.scheme.to_s == "file"
    raise "Not found" unless File.exist?(uri.path)
    # The fetched files are partly binary; binread also closes the handle.
    return File.binread(uri.path)
  end

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = @use_ssl || uri.scheme.to_s == "https"

  # URI.escape no longer exists in current Ruby; the default parser still
  # provides the same escaping.
  # NOTE(review): this escaping presumably also encodes '%', which would
  # double-encode paths that are already percent-encoded - confirm before
  # changing it.
  escaper = URI::DEFAULT_PARSER
  path = uri.path.to_s.empty? ? "/" : uri.path
  target = escaper.escape(path)
  target = "#{target}?#{escaper.escape(uri.query)}" if uri.query
  req = Net::HTTP::Get.new(target)
  # Only send credentials when some are configured.
  req.basic_auth(@user, @pass) if @user

  response = http.request(req)
  case response
  when Net::HTTPSuccess then response.body
  when Net::HTTPRedirection then fetch(response['location'], limit - 1)
  when Net::HTTPUnauthorized then puts "401 Authorization Required #{uri_str}"
  when Net::HTTPForbidden then puts "403 Forbidden #{uri_str}"
  # when Net::HTTPNotFound then puts "404 Not Found #{uri_str}"
  else
    response.error!
  end
end
# Recreates the .svn administrative area of directory +name+ locally:
# creates +name+ and its .svn sub directories below the current directory,
# then downloads the top-level administrative files from "+url+/+name+/.svn/".
# Files that cannot be fetched are skipped; write errors are printed.
def svnharvestdir(url, name)
  ["",
   "/.svn",
   "/.svn/prop-base",
   "/.svn/props",
   "/.svn/text-base",
   "/.svn/tmp",
   "/.svn/tmp/prop-base",
   "/.svn/tmp/props",
   "/.svn/tmp/text-base",
   "/.svn/tmp/wcprops",
   "/.svn/wcprops"].each do |suffix|
    begin
      Dir.mkdir("#{name}#{suffix}")
    rescue Errno::EEXIST
      # Already present, e.g. from an earlier run.
    end
  end
  ["README.txt", "all-wcprops", "empty-file", "entries", "format"].each do |file|
    begin
      body = fetch("#{url}/#{name}/.svn/#{file}")
    rescue Net::HTTPExceptions, RuntimeError
      # Not every file exists on every server. Net::HTTPExceptions covers all
      # HTTP status errors, RuntimeError the "Not found" of file: URLs; one
      # missing file must not abort the whole harvest.
      next
    end
    next if body.nil?
    begin
      # Binary mode: the content is stored exactly as received.
      File.open("#{name}/.svn/#{file}", "wb") { |f| f.print body }
    rescue
      p $!
    end
  end
end
# Downloads the administrative copies of the versioned file +lastline+ from
# the .svn area below +url+ into the matching sub directories of ./.svn.
# Copies that cannot be fetched are skipped; write errors are printed.
def svnharvestfile(url, lastline)
  ["prop-base", "props", "text-base", "tmp/prop-base", "tmp/props", "tmp/text-base", "tmp/wcprops", "wcprops"].each do |subdir|
    # Sub directories whose name ends in "s" use the .svn-work suffix,
    # all others .svn-base.
    ext = subdir.match(/s$/) ? ".svn-work" : ".svn-base"
    begin
      body = fetch("#{url}/.svn/#{subdir}/#{lastline}#{ext}")
    rescue Net::HTTPExceptions, RuntimeError
      # Most copies do not exist for a given file. Net::HTTPExceptions covers
      # all HTTP status errors, RuntimeError the "Not found" of file: URLs;
      # one missing copy must not abort the whole harvest.
      next
    end
    next if body.nil?
    begin
      # Binary mode: the content is stored exactly as received.
      File.open("#{Dir.getwd}/.svn/#{subdir}/#{lastline}#{ext}", "wb") { |f| f.print body }
    rescue
      p "rescue File.open() in svnharvestfile(url, lastline)"
      p $!
    end
  end
end
# Lists the contents of the Subversion working copy exposed below +url+ by
# reading "+url+/.svn/entries", then recurses into every sub directory found.
# Both the XML entries format and the line based one (first line starting
# with 8, 9 or 1) are understood. When @harvest is set, the administrative
# files of every directory and file are downloaded as well.
# Returns silently when the entries file cannot be fetched.
def svnparse(url)
  puts "\n#{url}"
  begin
    body = fetch("#{url}/.svn/entries")
  rescue
  end
  return if body.nil?

  dirs = []
  # body[0, 1] is "" for an empty body, which then ends up in the
  # "unknown format" branch instead of failing on nil.
  case body[0, 1]
  when '<'
    xmldoc = REXML::Document.new(body)
    xmldoc.elements.each("wc-entries/entry") do |item|
      entry_name = item.attribute("name").to_s
      case item.attribute("kind").to_s
      when "dir"
        # The entry with the empty name describes the directory itself.
        next if entry_name == ""
        puts "#{entry_name}/"
        dirs.push(entry_name)
        svnharvestdir(url, entry_name) if @harvest
      when "file"
        puts "#{entry_name} #{item.attribute("last-author")} #{item.attribute("committed-date")}"
        svnharvestfile(url, entry_name) if @harvest
      else
        puts " Strange kind #{item.attribute("kind")}"
      end
    end
  when '8', '9', '1'
    lastline = ""
    commitdate = ""
    author = ""
    # Strip the line endings up front, so that looking ahead for the author
    # does not pick up a trailing newline that breaks the listing line.
    lines = body.each_line.map { |raw| raw.chomp }
    lines.each_with_index do |line, count|
      # A timestamp line is taken as the commit date, the line two further
      # down as the author.
      # NOTE(review): these values are whatever was seen last before a kind
      # line, so they presumably belong to the preceding entry - verify
      # against the entries format before relying on them.
      if line.match(/\d\d\d\d-\d\d-\d\dT/)
        commitdate = line
        author = lines[count + 2]
      end
      case line
      when 'dir'
        # The line before the kind holds the entry name; the empty name
        # describes the directory itself.
        next if lastline == ""
        puts "#{lastline}/"
        dirs.push lastline
        svnharvestdir(url, lastline) if @harvest
      when 'file'
        puts "#{lastline} #{author} #{commitdate}"
        svnharvestfile(url, lastline) if @harvest
      else
        lastline = line
      end
    end
  else
    $stderr.puts "Unknown SVN entries format found."
  end

  dirs.each do |dir|
    if @harvest
      # Harvested files are written relative to the current directory; the
      # block form restores it even when the recursion raises.
      Dir.chdir(dir) { svnparse("#{url}/#{dir}") }
    else
      svnparse("#{url}/#{dir}")
    end
  end
end
# Lists the contents of the CVS working copy exposed below +url+ by reading
# "+url+/CVS/Entries", then recurses into every sub directory found.
# Directories are printed as "name/", files as "name revision timestamp".
# Returns silently when the Entries file cannot be fetched.
def cvsparse(url)
  puts "\n#{url}"
  body = nil
  begin
    body = fetch("#{url}/CVS/Entries")
  rescue
  end
  return if body.nil?

  dirs = []
  # String#each no longer exists in current Ruby; each_line iterates the
  # fetched text line by line.
  body.each_line do |line|
    case line
    when /^D\/?(.*?)\/.*/
      # Directory entry: D/name/...
      dir_name = Regexp.last_match(1)
      puts "#{dir_name}/"
      dirs.push(dir_name)
    when /^\/(.*?)\/(.*?)\/(.*?)\/.*/
      # File entry: /name/revision/timestamp/...
      found = Regexp.last_match
      puts "#{found[1]} #{found[2]} #{found[3]}"
    end
  end
  dirs.each { |dir| cvsparse("#{url}/#{dir}") }
end
# Lists the names recorded in "+url+/.DS_Store" and recurses into every
# entry that serves a valid .DS_Store of its own; such entries are treated
# as directories and printed with a trailing slash.
# Returns silently when the file cannot be fetched, and with a message on
# standard error when it is not in .DS_Store format.
def dsparse(url)
  puts "\n#{url}"
  ds = DSParse.new
  dirs = []
  begin
    body = fetch("#{url}/.DS_Store")
  rescue
  end
  return if body.nil?

  ds.readstring(body)
  unless ds.isds?
    $stderr.puts "Not a .DS_Store format file"
    return
  end

  entries = ds.getfilenames
  entries.each_with_index do |entry, idx|
    next if entry == '.' || entry == '..'
    begin
      # Probe the entry: anything answering with a .DS_Store is a directory.
      probe = DSParse.new
      probe.readstring(fetch("#{url}/#{entry}/.DS_Store"))
      if probe.isds?
        dirs << entry
        entries[idx] = "#{entry}/"
      end
    rescue
      # Any failure while probing simply means "not a directory".
    end
  end
  puts entries
  dirs.each { |dir| dsparse("#{url}/#{dir}") }
end
# Lists the path names stored in the git index exposed at "+url+/.git/index".
# Returns silently when the index cannot be fetched, and with a message on
# standard error when the data does not start with the 'DIRC' signature.
# A notice is printed for index versions other than 2, which the parser was
# not written for.
def gitparse(url)
  puts "\n#{url}"
  git = GitParse.new
  begin
    body = fetch("#{url}/.git/index")
  rescue
  end
  return if body.nil?

  git.readstring(body)
  # isgit? verifies the 'DIRC' signature. The former second check after the
  # listing could never trigger (its negation bound to the string, not to
  # the comparison) and has been dropped.
  unless git.isgit?
    $stderr.puts "Not a git index format file"
    return
  end
  if git.gitversion != 2
    p "Weird git version detected, this'll probably end up in tears"
  end
  puts
  git.entries.each { |name| puts name }
  nil
end
# Probes +url+ for an exposed Subversion working copy. When the entries file
# can be fetched, the working copy is listed (and harvested when @harvest is
# set); any error along the way is reported as "nothing found".
def checksvn(url)
  puts "==================================================="
  # Fetched only as a probe; svnparse retrieves the file again itself.
  fetch("#{url}/.svn/entries")
  puts "Subversion info found:"
  svnharvestdir(url, ".") if @harvest
  svnparse(url)
rescue
  puts "No subversion info found"
end
# Probes +url+ for an exposed CVS working copy. When the Entries file can be
# fetched, the working copy is listed; any error along the way is reported
# as "nothing found".
def checkcvs(url)
  puts "==================================================="
  # Fetched only as a probe; cvsparse retrieves the file again itself.
  fetch("#{url}/CVS/Entries")
  puts "CVS info found:"
  cvsparse(url)
rescue
  puts "No CVS info found"
end
# Probes +url+ for an exposed .DS_Store file. When it can be fetched, its
# contents are listed; any error along the way is reported as "nothing found".
def checkds(url)
  puts "==================================================="
  # Fetched only as a probe; dsparse retrieves the file again itself.
  fetch("#{url}/.DS_Store")
  puts ".DS_Store file found:"
  dsparse(url)
rescue
  puts "No .DS_store file found"
end
# Probes +url+ for an exposed git repository. When the index can be fetched,
# its entries are listed; any error along the way is printed and reported as
# "nothing found".
def checkgit(url)
  puts "==================================================="
  # Fetched only as a probe; gitparse retrieves the file again itself.
  fetch("#{url}/.git/index")
  puts "Git info found:"
  gitparse(url)
rescue => err
  puts err.to_s
  puts "No Git info found"
end
# Script entry point: parse the command line, normalise the base URL and run
# the requested checks against it.
options = cmdline
usage if options["-u"].nil?

# A base URL without an explicit http(s): or file: scheme is taken to be a
# plain host name. Requiring the colon keeps hosts such as
# "httpd.example.com" from being mistaken for a complete URL.
unless options["-u"].match(/\A(https?|file):/)
  options["-u"] = "http://#{options["-u"]}"
end

@use_ssl = options["-s"] ? true : false

# Ugly hack; once other formats can be harvested as well this has to be done
# differently. For now, well, it works.
if options["-H"]
  # Check whether a .svn directory already exists at this location: entries
  # and other files are read-only, so the rest would break on it. Besides,
  # you most likely do not want that. Really.
  if File.exist?(".svn")
    STDERR.puts ".svn dir found at current location"
    STDERR.puts "please use a different location"
    exit
  end
end

case options["-m"]
when nil
  checksvn(options["-u"])
  checkcvs(options["-u"])
  checkds(options["-u"])
  checkgit(options["-u"])
when "svn"
  checksvn(options["-u"])
when "cvs"
  checkcvs(options["-u"])
when "ds"
  checkds(options["-u"])
when "git"
  checkgit(options["-u"])
else
  # An unknown mode used to do nothing at all; show the help instead.
  $stderr.puts "Unknown mode: #{options["-m"]}"
  usage
end