major update

- use strict;
- bunch more subroutines
- import mp3md5 stuff
- add help
This commit is contained in:
Ward Wouts 2003-01-23 22:02:20 +00:00
parent 9f1d2be8d6
commit ab452d27df

View file

@ -6,89 +6,172 @@
# Generates a list of files in cur. dir which are actually the same # Generates a list of files in cur. dir which are actually the same
# Compares size and MD5 checksum # Compares size and MD5 checksum
# Handy for cleaning up pictures like this: # Handy for cleaning up pictures like this:
# list_same|cut -f 1 -d " "|xargs rm # list_same -es | xargs rm
# wishlist: # wishlist:
# - entering a list of files to check on the command line # - entering a list of files to check on the command line
# - include escapes in output (spaces, brackets, etc) (optional)
# - option to ignore 0 length files # - option to ignore 0 length files
use strict;
# Changelog:
# 2001-08-30:
# _much_ saner now. only calculate md5s if sizes of
# files are the same
# nearly complete rewrite
# 2001-08-29:
# use MD5; instead of a shell call to md5
# some cleaning of code
use Digest::MD5; use Digest::MD5;
use Getopt::Std; use Getopt::Std;
getopts('es'); my ( $opt_e, $opt_h, $opt_s );
my %opts = ();
getopts('ehms', \%opts);
opendir(DIR, ".") or die "can't open . $!"; if ( $opts{h} ) { &help; }
while (defined($file = readdir(DIR))) {
push @filelist, $file;
}
closedir(DIR);
foreach (@filelist) { my @filelist = &getdir;
if ( -f $_ ) { if ( $opts{m} ) {
push @{$sizes{&get_size($_)}}, $_; &mp3_comp(@filelist);
} } else {
my %sizes = &get_sizes(@filelist);
&quick_comp(%sizes);
} }
$start = $opt_s ? 1 : 0;
foreach $size (keys %sizes) {
if (@{$sizes{$size}} > 1) {
%md5s = ();
foreach (@{$sizes{$size}}) {
push @{$md5s{&calc_md5($_)}}, $_;
}
foreach $key (keys %md5s) {
if ( @{$md5s{$key}} > 1 ) {
@files = sort @{$md5s{$key}};
for $i ($start .. $#files) {
chomp $files[$i];
&output("$files[$i]\n");
}
unless($opt_s) { print "\n"; }
}
}
}
}
################################# #################################
# functions # functions
sub get_size { sub getdir {
my $file = shift; my ( $file, @filelist );
my @stat; opendir(DIR, ".") or die "can't open . $!";
@stat = stat $file; while (defined($file = readdir(DIR))) {
return $stat[7]; if ( -f $file ) {
push @filelist, $file;
}
}
closedir(DIR);
return @filelist;
}
# Group files by their size in bytes.
#
# Arguments: a list of file names.
# Returns:   a hash (flattened to a list) that maps each size to a
#            reference to the array of names having that size, in the
#            order the names were passed in.
#
# A name that cannot be stat'ed (for instance a file that disappeared after
# the directory was read) is reported on STDERR and left out.  Without that
# check all such names would be filed together under an undefined size and
# then be treated as candidates for being duplicates of each other.
sub get_sizes(@) {
    my @filelist = @_;
    my %sizes;
    foreach my $file (@filelist) {
        my @stat = stat $file;
        unless (@stat) {
            warn "can't stat $file: $!\n";
            next;
        }
        # element 7 of the stat list is the size in bytes
        push @{ $sizes{ $stat[7] } }, $file;
    }
    return %sizes;
}
# Report duplicate files among files that share a size.
#
# Arguments: a hash (passed flattened) mapping a size to a reference to the
#            array of file names having that size.
# For every size that has more than one file, each file's MD5 digest is
# computed with calc_md5, the names are grouped by digest, and the groups
# are handed to output_doubles.
sub quick_comp(%) {
my %sizes = @_;
my ( $size, %md5s );
foreach $size (keys %sizes) {
# a size that occurs only once cannot have a duplicate, so no MD5 is needed
if (@{$sizes{$size}} > 1) {
# start with an empty digest table for each size
%md5s = ();
foreach (@{$sizes{$size}}) {
push @{$md5s{&calc_md5($_)}}, $_;
}
&output_doubles(%md5s);
}
}
} }
# same md5 calculation i use in mv_wrap # same md5 calculation i use in mv_wrap
# Return the MD5 digest of a file's contents as a hex string.
# Argument: the file name.  Dies when the file cannot be opened.
# Each line below shows the previous and the current revision of the same
# statement side by side; the revisions differ in using the bareword
# handle FILE versus the lexical handle $FILE.
sub calc_md5($) { sub calc_md5($) {
my ($file, $digest); my ($file, $digest, $md5, $FILE);
$file = shift; $file = shift;
$md5 = Digest::MD5->new; $md5 = Digest::MD5->new;
open FILE, "<$file" or die "couldn't open file: $!\n"; open $FILE, "<$file" or die "couldn't open file: $!\n";
# rewind to the first byte before feeding the handle to the digest
seek(FILE, 0, 0); seek($FILE, 0, 0);
$md5->reset; $md5->reset;
$md5->addfile(FILE); $md5->addfile($FILE);
$digest = $md5->hexdigest; $digest = $md5->hexdigest;
close(FILE); close($FILE);
return $digest; return $digest;
} }
# Report duplicates among the given files using the MP3 checksum.
#
# Arguments: a list of file names.
# Every name is filed under the digest calc_mp3md5 returns for it; the
# resulting digest => names table is passed on to output_doubles.
sub mp3_comp(@) {
    my %names_by_digest;
    for my $name (@_) {
        my $digest = calc_mp3md5($name);
        push @{ $names_by_digest{$digest} }, $name;
    }
    output_doubles(%names_by_digest);
}
# Compute the MD5 digest of the part of a file that lies between a leading
# ID3v2 tag and a trailing ID3v1 tag, as reported by has_v2_tag and
# has_v1_tag.
#
# Argument: the file name.
# Returns:  the digest as a hex string.
# Dies when the file cannot be opened, positioned or read.
sub calc_mp3md5($) {
    my $file  = shift;
    my $chunk = 4096*1024;    # read at most 4 MiB per call

    # Three-argument open: the name is taken literally, so names with
    # leading/trailing blanks or characters such as '>' or '|' open the
    # file they name instead of being parsed as part of the mode.
    open(my $fh, '<', $file) or die "Couldn't open file: $!\n";
    binmode($fh);

    seek $fh, 0, 2;    # go to end of file
    my $eof = tell $fh;
    if (my $size = &has_v1_tag($fh)) {
        $eof -= $size;    # stop before the trailing tag
    }
    my $off = 0;
    if (my $size = &has_v2_tag($fh)) {
        $off = $size;     # start behind the leading tag
    }

    my $md5 = Digest::MD5->new;
    seek($fh, $off, 0) or die "Couldn't seek in file: $!\n";
    while ($off < $eof) {
        my $want = $eof - $off;
        $want = $chunk if $want > $chunk;
        my $got = read $fh, my($bytes), $want;
        die "Couldn't read file: $!\n" unless defined $got;
        last if $got == 0;    # nothing left although more was expected
        $md5->add($bytes);
        $off += $got;
    }
    close $fh;
    return $md5->hexdigest;
}
# Check whether a file ends in an ID3v1 tag.
#
# Argument: a seekable file handle opened for reading.
# Returns:  128 (the size of the tag) when the last 128 bytes of the file
#           start with "TAG", otherwise 0.
# The handle's position is changed by the call.
sub has_v1_tag {
    my $fh = shift;

    # The seek fails when the file is shorter than 128 bytes.  Such a file
    # cannot hold the tag, and reading on from the old position could match
    # unrelated data.
    seek($fh, -128, 2) or return 0;

    # Only the three marker bytes matter, so read exactly those rather
    # than a whole "line" of binary data.
    my $got = read $fh, my($marker), 3;
    return 128 if defined $got && $got == 3 && $marker eq 'TAG';
    return 0;
}
# Check whether a file starts with an ID3v2 tag and measure it.
#
# Argument: a seekable file handle opened for reading.
# Returns:  the total size of the tag in bytes (10 header bytes plus the
#           size stored in the header, plus 10 more when an ID3v2.4 header
#           announces a footer), or 0 when the file does not start with a
#           complete ID3v2 header.
# The handle's position is changed by the call.
sub has_v2_tag {
    my $fh = shift;

    seek $fh, 0, 0;
    my $got = read $fh, my($header), 10;
    # a truncated header cannot describe a tag
    return 0 unless defined $got && $got == 10;

    # layout: "ID3", major version, revision, flags, four size bytes
    my ($magic, $major, $revision, $flags, @size_bytes)
        = unpack 'a3 C3 C4', $header;
    return 0 unless $magic eq 'ID3';

    # the size is stored most significant byte first, 7 bits per byte
    my $body = 0;
    foreach my $byte (@size_bytes) {
        $body = $body * 128 + $byte;
    }

    my $tagsize = 10 + $body;
    # ID3v2.4 may append a 10 byte footer that the stored size leaves out
    $tagsize += 10 if $major == 4 && ($flags & 0x10);
    return $tagsize;
}
# Print every group of files that share a digest.
#
# Arguments: a hash (passed flattened) mapping a digest to a reference to
#            the array of file names with that digest.
# Groups with a single name are ignored.  The names of a group are sorted
# and sent through output(), one per line.  With the -s option the first
# name of each group is left out and no blank line is printed after the
# group; without it a blank line follows every group.
sub output_doubles(@) {
    my %names_by_digest = @_;
    my $skip_first = $opts{s} ? 1 : 0;

    for my $digest (keys %names_by_digest) {
        my $group = $names_by_digest{$digest};
        next if @{$group} <= 1;

        my @sorted = sort @{$group};
        for my $name (@sorted[$skip_first .. $#sorted]) {
            chomp $name;
            output("$name\n");
        }
        print "\n" unless $opts{s};
    }
}
# escape output if necessary # escape output if necessary
sub output($) { sub output($) {
my $string = shift; my $string = shift;
if ($opt_e) { if ($opts{e}) {
$string =~ s/\\/\\\\/g; $string =~ s/\\/\\\\/g;
$string =~ s/ /\\ /g; $string =~ s/ /\\ /g;
$string =~ s/'/\\'/g; $string =~ s/'/\\'/g;
@ -101,5 +184,22 @@ sub output($) {
} else { } else {
print "$string"; print "$string";
} }
$opt_e = $opt_e;
} }
# Print the usage summary, headed by the script's own name without its
# directory part, and end the program.
sub help {
    (my $name = $0) =~ s/.*\///;    # drop everything up to the last slash
    print <<"EOF";
Usage: $name [OPTION] ...
-e escape output filenames with backslashes
-h display this help message
-m mp3 compare, ignores ID3 tags (slow)
-s skip the first entry for doubles
EOF
    exit 0;
}