major update

- use strict;
- bunch more subroutines
- import mp3md5 stuff
- add help
This commit is contained in:
Ward Wouts 2003-01-23 22:02:20 +00:00
parent 9f1d2be8d6
commit ab452d27df

View file

@ -6,41 +6,60 @@
# Generates a list of files in cur. dir which are actually the same # Generates a list of files in cur. dir which are actually the same
# Compares size and MD5 checksum # Compares size and MD5 checksum
# Handy for cleaning up pictures like this: # Handy for cleaning up pictures like this:
# list_same|cut -f 1 -d " "|xargs rm # list_same -es | xargs rm
# wishlist: # wishlist:
# - entering a list of files to check on the commandline # - entering a list of files to check on the commandline
# - include escapes in output (spaces, brackets, etc) (optional)
# - option to ignore 0 length files # - option to ignore 0 length files
use strict;
# Changelog:
# 2001-08-30:
# _much_ saner now. only calculate md5s if sizes of
# files are the same
# nearly complete rewrite
# 2001-08-29:
# use MD5; instead of a shell call to md5
# some cleaning of code
use Digest::MD5; use Digest::MD5;
use Getopt::Std; use Getopt::Std;
# Main program: parse the switches, collect the plain files of the current
# directory and hand them to the selected comparison routine.

# Still declared because code further down may name the old $opt_* variables;
# the parsed switches themselves live in %opts.
my ( $opt_e, $opt_h, $opt_s );

# Parse the command line exactly once.  An earlier getopts('es') call would
# strip the switches from @ARGV before this call could record them in %opts.
my %opts = ();
getopts('ehms', \%opts);

# -h: print the usage text; help() exits, so nothing below runs.
help() if $opts{h};

# The &name(...) call form is kept for the subs below: several of them are
# declared with prototypes further down in the file, after this point.
my @filelist = &getdir();
if ( $opts{m} ) {
    # -m: compare via calc_mp3md5 digests
    &mp3_comp(@filelist);
}
else {
    # default: group by size first, then MD5 only the equally sized files
    my %sizes = &get_sizes(@filelist);
    &quick_comp(%sizes);
}
#################################
# functions
# Return the names of the entries in the current directory that pass the
# -f (plain file) test, in the order readdir delivers them.
# Takes no arguments.  Dies if "." cannot be opened.
sub getdir {
    # Lexical directory handle: closed automatically on scope exit and not
    # shared with any other DIR bareword in the program.
    opendir( my $dh, "." ) or die "can't open . $!";

    # One pass over the directory; each name is kept at most once.
    my @filelist = grep { -f $_ } readdir($dh);

    closedir($dh);
    return @filelist;
}
# Group file names by file size.
#   @_      - list of file names
#   returns - flat hash: size as reported by stat (element 7) => array ref
#             of the names having that size
# Names that cannot be stat'ed (e.g. removed since the directory was read)
# are left out instead of being filed under an undefined size.
sub get_sizes(@) {
    my @filelist = @_;
    my %sizes;
    foreach my $file (@filelist) {
        my $size = ( stat $file )[7];
        next unless defined $size;    # stat failed
        push @{ $sizes{$size} }, $file;
    }
    return %sizes;
}
$start = $opt_s ? 1 : 0; sub quick_comp(%) {
my %sizes = @_;
my ( $size, %md5s );
foreach $size (keys %sizes) { foreach $size (keys %sizes) {
if (@{$sizes{$size}} > 1) { if (@{$sizes{$size}} > 1) {
@ -48,6 +67,95 @@ foreach $size (keys %sizes) {
foreach (@{$sizes{$size}}) { foreach (@{$sizes{$size}}) {
push @{$md5s{&calc_md5($_)}}, $_; push @{$md5s{&calc_md5($_)}}, $_;
} }
&output_doubles(%md5s);
}
}
}
# same md5 calculation i use in mv_wrap
#
# Return the hexadecimal MD5 digest of a file's contents.
#   $file   - name of the file to hash
#   returns - digest as a hex string
# Dies if the file cannot be opened.
sub calc_md5($) {
    my $file = shift;

    # Three-argument open takes the name literally.  The two-argument form
    # strips surrounding whitespace and interprets mode characters, so it
    # cannot open files with such names.
    open( my $fh, '<', $file ) or die "couldn't open file: $!\n";

    # Hash the raw bytes; without binmode, platforms with text-mode
    # translation would digest altered data.
    binmode($fh);

    # A fresh Digest::MD5 object needs no reset, and a freshly opened handle
    # is already at offset 0.
    my $digest = Digest::MD5->new->addfile($fh)->hexdigest;

    close($fh);
    return $digest;
}
# Bucket the given files by the digest calc_mp3md5 computes for each of them,
# then pass the buckets to output_doubles for reporting.
#   @_ - list of file names
sub mp3_comp(@) {
    my @names = @_;
    my %by_digest;
    for my $name (@names) {
        my $digest = &calc_mp3md5($name);
        push @{ $by_digest{$digest} }, $name;
    }
    &output_doubles(%by_digest);
}
# Return the hex MD5 digest of a file with its ID3 tags left out.
#   $file   - name of the file to hash
#   returns - digest as a hex string
# The hashed byte range starts after the size reported by has_v2_tag and
# ends before the size reported by has_v1_tag (either may be 0).
# Dies if the file cannot be opened or a read fails.
sub calc_mp3md5($) {
    my $file = shift;
    my $chunk_size = 4096 * 1024;    # read at most this many bytes per call

    # Three-argument open: the file name is taken literally.
    open( my $fh, '<', $file ) or die "Couldn't open file: $!\n";
    binmode($fh);

    seek $fh, 0, 2;                  # go to end of file
    my $eof = tell $fh;

    # Both helpers return the number of bytes to leave out, or 0.
    $eof -= &has_v1_tag($fh);
    my $off = &has_v2_tag($fh);

    my $md5 = Digest::MD5->new;
    seek $fh, $off, 0;               # the reads below are sequential from here
    while ( $off < $eof ) {
        my $want = $eof - $off;
        $want = $chunk_size if $want > $chunk_size;

        my $got = read $fh, my $bytes, $want;
        die "Couldn't read file: $!\n" unless defined $got;
        last if $got == 0;           # file ended earlier than expected

        $md5->add($bytes);
        # Advance by what was actually read, so a short read never skips
        # data that has not been hashed.
        $off += $got;
    }
    close $fh;
    return $md5->hexdigest;
}
# Check whether the last 128 bytes of a file start with the marker "TAG".
#   $fh     - readable, seekable file handle (its position is changed)
#   returns - 128 if the marker is present, otherwise 0
sub has_v1_tag {
    my $fh = shift;

    # On a file shorter than 128 bytes the seek fails; without this check
    # the marker would be read from wherever the handle happened to be.
    seek( $fh, -128, 2 ) or return 0;

    # Read exactly the three marker bytes instead of a "line" of binary data.
    my $got = read $fh, my $marker, 3;
    return 0 unless defined $got && $got == 3;

    return $marker eq 'TAG' ? 128 : 0;
}
# Check for an "ID3" header at the start of a file and return the tag size.
#   $fh     - readable, seekable file handle (its position is changed)
#   returns - total number of bytes taken up by the tag, or 0 if the file
#             does not start with a complete 10-byte "ID3" header
# Header layout used here: 3 bytes "ID3", major version, revision, flags,
# then 4 size bytes that are combined base 128, most significant first.
sub has_v2_tag {
    my $fh = shift;
    my $header_size = 10;

    seek $fh, 0, 0;
    my $got = read $fh, my $header, $header_size;

    # A truncated header cannot describe a tag; do not compute a size from
    # bytes that were never read.
    return 0 unless defined $got && $got == $header_size;

    my ( $id, $major, $flags, @size_bytes ) = unpack 'a3 C x C C4', $header;
    return 0 unless $id eq 'ID3';

    my $body = 0;
    $body = $body * 128 + $_ for @size_bytes;
    my $tagsize = $header_size + $body;

    # ID3v2.4 may append a 10-byte footer, signalled by flag bit 0x10; the
    # size field does not include it.
    $tagsize += 10 if $major >= 4 && ( $flags & 0x10 );

    return $tagsize;
}
sub output_doubles(@) {
my %md5s = @_;
my ( $key, @files, $i );
my $start = $opts{s} ? 1 : 0;
foreach $key (keys %md5s) { foreach $key (keys %md5s) {
if ( @{$md5s{$key}} > 1 ) { if ( @{$md5s{$key}} > 1 ) {
@files = sort @{$md5s{$key}}; @files = sort @{$md5s{$key}};
@ -55,40 +163,15 @@ foreach $size (keys %sizes) {
chomp $files[$i]; chomp $files[$i];
&output("$files[$i]\n"); &output("$files[$i]\n");
} }
unless($opt_s) { print "\n"; } unless($opts{s}) { print "\n"; }
} }
} }
} }
}
#################################
# functions
# Return element 7 (the size field) of stat for the given file name;
# the value is undef when stat fails.
sub get_size {
    my ($path) = @_;
    my $size = ( stat $path )[7];
    return $size;
}
# same md5 calculation i use in mv_wrap
#
# Return the hexadecimal MD5 digest of the contents of $file.
# Dies if the file cannot be opened.
sub calc_md5($) {
    my ($file) = @_;

    # Lexical handle and three-argument open: no shared FILE bareword, and
    # the file name is never interpreted as an open mode.
    open( my $in, '<', $file ) or die "couldn't open file: $!\n";
    binmode($in);    # digest the bytes exactly as stored

    # $md5 is declared here; an undeclared $md5 does not compile under
    # "use strict".
    my $md5 = Digest::MD5->new;
    $md5->addfile($in);
    my $digest = $md5->hexdigest;

    close($in);
    return $digest;
}
# escape output if necessary # escape output if necessary
sub output($) { sub output($) {
my $string = shift; my $string = shift;
if ($opt_e) { if ($opts{e}) {
$string =~ s/\\/\\\\/g; $string =~ s/\\/\\\\/g;
$string =~ s/ /\\ /g; $string =~ s/ /\\ /g;
$string =~ s/'/\\'/g; $string =~ s/'/\\'/g;
@ -101,5 +184,22 @@ sub output($) {
} else { } else {
print "$string"; print "$string";
} }
$opt_e = $opt_e;
} }
# Print the usage text, headed by the program's base name (everything up to
# the last "/" of $0 removed), and exit.
sub help {
    ( my $name = $0 ) =~ s/.*\///;
    print <<"USAGE";
Usage: $name [OPTION] ...
-e escape output filenames with backslashes
-h display this help message
-m mp3 compare, ignores ID3 tags (slow)
-s skip the first entry for doubles
USAGE
    exit;
}