major update
- use strict; - bunch more subroutines - import mp3md5 stuff - add help
This commit is contained in:
parent
9f1d2be8d6
commit
ab452d27df
1 changed files with 154 additions and 54 deletions
|
|
@ -6,48 +6,156 @@
|
|||
# Generates a list of files in cur. dir which are actually the same
|
||||
# Compares size and MD5 checksum
|
||||
# Handy for cleaning up pictures like this:
|
||||
# list_same|cut -f 1 -d " "|xargs rm
|
||||
# list_same -es | xargs rm
|
||||
|
||||
# wishlist:
|
||||
# - entering a list of files to check on the commandline
|
||||
# - include escapes in output (spaces, brackets, etc) (optional)
|
||||
# - option to ignore 0 length files
|
||||
|
||||
|
||||
# Changelog:
|
||||
# 2001-08-30:
|
||||
# _much_ saner now. only calculate md5s if sizes of
|
||||
# files are the same
|
||||
# nearly complete rewrite
|
||||
# 2001-08-29:
|
||||
# use MD5; instead of a shell call to md5
|
||||
# some cleaning of code
|
||||
|
||||
use strict;
|
||||
use Digest::MD5;
|
||||
use Getopt::Std;
|
||||
|
||||
# Script driver.
#
# Parses the command-line switches into %opts, collects the plain files of
# the current directory and hands them to one of the two comparison
# strategies.  Leftover lines of the pre-rewrite driver (a second getopts
# call, the unused $opt_* lexicals and an unclosed opendir/while loop on an
# undeclared $file) are removed; they kept the script from compiling under
# "use strict".
my %opts = ();
getopts('ehms', \%opts);

# -h: print the usage text.  help() exits, so nothing below runs.
if ( $opts{h} ) {
    help();
}

my @filelist = getdir();

# The comparison subs are declared with prototypes further down in the file;
# the &name(...) call form is kept so that calling them before their
# definition behaves exactly as before.
if ( $opts{m} ) {
    # -m: compare MP3 files on their audio data only.
    &mp3_comp(@filelist);
}
else {
    # Default: group by size first, checksum only files of equal size.
    my %sizes = &get_sizes(@filelist);
    &quick_comp(%sizes);
}
|
||||
|
||||
|
||||
#################################
|
||||
# functions
|
||||
|
||||
# getdir
#
# Returns the names of all plain files (-f) found in the current directory,
# in the order readdir delivers them.  Directories and other non-plain
# entries are left out.
#
# Dies with "can't open . <reason>" when the directory cannot be opened.
#
# Uses a lexical directory handle instead of the global bareword DIR, and no
# longer carries the leftover size-collecting loop of the previous version
# (it referred to %sizes and get_size, neither of which belongs here).
sub getdir {
    opendir(my $dh, ".") or die "can't open . $!";
    my @filelist = grep { -f $_ } readdir($dh);
    closedir($dh);
    return @filelist;
}
|
||||
|
||||
$start = $opt_s ? 1 : 0;
|
||||
# get_sizes(@filelist)
#
# Groups file names by size in bytes (element 7 of stat).
#
# Returns a hash (as a flat list) mapping each size to a reference to the
# array of names having that size, in input order.
#
# A name that cannot be stat'ed (for example because it vanished since the
# directory was read) is reported on STDERR and skipped; previously such
# names were all filed together under an undefined size.
sub get_sizes(@) {
    my @filelist = @_;
    my %sizes;

    foreach my $file (@filelist) {
        my $size = (stat $file)[7];
        unless (defined $size) {
            warn "can't stat $file: $!\n";
            next;
        }
        push @{ $sizes{$size} }, $file;
    }
    return %sizes;
}
|
||||
|
||||
# quick_comp(%sizes)
#
# Takes the size => [file names] hash and, for every size shared by more
# than one file, buckets those files by their calc_md5 digest and passes
# the buckets to output_doubles.  Sizes with a single file are never
# checksummed.
sub quick_comp(%) {
    my %sizes = @_;

    for my $same_size (values %sizes) {
        # A lone file of this size cannot have a duplicate.
        next unless @$same_size > 1;

        my %files_by_digest;
        for my $file (@$same_size) {
            my $digest = &calc_md5($file);
            push @{ $files_by_digest{$digest} }, $file;
        }
        &output_doubles(%files_by_digest);
    }
}
|
||||
|
||||
# same md5 calculation i use in mv_wrap
|
||||
# calc_md5($file)
#
# Returns the MD5 checksum of the complete contents of $file as a
# 32-character lowercase hex string.
#
# Dies when the file cannot be opened.
#
# The file is opened with three-argument open so that the name is taken
# literally: the two-argument form strips leading and trailing whitespace
# from the name, which made such files impossible to open.  The handle is
# put into binary mode so the digest covers the raw bytes.
sub calc_md5($) {
    my $file = shift;

    open(my $fh, '<', $file) or die "couldn't open file '$file': $!\n";
    binmode($fh);

    my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);
    return $digest;
}
|
||||
|
||||
# mp3_comp(@filelist)
#
# Buckets every given file by its calc_mp3md5 digest and passes the buckets
# to output_doubles.  Unlike quick_comp there is no size pre-filter: every
# file is checksummed.
sub mp3_comp(@) {
    my %files_by_digest;

    for my $file (@_) {
        my $digest = &calc_mp3md5($file);
        push @{ $files_by_digest{$digest} }, $file;
    }
    &output_doubles(%files_by_digest);
}
|
||||
|
||||
# calc_mp3md5($file)
#
# Returns the MD5 checksum (hex string) of $file with the tag regions
# reported by has_v1_tag and has_v2_tag left out: hashing starts after the
# size has_v2_tag reports for the front of the file and stops before the
# size has_v1_tag reports for the end of it.
#
# Dies when the file cannot be opened or a read fails.  A read error used to
# go unnoticed and produced a digest of partial data, which is dangerous for
# a tool whose output is fed to "rm".
sub calc_mp3md5($) {
    my $file       = shift;
    my $chunk_size = 4096 * 1024;    # read at most 4 MiB at a time

    # Three-argument open: the name is used literally, whitespace included.
    open(my $fh, '<', $file) or die "Couldn't open file '$file': $!\n";
    binmode($fh);

    # Find the end of the data to hash.
    seek $fh, 0, 2;                  # go to end of file
    my $eof = tell $fh;
    if (my $v1_size = has_v1_tag($fh)) {
        $eof -= $v1_size;
    }

    # Find the start of the data to hash.
    my $off = 0;
    if (my $v2_size = has_v2_tag($fh)) {
        $off = $v2_size;
    }

    my $md5 = Digest::MD5->new;
    seek $fh, $off, 0;
    while ($off < $eof) {
        my $want = $eof - $off;
        $want = $chunk_size if $want > $chunk_size;

        my $got = read $fh, my $bytes, $want;
        die "Couldn't read file '$file': $!\n" unless defined $got;
        last if $got == 0;           # file shrank underneath us

        $md5->add($bytes);
        $off += $got;                # advance by what was really read
    }
    close $fh;
    return $md5->hexdigest;
}
|
||||
|
||||
# has_v1_tag($fh)
#
# Checks whether the 128 bytes at the end of the open file begin with the
# marker "TAG".
#
# Returns 128 when they do, 0 otherwise.  The handle's position is changed.
#
# The seek result is checked and exactly three bytes are read.  Previously a
# failed seek (file shorter than 128 bytes) was ignored and a whole "line"
# was read from wherever the handle happened to be, so a short file that
# merely started with "TAG" could be reported as tagged.
sub has_v1_tag {
    my $fh = shift;

    seek($fh, -128, 2) or return 0;

    my $got = read $fh, my $marker, 3;
    return 0 unless defined $got and $got == 3;

    return $marker eq 'TAG' ? 128 : 0;
}
|
||||
|
||||
# has_v2_tag($fh)
#
# Checks whether the open file starts with an "ID3" header and, if so,
# returns the total number of bytes the tag occupies at the front of the
# file; returns 0 when no complete header is present.  The handle's
# position is changed.
#
# Header layout used here (10 bytes): "ID3", major version, revision, flags,
# then four size bytes carrying 7 bits each, most significant first.
# The returned size is the encoded size plus the 10 header bytes, plus
# another 10 bytes when a version >= 4 header has the footer flag (0x10)
# set -- the footer was previously not counted and ended up in the digest.
sub has_v2_tag {
    my $fh = shift;

    seek $fh, 0, 0;
    my $got = read $fh, my $header, 10;
    return 0 unless defined $got and $got == 10;

    my ($magic, $major, $flags, @size_bytes) = unpack 'a3 C x C C4', $header;
    return 0 unless $magic eq 'ID3';

    # Only the low 7 bits of every size byte carry data.
    my $body_size = 0;
    foreach my $byte (@size_bytes) {
        $body_size = ($body_size << 7) | ($byte & 0x7f);
    }

    my $tagsize = 10 + $body_size;
    $tagsize += 10 if $major >= 4 and ($flags & 0x10);
    return $tagsize;
}
|
||||
|
||||
sub output_doubles(@) {
|
||||
my %md5s = @_;
|
||||
my ( $key, @files, $i );
|
||||
my $start = $opts{s} ? 1 : 0;
|
||||
|
||||
foreach $key (keys %md5s) {
|
||||
if ( @{$md5s{$key}} > 1 ) {
|
||||
@files = sort @{$md5s{$key}};
|
||||
|
|
@ -55,40 +163,15 @@ foreach $size (keys %sizes) {
|
|||
chomp $files[$i];
|
||||
&output("$files[$i]\n");
|
||||
}
|
||||
unless($opt_s) { print "\n"; }
|
||||
unless($opts{s}) { print "\n"; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#################################
|
||||
# functions
|
||||
|
||||
# get_size($file)
#
# Returns the size of $file in bytes as reported by stat (element 7), or
# undef when stat yields nothing for that name.
sub get_size {
    my ($file) = @_;

    # Go through a scalar so a failed stat still returns exactly one value.
    my $size = (stat $file)[7];
    return $size;
}
|
||||
|
||||
# same md5 calculation i use in mv_wrap
|
||||
# calc_md5($file)
#
# Returns the MD5 checksum of the complete contents of $file as a
# 32-character lowercase hex string.
#
# Dies when the file cannot be opened.
#
# The digest object and the filehandle are lexicals: $md5 used to be an
# undeclared global (a compile error under "use strict") and FILE a
# bareword handle shared by the whole program.  Three-argument open takes
# the name literally, so names with leading or trailing whitespace work,
# and binmode makes the digest cover the raw bytes.
sub calc_md5($) {
    my $file = shift;

    open(my $fh, '<', $file) or die "couldn't open file '$file': $!\n";
    binmode($fh);

    my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);
    return $digest;
}
|
||||
|
||||
# escape output if necessary
|
||||
sub output($) {
|
||||
my $string = shift;
|
||||
if ($opt_e) {
|
||||
if ($opts{e}) {
|
||||
$string =~ s/\\/\\\\/g;
|
||||
$string =~ s/ /\\ /g;
|
||||
$string =~ s/'/\\'/g;
|
||||
|
|
@ -101,5 +184,22 @@ sub output($) {
|
|||
} else {
|
||||
print "$string";
|
||||
}
|
||||
$opt_e = $opt_e;
|
||||
}
|
||||
|
||||
# help
#
# Prints the usage text, headed by the script's own name with any leading
# directory part removed, to standard output and then ends the program.
# This sub does not return.
sub help {
    # Strip everything up to and including the last slash of $0.
    (my $name = $0) =~ s/.*\///;

    my $usage = <<EOF;

Usage: $name [OPTION] ...

-e escape output filenames with backslashes
-h display this help message
-m mp3 compare, ignores ID3 tags (slow)
-s skip the first entry for doubles

EOF
    print $usage;
    exit;
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue