publicscripts/list_same/list_same

72 lines
1.3 KiB
Perl
Executable file

#!/usr/bin/perl -w
# Lists groups of files in the current directory that have identical contents.
# Files are grouped by size first; MD5 checksums are compared only within a size group.
# Handy for cleaning up pictures like this:
# list_same|cut -f 1 -d " "|xargs rm
# wishlist:
# - accept a list of files to check on the command line
# Changelog:
# 2001-08-30:
# _much_ saner now. only calculate md5s if sizes of
# files are the same
# nearly complete rewrite
# 2001-08-29:
# use MD5; instead of a shell call to md5
# some cleaning of code
use MD5;
# Main scan.
# 1. Read every entry name of the current directory into @filelist.
# 2. Bucket the plain files by byte size in %sizes (size => [names]).
# 3. For each size shared by more than one file, bucket those files by
#    MD5 digest in %md5s (digest => [names]) and print every digest group
#    holding more than one name on its own line, each name followed by a
#    single space.
opendir(DIR, ".") or die "can't open . $!";
@filelist = readdir(DIR);    # list context: all remaining entries at once
closedir(DIR);

# Only plain files take part; directories and other entry types are skipped.
for my $entry (@filelist) {
    next unless -f $entry;
    push @{ $sizes{ &get_size($entry) } }, $entry;
}

for my $size (keys %sizes) {
    my @candidates = @{ $sizes{$size} };

    # A size seen only once cannot have a duplicate, so no checksum is needed.
    next if @candidates <= 1;

    # Start from an empty digest table for every size group.
    %md5s = ();
    for my $entry (@candidates) {
        push @{ $md5s{ &calc_md5($entry) } }, $entry;
    }

    for my $digest (keys %md5s) {
        my @duplicates = @{ $md5s{$digest} };
        next if @duplicates <= 1;

        print "$_ " for @duplicates;
        print "\n";
    }
}
#################################
# functions
# get_size($path)
# Returns the size in bytes of the named file as reported by stat()
# (element 7 of the stat list). Returns undef when stat() yields nothing,
# e.g. because the file does not exist.
sub get_size {
    my ($path) = @_;

    # Assign the slice to a scalar first so exactly one value is returned
    # even when stat() produces an empty list.
    my $bytes = (stat $path)[7];
    return $bytes;
}
# calc_md5($file)
# Returns the MD5 checksum of the named file's contents as a lowercase
# hexadecimal string. Dies with "couldn't open file: ..." when the file
# cannot be opened for reading.
# (Per the original note, this mirrors the MD5 calculation used in mv_wrap.)
sub calc_md5 {
    my ($file) = @_;

    # Digest::MD5 ships with Perl and produces the same hex digest as the
    # legacy MD5 wrapper module; loading it here keeps this sub self-contained.
    require Digest::MD5;

    # Three-argument open takes the name literally, so names with leading or
    # trailing whitespace or mode characters open the intended file.
    open(my $fh, '<', $file) or die "couldn't open file: $!\n";

    # Checksum the raw bytes, without any newline translation.
    binmode($fh);

    my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);
    return $digest;
}