publicscripts/list_same/list_same
2002-11-14 12:14:13 +00:00

87 lines
1.7 KiB
Perl
Executable file

#!/usr/bin/perl -w
# $Id$
# $Source$
# Generates a list of files in the current directory that have identical contents.
# Candidates are compared by size first, then by MD5 checksum.
# Handy for cleaning up pictures like this:
# list_same|cut -f 1 -d " "|xargs rm
# wishlist:
# - accepting a list of files to check on the command line
# - include escapes in output (spaces, brackets, etc) (optional)
# - option to ignore 0 length files
# Changelog:
# 2001-08-30:
# _much_ saner now. only calculate md5s if sizes of
# files are the same
# nearly complete rewrite
# 2001-08-29:
# use MD5; instead of a shell call to md5
# some cleaning of code
use Digest::MD5;
use Getopt::Std;
# Command-line options, kept in a lexical hash rather than the global $opt_s:
#   -s  for each group of identical files, print every name except the
#       first one (in sorted order), one name per line
# Without -s, each group is printed on one line, names separated by spaces.
my %opt;
getopts('s', \%opt);

# Read every entry of the current directory.
opendir(my $dh, ".") or die "can't open . $!";
my @filelist = readdir($dh);
closedir($dh);

# Bucket the regular files by size: only files of equal size can be
# identical, so checksums are computed for those candidates alone.
my %sizes;
foreach my $name (@filelist) {
    next unless -f $name;
    my $size = get_size($name);
    # get_size yields undef when stat fails (e.g. the file vanished after
    # the -f test); such a file cannot be compared, so leave it out.
    next unless defined $size;
    push @{ $sizes{$size} }, $name;
}

# Sizes and digests are visited in sorted order so that repeated runs over
# the same directory produce the same output.
foreach my $size (sort { $a <=> $b } keys %sizes) {
    my @same_size = @{ $sizes{$size} };
    next unless @same_size > 1;

    # Group the equally sized files by MD5 digest.
    my %md5s;
    foreach my $name (@same_size) {
        # The & form is deliberate: calc_md5 is declared further down with
        # a prototype, and a plain call placed before that declaration
        # draws a "called too early to check prototype" warning under -w.
        push @{ $md5s{ &calc_md5($name) } }, $name;
    }

    foreach my $digest (sort keys %md5s) {
        my @group = @{ $md5s{$digest} };
        next unless @group > 1;

        if ($opt{s}) {
            # Drop the first name in sorted order and list the rest.
            my @sorted = sort @group;
            shift @sorted;
            print "$_\n" foreach @sorted;
        }
        else {
            # Every name is followed by a single space, then the line ends.
            print "$_ " foreach @group;
            print "\n";
        }
    }
}
#################################
# functions
# Look up the size of a file.
#   argument: path of the file to inspect
#   returns:  field 7 of stat() for that path (its size in bytes),
#             or undef when stat() yields nothing
sub get_size {
    my ($path) = @_;
    # Assign through a scalar so that exactly one value is returned even
    # when stat() produces an empty list.
    my $bytes = (stat($path))[7];
    return $bytes;
}
# same md5 calculation i use in mv_wrap
# Compute the MD5 digest of a file's contents.
#   argument: path of the file to hash
#   returns:  the digest as a hexadecimal string
#   dies:     when the file cannot be opened
# The ($) prototype is retained so the declaration stays as callers know it.
sub calc_md5($) {
    my $file = shift;

    # Three-argument open takes the name literally, so names with leading or
    # trailing whitespace, or starting with characters such as '>' or '|',
    # are opened as files instead of being read as part of the open mode.
    open(my $fh, '<', $file) or die "couldn't open file '$file': $!\n";

    # Hash the raw bytes, with no line-ending translation.
    binmode($fh);

    my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);
    return $digest;
}