From ddbd3b5eb2b17434dc02af398372abd7684b5955 Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Thu, 30 Aug 2001 09:51:04 +0000 Subject: [PATCH] almost complete rewrite. much saner/faster now --- list_same/list_same | 61 ++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/list_same/list_same b/list_same/list_same index 8aafbab..dacfb65 100755 --- a/list_same/list_same +++ b/list_same/list_same @@ -5,12 +5,15 @@ # Handy for cleaning up pictures like this: # list_same|cut -f 1 -d " "|xargs rm -# This still sucks big time! -# Can it handle directories at all? -# Why does it bother with calculating md5 checksums of everything, -# instead of just the files that have the same size? +# wishlist: +# - entering a list of file to check on the commandline + # Changelog: +# 2001-08-30: +# _much_ saner now. only calculate md5s if sizes of +# files are the same +# nearly complete rewrite # 2001-08-29: # use MD5; instead of a shell call to md5 # some cleaning of code @@ -19,42 +22,42 @@ use MD5; opendir(DIR, ".") or die "can't open . $!"; while (defined($file = readdir(DIR))) { - &get_finfo; + push @filelist, $file; } closedir(DIR); -$old_md5=""; -$old_size=""; -$old_name=""; +foreach (@filelist) { + if ( -f $_ ) { + push @{$sizes{&get_size($_)}}, $_; + } +} -foreach $line (sort(@file_lijstje)) { -# print $line; - $line .= "\n"; - $line =~ /(.*?)\s(.*?)\s(.*)/; - $md5 = $1; $size = $2; $name = $3; - if (($old_md5 eq $md5) && ($old_size eq $size)) { - print "$old_name $name\n"; - } else { - $old_md5=$md5; - $old_size=$size; - $old_name=$name; +foreach (keys %sizes) { + if (@{$sizes{$_}} > 1) { + %md5s = (); + foreach (@{$sizes{$_}}) { + push @{$md5s{&calc_md5($_)}}, $_; + } + foreach (keys %md5s) { + if (@{$md5s{$_}} > 1) { + foreach (@{$md5s{$_}}) { print "$_ "; } + print "\n"; + } + } } } ################################# -# dain bread functions +# functions -sub get_finfo() { - my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks); - my $line; - if (($file ne ".")&&($file ne "..")) { - ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, - $atime,$mtime,$ctime,$blksize,$blocks) = stat $file; - $line = &calc_md5($file); - push @file_lijstje, "$line $size $file"; - } +sub get_size { + my $file = shift; + my @stat; + @stat = stat $file; + return $stat[7]; } +# same md5 calculation i use in mv_wrap sub calc_md5($file) { my ($file, $digest); $file = shift;