From b9bfa6e1ddc1f2ceaed14ac4c4c7782713b40fda Mon Sep 17 00:00:00 2001 From: Ward Wouts Date: Thu, 13 Aug 2015 13:24:44 +0000 Subject: [PATCH] nieuwe vlaggetjes --- list_same/list_same | 87 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/list_same/list_same b/list_same/list_same index d7e467d..78a2774 100755 --- a/list_same/list_same +++ b/list_same/list_same @@ -1,8 +1,5 @@ #!/usr/bin/perl -w -# $Id$ -# $Source$ - # Generates a list of files in cur. dir which are actually the same # Compares size and MD5 checksum # Handy for cleaning up pictures like this: @@ -20,26 +17,42 @@ use Getopt::Std; my %opts = (); my @dirs; +my @files; &cmdline; my @filelist = &getdir; +push @filelist, @files; + if ( $opts{m} ) { &mp3_comp(@filelist); } else { - my %sizes = &get_sizes(@filelist); - &quick_comp(%sizes); + if ( $opts{f} ) { + &get_md5s(@filelist); + } else { + my %sizes = &get_sizes(@filelist); + &quick_comp(%sizes); + } } ################################# # functions sub cmdline { - getopts('dehlms', \%opts); + getopts('adefhlms', \%opts); if ( $opts{h} ) { &help; } - map { if ( -d $_ ) { s/\/$//; push @dirs, $_; } else { die "$_ not a directory" } } @ARGV; - if ( scalar @dirs == 0 ) { + map { if ( -d $_ ) { + s/\/$//; + push @dirs, $_; + } else { + if ( -f $_ ) { + push @files, $_; + } else { + die "$_ not a directory or file" + } + } } @ARGV; + if ( scalar @dirs == 0 && scalar @files == 0 ) { push @dirs, "."; } @@ -47,7 +60,6 @@ sub cmdline { print "-l doens't combine with -d, -e or -s\n\n"; &help; } -# map { print "$_\n"; } @dirs; } sub getdir { @@ -75,6 +87,14 @@ sub get_sizes(@) { return %sizes; } +sub get_md5s(@) { + my %md5s; + foreach (@_) { + push @{$md5s{&calc_md5($_)}}, $_; + } + &output_doubles(%md5s); +} + sub quick_comp(%) { my %sizes = @_; my ( $size, %md5s ); @@ -93,15 +113,34 @@ sub quick_comp(%) { # same md5 calculation i use in mv_wrap sub calc_md5($) { my $file = shift; - my ( $digest, $md5, $FILE ); + my ( $digest, $fh, $off, $startoff ); + my $buf = 4096*1024; + my $md5 = Digest::MD5->new; $md5 = Digest::MD5->new; - open $FILE, "<$file" or die "couldn't open file: $!\n"; - seek($FILE, 0, 0); - $md5->reset; - $md5->addfile($FILE); - $digest = $md5->hexdigest; - close($FILE); - return $digest; + open($fh, "<$file") or die "Couldn't open file: $!\n"; + binmode($fh); + seek $fh, 0, 2; # go to end of file + my $eof = tell $fh; + + seek($fh, 0, 0); + if ( $opts{f} ) { + readline($fh); + $off = tell $fh; + } else { + $off = 0; + } + + while ($off < $eof) { + seek $fh, $off, 0; + if ($buf > ($eof - $off)) { $buf = $eof - $off; } + read $fh, my($bytes), $buf; + $md5->add($bytes); + $off += $buf; + } + close $fh; + return $md5->hexdigest; + + } sub mp3_comp(@) { @@ -171,12 +210,18 @@ sub has_v2_tag { sub output_doubles(@) { my %md5s = @_; - my ( $key, @files, $i ); + my ( $key, @files, $i, $numkeys ); my $start = $opts{s} ? 1 : 0; + if ( $opts{a} ) { $numkeys = 0; } else { $numkeys = 1; } + foreach $key (keys %md5s) { - if ( @{$md5s{$key}} > 1 ) { - @files = sort dirsort @{$md5s{$key}}; + if ( @{$md5s{$key}} > $numkeys ) { + if ( scalar @dirs > 0 ) { + @files = sort dirsort @{$md5s{$key}}; + } else { + @files = @{$md5s{$key}}; + } for $i ($start .. $#files) { chomp $files[$i]; if ($opts{d}) { @@ -253,8 +298,10 @@ sub help { Usage: $name [OPTION] ... + -a output all files -d delete resulting files -e escape output filenames with backslashes + -f skip first line -h display this help message -l hardlink resulting files (no change if on different filesystems)