#!/usr/local/bin/perl use File::Basename; use File::Spec; use File::Find::Rule; use Getopt::Std; use Cwd qw(realpath); use POSIX; use Backfire; use strict; my %opt; getopts("vSMEIl:s:o:b:J:", \%opt ); for my $o (qw(l s o)) { usage() unless defined($opt{$o}); } my $lev=$opt{l}; my $out=$opt{o}; my $ul; # Upper limit. my $gtotal=0; # Grand total that we are dealing with. my $tick=0; # Our little counter for stats. my $fc=0; # Number of elements we end up dealing with. my %E; # the HoA of {size}=listOfFiles. my %L; # The outgoing packed list (HoA). my %S; # the file sizes found (temporary). my $nbin=1; # starting number of bins. my %binsize=( 1 => 0 ); # The size of each bin, bin one initialised now. # List of common sizes, in bytes. # These values are intentionally lowballed. my %comsizes = ( dvd => 4689231872, dvddl => 8450348154, cdr80 => 734003200, cdr74 => 680525824, fdd144 => 1471488, fdd288 => 2942976, cf256 => 244140625, cf512 => 488281250 ); # Grab the upper limit from the command line, and convert to a byte-size. if (defined($comsizes{$opt{s}})) { $ul=$comsizes{$opt{s}}; } elsif (($opt{s} =~ /^\d+K$/)||($opt{s} =~ /^\d+\.\d+K$/)) { $ul=$opt{s}; chop($ul); $ul *= 1024; } elsif (($opt{s} =~ /^\d+M$/)||($opt{s} =~ /^\d+\.\d+M$/)) { $ul=$opt{s}; chop($ul); $ul *= 1048756; } elsif (($opt{s} =~ /^\d+G$/)||($opt{s} =~ /^\d+\.\d+G$/)) { $ul=$opt{s}; chop($ul); $ul *= 1073741824; } elsif (($opt{s} =~ /^\d+$/)||($opt{s} =~ /^\d+\.\d+$/)) { $ul = $opt{s}; } else { print STDERR "Invalid volume size specified.\n"; exit(3); } $ul=floor($ul); # if it's not an int, lowball for safety. if (( $opt{l} !~ /^\d+$/ )||( $opt{l} <= 0 )) { print STDERR "Invalid depth specified.\n"; exit(5); } unless (-w dirname($opt{o}) && -x _ ) { print STDERR "Can't write lists to the output directory.\n"; exit(60); } my @files; if (exists($opt{E})) { foreach my $list (@ARGV) { unless (open(LIST, $list)) { print STDERR "I couldn't open $list for reading!\n"; exit(50); } if ($opt{b}) { unless (chdir $opt{b}) { print STDERR "Can't chdir to $opt{b}!\n"; exit(49); } } print STDERR "Searching $list... " if ($opt{v}); while () { chomp; next unless length; my $ent=realpath($_); unless ((defined($ent))&&(length($ent))&&(-e $ent)) { print STDERR "$list: line $.: not found.\n"; exit(48) if (exists($opt{I})); next; } push(@files,$ent); } close(LIST); print STDERR "Done.\n" if ($opt{v}); } } else { foreach my $dir (@ARGV) { if (-d $dir) { unless (chdir($dir)) { print STDERR "Error opening $dir: $!\n"; exit(24); } print STDERR "Searching $dir... " if ($opt{v}); my @filestmp = File::Find::Rule->maxdepth($lev) ->in(getcwd); push(@files,@filestmp); my $foo=@filestmp; print STDERR "done (+$foo).\n" if ($opt{v}); } elsif (-f $dir) { print STDERR "Adding file $dir (+1).\n" if ($opt{v}); push(@files,$dir); } } } my $howmany=@files; # Total number of entries that we are playing with my $count=0; # A counter for preogress when -v. for my $f (@files) { $count++; if (-d $f) { unless (-r $f && -x _) { print STDERR "Unreadable directory found: $f.\n"; exit(4); } my @chsplit=File::Spec->splitdir(getcwd); my @crsplit=File::Spec->splitdir($f); shift @crsplit for (0 .. $#chsplit); next if (scalar(@crsplit) < $lev); } elsif (-f $f && !-r _) { print STDERR "Unreadable file found: $f.\n"; exit(4); } my $f_s=getsize($f); if ($f_s == -1) { print STDERR "Error getting size for: $f.\n"; exit(10); } if ($f_s > $ul) { print STDERR "$f: too big to fit on a volume.\n"; if (defined($opt{S})) { next; } else { print STDERR " Try increasing -l, or use a larger volume, or specify -S to ignore.\n"; exit(1); } } $S{$f_s}=1; $gtotal+=$f_s; if (defined($E{$f_s})) { push(@{$E{$f_s}}, $f); $fc++; } else { $E{$f_s}=[$f]; $fc++; } if ($opt{v} && ($count % 400 == 0 )) { print STDERR "\r[". pctbars(25,($count/$howmany)) . "] $fc of $howmany items sized"; } } undef @files; print STDERR "\n" if ($opt{v}); print STDERR "\r$fc elements (". commify($gtotal) ." bytes).\n" if ($opt{v}); my @SI; @SI=reverse sort{ $a <=> $b } keys %S; # Largest->smallest @SI=reverse(@SI) if (defined($opt{M})); undef %S; # %S is useless now. print STDERR "\n" if ($opt{v}); print STDERR "Estimated ". ceil($gtotal/$ul) ." volumes.\n" if ($opt{v}); bin_fill(); bin_print(); sub bin_fill { my $tbin=1; if ($gtotal <= $ul) { print STDERR "It all fits on a single volume.\n" if ($opt{v}); for my $k (keys %E) { my @aos=@{$E{$k}}; push(@{$L{1}},@aos); # put them all into the only bin. } return; } # place all empty files into the first volume. if ($SI[$#SI] == 0) { @{$L{1}}=@{$E{0}}; # carry it out. $tick += scalar(@{$E{0}}); #update how many we've sorted. delete($E{0}); pop(@SI); # dont work with size 0 files ever again. } my $k=0; # size index to try. 0th (largest files) first. while (@SI) { # Make $k a sane value. It looks wrong but it's not. $k=$#SI if ($k > $#SI)||($k < 0); # I forget what this is for: unless ($SI[$k]) { $k=0; next; } # @aos is the array of files of this size my @aos=sort(@{$E{$SI[$k]}}); my $oldk=$k; if ($tbin > $nbin) { # we are going for a bin that's not made, because the # file is too big for all of them. $nbin++; # create a new bin $tbin=$nbin; # try current size on #1 again $binsize{$nbin}=0; # initialise new bin's size. } # Some sort of sanity check. # Prolly not needed. redo unless ($SI[$k]); if (($SI[$k]+$binsize{$tbin})<=$ul) { # The current size is worthy. Put ONE into %L. push(@{$L{$tbin}}, shift(@aos)); $binsize{$tbin}+=$SI[$k]; # update the size of this bin. if (@aos) { # there's more of these sizes left still, back to %E. @{$E{$SI[$k]}}=@aos; } else { # that was the only one. Delete the arraryref. delete($E{$SI[$k]}); delete($SI[$k]); $k++; # because these sizes are done. } $tick++; # got one stored! $tbin=1; # Try the first bin when we come back # Chat about the good news. if ($opt{v} && ($tick % 200 == 0 )) { print STDERR "\r[" . pctbars(25,($tick/$fc)) . "] $tick ents packed in $nbin volumes"; } } else { # The current size is too big for this bin. # Move to the next bin, retry current size there. $tbin++; } } # End the party if ($opt{v}) { print STDERR "\r$tick elements in $nbin volumes.\n"; } } # end the long, hard algorithm sub bin_print { unless (chdir dirname($opt{o})) { print STDERR "I can't chdir to " . dirname($opt{o}) . ": $!\n"; exit(7); } # XXX: Make cooler filenames in the future. my $prefix=basename($opt{o}); for my $k (sort{ $a <=> $b } keys %L) { my $filename=File::Spec->catfile(getcwd,"$prefix.$k"); if (-e $filename) { print STDERR "Not overwriting $filename\n"; exit(9); } unless (open(OUT, ">".$filename)) { print STDERR "I can't open the output file!: $!\n"; exit(8); } if ($ul < $binsize{$k}) { print STDERR "Volume $k too large. This is a serious bug!\n"; exit(127); } foreach (@{$L{$k}}) { next unless ($_); print OUT $_."\n"; } close OUT; if ($opt{v}) { print STDERR "\n### Volume $k: " . scalar(@{$L{$k}}) . " elements. " . commify($binsize{$k}) . ' bytes used, ' . commify($ul-$binsize{$k}) . ' bytes free (' . sprintf("%.2f", (100*(1-($binsize{$k}/$ul)))) . "%)\n"; } print "$filename\n"; } } sub getsize { my $f=shift; my $s=0; if (-d $f) { for my $cf (File::Find::Rule->in($f)) { return -1 unless (-r $cf); if (-d _) { return -1 unless (-x _); } $s += -s $cf || 0; } } $s += -s $f || 0; return $s; } sub fail { my $str=shift; print STDERR $str . "\n"; exit(1); } sub usage { print < Specifies the size to split into. Can be any of these: dvd,cdr74,cdr80,fdd144,fd288,cf256,cf512 You may also specify a decimal value, with a trailing K,M or G. -o Path to write the lists to. -l Maximum depth to do the splitting. Specify a huge value to disable this feature. .. Optional: -v Output some playful stats on the way. -E Read file lists instead of paths. -B Base to use for file locations in lists if not cwd. -I Nonfatal if files in lists don't exist. -J Dir to 'fill up' before making new volumes. -M Reverse the algorithm. Packs as many files on the first volume. (This may waste volumes.) Example: $0 -s cdr74 -l 1 -o /tmp/ /home/music/mp3 Split /home/music/mp3 into 650M directories, keeping child dirs intact. EOF exit(2); } __END__