#
# $Id: Split.pm,v 0.21 2003/12/30 14:12:32 st.schubiger Exp $

package Dir::Split;

use 5.6.1;
use strict;
use warnings;

use Carp;
use File::Copy 'cp';
use File::Path;
use File::Spec;

our (# external refs
        $source_dir_ref, # scl
        $target_dir_ref, # scl

     # opt refs
        $verbose_ref,  # scl
        $ident_ref,    # scl
        $f_limit_ref,  # scl
        $f_sort_ref,   # scl
        $cont_num_ref, # scl
        $sep_ref,      # scl
        $l_req_ref,    # scl

     # data refs
        $files_ref,   # arr
        $f_names_ref, # hsh
        $suffix_ref,  # scl

     # data
        @files,
        %f_names,
        $suffix,
    );

our $VERSION = 0.21;

=head1 NAME

Dir::Split - Split the files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 # define options
 %options = ( verbose =>   0,

              sub_dir => { identifier          =>    'system',
                           file_limit          =>         '2',
                           file_sort           =>         '+',
              },

              suffix =>  { separator           =>         '.',
                           length              =>           4,
                           continue_num        =>         'y',
              },
 );

 # create object
 $dir = Dir::Split->new (\%options);

 # set source & target dirs
 $source_dir = '/var/tmp/src';
 $target_dir = '/var/tmp/target';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

 # changes the subdir identifier
 $dir->{'sub_dir'}{'identifier'} = 'test';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

=head1 DESCRIPTION

C<Dir::Split> moves files from a source directory to numbered subdirectories within
a target directory.

=head1 METHODS

=head2 new ( \%options )

Object constructor.

 $dir = Dir::Split->new (\%options);

C<%options> contains the options that will influence the splitting process.

 %options = ( verbose =>   0,

              sub_dir => { identifier          =>    'system',
                           file_limit          =>         '2',
                           file_sort           =>         '+',
              },

              suffix =>  { continue_num        =>         'y',
                           separator           =>         '.',
                           length              =>           4,
              },
 );

C<verbose> sets the verbosity (see table VERBOSITY MODES); if enabled,
mkpath will output the pathes on creating subdirectories.

C<sub_dir/identifier> will affect the prefix of each subdirectory.
C<sub_dir/file_limit> sets the limit of files per each subdirectory.
C<sub_dir/file_sort> defines the sorting order of files
(see table SORT MODES).

C<suffix/continue_num> defines whether the numbering shall be continued
where it previously stopped or start at 1 (see table CONTINUE NUMBERING MODES).
C<suffix/separator> contains the string that separates the identifier
from the suffix. C<suffix/length> is an non-floating-point integer that sets
the amount of zeros to be added to the subdirectory numbering.

Differing identifiers or separators do affect the numbering e.g. I<systm-> does
not equal I<system->, I<system_> does not equal I<system->. C<file_limit>, C<file_sort>
and C<separator> options have no influence on decisions whether the numbering shall
be continued or not.

  VERBOSITY MODES
    0  disabled
    1  enabled

  SORT MODES
    +  ascending sort order
    -  descending sort order

  CONTINUE NUMBERING MODES
    y   yes
    ''  no

=cut

sub new {
    my ($pkg, $opt_ref) = @_;
    croak q~Invalid arguments: new (\%options)~
      unless ref $opt_ref eq 'HASH';

    my $class = ref ($pkg) || $pkg;
    my $obj = Dir::Split::_tie_var ($opt_ref);

    bless $obj, $class;
}

#
# _tie_var (\%hash)
#
# Dereferences a two-dimensional hash. A reference
# to a private hash will be returned.
#

sub _tie_var {
    my $opt_ref = $_[0];

    my %my_hash;
    foreach my $key (keys %$opt_ref) {
        if (ref $$opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$opt_ref{$key}} ) {
                $my_hash{$key}{$_} = $$opt_ref{$key}{$_};
            }
        }
        else { $my_hash{$key} = $$opt_ref{$key} }
    }

    return \%my_hash;
}

#
# DESTROY - object destructor
#

sub DESTROY { }

=head2 split ( \$source_dir, \$target_dir )

Split the files to subdirectories.

 $files_moved = $dir->split (\$source_dir, \$target_dir);

C<$source_dir> specifies the source directory.

C<$target_dir> specifies the target directory.

Returns the amount of files that have been successfully moved;
if none, it will return undef.

=cut

sub split {
    my $self = shift;
    ($source_dir_ref, $target_dir_ref) = @_;
    croak q~Invalid arguments: split (\$source_dir, \$target_dir)~
      unless ref $source_dir_ref eq 'SCALAR' && ref $target_dir_ref eq 'SCALAR';

    # opts refs
    $verbose_ref = \$self->{'verbose'};
    $ident_ref = \$self->{'sub_dir'}{'identifier'};
    $f_limit_ref = \$self->{'sub_dir'}{'file_limit'};
    $f_sort_ref = \$self->{'sub_dir'}{'file_sort'};
    $cont_num_ref = \$self->{'suffix'}{'continue_num'};
    $l_req_ref = \$self->{'suffix'}{'length'};
    $sep_ref = \$self->{'suffix'}{'separator'};

    # data refs
    $files_ref = \@files;
    $f_names_ref = \%f_names;
    $suffix_ref = \$suffix;

    $self->_gather_files;
    $self->_suffix_highest_num if $$cont_num_ref eq 'y';
    $self->_suffix_sum_up;
    $self->_move_files;
}

#
# _gather_files ()
#
# Gathers the files the source directory
# consists of and sorts them according to
# the options.
#

sub _gather_files {
    my $self = $_[0];

    $self->_dir_read ($source_dir_ref, $files_ref);
    @$files_ref = grep !-d File::Spec->catfile($$source_dir_ref, $_), @$files_ref;

    if ($$f_sort_ref eq '+' || $$f_sort_ref eq '-') {
        # preserve filenames in hash with their lowercased filenames as keys.
        foreach (@$files_ref) { $$f_names_ref{lc($_)} = $_ }
        @$files_ref = map { lc } @$files_ref;

        if ($$f_sort_ref eq '+') { @$files_ref = sort @$files_ref }
        elsif ($$f_sort_ref eq '-') { @$files_ref = reverse @$files_ref }
    }
}

#
# _suffix_highest_num ()
#
# Evaluates the highest existing subdir suffix number
# from the target directory in order to continue numbering
# where it stopped previously.
#

sub _suffix_highest_num {
    my $self = $_[0];

    my @dirs;
    $self->_dir_read ($target_dir_ref, \@dirs);
    @dirs = grep -d File::Spec->catfile($$target_dir_ref, $_), @dirs;

    # surpress warnings
    $$suffix_ref = 0;
    my $sep = quotemeta $$sep_ref;
    foreach (@dirs) {
        # extract exist. identifier
        ($_) = /(.+?)$sep(.*)/;
        next unless $$ident_ref eq $_;
        # increase suffix to highest number
        $$suffix_ref = $2 if $2 > $$suffix_ref;
    }
    # avoid collisions with exist. subdirs
    $$suffix_ref++;
}

#
# _suffix_sum_up ()
#
# Sums the suffix with a given amount
# of zeros up and concatenates the numbering
# at the end.
#

sub _suffix_sum_up {
    if (length $$suffix_ref != $$l_req_ref) {
        my $format = "%0.$$l_req_ref" . 'd';
        $$suffix_ref = sprintf $format, $$suffix_ref;
    }
}

#
# _dir_read (\$dir, \@items)
#
# Reads the items a directory consists of and
# stores them in a referenced array.
#

sub _dir_read {
    my ($self, $dir_ref, $items_ref) = @_;

    opendir D, $$dir_ref or
      croak qq~Could not open dir $$dir_ref for read-access: $!~;
    @$items_ref = readdir D; splice (@$items_ref, 0, 2);
    closedir D or croak qq~Could not close dir $$dir_ref: $!~;
}

#
# _move_files ()
#
# Copies the files from the source dir to the target dir
# subdirs. Files will be unlinked after they have been
# successfully copied.
#

sub _move_files {
    my $ret_f_moved;
    for (; @$files_ref; $$suffix++) {
        # create subdir
        my $path = File::Spec->catfile($$target_dir_ref,
          "$$ident_ref$$sep_ref$$suffix_ref");
        mkpath $path, $$verbose_ref or croak qq~Could not create subdir $path: $!~;

        # cp & rm files
        for (my $i = 0; $i < $$f_limit_ref; $i++) {
            last unless my $file = shift @$files_ref;

            # obtain case-sensitive target path
            my $path_full = File::Spec->catfile($path, $$f_names_ref{$file});
            # cat absolute file path
            $file = File::Spec->catfile($$source_dir_ref, $file);

            cp ($file, $path_full) or
              croak qq~Could not copy file $file to subdir $path_full: $!~;
            unlink ($file) or croak qq~Could not remove file $file: $!~;

            $ret_f_moved++;
        }
    }

    return $ret_f_moved ? $ret_f_moved : undef;
}

1;

__END__

=head1 EXAMPLE

Assuming the source directory '/var/tmp/src' contains 9 files, the directory
tree in the target directory '/var/tmp/target' will look as following:

    + /var/tmp/target
    +- system.0001 / 2 file(s)
    +- system.0002 / 2 "
    +- system.0003 / 2 "
    +- system.0004 / 2 "
    +- system.0005 / 1 "

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut
