use 5.008001;
use strict;
use warnings;
use utf8;

package String::Copyright;

=encoding UTF-8

=head1 NAME

String::Copyright - Representation of text-based copyright statements

=head1 VERSION

Version 0.001001

=cut

our $VERSION = '0.001001';

# Dependencies
use Exporter 5.57 (qw/import/);
use Carp ();

our @EXPORT = qw/copyright/;

# Slots of the array reference backing each String::Copyright object.
use constant PLAINTEXT => 0;    # source text; undef in objects built by new()
use constant BLOCKS    => 1;    # array reference holding the parsed statements

# Stringification is the only operator implemented explicitly: it renders
# the object through _compose().  With fallback enabled, perl derives the
# remaining operators from that string form where it can.
#
# Candidate overloads that are currently not enabled:
#	q{@{}}   => sub { ${$_[0]->[BLOCKS]} },
#	q{@{}}   => sub { ( $_[0]->[BLOCKS] ) },
#	q{@{}}   => sub { $_[0]->[BLOCKS] },
#	q{0+}    => sub { 0+$_[0]->[BLOCKS] },
use overload
	q{""}    => sub { my ($self) = @_; return $self->_compose },
	fallback => 1;

=head1 SYNOPSIS

    use String::Copyright;

    my $copyright = copyright(<<'END');
    copr. © 1999,2000 Foo Barbaz <fb@acme.com> and Acme Corp.
    Copyright (c) 2001,2004 Foo (work address) <foo@zorg.org>
    Copyright 2003, Foo B. and friends
    © 2000, 2002 Foo Barbaz <foo@bar.baz>
    END

    print $copyright;

    # Copyright 1999-2000 Foo Barbaz <fb@acme.com> and Acme Corp.
    # Copyright 2000, 2002 Foo Barbaz and Acme Corp.
    # Copyright 2001, 2004 Foo (work address) <foo@zorg.org>
    # Copyright 2003 Foo B. and friends

    # NOTE: options to normalize() are not yet implemented
    print $copyright->normalize(
      alias  => {
        [ 'foo@bar.baz' => [ 'fb@acme.com', 'foo@zorg.org'] ] },
      mangle => {
        [ 's/Foo Barbaz\K(?= and .*)$/ <foo@bar.baz>/' ] },
    );

    # Copyright 1999-2000, 2002-2003 Acme Corp.
    # Copyright 1999-2004 Foo Barbaz <foo@bar.baz>
    # Copyright 2003 Foo B. and friends

=head1 DESCRIPTION

L<String::Copyright> parses common styles of copyright statements
and serializes them in a normalized format.

=cut

# Build a String::Copyright object from text (exported by default).
#
# Arguments: the text to parse, optionally followed by further parts
#            (which must be defined, but are not parsed yet).
# Returns:   the argument itself when it is already a String::Copyright
#            object and nothing else was passed; otherwise a new object
#            holding the stringified text and its parsed blocks.
# Croaks when any argument is undefined.
sub copyright
{
	my ( $text, @extra ) = @_;

	# the text itself plus every extra part must be defined
	my $defined_parts = grep { defined } $text, @extra;
	Carp::croak("String::Copyright strings require defined parts")
		if $defined_parts != 1 + @extra;

	# String::Copyright objects are effectively immutable and can be reused
	return $text
		if !@extra && ref($text) eq __PACKAGE__;

	# force anything else, objects included, into a plain string
	my $string = "$text";

	# TODO: also parse @extra - but each separately!
	my @lines  = split /^/, $string;
	my $blocks = parse_string(@lines);

	return bless [ $string, $blocks ], __PACKAGE__;
}

# Construct an object from already-parsed data, i.e. without source text.
#
# Called on an existing String::Copyright object with no further arguments,
# returns a new object that shares that object's blocks but has its
# plaintext slot cleared.  Otherwise the arguments themselves become the
# blocks of the new object.
#
# Croaks when an argument is undefined or empty, or is not an array
# reference.
# NOTE(review): validation of the data is still minimal (see FIXME below) -
# confirm the intended shape of the blocks before relying on it.
sub new
{
	my ( $self, @data ) = @_;

	# Count against @data, not @_: @_ still contains the invocant, so
	# comparing with it made every call croak.
	Carp::croak("String::Copyright require defined, positive-length parts")
		unless @data == grep { defined && length } @data;

	# String::Copyright objects are simply stripped of their string part
	if ( !@data && ref($self) eq __PACKAGE__ ) {
		return bless [ undef, $self->[BLOCKS] ], __PACKAGE__;
	}

	# FIXME: properly validate data
	Carp::croak("String::Copyright blocks must be an array of strings")
		unless @data == grep { ref($_) eq 'ARRAY' } @data;

	return bless [ undef, \@data ], __PACKAGE__;
}

# Accessor: returns whatever is stored in the object's BLOCKS slot.
sub blocks
{
	my ($self) = @_;
	return $self->[BLOCKS];
}

# Produce a normalized object by handing the invocant to new().
#
# Options are accepted in the signature but not supported yet:
# passing any dies with a stack trace.
sub normalize
{
	my $self = shift;

	# whatever is left in @_ would be options
	if (@_) {
		Carp::confess("normalize options not yet implemented");
	}

	return new($self);
}

# Serialize the object: its blocks joined by newlines
# (used by the stringification overload).
sub _compose
{
	my ($self) = @_;
	return join "\n", @{ $self->[BLOCKS] };
}

# True when the object carries no source text in its PLAINTEXT slot.
sub is_normalized
{
	my ($self) = @_;
	return !defined $self->[PLAINTEXT];
}

# Extract copyright statements from a list of text lines.
#
# Arguments: the lines to scan; they are consumed from @_ one at a time.
# Returns:   an array reference holding one cleaned-up string per
#            statement, with whitespace collapsed and years tidied.
sub parse_string
{
	my @block;
	my $lines_after_copyright_block = 0;

	# flag handed by reference to parse_line(), which sets and clears it
	my $in_copyright_block = 0;
	while (@_) {
		my $line = shift;
		my $copyright_match = parse_line( $line, \$in_copyright_block );
		if ($copyright_match) {
			while ( @_ and $copyright_match =~ /\d[,.]?\s*$/ ) {

				# looks like the statement ends with a year:
				# assume the owner is on the next line(s)
				$copyright_match .= ' ' . shift;
			}

			# collapse all whitespace runs (including the newlines of
			# appended lines) and drop trailing whitespace
			$copyright_match =~ s/\s+/ /g;
			$copyright_match =~ s/\s*$//;

			# TODO: split block into year and owner (not implemented)

			# TODO: split owner into owner_id and owner (not implemented)

			# normalize year delimiters and ranges:
			# adjacent 4-digit years become "YYYY, YYYY",
			# and ranges are written without spaces as "YYYY-YYYY"
			# TODO: test if \K or non-backref beneficial on perl >= 5.10
			#$copyright_match =~ s/\b\d{4}\K\s*,?\s*(?=\d{4}\b)/, /g;
			#$copyright_match =~ s/\b\d{4}\K\s*-\s*(?=\d{4}\b)/-/g;
			#$copyright_match =~ s/\b(\d{4})\s*,?\s*(?=\d{4}\b)/$1, /g;
			#$copyright_match =~ s/\b(\d{4})\s*-\s*(?=\d{4}\b)/$1-/g;
			$copyright_match =~ s/\b(?<=\d{4})\s*,?\s*(?=\d{4}\b)/, /g;
			$copyright_match =~ s/\b(?<=\d{4})\s*-\s*(?=\d{4}\b)/-/g;

			# normalize years and year ranges:
			# starting right after a year that does not itself end a range,
			# consume every directly following consecutive year (listed or
			# ranged) and every range end that is not earlier than the year
			# tracked in $y, then replace all of that with "-" plus the last
			# year matched - e.g. "1999, 2000" becomes "1999-2000".
			# NOTE(review): $y is a lexical shared with the regex code blocks
			# below; such closures reportedly only became reliable in
			# perl 5.18 - confirm behaviour on older perls.
			my $y;
			$copyright_match =~ s/
					# TODO: test if \K beneficial on perl >= 5.10
					#(?:\A|(?<!-))             # start or not-a-range
					#(\b\d{4})\K               # year
					#(?{$y=$^N})               # save year
					(?:(?<=[^-](\b\d{4}))      # non-range-end year
					|(?<=\A(\b\d{4})))         # or first year
					(?{$y=$^N})                # save year
					(?:
						(?:,\s|-)          # list or range
						((??{++$y}))\b     # next year
						|
						-(\d{4})\b         # range-end year
						(??{
							if($y <= $^N) {   # if later year
								$y = $^N; # then save
								'';       # and match
							} else {
								'XXXX';   # else mismatch
							}
						})\b
					)+
				/-$^N/gx;

			push @block, $copyright_match;
		}
		elsif (@block) {

			# Once at least one statement was found, count every line that
			# yields no statement and give up when too many were seen.
			# The counter is tested before it is incremented and is never
			# reset, so scanning stops at the seventh such line in total.
			last if $lines_after_copyright_block++ > 5;
		}
	}

	# TODO: save $lines_after_copyright_block to indicate how dirty parsing was

	return \@block;
}

# Marker that introduces a copyright statement.
# Also used to strip repeated markers when cleaning up a matched line.
my $copyright_indicator_regex = qr!
	(?:copyright(?:-holders?)? # The full word (or slightly more)
		|copr\. # Legally-valid abbreviation
		|© # Unicode character COPYRIGHT SIGN
		|\(c\) # Legally-null representation of sign
	)
!ix;

# Marker followed by a colon or whitespace; captures the rest of the line.
# No locale modifier: it is unavailable before perl 5.14 (this file
# declares 5.008001) and would make \s and \S depend on the locale,
# whereas the module is documented to operate on decoded strings.
my $copyright_indicator_regex_with_capture
	= qr!$copyright_indicator_regex(?::\s*|\s+)(\S.*)$!ix;

# Words that show the text merely talks about copyright.
# avoid ditching things like <info@foo.com>
my $copyright_disindicator_regex = qr{
	\b(?:info(?:rmation)?(?!@) # Discussing copyright information
	|(?:notice|statement|claim|string)s? # Discussing the notice
	|is|in|to # Part of a sentence
	|(?:holder|owner)s? # Part of a sentence
	|ownership # Part of a sentence
	)\b
}ix;

# Lines that must not be examined for a marker at all.
my $copyright_predisindicator_regex = qr!(
	^[#]define\s.*\(c\) # #define foo(c) -- not copyright
)!ix;

# Extract the copyright statement, if any, from a single line of text.
#
# Arguments:
#   $data                   - the line to inspect
#   $in_copyright_block_ref - reference to a flag, updated in place: set when
#                             the line carries a copyright marker, cleared
#                             when the line neither carries a marker nor
#                             continues a block
# Returns the cleaned-up statement, the unmodified line when it continues a
# block by starting with a year, or an empty string otherwise.
sub parse_line
{
	my ( $data, $in_copyright_block_ref ) = @_;

	# lines like "#define foo(c)" are skipped, leaving the flag untouched
	return '' if $data =~ $copyright_predisindicator_regex;

	#print "match against ->$data<-\n";
	if ( $data =~ $copyright_indicator_regex_with_capture ) {
		my $match = $1;
		$$in_copyright_block_ref = 1;

		# Ignore lines matching "see foo for copyright information" etc.
		return '' if $match =~ $copyright_disindicator_regex;

		# De-cruft: drop repeated markers and the first standalone "by"
		$match =~ s/$copyright_indicator_regex//igx;
		$match =~ s/\s*\bby\b\s*/ /;

		# Tidy whitespace last: removing "by" leaves a space behind, which
		# used to survive at the start of e.g. "Copyright by Foo".
		$match =~ s/\s{2,}/ /g;
		$match =~ s/^\s+//;
		$match =~ s/\s+$//;

		return $match;
	}

	# following lines beginning with a year are supposed to be
	# continued copyright blocks
	if ( $$in_copyright_block_ref and $data =~ /^\d{2,}[,\s]+/ ) {
		return $data;
	}

	$$in_copyright_block_ref = 0;
	return '';
}

=head1 SEE ALSO

=over 4

=item *

L<Encode>

=back

=head1 BUGS/CAVEATS/etc

L<String::Copyright> operates on strings, not bytes.
Data encoded as UTF-8, Latin1 or other formats
need to be decoded to strings before use.

Only ASCII characters and B<©> (copyright sign) are directly processed.

If the copyright sign is mis-detected,
or accented or multi-byte characters are displayed wrongly,
then most likely the data was not decoded into a string.

If ranges or lists of years are not tidied,
then maybe the data contained non-ASCII whitespace or digits.

=head1 AUTHOR

Jonas Smedegaard C<< <dr@jones.dk> >>

=head1 COPYRIGHT AND LICENSE

Derived from L<App::Licensecheck>, originally part of the KDE SDK
and originally introduced by Stefan Westerfeld C<< <stefan@space.twc.de> >>;
and from the script licensecheck2dep5, part of the Debian CDBS tool,
written by Jonas Smedegaard.

  Copyright © 2007, 2008 Adam D. Barratt

  Copyright © 2005-2012, 2016 Jonas Smedegaard

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <https://www.gnu.org/licenses/>.

=cut

1;
