#!/usr/bin/perl

# pla.pl -n negfile -p posfile [outfile]
#
# Perceptron learning algorithm (developed by Frank Rosenblatt, 1958)
#
# Writes out a weight vector to OUTFILE (or STDOUT) trained on negative
# examples from NEGFILE and positive examples from POSFILE.
# For the program to work, the input list needs to consist of binary 
# feature vectors, such as those that can be built with feature.pl.
#
# This function will only converge if the training vectors are 
# linearly seperable, which is seldom the case in practice.
#
# This program implements the same algorithm, and should do the same 
# thing, as does the GNU Octave/Matlab program "pla.m" that is 
# included with Perform.
#
# Perform (Perceptron Classifier in Inform) v1.0
# Nick Montfort  http://nickm.com  2004-06-24

use strict;
use warnings;
use Getopt::Std;
use vars qw/ $opt_n $opt_p $opt_u /;

getopt("n:p:u");
my @data; my @nmat; my @pmat; my $j; my @point; my $sum; my $iter;
if(!$opt_n or !$opt_p or $opt_u)
  { die ("Usage: $0 -n negfile -p posfile [outfile]\n"); }
if ($ARGV[0])
  { open OUT, ">$ARGV[0]" or die("<!> Can't open $ARGV[0] for writing."); }
  
# Load data...
open NEG, "<$opt_n" or die("<!> Can't open $opt_n for reading.");
print "Loading negative data from $opt_n ... ";
my @neg = <NEG>;
close NEG;
print "got all ".scalar(@neg)." of them.\n";
open POS, "<$opt_p" or die("<!> Can't open $opt_p for reading.");  
print "Loading positive data from $opt_p ... ";
my @pos = <POS>;
close POS;
print "got all ".scalar(@pos)." of them.\n";
print "Data loaded.\n";

# There's a " 0" or " 1" for each feature, so the number of features
# is half the line length minus one (for the newline at the end).
my $dim = (length($neg[0]) - 1)/2;
print "Dimension of data is $dim.\n";

# Process all negative examples into @nmat.
my $i = 0;
for(@neg)
  {
  chomp(); $j = 0;
  for(split(/ /))
    { $nmat[$i][$j] = $_; $j++; }  
  $i++;
  }

# Process all negative examples into @pmat.
$i = 0;
for(@pos)
  {
  chomp(); $j = 0;
  for(split(/ /))
    { $pmat[$i][$j] = $_; $j++; }
  $i++;
  }

# Set weight vector to all zeros initially.
my @w; my $bias = 0;
for(1..$dim)  { push(@w, 0); }

# Initially, all the negative examples are correctly classified by
# the zero weight vector.
my @tneg; my $sumtneg = 0;
for(@neg) { push(@tneg, 0); }

# But the zero weight vector misclassifies all the positive examples.
my @tpos;
for(@pos) { push(@tpos, 1); }
my $sumtpos = @tpos;

while (($sumtneg + $sumtpos) > 0) # Go until perfectly classified.
  {
  $iter++; # New iteration...
  print "Starting iteration $iter with ";
  print "$sumtneg (-) and $sumtpos (+) misclassified...\n";

  # Be informative if there are a just a few points misclassified.
  if ($sumtneg < 11 and $sumtneg > 0) {
    print "(-) points missed: [ ";
    $j = 1;
    for( @tneg ) { if ($_ == 1) { print "$j "; } $j++; }
    print "]\n";
  }
  if ($sumtpos < 11 and $sumtpos > 0) {
    print "(+) points missed: [ ";
    $j = 1;
    for( @tpos ) { if ($_ == 1) { print "$j "; } $j++; }
    print "]\n";
  }

  # Clear the number of (-) and (+) misclassified.
  $sumtneg = 0;
  $sumtpos = 0;

  # If an element of tneg is 1, subtract that point from w.
  $j = 0;
  for( @tneg )
    {
    if ($_ == 1)
      {
      for(0..($dim-1)) { $w[$_] -= $nmat[$j][$_]; } 
      $bias--;
      }
    $j++;
    }

  # If an element of tpos is 1, add that point to w.
  $j = 0;
  for( @tpos )
    {
    if ($_ == 1)
      {
      for(0..($dim-1)) { $w[$_] += $pmat[$j][$_]; }
      $bias++;
      }
    $j++;
    }

  # Update tneg to reflect how the new weights classify the points.
  for($j = 0; $j < @neg; $j++)
    {
    for(0..($dim-1)) { $point[$_] = $nmat[$j][$_]; }; 
    $sum = $bias; 
    for(0..($dim-1)) { $sum += $point[$_] * $w[$_]; }
    $tneg[$j] = ($sum > 0);
    $sumtneg += $tneg[$j];
    }

  # Update tpos to reflect how the new weights classify the points.
  for($j = 0; $j < scalar(@pos); $j++)
    {
    for(0..($dim-1)) { $point[$_] = $pmat[$j][$_]; }; 
    $sum = $bias; 
    for(0..($dim-1)) { $sum += $point[$_] * $w[$_]; }
    $tpos[$j] = ($sum <= 0);
    $sumtpos += $tpos[$j];
    }
  }

print "Finished successfully.\n\n";
if ($ARGV[0])
  { print OUT " @w"; print OUT "\n $bias"; print "\n"; close OUT; }
else
  { print "@w "; print "\n $bias"; }
  
__END__
