#!/usr/bin/perl

#**********************************************************************/
#* Copyright (c) University of Washington,                            */
#* Department of genome Sciences, 2006. Written by Shobhit Gupta      */
#* All rights reserved.                                               */
#**********************************************************************/
# Usage: memehtml2text.pl [-strands 1|2] [-addprior <positive int>] <meme_file.html>
# Output is written to stdout as a MEME version 3 formatted file.

# Help text describing the command line of this script.
$usage =
    "\nUSAGE: memehtml2text.pl -strands [1|2]  <meme_file.html>
  Options: -strands [1|2] single or both strands, default 2
           -addprior <positive int>
           Output is written on stdout\n
           \n";

# Invoked without any argument: there is nothing to convert, so show the help
# text on STDERR (stdout carries the converted file) and exit with status 1.
if (!@ARGV) {
    # print rather than printf: the help text is a plain message, not a format
    # string, so a literal '%' in it must never be interpreted.
    print(STDERR $usage);
    exit(1);
}

# Command-line settings, kept as package globals because the rest of the
# script reads them:
#   $addprior - count added to every letter of the letter-probability
#               matrices; 0 (the default) leaves the matrices untouched
#   $STRANDS  - value of -strands (default 2); validated further below
#   $INPUT    - path of the MEME HTML file to convert
$addprior = 0;
$STRANDS  = 2;

# Every argument except the last one belongs to an option; the last argument
# is the input file.  Each recognised option consumes the value following it.
while (scalar(@ARGV) > 1) {
    my $arg = shift @ARGV;
    if ($arg eq "-strands") {
        $STRANDS = shift @ARGV;
    }
    elsif ($arg eq "-addprior") {
        $addprior = shift @ARGV;
        # Accept non-negative whole numbers only.  A purely arithmetic check
        # would let non-numeric text through, because Perl evaluates such a
        # string as 0.
        if (!defined($addprior) || $addprior !~ m/^\+?\d+$/) {
            # Diagnostics go to STDERR: stdout is the converted file.
            print(STDERR "Illegal value for addprior\n");
            exit(2);
        }
    }
    else {
        print(STDERR "Illegal argument ($arg)\n");
        exit(1);
    }
}

# Whatever is left (at most one argument) is the input file.  Without it the
# conversion cannot run, so report the problem instead of silently producing
# empty output.
$INPUT = shift(@ARGV);
if (!defined($INPUT)) {
    print(STDERR "Missing input file\n");
    print(STDERR $usage);
    exit(1);
}


# Translate the -strands setting into the text written after "strands:" in the
# output.  Only the exact values 1 and 2 are accepted; a string lookup is used
# because a numeric comparison would also accept values such as "2abc".
my %strand_string_for = (
    '1' => " +",
    '2' => " + -",
);

if (defined($STRANDS) && exists $strand_string_for{$STRANDS}) {
    $strand_string = $strand_string_for{$STRANDS};
}
else {
    # Diagnostics go to STDERR: stdout is reserved for the converted file.
    print(STDERR "value of -strands not valid: should be 1 or 2 default is 2\n");
    exit(2);
}

# Convert the MEME HTML report named by $INPUT into a text motif file on
# stdout.  The input is scanned line by line; only the lines recognised below
# are copied (some with an extra blank line after them), everything else is
# dropped.
#
# Reads the globals $INPUT, $strand_string and $addprior set earlier in the
# script.  Dies if the input file cannot be opened.

# Number of sites of the motif currently being read (taken from its "BL" line).
my $nsites = 0;
# Number of letters in the alphabet (length of the ALPHABET= string).
my $alph_size = 0;

# A three-argument open with an undefined file name would succeed on an
# anonymous temporary file, so rule that case out explicitly.
die "No input file given\n" unless defined $INPUT;
open(my $in, '<', $INPUT)
    or die "Cannot open input file '$INPUT': $!\n";

while (defined(my $line = <$in>)) {
    if ($line =~ m/^MEME version/) {
        print "$line\n";
    }
    elsif ($line =~ m/^ALPHABET= (\S+)/) {
        $alph_size = length $1;
        print "$line\n";
    }
    elsif ($line =~ m/^strands: /) {
        # The strand information of the input is replaced by the -strands
        # setting.
        print "strands: $strand_string\n\n";
    }
    elsif ($line =~ m/^Background letter frequencies /) {
        # The line following the header is copied along with it.
        print $line;
        my $frequencies = <$in>;
        print "$frequencies\n" if defined $frequencies;
    }
    elsif ($line =~ m/summary_doc\"\>(MOTIF [^\<]*)\</) {
        # Start of a new motif: emit its title and forget the previous
        # motif's site count.
        print "$1\n\n";
        $nsites = 0;
    }
    elsif ($line =~ m/^BL /) {
        print $line;
        # Take the site count from "seqs=<n>"; fall back to 0 rather than to
        # whatever an earlier match left behind.
        $nsites = ($line =~ m/seqs\=(\d+)/) ? $1 : 0;
    }
    elsif ($line =~ m/^log-odds matrix: /) {
        print $line;
        # Copy the matrix rows up to (not including) the closing </PRE> line.
        # Stopping at end of file as well keeps a truncated input from
        # looping forever.
        while (defined(my $row = <$in>)) {
            last if $row =~ m/\<\/PRE\>/;
            print $row;
        }
    }
    elsif ($line =~ m/^letter-probability matrix: /) {
        print $line;

        # With -addprior, each probability p becomes
        #     (p * nsites + addprior) / (nsites + addprior * alph_size)
        # i.e. addprior counts are added to every letter.  The total number
        # of added counts is addprior * alph_size, which keeps each row
        # summing to 1 for any addprior value.
        my $denominator = 0;
        if ($addprior != 0) {
            $denominator = $nsites + $addprior * $alph_size;
            die "Cannot apply -addprior: no ALPHABET line or site count "
                . "was seen before the letter-probability matrix\n"
                if $denominator == 0;
        }

        # Same termination rule as for the log-odds matrix above.
        while (defined(my $row = <$in>)) {
            last if $row =~ m/\<\/PRE\>/;
            if ($addprior != 0) {
                # split ' ' skips leading whitespace, so no empty field has
                # to be filtered out.
                my @probabilities = split ' ', $row;
                print " ";
                foreach my $probability (@probabilities) {
                    my $value = (($probability * $nsites) + $addprior)
                        / $denominator;
                    printf("%5f  ", $value);
                }
                print "\n";
            }
            else {
                print $row;
            }
        }
        print "\n";
    }
}
close($in);
