/***************************************/
/*      Weeder 1.4      
   see LICENSE.txt file for
  terms and conditions of use
*/

#define MAXSEQ 100000

#define CPS CLOCKS_PER_SEC

#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <math.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>

char **sequence;
int *lens;
int winlen = 0;
char alphabet[4] = { 'A', 'C', 'G', 'T' };
char **name;


double eightmers[65536];
double sixmers[4096];
double tenmers[1048576];

int mycounter = 0;

int totseq = 0;
int totlen = 0;


int MINREP = 2;
double repperc = -1;
int MAXERR = -1;
int localerror;
double percent = 0.0;
FILE *F;

int *besthere;
int *inbestseq;

time_t starttime;
time_t endtime;

int verbose = 0;
int reversed = 0;
int onlybestflag = 1;

char organism[80] = "";

struct pattern
{
  double sig;
  double score;
  unsigned short int counter;
  unsigned short int searchcount;
  char *pat;
  struct pattern *next;
};

struct pattern *headpat;
struct pattern *endpat;
struct pattern *seekpat;

int chartoint (char car);
int min (int a, int b);
struct pattern *mergesrt (struct pattern *primo, int quanti);
struct pattern *merge (struct pattern *uno, struct pattern *due);
char revcomp (char c);
int readfreqs (void);
double compfreqs (char *pat, char *pos, int err);
double exactfreqs (char *pat, int errs);
double newtwmers (char *pat, char *pos, int err);
double tentotwelve (char *pat);
char *reversepat (char *pat);
char inttochar (int num);
double exactfreqswithcheck (char *pat, int errs, char *check);
double compfreqswithcheck (char *pat, char *pos, int err, char *check);
double newtwmerswithcheck (char *inpat, char *pos, int err, char *check);
int isvalid (char car);
int checkmotifanderror (int motlen, int errs);

FILE *OF;
FILE *MF;
FILE *HF;

main (int argc, char *argv[])
{
  int errdef = 0;
  int uu, pp, qq;
  int **used;
  int flagover;
  double patfreqs[5];
  int toreport = 10;
  int start, end;
  double tmpfloat;
  int maah;
  int lastocc;
  char revseek[15];
  int inbest;
  double bestmy;
  double myscore;
  double *besterr;
  int jj;
  int bestsofar;
  int partial;
  int bestpos;

  int i, j, k, l;
  int krev;

  int howmany = 0;

  int lastinseq = -1;
  int howmanyseq = 0;
  int n;
  int nchar;
  char buffer[MAXSEQ];
  char infile[128] = "";
  int len;
  int perint;
  int extfile = 0;
  int partiallen = 0;


  FILE *G;


  char outfile[800] = "";
  char mixfile[800] = "";
  char browsefile[800] = "";

  if (argc < 4)
    {
      fprintf (stderr,
	       "\nUsage : weederTFBS.out -f inputfilename -O speciescode -W motif_width -e error -R repeatspercentage <options>\n");

      fprintf (stderr,
	       "\nspeciescode: two-letter species code (e.g. HS, MM, RN, SC, and so on)\n");
      fprintf (stderr, "-W : motif length\n");
      fprintf (stderr, "-e : number of mutations allowed\n");
      fprintf (stderr,
	       "-R : sequences percentage on which motif has to appear\n");
      fprintf (stderr, "\noptions (not required):\n");
      fprintf (stderr,
	       "-S: process both strands of input sequences (default: single strand)\n");
      fprintf (stderr,
	       "-M: multiple motif occurrences in each sequence (default: expect zero or one occurrences per sequence)\n");
      fprintf (stderr, "-T <number>: report top <number> motifs\n");
      fprintf (stderr, "-V: verbose mode\n");
      exit (1);
    }

  i = 1;

  while ((i < argc))
    {
      switch (argv[i][1])
	{
	case 'O':
	  if ((i + 1 < argc) && (argv[i + 1][0] != '-'))
	    {
	      strcpy (organism, argv[i + 1]);

	      i = i + 2;
	    }
	  else
	    {
	      fprintf (stderr, "\nProblem in organism definition\n");
	      exit (1);
	    }
	  break;
	case 'W':
	  if ((i + 1 < argc) && (argv[i + 1][0] != '-'))
	    {
	      winlen = atoi (argv[i + 1]);

	      if ((winlen != 6) && (winlen != 8) && (winlen != 10)
		  && (winlen != 12))
		{
		  fprintf (stderr,
			   "\nAccepted motif lengths are even values between 6 and 12\n");
		  exit (1);
		}

	      i = i + 2;
	    }
	  else
	    {
	      fprintf (stderr, "\nProblem in motif length definition\n");
	      exit (1);
	    }
	  break;

	case 'T':
	  if ((i + 1 < argc) && (argv[i + 1][0] != '-'))
	    {
	      toreport = atoi (argv[i + 1]);


	      i = i + 2;
	    }
	  else
	    {
	      fprintf (stderr,
		       "\nProblem in number of reported motifs definition\n");
	      exit (1);
	    }
	  break;



	case 'e':
	  if ((i + 1 < argc) && (argv[i + 1][0] != '-'))
	    {
	      MAXERR = atoi (argv[i + 1]);

	      if ((MAXERR < 0))
		{
		  fprintf (stderr, "\nError in defining mismatch number\n");
		  exit (1);
		}
	      i = i + 2;
	      errdef = 1;
	    }
	  else
	    {
	      fprintf (stderr, "\nError in defining mismatch number\n");
	      exit (1);
	    }
	  break;

	case 'R':
	  if ((i + 1 < argc) && (argv[i + 1][0] != '-'))
	    {
	      MINREP = atoi (argv[i + 1]);

	      if ((MINREP < 1) || (MINREP > 100))
		{
		  fprintf (stderr,
			   "\nError in defining repeat percentage -R %d\n",
			   MINREP);
		  exit (1);
		}
	      i = i + 2;
	      repperc = MINREP;
	    }
	  else
	    {
	      fprintf (stderr, "\nError in defining repeat percentage -R\n");
	      exit (1);
	    }
	  break;


	case 'f':
	  if (strcmp ("-f", argv[i]) == 0)
	    {
	      if (i + 1 < argc)
		{
		  strcpy (infile, argv[i + 1]);

		  extfile = 1;
		  i = i + 2;
		}
	      else
		{
		  fprintf (stderr, "\nDid not understand filename\n");
		  exit (1);
		}
	    }
	  break;

	case 'V':
	  verbose = 1;
	  i++;
	  break;

	case 'S':
	  reversed = 1;
	  i++;
	  break;


	case 'N':
	  i++;
	  break;

	case 'M':
	  onlybestflag = 0;
	  i++;
	  break;

	default:
	  fprintf (stderr, "\nSomething wrong with the commandline\n");
	  fprintf (stderr,
		   "\nUsage : weederTFBS.out -f inputfilename -O speciescode -W motif_width -e error -R repeatspercentage <options>\n");

	  fprintf (stderr,
		   "\nspeciescode: two-letter species code (e.g. HS, MM, RN, SC, and so on)\n");
	  fprintf (stderr, "-W : motif length\n");
	  fprintf (stderr, "-e : number of mutations allowed\n");
	  fprintf (stderr,
		   "-R : sequences percentage on which motif has to appear\n");
	  fprintf (stderr, "\noptions (not required):\n");
	  fprintf (stderr,
		   "-S: process both strands of input sequences (default: single strand)\n");
	  fprintf (stderr,
		   "-M: multiple motif occurrences in each sequence (default: expect zero or one occurrences per sequence)\n");
	  fprintf (stderr, "-T <number>: report top <number> motifs\n");
	  fprintf (stderr, "-V: verbose mode\n");


	  exit (1);
	  break;
	}
    }
  if (errdef == 0)
    {
      fprintf (stderr, "\nMissing error number -e\n");
      exit (1);
    }

  if (extfile == 0)
    {
      fprintf (stderr, "\nMissing input filename\n");
      exit (1);
    }

  G = fopen (infile, "r");

  if (G == NULL)
    {
      fprintf (stderr, "\nNo such file : %s\n", infile);
      exit (1);
    }

  if (winlen == 0)
    {
      fprintf (stderr, "\nIllegal motif length\n");
      exit (1);
    }


  if (checkmotifanderror (winlen, MAXERR) == 0)
    {
      fprintf (stderr,
	       "\nIllegal combination of length %d and mismatches %d\n",
	       winlen, MAXERR);
      exit (1);
    }


  readfreqs ();

  /* Conteggio lunghezza sequenze */


  partiallen = 0;

  l = 0;

  nchar = 0;
  totseq = 0;

  lens = (int *) malloc (MAXSEQ * sizeof (int));

  bzero (buffer, MAXSEQ);
  nchar = fscanf (G, "\n%[^\n]", buffer);

  if (buffer[0] != '>')
    {
      fprintf (stderr, "\nInput file not in FASTA format\nExiting program\n");
      exit (0);
    }


  while (nchar != EOF)
    {
      totseq++;
      nchar = fscanf (G, "\n%[^\n]", buffer);

      while ((buffer[0] != '>') && (nchar != EOF))
	{
	  partiallen += strlen (buffer);
	  bzero (buffer, MAXSEQ);
	  nchar = fscanf (G, "\n%[^\n]", buffer);
	}
      lens[l++] = partiallen;
      partiallen = 0;
    }

  /* Fine conteggio lunghezza sequenze */

  fclose (G);

  G = fopen (infile, "r");

  if (G == NULL)
    {
      fprintf (stderr, "\nNo such file : %s\n", infile);
      exit (1);
    }



  besthere = (int *) malloc (totseq * sizeof (int));
  inbestseq = (int *) malloc (totseq * sizeof (int));

  sequence = (char **) malloc ((totseq + 1) * sizeof (char *));
  used = (int **) malloc ((totseq + 1) * sizeof (int *));
  name = (char **) malloc ((totseq + 1) * sizeof (char *));



  for (i = 0; i < totseq + 1; i++)
    {
      name[i] = (char *) malloc ((20) * (sizeof (char)));
      sequence[i] =
	(char *) malloc ((lens[i] + 1 + 20 + winlen) * (sizeof (char)));
      used[i] = (int *) malloc ((lens[i]) * sizeof (int));

    }

  if(totseq > MAXSEQ)
  {
   fprintf (stderr, "\nSorry! At most %d sequences can be input!\n", MAXSEQ);
   exit(1);
  }

  for (i = 0; i < totseq; i++)
    {
      for (j = 0; j < lens[i]; j++)
      used[i][j] = 0;
    }






  bzero (buffer, MAXSEQ);


  l = 0;




  fscanf (G, "\n%[^\n]", buffer);

  strncpy (sequence[l], buffer, 20);

ERE:

  while (l < totseq)
    {
      if (buffer[0] == '>')
	{
	  strncpy (name[l], buffer, 19);

	  bzero (sequence[l], lens[l] + 20 + 1);


	  bzero (buffer, MAXSEQ);

	  nchar = fscanf (G, "\n%[^\n]", buffer);

	  while ((buffer[0] != '>') && (nchar != EOF))
	    {
	      strcat (sequence[l], buffer);
	      bzero (buffer, MAXSEQ);

	      nchar = fscanf (G, "\n%[^\n]", buffer);
	    }
	}

      len = strlen (sequence[l]);

      if (verbose == 1)
	fprintf (stderr, "Sequence %s is %d letters long\n", name[l], len);


      lens[l] = strlen (sequence[l]);
      totlen += lens[l];

      l++;
    }






  besterr = (double *) malloc ((MAXERR + 1) * sizeof (double));



  if (repperc < 0)
    {
      fprintf (stderr, "\nUndefined repeat percentage -R\n");
      exit (1);
    }



  repperc = (repperc * totseq / 100.0);
  MINREP = ceil (repperc);


  sprintf (outfile, "%s.wee", infile);
  sprintf (mixfile, "%s.mix", infile);
  sprintf (browsefile, "%s.html", infile);

  OF = fopen (outfile, "a");
  MF = fopen (mixfile, "a");
  HF = fopen (browsefile, "a");


  fprintf (stderr,
	   "\nSearching for motifs of length %d with %d mutations in file %s.....\n",
	   winlen, MAXERR, infile);
  fprintf (OF, "\nSearching for motifs of length %d with %d mutations.....\n",
	   winlen, MAXERR);
  fprintf (HF,
	   "<br>Searching for motifs of length %d with %d mutations.....<br><br>",
	   winlen, MAXERR);

  if (verbose == 1)
    fprintf (stderr, "\nMinimum sequence repeats : %d out of %d\n", MINREP,
	     totseq);

  fprintf (OF, "(Actual commandline is : \n");
  fprintf (HF, "(Actual commandline is : <br>");

  for (i = 0; i < argc; i++)
    {
      fprintf (OF, "%s ", argv[i]);
      fprintf (HF, "%s ", argv[i]);

    }

  fprintf (OF, ")\n");
  fprintf (HF, ")<br>");

  for (l = 0; l < totseq; l++)
    for (i = 0; i < lens[l]; i++)
      {
	sequence[l][i] = toupper (sequence[l][i]);

	if (isvalid (sequence[l][i]) == 0)
	  {
	    if (i - winlen + 1 < 0)
	      start = 0;
	    else
	      start = i - winlen + 1;

	    //if (i + winlen > lens[l])
	      //end = lens[l];
	    //else
	      end = i;

	    for (j = start; j <= end; j++)
	      used[l][j] = 1;
	  }

      }


  starttime = clock () / CPS;







  headpat = (struct pattern *) malloc (sizeof (struct pattern));
  endpat = (struct pattern *) malloc (sizeof (struct pattern));
  endpat->score = -111111.0;
  headpat->next = endpat;

  l = 0;

  while (l < totseq)
    {
      if (verbose == 1)
	fprintf (stderr, "Processing sequence %s\n", name[l]);

      for (i = 0; i < lens[l] - winlen; i++)
	{

	  if ((verbose == 1) && ((i % 100) == 0))
	    fprintf (stderr, "Position %d\n", i);

	  if ((used[l][i] == 0))
	    {
	      used[l][i] = 1;
	      seekpat = (struct pattern *) malloc (sizeof (struct pattern));

	      seekpat->pat = (char *) malloc ((winlen + 1) * sizeof (char));
	      bzero (seekpat->pat, winlen + 1);

	      seekpat->counter = l;
	      seekpat->searchcount = i;
	      seekpat->sig = 1.0;

	      mycounter++;

	      for (j = 0; j < winlen; j++)
		seekpat->pat[j] = toupper (sequence[l][i + j]);

	      seekpat->next = headpat->next;
	      headpat->next = seekpat;

	      seekpat = headpat;
	      myscore = 0.0;





	      howmany = 0;
	      howmanyseq = 0;

	      if (reversed == 1)
		{
		  bzero (revseek, 15);

		  j = strlen (seekpat->next->pat) + 1;

		  for (uu = 0; uu <= j - 2; uu++)
		    {
		      revseek[uu] = revcomp (seekpat->next->pat[j - 2 - uu]);
		    }
		}


/* QUI PER SINGOLO STRAND */
	      if (reversed != 1)
		{

		  for (uu = 0; uu <= MAXERR; uu++)
		    {
		      besterr[uu] = -1.0;
		    }

		  lastinseq = -1;

		  for (uu = 0; uu < totseq; uu++)
		    {
		      inbestseq[uu] = -1;
		      besthere[uu] = -1;
		      inbest = 0;
		      lastocc = -1;
		      bestsofar = 1000;
		      bestpos = -1;
		      inbest = 0;
		      localerror = MAXERR;

		      maah = (lens[uu]) - strlen (seekpat->next->pat) + 1;

		      if ((maah > 0) && (howmanyseq + totseq - uu >= MINREP))
			{
			  for (j = 0; j < maah; j++)
			    {
			      k = 0;

			      for (pp = 0; pp < winlen; pp++)
				{
				  if (toupper (sequence[uu][j + pp]) !=
				      (seekpat->next->pat[pp]))
				    {
				      k++;
				    }

				  if (k > localerror)
				    break;
				}



			      if ((k <= localerror))
				{

				  if (k == 0)
				    used[uu][j] = 1;


				  howmany++;
				  lastocc = j;


				  if ((k == bestsofar) && (lastinseq == uu))
				    {
				      inbest++;
				    }



				  if (lastinseq != uu)
				    {
				      lastinseq = uu;
				      howmanyseq++;
				      bestpos = j;
				      bestsofar = k;
				      inbest = 1;
				    }



				  if ((k < bestsofar))
				    {
				      bestpos = j;
				      bestsofar = k;
				      inbest = 1;

				      if (onlybestflag == 1)
					localerror = bestsofar;

				    }

				  k = 0;
				}

			    }
			}

		      if (bestpos > -1)
			{

			  if (besterr[bestsofar] == -1)
			    besterr[bestsofar] = 1;



			  besthere[uu] = bestsofar;
			  inbestseq[uu] = inbest;


			}
		    }



		  if ((howmanyseq >= MINREP))
		    {
		      patfreqs[0] = exactfreqs (seekpat->next->pat, 0);

		      for (uu = 1; uu <= MAXERR; uu++)
			{
			  if (winlen != 12)
			    patfreqs[uu] =
			      exactfreqs (seekpat->next->pat, uu);
			  else
			    patfreqs[uu] =
			      exactfreqs (seekpat->next->pat,
					  uu) + patfreqs[uu - 1];
			}

		      seekpat->next->score = 0.0;
		      myscore = 0.0;

		      for (uu = 0; uu < totseq; uu++)
			{
			  if (besthere[uu] >= 0)
			    tmpfloat =
			      log ((double) inbestseq[uu]) -
			      (log (patfreqs[besthere[uu]]) +
			       log ((double) lens[uu]));
			  else
			    tmpfloat = 0;

			  myscore += tmpfloat;
			}

		      seekpat->next->score = (myscore);

		      if (onlybestflag != 1)
			seekpat->next->score +=
			  log ((double) howmany) -
			  (log (patfreqs[MAXERR]) + log ((double) totlen));

		    }
		  else
		    {
		      seekpat->next->score = -1.0;
		    }

		  // seekpat = seekpat->next;
		}
	      else
		{
		  for (uu = 0; uu <= MAXERR; uu++)
		    {
		      besterr[uu] = -1.0;
		    }

		  howmany = 0;
		  howmanyseq = 0;
		  lastinseq = -1;
		  myscore = 0.0;

		  for (uu = 0; uu < totseq; uu++)
		    {
		      inbestseq[uu] = -1;
		      besthere[uu] = -1;
		      bestsofar = 1000;
		      bestpos = -1;
		      maah = (lens[uu]) - winlen + 1;
		      localerror = MAXERR;

		      if ((maah > 0) && (howmanyseq + totseq - uu >= MINREP))
			{
			  for (j = 0; j < maah; j++)
			    {
			      k = 0;
			      krev = 0;

			      for (pp = 0; pp < winlen; pp++)
				{
				  if ((sequence[uu][j + pp]) !=
				      (seekpat->next->pat[pp]))
				    {
				      k++;
				    }
				  if ((sequence[uu][j + pp]) !=
				      ((revseek[pp])))
				    {
				      krev++;
				    }
				  if ((k > localerror) && (krev > localerror))
				    break;
				}

			      if ((k <= localerror) && (k <= krev))
				{
				  howmany++;

				  if (k == 0)
				    used[uu][j] = 1;

				  flagover = 0;

				  if ((j - winlen + 1 >
				       lastocc) || (lastocc == -1))
				    {
				      flagover = 1;
				    }


				  if ((k == bestsofar) && (lastinseq == uu))
				    inbest++;

				  if (lastinseq != uu)
				    {
				      inbest = 1;
				      lastinseq = uu;
				      howmanyseq++;
				      bestpos = j;
				      bestsofar = k;
				    }

				  if (k < bestsofar)
				    {
				      inbest = 1;
				      bestpos = j;
				      bestsofar = k;

				      if (onlybestflag == 1)
					localerror = bestsofar;
				    }




				}

			      if ((krev <= localerror)&&(krev < k))
				{
				  if (krev == 0)
				    used[uu][j] = 1;

				  howmany++;
				  flagover = 0;

				  if ((j - winlen + 1 >
				       lastocc) || (lastocc == -1))
				    {
				      flagover = 1;
				    }


				  if ((krev == bestsofar)
				      && (lastinseq == uu))
				    inbest++;

				  if (lastinseq != uu)
				    {
				      inbest = 1;
				      lastinseq = uu;
				      howmanyseq++;
				      bestpos = j;
				      bestsofar = krev;
				    }

				  if ((krev < bestsofar))
				    {
				      inbest = 1;
				      bestpos = j;
				      bestsofar = krev;
				      if (onlybestflag == 1)
					localerror = bestsofar;
				    }





				  k = 0;
				}
			    }
			}
		      if (bestpos > -1)
			{
			  besthere[uu] = bestsofar;
			  inbestseq[uu] = inbest;
			}
		    }


		  if ((howmanyseq >= MINREP))
		    {
		      patfreqs[0] =
			exactfreqs (seekpat->next->pat,
				    0) + exactfreqswithcheck (revseek, 0,
							      seekpat->next->
							      pat);

		      for (uu = 1; uu <= MAXERR; uu++)
			{
			  if (winlen != 12)
			    patfreqs[uu] =
			      exactfreqs (seekpat->next->pat,
					  uu) + exactfreqswithcheck (revseek,
								     uu,
								     seekpat->
								     next->
								     pat);
			  else
			    patfreqs[uu] =
			      exactfreqs (seekpat->next->pat,
					  uu) + exactfreqswithcheck (revseek,
								     0,
								     seekpat->
								     next->
								     pat) +
			      patfreqs[uu - 1];
			}

		      seekpat->next->score = 0.0;
		      myscore = 0.0;

		      for (uu = 0; uu < totseq; uu++)
			{
			  if (besthere[uu] > -1)
			    tmpfloat =
			      log ((double) inbestseq[uu]) -
			      (log (patfreqs[besthere[uu]]) + log (lens[uu]));
			  else
			    tmpfloat = 0.0;

			  myscore += tmpfloat;

			}
		      seekpat->next->score = (myscore);

		      if (onlybestflag != 1)
			seekpat->next->score +=
			  log ((double) howmany) -
			  (log (patfreqs[MAXERR]) + log ((double) totlen));

		    }
		  else
		    {
		      seekpat->next->score = -1;
		      //mycounter++;
		    }

/*
		  if (search_complement (seekpat) == 1)
		    {
		      fprintf (stderr, "\nREVERSE COMPLEMENT : %s\n",
			       seekpat->next->next->pat);
		      //fprintf (stderr, "\nREVERSE COMPLEMENT : %s \n",
		      //seekpat->next->next->pat);

		      seekpat->next->next->score = -10000;
		      i++;
		    }
*/
		}

	    }
	  //else
	  //fprintf(stderr, "USED!! %d %d\n", l,i);
	}
      l++;
    }

  if (headpat->next != endpat)
    headpat->next = mergesrt (headpat->next, mycounter);

  seekpat = headpat;
  i = 0;


  while ((seekpat->next->score > 0) && (i < toreport))
    {
      i++;

	fprintf (stderr, "%d) %s %.2f\n", i, seekpat->next->pat,
		 seekpat->next->score/totseq);
	fprintf (OF, "%d) %s %.2f \n", i, seekpat->next->pat,
		 seekpat->next->score/totseq);
	fprintf (HF, "%d) %s %.2f <br>", i, seekpat->next->pat,
		 seekpat->next->score/totseq);


	fprintf (MF, "%d) %s %.2f %d\n", i, seekpat->next->pat,
		 seekpat->next->score/totseq, MAXERR);



      seekpat = seekpat->next;

    }


  endtime = clock () / CPS;
  fprintf (OF, "\nElapsed time : %d min. %d sec.\n",
	   (endtime - starttime) / 60, (endtime - starttime) % 60);
  fprintf (HF, "<br>Elapsed time : %d min. %d sec.<br>",
	   (endtime - starttime) / 60, (endtime - starttime) % 60);
  fprintf (stderr, "Elapsed time : %d min. %d sec.",
	   (endtime - starttime) / 60, (endtime - starttime) % 60);

  fprintf (stderr, "\n");
  fprintf (OF, "\n");
  fprintf (HF, "<br>");


  fclose (OF);
  fclose (MF);
  fclose (HF);

  fprintf (stderr, "\nOutput written in files %s and %s\n", outfile,
	   browsefile);

  exit (0);
}


int
min (int a, int b)
{
  if (a < b)
    return a;
  else
    return b;
}



int
chartoint (char car)
{
  int ret;

  switch (tolower (car))
    {
    case 'a':
      return 0;
      break;
    case 'c':
      return 1;
      break;
    case 'g':
      return 2;
      break;
    case 't':
      return 3;
      break;
    case '$':
      return 4;
      break;
    default:
      return -1;
      break;
    }
}


struct pattern *
mergesrt (struct pattern *primo, int quanti)
{
  int i, j, k;
  int first, second;
  double mid;
  struct pattern *l1, *l2;
  struct pattern *ans;

  if (quanti == 1)
    return primo;

  else
    {
      l1 = primo;
      l2 = primo;

      mid = (double) (quanti) / 2;

      if (mid > floor (mid))
	{
	  mid = mid - .5;
	  first = (int) mid;
	  second = first + 1;
	}
      else
	{
	  first = (int) mid;
	  second = (int) mid;
	}

      for (i = 0; i < first - 1; i++)
	l2 = l2->next;

      ans = l2;
      l2 = l2->next;
      ans->next = endpat;

      l1 = mergesrt (l1, first);
      l2 = mergesrt (l2, second);
      ans = merge (l1, l2);

      return ans;
    }
}

struct pattern *
merge (struct pattern *uno, struct pattern *due)
{

  struct pattern *ret, *first;
  int flag = 0;

  if ((uno->score > due->score)
      || ((uno->score == due->score) && (uno->counter >= due->counter)))
    {
      first = uno;
      uno = uno->next;
      ret = first;
      ret->next = endpat;
    }
  else
    {
      first = due;
      due = due->next;
      ret = first;
      ret->next = endpat;
    }

  while (flag == 0)
    {
      if ((uno->score > due->score)
	  || ((uno->score == due->score) && (uno->counter >= due->counter)))
	{
	  if (uno->score == -111111)
	    flag = 1;
	  else
	    {
	      ret->next = uno;
	      uno = uno->next;
	      ret = ret->next;
	      ret->next = endpat;
	    }
	}
      else
	{
	  ret->next = due;
	  due = due->next;
	  ret = ret->next;
	  ret->next = endpat;
	}
    }

  return first;
}

char
revcomp (char c)
{
  char ret;

  switch (tolower (c))
    {
    case 'a':
      ret = 'T';
      break;

    case 'c':
      ret = 'G';
      break;

    case 'g':
      ret = 'C';
      break;

    case 't':
      ret = 'A';
      break;

    default:
      ret = c;
      break;
    }

  return ret;
}

int
readfreqs (void)
{
  double checksum = 0;
  double decsum = 0;
  int pre;
  int post;
  int backpre, backpost;
  double backfreqpre, backfreqpost;
  double decfreq;
  double prefreq, postfreq;
  int sum;
  int i, j, k, n;
  int i1, i2, i3, i4, i5, i6, i7, i8;
  double totalcountsix;
  double totalcounteight;
  FILE *F;
  int tmpint;
  int quale = 0;
  char base[20] = "";
  char freqfname[40] = "";
  char decfname[20] = "";
  char decamer[10];
  int eightmer;
  int deccount;

  if (verbose == 1)
    fprintf (stderr, "\nComputing frequencies...\n");

  if (strcmp (organism, "") == 0)
    {
      fprintf (stderr, "\nUndefined organism (must add -O)\n");
      exit (0);
    }



  for (i = 0; i < strlen (organism); i++)
    organism[i] = toupper (organism[i]);


  sprintf (freqfname, "./FreqFiles/%s.8.freq", organism);

  F = fopen (freqfname, "r");

  if (F == NULL)
    {
      fprintf (stderr,
	       "\nCannot find frequency file %s\nWrong organism code, or the FreqFiles directory is not correctly positioned\n",
	       freqfname);
      exit (0);
    }

  i = 0;
  n = fscanf (F, "%*s %d\n", &tmpint);
  sum = 0;

  while (n != EOF)
    {
      sum += tmpint;
      eightmers[i] = (double) tmpint;
      i++;

      n = fscanf (F, "%*s %d\n", &tmpint);

    }

  fclose (F);

  totalcounteight = sum;

  checksum = 0;

  for (i = 0; i < 65536; i++)
    {
      eightmers[i] /= (double) totalcounteight;
      checksum += eightmers[i];
    }

  //fprintf (stderr, "\nEightmers : %g", checksum);

  sprintf (freqfname, "./FreqFiles/%s.6.freq", organism);

  F = fopen (freqfname, "r");

  if (F == NULL)
    {
      fprintf (stderr, "\nCannot find frequency file %s\n", freqfname);
      exit (0);
    }

  i = 0;
  sum = 0;
  n = fscanf (F, "%*s %d\n", &tmpint);

  while (n != EOF)
    {
      sum += tmpint;
      sixmers[i] = (double) tmpint;
      i++;

      n = fscanf (F, "%*s %d\n", &tmpint);

    }

  totalcountsix = sum;

  for (i = 0; i < 4096; i++)
    sixmers[i] /= (double) totalcountsix;

  if (winlen >= 10)
    {
      for (i = 0; i < 4; i++)
	{
	  decamer[0] = inttochar (i);
	  eightmer = 0;

	  for (i1 = 0; i1 < 4; i1++)
	    {
	      decamer[1] = inttochar (i1);
	      eightmer += (int) pow (4.0, 7.0) * i1;

	      for (i2 = 0; i2 < 4; i2++)
		{
		  decamer[2] = inttochar (i2);
		  eightmer += (int) pow (4.0, 6.0) * i2;
		  for (i3 = 0; i3 < 4; i3++)
		    {
		      decamer[3] = inttochar (i3);
		      eightmer += (int) pow (4.0, 5.0) * i3;
		      for (i4 = 0; i4 < 4; i4++)
			{
			  decamer[4] = inttochar (i4);
			  eightmer += (int) pow (4.0, 4.0) * i4;
			  for (i5 = 0; i5 < 4; i5++)
			    {
			      decamer[5] = inttochar (i5);
			      eightmer += (int) pow (4.0, 3.0) * i5;
			      for (i6 = 0; i6 < 4; i6++)
				{
				  decamer[6] = inttochar (i6);
				  eightmer += (int) pow (4.0, 2.0) * i6;
				  for (i7 = 0; i7 < 4; i7++)
				    {
				      decamer[7] = inttochar (i7);
				      eightmer += (int) pow (4.0, 1.0) * i7;

				      for (i8 = 0; i8 < 4; i8++)
					{
					  decamer[8] = inttochar (i8);
					  pre =
					    eightmer / 4 + (int) pow (4.0,
								      7.0) *
					    i;


					  backpre = 0;

					  backfreqpre = 0.0;

					  for (k = 0; k < 4; k++)
					    {
					      backpre =
						eightmer / 4 + (int) pow (4.0,
									  7.0)
						* k;
					      backfreqpre +=
						eightmers[backpre];
					    }


					  prefreq =
					    eightmers[pre] / backfreqpre;

					  eightmer +=
					    (int) pow (4.0, 0.0) * i8;




					  for (j = 0; j < 4; j++)
					    {
					      decfreq =
						prefreq * eightmers[eightmer];
					      decamer[9] = inttochar (j);

					      post =
						(eightmer -
						 ((int) pow (4.0, 7.0) *
						  i1)) * 4;
					      backpost =
						(eightmer -
						 ((int) pow (4.0, 7.0) *
						  i1)) * 4;

					      post += j;
					      backfreqpost = 0;

					      for (k = 0; k < 4; k++)
						{
						  backpost = backpost + k;
						  backfreqpost +=
						    eightmers[backpost];

						  backpost -= k;
						}

					      decfreq = decfreq *
						(eightmers[post] /
						 backfreqpost);

					      postfreq =
						(eightmers[post] /
						 backfreqpost);

					      decsum += decfreq;

					      deccount =
						eightmer * 4 +
						(i * (int) pow (4.0, 9.0)) +
						j;

					      tenmers[deccount] = decfreq;

					      //fprintf (stderr, "%d %s %f %d %f %f %.10f\n", deccount,decamer,prefreq,eightmer,eightmers[eightmer],postfreq,decfreq);

					      //fprintf (DF, "%.10f\n", decfreq);
					      //if (decfreq <= 0)
					      //{
					      //fprintf (stderr, "\n%g\n",
					      //decfreq);
					      //getchar ();
					      //}

					    }
					  eightmer -=
					    (int) pow (4.0, 0.0) * i8;
					}
				      eightmer -= (int) pow (4.0, 1.0) * i7;
				    }
				  eightmer -= (int) pow (4.0, 2.0) * i6;
				}
			      eightmer -= (int) pow (4.0, 3.0) * i5;
			    }
			  eightmer -= (int) pow (4.0, 4.0) * i4;
			}
		      eightmer -= (int) pow (4.0, 5.0) * i3;
		    }
		  eightmer -= (int) pow (4.0, 6.0) * i2;
		}

	      eightmer -= (int) pow (4.0, 7.0) * i1;
	    }
	}
      //fprintf (stderr, "%.8f\n", decsum);
    }






  return 1;


}

double
compsix (char *pat, char *pos, int err)
{
  int i, j, k;
  int jj, pp;
  int nn;
  double toret = 0;
  int *appro;
  int totapp;

  nn = strlen (pat) - 1;
  totapp = (int) pow (4.0, err);

  appro = (int *) malloc ((totapp+1) * sizeof (int));

  for (i = 0; i <= totapp; i++)
    appro[i] = 0;



  for (jj = 0; jj <= nn; jj++)
    {
      if (pos[nn - jj] != '*')
	{
	  for (i = 0; i < totapp; i++)
	    appro[i] += chartoint ((pat[nn - jj])) * (int) pow (4.0, jj);
	}
      else
	{
	  for (i = 0; i < totapp; i++)
	    appro[i] += (i % 4) * (int) pow (4.0, jj);
	}
    }

  for (i = 0; i < totapp; i++)
    toret += sixmers[appro[i]];


  return toret;
}

double
compfreqs (char *pat, char *pos, int err)
{
  int i, j, k;
  int jj, pp;
  int nn;
  double toret = 0;
  int *appro;
  int totapp;
  int contast = 0;

  nn = strlen (pat) - 1;
  totapp = (int) pow (4.0, err);

  appro = (int *) malloc ((totapp+1) * sizeof (int));

  for (i = 0; i <= totapp; i++)
    appro[i] = 0;



  for (jj = 0; jj <= nn; jj++)
    {
      if (pos[nn - jj] != '*')
	{
	  for (i = 0; i < totapp; i++)
	    appro[i] += chartoint ((pat[nn - jj])) * (int) pow (4.0, jj);
	}
      else
	{
	  for (i = 0; i < totapp; i++)
	    appro[i] += ((i/((int)pow(4.0,contast))) % 4) * (int) pow (4.0, jj);

            contast++;
	}
    }

  if (nn == 7)
    {
      for (i = 0; i < totapp; i++)
    {
	toret += eightmers[appro[i]];

      //if(err > 1)
       //fprintf(stderr, "%d\n",appro[i]);
    }
    }


  if (nn == 9)
    {
      for (i = 0; i < totapp; i++)
    {
	toret += tenmers[appro[i]];
      //if(err > 1)
       //fprintf(stderr, "%d\n",appro[i]);
    }
    }

  //if(err > 1)
  //getchar();

  free (appro);

  return toret;
}


double
exactfreqs (char *pat, int errs)
{
  int i, j, k, jj, kk;
  int patno = 0;
  double toret = 0.0;
  char ast[9];
  char longast[13];
  char empty[9] = "        ";
  char longempty[13] = "            ";


  bzero (ast, 9);
  bzero (longast, 13);



  if (strlen (pat) == 6)
    {
      for (jj = 0; jj < 6; jj++)
	patno += chartoint (tolower (pat[5 - jj])) * (int) pow (4.0, jj);

      if (errs == 0)
	{

	  return sixmers[patno];
	}

      if (errs == 1)
	{
	  for (j = 0; j < 6; j++)
	    {
	      strcpy (ast, empty);
	      ast[j] = '*';

	      toret += compsix (pat, ast, 1);
	    }
	  return (double) toret;
	}


    }




  if (strlen (pat) == 8)
    {
      for (jj = 0; jj < 8; jj++)
	patno += chartoint (tolower (pat[7 - jj])) * (int) pow (4.0, jj);

      if (errs == 0)
	{

	  return eightmers[patno];
	}

      if (errs == 1)
	{
	  for (j = 0; j < 8; j++)
	    {
	      strcpy (ast, empty);
	      ast[j] = '*';

	      toret += compfreqs (pat, ast, 1);
	    }
	  return (double) toret;
	}

      if (errs == 2)
	{
	  for (j = 0; j < 7; j++)
	    {
	      strcpy (ast, empty);
	      ast[j] = '*';

	      for (k = j + 1; k < 8; k++)

		{
		  ast[k] = '*';
		  if (k > j + 1)
		    ast[k - 1] = ' ';

		  toret += compfreqs (pat, ast, 2);
		}
	    }
	  return (double) toret;
	}
    }

  if (strlen (pat) == 10)
    {
      if (errs == 0)
	{
	  patno = 0;

	  for (jj = 0; jj < 10; jj++)
	    patno += chartoint (tolower (pat[9 - jj])) * (int) pow (4.0, jj);
	  //toret += newtwmers (pat, longempty,0);
	  toret = tenmers[patno];

	  return (double) toret;
	}
      if (errs == 1)
	{
	  for (j = 0; j < 10; j++)
	    {
	      strcpy (longast, longempty);
	      longast[j] = '*';

	      toret += compfreqs (pat, longast, 1);
	    }

	  return (double) toret;
	}
      if (errs == 2)
	{
	  for (j = 0; j < 9; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 10; k++)
		{
		  longast[k] = '*';

		  if (k > j + 1)
		    longast[k - 1] = ' ';
		  toret += compfreqs (pat, longast, 2);
		}

	    }
	  return (double) toret;
	}
      if (errs == 3)
	{
	  for (j = 0; j < 8; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 9; k++)
		{
		  longast[k] = '*';

		  //if (k > j + 1)
		    //longast[k - 1] = ' ';

		  for (i = k + 1; i < 10; i++)
		    {
		      longast[i] = '*';

		      //if (i > k + 1)
			//longast[i - 1] = ' ';

		      toret += compfreqs (pat, longast, 3);

                      //if(i == 9)
                        longast[i] = ' ';

		    }

                  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
    }
  if (strlen (pat) == 12)
    {
      if (errs == 0)
	{
	  strcpy (longast, longempty);
	  toret += newtwmers (pat, longast, 0);

	  return (double) toret;
	}
      if (errs == 1)
	{
	  for (j = 0; j < 12; j++)
	    {
	      strcpy (longast, longempty);
	      longast[j] = '*';

	      toret += newtwmers (pat, longast, 1);
	    }

	  return (double) toret;
	}
      if (errs == 2)
	{
	  for (j = 0; j < 11; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 12; k++)
		{
		  longast[k] = '*';
		  if (k > j + 1)
		    longast[k - 1] = ' ';
		  toret += newtwmers (pat, longast, 2);

                   longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
      if (errs == 3)
	{
	  for (j = 0; j < 10; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 11; k++)
		{
		  longast[k] = '*';

		  //if (k > j + 1)
		    //longast[k - 1] = ' ';

		  for (i = k + 1; i < 12; i++)
		    {
		      longast[i] = '*';

		      //if (i > k + 1)
			//longast[i - 1] = ' ';

		      toret += newtwmers (pat, longast, 3);

                        longast[i] = ' ';

		    }
                  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
      if (errs == 4)
	{
	  for (j = 0; j < 9; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 10; k++)
		{
		  longast[k] = '*';

		  //if (k > j + 1)
		    //longast[k - 1] = ' ';

		  for (i = k + 1; i < 11; i++)
		    {
		      longast[i] = '*';

		      //if (i > k + 1)
			//longast[i - 1] = ' ';


		      for (kk = i + 1; kk < 12; kk++)
			{
			  longast[kk] = '*';

			  //if (kk > i + 1)
			    //longast[kk - 1] = ' ';

			  toret += newtwmers (pat, longast, 4);

                           longast[kk] = ' ';
			}

                       longast[i] = ' ';
		    }

                   longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
    }


  return 5859.0;
}


int
pattolong (char *pat)
{
  int i, toret;
  int len;

  len = strlen (pat) - 1;
  toret = 0;

  for (i = 0; i <= len; i++)
    toret += chartoint ((pat[len - i])) * (int) pow (4.0, i);


  return toret;
}

double
tentotwelve (char *pat)
{
  int i, j, len;
  char mid[11], post[11], pre[11], back[11];
  double startfreq = 0;
  double frontfreq = 0;
  double endfreq = 0;
  double norm = 0;
  int prelong, midlong, postlong;
  int primo, ultimo;

  bzero (post, 11);
  bzero (pre, 11);
  bzero (back, 11);
  bzero (mid, 11);

  len = strlen (pat);


  for (i = 0; i < 10; i++)
    {
      mid[i] = pat[i + 1];
      post[i] = pat[i + 2];
      pre[i] = pat[i];
    }
  strcpy (back, pre);

  prelong = pattolong (pre);
  frontfreq = tenmers[prelong];
  norm = frontfreq;
  primo = chartoint (pre[0]);
//ATTENZIONE
  for (i = 0; i < 4; i++)
    {
      back[0] = alphabet[i];

      prelong -= (primo * pow (4.0, 9.0));

      if (alphabet[i] != pre[0])
	{
	  primo = i;
	  prelong += (primo * pow (4.0, 9.0));
	  norm += tenmers[prelong];
	}
    }

  frontfreq /= norm;

  strcpy (back, post);
  postlong = pattolong (post);
  endfreq = tenmers[postlong];
  primo = chartoint (post[9]);
  norm = endfreq;

  for (i = 0; i < 4; i++)
    {
      back[9] = alphabet[i];
      postlong -= primo;

      if (alphabet[i] != post[9])
	{
	  primo = i;
	  postlong += primo;
	  norm += tenmers[postlong];
	}
    }

  endfreq /= norm;

  startfreq = tenmers[pattolong (mid)];

  return startfreq * frontfreq * endfreq;
}

double
newtwmers (char *inpat, char *pos, int err)
{
  int i, j, jj, k, len;
  int asts[4];
  char pre[9], post[9];
  char pat[13];
  double toret = 0;
  double freqs[4];
  int patmis;
  int paterr;

  for (i = 0; i < 4; i++)
    freqs[i] = 0.0;

  len = strlen (inpat);

  strcpy (pat, inpat);
  j = 0;

  for (i = 0; i < len; i++)
    {
      if (pos[i] == '*')
	asts[j++] = i;
    }

  bzero (post, 9);
  bzero (pre, 9);

  if (err == 0)
    {
      return tentotwelve (pat);
    }

  if (err == 1)
    {

      for (i = 0; i < 4; i++)
	{
	  paterr = 0;
	  pat[asts[0]] = alphabet[i];


	  if (pat[asts[0]] != inpat[asts[0]])
	    toret += tentotwelve (pat);

	}
      return toret;
    }

  if (err == 2)
    {
      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];

	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      for (j = 0; j < 4; j++)
		{
		  pat[asts[1]] = alphabet[j];


		  if (pat[asts[1]] != inpat[asts[1]])
		    toret += tentotwelve (pat);

		}
	    }
	}
      return toret;
    }

  if (err == 3)
    {
      toret += tentotwelve (inpat);

      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];

	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      for (j = 0; j < 4; j++)
		{
		  pat[asts[1]] = alphabet[j];


		  if (pat[asts[1]] != inpat[asts[1]])
		    {
		      for (k = 0; k < 4; k++)
			{
			  pat[asts[2]] = alphabet[k];

			  if (pat[asts[1]] != inpat[asts[1]])
			    toret += tentotwelve (pat);

			}
		    }
		}
	    }
	}
      return toret;
    }
  if (err == 4)
    {
      toret += tentotwelve (inpat);


      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];


	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      for (j = 0; j < 4; j++)
		{
		  pat[asts[1]] = alphabet[j];

		  if (pat[asts[1]] != inpat[asts[1]])
		    {
		      for (k = 0; k < 4; k++)
			{
			  pat[asts[2]] = alphabet[k];

			  if (pat[asts[2]] != inpat[asts[2]])
			    {
			      for (jj = 0; jj < 4; jj++)
				{
				  pat[asts[3]] = alphabet[jj];

				  if (pat[asts[3]] != inpat[asts[3]])
				    toret += tentotwelve (pat);
				}
			    }
			}
		    }
		}
	    }
	}
      return toret;
    }





}

char *
reversepat (char *pat)
{

  int len;
  int i, j, k;
  char *rev;

  len = strlen (pat) + 1;

  rev = (char *) malloc (len + 1);

  bzero (rev, len + 1);

  for (i = 0; i <= len - 2; i++)
    {
      rev[i] = revcomp (pat[len - 2 - i]);
    }

  return rev;

}

char
inttochar (int num)
{
  if (num == 0)
    return 'A';
  if (num == 1)
    return 'C';
  if (num == 2)
    return 'G';
  if (num == 3)
    return 'T';



}

double
exactfreqswithcheck (char *pat, int errs, char *check)
{
  int i, j, k, jj, kk;
  char *partial;
  int patno = 0;
  double toret = 0.0;
  char ast[9];
  char longast[13];
  char empty[9] = "        ";
  char longempty[13] = "            ";


  bzero (ast, 9);
  bzero (longast, 13);

  partial = (char *) malloc (13 * sizeof (char));

  bzero (partial, 13);



  if (strlen (pat) == 6)
    {
      for (jj = 0; jj < 6; jj++)
	patno += chartoint (tolower (pat[5 - jj])) * (int) pow (4.0, jj);

      if (errs == 0)
	{
	  if (strcmp (pat, check) != 0)
	    return sixmers[patno];
	  else
	    return 0.0;
	}

      if (errs == 1)
	{
	  for (j = 0; j < 6; j++)
	    {
	      strcpy (ast, empty);
	      ast[j] = '*';

	      toret += compsix (pat, ast, 1);
	    }
	  return (double) toret;
	}


    }




  if (strlen (pat) == 8)
    {
      for (jj = 0; jj < 8; jj++)
	patno += chartoint (tolower (pat[7 - jj])) * (int) pow (4.0, jj);

      if (errs == 0)
	{

	  if (strcmp (pat, check) != 0)
	    return eightmers[patno];
	  else
	    return 0.0;
	}

      if (errs == 1)
	{
	  for (j = 0; j < 8; j++)
	    {
	      strcpy (ast, empty);
	      ast[j] = '*';

	      toret += compfreqswithcheck (pat, ast, 1, check);
	    }
	  return (double) toret;
	}

      if (errs == 2)
	{
	  for (j = 0; j < 7; j++)
	    {
	      strcpy (ast, empty);
	      ast[j] = '*';

	      for (k = j + 1; k < 8; k++)

		{
		  ast[k] = '*';

		  toret += compfreqswithcheck (pat, ast, 2, check);
		  ast[k] = ' ';
		}
	    }
	  return (double) toret;
	}
    }

  if (strlen (pat) == 10)
    {
      if (errs == 0)
	{
	  patno = 0;

	  for (jj = 0; jj < 10; jj++)
	    patno += chartoint (tolower (pat[9 - jj])) * (int) pow (4.0, jj);
	  //toret += newtwmers (pat, longempty,0);
	  toret = tenmers[patno];

	  return (double) toret;
	}
      if (errs == 1)
	{
	  for (j = 0; j < 10; j++)
	    {
	      strcpy (longast, longempty);
	      longast[j] = '*';

	      toret += compfreqswithcheck (pat, longast, 1, check);
	    }

	  return (double) toret;
	}
      if (errs == 2)
	{
	  for (j = 0; j < 9; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 10; k++)
		{
		  longast[k] = '*';

		  toret += compfreqswithcheck (pat, longast, 2, check);
		  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
      if (errs == 3)
	{
	  for (j = 0; j < 8; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 9; k++)
		{
		  longast[k] = '*';


		  for (i = k + 1; i < 10; i++)
		    {
		      longast[i] = '*';


		      toret += compfreqswithcheck (pat, longast, 3, check);

                      longast[i] = ' ';

		    }
		  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
    }
  if (strlen (pat) == 12)
    {
      if (errs == 0)
	{
	  strcpy (longast, longempty);
	  if (hamming (pat, check, 0) == 1)
	    toret += newtwmers (pat, longast, 0);
	  else
	    toret = 0;

	  return (double) toret;
	}
      if (errs == 1)
	{
	  for (j = 0; j < 12; j++)
	    {
	      strcpy (longast, longempty);
	      longast[j] = '*';

	      toret += newtwmerswithcheck (pat, longast, 1, check);
	    }

	  return (double) toret;
	}
      if (errs == 2)
	{
	  for (j = 0; j < 11; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 12; k++)
		{
		  longast[k] = '*';

		  toret += newtwmerswithcheck (pat, longast, 2, check);

                  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
      if (errs == 3)
	{
	  for (j = 0; j < 10; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 11; k++)
		{
		  longast[k] = '*';

		  for (i = k + 1; i < 12; i++)
		    {
		      longast[i] = '*';

		      toret += newtwmerswithcheck (pat, longast, 3, check);

		      longast[i] = ' ';

		    }
		  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
      if (errs == 4)
	{
	  for (j = 0; j < 9; j++)
	    {
	      strcpy (longast, longempty);

	      longast[j] = '*';

	      for (k = j + 1; k < 10; k++)
		{
		  longast[k] = '*';


		  for (i = k + 1; i < 11; i++)
		    {
		      longast[i] = '*';



		      for (kk = i + 1; kk < 12; kk++)
			{
			  longast[kk] = '*';


			  toret +=
			    newtwmerswithcheck (pat, longast, 4, check);

			  longast[kk] = ' ';

			}
		      longast[i] = ' ';
		    }

		  longast[k] = ' ';
		}

	    }
	  return (double) toret;
	}
    }


  return 5859.0;
}

double
compfreqswithcheck (char *pat, char *pos, int err, char *check)
{
  int i, j, k;
  int jj, pp;
  int nn;
  double toret = 0;
  int *appro;
  int totapp;
  char **partial;
  int contast = 0;

  nn = strlen (pat) - 1;
  totapp = (int) pow (4.0, err);

  partial = (char **) malloc ((totapp + 1) * sizeof (char *));

  for (i = 0; i <= totapp; i++)
    partial[i] = (char *) malloc ((nn + 2) * sizeof (char));

  for (i = 0; i <= totapp; i++)
    bzero (partial[i], nn + 2);

  appro = (int *) malloc ((totapp+1) * sizeof (int));

  for (i = 0; i <= totapp; i++)
    appro[i] = 0;

  for (i = 0; i <= totapp; i++)
    strcpy (partial[i], pat);


 for (jj = 0; jj <= nn; jj++)
    {
      if (pos[nn - jj] != '*')
        {
          for (i = 0; i < totapp; i++)
            appro[i] += chartoint ((pat[nn - jj])) * (int) pow (4.0, jj);
        }
      else
        {
          for (i = 0; i < totapp; i++)
        {
            appro[i] += ((i/((int)pow(4.0,contast))) % 4) * (int) pow (4.0, jj);
            partial[i][jj] = inttochar (((i/((int)pow(4.0,contast))) % 4));
        }

            contast++;
        }
    }

  if (nn == 7)
    {
      for (i = 0; i < totapp; i++)
	{
	  if ((hamming (partial[i], check, err) != 0))
	    toret += eightmers[appro[i]];

          //fprintf(stderr, "%s\n",partial[i]);
	}
      //getchar();
    }
  if (nn == 9)
    {
      for (i = 0; i < totapp; i++)
	{
	  if ((hamming (partial[i], check, err) != 0))
	    toret += tenmers[appro[i]];

          //fprintf(stderr, "%s\n",partial[i]);
	}
      //getchar();
    }


  free (appro);

  for (i = 0; i <= totapp; i++)
    free (partial[i]);

  free(partial);
  return toret;
}

int
hamming (char *pat1, char *pat2, int thre)
{
  int i, len;
  int err = 0;

  len = strlen (pat1);

  for (i = 0; i < len; i++)
    {
      if (pat1[i] != pat2[i])
	err++;

      if (err > thre)
	break;

    }

  if (err > thre)
    return 1;
  else
    return 0;

}

double
newtwmerswithcheck (char *inpat, char *pos, int err, char *check)
{
  int i, j, jj, k, len;
  int asts[4];
  char pre[9], post[9];
  char pat[13];
  char **partial;
  double toret = 0;

  len = strlen (inpat);


  strcpy (pat, inpat);
  j = 0;

  for (i = 0; i < len; i++)
    {
      if (pos[i] == '*')
	asts[j++] = i;
    }

  bzero (post, 9);
  bzero (pre, 9);

  if (err == 0)
    {
      if ((hamming (pat, check, err) == 1))
	return tentotwelve (pat);
      else
	return 0;
    }

  if (err == 1)
    {

      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];

	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      if ((hamming (pat, check, err) == 1))
		toret += tentotwelve (pat);
	    }
	}
      return toret;
    }

  if (err == 2)
    {
      toret += tentotwelve (inpat);
      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];

	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      for (j = 0; j < 4; j++)
		{
		  pat[asts[1]] = alphabet[j];

		  if (pat[asts[1]] != inpat[asts[1]])
		    {
		      if ((hamming (pat, check, err) == 1)
			  && (strcmp (pat, inpat) != 0))
			toret += tentotwelve (pat);

		    }
		}
	    }
	}
      return toret;
    }

  if (err == 3)
    {
      toret += tentotwelve (inpat);

      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];

	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      for (j = 0; j < 4; j++)
		{
		  pat[asts[1]] = alphabet[j];


		  if (pat[asts[1]] != inpat[asts[1]])
		    {
		      for (k = 0; k < 4; k++)
			{
			  pat[asts[2]] = alphabet[k];

			  if (pat[asts[2]] != inpat[asts[2]])
			    {
			      if ((hamming (pat, check, err) == 1)
				  && (strcmp (pat, inpat) != 0))
				toret += tentotwelve (pat);

			    }
			}
		    }
		}
	    }
	}
      return toret;
    }
  if (err == 4)
    {
      toret += tentotwelve (inpat);
      for (i = 0; i < 4; i++)
	{
	  pat[asts[0]] = alphabet[i];

	  if (pat[asts[0]] != inpat[asts[0]])
	    {
	      for (j = 0; j < 4; j++)
		{
		  pat[asts[1]] = alphabet[j];


		  if (pat[asts[1]] != inpat[asts[1]])
		    {
		      for (k = 0; k < 4; k++)
			{
			  pat[asts[2]] = alphabet[k];

			  if (pat[asts[2]] != inpat[asts[2]])
			    {
			      for (jj = 0; jj < 4; jj++)
				{
				  pat[asts[3]] = alphabet[jj];

				  if (pat[asts[3]] != inpat[asts[3]])
				    {
				      if ((hamming (pat, check, err) == 1)
					  && (strcmp (pat, inpat) != 0))
					toret += tentotwelve (pat);
				    }
				}
			    }
			}
		    }
		}
	    }
	}
      return toret;
    }
}

int
isvalid (char car)
{
  int i, j, ret = 1;

  if ((car != 'A') && (car != 'C') && (car != 'G') && (car != 'T'))
    ret = 0;

  return ret;
}

int
checkmotifanderror (int motlen, int errs)
{
  switch (motlen)
    {
    case 6:
      if (errs > 1)
	return 0;
      else
	return 1;
      break;
    case 8:
      if (errs > 3)
	return 0;
      else
	return 1;
      break;
    case 10:
      if (errs > 4)
	return 0;
      else
	return 1;
      break;
    case 12:
      if (errs > 4)
	return 0;
      else
	return 1;
      break;
    default:
      return 0;
      break;
    }
}
