/* lib.d file opt.c */
/* Finite-state automaton optimizer.

This file contains a minimization procedure for finite state automata.
Its input is an automaton with various categories of distinguished
states (in standard examples, there would be two categories, the
accepting and non-accepting states).  Its output is another finite-state
automaton with no more than the original number of states that has
exactly the same behavior, in that the new automaton will always be
in the same type of state as the original, after reading the same
string.
*/
# include <stdio.h>
# include "defs.h"
# include "list.h"
# include "word.h"
# include "lg.h"
# define NULL 0
#define DONE ((block *) -1) /* used for |wait_l| in blocks not on the list */

/* Implementation.

We use a procedure based on the ``partitioning'' algorithm found in Aho,
Hopcroft, and Ullman's ``The Design and Analysis of Computer Algorithms''.
The algorithm requires running time proportional to |a@,n@,log(n)| where
|a| is the number of symbols in the alphabet and |n| is the number of
initial states.  We need an amount of intermediate storage roughly
twice as large as the original graph.

Here is the algorithm, in a nutshell: start off with the optimistic assumption
that each category can be a single state.  We will call the new, combined,
states |block|s.  Put each initial block on a list |waiting|.  Loop while
|waiting| is non-empty, removing each block in turn and processing it once for
each symbol in the alphabet.  The processing consists of finding the inverse of
the block under the symbol -- finding all states that transfer to any state in
this block on receiving this input symbol.  If a state in the inverse belongs to
some block (including this block), but other states in that block aren't in the
inverse,
the block must obviously be split.  We then split it in two parts -- those
states that were part of the inverse and those that weren't.  If the original
block was on |waiting|, we put both halves back on.  If the original wasn't,
we only put the smaller chunk onto |waiting|.

During the partitioning, we don't use the original FSA states (|vertex|s)
at all.  Instead we use our own blocks (which we allocate on entrance
and release before we return).  Corresponding to the original states are
|FSA_OPTstruct|s (for |member| states),
which are members of new, combined, states
called |block|s.
 Initially, there is one |FSA_OPTstruct| for each original state,
and one |block| for each category.  After partitioning, there may be more
blocks, one for each distinct final state.

An |FSA_OPTstruct| contains pointers to its corresponding |vertex| and
the |block| that owns it.  It also contains pointers for a doubly-linked
member list (linking together all members of a block) and a singly-linked
splitting list (linking together members that are part of the inverse
of some other block).  Finally, it contains an array of linked lists
giving the inverse transition function for each symbol of the alphabet.

A |block| contains a |count| of its members, the list headers for
its full list of member |FSA_OPTstruct|s and the subset to be split off,
and links on three other lists of blocks: a list of all blocks (|blk_h|),
a list of those whose inverses must be checked (|waiting|), and a list
of those that must now be split (|jlist|).


 The main procedure is |partition()|.  It uses one subroutine |split|, to
split a block.

The list |waiting| has the same meaning as in the Aho, Hopcroft, and
Ullman algorithm.  Their list ``j'' we have called |jlist|, and their
list ``INVERSE'' we never explicitly form.  Instead we form the subsets
of the inverse belonging to each current block, which is the block's
|split_h| list.

Note that during the processing, the block we're processing hasn't been
removed from |waiting|.  In fact |waiting| is the only pointer we have
to that block.

We use a special marker |DONE| (which must be distinct from |NULL|) as the
link field for any block that's not currently on the |waiting| list.
*/


static block *blk_h; /* the list of all blocks, linked through |blk_l| */
int nsyms; /* number of symbols; set from |get_degree(fsa)| by |fsa_opt| */
vindex failure=UNDEFINED; /* extra non-accepting vertex created by |fsa_opt|;
			|part_init| uses it as the target of every
			|UNDEFINED| transition */
/* Forward declarations of the static routines defined later in this file. */
static FSA_OPTstruct * FSA_OPTstruct_create PARMS ((VOID));
static void FSA_OPTstruct_kill PARMS ((vindex));
static void FSA_OPTstruct_print PARMS ((vindex));
static void partition PARMS ((VOID));
/* Split subroutine.  Called with the block to split, it returns a pointer
	to a newly-allocated block containing all the states on the original
block's |split_h| list.  It removes these states from the original block's
	|member_h| list.  The count is computed for the new block, the 
	member list is set up, and the block is added to the main block list.
	*/
static block *split (b) block *b; {
   block *fresh;          /* the block being built */
   FSA_OPTstruct *cur;    /* walks the |split_h| chain of |b| */
   FSA_OPTstruct **tail;  /* link field that will receive the next member */

   /* |b->count| and |b->split_h| are not touched here; the caller
      maintains them. */
   fresh = vzalloc1(block);
   assert (fresh->count==0 && fresh->split_h==NULL);

   /* push the new block onto the front of the list of all blocks */
   fresh->blk_l = blk_h;
   blk_h = fresh;

   tail = &(fresh->member_h);
   cur = b->split_h;
   while ((*tail = cur) != NULL) {
      /* detach |cur| from the member list it is currently on */
      *(cur->member_b) = cur->member_l;
      if (cur->member_l != NULL)
         cur->member_l->member_b = cur->member_b;

      /* hook it onto the end of the new block's member list; its
         |member_l| is overwritten by the loop test on the next pass,
         which also NULL-terminates the list after the last member */
      cur->member_b = tail;
      tail = &(cur->member_l);

      /* the state now belongs to the new block */
      cur->blk = fresh;
      fresh->count++;

      cur = cur->split_l;
   }
   return fresh;
}
	/* |newstate| subroutine, used by |fsa_opt| to allocate a new state
	and possibly a new block header (if this is for a category we haven't
	yet seen).  We can tell if a category has been seen before by looking
	at the |ctbl| array, passed as an argument, which records the block
	header for each category (initially NULL).  The array may be released
	immediately after the last call to |newstate|.
	*/
static void newstate (ctbl, v, cat) 
	block *ctbl[]; 
	vertex *v; 
	int cat; 
{
	block *b; 
	FSA_OPTstruct * s;
	s = (FSA_OPTstruct *)(*FSA_OPTstruct_create)();
	v->mp = (dp)s;
	assert (s->inverse[0].h==NULL && s->member_l==NULL);
   s->orig = v;
   b = ctbl[cat];
   if (b == NULL) { /* none in this category -- start a new block */
      b = vzalloc1(block);
      assert (b->count==0 && b->split_h==NULL);
      b->wait_l = b->blk_l = blk_h;
      ctbl[cat] = blk_h = b;
   } else { /* add to the existing block */
      (s->member_l = b->member_h)->member_b = &(s->member_l);
   }
   s->blk = b;
   s->member_b = &(b->member_h);
   b->count++;
   b->member_h = s;
}

	/* Initialization subroutine, called by |fsa_opt| immediately before
	calling |partition|.  It sets up the |inverse| arrays for each state,
	to be used by |partition|.  Transitions that are |UNDEFINED| in the
	graph are treated as going to the |failure| vertex.
	This is only the forward declaration; the definition is at the end
	of the file.
	*/
static void part_init PARMS((VOID));

/* Function table for the per-vertex |FSA_OPTstruct| data used by the
   optimizer.  Only the create, kill and print slots are filled in; the
   other slots are left as 0.
   NOTE(review): presumably this is the table that the |FSA_OPT| tag
   assigned to |fntab| in |fsa_opt| refers to -- confirm in the headers. */
mp_fntab FSA_OPT_fntab =
{
	0,
	(V2DP)FSA_OPTstruct_create,	/* allocate an |FSA_OPTstruct| */
	FSA_OPTstruct_kill,		/* release a vertex's |FSA_OPTstruct| */
	FSA_OPTstruct_print,		/* print hook; the body is empty */
	0,
	0
};

/* Release the |FSA_OPTstruct| hanging off vertex |v| and clear |v->mp|.
   The vertex must carry such a record and be tagged |FSA_OPT|; both
   conditions are asserted. */
static void
FSA_OPTstruct_kill(v)
	vindex v;
{
	FSA_OPTstruct *opt;

	assert(v->mp);
	opt = (FSA_OPTstruct *)(v->mp);
	assert((v->fntab) == FSA_OPT);

	/* the inverse table is a separate allocation: free it first ... */
	Free_dp((dp)(opt->inverse));
	opt->inverse = 0;

	/* ... then the record itself */
	Free_dp((dp)(v->mp));
	v->mp = 0;
}

/* Allocate an |FSA_OPTstruct| together with its |inverse| table and
   return it.  The table has |nsyms+1| entries; the code in this file
   indexes it with 0..|nsyms|-1.  |nsyms| must therefore be set before
   this is called. */
static FSA_OPTstruct *
FSA_OPTstruct_create()
{
	FSA_OPTstruct *fresh = vzalloc1(FSA_OPTstruct);

	fresh->inverse = vzalloc2(inv_array,nsyms+1);
	return fresh;
}

/* Print hook for the |FSA_OPT_fntab| table.  Intentionally empty:
   nothing is printed for the optimizer's per-vertex data, and |v| is
   not used. */
static void
FSA_OPTstruct_print(v)
	vindex v;
{
}

/* Minimize the automaton |fsa| in place.

   |ncats| is the number of state categories; every vertex's category
   must be less than |ncats| (asserted).  |fsa| must have type
   |BASIC_FSA| (asserted) and is given type |OPT_FSA| on return.

   Outline:
   1. Create an extra non-accepting |failure| vertex and collect it,
      together with every vertex delivered by the breadth-first
      traverser, on |states1|.
   2. Give each collected vertex an |FSA_OPTstruct| and put it in the
      block for its category (|newstate|).  Any data already attached to
      the vertex is released first with the graph's own kill function.
   3. Build the inverse transition lists (|part_init|) and refine the
      blocks (|partition|).
   4. Redirect every defined transition to the representative of its
      target's block (the state at the head of that block's member
      list); transitions into the failure block become |UNDEFINED|.
      |adj[0]| of each vertex is set to the representative of its own
      block.
   5. Kill every vertex that is not its own representative, moving the
      base point to the representative if necessary, then kill the
      representative of the failure block unless it is the base point.
   6. Free the lists and all blocks.

   NOTE(review): the global |failure| is not reset on return and may
   refer to a vertex that has been killed.
*/
void fsa_opt (ncats, fsa) int ncats; lg * fsa; {
	list states1;	/* vertices waiting to be given an |FSA_OPTstruct| */
	list states2;	/* vertices whose transitions must be redirected */
	list states3;	/* vertices to be examined for deletion */
	int s;
	vindex v;
	vindex t;
	vindex newfailure; /* representative of the block containing |failure| */
	bfs_traverser tfsa;
	block **ctbl; /* an allocated array of blocks, one for each category */
	block *b, *nxtb; 
	auto void (*mp_kill_fnp) PARMS ((vindex));

	nsyms=get_degree(fsa);

	assert(get_type(fsa)==BASIC_FSA);
	list_init(&states1,VINDEX,FIFO);
	list_init(&states2,VINDEX,FIFO);
	list_init(&states3,VINDEX,FIFO);
	ctbl = vzalloc2(block *, ncats); /* allocate the array */
	blk_h = NULL;

	/* remember how to release whatever is attached to the vertices now */
	if ((fsa->fntab)!=0)
		mp_kill_fnp = (fsa)->fntab->mp_kill;
	else
		mp_kill_fnp=0;
	
	failure=lg_vertex_create(fsa);
	set_category(failure,NONACCEPTSTATE);
	list_insert(&states1,(dp)&failure);
	bfs_init(&tfsa,fsa);
	while (bfs_next(&tfsa,&v))  
		list_insert(&states1,(dp)&v);
	bfs_clear(&tfsa);

	while (list_delget_first(&states1,(dp)&v)){
		/* The kill function may be absent (no table, or an empty
		   slot); calling through a null pointer would be undefined,
		   so the old data is only released when there is a function
		   to release it with. */
		if (v->mp && mp_kill_fnp)
			(*mp_kill_fnp)(v);
		v->fntab = FSA_OPT;
		assert(get_category(v)< ncats);
		newstate(ctbl, v, get_category(v));
			/* set up the block and state lists */
		list_insert(&states2,(dp)&v);
	}	
	fsa->fntab = FSA_OPT;
	Free_dp((dp)ctbl); ctbl=0; /* we're now done with the block table */
	part_init (); /* initialize -- set up inverses */
	partition (); /* partition -- get minimum non-redundant blocks */

	newfailure= ((FSA_OPTstruct *)(failure->mp))->blk->member_h->orig;
	while (list_delget_first(&states2,(dp)&v)) {
		for (s=1;s<=nsyms;s++) 
			if (get(v,s)!=UNDEFINED){
				/* representative of the target's block */
				t=((FSA_OPTstruct *)((v->adj[s])->mp))->blk->member_h->orig;
				if (t==newfailure)
					v->adj[s]=UNDEFINED;
				else
					v->adj[s]=t;
			}
		/* representative of |v|'s own block; equals |v| for survivors */
		v->adj[0] = ((FSA_OPTstruct *)(v->mp))->blk->member_h->orig;
		list_insert(&states3,(dp)&v);
	}
	while (list_delget_first(&states3,(dp)&v))
		if (v->adj[0] != v) {
			if (v==basept(fsa))
				lg_reset_basepoint(fsa,v->adj[0]);
			lg_vertex_kill(fsa,v);
		}
	if (newfailure!=basept(fsa))
		lg_vertex_kill(fsa,newfailure);
	list_clear(&states1);
	list_clear(&states2);
	list_clear(&states3);
	for (b = blk_h; b != NULL; b = nxtb) {
		nxtb = b->blk_l; /* fetch the link before the block goes away */
		Free_dp((dp)b);
	}
	blk_h = NULL; /* don't leave the list head pointing at freed blocks */
	set_type(fsa,OPT_FSA);
}

/* Refine the blocks on the |blk_h| list until no block needs to be
   split any further.  Takes no arguments and returns nothing; the
   result is left in the block list and in each state's |blk| pointer.
   Expects the |inverse| lists to have been set up and the initial
   blocks to be chained through |wait_l| starting at |blk_h|.
*/
static void partition ()
{
   block *waiting; /* list of waiting blocks, linked through |wait_l| */
   block *jlist; /* list of blocks that may be split, linked through |j_l| */
   int sym; /* the current symbol */
   block *b; /* the block being taken off |waiting| at the end of a pass */

   waiting = blk_h;

   while (waiting != NULL) { /* for each waiting block */
      for (sym=0; sym<nsyms; sym++) { /* and for each symbol */
	  	jlist = NULL;

/* Find the inverses.  The current symbol is |sym|, the block to split by
is |waiting|, and the result will be a list of blocks in |jlist|.  Each
block in |jlist| will have its |split_h| field set to a list of its states
in the inverse.  We assume that |split_h| is |NULL| for each block when we
are entered, and use that as the flag that this block isn't on |jlist|.  We
also decrement the block's |count| by the number of its states that are
in the inverse (i.e. to the correct value after it's split, except that
a |count| of zero means it needn't be split).
*/
		{  block *b; FSA_OPTstruct *waits, *invs;
			/* this |b| hides the outer |b| inside these braces */
   			for (waits = waiting->member_h;
				waits != NULL; waits = waits->member_l) {
      			for (invs = waits->inverse[sym].h;
				invs != 0; invs = invs->inverse[sym].l) {
	 				b = invs->blk; 
			/* find block for each element in the inverse */
	 				if (b->split_h==NULL) {
				/* first state seen from this block */
		   	 			b->j_l = jlist;
		   	 			jlist = b;
				/* link it onto the head of the |jlist| */
	 				}
	 				invs->split_l = b->split_h; 
			/* push the state onto its block's split list */
					b->split_h = invs;
	 				b->count--;
					/* remove from the block count */
      			}
   			}
		}
	
/* Loop through |jlist|.  We now proceed to split any blocks on |jlist|
that weren't entirely in the inverse of |waiting|.  We can tell
this by simply looking at the block's |count|, which was adjusted when
we formed |jlist| to exclude the states that transferred to |waiting|
for the symbol |sym| -- if |count| is now zero, all states transferred,
and the block can be left alone (except for fixing |count|, which is
done by recounting its member list).  The actual splitting is done by
the subroutine |split|.

The only subtlety of this code is the possibility that the current block
(|waiting|) is one of the ones that must be split.  In that case we carry
on the current |sym| loop with the larger piece: if the piece just split
off is the larger one it takes over as |waiting|, and the other piece is
linked into the list directly behind it, so that it will be processed
for every symbol when its turn comes.

On entry, |jlist| is a list (linked through |j_l|) of all blocks
containing any state in the inverse of |waiting|, and its |count|
is the number of its states that aren't in the inverse.  As with
|waiting|, we use |jlist| to refer to the current element.
*/


		{  block *b2; FSA_OPTstruct *s;
   			while (jlist != NULL) {
				/* now check all blocks in inverse */
   		 	  	assert (jlist->split_h != NULL);
   		   		if (jlist->count == 0) {
				/* this one doesn't need to be split; restore
				   |count| by walking the member list */
		 			for (s = jlist->member_h;
						s != NULL; s = s->member_l) 
						jlist->count++;
		 			assert (jlist->count > 0);
   		   		} 
				else {
		 			b2 = split(jlist); 
				/* make a new block by splitting the old */
		 			if (jlist->wait_l == DONE) { 
				/* this element isn't waiting */
		    			if (b2->count > jlist->count) {
		       				b2->wait_l = DONE; 
					/* larger chunk is off waiting list */
		       				b2 = jlist;
				/* get |b2| = smaller chunk */
		    			} 
		/* whatever |b2| points to now will be linked into |waiting| */
					} 
					else if (jlist == waiting) { 
				/* this is the current element */
		    			if (b2->count > jlist->count) { 
					/* continue with larger half */
	       					b2->wait_l = jlist->wait_l;
					/* smaller will be done in full */
	       					waiting = b2;
	       					b2 = jlist;
	    				}
		/* if original is larger, just link |b2| on later */
	 				} 
		/* if original was on |waiting|, link in the other hunk too */
	 				b2->wait_l = waiting->wait_l; 
				/* link |b2| directly behind the current block */
	 				waiting->wait_l = b2;
     			}
      			jlist->split_h = NULL;
				/* keep |split_h| cleared except on |jlist| */
      			jlist = jlist->j_l; /* link to next block */
   			}
		}
	
  	  }
  	  b = waiting;
      waiting = waiting->wait_l; /* remove the block we've just processed */
      b->wait_l = DONE; /* mark it as off the list */
	}
}



static void 
part_init ()
{
   int sym; block *b; FSA_OPTstruct *s, *t; vertex *v;
   for (b = blk_h; b != NULL; b = b->blk_l) {
      for (s = b->member_h; s != NULL; s = s->member_l) {
	 v = s->orig;
	 for (sym=0; sym<nsyms; sym++) {
		if (get(v,sym+1)==UNDEFINED)
			t = (FSA_OPTstruct *)(failure->mp);
		else
	    	t = (FSA_OPTstruct *)(get(v,sym+1)->mp);
	    s->inverse[sym].l = t->inverse[sym].h;
	    t->inverse[sym].h = s;
	 }
      }
   }
}



