#ifndef RXRUNH
#define RXRUNH

/*	Copyright (C) 1992 Free Software Foundation, Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this software; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
/*  t. lord	Sun Oct  4 23:05:05 1992	*/

#include "rx.h"


/* The file `rx.h' defined constructors for a type of NFA.  This file
 * describes the interface by which that NFA can be lazily converted
 * to an equivalent, but more efficient NFA.  On the basis of this conversion,
 * one can write very fast recognizers for regular languages.
 *
 * The new NFA states are represented by `struct superstate', defined below.
 * Every superstate contains a transition table.  You might expect the
 * entries in that transition table to point to other superstates, but
 * the actual situation is somewhat more complicated.
 *
 * The complications arise for three reasons.  First, the superstate
 * reached by a transition may not yet have been created.  Second,
 * there may or may not be side effects associated with the
 * transition.  Third, for a given transition, there may be more than
 * one possible set of side effects, and consequently more than one
 * possible destination superstate.
 *
 * A matcher must be prepared to react to all of these conditions.
 * Therefore, instead of recording transitions as pointers to
 * superstates, we record two pieces of information.  The first is an
 * instruction, that tells the matcher which of the conditions
 * described above applies to the transition being considered.  The
 * second is a void * whose interpretation depends on the instruction;
 * in the simplest case, it will be a pointer to a destination
 * superstate.
 *
 * To help speed up matchers, there are two instruction formats.  The
 * only difference is whether the data or data_2 field of the
 * instruction frame is used.  Only a next_char instruction uses the
 * data field (to point to the next superstate).  All other
 * instructions set data to 0, and use data_2 to hold their argument.
 */


/* These are the `names' of the instructions that can occur in the
 * new NFA.  The enumerators are table indices; the value actually stored
 * in an instruction frame for a specific name is a parameter under
 * control of the caller to this library (see rx_id_instruction_table).
 *
 * As stated for struct instruction_frame, only next_char carries its
 * argument in the frame's `data' member.  Every other instruction leaves
 * `data' 0 and carries its argument in `data_2'.
 */

enum instruction
{
  /*
   * BACKTRACK_POINT is invoked when a transition results in more
   * than one possible future.
   *
   * There is one occurrence of this instruction per transition_class.
   * (A transition class is roughly the set of possible futures
   *  implied by a superstate transition.)
   *
   * Argument: (struct transition_class *)
   */
  backtrack_point = 0,

  /*
   * DO_SIDE_EFFECTS evaluates the side effects of a set of side_effect
   * edges.  There is one occurrence of this instruction per
   * discernable_future.
   *
   * Argument: (struct discernable_future *)
   */
  do_side_effects,

  /*
   * CACHE_MISS instructions are stored in discernable_futures whose
   * destination superstate has been reclaimed (or was never built).
   * It recomputes the destination superstate.
   *
   * Argument: (struct discernable_future *)
   */
  cache_miss,

  /*
   * NEXT_CHAR is called to consume the next character and take the
   * corresponding transition.
   *
   * Argument: (struct superstate *)
   */
  next_char,

  /*
   * BACKTRACK indicates that a transition fails.  It takes no argument.
   */
  backtrack,

  /*
   * ERROR_INX is stored only in places that should never be executed.
   * It is not supposed to occur.
   */
  error_inx,

  /*
   * Count of the instructions above; used as the length of
   * rx_id_instruction_table.  Keep it last.  No comma follows the final
   * enumerator, because a trailing comma is only valid from C99 on.
   */
  num_instructions
};

/* Table of the values that get stored in instruction frames, one slot
 * per instruction.  Index it with the `enum instruction' constants.
 */
extern void *rx_id_instruction_table[num_instructions];

/* One instruction frame of a transition table.
 *
 * Which argument member is meaningful depends on the instruction:
 * for `next_char', `data' is valid; for anything else `data' is 0
 * and `data_2' is valid instead.
 */
struct instruction_frame
{
  /* The instruction itself. */
  void *inx;
  /* Argument of a next_char instruction; 0 otherwise. */
  void *data;
  /* Argument of every instruction other than next_char. */
  void *data_2;
};


/* A superstate stands for a set of nfa states, and such a set is
 * represented by a STRUCT SUPERSTATE_CONTENTS.  The constructors
 * guarantee that a given set is only ever represented by one (shared)
 * structure.
 */
struct superstate_contents
{
  /* Reference count; needed because these structures are shared. */
  int refs;

  struct nfa_state *car;

  /* cdr == parent. */
  struct superstate_contents *cdr;

  /* The corresponding superstate, if it exists. */
  struct superstate *superstate;

  /* kids[x]->cdr == this */
  struct superstate_contents *kids[TAIL_ARRAY];
};

/* The null list of superstate_contents is not a 0 pointer.  It is a
 * list whose car and cdr are both NULL.  This function constructs that
 * null list for a given rx.
 */
extern struct superstate_contents *nil_superstate_contents
  P_((struct rx *));

/* superstate_contents lists are sorted sets: no element occurs more
 * than once.
 */
extern struct superstate_contents *superstate_enjoin
  P_((struct rx *,
      struct nfa_state *car,
      struct superstate_contents *));

/* State sets in the compactified nfa have a different type
 * (struct nfa_state_set).  SUPERSTATE_ECLOSURE_UNION combines such a
 * compactified set into a superstate set.
 */
extern struct superstate_contents *superstate_eclosure_union
  P_((struct rx *,
      struct superstate_contents *,
      struct nfa_state_set *));

/* These constructors don't protect their results, but do protect the cdr of 
 * their results.  Releases are propagated to cdrs when the head of the list
 * drops to 0 references. 
 */
/* Adds one reference to CON and evaluates to the new count.  RX is
 * accepted but not used by the expansion. */
#define protect_superstate_contents(RX, CON) ((CON)->refs += 1)
/* Releases a superstate_contents list.  When the head of the list drops
 * to 0 references, the release is passed on to its cdr. */
extern void release_superstate_contents
  P_((struct rx *, struct superstate_contents *));

/* Within a superstate, each character is bound to a transition class.
 * Given that character, the transition class determines the set of
 * possible futures for a machine in that state.  Every possible future
 * carries its own set of side effects and its own next-superstate.
 */
struct transition_class
{
  /* Link to another transition class (presumably the next one on the
   * owning superstate's `edges' list -- confirm in the implementation). */
  struct transition_class *next;

  /* Presumably the frame holding this class's backtrack_point
   * instruction (there is one such instruction per transition_class). */
  struct instruction_frame backtrack_frame;

  /* Presumably the set of characters bound to this class -- confirm. */
  ut_Bitset cset;

  /* The discernable futures belonging to this class. */
  struct discernable_future *options;
};

/* A superstate pairs a set of nfa states (SUPERSTATE_CONTENTS) with a
 * transition table.  Superstates are built on demand and may be
 * reclaimed without warning; use LOCK_SUPERSTATE to protect one.
 */
struct superstate
{
  /* Presumably the number of outstanding locks -- confirm. */
  int locks;

  /* Neighbours on what looks like a doubly linked list of recyclable
   * superstates -- confirm against the implementation. */
  struct superstate *next_recyclable;
  struct superstate *prev_recyclable;

  /* NOTE(review): presumably the discernable futures that lead to this
   * superstate -- confirm. */
  struct discernable_future *transition_refs;

  /* The set of nfa states this superstate contains. */
  struct superstate_contents *contents;

  /* Transition classes of this superstate. */
  struct transition_class *edges;

  /* The transition table. */
  struct instruction_frame transitions[TAIL_ARRAY];
};

/* One possible future of a transition.  Each possible future has its
 * own set of side effects and its own next-superstate.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and the instruction descriptions in this header; confirm them
 * against the implementation.
 */
struct discernable_future
{
  /* Presumably the next future on the same transition class's list. */
  struct discernable_future *next_same_transition_class;

  /* Presumably neighbours among futures sharing a destination. */
  struct discernable_future *next_same_dest;
  struct discernable_future *prev_same_dest;

  /* Presumably the superstate this future starts from. */
  struct superstate *present;

  /* Presumably the destination superstate. */
  struct superstate *future;

  /* Presumably the transition class this future belongs to. */
  struct transition_class *edge;

  struct instruction_frame future_frame;
  struct instruction_frame side_effects_frame;

  /* Side effects associated with this future. */
  struct rx_side_effect_list *effects;
};

/* Constructs a superstate from the set of nfa states it contains.
 * A call may cause other superstates in the same machine to be
 * recycled or tentatively freed.  States recently returned by this
 * function are not susceptible to that loss.
 * (You can count on MIN_LIVE_SUPERSTATES).
 */
extern struct superstate *superstate
  P_((struct rx *, struct superstate_contents *));

/* NOTE(review): per the comment on superstate(), callers "can count on"
 * this many superstates; presumably it is how many of the most recently
 * returned superstates are safe from being recycled -- confirm. */
#define MIN_LIVE_SUPERSTATES 10

/* A superstate can be preserved indefinitely by locking it.
 * unlock_superstate is presumably the matching release -- confirm. */
extern void lock_superstate
  P_((struct rx *, struct superstate *));
extern void unlock_superstate
  P_((struct rx *, struct superstate *));

/* NOTE(review): not documented in this header.  Presumably the routine a
 * matcher calls when it reaches a cache_miss instruction, so that the
 * destination superstate gets recomputed.  The meaning of the
 * unsigned char and void * arguments, and of the returned frame, should
 * be confirmed against the implementation.
 */
extern struct instruction_frame *handle_cache_miss
  P_((struct rx *,
      struct superstate *,
      unsigned char,
      void *));
#endif
