#include <stdio.h>
#include "rxregex.h"
#include "rxparse.h"

#define CSET_SIZE 256

/* Computes the fastmap contribution of the expression tree REXP.
 *
 * For every character that can begin a match of REXP the corresponding
 * entry of MAP is set to 1.  Entries are only ever set, never cleared, so
 * the caller decides what the map contains beforehand.  RXB is not
 * examined here; it is only handed on to the recursive calls.
 *
 * Returns 1 if REXP might match the empty string, 0 otherwise.
 */
static int
build_fastmap (map, rexp, rxb)
     char * map;
     struct rexp_node * rexp;
     struct rx_buf * rxb;
{
  int x;

  switch (rexp->type)
    {
    case r_cset:
      for (x = 0; x < CSET_SIZE; ++x)
	if (UT_bitset_member (rexp->params.cset, x))
	  map[x] = 1;
      return 0;

    case r_concat:
      /* The short-circuit is intentional: the right operand can supply
       * a first character only when the left operand may be empty.
       */
      return (build_fastmap (map, rexp->params.pair.left, rxb)
	      && build_fastmap (map, rexp->params.pair.right, rxb));

    case r_alternate:
      {
	/* Both branches must always be visited.  Combining the calls
	 * with `||' would skip the right branch whenever the left one can
	 * be empty, leaving the right branch's first characters out of
	 * the map.
	 */
	int left_empty = build_fastmap (map, rexp->params.pair.left, rxb);
	int right_empty = build_fastmap (map, rexp->params.pair.right, rxb);
	return (left_empty || right_empty);
      }

    case r_opt:
    case r_star:
      build_fastmap (map, rexp->params.pair.left, rxb);
      return 1;

    case r_side_effect:
      return 1;

    default:
      /* A node type this function does not know about.  Nothing can be
       * said about its first characters, so let every character through
       * and report that it may be empty, rather than falling off the end
       * of the function without a return value.
       */
      for (x = 0; x < CSET_SIZE; ++x)
	map[x] = 1;
      return 1;
    }
}

/* The syntax options in effect for patterns compiled from now on;
   rx_compile_pattern copies this value into the pattern buffer.  */
rx_syntax rx_syntax_options = 0;

/* Installs SYNTAX as the current default syntax and returns the syntax
   that was in effect before the call.  Assigning directly to the
   `rx_syntax_options' variable changes the default as well.  */
reg_syntax_t
re_set_syntax (syntax)
     reg_syntax_t syntax;
{
  const rx_syntax previous = rx_syntax_options;

  rx_syntax_options = syntax;
  return previous;
}

/* Compiles the pattern PAT, LENGTH characters long, into RXB.
 *
 * The per-pattern state in RXB is reset, PAT is parsed under the current
 * `rx_syntax_options', RXB's fastmap is filled in when the caller supplied
 * one, and the resulting expression is turned into an NFA that is closed
 * over epsilon transitions and compacted into RXB's buffer.
 *
 * Returns 0 on success.  On failure returns an entry of `rx_msg': the
 * message for RXB->error when parsing fails, the REG_ESPACE message when
 * any later step fails.
 */
const char *
rx_compile_pattern (pat, length, rxb)
     const char * pat;
     int length;
     struct rx_buf * rxb;
{
  struct rexp_node * rexp;
  struct nfa_state *start = 0;
  struct nfa_state *end = 0;

  /* Reset the per-pattern state kept in RXB.  */
  rxb->rx.local_cset_size = CSET_SIZE;
  rxb->rx.nodec = rxb->rx.epsnodec = 0;
  rxb->rx.se_memo = 0;
  rxb->rx.nfa_set_memo = 0;
  rxb->rx.nil_set = 0;
  rxb->rx.min_free_superstate = rxb->rx.max_free_superstate = 0;
  rxb->rx.superstate_count = 0;
  rxb->rx.nfa_states = 0;
  rxb->rx.instruction_table = rx_id_instruction_table;
  rxb->syntax = rx_syntax_options;
  rxb->error = 0;
  rxb->fastmap_accurate = 0;

  /* Honor the caller-supplied LENGTH instead of measuring PAT with
   * strlen, so that counted patterns (not NUL-terminated, or containing
   * NUL) are parsed in full and no undeclared library call is needed.
   * NOTE(review): the `- 1' adjustment is carried over unchanged from the
   * previous strlen-based call; confirm it against parse_grexp's contract.
   */
  rexp = parse_grexp (rxb, &pat, length - 1, 0);
  if (!rexp)
    return rx_msg[rxb->error];

  if (rxb->fastmap)
    {
      rxb->can_match_empty =
	build_fastmap (rxb->fastmap, rexp, rxb);
      rxb->fastmap_accurate = 1;
    }

  /* Wrap the parsed expression: the character `a' is prepended to the
   * pattern to make bootstrapping the matcher easier, and a `regex_win'
   * side effect (which records a successful match) is appended.
   */
  {
    ut_Bitset cs = cset (&rxb->rx);
    struct rexp_node * rn0 = rexp_node (&rxb->rx, r_cset);
    struct rexp_node * rn1 = rexp_node (&rxb->rx, r_concat);
    struct rexp_node * rn2 = rexp_node (&rxb->rx, r_side_effect);
    struct rexp_node * rn3 = rexp_node (&rxb->rx, r_concat);

    /* NOTE(review): this and the later failure returns leave REXP (and
     * any nodes obtained above) unreleased; whether they can safely be
     * freed here depends on allocator details not visible in this file.
     */
    if (!(rn0 && rn1 && rn2 && rn3 && cs))
      return rx_msg[REG_ESPACE];

    UT_bitset_enjoin (cs, 'a');
    rn0->params.cset = cs;
    rn1->params.pair.left = rn0;
    rn1->params.pair.right = rexp;
    rn2->params.side_effect = (void *)regex_win;
    rn3->params.pair.left = rn1;
    rn3->params.pair.right = rn2;
    rexp = rn3;
  }

  if (!build_nfa (&rxb->rx, rexp, &start, &end))
    return rx_msg[REG_ESPACE];

  {
    void * mem = (void *)rxb->buffer;
    unsigned long size = rxb->allocated;
    int start_id;

    end->is_final = 1;
    start->is_start = 1;
    name_nfa_states (&rxb->rx);
    /* Remember the start state's id now; only the id is stored in RXB
     * once the NFA has been compacted.
     */
    start_id = start->id;

    if (!eclose_nfa (&rxb->rx))
      return rx_msg[REG_ESPACE];
    delete_epsilon_transitions (rxb);

    if (!compactify_nfa (&rxb->rx, &mem, &size))
      return rx_msg[REG_ESPACE];

    rxb->start_id = start_id;
    rxb->buffer = mem;
    rxb->allocated = size;
    rxb->rx.buffer = mem;
    rxb->rx.allocated = size;
  }

  free_rexp (rexp);
  rxb->rx.nil_set = nil_superstate_contents (&rxb->rx);
  return 0;
}

/* An offset into the string being matched.  */
typedef int rx_off;

/* Register bookkeeping shared by rx_match, do_match and explore_future
 * for the duration of one match attempt.
 */
struct rx_regs
{
  /* Number of entries in each of the arrays below: 1 when rx_match uses
   * its own one-element arrays, otherwise the caller's num_regs.  */
  int n_reg;
  /* Highest register index whose lparen entry has been written on the
   * path currently being explored.  */
  int last_l;
  /* Counterpart of last_l for the rparen array.  */
  int last_r;
  /* Offsets written by the left-paren side effects on the current path;
   * registers that were skipped over are set to -1.  */
  rx_off * lparen;
  /* Offsets written by the right-paren side effects on the current
   * path.  */
  rx_off * rparen;
  /* Copies of lparen/rparen taken at the `regex_win' that reached
   * furthest so far; best_rparen[0] holds the offset of that win and is
   * negative while no win has been recorded.  */
  rx_off * best_lparen;
  rx_off * best_rparen;
};

/* The type of the characters in the string being tested for a match.
 * It should be an unsigned scalar of some kind.  The typedef itself is
 * const-qualified, so an `rx_char_t *' points at read-only characters.
 */
typedef const unsigned char rx_char_t;

/* Drives the superstate machine over STR (LEN characters), starting in
 * SUPER with POS as the current position and C as the current input
 * character.
 *
 * Each step looks up the instruction frame for C in the current
 * superstate.  A frame whose `data' is non-null names the next
 * superstate: the position advances and the next character is loaded.
 * Otherwise the frame's `inx' opcode is executed: side effects are
 * applied to REGS, backtrack points recursively explore each alternative
 * future, and cache misses are resolved through handle_cache_miss.
 *
 * If OPT_TC is non-null, execution begins with that frame instead of the
 * transition for C; the recursive calls at backtrack points use this.
 *
 * Matches are reported through REGS: a `regex_win' side effect that ends
 * further right than REGS->best_rparen[0] copies the current registers
 * into the best_* arrays.
 *
 * Returns 1 when a `backtrack' instruction is reached.  Returns 0 when
 * the input is exhausted, when a backreference cannot be satisfied, or
 * when handle_cache_miss yields no frame.
 */
int
explore_future (super, str, len, pos, c, opt_tc, regs, rxb)
     struct superstate * super;
     rx_char_t * str;
     rx_off len;
     rx_char_t * pos;
     unsigned char c;
     struct instruction_frame * opt_tc;
     struct rx_regs * regs;
     struct rx_buf * rxb;
{
  struct instruction_frame * ifr;
  /* Characters left from POS to the end of STR; STOP is therefore
     STR + LEN.  */
  rx_off to_do = len - (pos - str);
  rx_char_t * stop = pos + to_do;

  if (opt_tc)
    {
      ifr = opt_tc;
      goto restart;
    }

  while (1)
    {
      int inx;
      ifr = &super->transitions [c];
    restart:
      {
	/* This loop can be unrolled by creating a special
	   `failure superstate'  that leads to itself for all characters. */
	struct superstate * next_super = (struct superstate *)ifr->data;
	if (next_super)
	  {
	    /* Plain transition: consume the character and fetch the next
	       one, giving up once the end of the input is reached.  */
	    super = next_super;
	    ++pos;
	    if (pos == stop)
	      return 0;
	    c = *pos;
	    continue;
	  }
      }
      /* NOTE(review): `inx' is assigned but never read; the switch below
	 re-reads ifr->inx.  */
      inx = (int)ifr->inx;
      switch ((int)ifr->inx)
	{
	case do_side_effects:
	  {
	    /* Apply every effect in the list, then continue with the
	       frame that follows the effects.  */
	    struct discernable_future * df =
	      (struct discernable_future *)ifr->data_2;
	    struct rx_side_effect_list * el = df->effects;
	    while (el)
	      {
		/* Offset of the current character; registers store the
		   offset just past it (urhere + 1).  */
		rx_off urhere = (rx_off)(pos - str);
		int effect = (int)el->car;
		if (effect >= regex_lparen_1 &&
		    effect <= regex_last_lparen)
		  {
		    /* Left paren: recorded only when this register index
		       is beyond last_l; skipped registers become -1.  */
		    int reg = effect - regex_lparen_1;
		    if (reg > regs->last_l)
		      {
			regs->lparen[reg] = urhere + 1;
			while (++regs->last_l < reg)
			  regs->lparen[regs->last_l] = -1;
		      }
		  }
		else if (effect >= regex_rparen_1 &&
			 effect <= regex_last_rparen)
		  {
		    /* Right paren: always overwrites the register.
		       NOTE(review): unlike the left-paren branch there is
		       no `reg > last_r' guard, so last_r is incremented
		       on every right-paren effect even when reg <= last_r
		       -- confirm this is intended.  */
		    int reg = effect - regex_rparen_1;
		    regs->rparen[reg] = urhere + 1;
		    while (++regs->last_r < reg)
		      regs->rparen[regs->last_r] = -1;
		  }
		else if ((effect >= regex_backreference_1)
			 && (effect <= regex_last_backreference))
		  {
		    /* Backreference: fail if the group has not been
		       closed, otherwise compare the input against the
		       group's text through the translate table.  */
		    int reg = effect - regex_backreference_1;
		    if (reg > regs->last_r
			|| regs->rparen[reg] < 0)
		      return 0;
		    {
		      /* NOTE(review): the comparison runs through `last'
			 inclusive, never checks `here' against `stop',
			 and afterwards sets POS to `last' (inside the
			 group) rather than to `here' -- verify the
			 intended bounds and resulting position.  */
		      rx_char_t * there = str + regs->lparen[reg];
		      rx_char_t * last = str + regs->rparen[reg];
		      rx_char_t * here = pos;
		      while (there <= last)
			if (rxb->translate[*there]
			    != rxb->translate[*here])
			  return 0;
			else
			  { ++there; ++here; }
		      pos = last;
		    }
		  }
		else if (effect == regex_win)
		  {
		    /* A successful match: keep it only if it ends further
		       right than the best one recorded so far.  */
		    if (urhere + 1 > regs->best_rparen[0])
		      {
			int x;
			for (x = 0; x <= regs->last_l; ++x)
			  regs->best_lparen[x] = regs->lparen[x];
			for (x = 0; x <= regs->last_r; ++x)
			  regs->best_rparen[x] = regs->rparen[x];
			regs->best_rparen[0] = urhere + 1;
		      }
		  }
		el = el->cdr;
	      }
	    ifr = &df->future_frame;
	    goto restart;
	  }

	case backtrack_point:
	  {
	    /* Several futures share this transition.  All but the last
	       are explored by recursive calls, with last_l/last_r
	       restored after each; the last one is continued in place.
	       SUPER stays locked while the recursive calls run.  */
	    int top_lparen = regs->last_l;
	    int top_rparen = regs->last_r;
	    struct transition_class * tc =
	      (struct transition_class *)ifr->data_2;
	    struct discernable_future * df = tc->options;
	    lock_superstate (&rxb->rx, super);
	    while (df->next_same_transition_class)
	      {
		/* NOTE(review): `x' is unused, and the value returned by
		   the recursive call is ignored.  */
		int x;
		explore_future (super, str, len, pos, c, 
				(df->effects
				 ? &df->side_effects_frame
				 : &df->future_frame),
				regs, rxb);
		regs->last_l = top_lparen;
		regs->last_r = top_rparen;
		df = df->next_same_transition_class;
	      }
	    ifr = (df->effects
		   ? &df->side_effects_frame
		   : &df->future_frame);
	    unlock_superstate (&rxb->rx, super);
	    goto restart;
	  }

	case cache_miss:
	  /* The transition has not been computed yet; ask for it and give
	     up if none can be produced.  */
	  ifr = handle_cache_miss (&rxb->rx, super, c, ifr->data_2);
	  if (!ifr)
	    return 0;
	  goto restart;

	case backtrack:
	  return 1;
	}
    }
}


static int
do_match (rxb, str, size, posi, c, regs, start_superstate)
     struct rx_buf * rxb;
     const char * str;
     regoff_t size;
     regoff_t posi;
     struct rx_regs * regs;
     struct superstate * start_superstate;
{
  rxb->rx.locks = 0;
  explore_future (start_superstate, (rx_char_t *) str,
		  size, str + posi, c, 0, regs, rxb);
  return (regs->best_rparen[0] >= 0
	  ? regs->best_rparen[0] - regs->best_lparen[0]
	  : -1);
}

/* Prepares a match attempt against RXB.
 *
 * Builds the start superstate from the NFA state named by RXB->start_id,
 * locks it and stores it in *SUPER.  When RE_REGS is non-null, RX_REGS is
 * pointed at freshly allocated working register arrays of
 * RE_REGS->num_regs entries and at the caller's start/end arrays for the
 * best match; otherwise RX_REGS is left as the caller set it up.
 *
 * Returns 0 on success and -1 on failure.  On allocation failure
 * RXB->error is set to REG_ESPACE.
 */
static int
rx_match_init (rxb, rx_regs, super, re_regs)
     struct rx_buf * rxb;
     struct rx_regs * rx_regs;
     struct superstate ** super;
     struct re_registers * re_regs;
{
  struct superstate_contents * start_contents;
  struct superstate * start_superstate;

  start_contents =
    superstate_enjoin (&rxb->rx, id_to_nfa_state(&rxb->rx, rxb->start_id),
		       rxb->rx.nil_set);
  if (!start_contents)
    return -1;
  protect_superstate_contents(rxb, start_contents);
  start_superstate = superstate (&rxb->rx, start_contents);
  if (!start_superstate)
    return -1;
  lock_superstate (&rxb->rx, start_superstate);
  *super = start_superstate;

  if (re_regs)
    {
      rx_regs->lparen =
	(regoff_t *)malloc (re_regs->num_regs * sizeof (regoff_t));
      rx_regs->rparen =
	(regoff_t *)malloc (re_regs->num_regs * sizeof (regoff_t));
      if (!(rx_regs->lparen && rx_regs->rparen))
	{
	  /* Only one of the two allocations may have failed; release the
	   * other so it is not leaked (freeing a null pointer is a no-op).
	   */
	  free (rx_regs->lparen);
	  free (rx_regs->rparen);
	  rx_regs->lparen = 0;
	  rx_regs->rparen = 0;
	  rxb->error = REG_ESPACE;
	  return -1;
	}
      /* The best match is written straight into the caller's arrays.  */
      rx_regs->best_lparen = re_regs->start;
      rx_regs->best_rparen = re_regs->end;
      rx_regs->n_reg = re_regs->num_regs;
    }
  return 0;
}

/* Finishes a match attempt: when RE_REGS is non-null, releases the
 * working register arrays held in REGS and stores REGS' best-match
 * arrays in RE_REGS->start and RE_REGS->end.  Does nothing when RE_REGS
 * is null.  RXB is not used.
 */
static void
rx_match_done (rxb, regs, re_regs)
     struct rx_buf * rxb;
     struct rx_regs * regs;
     struct re_registers * re_regs;
{
  if (!re_regs)
    return;

  free (regs->lparen);
  free (regs->rparen);

  re_regs->start = regs->best_lparen;
  re_regs->end = regs->best_rparen;
}


int
rx_match (rxb, str, len, start, re_regs)
     struct rx_buf * rxb;
     const char * str;
     int len;
     int start;
     struct re_registers * re_regs;
{
  regoff_t lpspace[1];
  regoff_t rpspace[1];
  regoff_t best_lpspace[1];
  regoff_t best_rpspace[1];
  struct rx_regs regs_s;
  struct superstate * super;
  int x;

  regs_s.n_reg = 1;
  regs_s.lparen = lpspace;
  regs_s.rparen = rpspace;
  regs_s.best_lparen = best_lpspace;
  regs_s.best_rparen = best_rpspace;

  if (rx_match_init (rxb, &regs_s, &super, re_regs) < 0)
    return -1;
  for (x = 0; x < regs_s.n_reg; ++x)
    regs_s.best_rparen[x] = regs_s.best_lparen[x] = -1;
  regs_s.last_l = regs_s.last_r = 0;
  regs_s.lparen[0] = start;
  x = do_match (rxb, str, len, start - 1, 'a', &regs_s, super);
  rx_match_done (rxb, &regs_s, re_regs);
  return x;
}


/* Searches STR_PASSED (LEN characters) for a match of RXB, trying start
 * offsets from START up to, but not including, START + RANGE.
 *
 * When RXB has a fastmap, offsets whose character cannot begin a match
 * are skipped without running the matcher.  If RXB->can_match_empty is
 * set, the first attempt is made at START without consulting the fastmap.
 * REGS is handed to rx_match unchanged.
 *
 * Returns the offset at which a match was found, or -1 if there is none.
 */
int
rx_search (rxb, str_passed, len, start, range, regs)
     struct rx_buf * rxb;
     const char * str_passed;
     int len;
     int start;
     int range;
     struct re_registers * regs;
{
  const unsigned char * str = (const unsigned char *) str_passed;
  int last = start + range;
  int ret;

  if (rxb->can_match_empty)
    goto try_match;
  while (start < last)
    {
      char * fastmap = rxb->fastmap;

      /* Without a fastmap there is nothing to filter on, so every
       * offset is handed to the matcher.
       */
      if (fastmap)
	{
	  const unsigned char * end = str + last;
	  const unsigned char * pos = str + start;
	  while ((pos < end) && !fastmap[*pos])
	    ++pos;
	  if (pos == end)
	    return -1;
	  start = pos - str;
	}
    try_match:
      /* rx_match is declared to take `const char *', so pass the
       * caller's original pointer.
       */
      ret = rx_match (rxb, str_passed, len, start, regs);
      if (ret >= 0)
	return start;
      ++start;
    }

  /* Every candidate offset was tried without success.  */
  return -1;
}

