/*  $Header: conc_syn.c,v 3.0 88/04/13 15:43:19 jos Locked $ */
/*
 *  This file is part of the Amsterdam SGML Parser.
 *
 *  Copyright: Faculteit Wiskunde en Informatica
 *             Department of Mathematics and Computer Science
 *             Vrije Universiteit Amsterdam
 *             The Netherlands
 *
 *  Authors:   Sylvia van Egmond
 *             Jos Warmer
 */
#include "types.h"
#include "Lpars.h"
#include "charclas.h"
#include "conc_syn.h"
#include "lexical.h"
#include "modes.h"
#include "set.h"
#include "shortref.h"
#include "symtable.h"

#ifdef DOC_PARSER
#include "tags.h"
#endif

/*
 *  Function character names, each paired with the character number
 *  that function_char() returns for it (RE = 13, RS = 10, SPACE = 32,
 *  TAB = 9).  The final {ILL_STRING, ILL_SYMBOL} entry presumably
 *  terminates the table for string_symbol() -- confirm in symtable.h.
 */
static SymbolTable function_names = {
    {"RE",       13},
    {"RS",       10},
    {"SPACE",    32},
    {"TAB",       9},
    {ILL_STRING, ILL_SYMBOL}
};

/*
 *  Row of the delimiter tables that is handed out for any delimiter
 *  number at or beyond MAX_DELIMITERS.
 */
#define UNKNOWN          255

/*
 *  MAX_DELIMITERS is the number of rows in each delimiter table.
 *  The replacement list is parenthesized so that the macro keeps its
 *  value when it is used next to operators that bind tighter than '+'
 *  (e.g. `2 * MAX_DELIMITERS` or `MAX_DELIMITERS - 1`).
 */
#define START_DELIMITER  257
#define NR_DELIMITERS    116
#define MAX_DELIMITERS   (START_DELIMITER + NR_DELIMITERS)

/*
 *  Per-delimiter tables, indexed by delimiter number and filled in by
 *  concrete():
 *    concrete_syntax   - the delimiter string itself
 *    abstract_info     - a descriptive name for the delimiter
 *    delim_constraints - the Constraint registered for the delimiter
 *  Each string row holds at most 19 characters plus the terminator.
 */
static    char        concrete_syntax  [ MAX_DELIMITERS ][20];
static    char        abstract_info    [ MAX_DELIMITERS ][20];
static    Constraint  delim_constraints[ MAX_DELIMITERS ];

/*
 *  Set of characters with which at least one registered delimiter
 *  string starts; queried through is_delim_char().
 */
static    P_Set       delim_chars;

/*
 *  Register one delimiter.
 *
 *  abstract : delimiter number; used as the row index in all tables
 *  concrete : the delimiter string, copied into concrete_syntax
 *  info     : descriptive name, copied into abstract_info
 *  constr   : constraint stored in delim_constraints
 *
 *  The first character of `concrete` is also recorded in delim_chars.
 *  A short reference whose string starts with 'B' is the exception:
 *  for it the blank and the tab are recorded instead of the 'B'.
 *
 *  Both strings must fit in a table row (19 characters); no length
 *  check is made here.
 */
static void  concrete(abstract, concrete, info, constr)
Delimiter  abstract;
String     concrete;
String     info;
Constraint constr;
{
    /* Fill the three table rows for this delimiter. */
    delim_constraints[ abstract ] = constr;
    strcpy(abstract_info  [abstract], info    );
    strcpy(concrete_syntax[abstract], concrete);

    /* Ordinary case: the string's own first character is a delimiter char. */
    if( !( is_shortref(abstract) and (concrete[0]=='B') ) ){
        set_add(delim_chars, concrete[0]);
        return;
    }

    /* Short reference starting with 'B': record blank and tab instead. */
    set_add(delim_chars, ' ');
    set_add(delim_chars, '\t');
}

/*
 *  Report whether character number `i` is a member of delim_chars,
 *  i.e. whether concrete() recorded it as a delimiter character.
 */
Bool is_delim_char(i)
int i;
{
    Bool found;

    found = set_member(delim_chars, i);
    return found;
}

/*
 *  Initialise the concrete syntax tables.
 *
 *  Creates the delim_chars set, clears every row of the three
 *  delimiter tables, and then registers the built-in delimiter set
 *  through concrete().  Must run before any of the lookup functions
 *  in this file are used.
 */
void init_conc()
{
    int i;

    delim_chars = new_set(256);

    /* Start from empty strings and "no constraint" for every row. */
    for(i=0; i<MAX_DELIMITERS; i++){
        concrete_syntax[i][0] = '\0';
        abstract_info  [i][0] = '\0';
        delim_constraints[i]  = C_NO;
    }

    /* General delimiters. */
    concrete(TOK_NOD  , "@" , "NOD"  , C_NO   );
    concrete(TOK_AND  , "&" , "AND"  , C_NO   );
    concrete(TOK_COM  , "--", "COM"  , C_NO   );
    concrete(TOK_CRO  , "&#", "CRO"  , C_CREF );
    concrete(TOK_DTGC , "]" , "DTGC" , C_NO   );
    concrete(TOK_DTGO , "[" , "DTGO" , C_NO   );
    concrete(TOK_DSC  , "]" , "DSC"  , C_NO   );
    concrete(TOK_DSO  , "[" , "DSO"  , C_NO   );
    concrete(TOK_ERO  , "&" , "ERO"  , C_NMS  );
    concrete(TOK_ETAGO, "</", "ETAGO", C_GI   );
    concrete(TOK_GRPC , ")" , "GRPC" , C_NO   );
    concrete(TOK_GRPO , "(" , "GRPO" , C_NO   );
    concrete(TOK_LIT  , "\"", "LIT"  , C_NO   );
    concrete(TOK_LITA , "'" , "LITA" , C_NO   );
    concrete(TOK_MDC  , ">" , "MDC"  , C_NO   );
    concrete(TOK_MDO  , "<!", "MDO"  , C_DCL  );
    concrete(TOK_MINUS, "-" , "MINUS", C_NO   );
    concrete(TOK_MSC  , "]]", "MSC"  , C_MSE  );
    concrete(TOK_NET  , "/" , "NET"  , C_ELEM );
    concrete(TOK_OPT  , "?" , "OPT"  , C_NO   );
    concrete(TOK_OR   , "|" , "OR"   , C_NO   );
    concrete(TOK_PERO , "%" , "PERO" , C_NMS  );
    concrete(TOK_PIC  , ">" , "PIC"  , C_NO   );
    concrete(TOK_PIO  , "<?", "PIO"  , C_NO   );
    concrete(TOK_PLUS , "+" , "PLUS" , C_NO   );
    concrete(TOK_REFC , ";" , "REFC" , C_NO   );
    concrete(TOK_REP  , "*" , "REP"  , C_NO   );
    concrete(TOK_RNI  , "#" , "RNI"  , C_NO   );
    concrete(TOK_SEQ  , "," , "SEQ"  , C_NO   );
    concrete(TOK_STAGO, "<" , "STAGO", C_GI   );
    concrete(TOK_TAGC , ">" , "TAGC" , C_NO   );
    concrete(TOK_VI   , "=" , "VI"   , C_NO   );

    /* Character classes, combined delimiters and declaration openers. */
    concrete(TOK_DATACHAR, "@",          "datachar",     C_NO   );
    concrete(TOK_NONSGML,  "@",          "NONSGML",      C_NO   );
    concrete(TOK_SHORTREF, "@",          "shortref",     C_NO   );
    concrete(TOK_EE,       "@",          "Ee",           C_NO   );
    concrete(MDO_MDC,      "<!>",        "mdo_mdc",      C_NO   );
    concrete(TOK_MSC_MDC , "]]>",        "msc_mdc",      C_NO   );
    concrete(TOK_MDO_DSO,  "<![",        "mdo_dso",      C_NO   );
    concrete(MDO_COM,      "<!--",       "mdo_com",      C_NO   );
    concrete(MDO_ENTITY,   "<!entity",   "mdo_entity",   C_NO   );
    concrete(MDO_DOCTYPE,  "<!doctype",  "mdo_doctype",  C_NO   );
    concrete(MDO_ELEMENT,  "<!element",  "mdo_element",  C_NO   );
    concrete(MDO_ATTLIST,  "<!attlist",  "mdo_attlist",  C_NO   );
    concrete(MDO_NOTATION, "<!notation", "mdo_notation", C_NO   );
    concrete(MDO_SHORTREF, "<!shortref", "mdo_shortref", C_NO   );
    concrete(MDO_USEMAP,   "<!usemap",   "mdo_usemap",   C_NO   );
    concrete(MDO_LINKTYPE, "<!linktype", "mdo_linktype", C_NO   );
    concrete(MDO_LINK,     "<!link",     "mdo_link",     C_NO   );
    concrete(MDO_USELINK,  "<!uselink",  "mdo_uselink",  C_NO   );
    concrete(MDO_SGML,     "<!sgml",     "mdo_sgml",     C_NO   );
    concrete(TOK_DIGIT,    "@",          "digit",        C_NO   );
    concrete(TOK_LETTER,   "@",          "letter",       C_NO   );
    concrete(TOK_NMCHAR,   "@",          "nmchar",       C_NO   );
    concrete(TOK_NMSTRT,   "@",          "nmstrt",       C_NO   );
    concrete(TOK_SEPCHAR,  "\t",         "sepchar",      C_NO   );
    concrete(TOK_RS,       "\n",         "rs",           C_NO   );
    concrete(TOK_RE,       "\r",         "re",           C_NO   );
    /*
     * A literal blank: "\s" is not a C escape sequence (compilers
     * diagnose it and usually produce the letter 's').
     */
    concrete(TOK_SPACE,    " ",          "space",        C_NO   );

    /*
     * Short reference delimiters.  When is_shortref() holds for the
     * number, a leading 'B' in the string makes concrete() record the
     * blank and the tab as delimiter characters instead of the 'B'.
     */
    concrete(SH_REF+ 0, "B\015",        "SH_REF00", C_NO);
    concrete(SH_REF+ 1, "\015",         "SH_REF01", C_NO);
    concrete(SH_REF+ 2, "\012B\015",    "SH_REF02", C_NO);
    concrete(SH_REF+ 3, "\012B",        "SH_REF03", C_NO);
    concrete(SH_REF+ 4, "\012\015",     "SH_REF04", C_NO);
    concrete(SH_REF+ 5, "\012",         "SH_REF05", C_NO);
    concrete(SH_REF+ 6, "BB",           "SH_REF06", C_NO);
    concrete(SH_REF+ 7, "\011",         "SH_REF07", C_NO);
    concrete(SH_REF+ 8, "\040",         "SH_REF08", C_NO);
    concrete(SH_REF+ 9, "\"",           "SH_REF09", C_NO);
    concrete(SH_REF+10, "#",            "SH_REF10", C_NO);
    concrete(SH_REF+11, "%",            "SH_REF11", C_NO);
    concrete(SH_REF+12, "'",            "SH_REF12", C_NO);
    concrete(SH_REF+13, "(",            "SH_REF13", C_NO);
    concrete(SH_REF+14, ")",            "SH_REF14", C_NO);
    concrete(SH_REF+15, "*",            "SH_REF15", C_NO);
    concrete(SH_REF+16, "+",            "SH_REF16", C_NO);
    concrete(SH_REF+17, ",",            "SH_REF17", C_NO);
    concrete(SH_REF+18, "--",           "SH_REF18", C_NO);
    concrete(SH_REF+19, "-",            "SH_REF19", C_NO);
    concrete(SH_REF+20, ":",            "SH_REF20", C_NO);
    concrete(SH_REF+21, ";",            "SH_REF21", C_NO);
    concrete(SH_REF+22, "=",            "SH_REF22", C_NO);
    concrete(SH_REF+23, "@",            "SH_REF23", C_NO);
    concrete(SH_REF+24, "[",            "SH_REF24", C_NO);
    concrete(SH_REF+25, "]",            "SH_REF25", C_NO);
    concrete(SH_REF+26, "^",            "SH_REF26", C_NO);
    concrete(SH_REF+27, "_",            "SH_REF27", C_NO);
    concrete(SH_REF+28, "{",            "SH_REF28", C_NO);
    concrete(SH_REF+29, "|",            "SH_REF29", C_NO);
    concrete(SH_REF+30, "}",            "SH_REF30", C_NO);
    concrete(SH_REF+31, "~",            "SH_REF31", C_NO);

    /*
     * Row 0 answers for negative delimiter numbers and row UNKNOWN
     * for numbers at or beyond MAX_DELIMITERS (see the lookup
     * functions below).
     */
    concrete(0,         "@",            "EOF",      C_NO);
    concrete(UNKNOWN, "unknown delimiter", "unknown delimiter",  C_NO);
}

/*
 *  Return the concrete string registered for `delimiter`.
 *
 *  A negative number yields the string of row 0; a number at or
 *  beyond MAX_DELIMITERS yields the string of row UNKNOWN.  The
 *  result points into the static concrete_syntax table.
 */
String delimiter_to_string( delimiter )
Delimiter delimiter;
{
    String text;

    /* Assume "out of range at the top" until shown otherwise. */
    text = concrete_syntax[ UNKNOWN ];

    if( delimiter < 0 ){
        text = concrete_syntax[ 0 ];
    } else if( delimiter < MAX_DELIMITERS ){
        text = concrete_syntax[ delimiter ];
    }
    return text;
}

/*
 *  Find the delimiter whose concrete string equals `str` and that is
 *  recognized in `mode`.
 *
 *  Rows are searched in ascending order starting at 1 (row 0 is never
 *  considered); the first row that matches the string and for which
 *  mode_recognizes() holds is returned.  TOK_NOD is returned when no
 *  row qualifies.
 */
Delimiter string_to_delimiter(str, mode)
String          str;
RecognitionMode mode;
{
    int i;

    /*
     * The last valid row is MAX_DELIMITERS - 1; using `<=` here would
     * read one row past the end of concrete_syntax.
     */
    for( i=1; i < MAX_DELIMITERS; i++){
        if( strcmp(concrete_syntax[i], str) == 0 ){
            if( mode_recognizes(mode, i) ){
                return (Delimiter)i;
            }
        }
    }
    return TOK_NOD;
}

/*
 *  Return the descriptive name registered for `delim`.
 *
 *  Negative numbers map to row 0 and numbers at or beyond
 *  MAX_DELIMITERS map to row UNKNOWN.  The result points into the
 *  static abstract_info table.
 */
String delimiter_info(delim)
Delimiter delim;
{
    return abstract_info[ (delim < 0)                 ? 0
                        : (delim >= MAX_DELIMITERS)   ? UNKNOWN
                        :                               delim ];
}

/*
 *  Return the length in characters of the text of a token.
 *
 *  token : token number
 *  value : only looked at for TOK_SHORTREF in a DOC_PARSER build,
 *          where it is the number of the short reference delimiter
 *
 *  With DOC_PARSER, a TOK_SHORTREF has the length of the concrete
 *  string of `value` (0 if that string is empty).  Otherwise the
 *  length of the token's own concrete string is used if it is not
 *  empty.  With DOC_PARSER, a token that is a start tag counts its
 *  name plus 2 and an end tag its name plus 3.
 *
 *  Returns 0 when none of the above applies, so that the result is
 *  always defined.
 */
int token_length(token, value)
int token;
int value;
{
    String str;

#ifdef DOC_PARSER
    if( token == TOK_SHORTREF ){
        str = delimiter_to_string(value);
        if( str[0] != '\0' ){
            return strlen( str );
        }
        return 0;
    }
#endif

    str = delimiter_to_string(token);
    if( str[0] != '\0' ){
        return strlen( str );
    }

#ifdef DOC_PARSER
    str = starttag_to_string(token);
    if( str != ILL_STRING ){
        return ( strlen(str) + 2 );
    }
    str = endtag_to_string(token);
    if( str != ILL_STRING ){
        return ( strlen(str) + 3 );
    }
#endif

    /* No text is known for this token. */
    return 0;
}

/*
 *  Build a printable description of a token.
 *
 *  token : token number
 *  value : for the character tokens, the character itself; for
 *          TOK_NONSGML the character number; for TOK_SHORTREF (only
 *          in a DOC_PARSER build) the short reference delimiter number
 *
 *  The text is written into a static 80-byte buffer that is reused by
 *  every call, so the result must be consumed or copied before the
 *  next call.
 *
 *  NOTE(review): init_conc() registers TOK_RS and TOK_RE, while the
 *  labels below are RS and RE -- confirm that these are the intended
 *  token numbers.
 */
String token_to_string(token, value)
Token token;
int   value;
{
    static char   str[80];

    /* Tokens with a fixed description format. */
    switch(token){
    case  TOK_DATACHAR :
        sprintf(str,"DATACHAR %c", value );
        return str;
    case  TOK_LETTER   :
        sprintf(str,"LETTER %c", value );
        return str;
    case  TOK_DIGIT    :
        sprintf(str,"DIGIT %c", value );
        return str;
    case  TOK_NMCHAR   :
        sprintf(str,"NMCHAR %c", value );
        return str;
    case  TOK_NMSTRT   :
        sprintf(str,"NMSTRT %c", value );
        return str;
    case  TOK_SPECIAL  :
        sprintf(str,"SPECIAL %c", value );
        return str;
    case  TOK_NONSGML  :
        sprintf(str,"NONSGML character number %d", value );
        return str;
    case  RS           :
        sprintf(str,"RS");
        return str;
    case  RE           :
        sprintf(str,"RE");
        return str;
#ifdef DOC_PARSER
    case  TOK_SHORTREF :
        sprintf(str,"SHORTREF `%s' (%d)",
                    delimiter_to_string(value), value );
        return str;
#endif
    default:
        break;
    }

#ifdef DOC_PARSER
    /* A tag token is shown as the tag itself; start tags are tried first. */
    if( starttag_to_string(token) != ILL_STRING ){
        sprintf(str,"<%s>", starttag_to_string(token));
        return str;
    }
    if( endtag_to_string(token) != ILL_STRING ){
        sprintf(str,"</%s>",endtag_to_string(token));
        return str;
    }
#endif

    /* Anything else: descriptive name plus concrete string. */
#ifdef DEBUG
    sprintf(str,"%s : (%d) '%s'", delimiter_info(token),
                token, delimiter_to_string(token) );
#else
    sprintf(str,"%s : '%s'", delimiter_info(token),
                delimiter_to_string(token) );
#endif
    return str;
}
/*
 *  Constraints
 */
/*
 *  Return the constraint registered for `delim`.
 *
 *  Negative numbers use row 0; numbers at or beyond MAX_DELIMITERS
 *  use row UNKNOWN.
 */
Constraint    constraint(delim)
Delimiter  delim;
{
    if( delim >= 0 ){
        if( delim < MAX_DELIMITERS ){
            return delim_constraints[ delim ];
        }
        return delim_constraints[ UNKNOWN ];
    }
    return delim_constraints[ 0 ];
}

/*
 *  Look `string` up in the function_names table and return the value
 *  stored for it; '\0' is returned when string_symbol() reports
 *  ILL_SYMBOL.
 */
int function_char(string)
String   string;
{
    int code;

    code = string_symbol(function_names, string);
    return ( code == ILL_SYMBOL ) ? '\0' : code;
}
