
/*
 * Scanner for the Newsclip compiler.  A fairly simple scanner.
 * Handwritten scanners are not complex and are usually smaller and faster
 * than those generated by lex.
 *
 * The only thing fancy here are the 'newsgroup name' tokens, which are
 * always preceded by either a '#' or the 'is' operator/token.  These are
 * special because they are not otherwise quoted and can contain things
 * like dashes and plus signs, which would otherwise delimit tokens.
 */
 /*
  * Newsclip(TM) Compiler Source Code.
  * Copyright 1989 Looking Glass Software Limited.  All Rights Reserved.
  * Unless otherwise licenced, the only authorized use of this source
  * code is compilation into a binary of the newsclip compiler for the
  * use of licenced Newsclip customers.  Minor source code modifications
  * are allowed before compiling.
  * A short time evaluation of this product is also permitted.  See the file
  * 'Licence' in the library source directory for details.
  */

#include "nc.h"

#include "y.tab.h"


/* End-of-input value.  NOTE(review): the code visible in this file tests
 * against EOF directly and does not reference this macro. */
#define EOFCHAR -1

/* Text of the token being scanned; filled by yylex() and scanng() and
 * quoted by yyerror() in its messages. */
char yytext[MAX_LLEN];
/* Current input line.  Starts at 0, is bumped to 1 on the first yylex()
 * call, incremented by readkey() on every newline and overwritten by
 * '#' line directives. */
int yylineno = 0;
/* Characters read since the last newline; 0 means "at start of line",
 * which is where readkey() looks for '#' directives. */
int column = 0;
/* Name of the file being scanned; replaced by readkey() when a '#'
 * directive names a different file. */
char *yyfilename;
/* Semantic value of the token just returned (defined by the parser). */
extern YYSTYPE yylval;
/* One character of pushback between tokens; 0 means nothing is pending. */
int lookaheadchar;
FILE *yyin;			/* Input stream read by readkey() */

/* Scanner errors are reported through the same routine as parser errors. */
#define scanerror yyerror

extern char *allocstring();
extern FILE *trace;

/* number - string base for newsgroup names */


/*
 * yylex - hand the next token to the parser.
 *
 * Returns the token code (a Y... or ..._OP constant, or the character
 * itself for single-character tokens) and 0 at end of input.  The token
 * text is left in yytext.  Identifiers and string constants put a copy of
 * the text in yylval.strval; integers and character constants put their
 * value in yylval.intval; newsgroup tokens get their value from scanng().
 *
 * At most one character of read-ahead is kept, in lookaheadchar
 * (0 means none pending).
 *
 * Characters that would not fit in yytext are consumed but not stored and
 * an error is reported, so an over-long token cannot overrun the buffer.
 * End of file inside a string constant or a comment is reported once and
 * ends the scan instead of looping forever.
 */
int
yylex()
{
	register int c;
	register char *cp;
	/* last position that may hold a character; the NUL goes here at worst */
	char *limit = yytext + sizeof(yytext) - 1;
	int toolong;

	if (yylineno == 0)
		yylineno++;

	for (;;) {
		/* take the pushed-back character, if any, else read a new one */
		if ((c = lookaheadchar) != 0)
			lookaheadchar = 0;
		else
			c = readkey();

		/* skip white space (NUL bytes are treated as white space too) */
		while (c == ' ' || c == '\t' || c == 0 || c == 12 /* FF */)
			c = readkey();

		cp = yytext;
		toolong = 0;
		yytext[0] = c; yytext[1] = yytext[2] = 0;

		/* identifier, keyword or the 'is' newsgroup operator */
		if (isascii(c) && (isalpha(c) || c == '_')) {
			do {
				if (cp < limit)
					*cp++ = lowcase(c);
				else
					toolong = 1;
				c = readkey();
			} while (isascii(c) && (isalnum(c) || c == '_'));
			*cp = 0;
			lookaheadchar = c;
			if (toolong)
				parerror( "Identifier too long" );

			if (strcmp( yytext, "is" ) == 0) {
				/*
				 * NOTE(review): scanng() starts by reading fresh
				 * input, so the character just pushed back is
				 * dropped.  Harmless when it is white space.
				 */
				scanng();
				return YQUERYGROUP;
			}

			c = look_kw(yytext);
			if (c == 0) {
				yylval.strval = allocstring(yytext);
				return (YID);
			}
			return c;
		}

		/* decimal integer constant */
		if (isascii(c) && isdigit(c)) {
			do {
				if (cp < limit)
					*cp++ = c;
				else
					toolong = 1;
				c = readkey();
			} while (isascii(c) && isdigit(c));
			*cp = 0;
			lookaheadchar = c;
			if (toolong)
				parerror( "Number too long" );
			yylval.intval = atoi(yytext);
			return (YINT);
		}

		switch (c) {
		case EOF:
			strcpy( yytext, "<End-of-File>" );
			return 0;

		case ' ':
		case '\t':
		case 12: /* form feed */
		case '\n':
			break;		/* nothing here; go round for the next token */

		case '"': {
			/*
			 * String constant.  Escapes are stored verbatim; a
			 * backslash only stops the following character from
			 * ending the string.
			 */
			int backflag = FALSE;

			for (;;) {
				c = readkey();
				if (c == '"' && !backflag)
					break;
				if (c == '\n') {
					*cp = 0;
					parerror( "Newline in string constant" );
					return YILLCH;
				}
				if (c == EOF) {
					*cp = 0;
					parerror( "End of File inside string constant" );
					/* make sure the next call reports end of input */
					lookaheadchar = c;
					return YILLCH;
				}
				if (!backflag && c == '\\')
					backflag = TRUE;
				else
					backflag = FALSE;
				if (cp < limit)
					*cp++ = c;
				else
					toolong = 1;
			}
			*cp = 0;
			if (toolong)
				parerror( "String constant too long" );
			yylval.strval = allocstring(yytext);
			return (YSTRING);
		}

		case '\'':
			/* character constant; its value is returned as a YINT */
			c = readkey();
			if (c == '\\') {
				c = readkey();
				switch (c) {
				case 'n':
					c = '\n';
					break;
				case 'r':
					c = '\r';
					break;
				case 't':
					c = '\t';
					break;
				case 'f':
					c = '\f';
					break;
				/* any other escaped character stands for itself */
				}
			}
			if (readkey() != '\'')
				parerror( "Invalid character constant" );
			yylval.intval = c;
			return YINT;

		case '/':
			c = readkey();
			if (c != '*') {
				lookaheadchar = c;
				return '/';
			}
			{
				/*
				 * Comment: skip to the closing star-slash.  prev
				 * starts at 0 so the '*' that opened the comment
				 * cannot also close it.
				 */
				int prev;

				c = 0;
				do {
					prev = c;
					c = readkey();
					if (c == EOF) {
						parerror( "End of File inside comment" );
						strcpy( yytext, "<End-of-File>" );
						return 0;
					}
				} while (c != '/' || prev != '*');
			}
			break;

		case '#':
			/* newsgroup or query if a newsgroup is present */
			scanng();
			return YNEWSGROUP;

		/* operators that may be one or two characters long */
		case '+':
			if ((c = readkey()) == '+')
				return INC_OP;
			lookaheadchar = c;
			return '+';

		case '-':
			if ((c = readkey()) == '-')
				return DEC_OP;
			lookaheadchar = c;
			return '-';

		case '<':
			if ((c = readkey()) == '=')
				return LE_OP;
			lookaheadchar = c;
			return '<';

		case '>':
			if ((c = readkey()) == '=')
				return GE_OP;
			lookaheadchar = c;
			return '>';

		case '!':
			if ((c = readkey()) == '=')
				return NE_OP;
			lookaheadchar = c;
			return '!';

		case '=':
			if ((c = readkey()) == '=')
				return EQ_OP;
			lookaheadchar = c;
			return '=';

		case '&':
			if ((c = readkey()) == '&')
				return AND_OP;
			lookaheadchar = c;
			return '&';

		case '|':
			if ((c = readkey()) == '|')
				return OR_OP;
			lookaheadchar = c;
			return '|';

		/* single-character tokens stand for themselves */
		case ';':
		case ',':
		case '*':
		case '[':
		case ']':
		case '{':
		case '}':
		case '%':
		case '^':
		case '?':
		case '(':
		case ')':
		case '.':
		case ':':
			return c;

		default:
			if (c <= 0)
				return (0);
			/* swallow a run of the same illegal character as one token */
			do
				lookaheadchar = readkey();
			while (lookaheadchar == c);
			return (YILLCH);
		}
	} /* token loop */
}


/*
 * Reserved words and the token each one maps to.
 *
 * look_kw() binary-searches this table with strcmp(), so the entries must
 * stay in ascending strcmp() order.  The final all-zero entry is a
 * sentinel and is excluded from the search.
 */
struct kwtab {
	char *word;		/* keyword spelling, lower case */
	int ttype;		/* token code returned by yylex() */
} kwtab[] = {
	{ "accept",	YACCEPT },
	{ "adjust",	YADJUST },
	{ "array",	YARRAY },
	{ "break",	YBREAK },
	{ "case",	YCASE },
	{ "continue",	YCONTINUE },
	{ "database",	YTDATABASE },
	{ "datetime",	YTDATE },
	{ "default",	YDEFAULT },
	{ "else",	YELSE },
	{ "extern",	YEXTERN },
	{ "for",	YFOR },
	{ "forward",	YFORWARD },
	{ "goto",	YGOTO },
	{ "has",	YHAS },
	{ "header",	YHEADER },
	{ "if",		YIF },
	{ "in",		YIN },
	{ "int",	YTINT },
	{ "newsgroup",	YTNEWSGROUP },
	{ "parse",	YPARSE },
	{ "procedure",	YPROCEDURE },
	{ "reject",	YREJECT },
	{ "return",	YRETURN },
	{ "string",	YTSTRING },
	{ "switch",	YSWITCH },
	{ "userid",	YTUSERID },
	{ "while",	YWHILE },
	{ 0,		0 },
};



/*
 * look_kw - look a word up in the keyword table.
 *
 * Binary search over kwtab, leaving out its trailing sentinel entry.
 * Returns the keyword's token type, or 0 when str is not a keyword.
 */
int
look_kw(str)
register char *str;
{
	int lo = 0;
	int hi = sizeof(kwtab) / sizeof(kwtab[0]) - 2;	/* last real entry */

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		int cmp = strcmp(kwtab[mid].word, str);

		if (cmp == 0)
			return kwtab[mid].ttype;
		if (cmp < 0)
			lo = mid + 1;	/* table entry sorts before str */
		else
			hi = mid - 1;	/* table entry sorts after str */
	}
	return 0;
}

/*
 * readkey - read the next character from yyin.
 *
 * Keeps yylineno and column up to date and returns the character read
 * (EOF at end of input).
 *
 * A '#' in the first column is taken as a "# <line> "file"" or
 * "#line <line> "file"" directive.  The whole line is consumed, yylineno
 * is set from the number after the first space, and yyfilename is
 * replaced if the quoted name differs from the current one.  Any number
 * of consecutive directive lines is handled in one call.
 */
int
readkey()
{
	int c;

	c = getc(yyin);

	while (column == 0 && c == '#') {
		char buf[MAX_FNAME+20];
		char *p, *eq;
		char *space;

		if (fgets( buf, sizeof(buf), yyin ) == NULL) {
			/* nothing after the '#': buf holds no data to parse */
			c = EOF;
			break;
		}
		if (strchr( buf, '\n' ) == NULL) {
			/* line longer than buf: drop the rest so it is not scanned */
			int skip;

			do {
				skip = getc(yyin);
			} while (skip != '\n' && skip != EOF);
		}

		/* accept both # and #line directives */
		space = strchr( buf, ' ' );
		if( space )
			yylineno = atoi( space+1 );
		/* get filename */
		p = strchr( buf, '"' );
		if( p ) {
			p++;
			eq = strchr( p, '"' );
			if( eq )
				*eq = 0;
			/* yyfilename may not have been set yet */
			if( yyfilename == NULL || strcmp( p, yyfilename ) != 0 )
				yyfilename = allocstring( p );
			}
		/* the directive's own newline is not counted: the number
		 * it gave already applies to the line that follows */
		c = getc(yyin);
		}

	if (c == '\n') {
		yylineno++;
		column = 0;
		}
	 else
		column++;
	return c;
}

/*
 * Numbered group database structure.  scanng() passes this map and the
 * scanned newsgroup name to nums_lookup() and uses the result as the
 * token's integer value.  It starts out with every field zero.
 * NOTE(review): struct stringmap and nums_lookup() are defined elsewhere;
 * presumably the lookup assigns a number to names it has not seen - confirm.
 */

struct stringmap thegroups = { 0, 0, 0, 0 };

/*
 * scanng - scan a newsgroup name after a newsgroup name prefix token
 * (is|#).
 *
 * Skips leading white space, then collects letters, digits and the
 * characters "_-.+" into yytext.  The name is looked up in thegroups and
 * the result stored in yylval.intval.  The character that ended the name
 * is left in lookaheadchar for yylex().
 *
 * Characters beyond the capacity of yytext are consumed but not stored and
 * an error is reported.  A NUL byte ends the name: strchr() would otherwise
 * match it against the terminator of the "_-.+" string.
 *
 * Always returns 0; callers ignore the value.
 */
int
scanng()
{
	int c;			/* byte read */
	char *cp;		/* pointer into yytext */
	char *limit;		/* last position a name character may use */
	int toolong = 0;

	cp = yytext;
	limit = yytext + sizeof(yytext) - 1;

	/* skip whitespace */
	do {
		c = readkey();
	} while( isspace(c) );

	while (c != 0 && isascii(c) && (isalnum(c) ||
					strchr( "_-.+", c ) != NULL) ) {
		if (cp < limit)
			*cp++ = c;
		else
			toolong = 1;
		c = readkey();
		}
	*cp = 0;
	if (toolong)
		parerror( "Newsgroup name too long" );
	yylval.intval = nums_lookup( &thegroups, yytext );
	lookaheadchar = c;
	return 0;
}

/*
 * yyerror - error hook called by the parser (and, through the scanerror
 * macro, by the scanner).  Forwards the message to parerror() together
 * with the text of the token being scanned.
 */
yyerror(s)
char *s;
{
	char *tok = yytext;	/* text of the current token */

	parerror( "%s at or near %s", s, tok );
}
