/* parser */

/*
 * Copyright 1989 Jonathan Lee.  All rights reserved.
 *
 * Permission to use, copy, and/or distribute for any purpose and
 * without fee is hereby granted, provided that both the above copyright
 * notice and this permission notice appear in all copies and derived works.
 * Fees for distribution or use of this software or derived works may only
 * be charged with express written permission of the copyright holder.
 * This software is provided ``as is'' without express or implied warranty.
 */

#include "fools.h"
#include <ctype.h>
#include <sys/ioctl.h>
#include "utils.h"
#include "parser.h"

#ifndef lint
static char SccsId[] = "@(#)parser.c	1.13 2/23/90";
#endif

extern double atof();
extern double fabs();

char parsePrompt1[PROMPT_LEN] = "> ", parsePrompt2[PROMPT_LEN] = "? ";

/* special chars */
static struct {
    char special;
    Token tok;
} specialChars[ 128 ];

#define setSpecial(c, token) \
    specialChars[(c)].special = TRUE; specialChars[(c)].tok = (token)
	
/* special input characters */
#define isspecial(c) ((c) < 128 && specialChars[(c)].special)

Mem parInfoAlloc;

/* initialize the parsing module */
void parseInit()
{
    setSpecial('(', tLpar);
    setSpecial(')', tRpar);
    setSpecial('\'', tQuote);
    setSpecial('{', tLbrace);
    setSpecial('}', tRbrace);
    setSpecial('[', tLbracket);
    setSpecial(']', tRbracket);
    setSpecial('`', tBquote);
    setSpecial('\"', tNone);
    setSpecial(';', tNone);
    setSpecial(',', tNone);

    /* white space */
    setSpecial(' ', tSpace);
    setSpecial('\n', tSpace);
    setSpecial('\t', tSpace);
    setSpecial('\f', tSpace);
    setSpecial('\b', tSpace);
    setSpecial('\0', tSpace);

    parInfoAlloc = memNew(ALIGNSIZE(sizeof (struct par_info_s)), 2);
}

/* forward declarations */
static void lexUngetToken();
static Token lexToken();
static int parseChar();
static Obj parseList(), parseVector(), parseSymbol();

/* return the next expression from st or NULL if EOF
 *
 * prompts are sent to wfile (if NULL then no prompting occurs) */
Obj parseObj(st, wfile)
     Obj st;
     FILE *wfile;
{
    Obj new;
    Token tok;
    Boolean recur;
    char *ptr;

    recur = checkCond(st, MORE);

    tok = lexToken(st, wfile);
    ptr = objText(st);
    setCond(st, MORE);
    
    switch (tok) {

    case tEOF:
	if (recur) errorPrint(BadParse, "%O", st);
	new = (Obj)NULL;
	break ;

    case tLbracket:
    case tLpar:
	new = parseList(st, wfile, tok == tLpar ? tRpar : tRbracket);
	break ;
    case tRpar:
    case tRbracket:
	errorPrint(BadParen, (char *)NULL);

    case tLbrace:
    case tVector:
	new = parseVector(st, wfile, tok == tVector ? tRpar : tRbrace);
	break ;
    case tRbrace:
	errorPrint(BadBrace, (char *)NULL);

    case tChar:
	new = newChar(gcTemp, (char)parseChar(ptr + 2));
	break ;
    case tInt:
	new = parseInteger(ptr);
	break ;
    case tNum:
	new = newNumber(gcTemp, atof(ptr));
	setCond(new, EXACT);
	break ;

    case tString:
	new = newString(gcTemp, ptr);
	break ;
    case tPsymbol:
	new = newPackage(ptr);
	break ;
    case tSymbol:
	new = parseSymbol(ptr);
	break ;

	/* expand abbreviations */
    case tQuote:
    case tSplice:
    case tUnquote:
    case tBquote:
	if ((new = parseObj(st, wfile)) == (Obj)NULL)
	    errorPrint(BadParse, "%O", st);
	new = objOp1(tok == tQuote ? QuoteSymb :
		     (tok == tSplice ? SpliceSymb :
		      (tok == tUnquote ? UnquoteSymb : QuasiSymb)),
		     new);
	break ;

    case tBox:
	if ((new = parseObj(st, wfile)) == (Obj)NULL)
	    errorPrint(BadParse, "%O", st);
	new = newBox(gcTemp, new);
	break ;

    default:
	fprintf(stderr, "Unknown token - ignored\n");
	break ;
    }
    
    if (!recur) clearCond(st, MORE);
    return new;
}

/* read in the rest of the list and return it */
static Obj parseList(st, wfile, term)
     Obj st;
     FILE *wfile;
     Token term;
{
    Obj list = (Obj)NULL, cur = (Obj)NULL, temp;
    Token tok;

    /* read until a right paren */
    while ((tok = lexToken(st, wfile)) != term) {
	/* check for dotted list */
	if ((tok == tSymbol) && (strcmp(objText(st), ".") == 0)) {
	    if (cur) {
		if (temp = parseObj(st, wfile))
		    objSetCdr(cur, temp);
		else errorPrint(BadParse, "%O", st);
	    }
	    else errorPrint(BadDot, (char *)NULL);

	    /* next token must be the term token */
	    if ((tok = lexToken(st, wfile)) != term)
		errorPrint(BadDot, (char *)NULL);
	    
	    return list;
	}
	/* append new cons cell */
	lexUngetToken(st, tok);
	if ((temp = parseObj(st, wfile)) == (Obj)NULL)
	    errorPrint(BadParse, "%O", st);
	if (cur) {
	    objSetCdr(cur, temp = newPair(gcNew, temp, NilSymb));
	    cur = temp;
	}
	else {
	    cur = list = newPair(gcTemp, temp, NilSymb);
	}
    }
    return (list ? list : NilSymb);
}

/* read in the rest of a vector and return it */
static Obj parseVector(st, wfile, term)
     Obj st;
     FILE *wfile;
     Token term;
{
    Obj vec;
    int size;
    List items;
    Callback_t cb;
    Token tok;

    items = gcListNew(&cb);

    for (size = 0; (tok = lexToken(st, wfile)) != term; size++) {
	lexUngetToken(st, tok);
	if ((vec = parseObj(st, wfile)) == (Obj)NULL)
	    errorPrint(BadParse, "%O", st);
	listPush((Ptr)vec, items);
    }

    vec = newVector(gcTemp, size);
    while (--size >= 0)
	objVectorSet(vec, size, (Obj)listPop(items));

    gcListFree(items);

    return vec;
}

/* convert #\chr into a char */
static int parseChar(ch)
     char *ch;
{
    if (ch[0] == '\0') return ' ';
    else if (ch[1] == '\0') return ch[0];
    else if (strcmp(ch, "newline") == 0) return '\n';
    else if (strcmp(ch, "tab") == 0) return '\t';
    else if (strcmp(ch, "space") == 0) return ' ';
    else if (ch[0] == 'x' && ch[3] == '\0') {
	double num;

	if (str2num(ch + 1, 16, &num))
	    return (int)num;
    }

    errorPrint(BadChar, "syntax: #\\%s", ch);
    /*NOTREACHED*/
}

/* convert str into an integer */
Obj parseInteger(str)
     char *str;
{
    Obj new;
    Boolean exact = TRUE;
    int base = 10;
    char *ptr = str;
    double num;

    if (*ptr == '#') {
	char c;

	c = *++ptr;
	if (c == 'i' || c == 'e') {
	    exact = (c == 'e');
	    c = *++ptr;
	}

	++ptr;
	switch (c) {
	case 'b': base = 2; break ;
	case 'o': base = 8; break ;
	case 'd': base = 10; break ;
	case 'x': base = 16; break ;
	default:
	    --ptr;
	    break ;
	}
    }

    if (str2num(ptr, base, &num)) {
	if (fabs(num) <= (double)0x7fffffff)
	    new = newInteger(gcTemp, (long)num);
	else new = newNumber(gcTemp, num);
    }
    else errorPrint(BadChar, "in integer %s", str);

    if (!exact) clearCond(new, EXACT);
    return new;
}

/* convert sym into a symbol */
static Obj parseSymbol(sym)
     char *sym;
{
    if (sym[0] == '#') {
	if (sym[2] == '\0')
	    switch (sym[1]) {
	    case 't':
		return TrueSymb;
	    case 'f':
		return FalseSymb;
	    default:
		break ;
	    }
    }
    return objIntern(sym, 0);
}

	/* lex routines */

/* forward declarations */
static void lexString(), lexFill();
static Token lexHash(), lexNumOrSym();

/* unget the most recently read token
 *
 * Only one token may be pushed back per read. */
static void lexUngetToken(st, tok)
     Obj st;
     Token tok;
{
    objState(st) = tok;
    setCond(st, PUSHBACK);
}

/* check for input */
static Boolean inputWaiting(file)
     FILE *file;
{
    int bytes;

    if (file->_cnt > 0) return TRUE;

#ifdef FIONREAD

#ifdef  __SABER__

    /* <sys/ioctl.h> may not be ANSI standard C, so redefine for saber */
#undef FIONREAD
#define FIONREAD (IOC_OUT|((sizeof(int)&IOCPARM_MASK)<<16)|('f'<<8)|127)

#endif /* __SABER__ */

    if (ioctl(fileno(file), FIONREAD, &bytes) != 0)
	perror("ioctl");
    else if (bytes > 0) return TRUE;

#endif /* FIONREAD */

    return FALSE;
}
     
/* returns the next token from the input stream st
 *
 * prompts are sent wfile (if NULL no prompting will occur) */
static Token lexToken(st, wfile)
     Obj st;
     FILE *wfile;
{
    char *s;
    int c;
    FILE *file;

    if (checkCond(st, PUSHBACK)) {
	clearCond(st, PUSHBACK);
	return objState(st);
    }
    file = objFile(st);

    if ((s = objText(st)) == (char *)NULL)
	errorPrint(BadRead, "on write only file %O", st);

#ifdef PLIMIT
    /* zero input buffer and mark next to last char */
    while (*s) *s++ = '\0';
    s = objText(st);
    s[ LEXBUF-2 ] = (char)1;
#endif /* PLIMIT */

    for (;;) {
	if (checkCond(st, QUERY) && wfile) {
	    if (!inputWaiting(file))
		(void)fputs(checkCond(st, MORE) ? parsePrompt2 : parsePrompt1,
			    wfile);
	    clearCond(st, QUERY);
	}

	if ((c = getc(file)) == EOF)
	    return tEOF;
	else if (isspecial(c)) {
	    Token tok;

	    tok = specialChars[c].tok;
	    if (tok == tSpace) {
		if (c == '\n') setCond(st, QUERY);
		continue ;
	    }
	    else if (tok != tNone) return tok;

	    switch (c) {
		/* comment */
	    case ';':
		for (;;) {
		    if ((c = getc(file)) == EOF) return tEOF;
		    else if (c == '\n') {
			setCond(st, QUERY);
			break ;
		    }
		}
		continue ;

		/* string literal */
	    case '\"':
		lexString(file, s);
		return tString;

		/* unquote and unquote-splicing shorthands */
	    case ',':
		if ((c = getc(file)) == '@') return tSplice;
		else {
		    ungetc(c, file);
		    return tUnquote;
		}
	    }
	}
	else if (c == '#') /* handle #stuff */
	    return lexHash(file, s);

	ungetc(c, file);
	return lexNumOrSym(file, s);
    }
}

#ifdef PLIMIT
/* skip the rest of the input token */
static void lexSkip(fp)
     FILE *fp;
{
    int c;

    (void)fputs("input buffer overflowed\n", stderr);
    while ((c = getc(fp)) != EOF && !isspecial(c));
    ungetc(c, fp);
}
#endif /* PLIMIT */

/* read in a string */
static void lexString(fp, s)
     FILE *fp;
     char *s;
{
    int c;

    for (;;) {
	if ((c = getc(fp)) == EOF)
	    errorPrint(BadParse, "string");
#ifdef PLIMIT
	else if (*s) {
	    (void)fputs("input buffer overflowed\n", stderr);
	    *s = '\0';
	    while (c != '\"' && c != EOF) {
		if ((c = getc(fp)) == '\\') c = getc(fp);
	    }
	    return ;
	}
#endif /* PLIMIT */

	switch (c) {
	case '\"':
	    *s = '\0';
	    return ;

	case '\\':
	    switch (c = getc(fp)) {
	    case EOF: errorPrint(BadParse, "string");
	    case 'n': c = '\n'; break ;
	    case 't': c = '\t'; break ;
	    case '\"': c = '\"'; break ;
	    case '\'': c =  '\''; break ;
	    case 'x': {
		char hex[3];
		double num;
		
		hex[0] = getc(fp);
		hex[1] = getc(fp);
		hex[2] = '\0';
		
		if (hex[0] == EOF || hex[1] == EOF || !str2num(hex, 16, &num))
		    errorPrint(BadParse, "string");
		c = (double)num;
		break ;
	    }
	    default:
		break ;
	    }
	default:
	    break ;
	}
#ifdef PLIMIT
	if ((*s++ = c) == '\0') {
	    /* skip remainder of input until an end quote or EOF */
	    while (c = getc(fp), c != '\"' && c != EOF) {
		if (c == '\\') (void)getc(fp);
	    }
	    return ;
	}
#else
	*s++ = c;
#endif /* PLIMIT */
    }
}

/* fill s with characters from fp until a whitespace or special appears */
static void lexFill(fp, s)
     FILE *fp;
     char *s;
{
    int c;

    while ((c = getc(fp)) != EOF) {
	if (isspecial(c)) {
	    ungetc(c, fp);
	    break ;
	}
#ifdef PLIMIT
	else if (*s) {
	    lexSkip(fp);
	    break ;
	}
#endif /* PLIMIT */
	else *s++ = c;
    }
    *s = '\0';
    return ;
}
	    
/* parse #stuff */
static Token lexHash(fp, s)
     FILE *fp;
     char *s;
{
    int c;

    *s++ = '#';
    switch (c = *s++ = getc(fp)) {
    case EOF:
	errorPrint(BadParse, "symbol");

    case '\\':
	/* character */
	if ((c = getc(fp)) == EOF)
	    errorPrint(BadParse, "character");
	else if (isspace(c)) {
	    *s++ = ' ';
	    *s = '\0';
	    return tChar;
	}
	*s++ = c;
	lexFill(fp, s);
	return tChar;

    case '(':
	/* vector */
	return tVector;

    case '&':
	/* box */
	return tBox;

    case 'e':
    case 'i':
    case 'b':
    case 'o':
    case 'd':
    case 'x':
	/* integer constants */
	lexFill(fp, s);
	return tInt;
	
    default:
#ifdef PLIMIT
	*(--s) = '\0';
#else
	--s;
#endif /* PLIMIT */
	ungetc(c, fp);
	lexFill(fp, s);
	return isdigit(c) ? tInt : tSymbol;
    }
}

/* lex a number or symbol and return the Token type */
static Token lexNumOrSym(fp, s)
     FILE *fp;
     char *s;
{
    int c;
    Boolean dec, exp, colon;
    enum lexState_e {
	lsStart, lsDigit1, lsDigit0, lsSymbol,
    } state;

    state = lsStart;
    dec = exp = colon = FALSE;

    for (;;) {
	if ((c = getc(fp)) == EOF  || isspecial(c)) {
	    ungetc(c, fp);
	    *s = '\0';
	    break ;
	}

#ifdef PLIMIT
	else if (*s) {
	    *s = '\0';
	    lexSkip(fp);
	    break ;
	}
#endif /* PLIMIT */

	else if ((*s++ = c) == ':') colon = TRUE;
	if (state == lsSymbol) continue ;

	switch (state) {
	case lsStart:
	    if (c == '+' || c == '-')
		state = lsDigit1;
	    else if (c == '.') {
		state = dec || exp ? lsSymbol : lsDigit1;
		dec = TRUE;
	    }
	    else if (isdigit(c))
		state = lsDigit0;
	    else state = lsSymbol;
	    break ;
	case lsDigit1:
	    if (c == '.') {
		state = dec || exp ? lsSymbol : lsDigit1;
		dec = TRUE;
	    }
	    else if (isdigit(c))
		state = lsDigit0;
	    else state = lsSymbol;
	    break ;
	case lsDigit0:
	    if (c == '.') {
		state = dec || exp ? lsSymbol : lsDigit0;
		dec = TRUE;
	    }
	    else if (c == 'e' || c == 'E') {
		state = exp ? lsSymbol : lsStart;
		exp = TRUE;
	    }
	    else if (isdigit(c))
		state = lsDigit0;
	    else state = lsSymbol;
	    break ;
	}
    }

    if (state == lsDigit0)
	return (dec || exp) ? tNum : tInt;
    return colon ? tPsymbol : tSymbol;
}

	/* base conversion utilities */

/* digit to character */
static int chr2digit(chr)
     char chr;
{
    if (isdigit(chr)) return chr - '0';
    else if (isupper(chr))
	return chr - 'A' + 10;
    else return chr - 'a' + 10;
}

/* character to digit */
static char digit2chr(digit)
     int digit;
{
    if (digit < 10) return digit + '0';
    else return digit - 10 + 'a';
}

/* convert integer of radix base in str to a number
 *
 * number is stored in out
 * and TRUE is returned if str contains only valid digits */
Boolean str2num(str, base, out)
     char *str;
     int base;
     double *out;
{
    double num = (double)0;
    char c;
    int sign = 1, digit;

    if ((c = *str) == '+') str++;
    else if (c  == '-') {
	str++;
	sign = -1;
    }

    while (c = *str++) {
	if ((digit = chr2digit(c)) >= base || digit < 0)
	    return FALSE;
	num = num * (double)base + (double)digit;
    }
    *out = sign < 0 ? -num : num;
    return TRUE;
}

/* convert num to string with radix base */
char *num2str(num, base)
     long num;
     int base;
{
    static char output[ 34 ];
    char *ptr = output + sizeof (output) - 1;
    int neg = 0;

    if (num < 0) {
	neg = 1;
	num = -num;
    }
    do *--ptr = digit2chr((int)(num % base));
    while (num /= base);
    if (neg) *--ptr = '-';
    return ptr;
}
