/* ================================================================= *\

   biblook -- look up references in a bibindexed BibTeX file

   This program was specifically developed for use with the
   computational geometry bibliographic database.  The database
   can be obtained by anonymous ftp from cs.usask.ca in the file
   `pub/geometry/geombib.tar.Z'.

   Version 1.0 written by Jeff Erickson <jeff@ics.uci.edu>, 27 Mar 92
   Version 2.0 written by Jeff Erickson <jeff@ics.uci.edu>, 17 Jun 92

   This program is in the public domain.  You may use it or modify
   it to your heart's content, at your own risk.

   %Make% gcc -O -o biblook biblook.c

   Usage: biblook bibfile [savefile]

   -----------------------------------------------------------------

   HOW IT WORKS:

   The user can enter any of the following commands:

   f[ind] [not] <field> <words>
	Find the entries containing the given words in any field
	with a prefix matching the <field> argument.  For example,
	`a' matches both `author' and `address', and `au' matches
	`author' only.  If the <field> argument is `-' (or any
	string with no letters or numbers), match any field.

	If `not' appears before the <field>, the sense of the search
	is reversed.  The symbols `~' and `!' can be used in place
	of `not'.

	Each word is a contiguous sequence of letters and digits.
	Case is ignored; accents should be omitted; apostrophes are
	not required.  Single characters and a few common words are
	also ignored.  Any word ending with an asterisk is treated
	as a prefix.  Thus, `point*' matches `point', `points',
	`pointer', etc.

   and [not] <field> <words>
   or [not] <field> <words>
	Intersect (resp. union) the results of the given search
	with the previous search.  Several of these commands may be
	combined on a single line.  Commands are handled in the order
	in which they appear; there is no precedence.  Unlike other
	commands, and like `not', these must be spelled out
	completely.  `&' can be used in place of `and', and `|' can
	be used in place of `or'.

   d[isplay]
	Display the results of the previous search.

   s[ave] [<filename>]
	Save the results of the previous results into the specified
	file.  If <filename> is omitted, the previous save file is
	used.  If no save file has ever been specified, results are
	saved in the file specified on the command line.  If no such
	file is specified, `save.bib' is used.  If the save file
	exists, results are appended to it.

   q[uit]/EOF
	Quit.

   Several commands can be combined on a single line by separating
   them with semicolons.  For example, the following command displays
   all STOC papers cowritten by Erdo"s without `Voronoi diagrams' in
   the title:

   f b stoc* | b symp* theory comp* & au erdos & ~t voronoi diagrams ; d

   -----------------------------------------------------------------
   Version history

   1.0 <jge> 3/29/92	Initial version complete
   1.1 <jge> 4/3/92	Fixed GetToken bug.
			Prompts and feedback messages sent to stderr
			instead of stdout, so results can be
			redirected to a file.

   2.0 <jge> 6/17/92	Major change in file format and commands.
	1. Allow searching on any field or all fields.
	2. More extensive boolean queries (and, or, not)
	3. New command to save results to a file
	4. New command to display results, rather than displaying
	   them automatically.
	5. Allow searching for prefixes
	6. Pipe display results through $PAGER or /usr/ucb/more
   2.1 <jge> 7/8/92	Minor bug fixes.

\* ================================================================= */


#define FILE_VERSION  2		/* file format version needed */
#define MAJOR_VERSION 2
#define MINOR_VERSION 2

#define MAXWORD	  31			/* MUST match value in bibindex.c */

#include <stdio.h>
#if (__STDC__ || __cplusplus || c_plusplus)
#include <stdlib.h>
#endif /*  (__STDC__ || __cplusplus || c_plusplus) */
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>

#if !__NeXT__
#include <unistd.h>
#endif /* !__NeXT__ */

#if sun
#if __cplusplus
extern "C" int	_filbuf(FILE *);	/* missing from stdio.h */
#if !__GNUC__
int	_flsbuf(unsigned char,FILE *);	/* missing from stdio.h */
#endif /* !__GNUC__ */
extern "C" int	_flsbuf(unsigned int, FILE *); /* missing from stdio.h */
extern "C" char	*tempnam(const char *, const char *);
					/* not defined by acc's stdio.h */
extern "C" int	waitpid(int, int *, int); /* not defined by any Sun .h file */
#else /* NOT __cplusplus */
int	_filbuf(FILE *);		/* missing from stdio.h */
#if !__GNUC__
int	_flsbuf(unsigned char,FILE *);	/* missing from stdio.h */
#endif /* !__GNUC__ */
char	*tempnam(const char *, const char *); /* not defined by acc's stdio.h */
int	waitpid(int, int *, int);	/* not defined by any Sun .h file */
#endif /* __cplusplus */
#endif /* sun */

#if __NeXT__
static char* p__;
static char* q__;
/* NB: This is not a general definition of tempnam(), but works for 
this program! */
#define tempnam(dir,pfx)	(p__ = tmpnam((char*)NULL), \
				 q__ = malloc(strlen(p__)+1), \
				 strcpy(q__,p__))
#include <libc.h>			/* for struct rusage definition */
#define waitpid(pid, statusp, options)	wait4((pid), (statusp), (options),\
					      (struct rusage*)NULL)
#endif /* __NeXT__ */

#if ardent
/* Stardent has only simple wait-for-all-children function, sigh... */
#define waitpid(pid, statusp, options)	wait((int*)NULL)
char *getenv(const char *name);		/* missing from system header files */
#endif

#if (__STDC__ || __cplusplus || c_plusplus)
#define VOID	void
#else /* NOT (__STDC__ || __cplusplus || c_plusplus) */
#define VOID
#endif /* (__STDC__ || __cplusplus || c_plusplus) */

/* ======================= UTILITY FUNCTIONS ======================= */

/* ----------------------------------------------------------------- *\
|  void die(const char *msg1, const char *msg2) -- print an error message and die
\* ----------------------------------------------------------------- */
void die(const char *msg1, const char *msg2)
{
    fprintf(stderr, "Error: %s %s\n", msg1, msg2);
    exit(1);
}

/* ----------------------------------------------------------------- *\
|  void safefread(void *ptr, size_t size, size_t num, FILE *fp)
|
|  Read from the file, but die if there's an error.
\* ----------------------------------------------------------------- */
void safefread(void *ptr, size_t size, size_t num, FILE *fp)
{
    if (fread(ptr, size, num, fp) < num)
	die("Unexpected EOF in bix file.", "");
}

/* ----------------------------------------------------------------- *\
|  char safegetc(FILE *fp)
|
|  Get the next character safely.  Used by routines that assume that
|  they won't run into the end of file.
\* ----------------------------------------------------------------- */
char safegetc(FILE *fp)
{
    if (feof(fp))
	die("Unexpected EOF in bib file.", "");
    return getc(fp);
}


/* ========================== INDEX TABLES ========================= */

typedef struct
{
    char  theword[16];
    short numindex;
    short *index;
} Index, *IndexPtr;

typedef struct
{
    char     thefield[16];
    short    numwords;
    IndexPtr words;
} IndexTable;

char numfields;
IndexTable *fieldtable;

short numoffsets;
long *offsets;

/* ----------------------------------------------------------------- *\
|  void ReadWord(FILE *ifp, char *word)
|
|  Read a "pascal" string into the given buffer
\* ----------------------------------------------------------------- */
void ReadWord(FILE *ifp, char *word)
{
    char length;

    safefread((void *) &length, sizeof(char), 1, ifp);
    if (length > MAXWORD)
	die("Index file is corrupt", "(word too long).");

    safefread((void *) word, sizeof(char), length, ifp);
    word[length] = 0;
}

/* ----------------------------------------------------------------- *\
|  void GetOneTable(FILE *ifp, IndexTable *table)
|
|  Get one index table from the file
\* ----------------------------------------------------------------- */
void GetOneTable(FILE *ifp, register IndexTable *table)
{
    short i, num;

    safefread((void *) &table->numwords, sizeof(short), 1, ifp);
    table->words = (IndexPtr) malloc(table->numwords * sizeof(Index));

    for (i=0; i<table->numwords; i++)
    {
	ReadWord(ifp, table->words[i].theword);
	safefread((void *) &num, sizeof(short), 1, ifp);
	table->words[i].numindex = num;
	table->words[i].index = (short *) malloc(num * sizeof(short));
	safefread((void *) table->words[i].index, sizeof(short), num,
		  ifp);
    }
}

/* ----------------------------------------------------------------- *\
|  void GetTables(char *filename)
|
|  Get the tables from the index file.
\* ----------------------------------------------------------------- */
void GetTables(char *filename)
{
    int version, i;
    FILE *ifp;

    ifp = fopen(filename, "r");
    if (!ifp)
	die("Can't read", filename);
    if (fscanf(ifp, "bibindex %d %*[^\n]%*c", &version) < 1)
	die(filename, "is not a bibindex file!");
    if (version != FILE_VERSION)
	die(filename, "is the wrong version.  Please rerun bibindex.");

    safefread((void *) &numoffsets, sizeof(short), 1, ifp);
    offsets = (long *) malloc(numoffsets * sizeof(long));
    safefread((void *) offsets, sizeof(long), numoffsets, ifp);

    safefread((void *) &numfields, sizeof(char), 1, ifp);
    fieldtable = (IndexTable *) malloc(numfields * sizeof(IndexTable));

    for (i=0; i<numfields; i++)
	ReadWord(ifp, fieldtable[i].thefield);

    for (i=0; i<numfields; i++)
	GetOneTable(ifp, fieldtable+i);

    fclose(ifp);
}

/* ----------------------------------------------------------------- *\
|  void FreeTables(void)
|
|  Free the index tables.
\* ----------------------------------------------------------------- */
void FreeTables(VOID)
{
    register int i,j;

    for (i=0; i<numfields; i++)
    {
	for (j=0; j<fieldtable[i].numwords; j++)
	    free(fieldtable[i].words[j].index);
	free(fieldtable[i].words);
    }
    free(fieldtable);
    free(offsets);
}


/* ----------------------------------------------------------------- *\
|  short FindIndex(IndexTable table, char *word, char prefix)
|
|  Find the index of a word in a table.  Return -1 if the word isn't
|  there.  If prefix is true, return the index of the first matching
|  word.
\* ----------------------------------------------------------------- */
short FindIndex(IndexTable table, char *word, char prefix)
{
    register IndexPtr words = table.words;
    register short hi, lo, mid, cmp;

    hi = table.numwords-1;
    lo = 0;

    while (hi>=lo)
    {
	mid = (hi+lo)/2;
	cmp = strcmp(word, words[mid].theword);

	if (cmp == 0)
	    return mid;
	else if (cmp < 0)
	    hi = mid-1;
	else if (cmp > 0)
	    lo = mid+1;
    }

    if (prefix && !strncmp(word, words[lo].theword, strlen(word)))
	return lo;
    else
	return -1;
}


/* =================== SET MANIPULATION ROUTINES =================== */

#define SETSCALE (sizeof(unsigned long)*8)

static short setsize;
static unsigned long setmask;	   /* used to erase extra bits */
typedef unsigned long *Set;

/* ----------------------------------------------------------------- *\
|  Set NewSet(void)
|
|  Get a new variable to hold sets of integers in the range
|  [0, numoffsets].  Set setsize and setmask.
\* ----------------------------------------------------------------- */
Set NewSet(VOID)
{
    setsize = (numoffsets + SETSCALE - 1)/SETSCALE;	/* HACK */
    setmask = (1<<(numoffsets%SETSCALE)) - 1;		/* KLUDGE */

    return (Set) malloc(setsize * SETSCALE);
}

/* ----------------------------------------------------------------- *\
|  void EmptySet(Set theset)
|
|  Empty the set.
\* ----------------------------------------------------------------- */
void EmptySet(Set theset)
{
    register int i;
    for (i=0; i<setsize; i++)
	theset[i] = 0L;
}

/* ----------------------------------------------------------------- *\
|  void SetUnion(Set src1, Set src2, Set result)
|
|  Get the union of two sets
\* ----------------------------------------------------------------- */
void SetUnion(Set src1, Set src2, Set result)
{
    register int i;
    for (i=0; i<setsize; i++)
	result[i] = src1[i] | src2[i];
}

/* ----------------------------------------------------------------- *\
|  void SetInsersection(Set src1, Set src2, Set result)
|
|  Get the intersection of two sets
\* ----------------------------------------------------------------- */
void SetIntersection(Set src1, Set src2, Set result)
{
    register int i;
    for (i=0; i<setsize; i++)
	result[i] = src1[i] & src2[i];
}

/* ----------------------------------------------------------------- *\
|  void SetComplement(Set src, Set result)
|
|  Get the complement of a set
\* ----------------------------------------------------------------- */
void SetComplement(Set src, Set result)
{
    register int i;
    for (i=0; i<setsize; i++)
	result[i] = ~src[i];
    result[setsize-1] &= setmask;	/* clear those last few bits */
}

/* ----------------------------------------------------------------- *\
|  void CopySet(Set src, Set result)
|
|  Copy one set into another
\* ----------------------------------------------------------------- */
void CopySet(Set src, Set result)
{
    register int i;
    for (i=0; i<setsize; i++)
	result[i] = src[i];
}


/* ----------------------------------------------------------------- *\
|  int CountSet(Set theset)
|
|  Get the cardinality of the set
\* ----------------------------------------------------------------- */
int CountSet(Set theset)
{
    register unsigned i, j, count;

    count = 0;
    for (i=0; i<(unsigned)setsize; i++)
	for (j=0; j<(unsigned)SETSCALE; j++)
	    if (theset[i] & (1<<j))
		count++;

    return count;
}

/* ----------------------------------------------------------------- *\
|  void BuildSet(Set theset, short *thelist, short length)
|
|  Build a set out of a list of integers
\* ----------------------------------------------------------------- */
void BuildSet(Set theset, short *thelist, short length)
{
    register unsigned i;

    EmptySet(theset);
    for (i=0; i<(unsigned)length; i++)
	theset[thelist[i]/SETSCALE] |= 1 << (thelist[i] % SETSCALE);
}

/* ----------------------------------------------------------------- *\
|  void DoForSet(Set theset, void (*action)(int, void *), void *arg)
|
|  Do something to every element in a set
\* ----------------------------------------------------------------- */
void DoForSet(Set theset, void (*action)(int, void *), void *arg)
{
    register unsigned i, j;

    for (i=0; i<(unsigned)setsize; i++)
	for (j=0; j<(unsigned)SETSCALE; j++)
	    if (theset[i] & (1<<j))
		(*action)(SETSCALE*i + j, arg);
}


/* ======================== SEARCH ROUTINES ======================== */

Set results, oldresults, oneword, onefield;
short firstfield, lastfield;		/* indices into fieldtable */

/* ----------------------------------------------------------------- *\
|  void InitSearch(void)
|
|  Initialize the search lists
\* ----------------------------------------------------------------- */
void InitSearch(VOID)
{
    results = NewSet();
    oldresults = NewSet();
    oneword = NewSet();
    onefield = NewSet();
    firstfield = lastfield = -1;
}

/* ----------------------------------------------------------------- *\
|  void FreeSearch(void)
|
|  Free the search list
\* ----------------------------------------------------------------- */
void FreeSearch(VOID)
{
    free(results);
    free(oldresults);
    free(oneword);
    free(onefield);
}

/* ----------------------------------------------------------------- *\
|  void ClearResults(void)
|
|  Clear the current and old results
\* ----------------------------------------------------------------- */
void ClearResults(VOID)
{
    EmptySet(results);
    SetComplement(results, results);
    CopySet(results, oldresults);
}

/* ----------------------------------------------------------------- *\
|  void SaveResults(void)
|
|  Save and clear the current results
\* ----------------------------------------------------------------- */
void SaveResults(VOID)
{
    CopySet(results, oldresults);
    EmptySet(results);
    SetComplement(results, results);
}

/* ----------------------------------------------------------------- *\
|  void CombineResults(char invert, char intersect)
|
|  Combine current results with old results
\* ----------------------------------------------------------------- */
void CombineResults(char invert, char intersect)
{
    if (invert)
	SetComplement(results, results);
    if (intersect)
	SetIntersection(results, oldresults, results);
    else
	SetUnion(results, oldresults, results);
}


/* ----------------------------------------------------------------- *\
|  char SetUpField(char *field)
|
|  Set up the search fields.  Return the number of searchable fields.
\* ----------------------------------------------------------------- */
char SetUpField(char *field)
{
    int i, len;

    firstfield = -1;
    len = strlen(field);

    for (i=0; i<numfields; i++)
    {
	if (!strncmp(field, fieldtable[i].thefield, len))
	{
	    if (firstfield == -1)
		firstfield = i;
	    lastfield = i;
	}
    }

    if (firstfield == -1)
    {
	printf("\tNo searchable fields matching \"%s\".\n", field);
	return 0;
    }
    else
	return lastfield - firstfield + 1;

}

/* ----------------------------------------------------------------- *\
|  void FindWord(char *word, char prefix)
|
|  Find a word in the currently active field and update `results'.
|  If the prefix flag is set, find all words having the given prefix.
\* ----------------------------------------------------------------- */
void FindWord(register char *word, char prefix)
{
    static char badwords[][5] =
    {
	"an", "and", "for", "in", "of",
	"on", "the", "to", "with", "",
    };
    register IndexPtr words;
    short win, len;
    int i;

    if (!prefix)
    {
	if (!word[0])
	{
	    printf("\t[ignoring empty string]\n");
	    return;
	}
	if (!word[1])
	{
	    printf("\t[ignoring \"%s\"]\n", word);
	    return;
	}
	for (i=0; *badwords[i]; i++)
	{
	    if (!strcmp(badwords[i], word))
	    {
		printf("\t[ignoring \"%s\"]\n", word);
		return;
	    }
	}
    }

    EmptySet(oneword);
    len = strlen(word);

    for (i=firstfield; i<=lastfield; i++)
    {
	words = fieldtable[i].words;
	win = FindIndex(fieldtable[i], word, prefix);

	if (win != -1)
	{
	    if (prefix)
	    {
		while ((win < fieldtable[i].numwords) &&
		       !strncmp(words[win].theword, word, len))
		{
		    BuildSet(onefield, words[win].index,
			     words[win].numindex);
		    SetUnion(oneword, onefield, oneword);
		    win++;
		}
	    }
	    else
	    {
		BuildSet(onefield, words[win].index,
			 words[win].numindex);
		SetUnion(oneword, onefield, oneword);
	    }
	}
    }

    SetIntersection(oneword, results, results);
}


/* ============================= OUTPUT ============================ */
FILE *bibfp;

/* ----------------------------------------------------------------- *\
|  void ReportResults(void)
|
|  Report the results of the previous search.
\* ----------------------------------------------------------------- */
void ReportResults(VOID)
{
    int numresults;

    numresults = CountSet(results);

    if (numresults == 0)
	printf("\tNo matches found.\n");
    else if (numresults == 1)
	printf("\t1 match found.\n");
    else
	printf("\t%d matches found.\n", numresults);
}

/* ----------------------------------------------------------------- *\
|  void PrintEntry(int entry, FILE *ofp)
|
|  Print the entry.
\* ----------------------------------------------------------------- */
void PrintEntry(int entry, FILE *ofp)
{
    char ch;
    char braces;
    char quotes;

    if (entry >= numoffsets)		/* extra bits might be set */
	return;

    putc('\n', ofp);
    if (fseek(bibfp, offsets[entry], 0))
	die("Index file is corrupt.", "");

    ch = safegetc(bibfp);

    while (ch != '@')
    {
	putc(ch, ofp);
	ch = safegetc(bibfp);
    }
	while ((ch != '{') && (ch != '('))
    {
	putc(ch, ofp);
	ch = safegetc(bibfp);
    }

    braces = quotes = 0;

    putc(ch, ofp);
    ch = safegetc(bibfp);
    while (braces || quotes || ((ch != '}') && (ch != ')')))
    {
	if (ch == '{')
	    braces++;
	else if (ch == '}')
	    braces--;
	else if ((ch == '"') && !braces)
	    quotes = !quotes;
	putc(ch, ofp);
	ch = safegetc(bibfp);
    }

    putc(ch, ofp);
    putc('\n', ofp);
}

/* ----------------------------------------------------------------- *\
|  void PrintResults(char *filename)
|
|  Print the current search results into the given file.  If the
|  filename is NULL, pipe the output through $PAGER.
\* ----------------------------------------------------------------- */
void PrintResults(char *filename)
{
    int numresults;
    FILE *ofp;
    char *pager;
    char *the_tmpfile = (char*)NULL;
    int childpid;

    numresults = CountSet(results);
    if (numresults == 0)
	printf("\tNothing to display!\n");
    else if (numresults == numoffsets)
	printf("\tI can't display the entire bibliography!\n");
    else
    {
	if (filename)
	{
	    ofp = fopen(filename, "a");
	    if (!ofp)
	    {
		printf("\tCan't write to \"%s\"!\n", filename);
		return;
	    }
	}
	else
	{
	    the_tmpfile = (char*)tempnam(NULL, "bibl.");
	    ofp = fopen(the_tmpfile, "w");
	    if (!ofp)
	    {
		printf("\tCan't write to \"%s\"!\n", the_tmpfile);
		return;
	    }
	}

	if (filename)
	{
	    time_t now = time(0);
	    fprintf(ofp, "%% Retrieved by biblook %d.%d at %s",
		    MAJOR_VERSION, MINOR_VERSION, ctime(&now));
	}

	DoForSet(results, (void (*)(int, void *)) PrintEntry,
		 (void *) ofp);

	fclose(ofp);
	if (filename)
	    printf("\tResults saved in \"%s\"\n", filename);
	else
	{
	    pager = getenv("PAGER");

	    if (childpid = fork())
		waitpid(childpid, NULL, 0);
	    else if (pager)
	    {
		execlp(pager, pager, the_tmpfile, (char *) 0);
		perror(pager);		/* should never get here! */
		exit(0);
	    }
	    else
	    {
		execl("/usr/ucb/more", "more", the_tmpfile, (char *) 0);
		perror("/usr/ucb/more");
		exit(0);
	    }

	    unlink(the_tmpfile);
	    free(the_tmpfile);	    	/* malloc'ed by tempnam() */
	    putchar('\n');
	}
    }
}


/* ======================== USER INTERFACE ========================= */

typedef enum {
    T_Find, T_Display, T_Save, T_Quit, T_Word,
    T_And, T_Or, T_Not, T_Semi, T_Return, T_Help
} Token;

/* ----------------------------------------------------------------- *\
|  Token GetToken(char *tokenstr)
|
|  Get the next input token.
\* ----------------------------------------------------------------- */
Token GetToken(char *tokenstr)
{
    static char line[256];
    static short pos;
    static char neednew = 1;
    short tlen = 0;

    *tokenstr = 0;

    if (neednew)
    {
	printf("biblook: ");
	if (!fgets(line, 254, stdin))
	    return T_Quit;

	pos = 0;
	neednew = 0;
    }

    while ((line[pos] == ' ') || (line[pos] == '\t'))
	pos++;

    switch (line[pos])
    {
	case '\n':
	    pos++;
	    neednew = 1;
	    return T_Return;

	case '&':
	    pos++;
	    return T_And;

	case '|':
	    pos++;
	    return T_Or;

	case '~':
	case '!':
	    pos++;
	    return T_Not;

	case ';':
	    pos++;
	    return T_Semi;

	default:
	    tokenstr[tlen++] = tolower(line[pos++]);
	    while (!isspace(line[pos]) && (line[pos] != ';') &&
		   (line[pos] != '&') && (line[pos] != '|'))
	    {
		tokenstr[tlen++] = tolower(line[pos++]);
	    }
	    tokenstr[tlen] = 0;

	    /* I really ought to use a hash table here. */

	    if (!strncmp(tokenstr, "find", tlen))
		return T_Find;
	    else if (!strncmp(tokenstr, "display", tlen))
		return T_Display;
	    else if (!strncmp(tokenstr, "help", tlen))
		return T_Help;
	    else if (!strncmp(tokenstr, "?", tlen))
		return T_Help;
	    else if (!strncmp(tokenstr, "save", tlen))
		return T_Save;
	    else if (!strncmp(tokenstr, "quit", tlen))
		return T_Quit;
	    else if (!strcmp(tokenstr, "and"))
		return T_And;
	    else if (!strcmp(tokenstr, "or"))
		return T_Or;
	    else if (!strcmp(tokenstr, "not"))
		return T_Not;
	    else
		return T_Word;
    }
}


/* ----------------------------------------------------------------- *\
|  char Strip(char *string)
|
|  Strip all but alphanumeric characters out of the string.  Return
|  true if the original string ended with the prefix character '*'.
\* ----------------------------------------------------------------- */
char Strip(char *string)
{
    char prefix = 0;
    char *src = string;

    while (*src)
    {
	prefix = (*src == '*');
	if (isalnum(*src))
	    *string++ = *src;
	src++;
    }
    *string = 0;
    return prefix;
}

/* ----------------------------------------------------------------- *\
|  void CmdError(void)
|
|  Print syntax error message
\* ----------------------------------------------------------------- */
void CmdError(VOID)
{
    printf("?? Syntax error ??\n");
}

static const char* helplines[] =
{
    "biblook permits rapid lookup in a BibTeX bibliography data",
    "base, using a compact binary index file prepared by bibindex(1).",
    "",
    "Available commands:",
    "? or h[elp]",
    "     Display this help message.",
    "",
    "f[ind] [not] <field> <words>",
    "     Find the entries containing the given words in any",
    "     field with a prefix matching the <field> argument.  For",
    "     example, `a' matches both `author' and `address', and",
    "     `au' matches `author' only.  If the <field> argument is",
    "     `-' (or any string with no letters or numbers), match",
    "     any field.",
    "",
    "     If `not' appears before the <field>, the sense of the",
    "     search is reversed.  The symbols `~' and `!' can be",
    "     used in place of `not'.",
    "",
    "     Each word is a contiguous sequence of letters and",
    "     digits.  Case is ignored; accents should be omitted;",
    "     apostrophes are not required.  Single characters and a",
    "     few common words are also ignored.  Any word ending",
    "     with an asterisk is treated as a prefix.  Thus,",
    "     `point*' matches `point', `points', `pointer', etc.",
    "",
    "and [not] <field> <words>",
    "or [not] <field> <words>",
    "     Intersect (resp. union) the results of the given search",
    "     with the previous search.  Several of these commands",
    "     may be combined on a single line.  Commands are handled",
    "     in the order in which they appear; there is no pre-",
    "     cedence.  Unlike other commands, and like `not', these",
    "     must be spelled out completely.  `&' can be used in",
    "     place of `and', and `|' can be used in place of `or'.",
    "",
    "d[isplay]",
    "     Display the results of the previous search.",
    "",
    "s[ave] [<filename>]",
    "     Save the results of the previous results into the",
    "     specified file.  If <filename> is omitted, the previous",
    "     save file is used.  If no save file has ever been",
    "     specified, results are saved in the file specified on",
    "     the command line.  If no such file is specified,",
    "     `save.bib' is used.  If the save file exists, results",
    "     are appended to it.",
    "",
    "q[uit]/EOF",
    "     Quit.",
    "",
    "Several commands can be combined on a single line by",
    "separating them with semicolons.  For example, the following",
    "command displays all STOC papers cowritten by Erdo\"s",
    "without `Voronoi diagrams' in the title:",
    "",
    "f b stoc* | b symp* theory comp* & au erdos & ~t voronoi diagrams ; d",
    "",
    (const char*)NULL,
};

/* ----------------------------------------------------------------- *\
|  void GiveHelp(void)
|
|  Print a help message.  Lines are stored as separate strings to
|  avoid hitting compiler limits.
\* ----------------------------------------------------------------- */

void GiveHelp(VOID)
{
    int k;

    for (k = 0; helplines[k]; ++k)
	printf("\t%s\n",helplines[k]);
}

/* ----------------------------------------------------------------- *\
|  States for Lookup()
\* ----------------------------------------------------------------- */
typedef enum {
    Wait, Find, FindN, FindF, FindW,
    Display, Save, SaveF, Error
} CmdState;

/* ----------------------------------------------------------------- *\
|  void Lookup(const char *defsave)
|
|  Execute commands until the user quits.  Defsave is the default
|  save file name.  This is one big finite state machine.  It's long
|  and boring, but that's interface code for ya!
\* ----------------------------------------------------------------- */
void Lookup(const char *defsave)
{
    char tokenstr[256];
    char savefile[256];
    CmdState state = Wait;
    Token thetoken;
    char intersect = 1;		/* 1 = intersect, 0 = union */
    char invert = 0;		/* 1 = invert */
    char prefix;		/* 1 = word is really a prefix */

    ClearResults();
    strcpy(savefile, defsave);

    while (1)
    {
	thetoken = GetToken(tokenstr);

	if ((thetoken == T_Quit) && !tokenstr[0])
	    return;
	else if (thetoken == T_Help)
	{
	    GiveHelp();
	    continue;
	}

	switch (state)
	{
	    case Wait:
		switch (thetoken)
		{
		    case T_Quit:
			return;
		    case T_Find:
			state = Find;
			ClearResults();
			break;
		    case T_And:
			state = Find;
			SaveResults();
			break;
		    case T_Or:
			state = Find;
			intersect = 0;
			SaveResults();
			break;
		    case T_Display:
			state = Display;
			break;
		    case T_Save:
			state = Save;
			break;
		    case T_Return:
		    case T_Semi:
			break;
		    default:
			state = Error; CmdError(); break;
		}
		break;

	    case Find:
		if (thetoken == T_Not)
		{
		    state = FindN;
		    invert = 1;
		}
		else
		{
		    if (tokenstr[0])
		    {
			state = FindF;
			Strip(tokenstr);
			if (!SetUpField(tokenstr))
			    state = Error;
		    }
		    else
		    {
			state = (thetoken == T_Return) ? Wait : Error;
			CmdError();
		    }
		}
		break;

	    case FindN:
		if (tokenstr[0])
		{
		    state = FindF;
		    Strip(tokenstr);
		    if (!SetUpField(tokenstr))
			state = Error;
		}
		else
		{
		    state = (thetoken == T_Return) ? Wait : Error;
		    CmdError();
		}
		break;

	    case FindF:
		if (tokenstr[0])
		{
		    state = FindW;
		    prefix = Strip(tokenstr);
		    FindWord(tokenstr, prefix);
		}
		else
		{
		    state = (thetoken == T_Return) ? Wait : Error;
		    CmdError();
		}
		break;

	    case FindW:
		switch (thetoken)
		{
		    case T_And:
			state = Find;
			CombineResults(invert, intersect);
			SaveResults();
			invert = 0;
			intersect = 1;
			break;
		    case T_Or:
			state = Find;
			CombineResults(invert, intersect);
			SaveResults();
			invert = 0;
			intersect = 0;
			break;
		    case T_Semi:
			state = Wait;
			CombineResults(invert, intersect);
			invert = 0;
			intersect = 1;
			break;
		    case T_Return:
			state = Wait;
			CombineResults(invert, intersect);
			ReportResults();
			invert = 0;
			intersect = 1;
			break;
		    default:
			if (tokenstr[0])
			{
			    state = FindW;
			    prefix = Strip(tokenstr);
			    FindWord(tokenstr, prefix);
			}
			else
			{
			    state = Error;
			    CmdError();
			}
			break;
		}
		break;

	    case Display:
		if ((thetoken == T_Semi) || (thetoken == T_Return))
		{
		    state = Wait;
		    PrintResults(NULL);
		}
		else
		{
		    state = Error;
		    CmdError();
		}
		break;

	    case Save:
		if (tokenstr[0])
		{
		    state = SaveF;
		    strcpy(savefile, tokenstr);
		}
		else if ((thetoken == T_Semi) || (thetoken == T_Return))
		{
		    state = Wait;
		    PrintResults(savefile);
		}
		else
		{
		    state = Error;
		    CmdError();
		}
		break;

	    case SaveF:
		if ((thetoken == T_Semi) || (thetoken == T_Return))
		{
		    state = Wait;
		    PrintResults(savefile);
		}
		else
		{
		    state = Error;
		    CmdError();
		}
		break;

	    case Error:
		switch (thetoken)
		{
		    case T_Quit:
			return;
		    case T_Return:
			state = Wait;
			break;
		    default:
			break;
		}
		break;
	}
    }
}


/* ================================================================= *\
|  The main program
\* ================================================================= */
int main(int argc, char **argv)
{
    char bibfile[256];
    char bixfile[256];
    struct stat bibstat, bixstat;
    char *p;

    printf("biblook version %d.%d  file version %d\n",
	   (int)MAJOR_VERSION, (int)MINOR_VERSION, (int)FILE_VERSION);
    printf("Type ? or h for help\n");

    if ((argc != 2) && (argc != 3))
    {
	fprintf(stderr, "Usage: biblook bib [savefile]\n");
	exit(1);
    }

    if (((p = strrchr(argv[1],'.')) != (char*)NULL) &&
	(strcmp(p, ".bib") == 0))
	*p = '\0';			/* remove any .bib extension */

    sprintf(bibfile, "%s.bib", argv[1]);
    sprintf(bixfile, "%s.bix", argv[1]);

    stat(bibfile, &bibstat);
    stat(bixfile, &bixstat);
    if (bibstat.st_mtime > bixstat.st_mtime)
	die(bixfile, "is out of date.  Please rerun bibindex.");

    bibfp = fopen(bibfile, "r");
    if (!bibfp)
	die("Can't read", bibfile);

    GetTables(bixfile);
    InitSearch();

    if (argc == 3)
	Lookup(argv[2]);
    else
	Lookup("save.bib");

    FreeSearch();
    FreeTables();

    fclose(bibfp);
    exit(0);
    return(0);
}
