/* File: Homophones.m - Manipulates HashFile of homophones extracted from Webster
 *
 * By: Christopher Lane
 * Symbolic Systems Resources Group
 * Knowledge Systems Laboratory
 * Stanford University
 *
 * Date: 14 September 1990
 *
 * Copyright: 1990 by The Leland Stanford Junior University.  This program
 * may be distributed without restriction for non-commercial use.
 */

#import <c.h>
#import <libc.h>
#import <stdlib.h>
#import <ctype.h>
#import <string.h>
#import <stdio.h>
#import <getopt.h>

#import <objc/Storage.h>
#import <objc/HashFile.h>
#import <objc/HashTable.h>

#import <text/webster.h>
#import <text/strutil.h>
#import <text/pathutil.h>

/* Defaults used by main() when -h (database) and -w (dictionary) are not given. */
#define DATABASE "/usr/local/lib/homophones/Homophones"
#define REFERENCE "Webster-Dictionary"

/* Type description strings handed to HashTable, HashFile and Storage when they are created. */
#define ATOM "%" /* @encode(NXAtom) doesn't work :-( */
#define OBJECT @encode(id)

/* BUFFER_SIZE: size of the local char work buffers.
 * MAX_PRONUNCIATIONS: capacity of the per-sense pronunciation arrays. */
#define BUFFER_SIZE 512
#define MAX_PRONUNCIATIONS 48

/* Single characters that are significant when parsing a pronunciation. */
#define SPACE ' '
#define HYPHEN '-'
#define LPAREN '('
#define RPAREN ')'

/* SEPARATOR splits alternative pronunciations within one sense line;
 * the BEGIN/END strings bracket a pronunciation on output. */
#define SEPARATOR ", "
#define BEGINPRONUNCIATION "["
#define ENDPRONUNCIATION "]"

/* Error message format: program name, then the file that could not be opened. */
#define OPEN "%s: Can't open %s.\n"

/* getopt() option string and the matching usage message (argument: program name). */
#define OPTIONSTRING "bdrh:w:"
#define USAGE "usage: %s [-b] [-d] [-r] [-h database] [-w dictionary]\n"

/* OPTIONS: the option letters accepted in OPTIONSTRING, as switch labels for main().
 * ARGUMENTS: PROGRAM (0) is the index of the program name in argv. */
typedef enum { BUILD = 'b', DUMP = 'd', RAW = 'r', HASHFILE = 'h', DICTIONARY = 'w' } OPTIONS;
typedef enum { PROGRAM } ARGUMENTS;

/* Set by -r; when YES printPronunciation() writes pronunciations unfiltered. */
BOOL rawFlag = NO;

/*
 * complete - Tell whether a pronunciation is a whole item rather than a
 * fragment that begins or ends with a hyphen.
 *
 * Characters at either end that are neither letters nor hyphens are skipped
 * first; the verdict is based on the first and last characters that remain.
 *
 * string: NUL-terminated text to examine; it is not modified.
 *
 * Returns YES when neither significant end is a hyphen, NO otherwise.  An
 * empty string contains no hyphen and therefore yields YES.
 */
BOOL complete(char *string)
{
	size_t length = strlen(string);
	int head = 0;
	int tail = (length > 0) ? (int) length - 1 : 0; /* never index string[-1] for "" */

	/* Bytes above 0x7f would be negative as plain char; isalpha() requires unsigned char values. */
	while(head < tail && string[head] != HYPHEN && ! isalpha((unsigned char) string[head])) ++head;
	while(tail > head && string[tail] != HYPHEN && ! isalpha((unsigned char) string[tail])) --tail;

	/* Parenthesised so the cast applies to the whole test, not just to string[head]. */
	return (BOOL) (string[head] != HYPHEN && string[tail] != HYPHEN);
}

/*
 * garbage - Reject strings that should not be treated as a pronunciation.
 *
 * A string is rejected when it is at most one character long, contains a
 * space, or when any of the strutil helpers blank(), hasdigit() or
 * hasupper() reports true for it (presumably: all whitespace, contains a
 * digit, contains an upper-case letter -- confirm against <text/strutil.h>).
 *
 * string: NUL-terminated text to examine; it is not modified.
 *
 * Returns YES when the string should be discarded, NO otherwise.
 */
BOOL garbage(char *string)
{
	/* Test the full length: casting strlen() itself to BOOL would squeeze it
	   into a char before the comparison and misjudge long strings. */
	if(strlen(string) <= 1) return YES;

	return (BOOL) (blank(string) || index(string, SPACE) != NULL || hasdigit(string) || hasupper(string));
}

/*
 * printPronunciation - Write one pronunciation to stdout between
 * BEGINPRONUNCIATION and ENDPRONUNCIATION.
 *
 * With rawFlag set the text is written unchanged.  Otherwise each byte is
 * reduced to 7 bits with toascii() and written only if the result is
 * printable.
 *
 * atom: NUL-terminated pronunciation text.
 */
void printPronunciation(NXAtom atom)
{
	int c;

	(void) fputs(BEGINPRONUNCIATION, stdout);
	if(rawFlag) (void) fputs(atom, stdout);
	else {
		/* Stop on the real terminator only: a byte such as 0x80 masks down to
		   zero under toascii() and must be skipped, not end the output. */
		for(; *atom != '\0'; atom++) {
			c = toascii(*atom);
			if(isprint(c)) (void) putchar(c);
		}
	}
	(void) fputs(ENDPRONUNCIATION, stdout);
}

/*
 * options - Expand optional, parenthesised parts of pronunciations in place:
 *           /(')wich/ => /'wich/ & /wich/
 *
 * Every entry that contains "(...)" is replaced by a copy in which the
 * parentheses are removed but the enclosed text is kept, and a second copy
 * with the whole parenthesised part removed is appended to the array.  The
 * replaced entry is examined again, so several optional parts in one string
 * are all expanded.
 *
 * strings: array of heap-allocated strings; it is assumed to have room for
 *          MAX_PRONUNCIATIONS entries, as the array passed by
 *          pronunciations() does.  Entries that are replaced are freed here;
 *          the caller frees whatever the array holds on return.
 * size:    in/out number of entries in use.
 *
 * When the array is full the variant without the optional text is dropped
 * instead of being stored past the end of the array.  Entries too long for
 * the work buffer are left untouched.
 */
void options(char *strings[], unsigned int *size)
{
	unsigned int i = 0, offset, length;
	char *s, *t, *original, buffer[BUFFER_SIZE];

	while(i < *size) {
		original = strings[i];

		if(strlen(original) >= BUFFER_SIZE
		   || (s = index(original, LPAREN)) == NULL
		   || (t = index(s, RPAREN)) == NULL) {
			i++; /* nothing (more) to expand in this entry */
			continue;
		}

		offset = s - original; /* length of the text before '(' */
		s++;                   /* first character of the optional text */
		length = t - s;        /* length of the optional text */

		(void) strncpy(buffer, original, offset);

		/* Variant without the optional text: appended if there is room. */
		if(*size < MAX_PRONUNCIATIONS) {
			(void) strcpy(buffer + offset, t + 1);
			strings[(*size)++] = NXCopyStringBuffer(buffer);
		}

		/* Variant with the optional text: replaces the entry.  i is not
		   advanced, so the replacement is scanned again for further "(". */
		(void) strncpy(buffer + offset, s, length);
		(void) strcpy(buffer + offset + length, t + 1);
		strings[i] = NXCopyStringBuffer(buffer);

		free(original); /* s and t pointed into it; no longer needed */
	}
}

/*
 * pronunciations - Split a pronunciation line into its usable alternatives.
 *
 * The line is cut at every SEPARATOR.  Pieces for which garbage() is false
 * and complete() is true are kept, optional parts are expanded by options(),
 * and each resulting string is converted to a unique NXAtom.
 *
 * string: NUL-terminated pronunciation text; it is copied, not modified.
 * atoms:  receives the results; assumed to have room for MAX_PRONUNCIATIONS
 *         entries, as the arrays of the callers in this file do.
 *
 * Returns the number of atoms stored.
 *
 * Input that does not fit the work buffer is truncated; the final, possibly
 * clipped piece is then ignored.  Pieces beyond MAX_PRONUNCIATIONS are
 * ignored as well.
 */
unsigned int pronunciations(char *string, NXAtom atoms[])
{
	unsigned int i = 0, j;
	BOOL clipped = (BOOL) (strlen(string) >= BUFFER_SIZE);
	char *s, buffer[BUFFER_SIZE], *strings[MAX_PRONUNCIATIONS];

	/* Bounded copy: the dictionary text is not guaranteed to fit the buffer. */
	(void) strncpy(buffer, string, BUFFER_SIZE - 1);
	buffer[BUFFER_SIZE - 1] = '\0';
	string = buffer;

	while((s = strindex(string, SEPARATOR)) != NULL) {
		*s = '\0';
		if(i < MAX_PRONUNCIATIONS && !garbage(string) && complete(string))
			strings[i++] = NXCopyStringBuffer(string);
		string = s + (sizeof(SEPARATOR) - 1); /* continue after the separator */
	}
	if(!clipped && i < MAX_PRONUNCIATIONS && !garbage(string) && complete(string))
		strings[i++] = NXCopyStringBuffer(string);

	options(strings, &i);

	/* The atoms are unique copies, so the temporary strings can go. */
	for(j = 0; j < i; j++) {
		atoms[j] = NXUniqueString(strings[j]);
		free(strings[j]);
	}

	return i;
}

/*
 * buildTable - Read every definition from book and record which entries
 * share a pronunciation.
 *
 * A scratch HashTable maps each pronunciation atom to a Storage list of
 * entry names.  Once the book is exhausted, lists holding more than one
 * name are inserted into table under their pronunciation; all other lists
 * are freed.
 *
 * table: receives the pronunciation => word list pairs.
 * book:  open reference book, read with nextDefinition() until it returns NULL.
 */
void buildTable(id table, ReferenceBook *book)
{
	HashTable *pending = [HashTable newKeyDesc:ATOM valueDesc:OBJECT];
	NXAtom word, found[MAX_PRONUNCIATIONS];
	unsigned int n, total, held;
	void *atom, *stored;
	NXHashState cursor;
	Definition *entry;
	SenseList *line;
	Storage *words;

	while((entry = nextDefinition(book)) != NULL) {
		for(line = entry->l; line != NULL; line = line->next) {
			/* Only pronunciation lines that yield at least one atom matter. */
			if((line->s)[0] != dPronunciation) continue;
			if((total = pronunciations((line->s) + 1, found)) == 0) continue;

			word = NXUniqueString(entry->entry);

			for(n = 0; n < total; n++) {
				if(![pending isKey:(void *) found[n]]) {
					words = [Storage newCount:0 elementSize:sizeof(char *) description:ATOM];
					[pending insertKey:(void *) found[n] value:(void *) words];
				}
				else words = [pending valueForKey:(void *) found[n]];

				/* Append unless the same word is already the last element. */
				held = [words count];
				if(held == 0 || word != *((char **) [words elementAt:held - 1]))
					[words insert:(void *) &word at:held];
			}
		}
		freeDefinition(entry);
	}

	/* Keep the lists with several words, discard the rest. */
	cursor = [pending initState];
	while([pending nextState:&cursor key:&atom value:&stored]) {
		if([(Storage *) stored count] <= 1) [(Storage *) stored free];
		else [table insertKey:atom value:stored];
	}

	[pending free];
}

/*
 * lookupTable - Print the homophones recorded for one word.
 *
 * Every definition that getDefinition()/getNextDefinition() return for word
 * and whose entry equals word exactly is scanned for pronunciation lines.
 * For each pronunciation that is a key of table, one line is written to
 * stdout: the pronunciation followed by the words stored for it.  When
 * compiled with DEBUG, pronunciations missing from table are printed alone.
 *
 * word:  NUL-terminated word to look up.
 * table: pronunciation => Storage list of words.
 * book:  open reference book.
 */
void lookupTable(char *word, id table, ReferenceBook *book)
{
	NXAtom found[MAX_PRONUNCIATIONS];
	unsigned int n, k, total, held;
	Definition *entry;
	SenseList *line;
	Storage *words;

	for(entry = getDefinition(word, book, TRUE); entry != NULL; entry = getNextDefinition(word, book, TRUE)) {
		if(strcmp(word, entry->entry) == 0) {
			for(line = entry->l; line != NULL; line = line->next) {
				if((line->s)[0] != dPronunciation) continue;
				if((total = pronunciations((line->s) + 1, found)) == 0) continue;

				for(n = 0; n < total; n++) {
					if([table isKey:(void *) found[n]]) {
						printPronunciation(found[n]);
						words = [table valueForKey:(void *) found[n]];
						held = [words count];
						for(k = 0; k < held; k++)
							(void) printf(" %s", *((char **) [words elementAt:k]));
						(void) printf("\n");
					}
#ifdef DEBUG
					else {
						printPronunciation(found[n]);
						(void) printf("\n");
					}
#endif
				}
			}
		}
		/* Release this definition before the next one is fetched. */
		freeDefinition(entry);
	}
}

/*
 * dumpTable - Print the whole table to stdout, one line per key: the
 * pronunciation followed by every word stored in its Storage list.
 *
 * table: pronunciation => Storage list of words.
 */
void dumpTable(id table)
{
	NXHashState cursor = [table initState];
	unsigned int k, held;
	void *atom, *stored;
	Storage *words;

	while([table nextState:&cursor key:&atom value:&stored]) {
		words = (Storage *) stored;
		printPronunciation((NXAtom) atom);
		for(k = 0, held = [words count]; k < held; k++)
			(void) printf(" %s", *((char **) [words elementAt:k]));
		(void) printf("\n");
	}
}

/*
 * main - Command line entry point.
 *
 *   -b             build the database from the dictionary
 *   -d             dump the database to stdout
 *   -r             print pronunciations unfiltered
 *   -h database    hash file to use instead of DATABASE
 *   -w dictionary  reference book to use instead of REFERENCE
 *
 * Without -b or -d, whitespace-separated words are read from stdin and
 * looked up one at a time.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the arguments are invalid or
 * the database or the dictionary cannot be opened.
 */
int main(int argc, char *argv[])
{
	HashFile *file;
	ReferenceBook *book;
	int option, status = EXIT_SUCCESS;
	BOOL buildFlag = NO, dumpFlag = NO;
	char buffer[BUFFER_SIZE], format[16], *reference = REFERENCE, *database = DATABASE;

	while((option = getopt(argc, argv, OPTIONSTRING)) != EOF)
		switch(option) {
		case BUILD : buildFlag = YES; break;
		case DUMP : dumpFlag = YES; break;
		case RAW : rawFlag = YES; break;
		case HASHFILE : database = optarg; break;
		case DICTIONARY : reference = optarg; break;
		default : status = EXIT_FAILURE;
		} /* while */

	if(optind < argc || status == EXIT_FAILURE) {
		(void) fprintf(stderr, USAGE, basename(argv[PROGRAM]));
		exit(EXIT_FAILURE);
		} /* if */

	/* Limit each word read from stdin to what buffer can hold ("%511s"). */
	(void) sprintf(format, "%%%ds", BUFFER_SIZE - 1);

	if((buildFlag || [HashFile isHashFile:database]) &&
	   (file = [HashFile newFromFile:database keyDesc:ATOM valueDesc:OBJECT]) != nil) {
		if(dumpFlag) dumpTable(file);
		else if((book = referenceOpen(reference)) != NULL) {
			if(buildFlag) buildTable(file, book);
			else while(scanf(format, buffer) == 1) lookupTable(buffer, file, book);
			(void) referenceClose(book);
			} /* else if */
		else {
			(void) fprintf(stderr, OPEN, basename(argv[PROGRAM]), reference);
			status = EXIT_FAILURE;
			} /* else */
		[file free];
		} /* if */
	else {
		(void) fprintf(stderr, OPEN, basename(argv[PROGRAM]), database);
		status = EXIT_FAILURE;
		} /* else */

	return status;
}
