/*  indextex.C   */
/*  Copyright 1991 Mountain Math Software  */
/*  All Rights Reserved                    */
#include "ObjProGen/cpyrght_exe.h"
#include <string.h>
#include <ctype.h>
#include <fstream.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "linlist.h"
#include "lexnum.h"
#include "outtok.h"
#include "mkstr.h"
#include "texutil.h"
#include "texspec.h"

// Debug logging shorthand: `LogOut << x ;' expands to `if (Debug) cerr << x ;',
// so the stream expression is only evaluated when Debug is non-zero.
// NOTE(review): the expansion is a bare `if', so an `else' written directly
// after a LogOut statement would attach to it.
#define LogOut if (Debug) cerr 
// Non-zero enables LogOut output (set by the `-d' option in main).
int Debug = 0 ;
// Non-zero makes ReadIdxFile also add the entry returned by
// CheckForMultipleWords (set by the `-t' option in main).
int multiply_two_word = 0 ;

struct IndexEntry ;
class LinearList ;

// Every IndexEntry read from the input file; defined further down.
extern LinearList TheIndexEntrys ;

// Output stream for the generated index, and a token writer built on it.
ofstream * OutFile = 0 ;
OutTokens * OutTok = 0 ;

// Input file handle and its name (with FileSuffix appended by main).
FILE * InFile = 0 ;
char * InputFileName = 0 ;

// Threshold compared against the repeat count in PrintIndex::CheckIfTooMany
// (overridden by the `-l' option in main).
int MaxEntry = 8 ;

// One citation read from the index file.
struct IndexEntry {
	// Text of the citation (what is printed after `\item').
	const char * Cite ;
	// Page reference as it appeared in the input (a string, not a number).
	const char * Page ;
	// Optional leading command split off the citation by Scan; 0 if none.
	const char * Cmd ;
	// Stores MakeLitString() results for each argument; cmd may be 0.
	IndexEntry(const char * cite, const char * page, const char * cmd);
	// Non-zero when check is non-null and has the same Cite and Page.
	int Identical(IndexEntry * check);
	// Non-zero when check is non-null and has the same Cite and Cmd.
	int SameCite(IndexEntry * check) ;
};

// Build an entry from the three strings; each one that is kept is passed
// through MakeLitString.  A null cmd leaves Cmd null.  When debugging is on
// the new entry is echoed to cerr.
IndexEntry::IndexEntry(const char * cite, const char * page, const char * cmd):
	Cite(MakeLitString(cite)),
	Page(MakeLitString(page))
{
	Cmd = 0 ;
	if (cmd) Cmd = MakeLitString(cmd);

	if (Debug) {
		cerr << "Found: " << Cite << ", " << Page ;
		if (Cmd) cerr << ", " << Cmd ;
		cerr << "\n" ;
	}
}

// Returns 1 when `check' is non-null and refers to the same citation text
// (per TeXstrcmp) on the same page (per strcmp); 0 otherwise.  Both
// comparisons are always performed so that the debug trace shows each result.
int IndexEntry::Identical(IndexEntry * check)
{
	if (!check) return 0 ;
	if (Debug) {
		cerr << "`" << Cite << "' : `" << Page << "'\n" ;
		cerr << "`" << check->Cite << "' : `" << check->Page << "'\n" ;
	}
	const int CiteDiff = TeXstrcmp(check->Cite,Cite) ;
	const int PageDiff = strcmp(check->Page,Page) ;
	const int Same = (CiteDiff == 0 && PageDiff == 0) ;
	if (Debug)
		cerr << "Identical (" << CiteDiff << ", " << PageDiff << ") = "
			<< Same << "\n" ;
	return Same ;
}
// strcmp that tolerates null pointers.  Two nulls compare equal; a null first
// argument compares greater (1) and a null second argument compares less (-1).
int safe_cmp(const char * a, const char * b)
{
	if (a && b) return strcmp(a,b) ;
	if (b) return 1 ;	// only `a' is null
	return a ? -1 : 0 ;	// only `b' is null, or both are
}

// Returns 1 when `check' is non-null, its citation text matches this one
// (per TeXstrcmp) and both carry the same command (per safe_cmp, so two
// missing commands match); 0 otherwise.
int IndexEntry::SameCite(IndexEntry * check)
{
	if (!check) return 0 ;
	if (TeXstrcmp(check->Cite,Cite) != 0) return 0 ;
	return safe_cmp(check->Cmd,Cmd) == 0 ;
}

// Growable array of untyped pointers.  The array returned by Get() is scanned
// by ListLength() in Length(), which is passed nothing but the array itself.
class LinearList {
	void ** Items ;		// storage (Max + 1 slots once allocated), or 0
	int Size ;		// number of items stored
	int Max ;		// item capacity of Items
	const int MinSize ;	// capacity used for the first allocation
	void Expand() ;		// enlarge Items
public:
	// Optionally adopts an existing array; see the constructor definition.
	LinearList(void ** Initial = 0, int Size = 0, int min=32);
	// Append one pointer, growing the storage when it is full.
	void AddItem(void * Item);
	// Raw access to the stored pointers (0 when nothing was ever stored).
	void ** Get() { return Items;}
	// Item count as reported by ListLength() on the raw array.
	int Length() {return ListLength((const void **) Items);}
};

// The single list of IndexEntry pointers filled by ReadIdxFile, ordered by
// SortIndex and printed by WriteIndex.  Default-constructed, i.e. empty.
LinearList TheIndexEntrys ;

// Construct a list, optionally adopting the array `Initial'.
//   Initial  existing array to take over, or 0 for an empty list.
//   Size     number of items in Initial; when 0 the count is obtained from
//            ListLength().  When non-zero, slot Initial[Size] is set to 0.
//   min      first-allocation capacity, raised to 4 if smaller.
// The parameter `Size' hides the member of the same name, so the member is
// always addressed through `this->' below; previously the member was left
// at 0 and the first AddItem overwrote the adopted items.
LinearList::LinearList(void ** Initial, int Size, int min):
	Items(Initial),
	Size(0),
	Max(0),
	MinSize(min>4?min:4)
	// Less than this and the expansion formula FAILS.
{
	if (!Items) return ;	// nothing adopted: stay empty
	if (Size) {
		this->Size = Max = Size ;
		Items[Size] = 0 ;
	} else {
		this->Size = Max = ListLength((const void **) Items);
	}
}

void LinearList::Expand()
{
	void ** Temp = new void * [1 + (Max = Max + (Max >> 2)) ] ;
	if (Items) for (int i = 0 ; i < Size+1 ; i++) Temp[i] = Items[i];
	delete Items ;
	Items = Temp ;
}

// Append `Item' to the list, allocating or enlarging the storage as needed.
// The slot after the last item is always set to 0: Length() and the printing
// loop both walk the raw array until they meet a null pointer, and storage
// obtained from new[] is not zero-filled.
void LinearList::AddItem(void * Item)
{
	if (!Items) {
		Items = new void * [MinSize + 1] ;
		Size = 0 ;
		Max = MinSize ;
	} else if (Size >= Max) Expand() ;
	Items[Size++] = Item ;
	Items[Size] = 0 ;	// Size <= Max and the array has Max + 1 slots
}

// Number of input lines read so far (incremented in GetNext); appended to
// every diagnostic by Warning, Error and Fatal.
int LineNumber ;
// Running totals maintained by Warning() and Error().
int WarningCount = 0 ;
int ErrorCount = 0 ;

// Finish a warning message the caller has already started on cerr by
// appending the current input line number, and count it.
void Warning()
{
	++WarningCount ;
	cerr << " at line " << LineNumber << "\n" ;
}

// Finish an error message the caller has already started on cerr by
// appending the current input line number, and count it.
void Error()
{
	++ErrorCount ;
	cerr << " at line " << LineNumber << "\n" ;
}

// Finish a message the caller has already started on cerr by appending the
// current input line number, then terminate the program with status 1.
void Fatal()
{
	cerr << " at line " << LineNumber << "\n" ;
	const int FailureStatus = 1 ;
	exit(FailureStatus);
}

// Parse one line of the form `\indexentry{cite}{page}' held in Buf.
// Buf is modified in place (blanks squeezed, terminators written).
// Returns a new IndexEntry, or 0 after printing a warning when the line does
// not have the expected shape.
//
// Fixes relative to the earlier version:
//  - an empty line no longer reads the byte before the buffer;
//  - trailing white space of the citation is trimmed completely (the loop
//    used to step forward and therefore stopped after one character);
//  - the "Cannot find page entry" diagnostic never prints from before Buf;
//  - <ctype.h> classifiers are given unsigned char values.
IndexEntry * Scan(char * Buf)
{
	LogOut << "Scanning: " << Buf << "\n" ;

	// remove the blank directly before a `{'
	int last_blank = 0 ;
	char * dest =  Buf ;
	for (char * pt = Buf; *pt; pt++) {
		if (*pt == '{' && last_blank) dest-- ;
		last_blank = (*pt == ' ') ;
		*dest++ = *pt ;
	}
	*dest ='\0' ;

	int Length = strlen(Buf);
	if (Length == 0 || Buf[Length - 1] != '}') {
		cerr << "Entry not terminated by `}'" ;
		Warning();
		return 0 ;
	}

	// Drop the final `}' and walk back to the `{' that opened it.
	int pos = Length - 1 ;
	Buf[pos--] = '\0' ;
	int BracketDepth = 1 ;
	for ( ; pos >= 0; pos--) {
		if (Buf[pos] == '}') BracketDepth++ ;
		else if (Buf[pos] == '{' && !--BracketDepth) break ;
	}
	const char * Page = Buf + pos + 1 ;

	// The page group must directly follow the `}' closing the citation.
	int NoPage = 1 ;
	if (pos > 1) if (Buf[pos--] == '{') if (Buf[pos] == '}') NoPage = 0 ;
	if (NoPage) {
		cerr << "Cannot find page entry" ;
		cerr << "Buf:`" << Buf << "'\n" ;
		cerr << "Ptr:`" << (pos >= 0 ? Buf + pos : "") << "'\n" ;
		Warning();
		return 0 ;
	}
	Buf[pos] = '\0' ;	// overwrite the `}' that closed the citation
	for (int blank = pos - 1 ;
		blank >= 0 && isspace((unsigned char) Buf[blank]); blank--)
		Buf[blank] = '\0' ;

	const char * Init = "\\indexentry{" ;
	if (strncmp(Buf,Init,strlen(Init))) {
		cerr << "Cannot find citation" ;
		Warning() ;
		return 0 ;
	}
	char * Special = 0 ;
	const char * Cite = Buf + strlen(Init);
	const char * save = Cite ;
	if (Cite[0] == '{' && Cite[1] =='\\') {
		// Citation opens with `{\name': split `{\name' off as a command.
		int count = 2 ;
		Cite += count ;
		while (isalnum((unsigned char) *Cite)) Cite++, count++ ;
		// NOTE(review): `save' still points at the `{', so this comparison
		// with "\\Godel" can never report a match; left as it was because
		// the intended exception for \Godel is not clear from this file.
		if (count > 2) if (strncmp("\\Godel",save,count-2)) {
			Special = new char[count+1] ;
			strncpy(Special,save,count);
			Special[count] ='\0' ;
		}
		if (!Special) Cite = save ;
	}
	while (*Cite && isspace((unsigned char) *Cite)) Cite++ ;
	if (!*Cite || !*Page) {
		cerr << "Partially NULL entry" ;
		Warning() ;
		return 0 ;
	}
	// NOTE(review): Special is not freed here; whether that is a leak depends
	// on whether MakeLitString copies its argument - confirm.
	return new IndexEntry(Cite, Page, Special);
}

// Replace, in place, every occurrence in `buf' of the fixed TeX expansion of
// an underscore by a single `_'.  A null or empty buffer is left untouched.
//
// The tail is shifted with memmove because source and destination overlap
// (strcpy is undefined for overlapping strings).  A buffer that consists of
// exactly the expansion is now handled too, and positions are tracked as
// offsets so no pointer before the buffer is ever formed.
void remove_underscore_expansion(char * buf)
{
	if (!buf) return ;
	if (!*buf) return ;
	static const char to_remove[] =
		"\\unhbox \\voidb@x \\kern .06em \\vbox {\\hrule width.3em}" ;
	const size_t length = sizeof(to_remove) - 1 ;
	const size_t bl = strlen(buf);
	if (bl < length) return ;
	// Scan from the last possible start position back to the first.
	for (size_t pos = bl - length + 1 ; pos-- > 0 ; ) {
		char * pt = buf + pos ;
		if (strncmp(pt,to_remove,length)) continue ;
		pt[0] = '_' ;
		memmove(pt+1,pt+length,strlen(pt+length)+1);
	}
}

// Read the next line of InFile and hand it to Scan().
// Returns 0 when there is no input file or no more input; otherwise whatever
// Scan() returns for the line (which may itself be 0 for a malformed line).
//
// Changes from the earlier version:
//  - fgets is given the real buffer size, so a line that fills the buffer
//    without a newline is detected (the test could never succeed before);
//  - the line-length test runs before the underscore expansion is removed,
//    since that removal shortens the buffer;
//  - an empty read no longer inspects Buf[-1];
//  - the "incomplete last line" warning no longer says "at line" twice
//    (Warning() appends it).
IndexEntry * GetNext()
{
	const int BufSize = 2048 ;
	char Buf[BufSize + 1] ;
	if (!InFile) return 0 ;
	if (!fgets(Buf,BufSize + 1,InFile)) return 0 ;
	LineNumber++ ;
	int End = strlen(Buf) ;
	if (End > 0 && Buf[End-1] == '\n') Buf[End-1] = '\0' ;
	else if (End == BufSize) {
		cerr << "Line  > " << BufSize << " characters.\n" ;
		Fatal();
	} else {
		cerr << "Incomplete last line" ;
		Warning();
	}
	remove_underscore_expansion(Buf);
	return Scan(Buf);
}

// Writes a null-terminated array of index entries to *OutFile as a
// `theindex' environment.
class PrintIndex {
	// The entries to print, terminated by a null pointer (may itself be 0).
	IndexEntry ** TheSortedEntrys ;
	// Lower-cased first letter recorded by CheckNewCharacter.
	int LastFirstCharacter ;
	// Entry most recently accepted by SameCite(); 0 before the first one.
	IndexEntry * LastEntry ;
	// Non-zero when the citation starting at Start has more than MaxEntry
	// page changes in the remainder of the list.
	int CheckIfTooMany(IndexEntry ** Start) ;
	// Result of CheckIfTooMany for the current citation; while set, the
	// citation's entries are not written.
	int SkipFlag ;
	// Set once an \item has been written since the last letter change.
	int AnyEntries ;
public:
	PrintIndex(IndexEntry ** entrys);
	// Emit one entry (a new \item, an extra page number, or nothing).
	void WriteEntry(IndexEntry ** Entry) ;
	// Emit the whole index, or exit if there are no entries.
	void WriteIndex();
	// Emit vertical space when the leading letter changes.
	void CheckNewCharacter(char C);
	// Handle an entry that repeats the previous citation; see definition.
	int SameCite(IndexEntry ** next);
};

// Remember the entry array and start with no previous entry, no previous
// letter, nothing skipped and nothing written.  Initialisers are listed in
// member declaration order, which is the order they run in anyway.
PrintIndex::PrintIndex(IndexEntry ** entrys):
	TheSortedEntrys(entrys),
	LastFirstCharacter(0),
	LastEntry(0),
	SkipFlag(0),
	AnyEntries(0)
{
}


// Decide whether the citation at *Start is referenced too often.
// Walks every entry after *Start up to the terminating null; an entry counts
// when it has the same citation as *Start and its page differs from the page
// of the entry immediately before it in the walk.  Returns 1 as soon as the
// count (which starts at 1 for *Start itself) exceeds MaxEntry, else 0.
int PrintIndex::CheckIfTooMany(IndexEntry ** Start)
{
	IndexEntry * const Head = *Start ;
	const char * PreviousPage = "XXX" ;
	int PageChanges = 1 ;
	for (IndexEntry ** Walk = Start + 1 ; *Walk ; ++Walk) {
		IndexEntry * const Current = *Walk ;
		if (Current->SameCite(Head) &&
			strcmp(PreviousPage,Current->Page) != 0) PageChanges++ ;
		PreviousPage = Current->Page ;
		if (PageChanges > MaxEntry) return 1 ;
	}
	return 0 ;
}

// Handle *next relative to the previously accepted entry.
// Returns 1 when *next repeats the previous citation; in that case its page
// is appended to the current item unless the citation is being skipped.
// Returns 0 for a new citation, after recomputing SkipFlag for it and
// recording it as the previous entry.
int PrintIndex::SameCite(IndexEntry** next)
{
	IndexEntry * const Current = *next ;
	if (Current->SameCite(LastEntry)) {
		if (!SkipFlag) {
			LastEntry = Current ;
			*OutFile << ", " << Current->Page ;
		}
		return 1 ;
	}
	SkipFlag = CheckIfTooMany(next);
	LastEntry = Current ;
	return 0 ;
}

// Start a new letter group when the first character of a citation changes.
//   C  first character of the citation about to be written.
// Nothing happens when C (ignoring case) equals the recorded letter or is not
// alphabetic.  Otherwise the letter is recorded and, if any item was written
// for the previous letter, a vertical gap is emitted first.
// C is converted through unsigned char before the <ctype.h> calls, which are
// undefined for negative values other than EOF.
void PrintIndex::CheckNewCharacter(char C)
{
	const int Ch = (unsigned char) C ;
	const int Lower = tolower(Ch) ;
	if (Lower == LastFirstCharacter) return ;
	if (!isalpha(Ch)) return ;
	if (AnyEntries) *OutFile << "\n\n\\vspace{.1in}\n\n" ;
	AnyEntries = 0 ;
	LastFirstCharacter = Lower ;
}

// Return a copy of `str' in which every `_' not already preceded by a
// backslash is written as `\_'.
// The result lives in a static buffer that is overwritten by the next call.
// Output is limited to buf_size characters: input that would not fit is
// truncated (never between a backslash and its underscore) instead of
// running past the end of the buffer.  A null `str' yields "".
static const char * EscapeUnderscore(const char * str)
{
	const int buf_size = 8192 ;
	static char buf[buf_size+1] ;
	char * dest = buf ;
	char * const limit = buf + buf_size ;	// last slot is kept for '\0'
	if (str) for (const char * pt = str; *pt; pt++) {
		int do_escape = 0 ;
		if (*pt == '_') do_escape = !(pt > str && pt[-1] == '\\') ;
		if (dest + (do_escape ? 2 : 1) > limit) break ;
		if (do_escape) *dest++ = '\\' ;
		*dest++ = *pt ;
	}
	*dest = '\0' ;
	return buf ;
}




// Write the entry at *entry.
//  - An entry identical to the previous one (same citation and page) is
//    dropped.
//  - An entry repeating the previous citation only contributes its page
//    number (done inside SameCite).
//  - A new citation that is being skipped produces nothing.
//  - Otherwise a fresh `\item' line is started with the optional command,
//    the underscore-escaped citation and the page.
void PrintIndex::WriteEntry(IndexEntry **  entry)
{
	IndexEntry * const Current = *entry ;
	if (Debug)
		cerr << "Processing " << EscapeUnderscore(Current->Cite) << ", "
			<< Current->Page << "\n" ;
	if (Current->Identical(LastEntry)) return ;
	if (Debug) cerr << "Not identical\n" ;

	CheckNewCharacter(Current->Cite[0]);
	// SkipFlag must be read after SameCite(), which may update it.
	if (SameCite(entry) || SkipFlag) return ;
	if (Debug) cerr << "Not same cite\n" ;

	*OutFile << "\n\\item " ;
	if (Current->Cmd) *OutFile << Current->Cmd << " " ;
	*OutFile << EscapeUnderscore(Current->Cite) << " " << Current->Page ;
	AnyEntries = 1 ;
}
	
void PrintIndex::WriteIndex()
{
	if (TheSortedEntrys) {
		*OutFile << "\\begin{theindex}\n" ;
		for (IndexEntry ** entry = TheSortedEntrys ; *entry; entry++)
			WriteEntry(entry);
		*OutFile << "\n\\end{theindex}\n" ;
	} else {
		cerr << "No citations found.\n" ;
		exit(0) ;
	}
}

void WriteIndex()
{
	PrintIndex DoPrint( (IndexEntry **) TheIndexEntrys.Get());
	DoPrint.WriteIndex();
}

// TeXstrcmp that tolerates null pointers.  Two nulls compare equal; a null
// first argument compares less (-1) and a null second argument greater (1).
static int SafeCompare(const char * a,const char * b)
{
	if (a && b) return TeXstrcmp(a,b);
	if (a) return 1 ;	// only `b' is null
	return b ? -1 : 0 ;	// only `a' is null, or both are
}


// Convert a page string to an integer usable for ordering.
//   page  decimal digits, or lower-case roman numerals (i v x l c d m).
// Returns the numeric value, with 100000 added to decimal pages so that they
// order after roman ones; an empty string gives 0.  A string that mixes the
// two forms or contains any other character produces a warning on cerr and
// the value -1.
//
// Fixes relative to the earlier version: a repeated roman numeral is now
// recognised by comparing values (the old test compared the previous value
// with the character code, which only ever matched `d' after `c' and turned
// "cd" into 600), and the per-character type/value are always initialised.
static int ToValue(const char * page)
{
	int sum = 0 ;
	int repeated = 0 ;	// total of the current run of equal numerals
	int last_roman = 0 ;	// value of the previous roman numeral
	int type = -1 ;		// -1 not yet known, 0 decimal, 1 roman
	for (const char * pt = page;*pt;pt++) {
		int t = -1 ;	// stays -1 for an unrecognised character
		int v = 0 ;
		if (*pt >= '0' && *pt <= '9') {
			v = *pt - '0' ; t = 0 ;
		} else switch(*pt) {
case 'i' :
			v= 1 ; t = 1 ; break ;
case 'v' :
			v= 5 ; t = 1 ; break ;
case 'x' :
			v= 10 ; t = 1 ; break ;
case 'l' :
			v= 50 ; t = 1 ; break ;
case 'c' :
			v= 100 ; t = 1 ; break ;
case 'd' :
			v= 500 ; t = 1 ; break ;
case 'm' :
			v= 1000 ; t = 1 ; break ;
default:
			break ;
		}
		if (t == -1 || (type != -1 && t != type)) {
			cerr << "Warning: invalid number `" << pt << "'.\n" ;
			return -1 ;
		}
		type = t ;
		if (type == 0) {
			sum = sum * 10 + v ;
			continue ;
		}
		if (last_roman == v) {
			// same numeral again: extend the pending run
			repeated += v ;
			continue ;
		}
		// A run followed by a larger numeral is subtracted (iv, xc, ...),
		// otherwise it is added.
		if (v > last_roman) sum -= repeated ;
		else sum += repeated ;
		repeated = v ;
		last_roman = v ;
	}
	if (repeated) sum+= repeated ;
	if (!type) sum += 100000 ;
	LogOut << "Converted `" << page << "' to " << sum << "\n" ;
	return sum ;
}

// Order two page strings by their ToValue() results.  Null pointers are
// tolerated: two nulls compare equal, a null first argument compares less
// (-1) and a null second argument greater (1).
// (An older digit-by-digit string comparison was replaced by ToValue so that
// roman page numbers are ordered as well.)
static int SafeNumberCompare(const char * a,const char * b)
{
	if (a && b) return ToValue(a) - ToValue(b) ;
	if (a) return 1 ;	// only `b' is null
	return b ? -1 : 0 ;	// only `a' is null, or both are
}

// Shorthand for running a string through the global TeX word translator.
// The result is whatever TheTeXWordTranslator.Translate() returns; the caller
// in this file treats a null result as "use the original string".
inline char * FromTeX(const char * a)
{
	return TheTeXWordTranslator.Translate(a);
}

// Compare two citation strings after translating each with FromTeX().
// Where the translation is null the untranslated string is used instead.
// Both translations are released before returning.
// NOTE(review): the results are released with scalar `delete'; confirm this
// matches how Translate() allocates them.
static int TeXSafeCompare(const char * A, const char * B)
{
	char * PlainA = FromTeX(A);
	char * PlainB = FromTeX(B);
	const char * Left = PlainA ? PlainA : A ;
	const char * Right = PlainB ? PlainB : B ;
	const int Order = SafeCompare(Left,Right);
	delete PlainA ;
	delete PlainB ;
	return Order ;
}

// qsort() comparison function for an array of IndexEntry pointers.
//   a, b  addresses of two array elements (each an IndexEntry *).
// Ordering:
//  1. citations starting with `$' come before all others; among themselves
//     they are ordered by strcmp on the citation, then by page;
//  2. other citations are ordered by their translated text (a leading
//     backslash of `\Godel' is skipped first), then by command, then by page.
//
// Previously two different `$' citations compared as "less" in both
// directions, which is not a valid ordering for qsort and could leave equal
// citations non-adjacent.
int IndexListSort(const void * a, const void * b)
{
	const IndexEntry * const * A = (const IndexEntry * const *) a ;
	const IndexEntry * const * B = (const IndexEntry * const *) b ;
	if (!A) return -1 ;
	if (!B) return 1 ;
	const char * Aa = (*A)->Cite ;
	const char * Bb = (*B)->Cite;
	LogOut << Aa << " ::: " << Bb << "\n" ;

	const int AMath = (*Aa == '$') ;
	const int BMath = (*Bb == '$') ;
	if (AMath || BMath) {
		if (!BMath) return -1 ;
		if (!AMath) return 1 ;
		const int Order = strcmp(Aa,Bb) ;
		if (Order) return Order ;
		return SafeNumberCompare((*A)->Page,(*B)->Page);
	}

	if (!strncmp(Aa,"\\Godel",6)) Aa++ ;
	if (!strncmp(Bb,"\\Godel",6)) Bb++ ;
	int Return = TeXSafeCompare(Aa,Bb);
	if (Return) return Return ;
	Return = safe_cmp((*A)->Cmd,(*B)->Cmd);
	if (Return) return Return ;
	return SafeNumberCompare((*A)->Page,(*B)->Page);
}

		
void SortIndex()
{
	IndexEntry ** Lst = (IndexEntry **) TheIndexEntrys.Get();
	if (!Lst) return ;
	qsort((char *)Lst,TheIndexEntrys.Length(),sizeof(*Lst),IndexListSort);
}

// Words that check_ignore_word() treats as too insignificant to start the
// second half of a two-word entry.  The list is null-terminated and matched
// as a prefix, so every word carries its trailing blank to stop it from
// matching the beginning of a longer word ("based" used to lack it, and
// "an " was listed twice).
const char * ignore_list[] = {
	"or ",
	"of ",
	"to ",
	"also ",
	"and ",
	"body ",
	"play ",
	"plays ",
	"like ",
	"for ",
	"point ",
	"path ",
	"the ",
	"one's ",
	"if ",
	"not ",
	"is ",
	"am ",
	"an ",
	"at ",
	"a ",
	"as ",
	"aided ",
	"between ",
	"in ",
	"it ",
	"its ",
	"based ",
	"on ",
	"over ",
	"all ",
	"I ",
	0
};

// Returns 1 when the text at `check' begins with one of the ignore_list
// entries, 0 otherwise (including for a null or empty string).
int check_ignore_word(const char * check)
{
	if (!check || !*check) return 0 ;
	for (int i = 0 ; ignore_list[i] ; i++) {
		const char * Word = ignore_list[i] ;
		if (strncmp(Word,check,strlen(Word)) == 0) return 1 ;
	}
	return 0 ;
}

// Try to derive a second, reversed entry from a multi-word citation:
// "first part second part" becomes "second part, first part".
// Returns a new IndexEntry on the same page, or 0 when the citation has a
// command, contains `{' or `$', or offers no acceptable split point.
IndexEntry * CheckForMultipleWords(IndexEntry& NextEntry)
{
	// Set after a backslash that does not introduce "Godel": the next blank
	// is then not considered as a split point.
	int ignore_next_blank = 0;
	// Letters seen since the last blank.
	int alpha_count = 0 ;
	// Letter count carried over a blank that was rejected because an
	// ignore_list word follows it.
	int save_alpha_count = 0 ;
	// First character of the part that will be moved to the front.
	const char * split_point = 0 ;
	if (NextEntry.Cmd) return 0 ;
	for (const char *pt = NextEntry.Cite;*pt;pt++){
		if (*pt == '{') return 0 ;
		if (*pt == '$') return 0 ;
		if (isalpha(*pt)) alpha_count++ ;
		// Once a split point exists the rest of the loop only keeps
		// checking for `{'/`$' and counting letters.
		if (split_point) continue ;
		if (*pt == '\\') {
			if (strncmp(pt+1,"Godel",5)) ignore_next_blank = 1 ;
		}
		if (*pt == ' ') {
			// t = letters in front of this blank (or the carried count).
			int t = alpha_count ;
			alpha_count = 0 ;
			if (save_alpha_count) t = save_alpha_count ;
			save_alpha_count = 0 ;
			// Treat a run of blanks as one; pt ends on the last of them.
			while (pt[1] == ' ') pt++ ;
			if (ignore_next_blank) {
				ignore_next_blank = 0 ;
				continue ;
			}
			
			// Fewer than three letters in front: do not split here.
			if (t < 3) continue ;
			if (check_ignore_word(pt+1)) {
				save_alpha_count = t ;
				continue ;
			}
			split_point = pt+ 1 ;
		}
	}
	if (!split_point) return 0 ;
	if (!*split_point) return 0 ;
	// alpha_count now holds the letters after the last blank seen.
	if (alpha_count < 3) return 0 ;
	// Result is: tail + ", " + head (without the blank before split_point)
	// + '\0', which is two characters longer than the original citation.
	int length = strlen(NextEntry.Cite) + 2 ;
	char * cite = new char[length];
	strcpy(cite,split_point);
	strcat(cite,", ");
	char * dest = cite;
	while (*dest) dest++ ;
	for (const char * p = NextEntry.Cite; p < split_point - 1; *dest++ = *p++);
	*dest++ = '\0' ;
	// Sanity check that exactly `length' bytes were written.
	// NOTE(review): `cite' is not freed on this path, nor after the
	// IndexEntry is built - confirm whether MakeLitString copies it.
	if (dest - cite != length ) {
		cerr << "Length expected " << length << " found " << dest - cite <<
			"\n" ;
		return 0 ;
	}
	LogOut << "Created entry: " << cite << " ::: " << NextEntry.Page << "\n" ; 
	return new IndexEntry(cite,NextEntry.Page,NextEntry.Cmd) ;
}
		
void ReadIdxFile()
{
	IndexEntry * NextEntry ;
	while(NextEntry = GetNext()) {
		TheIndexEntrys.AddItem(NextEntry);
		if (multiply_two_word)
			if (NextEntry = CheckForMultipleWords(*NextEntry))
				TheIndexEntrys.AddItem(NextEntry);
	}
}

// The three processing stages, in order, once the files are open.
void Main()
{
	ReadIdxFile() ;		// fill TheIndexEntrys from InFile
	SortIndex() ;		// order the entries
	WriteIndex() ;		// emit them to OutFile
}


// Suffix of the input file; main appends it to the given name via AddSuffix
// and strips it again when deriving the output file name.
const char * FileSuffix = ".idx" ;

// Print the command-line help on cerr and terminate with status 1.
//   Pgm  program name to show in the synopsis (argv[0]).
// The synopsis now lists every option main accepts, and the `-l' line
// describes what the limit does: a citation with more page changes than the
// limit is left out of the index.
static void Usage(const char * Pgm)
{
	cerr << "Usage is: " << Pgm << " [-d] [-t] [-w dir] [-l N] file\n" ;
	cerr << "`-t' specifies make two entries for two word items.\n" ;
	cerr << "`-d' specifies debugging mode.\n" ;
	cerr << "`file' is a tex index file.\n" ;
	cerr << "`-w dir' specifies directory for output file.\n" ;
	cerr << "`-l N' number of pages above which an entry is deleted"
		" (default = " << MaxEntry << ").\n" ;
	cerr << "FileSuffix `" << FileSuffix <<
		"' will be appended to the name if missing.\n" ;
	exit (1);
}

// Program entry point: parse the options, open the input `.idx' file and the
// output `.txx' file, run Main(), and flush the output.
//
// Options: -d debug trace, -t add reversed two-word entries, -w dir output
// directory, -l N entry limit (see Usage).
//
// Fixes relative to the earlier version:
//  - `-l' no longer falls through into the error case (it used to make every
//    command line containing -l invalid);
//  - main returns int;
//  - an empty `-w' argument is ignored instead of indexing before the string;
//  - the nested if/else that picks the path separator is braced.
int main(int argc,char ** argv)
{
	int c;
	int errflg = 0;
	const char * OutputDirectory = 0;
	const char * OutputFileName = 0 ;
	const char * MaxEntryString = 0 ;

	while (( c = getopt(argc,argv,"tdw:l:")) != EOF) switch(c) {
case 'd':
		Debug = 1 ;
		break ;
case 't':
		multiply_two_word = 1 ;
		break ;
case 'w':
		OutputDirectory = MakeLitString(optarg);
		break ;
case 'l':
		MaxEntryString = MakeLitString(optarg);
		break ;
default:
		errflg++;
		break ;
	}
	if (errflg) {
		cerr << "Invalid command line.\n" ;
		Usage(argv[0]);
	}
	for (; optind < argc; optind++) {
		if (InFile) {
			cerr << "Only one input file is allowed.\n" ;
			Usage(argv[0]);
		}
		InputFileName = new char[strlen(argv[optind])+1];
		strcpy(InputFileName,argv[optind]);
		InputFileName = AddSuffix(InputFileName,FileSuffix);

		InFile = fopen(InputFileName,"r");
		if (!InFile) {
			cerr << "Cannot open input file `" << InputFileName <<
				"'\n" ;
			Usage(argv[0]);
		}
	}
	if (!InputFileName) Usage(argv[0]);
	if (MaxEntryString) if (sscanf(MaxEntryString,"%d",&MaxEntry) != 1) {
		cerr << "Parameter for option `-l' is not an integer.\n" ;
		Usage(argv[0]);
	}

	// Output name: input name with FileSuffix replaced by ".txx",
	// optionally placed in the -w directory.
	OutputFileName = AddSuffix(RemoveSuffix(InputFileName,FileSuffix),
		".txx");
	if (OutputDirectory && *OutputDirectory) {
		if (OutputDirectory[strlen(OutputDirectory)-1] == '/') {
			OutputFileName = Concatenate(OutputDirectory,
				OutputFileName);
		} else {
			OutputFileName = Concatenate(OutputDirectory,
				"/",OutputFileName);
		}
	}

	OutFile = new ofstream(OutputFileName);
	if (!OutFile->good()) {
		cerr << "Cannot create output file `" << OutputFileName
			<< "'.\n" ;
		Usage(argv[0]);
	}
	OutTok = new OutTokens(OutFile,0,""," ","",80,1000000) ;

	Main();
	OutTok->FlushLine();

	delete OutFile ;
	return 0 ;
}
