/* dap6.c -- categorical models */

/*  Copyright (C) 2001, 2002 Free Software Foundation, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "dap_make.h"
#include "externs.h"

extern dataobs dap_obs[];
extern FILE *dap_lst;
extern FILE *dap_err;
extern char *dap_dapname;

static double *allparam;
static char *sel;
static char *selred;
static double (*ex)();
static double **tab;
static int nc;


static double loglike(double *selparam)
{
int s;
int p;
double expected;
double ll;
int c;

for (s = 0, p = 0; sel[s]; s++)
	{
	if (sel[s] != '!')
		allparam[s] = selparam[p++];
	else
		allparam[s] = 0.0;
	}
for (ll = 0.0, c = 0; c < nc; c++)
	{
	expected = ex(allparam, tab[c] + 1);	/* skip cell count */
	ll += tab[c][0] * log(expected) - expected;
	}
return ll;
}

/* parses parameter list as names into codes */
static int selparse(char *names, char *codes)
{
int n;	/* index to names */
int c;	/* index to codes */

for (n = 0; names[n] == ' '; n++)
	;
for (c = 0; names[n]; c++)
	{
	if (names[n] == '!' || names[n] == '?')
		{
		codes[c] = names[n];
		for (++n; names[n] == ' '; n++)
			;
		}
	else
		codes[c] = '1';
	while (names[n] && names[n] != ' ')
		n++;
	while (names[n] == ' ')
		n++;
	}
codes[c] = '\0';
return c;
}

static void categ1(double **tab, int ncell, int *varv, int nvar,
		double (*expect)(), double *param,
		char *select, char *selcodes, char *trace)
{
int sparam;	/* number of selected parameters */
int sparamr;	/* number of selected parameters in reduced model */
int nparam;	/* number of parameters */
int c;
double *x;
double *xch;
int p;
double step, tol;
double tmp;
int v;
double likerat;
double likered;
double pearson;
int typen;
double *infomem;
double **info;
int p1, p2;
double lpp, lpm, lmp, lmm;
double h, halfh;
int s;

sparamr = 0;
likered = 0.0;
if ((typen = dap_varnum("_type_")) < 0)
	{
	fputs("(categ1) missing _type_ variable\n", dap_err);
	exit(1);
	}
nc = ncell;
ex = expect;
x = (double *) dap_malloc(sizeof(double) * strlen(sel), "");
xch = (double *) dap_malloc(sizeof(double) * strlen(sel), "");
if (selred)
	{
	sel = selred;
	for (nparam = 0, sparamr = 0; sel[nparam]; nparam++)
		{
		if (sel[nparam] != '!')
			{
			allparam[nparam] = param[nparam];
			sparamr++;
			}
		else
			allparam[nparam] = 0.0;
		}
	for (p = 0, nparam = 0; sel[nparam]; nparam++)
		{
		if (sel[nparam] != '!')
			x[p++] = param[nparam];
		}
	for (step = 0.0, p = 0; p < sparamr; p++)
		{
		tmp = x[p];
		step += tmp * tmp;
		}
	if (step > 0.0)
		step = 0.1 * sqrt(step);
	else
		step = 0.01;
	tol = dap_cattol * step;
	dap_maximize(&loglike, sparamr, x, step, tol, trace);
	for (c = 0, likerat = 0.0; c < ncell; c++)
		{
		likered += (tab[c][0] + dap_addtozero) *
			log((tab[c][0] + dap_addtozero) / expect(allparam, tab[c] + 1));
		output();
		}
	likered *= 2.0;
	}
sel = selcodes;
for (nparam = 0, sparam = 0; sel[nparam]; nparam++)
	{
	if (sel[nparam] != '!')
		{
		allparam[nparam] = param[nparam];
		sparam++;
		}
	else
		allparam[nparam] = 0.0;
	}
for (p = 0, nparam = 0; sel[nparam]; nparam++)
	{
	if (sel[nparam] != '!')
		x[p++] = param[nparam];
	}
for (step = 0.0, p = 0; p < sparam; p++)
	{
	tmp = x[p];
	step += tmp * tmp;
	}
if (step > 0.0)
	step = 0.1 * sqrt(step);
else
	step = 0.01;
tol = dap_cattol * step;
dap_maximize(&loglike, sparam, x, step, tol, trace);
for (c = 0, likerat = 0.0, pearson = 0.0; c < ncell; c++)
	{
	for (v = 0; v < nvar; v++)
		dap_obs[0].do_dbl[varv[v]] = tab[c][v];
	strcpy(dap_obs[0].do_str[typen], "OBS");
	output();
	strcpy(dap_obs[0].do_str[typen], "FIT");
	dap_obs[0].do_dbl[varv[0]] = expect(allparam, tab[c] + 1);
	likerat += (tab[c][0] + dap_addtozero) *
		log((tab[c][0] + dap_addtozero) / dap_obs[0].do_dbl[varv[0]]);
	tmp = dap_obs[0].do_dbl[varv[0]] - tab[c][0];
	pearson += tmp * tmp / dap_obs[0].do_dbl[varv[0]];
	output();
	}
likerat *= 2.0;
infomem = (double *) dap_malloc(sizeof(double) * sparam * sparam, "");
info = (double **) dap_malloc(sizeof(double *) * sparam, "");
for (p = 0; p < sparam; p++)
	info[p] = infomem + p * sparam;
h = 0.0001;
halfh = h / 2.0;
for (p1 = 0; p1 < sparam; p1++)
	{
	for (p = 0; p < sparam; p++)
		xch[p] = x[p];
	lpm = loglike(xch);
	xch[p1] += h;
	lpp = loglike(xch);
	xch[p1] = x[p1] - h;
	lmm = loglike(xch);
	info[p1][p1] = -(lpp - 2.0 * lpm + lmm) / (h * h);
	}
for (p1 = 0; p1 < sparam; p1++)
	for (p2 = 0; p2 < p1; p2++)
		{
		for (p = 0; p < sparam; p++)
			xch[p] = x[p];
		xch[p1] += halfh;
		xch[p2] += halfh;
		lpp = loglike(xch);
		xch[p1] = x[p1] - halfh;
		lmp = loglike(xch);
		xch[p2] = x[p2] - halfh;
		lmm = loglike(xch);
		xch[p1] = x[p1] + halfh;
		lpm = loglike(xch);
		info[p1][p2] = -(lpp - lpm - lmp + lmm) / (h * h);
		info[p2][p1] = info[p1][p2];
		}
if (!dap_invert(info, sparam))
	{
	fputs("(categ1) covariance matrix is singular\n", dap_err);
	exit(1);
	}
fputs("Maximum likelihood estimation\n\n", dap_lst);
fprintf(dap_lst, "Cell count: %s\n", dap_obs[0].do_nam[varv[0]]);
fputs("Class and aux variables:", dap_lst);
for (v = 1; v < nvar; v++)
	fprintf(dap_lst, " %s", dap_obs[0].do_nam[varv[v]]);
putc('\n', dap_lst);
fputs("\nStatistic              df      Prob\n", dap_lst);
fprintf(dap_lst, "G2[Model]   = %6.2f  %3d    %.4f\n",
	likerat, ncell - sparam,
	ceil(10000.0 * probchisq(likerat, ncell - sparam)) / 10000.0);
if (selred)
	{
	fprintf(dap_lst, "G2[Reduced] = %6.2f  %3d    %.4f\n",
		likered, ncell - sparamr,
		ceil(10000.0 * probchisq(likered, ncell - sparamr)) / 10000.0);
	fprintf(dap_lst, "G2[Diff]    = %6.2f  %3d    %.4f\n",
		likered - likerat, sparam - sparamr,
		ceil(10000.0 * probchisq(likered - likerat, sparam - sparamr)) / 10000.0);
	}
fprintf(dap_lst, "X2[Model]   = %6.2f  %3d    %.4f\n",
	pearson, ncell - sparam,
	ceil(10000.0 * probchisq(pearson, ncell - sparam)) / 10000.0);
putc('\n', dap_lst);
fputs("    Estimate          ASE  Model  Parameter\n", dap_lst);
for (c = 0; select[c] == ' '; c++)
	;
for (p = 0, s = 0; p < nparam; p++)
	{
	fprintf(dap_lst, "%12g ", allparam[p]);
	if (sel[p] == '!')
		fputs("              ", dap_lst);
	else
		{
		fprintf(dap_lst, "%12g  ", sqrt(info[s][s]));
		s++;
		}
	switch (selcodes[p])
		{
	case '1':
		fprintf(dap_lst, "  *    ");
		break;
	case '?':
		fprintf(dap_lst, "  ?    ");
		for (++c; select[c] == ' '; c++)
			;
		break;
	default:
		fprintf(dap_lst, "       ");
		for (++c; select[c] == ' '; c++)
			;
		break;
		}
	while (select[c] && select[c] != ' ')
		{
		putc(select[c], dap_lst);
		c++;
		}
	while (select[c] == ' ')
		c++;
	putc('\n', dap_lst);
	}
dap_free(x);
dap_free(xch);
dap_free(infomem);
dap_free(info);
}

void categ(char *dataset, char *varlist, double (*expect)(),
		double *param, char *select, char *part, char *trace)
{
int p;
char *catset;
int *varv;
int *partv;
int nvar;
int npart;
int more;
int nparam;	/* number of parameters identified in select */
char *selcodes;	/* string to hold '1', '!', '?' codes */
double *tabmem;
int v;
int ncell;
int s;

varv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
partv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
catset = dap_malloc(strlen(dataset) + 5, "");
strcpy(catset, dataset);
strcat(catset, ".cat");
inset(dataset);
outset(catset, "");
nvar = dap_list(varlist, varv, dap_maxvar);
for (v = 1; v < nvar; v++)
	{
	if (dap_obs[0].do_len[varv[v]] != DBL)
		{
fprintf(dap_err, "categ: classification or auxiliary variable not of type double: %s\n",
		dap_obs[0].do_nam[varv[v]]);
		exit(1);
		}
	}
npart = dap_list(part, partv, dap_maxvar);
tabmem = (double *) dap_malloc(sizeof(double) * nvar * dap_maxcell, "");
tab = (double **) dap_malloc (sizeof(double *) * dap_maxcell, "");
for (ncell = 0; ncell < dap_maxcell; ncell++)
	tab[ncell] = tabmem + ncell * nvar;
selcodes = dap_malloc(strlen(select) + 1, "");
nparam = selparse(select, selcodes);
allparam = (double *) dap_malloc(sizeof(double) * nparam, "");
for (p = 0; p < nparam; p++)
	allparam[p] = param[p];
sel = selcodes;
if (index(selcodes, '?'))
	{
	selred = dap_malloc(nparam + 1, "");
	for (s = 0; selcodes[s]; s++)
		{
		if (selcodes[s] == '?')
			selred[s] = '!';
		else
			selred[s] = selcodes[s];
		}
	selred[s] = '\0';
	}
else
	selred = NULL;
for (ncell = 0, more = 1; more; )
	{
	more = step();
	if (dap_newpart(partv, npart))
		{
		dap_swap();
		dap_head(partv, npart);
		categ1(tab, ncell, varv, nvar, expect, param,
						select, selcodes, trace);
		dap_swap();
		ncell = 0;
		}
	if (ncell < dap_maxcell)
		{
		for (v = 0; v < nvar; v++)
			tab[ncell][v] = dap_obs[0].do_dbl[varv[v]];
		ncell++;
		}
	else
		{
		fputs("categ: too many cells\n", dap_err);
		exit(1);
		}
	}
if (selred)
	{
	dap_free(selred);
	selred = NULL;
	}
dap_free(varv);
dap_free(partv);
dap_free(catset);
dap_free(tabmem);
dap_free(tab);
dap_free(allparam);
dap_free(selcodes);
}
