/* dap5.c -- regression and nonparametric statistics */

/*  Copyright (C) 2001 Free Software Foundation, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "dap_make.h"
#include "externs.h"

extern dataobs dap_obs[];
extern FILE *dap_lst;
extern FILE *dap_err;
extern int dap_ono;
extern char *dap_dapname;

static int matchmark(int *markv, int *xmarkv, int nmark, double level)
{
int i;
int diff;

if (xmarkv[0] < 0)
	return !dap_newpart(markv, nmark);
for (diff = 0, i = 0; !diff && i < nmark; i++)
	{
	if (dap_obs[0].do_len[markv[i]] > 0)
		diff = strcmp(dap_obs[0].do_str[markv[i]], dap_obs[1].do_str[xmarkv[i]]);
	else if (dap_obs[0].do_len[markv[i]] == INT)
		diff = (dap_obs[0].do_int[markv[i]] != dap_obs[1].do_int[xmarkv[i]]);
	else
		diff = (dap_obs[0].do_dbl[markv[i]] != dap_obs[1].do_dbl[xmarkv[i]]);
	}
return !diff;
}

void linreg1(double **xymat, int *varv,
		int nx0, int nx, int ny, int nobs, int *xvarv,
		int *markv, int *xmarkv, int nmark, double level)
{
double *invmem;
double **inv;
int r, c;
int rr, cc;
double pivot;
double tmp, tmp2;
int typen;
double dnobs;
double *rss0, *rss1, *rss;
double *f, *fch;
int df;
double ddf;
double xi, xj;
static double tpt;
int yn;
int i, j;
double *pred;
double *sepred;

dap_swap();
if ((typen = dap_varnum("_type_")) < 0)
	{
	fprintf(dap_err, "(linreg1) Missing _type_ variable.\n");
	exit(1);
	}
invmem = (double *) dap_malloc(sizeof(double) * nx * nx, "");
inv = (double **) dap_malloc(sizeof(double *) * nx, "");
for (r = 0; r < nx; r++)
	inv[r] = invmem + r * nx;
rss0 = (double *) dap_malloc(sizeof(double) * ny, "");
rss1 = (double *) dap_malloc(sizeof(double) * ny, "");
rss = (double *) dap_malloc(sizeof(double) * ny, "");
f = (double *) dap_malloc(sizeof(double) * ny, "");
fch = (double *) dap_malloc(sizeof(double) * ny, "");
pred = (double *) dap_malloc(sizeof(double) * ny, "");
sepred = (double *) dap_malloc(sizeof(double) * ny, "");
dnobs = (double) nobs;
for (c = 1; c < nx + ny; c++)
	xymat[0][c] /= dnobs;
for (r = 1; r < nx; r++)
	for (c = 1; c < nx; c++)
		{
		if (r == c)
			inv[r][c] = 1.0;
		else
			inv[r][c] = 0.0;
		}
for (c = 1; c < nx; c++)
	{
	if (c == 1)
		{
		for (cc = 0; cc < ny; cc++)
			rss0[cc] = xymat[nx + cc][nx + cc];
		}
	if (c == nx0)
		{
		for (cc = 0; cc < ny; cc++)
			rss1[cc] = xymat[nx + cc][nx + cc];
		}
	pivot = xymat[c][c];
	if (pivot != 0.0)
		{
		for (rr = c + 1; rr < nx + ny; rr++)
			{
			tmp = xymat[rr][c] / pivot;
			xymat[rr][c] = 0.0;
			for (cc = c + 1; cc < nx + ny; cc++)
				{
				if (rr < nx || cc < nx || rr == cc)
					{
					xymat[rr][cc] -= tmp * xymat[c][cc];
					if (fabs(xymat[rr][cc]) < dap_tol * pivot)
						xymat[rr][cc] = 0.0;
					}
				}
			if (rr < nx)
				{
				for (cc = 1; cc < nx; cc++)
					{
					inv[rr][cc] -= tmp * inv[c][cc];
					if (fabs(inv[rr][cc]) < dap_tol * pivot)
						inv[rr][cc] = 0.0;
					}
				}
			}
		}
	else
		{
		fprintf(dap_err, "(linreg1) X'X matrix is singular.\n");
		exit(1);
		}
	}
for (c = nx - 1; c > 0; --c)
	{
	for (r = c - 1; r > 0; --r)
		{
		tmp = xymat[r][c] / xymat[c][c];
		for (cc = c + 1; cc < nx + ny; cc++)
			xymat[r][cc] -= tmp * xymat[c][cc];
		for (cc = 0; cc < nx; cc++)
			inv[r][cc] -= tmp * inv[c][cc];
		}
	for (cc = c + 1; cc < nx + ny; cc++)
		xymat[c][cc] /= xymat[c][c];
	for (cc = 0; cc < nx; cc++)
		inv[c][cc] /= xymat[c][c];
	}
df = nobs - nx;
ddf = (double) df;
for (c = 0; c < ny; c++)
	{
	rss[c] = xymat[nx + c][nx + c];
	f[c] = (rss0[c] - rss[c]) / rss[c] * ddf / ((double) nx - 1);
	fch[c] = (rss1[c] - rss[c]) / rss[c] * ddf / ((double) nx - nx0);
	}
for (r = 1; r < nx; r++)
	for (c = 1; c < nx; c++)
		xymat[r][c] = inv[r][c];
for (r = 1; r < nx; r++)
	for (c = 1; c < nx; c++)
		xymat[r][0] -= xymat[r][c] * xymat[0][c];
xymat[0][0] = 1.0 / dnobs;
for (c = 1; c < nx; c++)
	xymat[0][0] -= xymat[0][c] * xymat[c][0];
for (c = 0; c < ny; c++)
	for (cc = 1; cc < nx; cc++)
		xymat[0][nx + c] -= xymat[0][cc] * xymat[cc][nx + c];
for (c = 1; c < nx; c++)
	xymat[0][c] = xymat[c][0];
fprintf(dap_lst, "Reduced | full model regressors:");
for (r = 0; r < nx0; r++)
	fprintf(dap_lst, " %s", dap_obs[0].do_nam[varv[r]]);
fprintf(dap_lst, " |");
while (r < nx)
	fprintf(dap_lst, " %s", dap_obs[0].do_nam[varv[r++]]);
putc('\n', dap_lst);
fprintf(dap_lst, "Number of observations = %d\n", nobs);
for (c = 0; c < ny; c++)
	{
	fprintf(dap_lst, "\nResponse: %s\n", dap_obs[0].do_nam[varv[nx + c]]);
	fprintf(dap_lst,
	"   F0(%d, %d) = %.6g, Prob[F > F0] = %.5f\n   R-sq = %.6g, Adj R-sq = %.6g\n",
		nx - 1, nobs - nx, f[c],
		0.00001 * ceil(100000.0 * probf(f[c], nx - 1, nobs - nx)),
		1.0 - rss[c] / rss0[c],
		1.0 - rss[c] * ((double) (nobs - 1)) / (rss0[c] * ddf));
	if (nx0 > 1)
		fprintf(dap_lst, "   F-change(%d, %d) = %.6g, Prob[F > F-change] = %.5f\n",
			nx - nx0, nobs - nx, fch[c],
			0.00001 * ceil(100000.0 * probf(fch[c], nx - nx0, nobs - nx)));
	fprintf(dap_lst,
	"\n   Parameter           Estimate    Std Error   T0[%6d]  Prob[|T|>|T0|]\n",
		nobs - nx);
	for (r = 0; r < nx; r++)
		{
		tmp = sqrt(rss[c] / ddf * xymat[r][r]);
		tmp2 = xymat[r][nx + c] / tmp;
		fprintf(dap_lst, "   %-15s %12.6g %12.6g %12.6g  %14.5f\n",
			dap_obs[0].do_nam[varv[r]],
			xymat[r][nx + c], tmp,
			tmp2,
			0.00001 * ceil(200000.0 * probt(fabs(tmp2), nobs - nx)));
		}
	}
if (level < 1.0)
	tpt = tpoint(0.5 * (1.0 - level), nobs - nx);
else
	tpt = 0.0;
dap_obs[0].do_dbl[varv[0]] = 1.0;
if (xvarv[0] < 0)
	{
	dap_rewind();
	step();
	}
while (matchmark(markv, xmarkv, nmark, level))
	{
	dap_ono = 0;
	if (xvarv[0] >= 0)
		{
		for (i = 1; i < nx; i++)
			dap_obs[0].do_dbl[varv[i]] = dap_obs[1].do_dbl[xvarv[i - 1]];
		}
	else
		{
		strcpy(dap_obs[0].do_str[typen], "OBS");
		output();
		}
	for (yn = 0; yn < ny; yn++)
		{
		for (pred[yn] = 0.0, i = 0; i < nx; i++)
			pred[yn] += xymat[i][nx + yn] * dap_obs[0].do_dbl[varv[i]];
		for (sepred[yn] = 0.0, i = 0; i < nx; i++)
			{
			xi = dap_obs[0].do_dbl[varv[i]];
			for (j = 0; j < nx; j++)
				{
				xj = dap_obs[0].do_dbl[varv[j]];
				sepred[yn] += xi * rss[yn] / ddf * xymat[i][j] * xj;
				}
			}
		}
	strcpy(dap_obs[0].do_str[typen], "PRED");
	for (yn = 0; yn < ny; yn++)
		dap_obs[0].do_dbl[varv[nx + yn]] = pred[yn];
	output();
	if (tpt != 0.0)
		{
		strcpy(dap_obs[0].do_str[typen], "LOWER");
		for (yn = 0; yn < ny; yn++)
			dap_obs[0].do_dbl[varv[nx + yn]] = pred[yn] - tpt * sqrt(sepred[yn]);
		output();
		strcpy(dap_obs[0].do_str[typen], "UPPER");
		for (yn = 0; yn < ny; yn++)
			dap_obs[0].do_dbl[varv[nx + yn]] = pred[yn] + tpt * sqrt(sepred[yn]);
		output();
		}
	if (xvarv[0] >= 0)
		dap_ono = 1;
	dap_mark();
	if (!step())
		break;
	}
dap_ono = 0;
if (xvarv[0] >= 0)
	dap_swap();
dap_free(invmem);
dap_free(inv);
dap_free(rss0);
dap_free(rss1);
dap_free(rss);
dap_free(f);
dap_free(fch);
dap_free(pred);
dap_free(sepred);
}

void linreg(char *fname, char *ylist, char *x0list, char *x1list,
				char *marks, char *xname, double level)
{
char *regname;
int *varv;
int *xvarv;
int ny;
int nx0, nx1;
int nx;
int nvar;
int nxx;
int *markv;
int *xmarkv;
int nmark;
int v, w;
double tmp;
int nobs;
double dnobs;
double *xymem;
double **xymat;
int more;

if (!fname)
	{
	fputs("(linreg) No dataset name given.\n", dap_err);
	exit(1);
	}
varv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
xvarv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
markv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
xmarkv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
regname = dap_malloc(strlen(fname) + 5, "");
dap_suffix(regname, fname, ".reg");
inset(fname);
dap_vd("_intercept_ -1", 0);
nx0 = dap_list("_intercept_", varv, dap_maxvar);
nx0 += dap_list(x0list, varv + 1, dap_maxvar);
nx1 = dap_list(x1list, varv + nx0, dap_maxvar);
nx = nx0 + nx1;
ny = dap_list(ylist, varv + nx, dap_maxvar);
nvar = nx + ny;
xymem = (double *) dap_malloc(sizeof(double) * nvar * nvar, "");
xymat = (double **) dap_malloc(sizeof(double *) * nvar, "");
for (v = 0; v < nvar; v++)
	xymat[v] = xymem + v * nvar;
nmark = dap_list(marks, markv, dap_maxvar);
dap_ono = 1;
if (xname && xname[0])
	{
	inset(xname);
	nxx = dap_list(x0list, xvarv, dap_maxvar);
	nxx += dap_list(x1list, xvarv + nxx, dap_maxvar);
	if (nxx != nx - 1)
		{
		fprintf(dap_err,
		"(linreg) %s and %s have different numbers (%d and %d) of x-variables.\n",
			fname, xname, nx - 1, nxx);
		exit(1);
		}
	if (nmark)
		{
		for (v = 0; v < nmark; v++)
			{
			if ((xmarkv[v] = dap_varnum(dap_obs[0].do_nam[markv[v]])) < 0)
				{
				fprintf(dap_err,
				"(linreg) Mark variable %s in %s not in %s.\n",
					dap_obs[0].do_nam[markv[v]], fname, xname);
				exit(1);
				}
			}
		}
	if (!step())
		{
		fprintf(dap_err, "(linreg) No data in %s.\n", xname);
		exit(1);
		}
	}
else
	{
	xvarv[0] = -1;
	xmarkv[0] = -1;
	}
dap_ono = 0;
for (w = 0; w < nvar; w++)
	{
	if (dap_obs[0].do_len[varv[w]] != DBL)
		{
		fprintf(dap_err, "(linreg) Variable %s not double.\n",
					dap_obs[0].do_nam[varv[w]]);
		exit(1);
		}
	for (v = 0; v < nvar; v++)
		xymat[v][w] = 0.0;
	}
outset(regname, "");
for (nobs = 0, dap_mark(), more = 1; more; nobs++)
	{
	more = step();
	if (dap_newpart(markv, nmark))
		{
		dap_swap();
		dap_head(markv, nmark);
		dap_swap();
		linreg1(xymat, varv, nx0, nx, ny, nobs,
				xvarv, markv, xmarkv, nmark, level);
		for (w = 0; w < nvar; w++)
			{
			for (v = 0; v < nvar; v++)
				xymat[v][w] = 0.0;
			}
		nobs = 0;
		}
	if (nobs)
		{
		dnobs = (double) nobs;
		for (v = 1; v < nvar; v++)
			{
			tmp = xymat[0][v] -
				dnobs * dap_obs[0].do_dbl[varv[v]];
			for (w = 1; w < nvar; w++)
				{
				if (v < nx || w < nx || v == w)
					xymat[v][w] += tmp *
						(xymat[0][w] - dnobs *
						dap_obs[0].do_dbl[varv[w]]) /
						(dnobs * (dnobs + 1.0));
				}
			}
		}
	for (w = 1; w < nvar; w++)
		xymat[0][w] += dap_obs[0].do_dbl[varv[w]];
	}
dap_free(regname);
dap_free(varv);
dap_free(xvarv);
dap_free(markv);
dap_free(xmarkv);
dap_free(xymem);
dap_free(xymat);
}

void dap_parsey(char *yspec, int *varv)
{
int l;
int i;
char *vname;
int vn;
int ntrials;

vname = dap_malloc(dap_namelen + 1, "dap_namelen");
for (l = 0; yspec[l] == ' '; l++)
	;
for (i = 0; yspec[l + i] && yspec[l + i] != ' ' && yspec[l + i] != '/'; i++)
	{
	if (i < dap_namelen)
		vname[i] = yspec[l + i];
	else
		{
		fprintf(dap_err, "(parsey) Variable name too long: %s\n", yspec + l);
		exit(1);
		}
	}
vname[i] = '\0';
l += i;
if ((vn = dap_varnum(vname)) < 0)
	{
	fprintf(dap_err, "(parsey) Unknown variable: %s\n", vname);
	exit(1);
	}
if (dap_obs[dap_ono].do_len[vn] != DBL)
	{
	fprintf(dap_err, "(parsey) Events variable not double: %s\n", vname);
	exit(1);
	}
varv[0] = vn;
while (yspec[l] == ' ')
	l++;
if (yspec[l] == '/')
	{
	for (l++; yspec[l] == ' '; l++)
		;
	for (i = 0; yspec[l + i] && yspec[l + 1] != ' '; i++)
		{
		if (i < dap_namelen)
			vname[i] = yspec[l + i];
		else
			{
			fprintf(dap_err, "(parsey) Variable name too long: %s\n", yspec + l);
			exit(1);
			}
		}
	vname[i] = '\0';
	for (i = 0, ntrials = 0; '0' <= vname[i] && vname[i] <= '9'; i++)
		ntrials = 10 * ntrials + vname[i] - '0';
	if (i)
		{
		if (vname[i])
			{
			fprintf(dap_err, "(parsey) Invalid number of trials: %s\n", vname);
			exit(1);
			}
		varv[1] = -ntrials;
		}
	else
		{
		if ((vn = dap_varnum(vname)) < 0)
			{
			fprintf(dap_err, "(parsey) Unknown variable: %s\n", vname);
			exit(1);
			}
		if (dap_obs[dap_ono].do_len[vn] != DBL)
			{
			fprintf(dap_err, "(parsey) Trials variable not double: %s\n", vname);
			exit(1);
			}
		varv[1] = vn;
		}
	}
else
	{
	fprintf(dap_err, "(parsey) Expected / in yspec at: %s\n", yspec + l);
	exit(1);
	}
dap_free(vname);
}

static double vlen(double *v, int nv)
{
int i;
double len;

for (len = 0.0, i = 0; i < nv; i++)
	len += v[i] * v[i];
return sqrt(len);
}

static double vdiff(double *v0, double *v1, int nv)
{
int i;
double tmp;
double diff;

for (diff = 0.0, i = 0; i < nv; i++)
	{
	tmp = v0[i] - v1[i];
	diff += tmp * tmp;
	}
return sqrt(diff);
}

int dap_invert(double **a, int nrc)
{
double *invmem;
double **inv;
int r, c;
int cc;
double tmp;
double mult;

invmem = (double *) dap_malloc(sizeof(double) * nrc * nrc, "");
inv = (double **) dap_malloc(sizeof(double *) * nrc, "");
for (r = 0; r < nrc; r++)
	{
	inv[r] = invmem + r * nrc;
	for (c = 0; c < nrc; c++)
		{
		if (r == c)
			inv[r][c] = 1.0;
		else
			inv[r][c] = 0.0;
		}
	}
for (c = 0; c < nrc; c++)
	{
	if (a[c][c] != 0.0)
		{
		tmp = a[c][c];
		for (r = c + 1; r < nrc; r++)
			{
			mult = a[r][c] / tmp;
			a[r][c] = 0.0;
			for (cc = c + 1; cc < nrc; cc++)
				{
				a[r][cc] -= mult * a[c][cc];
				if (fabs(a[r][cc]) < dap_tol * tmp)
					a[r][cc] = 0.0;
				}
			for (cc = 0; cc < nrc; cc++)
				{
				inv[r][cc] -= mult * inv[c][cc];
				if (fabs(inv[r][cc]) < dap_tol * tmp)
					inv[r][cc] = 0.0;
				}
			}
		}
	else
		return 0;
	}
for (c = nrc - 1; c >= 0; --c)
	{
	tmp = a[c][c];
	for (cc = c + 1; cc < nrc; cc++)
		a[c][cc] /= tmp;
	for (cc = 0; cc < nrc; cc++)
		inv[c][cc] /= tmp;
	for (r = c - 1; r >= 0; --r)
		{
		tmp = a[r][c];
		for (cc = c; cc < nrc; cc++)
			a[r][cc] -= tmp * a[c][cc];
		for (cc = 0; cc < nrc; cc++)
			inv[r][cc] -= tmp * inv[c][cc];
		}
	}
for (r = 0; r < nrc; r++)
	for (c = 0; c < nrc; c++)
		a[r][c] = inv[r][c];
dap_free(invmem);
dap_free(inv);
return 1;
}

static double irls(double **x, double **y, double *pr, double *beta0,
			double **cov, int nx, int nobs)
{
int i, j;
int n;
double *beta1;
double *v;
double *step;
double loglike0, loglike1;
int niter;
double maxv;
double maxcov;
double tmp;

beta1 = (double *) dap_malloc(sizeof(double) * nx, "");
v = (double *) dap_malloc(sizeof(double) * nx, "");
step = (double *) dap_malloc(sizeof(double) * nx, "");
for (i = 0; i < nx; i++)
	beta1[i] = 0.0;
for (loglike1 = 0.0, n = 0; n < nobs; n++)
	{
	pr[n] = 0.5;
	loglike1 += y[1][n];
	}
loglike1 *= log(0.5);
niter = 0;
do	{
	for (i = 0; i < nx; i++)
		beta0[i] = beta1[i];
	loglike0 = loglike1;
	for (i = 0, maxv = 0.0, maxcov = 0.0; i < nx; i++)
		{
		for (v[i] = 0.0, n = 0; n < nobs; n++)
			v[i] += x[i][n] * (y[0][n] - y[1][n] * pr[n]);
		if ((tmp = fabs(v[i])) > maxv)
			maxv = tmp;
		for (j = 0; j < nx; j++)
			{
			cov[i][j] = 0.0;
			for (n = 0; n < nobs; n++)
				cov[i][j] += y[1][n] * pr[n] * (1.0 - pr[n]) *
								x[i][n] * x[j][n];
			if ((tmp = fabs(cov[i][j])) > maxcov)
				maxcov = tmp;
			}
		}
	for (i = 0; i < nx; i++)
		{
		if (fabs(v[i]) < dap_ctol * maxv)
			v[i] = 0.0;
		for (j = 0; j < nx; j++)
			{
			if (fabs(cov[i][j]) < dap_ctol * maxcov)
				cov[i][j] = 0.0;
			}
		}
	if (!dap_invert(cov, nx))
		{
		fputs("(irls) X'DX matrix is singular\n", dap_err);
		exit(1);
		}
	for (i = 0; i < nx; i++)
		{
		step[i] = 0.0;
		for (j = 0; j < nx; j++)
			step[i] += cov[i][j] * v[j];
		}
	for ( ; ; )
		{
		for (i = 0; i < nx; i++)
			beta1[i] = beta0[i] + step[i];
		for (n = 0, loglike1 = 0.0; n < nobs; n++)
			{
			for (pr[n] = 0.0, i = 0; i < nx; i++)
				pr[n] += x[i][n] * beta1[i];
			pr[n] = 1.0 / (1.0 + exp(-pr[n]));
			loglike1 += y[0][n] * log(pr[n]) +
					(y[1][n] - y[0][n]) * log(1.0 - pr[n]);
			}
		if (loglike1 >= loglike0)
			break;
		else
			{
			for (i = 0; i < nx; i++)
				step[i] *= 0.5;
			}
		}
	} while (++niter <= dap_maxiter && vdiff(beta1, beta0, nx) > dap_ctol * vlen(beta0, nx));
if (niter > dap_maxiter)
	fprintf(dap_lst, "Failed to converge after %d iterations.\n", dap_maxiter);
dap_free(beta1);
dap_free(v);
dap_free(step);
return loglike0;
}

void logreg1(double **y, double **x, int nx0, int nx, int nobs,
	int *varv, int *xvarv, int *markv, int *xmarkv, int nmark,
	double level)
{
int typen;
double *covmem;
double **cov;
double *pr;
double *beta;
int i, j;
double tmp, tmp2;
double loglike0, loglike1;
static double npt;
double xi, xj;
double logit;
double selogit;
int ntrials;

covmem = (double *) dap_malloc(sizeof(double) * dap_maxvar * dap_maxvar, "");
cov = (double **) dap_malloc(sizeof(double *) * dap_maxvar, "");
for (i = 0; i < dap_maxvar; i++)
	cov[i] = covmem + i * dap_maxvar;
beta = (double *) dap_malloc(sizeof(double) * nx, "");
dap_swap();
if ((typen = dap_varnum("_type_")) < 0)
	{
	fprintf(dap_err, "(logreg1) Missing _type_ variable.\n");
	exit(1);
	}
pr = (double *) dap_malloc(nobs * sizeof(double), "");
fprintf(dap_lst, "Reduced | full model regressors:");
for (i = 0; i < nx0; i++)
        fprintf(dap_lst, " %s", dap_obs[0].do_nam[varv[i]]);
fprintf(dap_lst, " |");
while (i < nx)
        fprintf(dap_lst, " %s", dap_obs[0].do_nam[varv[i++]]);
putc('\n', dap_lst);
fprintf(dap_lst, "Number of observations = %d\n", nobs);
for (i =  0, ntrials = 0; i < nobs; i++)
	ntrials += (int) rint(y[1][i]);
fprintf(dap_lst, "Number of trials = %d\n", ntrials);
if (varv[nx + 1] >= 0)
	fprintf(dap_lst, "Events / Trials: %s / %s\n",
			dap_obs[0].do_nam[varv[nx]],
			dap_obs[0].do_nam[varv[nx + 1]]);
else
	fprintf(dap_lst, "Events / Trials: %s / %d\n",
			dap_obs[0].do_nam[varv[nx]], -varv[nx + 1]);
loglike0 = irls(x, y, pr, beta, cov, nx0, nobs);
loglike1 = irls(x, y, pr, beta, cov, nx, nobs);
dap_free(pr);
fprintf(dap_lst, "-2 (Lred - Lfull) = %.6g = ChiSq0[%d]\n",
		tmp = -2.0 * (loglike0 - loglike1), nx - nx0);
fprintf(dap_lst, "Prob[ChiSq > ChiSq0] = %.5f\n\n",
	0.00001 * ceil(100000.0 * probchisq(fabs(tmp), nx - nx0)));
fputs(
"  Parameter           Estimate    Std Error   Wald ChiSq  Prob[ChiSq>Wald ChiSq]\n",
	dap_lst);
for (i = 0; i < nx; i++)
	{
	tmp = sqrt(cov[i][i]);
	tmp2 = beta[i] * beta[i] / cov[i][i];
	fprintf(dap_lst, "  %-15s %12.6g %12.6g %12.6g  %14.5f\n",
		dap_obs[0].do_nam[varv[i]],
		beta[i], tmp, tmp2,
		0.00001 * ceil(100000.0 * probchisq(fabs(tmp2), 1)));
	}
if (fabs(level) < 1.0)
	npt = -zpoint(0.5 * (1.0 - level));
else
	npt = 0.0;
dap_obs[0].do_dbl[varv[0]] = 1.0;
if (xvarv[0] < 0)
	{
        dap_rewind();
	step();
	}
while (matchmark(markv, xmarkv, nmark, level))
        {
	dap_ono = 0;
	if (xvarv[0] >= 0)
		{
		for (i = 1; i < nx; i++)
			dap_obs[0].do_dbl[varv[i]] = dap_obs[1].do_dbl[xvarv[i - 1]];
		}
	for (logit = 0.0, i = 0; i < nx; i++)
		logit += beta[i] * dap_obs[0].do_dbl[varv[i]];
	for (selogit = 0.0, i = 0; i < nx; i++)
		{
		xi = dap_obs[0].do_dbl[varv[i]];
		for (j = 0; j < nx; j++)
			{
			xj = dap_obs[0].do_dbl[varv[j]];
			selogit += xi * cov[i][j] * xj;
			}
		}
	selogit = sqrt(selogit);
	strcpy(dap_obs[0].do_str[typen], "PRED");
	dap_obs[0].do_dbl[varv[nx]] = 1.0 / (1.0 + exp(-logit));
        output();
	if (npt != 0.0)
		{
		strcpy(dap_obs[0].do_str[typen], "LOWER");
		dap_obs[0].do_dbl[varv[nx]] =
				1.0 / (1.0 + exp(-logit + npt * selogit));
		output();
		strcpy(dap_obs[0].do_str[typen], "UPPER");
		dap_obs[0].do_dbl[varv[nx]] =
				1.0 / (1.0 + exp(-logit - npt * selogit));
		output();
		}
	if (xvarv[0] >= 0)
		dap_ono = 1;
	dap_mark();
        if (!step())
                break;
	}
dap_ono = 0;
if (xvarv[0] >= 0)
	dap_swap();
dap_free(covmem);
dap_free(cov);
dap_free(beta);
}

void logreg(char *fname, char *yspec, char *x0list, char *x1list,
			char *marks, char *xname, double level)
{
char *regname;
int *varv;
int *xvarv;
int nx0, nx1;
int nx;
int nxx;
int *markv;
int *xmarkv;
int nmark;
double *xmem;
double **x;
double *ymem;
double *y[2];
int v;
int nobs;
int more;

if (!fname)
	{
	fputs("(logreg) No dataset name given.\n", dap_err);
	exit(1);
	}
varv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
xvarv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
markv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
xmarkv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
regname = dap_malloc(strlen(fname) + 5, "");
dap_suffix(regname, fname, ".lgr");
inset(fname);
dap_vd("_intercept_ -1", 0);
nx0 = dap_list("_intercept_", varv, dap_maxvar);
nx0 += dap_list(x0list, varv + 1, dap_maxvar);
nx1 = dap_list(x1list, varv + nx0, dap_maxvar);
nx = nx0 + nx1;
dap_parsey(yspec, varv + nx);
xmem = (double *) dap_malloc(sizeof(double) * nx * dap_maxval, "dap_maxval");
x = (double **) dap_malloc(sizeof(double *) * nx, "");
for (v = 0; v < nx; v++)
        x[v] = xmem + v * dap_maxval;
ymem = (double *) dap_malloc(sizeof(double) * 2 * dap_maxval, "dap_maxval");
y[0] = ymem;
y[1] = ymem + dap_maxval;
nmark = dap_list(marks, markv, dap_maxvar);
dap_ono = 1;
if (xname && xname[0])
	{
        inset(xname);
	nxx = dap_list(x0list, xvarv, dap_maxvar);
	nxx += dap_list(x1list, xvarv + nxx, dap_maxvar);
	if (nxx != nx - 1)
		{
		fprintf(dap_err,
		"(logreg) %s and %s have different numbers (%d and %d) of x-variables.\n",
			fname, xname, nx - 1, nxx);
		exit(1);
		}
	if (nmark)
		{
		for (v = 0; v < nmark; v++)
			{
			if ((xmarkv[v] = dap_varnum(dap_obs[0].do_nam[markv[v]])) < 0)
				{
				fprintf(dap_err,
				"(logreg) Mark variable %s in %s not in %s.\n",
					dap_obs[0].do_nam[markv[v]], fname, xname);
				exit(1);
				}
			}
		}
	if (!step())
		{
		fprintf(dap_err, "(logreg) No data in %s.\n",
				(xname[0] ? xname : fname));
		exit(1);
		}
	}
else
	{
	xvarv[0] = -1;
	xmarkv[0] = -1;
	}
dap_ono = 0;
outset(regname, "");
for (nobs = 0, dap_mark(), more = 1; more; nobs++)
	{
	more = step();
	if (dap_newpart(markv, nmark))
		{
		dap_swap();
		dap_head(markv, nmark);
		dap_swap();
		logreg1(y, x, nx0, nx, nobs,
				varv, xvarv, markv, xmarkv, nmark, level);
		nobs = 0;
		}
	if (nobs < dap_maxval)
		{
		x[0][nobs] = 1.0;
		for (v = 1; v < nx; v++)
			x[v][nobs] = dap_obs[dap_ono].do_dbl[varv[v]];
		y[0][nobs] = dap_obs[dap_ono].do_dbl[varv[nx]];
		if (varv[nx + 1] >= 0)
			y[1][nobs] = dap_obs[dap_ono].do_dbl[varv[nx + 1]];
		else
			y[1][nobs] = -(double) varv[nx + 1];
		}
	else
		{
		fputs("(logreg) Too many data.\n", dap_err);
		exit(1);
		}
	}
dap_free(regname);
dap_free(varv);
dap_free(xvarv);
dap_free(markv);
dap_free(xmarkv);
dap_free(xmem);
dap_free(x);
dap_free(ymem);
}

typedef struct {
int val_class;
double val_val;
} value;

static int valcmp1(value *v1, value *v2)
{
if (v1->val_val < v2->val_val)
	return -1;
if (v1->val_val > v2->val_val)
	return 1;
return 0;
}

static double probkol(double d, double n)
{
double lambda;
double l;
double k;
double term;
int sign;

lambda = d * sqrt(n);
lambda *= -2.0 * lambda;
for (k = 1.0, l = 0.0, sign = 1;
		(term = exp(k * k * lambda)) > dap_ktol; k += 1.0, sign = -sign)
	l += (sign > 0 ? term : -term);
return 2.0 * l;
}

static int (*pvalcmp1)() = &valcmp1;

static void nonpar1(value *val, int nval, char **level,
				int nlevels, int *varv, int nvar)
{
int rank, ntied, tied;
int tottied;
double tiecorr;
double drank;
double dn;
double stat0, stat;
double prob;
int *levnobs;
int minnobs;
int *rank1;
int r;
int levn;
double *sumr;
double tmp, tmp1;
double kold;
int kolr;
double kolval;
double f[2];

tmp = 0.0;
tmp1 = 0.0;
kolr = 0;
kolval = 0.0;
dap_swap();
levnobs = (int *) dap_malloc(sizeof(int) * nlevels, "");
sumr = (double *) dap_malloc(sizeof(double) * nlevels, "");
rank1 = (int *) dap_malloc(sizeof(int) * dap_maxex2, "dap_maxex2");
dn = (double) nval;
if (nvar == 2)
	{
	qsort(val, nval, sizeof(value), pvalcmp1);
	for (levn = 0; levn < nlevels; levn++)
		levnobs[levn] = 0;
	for (rank = 0; rank < nval; rank++)
		levnobs[val[rank].val_class]++;
	if (nlevels == 2)
		{
		for (rank = 0, stat0 = 0.0, stat = 0.0, tottied = 0,
				tiecorr = 0.0, f[0] = 0.0, f[1] = 0.0,
				kold = 0.0; rank < nval; )
			{
			for (ntied = 1; rank + ntied < nval &&
				fabs(val[rank + ntied].val_val - val[rank].val_val) <
				dap_tol * (fabs(val[rank + ntied].val_val) + fabs(val[rank].val_val));
					ntied++)
				;
			drank = ((double) (2 * rank + ntied + 1)) / 2.0; 
			if (ntied > 1)
				{
				tottied += ntied;
				tiecorr += (double) (ntied * (ntied + 1) * (ntied - 1));
				}
			for (tied = 0; tied < ntied; tied++, rank++)
				{
				if (val[rank].val_class)
					{
					stat0 += drank;
					f[1] += 1.0;
					}
				else
					{
					stat += drank;
					f[0] += 1.0;
					}
				tmp1 = val[rank].val_val;
				val[rank].val_val = drank;
				}
			tmp = fabs(f[1] / ((double) levnobs[1]) - f[0] / ((double) levnobs[0]));
			if (tmp > kold)
				{
				kold = tmp;
				kolr = rank;
				kolval = tmp1;
				}
			}
		fprintf(dap_lst, "\nResponse: %s\n", dap_obs[0].do_nam[varv[0]]);
		fprintf(dap_lst, "Classified by %s:", dap_obs[0].do_nam[varv[1]]);
		for (r = 0; r < nlevels; r++)
			fprintf(dap_lst, " %s (%d)", level[r], levnobs[r]);
		putc('\n', dap_lst);
		fprintf(dap_lst, "Number of tied observations = %d\n", tottied);
		if (levnobs[0] < levnobs[1])
			{
			levn = 0;
			stat0 = stat;
			}
		else
			levn = 1;
		fprintf(dap_lst, "\nWilcoxon rank sum statistic S0 (%s) = %g\n",
					level[0], stat0);
		fprintf(dap_lst, "Expected under H0 = %g\n",
				((double) levnobs[levn]) * (dn + 1.0) / 2.0);
		stat0 = fabs(stat0 - ((double) levnobs[levn]) * (dn + 1.0) / 2.0);
		if (nval < dap_maxex2)
			{
			for (r = 0; r < levnobs[levn]; r++)
				rank1[r] = r;
			rank1[r] = nval;
			for (prob = 0.0; ; )
				{
				for (r = 0, stat = -((double) levnobs[levn]) * (dn + 1.0) / 2.0;
						r < levnobs[levn]; r++)
					stat += val[rank1[r]].val_val;
				if (fabs(stat) >= stat0)
					prob += 1.0;
				for (r = levnobs[levn] - 1;
						r >= 0 && rank1[r] + 1 == rank1[r + 1]; --r)
					;
				if (r >= 0)
					{
					rank1[r]++;
					for (r++; r < levnobs[levn]; r++)
						rank1[r] = rank1[r - 1] + 1;
					}
				else
					break;
				}
			fprintf(dap_lst, "Prob[|S - E(S)| >= |S0 - E(S)|] = %.4g (exact)\n",
					prob / dap_bincoeff(dn, (double) levnobs[levn]));
			}
		else
			{
			if (stat0 >= 0.5)
				stat0 -= 0.5;
			prob = 2.0 * (1.0 - probz(stat0 /
					sqrt(((double) (levnobs[0] * levnobs[1])) *
					((dn + 1.0) - tiecorr /
					(dn * (dn - 1.0))) / 12.0)));
			fprintf(dap_lst, "Prob[|S - E(S)| >= |S0 - E(S)|] = %.4g\n", prob);
			fputs("(normal approximation, with continuity correction)\n", dap_lst);
			}
		fprintf(dap_lst, "\nKolmogorov statistic D0 = %g\n", kold);
		fprintf(dap_lst, "Maximum deviation at %g for level %s\n",
					kolval, level[val[kolr].val_class]);
		fprintf(dap_lst, "Prob[D >= D0] = %.4g\n",
				probkol(kold, ((double) (levnobs[0] * levnobs[1])) / dn));
		}
	else if (nlevels > 2)
		{
		for (levn = 0; levn < nlevels; levn++)
			sumr[levn] = 0.0;
		for (rank = 0, tottied = 0, tiecorr = 0.0; rank < nval; )
			{
			for (ntied = 1; rank + ntied < nval &&
				fabs(val[rank + ntied].val_val - val[rank].val_val) <
				dap_tol * (fabs(val[rank + ntied].val_val) + fabs(val[rank].val_val));
					ntied++)
				;
			drank = ((double) (2 * rank + ntied + 1)) / 2.0; 
			if (ntied > 1)
				{
				tottied += ntied;
				tiecorr += (double) (ntied * (ntied + 1) * (ntied - 1));
				}
			for (tied = 0; tied < ntied; tied++, rank++)
				{
				sumr[val[rank].val_class] += drank;
				val[rank].val_val = drank;
				}
			}
		for (stat0 = 0.0, levn = 0; levn < nlevels; levn++)
			{
			tmp = sumr[levn] / ((double) levnobs[levn]) - 0.5 * (dn + 1.0);
			stat0 += tmp * tmp * (double) levnobs[levn];
			}
		stat0 *= 12.0 / (dn * (dn + 1.0) - tiecorr / (dn - 1.0));
		fprintf(dap_lst, "\nResponse: %s\n", dap_obs[0].do_nam[varv[0]]);
		fprintf(dap_lst, "Classified by %s:", dap_obs[0].do_nam[varv[1]]);
		for (levn = 0, minnobs = levnobs[0]; levn < nlevels; levn++)
			{
			fprintf(dap_lst, " %s (%d)", level[levn], levnobs[levn]);
			if (levnobs[levn] < minnobs)
				minnobs = levnobs[levn];
			}
		putc('\n', dap_lst);
		fprintf(dap_lst, "Number of tied observations = %d\n", tottied);
		fprintf(dap_lst, "Kruskal-Wallis statistic T0 = %g\n", stat0);
		prob = probchisq(stat0, nlevels - 1);
		fprintf(dap_lst,
		"Prob[T >= T0] = %g (chi-squared approximation, df = %d)\n",
			prob, nlevels - 1);
		if ((nlevels == 3 && minnobs < 6) || minnobs < 5)
			fputs(
			"Warning: sample may be too small for this approximation.\n",
				dap_lst);
		}
	}
else
	{
	fprintf(dap_lst, "\nResponse: %s\n", dap_obs[0].do_nam[varv[0]]);
	fprintf(dap_lst, "Number of non-zero observations = %d\n", nval);
	qsort(val, nval, sizeof(value), pvalcmp1);
	for (rank = 0, stat0 = 0.0, tottied = 0, tiecorr = 0, levnobs[0] = 0; rank < nval; )
		{
		for (ntied = 1; rank + ntied < nval &&
				fabs(val[rank + ntied].val_val - val[rank].val_val) <
				dap_tol * (fabs(val[rank + ntied].val_val) + fabs(val[rank].val_val));
				ntied++)
			;
		drank = ((double) (2 * rank + ntied + 1)) / 2.0; 
		if (ntied > 1)
			{
			tottied += ntied;
			tiecorr += ((double) (ntied * (ntied + 1) * (ntied - 1))) / 2.0;
			}
		for (tied = 0; tied < ntied; tied++, rank++)
			{
			if (val[rank].val_class)
				{
				stat0 += drank;
				levnobs[0]++;
				}
			val[rank].val_val = drank;
			}
		}
	fprintf(dap_lst, "Number of tied observations = %d\n", tottied);
	fprintf(dap_lst, "Number of positive observations = %d\n", levnobs[0]);
	fprintf(dap_lst, "\nWilcoxon signed rank statistic W0 = %g\n", stat0);
	fprintf(dap_lst, "Expected under H0 = %g\n", (dn * (dn + 1.0)) / 4.0);
	stat0 = fabs(stat0 - (dn * (dn + 1.0)) / 4.0);
	if (nval <= dap_maxex1)
		{
		for (rank = 0; rank < nval; rank++)
			val[rank].val_class = 0;
		prob = 0.0;
		do	{
			for (rank = 0, stat = -(dn * (dn + 1.0)) / 4.0;
					rank < nval; rank++)
				{
				if (val[rank].val_class)
					stat += val[rank].val_val;
				}
			if (fabs(stat) >= stat0)
				prob += 1.0;
			for (rank = 0; rank < nval; rank++)
				{
				if (!val[rank].val_class)
					{
					val[rank].val_class = 1;
					break;
					}
				else
					val[rank].val_class = 0;
				}
			} while (rank < nval);
		for (rank = 0; rank < nval; rank++)
			prob /= 2.0;
		fprintf(dap_lst, "Prob[|W - E(W)| >= |W0 - E(W)|] = %.4g (exact)\n", prob);
		}
	else
		{
		prob = 2.0 *
		probt(stat0 * sqrt((dn - 1.0) /
			(dn * (dn * (dn + 1.0) * (2.0 * dn + 1.0) - tiecorr) /
			24.0 - stat0 * stat0)), nval - 1);
		fprintf(dap_lst, "Prob[|W - E(W)| >= |W0 - E(W)|] = %.4g\n", prob);
		fprintf(dap_lst, "(t-approximation, df = %d, with continuity correction)\n",
						nval - 1);
		}
	}
dap_free(levnobs);
dap_free(rank1);
dap_free(sumr);
dap_swap();
}

static int findlev(char *levname, char **level, int *nlevels)
{
int l;

for (l = 0; l < *nlevels; l++)
	{
	if (!strcmp(levname, level[l]))
		return l;
	}
if (*nlevels < dap_maxlev)
	{
	strcpy(level[*nlevels], levname);
	return (*nlevels)++;
	}
else
	{
	fprintf(dap_err, "(findlev) Too many levels (%s)\n", levname);
	exit(1);
	}
}

void nonparam(char *fname, char *variables, char *marks)
{
int varv[2];
int nvar;
int *markv;
int nmark;
value *val;
int nval;
int nobs;
char *levmem;
char **level;
int nlevels;
int more;

if (!fname)
	{
	fputs("(nonparam) No dataset name given.\n", dap_err);
	exit(1);
	}
markv = (int *) dap_malloc(sizeof(int) * dap_maxvar, "dap_maxvar");
levmem = dap_malloc(dap_maxlev * (dap_strlen + 1), "dap_maxlev, dap_strlen");
level = (char **) dap_malloc(sizeof(char *) * dap_maxlev, "dap_maxlev");
for (nlevels = 0; nlevels < dap_maxlev; nlevels++)
	level[nlevels] = levmem + nlevels * (dap_strlen + 1);
inset(fname);
nvar = dap_list(variables, varv, dap_maxvar);
if (!nvar)
	{
	fputs("(nonparam) No variables specified.\n", dap_err);
	exit(1);
	}
if (nvar > 2)
	{
	fputs("(nonparam) At most one response and one class variable allowed\n", dap_err);
	exit(1);
	}
if (dap_obs[0].do_len[varv[0]] != DBL)
	{
	fprintf(dap_err, "(nonparam) Variable must be dap_double: %s\n",
			dap_obs[0].do_nam[varv[0]]);
	exit(1);
	}
if (nvar == 2 && dap_obs[0].do_len[varv[1]] <= 0)
	{
	fprintf(dap_err, "(nonparam) Classification variable must be dap_char: %s\n",
			dap_obs[0].do_nam[varv[1]]);
	exit(1);
	}
nmark = dap_list(marks, markv, dap_maxvar);
val = (value *) dap_malloc(sizeof(value) * dap_maxval, "dap_maxval");
for (more = 1, nlevels = 0, nval = 0, nobs = 0; more; nobs++)
	{
	more = step();
	if (dap_newpart(markv, nmark))
		{
		dap_swap();
		dap_head(markv, nmark);
		dap_swap();
		fprintf(dap_lst, "Number of observations = %d\n", nobs);
		nonpar1(val, nval, level, nlevels, varv, nvar);
		if (!more)
			break;
		nval = 0;
		nobs = 0;
		nlevels = 0;
		}
	if (nval >= dap_maxval)
		{
		fputs("(nonparam) Too many values.\n", dap_err);
		exit(1);
		}
	if (nvar == 2)
		{
		val[nval].val_class =
			findlev(dap_obs[0].do_str[varv[1]], level, &nlevels);
		val[nval].val_val = dap_obs[0].do_dbl[varv[0]];
		nval++;
		}
	else
		{
		if (dap_obs[0].do_dbl[varv[0]] != 0.0)
			{
			val[nval].val_class = (dap_obs[0].do_dbl[varv[0]] > 0.0);
			val[nval].val_val = fabs(dap_obs[0].do_dbl[varv[0]]);
			nval++;
			}
		}
	}
dap_free(val);
dap_free(markv);
dap_free(levmem);
dap_free(level);
}
