/* gibbs.c

   written by Frederic Bois
   16 August 1993

   Copyright (c) 1993.  Don Maszle, Frederic Bois.  All rights reserved.

   -- Revisions -----
     Logfile:  SCCS/s.gibbs.c
    Revision:  1.36
        Date:  13 Jan 1996
     Modtime:  15:55:33
      Author:  F. Bois
   -- SCCS  ---------

*/

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "gibbs.h"
#include "hungtype.h"
#include "lexerr.h"
#include "matutil.h"
#include "simmonte.h"
#include "yourcode.h"


/* ----------------------------------------------------------------------------
   Global/External variables
*/

BOOL bModelComputed = FALSE;


/* ----------------------------------------------------------------------------
   CheckIfSubject

   Callback function for FindSubjects. If the pData points to a Subject
   variable the value of Subject is assigned to the iSubject element of
   the current experiment specification.

*/

int CheckIfSubject (PVOID pData, PVOID pInfo)
{
  PVARMOD pvarmod = (PVARMOD) pData;
  PINT piSubject = (PINT) pInfo;

  if ( !strcmp (GetVarName (pvarmod->hvar), "Subject")) {
    *piSubject = (int) pvarmod->uvar.dVal;
    return 1;
  }

  return 0;

} /* CheckIfSubject */


/* ----------------------------------------------------------------------------
   FindSubjects

   Check the parameters in the plistParmMods LIST of the experiment "pexp"
   to see if a subject is specified. Does that by a call to ForAllList
   to increment through the list.
*/

int FindSubjects (PEXPERIMENT pexp)
{
  assert (pexp->plistParmMods);
  return ForAllList (pexp->plistParmMods, &CheckIfSubject,
                     (PVOID) &pexp->iSubject);

} /* FindSubjects */


/* ----------------------------------------------------------------------------
   GetNSubjects

   The number of subjects is extracted from the input information.

   If the variable "Subject" is not defined by the model then a unique subject
   is assumed and all experiments are for that subject. No population sampling
   will be done.

   If the variable "Subject" is defined by the model and never used, a unique
   subject is also assumed. If a Subject assignment is used once it should be
   used for all experiments, otherwise an error message is given.
   Assignments of the Subject variable should be integers higher than zero,
   but not necessarily sequential.
   The same subject can be assigned to different experiments.

   Outputs:
   panal:      analysis specifications. Used to find how many
               experiments are simulated etc. The iSubject elements
               of the experiment specifications are eventually changed.

   pnSubjs:      number of subjects found in the simulation file.
   plSubjsIndex: array of subjects labels by order of appearance
*/

void GetNSubjects (PANALYSIS panal, long *pnSubjs, long **plSubjsIndex)
{
  int i, j, k;
  int nSubjFound;
  BOOL bSubjectUsed = FALSE;

  *pnSubjs = 0;

  if (GetVarHandle ("Subject")) {
    for (i = 0; i < panal->expGlobal.iExp; i++) {

      /* check whether the Mods for this exp contain a Subject assignment */
      nSubjFound = FindSubjects (panal->rgpExps[i]);

      if (nSubjFound == 1) { /* a Subject was found */
        bSubjectUsed = TRUE;
        j = 0; /* second index for the experiments */
        while ((j < i) &&
               (panal->rgpExps[i]->iSubject != panal->rgpExps[j]->iSubject)) {
          j++;
        }
        if (j == i) /* this subject has not already been defined */
          *pnSubjs = *pnSubjs + 1;
      }
      else { /* Subject not found in this exp or several subjects defined */
        if (bSubjectUsed) { /* but Subject was found before */
          printf ("Error: Subject missing or misspecified ");
          printf ("in experiment %d - Exiting\n", i);
          exit (0);
        }
        else { /* Subject never used before */
          if (nSubjFound != 0) {
            printf ("Error: Subject missing in some experiments\n");
            exit (0);
          }
        }
      }
    } /* for i */
  }

  if (*pnSubjs == 0) *pnSubjs = 1;

  if (bSubjectUsed) printf ("%ld Subject(s) defined\n", *pnSubjs);

  /* allocate the plSubjsIndex array */
  if ( !(*plSubjsIndex = InitlVector (*pnSubjs)))
    ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "GetnSubjs", NULL);

  /* fill the plSubjsIndex array with the subjects labels, by order of
     appearance */
  k = 0;
  for (i = 0; i < panal->expGlobal.iExp; i++) {
    j = 0; /* second index for the experiments */
    while ((j < i) &&
           (panal->rgpExps[i]->iSubject != panal->rgpExps[j]->iSubject)) {
      j++;
    }
    if (j == i) { /* this subject has not already been defined */
      (*plSubjsIndex)[k] = panal->rgpExps[i]->iSubject;
      k++;
    }
  } /* for */

} /* GetNSubjects */


/* ----------------------------------------------------------------------------
   InitArrays

   Inputs (unmodified):
   lDim:        dimension to use for the arrays.
   nSubjs:      number of subjects (must be > 0), it's one of the dimensions
   
   Outputs:
   pdSum:       array of running sums, dimension nSubjs x lDim.
   prgdSumProd: array of running sums of parameters cross products,
                dimension (nSubjs x lDim x lDim).
*/

void InitArrays (long lDim, long nSubjs, double ***pdSum, 
                 double ****prgdSumProd)
{
  long i, j, k;

  /* allocate the pdSum and prgdSumProd arrays */

  if ( !(*pdSum = (double **) malloc(nSubjs * sizeof(double *))) ||
       !(*prgdSumProd = (double ***) malloc(nSubjs * sizeof(double **))))
    ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "InitArrays", NULL);
      
  for (i = 0; i < nSubjs; i++) {
    if ( !((*pdSum)[i] = InitdVector (lDim)) ||
         !((*prgdSumProd)[i] = InitdMatrix (lDim, lDim)))
      ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "InitArrays", NULL);

    /* init pdSum */
    for (j = 0; j < lDim; j++) (*pdSum)[i][j] = 0;

    /* init prgdSumProd */
    for (j = 0; j < lDim; j++)
      for (k = 0; k < lDim; k++) (*prgdSumProd)[i][j][k] = 0;
  }

} /* InitArrays */


/* ----------------------------------------------------------------------------
   SkipWhiteSpaceAndComments

   Just does that. Comments start with a #.
*/

void SkipWhiteSpaceAndComments (FILE *pfile)
{
  char ch = ' ';

  while (1)
    switch (ch = fgetc (pfile)) {
      default: goto Skipped;
        
      case '#' : fscanf (pfile, "%*[^\n]");
                 (void) fgetc (pfile);
                     break;     
      case '\t': /* Keep going */
      case ' ' : /* Keep going */
      case '\n': break;
    } /* switch */

  Skipped: ungetc (ch, pfile);

} /* SkipWhiteSpaceAndComments */


/* ----------------------------------------------------------------------------
   InitData

   If Data statements are not used and, instead, the data
   file is used, it opens the data file which contains the
   observed data values for each experiment, each variable
   observed, and each time of observation. Missing values
   can be specified.

   If Data are used it flattens them in pdData.
   Missing values can be specified.

   If subjects are defined it creates one data array per subject.

   Inputs (unmodified):
   panal:      analysis specifications. Used to find how many
               experiments are simulated etc.

   Outputs:
   pdData:     array of data points. It is allocated and filled
               with values given.
   pdNewPred:  array of old predictions. It is allocated but
               not filled.
   pdPred:     array of predictions. It is allocated and not
               filled.
   pnData:     a pointer to the total number of data points.
               That number has to be > 0.
*/

void InitData (PANALYSIS panal, double **pdData, double **pdNewPred,
               double **pdPred, long *pnData)
{
  BOOL bWarn;
  int  i, j, k;
  FILE *pfile;
  OUTSPEC *pos;

  if (panal->gd.szGdata) { /* read the data from a file */

    pfile = fopen (panal->gd.szGdata, "r");

    if (!pfile) {
      printf ("Cannot open Markov file '%s'\n", panal->gd.szGdata);
      exit (-1);
    } /* if */

    /* skip comment lines */
    SkipWhiteSpaceAndComments (pfile);

    /* read the size of the data list */
    fscanf (pfile, "%ld", pnData);

    if (*pnData == 0) { /* no data ? */
      printf("Error: number of data is zero in data file - Exiting.\n");
      exit(0);
    }

    /* allocate the pdData array  */
    if ( !(*pdData = InitdVector (*pnData)))
      ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "InitData", NULL);

    /* Read the pdData array */
    for  (i = 0; i < *pnData; i++) {
      fscanf (pfile, "%lg", &(*pdData)[i]);
      if ((*pdData)[i] == INPUT_MISSING_VALUE)
        (*pdData)[i] = MISSING_VALUE;
    } /* for */

  } /* if panal->gd.szGdata */

  else { /* read the data from Data statements */

    /* count the data points for allocation */
    bWarn = FALSE;
    *pnData = 0;
    for (i = 0; i < panal->expGlobal.iExp; i++) {
      pos = &panal->rgpExps[i]->os;
      for (j = 0; j < pos->nOutputs; j++) {
        if (pos->prgdDataVals[j]) /* if a Data statement exists... */
          for (k = 0; k < pos->pcOutputTimes[j]; k++)
            *pnData = *pnData + 1;
        else bWarn = TRUE;
      } /* for j */
    } /* for i */

    if (*pnData == 0) { /* no data ? */
      printf("Error: you must either provide Data Statements ");
      printf("or a data file - Exiting.\n");
      exit(0);
    }

    if (bWarn) { /* be nice: warn the user */
      printf ("Warning: there are more outputs than data.\n");
      printf ("The extra outputs are not used.\n");
    }

    /* allocate the pdData array  */
    if ( !(*pdData = InitdVector (*pnData)))
      ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "InitData", NULL);

    /* copy the data from Data statements in pdData */
    *pnData = 0;
    for (i = 0; i < panal->expGlobal.iExp; i++) {
      pos = &panal->rgpExps[i]->os;
      for (j = 0; j < pos->nOutputs; j++)
        if (pos->prgdDataVals[j]) /* if a Data statement exists */
          for (k = 0; k < pos->pcOutputTimes[j]; k++) {
            (*pdData)[*pnData] = pos->prgdDataVals[j][k];
            if ((*pdData)[*pnData] == INPUT_MISSING_VALUE)
              (*pdData)[*pnData] = MISSING_VALUE;
            *pnData = *pnData + 1;
          } /* for k */
    } /* for i */

  } /* else */

  /* allocate the pdNewPred and pdPred arrays */
  if ( !(*pdNewPred = InitdVector (*pnData)) ||
       !(*pdPred = InitdVector (*pnData)))
    ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "InitData", NULL);

} /* InitData */


/* ----------------------------------------------------------------------------
   OpenMarkovFiles

   Opens the output file and the restart file.
*/

void OpenMarkovFiles (PANALYSIS panal)
{
  /* Take care of the output file first */

  /* Use command line spec if given */
  if (panal->expGlobal.os.bCommandLineSpec)
    panal->gd.szGout = panal->expGlobal.os.szOutfilename;

  /* Default if none given */
  else if (!(panal->gd.szGout))
    panal->gd.szGout = "simgb.out";

  if (!(panal->gd.pfileGibOut)
      && !(panal->gd.pfileGibOut = fopen (panal->gd.szGout, "w")))
    ReportError (NULL, RE_FATAL | RE_CANNOTOPEN,
                 panal->gd.szGout, "OpenMarkovFiles()");

  /* eventually open the restart file */
  if (panal->gd.szGrestart)
    if (!(panal->gd.pfileRestart)
      && !(panal->gd.pfileRestart = fopen (panal->gd.szGrestart, "r")))
      ReportError (NULL, RE_FATAL | RE_CANNOTOPEN,
                   panal->gd.szGrestart, "OpenMarkovFiles()");

} /* OpenMarkovFiles */


/* ----------------------------------------------------------------------------
   WriteOutHeader

   Prints a tabulated header at the top of the output file.
   The header gives the current run number, the name of
   the parameters, the list of experimental standard deviations
   estimated, the probability of the model under the prior, the
   value of the likelihood and the posterior probability.
*/

void WriteOutHeader (FILE *pfileOut, PANALYSIS panal, long nSubjs,
                     long *plSubjsIndex, long nSigma)
{
  long i, j;
  PMONTECARLO pmc = &panal->mc;

  fprintf (pfileOut, "iter");

  if (nSubjs > 1) {
    /* population parameters */
    for (j = 0; j < pmc->nParms; j++)
      fprintf (pfileOut, "\tMu_%s", GetVarName(pmc->rgpMCVar[j]->hvar));
    for (j = 0; j < pmc->nParms; j++)
      fprintf (pfileOut, "\tKsi_%s", GetVarName(pmc->rgpMCVar[j]->hvar));

    /* subjects' parameters */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < pmc->nParms; j++)
        fprintf (pfileOut, "\t%ld_%s",
                 plSubjsIndex[i], GetVarName(pmc->rgpMCVar[j]->hvar));
  }
  else
    for (j = 0; j < pmc->nParms; j++)
      fprintf (pfileOut, "\t%s", GetVarName(pmc->rgpMCVar[j]->hvar));

  /* experimental SDs */
  for (i = 0; i < nSigma; i++)
    fprintf (pfileOut, "\tsigma_%ld", i+1);

  fprintf (pfileOut, "\tlnPrior\tlnLikelihood\tlnPosterior\n");

  fflush (pfileOut);

} /* WriteOutHeader */


/* ----------------------------------------------------------------------------
   Estimate_y

   Calculates y[] for the given conditions by running the model.
   The data is not transformed. Transformations should be coded
   in the likelihood function etc. However it is flattened in the pdPred
   array.

   Note that pdPred may not be completely initialized by this routine if it
   is passed uninitialized.
*/

int Estimate_y (PANALYSIS panal, long iSubject, long *plSubjsIndex,
                double *pdTheta, double *pdPred)
{
  int cNPred;
  int i, j, k;
  OUTSPEC *pos;

  /* Run the PBPK model for each experiment assigned to the subject specified
   */
  for (i = 0; i < panal->expGlobal.iExp; i++)
    if (panal->rgpExps[i]->iSubject == plSubjsIndex[iSubject]) {
      InitModel ();

      /* Global modifications */
      ModifyParms (panal->expGlobal.plistParmMods);

      /* Set params to pdTheta values */
      SetParms (panal->mc.nParms, panal->mc.rghvar, pdTheta);

      /* set the Mods for this exp */
      ModifyParms (panal->rgpExps[i]->plistParmMods);

      if (DoOneExperiment (panal->rgpExps[i])) {
        /* Error */
        printf ("Warning: Can't estimate y with parameters:\n");
        WriteArray (stdout, panal->mc.nParms, pdTheta);
        fputc('\n', stdout);
        return 0;
      }
    } /* for - if */

  /* flatten the predictions: this rewrites pdPred, for the current subject
   */
  cNPred = 0;
  for (i = 0; i < panal->expGlobal.iExp; i++) {
    pos = &panal->rgpExps[i]->os;
    for (j = 0; j < pos->nOutputs; j++) {
      if ((pos->prgdDataVals[j]) || (panal->gd.szGdata)) {
        /* if a Data statement exists or a data file is used */
        for (k = 0; k < pos->pcOutputTimes[j]; k++) {
          if (panal->rgpExps[i]->iSubject == plSubjsIndex[iSubject]) {
            pdPred[cNPred] = pos->prgdOutputVals[j][k];
            if (pdPred[cNPred] <= 0.0) {
              /* Error */
              printf ("Error: negative y (%g) ", pdPred[cNPred]);
              printf ("incompatible with lognormal error model\n");
              printf ("Parameter values:\n");
              WriteArray (stdout, panal->mc.nParms, pdTheta);
              fputc('\n', stdout);
              return 0;
            }
          }
          cNPred = cNPred + 1;
        } /* for k */
      } /* if */
    } /* for j */
  } /* for i */

  return 1;

} /* Estimate_y */


/* ----------------------------------------------------------------------------
   Init_theta

   Inits the thetas by sampling out of a normal or lognormal distribution,
   eventually truncated, with parameters pdMu and pdKsi (population parameters)
   if there is more than 1 subject, and with prior parameters (from input file)
   if there is only one subject.
   The bounds used are those specified in the input prior.
   The sampling distribution depends on the prior form.

   Makes sure that the sampled values give a computable model. That initializes
   the pdPred array with usable values.
*/

void Init_theta (PANALYSIS panal, long nSubjs, long *plSubjsIndex,
                 long nParms, double *pdMu, double *pdKsi, double ***pdTheta,
                 double *pdPred)
{
  register long i, j, k;
  PMONTECARLO pmc = &panal->mc;

  if ( !(*pdTheta     = InitdMatrix (nSubjs, nParms)))
    ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "Init_theta", NULL);

  for (i = 0; i < nSubjs; i++) {
    k = 0; /* iteration counter */
    do {
      for (j = 0; j < nParms; j++) {
        if (nSubjs == 1) { /* just sample from the input prior */
          CalculateOneMCParm (pmc->rgpMCVar[j], pmc);
          (*pdTheta)[i][j] = pmc->rgpMCVar[j]->dVal;
        }
        else { /* sampling a variate from N(mu,ksi) or variant */

          switch (pmc->rgpMCVar[j]->iType) {

          case MCV_NORMAL:
            (*pdTheta)[i][j] = NormalRandom (pdMu[j], pdKsi[j]);
            break;

          case MCV_LOGNORMAL:
            (*pdTheta)[i][j] = LogNormalRandom (log (pdMu[j]), pdKsi[j]);
            break;

          case MCV_UNIFORM:
          case MCV_TRUNCNORMAL:
            (*pdTheta)[i][j] = TruncNormalRandom (pdMu[j], pdKsi[j],
                               pmc->rgpMCVar[j]->uMin.dval,
                               pmc->rgpMCVar[j]->uMax.dval);
            break;

          case MCV_LOGUNIFORM:
          case MCV_TRUNCLOGNORMAL:
            (*pdTheta)[i][j] = TruncLogNormalRandom(log (pdMu[j]), pdKsi[j],
                               pmc->rgpMCVar[j]->uMin.dval,
                               pmc->rgpMCVar[j]->uMax.dval);
            break;

          default:
            printf ("Error: unknown distribution in Init_theta - Exiting\n");
            exit(0);
            break;

          } /* switch */
        } /* else */
      } /* for */

      k++;

    } while ((k <= 10) &&
             !(bModelComputed = Estimate_y(panal, i, plSubjsIndex,
                                           (*pdTheta)[i], pdPred)));

    if (k > 10) {
      if (nSubjs == 1)
        printf ("Problem: no starting point computable - Exiting\n");
      else {
        printf ("Problem: no starting point computable for subject %ld ", i);
        printf ("- Exiting\n");
      }
      exit (0);
    }

  } /* for i */

} /* Init_theta */


/* ----------------------------------------------------------------------------
 ReadChain

   initialize the population and individual parameters by reading them in the 
   restart file. Reads also initial values of the experimental SD Sigma.
*/

void ReadChain (FILE *pfileRestart, long nSubjs, long nParms, double *pdMu, 
                double *pdKsi, double ***pdTheta, double **pdSigma, long nSigma,
                double **pdSum, double ***prgdSumProd, long nInitIter, 
                long *iter)
{
  long lDummy;
  int i, j, k;

  /* space allocation */
  if ( !(*pdTheta = InitdMatrix (nSubjs, nParms)))
    ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "ReadChain", NULL);

  /* skip the first line.  This allows a MC output file to be used
     directly as a restart file. */
  fscanf (pfileRestart,  "%*[^\n]");  getc(pfileRestart);

  /* as long as we have not reached the end of the file we keep reading lines
     and overwriting the thetas and sigmas, they keep only their
     last value. We also keep incrementing the global iteration counter
     iter:
   */
  while ( !( feof(pfileRestart) ||
          (fscanf(pfileRestart, "%ld", &lDummy) == EOF))) {

    /* if several subjects, read the population parameters */
    if (nSubjs > 1) {
      for (i = 0; i < nParms; i++)
        fscanf (pfileRestart, "%lg", &(pdMu[i]));
      for (i = 0; i < nParms; i++)
        fscanf (pfileRestart, "%lg", &(pdKsi[i]));
    }

    /* read pdTheta */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++)
        fscanf (pfileRestart, "%lg", &((*pdTheta)[i][j]));
      
    /* read pdSigma */
    for (i = 0; i < nSigma; i++)
      fscanf (pfileRestart, "%lg", &(*pdSigma)[i]);

    /* Throw away remainder of line. This allows a MC output file to be used
       directly as a restart file. */
    fscanf (pfileRestart,  "%*[^\n]"); getc(pfileRestart);

    /* update pdSum */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++)
        pdSum[i][j] = pdSum[i][j] + (*pdTheta)[i][j];

    /* update prgdSumProd */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++)
        for (k = 0; k < nParms; k++)
          prgdSumProd[i][j][k] = prgdSumProd[i][j][k] + 
                                 (*pdTheta)[i][j] * (*pdTheta)[i][k];

    /* increment lIter */
    *iter = *iter + 1;

    if (*iter > nInitIter) { /* oops, too far */
      printf ("Error: the number of line in restart file exceeds ");
      printf ("       the number of initial iterations (%ld) - Exiting",
              nInitIter);
      exit(0);
    }

  } /* end while */

  /* note that the theta returned is the last parameter set read */

  fclose (pfileRestart);

} /* ReadChain */


/* ----------------------------------------------------------------------------
   LnPrior

   Returns the log of the density of a pdTheta component under its prior, up to
   some constant. The prior is the input prior is there is only one subject,
   and is defined by (mu,ksi) otherwise. If there are several subjects and if
   the prior of mu is lognormal or loguniform theta is distributed
   lognormally, and normally otherwise.

   It should, maybe, simply use calls to random.c routines...:
   The problem is that the constants would have to be introduced in the
   external routines (to be allways usable) and that would slow down the
   computation.
   Is that important ?
*/

double LnPrior (MCVAR *pmcvar, long nSubjs, double dTheta, double dMu,
                double dKsi)
{
  double dTmp, density;

  if (nSubjs == 1)
    switch (pmcvar->iType) {

      case MCV_NORMAL:
      case MCV_TRUNCNORMAL:
        dTmp = (dTheta - pmcvar->uParm1.dval) / pmcvar->uParm2.dval;
        return -0.5 * dTmp * dTmp - log(pmcvar->uParm2.dval);
        break;

      case MCV_LOGNORMAL:
      case MCV_TRUNCLOGNORMAL:
        dTmp = (log(dTheta) - pmcvar->uParm1.dval) / pmcvar->uParm2.dval;
        return -0.5 * dTmp * dTmp - log(pmcvar->uParm2.dval);
        break;

      case MCV_UNIFORM:
        return -log(pmcvar->uMax.dval - pmcvar->uMin.dval);
        break;

      case MCV_LOGUNIFORM:
        return -log(dTheta);
        break;

      case MCV_PIECEWISE:
        density = 2 / (pmcvar->uMax.dval + pmcvar->uParm2.dval -
                       pmcvar->uParm1.dval - pmcvar->uMin.dval);

        if (dTheta <= pmcvar->uParm1.dval)
          return log (density * (dTheta - pmcvar->uMin.dval) /
                                (pmcvar->uParm1.dval - pmcvar->uMin.dval));

        else
          if (dTheta <= pmcvar->uParm2.dval) return log (density);
          else
            return log (density * (pmcvar->uMax.dval - dTheta) /
                                  (pmcvar->uMax.dval - pmcvar->uParm2.dval));
        break;

      case MCV_BETA:
        if ((dTheta == 0.0) || (dTheta == 1)) return (0.0);
        else
          return (pmcvar->uParm1.dval - 1) * log(dTheta) +
                 (pmcvar->uParm2.dval - 1) * log(1 - dTheta) +
                 lnGamma(pmcvar->uParm1.dval + pmcvar->uParm2.dval) -
                 lnGamma(pmcvar->uParm1.dval) - lnGamma(pmcvar->uParm2.dval);
        break;

      default:
        printf ("Error: unknown distribution in LnPrior - Exiting\n");
        exit(0);
        break;

    } /* switch - if nSubjs == 1 */

  else /* more than 1 subject, use the population parameters */

    switch (pmcvar->iType) {

      case MCV_NORMAL:
      case MCV_TRUNCNORMAL:
      case MCV_UNIFORM:
        dTmp = (dTheta - dMu) / dKsi;
        return -0.5 * dTmp * dTmp - log(dKsi);
        break;

      case MCV_LOGNORMAL:
      case MCV_TRUNCLOGNORMAL:
      case MCV_LOGUNIFORM:
        dTmp = (log(dTheta) - log(dMu)) / dKsi;
        return -0.5 * dTmp * dTmp - log(dKsi);
        break;

      default:
        printf ("Error: unimplemented prior in LnPrior - Exiting\n");
        exit(0);
        break;

    } /* switch - if nSubjs == 1 */

} /* LnPrior */


/* ----------------------------------------------------------------------------
   Sample_theta

   sample pdTheta[i] from its conditional (prior times likelihood),
   using a Metropolis jump. The proposal distribution is a
   normal centered on the previous value of theta.
*/

void Sample_theta (PANALYSIS panal, long nParms, double *pdData,
                   double *pdNewPred, double *pdPred, long nSubjs,
                   long *plSubjsIndex, long *plSigmaIndex, long nData,
                   double *pdSigma, long nSigma, double *pdMu, double *pdKsi,
                   double **pdTheta, long iter)
{
  #define UPDATE_AT 20

  register long i, j, k;
  double dAccept, dPjump, dpropSD, theta_old, lnpdf_theta_old, lnpdf_theta_new;
  MCVAR *pMCVar;

  static long **lAccepted = NULL;
  static double **dKernelRatio;

  if (!lAccepted) {
    /* allocate */
    if ( !(lAccepted = InitlMatrix (nSubjs, nParms)) ||
         !(dKernelRatio = InitdMatrix (nSubjs, nParms)))
      ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "Sample_theta", NULL);

    /* initialize */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++) {
        lAccepted[i][j] = 0;
        dKernelRatio[i][j] = 20.0;  /* heuristic */
    }

    for (k = 0; k < nData; k++) pdNewPred[k] = pdPred[k];
  }

  /* Sample a new value for each parameter
   */
  for (i = 0; i < nSubjs; i++) {
    for (j = 0; j < nParms; j++) {

      /* set the KernelRatio (ratio of the prior over the jumping kernel) */
      if ((iter != 0) && (iter % UPDATE_AT) == 0) {
        dAccept = lAccepted[i][j] / (double) UPDATE_AT;
        if ( dAccept > 0.3)
          dKernelRatio[i][j] = dKernelRatio[i][j] / 2;
        else
          if (dAccept < 0.15)
            dKernelRatio[i][j] = dKernelRatio[i][j] * 2;

        if (dKernelRatio[i][j] < 1.0) dKernelRatio[i][j] = 1.0;
        lAccepted[i][j] = 0; /* reset the counter */
      }

      pMCVar = panal->mc.rgpMCVar[j];

      /* keep the value of theta */
      theta_old = pdTheta[i][j];

      /* Run the model using the old pdTheta value, if needed
       */
      if (!bModelComputed)
        bModelComputed = Estimate_y (panal, i, plSubjsIndex, pdTheta[i],
                                     pdPred);

      if (!bModelComputed) {
        /* if we cannot even get compute the current model we
           cannot event start */
        printf ("Error: cannot update pdTheta - Exiting.\n");
        exit (0);
      }

      /* so the model has been computed with theta_old, proceed
         by computing the log of the posterior at this point
       */
      lnpdf_theta_old = LnPrior(panal->mc.rgpMCVar[j], nSubjs, theta_old,
                                pdMu[j], pdKsi[j]) +
                        LnLikelihood(panal, pdData, pdPred, plSigmaIndex,
                                     nData, pdSigma, nSigma);
                        
      /* Sample a new value from a normal centered around the old value.
         The SD of that normal is computed here.
       */
      switch (pMCVar->iType) {
        case MCV_NORMAL:
          dpropSD = pMCVar->uParm2.dval / dKernelRatio[i][j]; break;

        case MCV_LOGNORMAL:
          dpropSD = exp(pMCVar->uParm1.dval) * (exp(pMCVar->uParm2.dval) - 1) /
                    dKernelRatio[i][j];
          break;
      
        default:
          dpropSD = (pMCVar->uMax.dval - pMCVar->uMin.dval) /
                    dKernelRatio[i][j];
      }
      pdTheta[i][j] = TruncNormalRandom (theta_old, dpropSD, pMCVar->uMin.dval,
                                         pMCVar->uMax.dval);

      /* Run the model using the newly sampled pdTheta value
       */
      if ( !(Estimate_y (panal, i, plSubjsIndex, pdTheta[i], pdNewPred))) {
        printf ("Warning: theta[%ld][%ld] not updated - Continuing.\n", i, j);
        pdTheta[i][j] = theta_old; /* revert to the old pdTheta */
        if (j == nParms - 1) {
          /* we need to clean up pdNewPred */
          for (k = 0; k < nData; k++) pdNewPred[k] = pdPred[k]; /* restore */
        }
      }
      else {
        /* new point gives a computable model, get the log-posterior */
        lnpdf_theta_new = LnPrior(panal->mc.rgpMCVar[j], nSubjs,
                                  pdTheta[i][j], pdMu[j], pdKsi[j]) +
                          LnLikelihood(panal, pdData, pdNewPred, plSigmaIndex,
                                       nData, pdSigma, nSigma);

        /* Test for a Metropolis-Hastings jump. Only calc the P() of the
           current pdTheta, as the others are constant and would cancel out.
           The normal proposal used is symetrical and the proposal density
           terms cancel out
         */

        dPjump = exp(lnpdf_theta_new - lnpdf_theta_old);

        if (Randoms() > dPjump) { /* don't jump */
          pdTheta[i][j] = theta_old; /* Reject - keep old pdTheta */
          for (k = 0; k < nData; k++) pdNewPred[k] = pdPred[k]; /* restore */
        }
        else {
          lAccepted[i][j]++; /* used above to adjust the acceptation rate */
          for (k = 0; k < nData; k++) pdPred[k] = pdNewPred[k]; /* update */
        }

      } /* else */
    } /* for j (each pdTheta) */
  } /* for i (each subjects) */

  #undef UPDATE_AT

} /* Sample_theta */


/* ----------------------------------------------------------------------------
   Sample_theta_by_vector

   sample the parameters by jumping by entire vector. The proposal distribution
   is a multinormal centered at the previous vector.

*/
void Sample_theta_by_vector (PANALYSIS panal, long nParms, double *pdData,
                             double *pdNewPred, double *pdPred, long nSubjs,
                             long *plSubjsIndex, long *plSigmaIndex,
                             long nData, double *pdSigma, long nSigma,
                             double *pdMu, double *pdKsi, double **pdTheta,
                             double **pdSum, double ***prgdSumProd, long iter)
{

  #define UPDATE_AT 100

  BOOL bOutBound, bModelOK;
  register long i, j, k;
  double dAccept, dLnPold, dLnPnew, dPjump, dTmp;
  MCVAR *pMCVar;

  static long lAccepted = 0;
  static double dJumpSpread;
  static PDOUBLE **prgdComponent;
  static PDOUBLE *pdTheta_old; /* previous model parameters values */
  static PDOUBLE **prgdVariance = NULL;
  static PDOUBLE dNormVar; /* storage for nParms normal deviates */

  if ((iter == panal->gd.nInitIter) ||
      ((iter > panal->gd.nInitIter) && ((iter % UPDATE_AT) == 0))) {

    if (iter == panal->gd.nInitIter) {

      /* allocate */

      if (!(prgdVariance = (double ***) malloc(nSubjs * sizeof(double **))) ||
          !(prgdComponent = (double ***) malloc(nSubjs * sizeof(double **))))
        ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "Sample_theta_by_vector", 
                     NULL);

      for (i = 0; i < nSubjs; i++) 
        if ( !(prgdVariance[i]  = InitdMatrix (nParms, nParms)) ||
             !(prgdComponent[i] = InitdMatrix (nParms, nParms)))
          ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "Sample_theta_by_vector",
                       NULL);

      if ( !(pdTheta_old = InitdMatrix (nSubjs, nParms)) ||
           !(dNormVar    = InitdVector (nParms)))
        ReportError (NULL, RE_OUTOFMEM | RE_FATAL, "Sample_theta_by_vector",
                     NULL);

      /* initialize dJumpSpread */
      dJumpSpread = 2.4 / sqrt(nParms); /* Gelman's Normal theory result */
    }

    /* generate the covariance matrix */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++)
        for (k = 0; k < nParms; k++)
          prgdVariance[i][j][k] = (prgdSumProd[i][j][k] -
                                   pdSum[i][j] * pdSum[i][k] / iter) / (iter-1);

    /* do the Cholesky decomposition of the covariance matrix */
    for (i = 0; i < nSubjs; i++)
      Cholesky (prgdVariance[i], prgdComponent[i], nParms);

    /* If some vector samplings have been made, check that the current
       dJumpSpread leads to an acceptation rate of 15 to 30% over the
       last batch of simulations. Adjust eventually. */

    if (iter > panal->gd.nInitIter) {
      dAccept = ((double) lAccepted) / (double) UPDATE_AT;

      if ( dAccept > 0.3) dJumpSpread = dJumpSpread * 1.5;
      else if (dAccept < 0.15) dJumpSpread = dJumpSpread / 1.5;

      printf ("Monitoring: iter\t%ld\t", iter);
      printf ("success rate\t%g\tspread\t%g\n", dAccept, dJumpSpread);
      lAccepted = 0; /* reset the counter */
    }

  } /* if iter == panal->gd.nInitIter */

  /* keep the value of theta */
  for (i = 0; i < nSubjs; i++)
    for (j = 0; j < nParms; j++) pdTheta_old[i][j] = pdTheta[i][j];

  /* make sure that the model has been computed for all subjects
   */
  if (!bModelComputed) {
    i = 0;
    do {
      bModelComputed = Estimate_y (panal, i, plSubjsIndex, pdTheta[i],
                                   pdPred);
      i++;
    } while (bModelComputed && (i < nSubjs));
  }

  if (!bModelComputed) {
    /* if we cannot even get compute the current model we
       cannot event start */
    printf ("Error: cannot update pdTheta - Exiting.\n");
    exit (0);
  }

  /* so the model has been computed with theta_old: proceed
     and compute the log of the conditional density dLnPold at this point
   */

  /* first the prior */
  dLnPold = 0.0;
  for (i = 0; i < nSubjs; i++)
    for (j = 0; j < nParms; j++)
      dLnPold = dLnPold + LnPrior(panal->mc.rgpMCVar[j], nSubjs,
                                  pdTheta[i][j], pdMu[j], pdKsi[j]);

  /* then the likelihood */
  dLnPold = dLnPold + LnLikelihood(panal, pdData, pdPred, plSigmaIndex,
                                   nData, pdSigma, nSigma);

  /* sample a new pdTheta vector out of the posterior: try only once.
     more without printing would undersample extreme values of
     the variables out of the vector */

  /* generate new pdTheta vector in the range of each element */
  for (i = 0; i < nSubjs; i++)
    do {
      bOutBound = FALSE;
    
      for (j = 0; j < nParms; j++) dNormVar[j] = NormalRandom(0, 1);

      for (j = 0; j < nParms; j++) {
        pMCVar = panal->mc.rgpMCVar[j];

        dTmp = 0;
        for (k = 0; k <= j; k++) /* only the non-zero part of prgdComponent */
          dTmp = dTmp + dNormVar[k] * prgdComponent[i][j][k];

        pdTheta[i][j] = pdTheta_old[i][j] + dJumpSpread * dTmp;

        if ((pMCVar->iType != MCV_NORMAL) && (pMCVar->iType != MCV_LOGNORMAL))
         bOutBound = ((pdTheta[i][j] < pMCVar->uMin.dval) ||
                      (pdTheta[i][j] > pMCVar->uMax.dval));

        if (bOutBound) j = nParms; /* force loop exit */
      }
    } while (bOutBound);

  /* compute the model at the newly drawn point
   */
  i = 0;
  do {
    bModelOK = Estimate_y (panal, i, plSubjsIndex, pdTheta[i], pdNewPred);
    i++;
  } while (bModelOK && (i < nSubjs));

  if (!bModelOK) {
    printf ("pdTheta not updated - Continuing.\n");
    /* restore pdTheta */
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++) pdTheta[i][j] = pdTheta_old[i][j];
  }
  else {
    /* the new point gives a computable model
       compute the log of the prior for this point
     */
    dLnPnew = 0.0;
    for (i = 0; i < nSubjs; i++)
      for (j = 0; j < nParms; j++)
        dLnPnew = dLnPnew + LnPrior(panal->mc.rgpMCVar[j], nSubjs,
                                    pdTheta[i][j], pdMu[j], pdKsi[j]);

    /* compute the loglikelihood of the data for this point */
    dLnPnew = dLnPnew + LnLikelihood(panal, pdData, pdNewPred, plSigmaIndex,
                                     nData, pdSigma, nSigma);

    /* Test for a Metropolis-Hastings jump. Only calc the P() of the
       current pdTheta, as the others are constant and would cancel out.
       The normal proposal used is symetrical and the proposal density
       terms cancel out
     */

    dPjump = exp(dLnPnew - dLnPold);

    if (Randoms() > dPjump) { /* don't jump, restore pdTheta */
      for (i = 0; i < nSubjs; i++)
        for (j = 0; j < nParms; j++) pdTheta[i][j] = pdTheta_old[i][j];
    }
    else {
      lAccepted++; /* this is used above to adjust the acceptation rate */
      for (i = 0; i < nData; i++) pdPred[i] = pdNewPred[i]; /* update pdPred */
    }

  } /* else */

} /* Sample_theta_by_vector */


/* ----------------------------------------------------------------------------
   UpdateArrays

   update the running sums.
*/

void UpdateArrays (long nParms, long nSubjs, double **pdTheta, 
                   double **pdSum, double ***prgdSumProd)
{
  long i, j, k;

  /* update pdSum */
  for (i = 0; i < nSubjs; i++)
    for (j = 0; j < nParms; j++)
      pdSum[i][j] = pdSum[i][j] + pdTheta[i][j];

  /* update prgdSumProd */
  for (i = 0; i < nSubjs; i++)
    for (j = 0; j < nParms; j++)
      for (k = 0; k < nParms; k++)
        prgdSumProd[i][j][k] = prgdSumProd[i][j][k] + 
                               pdTheta[i][j] * pdTheta[i][k];

} /* UpdateArrays */


/* ----------------------------------------------------------------------------
   WriteMarkovOut

   writes to the Markov output file.
*/

void WriteMarkovOut (FILE *pfileOut, long iter, double **pdTheta,
                     long nSubjs, long nParms, double *pdMu, double *pdKsi,
                     double *pdSigma, long nSigma, PANALYSIS panal,
                     double *pdData, double *pdPred, long *plSigmaIndex,
                     long nData)
{
  long i, j;
  double dTmp1, dTmp2;

  /* Show which iteration this is */
  fprintf(pfileOut, "%ld\t", iter);

  /* print the population parameters */
  if (nSubjs > 1) {
    WriteArray(pfileOut, nParms, pdMu);  fputc ('\t', pfileOut);
    WriteArray(pfileOut, nParms, pdKsi); fputc ('\t', pfileOut);
  }

  /* print the subjects' parameters */
  for (i = 0; i < nSubjs; i++) {
    WriteArray(pfileOut, nParms, pdTheta[i]);
    fputc ('\t', pfileOut);
  }

  /* print the experimental variances */
  WriteArray(pfileOut, nSigma, pdSigma); fputc ('\t', pfileOut);

  /* compute the log of the prior */
  dTmp1 = 0;
  for (i = 0; i < nSubjs; i++)
    for (j = 0; j < nParms; j++)
    dTmp1 = dTmp1 + LnPrior(panal->mc.rgpMCVar[j], nSubjs, pdTheta[i][j],
                            pdMu[j], pdKsi[j]);

  /* compute the loglikelihood */
  dTmp2 = LnLikelihood(panal, pdData, pdPred, plSigmaIndex,
                       nData, pdSigma, nSigma);

  /* print the logprior, the loglikelihood and the logposterior */
  fprintf(pfileOut, "%g\t%g\t%g\n", dTmp1, dTmp2, dTmp1+dTmp2);

  /* Let us look at the results after each iteration */
  fflush (pfileOut);

} /* WriteMarkovOut */


/* ----------------------------------------------------------------------------
   CloseMarkovFiles

   Closes output files associated with the Markov sampler.
   The restart file has already been closed by the ReadChain

*/

void CloseMarkovFiles (PANALYSIS panal)
{
  if (panal->gd.pfileGibOut) {
    fclose (panal->gd.pfileGibOut);
    printf ("\nWrote results to \"%s\"\n", panal->gd.szGout);
  }

} /* CloseMarkovFiles */


/* ----------------------------------------------------------------------------
   DoMarkov

   Core routine of the MCMC sampler
*/

void DoMarkov (PANALYSIS panal)
{
  PGIBBSDATA  pgd = &panal->gd;
  PMONTECARLO pmc = &panal->mc;

  long iter = 0;
  long nData;                 /* number of data points */
  long nSigma;                /* number of experimental SDs */
  long nSubjs;                /* number of subjects investigated */
  long nIter = pgd->nMaxIter; /* scheduled iterations of the sampler */
  long *plSigmaIndex;         /* index array for pdSigma computation */
  long *plSubjsIndex;         /* index array of subject labels */
  double *pdData;             /* observations */
  double *pdKsi;              /* population SDs */
  double *pdKsi_prior;        /* priors of the population SDs */
  double *pdMu;               /* population means */
  double *pdNewPred;          /* proposed predictions, all subjects mixed */
  double *pdPred;             /* predictions, all subjects mixed */
  double *pdSigma;            /* experimental standard deviations */
  double **pdSum;             /* parameters running sums, bu subjects */
  double **pdTheta;           /* model parameters sampled, by subjects */
  double ***prgdSumProd;      /* params cross products running sums, by subj. */

  InitRandom (panal->dSeed, TRUE);

  /* which experiments are assigned to which subjects ? */
  GetNSubjects (panal, &nSubjs, &plSubjsIndex);

  /* for multivariate sampling the mean vector and covariance matrix of all
     parameters sampled (or at least over a significant number of iterations)
     needs to be evaluated. The running sums of the parameters and their
     cross products are stored and the corresponding arrays are allocated here
   */
  InitArrays (pmc->nParms, nSubjs, &pdSum, &prgdSumProd);

  /* announce the work to be done
   */
  printf ("\nDoing analysis - %ld %s iteration%s- %d experiment%c%s\n",
          nIter, "MCMC sampling", (nIter != 1 ? "s " : " "),
          panal->expGlobal.iExp,
          (panal->expGlobal.iExp > 1 ? 's' : ' '),
          (nIter != 1 ? " each" : " "));

  /* open restart and output files */
  OpenMarkovFiles (panal);

  /* Initialize the data and predictions arrays
   */
  InitData (panal, &pdData, &pdNewPred, &pdPred, &nData);

  /* initialize the population means
   */
  Init_mu (pmc, pmc->nParms, nSubjs, &pdMu);

  /* initialize the population variances
   */
  Init_ksi (pmc->nParms, nSubjs, &pdKsi_prior, &pdKsi);

  /* initialize the experimental variances
   */
  Init_sigma (panal, nData, &pdSigma, &nSigma, &plSigmaIndex);

  /* now that we know nSigma we can write out the header of the output file */
  WriteOutHeader (pgd->pfileGibOut, panal, nSubjs, plSubjsIndex, nSigma);

  /* Initialize the individual parameters to be sampled
   */
  if (!(pgd->szGrestart)) {
    /* normal initialization */
    Init_theta (panal, nSubjs, plSubjsIndex, pmc->nParms, pdMu, pdKsi,
                &pdTheta, pdPred);

    /* print the starting values */
    WriteMarkovOut (pgd->pfileGibOut, iter, pdTheta, nSubjs, pmc->nParms,
                    pdMu, pdKsi, pdSigma, nSigma, panal, pdData, pdPred,
                    plSigmaIndex, nData);
  }
  else {
    /* read the starting values in the order they are printed and
       close the file when finished */
    ReadChain (pgd->pfileRestart, nSubjs, pmc->nParms, pdMu, pdKsi, &pdTheta,
               &pdSigma, nSigma, pdSum, prgdSumProd, pgd->nInitIter, &iter);
  }

  /* start the loop ...
   */
  while (iter < nIter) {

    Sample_ksi (pmc, pmc->nParms, nSubjs, pdTheta, pdMu, pdKsi_prior, pdKsi);

    Sample_mu  (pmc, pmc->nParms, nSubjs, pdTheta, pdKsi, pdMu);

    Sample_sigma (panal, pdData, pdPred, nSubjs, plSubjsIndex,
                  plSigmaIndex, nData, nSigma, pdTheta, pdSigma);

    if(iter < pgd->nInitIter)
      Sample_theta (panal, pmc->nParms, pdData, pdNewPred, pdPred, nSubjs,
                    plSubjsIndex, plSigmaIndex, nData, pdSigma, nSigma, pdMu,
                    pdKsi, pdTheta, iter);
    else
      Sample_theta_by_vector (panal, pmc->nParms, pdData, pdNewPred, pdPred,
                              nSubjs, plSubjsIndex, plSigmaIndex, nData,
                              pdSigma, nSigma, pdMu, pdKsi, pdTheta, pdSum,
                              prgdSumProd, iter);

    /* Thus completes one iteration of the sampler */

    /* update pdSum and prgdSumProd */
    UpdateArrays (pmc->nParms, nSubjs, pdTheta, pdSum, prgdSumProd);

    /* increment iter */
    iter = iter + 1;

    if ((iter % pgd->nPrintFreq == 0) && (iter > nIter - pgd->nPrintIter)) {
#ifdef _MACOS_
      printf ("iter %ld\n", iter);
#endif
      WriteMarkovOut (pgd->pfileGibOut, iter, pdTheta, nSubjs, pmc->nParms,
                      pdMu, pdKsi, pdSigma, nSigma, panal, pdData, pdPred,
                      plSigmaIndex, nData);
    }

  } /* while iter */

  CloseMarkovFiles (panal);

} /* DoMarkov */
