#ifndef lint
static char SCCSid[] = "@(#) ./blog/save/blogat.c 07/23/93";
#endif

#define DBUG(a)
/* #define DBUG(a) {printf("%s",a);fflush(stdout);} */

/*
   This file contains the routines to adjust logfiles before they
   are actually FILES.

   The timer model used here is

       global_time = local_time * skew + local_offset

   The local times are computed as a delta from time when the logging
   is initialized (see xx_BLOG_tinit and SYuscDiff).

   Because all of these corrections are approximate, they can be selectively
   disabled.
 */

#include <stdio.h>
#include <strings.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "tools.h"
#include "comm/comm.h"
#include "blog/blog.h"
#include "system/system.h"
/* Header of one in-memory log block.  The records of a block start right
   after this header (the walkers in this file begin at (bl + 1)), and size
   is the extent of that record area, counted in the same units as the len
   field of each record header. */
typedef struct _blog { struct _blog *next; int size; } BLOG_BLOCK;
/* Defined elsewhere; its result is used in this file as the head of the
   list of log blocks, which is followed through the next field */
extern BLOG_BLOCK *xx_BLOG_get_blog_ptr();
/* Clock value taken by xx_BLOG_init_clock; xx_BLOG_adjtime1 uses it as the
   reference point when it turns raw time stamps into deltas */
static SYusc_time_t xx_BLOG_tinit;

/* One clock-offset exchange.  BLOGGetOffsetEvents keeps one of these per
   processor: a1 and a2 are filed under the processor that logged them, b1
   and p1 under the partner named in the B1 record. */
typedef struct {
    double  a1, b1, a2;         /* Times for the events */
    int     p1;                 /* processor that participated in
                                   this time-exchange (the one that
                                   logged b1) */
    } OffsetEvents;

/* Table of exchanges, one slot per processor; allocated and filled by
   BLOGGetOffsetEvents */
static OffsetEvents *offsetevents;
/* Number of exchanges counted by BLOGGetOffsetEvents; BLOGComputeOffsets
   only runs when this equals np - 1 */
static int          noffsets       = 0;
/* Switches for the individual corrections.  Nothing in the visible part of
   this file assigns them, so they keep their initial value of 0 unless
   other code changes them -- TODO confirm how they are meant to be set. */
static int          disableAll     = 0,   /* skip xx_BLOG_adjtime2 entirely */
		    disableSkew    = 0,   /* skip xx_BLOG_FindSkew */
		    disableOffsets = 0,   /* skip xx_BLOG_FindLocalOffset */
		    printOffsets   = 0;   /* print the skew and offset used */

/*
   Record the zero point for the log clock.

   This routine is called by the BLOG initialization routine.  The current
   clock value is stored in xx_BLOG_tinit; xx_BLOG_adjtime1 later converts
   every raw time stamp in the log into a delta from this value.

   Returns 0.  The value carries no information; it only gives the routine
   an explicit return type and a defined result.
 */
int xx_BLOG_init_clock()
{
SYusc_clock( &xx_BLOG_tinit );
return 0;
}

/*
   Collect the clock-offset exchange events from this processor's log and
   merge them with those of every other processor.

   Each log block is walked record by record.  A PAIR_A1 or PAIR_A2 record
   stores its time in the slot of the calling processor; a PAIR_B1 record
   stores its time, together with the calling processor's number, in the slot
   of the partner named by the first "other" field of the record.  The
   per-processor tables are then combined with GIOR, and the number of A1
   events is summed with GISUM into noffsets.

   NOTE(review): merging doubles with GIOR presumably relies on GIOR being a
   bitwise OR over ints and on every field of a slot being written by at
   most one processor (all others contributing 0.0, assumed to be all-zero
   bits) -- confirm.

   We should change csync.c to generate events that contain within them
   the time stamp.  Note that we'll either have to generate a double or
   send the 0-pt along as well.

   Returns 1 on success, 0 if a table could not be allocated (assuming
   CHKPTRV(p,v) returns v for a null p -- confirm in tools.h).
 */
int BLOGGetOffsetEvents()
{
int               xx_i, *bp, n, np, i, j, cnt;
BLOG_BLOCK        *bl;
int               *work;
BLOG_HEADER       *ap;
BLOG_VFIELD       *v;

DBUG("Getting offset events\n" );
np     = NUMNODES;

/* Release the table left by an earlier call so that it is not leaked */
if (offsetevents) {
    FREE(offsetevents);
    offsetevents = 0;
    noffsets     = 0;
    }

/* work is the scratch area for GIOR; it must hold as many ints as the
   table itself */
work   = (int *)MALLOC( np * sizeof(OffsetEvents) );    CHKPTRV(work,0);
offsetevents = (OffsetEvents *)MALLOC( np * sizeof(OffsetEvents) );
if (!offsetevents) {
    /* Do not leak the scratch area when the table itself cannot be had */
    FREE(work);
    }
CHKPTRV(offsetevents,0);
for (i=0; i<np; i++) {
    offsetevents[i].a1 = 0.0;
    offsetevents[i].a2 = 0.0;
    offsetevents[i].b1 = 0.0;
    offsetevents[i].p1 = 0;
    }

cnt = 0;
i   = MYPROCID;
for (bl = xx_BLOG_get_blog_ptr(); bl; bl = bl->next) {
    n      = bl->size;
    bp     = (int *)(bl + 1);
    xx_i   = 0;
    /* printf( "[%d] checking block, size %d\n", MYPROCID, n ); */
    while (xx_i < n) {
        ap = (BLOG_HEADER *)bp;
        /* printf( "[%d] block type %d\n", MYPROCID, ap->event ); */
        if (ap->event == BLOG_EVENT_PAIR_A1) {
            offsetevents[i].a1 = ap->time.s2;
            cnt++;
            }
        else if (ap->event == BLOG_EVENT_PAIR_A2) {
            offsetevents[i].a2 = ap->time.s2;
            }
        else if (ap->event == BLOG_EVENT_PAIR_B1) {
            /* The partner's number follows the record header */
            v = (BLOG_VFIELD *)(ap + 1);
            j = v->other[0];
            if (j < 0 || j >= np) {
                fprintf( stderr, "[%d] bad offset info (%d)\n", MYPROCID, j );
                }
            else {
                offsetevents[j].b1 = ap->time.s2;
                offsetevents[j].p1 = i;
                }
            }
        /* A non-positive length would stall the walk; give up on this
           block and go on with the next one */
        if (ap->len <= 0) {
            fprintf( stderr, "Malformed BLOG block\n" );
            break;
            }
        xx_i += ap->len;
        bp   += ap->len;
        }
    }
/* Now, do a global merge */
#ifdef FOO
{int i;
for (i=0; i<=NUMNODES; i++)
if (GTOKEN(ALLPROCS,i)){
    printf( "[%d]\n", i );BLOGDumpOffsetEvents( );fflush(stdout);}
GSYNC(ALLPROCS);
}
#endif
/* printf( "[%d]About to do a GIOR\n", MYPROCID ); */
GIOR( offsetevents,
      (np * sizeof(OffsetEvents)) / sizeof(int), work, ALLPROCS );
/* printf( "[%d] Done with GIOR\n", MYPROCID ); fflush( stdout ); */
FREE(work);

/* cnt doubles as the work area for the global sum */
noffsets = cnt;
GISUM( &noffsets, 1, &cnt, ALLPROCS );
/* printf( "[%d] cnt = %d in get offsets\n", MYPROCID, noffsets );
   fflush(stdout);  */
return 1;
}

/*
    This routine takes offset events and solves for the offsets.  The
    approach is:

    Let the global time be given by (local_time - offset)*scale ,
    with a different offset and scale on each processor.  Each processor
    originates exactly one communication event (except processor 0),
    generating an a1 and a2 event.  A corresponding number of b1 events
    are generated, but note that one processor may have more than 1 b1
    event (if using Dunigan's synchronization, there will be np-1 b1 events
    on processor 0, and none anywhere else).

    These events are:

   pi   a1 (send to nbr)                        (recv) a2
   pj                     (recv) b1 (send back)

    We base the analysis on the assumption that in the GLOBAL time
    representation, a2-a1 is twice the time to do a (send) and
    a (recv).  This is equivalent to assuming that global((a1+a2)/2) ==
    global(b1).  Then, with the unknowns the offsets (the scales
    are assumed known from the syncevent calculation), the matrix is

    1
    -s0 s1
       ....
       -sj ... si

    where si is the scale for the i'th processor (note s0 = 1).
    The right hand sides are (1/2)(a1(i)+a2(i)) *s(i) - b1(j)*s(j).
    Because of the triangular nature of the matrix, this reduces to

       o(i) = (a1(i)+a2(i))/2 - (s(j)/s(i)) * (b1(j)-o(j))

    Note that if s(i)==s(j) and b1 == (a1+a2)/2, this gives o(i)==o(j).

    This works with ANY triangular matrix; we can use a master-slave
    version (all exchange with processor 0), a log-tree version
    (everyone exchanges with binary tree parent), or a linear version
    (2p+1 exchanges with 2p).  Others are possible.    

    This returns the offset and skew for the calling processor only.
 */
/*
   Solve the triangular system for the clock offsets.

   Parameters:
     np   - number of processors; skew and the exchange table have np entries
     skew - clock scale of each processor, indexed by processor number
     goff - on entry the offset to use for processor 0, on exit the offset
            of the calling processor

   *goff is left untouched when the number of recorded exchanges is not
   np - 1, or when the exchange table is not triangular (the partner of
   processor i must have a smaller number than i, otherwise its offset has
   not been computed yet and cannot be used).
 */
void BLOGComputeOffsets( np, skew, goff )
int    np;
double *skew, *goff;
{
int    i, j, solved;
double d1, delta;
double *globaloffset;

/* If there aren't enough events, return */
if (noffsets != np - 1) {
    return;
    }

globaloffset = (double *)MALLOC( np * sizeof(double) );   CHKPTR(globaloffset);
/* Take globaloffset[0] from sync */
globaloffset[0] = *goff;

/* NOTE: the original author marked this recurrence with "This code isn't
   right"; the formula itself is kept exactly as written. */
solved = 1;
for (i=1; i<np; i++) {
    /* o(i) = (a1(i)+a2(i))/2 - (s(j)/s(i)) * (b1(j)-o(j)) */
    j     = offsetevents[i].p1;
    if (j < 0 || j >= i) {
        /* globaloffset[j] is out of range or not yet known */
        fprintf( stderr, "[%d] offset exchange %d -> %d is not triangular\n",
                 MYPROCID, i, j );
        solved = 0;
        break;
        }
    d1    = (offsetevents[i].a2 + offsetevents[i].a1)/2;
    delta = (skew[j] / skew[i]) * (offsetevents[i].b1 - globaloffset[j] );

    globaloffset[i] = d1 - delta;
    }
if (solved) {
    *goff = globaloffset[MYPROCID];
    }
FREE( globaloffset );
}

/* For debugging: print a1, b1, a2 and the partner of every slot of the
   exchange table on stdout, one line per processor.  Nothing is printed if
   the table has not been allocated yet.  Returns 0. */
int BLOGDumpOffsetEvents( )
{
int i;

if (!offsetevents) {
    return 0;
    }
for (i=0; i<NUMNODES; i++) {
    fprintf( stdout, "[%f,%f,%f] with [%d]\n", offsetevents[i].a1,
             offsetevents[i].b1, offsetevents[i].a2,
             offsetevents[i].p1 );
    }
return 0;
}

/* Convert the time "cookies" to usecs.  This is done carefully to
   preserve precision.  HOWEVER, this may perturb the data on
   machines that have synchronous clocks. NOTE that the time is
   stored as DOUBLES after this routine is called.

   Every record whose two raw time words are not both zero has its time
   replaced by SYuscDiff( zero point, raw time ).  Because the result is
   written back over the raw value, this routine must run only once per
   record.  A record with a non-positive length ends the walk of its block
   (it would otherwise never terminate). */
void xx_BLOG_adjtime1()
{
int               xx_i, *bp, n;
SYusc_time_t      *t;
BLOG_BLOCK        *bl;
BLOG_HEADER       *ap;

for (bl = xx_BLOG_get_blog_ptr(); bl; bl = bl->next) {
    n      = bl->size;
    bp     = (int *)(bl + 1);
    xx_i   = 0;
    while (xx_i < n) {
        ap    = (BLOG_HEADER *)bp;
        t     = &ap->time;
        if (t->s1[0] != 0 || t->s1[1] != 0) {
            ap->time.s2 = SYuscDiff( &xx_BLOG_tinit, t );
            }
        if (ap->len <= 0) {
            fprintf( stderr, "Malformed BLOG block\n" );
            break;
            }
        xx_i += ap->len;
        bp   += ap->len;
        }
    }
}

/*
   Find skew and a basic offset based on the global sync events in the
   log.  Returns 1 if values found, 0 otherwise.

   The times of the first and of the last BLOG_EVENT_SYNC record in the local
   log are taken as the local sync interval.  The pair is then passed through
   GSCATTER (presumably a broadcast from the processor for which the third
   argument is true, i.e. processor 0 -- confirm), after which v0 holds the
   reference interval.  On success

       *Skew = (reference interval) / (local interval)
       *Goff = start of the reference interval

   Skew and Goff are left untouched when 0 is returned, which happens when
   fewer than two sync events were logged locally or when the local interval
   is empty.  The GSCATTER is executed in every case.

   Side effect: LOGOPSTART and LOGOPEND are redefined as empty from this
   point to the end of the file.
 */
int xx_BLOG_FindSkew( ps, Skew, Goff )
ProcSet *ps;
double  *Skew, *Goff;
{
int               xx_i, *bp, n;
double            sync_start = 0.0, sync_end = 0.0, v0[2];
int               found_first = 0, nsync = 0;
BLOG_BLOCK        *bl;
BLOG_HEADER       *ap;

/* printf( "[%d] starting adj 2\n", MYPROCID ); */
for (bl = xx_BLOG_get_blog_ptr(); bl; bl = bl->next) {
    n      = bl->size;
    bp     = (int *)(bl + 1);
    xx_i   = 0;
    while (xx_i < n) {
        ap   = (BLOG_HEADER *)bp;
        if (ap->event == BLOG_EVENT_SYNC) {
            /* The first sync opens the interval, every later one moves
               its end */
            if (found_first)
                sync_end    = ap->time.s2;
            else {
                sync_start  = ap->time.s2;
                found_first = 1;
                }
            nsync ++;
            }
        if (ap->len <= 0) {
            fprintf( stderr, "Malformed BLOG block\n" );
            break;
            }
        xx_i += ap->len;
        bp   += ap->len;
        }
    }

/* Trade the information (for more complete adjustments, everyone needs
   the skews and global offsets) */
v0[0] = sync_start;
v0[1] = sync_end;
/* Don't add yet more events! */
#undef LOGOPSTART
#undef LOGOPEND
#define LOGOPSTART(n)
#define LOGOPEND(n)
DBUG("About to do a scatter\n" );
GSCATTER( v0, 2*sizeof(double), MYPROCID == 0, ps, MSG_DBL );

/* Don't do anything if there aren't enough values.  (xx_BLOG_status is
   saved and restored by xx_BLOG_adjusttimes; it is not touched here.) */
if (nsync < 2) {
    return 0;
    }
/* An empty local interval cannot be scaled */
if (sync_end == sync_start) {
    return 0;
    }

/* printf( "[%d] scaling times \n", MYPROCID ); */
*Skew = (v0[1] - v0[0]) / (sync_end - sync_start);
*Goff = v0[0];
return 1;
}

/*
   Find offsets from the offset events, given a skew.
   Return 1 on success, 0 on failure.

   Parameters:
     ps   - processor set over which the minimum offset is taken
     sk   - skew of the calling processor (not used yet: all skews are set
            to 1.0 for the offset computation)
     Goff - on entry the offset found so far, on exit that offset refined by
            BLOGComputeOffsets and shifted so that the smallest offset in ps
            is zero (assuming GDMIN leaves the global minimum in its first
            argument -- confirm)

   0 is returned only if the skew array cannot be allocated.
 */
int xx_BLOG_FindLocalOffset( ps, sk, Goff )
ProcSet *ps;
double  sk, *Goff;
{
double            goff, gt, work;
double            *skew;
int               i;

/* Start from the caller's value; BLOGComputeOffsets reads it as the offset
   of processor 0 and leaves it alone if it cannot do better */
goff = *Goff;

/* This code looks for offset events and tries to correct the local clocks
   (GSYNC isn't very accurate for the offsets, though it is better than
   nothing) */
if (BLOGGetOffsetEvents()) {
    /* printf( "[%d] about to get the offset events\n", MYPROCID ); */
    /* if (MYPROCID == 0)
        BLOGDumpOffsetEvents( ); */

    /* Get the skews for all of the clocks */
    skew = (double *)MALLOC( NUMNODES * sizeof(double) );
    CHKPTRV(skew,0);
    for (i=0; i<NUMNODES; i++) skew[i] = 1.0;  /* for now, could use
                                                gcolx */
    /* printf( "[%d] skew = %f offset = %f\n", MYPROCID, sk, goff ); */
    BLOGComputeOffsets( NUMNODES, skew, &goff );
    FREE( skew );
    }

/* goff should really be the adjusted by subtracting off the minimum of all
   of the goff's */
gt = goff;
GDMIN( &gt, 1, &work, ps );
goff -= gt;

/* Hand the result back to the caller */
*Goff = goff;
return 1;
}

/*
   Apply   global_time = local_time * sk + goff   to every record in the log.

   Parameters:
     ps   - processor set (not used here)
     sk   - skew of the local clock
     goff - offset of the local clock

   Records whose two raw time words are both zero are skipped.  For the
   others the time is read as a double, corrected, clamped at zero, scaled by
   1.0e6 and written back as integers: word 0 is cleared and word 1 receives
   the value.  A record with a non-positive length ends the walk of its block
   (it would otherwise never terminate).

   NOTE(review): a corrected value too large for an unsigned long is not
   handled; see the comment inside the loop.
 */
void xx_BLOG_ApplyTimeCorrection( ps, sk, goff )
ProcSet *ps;
double  sk, goff;
{
int               xx_i, *bp, n;
SYusc_time_t      *t;
double            gt;
BLOG_BLOCK        *bl;
BLOG_HEADER       *ap;

for (bl = xx_BLOG_get_blog_ptr(); bl; bl = bl->next) {
    n      = bl->size;
    bp     = (int *)(bl + 1);
    xx_i   = 0;
    while (xx_i < n) {
        ap    = (BLOG_HEADER *)bp;
        t     = (SYusc_time_t *)(&ap->time);
        if (t->s1[0] != 0 || t->s1[1] != 0) {
            gt    = ap->time.s2 * sk + goff;
            /* The following is a hack! */
            /* printf( "[%d] o = %f n = %f\n", MYPROCID, ap->time.s2, gt ); */
            if (gt < 0.0) gt = 0.0;
            /* End of hack */
            /* This should really be
               if (gt > maxunsignedlong) ap[2] = gt / maxunsignedlong;
               else
             */
            ap->time.s1[0] = 0;
            ap->time.s1[1] = (unsigned long)(gt * 1.0e6);
            }
        if (ap->len <= 0) {
            fprintf( stderr, "Malformed BLOG block\n" );
            break;
            }
        xx_i += ap->len;
        bp   += ap->len;
        }
    }
}

/*
   Find offsets for a procset and apply them to the log.

   The skew defaults to 1.0 and the offset to 0.0.  Unless disabled, the
   skew is then taken from the sync events and the offset from the offset
   events; if either step reports failure the routine returns without
   changing any time in the log.  With printOffsets set, each processor
   prints the values it is about to use (the GTOKEN loop presumably makes
   the processors take turns -- confirm).
 */
void xx_BLOG_adjtime2( ps )
ProcSet *ps;
{
int     i;
double  sk   = 1.0,     /* defaults, just in case */
        goff = 0.0;

/* printf( "[%d] starting adj 2\n", MYPROCID ); */
if (!disableSkew && !xx_BLOG_FindSkew( ps, &sk, &goff )) return;

if (!disableOffsets && !xx_BLOG_FindLocalOffset( ps, sk, &goff )) return;

if (printOffsets) {
    for (i=0; i<=NUMNODES; i++) {
        if (GTOKEN(ps,i)) {
            fprintf( stdout, "[%d] local * %f + %f\n", MYPROCID, sk, goff );
            }
        }
    }
/* The routine takes the procset first; leaving it out shifts sk and goff
   into the wrong parameters */
xx_BLOG_ApplyTimeCorrection( ps, sk, goff );
/* printf( "[%d] Done adjusting times\n", MYPROCID ); */
}


/* Set once the raw time stamps have been converted, so that the conversion
   is never applied a second time */
static int AdjustedTimes = 0;

/*
   Convert the time stamps in the log and, unless disableAll is set, correct
   them for clock skew and offset.  Only the first call does any work.

   xx_BLOG_status is saved, set to 0 while the corrections are computed and
   then restored.  BLOG_DISABLE is executed at the end of the full path only.

   Returns 0.
 */
int xx_BLOG_adjusttimes()
{
int xx_save;

if (AdjustedTimes) return 0;

DBUG("About to convert cookies to times\n");
xx_BLOG_adjtime1();
/* The times are stored as doubles from here on; running the conversion
   again would misread them, so mark the log as done before any return */
AdjustedTimes = 1;

if (disableAll) return 0;

DBUG("Did 1, about to do 2\n" );
xx_save = xx_BLOG_status;
xx_BLOG_status = 0;
xx_BLOG_adjtime2((ProcSet*)0);
xx_BLOG_status = xx_save;
/* printf( "Adjusted times\n" ); fflush(stdout); */
BLOG_DISABLE;
return 0;
}
