/* Source-control identification string (file path and date) embedded in the
   object file; left out when `lint` is defined so lint does not report it
   as an unused variable. */
#ifndef lint
static char SCCSid[] = "@(#) ./comm/global/gsetopp.c 07/23/93";
#endif

#include "comm/comm.h"
#include "comm/procset.h"
#include "comm/global/global.h"
#include <stdio.h>

/*
    This directory contains pipelined versions of the reduction
    operations.
 */    
/*+
    gsetopTP - Tree-oriented combine operation.

    Input Parameters:
.   val      - buffer holding n elements of elmsize bytes each.  It is
               combined in place: on return it holds the values distributed
               back down the tree from the root.
.   n        - number of elements in val
.   work     - scratch buffer into which each child's contribution is
               received; must be able to hold the largest packet (at most
               n * elmsize bytes)
.   procset  - processor set supplying the tree (parent, children) to use;
               if null, the global collective tree is used (and set up on
               first use)
.   elmsize  - size in bytes of one element
.   datatype - passed unchanged to the send/receive macros
.   op       - combine routine, called as (*op)( dest, src, count ) with
               count in elements; it must leave the combined result in
               dest, since dest is what is forwarded to the parent

    Note:
    This version includes an option for pipelining the combination: when
    _PIPKTSIZE is positive the data is moved in packets of about that many
    bytes (rounded down to a whole number of elements), and a short final
    packet is merged into the one before it.  One complication is that we'd
    like to continue to use the FORCE version for at least the first packet
    returning down the tree (and certainly the asynchronous version).

    Note that if we start returning packets down the tree while packets
    are still coming up the tree, there is the possibility of deadlock
    when two neighbors are trying to send to each other (using
    asynchronous communication isn't a solution, since not all implementation
    layers support it).

    This can be avoided by using an exchange protocol, for instance,
    sending a null message down when return packets are available, and
    then switching to a hand-shake protocol until all up-going packets
    are gone.

    For now, we won't bother with this: the complete up (combine) phase
    finishes before the down (distribute) phase starts.
+*/ 
void gsetopTP( val, n, work, procset, elmsize, datatype, op )
char    *val, *work;
void    (*op)();
int     n, elmsize, datatype;
ProcSet *procset;
{
int size = n * elmsize;         /* total message length in bytes */
int l_child, r_child, parent, am_left, rid;
int msgup, msgdn, lphase;
int pktcnt = 0, pktlen, pkttest;
int elen, rlen;
int upcnt, upoffset, dncnt, dnoffset;

/* NOTE(review): rid and pktcnt are not used in the code visible here; they
   are retained in case the communication macros refer to them - confirm
   against comm/comm.h before removing. */

/* printf( "[%d] doing gsetop with size = %d\n", MYPROCID, size ); */
if (!procset) {
    /* Global tree: build it on first use, then flip the global phase bit
       for this operation */
    if (_PIPARENT < -1)
        PISetupCollectiveTree( );
    l_child = _PILCHILD;
    r_child = _PIRCHILD;
    parent  = _PIPARENT;
    am_left = _PIAM_LEFT;
    _PIPHASE= _PIPHASE ? 0 : 1;
    lphase  = _PIPHASE;
    }
else {
    l_child = procset->l_child;
    r_child = procset->r_child;
    parent  = procset->parent;
    am_left = procset->am_left;
    GMSGPHASE(procset,lphase);
    }

/* Choose the packet length.  Packets are used only when at least one whole
   element fits in a packet; otherwise the rounded-down packet length would
   be zero and the loops below could never advance (and elmsize == 0 would
   divide by zero).  In that case, fall back to one whole-message packet. */
pktlen = _PIPKTSIZE;
if (elmsize > 0 && pktlen >= elmsize) {
    elen   = pktlen / elmsize;
    pktlen = elen * elmsize;
    }
else {
    pktlen = size;
    elen   = n;
    }
/* A full packet is sent only while more than two packets' worth remains */
pkttest = pktlen * 2;

/* ---- Up phase: combine the children's values and pass them upward ---- */
upcnt    = 0;
upoffset = 0;
while (upoffset < size) {
    /* Avoid small packets (this code forces the last packet onto
       the last-1 packet) */
    if (pkttest < (size-upoffset)) 
        rlen = pktlen;
    else {
        rlen = (size-upoffset);
        elen = rlen / elmsize;
        }

    msgup = GMSGTYPEPKT(procset,MSG_UP|lphase,upcnt,0);

    /* Receive values from my children and accumulate; elen is the number
       of elements in this packet */
    if (l_child >= 0) {
        RECVSYNCNOMEM(msgup | MSG_LEFT, work,rlen,datatype);
        (*op)( val+upoffset, work, elen );
        }
    if (r_child >= 0) {
        RECVSYNCNOMEM(msgup | MSG_RIGHT, work,rlen,datatype);
        (*op)( val+upoffset, work, elen );
        }
    /* Send to parent, tagged with which child we are */
    if (parent >= 0) {
        if (am_left) msgup |= MSG_LEFT;
        else         msgup |= MSG_RIGHT;
        SENDSYNCNOMEM(msgup,val+upoffset,rlen,parent,datatype);
        }
    upoffset += rlen;
    upcnt ++;
    }

/* We could issue async receives (at least some) during the up phase */

/* ---- Down phase: distribute final value back down, using the same
   packet boundaries as the up phase ---- */
dnoffset = 0;
dncnt    = 0;
while (dnoffset < size) {
    if (pkttest < (size-dnoffset)) rlen = pktlen;
    else                           rlen = (size-dnoffset);

    /* The down-phase type must be stored in msgdn, which is the variable
       used by the receive and sends below */
    msgdn = GMSGTYPEPKT(procset,MSG_DN|lphase,dncnt,0);
    if (parent >= 0) {
        RECVSYNCNOMEM(msgdn,val+dnoffset,rlen,datatype);
        }
    if (l_child >= 0)
        SENDSYNCNOMEM(msgdn,val+dnoffset,rlen,l_child,datatype);
    if (r_child >= 0)
        SENDSYNCNOMEM(msgdn,val+dnoffset,rlen,r_child,datatype);
    dnoffset += rlen;
    dncnt ++;
    }
}
