// This may look like C code, but it is really -*- C++ -*-
// 
// Copyright (C) 1988 University of Illinois, Urbana, Illinois
//
// written by Dirk Grunwald (grunwald@cs.uiuc.edu)
//

#ifdef __GNUG__
#  pragma implementation
#endif

#include "MultiCpuMux.h"
#include "CpuMuxP.h"
#include "HardwareContextP.h"
#include "SpinLock.h"
#include "SpinBarrier.h"
#include "SpinFetchAndOp.h"
#include "Thread.h"
#include "ThreadContainer.h"
#include "ReserveByException.h"
#include "Pragma.h"
#include <math.h>
#include <signal.h>
#include <stdlib.h>

//
// Hand-written prototypes for two system calls used in this file:
// mkdir (called from warmThePot) and wait (called from coolItDown).
//
// NOTE(review): these are declared here instead of coming from a system
// header, and mkdir is declared as returning void -- confirm that this
// matches the declaration supplied by the target platform.
//
extern "C" {
    extern void mkdir(char*, int);
    extern int wait(void*);
};

//
//	Things left to do:
//
//	+ Capture signals, transfer them to an Exception class. Can
//	  use this to implement time-slices & the like, as well as....
//
//	+ Put in *addCpu* and *removeCpu* calls to CpuMux.
//	  This would allow run-time addition/removal of CPUS, so
//	  you can tailor your program to system 
//		This is tricky. Should probably do it when you
//		advance the clock, but it'll be tricky to get all
//		the CPUs to agree on the barrier height for the
//		rendezvous. Also might complicate the *distinct
//		pools of threads per cpu*.
//


//
//	Total number of threads queued over all per-CPU piles. It is
//	adjusted every time a thread is added to or removed from a pile.
//
static SpinFetchAndOp GlobalCurrentEventsCounter(0);

//
//	State for the "giving up" protocol in stirItAround.
//	GivingUpCounter is the number of CPUs that currently find nothing
//	to do; GivingUpGeneration is incremented every time the counter is
//	reset to zero. Both are modified only while GivingUpLock is held
//	(stirItAround also reads the generation without the lock while it
//	spins).
//
static SpinLock GivingUpLock;
static VolatileInt GivingUpCounter = 0;
static VolatileInt GivingUpGeneration = 0;

//
//	Barrier joined by every CPU in fireItUp before dispatching starts.
//	fireItUp resets its height to the number of CPUs.
//
static SpinBarrier CpuBarrier(1);

//
//	A currentEvents pile for each processor. The count is only correct
//	if you have reserved the spin lock -- without the lock it is used
//	only as a guess.
//
//	This should be made a private data type to encapsulate the
//	manipulations of the data structures.
//
static SpinLock CurrentEventsLock[_MAXIMUM_CPU_MUXS_];
static VolatileInt CurrentEventsCounter[_MAXIMUM_CPU_MUXS_];
static ThreadContainer *CurrentEvents[_MAXIMUM_CPU_MUXS_];

void
MultiCpuMux::AddToCpu(int cpu, Thread *who)
{
    CurrentEventsLock[cpu].reserve();
    CurrentEvents[cpu] -> add( who );
    CurrentEventsCounter[cpu]++;
    GlobalCurrentEventsCounter += 1;
    CurrentEventsLock[cpu].release();
}

//
// Try to take one thread off the pile of a given CPU without ever
// blocking; we err on the side of optimization.
//
// Returns 0 when the (unlocked) counter says the pile is empty, when
// the pile's lock is busy, or when the pile turns out to be empty once
// the lock is held. Otherwise returns the removed thread.
//
Thread *
MultiCpuMux::RemoveFromCpu(int cpu)
{
    // Unlocked peek at the counter: only a guess, but a cheap one.
    if ( CurrentEventsCounter[cpu] == 0 ) {
        return( 0 );
    }

    // Somebody else is working on this pile; do not wait for them.
    if ( ! CurrentEventsLock[cpu].reserveNoBlock() ) {
        return( 0 );
    }

    Thread *taken = 0;

    // Re-check now that the counter can be trusted.
    if ( CurrentEventsCounter[cpu] > 0 ) {
        taken = CurrentEvents[cpu] -> remove();
        CurrentEventsCounter[cpu]--;
        GlobalCurrentEventsCounter -= 1;
    }

    CurrentEventsLock[cpu].release();
    return( taken );
}

#ifdef _PARANOID_
//
// RealityCheck counts the number of tasks in each queue
// and compares that to the 
void
CpuMuxRealityCheck(char *name, int line)
{

    int cpu;
    for ( cpu = 0; cpu < CpuMux::Muxs; cpu++ ) {
	CurrentEventsLock[cpu].reserve();
    }

    int inQueues = 0;
    int inQueue[CpuMux::Muxs];
    int inGlobal = GlobalCurrentEventsCounter.value();

    for ( cpu = 0; cpu < CpuMux::Muxs; cpu++ ) {

	if ( CurrentEventsCounter[cpu] < 0 ) {
	    cerr << "BAD NEWS: CurrentEventsCounter[" << cpu << "] = ";
	    cerr << CurrentEventsCounter[cpu] << "\n";
	}
	inQueue[ cpu ] = CurrentEventsCounter[cpu];
	inQueues += inQueue[ cpu ];
    }

    for ( cpu = 0; cpu < CpuMux::Muxs; cpu++ ) {
	CurrentEventsLock[cpu].release();
    }

    if ( inQueues != inGlobal ) {
	CERR_ALWAYS_PRE;

	cerr << name << "Found " << inQueues << " in queues ";
	cerr << "and " << inGlobal << " in global counter\n";
	cerr << name << " at line " << line << "\n";

	for (cpu = 0; cpu < CpuMux::Muxs; cpu++ ) {
	    cerr << "Cpu " << cpu << " has " << inQueue[cpu] << "\n";
	}
	CERR_POST;
    }
}
#endif

//
// Build a multiplexor that starts out as CPU #0 and records the debug
// flag in CpuMux::Debug.
//
// NOTE(review): ": (debug)" is the old unnamed base-class initializer
// form; the base class is not named in this file, so it is left as is.
//
MultiCpuMux::MultiCpuMux(bool debug) : (debug)
{
    CpuMux::Debug = debug;
    iYam = 0;
    pNameTemplate = "MultiCpuMux";
}

//
// Nothing to release here: the per-CPU event structures are torn down
// by deallocateEventStructures, not by the destructor.
//
MultiCpuMux::~MultiCpuMux()
{
    // intentionally empty
}

//
// Add a single CPU to the set of current CPUs. All child processes are
// spawned by CPU #0, so that all child signals are caught by the single
// parent.
//
// This entry is called by a Thread.
//
void MultiCpuMux::enrollCpu()
{
    //
    // Get the calling thread onto the master process first. Another CPU
    // looking for work may steal the thread from CPU #0 again, so keep
    // asking to be relocated until we really are running on CPU #0.
    //
    for (;;) {
        if (iYam == 0) break;

        currentThread -> affinity(0);
        relocateException.cpu(0);
        raise( &relocateException );
    }

    //
    // The fork itself is done by raising an exception, so the new child
    // process starts life in the stirItAround loop and not in this
    // function.
    //
    enrollDismissCpuException.enroll();
    raise( &enrollDismissCpuException );

    // The thread no longer needs to stay on CPU #0.
    currentThread -> affinity(-1);
}

//
// Removing a CPU at run time is not implemented; any call trips the
// assertion.
//
void
MultiCpuMux::dismissCpu()
{
    assert(0);	// not implemented
}

void
MultiCpuMux::allocateLocalEventStructures(int newIYam, int outOf)
{
    iYam = newIYam;
    sprintf(nameSpace, "[%s-%d] ", pNameTemplate, iYam);
    pName = nameSpace;
    
    CERR_PRE;
    cerr << name() << "Allocate CpuMux structures for new CPU (";
    cerr << newIYam << ")\n";
    CERR_POST;
    
    CurrentEventsCounter[iYam] = 0;
    CurrentEvents[iYam] = allocateThreadContainer();

    CERR_PRE;
    cerr << name() << "set CpuMux::Muxs to " << outOf << "\n";
    CERR_POST;
    
    CpuMux::MuxsLock.reserve();
    CpuMux::Muxs = outOf;
    CpuMux::MuxsLock.release();
    
    GivingUpLock.reserve();
    if (GivingUpCounter >= outOf) {
	GivingUpGeneration++;
	GivingUpCounter = 0;
    }
    GivingUpLock.release();
}

//
// Assumes iYam != 0
//
void
MultiCpuMux::deallocateEventStructures()
{
    assert( iYam != 0 );

    CERR_PRE;
    cerr << name() << "Deallocate CpuMux structures for " << iYam << "\n";
    CERR_POST;
    
    
    CurrentEventsLock[ iYam ].reserve();
    ThreadContainer *myEvents = CurrentEvents[ iYam ];
    //
    // Move remaining events to another queue. We're not adding new events,
    // just moving them around, so we don't increase GlobalCurrentEventsCounter
    //
    while ( CurrentEventsCounter[iYam] > 0 ) {
	CurrentEventsLock[0].reserve();
	assert(CurrentEvents[0] != 0);
	while( ! myEvents -> isEmpty() )  {
	    Thread *t = myEvents -> remove();
	    CurrentEvents[0] -> add( t );
	    CurrentEventsCounter[0]++;
	    CurrentEventsCounter[iYam]--;
	}
	CurrentEventsLock[0].release();
    }
    
    CpuMux::MuxsLock.reserve();
    CpuMux::Muxs--;
    
    GivingUpLock.reserve();
    if (GivingUpCounter >= CpuMux::Muxs) {
	GivingUpGeneration++;
	GivingUpCounter = 0;
    }
    GivingUpLock.release();
    
    CpuMux::MuxsLock.release();
    
    CERR_PRE;
    cerr << name() << "set CpuMux::Muxs to " << CpuMux::Muxs;
    cerr << " and trigger GivingUp\n";
    CERR_POST;
    
    delete CurrentEvents[iYam];
    CurrentEvents[iYam] = 0;
    CurrentEventsCounter[iYam] = 0;

    CurrentEventsLock[iYam].release();
}

//
// SIGCHLD handler installed by CPU #0 in fireItUp. It only reports that
// the state of a child process changed; the signal number is ignored.
//
// NOTE(review): stream output is not async-signal-safe; this handler is
// a debugging aid only.
//
static void YouKillMe(int)
{
    cerr << "Change in child state - has a child died?\n";
}

//
// Run the multiplexor on up to `cpus` processes: set up shared memory
// (only when more than one CPU is requested), spawn the children, meet
// them at the barrier, dispatch threads until there is nothing left to
// do, and finally shut down.
//
//	cpus:	number of CPUs wanted; must be positive and is clamped to
//		_MAXIMUM_CPU_MUXS_.
//	shared:	shared memory size, passed to SharedMemoryInit multiplied
//		by sizeof(HardwareContextQuad).
//
void
MultiCpuMux::fireItUp(int cpus, unsigned shared)
{
    assert(cpus > 0);

    // Never ask for more CPUs than the static per-CPU tables can hold.
    cpus = ( cpus > _MAXIMUM_CPU_MUXS_ ) ? _MAXIMUM_CPU_MUXS_ : cpus;

    CERR_PRE;
    cerr << name() << "Allocate " << shared << " bytes of shared memory\n";
    CERR_POST;

    if ( cpus > 1 ) {
        extern void SharedMemoryInit( unsigned );
        SharedMemoryInit( shared * sizeof(HardwareContextQuad) );
    }

    //
    // Every CPU has to arrive at the barrier before dispatching starts,
    // so the barrier is as high as the number of CPUs.
    //
    CpuBarrier.height(cpus);

    // CPU #0 wants to hear about changes in the state of its children.
    if ( iYam == 0 ) {
        signal(SIGCHLD, YouKillMe);
    }

    warmThePot(cpus);

    CERR_PRE;
    cerr << name() << " returns from warmThePot, join barrier\n";
    CERR_POST;

    CpuBarrier.rendezvous();

    stirItAround();

    // Dispatching is over; CPU #0 goes back to the default handling.
    if ( iYam == 0 ) {
        signal(SIGCHLD, SignalDefault);
    }

    coolItDown();
}

//
// Spawn the child processes. The calling process stays CPU #0 and forks
// once for every further CPU; each child takes its number from the loop
// index, sets up its own event structures and leaves the loop. If a
// fork fails, the parent reports the error and exits with status 99.
//
//	cpus:	total number of CPUs, including the calling process; must
//		be positive.
//
void
MultiCpuMux::warmThePot(int cpus)
{
    assert(cpus > 0);
    CpuMux::Muxs = cpus;
    enabled = 1;

    //
    //	Spawn the children, giving each a unique number from 1..(cpus-1).
    //  The original process keeps id 0.
    //

    iYam = 0;

    CERR_PRE;
    cerr << name() << "Allocate " << CpuMux::Muxs << " cpus\n";
    CERR_POST;

    for (int whoAmI = 1; whoAmI < CpuMux::Muxs; whoAmI++) {
        if (iYam == 0) {
            int childPid = fork();

            if (childPid == 0) {	// child

                CERR_PRE;
                cerr << getpid() << " is born,";
                CERR_POST;

                allocateLocalEventStructures(whoAmI, CpuMux::Muxs);
                break;
            } else if ( childPid == -1 ) {
                //
                // The fork failed. The test must look at the value fork
                // returned, not at the `pid' member, which is only
                // assigned after this loop.
                //
                cerr << "Error in spawn:\n";
                perror("fork");
                exit(99);
            } else {

                CERR_PRE;
                cerr << name() << "Parent spawns child "<< childPid << "\n";
                CERR_POST;
            }
        }
    }
    pid = getpid();

    CERR_PRE;
    cerr << name() << "I am now id " << iYam << " and pid " << pid <<" \n";

    //
    // give each child a distinct temporary directory for core dumps
    // when debugging.
    //
    // NOTE(review): everything up to CERR_POST, including the chdir,
    // sits between CERR_PRE and CERR_POST; whether it runs at all
    // depends on how those macros expand -- confirm.
    //

    char tempName[L_tmpnam];
    tmpnam(tempName);
    mkdir(tempName,0777);
    int xx = chdir(tempName);

    cerr << name() << "change dir to " << tempName;
    if ( xx >= 0 ) {
        cerr << " worked\n";
    } else {
        cerr << " did not work\n";
    }

    CERR_POST;

}

//
// Shut the multiplexor down. A child CPU releases its event structures
// and terminates its process; CPU #0 reaps children and finishes with
// the CPU count set back to one.
//
void
MultiCpuMux::coolItDown()
{
    if ( !(iYam > 0) ) {
        //
        //	Reap the dead children. This way we know they are all dead
        //	and the caller can safely exit.
        //
        //	NOTE(review): the loop condition relies on CpuMux::Muxs
        //	going down, which in this file only happens in
        //	deallocateEventStructures -- presumably the children's
        //	decrements are visible here; confirm.
        //
        while (CpuMux::Muxs > 1) {
            int reaped = wait(0);
            if (reaped == -1) {
                perror("wait");
                break;
            }
        }

        //
        //  The loop above may have been left early.
        //
        CpuMux::Muxs = 1;
    }
    else {

        CERR_PRE;
        cerr << name() << "exit\n";
        CERR_POST;

        deallocateEventStructures();
        _exit(0);
    }
}

//
// Queue a thread on the pile of an explicitly chosen CPU. The CPU
// number must lie in 0..CpuMux::Muxs-1.
//
void
MultiCpuMux::addToAnother_(int cpu, Thread *who)
{
    // Refuse CPU numbers outside of the current set.
    assert( cpu >= 0 && cpu < CpuMux::Muxs );

    AddToCpu(cpu, who);
}

//
// Queue a thread on the pile of this CPU. The debug trace shows the
// thread's name, or its address in hex when there is no thread or the
// thread has no name.
//
void
MultiCpuMux::add_(Thread *who)
{
    CERR_PRE;
    if (who == 0 || who -> name() == 0) {
        cerr << name() << " add " << hex(long(who)) << "\n";
    } else {
        cerr << name() << " add " << who -> name() << "\n";
    }
    CERR_POST;

    AddToCpu(iYam,who);
}

void
MultiCpuMux::addReserve_()
{
    CurrentEventsLock[iYam].reserve();
    eventsAddedThisBatch = 0;
}

//
// Add one thread to this CPU's pile as part of a batch. No lock is
// taken here and the global counter is not touched; the addition is
// only tallied in eventsAddedThisBatch.
//
void
MultiCpuMux::addUnlocked_(Thread *thread)
{
    ThreadContainer *ownPile = CurrentEvents[iYam];

    ownPile -> add(thread);
    CurrentEventsCounter[iYam]++;
    eventsAddedThisBatch++;
}

//
// Close a batch of additions: credit the whole batch to the global
// counter in one step, then release this CPU's pile lock.
//
void
MultiCpuMux::addRelease_()
{
    // Publish the batch before letting anybody else at the pile.
    GlobalCurrentEventsCounter += eventsAddedThisBatch;

    CurrentEventsLock[iYam].release();
}

//
// remove()::
//
// Look for a thread to run, first on our own current-events pile and
// then on the piles of the other CPUs, in increasing CPU order with
// wrap-around. Returns the thread found, or 0 if there is nothing to do
// or the multiplexor has been terminated.
//
// Optimizations in getting a thread:
//
// The per-CPU counters are inspected *without* locking (inside
// RemoveFromCpu) -- they are treated as a *guess* before bothering to
// lock. The global count of tasks is maintained under the pile locks,
// so the worst case is wasted effort, not a wrong answer.
//
// Busy piles are skipped through reserveNoBlock -- again an
// optimization that might bite us, since a pile that does hold work
// can be passed over.
//
Thread *
MultiCpuMux::remove()
{
    Thread *candidate = 0;

    if (*terminated) return(0);

    if ( !(GlobalCurrentEventsCounter.value() > 0) )  {
        CERR_PRE;
        cerr << name() << " Found no global events";
        cerr << " while looking for thread\n";
        CERR_POST;

    }
    else {
        //
        //	Somebody has something to do. Start with ourselves and go
        //	once around all CPUs, stopping at the first thread found.
        //
        int victim = iYam;

        for (;;) {
            CERR_PRE;
            cerr << name() << "Ask " << victim << " about events \n";
            CERR_POST;

            candidate = RemoveFromCpu(victim);

            victim++;
            if ( victim >= CpuMux::Muxs ) victim = 0;

            if ( victim == iYam || candidate != 0 ) break;
        }
    }

    //
    // A thread that asked for a specific, different CPU is not ours to
    // run: pass it on to that CPU and come back empty-handed.
    //
    // NOTE(review): the test is "affinity() > 0", so a thread with
    // affinity 0 is not passed on to CPU #0 from here -- confirm that
    // this is intended.
    //
    if (candidate != 0
        && candidate -> affinity() > 0
        && candidate -> affinity() != iYam) {

        CERR_PRE;
        cerr << name();
        cerr << "Returning thread because of afinity\n";
        CERR_POST;

        AddToCpu(candidate -> affinity(), candidate);
        candidate = 0;
    }

    CERR_PRE;
    cerr << name() << "find ";
    if (candidate != 0) {
        cerr << candidate -> name() << "\n";
    } else {
        cerr << "nothing\n";
    }
    CERR_POST;

    return( candidate );
}

//
// This is the job dispatcher.
//
// Repeatedly obtains a thread through remove(), transfers control to it
// with systemTransfer(), and, once control comes back, runs the handler
// of the exception recorded in raisedBy.
//
// Returns when *terminated is set, or when every CPU is idle at the
// same time and the global event counter is zero (the "giving up"
// protocol below).
//
// NOTE(review): currentThread is not reset to 0 in this function after
// a thread has run; presumably the exception handler does that --
// confirm.
//

void
MultiCpuMux::stirItAround()
{
    currentThread = 0;

    assert( CurrentEvents[ iYam ] != 0);

    // Only a warning: dispatching goes ahead regardless.
    if (!enabled) {
        cerr << "Need to initialize CpuMux before using it\n";
    }

    while( ! *terminated ) {
        while ( currentThread == 0 ) {

            currentThread = remove();

            //
            // run if we got one
            //
            if (currentThread != 0) break;

            //
            // remove() returns 0 unconditionally once *terminated is
            // set. Without this test a CPU would loop here forever when
            // termination is requested while events are still queued,
            // because the "reloop" test below never lets it reach the
            // giving-up code, the only other place where *terminated is
            // looked at.
            //
            if ( *terminated ) return;

            //
            // reloop if some should exist
            //
            if ( GlobalCurrentEventsCounter.value() > 0 ) continue;

            CERR_PRE;
            cerr << name() << "check if I should quit\n";
            CERR_POST;

            //
            // Nothing to do anywhere, as far as we can tell: count
            // ourselves among the CPUs that want to give up.
            //
            GivingUpLock.reserve();

            GivingUpCounter++;

            CERR_PRE;
            cerr << name() << GivingUpCounter;
            cerr << " CPUs attempting to give up\n";
            CERR_POST;

            assert( GivingUpCounter > 0);
            assert( GivingUpCounter <= CpuMux::Muxs);

            if ( GivingUpCounter == CpuMux::Muxs
                && GlobalCurrentEventsCounter.value() == 0)
            {

                //
                // We are the last CPU to go idle and there is still no
                // work: start a new generation, which releases the CPUs
                // spinning below, and quit.
                //
                GivingUpGeneration ++;
                GivingUpCounter = 0;
                GivingUpLock.release();

                CERR_PRE;
                cerr << name() << "give up\n";
                cerr << name() << " GCEC = " << GlobalCurrentEventsCounter.value() << "\n";
                cerr << name() << "my CEC = " << CurrentEventsCounter[ iYam ] << "\n";
                CERR_POST;

                return;
            }
            else {

                //
                // Record the current generation of the giving up
                // barrier -- we're going to give up only if every
                // one else agrees to give up and nothing new to do
                // comes along in the meantime.
                //

                int generation = GivingUpGeneration;
                VolatileInt *genp = &GivingUpGeneration;

                GivingUpLock.release();

                CERR_PRE;
                cerr << name() << " *genp = " << *genp << "\n";
                cerr << name() << " generation = " << generation << "\n";
                CERR_POST;

                //
                // Spin, without holding the lock, until the generation
                // changes, work shows up, or termination is requested.
                //
                while( generation == *genp
                      && GlobalCurrentEventsCounter.value() == 0
                      && !*terminated );


                GivingUpLock.reserve();
                if ( *genp != generation || *terminated ) {

                    //
                    // A new generation (whoever started it has already
                    // reset GivingUpCounter) or termination: quit.
                    //
                    CERR_PRE;
                    cerr << name() << " *genp = " << *genp << "\n";
                    cerr << name() << " generation = " << generation << "\n";
                    cerr << name() << " GCEC = " << GlobalCurrentEventsCounter.value() << "\n";
                    cerr << name() << " *terminated = " << *terminated << "\n";
                    cerr << name() << " giving up\n";
                    cerr << name() << "my CEC = " << CurrentEventsCounter[iYam] << "\n";
                    CERR_POST;

                    GivingUpLock.release();
                    return;
                }
                else {

                    //
                    // Work has shown up in the same generation: take
                    // ourselves out of the count and look for a thread
                    // again.
                    //
                    CERR_PRE;
                    cerr << name() << " bail out\n";
                    CERR_POST;

                    GivingUpCounter--;
                    assert(GivingUpCounter >= 0);
                    GivingUpLock.release();

                    CERR_PRE;
                    cerr << name() << " check for something\n";
                    cerr << name() << " i have " << CurrentEventsCounter[iYam];
                    cerr << " events\n" ;

                    cerr << name() << " out of " ;
                    cerr << GlobalCurrentEventsCounter.value() << "\n";
                    CERR_POST;
                }
            }
        }


        CERR_PRE;
        cerr << name() << " switch to ";
        cerr << currentThread->name() << "\n";
        CERR_POST;

        systemTransfer( currentThread );

        //
        // Control is back in the dispatcher; something must have been
        // recorded in raisedBy. Run its handler and clear the record.
        //
        assert(raisedBy != 0);

        raisedBy -> handleException();
        raisedBy = 0;
    }
}

//
// This version never returns a container: it trips an assertion and
// then aborts, so that the process still stops when assertions are
// compiled out.
//
ThreadContainer*
MultiCpuMux::allocateThreadContainer()
{
    assert(0);	// must not be reached
    abort();	// still stops when assert is compiled out
}
