#ifndef LINT
static char SCCSid[] = "@(#) ./solvers/osm.c 07/23/93";
#endif

/*
   A simple overlapping Schwarz preconditioner for the SV codes

   By default, the matrix is divided into OSM_FRAGS pieces, using
   consecutive blocks (a negative OSM_FRAGS asks for about sqrt(rows)
   pieces).  SpSubsetSorted is used to generate local copies
   (we could use SpSubsetInPlace for this, but see below).

*/

#define OSM_FRAGS -1
#define OSM_OVERLAP 1

#include "tools.h"
#include "solvers/svctx.h"
#include "solvers/svpriv.h"
#include "inline/spops.h" 
#include "inline/copy.h"  
#include "inline/setval.h"
#include <math.h>

void SViOSMComputeIntersections(), SViGlobalSolveOSM();

/*ARGSUSED*/
void SViCreateOSM(ctx,mat)
SVctx *ctx;
SpMat *mat;
{
/* Allocate the private Schwarz context, load every field with its default,
   and hook the OSM entry points into ctx.  mat is not referenced here. */
SVOSMctx *osm = NEW(SVOSMctx);

CHKPTR(osm);

/* Decomposition.  OSM_FRAGS is negative, which SViAllocOSMDomains takes as
   a request to choose the number of domains itself. */
osm->bb                = 0;
osm->nd                = OSM_FRAGS;
osm->overlap           = OSM_OVERLAP;
osm->is_mult           = 1;
osm->use_intersections = 0;

/* Local solvers.  SViSetupOSM only applies an iteration cap when
   itmax is positive. */
osm->defMeth = SVLU;
osm->defIt   = ITGMRES;
osm->itmax   = -1;

/* Coarse (global) problem: none until the user provides one */
osm->global      = 0;
osm->gctx        = 0;
osm->cb          = 0;
osm->cx          = 0;
osm->Restrict    = SViDefaultRestrictOSM;
osm->Interpolate = SViDefaultInterpolateOSM;
osm->restrictCtx = 0;
osm->interpCtx   = 0;

/* Monitors and their contexts */
osm->localMonitor  = 0;
osm->lmctx         = 0;
osm->globalMonitor = 0;
osm->gmctx         = 0;

/* Work vectors are allocated by SViSetupOSM */
osm->worksize = 0;
osm->w1       = 0;
osm->w2       = 0;

/* Attach to the solver context */
ctx->private = (void *) osm;
ctx->method  = ITGMRES;
ctx->is_iter = 1;
ctx->destroy = SViDestroyOSM;
ctx->solve   = SViSolveOSM;
ctx->setup   = SViSetupOSM;
}

/*
   Set up the OSM preconditioner and its accelerator.

   Creates the iterative context, allocates the domain table if the user
   has not already done so, and then, for each domain, fills in whatever
   the user did not supply: the index set (idx), the overlapped index set
   (ovidx), the local matrix (block), the row submatrix used for local
   residuals (submat) and the local solver (svc).  Finally it sets up the
   optional coarse-grid solver and allocates the two work vectors.

   Input parameter:
.  ctx - solver context whose private field is an SVOSMctx
 */
void SViSetupOSM(ctx)
SVctx *ctx;
{
SVOSMctx   *lctx = (SVOSMctx *) ctx->private;
int        i, j, ioff, *idx, nv, dnv, nlv, ncols, l, maxcols, *cols, *lcols;
SVOSMBlock *BB;
double      t1;

/* CPU time spent here is added to ctx->t_setup at the end */
t1 = SYGetCPUTime();

/* Accelerator.  binv is set to the multiplicative sweep here; SViSolveOSM
   chooses again between the multiplicative and additive forms on every
   solve, based on lctx->is_mult. */
ctx->itctx = ITCreate( ctx->method );   CHKERR(1);
DVSetDefaultFunctions( ctx->itctx->vc); CHKERR(1);
ctx->itctx->amult = SViMult;
ctx->itctx->tamult= SViMultTrans;
ctx->itctx->binv  = SViApplyMOSM;
/* put dummy values in vec_sol and vec_rhs */
ctx->itctx->vec_sol = (void *) 1;
ctx->itctx->vec_rhs = (void *) 1;
ctx->itctx->usr_monitor = 0;

/* This also resolves a negative lctx->nd to an actual domain count */
if (!lctx->bb) 
    SViAllocOSMDomains( ctx );

/* Compute a default partition of the matrix */
/* dnv is rows/nd rounded up: the size of every default domain but the last.
   ioff is the first row of the current default domain; nlv tracks the
   largest local block seen so far. */
ioff = 0;
dnv  = (ctx->mat->rows + lctx->nd - 1) / lctx->nd;
nlv  = 0;
maxcols = ctx->mat->rows;

/* Work area for computing support of a domain */
cols = (int *)MALLOC( maxcols * sizeof(int) );     CHKPTR(cols);

/* For each block, generate any data that is not provided. */
for (i=0; i<lctx->nd; i++) {
    BB = lctx->bb + i;
    /* Clip the default domain so it does not run past the last row */
    nv = dnv;
    if (ioff + nv > ctx->mat->rows) 
	nv = ctx->mat->rows - ioff;
    if (!BB->idx) {
	/* The idx values should be unique BETWEEN the domains.
	   However, we don't test for this */
	/* Default domain: the nv consecutive rows starting at ioff */
	idx = BB->idx   = (int *)MALLOC( nv * sizeof(int) );  
	BB->nv          = nv;
	CHKPTR(idx);
	for (j=0; j<nv; j++) idx[j] = j + ioff;
	}
    if (!BB->ovidx) {
	/* Default overlap: start from idx and apply SpSupport2 lctx->overlap
	   times (presumably each call widens the set to the support of the
	   current rows/columns - confirm against SpSupport2).  cols is both
	   input and output; stop early once the set fills the work area. */
	ncols = BB->nv;
	idx   = BB->idx;
	ICOPY(cols,idx,ncols);
	for (l=0; l<lctx->overlap; l++) {
	    SpSupport2( ctx->mat, ncols, cols, maxcols, &ncols, cols );
	    CHKERR(1);
	    if (ncols == maxcols) break;
	    }
	BB->ovidx = (int *)MALLOC( ncols * sizeof(int) );
	CHKPTR(BB->ovidx);
	BB->ovnv  = ncols;
	ICOPY(BB->ovidx,cols,ncols);
	}
    /* See if ovidx is contiguous */
    BB->ovIsContig = ISiIsContig( BB->ovnv, BB->ovidx );
    if (!BB->block) {
	/* Local matrix: rows and columns both restricted to ovidx */
	ncols  = BB->ovnv;
	lcols  = BB->ovidx;
	BB->block = 
	             SpSubsetSorted( ctx->mat, ncols, ncols, lcols, lcols );
	CHKPTR(BB->block);
	}
    /* Multiplicative method without intersections: the local residual is
       formed from the ovidx rows of the matrix (see
       SViOSMComputeLocalResidual) */
    if (lctx->is_mult && !lctx->use_intersections && !BB->submat) {
	ncols  = BB->ovnv;
	lcols  = BB->ovidx;
 	BB->submat= SpRowSubMatrixInPlace( ctx->mat, lcols, ncols ); 
	CHKPTR(BB->submat);
	}
    if (!BB->svc) {
	BB->svc   = SVCreate( BB->block, BB->lmeth );
	CHKPTR(BB->svc);
	SVSetAccelerator( BB->svc, lctx->defIt );
	}

    /* Advance the default-partition offset even when this domain's idx
       was supplied by the user */
    ioff += nv;
    SVSetUp( BB->svc );
    if (lctx->itmax > 0) {
	SVSetIts( BB->svc, lctx->itmax );
	/* Set the relative tolerance small enough to force itmax steps */
	SVSetRelativeTol( BB->svc, 1.0e-15 );
	}
    /* Move the local solver's setup flops into the outer context */
    ctx->flops             += BB->svc->flops;
    BB->svc->flops = 0;
    if (BB->block->rows > nlv) 
	nlv = BB->block->rows;
    }
FREE( cols );

/* Largest local block; used to size w1/w2 below and the index sets in
   SViOSMComputeIntersections */
lctx->worksize = nlv;

/* Multiplicative method with intersections: submat covers only the rows
   recorded in oiidx by SViOSMComputeIntersections, and is left null for a
   domain whose oinv is zero */
if (lctx->is_mult && lctx->use_intersections) {
    SViOSMComputeIntersections( ctx, lctx );
    for (i=0; i<lctx->nd; i++) {
	BB = lctx->bb + i;
	if (!BB->submat) {
	    ncols  = BB->oinv;
	    lcols  = BB->oiidx;
	    if (ncols > 0) {
		BB->submat= SpRowSubMatrixInPlace( ctx->mat, lcols, ncols );
		CHKPTR(BB->submat);
		}
	    }
	}
    }

/* Allocate coarse-grid problem */
/* cb (coarse rhs) and cx (coarse solution) share one allocation; only cb
   is freed in SViDestroyOSM */
if (lctx->global) {
    lctx->cb = (double *)MALLOC( 2 * lctx->global->rows * sizeof(double) );
    CHKPTR(lctx->cb);
    lctx->cx = lctx->cb + lctx->global->rows;
    if (!lctx->gctx) {
	lctx->gctx = SVCreate( lctx->global, lctx->defMeth );
	CHKPTR( lctx->gctx );
	}
    SVSetUp( lctx->gctx );
    }

/* Two work vectors of nlv entries each, in a single allocation */
lctx->w1 = (double *)MALLOC( nlv * 2 * sizeof(double) );    CHKPTR(lctx->w1) ;
lctx->w2 = lctx->w1 + nlv;

/* Nonzero count of the original matrix; SViSolveOSM passes 2*nzorig to
   SVGetITFlops */
ctx->nzorig = SpNz(ctx->mat);

ctx->setupcalled = 1;
ctx->t_setup += SYGetCPUTime() - t1;
}

/*
   Solve with the OSM-preconditioned accelerator.

   Input parameters:
.  ctx - solver context
.  b   - right-hand side

   Output parameter:
.  x   - solution (also passed to SViManageInitialGuess before the solve)

   Returns the value returned by ITSolve, or -1 if setup fails.
 */
int SViSolveOSM( ctx, b, x )
SVctx  *ctx;
double *b, *x;
{
SVOSMctx   *osm = (SVOSMctx *) ctx->private;
SVOSMBlock *dom;
int        niter, left;
double     tstart;

/* Deferred setup of the preconditioner */
if (!ctx->setupcalled) {
    (*ctx->setup)( ctx );
    CHKERRV(1,-1);
    }
/* First solve only: set up the accelerator and charge it to setup time */
if (!ctx->solvecalled) {
    tstart = SYGetCPUTime();
    ITSetUp(ctx->itctx,(void *) ctx);
    CHKERRV(1,-1);
    ctx->t_setup += SYGetCPUTime() - tstart;
    }

tstart           = SYGetCPUTime();
ctx->solvecalled = 1;

SViManageInitialGuess( ctx, x );
ctx->itctx->vec_rhs = (void *)b;
ctx->itctx->vec_sol = (void *)x;

/* Pick the sweep on every solve so that a change to is_mult takes effect */
if (!osm->is_mult) {
    ctx->itctx->binv = SViApplyAOSM;
    }
else {
    ctx->itctx->binv = SViApplyMOSM;
    }

niter = ITSolve( ctx->itctx, (void *)ctx );
SVGetITFlops(ctx,2*ctx->nzorig,0);

/* Collect (and reset) the flop counts of the subsidiary solvers */
left = osm->nd;
dom  = osm->bb;
while (left > 0) {
    ctx->flops      += dom->svc->flops + dom->nv;
    dom->svc->flops = 0;
    dom++;
    left--;
    }
/* ... and of the global problem, if there is one */
if (osm->gctx) {
    ctx->flops        += osm->gctx->flops;
    osm->gctx->flops  = 0;
    }

ctx->its     = niter;
ctx->t_solve += SYGetCPUTime() - tstart;
return niter;
}

/*
   Release everything owned by an OSM solver context, including ctx itself.

   Every member is checked before it is released, so a context that was
   created (and possibly given some domains) but never set up can still be
   destroyed.  This relies on the pointers being zero until they are
   assigned: SViCreateOSM and SViAllocOSMDomains do that for the OSM data;
   ctx->itctx is assumed to start out null as well (NOTE(review): confirm
   in SVCreate).
 */
void SViDestroyOSM( ctx )
SVctx *ctx;
{
SVOSMctx   *lctx = (SVOSMctx *) ctx->private;
int        i, nd;
SVOSMBlock *BB;

nd = lctx->nd;

if (lctx->bb) {
    for (i=0; i<nd; i++) {
	BB = lctx->bb + i;
	if (BB->svc) {
	    SVDestroy( BB->svc );
	    }
	if (BB->block) {
	    SpDestroy( BB->block );
	    }
	if (BB->submat) {
	    SpDestroy( BB->submat );
	    }
	if (BB->idx) {
	    FREE( BB->idx );
	    }
	if (BB->ovidx) {
	    FREE( BB->ovidx );
	    }
	/* oividx is allocated after oiidx in SViOSMComputeIntersections and
	   may be missing if that allocation failed, so test it separately */
	if (BB->oiidx) {
	    FREE( BB->oiidx );
	    }
	if (BB->oividx) {
	    FREE( BB->oividx );
	    }
	if (BB->cidx) {
	    FREE( BB->cidx );
	    }
	}
    FREE( lctx->bb );
    }
/* w2 points into the w1 allocation and cx into the cb allocation, so only
   w1 and cb are freed */
if (lctx->w1) {
    FREE( lctx->w1 );
    }
if (lctx->cb) {
    FREE( lctx->cb );
    }
if (lctx->gctx) {
    SVDestroy( lctx->gctx );
    }
FREE( lctx );
if (ctx->itctx) {
    VEDestroy( ctx->itctx->vc );
    ITDestroy( ctx->itctx, ctx );
    }
FREE( ctx );
}

/* Additive version.

   y starts as zero, or as the coarse-grid solution when a global problem
   is defined.  Every domain then solves its local problem against the
   unmodified right-hand side x and adds the result into y.

   ctx - solver context
   x   - input vector (right-hand side)
   y   - output vector
 */
void SViApplyAOSM( ctx, x, y )
SVctx  *ctx;
double *x, *y;
{
SVOSMctx   *osm = (SVOSMctx *) ctx->private;
SVOSMBlock *dom;
double     *gath, *corr, *rhs;
int        d, ndom, len, *map;

ctx->nbinv++;

gath = osm->w1;
corr = osm->w2;
ndom = osm->nd;

if (osm->global) {
    SViGlobalSolveOSM( ctx, osm, x, y );
    }
else {
    len = ctx->mat->rows;
    SET(y,len,0.0);
    }

for (d=0; d<ndom; d++) {
    dom = osm->bb + d;
    map = dom->ovidx;
    len = dom->ovnv;
    if (dom->ovIsContig) {
	/* The domain is a contiguous slice of x: use it in place */
	rhs = x + map[0];
	}
    else {
	/* Collect the scattered entries of x into the work vector */
	GATHER(gath,map,x,len);
	rhs = gath;
	}
    SVSolve( dom->svc, rhs, corr );
    /* Eventually replace this with SVSolveAdd in the contiguous case */
    SCATTERADD(corr,map,y,len);
    }
}

/* Compute the local residual for one domain.
   lctx - OSM context (use_intersections selects the strategy)
   BB   - the domain
   i    - index of the domain (not referenced by the code)
   y    - current solution (global vector)
   x    - current rhs      (global vector)
   bb   - local rhs (output, BB->ovnv entries)
   xx   - local temp; receives the result of SpMult( BB->submat, y, . )

   Note that IF there is no global problem AND this domain does not
   overlap any domain that has already been updated THEN the whole
   computation could be replaced by bb <- x[idx[l]].

   The residual consists of two pieces: the piece where the matrix is 0
   (just x) and the piece where the matrix is non-zero.  With
   use_intersections set, only the second piece involves a matrix product.
 */
void SViOSMComputeLocalResidual( lctx, BB, i, y, x, bb, xx )
SVOSMctx   *lctx;
SVOSMBlock *BB;
int        i;
double     *y, *x, *bb, *xx;
{
int m, n, first, *dom, *slot;

if (lctx->use_intersections) {
    /* Begin with the restriction of x to the domain */
    dom = BB->ovidx;
    n   = BB->ovnv;
    if (BB->ovIsContig) {
	first = dom[0];
	for (m=0; m<n; m++) bb[m] = x[first+m];
	}
    else {
	for (m=0; m<n; m++) bb[m] = x[dom[m]];
	}

    /* No submat means nothing to subtract for this domain */
    if (!BB->submat) return;

    /* submat has BB->oinv rows; oividx gives, for each of them, the
       position of that row inside the local vector bb */
    SpMult( BB->submat, y, xx );
    n    = BB->oinv;
    slot = BB->oividx;
    for (m=0; m<n; m++) bb[slot[m]] -= xx[m];
    return;
    }

/* Without intersections submat covers every row of the domain, so the
   residual is formed entry by entry */
SpMult( BB->submat, y, xx );
dom = BB->ovidx;
n   = BB->ovnv;
if (BB->ovIsContig) {
    first = dom[0];
    for (m=0; m<n; m++) bb[m] = x[first+m] - xx[m];
    }
else {
    for (m=0; m<n; m++) bb[m] = x[dom[m]] - xx[m];
    }
}

/* Multiplicative version.

   y starts as zero, or as the coarse-grid solution when a global problem
   is defined.  The domains are then visited in order; each one forms its
   local residual from the current y, solves, and adds its correction into
   y before the next domain is visited.

   If a local monitor is installed it is called three times per domain,
   with a final argument of 0 (after the residual), 1 (after the solve)
   and 2 (after the update of y).
 */
void SViApplyMOSM( ctx, x, y )
SVctx           *ctx;
register double *x, *y;
{
SVOSMctx   *osm = (SVOSMctx *) ctx->private;
SVOSMBlock *dom;
double     *rloc, *corr;
int        *map, d, ndom, len;

ctx->nbinv++;

rloc = osm->w1;
corr = osm->w2;
ndom = osm->nd;

/* Use the global problem to set y here, if possible  */
len = ctx->mat->rows;
if (osm->global) {
    SViGlobalSolveOSM( ctx, osm, x, y );
    }
else {
    SET(y,len,0.0);
    }

for (d=0, dom=osm->bb; d<ndom; d++, dom++) {
    /* Local residual */
    SViOSMComputeLocalResidual( osm, dom, d, y, x, rloc, corr );
    if (osm->localMonitor) {
	(*osm->localMonitor)( ctx, dom->ovnv, rloc, corr, d, osm->lmctx, 0 );
	}

    /* Local solve.  If ovIsContig, use SVSolveAdd (once it is available) */
    SVSolve( dom->svc, rloc, corr );
    if (osm->localMonitor) {
	(*osm->localMonitor)( ctx, dom->ovnv, rloc, corr, d, osm->lmctx, 1 );
	}

    /* Add in the correction */
    map = dom->ovidx;
    len = dom->ovnv;
    SCATTERADD(corr,map,y,len);
    if (osm->localMonitor) {
	(*osm->localMonitor)( ctx, dom->ovnv, rloc, corr, d, osm->lmctx, 2 );
	}
    }
}

/* One multiplicative update of y from domain i: form the local residual,
   solve, and add the correction into y.  bb and xx are the local work
   vectors.  The local monitor, if any, is called after each of the three
   stages with a final argument of 0, 1 and 2. */
static void SViOSMSymDomainStep( ctx, lctx, i, x, y, bb, xx )
SVctx    *ctx;
SVOSMctx *lctx;
int      i;
double   *x, *y, *bb, *xx;
{
SVOSMBlock *BB = lctx->bb + i;
int        *idx, nv;

SViOSMComputeLocalResidual( lctx, BB, i, y, x, bb, xx );
if (lctx->localMonitor) {
    (*lctx->localMonitor)( ctx, BB->ovnv, bb, xx, i, lctx->lmctx, 0 );
    }

/* If ovIsContig, use SVSolveAdd... (once it is available) */
SVSolve( BB->svc, bb, xx );
if (lctx->localMonitor) {
    (*lctx->localMonitor)( ctx, BB->ovnv, bb, xx, i, lctx->lmctx, 1 );
    }

idx = BB->ovidx;
nv  = BB->ovnv;
SCATTERADD(xx,idx,y,nv);
if (lctx->localMonitor) {
    (*lctx->localMonitor)( ctx, BB->ovnv, bb, xx, i, lctx->lmctx, 2 );
    }
}

/* Symmetric version of MOSM: a forward sweep over the domains followed by
   a backward sweep (the last domain is therefore visited twice in a row).

   Note that if intersections are in use, the backward sweep really needs
   another computation of the residual over the entire vector AND a
   different set of overlaps; that is not done here. */
void SViApplyMOSMSym( ctx, x, y )
SVctx           *ctx;
register double *x, *y;
{
SVOSMctx *lctx = (SVOSMctx *) ctx->private;
double   *bb, *xx;
int      i, nd, nv;

ctx->nbinv++;

bb = lctx->w1;
xx = lctx->w2;
nd = lctx->nd;

/* Use the global problem to set y here, if possible  */
nv = ctx->mat->rows;
if (lctx->global) {
    SViGlobalSolveOSM( ctx, lctx, x, y );
    }
else {
    SET(y,nv,0.0);
    }

for (i=0; i<nd; i++) {
    SViOSMSymDomainStep( ctx, lctx, i, x, y, bb, xx );
    }
for (i=nd-1; i>=0; i--) {
    SViOSMSymDomainStep( ctx, lctx, i, x, y, bb, xx );
    }
}

/* 
   Placeholder for the multiplicative - additive hybrid: do the global
   problem multiplicatively and the local problems all additively.

   NOT CHANGED YET: the local problems below are still applied one after
   another against the updated y, exactly as in the multiplicative sweep.
 */
void SViApplyMAOSM( ctx, x, y )
SVctx           *ctx;
register double *x, *y;
{
SVOSMctx   *osm = (SVOSMctx *) ctx->private;
SVOSMBlock *cur;
double     *res, *del;
int        *where, k, count, n;

ctx->nbinv++;

res   = osm->w1;
del   = osm->w2;
count = osm->nd;

/* Use the global problem to set y here, if possible  */
n = ctx->mat->rows;
if (osm->global) {
    SViGlobalSolveOSM( ctx, osm, x, y );
    }
else {
    SET(y,n,0.0);
    }

k = 0;
while (k < count) {
    cur = osm->bb + k;

    /* Local residual (monitor stage 0) */
    SViOSMComputeLocalResidual( osm, cur, k, y, x, res, del );
    if (osm->localMonitor) {
	(*osm->localMonitor)( ctx, cur->ovnv, res, del, k, osm->lmctx, 0 );
	}

    /* Local solve (monitor stage 1).  If ovIsContig, use SVSolveAdd once
       it is available */
    SVSolve( cur->svc, res, del );
    if (osm->localMonitor) {
	(*osm->localMonitor)( ctx, cur->ovnv, res, del, k, osm->lmctx, 1 );
	}

    /* Add in the correction (monitor stage 2) */
    where = cur->ovidx;
    n     = cur->ovnv;
    SCATTERADD(del,where,y,n);
    if (osm->localMonitor) {
	(*osm->localMonitor)( ctx, cur->ovnv, res, del, k, osm->lmctx, 2 );
	}

    k++;
    }
}

/*
    Allocate the domains 
 */
void SViAllocOSMDomains( ctx )
SVctx *ctx;
{
SVOSMctx   *lctx = (SVOSMctx *) ctx->private;
int        i, nd;
SVOSMBlock *BB;

if (!lctx->bb) {
    nd           = lctx->nd;
    if (nd < 0) {
	nd = (int) sqrt( (double)(ctx->mat->rows + 0.5) );
	if (nd < 1) nd = 1;
	lctx->nd = nd;
	}
    lctx->bb     = (SVOSMBlock *)MALLOC( nd * sizeof(SVOSMBlock) ); 
    CHKERR(1);
    for (i=0; i<nd; i++) {
	BB             = lctx->bb + i;
	BB->block      = 0;
	BB->submat     = 0;
	BB->nv         = 0;
	BB->idx        = 0;
	BB->ovidx      = 0;
	BB->oinv       = 0;
	BB->oiidx      = 0;
	BB->oividx     = 0;
	BB->ovIsContig = 0;
	BB->svc        = 0;
	BB->cnv        = 0;
	BB->cidx       = 0;
	BB->lmeth      = lctx->defMeth;
	}
    }
}

/*
   Set the indices (idx) for domain i.  There are nv elements in idx.
   A COPY is made, so the caller keeps ownership of idx.

   Does nothing if ctx is not an OSM solver.  The domain table is allocated
   on first use.  An index outside [0,nd) is reported with SETERRC.  Any
   index set stored earlier for this domain is released first.
 */
void SViSetOSMDecomp( ctx, i, idx, nv )
SVctx *ctx;
int   i, *idx, nv;
{
SVOSMctx *lctx = (SVOSMctx *) ctx->private;
int      *lidx;

if (ctx->type != SVOSM) return;

if (!lctx->bb) {
    SViAllocOSMDomains( ctx );
    CHKERR(1);
    }

if (i < 0 || i >= lctx->nd) { 
    SETERRC(1,"Attempt to set indices for out-of-range domain"); return; }

/* Do not leak a list set by an earlier call */
if (lctx->bb[i].idx) {
    FREE( lctx->bb[i].idx );
    lctx->bb[i].idx = 0;
    lctx->bb[i].nv  = 0;
    }

lidx = (int *)MALLOC( nv * sizeof(int) );   CHKPTR(lidx);
ICOPY(lidx,idx,nv);
lctx->bb[i].idx = lidx;
lctx->bb[i].nv  = nv;
}

/*
   Set the overlapped indices (ovidx) for domain i.  There are nv elements
   in idx.  A COPY is made, so the caller keeps ownership of idx.

   Does nothing if ctx is not an OSM solver.  The domain table is allocated
   on first use.  An index outside [0,nd) is reported with SETERRC.  Any
   overlapped index set stored earlier for this domain is released first.
 */
void SViSetOSMOverlapDecomp( ctx, i, idx, nv )
SVctx *ctx;
int   i, *idx, nv;
{
SVOSMctx *lctx = (SVOSMctx *) ctx->private;
int      *lidx;

if (ctx->type != SVOSM) return;

if (!lctx->bb) {
    SViAllocOSMDomains( ctx );
    CHKERR(1);
    }

if (i < 0 || i >= lctx->nd) { 
    SETERRC(1,"Attempt to set indices for out-of-range domain"); return; }

/* Do not leak a list set by an earlier call */
if (lctx->bb[i].ovidx) {
    FREE( lctx->bb[i].ovidx );
    lctx->bb[i].ovidx = 0;
    lctx->bb[i].ovnv  = 0;
    }

lidx = (int *)MALLOC( nv * sizeof(int) );   CHKPTR(lidx);
ICOPY(lidx,idx,nv);
lctx->bb[i].ovidx = lidx;
lctx->bb[i].ovnv  = nv;
}

/*
   Set the Method for domain i. 

   Does nothing if ctx is not an OSM solver.  The domain table is allocated
   on first use.  An index outside [0,nd) is reported with SETERRC.  The
   method is read by SViSetupOSM when it creates the local solver, so it
   has no effect on a domain whose solver already exists.
 */
void SViSetOSMMethodDecomp( ctx, i, v )
SVctx    *ctx;
int      i;
SVMETHOD v;
{
SVOSMctx *lctx = (SVOSMctx *) ctx->private;

if (ctx->type != SVOSM) return;

if (!lctx->bb) {
    SViAllocOSMDomains( ctx );     CHKERR(1);
    }

/* A negative index would write before the start of the table */
if (i < 0 || i >= lctx->nd) { 
    SETERRC(1,"Attempt to set method for out-of-range domain"); return; }

lctx->bb[i].lmeth = v;
}

/* Solver context for a particular subdomain (with optional matrix) */
#ifdef FOO
/* Unfinished (and compiled out): intended to install a solver for domain i.
   At present it only validates its arguments; v and mat are not used. */
void SViSetOSMSolverDecomp( ctx, i, v, mat )
SVctx    *ctx;
int      i;
ITMETHOD v;
SpMat    *mat;
{
SVOSMctx *osm;

if (ctx->type != SVOSM) return;

osm = (SVOSMctx *) ctx->private;
if (osm->bb == 0) {
    SViAllocOSMDomains( ctx );
    CHKERR(1);
    }

if (osm->nd <= i) {
    SETERR(1,"Attempt to set solver for out-of-range domain");
    return;
    }

/* Still to do: osm->bb[i].svc = SVCreate( matrix, v ); */
}
#endif


/*
   Default interpolation/restriction routines

   So far, only piecewise-constant is provided.  This is defined as ?

   (Problems: double counting of points in multiple subdomains.  How
   do we define a "subdomain"?)
   
 */
/* Default restriction to the coarse grid.

   cx is cleared; then, for every domain, the mean of x over the domain's
   idx entries is added to each coarse point listed in the domain's cidx.

   ctx  - solver context (not referenced)
   lctx - OSM context
   x    - fine-grid vector (input)
   cx   - coarse-grid vector (output, lctx->global->rows entries)
   rctx - user context (not referenced)
 */
void SViDefaultRestrictOSM( ctx, lctx, x, cx, rctx )
SVctx    *ctx;
SVOSMctx *lctx;
double   *x, *cx;
void     *rctx;
{
SVOSMBlock *dom;
double     avg;
int        d, m, npts, *fine, *coarse;

for (m=0; m<lctx->global->rows; m++) {
    cx[m] = 0.0;
    }

for (d=0; d<lctx->nd; d++) {
    dom  = lctx->bb + d;
    fine = dom->idx;
    npts = dom->nv;

    /* Mean of x over the domain */
    avg = 0.0;
    for (m=0; m<npts; m++) {
	avg += x[fine[m]];
	}
    avg /= npts;

    /* Hand that mean to each coarse point of the domain */
    coarse = dom->cidx;
    for (m=0; m<dom->cnv; m++) {
	cx[coarse[m]] += avg;
	}
    }
}

/* Default interpolation from the coarse grid (piecewise constant).

   For every domain, the mean of cx over the domain's coarse points (cidx)
   is stored in x at each of the domain's idx entries.  Values are
   assigned, not accumulated, so this assumes non-overlapping initial
   domains.  A domain with no coarse points receives 0.

   ctx  - solver context (not referenced)
   lctx - OSM context
   cx   - coarse-grid vector (input)
   x    - fine-grid vector (output)
   ictx - user context (not referenced)
 */
void SViDefaultInterpolateOSM( ctx, lctx, cx, x, ictx )
SVctx    *ctx;
SVOSMctx *lctx;
double   *cx, *x;
void     *ictx;
{
int        i, l, k, nv, *idx;
double     val;
SVOSMBlock *BB;

for (i=0; i<lctx->nd; i++) {
    BB  = lctx->bb + i;
    idx = BB->idx;
    nv  = BB->nv;
    val = 0.0;
    for (l=0; l<BB->cnv; l++) 
	val += cx[BB->cidx[l]];
    /* Assumes non-overlapping initial domains */
    /* Avoid 0/0 when no coarse points were assigned to this domain */
    if (BB->cnv > 0) 
	val /= BB->cnv;
    /* The loop variable is k; advancing l here would never terminate */
    for (k=0; k<nv; k++) x[idx[k]] = val;
    }
}

/* Global solver: restrict b to the coarse grid, solve there with
   lctx->gctx, and interpolate the coarse solution into x.

   The restriction and interpolation routines are the ones installed in
   lctx.  If a global monitor is installed it is called after each of the
   three stages, with a final argument of 0, 1 and 2 respectively.

   ctx  - solver context
   lctx - OSM context
   b    - fine-grid right-hand side (input)
   x    - fine-grid vector produced by the interpolation routine (output)
 */
void SViGlobalSolveOSM( ctx, lctx, b, x )
SVctx    *ctx;
SVOSMctx *lctx;
double   *b, *x;
{
double *crhs = lctx->cb;
double *csol = lctx->cx;

/* Stage 0: restrict to the coarse grid */
(*lctx->Restrict)( ctx, lctx, b, crhs, lctx->restrictCtx );
if (lctx->globalMonitor) {
    (*lctx->globalMonitor)( ctx, ctx->size, lctx->global->rows, b, crhs, 
			    lctx->gmctx, 0 );
    }

/* Stage 1: coarse solve */
SVSolve( lctx->gctx, crhs, csol );
if (lctx->globalMonitor) {
    (*lctx->globalMonitor)( ctx, lctx->global->rows, lctx->global->rows, 
			    crhs, csol, lctx->gmctx, 1 );
    }

/* Stage 2: interpolate the solution */
(*lctx->Interpolate)( ctx, lctx, csol, x, lctx->interpCtx );
if (lctx->globalMonitor) {
    (*lctx->globalMonitor)( ctx, lctx->global->rows, ctx->size, csol, x, 
			    lctx->gmctx, 2 );
    }
}

/*
   Set the indices (idx) for domain i for the global mesh points.
   There are nv elements in idx.  A COPY is made, so the caller keeps
   ownership of idx.

   These are the coarse-grid points read by the default restriction and
   interpolation routines (the cidx/cnv fields of the domain).

   Does nothing if ctx is not an OSM solver.  The domain table is allocated
   on first use.  An index outside [0,nd) is reported with SETERRC.  Any
   coarse index set stored earlier for this domain is released first.
 */
void SViSetOSMCoarseDecomp( ctx, i, idx, nv )
SVctx *ctx;
int   i, *idx, nv;
{
SVOSMctx *lctx = (SVOSMctx *) ctx->private;
int      *lidx;

if (ctx->type != SVOSM) return;

if (!lctx->bb) {
    SViAllocOSMDomains( ctx );
    CHKERR(1);
    }

if (i < 0 || i >= lctx->nd) { 
    SETERRC(1,"Attempt to set global indices for out-of-range domain"); 
    return; }

/* Do not leak a list set by an earlier call */
if (lctx->bb[i].cidx) {
    FREE( lctx->bb[i].cidx );
    lctx->bb[i].cidx = 0;
    lctx->bb[i].cnv  = 0;
    }

lidx = (int *)MALLOC( nv * sizeof(int) );   CHKPTR(lidx);
/* Integer data: use the integer copy, as the other index setters do */
ICOPY(lidx,idx,nv);
lctx->bb[i].cidx = lidx;
lctx->bb[i].cnv  = nv;
}

/*-----------------------------------------------------------------------*/
/* These routines are common between bdd and osm (they are used for 
   numbering domains)
 */
/* This routine takes in size, ndtrial, n1, n2, and nc, and returns the
   actual number of domains (the return value), the number of domains
   along each side (Nx1), and the extent of each domain in the two mesh
   directions (Nxi, Nyi; all domains have the same size except the last).

   size must equal n1*n2*nc; otherwise an error is set and -1 is returned.

   The number of domains is the largest square nx1*nx1 that does not
   exceed ndtrial.  If ndtrial is not positive (the OSM default is a
   negative value meaning "choose for me"), about sqrt(size) is used
   instead, which is the same default that SViAllocOSMDomains applies.
   The result is never less than one domain.

   Note that this is NOT the optimal choice; the sizes of the domains
   can be set so that the maximum difference between their sizes is 
   1 in each direction.  To deal with this, we'll eventually CHANGE
   this routine.  BE WARNED!
 */
int SViGet2dDomain( size, ndtrial, n1, n2, nc, Nxi, Nyi, Nx1 )
int size, ndtrial, n1, n2, nc;
int *Nxi, *Nyi, *Nx1;
{
int nx1, nxi, nyi, nd, limit;

/* Check for valid input */
if (n1 * n2 * nc != size) {
    SETERRC( 1, "Specified size for 2d domain does not match problem size" );
    return -1;
    }

/* Resolve a non-positive request before taking any square root */
limit = ndtrial;
if (limit < 1) {
    limit = (size > 0) ? (int) sqrt( (double)size + 0.5 ) : 1;
    if (limit < 1) limit = 1;
    }

/* divide the domain into at most limit pieces: round sqrt to nearest, then
   step down until the square fits.  nx1 never drops below 1, so the
   divisions below are safe. */
nx1      = (int)( sqrt( (double)(limit) ) + 0.5 );
if (nx1 < 1) nx1 = 1;
nd       = nx1 * nx1;
while (nd > limit && nx1 > 1) {
    nx1--;
    nd = nx1 * nx1;
    }
nxi      = (n1 + nx1 - 1) / nx1;
nyi      = (n2 + nx1 - 1) / nx1;
/* A single domain spans the whole mesh, from index 0 to n-1 */
if (nx1 == 1) { nxi = n1 - 1; nyi = n2 - 1; }

*Nxi = nxi;
*Nyi = nyi;
*Nx1 = nx1;
return nd;
}

/* 
   Number the points using the formula
   idx[] = k + nc * (i + n1 * j)
   for i in [si,ei], j in [sj,ej], and k in [0,nc-1].  Entries are
   generated with k varying fastest, then i, then j.
   It returns the number of elements written, nc*(ei-si+1)*(ej-sj+1),
   or 0 if either range is empty.
 */
int SViNumber2dDomain( sj, ej, si, ei, nc, n1, idx )
int sj, ej, si, ei, nc, n1, *idx;
{
int row, col, comp, value;
int rowStride = nc * n1;        /* distance between mesh rows in idx units */
int rowBase   = sj * rowStride; /* number of the first entry of row sj     */
int *out      = idx;

for (row = sj; row <= ej; row++, rowBase += rowStride) {
    for (col = si; col <= ei; col++) {
	/* The nc components of one mesh point are numbered consecutively */
	value = rowBase + nc * col;
	for (comp = 0; comp < nc; comp++) {
	    *out++ = value + comp;
	    }
	}
    }
return (int)(out - idx);
}

/*-----------------------------------------------------------------------*/


/*@
    SVSetOSMRegularDomains2d - Set the domains for a n1 x n2 regular mesh

    Input parameters:
.   ctx   - solver context
.   n1,n2 - mesh is n1 x n2
.   nc    - there are nc components per mesh point.  Components are numbered
            first

    Notes:
    This routine is provided as (1) an example of a routine to set 
    user-defined domains and (2) a service routine for a relatively
    common case.

    This uses a square decomposition.  It may change the number of subdomains
    if that number is not a square.

    It also defines the coarse-grid mapping, under the assumption that 
    the boundaries are included in the coarse-grid problem.
@*/
void SVSetOSMRegularDomains2d( ctx, n1, n2, nc )
SVctx *ctx;
int   n1, n2, nc;
{
SVOSMctx *osm = (SVOSMctx *) ctx->private;
int      *fine, *corner;
int      nside;            /* Domains along each side of the mesh */
int      wx, wy;           /* Extent of a domain in each direction */
int      xlo, xhi, ylo, yhi, col, row, comp, dom, npts;
int      stride, c00, c01;
 
if (ctx->type != SVOSM) return;

/* The number of domains may be reduced to a square here */
osm->nd = SViGet2dDomain( ctx->size, osm->nd, n1, n2, nc, 
			  &wx, &wy, &nside );
CHKERR(1);

/* Scratch lists: the fine-grid indices of one domain, and the four
   coarse-grid corners of one domain (nc components each) */
fine   = (int *)MALLOC( (wx + 2) * (wy + 2) * nc * sizeof(int) ); CHKPTR(fine);
corner = (int *)MALLOC( 4 * nc * sizeof(int) );                   CHKPTR(corner); 

/* The coarse grid has nside+1 points along each side */
stride = nside + 1;

/* Walk the domains row by row.  Each domain starts on the last mesh line
   of its predecessor, so neighbours overlap along their common border. */
dom = 0;
ylo = 0;
yhi = wy;
for (row = 0; row < nside; row++) {
    xlo = 0; 
    xhi = wx;
    for (col = 0; col < nside; col++) {
	npts = SViNumber2dDomain( ylo, yhi, xlo, xhi, nc, n1, fine );
	SViSetOSMDecomp( ctx, dom, fine, npts );

	/* Coarse-grid indices: lower-left, lower-right, upper-left and
	   upper-right corner of this domain */
	c00 = (col +       row * stride) * nc;
	c01 = (col + (row + 1) * stride) * nc;
	for (comp = 0; comp < nc; comp++) {
	    corner[comp]        = comp + c00;
	    corner[comp+nc]     = comp + c00 + nc;
	    corner[comp+2*nc]   = comp + c01;
	    corner[comp+3*nc]   = comp + c01 + nc;
	    }
	SViSetOSMCoarseDecomp( ctx, dom, corner, 4*nc );

	dom++;
	/* Next domain in x; the final one always ends on the mesh edge */
	xlo = xhi;
	xhi = xhi + wx;
	if (xhi >= n1 || col == nside - 2) xhi = n1 - 1;
	}
    /* Next row of domains; the final one always ends on the mesh edge */
    ylo = yhi;
    yhi = yhi + wy;
    if (yhi >= n2 || row == nside - 2) yhi = n2 - 1;
    }

FREE( fine );
FREE( corner );
}

/*@
    SVSetOSMRegularOverlap2d - Set the overlap indices for a n1 x n2 regular 
                               mesh

    Input parameters:
.   ctx   - solver context
.   n1,n2 - mesh is n1 x n2
.   nc    - there are nc components per mesh point.  Components are numbered
            first
.   w1,w2 - width in directions 1 and 2

    Notes:
    This routine is provided as (1) an example of a routine to set 
    user-defined domains and (2) a service routine for a relatively
    common case.

    This uses a square decomposition.  It may change the number of subdomains
    if that number is not a square.
@*/
void SVSetOSMRegularOverlap2d( ctx, n1, n2, nc, w1, w2 )
SVctx *ctx;
int   n1, n2, nc, w1, w2;
{
SVOSMctx *osm = (SVOSMctx *) ctx->private;
int      *list;
int      nside;            /* Domains along each side of the mesh */
int      wx, wy;           /* Extent of a domain in each direction */
int      xlo, xhi, ylo, yhi;       /* Domain without the extra overlap */
int      oxlo, oxhi, oylo, oyhi;   /* Domain widened by w1/w2, clipped */
int      col, row, dom, npts;
 
if (ctx->type != SVOSM) return;

/* The number of domains may be reduced to a square here */
osm->nd = SViGet2dDomain( ctx->size, osm->nd, n1, n2, nc, 
			  &wx, &wy, &nside );
CHKERR(1);

/* Scratch list large enough for one widened domain */
list = (int *)MALLOC( (wx + 2 + 2*w1) * (wy + 2 + 2*w2) * nc * sizeof(int) );
CHKPTR(list);

/* Walk the domains row by row.  Each domain starts on the last mesh line
   of its predecessor; the overlapped version is that domain widened by w1
   points in x and w2 points in y and clipped to the mesh. */
dom = 0;
ylo = 0;
yhi = wy;
for (row = 0; row < nside; row++) {
    oylo = ylo - w2; 
    if (oylo < 0) oylo = 0;
    oyhi = yhi + w2;
    if (oyhi >= n2) oyhi = n2 - 1;

    xlo = 0; 
    xhi = wx;
    for (col = 0; col < nside; col++) {
	oxlo = xlo - w1;
	if (oxlo < 0) oxlo = 0;
	oxhi = xhi + w1;
	if (oxhi >= n1) oxhi = n1 - 1;

	npts = SViNumber2dDomain( oylo, oyhi, oxlo, oxhi, nc, n1, list );
	SViSetOSMOverlapDecomp( ctx, dom, list, npts );

	dom++;
	/* Next domain in x; the final one always ends on the mesh edge */
	xlo = xhi;
	xhi = xhi + wx;
	if (xhi >= n1 || col == nside - 2) xhi = n1 - 1;
	}
    /* Next row of domains; the final one always ends on the mesh edge */
    ylo = yhi;
    yhi = yhi + wy;
    if (yhi >= n2 || row == nside - 2) yhi = n2 - 1;
    }

FREE( list );
}

/* @

   Notes:
   This routine establishes a simple SSOR sweep for the "local" solvers
   We do this by simply picking SSOR, setting maxits to 3, and setting
   up the iterative method

 @ */

/*
   To do this, we need to either cache this info OR separate out the
   setup code so that we can generate the domains and the submatrices
   separately
 */


/* --------------------------------------------------------------------- */

/* 
    For best performance of the multiplicative method, we really need to
    color the domains so that the independent colors can be done
    together.  An important optimization for the no-global-problem
    case is the 0th color: the initial residual is just the RHS, 
    saving a matrix-vector product and a sparse axpy.

    A complication is that in computing the intersections, we need to
    compute the support of the final set, since the intersection gives
    the COLUMNS that are active, but we need the ROWS.
 */
#include "set/iset.h"

/*
   For every domain i, work out which of its rows are affected by domains
   that are applied before it (those with a smaller index), and record them
   in the domain:

     oinv   - number of such rows
     oiidx  - their global indices
     oividx - for each of them, its position within the domain's ovidx

   oiidx and oividx are allocated here only when oinv is positive.
   SViSetupOSM uses oiidx to build the row submatrix, and
   SViOSMComputeLocalResidual uses oividx to place the product in the
   local vector.

   The index sets are sized from lctx->worksize, so this must be called
   after SViSetupOSM has set that field.
 */
void SViOSMComputeIntersections( ctx, lctx )
SVctx    *ctx;
SVOSMctx *lctx;
{
int        i, j, nd, cnt, cur;
SVOSMBlock *bb;
ISet       *iv, *jv, *iiv, *ov, *nov, *tmp;

nd  = lctx->nd;
bb  = lctx->bb;
/* iiv, ov and nov own storage for 2*worksize entries.  iv and jv are only
   wrappers around a domain's ovidx array (see ISSetFromData below). */
iiv = ISAlloc( 2*lctx->worksize );             CHKPTR(iiv);
ov  = ISAlloc( 2*lctx->worksize );             CHKPTR(ov);
nov = ISAlloc( 2*lctx->worksize );             CHKPTR(nov);
iv  = ISAlloc( 0 );                          CHKPTR(iv);
jv  = ISAlloc( 0 );                          CHKPTR(jv);
for (i=0; i<nd; i++) {
    ISSetFromData( iv, bb[i].ovnv, bb[i].ovidx );
    /* ov accumulates the indices that domain i shares with any earlier
       domain; nov is the scratch target of each union */
    ov->n  = 0;
    nov->n = 0;
    for (j=0; j<i; j++) {
	ISSetFromData( jv, bb[j].ovnv, bb[j].ovidx );
	iiv->n = 0;
	ISIntersection( iv, jv, iiv );
	if (iiv->n > 0) {
	    /* Also add j to the graph of overlapping domains for i ?? */
	    /* nov = ov U iiv, then swap so that ov is the running union.
	       NOTE(review): nov->n is not reset before the next union;
	       this presumably relies on ISUnion overwriting its output -
	       confirm against ISUnion. */
	    ISUnion( ov, iiv, nov );
	    tmp = ov;	    ov  = nov;	    nov = tmp;
	    }
	}
    bb[i].oinv = ov->n;
    if (ov->n > 0) {
	/* Compute the support of those columns */
	/* NOTE(review): SpSupport2 is not followed by an error check here,
	   unlike its use in SViSetupOSM */
	SpSupport2( ctx->mat, ov->n, ov->idx, 2*lctx->worksize, 
		   &nov->n, nov->idx );
	/* Now we have to intersect this with ovidx */
	ISIntersection( nov, iv, ov );

	bb[i].oinv  = ov->n;
	bb[i].oiidx = (int *)MALLOC( ov->n * sizeof(int) );    
	CHKPTR(bb[i].oiidx);
	ICOPY( bb[i].oiidx, ov->idx, ov->n );

	bb[i].oividx = (int *)MALLOC( ov->n * sizeof(int) );
	CHKPTR(bb[i].oividx );

	/* Find the mapping to ovidx */
	/* A single forward scan of ovidx, matching the oiidx entries one
	   after another; this only finds every entry if oiidx lists its
	   indices in the same order as ovidx.  Entries that are not found
	   are reported as an error below.
	   NOTE(review): oiidx[0] is read before checking that the second
	   intersection left ov->n > 0. */
	cnt = 0;
	cur = bb[i].oiidx[cnt];
	for (j=0; j<bb[i].ovnv; j++) {
	    if (cur == bb[i].ovidx[j]) {
		bb[i].oividx[cnt++] = j;
		if (cnt == ov->n) break;
		cur = bb[i].oiidx[cnt];
		}
	    }
	if (cnt != bb[i].oinv) {
	    /* NOTE(review): this return skips the ISDestroy calls below */
	    SETERRC( 1, 
		    "Failed to compute mapping from overlap to local domain" );
	    return;
	    }
	}
    }

/* iv and jv point at arrays owned by the domains; clear the pointers
   before destroying the sets (presumably so that ISDestroy does not free
   them - confirm against ISDestroy) */
iv->idx = 0;
jv->idx = 0;
ISDestroy( iv );
ISDestroy( jv );
ISDestroy( iiv );
ISDestroy( ov );
ISDestroy( nov );
}
