#ifndef LINT
static char SCCSid[] = "@(#) ./sparse/diag/diag.c 07/23/93";
#endif

/*
   This file contains routines for representing and manipulating matrices
   in a sparse-diagonal format.  This format is defined by the SparseDiagonal
   structure; a matrix consists of a group of these.  

   A sparse diagonal is a collection of diagonals that ALL start on a given
   row.  In order to provide the most flexibility (needed, for example,
   for matrices arising from discretizations of multicomponent problems
   on regular grids), the rows corresponding to the entries along the
   diagonal may increment by a constant other than one.

   In order to accomodate more general ways of setting the diagonals, 
   an extra layer of indirection is/will be provided in the "diag" field.

   We need a number of additional options:
   One is to use diagonals that are not interleaved.  This permits
   the use of the matrix-mult routines with in-place data to do
   triangular solves (such as in SOR).

   Finally, we need to change this to support the standard matrix data
   structure, with the diagonal structure being the "data".
 */

#include "tools.h"
#include "sparse/spmat.h"
#include "sparse/sppriv.h"
#include "sparse/diag/diag.h"
#include "inline/vmult.h"
#include "inline/axpy.h"

extern void SpDMult();
void   SpDDestroy();
SpMat  *SpDCreate();
static SpOps _SpDiagOps = { SpDMult, 0, 0, 
			    0, 0, 
			    0, 0,
			    SpDDestroy, 
			   (void *(*)())SpDCreate, 
			   0, 
			   0, 0, 0, 
			   0, 0, 
			   0, 0, 0, 0,
			   0, 
			   0, 0, 0 };

/*@
   SpDMult - Form the matrix-vector product vout = mat * vin.

   Description:
   In order to achieve efficiencies near that of code for a specific matrix,
   several diagonal are done at once.  Handle constant diagonals as a 
   separate case (using AXPYINC definition). 

   Input Parameters:
.  nmat  - matrix (in diagonal format)
.  vin   - vector to multiply

   Output Parameters:
.  vout - result vector
@*/
void SpDMult( nmat, vin, vout )
SpMat  *nmat;
double *vin, *vout;
{
SpMatDiag      *mat = (SpMatDiag *)nmat->data;
SpDiag         *csd, **sd = mat->sd;
int            nsd = mat->nsd;
int            nd, nnr, nr, *doff, sr;
register int   rinc, dinc;    /* rinc and dinc are alias-free */
double         *dv, *v0, *v1, *v2, *v3, *v4, *vo, *d0, *d1, *d2, *d3, *d4, 
               **pdv;
double         a0, a1, a2, a3, a4;

while (nsd--) {
    csd = *sd++;
    nd  = csd->nd;
    nr  = csd->nr;
    doff= csd->doff;
    pdv = csd->v;
    rinc= csd->rinc;
    dinc= csd->dinc;
    sr  = csd->sr;

    /* Load vout with the first diagonal (could just set to zero, or do
       first couple of diagonals).  
       In order to match performance of specific routines, we should really
       do the maximum assignment, followed by incremental adds.
     */
    nnr = nr;
    vo  = vout + sr;
    v0  = vin  + *doff++;
    switch (csd->kind) {
	case DIAG_GENERAL:
	d0  = *pdv++;
	VMULTINCSET1(vo,d0,dinc,v0,rinc,nnr);
	nd--;
	
	/* Process remaining diagonals */
	while (nd > 0) {
	    nnr = nr;
	    d0  = pdv[0]; d1  = pdv[1]; d2  = pdv[2]; d3  = pdv[3];
	    d4  = pdv[4]; pdv += 5;
	    vo  = vout + sr;
	    v0  = vin  + doff[0];
	    v1  = vin  + doff[1]; v2  = vin  + doff[2];
	    v3  = vin  + doff[3]; v4  = vin  + doff[4]; doff += 5;
	    switch ((nd > 5) ? 5 : nd) {
		case 1:VMULTADDINC1(vo,d0,dinc,v0,rinc,nnr);break;
		case 2:VMULTADDINC2(vo,d0,d1,dinc,v0,v1,rinc,nnr);break;
		case 3:VMULTADDINC3(vo,d0,d1,d2,dinc,v0,v1,v2,rinc,nnr);break;
		case 4:VMULTADDINC4(vo,d0,d1,d2,d3,dinc,v0,v1,v2,v3,rinc,nnr);
		       break;
		case 5:
		VMULTADDINC5(vo,d0,d1,d2,d3,d4,dinc,v0,v1,v2,v3,v4,rinc,nnr);
		break;
		}
	    nd -= 5;
	    }
	break;

	case DIAG_UNITSTRIDE:
	/* Unit strides (actually, compile-time strides) are easier to 
	   unroll; the unit stride case is an important one */
	d0  = *pdv++; 
	VMULTSET1(vo,d0,v0,rinc,nnr);
	nd--;
	
	/* Process remaining diagonals */
	while (nd > 0) {
	    nnr = nr;
	    d0  = pdv[0]; d1  = pdv[1]; d2  = pdv[2]; d3  = pdv[3]; pdv += 4;
	    vo  = vout + sr;
	    v0  = vin  + doff[0]; v1  = vin  + doff[1];
	    v2  = vin  + doff[2]; v3  = vin  + doff[3]; doff += 4;
	    switch ((nd > 4) ? 4 : nd) {
		case 1:	VMULTADD1(vo,d0,v0,rinc,nnr);       break;
		case 2: VMULTADD2(vo,d0,d1,v0,v1,rinc,nnr); break;
		case 3: VMULTADD3(vo,d0,d1,d2,v0,v1,v2,rinc,nnr); break;
		case 4: VMULTADD4(vo,d0,d1,d2,d3,v0,v1,v2,v3,rinc,nnr);break;
		}
	    nd -= 4;
	    }
	break;

	case DIAG_CONSTANT:
	/* Diags are constant values.  Just for grins, we see if we
	   can do 5 diagonals first (try and reduce stores) */
	dv  = (double *)pdv;
	if (nd >= 5) {
	    nnr = nr;
	    a0  = dv[0]; a1 = dv[1]; a2 = dv[2]; a3 = dv[3]; a4 = dv[4]; 
	    dv += 5;
	    v1  = vin  + doff[0];
	    v2  = vin  + doff[1]; v3  = vin  + doff[2]; v4 = vin + doff[3];
	    doff += 4; /* just to confuse things, v0 has been set */
	    while (nnr--) {
		*vo = a0 * *v0 + a1 * *v1 + a2 * *v2 + a3 * *v3 + a4 * *v4;
		v0 += rinc; v1 += rinc; v2 += rinc; v3 += rinc; v4 += rinc;
		vo += rinc;
		}
	    nd -= 5;
	    }
	else {
	    a0 = *dv++;
	    VSCALINC(vo,a0,v0,rinc,nnr);
	    nd--;
	    }
	
	/* Process remaining diagonals */
	while (nd > 0) {
	    nnr = nr;
	    a0  = dv[0]; a1 = dv[1]; a2 = dv[2]; a3 = dv[3]; dv += 4;
	    vo  = vout + sr;
	    v0  = vin  + doff[0]; v1  = vin  + doff[1];
	    v2  = vin  + doff[2]; v3  = vin  + doff[3]; doff += 4;
	    switch ((nd > 4) ? 4 : nd) {
		case 1:APXYINC(vo,a0,v0,nnr,rinc);break;
		case 2:APXY2INC(vo,a0,a1,v0,v1,nnr,rinc);break;
		case 3:APXY3INC(vo,a0,a1,a2,v0,v1,v2,nnr,rinc);break;
 		case 4:APXY4INC(vo,a0,a1,a2,a3,v0,v1,v2,v3,nnr,rinc);break;
		}
	    nd -= 4;
	    }
	break;
	
	case DIAG_USER:
	break;
	}
    }
}

/*@
   SpDAllocateBase - Allocate the basic descriptor (diagonal format)

   Input Parameters:
.  sr - starting row
.  nr - number of rows
.  rinc - increment between rows
.  dinc - increment between diagonals
 @*/
SpDiag *SpDAllocateBase( sr, nr, rinc, dinc )
int sr, nr, rinc, dinc;
{
SpDiag *sd;

sd       = NEW(SpDiag);   CHKPTRV(sd,0);
sd->kind = DIAG_UNKNOWN;
sd->sr   = sr;
sd->nr   = nr;
sd->rinc = rinc;
sd->dinc = dinc;
sd->nd   = 0;
sd->doff = (int *)    0;
sd->v    = (double **) 0;
return sd;
}

/*@
  SpDAllocateBlock - Allocate and return a sparse diagonal block.  
  Do not allocate space for diagonals.

   Input Parameters:
.  sr - starting row
.  nr - number of rows
.  rinc - increment between rows
.  dinc - increment between diagonals
.  nd   - number of diagonals
.  kind - kind of diagonal structure (DIAG_CONSTANT or DIAG_UNKNOWN)
 @*/
SpDiag *SpDAllocateBlock( sr, nr, rinc, dinc, nd, kind )
int sr, nr, rinc, dinc, nd;
DIAGTYPE kind;
{
SpDiag *sd;

sd       = SpDAllocateBase( sr, nr, rinc, dinc );       CHKERRN(0);
sd->kind = kind;
sd->nd   = nd;
/* A single allocation would be a better idea */
sd->doff = (int *)MALLOC( nd * sizeof(int) );           CHKPTRV(sd->doff,0);
if (kind == DIAG_CONSTANT) 
    /* Just store the values directly */
    sd->v = (double **)MALLOC( nd * sizeof(double) );
else 
    sd->v = (double **)MALLOC( nd * sizeof(double *) );
CHKPTRV(sd->v,0);
return sd;
}

/*@
   SpDCreate - Allocate a matrix defined by diagonals

   Input Parameters:
.  n   - number of rows
.  m   - number of columns
.  nsd - number of diagonal blocks
 @*/
SpMat *SpDCreate( n, m, nsd )
int n, m, nsd;
{
SpMatDiag *mat;
SpMat     *nmat;

mat      = NEW(SpMatDiag);      CHKPTRV(mat,0);
mat->nsd = nsd;

/* Allocate the sparse diagonals */
if (nsd > 0) {
    mat->sd  = (SpDiag **)MALLOC( nsd * sizeof(SpDiag) );
    CHKPTRV(mat->sd,0);
    }
else
    mat->sd = 0;

/* Allocate and initialize the SpMat structure (there should be a separate
   routine for this) */
nmat                 = NEW(SpMat);         CHKPTRV(nmat,0);
nmat->type           = MATDIAG;
nmat->ops            = &_SpDiagOps;
nmat->rows           = n;
nmat->cols           = m;
nmat->is_sorted      = 0;
nmat->element_length = 8;
nmat->nz             = 0;
nmat->pool.pool.ptr  = 0;
nmat->pool.pool.n    = 0;
nmat->pool.pool.next = 0;
nmat->pool.alloc     = 0;
nmat->pool.free      = 0;
nmat->alloc_together = 0;
nmat->alloc_incr     = 0;
nmat->map            = 0;
nmat->data           = (void *)mat;

return nmat;
}

/*
   Need routine to set?  Leave the up to the (sophisticated) user
   For a rectangular grid (ni x nj with nc components), we
   need (how many) blocks?
   nj blocks of size ni (exploit zeros at beginning/end of each row
   to simplify)
   2*(nc-1) blocks for the 
 */

/*@
  SpDDestroy - Free storage for a diagonal matrix 

  Input Parameter:
. nmat - diagonal format matrix to free
 @*/
void SpDDestroy( nmat )
SpMat     *nmat;
{
SpMatDiag *mat = (SpMatDiag *)nmat->data;
int       nsd = mat->nsd;
SpDiag    **sd = mat->sd;

/* Free the actual diagonals */
while (nsd--) {
    FREE( (*sd)->v );
    FREE( (*sd)->doff );
    FREE( *sd++ );
    }

/* Free the indirect to the diagonals and the matrix itself */
FREE( mat->sd );
FREE( mat );
FREE( nmat );
}
