#include "easy.h"

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * External names of the transpose and initialisation routines called from
 * main().  The spelling depends on the target platform:
 *   - default:               transp_ / init_  (lower case, trailing underscore)
 *   - IMA_CRAY, IMA_CRAY2:   macros removed, so the calls use the literal
 *                            upper-case identifiers TRANSP / INIT
 *   - IMA_RS6K, IMA_HPPA:    transp / init    (lower case, no underscore)
 * NOTE(review): these three spellings presumably track each platform's
 * Fortran external-name convention (the routines are called with &R, i.e.
 * by reference) -- confirm against the routine sources.
 */
#define TRANSP transp_
#define INIT init_

#if defined(IMA_CRAY) || defined(IMA_CRAY2)
#undef TRANSP
#undef INIT
#endif

#if defined(IMA_RS6K) || defined(IMA_HPPA)
#undef TRANSP
#undef INIT
#define TRANSP transp
#define INIT init
#endif

/* Usage text printed by node 0 when no command-line argument is given. */
#define Usage "transp matrix_sub_blocksize_R\n"

/*
 * Element (i,j) of block k in a flat array of R x R blocks; i varies
 * fastest, then j, then k.  Not hygienic: it relies on identifiers named
 * `a`, `blksize` and `R` being visible at the point of use.
 */
#define A(i,j,k) a[(k)*blksize + (j)*R + (i)]

/*
 * Debug dump of this node's part of a matrix, for very small problems only.
 *
 *   s  optional label printed in front of the node id (NULL prints nothing)
 *   a  flat array of numnodes() blocks of R x R doubles, addressed via A()
 *   R  edge length of one block
 *
 * Nothing is printed unless R*R < 10.  Otherwise the nodes take turns in
 * rank order: the node whose rank equals the loop counter prints a header
 * ("<s> node#<rank>@<host>:") followed by R rows, each row listing the R
 * values of every block separated by " : ".  Every node calls gsync() once
 * per turn, whether or not it printed.
 */
static
void check_print(s,a,R)
     char *s;
     double *a;
     int R;
{
  int blksize = R*R;   /* also consumed implicitly by the A() macro */

  if (blksize < 10) {
    int nnodes = numnodes();
    int me = mynode();
    int i, j, k;
    int you;
    char hostname[64];

    /*
     * gethostname() leaves the buffer undefined on failure and need not
     * NUL-terminate it when the name is truncated; make sure printf("%s")
     * below always sees a valid string.
     */
    if (gethostname(hostname, sizeof(hostname)) != 0)
      hostname[0] = '\0';
    hostname[sizeof(hostname) - 1] = '\0';

    for (you=0; you<nnodes; you++) {

      if (me == you) {
        printf("%s node#%d@%s:",s ? s : "",me,hostname);
        for (i=0; i<R; i++) {
          int lf=1;   /* first value of a row starts on a fresh line */
          for (k=0; k<nnodes; k++) {
            for (j=0; j<R; j++) {
              /* %f is the portable conversion for double in printf */
              printf("%c%4.1f",lf ?'\n':' ',A(i,j,k));
              lf=0;
            }
            printf(" : ");
          }
        }
        printf("\n");
        /*
         * Push the text out before handing over the turn; otherwise stdio
         * buffering can interleave the nodes' output.
         * NOTE(review): assumes gsync() is a barrier across all nodes.
         */
        fflush(stdout);
      }

      gsync();

    } /* for you */
  } /* if blksize ... */
}


/*
 * Driver and memory allocator program for matrix transpose.
 *
 * Usage: transp matrix_sub_blocksize_R
 *
 * Allocates two arrays of R*R*numnodes() doubles, has INIT fill them,
 * times a single TRANSP call, and lets node 0 report the largest and
 * smallest per-node elapsed time (with the node each was measured on).
 *
 * Exit status: 0 success, 1 missing argument, 2 invalid block size,
 * 3 problem too large or allocation failure.
 */
int
main(argc, argv)
     int argc;
     char *argv[];
{
  int R = 0;
  int nnodes = numnodes();
  int me = mynode();
  double *a = NULL, *atrans = NULL;
  double t1, t2, tmin, tmax, tdiff;
  int wheremin, wheremax;
  long parsed;
  char *end = NULL;
  size_t nelems;

  if (argc <= 1) {
    if (me == 0) fputs(Usage, stdout);
    exit(1);
  }

  /* strtol (unlike atoi) lets us reject garbage and out-of-range input. */
  errno = 0;
  parsed = strtol(argv[1], &end, 10);
  if (end == argv[1] || *end != '\0' || errno == ERANGE ||
      parsed > INT_MAX || parsed < INT_MIN) {
    if (me == 0) printf("transp: blocksize_dim_R='%s' is not a valid integer\n",
                        argv[1]);
    exit(2);
  }
  R = (int)parsed;

  if (R <= 0) {
    if (me == 0) printf("transp: blocksize_dim_R=%d must be greater than zero\n",R);
    exit(2);
  }

  /*
   * The element count must fit in an int because the arrays are indexed
   * with int arithmetic (see A()), and the byte count must fit in size_t.
   * Check before multiplying so that nothing overflows.
   */
  if (nnodes < 1 || R > INT_MAX / R || R * R > INT_MAX / nnodes ||
      (size_t)R * (size_t)R * (size_t)nnodes > (size_t)-1 / sizeof(*a)) {
    if (me == 0) printf("transp: blocksize_dim_R=%d too large for %d node(s)\n",
                        R, nnodes);
    exit(3);
  }
  nelems = (size_t)R * (size_t)R * (size_t)nnodes;

  a = malloc( nelems * sizeof(*a) );
  atrans = malloc( nelems * sizeof(*atrans) );

  if (!a || !atrans) {
    perror("malloc()");
    exit(3);
  }

  INIT(a,atrans,&R);

  check_print("A@start",a,R);

  gsync();
  gsync();

  /* Time only the transpose itself. */
  t1 = dclock();
  TRANSP(a,&R,atrans);
  t2 = dclock();
  tdiff = t2 - t1;

  check_print("trans(A)@end",atrans,R);

  setdatatype(REAL8); /* Safer */
  gmax(&tdiff,1,&tmax,&wheremax);
  gmin(&tdiff,1,&tmin,&wheremin);

  if (me == 0) {
    /* tmax/tmin are scaled by 1000 for the "ms" display. */
    printf("TRANSP of %dx%d-matrix (#%d): %.3fms (@%d) (%.3fms@%d)\n",
           R * nnodes, R * nnodes,
           nnodes,
           tmax * 1000.0, wheremax,
           tmin * 1000.0, wheremin);
  }

  free(atrans);
  free(a);

  exit(0);
  return 0; /* not reached */
}
