C SCCSID = "@(#)progdefs.inc    1.1 9/21/87"
C Depending on switches set in the ARFOR compiler, various versions of
C the FORTRAN source code will be written.  The three possibilities are
C a "utilization" version, with timing information only;
C a "monitoring" version, which includes timing and key iterates;
C and a "debugging" version, which includes communication monitoring
C and more iterate information.
C To do FORTRAN debugging using the VAX compiler and linker, a VAXONLY
C ARFOR switch is also included.
C An ARFOR definition file is included which contains all:
C (3) array declarators
C     (maximum of 31 interior grid points in each direction)
C     (maximum of 64 processors in square array)
C (4) boundary type indicators
C (5) direction codes
C (6) miscellaneous real constants
C (7) convergence flags
C====================================================================
      SUBROUTINE PCGMR( N, NC, NX, NY, SX, EX, SY, EY, M, M1, SOL, RHS,
     * EPSREL, EPSABS, MATOP, OPTPRE, COUPLE, ORIENT, VV, HH, CC, SS, RS
     *, ITS, MSOL, SOLPL, HES, BETA, FLGSTP )
C--------------------------------------------------------------------
C PCGMR is a generalized minimum residual technique (into which
C left-preconditioning can be coordinated through the user-supplied
C subroutine MATOP and the user-supplied input argument RHS).
C The system to be solved is:
C                            A x = b,
C in the form
C                            M x = f,
C where M = B^(-1) A, f = B^(-1)b, and B is a preconditioner.
C Its output consists of the solution vector, and working variables
C of the Krylov method (from which convergence data can be obtained,
C as well as an estimate of the spectrum of the (preconditioned)
C operator.
C--------------------------------------------------------------------
C Arguments supplied on input:
C       N       system size
C [ Parallel Version: N is roughly the true size divided by P ]
C      (SX,EX) x (SY,EY)    part of problem to do on this processor
C       M       maximum Krylov subspace dimension
C [ Parallel Version: M and related quantities are full-size ]
C       M1      maximum Krylov subspace dimension plus one
C       SOL     initial iterate of solution vector
C       RHS     vector of right-hand sides of linear system
C [ Parallel Version: SOL and RHS are the local pieces, and below ]
C       EPSREL  relative convergence tolerance
C       EPSABS  absolute convergence tolerance
C       MATOP   user-supplied matrix operator subroutine
C       (optpre,couple) parms to matop
C [ Parallel Version: AMULT and BSOLV take place within MATOP ]
C Arguments available on output:
C       FLGSTP  termination code
C       SOL     final iterate of solution vector
C       VV      matrix of Krylov subspace vectors
C [ Parallel Version: VV is the local piece ]
C       HH      matrix of coefficients of solution with respect to V
C       CC      vector of cosines for QR least squares factorization
C       SS      vector of sines for QR least squares factorization
C       RS      vector of right-hand sides for least squares problem
C       ITS     counter for total iterations (incl. possible restarts)
C Other arguments:
C       MSOL    working vector
C       SOLPL   working vector
C       HES     Hessenberg matrix for Jacobian eigenvalue estimation
C       BETA    residual norm for Jacobian eigenvalue estimation
C       FLGSTP  stopping flag
C--------------------------------------------------------------------
      INTEGER  N, NC, NX, NY, SX, EX, SY, EY, M, M1, ITS, OPTPRE, COUPLE
     *, ORIENT
      REAL*8   EPSREL, EPSABS, BETA
      REAL*8   SOL(N), RHS(N), VV(N,M1), HH(M1,M), CC(M), SS(M), RS(M1),
     * MSOL(N), SOLPL(N), HES(M,M)
C for bug in f77
      INTEGER  MATOP
      EXTERNAL MATOP
C private data for parallel code
      INTEGER CMNP, CMNY, CMNX, CMMYID, BOUND(4)
      COMMON  /PRIATE/ CMNP, CMNY, CMNX, CMMYID, BOUND
C for parallelism
C--------------------------------------------------------------------
C Local variables:
C       I, I1, II,
C        J, K, K1   loop indices
C       HAPEND      flag for happy breakdown of inner iteration
C       HAPBND      tolerance for happy breakdown of inner iteration
C       HAPTOL      used in calculating HAPBND
C       IERR        error flag from eigenvalue routine RG
C       RESNRM      2-norm of current (preconditioned) residual
C       TT          auxiliary scalar
C--------------------------------------------------------------------
      INTEGER I, I1, II, J, JJ, K, K1, HAPEND, FLGSTP, IERR
      REAL*8  RESNRM, TT, HAPBND, HAPTOL
      SAVE    HAPTOL
C--------------------------------------------------------------------
C BLAS subroutine function
C--------------------------------------------------------------------
      REAL*8   DPDOT
C--------------------------------------------------------------------
      DATA    HAPTOL/1.0D-10/
C initialization phase----------------------------------------------\
C zero iteration counters (two, to later include restart possibility)
      I         = 0
      ITS       = 0
C accumulate in VV(.,1) the initial residual, f - M * x
      CALL MATOP( OPTPRE, COUPLE, ORIENT, SX, EX, SY, EY, SOL, MSOL )
      CALL MCOPY( NC, NX, NY, RHS, VV, SX, EX, SY, EY )
      CALL MAXPY( NC, NX, NY, -1.0D0, MSOL, VV, SX, EX, SY, EY )
      RESNRM = DSQRT( DPDOT( VV, VV, NC, NX, NY, SX, EX, SY, EY ) )
C test for satisfaction of the outer loop stopping criteria
      CALL TSTCNV( RESNRM, EPSREL, EPSABS, I, M, FLGSTP )
      IF (CMMYID .EQ. 0) THEN
          CALL TSTPRT( 1, I, ITS, FLGSTP, N, RESNRM, SOL, IERR )
      ENDIF
      IF ( FLGSTP .NE. 0 ) THEN
          RETURN
      ENDIF
      RS(1)     = RESNRM
      CALL MSCAL( NC, NX, NY, 1.0D0 / RESNRM, VV, SX, EX, SY, EY )
C end initialization phase------------------------------------------/
C main loop---------------------------------------------------------\
      DO 1 I = 1, M
C increment other counters commensurately with Krylov index, I
          ITS = ITS + 1
          I1  = I + 1
C compute A*VV(.,i), stored temporarily in VV(.,i+1)
          CALL MATOP( OPTPRE, COUPLE, ORIENT, SX, EX, SY, EY, VV(1,I),
     *     VV(1,I1) )
C update Hessenberg matrix and do Gram-Schmidt to get VV(.,i+1)
          DO 3 J = 1, I
              HH(J,I)    = DPDOT( VV(1,J), VV(1,I1), NC, NX, NY, SX, EX,
     *         SY, EY )
              HES(J,I)   = HH(J,I)
              CALL MAXPY( NC, NX, NY, -HH(J,I), VV(1,J), VV(1,I1), SX,
     *         EX, SY, EY )
3         CONTINUE
          TT = DSQRT( DPDOT( VV(1,I1), VV(1,I1), NC, NX, NY, SX, EX, SY,
     *     EY ) )
C check for the happy breakdown
          HAPBND  = DMIN1( EPSABS * DABS( HH(I,I)/RS(I) ), HAPTOL )
          IF ( TT .GT. HAPBND ) THEN
              HH(I1,I)  = TT
              HES(I1,I) = HH(I1,I)
              BETA      = HES(I1,I)
              CALL MSCAL( NC, NX, NY, 1.0D0 / TT, VV(1,I1), SX, EX, SY,
     *         EY )
          ELSE
              HAPEND = 1
          ENDIF
C apply all the previously computed plane rotations to the
C new column of the Hessenberg matrix
          IF ( I .GT. 1 ) THEN
              DO 5 K = 2, I
                  K1       = K - 1
                  TT       = HH(K1,I)
                  HH(K1,I) = CC(K1)*TT + SS(K1)*HH(K,I)
                  HH(K,I)  = -SS(K1)*TT + CC(K1)*HH(K,I)
5             CONTINUE
C compute the new plane rotation, and apply it to:
C (1) the right hand side of the Hessenberg system, and
C (2) the new column of the Hessenberg matrix,
C thus obtaining the updated value of the residual
          ENDIF
          TT      = DSQRT( HH(I,I)**2 + HH(I1,I)**2 )
          CC(I)   = HH(I,I) / TT
          SS(I)   = HH(I1,I) / TT
          RS(I1)  = -SS(I) * RS(I)
          RS(I)   = CC(I) * RS(I)
          HH(I,I) = CC(I) * HH(I,I) + SS(I) * HH(I1,I)
          RESNRM  = DABS( RS(I1) )
C test for convergence and allow for output
          CALL TSTCNV( RESNRM, EPSREL, EPSABS, I, M, FLGSTP )
          IF (CMMYID .EQ. 0) THEN
              CALL TSTPRT( 1, I, ITS, FLGSTP, N, RESNRM, SOL, IERR )
          ENDIF
          IF ( FLGSTP .LT. 0 ) THEN
              RETURN
          ENDIF
          IF ( FLGSTP .GT. 0 ) THEN
              GOTO 2
          ENDIF
1     CONTINUE
2     CONTINUE
C end main loop-----------------------------------------------------/
C termination phase-------------------------------------------------\
C solve for the "best" coefficients of the Krylov columns
      RS(I)    = RS(I) / HH(I,I)
      IF ( I .GT. 1 ) THEN
          DO 7 II = 2, I
              K     = I - II + 1
              K1    = K + 1
              TT    = RS(K)
              DO 9 J = K1, I
                  TT  = TT - HH(K,J) * RS(J)
9             CONTINUE
              RS(K) = TT / HH(K,K)
7         CONTINUE
C accumulate appropriate linear combination of Krylov vectors in SOLPL
      ENDIF
      CALL MSET( NC, NX, NY, SOLPL, 0.0D0, SX, EX, SY, EY )
      DO 11 J = 1, I
          CALL MAXPY( NC, NX, NY, RS(J), VV(1,J), SOLPL, SX, EX, SY, EY
     *     )
11    CONTINUE
C increment initial iterate in SOL
      CALL MAXPY( NC, NX, NY, 1.0D0, SOLPL, SOL, SX, EX, SY, EY )
C unwind preconditioning if necessary
      CALL MPOST( OPTPRE, COUPLE, ORIENT, SX, EX, SY, EY, SOL )
C print results
      CALL SYNC()
C since we access all subdomains
      IF (CMMYID .EQ. 0) THEN
          CALL TSTPRT( 2, I, ITS, FLGSTP, N, RESNRM, SOL, IERR )
C end termination phase---------------------------------------------/
      ENDIF
      RETURN
      END
C======================================================================
      SUBROUTINE TSTCNV( RES, EPSREL, EPSABS, I, M, FLGSTP )
C--------------------------------------------------------------------
C TSTCNV tests for convergence of the GMRES iterations by means of
C a combined relative-absolute tolerance on the (preconditioned)
C residual.  When called with I=0, it sets the ultimate tolerance
C based on the initial residual.  On this and subsequent calls,
C convergence is signalled by returning a "1" in flgstp, and a large
C residual (indicative of poor scaling relative to the machine or
C of divergence) is signalled with a "-1".  Exhaustion of workspace
C for the Krylov vectors is signalled with a "-2".
C
C (Note that GMRES cannot diverge in exact arithmetic, but TSTCNV
C is also a debugging tool.)
C--------------------------------------------------------------------
C Arguments on input:
C       RES     norm of current residual
C       EPSREL  relative error tolerance
C       EPSABS  absolute error tolerance
C       I       current iteration
C       M       maximum number of Krylov vectors allowed for
C Argument on output:
C       FLGSTP  control flag
C        (0 continue, 1 normal termination, -1 abnormal termination)
C--------------------------------------------------------------------
      INTEGER I, M, FLGSTP
      REAL*8  RES, EPSREL, EPSABS
C--------------------------------------------------------------------
C Internal declarations
C--------------------------------------------------------------------
      REAL*8  EPS, DIVRG
      SAVE    EPS, DIVRG
      DATA    DIVRG/1.0D30/
C--------------------------------------------------------------------
C set flag to continue iterations by default
      FLGSTP = 0
C set combined relative-absolute residual threshhold
      IF (I .EQ. 0) THEN
          EPS = DMAX1( EPSREL*RES, EPSABS )
      ENDIF
      IF    ( RES .LT. EPS ) THEN
          FLGSTP = 1
      ELSEIF    ( RES .GT. DIVRG ) THEN
          FLGSTP = -1
      ELSEIF    ( I .EQ. M ) THEN
          FLGSTP = -2
      ENDIF
      RETURN
      END
C=====================================================================
      SUBROUTINE TSTPRT( MODE, I, ITS, FLGSTP, N, RESNRM, SOL, IERR )
C--------------------------------------------------------------------
C TSTPRT is a debugging routine which handles all of the output of
C PCGMR.  It is called once per inner iteration of PCGMR.
C [ Parallel Version: This routine is modified considerably ]
C--------------------------------------------------------------------
C Arguments on input:
C       MODE     print code
C                 ("1" residual data)
C       I        dimension of Krylov subspace
C       ITS      overall iteration counter
C       FLGSTP   stopping flag from TSTCNV
C       N        dimension of problem
C       RESNRM   norm of preconditioned residual at present iteration
C       SOL      current solution vector
C       IERR     error code from RG, for incomplete spectrum
C--------------------------------------------------------------------
      INTEGER  MODE, I, ITS, FLGSTP, N, IERR
      REAL*8   RESNRM, SOL(*)
C--------------------------------------------------------------------
C Local variables:
C--------------------------------------------------------------------
      INTEGER  J, COUNT
C--------------------------------------------------------------------
      IF (MODE .LE. 2) THEN
          IF (FLGSTP .EQ. 1) THEN
              WRITE(6,2001) I, RESNRM
          ENDIF
          IF (FLGSTP .EQ. 0) THEN
              WRITE(6,2002) I, RESNRM
          ENDIF
          IF (FLGSTP .LT. 0) THEN
              WRITE(6,2003) I, RESNRM, FLGSTP
          ENDIF
      ENDIF
      IF (MODE .EQ. 2) THEN
          DO 1 J = N/2-5, N/2+5
              WRITE(6,1001) J, SOL(J)
1         CONTINUE
      ENDIF
      RETURN
1001  FORMAT(I6,3E15.6)
2001  FORMAT(/' PCGMR finished at step', I4, ' res. norm =', E15.6, /
     *' INDEX       SOLUTION')
2002  FORMAT(' After PCGMR step', I4, ' res. norm = ', E15.6)
2003  FORMAT(/' PCGMR failed at step ', I4, ' res. norm = ', E15.6, /
     *' Stopping Flag =', I4)
      END
C====================================================================
C PRIVATE = PRIATE
