/* ----------------------------------------------------------------------- */
/* Subroutine */ int psneupd_(integer *comm, logical *rvec, char *howmny, 
	logical *select, real *dr, real *di, real *z__, integer *ldz, real *
	sigmar, real *sigmai, real *workev, char *bmat, integer *n, char *
	which, integer *nev, real *tol, real *resid, integer *ncv, real *v, 
	integer *ldv, integer *iparam, integer *ipntr, real *workd, real *
	workl, integer *lworkl, integer *info, ftnlen howmny_len, ftnlen 
	bmat_len, ftnlen which_len)
{
    /* System generated locals */
    integer v_dim1, v_offset, z_dim1, z_offset, i__1;
    real r__1, r__2;
    doublereal d__1;

    /* Builtin functions */
    double pow_dd(doublereal *, doublereal *);
    integer s_cmp(char *, char *, ftnlen, ftnlen);
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static integer j, k, ih, jj, np;
    static real vl[1]	/* was [1][1] */;
    static integer ibd, ldh, ldq, iri;
    static real sep;
    static integer irr, wri, wrr, mode;
    static real eps23;
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static integer ierr;
    static real temp;
    static integer iwev;
    static char type__[6];
    static real temp1;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer ihbds, iconj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real conds;
    static logical reord;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    ftnlen);
    static integer nconv, iwork[1];
    static real rnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer ritzi;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    , ftnlen, ftnlen, ftnlen, ftnlen);
    static integer ritzr;
    extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer 
	    *, real *, real *, integer *);
    extern doublereal slapy2_(real *, real *);
    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, ftnlen, ftnlen);
    static integer iheigi, iheigr, bounds, invsub, iuptri, msglvl, outncv, 
	    ishift, numcnv;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *, ftnlen), slahqr_(logical *, logical 
	    *, integer *, integer *, integer *, real *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), 
	    slaset_(char *, integer *, integer *, real *, real *, real *, 
	    integer *, ftnlen), psmout_(integer *, integer *, integer *, 
	    integer *, real *, integer *, integer *, char *, ftnlen), strevc_(
	    char *, char *, logical *, integer *, real *, integer *, real *, 
	    integer *, real *, integer *, integer *, integer *, real *, 
	    integer *, ftnlen, ftnlen), strsen_(char *, char *, logical *, 
	    integer *, real *, integer *, real *, integer *, real *, real *, 
	    integer *, real *, real *, real *, integer *, integer *, integer *
	    , integer *, ftnlen, ftnlen), psvout_(integer *, integer *, 
	    integer *, real *, integer *, char *, ftnlen), pivout_(integer *, 
	    integer *, integer *, integer *, integer *, char *, ftnlen);
    extern doublereal pslamch_(integer *, char *, ftnlen);
    extern /* Subroutine */ int psngets_(integer *, integer *, char *, 
	    integer *, integer *, real *, real *, real *, real *, real *, 
	    ftnlen);


/*     %--------------------% */
/*     | MPI  Communicator | */
/*     %--------------------% */


/*     %----------------------------------------------------% */
/*     | Include files for debugging and timing information | */
/*     %----------------------------------------------------% */


/* \SCCS Information: @(#) */
/* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

/*     %---------------------------------% */
/*     | See debug.doc for documentation | */
/*     %---------------------------------% */

/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */

/*     %--------------------------------% */
/*     | See stat.doc for documentation | */
/*     %--------------------------------% */

/* \SCCS Information: @(#) */
/* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %---------------% */
/*     | Local Scalars | */
/*     %---------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %---------------------% */
/*     | Intrinsic Functions | */
/*     %---------------------% */


/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

/*     %------------------------% */
/*     | Set default parameters | */
/*     %------------------------% */

    /* Parameter adjustments */
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --workd;
    --resid;
    --di;
    --dr;
    --workev;
    --select;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --iparam;
    --ipntr;
    --workl;

    /* Function Body */
    msglvl = debug_1.mneupd;
    mode = iparam[7];
    nconv = iparam[5];
    *info = 0;

/*     %---------------------------------% */
/*     | Get machine dependent constant. | */
/*     %---------------------------------% */

    eps23 = pslamch_(comm, "Epsilon-Machine", (ftnlen)15);
    d__1 = (doublereal) eps23;
    eps23 = pow_dd(&d__1, &c_b3);

/*     %--------------% */
/*     | Quick return | */
/*     %--------------% */

    ierr = 0;

    if (nconv <= 0) {
	ierr = -14;
    } else if (*n <= 0) {
	ierr = -1;
    } else if (*nev <= 0) {
	ierr = -2;
    } else if (*ncv <= *nev + 1) {
	ierr = -3;
    } else if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LR", (ftnlen)2, 
	    (ftnlen)2) != 0 && s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) != 0 
	    && s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SI", (ftnlen)2, (ftnlen)2) != 0) {
	ierr = -5;
    } else if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G')
	     {
	ierr = -6;
    } else /* if(complicated condition) */ {
/* Computing 2nd power */
	i__1 = *ncv;
	if (*lworkl < i__1 * i__1 * 3 + *ncv * 6) {
	    ierr = -7;
	} else if (*(unsigned char *)howmny != 'A' && *(unsigned char *)
		howmny != 'P' && *(unsigned char *)howmny != 'S' && *rvec) {
	    ierr = -13;
	} else if (*(unsigned char *)howmny == 'S') {
	    ierr = -12;
	}
    }

    if (mode == 1 || mode == 2) {
	s_copy(type__, "REGULR", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3 && *sigmai == 0.f) {
	s_copy(type__, "SHIFTI", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3) {
	s_copy(type__, "REALPT", (ftnlen)6, (ftnlen)6);
    } else if (mode == 4) {
	s_copy(type__, "IMAGPT", (ftnlen)6, (ftnlen)6);
    } else {
	ierr = -10;
    }
    if (mode == 1 && *(unsigned char *)bmat == 'G') {
	ierr = -11;
    }

/*     %------------% */
/*     | Error Exit | */
/*     %------------% */

    if (ierr != 0) {
	*info = ierr;
	goto L9000;
    }

/*     %--------------------------------------------------------% */
/*     | Pointer into WORKL for address of H, RITZ, BOUNDS, Q   | */
/*     | etc... and the remaining workspace.                    | */
/*     | Also update pointer to be used on output.              | */
/*     | Memory is laid out as follows:                         | */
/*     | workl(1:ncv*ncv) := generated Hessenberg matrix        | */
/*     | workl(ncv*ncv+1:ncv*ncv+2*ncv) := real and imaginary   | */
/*     |                                   parts of ritz values | */
/*     | workl(ncv*ncv+2*ncv+1:ncv*ncv+3*ncv) := error bounds   | */
/*     %--------------------------------------------------------% */

/*     %-----------------------------------------------------------% */
/*     | The following is used and set by PSNEUPD.                 | */
/*     | workl(ncv*ncv+3*ncv+1:ncv*ncv+4*ncv) := The untransformed | */
/*     |                             real part of the Ritz values. | */
/*     | workl(ncv*ncv+4*ncv+1:ncv*ncv+5*ncv) := The untransformed | */
/*     |                        imaginary part of the Ritz values. | */
/*     | workl(ncv*ncv+5*ncv+1:ncv*ncv+6*ncv) := The untransformed | */
/*     |                           error bounds of the Ritz values | */
/*     | workl(ncv*ncv+6*ncv+1:2*ncv*ncv+6*ncv) := Holds the upper | */
/*     |                             quasi-triangular matrix for H | */
/*     | workl(2*ncv*ncv+6*ncv+1: 3*ncv*ncv+6*ncv) := Holds the    | */
/*     |       associated matrix representation of the invariant   | */
/*     |       subspace for H.                                     | */
/*     | GRAND total of NCV * ( 3 * NCV + 6 ) locations.           | */
/*     %-----------------------------------------------------------% */

    ih = ipntr[5];
    ritzr = ipntr[6];
    ritzi = ipntr[7];
    bounds = ipntr[8];
    ldh = *ncv;
    ldq = *ncv;
    iheigr = bounds + ldh;
    iheigi = iheigr + ldh;
    ihbds = iheigi + ldh;
    iuptri = ihbds + ldh;
    invsub = iuptri + ldh * *ncv;
    ipntr[9] = iheigr;
    ipntr[10] = iheigi;
    ipntr[11] = ihbds;
    ipntr[12] = iuptri;
    ipntr[13] = invsub;
    wrr = 1;
    wri = *ncv + 1;
    iwev = wri + *ncv;

/*     %-----------------------------------------% */
/*     | irr points to the REAL part of the Ritz | */
/*     |     values computed by _neigh before    | */
/*     |     exiting _naup2.                     | */
/*     | iri points to the IMAGINARY part of the | */
/*     |     Ritz values computed by _neigh      | */
/*     |     before exiting _naup2.              | */
/*     | ibd points to the Ritz estimates        | */
/*     |     computed by _neigh before exiting   | */
/*     |     _naup2.                             | */
/*     %-----------------------------------------% */

    irr = ipntr[14] + *ncv * *ncv;
    iri = irr + *ncv;
    ibd = iri + *ncv;

/*     %------------------------------------% */
/*     | RNORM is B-norm of the RESID(1:N). | */
/*     %------------------------------------% */

    rnorm = workl[ih + 2];
    workl[ih + 2] = 0.f;

    if (msglvl > 2) {
	psvout_(comm, &debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, 
		"_neupd: Real part of Ritz values passed in from _NAUPD.", (
		ftnlen)55);
	psvout_(comm, &debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, 
		"_neupd: Imag part of Ritz values passed in from _NAUPD.", (
		ftnlen)55);
	psvout_(comm, &debug_1.logfil, ncv, &workl[ibd], &debug_1.ndigit, 
		"_neupd: Ritz estimates passed in from _NAUPD.", (ftnlen)45);
    }

    if (*rvec) {

	reord = FALSE_;

/*        %---------------------------------------------------% */
/*        | Use the temporary bounds array to store indices   | */
/*        | These will be used to mark the select array later | */
/*        %---------------------------------------------------% */

	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
	    workl[bounds + j - 1] = (real) j;
	    select[j] = FALSE_;
/* L10: */
	}

/*        %-------------------------------------% */
/*        | Select the wanted Ritz values.      | */
/*        | Sort the Ritz values so that the    | */
/*        | wanted ones appear at the tailing   | */
/*        | NEV positions of workl(irr) and     | */
/*        | workl(iri).  Move the corresponding | */
/*        | error estimates in workl(bound)     | */
/*        | accordingly.                        | */
/*        %-------------------------------------% */

	np = *ncv - *nev;
	ishift = 0;
	psngets_(comm, &ishift, which, nev, &np, &workl[irr], &workl[iri], &
		workl[bounds], &workl[1], &workl[np + 1], (ftnlen)2);

	if (msglvl > 2) {
	    psvout_(comm, &debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, 
		    "_neupd: Real part of Ritz values after calling _NGETS.", 
		    (ftnlen)54);
	    psvout_(comm, &debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, 
		    "_neupd: Imag part of Ritz values after calling _NGETS.", 
		    (ftnlen)54);
	    psvout_(comm, &debug_1.logfil, ncv, &workl[bounds], &
		    debug_1.ndigit, "_neupd: Ritz value indices after callin"
		    "g _NGETS.", (ftnlen)48);
	}

/*        %-----------------------------------------------------% */
/*        | Record indices of the converged wanted Ritz values  | */
/*        | Mark the select array for possible reordering       | */
/*        %-----------------------------------------------------% */

	numcnv = 0;
	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = eps23, r__2 = slapy2_(&workl[irr + *ncv - j], &workl[iri + 
		    *ncv - j]);
	    temp1 = dmax(r__1,r__2);
	    jj = workl[bounds + *ncv - j];
	    if (numcnv < nconv && workl[ibd + jj - 1] <= *tol * temp1) {
		select[jj] = TRUE_;
		++numcnv;
		if (jj > *nev) {
		    reord = TRUE_;
		}
	    }
/* L11: */
	}

/*        %-----------------------------------------------------------% */
/*        | Check the count (numcnv) of converged Ritz values with    | */
/*        | the number (nconv) reported by dnaupd.  If these two      | */
/*        | are different then there has probably been an error       | */
/*        | caused by incorrect passing of the dnaupd data.           | */
/*        %-----------------------------------------------------------% */

	if (msglvl > 2) {
	    pivout_(comm, &debug_1.logfil, &c__1, &numcnv, &debug_1.ndigit, 
		    "_neupd: Number of specified eigenvalues", (ftnlen)39);
	    pivout_(comm, &debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, 
		    "_neupd: Number of \"converged\" eigenvalues", (ftnlen)41)
		    ;
	}

	if (numcnv != nconv) {
	    *info = -15;
	    goto L9000;
	}

/*        %-----------------------------------------------------------% */
/*        | Call LAPACK routine slahqr to compute the real Schur form | */
/*        | of the upper Hessenberg matrix returned by PSNAUPD.       | */
/*        | Make a copy of the upper Hessenberg matrix.               | */
/*        | Initialize the Schur vector matrix Q to the identity.     | */
/*        %-----------------------------------------------------------% */

	i__1 = ldh * *ncv;
	scopy_(&i__1, &workl[ih], &c__1, &workl[iuptri], &c__1);
	slaset_("All", ncv, ncv, &c_b37, &c_b38, &workl[invsub], &ldq, (
		ftnlen)3);
	slahqr_(&c_true, &c_true, ncv, &c__1, ncv, &workl[iuptri], &ldh, &
		workl[iheigr], &workl[iheigi], &c__1, ncv, &workl[invsub], &
		ldq, &ierr);
	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

	if (ierr != 0) {
	    *info = -8;
	    goto L9000;
	}

	if (msglvl > 1) {
	    psvout_(comm, &debug_1.logfil, ncv, &workl[iheigr], &
		    debug_1.ndigit, "_neupd: Real part of the eigenvalues of"
		    " H", (ftnlen)41);
	    psvout_(comm, &debug_1.logfil, ncv, &workl[iheigi], &
		    debug_1.ndigit, "_neupd: Imaginary part of the Eigenvalu"
		    "es of H", (ftnlen)46);
	    psvout_(comm, &debug_1.logfil, ncv, &workl[ihbds], &
		    debug_1.ndigit, "_neupd: Last row of the Schur vector ma"
		    "trix", (ftnlen)43);
	    if (msglvl > 3) {
		psmout_(comm, &debug_1.logfil, ncv, ncv, &workl[iuptri], &ldh,
			 &debug_1.ndigit, "_neupd: The upper quasi-triangula"
			"r matrix ", (ftnlen)42);
	    }
	}

	if (reord) {

/*           %-----------------------------------------------------% */
/*           | Reorder the computed upper quasi-triangular matrix. | */
/*           %-----------------------------------------------------% */

	    strsen_("None", "V", &select[1], ncv, &workl[iuptri], &ldh, &
		    workl[invsub], &ldq, &workl[iheigr], &workl[iheigi], &
		    nconv, &conds, &sep, &workl[ihbds], ncv, iwork, &c__1, &
		    ierr, (ftnlen)4, (ftnlen)1);

	    if (ierr == 1) {
		*info = 1;
		goto L9000;
	    }

	    if (msglvl > 2) {
		psvout_(comm, &debug_1.logfil, ncv, &workl[iheigr], &
			debug_1.ndigit, "_neupd: Real part of the eigenvalue"
			"s of H--reordered", (ftnlen)52);
		psvout_(comm, &debug_1.logfil, ncv, &workl[iheigi], &
			debug_1.ndigit, "_neupd: Imag part of the eigenvalue"
			"s of H--reordered", (ftnlen)52);
		if (msglvl > 3) {
		    psmout_(comm, &debug_1.logfil, ncv, ncv, &workl[iuptri], &
			    ldq, &debug_1.ndigit, "_neupd: Quasi-triangular "
			    "matrix after re-ordering", (ftnlen)49);
		}
	    }
	}

/*        %---------------------------------------% */
/*        | Copy the last row of the Schur vector | */
/*        | into workl(ihbds).  This will be used | */
/*        | to compute the Ritz estimates of      | */
/*        | converged Ritz values.                | */
/*        %---------------------------------------% */

	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

/*        %----------------------------------------------------% */
/*        | Place the computed eigenvalues of H into DR and DI | */
/*        | if a spectral transformation was not used.         | */
/*        %----------------------------------------------------% */

	if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {
	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);
	}

/*        %----------------------------------------------------------% */
/*        | Compute the QR factorization of the matrix representing  | */
/*        | the wanted invariant subspace located in the first NCONV | */
/*        | columns of workl(invsub,ldq).                            | */
/*        %----------------------------------------------------------% */

	sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*ncv + 
		1], &ierr);

/*        %---------------------------------------------------------% */
/*        | * Postmultiply V by Q using sorm2r.                     | */
/*        | * Copy the first NCONV columns of VQ into Z.            | */
/*        | * Postmultiply Z by R.                                  | */
/*        | The N by NCONV matrix Z is now a matrix representation  | */
/*        | of the approximate invariant subspace associated with   | */
/*        | the Ritz values in workl(iheigr) and workl(iheigi)      | */
/*        | The first NCONV columns of V are now approximate Schur  | */
/*        | vectors associated with the real upper quasi-triangular | */
/*        | matrix of order NCONV in workl(iuptri)                  | */
/*        %---------------------------------------------------------% */

	sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &ldq, 
		&workev[1], &v[v_offset], ldv, &workd[*n + 1], &ierr, (ftnlen)
		5, (ftnlen)11);
	slacpy_("All", n, &nconv, &v[v_offset], ldv, &z__[z_offset], ldz, (
		ftnlen)3);

	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {

/*           %---------------------------------------------------% */
/*           | Perform both a column and row scaling if the      | */
/*           | diagonal element of workl(invsub,ldq) is negative | */
/*           | I'm lazy and don't take advantage of the upper    | */
/*           | quasi-triangular form of workl(iuptri,ldq)        | */
/*           | Note that since Q is orthogonal, R is a diagonal  | */
/*           | matrix consisting of plus or minus ones           | */
/*           %---------------------------------------------------% */

	    if (workl[invsub + (j - 1) * ldq + j - 1] < 0.f) {
		sscal_(&nconv, &c_b64, &workl[iuptri + j - 1], &ldq);
		sscal_(&nconv, &c_b64, &workl[iuptri + (j - 1) * ldq], &c__1);
	    }

/* L20: */
	}

	if (*(unsigned char *)howmny == 'A') {

/*           %--------------------------------------------% */
/*           | Compute the NCONV wanted eigenvectors of T | */
/*           | located in workl(iuptri,ldq).              | */
/*           %--------------------------------------------% */

	    i__1 = *ncv;
	    for (j = 1; j <= i__1; ++j) {
		if (j <= nconv) {
		    select[j] = TRUE_;
		} else {
		    select[j] = FALSE_;
		}
/* L30: */
	    }

	    strevc_("Right", "Select", &select[1], ncv, &workl[iuptri], &ldq, 
		    vl, &c__1, &workl[invsub], &ldq, ncv, &outncv, &workev[1],
		     &ierr, (ftnlen)5, (ftnlen)6);

	    if (ierr != 0) {
		*info = -9;
		goto L9000;
	    }

/*           %------------------------------------------------% */
/*           | Scale the returning eigenvectors so that their | */
/*           | Euclidean norms are all one. LAPACK subroutine | */
/*           | strevc returns each eigenvector normalized so  | */
/*           | that the element of largest magnitude has      | */
/*           | magnitude 1;                                   | */
/*           %------------------------------------------------% */

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {

		if (workl[iheigi + j - 1] == 0.f) {

/*                 %----------------------% */
/*                 | real eigenvalue case | */
/*                 %----------------------% */

		    temp = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &c__1);
		    r__1 = 1.f / temp;
		    sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &c__1);
		} else {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 | columns, we further normalize by the      | */
/*                 | square root of two.                       | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			r__1 = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__2 = snrm2_(ncv, &workl[invsub + j * ldq], &c__1);
			temp = slapy2_(&r__1, &r__2);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + j * ldq], &c__1);
			iconj = 1;
		    } else {
			iconj = 0;
		    }

		}

/* L40: */
	    }

	    sgemv_("T", ncv, &nconv, &c_b38, &workl[invsub], &ldq, &workl[
		    ihbds], &c__1, &c_b37, &workev[1], &c__1, (ftnlen)1);

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {
		if (workl[iheigi + j - 1] != 0.f) {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			workev[j] = slapy2_(&workev[j], &workev[j + 1]);
			workev[j + 1] = workev[j];
			iconj = 1;
		    } else {
			iconj = 0;
		    }
		}
/* L45: */
	    }

	    if (msglvl > 2) {
		psvout_(comm, &debug_1.logfil, ncv, &workl[ihbds], &
			debug_1.ndigit, "_neupd: Last row of the eigenvector"
			" matrix for T", (ftnlen)48);
		if (msglvl > 3) {
		    psmout_(comm, &debug_1.logfil, ncv, ncv, &workl[invsub], &
			    ldq, &debug_1.ndigit, "_neupd: The eigenvector m"
			    "atrix for T", (ftnlen)36);
		}
	    }


/*           %---------------------------------------% */
/*           | Copy Ritz estimates into workl(ihbds) | */
/*           %---------------------------------------% */

	    scopy_(&nconv, &workev[1], &c__1, &workl[ihbds], &c__1);

/*           %---------------------------------------------------------% */
/*           | Compute the QR factorization of the eigenvector matrix  | */
/*           | associated with leading portion of T in the first NCONV | */
/*           | columns of workl(invsub,ldq).                           | */
/*           %---------------------------------------------------------% */

	    sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*
		    ncv + 1], &ierr);

/*           %----------------------------------------------% */
/*           | * Postmultiply Z by Q.                       | */
/*           | * Postmultiply Z by R.                       | */
/*           | The N by NCONV matrix Z is now contains the  | */
/*           | Ritz vectors associated with the Ritz values | */
/*           | in workl(iheigr) and workl(iheigi).          | */
/*           %----------------------------------------------% */

	    sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &
		    ldq, &workev[1], &z__[z_offset], ldz, &workd[*n + 1], &
		    ierr, (ftnlen)5, (ftnlen)11);

	    strmm_("Right", "Upper", "No transpose", "Non-unit", n, &nconv, &
		    c_b38, &workl[invsub], &ldq, &z__[z_offset], ldz, (ftnlen)
		    5, (ftnlen)5, (ftnlen)12, (ftnlen)8);

	}

    } else {

/*        %------------------------------------------------------% */
/*        | An approximate invariant subspace is not needed.     | */
/*        | Place the Ritz values computed PSNAUPD into DR and DI | */
/*        %------------------------------------------------------% */

	scopy_(&nconv, &workl[ritzr], &c__1, &dr[1], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &di[1], &c__1);
	scopy_(&nconv, &workl[ritzr], &c__1, &workl[iheigr], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &workl[iheigi], &c__1);
	scopy_(&nconv, &workl[bounds], &c__1, &workl[ihbds], &c__1);
    }

/*     %------------------------------------------------% */
/*     | Transform the Ritz values and possibly vectors | */
/*     | and corresponding error bounds of OP to those  | */
/*     | of A*x = lambda*B*x.                           | */
/*     %------------------------------------------------% */

    if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {

	if (*rvec) {
	    sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	}

    } else {

/*        %---------------------------------------% */
/*        |   A spectral transformation was used. | */
/*        | * Determine the Ritz estimates of the | */
/*        |   Ritz values in the original system. | */
/*        %---------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    if (*rvec) {
		sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	    }
	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[ihbds + k - 1] = (r__1 = workl[ihbds + k - 1], dabs(
			r__1)) / temp / temp;
/* L50: */
	    }

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L60: */
	    }

	} else if (s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L70: */
	    }

	}

/*        %-----------------------------------------------------------% */
/*        | *  Transform the Ritz values back to the original system. | */
/*        |    For TYPE = 'SHIFTI' the transformation is              | */
/*        |             lambda = 1/theta + sigma                      | */
/*        |    For TYPE = 'REALPT' or 'IMAGPT' the user must from     | */
/*        |    Rayleigh quotients or a projection. See remark 3 above.| */
/*        | NOTES:                                                    | */
/*        | *The Ritz vectors are not affected by the transformation. | */
/*        %-----------------------------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[iheigr + k - 1] = workl[iheigr + k - 1] / temp / temp + 
			*sigmar;
		workl[iheigi + k - 1] = -workl[iheigi + k - 1] / temp / temp 
			+ *sigmai;
/* L80: */
	    }

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0 || 
		s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	}

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 1) 
		{
	    psvout_(comm, &debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, 
		    "_neupd: Untransformed real part of the Ritz valuess.", (
		    ftnlen)52);
	    psvout_(comm, &debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, 
		    "_neupd: Untransformed imag part of the Ritz valuess.", (
		    ftnlen)52);
	    psvout_(comm, &debug_1.logfil, &nconv, &workl[ihbds], &
		    debug_1.ndigit, "_neupd: Ritz estimates of untransformed"
		    " Ritz values.", (ftnlen)52);
	} else if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0 && 
		msglvl > 1) {
	    psvout_(comm, &debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, 
		    "_neupd: Real parts of converged Ritz values.", (ftnlen)
		    44);
	    psvout_(comm, &debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, 
		    "_neupd: Imag parts of converged Ritz values.", (ftnlen)
		    44);
	    psvout_(comm, &debug_1.logfil, &nconv, &workl[ihbds], &
		    debug_1.ndigit, "_neupd: Associated Ritz estimates.", (
		    ftnlen)34);
	}

    }

/*     %-------------------------------------------------% */
/*     | Eigenvector Purification step. Formally perform | */
/*     | one of inverse subspace iteration. Only used    | */
/*     | for MODE = 2.                                   | */
/*     %-------------------------------------------------% */

    if (*rvec && *(unsigned char *)howmny == 'A' && s_cmp(type__, "SHIFTI", (
	    ftnlen)6, (ftnlen)6) == 0) {

/*        %------------------------------------------------% */
/*        | Purify the computed Ritz vectors by adding a   | */
/*        | little bit of the residual vector:             | */
/*        |                      T                         | */
/*        |          resid(:)*( e    s ) / theta           | */
/*        |                      NCV                       | */
/*        | where H s = s theta. Remember that when theta  | */
/*        | has nonzero imaginary part, the corresponding  | */
/*        | Ritz vector is stored across two columns of Z. | */
/*        %------------------------------------------------% */

	iconj = 0;
	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {
	    if (workl[iheigi + j - 1] == 0.f) {
		workev[j] = workl[invsub + (j - 1) * ldq + *ncv - 1] / workl[
			iheigr + j - 1];
	    } else if (iconj == 0) {
		temp = slapy2_(&workl[iheigr + j - 1], &workl[iheigi + j - 1])
			;
		workev[j] = (workl[invsub + (j - 1) * ldq + *ncv - 1] * workl[
			iheigr + j - 1] + workl[invsub + j * ldq + *ncv - 1] *
			 workl[iheigi + j - 1]) / temp / temp;
		workev[j + 1] = (workl[invsub + j * ldq + *ncv - 1] * workl[
			iheigr + j - 1] - workl[invsub + (j - 1) * ldq + *ncv 
			- 1] * workl[iheigi + j - 1]) / temp / temp;
		iconj = 1;
	    } else {
		iconj = 0;
	    }
/* L110: */
	}

/*        %---------------------------------------% */
/*        | Perform a rank one update to Z and    | */
/*        | purify all the Ritz vectors together. | */
/*        %---------------------------------------% */

	sger_(n, &nconv, &c_b38, &resid[1], &c__1, &workev[1], &c__1, &z__[
		z_offset], ldz);

    }

L9000:

    return 0;

/*     %----------------% */
/*     | End of PSNEUPD | */
/*     %----------------% */

} /* psneupd_ */
Beispiel #2
0
int main(int argc, char **argv) {
        int iam, nprocs;
        int myrank_mpi, nprocs_mpi;
        int ictxt, nprow, npcol, myrow, mycol;
        int nb, m, n;
        int mpA, nqA, mpU, nqU, mpVT, nqVT;
        int i, j, k, itemp, min_mn;
        int descA[9], descU[9], descVT[9];
        float *A=NULL;
        int info, infoNN, infoVV, infoNV, infoVN;
        float *U_NN=NULL,  *U_VV=NULL,  *U_NV=NULL,  *U_VN=NULL;
        float *VT_NN=NULL, *VT_VV=NULL, *VT_NV=NULL, *VT_VN=NULL;
        float *S_NN=NULL,  *S_VV=NULL, *S_NV=NULL, *S_VN=NULL;
        float *S_res_NN=NULL;
        float orthU_VV, residF, orthVT_VV;
        float orthU_VN, orthVT_NV;
        float  residS_NN, eps;
        float  res_repres_NV, res_repres_VN;
/**/
        int izero=0,ione=1;
        float rtmone=-1.0e+00;
/**/
        double MPIelapsedVV, MPIelapsedNN, MPIelapsedVN, MPIelapsedNV;
        char jobU, jobVT;
        int nbfailure=0, nbtestcase=0,inputfromfile, nbhetereogeneity=0;
        float threshold=100e+00;
        char buf[1024];
        FILE *fd;       
        char *c;
        char *t_jobU, *t_jobVT;
        int *t_m, *t_n, *t_nb, *t_nprow, *t_npcol;
        int nb_expe, expe;
        char hetereogeneityVV, hetereogeneityNN, hetereogeneityVN, hetereogeneityNV;
        int iseed[4], idist;
/**/
        MPI_Init( &argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
        MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
/**/
        m = 100; n = 100; nprow = 1; npcol = 1; nb = 64; jobU='A'; jobVT='A'; inputfromfile = 0;
        for( i = 1; i < argc; i++ ) {
                if( strcmp( argv[i], "-f" ) == 0 ) {
                        inputfromfile = 1;
                }
                if( strcmp( argv[i], "-jobvt" ) == 0 ) {
                        if (i+1<argc) {
                                if( strcmp( argv[i+1], "V" ) == 0 ){ jobVT = 'V'; i++; }
                                else if( strcmp( argv[i+1], "N" ) == 0 ){ jobVT = 'N'; i++; }
                                else if( strcmp( argv[i+1], "A" ) == 0 ){ jobVT = 'A'; i++; }
                                else printf(" ** warning: jobvt should be set to V, N or A in the command line ** \n");
                        }
                        else    
                                printf(" ** warning: jobvt should be set to V, N or A in the command line ** \n");
                }
                if( strcmp( argv[i], "-jobu" ) == 0 ) {
                        if (i+1<argc) {
                                if( strcmp( argv[i+1], "V" ) == 0 ){ jobU = 'V'; i++; }
                                else if( strcmp( argv[i+1], "N" ) == 0 ){ jobU = 'N'; i++; }
                                else if( strcmp( argv[i+1], "A" ) == 0 ){ jobU = 'A'; i++; }
                                else printf(" ** warning: jobu should be set to V, N or A in the command line ** \n");
                        }
                        else    
                                printf(" ** warning: jobu should be set to V, N or A in the command line ** \n");
                }
                if( strcmp( argv[i], "-m" ) == 0 ) {
                        m      = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-n" ) == 0 ) {
                        n      = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-p" ) == 0 ) {
                        nprow  = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-q" ) == 0 ) {
                        npcol  = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-nb" ) == 0 ) {
                        nb     = atoi(argv[i+1]);
                        i++;
                }
        }
/**/
        if (inputfromfile){
                nb_expe = 0;
                fd = fopen("svd.dat", "r");
                if (fd == NULL) { printf("File failed to open svd.dat from processor mpirank(%d/%d): \n",myrank_mpi,nprocs_mpi); exit(-1); }
                do {    
                        c = fgets(buf, 1024, fd);  /* get one line from the file */
                        if (c != NULL)
                                if (c[0] != '#')
                                        nb_expe++;
                } while (c != NULL);              /* repeat until NULL          */
                fclose(fd);
                t_jobU  = (char *)calloc(nb_expe,sizeof(char)) ;
                t_jobVT = (char *)calloc(nb_expe,sizeof(char)) ;
                t_m     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_n     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nb    = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nprow = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_npcol = (int  *)calloc(nb_expe,sizeof(int )) ;
                fd = fopen("svd.dat", "r");
                expe=0;
                do {    
                        c = fgets(buf, 1024, fd);  /* get one line from the file */
                        if (c != NULL)
                                if (c[0] != '#'){
                                        //printf("NBEXPE = %d\n",expe);
                                        sscanf(c,"%c %c %d %d %d %d %d",
                                                &(t_jobU[expe]),&(t_jobVT[expe]),&(t_m[expe]),&(t_n[expe]),
                                                &(t_nb[expe]),(&t_nprow[expe]),&(t_npcol[expe]));
                                        expe++;
                                }
                } while (c != NULL);              /* repeat until NULL          */
                fclose(fd);
        }
        else {
                nb_expe = 1;
                t_jobU  = (char *)calloc(nb_expe,sizeof(char)) ;
                t_jobVT = (char *)calloc(nb_expe,sizeof(char)) ;
                t_m     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_n     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nb    = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nprow = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_npcol = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_jobU[0]  = jobU;
                t_jobVT[0] = jobVT;
                t_m[0]     = m;
                t_n[0]     = n;
                t_nb[0]    = nb;
                t_nprow[0] = nprow;
                t_npcol[0] = npcol;
        }

        if (myrank_mpi==0){
                printf("\n");
                printf("--------------------------------------------------------------------------------------------------------------------\n");
                                printf("                            Testing psgsevd -- float precision SVD ScaLAPACK routine                \n");
                printf("jobU jobVT    m     n     nb   p   q   || info   heter   resid     orthU    orthVT   |SNN-SVV|    time(s)   cond(A) \n");
                printf("--------------------------------------------------------------------------------------------------------------------\n");
        }
/**/
        for (expe = 0; expe<nb_expe; expe++){

        jobU  = t_jobU[expe]  ; 
        jobVT = t_jobVT[expe] ; 
        m     = t_m[expe]     ; 
        n     = t_n[expe]     ; 
        nb    = t_nb[expe]    ; 
        nprow = t_nprow[expe] ; 
        npcol = t_npcol[expe] ; 

        if (nb>n)
                nb = n;
        if (nprow*npcol>nprocs_mpi){
                if (myrank_mpi==0)
                        printf(" **** ERROR : we do not have enough processes available to make a p-by-q process grid ***\n");
                        printf(" **** Bye-bye                                                                         ***\n");
                MPI_Finalize(); exit(1);
        }
/**/
        Cblacs_pinfo( &iam, &nprocs ) ;
        Cblacs_get( -1, 0, &ictxt );
        Cblacs_gridinit( &ictxt, "Row", nprow, npcol );
        Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
/**/
        min_mn = min(m,n);
/**/
        //if (iam==0)
                //printf("\tm=%d\tn = %d\t\t(%d,%d)\t%dx%d\n",m,n,nprow,npcol,nb,nb);
        //printf("Hello World, I am proc %d over %d for MPI, proc %d over %d for BLACS in position (%d,%d) in the process grid\n", 
                        //myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol);
/*
*
*     Work only the process in the process grid
*
*/
        //if ((myrow < nprow)&(mycol < npcol)){
        if ((myrow>-1)&(mycol>-1)&(myrow<nprow)&(mycol<npcol)){

/*
*
*     Compute the size of the local matrices (thanks to numroc)
*
*/ 
                mpA    = numroc_( &m     , &nb, &myrow, &izero, &nprow );
                nqA    = numroc_( &n     , &nb, &mycol, &izero, &npcol );
                mpU    = numroc_( &m     , &nb, &myrow, &izero, &nprow );
                nqU    = numroc_( &min_mn, &nb, &mycol, &izero, &npcol );
                mpVT   = numroc_( &min_mn, &nb, &myrow, &izero, &nprow );
                nqVT   = numroc_( &n     , &nb, &mycol, &izero, &npcol );
/*
*
*     Allocate and fill the matrices A and B
*
*/ 
                A = (float *)calloc(mpA*nqA,sizeof(float)) ;
                if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
/**/            
//              seed = iam*(mpA*nqA*2); srand(seed);
                idist = 2;
                iseed[0] = mpA%4096;
                iseed[1] = iam%4096;
                iseed[2] = nqA%4096;
                iseed[3] = 23;
/**/            
                k = 0;
                for (i = 0; i < mpA; i++) {
                        for (j = 0; j < nqA; j++) {
                                slarnv_( &idist, iseed, &ione, &(A[k]) );
                                k++;    
                        }
                }
/*
*
*     Initialize the array descriptor for the distributed matrices xA, U and VT
*
*/ 
                itemp = max( 1, mpA );
                descinit_( descA,  &m, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
                itemp = max( 1, mpA );
                descinit_( descU,  &m, &min_mn, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
                itemp = max( 1, mpVT );
                descinit_( descVT, &min_mn, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
/**/
                eps = pslamch_( &ictxt, "Epsilon" );
/**/
                if ( ((jobU=='V')&(jobVT=='N')) ||(jobU == 'A' )||(jobVT=='A')){
                nbtestcase++;   
                U_VN = (float *)calloc(mpU*nqU,sizeof(float)) ;
                if (U_VN==NULL){ printf("error of memory allocation U_VN on proc %dx%d\n",myrow,mycol); exit(0); }
                S_VN = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_VN==NULL){ printf("error of memory allocation S_VN on proc %dx%d\n",myrow,mycol); exit(0); }
                infoVN = driver_psgesvd( 'V', 'N', m, n, A, 1, 1, descA,
                        S_VN, U_VN, 1, 1, descU, VT_VN, 1, 1, descVT,
                        &MPIelapsedVN);
                orthU_VN  = verif_orthogonality(m,min_mn,U_VN , 1, 1, descU);
                res_repres_VN = verif_repres_VN( m, n, A, 1, 1, descA, U_VN, 1, 1, descU, S_VN);
                if (infoVN==min_mn+1) hetereogeneityVN = 'H'; else hetereogeneityVN = 'N';
                if ( iam==0 )
                        printf(" V    N   %6d %6d  %3d  %3d %3d  ||  %3d     %c   %7.1e   %7.1e                        %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoVN,hetereogeneityVN,res_repres_VN/(S_VN[0]/S_VN[min_mn-1]),
                                orthU_VN,MPIelapsedVN,S_VN[0]/S_VN[min_mn-1]);
                if (infoVN==min_mn+1) nbhetereogeneity++ ;
                else if ((res_repres_VN/eps/(S_VN[0]/S_VN[min_mn-1])>threshold)||(orthU_VN/eps>threshold)||(infoVN!=0)) nbfailure++;
                }
/**/
                if (((jobU=='N')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){
                nbtestcase++;   
                VT_NV = (float *)calloc(mpVT*nqVT,sizeof(float)) ;
                if (VT_NV==NULL){ printf("error of memory allocation VT_NV on proc %dx%d\n",myrow,mycol); exit(0); }
                S_NV = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_NV==NULL){ printf("error of memory allocation S_NV on proc %dx%d\n",myrow,mycol); exit(0); }
                infoNV = driver_psgesvd( 'N', 'V', m, n, A, 1, 1, descA,
                        S_NV, U_NV, 1, 1, descU, VT_NV, 1, 1, descVT,
                        &MPIelapsedNV);
                orthVT_NV = verif_orthogonality(min_mn,n,VT_NV, 1, 1, descVT);
                res_repres_NV = verif_repres_NV( m, n, A, 1, 1, descA, VT_NV, 1, 1, descVT, S_NV);
                if (infoNV==min_mn+1) hetereogeneityNV = 'H'; else hetereogeneityNV = 'N';
                if ( iam==0 )
                        printf(" N    V   %6d %6d  %3d  %3d %3d  ||  %3d     %c   %7.1e             %7.1e              %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoNV,hetereogeneityNV,res_repres_NV/(S_NV[0]/S_NV[min_mn-1]),
                                orthVT_NV,MPIelapsedNV,S_NV[0]/S_NV[min_mn-1]);
                if (infoNV==min_mn+1) nbhetereogeneity++ ;
                else if ((res_repres_NV/eps/(S_NV[0]/S_NV[min_mn-1])>threshold)||(orthVT_NV/eps>threshold)||(infoNV!=0)) nbfailure++;
                }
/**/
                if ( ((jobU=='N')&(jobVT=='N')) || ((jobU=='V')&(jobVT=='V')) || (jobU == 'A' ) || (jobVT=='A') ) {
                nbtestcase++;   
                U_VV = (float *)calloc(mpU*nqU,sizeof(float)) ;
                if (U_VV==NULL){ printf("error of memory allocation U_VV on proc %dx%d\n",myrow,mycol); exit(0); }
                VT_VV = (float *)calloc(mpVT*nqVT,sizeof(float)) ;
                if (VT_VV==NULL){ printf("error of memory allocation VT_VV on proc %dx%d\n",myrow,mycol); exit(0); }
                S_VV = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_VV==NULL){ printf("error of memory allocation S_VV on proc %dx%d\n",myrow,mycol); exit(0); }
                infoVV = driver_psgesvd( 'V', 'V', m, n, A, 1, 1, descA,
                        S_VV, U_VV, 1, 1, descU, VT_VV, 1, 1, descVT,
                        &MPIelapsedVV);
                orthU_VV  = verif_orthogonality(m,min_mn,U_VV , 1, 1, descU);
                orthVT_VV = verif_orthogonality(min_mn,n,VT_VV, 1, 1, descVT);
                residF =  verif_representativity( m, n,     A, 1, 1, descA,
                                                         U_VV, 1, 1, descU,
                                                        VT_VV, 1, 1, descVT,
                                                         S_VV);
                if (infoVV==min_mn+1) hetereogeneityVV = 'H'; else hetereogeneityVV = 'N';
                if ( iam==0 )
                        printf(" V    V   %6d %6d  %3d  %3d %3d  ||  %3d     %c   %7.1e   %7.1e   %7.1e              %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoVV,hetereogeneityVV,residF,orthU_VV,orthVT_VV,MPIelapsedVV,S_VV[0]/S_VV[min_mn-1]);
                if (infoVV==min_mn+1) nbhetereogeneity++ ;
                else if ((residF/eps>threshold)||(orthU_VV/eps>threshold)||(orthVT_VV/eps>threshold)||(infoVV!=0)) nbfailure++;
                }
/**/
                if (((jobU=='N')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){
                nbtestcase++;   
                S_NN = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_NN==NULL){ printf("error of memory allocation S_NN on proc %dx%d\n",myrow,mycol); exit(0); }
                infoNN = driver_psgesvd( 'N', 'N', m, n, A, 1, 1, descA,
                        S_NN, U_NN, 1, 1, descU, VT_NN, 1, 1, descVT,
                        &MPIelapsedNN);
                S_res_NN = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_res_NN==NULL){ printf("error of memory allocation S on proc %dx%d\n",myrow,mycol); exit(0); }
                scopy_(&min_mn,S_VV,&ione,S_res_NN,&ione);
                saxpy_ (&min_mn,&rtmone,S_NN,&ione,S_res_NN,&ione);
                residS_NN = snrm2_(&min_mn,S_res_NN,&ione) / snrm2_(&min_mn,S_VV,&ione);
                free(S_res_NN);
                if (infoNN==min_mn+1) hetereogeneityNN = 'H'; else hetereogeneityNN = 'N';
                if ( iam==0 )
                        printf(" N    N   %6d %6d  %3d  %3d %3d  ||  %3d     %c                                  %7.1e   %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoNN,hetereogeneityNN,residS_NN,MPIelapsedNN,S_NN[0]/S_NN[min_mn-1]);
                if (infoNN==min_mn+1) nbhetereogeneity++ ;
                else if ((residS_NN/eps>threshold)||(infoNN!=0)) nbfailure++;
                }
/**/
                if (((jobU=='V')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ free(S_VN); free(U_VN); }
                if (((jobU=='N')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ free(VT_NV); free(S_NV); }
                if (((jobU=='N')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ free(S_NN); }
                if (((jobU=='N')&(jobVT=='N'))||((jobU=='V')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ free(U_VV); free(S_VV); free(VT_VV);}
                free(A);
                Cblacs_gridexit( 0 );
        }
/*
*     Print ending messages
*/
        }
        if ( iam==0 ){
                printf("--------------------------------------------------------------------------------------------------------------------\n");
                printf("               [ nbhetereogeneity = %d / %d ]\n",nbhetereogeneity, nbtestcase);
                printf("               [ nbfailure        = %d / %d ]\n",nbfailure, nbtestcase-nbhetereogeneity);
                printf("--------------------------------------------------------------------------------------------------------------------\n");
                printf("\n");
        }
/**/
        free(t_jobU  );
        free(t_jobVT );
        free(t_m     );
        free(t_n     );
        free(t_nb    );
        free(t_nprow );
        free(t_npcol );
        MPI_Finalize();
        exit(0);
}
/* Subroutine */ int pssaupd_(integer *comm, integer *ido, char *bmat, 
	integer *n, char *which, integer *nev, real *tol, real *resid, 
	integer *ncv, real *v, integer *ldv, integer *iparam, integer *ipntr, 
	real *workd, real *workl, integer *lworkl, integer *info, ftnlen 
	bmat_len, ftnlen which_len)
{
    /* Format strings */
    static char fmt_1000[] = "(//,5x,\002==================================="
	    "=======\002,/5x,\002= Symmetric implicit Arnoldi update code "
	    "=\002,/5x,\002= Version Number:\002,\002 2.1\002,19x,\002 =\002,"
	    "/5x,\002= Version Date:  \002,\002 3/19/97\002,14x,\002 =\002,/5"
	    "x,\002==========================================\002,/5x,\002= S"
	    "ummary of timing statistics           =\002,/5x,\002============"
	    "==============================\002,//)";
    static char fmt_1100[] = "(5x,\002Total number update iterations        "
	    "     = \002,i5,/5x,\002Total number of OP*x operations          "
	    "  = \002,i5,/5x,\002Total number of B*x operations             = "
	    "\002,i5,/5x,\002Total number of reorthogonalization steps  = "
	    "\002,i5,/5x,\002Total number of iterative refinement steps = "
	    "\002,i5,/5x,\002Total number of restart steps              = "
	    "\002,i5,/5x,\002Total time in user OP*x operation          = "
	    "\002,f12.6,/5x,\002Total time in user B*x operation           ="
	    " \002,f12.6,/5x,\002Total time in Arnoldi update routine       = "
	    "\002,f12.6,/5x,\002Total time in p_saup2 routine              ="
	    " \002,f12.6,/5x,\002Total time in basic Arnoldi iteration loop = "
	    "\002,f12.6,/5x,\002Total time in reorthogonalization phase    ="
	    " \002,f12.6,/5x,\002Total time in (re)start vector generation  = "
	    "\002,f12.6,/5x,\002Total time in trid eigenvalue subproblem   ="
	    " \002,f12.6,/5x,\002Total time in getting the shifts           = "
	    "\002,f12.6,/5x,\002Total time in applying the shifts          ="
	    " \002,f12.6,/5x,\002Total time in convergence testing          = "
	    "\002,f12.6)";

    /* System generated locals */
    integer v_dim1, v_offset, i__1, i__2;

    /* Builtin functions */
    integer s_cmp(char *, char *, ftnlen, ftnlen), s_wsfe(cilist *), e_wsfe(
	    void), do_fio(integer *, char *, ftnlen);

    /* Local variables */
    static integer j;
    static real t0, t1;
    static integer nb, ih, iq, np, iw, ldh, ldq, nev0, mode, ierr, myid, iupd,
	     next, ritz;
    extern /* Subroutine */ int mpi_comm_rank__(integer *, integer *, integer 
	    *);
    static integer bounds, ishift, msglvl, mxiter;
    extern /* Subroutine */ int pivout_(integer *, integer *, integer *, 
	    integer *, integer *, char *, ftnlen), second_(real *), sstats_(
	    void), psvout_(integer *, integer *, integer *, real *, integer *,
	     char *, ftnlen), pssaup2_(integer *, integer *, char *, integer *
	    , char *, integer *, integer *, real *, real *, integer *, 
	    integer *, integer *, integer *, real *, integer *, real *, 
	    integer *, real *, real *, real *, integer *, real *, integer *, 
	    real *, integer *, ftnlen, ftnlen);
    extern doublereal pslamch_(integer *, char *, ftnlen);

    /* Fortran I/O blocks */
    static cilist io___22 = { 0, 6, 0, fmt_1000, 0 };
    static cilist io___23 = { 0, 6, 0, fmt_1100, 0 };




/*     %------------------% */
/*     | MPI Variables    | */
/*     %------------------% */

/* /+ */
/* * */
/* *  (C) 1993 by Argonne National Laboratory and Mississipi State University. */
/* *      All rights reserved.  See COPYRIGHT in top-level directory. */
/* +/ */

/* /+ user include file for MPI programs, with no dependencies +/ */

/* /+ return codes +/ */







/*     We handle datatypes by putting the variables that hold them into */
/*     common.  This way, a Fortran program can directly use the various */
/*     datatypes and can even give them to C programs. */

/*     MPI_BOTTOM needs to be a known address; here we put it at the */
/*     beginning of the common block.  The point-to-point and collective */
/*     routines know about MPI_BOTTOM, but MPI_TYPE_STRUCT as yet does not. */

/*     The types MPI_INTEGER1,2,4 and MPI_REAL4,8 are OPTIONAL. */
/*     Their values are zero if they are not available.  Note that */
/*     using these reduces the portability of code (though may enhance */
/*     portability between Crays and other systems) */



/*     All other MPI routines are subroutines */

/*     The attribute copy/delete functions are symbols that can be passed */
/*     to MPI routines */

/*     %----------------------------------------------------% */
/*     | Include files for debugging and timing information | */
/*     %----------------------------------------------------% */


/* \SCCS Information: @(#) */
/* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

/*     %---------------------------------% */
/*     | See debug.doc for documentation | */
/*     %---------------------------------% */

/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */

/*     %--------------------------------% */
/*     | See stat.doc for documentation | */
/*     %--------------------------------% */

/* \SCCS Information: @(#) */
/* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %---------------% */
/*     | Local Scalars | */
/*     %---------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

    /* Parameter adjustments */
    --workd;
    --resid;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --iparam;
    --ipntr;
    --workl;

    /* Function Body */
    if (*ido == 0) {

/*        %-------------------------------% */
/*        | Initialize timing statistics  | */
/*        | & message level for debugging | */
/*        %-------------------------------% */

	sstats_();
	second_(&t0);
	msglvl = debug_1.msaupd;

	ierr = 0;
	ishift = iparam[1];
	mxiter = iparam[3];
/*         nb     = iparam(4) */
	nb = 1;

/*        %--------------------------------------------% */
/*        | Revision 2 performs only implicit restart. | */
/*        %--------------------------------------------% */

	iupd = 1;
	mode = iparam[7];

/*        %----------------% */
/*        | Error checking | */
/*        %----------------% */

	if (*n <= 0) {
	    ierr = -1;
	} else if (*nev <= 0) {
	    ierr = -2;
	} else if (*ncv <= *nev) {
	    ierr = -3;
	}

/*        %----------------------------------------------% */
/*        | NP is the number of additional steps to      | */
/*        | extend the length NEV Lanczos factorization. | */
/*        %----------------------------------------------% */

	np = *ncv - *nev;

	if (mxiter <= 0) {
	    ierr = -4;
	}
	if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
		"SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LA", (
		ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "SA", (ftnlen)2, (
		ftnlen)2) != 0 && s_cmp(which, "BE", (ftnlen)2, (ftnlen)2) != 
		0) {
	    ierr = -5;
	}
	if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G') {
	    ierr = -6;
	}

/* Computing 2nd power */
	i__1 = *ncv;
	if (*lworkl < i__1 * i__1 + (*ncv << 3)) {
	    ierr = -7;
	}
	if (mode < 1 || mode > 5) {
	    ierr = -10;
	} else if (mode == 1 && *(unsigned char *)bmat == 'G') {
	    ierr = -11;
	} else if (ishift < 0 || ishift > 1) {
	    ierr = -12;
	} else if (*nev == 1 && s_cmp(which, "BE", (ftnlen)2, (ftnlen)2) == 0)
		 {
	    ierr = -13;
	}

/*        %------------% */
/*        | Error Exit | */
/*        %------------% */

	if (ierr != 0) {
	    *info = ierr;
	    *ido = 99;
	    goto L9000;
	}

/*        %------------------------% */
/*        | Set default parameters | */
/*        %------------------------% */

	if (nb <= 0) {
	    nb = 1;
	}
	if (*tol <= 0.f) {
	    *tol = pslamch_(comm, "EpsMach", (ftnlen)7);
	}

/*        %----------------------------------------------% */
/*        | NP is the number of additional steps to      | */
/*        | extend the length NEV Lanczos factorization. | */
/*        | NEV0 is the local variable designating the   | */
/*        | size of the invariant subspace desired.      | */
/*        %----------------------------------------------% */

	np = *ncv - *nev;
	nev0 = *nev;

/*        %-----------------------------% */
/*        | Zero out internal workspace | */
/*        %-----------------------------% */

/* Computing 2nd power */
	i__2 = *ncv;
	i__1 = i__2 * i__2 + (*ncv << 3);
	for (j = 1; j <= i__1; ++j) {
	    workl[j] = 0.f;
/* L10: */
	}

/*        %-------------------------------------------------------% */
/*        | Pointer into WORKL for address of H, RITZ, BOUNDS, Q  | */
/*        | etc... and the remaining workspace.                   | */
/*        | Also update pointer to be used on output.             | */
/*        | Memory is laid out as follows:                        | */
/*        | workl(1:2*ncv) := generated tridiagonal matrix        | */
/*        | workl(2*ncv+1:2*ncv+ncv) := ritz values               | */
/*        | workl(3*ncv+1:3*ncv+ncv) := computed error bounds     | */
/*        | workl(4*ncv+1:4*ncv+ncv*ncv) := rotation matrix Q     | */
/*        | workl(4*ncv+ncv*ncv+1:7*ncv+ncv*ncv) := workspace     | */
/*        %-------------------------------------------------------% */

	ldh = *ncv;
	ldq = *ncv;
	ih = 1;
	ritz = ih + (ldh << 1);
	bounds = ritz + *ncv;
	iq = bounds + *ncv;
/* Computing 2nd power */
	i__1 = *ncv;
	iw = iq + i__1 * i__1;
	next = iw + *ncv * 3;

	ipntr[4] = next;
	ipntr[5] = ih;
	ipntr[6] = ritz;
	ipntr[7] = bounds;
	ipntr[11] = iw;
    }

/*     %-------------------------------------------------------% */
/*     | Carry out the Implicitly restarted Lanczos Iteration. | */
/*     %-------------------------------------------------------% */

    pssaup2_(comm, ido, bmat, n, which, &nev0, &np, tol, &resid[1], &mode, &
	    iupd, &ishift, &mxiter, &v[v_offset], ldv, &workl[ih], &ldh, &
	    workl[ritz], &workl[bounds], &workl[iq], &ldq, &workl[iw], &ipntr[
	    1], &workd[1], info, (ftnlen)1, (ftnlen)2);

/*     %--------------------------------------------------% */
/*     | ido .ne. 99 implies use of reverse communication | */
/*     | to compute operations involving OP or shifts.    | */
/*     %--------------------------------------------------% */

    if (*ido == 3) {
	iparam[8] = np;
    }
    if (*ido != 99) {
	goto L9000;
    }

    iparam[3] = mxiter;
    iparam[5] = np;
    iparam[9] = timing_1.nopx;
    iparam[10] = timing_1.nbx;
    iparam[11] = timing_1.nrorth;

/*     %------------------------------------% */
/*     | Exit if there was an informational | */
/*     | error within pssaup2.              | */
/*     %------------------------------------% */

    if (*info < 0) {
	goto L9000;
    }
    if (*info == 2) {
	*info = 3;
    }

    if (msglvl > 0) {
	pivout_(comm, &debug_1.logfil, &c__1, &mxiter, &debug_1.ndigit, "_sa"
		"upd: number of update iterations taken", (ftnlen)41);
	pivout_(comm, &debug_1.logfil, &c__1, &np, &debug_1.ndigit, "_saupd:"
		" number of \"converged\" Ritz values", (ftnlen)41);
	psvout_(comm, &debug_1.logfil, &np, &workl[ritz], &debug_1.ndigit, 
		"_saupd: final Ritz values", (ftnlen)25);
	psvout_(comm, &debug_1.logfil, &np, &workl[bounds], &debug_1.ndigit, 
		"_saupd: corresponding error bounds", (ftnlen)34);
    }

    second_(&t1);
    timing_1.tsaupd = t1 - t0;

    if (msglvl > 0) {
	mpi_comm_rank__(comm, &myid, &ierr);
	if (myid == 0) {

/*        %--------------------------------------------------------% */
/*        | Version Number & Version Date are defined in version.h | */
/*        %--------------------------------------------------------% */

	    s_wsfe(&io___22);
	    e_wsfe();
	    s_wsfe(&io___23);
	    do_fio(&c__1, (char *)&mxiter, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&timing_1.nopx, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&timing_1.nbx, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&timing_1.nrorth, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&timing_1.nitref, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&timing_1.nrstrt, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&timing_1.tmvopx, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tmvbx, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tsaupd, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tsaup2, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tsaitr, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.titref, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tgetv0, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tseigt, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tsgets, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tsapps, (ftnlen)sizeof(real));
	    do_fio(&c__1, (char *)&timing_1.tsconv, (ftnlen)sizeof(real));
	    e_wsfe();
	}
    }

L9000:

    return 0;

/*     %----------------% */
/*     | End of pssaupd | */
/*     %----------------% */

} /* pssaupd_ */
/* Subroutine */ int psnaitr_(integer *comm, integer *ido, char *bmat,
                              integer *n, integer *k, integer *np, integer *nb, real *resid, real *
                              rnorm, real *v, integer *ldv, real *h__, integer *ldh, integer *ipntr,
                              real *workd, real *workl, integer *info, ftnlen bmat_len)
{
    /* Initialized data */

    static logical first = TRUE_;

    /* System generated locals */
    integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static integer i__, j;
    static real t0, t1, t2, t3, t4, t5, rnorm_buf__;
    static integer jj, ipj, irj, ivj;
    static real ulp, tst1;
    static integer ierr, iter;
    static real unfl, ovfl;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static integer itry;
    static real temp1;
    static logical orth1, orth2, step3, step4;
    static real betaj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer infol;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
                                       real *, integer *, real *, integer *, real *, real *, integer *,
                                       ftnlen);
    static real xtemp[2], wnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
                                       integer *), saxpy_(integer *, real *, real *, integer *, real *,
                                               integer *), mpi_allreduce__(real *, real *, integer *, integer *,
                                                       integer *, integer *, integer *);
    static real rnorm1;
    extern /* Subroutine */ int slabad_(real *, real *);
    static logical rstart;
    static integer msglvl;
    static real smlnum;
    extern /* Subroutine */ int psmout_(integer *, integer *, integer *,
                                        integer *, real *, integer *, integer *, char *, ftnlen), pivout_(
                                            integer *, integer *, integer *, integer *, integer *, char *,
                                            ftnlen), second_(real *);
    extern doublereal slanhs_(char *, integer *, real *, integer *, real *,
                              ftnlen);
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
                                        real *, integer *, integer *, real *, integer *, integer *,
                                        ftnlen), psvout_(integer *, integer *, integer *, real *, integer
                                                *, char *, ftnlen), psgetv0_(integer *, integer *, char *,
                                                        integer *, logical *, integer *, integer *, real *, integer *,
                                                        real *, real *, integer *, real *, real *, integer *, ftnlen);
    extern doublereal psnorm2_(integer *, integer *, real *, integer *),
           pslamch_(integer *, char *, ftnlen);



    /*     %---------------% */
    /*     | MPI Variables | */
    /*     %---------------% */

    /* /+ */
    /* * */
    /* *  (C) 1993 by Argonne National Laboratory and Mississipi State University. */
    /* *      All rights reserved.  See COPYRIGHT in top-level directory. */
    /* +/ */

    /* /+ user include file for MPI programs, with no dependencies +/ */

    /* /+ return codes +/ */







    /*     We handle datatypes by putting the variables that hold them into */
    /*     common.  This way, a Fortran program can directly use the various */
    /*     datatypes and can even give them to C programs. */

    /*     MPI_BOTTOM needs to be a known address; here we put it at the */
    /*     beginning of the common block.  The point-to-point and collective */
    /*     routines know about MPI_BOTTOM, but MPI_TYPE_STRUCT as yet does not. */

    /*     The types MPI_INTEGER1,2,4 and MPI_REAL4,8 are OPTIONAL. */
    /*     Their values are zero if they are not available.  Note that */
    /*     using these reduces the portability of code (though may enhance */
    /*     portability between Crays and other systems) */



    /*     All other MPI routines are subroutines */

    /*     The attribute copy/delete functions are symbols that can be passed */
    /*     to MPI routines */

    /*     %----------------------------------------------------% */
    /*     | Include files for debugging and timing information | */
    /*     %----------------------------------------------------% */


    /* \SCCS Information: @(#) */
    /* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

    /*     %---------------------------------% */
    /*     | See debug.doc for documentation | */
    /*     %---------------------------------% */

    /*     %------------------% */
    /*     | Scalar Arguments | */
    /*     %------------------% */

    /*     %--------------------------------% */
    /*     | See stat.doc for documentation | */
    /*     %--------------------------------% */

    /* \SCCS Information: @(#) */
    /* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



    /*     %-----------------% */
    /*     | Array Arguments | */
    /*     %-----------------% */


    /*     %------------% */
    /*     | Parameters | */
    /*     %------------% */


    /*     %---------------% */
    /*     | Local Scalars | */
    /*     %---------------% */




    /*     %-----------------------% */
    /*     | Local Array Arguments | */
    /*     %-----------------------% */


    /*     %----------------------% */
    /*     | External Subroutines | */
    /*     %----------------------% */


    /*     %--------------------% */
    /*     | External Functions | */
    /*     %--------------------% */


    /*     %---------------------% */
    /*     | Intrinsic Functions | */
    /*     %---------------------% */


    /*     %-----------------% */
    /*     | Data statements | */
    /*     %-----------------% */

    /* Parameter adjustments */
    --workd;
    --resid;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --workl;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --ipntr;

    /* Function Body */

    /*     %-----------------------% */
    /*     | Executable Statements | */
    /*     %-----------------------% */

    if (first) {

        /*        %-----------------------------------------% */
        /*        | Set machine-dependent constants for the | */
        /*        | the splitting and deflation criterion.  | */
        /*        | If norm(H) <= sqrt(OVFL),               | */
        /*        | overflow should not occur.              | */
        /*        | REFERENCE: LAPACK subroutine slahqr     | */
        /*        %-----------------------------------------% */

        unfl = pslamch_(comm, "safe minimum", (ftnlen)12);
        ovfl = 1.f / unfl;
        slabad_(&unfl, &ovfl);
        ulp = pslamch_(comm, "precision", (ftnlen)9);
        smlnum = unfl * (*n / ulp);
        first = FALSE_;
    }

    if (*ido == 0) {

        /*        %-------------------------------% */
        /*        | Initialize timing statistics  | */
        /*        | & message level for debugging | */
        /*        %-------------------------------% */

        second_(&t0);
        msglvl = debug_1.mnaitr;

        /*        %------------------------------% */
        /*        | Initial call to this routine | */
        /*        %------------------------------% */

        *info = 0;
        step3 = FALSE_;
        step4 = FALSE_;
        rstart = FALSE_;
        orth1 = FALSE_;
        orth2 = FALSE_;
        j = *k + 1;
        ipj = 1;
        irj = ipj + *n;
        ivj = irj + *n;
    }

    /*     %-------------------------------------------------% */
    /*     | When in reverse communication mode one of:      | */
    /*     | STEP3, STEP4, ORTH1, ORTH2, RSTART              | */
    /*     | will be .true. when ....                        | */
    /*     | STEP3: return from computing OP*v_{j}.          | */
    /*     | STEP4: return from computing B-norm of OP*v_{j} | */
    /*     | ORTH1: return from computing B-norm of r_{j+1}  | */
    /*     | ORTH2: return from computing B-norm of          | */
    /*     |        correction to the residual vector.       | */
    /*     | RSTART: return from OP computations needed by   | */
    /*     |         psgetv0.                                | */
    /*     %-------------------------------------------------% */

    if (step3) {
        goto L50;
    }
    if (step4) {
        goto L60;
    }
    if (orth1) {
        goto L70;
    }
    if (orth2) {
        goto L90;
    }
    if (rstart) {
        goto L30;
    }

    /*     %-----------------------------% */
    /*     | Else this is the first step | */
    /*     %-----------------------------% */

    /*     %--------------------------------------------------------------% */
    /*     |                                                              | */
    /*     |        A R N O L D I     I T E R A T I O N     L O O P       | */
    /*     |                                                              | */
    /*     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */
    /*     %--------------------------------------------------------------% */
L1000:

    if (msglvl > 1) {
        pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: "
                "generating Arnoldi vector number", (ftnlen)40);
        psvout_(comm, &debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_nait"
                "r: B-norm of the current residual is", (ftnlen)41);
    }

    /*        %---------------------------------------------------% */
    /*        | STEP 1: Check if the B norm of j-th residual      | */
    /*        | vector is zero. Equivalent to determing whether   | */
    /*        | an exact j-step Arnoldi factorization is present. | */
    /*        %---------------------------------------------------% */

    betaj = *rnorm;
    if (*rnorm > 0.f) {
        goto L40;
    }

    /*           %---------------------------------------------------% */
    /*           | Invariant subspace found, generate a new starting | */
    /*           | vector which is orthogonal to the current Arnoldi | */
    /*           | basis and continue the iteration.                 | */
    /*           %---------------------------------------------------% */

    if (msglvl > 0) {
        pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: "
                "****** RESTART AT STEP ******", (ftnlen)37);
    }

    /*           %---------------------------------------------% */
    /*           | ITRY is the loop variable that controls the | */
    /*           | maximum amount of times that a restart is   | */
    /*           | attempted. NRSTRT is used by stat.h         | */
    /*           %---------------------------------------------% */

    betaj = 0.f;
    ++timing_1.nrstrt;
    itry = 1;
L20:
    rstart = TRUE_;
    *ido = 0;
L30:

    /*           %--------------------------------------% */
    /*           | If in reverse communication mode and | */
    /*           | RSTART = .true. flow returns here.   | */
    /*           %--------------------------------------% */

    psgetv0_(comm, ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, &
             resid[1], rnorm, &ipntr[1], &workd[1], &workl[1], &ierr, (ftnlen)
             1);
    if (*ido != 99) {
        goto L9000;
    }
    if (ierr < 0) {
        ++itry;
        if (itry <= 3) {
            goto L20;
        }

        /*              %------------------------------------------------% */
        /*              | Give up after several restart attempts.        | */
        /*              | Set INFO to the size of the invariant subspace | */
        /*              | which spans OP and exit.                       | */
        /*              %------------------------------------------------% */

        *info = j - 1;
        second_(&t1);
        timing_1.tnaitr += t1 - t0;
        *ido = 99;
        goto L9000;
    }

L40:

    /*        %---------------------------------------------------------% */
    /*        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  | */
    /*        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */
    /*        | when reciprocating a small RNORM, test against lower    | */
    /*        | machine bound.                                          | */
    /*        %---------------------------------------------------------% */

    scopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1);
    if (*rnorm >= unfl) {
        temp1 = 1.f / *rnorm;
        sscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1);
        sscal_(n, &temp1, &workd[ipj], &c__1);
    } else {

        /*            %-----------------------------------------% */
        /*            | To scale both v_{j} and p_{j} carefully | */
        /*            | use LAPACK routine SLASCL               | */
        /*            %-----------------------------------------% */

        slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &v[j * v_dim1
                + 1], n, &infol, (ftnlen)7);
        slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &workd[ipj],
                n, &infol, (ftnlen)7);
    }

    /*        %------------------------------------------------------% */
    /*        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */
    /*        | Note that this is not quite yet r_{j}. See STEP 4    | */
    /*        %------------------------------------------------------% */

    step3 = TRUE_;
    ++timing_1.nopx;
    second_(&t2);
    scopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1);
    ipntr[1] = ivj;
    ipntr[2] = irj;
    ipntr[3] = ipj;
    *ido = 1;

    /*        %-----------------------------------% */
    /*        | Exit in order to compute OP*v_{j} | */
    /*        %-----------------------------------% */

    goto L9000;
L50:

    /*        %----------------------------------% */
    /*        | Back from reverse communication; | */
    /*        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   | */
    /*        | if step3 = .true.                | */
    /*        %----------------------------------% */

    second_(&t3);
    timing_1.tmvopx += t3 - t2;
    step3 = FALSE_;

    /*        %------------------------------------------% */
    /*        | Put another copy of OP*v_{j} into RESID. | */
    /*        %------------------------------------------% */

    scopy_(n, &workd[irj], &c__1, &resid[1], &c__1);

    /*        %---------------------------------------% */
    /*        | STEP 4:  Finish extending the Arnoldi | */
    /*        |          factorization to length j.   | */
    /*        %---------------------------------------% */

    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
        ++timing_1.nbx;
        step4 = TRUE_;
        ipntr[1] = irj;
        ipntr[2] = ipj;
        *ido = 2;

        /*           %-------------------------------------% */
        /*           | Exit in order to compute B*OP*v_{j} | */
        /*           %-------------------------------------% */

        goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
        scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L60:

    /*        %----------------------------------% */
    /*        | Back from reverse communication; | */
    /*        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */
    /*        | if step4 = .true.                | */
    /*        %----------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        second_(&t3);
        timing_1.tmvbx += t3 - t2;
    }

    step4 = FALSE_;

    /*        %-------------------------------------% */
    /*        | The following is needed for STEP 5. | */
    /*        | Compute the B-norm of OP*v_{j}.     | */
    /*        %-------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
        mpi_allreduce__(&rnorm_buf__, &wnorm, &c__1, &mpipriv_1.mpi_real__, &
                        mpipriv_1.mpi_sum__, comm, &ierr);
        wnorm = sqrt((dabs(wnorm)));
    } else if (*(unsigned char *)bmat == 'I') {
        wnorm = psnorm2_(comm, n, &resid[1], &c__1);
    }

    /*        %-----------------------------------------% */
    /*        | Compute the j-th residual corresponding | */
    /*        | to the j step factorization.            | */
    /*        | Use Classical Gram Schmidt and compute: | */
    /*        | w_{j} <-  V_{j}^T * B * OP * v_{j}      | */
    /*        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      | */
    /*        %-----------------------------------------% */


    /*        %------------------------------------------% */
    /*        | Compute the j Fourier coefficients w_{j} | */
    /*        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  | */
    /*        %------------------------------------------% */

    sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b48,
           &workl[1], &c__1, (ftnlen)1);
    mpi_allreduce__(&workl[1], &h__[j * h_dim1 + 1], &j, &
                    mpipriv_1.mpi_real__, &mpipriv_1.mpi_sum__, comm, &ierr);

    /*        %--------------------------------------% */
    /*        | Orthogonalize r_{j} against V_{j}.   | */
    /*        | RESID contains OP*v_{j}. See STEP 3. | */
    /*        %--------------------------------------% */

    sgemv_("N", n, &j, &c_b51, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1,
           &c_b25, &resid[1], &c__1, (ftnlen)1);

    if (j > 1) {
        h__[j + (j - 1) * h_dim1] = betaj;
    }

    second_(&t4);

    orth1 = TRUE_;

    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
        ++timing_1.nbx;
        scopy_(n, &resid[1], &c__1, &workd[irj], &c__1);
        ipntr[1] = irj;
        ipntr[2] = ipj;
        *ido = 2;

        /*           %----------------------------------% */
        /*           | Exit in order to compute B*r_{j} | */
        /*           %----------------------------------% */

        goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
        scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L70:

    /*        %---------------------------------------------------% */
    /*        | Back from reverse communication if ORTH1 = .true. | */
    /*        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    | */
    /*        %---------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        second_(&t3);
        timing_1.tmvbx += t3 - t2;
    }

    orth1 = FALSE_;

    /*        %------------------------------% */
    /*        | Compute the B-norm of r_{j}. | */
    /*        %------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
        mpi_allreduce__(&rnorm_buf__, rnorm, &c__1, &mpipriv_1.mpi_real__, &
                        mpipriv_1.mpi_sum__, comm, &ierr);
        *rnorm = sqrt((dabs(*rnorm)));
    } else if (*(unsigned char *)bmat == 'I') {
        *rnorm = psnorm2_(comm, n, &resid[1], &c__1);
    }

    /*        %-----------------------------------------------------------% */
    /*        | STEP 5: Re-orthogonalization / Iterative refinement phase | */
    /*        | Maximum NITER_ITREF tries.                                | */
    /*        |                                                           | */
    /*        |          s      = V_{j}^T * B * r_{j}                     | */
    /*        |          r_{j}  = r_{j} - V_{j}*s                         | */
    /*        |          alphaj = alphaj + s_{j}                          | */
    /*        |                                                           | */
    /*        | The stopping criteria used for iterative refinement is    | */
    /*        | discussed in Parlett's book SEP, page 107 and in Gragg &  | */
    /*        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         | */
    /*        | Determine if we need to correct the residual. The goal is | */
    /*        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  | */
    /*        | The following test determines whether the sine of the     | */
    /*        | angle between  OP*x and the computed residual is less     | */
    /*        | than or equal to 0.717.                                   | */
    /*        %-----------------------------------------------------------% */

    if (*rnorm > wnorm * .717f) {
        goto L100;
    }
    iter = 0;
    ++timing_1.nrorth;

    /*        %---------------------------------------------------% */
    /*        | Enter the Iterative refinement phase. If further  | */
    /*        | refinement is necessary, loop back here. The loop | */
    /*        | variable is ITER. Perform a step of Classical     | */
    /*        | Gram-Schmidt using all the Arnoldi vectors V_{j}  | */
    /*        %---------------------------------------------------% */

L80:

    if (msglvl > 2) {
        xtemp[0] = wnorm;
        xtemp[1] = *rnorm;
        psvout_(comm, &debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_nait"
                "r: re-orthonalization; wnorm and rnorm are", (ftnlen)47);
        psvout_(comm, &debug_1.logfil, &j, &h__[j * h_dim1 + 1], &
                debug_1.ndigit, "_naitr: j-th column of H", (ftnlen)24);
    }

    /*        %----------------------------------------------------% */
    /*        | Compute V_{j}^T * B * r_{j}.                       | */
    /*        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */
    /*        %----------------------------------------------------% */

    sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b48,
           &workl[j + 1], &c__1, (ftnlen)1);
    mpi_allreduce__(&workl[j + 1], &workl[1], &j, &mpipriv_1.mpi_real__, &
                    mpipriv_1.mpi_sum__, comm, &ierr);

    /*        %---------------------------------------------% */
    /*        | Compute the correction to the residual:     | */
    /*        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */
    /*        | The correction to H is v(:,1:J)*H(1:J,1:J)  | */
    /*        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         | */
    /*        %---------------------------------------------% */

    sgemv_("N", n, &j, &c_b51, &v[v_offset], ldv, &workl[1], &c__1, &c_b25, &
           resid[1], &c__1, (ftnlen)1);
    saxpy_(&j, &c_b25, &workl[1], &c__1, &h__[j * h_dim1 + 1], &c__1);

    orth2 = TRUE_;
    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
        ++timing_1.nbx;
        scopy_(n, &resid[1], &c__1, &workd[irj], &c__1);
        ipntr[1] = irj;
        ipntr[2] = ipj;
        *ido = 2;

        /*           %-----------------------------------% */
        /*           | Exit in order to compute B*r_{j}. | */
        /*           | r_{j} is the corrected residual.  | */
        /*           %-----------------------------------% */

        goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
        scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L90:

    /*        %---------------------------------------------------% */
    /*        | Back from reverse communication if ORTH2 = .true. | */
    /*        %---------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        second_(&t3);
        timing_1.tmvbx += t3 - t2;
    }

    /*        %-----------------------------------------------------% */
    /*        | Compute the B-norm of the corrected residual r_{j}. | */
    /*        %-----------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
        mpi_allreduce__(&rnorm_buf__, &rnorm1, &c__1, &mpipriv_1.mpi_real__, &
                        mpipriv_1.mpi_sum__, comm, &ierr);
        rnorm1 = sqrt((dabs(rnorm1)));
    } else if (*(unsigned char *)bmat == 'I') {
        rnorm1 = psnorm2_(comm, n, &resid[1], &c__1);
    }

    if (msglvl > 0 && iter > 0) {
        pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: "
                "Iterative refinement for Arnoldi residual", (ftnlen)49);
        if (msglvl > 2) {
            xtemp[0] = *rnorm;
            xtemp[1] = rnorm1;
            psvout_(comm, &debug_1.logfil, &c__2, xtemp, &debug_1.ndigit,
                    "_naitr: iterative refinement ; rnorm and rnorm1 are", (
                        ftnlen)51);
        }
    }

    /*        %-----------------------------------------% */
    /*        | Determine if we need to perform another | */
    /*        | step of re-orthogonalization.           | */
    /*        %-----------------------------------------% */

    if (rnorm1 > *rnorm * .717f) {

        /*           %---------------------------------------% */
        /*           | No need for further refinement.       | */
        /*           | The cosine of the angle between the   | */
        /*           | corrected residual vector and the old | */
        /*           | residual vector is greater than 0.717 | */
        /*           | In other words the corrected residual | */
        /*           | and the old residual vector share an  | */
        /*           | angle of less than arcCOS(0.717)      | */
        /*           %---------------------------------------% */

        *rnorm = rnorm1;

    } else {

        /*           %-------------------------------------------% */
        /*           | Another step of iterative refinement step | */
        /*           | is required. NITREF is used by stat.h     | */
        /*           %-------------------------------------------% */

        ++timing_1.nitref;
        *rnorm = rnorm1;
        ++iter;
        if (iter <= 1) {
            goto L80;
        }

        /*           %-------------------------------------------------% */
        /*           | Otherwise RESID is numerically in the span of V | */
        /*           %-------------------------------------------------% */

        i__1 = *n;
        for (jj = 1; jj <= i__1; ++jj) {
            resid[jj] = 0.f;
            /* L95: */
        }
        *rnorm = 0.f;
    }

    /*        %----------------------------------------------% */
    /*        | Branch here directly if iterative refinement | */
    /*        | wasn't necessary or after at most NITER_REF  | */
    /*        | steps of iterative refinement.               | */
    /*        %----------------------------------------------% */

L100:

    rstart = FALSE_;
    orth2 = FALSE_;

    second_(&t5);
    timing_1.titref += t5 - t4;

    /*        %------------------------------------% */
    /*        | STEP 6: Update  j = j+1;  Continue | */
    /*        %------------------------------------% */

    ++j;
    if (j > *k + *np) {
        second_(&t1);
        timing_1.tnaitr += t1 - t0;
        *ido = 99;
        i__1 = *k + *np - 1;
        for (i__ = max(1,*k); i__ <= i__1; ++i__) {

            /*              %--------------------------------------------% */
            /*              | Check for splitting and deflation.         | */
            /*              | Use a standard test as in the QR algorithm | */
            /*              | REFERENCE: LAPACK subroutine slahqr        | */
            /*              %--------------------------------------------% */

            tst1 = (r__1 = h__[i__ + i__ * h_dim1], dabs(r__1)) + (r__2 = h__[
                        i__ + 1 + (i__ + 1) * h_dim1], dabs(r__2));
            if (tst1 == 0.f) {
                i__2 = *k + *np;
                tst1 = slanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1]
                               , (ftnlen)1);
            }
            /* Computing MAX */
            r__2 = ulp * tst1;
            if ((r__1 = h__[i__ + 1 + i__ * h_dim1], dabs(r__1)) <= dmax(r__2,
                    smlnum)) {
                h__[i__ + 1 + i__ * h_dim1] = 0.f;
            }
            /* L110: */
        }

        if (msglvl > 2) {
            i__1 = *k + *np;
            i__2 = *k + *np;
            psmout_(comm, &debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh,
                    &debug_1.ndigit, "_naitr: Final upper Hessenberg matrix "
                    "H of order K+NP", (ftnlen)53);
        }

        goto L9000;
    }

    /*        %--------------------------------------------------------% */
    /*        | Loop back to extend the factorization by another step. | */
    /*        %--------------------------------------------------------% */

    goto L1000;

    /*     %---------------------------------------------------------------% */
    /*     |                                                               | */
    /*     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  | */
    /*     |                                                               | */
    /*     %---------------------------------------------------------------% */

L9000:
    return 0;

    /*     %----------------% */
    /*     | End of psnaitr | */
    /*     %----------------% */

} /* psnaitr_ */