/* ----------------------------------------------------------------------- */ /* Subroutine */ int psneupd_(integer *comm, logical *rvec, char *howmny, logical *select, real *dr, real *di, real *z__, integer *ldz, real * sigmar, real *sigmai, real *workev, char *bmat, integer *n, char * which, integer *nev, real *tol, real *resid, integer *ncv, real *v, integer *ldv, integer *iparam, integer *ipntr, real *workd, real * workl, integer *lworkl, integer *info, ftnlen howmny_len, ftnlen bmat_len, ftnlen which_len) { /* System generated locals */ integer v_dim1, v_offset, z_dim1, z_offset, i__1; real r__1, r__2; doublereal d__1; /* Builtin functions */ double pow_dd(doublereal *, doublereal *); integer s_cmp(char *, char *, ftnlen, ftnlen); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); /* Local variables */ static integer j, k, ih, jj, np; static real vl[1] /* was [1][1] */; static integer ibd, ldh, ldq, iri; static real sep; static integer irr, wri, wrr, mode; static real eps23; extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); static integer ierr; static real temp; static integer iwev; static char type__[6]; static real temp1; extern doublereal snrm2_(integer *, real *, integer *); static integer ihbds, iconj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real conds; static logical reord; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); static integer nconv, iwork[1]; static real rnorm; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static integer ritzi; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * , ftnlen, ftnlen, ftnlen, ftnlen); static integer ritzr; extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer *, real *, real *, integer *); extern doublereal slapy2_(real *, real *); extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen); static integer iheigi, iheigr, bounds, invsub, iuptri, msglvl, outncv, ishift, numcnv; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen), slahqr_(logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *, ftnlen), psmout_(integer *, integer *, integer *, integer *, real *, integer *, integer *, char *, ftnlen), strevc_( char *, char *, logical *, integer *, real *, integer *, real *, integer *, real *, integer *, integer *, integer *, real *, integer *, ftnlen, ftnlen), strsen_(char *, char *, logical *, integer *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, integer *, integer * , integer *, ftnlen, ftnlen), psvout_(integer *, integer *, integer *, real *, integer *, char *, ftnlen), pivout_(integer *, integer *, integer *, integer *, integer *, char *, ftnlen); extern doublereal pslamch_(integer *, char *, ftnlen); extern /* Subroutine */ int psngets_(integer *, integer *, char *, integer *, integer *, real *, real *, real *, real *, real *, ftnlen); /* %--------------------% */ /* | MPI Communicator | */ /* %--------------------% */ /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ /* %------------------------% */ /* | Set default parameters | */ /* %------------------------% */ /* Parameter adjustments */ z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --workd; --resid; --di; --dr; --workev; --select; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --iparam; --ipntr; --workl; /* Function Body */ msglvl = debug_1.mneupd; mode = iparam[7]; nconv = iparam[5]; *info = 0; /* %---------------------------------% */ /* | Get machine dependent constant. | */ /* %---------------------------------% */ eps23 = pslamch_(comm, "Epsilon-Machine", (ftnlen)15); d__1 = (doublereal) eps23; eps23 = pow_dd(&d__1, &c_b3); /* %--------------% */ /* | Quick return | */ /* %--------------% */ ierr = 0; if (nconv <= 0) { ierr = -14; } else if (*n <= 0) { ierr = -1; } else if (*nev <= 0) { ierr = -2; } else if (*ncv <= *nev + 1) { ierr = -3; } else if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LR", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "SI", (ftnlen)2, (ftnlen)2) != 0) { ierr = -5; } else if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G') { ierr = -6; } else /* if(complicated condition) */ { /* Computing 2nd power */ i__1 = *ncv; if (*lworkl < i__1 * i__1 * 3 + *ncv * 6) { ierr = -7; } else if (*(unsigned char *)howmny != 'A' && *(unsigned char *) howmny != 'P' && *(unsigned char *)howmny != 'S' && *rvec) { ierr = -13; } else if (*(unsigned char *)howmny == 'S') { ierr = -12; } } if (mode == 1 || mode == 2) { s_copy(type__, "REGULR", (ftnlen)6, (ftnlen)6); } else if (mode == 3 && *sigmai == 0.f) { s_copy(type__, "SHIFTI", (ftnlen)6, (ftnlen)6); } else if (mode == 3) { s_copy(type__, "REALPT", (ftnlen)6, (ftnlen)6); } else if (mode == 4) { s_copy(type__, "IMAGPT", (ftnlen)6, (ftnlen)6); } else { ierr = -10; } if (mode == 1 && *(unsigned char *)bmat == 'G') { ierr = -11; } /* %------------% */ /* | Error Exit | */ /* %------------% */ if (ierr != 0) { *info = ierr; goto L9000; } /* %--------------------------------------------------------% */ /* | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | */ /* | etc... and the remaining workspace. | */ /* | Also update pointer to be used on output. | */ /* | Memory is laid out as follows: | */ /* | workl(1:ncv*ncv) := generated Hessenberg matrix | */ /* | workl(ncv*ncv+1:ncv*ncv+2*ncv) := real and imaginary | */ /* | parts of ritz values | */ /* | workl(ncv*ncv+2*ncv+1:ncv*ncv+3*ncv) := error bounds | */ /* %--------------------------------------------------------% */ /* %-----------------------------------------------------------% */ /* | The following is used and set by PSNEUPD. | */ /* | workl(ncv*ncv+3*ncv+1:ncv*ncv+4*ncv) := The untransformed | */ /* | real part of the Ritz values. | */ /* | workl(ncv*ncv+4*ncv+1:ncv*ncv+5*ncv) := The untransformed | */ /* | imaginary part of the Ritz values. | */ /* | workl(ncv*ncv+5*ncv+1:ncv*ncv+6*ncv) := The untransformed | */ /* | error bounds of the Ritz values | */ /* | workl(ncv*ncv+6*ncv+1:2*ncv*ncv+6*ncv) := Holds the upper | */ /* | quasi-triangular matrix for H | */ /* | workl(2*ncv*ncv+6*ncv+1: 3*ncv*ncv+6*ncv) := Holds the | */ /* | associated matrix representation of the invariant | */ /* | subspace for H. | */ /* | GRAND total of NCV * ( 3 * NCV + 6 ) locations. | */ /* %-----------------------------------------------------------% */ ih = ipntr[5]; ritzr = ipntr[6]; ritzi = ipntr[7]; bounds = ipntr[8]; ldh = *ncv; ldq = *ncv; iheigr = bounds + ldh; iheigi = iheigr + ldh; ihbds = iheigi + ldh; iuptri = ihbds + ldh; invsub = iuptri + ldh * *ncv; ipntr[9] = iheigr; ipntr[10] = iheigi; ipntr[11] = ihbds; ipntr[12] = iuptri; ipntr[13] = invsub; wrr = 1; wri = *ncv + 1; iwev = wri + *ncv; /* %-----------------------------------------% */ /* | irr points to the REAL part of the Ritz | */ /* | values computed by _neigh before | */ /* | exiting _naup2. | */ /* | iri points to the IMAGINARY part of the | */ /* | Ritz values computed by _neigh | */ /* | before exiting _naup2. | */ /* | ibd points to the Ritz estimates | */ /* | computed by _neigh before exiting | */ /* | _naup2. | */ /* %-----------------------------------------% */ irr = ipntr[14] + *ncv * *ncv; iri = irr + *ncv; ibd = iri + *ncv; /* %------------------------------------% */ /* | RNORM is B-norm of the RESID(1:N). | */ /* %------------------------------------% */ rnorm = workl[ih + 2]; workl[ih + 2] = 0.f; if (msglvl > 2) { psvout_(comm, &debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neupd: Real part of Ritz values passed in from _NAUPD.", ( ftnlen)55); psvout_(comm, &debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neupd: Imag part of Ritz values passed in from _NAUPD.", ( ftnlen)55); psvout_(comm, &debug_1.logfil, ncv, &workl[ibd], &debug_1.ndigit, "_neupd: Ritz estimates passed in from _NAUPD.", (ftnlen)45); } if (*rvec) { reord = FALSE_; /* %---------------------------------------------------% */ /* | Use the temporary bounds array to store indices | */ /* | These will be used to mark the select array later | */ /* %---------------------------------------------------% */ i__1 = *ncv; for (j = 1; j <= i__1; ++j) { workl[bounds + j - 1] = (real) j; select[j] = FALSE_; /* L10: */ } /* %-------------------------------------% */ /* | Select the wanted Ritz values. | */ /* | Sort the Ritz values so that the | */ /* | wanted ones appear at the tailing | */ /* | NEV positions of workl(irr) and | */ /* | workl(iri). Move the corresponding | */ /* | error estimates in workl(bound) | */ /* | accordingly. | */ /* %-------------------------------------% */ np = *ncv - *nev; ishift = 0; psngets_(comm, &ishift, which, nev, &np, &workl[irr], &workl[iri], & workl[bounds], &workl[1], &workl[np + 1], (ftnlen)2); if (msglvl > 2) { psvout_(comm, &debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neupd: Real part of Ritz values after calling _NGETS.", (ftnlen)54); psvout_(comm, &debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neupd: Imag part of Ritz values after calling _NGETS.", (ftnlen)54); psvout_(comm, &debug_1.logfil, ncv, &workl[bounds], & debug_1.ndigit, "_neupd: Ritz value indices after callin" "g _NGETS.", (ftnlen)48); } /* %-----------------------------------------------------% */ /* | Record indices of the converged wanted Ritz values | */ /* | Mark the select array for possible reordering | */ /* %-----------------------------------------------------% */ numcnv = 0; i__1 = *ncv; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__1 = eps23, r__2 = slapy2_(&workl[irr + *ncv - j], &workl[iri + *ncv - j]); temp1 = dmax(r__1,r__2); jj = workl[bounds + *ncv - j]; if (numcnv < nconv && workl[ibd + jj - 1] <= *tol * temp1) { select[jj] = TRUE_; ++numcnv; if (jj > *nev) { reord = TRUE_; } } /* L11: */ } /* %-----------------------------------------------------------% */ /* | Check the count (numcnv) of converged Ritz values with | */ /* | the number (nconv) reported by dnaupd. If these two | */ /* | are different then there has probably been an error | */ /* | caused by incorrect passing of the dnaupd data. | */ /* %-----------------------------------------------------------% */ if (msglvl > 2) { pivout_(comm, &debug_1.logfil, &c__1, &numcnv, &debug_1.ndigit, "_neupd: Number of specified eigenvalues", (ftnlen)39); pivout_(comm, &debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, "_neupd: Number of \"converged\" eigenvalues", (ftnlen)41) ; } if (numcnv != nconv) { *info = -15; goto L9000; } /* %-----------------------------------------------------------% */ /* | Call LAPACK routine slahqr to compute the real Schur form | */ /* | of the upper Hessenberg matrix returned by PSNAUPD. | */ /* | Make a copy of the upper Hessenberg matrix. | */ /* | Initialize the Schur vector matrix Q to the identity. | */ /* %-----------------------------------------------------------% */ i__1 = ldh * *ncv; scopy_(&i__1, &workl[ih], &c__1, &workl[iuptri], &c__1); slaset_("All", ncv, ncv, &c_b37, &c_b38, &workl[invsub], &ldq, ( ftnlen)3); slahqr_(&c_true, &c_true, ncv, &c__1, ncv, &workl[iuptri], &ldh, & workl[iheigr], &workl[iheigi], &c__1, ncv, &workl[invsub], & ldq, &ierr); scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1); if (ierr != 0) { *info = -8; goto L9000; } if (msglvl > 1) { psvout_(comm, &debug_1.logfil, ncv, &workl[iheigr], & debug_1.ndigit, "_neupd: Real part of the eigenvalues of" " H", (ftnlen)41); psvout_(comm, &debug_1.logfil, ncv, &workl[iheigi], & debug_1.ndigit, "_neupd: Imaginary part of the Eigenvalu" "es of H", (ftnlen)46); psvout_(comm, &debug_1.logfil, ncv, &workl[ihbds], & debug_1.ndigit, "_neupd: Last row of the Schur vector ma" "trix", (ftnlen)43); if (msglvl > 3) { psmout_(comm, &debug_1.logfil, ncv, ncv, &workl[iuptri], &ldh, &debug_1.ndigit, "_neupd: The upper quasi-triangula" "r matrix ", (ftnlen)42); } } if (reord) { /* %-----------------------------------------------------% */ /* | Reorder the computed upper quasi-triangular matrix. | */ /* %-----------------------------------------------------% */ strsen_("None", "V", &select[1], ncv, &workl[iuptri], &ldh, & workl[invsub], &ldq, &workl[iheigr], &workl[iheigi], & nconv, &conds, &sep, &workl[ihbds], ncv, iwork, &c__1, & ierr, (ftnlen)4, (ftnlen)1); if (ierr == 1) { *info = 1; goto L9000; } if (msglvl > 2) { psvout_(comm, &debug_1.logfil, ncv, &workl[iheigr], & debug_1.ndigit, "_neupd: Real part of the eigenvalue" "s of H--reordered", (ftnlen)52); psvout_(comm, &debug_1.logfil, ncv, &workl[iheigi], & debug_1.ndigit, "_neupd: Imag part of the eigenvalue" "s of H--reordered", (ftnlen)52); if (msglvl > 3) { psmout_(comm, &debug_1.logfil, ncv, ncv, &workl[iuptri], & ldq, &debug_1.ndigit, "_neupd: Quasi-triangular " "matrix after re-ordering", (ftnlen)49); } } } /* %---------------------------------------% */ /* | Copy the last row of the Schur vector | */ /* | into workl(ihbds). This will be used | */ /* | to compute the Ritz estimates of | */ /* | converged Ritz values. | */ /* %---------------------------------------% */ scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1); /* %----------------------------------------------------% */ /* | Place the computed eigenvalues of H into DR and DI | */ /* | if a spectral transformation was not used. | */ /* %----------------------------------------------------% */ if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) { scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1); scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1); } /* %----------------------------------------------------------% */ /* | Compute the QR factorization of the matrix representing | */ /* | the wanted invariant subspace located in the first NCONV | */ /* | columns of workl(invsub,ldq). | */ /* %----------------------------------------------------------% */ sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*ncv + 1], &ierr); /* %---------------------------------------------------------% */ /* | * Postmultiply V by Q using sorm2r. | */ /* | * Copy the first NCONV columns of VQ into Z. | */ /* | * Postmultiply Z by R. | */ /* | The N by NCONV matrix Z is now a matrix representation | */ /* | of the approximate invariant subspace associated with | */ /* | the Ritz values in workl(iheigr) and workl(iheigi) | */ /* | The first NCONV columns of V are now approximate Schur | */ /* | vectors associated with the real upper quasi-triangular | */ /* | matrix of order NCONV in workl(iuptri) | */ /* %---------------------------------------------------------% */ sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &ldq, &workev[1], &v[v_offset], ldv, &workd[*n + 1], &ierr, (ftnlen) 5, (ftnlen)11); slacpy_("All", n, &nconv, &v[v_offset], ldv, &z__[z_offset], ldz, ( ftnlen)3); i__1 = nconv; for (j = 1; j <= i__1; ++j) { /* %---------------------------------------------------% */ /* | Perform both a column and row scaling if the | */ /* | diagonal element of workl(invsub,ldq) is negative | */ /* | I'm lazy and don't take advantage of the upper | */ /* | quasi-triangular form of workl(iuptri,ldq) | */ /* | Note that since Q is orthogonal, R is a diagonal | */ /* | matrix consisting of plus or minus ones | */ /* %---------------------------------------------------% */ if (workl[invsub + (j - 1) * ldq + j - 1] < 0.f) { sscal_(&nconv, &c_b64, &workl[iuptri + j - 1], &ldq); sscal_(&nconv, &c_b64, &workl[iuptri + (j - 1) * ldq], &c__1); } /* L20: */ } if (*(unsigned char *)howmny == 'A') { /* %--------------------------------------------% */ /* | Compute the NCONV wanted eigenvectors of T | */ /* | located in workl(iuptri,ldq). | */ /* %--------------------------------------------% */ i__1 = *ncv; for (j = 1; j <= i__1; ++j) { if (j <= nconv) { select[j] = TRUE_; } else { select[j] = FALSE_; } /* L30: */ } strevc_("Right", "Select", &select[1], ncv, &workl[iuptri], &ldq, vl, &c__1, &workl[invsub], &ldq, ncv, &outncv, &workev[1], &ierr, (ftnlen)5, (ftnlen)6); if (ierr != 0) { *info = -9; goto L9000; } /* %------------------------------------------------% */ /* | Scale the returning eigenvectors so that their | */ /* | Euclidean norms are all one. LAPACK subroutine | */ /* | strevc returns each eigenvector normalized so | */ /* | that the element of largest magnitude has | */ /* | magnitude 1; | */ /* %------------------------------------------------% */ iconj = 0; i__1 = nconv; for (j = 1; j <= i__1; ++j) { if (workl[iheigi + j - 1] == 0.f) { /* %----------------------% */ /* | real eigenvalue case | */ /* %----------------------% */ temp = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &c__1); r__1 = 1.f / temp; sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &c__1); } else { /* %-------------------------------------------% */ /* | Complex conjugate pair case. Note that | */ /* | since the real and imaginary part of | */ /* | the eigenvector are stored in consecutive | */ /* | columns, we further normalize by the | */ /* | square root of two. | */ /* %-------------------------------------------% */ if (iconj == 0) { r__1 = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], & c__1); r__2 = snrm2_(ncv, &workl[invsub + j * ldq], &c__1); temp = slapy2_(&r__1, &r__2); r__1 = 1.f / temp; sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], & c__1); r__1 = 1.f / temp; sscal_(ncv, &r__1, &workl[invsub + j * ldq], &c__1); iconj = 1; } else { iconj = 0; } } /* L40: */ } sgemv_("T", ncv, &nconv, &c_b38, &workl[invsub], &ldq, &workl[ ihbds], &c__1, &c_b37, &workev[1], &c__1, (ftnlen)1); iconj = 0; i__1 = nconv; for (j = 1; j <= i__1; ++j) { if (workl[iheigi + j - 1] != 0.f) { /* %-------------------------------------------% */ /* | Complex conjugate pair case. Note that | */ /* | since the real and imaginary part of | */ /* | the eigenvector are stored in consecutive | */ /* %-------------------------------------------% */ if (iconj == 0) { workev[j] = slapy2_(&workev[j], &workev[j + 1]); workev[j + 1] = workev[j]; iconj = 1; } else { iconj = 0; } } /* L45: */ } if (msglvl > 2) { psvout_(comm, &debug_1.logfil, ncv, &workl[ihbds], & debug_1.ndigit, "_neupd: Last row of the eigenvector" " matrix for T", (ftnlen)48); if (msglvl > 3) { psmout_(comm, &debug_1.logfil, ncv, ncv, &workl[invsub], & ldq, &debug_1.ndigit, "_neupd: The eigenvector m" "atrix for T", (ftnlen)36); } } /* %---------------------------------------% */ /* | Copy Ritz estimates into workl(ihbds) | */ /* %---------------------------------------% */ scopy_(&nconv, &workev[1], &c__1, &workl[ihbds], &c__1); /* %---------------------------------------------------------% */ /* | Compute the QR factorization of the eigenvector matrix | */ /* | associated with leading portion of T in the first NCONV | */ /* | columns of workl(invsub,ldq). | */ /* %---------------------------------------------------------% */ sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[* ncv + 1], &ierr); /* %----------------------------------------------% */ /* | * Postmultiply Z by Q. | */ /* | * Postmultiply Z by R. | */ /* | The N by NCONV matrix Z is now contains the | */ /* | Ritz vectors associated with the Ritz values | */ /* | in workl(iheigr) and workl(iheigi). | */ /* %----------------------------------------------% */ sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], & ldq, &workev[1], &z__[z_offset], ldz, &workd[*n + 1], & ierr, (ftnlen)5, (ftnlen)11); strmm_("Right", "Upper", "No transpose", "Non-unit", n, &nconv, & c_b38, &workl[invsub], &ldq, &z__[z_offset], ldz, (ftnlen) 5, (ftnlen)5, (ftnlen)12, (ftnlen)8); } } else { /* %------------------------------------------------------% */ /* | An approximate invariant subspace is not needed. | */ /* | Place the Ritz values computed PSNAUPD into DR and DI | */ /* %------------------------------------------------------% */ scopy_(&nconv, &workl[ritzr], &c__1, &dr[1], &c__1); scopy_(&nconv, &workl[ritzi], &c__1, &di[1], &c__1); scopy_(&nconv, &workl[ritzr], &c__1, &workl[iheigr], &c__1); scopy_(&nconv, &workl[ritzi], &c__1, &workl[iheigi], &c__1); scopy_(&nconv, &workl[bounds], &c__1, &workl[ihbds], &c__1); } /* %------------------------------------------------% */ /* | Transform the Ritz values and possibly vectors | */ /* | and corresponding error bounds of OP to those | */ /* | of A*x = lambda*B*x. | */ /* %------------------------------------------------% */ if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) { if (*rvec) { sscal_(ncv, &rnorm, &workl[ihbds], &c__1); } } else { /* %---------------------------------------% */ /* | A spectral transformation was used. | */ /* | * Determine the Ritz estimates of the | */ /* | Ritz values in the original system. | */ /* %---------------------------------------% */ if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) { if (*rvec) { sscal_(ncv, &rnorm, &workl[ihbds], &c__1); } i__1 = *ncv; for (k = 1; k <= i__1; ++k) { temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1]) ; workl[ihbds + k - 1] = (r__1 = workl[ihbds + k - 1], dabs( r__1)) / temp / temp; /* L50: */ } } else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0) { i__1 = *ncv; for (k = 1; k <= i__1; ++k) { /* L60: */ } } else if (s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) { i__1 = *ncv; for (k = 1; k <= i__1; ++k) { /* L70: */ } } /* %-----------------------------------------------------------% */ /* | * Transform the Ritz values back to the original system. | */ /* | For TYPE = 'SHIFTI' the transformation is | */ /* | lambda = 1/theta + sigma | */ /* | For TYPE = 'REALPT' or 'IMAGPT' the user must from | */ /* | Rayleigh quotients or a projection. See remark 3 above.| */ /* | NOTES: | */ /* | *The Ritz vectors are not affected by the transformation. | */ /* %-----------------------------------------------------------% */ if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) { i__1 = *ncv; for (k = 1; k <= i__1; ++k) { temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1]) ; workl[iheigr + k - 1] = workl[iheigr + k - 1] / temp / temp + *sigmar; workl[iheigi + k - 1] = -workl[iheigi + k - 1] / temp / temp + *sigmai; /* L80: */ } scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1); scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1); } else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0 || s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) { scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1); scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1); } if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 1) { psvout_(comm, &debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Untransformed real part of the Ritz valuess.", ( ftnlen)52); psvout_(comm, &debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Untransformed imag part of the Ritz valuess.", ( ftnlen)52); psvout_(comm, &debug_1.logfil, &nconv, &workl[ihbds], & debug_1.ndigit, "_neupd: Ritz estimates of untransformed" " Ritz values.", (ftnlen)52); } else if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 1) { psvout_(comm, &debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Real parts of converged Ritz values.", (ftnlen) 44); psvout_(comm, &debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Imag parts of converged Ritz values.", (ftnlen) 44); psvout_(comm, &debug_1.logfil, &nconv, &workl[ihbds], & debug_1.ndigit, "_neupd: Associated Ritz estimates.", ( ftnlen)34); } } /* %-------------------------------------------------% */ /* | Eigenvector Purification step. Formally perform | */ /* | one of inverse subspace iteration. Only used | */ /* | for MODE = 2. | */ /* %-------------------------------------------------% */ if (*rvec && *(unsigned char *)howmny == 'A' && s_cmp(type__, "SHIFTI", ( ftnlen)6, (ftnlen)6) == 0) { /* %------------------------------------------------% */ /* | Purify the computed Ritz vectors by adding a | */ /* | little bit of the residual vector: | */ /* | T | */ /* | resid(:)*( e s ) / theta | */ /* | NCV | */ /* | where H s = s theta. Remember that when theta | */ /* | has nonzero imaginary part, the corresponding | */ /* | Ritz vector is stored across two columns of Z. | */ /* %------------------------------------------------% */ iconj = 0; i__1 = nconv; for (j = 1; j <= i__1; ++j) { if (workl[iheigi + j - 1] == 0.f) { workev[j] = workl[invsub + (j - 1) * ldq + *ncv - 1] / workl[ iheigr + j - 1]; } else if (iconj == 0) { temp = slapy2_(&workl[iheigr + j - 1], &workl[iheigi + j - 1]) ; workev[j] = (workl[invsub + (j - 1) * ldq + *ncv - 1] * workl[ iheigr + j - 1] + workl[invsub + j * ldq + *ncv - 1] * workl[iheigi + j - 1]) / temp / temp; workev[j + 1] = (workl[invsub + j * ldq + *ncv - 1] * workl[ iheigr + j - 1] - workl[invsub + (j - 1) * ldq + *ncv - 1] * workl[iheigi + j - 1]) / temp / temp; iconj = 1; } else { iconj = 0; } /* L110: */ } /* %---------------------------------------% */ /* | Perform a rank one update to Z and | */ /* | purify all the Ritz vectors together. | */ /* %---------------------------------------% */ sger_(n, &nconv, &c_b38, &resid[1], &c__1, &workev[1], &c__1, &z__[ z_offset], ldz); } L9000: return 0; /* %----------------% */ /* | End of PSNEUPD | */ /* %----------------% */ } /* psneupd_ */
int main(int argc, char **argv) { int iam, nprocs; int myrank_mpi, nprocs_mpi; int ictxt, nprow, npcol, myrow, mycol; int nb, m, n; int mpA, nqA, mpU, nqU, mpVT, nqVT; int i, j, k, itemp, min_mn; int descA[9], descU[9], descVT[9]; float *A=NULL; int info, infoNN, infoVV, infoNV, infoVN; float *U_NN=NULL, *U_VV=NULL, *U_NV=NULL, *U_VN=NULL; float *VT_NN=NULL, *VT_VV=NULL, *VT_NV=NULL, *VT_VN=NULL; float *S_NN=NULL, *S_VV=NULL, *S_NV=NULL, *S_VN=NULL; float *S_res_NN=NULL; float orthU_VV, residF, orthVT_VV; float orthU_VN, orthVT_NV; float residS_NN, eps; float res_repres_NV, res_repres_VN; /**/ int izero=0,ione=1; float rtmone=-1.0e+00; /**/ double MPIelapsedVV, MPIelapsedNN, MPIelapsedVN, MPIelapsedNV; char jobU, jobVT; int nbfailure=0, nbtestcase=0,inputfromfile, nbhetereogeneity=0; float threshold=100e+00; char buf[1024]; FILE *fd; char *c; char *t_jobU, *t_jobVT; int *t_m, *t_n, *t_nb, *t_nprow, *t_npcol; int nb_expe, expe; char hetereogeneityVV, hetereogeneityNN, hetereogeneityVN, hetereogeneityNV; int iseed[4], idist; /**/ MPI_Init( &argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); /**/ m = 100; n = 100; nprow = 1; npcol = 1; nb = 64; jobU='A'; jobVT='A'; inputfromfile = 0; for( i = 1; i < argc; i++ ) { if( strcmp( argv[i], "-f" ) == 0 ) { inputfromfile = 1; } if( strcmp( argv[i], "-jobvt" ) == 0 ) { if (i+1<argc) { if( strcmp( argv[i+1], "V" ) == 0 ){ jobVT = 'V'; i++; } else if( strcmp( argv[i+1], "N" ) == 0 ){ jobVT = 'N'; i++; } else if( strcmp( argv[i+1], "A" ) == 0 ){ jobVT = 'A'; i++; } else printf(" ** warning: jobvt should be set to V, N or A in the command line ** \n"); } else printf(" ** warning: jobvt should be set to V, N or A in the command line ** \n"); } if( strcmp( argv[i], "-jobu" ) == 0 ) { if (i+1<argc) { if( strcmp( argv[i+1], "V" ) == 0 ){ jobU = 'V'; i++; } else if( strcmp( argv[i+1], "N" ) == 0 ){ jobU = 'N'; i++; } else if( strcmp( argv[i+1], "A" ) == 0 ){ jobU = 'A'; i++; } else printf(" ** warning: jobu should be set to V, N or A in the command line ** \n"); } else printf(" ** warning: jobu should be set to V, N or A in the command line ** \n"); } if( strcmp( argv[i], "-m" ) == 0 ) { m = atoi(argv[i+1]); i++; } if( strcmp( argv[i], "-n" ) == 0 ) { n = atoi(argv[i+1]); i++; } if( strcmp( argv[i], "-p" ) == 0 ) { nprow = atoi(argv[i+1]); i++; } if( strcmp( argv[i], "-q" ) == 0 ) { npcol = atoi(argv[i+1]); i++; } if( strcmp( argv[i], "-nb" ) == 0 ) { nb = atoi(argv[i+1]); i++; } } /**/ if (inputfromfile){ nb_expe = 0; fd = fopen("svd.dat", "r"); if (fd == NULL) { printf("File failed to open svd.dat from processor mpirank(%d/%d): \n",myrank_mpi,nprocs_mpi); exit(-1); } do { c = fgets(buf, 1024, fd); /* get one line from the file */ if (c != NULL) if (c[0] != '#') nb_expe++; } while (c != NULL); /* repeat until NULL */ fclose(fd); t_jobU = (char *)calloc(nb_expe,sizeof(char)) ; t_jobVT = (char *)calloc(nb_expe,sizeof(char)) ; t_m = (int *)calloc(nb_expe,sizeof(int )) ; t_n = (int *)calloc(nb_expe,sizeof(int )) ; t_nb = (int *)calloc(nb_expe,sizeof(int )) ; t_nprow = (int *)calloc(nb_expe,sizeof(int )) ; t_npcol = (int *)calloc(nb_expe,sizeof(int )) ; fd = fopen("svd.dat", "r"); expe=0; do { c = fgets(buf, 1024, fd); /* get one line from the file */ if (c != NULL) if (c[0] != '#'){ //printf("NBEXPE = %d\n",expe); sscanf(c,"%c %c %d %d %d %d %d", &(t_jobU[expe]),&(t_jobVT[expe]),&(t_m[expe]),&(t_n[expe]), &(t_nb[expe]),(&t_nprow[expe]),&(t_npcol[expe])); expe++; } } while (c != NULL); /* repeat until NULL */ fclose(fd); } else { nb_expe = 1; t_jobU = (char *)calloc(nb_expe,sizeof(char)) ; t_jobVT = (char *)calloc(nb_expe,sizeof(char)) ; t_m = (int *)calloc(nb_expe,sizeof(int )) ; t_n = (int *)calloc(nb_expe,sizeof(int )) ; t_nb = (int *)calloc(nb_expe,sizeof(int )) ; t_nprow = (int *)calloc(nb_expe,sizeof(int )) ; t_npcol = (int *)calloc(nb_expe,sizeof(int )) ; t_jobU[0] = jobU; t_jobVT[0] = jobVT; t_m[0] = m; t_n[0] = n; t_nb[0] = nb; t_nprow[0] = nprow; t_npcol[0] = npcol; } if (myrank_mpi==0){ printf("\n"); printf("--------------------------------------------------------------------------------------------------------------------\n"); printf(" Testing psgsevd -- float precision SVD ScaLAPACK routine \n"); printf("jobU jobVT m n nb p q || info heter resid orthU orthVT |SNN-SVV| time(s) cond(A) \n"); printf("--------------------------------------------------------------------------------------------------------------------\n"); } /**/ for (expe = 0; expe<nb_expe; expe++){ jobU = t_jobU[expe] ; jobVT = t_jobVT[expe] ; m = t_m[expe] ; n = t_n[expe] ; nb = t_nb[expe] ; nprow = t_nprow[expe] ; npcol = t_npcol[expe] ; if (nb>n) nb = n; if (nprow*npcol>nprocs_mpi){ if (myrank_mpi==0) printf(" **** ERROR : we do not have enough processes available to make a p-by-q process grid ***\n"); printf(" **** Bye-bye ***\n"); MPI_Finalize(); exit(1); } /**/ Cblacs_pinfo( &iam, &nprocs ) ; Cblacs_get( -1, 0, &ictxt ); Cblacs_gridinit( &ictxt, "Row", nprow, npcol ); Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol ); /**/ min_mn = min(m,n); /**/ //if (iam==0) //printf("\tm=%d\tn = %d\t\t(%d,%d)\t%dx%d\n",m,n,nprow,npcol,nb,nb); //printf("Hello World, I am proc %d over %d for MPI, proc %d over %d for BLACS in position (%d,%d) in the process grid\n", //myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol); /* * * Work only the process in the process grid * */ //if ((myrow < nprow)&(mycol < npcol)){ if ((myrow>-1)&(mycol>-1)&(myrow<nprow)&(mycol<npcol)){ /* * * Compute the size of the local matrices (thanks to numroc) * */ mpA = numroc_( &m , &nb, &myrow, &izero, &nprow ); nqA = numroc_( &n , &nb, &mycol, &izero, &npcol ); mpU = numroc_( &m , &nb, &myrow, &izero, &nprow ); nqU = numroc_( &min_mn, &nb, &mycol, &izero, &npcol ); mpVT = numroc_( &min_mn, &nb, &myrow, &izero, &nprow ); nqVT = numroc_( &n , &nb, &mycol, &izero, &npcol ); /* * * Allocate and fill the matrices A and B * */ A = (float *)calloc(mpA*nqA,sizeof(float)) ; if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } /**/ // seed = iam*(mpA*nqA*2); srand(seed); idist = 2; iseed[0] = mpA%4096; iseed[1] = iam%4096; iseed[2] = nqA%4096; iseed[3] = 23; /**/ k = 0; for (i = 0; i < mpA; i++) { for (j = 0; j < nqA; j++) { slarnv_( &idist, iseed, &ione, &(A[k]) ); k++; } } /* * * Initialize the array descriptor for the distributed matrices xA, U and VT * */ itemp = max( 1, mpA ); descinit_( descA, &m, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info ); itemp = max( 1, mpA ); descinit_( descU, &m, &min_mn, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info ); itemp = max( 1, mpVT ); descinit_( descVT, &min_mn, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info ); /**/ eps = pslamch_( &ictxt, "Epsilon" ); /**/ if ( ((jobU=='V')&(jobVT=='N')) ||(jobU == 'A' )||(jobVT=='A')){ nbtestcase++; U_VN = (float *)calloc(mpU*nqU,sizeof(float)) ; if (U_VN==NULL){ printf("error of memory allocation U_VN on proc %dx%d\n",myrow,mycol); exit(0); } S_VN = (float *)calloc(min_mn,sizeof(float)) ; if (S_VN==NULL){ printf("error of memory allocation S_VN on proc %dx%d\n",myrow,mycol); exit(0); } infoVN = driver_psgesvd( 'V', 'N', m, n, A, 1, 1, descA, S_VN, U_VN, 1, 1, descU, VT_VN, 1, 1, descVT, &MPIelapsedVN); orthU_VN = verif_orthogonality(m,min_mn,U_VN , 1, 1, descU); res_repres_VN = verif_repres_VN( m, n, A, 1, 1, descA, U_VN, 1, 1, descU, S_VN); if (infoVN==min_mn+1) hetereogeneityVN = 'H'; else hetereogeneityVN = 'N'; if ( iam==0 ) printf(" V N %6d %6d %3d %3d %3d || %3d %c %7.1e %7.1e %8.2f %7.1e\n", m,n,nb,nprow,npcol,infoVN,hetereogeneityVN,res_repres_VN/(S_VN[0]/S_VN[min_mn-1]), orthU_VN,MPIelapsedVN,S_VN[0]/S_VN[min_mn-1]); if (infoVN==min_mn+1) nbhetereogeneity++ ; else if ((res_repres_VN/eps/(S_VN[0]/S_VN[min_mn-1])>threshold)||(orthU_VN/eps>threshold)||(infoVN!=0)) nbfailure++; } /**/ if (((jobU=='N')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ nbtestcase++; VT_NV = (float *)calloc(mpVT*nqVT,sizeof(float)) ; if (VT_NV==NULL){ printf("error of memory allocation VT_NV on proc %dx%d\n",myrow,mycol); exit(0); } S_NV = (float *)calloc(min_mn,sizeof(float)) ; if (S_NV==NULL){ printf("error of memory allocation S_NV on proc %dx%d\n",myrow,mycol); exit(0); } infoNV = driver_psgesvd( 'N', 'V', m, n, A, 1, 1, descA, S_NV, U_NV, 1, 1, descU, VT_NV, 1, 1, descVT, &MPIelapsedNV); orthVT_NV = verif_orthogonality(min_mn,n,VT_NV, 1, 1, descVT); res_repres_NV = verif_repres_NV( m, n, A, 1, 1, descA, VT_NV, 1, 1, descVT, S_NV); if (infoNV==min_mn+1) hetereogeneityNV = 'H'; else hetereogeneityNV = 'N'; if ( iam==0 ) printf(" N V %6d %6d %3d %3d %3d || %3d %c %7.1e %7.1e %8.2f %7.1e\n", m,n,nb,nprow,npcol,infoNV,hetereogeneityNV,res_repres_NV/(S_NV[0]/S_NV[min_mn-1]), orthVT_NV,MPIelapsedNV,S_NV[0]/S_NV[min_mn-1]); if (infoNV==min_mn+1) nbhetereogeneity++ ; else if ((res_repres_NV/eps/(S_NV[0]/S_NV[min_mn-1])>threshold)||(orthVT_NV/eps>threshold)||(infoNV!=0)) nbfailure++; } /**/ if ( ((jobU=='N')&(jobVT=='N')) || ((jobU=='V')&(jobVT=='V')) || (jobU == 'A' ) || (jobVT=='A') ) { nbtestcase++; U_VV = (float *)calloc(mpU*nqU,sizeof(float)) ; if (U_VV==NULL){ printf("error of memory allocation U_VV on proc %dx%d\n",myrow,mycol); exit(0); } VT_VV = (float *)calloc(mpVT*nqVT,sizeof(float)) ; if (VT_VV==NULL){ printf("error of memory allocation VT_VV on proc %dx%d\n",myrow,mycol); exit(0); } S_VV = (float *)calloc(min_mn,sizeof(float)) ; if (S_VV==NULL){ printf("error of memory allocation S_VV on proc %dx%d\n",myrow,mycol); exit(0); } infoVV = driver_psgesvd( 'V', 'V', m, n, A, 1, 1, descA, S_VV, U_VV, 1, 1, descU, VT_VV, 1, 1, descVT, &MPIelapsedVV); orthU_VV = verif_orthogonality(m,min_mn,U_VV , 1, 1, descU); orthVT_VV = verif_orthogonality(min_mn,n,VT_VV, 1, 1, descVT); residF = verif_representativity( m, n, A, 1, 1, descA, U_VV, 1, 1, descU, VT_VV, 1, 1, descVT, S_VV); if (infoVV==min_mn+1) hetereogeneityVV = 'H'; else hetereogeneityVV = 'N'; if ( iam==0 ) printf(" V V %6d %6d %3d %3d %3d || %3d %c %7.1e %7.1e %7.1e %8.2f %7.1e\n", m,n,nb,nprow,npcol,infoVV,hetereogeneityVV,residF,orthU_VV,orthVT_VV,MPIelapsedVV,S_VV[0]/S_VV[min_mn-1]); if (infoVV==min_mn+1) nbhetereogeneity++ ; else if ((residF/eps>threshold)||(orthU_VV/eps>threshold)||(orthVT_VV/eps>threshold)||(infoVV!=0)) nbfailure++; } /**/ if (((jobU=='N')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ nbtestcase++; S_NN = (float *)calloc(min_mn,sizeof(float)) ; if (S_NN==NULL){ printf("error of memory allocation S_NN on proc %dx%d\n",myrow,mycol); exit(0); } infoNN = driver_psgesvd( 'N', 'N', m, n, A, 1, 1, descA, S_NN, U_NN, 1, 1, descU, VT_NN, 1, 1, descVT, &MPIelapsedNN); S_res_NN = (float *)calloc(min_mn,sizeof(float)) ; if (S_res_NN==NULL){ printf("error of memory allocation S on proc %dx%d\n",myrow,mycol); exit(0); } scopy_(&min_mn,S_VV,&ione,S_res_NN,&ione); saxpy_ (&min_mn,&rtmone,S_NN,&ione,S_res_NN,&ione); residS_NN = snrm2_(&min_mn,S_res_NN,&ione) / snrm2_(&min_mn,S_VV,&ione); free(S_res_NN); if (infoNN==min_mn+1) hetereogeneityNN = 'H'; else hetereogeneityNN = 'N'; if ( iam==0 ) printf(" N N %6d %6d %3d %3d %3d || %3d %c %7.1e %8.2f %7.1e\n", m,n,nb,nprow,npcol,infoNN,hetereogeneityNN,residS_NN,MPIelapsedNN,S_NN[0]/S_NN[min_mn-1]); if (infoNN==min_mn+1) nbhetereogeneity++ ; else if ((residS_NN/eps>threshold)||(infoNN!=0)) nbfailure++; } /**/ if (((jobU=='V')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ free(S_VN); free(U_VN); } if (((jobU=='N')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ free(VT_NV); free(S_NV); } if (((jobU=='N')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ free(S_NN); } if (((jobU=='N')&(jobVT=='N'))||((jobU=='V')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ free(U_VV); free(S_VV); free(VT_VV);} free(A); Cblacs_gridexit( 0 ); } /* * Print ending messages */ } if ( iam==0 ){ printf("--------------------------------------------------------------------------------------------------------------------\n"); printf(" [ nbhetereogeneity = %d / %d ]\n",nbhetereogeneity, nbtestcase); printf(" [ nbfailure = %d / %d ]\n",nbfailure, nbtestcase-nbhetereogeneity); printf("--------------------------------------------------------------------------------------------------------------------\n"); printf("\n"); } /**/ free(t_jobU ); free(t_jobVT ); free(t_m ); free(t_n ); free(t_nb ); free(t_nprow ); free(t_npcol ); MPI_Finalize(); exit(0); }
/* Subroutine */ int pssaupd_(integer *comm, integer *ido, char *bmat, integer *n, char *which, integer *nev, real *tol, real *resid, integer *ncv, real *v, integer *ldv, integer *iparam, integer *ipntr, real *workd, real *workl, integer *lworkl, integer *info, ftnlen bmat_len, ftnlen which_len) { /* Format strings */ static char fmt_1000[] = "(//,5x,\002===================================" "=======\002,/5x,\002= Symmetric implicit Arnoldi update code " "=\002,/5x,\002= Version Number:\002,\002 2.1\002,19x,\002 =\002," "/5x,\002= Version Date: \002,\002 3/19/97\002,14x,\002 =\002,/5" "x,\002==========================================\002,/5x,\002= S" "ummary of timing statistics =\002,/5x,\002============" "==============================\002,//)"; static char fmt_1100[] = "(5x,\002Total number update iterations " " = \002,i5,/5x,\002Total number of OP*x operations " " = \002,i5,/5x,\002Total number of B*x operations = " "\002,i5,/5x,\002Total number of reorthogonalization steps = " "\002,i5,/5x,\002Total number of iterative refinement steps = " "\002,i5,/5x,\002Total number of restart steps = " "\002,i5,/5x,\002Total time in user OP*x operation = " "\002,f12.6,/5x,\002Total time in user B*x operation =" " \002,f12.6,/5x,\002Total time in Arnoldi update routine = " "\002,f12.6,/5x,\002Total time in p_saup2 routine =" " \002,f12.6,/5x,\002Total time in basic Arnoldi iteration loop = " "\002,f12.6,/5x,\002Total time in reorthogonalization phase =" " \002,f12.6,/5x,\002Total time in (re)start vector generation = " "\002,f12.6,/5x,\002Total time in trid eigenvalue subproblem =" " \002,f12.6,/5x,\002Total time in getting the shifts = " "\002,f12.6,/5x,\002Total time in applying the shifts =" " \002,f12.6,/5x,\002Total time in convergence testing = " "\002,f12.6)"; /* System generated locals */ integer v_dim1, v_offset, i__1, i__2; /* Builtin functions */ integer s_cmp(char *, char *, ftnlen, ftnlen), s_wsfe(cilist *), e_wsfe( void), do_fio(integer *, char *, ftnlen); /* Local variables */ static integer j; static real t0, t1; static integer nb, ih, iq, np, iw, ldh, ldq, nev0, mode, ierr, myid, iupd, next, ritz; extern /* Subroutine */ int mpi_comm_rank__(integer *, integer *, integer *); static integer bounds, ishift, msglvl, mxiter; extern /* Subroutine */ int pivout_(integer *, integer *, integer *, integer *, integer *, char *, ftnlen), second_(real *), sstats_( void), psvout_(integer *, integer *, integer *, real *, integer *, char *, ftnlen), pssaup2_(integer *, integer *, char *, integer * , char *, integer *, integer *, real *, real *, integer *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen); extern doublereal pslamch_(integer *, char *, ftnlen); /* Fortran I/O blocks */ static cilist io___22 = { 0, 6, 0, fmt_1000, 0 }; static cilist io___23 = { 0, 6, 0, fmt_1100, 0 }; /* %------------------% */ /* | MPI Variables | */ /* %------------------% */ /* /+ */ /* * */ /* * (C) 1993 by Argonne National Laboratory and Mississipi State University. */ /* * All rights reserved. See COPYRIGHT in top-level directory. */ /* +/ */ /* /+ user include file for MPI programs, with no dependencies +/ */ /* /+ return codes +/ */ /* We handle datatypes by putting the variables that hold them into */ /* common. This way, a Fortran program can directly use the various */ /* datatypes and can even give them to C programs. */ /* MPI_BOTTOM needs to be a known address; here we put it at the */ /* beginning of the common block. The point-to-point and collective */ /* routines know about MPI_BOTTOM, but MPI_TYPE_STRUCT as yet does not. */ /* The types MPI_INTEGER1,2,4 and MPI_REAL4,8 are OPTIONAL. */ /* Their values are zero if they are not available. Note that */ /* using these reduces the portability of code (though may enhance */ /* portability between Crays and other systems) */ /* All other MPI routines are subroutines */ /* The attribute copy/delete functions are symbols that can be passed */ /* to MPI routines */ /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ /* Parameter adjustments */ --workd; --resid; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --iparam; --ipntr; --workl; /* Function Body */ if (*ido == 0) { /* %-------------------------------% */ /* | Initialize timing statistics | */ /* | & message level for debugging | */ /* %-------------------------------% */ sstats_(); second_(&t0); msglvl = debug_1.msaupd; ierr = 0; ishift = iparam[1]; mxiter = iparam[3]; /* nb = iparam(4) */ nb = 1; /* %--------------------------------------------% */ /* | Revision 2 performs only implicit restart. | */ /* %--------------------------------------------% */ iupd = 1; mode = iparam[7]; /* %----------------% */ /* | Error checking | */ /* %----------------% */ if (*n <= 0) { ierr = -1; } else if (*nev <= 0) { ierr = -2; } else if (*ncv <= *nev) { ierr = -3; } /* %----------------------------------------------% */ /* | NP is the number of additional steps to | */ /* | extend the length NEV Lanczos factorization. | */ /* %----------------------------------------------% */ np = *ncv - *nev; if (mxiter <= 0) { ierr = -4; } if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LA", ( ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "SA", (ftnlen)2, ( ftnlen)2) != 0 && s_cmp(which, "BE", (ftnlen)2, (ftnlen)2) != 0) { ierr = -5; } if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G') { ierr = -6; } /* Computing 2nd power */ i__1 = *ncv; if (*lworkl < i__1 * i__1 + (*ncv << 3)) { ierr = -7; } if (mode < 1 || mode > 5) { ierr = -10; } else if (mode == 1 && *(unsigned char *)bmat == 'G') { ierr = -11; } else if (ishift < 0 || ishift > 1) { ierr = -12; } else if (*nev == 1 && s_cmp(which, "BE", (ftnlen)2, (ftnlen)2) == 0) { ierr = -13; } /* %------------% */ /* | Error Exit | */ /* %------------% */ if (ierr != 0) { *info = ierr; *ido = 99; goto L9000; } /* %------------------------% */ /* | Set default parameters | */ /* %------------------------% */ if (nb <= 0) { nb = 1; } if (*tol <= 0.f) { *tol = pslamch_(comm, "EpsMach", (ftnlen)7); } /* %----------------------------------------------% */ /* | NP is the number of additional steps to | */ /* | extend the length NEV Lanczos factorization. | */ /* | NEV0 is the local variable designating the | */ /* | size of the invariant subspace desired. | */ /* %----------------------------------------------% */ np = *ncv - *nev; nev0 = *nev; /* %-----------------------------% */ /* | Zero out internal workspace | */ /* %-----------------------------% */ /* Computing 2nd power */ i__2 = *ncv; i__1 = i__2 * i__2 + (*ncv << 3); for (j = 1; j <= i__1; ++j) { workl[j] = 0.f; /* L10: */ } /* %-------------------------------------------------------% */ /* | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | */ /* | etc... and the remaining workspace. | */ /* | Also update pointer to be used on output. | */ /* | Memory is laid out as follows: | */ /* | workl(1:2*ncv) := generated tridiagonal matrix | */ /* | workl(2*ncv+1:2*ncv+ncv) := ritz values | */ /* | workl(3*ncv+1:3*ncv+ncv) := computed error bounds | */ /* | workl(4*ncv+1:4*ncv+ncv*ncv) := rotation matrix Q | */ /* | workl(4*ncv+ncv*ncv+1:7*ncv+ncv*ncv) := workspace | */ /* %-------------------------------------------------------% */ ldh = *ncv; ldq = *ncv; ih = 1; ritz = ih + (ldh << 1); bounds = ritz + *ncv; iq = bounds + *ncv; /* Computing 2nd power */ i__1 = *ncv; iw = iq + i__1 * i__1; next = iw + *ncv * 3; ipntr[4] = next; ipntr[5] = ih; ipntr[6] = ritz; ipntr[7] = bounds; ipntr[11] = iw; } /* %-------------------------------------------------------% */ /* | Carry out the Implicitly restarted Lanczos Iteration. | */ /* %-------------------------------------------------------% */ pssaup2_(comm, ido, bmat, n, which, &nev0, &np, tol, &resid[1], &mode, & iupd, &ishift, &mxiter, &v[v_offset], ldv, &workl[ih], &ldh, & workl[ritz], &workl[bounds], &workl[iq], &ldq, &workl[iw], &ipntr[ 1], &workd[1], info, (ftnlen)1, (ftnlen)2); /* %--------------------------------------------------% */ /* | ido .ne. 99 implies use of reverse communication | */ /* | to compute operations involving OP or shifts. | */ /* %--------------------------------------------------% */ if (*ido == 3) { iparam[8] = np; } if (*ido != 99) { goto L9000; } iparam[3] = mxiter; iparam[5] = np; iparam[9] = timing_1.nopx; iparam[10] = timing_1.nbx; iparam[11] = timing_1.nrorth; /* %------------------------------------% */ /* | Exit if there was an informational | */ /* | error within pssaup2. | */ /* %------------------------------------% */ if (*info < 0) { goto L9000; } if (*info == 2) { *info = 3; } if (msglvl > 0) { pivout_(comm, &debug_1.logfil, &c__1, &mxiter, &debug_1.ndigit, "_sa" "upd: number of update iterations taken", (ftnlen)41); pivout_(comm, &debug_1.logfil, &c__1, &np, &debug_1.ndigit, "_saupd:" " number of \"converged\" Ritz values", (ftnlen)41); psvout_(comm, &debug_1.logfil, &np, &workl[ritz], &debug_1.ndigit, "_saupd: final Ritz values", (ftnlen)25); psvout_(comm, &debug_1.logfil, &np, &workl[bounds], &debug_1.ndigit, "_saupd: corresponding error bounds", (ftnlen)34); } second_(&t1); timing_1.tsaupd = t1 - t0; if (msglvl > 0) { mpi_comm_rank__(comm, &myid, &ierr); if (myid == 0) { /* %--------------------------------------------------------% */ /* | Version Number & Version Date are defined in version.h | */ /* %--------------------------------------------------------% */ s_wsfe(&io___22); e_wsfe(); s_wsfe(&io___23); do_fio(&c__1, (char *)&mxiter, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&timing_1.nopx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&timing_1.nbx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&timing_1.nrorth, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&timing_1.nitref, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&timing_1.nrstrt, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&timing_1.tmvopx, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tmvbx, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tsaupd, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tsaup2, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tsaitr, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.titref, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tgetv0, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tseigt, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tsgets, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tsapps, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&timing_1.tsconv, (ftnlen)sizeof(real)); e_wsfe(); } } L9000: return 0; /* %----------------% */ /* | End of pssaupd | */ /* %----------------% */ } /* pssaupd_ */
/* Subroutine */ int psnaitr_(integer *comm, integer *ido, char *bmat, integer *n, integer *k, integer *np, integer *nb, real *resid, real * rnorm, real *v, integer *ldv, real *h__, integer *ldh, integer *ipntr, real *workd, real *workl, integer *info, ftnlen bmat_len) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j; static real t0, t1, t2, t3, t4, t5, rnorm_buf__; static integer jj, ipj, irj, ivj; static real ulp, tst1; static integer ierr, iter; static real unfl, ovfl; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static integer itry; static real temp1; static logical orth1, orth2, step3, step4; static real betaj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer infol; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); static real xtemp[2], wnorm; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, integer *), mpi_allreduce__(real *, real *, integer *, integer *, integer *, integer *, integer *); static real rnorm1; extern /* Subroutine */ int slabad_(real *, real *); static logical rstart; static integer msglvl; static real smlnum; extern /* Subroutine */ int psmout_(integer *, integer *, integer *, integer *, real *, integer *, integer *, char *, ftnlen), pivout_( integer *, integer *, integer *, integer *, integer *, char *, ftnlen), second_(real *); extern doublereal slanhs_(char *, integer *, real *, integer *, real *, ftnlen); extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *, ftnlen), psvout_(integer *, integer *, integer *, real *, integer *, char *, ftnlen), psgetv0_(integer *, integer *, char *, integer *, logical *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, real *, integer *, ftnlen); extern doublereal psnorm2_(integer *, integer *, real *, integer *), pslamch_(integer *, char *, ftnlen); /* %---------------% */ /* | MPI Variables | */ /* %---------------% */ /* /+ */ /* * */ /* * (C) 1993 by Argonne National Laboratory and Mississipi State University. */ /* * All rights reserved. See COPYRIGHT in top-level directory. */ /* +/ */ /* /+ user include file for MPI programs, with no dependencies +/ */ /* /+ return codes +/ */ /* We handle datatypes by putting the variables that hold them into */ /* common. This way, a Fortran program can directly use the various */ /* datatypes and can even give them to C programs. */ /* MPI_BOTTOM needs to be a known address; here we put it at the */ /* beginning of the common block. The point-to-point and collective */ /* routines know about MPI_BOTTOM, but MPI_TYPE_STRUCT as yet does not. */ /* The types MPI_INTEGER1,2,4 and MPI_REAL4,8 are OPTIONAL. */ /* Their values are zero if they are not available. Note that */ /* using these reduces the portability of code (though may enhance */ /* portability between Crays and other systems) */ /* All other MPI routines are subroutines */ /* The attribute copy/delete functions are symbols that can be passed */ /* to MPI routines */ /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %-----------------------% */ /* | Local Array Arguments | */ /* %-----------------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------% */ /* | Data statements | */ /* %-----------------% */ /* Parameter adjustments */ --workd; --resid; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --workl; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --ipntr; /* Function Body */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ if (first) { /* %-----------------------------------------% */ /* | Set machine-dependent constants for the | */ /* | the splitting and deflation criterion. | */ /* | If norm(H) <= sqrt(OVFL), | */ /* | overflow should not occur. | */ /* | REFERENCE: LAPACK subroutine slahqr | */ /* %-----------------------------------------% */ unfl = pslamch_(comm, "safe minimum", (ftnlen)12); ovfl = 1.f / unfl; slabad_(&unfl, &ovfl); ulp = pslamch_(comm, "precision", (ftnlen)9); smlnum = unfl * (*n / ulp); first = FALSE_; } if (*ido == 0) { /* %-------------------------------% */ /* | Initialize timing statistics | */ /* | & message level for debugging | */ /* %-------------------------------% */ second_(&t0); msglvl = debug_1.mnaitr; /* %------------------------------% */ /* | Initial call to this routine | */ /* %------------------------------% */ *info = 0; step3 = FALSE_; step4 = FALSE_; rstart = FALSE_; orth1 = FALSE_; orth2 = FALSE_; j = *k + 1; ipj = 1; irj = ipj + *n; ivj = irj + *n; } /* %-------------------------------------------------% */ /* | When in reverse communication mode one of: | */ /* | STEP3, STEP4, ORTH1, ORTH2, RSTART | */ /* | will be .true. when .... | */ /* | STEP3: return from computing OP*v_{j}. | */ /* | STEP4: return from computing B-norm of OP*v_{j} | */ /* | ORTH1: return from computing B-norm of r_{j+1} | */ /* | ORTH2: return from computing B-norm of | */ /* | correction to the residual vector. | */ /* | RSTART: return from OP computations needed by | */ /* | psgetv0. | */ /* %-------------------------------------------------% */ if (step3) { goto L50; } if (step4) { goto L60; } if (orth1) { goto L70; } if (orth2) { goto L90; } if (rstart) { goto L30; } /* %-----------------------------% */ /* | Else this is the first step | */ /* %-----------------------------% */ /* %--------------------------------------------------------------% */ /* | | */ /* | A R N O L D I I T E R A T I O N L O O P | */ /* | | */ /* | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */ /* %--------------------------------------------------------------% */ L1000: if (msglvl > 1) { pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: " "generating Arnoldi vector number", (ftnlen)40); psvout_(comm, &debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_nait" "r: B-norm of the current residual is", (ftnlen)41); } /* %---------------------------------------------------% */ /* | STEP 1: Check if the B norm of j-th residual | */ /* | vector is zero. Equivalent to determing whether | */ /* | an exact j-step Arnoldi factorization is present. | */ /* %---------------------------------------------------% */ betaj = *rnorm; if (*rnorm > 0.f) { goto L40; } /* %---------------------------------------------------% */ /* | Invariant subspace found, generate a new starting | */ /* | vector which is orthogonal to the current Arnoldi | */ /* | basis and continue the iteration. | */ /* %---------------------------------------------------% */ if (msglvl > 0) { pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: " "****** RESTART AT STEP ******", (ftnlen)37); } /* %---------------------------------------------% */ /* | ITRY is the loop variable that controls the | */ /* | maximum amount of times that a restart is | */ /* | attempted. NRSTRT is used by stat.h | */ /* %---------------------------------------------% */ betaj = 0.f; ++timing_1.nrstrt; itry = 1; L20: rstart = TRUE_; *ido = 0; L30: /* %--------------------------------------% */ /* | If in reverse communication mode and | */ /* | RSTART = .true. flow returns here. | */ /* %--------------------------------------% */ psgetv0_(comm, ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, & resid[1], rnorm, &ipntr[1], &workd[1], &workl[1], &ierr, (ftnlen) 1); if (*ido != 99) { goto L9000; } if (ierr < 0) { ++itry; if (itry <= 3) { goto L20; } /* %------------------------------------------------% */ /* | Give up after several restart attempts. | */ /* | Set INFO to the size of the invariant subspace | */ /* | which spans OP and exit. | */ /* %------------------------------------------------% */ *info = j - 1; second_(&t1); timing_1.tnaitr += t1 - t0; *ido = 99; goto L9000; } L40: /* %---------------------------------------------------------% */ /* | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm | */ /* | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */ /* | when reciprocating a small RNORM, test against lower | */ /* | machine bound. | */ /* %---------------------------------------------------------% */ scopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1); if (*rnorm >= unfl) { temp1 = 1.f / *rnorm; sscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1); sscal_(n, &temp1, &workd[ipj], &c__1); } else { /* %-----------------------------------------% */ /* | To scale both v_{j} and p_{j} carefully | */ /* | use LAPACK routine SLASCL | */ /* %-----------------------------------------% */ slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &v[j * v_dim1 + 1], n, &infol, (ftnlen)7); slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &workd[ipj], n, &infol, (ftnlen)7); } /* %------------------------------------------------------% */ /* | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */ /* | Note that this is not quite yet r_{j}. See STEP 4 | */ /* %------------------------------------------------------% */ step3 = TRUE_; ++timing_1.nopx; second_(&t2); scopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1); ipntr[1] = ivj; ipntr[2] = irj; ipntr[3] = ipj; *ido = 1; /* %-----------------------------------% */ /* | Exit in order to compute OP*v_{j} | */ /* %-----------------------------------% */ goto L9000; L50: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | */ /* | if step3 = .true. | */ /* %----------------------------------% */ second_(&t3); timing_1.tmvopx += t3 - t2; step3 = FALSE_; /* %------------------------------------------% */ /* | Put another copy of OP*v_{j} into RESID. | */ /* %------------------------------------------% */ scopy_(n, &workd[irj], &c__1, &resid[1], &c__1); /* %---------------------------------------% */ /* | STEP 4: Finish extending the Arnoldi | */ /* | factorization to length j. | */ /* %---------------------------------------% */ second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; step4 = TRUE_; ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-------------------------------------% */ /* | Exit in order to compute B*OP*v_{j} | */ /* %-------------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L60: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */ /* | if step4 = .true. | */ /* %----------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } step4 = FALSE_; /* %-------------------------------------% */ /* | The following is needed for STEP 5. | */ /* | Compute the B-norm of OP*v_{j}. | */ /* %-------------------------------------% */ if (*(unsigned char *)bmat == 'G') { rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); mpi_allreduce__(&rnorm_buf__, &wnorm, &c__1, &mpipriv_1.mpi_real__, & mpipriv_1.mpi_sum__, comm, &ierr); wnorm = sqrt((dabs(wnorm))); } else if (*(unsigned char *)bmat == 'I') { wnorm = psnorm2_(comm, n, &resid[1], &c__1); } /* %-----------------------------------------% */ /* | Compute the j-th residual corresponding | */ /* | to the j step factorization. | */ /* | Use Classical Gram Schmidt and compute: | */ /* | w_{j} <- V_{j}^T * B * OP * v_{j} | */ /* | r_{j} <- OP*v_{j} - V_{j} * w_{j} | */ /* %-----------------------------------------% */ /* %------------------------------------------% */ /* | Compute the j Fourier coefficients w_{j} | */ /* | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | */ /* %------------------------------------------% */ sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b48, &workl[1], &c__1, (ftnlen)1); mpi_allreduce__(&workl[1], &h__[j * h_dim1 + 1], &j, & mpipriv_1.mpi_real__, &mpipriv_1.mpi_sum__, comm, &ierr); /* %--------------------------------------% */ /* | Orthogonalize r_{j} against V_{j}. | */ /* | RESID contains OP*v_{j}. See STEP 3. | */ /* %--------------------------------------% */ sgemv_("N", n, &j, &c_b51, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1, &c_b25, &resid[1], &c__1, (ftnlen)1); if (j > 1) { h__[j + (j - 1) * h_dim1] = betaj; } second_(&t4); orth1 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %----------------------------------% */ /* | Exit in order to compute B*r_{j} | */ /* %----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L70: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH1 = .true. | */ /* | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } orth1 = FALSE_; /* %------------------------------% */ /* | Compute the B-norm of r_{j}. | */ /* %------------------------------% */ if (*(unsigned char *)bmat == 'G') { rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); mpi_allreduce__(&rnorm_buf__, rnorm, &c__1, &mpipriv_1.mpi_real__, & mpipriv_1.mpi_sum__, comm, &ierr); *rnorm = sqrt((dabs(*rnorm))); } else if (*(unsigned char *)bmat == 'I') { *rnorm = psnorm2_(comm, n, &resid[1], &c__1); } /* %-----------------------------------------------------------% */ /* | STEP 5: Re-orthogonalization / Iterative refinement phase | */ /* | Maximum NITER_ITREF tries. | */ /* | | */ /* | s = V_{j}^T * B * r_{j} | */ /* | r_{j} = r_{j} - V_{j}*s | */ /* | alphaj = alphaj + s_{j} | */ /* | | */ /* | The stopping criteria used for iterative refinement is | */ /* | discussed in Parlett's book SEP, page 107 and in Gragg & | */ /* | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. | */ /* | Determine if we need to correct the residual. The goal is | */ /* | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || | */ /* | The following test determines whether the sine of the | */ /* | angle between OP*x and the computed residual is less | */ /* | than or equal to 0.717. | */ /* %-----------------------------------------------------------% */ if (*rnorm > wnorm * .717f) { goto L100; } iter = 0; ++timing_1.nrorth; /* %---------------------------------------------------% */ /* | Enter the Iterative refinement phase. If further | */ /* | refinement is necessary, loop back here. The loop | */ /* | variable is ITER. Perform a step of Classical | */ /* | Gram-Schmidt using all the Arnoldi vectors V_{j} | */ /* %---------------------------------------------------% */ L80: if (msglvl > 2) { xtemp[0] = wnorm; xtemp[1] = *rnorm; psvout_(comm, &debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_nait" "r: re-orthonalization; wnorm and rnorm are", (ftnlen)47); psvout_(comm, &debug_1.logfil, &j, &h__[j * h_dim1 + 1], & debug_1.ndigit, "_naitr: j-th column of H", (ftnlen)24); } /* %----------------------------------------------------% */ /* | Compute V_{j}^T * B * r_{j}. | */ /* | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */ /* %----------------------------------------------------% */ sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b48, &workl[j + 1], &c__1, (ftnlen)1); mpi_allreduce__(&workl[j + 1], &workl[1], &j, &mpipriv_1.mpi_real__, & mpipriv_1.mpi_sum__, comm, &ierr); /* %---------------------------------------------% */ /* | Compute the correction to the residual: | */ /* | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */ /* | The correction to H is v(:,1:J)*H(1:J,1:J) | */ /* | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | */ /* %---------------------------------------------% */ sgemv_("N", n, &j, &c_b51, &v[v_offset], ldv, &workl[1], &c__1, &c_b25, & resid[1], &c__1, (ftnlen)1); saxpy_(&j, &c_b25, &workl[1], &c__1, &h__[j * h_dim1 + 1], &c__1); orth2 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-----------------------------------% */ /* | Exit in order to compute B*r_{j}. | */ /* | r_{j} is the corrected residual. | */ /* %-----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L90: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH2 = .true. | */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } /* %-----------------------------------------------------% */ /* | Compute the B-norm of the corrected residual r_{j}. | */ /* %-----------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); mpi_allreduce__(&rnorm_buf__, &rnorm1, &c__1, &mpipriv_1.mpi_real__, & mpipriv_1.mpi_sum__, comm, &ierr); rnorm1 = sqrt((dabs(rnorm1))); } else if (*(unsigned char *)bmat == 'I') { rnorm1 = psnorm2_(comm, n, &resid[1], &c__1); } if (msglvl > 0 && iter > 0) { pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: " "Iterative refinement for Arnoldi residual", (ftnlen)49); if (msglvl > 2) { xtemp[0] = *rnorm; xtemp[1] = rnorm1; psvout_(comm, &debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: iterative refinement ; rnorm and rnorm1 are", ( ftnlen)51); } } /* %-----------------------------------------% */ /* | Determine if we need to perform another | */ /* | step of re-orthogonalization. | */ /* %-----------------------------------------% */ if (rnorm1 > *rnorm * .717f) { /* %---------------------------------------% */ /* | No need for further refinement. | */ /* | The cosine of the angle between the | */ /* | corrected residual vector and the old | */ /* | residual vector is greater than 0.717 | */ /* | In other words the corrected residual | */ /* | and the old residual vector share an | */ /* | angle of less than arcCOS(0.717) | */ /* %---------------------------------------% */ *rnorm = rnorm1; } else { /* %-------------------------------------------% */ /* | Another step of iterative refinement step | */ /* | is required. NITREF is used by stat.h | */ /* %-------------------------------------------% */ ++timing_1.nitref; *rnorm = rnorm1; ++iter; if (iter <= 1) { goto L80; } /* %-------------------------------------------------% */ /* | Otherwise RESID is numerically in the span of V | */ /* %-------------------------------------------------% */ i__1 = *n; for (jj = 1; jj <= i__1; ++jj) { resid[jj] = 0.f; /* L95: */ } *rnorm = 0.f; } /* %----------------------------------------------% */ /* | Branch here directly if iterative refinement | */ /* | wasn't necessary or after at most NITER_REF | */ /* | steps of iterative refinement. | */ /* %----------------------------------------------% */ L100: rstart = FALSE_; orth2 = FALSE_; second_(&t5); timing_1.titref += t5 - t4; /* %------------------------------------% */ /* | STEP 6: Update j = j+1; Continue | */ /* %------------------------------------% */ ++j; if (j > *k + *np) { second_(&t1); timing_1.tnaitr += t1 - t0; *ido = 99; i__1 = *k + *np - 1; for (i__ = max(1,*k); i__ <= i__1; ++i__) { /* %--------------------------------------------% */ /* | Check for splitting and deflation. | */ /* | Use a standard test as in the QR algorithm | */ /* | REFERENCE: LAPACK subroutine slahqr | */ /* %--------------------------------------------% */ tst1 = (r__1 = h__[i__ + i__ * h_dim1], dabs(r__1)) + (r__2 = h__[ i__ + 1 + (i__ + 1) * h_dim1], dabs(r__2)); if (tst1 == 0.f) { i__2 = *k + *np; tst1 = slanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1] , (ftnlen)1); } /* Computing MAX */ r__2 = ulp * tst1; if ((r__1 = h__[i__ + 1 + i__ * h_dim1], dabs(r__1)) <= dmax(r__2, smlnum)) { h__[i__ + 1 + i__ * h_dim1] = 0.f; } /* L110: */ } if (msglvl > 2) { i__1 = *k + *np; i__2 = *k + *np; psmout_(comm, &debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh, &debug_1.ndigit, "_naitr: Final upper Hessenberg matrix " "H of order K+NP", (ftnlen)53); } goto L9000; } /* %--------------------------------------------------------% */ /* | Loop back to extend the factorization by another step. | */ /* %--------------------------------------------------------% */ goto L1000; /* %---------------------------------------------------------------% */ /* | | */ /* | E N D O F M A I N I T E R A T I O N L O O P | */ /* | | */ /* %---------------------------------------------------------------% */ L9000: return 0; /* %----------------% */ /* | End of psnaitr | */ /* %----------------% */ } /* psnaitr_ */