/* Subroutine */ int snaup2_(integer *ido, char *bmat, integer *n, char * which, integer *nev, integer *np, real *tol, real *resid, integer * mode, integer *iupd, integer *ishift, integer *mxiter, real *v, integer *ldv, real *h__, integer *ldh, real *ritzr, real *ritzi, real *bounds, real *q, integer *ldq, real *workl, integer *ipntr, real * workd, integer *info, ftnlen bmat_len, ftnlen which_len) { /* System generated locals */ integer h_dim1, h_offset, q_dim1, q_offset, v_dim1, v_offset, i__1, i__2; real r__1, r__2; doublereal d__1; /* Builtin functions */ double pow_dd(doublereal *, doublereal *); integer s_cmp(char *, char *, ftnlen, ftnlen); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); double sqrt(doublereal); /* Local variables */ static integer j; static real t0, t1, t2, t3; static integer kp[4], np0, nev0; static real eps23; static integer ierr, iter; static real temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static logical getv0; extern doublereal snrm2_(integer *, real *, integer *); static logical cnorm; static integer nconv; static logical initv; static real rnorm; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), ivout_(integer *, integer *, integer *, integer *, char *, ftnlen), smout_(integer *, integer *, integer *, real *, integer *, integer *, char *, ftnlen), svout_(integer *, integer * , real *, integer *, char *, ftnlen), sgetv0_(integer *, char *, integer *, logical *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, ftnlen); extern doublereal slapy2_(real *, real *); static integer nevbef; extern doublereal slamch_(char *, ftnlen); extern /* Subroutine */ int second_(real *); static logical update; static char wprime[2]; static logical ushift; static integer kplusp, msglvl, nptemp, numcnv; extern /* Subroutine */ int snaitr_(integer *, char *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real * , integer *, integer *, real *, integer *, ftnlen), snconv_( integer *, real *, real *, real *, real *, integer *), sneigh_( real *, integer *, real *, integer *, real *, real *, real *, real *, integer *, real *, integer *), sngets_(integer *, char *, integer *, integer *, real *, real *, real *, real *, real *, ftnlen), snapps_(integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *), ssortc_(char *, logical *, integer *, real *, real *, real *, ftnlen); /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %-----------------------% */ /* | Local array arguments | */ /* %-----------------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ /* Parameter adjustments */ --workd; --resid; --workl; --bounds; --ritzi; --ritzr; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --ipntr; /* Function Body */ if (*ido == 0) { second_(&t0); msglvl = debug_1.mnaup2; /* %-------------------------------------% */ /* | Get the machine dependent constant. | */ /* %-------------------------------------% */ eps23 = slamch_("Epsilon-Machine", (ftnlen)15); d__1 = (doublereal) eps23; eps23 = pow_dd(&d__1, &c_b3); nev0 = *nev; np0 = *np; /* %-------------------------------------% */ /* | kplusp is the bound on the largest | */ /* | Lanczos factorization built. | */ /* | nconv is the current number of | */ /* | "converged" eigenvlues. | */ /* | iter is the counter on the current | */ /* | iteration step. | */ /* %-------------------------------------% */ kplusp = *nev + *np; nconv = 0; iter = 0; /* %---------------------------------------% */ /* | Set flags for computing the first NEV | */ /* | steps of the Arnoldi factorization. | */ /* %---------------------------------------% */ getv0 = TRUE_; update = FALSE_; ushift = FALSE_; cnorm = FALSE_; if (*info != 0) { /* %--------------------------------------------% */ /* | User provides the initial residual vector. | */ /* %--------------------------------------------% */ initv = TRUE_; *info = 0; } else { initv = FALSE_; } } /* %---------------------------------------------% */ /* | Get a possibly random starting vector and | */ /* | force it into the range of the operator OP. | */ /* %---------------------------------------------% */ /* L10: */ if (getv0) { sgetv0_(ido, bmat, &c__1, &initv, n, &c__1, &v[v_offset], ldv, &resid[ 1], &rnorm, &ipntr[1], &workd[1], info, (ftnlen)1); if (*ido != 99) { goto L9000; } if (rnorm == 0.f) { /* %-----------------------------------------% */ /* | The initial vector is zero. Error exit. | */ /* %-----------------------------------------% */ *info = -9; goto L1100; } getv0 = FALSE_; *ido = 0; } /* %-----------------------------------% */ /* | Back from reverse communication : | */ /* | continue with update step | */ /* %-----------------------------------% */ if (update) { goto L20; } /* %-------------------------------------------% */ /* | Back from computing user specified shifts | */ /* %-------------------------------------------% */ if (ushift) { goto L50; } /* %-------------------------------------% */ /* | Back from computing residual norm | */ /* | at the end of the current iteration | */ /* %-------------------------------------% */ if (cnorm) { goto L100; } /* %----------------------------------------------------------% */ /* | Compute the first NEV steps of the Arnoldi factorization | */ /* %----------------------------------------------------------% */ snaitr_(ido, bmat, n, &c__0, nev, mode, &resid[1], &rnorm, &v[v_offset], ldv, &h__[h_offset], ldh, &ipntr[1], &workd[1], info, (ftnlen)1); /* %---------------------------------------------------% */ /* | ido .ne. 99 implies use of reverse communication | */ /* | to compute operations involving OP and possibly B | */ /* %---------------------------------------------------% */ if (*ido != 99) { goto L9000; } if (*info > 0) { *np = *info; *mxiter = iter; *info = -9999; goto L1200; } /* %--------------------------------------------------------------% */ /* | | */ /* | M A I N ARNOLDI I T E R A T I O N L O O P | */ /* | Each iteration implicitly restarts the Arnoldi | */ /* | factorization in place. | */ /* | | */ /* %--------------------------------------------------------------% */ L1000: ++iter; if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &iter, &debug_1.ndigit, "_naup2: ****" " Start of major iteration number ****", (ftnlen)49); } /* %-----------------------------------------------------------% */ /* | Compute NP additional steps of the Arnoldi factorization. | */ /* | Adjust NP since NEV might have been updated by last call | */ /* | to the shift application routine snapps. | */ /* %-----------------------------------------------------------% */ *np = kplusp - *nev; if (msglvl > 1) { ivout_(&debug_1.logfil, &c__1, nev, &debug_1.ndigit, "_naup2: The le" "ngth of the current Arnoldi factorization", (ftnlen)55); ivout_(&debug_1.logfil, &c__1, np, &debug_1.ndigit, "_naup2: Extend " "the Arnoldi factorization by", (ftnlen)43); } /* %-----------------------------------------------------------% */ /* | Compute NP additional steps of the Arnoldi factorization. | */ /* %-----------------------------------------------------------% */ *ido = 0; L20: update = TRUE_; snaitr_(ido, bmat, n, nev, np, mode, &resid[1], &rnorm, &v[v_offset], ldv, &h__[h_offset], ldh, &ipntr[1], &workd[1], info, (ftnlen)1); /* %---------------------------------------------------% */ /* | ido .ne. 99 implies use of reverse communication | */ /* | to compute operations involving OP and possibly B | */ /* %---------------------------------------------------% */ if (*ido != 99) { goto L9000; } if (*info > 0) { *np = *info; *mxiter = iter; *info = -9999; goto L1200; } update = FALSE_; if (msglvl > 1) { svout_(&debug_1.logfil, &c__1, &rnorm, &debug_1.ndigit, "_naup2: Cor" "responding B-norm of the residual", (ftnlen)44); } /* %--------------------------------------------------------% */ /* | Compute the eigenvalues and corresponding error bounds | */ /* | of the current upper Hessenberg matrix. | */ /* %--------------------------------------------------------% */ sneigh_(&rnorm, &kplusp, &h__[h_offset], ldh, &ritzr[1], &ritzi[1], & bounds[1], &q[q_offset], ldq, &workl[1], &ierr); if (ierr != 0) { *info = -8; goto L1200; } /* %----------------------------------------------------% */ /* | Make a copy of eigenvalues and corresponding error | */ /* | bounds obtained from sneigh. | */ /* %----------------------------------------------------% */ /* Computing 2nd power */ i__1 = kplusp; scopy_(&kplusp, &ritzr[1], &c__1, &workl[i__1 * i__1 + 1], &c__1); /* Computing 2nd power */ i__1 = kplusp; scopy_(&kplusp, &ritzi[1], &c__1, &workl[i__1 * i__1 + kplusp + 1], &c__1) ; /* Computing 2nd power */ i__1 = kplusp; scopy_(&kplusp, &bounds[1], &c__1, &workl[i__1 * i__1 + (kplusp << 1) + 1] , &c__1); /* %---------------------------------------------------% */ /* | Select the wanted Ritz values and their bounds | */ /* | to be used in the convergence test. | */ /* | The wanted part of the spectrum and corresponding | */ /* | error bounds are in the last NEV loc. of RITZR, | */ /* | RITZI and BOUNDS respectively. The variables NEV | */ /* | and NP may be updated if the NEV-th wanted Ritz | */ /* | value has a non zero imaginary part. In this case | */ /* | NEV is increased by one and NP decreased by one. | */ /* | NOTE: The last two arguments of sngets are no | */ /* | longer used as of version 2.1. | */ /* %---------------------------------------------------% */ *nev = nev0; *np = np0; numcnv = *nev; sngets_(ishift, which, nev, np, &ritzr[1], &ritzi[1], &bounds[1], &workl[ 1], &workl[*np + 1], (ftnlen)2); if (*nev == nev0 + 1) { numcnv = nev0 + 1; } /* %-------------------% */ /* | Convergence test. | */ /* %-------------------% */ scopy_(nev, &bounds[*np + 1], &c__1, &workl[(*np << 1) + 1], &c__1); snconv_(nev, &ritzr[*np + 1], &ritzi[*np + 1], &workl[(*np << 1) + 1], tol, &nconv); if (msglvl > 2) { kp[0] = *nev; kp[1] = *np; kp[2] = numcnv; kp[3] = nconv; ivout_(&debug_1.logfil, &c__4, kp, &debug_1.ndigit, "_naup2: NEV, NP" ", NUMCNV, NCONV are", (ftnlen)34); svout_(&debug_1.logfil, &kplusp, &ritzr[1], &debug_1.ndigit, "_naup2" ": Real part of the eigenvalues of H", (ftnlen)41); svout_(&debug_1.logfil, &kplusp, &ritzi[1], &debug_1.ndigit, "_naup2" ": Imaginary part of the eigenvalues of H", (ftnlen)46); svout_(&debug_1.logfil, &kplusp, &bounds[1], &debug_1.ndigit, "_naup" "2: Ritz estimates of the current NCV Ritz values", (ftnlen)53) ; } /* %---------------------------------------------------------% */ /* | Count the number of unwanted Ritz values that have zero | */ /* | Ritz estimates. If any Ritz estimates are equal to zero | */ /* | then a leading block of H of order equal to at least | */ /* | the number of Ritz values with zero Ritz estimates has | */ /* | split off. None of these Ritz values may be removed by | */ /* | shifting. Decrease NP the number of shifts to apply. If | */ /* | no shifts may be applied, then prepare to exit | */ /* %---------------------------------------------------------% */ nptemp = *np; i__1 = nptemp; for (j = 1; j <= i__1; ++j) { if (bounds[j] == 0.f) { --(*np); ++(*nev); } /* L30: */ } if (nconv >= numcnv || iter > *mxiter || *np == 0) { if (msglvl > 4) { /* Computing 2nd power */ i__1 = kplusp; svout_(&debug_1.logfil, &kplusp, &workl[i__1 * i__1 + 1], & debug_1.ndigit, "_naup2: Real part of the eig computed b" "y _neigh:", (ftnlen)48); /* Computing 2nd power */ i__1 = kplusp; svout_(&debug_1.logfil, &kplusp, &workl[i__1 * i__1 + kplusp + 1], &debug_1.ndigit, "_naup2: Imag part of the eig computed" " by _neigh:", (ftnlen)48); /* Computing 2nd power */ i__1 = kplusp; svout_(&debug_1.logfil, &kplusp, &workl[i__1 * i__1 + (kplusp << 1) + 1], &debug_1.ndigit, "_naup2: Ritz eistmates comput" "ed by _neigh:", (ftnlen)42); } /* %------------------------------------------------% */ /* | Prepare to exit. Put the converged Ritz values | */ /* | and corresponding bounds in RITZ(1:NCONV) and | */ /* | BOUNDS(1:NCONV) respectively. Then sort. Be | */ /* | careful when NCONV > NP | */ /* %------------------------------------------------% */ /* %------------------------------------------% */ /* | Use h( 3,1 ) as storage to communicate | */ /* | rnorm to _neupd if needed | */ /* %------------------------------------------% */ h__[h_dim1 + 3] = rnorm; /* %----------------------------------------------% */ /* | To be consistent with sngets, we first do a | */ /* | pre-processing sort in order to keep complex | */ /* | conjugate pairs together. This is similar | */ /* | to the pre-processing sort used in sngets | */ /* | except that the sort is done in the opposite | */ /* | order. | */ /* %----------------------------------------------% */ if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SR", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SM", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LR", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "LR", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SI", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LM", (ftnlen)2, (ftnlen)2); } ssortc_(wprime, &c_true, &kplusp, &ritzr[1], &ritzi[1], &bounds[1], ( ftnlen)2); /* %----------------------------------------------% */ /* | Now sort Ritz values so that converged Ritz | */ /* | values appear within the first NEV locations | */ /* | of ritzr, ritzi and bounds, and the most | */ /* | desired one appears at the front. | */ /* %----------------------------------------------% */ if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SM", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "LR", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SR", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LR", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SI", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SI", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LI", (ftnlen)2, (ftnlen)2); } ssortc_(wprime, &c_true, &kplusp, &ritzr[1], &ritzi[1], &bounds[1], ( ftnlen)2); /* %--------------------------------------------------% */ /* | Scale the Ritz estimate of each Ritz value | */ /* | by 1 / max(eps23,magnitude of the Ritz value). | */ /* %--------------------------------------------------% */ i__1 = numcnv; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__1 = eps23, r__2 = slapy2_(&ritzr[j], &ritzi[j]); temp = dmax(r__1,r__2); bounds[j] /= temp; /* L35: */ } /* %----------------------------------------------------% */ /* | Sort the Ritz values according to the scaled Ritz | */ /* | esitmates. This will push all the converged ones | */ /* | towards the front of ritzr, ritzi, bounds | */ /* | (in the case when NCONV < NEV.) | */ /* %----------------------------------------------------% */ s_copy(wprime, "LR", (ftnlen)2, (ftnlen)2); ssortc_(wprime, &c_true, &numcnv, &bounds[1], &ritzr[1], &ritzi[1], ( ftnlen)2); /* %----------------------------------------------% */ /* | Scale the Ritz estimate back to its original | */ /* | value. | */ /* %----------------------------------------------% */ i__1 = numcnv; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__1 = eps23, r__2 = slapy2_(&ritzr[j], &ritzi[j]); temp = dmax(r__1,r__2); bounds[j] *= temp; /* L40: */ } /* %------------------------------------------------% */ /* | Sort the converged Ritz values again so that | */ /* | the "threshold" value appears at the front of | */ /* | ritzr, ritzi and bound. | */ /* %------------------------------------------------% */ ssortc_(which, &c_true, &nconv, &ritzr[1], &ritzi[1], &bounds[1], ( ftnlen)2); if (msglvl > 1) { svout_(&debug_1.logfil, &kplusp, &ritzr[1], &debug_1.ndigit, "_naup2: Sorted real part of the eigenvalues", (ftnlen)43) ; svout_(&debug_1.logfil, &kplusp, &ritzi[1], &debug_1.ndigit, "_naup2: Sorted imaginary part of the eigenvalues", ( ftnlen)48); svout_(&debug_1.logfil, &kplusp, &bounds[1], &debug_1.ndigit, "_naup2: Sorted ritz estimates.", (ftnlen)30); } /* %------------------------------------% */ /* | Max iterations have been exceeded. | */ /* %------------------------------------% */ if (iter > *mxiter && nconv < numcnv) { *info = 1; } /* %---------------------% */ /* | No shifts to apply. | */ /* %---------------------% */ if (*np == 0 && nconv < numcnv) { *info = 2; } *np = nconv; goto L1100; } else if (nconv < numcnv && *ishift == 1) { /* %-------------------------------------------------% */ /* | Do not have all the requested eigenvalues yet. | */ /* | To prevent possible stagnation, adjust the size | */ /* | of NEV. | */ /* %-------------------------------------------------% */ nevbef = *nev; /* Computing MIN */ i__1 = nconv, i__2 = *np / 2; *nev += min(i__1,i__2); if (*nev == 1 && kplusp >= 6) { *nev = kplusp / 2; } else if (*nev == 1 && kplusp > 3) { *nev = 2; } *np = kplusp - *nev; /* %---------------------------------------% */ /* | If the size of NEV was just increased | */ /* | resort the eigenvalues. | */ /* %---------------------------------------% */ if (nevbef < *nev) { sngets_(ishift, which, nev, np, &ritzr[1], &ritzi[1], &bounds[1], &workl[1], &workl[*np + 1], (ftnlen)2); } } if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, "_naup2: no." " of \"converged\" Ritz values at this iter.", (ftnlen)52); if (msglvl > 1) { kp[0] = *nev; kp[1] = *np; ivout_(&debug_1.logfil, &c__2, kp, &debug_1.ndigit, "_naup2: NEV" " and NP are", (ftnlen)22); svout_(&debug_1.logfil, nev, &ritzr[*np + 1], &debug_1.ndigit, "_naup2: \"wanted\" Ritz values -- real part", (ftnlen)41) ; svout_(&debug_1.logfil, nev, &ritzi[*np + 1], &debug_1.ndigit, "_naup2: \"wanted\" Ritz values -- imag part", (ftnlen)41) ; svout_(&debug_1.logfil, nev, &bounds[*np + 1], &debug_1.ndigit, "_naup2: Ritz estimates of the \"wanted\" values ", ( ftnlen)46); } } if (*ishift == 0) { /* %-------------------------------------------------------% */ /* | User specified shifts: reverse comminucation to | */ /* | compute the shifts. They are returned in the first | */ /* | 2*NP locations of WORKL. | */ /* %-------------------------------------------------------% */ ushift = TRUE_; *ido = 3; goto L9000; } L50: /* %------------------------------------% */ /* | Back from reverse communication; | */ /* | User specified shifts are returned | */ /* | in WORKL(1:2*NP) | */ /* %------------------------------------% */ ushift = FALSE_; if (*ishift == 0) { /* %----------------------------------% */ /* | Move the NP shifts from WORKL to | */ /* | RITZR, RITZI to free up WORKL | */ /* | for non-exact shift case. | */ /* %----------------------------------% */ scopy_(np, &workl[1], &c__1, &ritzr[1], &c__1); scopy_(np, &workl[*np + 1], &c__1, &ritzi[1], &c__1); } if (msglvl > 2) { ivout_(&debug_1.logfil, &c__1, np, &debug_1.ndigit, "_naup2: The num" "ber of shifts to apply ", (ftnlen)38); svout_(&debug_1.logfil, np, &ritzr[1], &debug_1.ndigit, "_naup2: Rea" "l part of the shifts", (ftnlen)31); svout_(&debug_1.logfil, np, &ritzi[1], &debug_1.ndigit, "_naup2: Ima" "ginary part of the shifts", (ftnlen)36); if (*ishift == 1) { svout_(&debug_1.logfil, np, &bounds[1], &debug_1.ndigit, "_naup2" ": Ritz estimates of the shifts", (ftnlen)36); } } /* %---------------------------------------------------------% */ /* | Apply the NP implicit shifts by QR bulge chasing. | */ /* | Each shift is applied to the whole upper Hessenberg | */ /* | matrix H. | */ /* | The first 2*N locations of WORKD are used as workspace. | */ /* %---------------------------------------------------------% */ snapps_(n, nev, np, &ritzr[1], &ritzi[1], &v[v_offset], ldv, &h__[ h_offset], ldh, &resid[1], &q[q_offset], ldq, &workl[1], &workd[1] ); /* %---------------------------------------------% */ /* | Compute the B-norm of the updated residual. | */ /* | Keep B*RESID in WORKD(1:N) to be used in | */ /* | the first step of the next call to snaitr. | */ /* %---------------------------------------------% */ cnorm = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[*n + 1], &c__1); ipntr[1] = *n + 1; ipntr[2] = 1; *ido = 2; /* %----------------------------------% */ /* | Exit in order to compute B*RESID | */ /* %----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[1], &c__1); } L100: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(1:N) := B*RESID | */ /* %----------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } if (*(unsigned char *)bmat == 'G') { rnorm = sdot_(n, &resid[1], &c__1, &workd[1], &c__1); rnorm = sqrt((dabs(rnorm))); } else if (*(unsigned char *)bmat == 'I') { rnorm = snrm2_(n, &resid[1], &c__1); } cnorm = FALSE_; if (msglvl > 2) { svout_(&debug_1.logfil, &c__1, &rnorm, &debug_1.ndigit, "_naup2: B-n" "orm of residual for compressed factorization", (ftnlen)55); smout_(&debug_1.logfil, nev, nev, &h__[h_offset], ldh, & debug_1.ndigit, "_naup2: Compressed upper Hessenberg matrix H" , (ftnlen)44); } goto L1000; /* %---------------------------------------------------------------% */ /* | | */ /* | E N D O F M A I N I T E R A T I O N L O O P | */ /* | | */ /* %---------------------------------------------------------------% */ L1100: *mxiter = iter; *nev = numcnv; L1200: *ido = 99; /* %------------% */ /* | Error Exit | */ /* %------------% */ second_(&t1); timing_1.tnaup2 = t1 - t0; L9000: /* %---------------% */ /* | End of snaup2 | */ /* %---------------% */ return 0; } /* snaup2_ */
/* Subroutine */ int snaitr_(integer *ido, char *bmat, integer *n, integer *k, integer *np, integer *nb, real *resid, real *rnorm, real *v, integer *ldv, real *h__, integer *ldh, integer *ipntr, real *workd, integer * info, ftnlen bmat_len) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j; static real t0, t1, t2, t3, t4, t5; static integer jj, ipj, irj, ivj; static real ulp, tst1; static integer ierr, iter; static real unfl, ovfl; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static integer itry; static real temp1; static logical orth1, orth2, step3, step4; extern doublereal snrm2_(integer *, real *, integer *); static real betaj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer infol; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); static real xtemp[2]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static real wnorm; extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, real *, integer *), ivout_(integer *, integer *, integer *, integer *, char *, ftnlen), smout_(integer *, integer *, integer * , real *, integer *, integer *, char *, ftnlen), svout_(integer *, integer *, real *, integer *, char *, ftnlen), sgetv0_(integer *, char *, integer *, logical *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, ftnlen); static real rnorm1; extern /* Subroutine */ int slabad_(real *, real *); extern doublereal slamch_(char *, ftnlen); extern /* Subroutine */ int second_(real *), slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer * , integer *, ftnlen); static logical rstart; static integer msglvl; static real smlnum; extern doublereal slanhs_(char *, integer *, real *, integer *, real *, ftnlen); /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %-----------------------% */ /* | Local Array Arguments | */ /* %-----------------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------% */ /* | Data statements | */ /* %-----------------% */ /* Parameter adjustments */ --workd; --resid; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --ipntr; /* Function Body */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ if (first) { /* %-----------------------------------------% */ /* | Set machine-dependent constants for the | */ /* | the splitting and deflation criterion. | */ /* | If norm(H) <= sqrt(OVFL), | */ /* | overflow should not occur. | */ /* | REFERENCE: LAPACK subroutine slahqr | */ /* %-----------------------------------------% */ unfl = slamch_("safe minimum", (ftnlen)12); ovfl = 1.f / unfl; slabad_(&unfl, &ovfl); ulp = slamch_("precision", (ftnlen)9); smlnum = unfl * (*n / ulp); first = FALSE_; } if (*ido == 0) { /* %-------------------------------% */ /* | Initialize timing statistics | */ /* | & message level for debugging | */ /* %-------------------------------% */ second_(&t0); msglvl = debug_1.mnaitr; /* %------------------------------% */ /* | Initial call to this routine | */ /* %------------------------------% */ *info = 0; step3 = FALSE_; step4 = FALSE_; rstart = FALSE_; orth1 = FALSE_; orth2 = FALSE_; j = *k + 1; ipj = 1; irj = ipj + *n; ivj = irj + *n; } /* %-------------------------------------------------% */ /* | When in reverse communication mode one of: | */ /* | STEP3, STEP4, ORTH1, ORTH2, RSTART | */ /* | will be .true. when .... | */ /* | STEP3: return from computing OP*v_{j}. | */ /* | STEP4: return from computing B-norm of OP*v_{j} | */ /* | ORTH1: return from computing B-norm of r_{j+1} | */ /* | ORTH2: return from computing B-norm of | */ /* | correction to the residual vector. | */ /* | RSTART: return from OP computations needed by | */ /* | sgetv0. | */ /* %-------------------------------------------------% */ if (step3) { goto L50; } if (step4) { goto L60; } if (orth1) { goto L70; } if (orth2) { goto L90; } if (rstart) { goto L30; } /* %-----------------------------% */ /* | Else this is the first step | */ /* %-----------------------------% */ /* %--------------------------------------------------------------% */ /* | | */ /* | A R N O L D I I T E R A T I O N L O O P | */ /* | | */ /* | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */ /* %--------------------------------------------------------------% */ L1000: if (msglvl > 1) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: generat" "ing Arnoldi vector number", (ftnlen)40); svout_(&debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_naitr: B-no" "rm of the current residual is", (ftnlen)41); } /* %---------------------------------------------------% */ /* | STEP 1: Check if the B norm of j-th residual | */ /* | vector is zero. Equivalent to determing whether | */ /* | an exact j-step Arnoldi factorization is present. | */ /* %---------------------------------------------------% */ betaj = *rnorm; if (*rnorm > 0.f) { goto L40; } /* %---------------------------------------------------% */ /* | Invariant subspace found, generate a new starting | */ /* | vector which is orthogonal to the current Arnoldi | */ /* | basis and continue the iteration. | */ /* %---------------------------------------------------% */ if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: ****** " "RESTART AT STEP ******", (ftnlen)37); } /* %---------------------------------------------% */ /* | ITRY is the loop variable that controls the | */ /* | maximum amount of times that a restart is | */ /* | attempted. NRSTRT is used by stat.h | */ /* %---------------------------------------------% */ betaj = 0.f; ++timing_1.nrstrt; itry = 1; L20: rstart = TRUE_; *ido = 0; L30: /* %--------------------------------------% */ /* | If in reverse communication mode and | */ /* | RSTART = .true. flow returns here. | */ /* %--------------------------------------% */ sgetv0_(ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, &resid[1], rnorm, &ipntr[1], &workd[1], &ierr, (ftnlen)1); if (*ido != 99) { goto L9000; } if (ierr < 0) { ++itry; if (itry <= 3) { goto L20; } /* %------------------------------------------------% */ /* | Give up after several restart attempts. | */ /* | Set INFO to the size of the invariant subspace | */ /* | which spans OP and exit. | */ /* %------------------------------------------------% */ *info = j - 1; second_(&t1); timing_1.tnaitr += t1 - t0; *ido = 99; goto L9000; } L40: /* %---------------------------------------------------------% */ /* | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm | */ /* | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */ /* | when reciprocating a small RNORM, test against lower | */ /* | machine bound. | */ /* %---------------------------------------------------------% */ scopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1); if (*rnorm >= unfl) { temp1 = 1.f / *rnorm; sscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1); sscal_(n, &temp1, &workd[ipj], &c__1); } else { /* %-----------------------------------------% */ /* | To scale both v_{j} and p_{j} carefully | */ /* | use LAPACK routine SLASCL | */ /* %-----------------------------------------% */ slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &v[j * v_dim1 + 1], n, &infol, (ftnlen)7); slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &workd[ipj], n, &infol, (ftnlen)7); } /* %------------------------------------------------------% */ /* | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */ /* | Note that this is not quite yet r_{j}. See STEP 4 | */ /* %------------------------------------------------------% */ step3 = TRUE_; ++timing_1.nopx; second_(&t2); scopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1); ipntr[1] = ivj; ipntr[2] = irj; ipntr[3] = ipj; *ido = 1; /* %-----------------------------------% */ /* | Exit in order to compute OP*v_{j} | */ /* %-----------------------------------% */ goto L9000; L50: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | */ /* | if step3 = .true. | */ /* %----------------------------------% */ second_(&t3); timing_1.tmvopx += t3 - t2; step3 = FALSE_; /* %------------------------------------------% */ /* | Put another copy of OP*v_{j} into RESID. | */ /* %------------------------------------------% */ scopy_(n, &workd[irj], &c__1, &resid[1], &c__1); /* %---------------------------------------% */ /* | STEP 4: Finish extending the Arnoldi | */ /* | factorization to length j. | */ /* %---------------------------------------% */ second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; step4 = TRUE_; ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-------------------------------------% */ /* | Exit in order to compute B*OP*v_{j} | */ /* %-------------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L60: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */ /* | if step4 = .true. | */ /* %----------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } step4 = FALSE_; /* %-------------------------------------% */ /* | The following is needed for STEP 5. | */ /* | Compute the B-norm of OP*v_{j}. | */ /* %-------------------------------------% */ if (*(unsigned char *)bmat == 'G') { wnorm = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); wnorm = sqrt((dabs(wnorm))); } else if (*(unsigned char *)bmat == 'I') { wnorm = snrm2_(n, &resid[1], &c__1); } /* %-----------------------------------------% */ /* | Compute the j-th residual corresponding | */ /* | to the j step factorization. | */ /* | Use Classical Gram Schmidt and compute: | */ /* | w_{j} <- V_{j}^T * B * OP * v_{j} | */ /* | r_{j} <- OP*v_{j} - V_{j} * w_{j} | */ /* %-----------------------------------------% */ /* %------------------------------------------% */ /* | Compute the j Fourier coefficients w_{j} | */ /* | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | */ /* %------------------------------------------% */ sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b47, &h__[j * h_dim1 + 1], &c__1, (ftnlen)1); /* %--------------------------------------% */ /* | Orthogonalize r_{j} against V_{j}. | */ /* | RESID contains OP*v_{j}. See STEP 3. | */ /* %--------------------------------------% */ sgemv_("N", n, &j, &c_b50, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1, &c_b25, &resid[1], &c__1, (ftnlen)1); if (j > 1) { h__[j + (j - 1) * h_dim1] = betaj; } second_(&t4); orth1 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %----------------------------------% */ /* | Exit in order to compute B*r_{j} | */ /* %----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L70: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH1 = .true. | */ /* | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } orth1 = FALSE_; /* %------------------------------% */ /* | Compute the B-norm of r_{j}. | */ /* %------------------------------% */ if (*(unsigned char *)bmat == 'G') { *rnorm = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); *rnorm = sqrt((dabs(*rnorm))); } else if (*(unsigned char *)bmat == 'I') { *rnorm = snrm2_(n, &resid[1], &c__1); } /* %-----------------------------------------------------------% */ /* | STEP 5: Re-orthogonalization / Iterative refinement phase | */ /* | Maximum NITER_ITREF tries. | */ /* | | */ /* | s = V_{j}^T * B * r_{j} | */ /* | r_{j} = r_{j} - V_{j}*s | */ /* | alphaj = alphaj + s_{j} | */ /* | | */ /* | The stopping criteria used for iterative refinement is | */ /* | discussed in Parlett's book SEP, page 107 and in Gragg & | */ /* | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. | */ /* | Determine if we need to correct the residual. The goal is | */ /* | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || | */ /* | The following test determines whether the sine of the | */ /* | angle between OP*x and the computed residual is less | */ /* | than or equal to 0.717. | */ /* %-----------------------------------------------------------% */ if (*rnorm > wnorm * .717f) { goto L100; } iter = 0; ++timing_1.nrorth; /* %---------------------------------------------------% */ /* | Enter the Iterative refinement phase. If further | */ /* | refinement is necessary, loop back here. The loop | */ /* | variable is ITER. Perform a step of Classical | */ /* | Gram-Schmidt using all the Arnoldi vectors V_{j} | */ /* %---------------------------------------------------% */ L80: if (msglvl > 2) { xtemp[0] = wnorm; xtemp[1] = *rnorm; svout_(&debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: re-o" "rthonalization; wnorm and rnorm are", (ftnlen)47); svout_(&debug_1.logfil, &j, &h__[j * h_dim1 + 1], &debug_1.ndigit, "_naitr: j-th column of H", (ftnlen)24); } /* %----------------------------------------------------% */ /* | Compute V_{j}^T * B * r_{j}. | */ /* | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */ /* %----------------------------------------------------% */ sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b47, &workd[irj], &c__1, (ftnlen)1); /* %---------------------------------------------% */ /* | Compute the correction to the residual: | */ /* | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */ /* | The correction to H is v(:,1:J)*H(1:J,1:J) | */ /* | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | */ /* %---------------------------------------------% */ sgemv_("N", n, &j, &c_b50, &v[v_offset], ldv, &workd[irj], &c__1, &c_b25, &resid[1], &c__1, (ftnlen)1); saxpy_(&j, &c_b25, &workd[irj], &c__1, &h__[j * h_dim1 + 1], &c__1); orth2 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-----------------------------------% */ /* | Exit in order to compute B*r_{j}. | */ /* | r_{j} is the corrected residual. | */ /* %-----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L90: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH2 = .true. | */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } /* %-----------------------------------------------------% */ /* | Compute the B-norm of the corrected residual r_{j}. | */ /* %-----------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { rnorm1 = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); rnorm1 = sqrt((dabs(rnorm1))); } else if (*(unsigned char *)bmat == 'I') { rnorm1 = snrm2_(n, &resid[1], &c__1); } if (msglvl > 0 && iter > 0) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: Iterati" "ve refinement for Arnoldi residual", (ftnlen)49); if (msglvl > 2) { xtemp[0] = *rnorm; xtemp[1] = rnorm1; svout_(&debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: " "iterative refinement ; rnorm and rnorm1 are", (ftnlen)51); } } /* %-----------------------------------------% */ /* | Determine if we need to perform another | */ /* | step of re-orthogonalization. | */ /* %-----------------------------------------% */ if (rnorm1 > *rnorm * .717f) { /* %---------------------------------------% */ /* | No need for further refinement. | */ /* | The cosine of the angle between the | */ /* | corrected residual vector and the old | */ /* | residual vector is greater than 0.717 | */ /* | In other words the corrected residual | */ /* | and the old residual vector share an | */ /* | angle of less than arcCOS(0.717) | */ /* %---------------------------------------% */ *rnorm = rnorm1; } else { /* %-------------------------------------------% */ /* | Another step of iterative refinement step | */ /* | is required. NITREF is used by stat.h | */ /* %-------------------------------------------% */ ++timing_1.nitref; *rnorm = rnorm1; ++iter; if (iter <= 1) { goto L80; } /* %-------------------------------------------------% */ /* | Otherwise RESID is numerically in the span of V | */ /* %-------------------------------------------------% */ i__1 = *n; for (jj = 1; jj <= i__1; ++jj) { resid[jj] = 0.f; /* L95: */ } *rnorm = 0.f; } /* %----------------------------------------------% */ /* | Branch here directly if iterative refinement | */ /* | wasn't necessary or after at most NITER_REF | */ /* | steps of iterative refinement. | */ /* %----------------------------------------------% */ L100: rstart = FALSE_; orth2 = FALSE_; second_(&t5); timing_1.titref += t5 - t4; /* %------------------------------------% */ /* | STEP 6: Update j = j+1; Continue | */ /* %------------------------------------% */ ++j; if (j > *k + *np) { second_(&t1); timing_1.tnaitr += t1 - t0; *ido = 99; i__1 = *k + *np - 1; for (i__ = max(1,*k); i__ <= i__1; ++i__) { /* %--------------------------------------------% */ /* | Check for splitting and deflation. | */ /* | Use a standard test as in the QR algorithm | */ /* | REFERENCE: LAPACK subroutine slahqr | */ /* %--------------------------------------------% */ tst1 = (r__1 = h__[i__ + i__ * h_dim1], dabs(r__1)) + (r__2 = h__[ i__ + 1 + (i__ + 1) * h_dim1], dabs(r__2)); if (tst1 == 0.f) { i__2 = *k + *np; tst1 = slanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1] , (ftnlen)1); } /* Computing MAX */ r__2 = ulp * tst1; if ((r__1 = h__[i__ + 1 + i__ * h_dim1], dabs(r__1)) <= dmax(r__2, smlnum)) { h__[i__ + 1 + i__ * h_dim1] = 0.f; } /* L110: */ } if (msglvl > 2) { i__1 = *k + *np; i__2 = *k + *np; smout_(&debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh, & debug_1.ndigit, "_naitr: Final upper Hessenberg matrix H" " of order K+NP", (ftnlen)53); } goto L9000; } /* %--------------------------------------------------------% */ /* | Loop back to extend the factorization by another step. | */ /* %--------------------------------------------------------% */ goto L1000; /* %---------------------------------------------------------------% */ /* | | */ /* | E N D O F M A I N I T E R A T I O N L O O P | */ /* | | */ /* %---------------------------------------------------------------% */ L9000: return 0; /* %---------------% */ /* | End of snaitr | */ /* %---------------% */ } /* snaitr_ */
/* Subroutine */ int ssaup2_(integer *ido, char *bmat, integer *n, char * which, integer *nev, integer *np, real *tol, real *resid, integer * mode, integer *iupd, integer *ishift, integer *mxiter, real *v, integer *ldv, real *h__, integer *ldh, real *ritz, real *bounds, real *q, integer *ldq, real *workl, integer *ipntr, real *workd, integer * info, ftnlen bmat_len, ftnlen which_len) { /* System generated locals */ integer h_dim1, h_offset, q_dim1, q_offset, v_dim1, v_offset, i__1, i__2, i__3; real r__1, r__2, r__3; doublereal d__1; /* Builtin functions */ double pow_dd(doublereal *, doublereal *); integer s_cmp(char *, char *, ftnlen, ftnlen); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); double sqrt(doublereal); /* Local variables */ static integer j; static real t0, t1, t2, t3; static integer kp[3], np0, nev0; static real eps23; static integer ierr, iter; static real temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static integer nevd2; static logical getv0; static integer nevm2; extern doublereal snrm2_(integer *, real *, integer *); static logical cnorm; static integer nconv; static logical initv; static real rnorm; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), ivout_(integer *, integer *, integer *, integer *, char *, ftnlen), svout_(integer *, integer *, real *, integer *, char *, ftnlen), sgetv0_(integer *, char *, integer *, logical *, integer *, integer *, real *, integer *, real *, real *, integer *, real * , integer *, ftnlen); static integer nevbef; extern doublereal slamch_(char *, ftnlen); extern /* Subroutine */ int second_(real *); static logical update; static char wprime[2]; static logical ushift; static integer kplusp, msglvl, nptemp; extern /* Subroutine */ int ssaitr_(integer *, char *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real * , integer *, integer *, real *, integer *, ftnlen), ssconv_( integer *, real *, real *, real *, integer *), sseigt_(real *, integer *, real *, integer *, real *, real *, real *, integer *), ssgets_(integer *, char *, integer *, integer *, real *, real *, real *, ftnlen), ssapps_(integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *), ssortr_(char *, logical *, integer *, real *, real *, ftnlen); /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ /* Parameter adjustments */ --workd; --resid; --workl; --bounds; --ritz; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --ipntr; /* Function Body */ if (*ido == 0) { /* %-------------------------------% */ /* | Initialize timing statistics | */ /* | & message level for debugging | */ /* %-------------------------------% */ second_(&t0); msglvl = debug_1.msaup2; /* %---------------------------------% */ /* | Set machine dependent constant. | */ /* %---------------------------------% */ eps23 = slamch_("Epsilon-Machine", (ftnlen)15); d__1 = (doublereal) eps23; eps23 = pow_dd(&d__1, &c_b3); /* %-------------------------------------% */ /* | nev0 and np0 are integer variables | */ /* | hold the initial values of NEV & NP | */ /* %-------------------------------------% */ nev0 = *nev; np0 = *np; /* %-------------------------------------% */ /* | kplusp is the bound on the largest | */ /* | Lanczos factorization built. | */ /* | nconv is the current number of | */ /* | "converged" eigenvlues. | */ /* | iter is the counter on the current | */ /* | iteration step. | */ /* %-------------------------------------% */ kplusp = nev0 + np0; nconv = 0; iter = 0; /* %--------------------------------------------% */ /* | Set flags for computing the first NEV steps | */ /* | of the Lanczos factorization. | */ /* %--------------------------------------------% */ getv0 = TRUE_; update = FALSE_; ushift = FALSE_; cnorm = FALSE_; if (*info != 0) { /* %--------------------------------------------% */ /* | User provides the initial residual vector. | */ /* %--------------------------------------------% */ initv = TRUE_; *info = 0; } else { initv = FALSE_; } } /* %---------------------------------------------% */ /* | Get a possibly random starting vector and | */ /* | force it into the range of the operator OP. | */ /* %---------------------------------------------% */ /* L10: */ if (getv0) { sgetv0_(ido, bmat, &c__1, &initv, n, &c__1, &v[v_offset], ldv, &resid[ 1], &rnorm, &ipntr[1], &workd[1], info, (ftnlen)1); if (*ido != 99) { goto L9000; } if (rnorm == 0.f) { /* %-----------------------------------------% */ /* | The initial vector is zero. Error exit. | */ /* %-----------------------------------------% */ *info = -9; goto L1200; } getv0 = FALSE_; *ido = 0; } /* %------------------------------------------------------------% */ /* | Back from reverse communication: continue with update step | */ /* %------------------------------------------------------------% */ if (update) { goto L20; } /* %-------------------------------------------% */ /* | Back from computing user specified shifts | */ /* %-------------------------------------------% */ if (ushift) { goto L50; } /* %-------------------------------------% */ /* | Back from computing residual norm | */ /* | at the end of the current iteration | */ /* %-------------------------------------% */ if (cnorm) { goto L100; } /* %----------------------------------------------------------% */ /* | Compute the first NEV steps of the Lanczos factorization | */ /* %----------------------------------------------------------% */ ssaitr_(ido, bmat, n, &c__0, &nev0, mode, &resid[1], &rnorm, &v[v_offset], ldv, &h__[h_offset], ldh, &ipntr[1], &workd[1], info, (ftnlen)1); /* %---------------------------------------------------% */ /* | ido .ne. 99 implies use of reverse communication | */ /* | to compute operations involving OP and possibly B | */ /* %---------------------------------------------------% */ if (*ido != 99) { goto L9000; } if (*info > 0) { /* %-----------------------------------------------------% */ /* | ssaitr was unable to build an Lanczos factorization | */ /* | of length NEV0. INFO is returned with the size of | */ /* | the factorization built. Exit main loop. | */ /* %-----------------------------------------------------% */ *np = *info; *mxiter = iter; *info = -9999; goto L1200; } /* %--------------------------------------------------------------% */ /* | | */ /* | M A I N LANCZOS I T E R A T I O N L O O P | */ /* | Each iteration implicitly restarts the Lanczos | */ /* | factorization in place. | */ /* | | */ /* %--------------------------------------------------------------% */ L1000: ++iter; if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &iter, &debug_1.ndigit, "_saup2: ****" " Start of major iteration number ****", (ftnlen)49); } if (msglvl > 1) { ivout_(&debug_1.logfil, &c__1, nev, &debug_1.ndigit, "_saup2: The le" "ngth of the current Lanczos factorization", (ftnlen)55); ivout_(&debug_1.logfil, &c__1, np, &debug_1.ndigit, "_saup2: Extend " "the Lanczos factorization by", (ftnlen)43); } /* %------------------------------------------------------------% */ /* | Compute NP additional steps of the Lanczos factorization. | */ /* %------------------------------------------------------------% */ *ido = 0; L20: update = TRUE_; ssaitr_(ido, bmat, n, nev, np, mode, &resid[1], &rnorm, &v[v_offset], ldv, &h__[h_offset], ldh, &ipntr[1], &workd[1], info, (ftnlen)1); /* %---------------------------------------------------% */ /* | ido .ne. 99 implies use of reverse communication | */ /* | to compute operations involving OP and possibly B | */ /* %---------------------------------------------------% */ if (*ido != 99) { goto L9000; } if (*info > 0) { /* %-----------------------------------------------------% */ /* | ssaitr was unable to build an Lanczos factorization | */ /* | of length NEV0+NP0. INFO is returned with the size | */ /* | of the factorization built. Exit main loop. | */ /* %-----------------------------------------------------% */ *np = *info; *mxiter = iter; *info = -9999; goto L1200; } update = FALSE_; if (msglvl > 1) { svout_(&debug_1.logfil, &c__1, &rnorm, &debug_1.ndigit, "_saup2: Cur" "rent B-norm of residual for factorization", (ftnlen)52); } /* %--------------------------------------------------------% */ /* | Compute the eigenvalues and corresponding error bounds | */ /* | of the current symmetric tridiagonal matrix. | */ /* %--------------------------------------------------------% */ sseigt_(&rnorm, &kplusp, &h__[h_offset], ldh, &ritz[1], &bounds[1], & workl[1], &ierr); if (ierr != 0) { *info = -8; goto L1200; } /* %----------------------------------------------------% */ /* | Make a copy of eigenvalues and corresponding error | */ /* | bounds obtained from _seigt. | */ /* %----------------------------------------------------% */ scopy_(&kplusp, &ritz[1], &c__1, &workl[kplusp + 1], &c__1); scopy_(&kplusp, &bounds[1], &c__1, &workl[(kplusp << 1) + 1], &c__1); /* %---------------------------------------------------% */ /* | Select the wanted Ritz values and their bounds | */ /* | to be used in the convergence test. | */ /* | The selection is based on the requested number of | */ /* | eigenvalues instead of the current NEV and NP to | */ /* | prevent possible misconvergence. | */ /* | * Wanted Ritz values := RITZ(NP+1:NEV+NP) | */ /* | * Shifts := RITZ(1:NP) := WORKL(1:NP) | */ /* %---------------------------------------------------% */ *nev = nev0; *np = np0; ssgets_(ishift, which, nev, np, &ritz[1], &bounds[1], &workl[1], (ftnlen) 2); /* %-------------------% */ /* | Convergence test. | */ /* %-------------------% */ scopy_(nev, &bounds[*np + 1], &c__1, &workl[*np + 1], &c__1); ssconv_(nev, &ritz[*np + 1], &workl[*np + 1], tol, &nconv); if (msglvl > 2) { kp[0] = *nev; kp[1] = *np; kp[2] = nconv; ivout_(&debug_1.logfil, &c__3, kp, &debug_1.ndigit, "_saup2: NEV, NP" ", NCONV are", (ftnlen)26); svout_(&debug_1.logfil, &kplusp, &ritz[1], &debug_1.ndigit, "_saup2:" " The eigenvalues of H", (ftnlen)28); svout_(&debug_1.logfil, &kplusp, &bounds[1], &debug_1.ndigit, "_saup" "2: Ritz estimates of the current NCV Ritz values", (ftnlen)53) ; } /* %---------------------------------------------------------% */ /* | Count the number of unwanted Ritz values that have zero | */ /* | Ritz estimates. If any Ritz estimates are equal to zero | */ /* | then a leading block of H of order equal to at least | */ /* | the number of Ritz values with zero Ritz estimates has | */ /* | split off. None of these Ritz values may be removed by | */ /* | shifting. Decrease NP the number of shifts to apply. If | */ /* | no shifts may be applied, then prepare to exit | */ /* %---------------------------------------------------------% */ nptemp = *np; i__1 = nptemp; for (j = 1; j <= i__1; ++j) { if (bounds[j] == 0.f) { --(*np); ++(*nev); } /* L30: */ } if (nconv >= nev0 || iter > *mxiter || *np == 0) { /* %------------------------------------------------% */ /* | Prepare to exit. Put the converged Ritz values | */ /* | and corresponding bounds in RITZ(1:NCONV) and | */ /* | BOUNDS(1:NCONV) respectively. Then sort. Be | */ /* | careful when NCONV > NP since we don't want to | */ /* | swap overlapping locations. | */ /* %------------------------------------------------% */ if (s_cmp(which, "BE", (ftnlen)2, (ftnlen)2) == 0) { /* %-----------------------------------------------------% */ /* | Both ends of the spectrum are requested. | */ /* | Sort the eigenvalues into algebraically decreasing | */ /* | order first then swap low end of the spectrum next | */ /* | to high end in appropriate locations. | */ /* | NOTE: when np < floor(nev/2) be careful not to swap | */ /* | overlapping locations. | */ /* %-----------------------------------------------------% */ s_copy(wprime, "SA", (ftnlen)2, (ftnlen)2); ssortr_(wprime, &c_true, &kplusp, &ritz[1], &bounds[1], (ftnlen)2) ; nevd2 = nev0 / 2; nevm2 = nev0 - nevd2; if (*nev > 1) { i__1 = min(nevd2,*np); /* Computing MAX */ i__2 = kplusp - nevd2 + 1, i__3 = kplusp - *np + 1; sswap_(&i__1, &ritz[nevm2 + 1], &c__1, &ritz[max(i__2,i__3)], &c__1); i__1 = min(nevd2,*np); /* Computing MAX */ i__2 = kplusp - nevd2 + 1, i__3 = kplusp - *np + 1; sswap_(&i__1, &bounds[nevm2 + 1], &c__1, &bounds[max(i__2, i__3)], &c__1); } } else { /* %--------------------------------------------------% */ /* | LM, SM, LA, SA case. | */ /* | Sort the eigenvalues of H into the an order that | */ /* | is opposite to WHICH, and apply the resulting | */ /* | order to BOUNDS. The eigenvalues are sorted so | */ /* | that the wanted part are always within the first | */ /* | NEV locations. | */ /* %--------------------------------------------------% */ if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SM", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LM", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "LA", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "SA", (ftnlen)2, (ftnlen)2); } if (s_cmp(which, "SA", (ftnlen)2, (ftnlen)2) == 0) { s_copy(wprime, "LA", (ftnlen)2, (ftnlen)2); } ssortr_(wprime, &c_true, &kplusp, &ritz[1], &bounds[1], (ftnlen)2) ; } /* %--------------------------------------------------% */ /* | Scale the Ritz estimate of each Ritz value | */ /* | by 1 / max(eps23,magnitude of the Ritz value). | */ /* %--------------------------------------------------% */ i__1 = nev0; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__2 = eps23, r__3 = (r__1 = ritz[j], dabs(r__1)); temp = dmax(r__2,r__3); bounds[j] /= temp; /* L35: */ } /* %----------------------------------------------------% */ /* | Sort the Ritz values according to the scaled Ritz | */ /* | esitmates. This will push all the converged ones | */ /* | towards the front of ritzr, ritzi, bounds | */ /* | (in the case when NCONV < NEV.) | */ /* %----------------------------------------------------% */ s_copy(wprime, "LA", (ftnlen)2, (ftnlen)2); ssortr_(wprime, &c_true, &nev0, &bounds[1], &ritz[1], (ftnlen)2); /* %----------------------------------------------% */ /* | Scale the Ritz estimate back to its original | */ /* | value. | */ /* %----------------------------------------------% */ i__1 = nev0; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__2 = eps23, r__3 = (r__1 = ritz[j], dabs(r__1)); temp = dmax(r__2,r__3); bounds[j] *= temp; /* L40: */ } /* %--------------------------------------------------% */ /* | Sort the "converged" Ritz values again so that | */ /* | the "threshold" values and their associated Ritz | */ /* | estimates appear at the appropriate position in | */ /* | ritz and bound. | */ /* %--------------------------------------------------% */ if (s_cmp(which, "BE", (ftnlen)2, (ftnlen)2) == 0) { /* %------------------------------------------------% */ /* | Sort the "converged" Ritz values in increasing | */ /* | order. The "threshold" values are in the | */ /* | middle. | */ /* %------------------------------------------------% */ s_copy(wprime, "LA", (ftnlen)2, (ftnlen)2); ssortr_(wprime, &c_true, &nconv, &ritz[1], &bounds[1], (ftnlen)2); } else { /* %----------------------------------------------% */ /* | In LM, SM, LA, SA case, sort the "converged" | */ /* | Ritz values according to WHICH so that the | */ /* | "threshold" value appears at the front of | */ /* | ritz. | */ /* %----------------------------------------------% */ ssortr_(which, &c_true, &nconv, &ritz[1], &bounds[1], (ftnlen)2); } /* %------------------------------------------% */ /* | Use h( 1,1 ) as storage to communicate | */ /* | rnorm to _seupd if needed | */ /* %------------------------------------------% */ h__[h_dim1 + 1] = rnorm; if (msglvl > 1) { svout_(&debug_1.logfil, &kplusp, &ritz[1], &debug_1.ndigit, "_sa" "up2: Sorted Ritz values.", (ftnlen)27); svout_(&debug_1.logfil, &kplusp, &bounds[1], &debug_1.ndigit, "_saup2: Sorted ritz estimates.", (ftnlen)30); } /* %------------------------------------% */ /* | Max iterations have been exceeded. | */ /* %------------------------------------% */ if (iter > *mxiter && nconv < *nev) { *info = 1; } /* %---------------------% */ /* | No shifts to apply. | */ /* %---------------------% */ if (*np == 0 && nconv < nev0) { *info = 2; } *np = nconv; goto L1100; } else if (nconv < *nev && *ishift == 1) { /* %---------------------------------------------------% */ /* | Do not have all the requested eigenvalues yet. | */ /* | To prevent possible stagnation, adjust the number | */ /* | of Ritz values and the shifts. | */ /* %---------------------------------------------------% */ nevbef = *nev; /* Computing MIN */ i__1 = nconv, i__2 = *np / 2; *nev += min(i__1,i__2); if (*nev == 1 && kplusp >= 6) { *nev = kplusp / 2; } else if (*nev == 1 && kplusp > 2) { *nev = 2; } *np = kplusp - *nev; /* %---------------------------------------% */ /* | If the size of NEV was just increased | */ /* | resort the eigenvalues. | */ /* %---------------------------------------% */ if (nevbef < *nev) { ssgets_(ishift, which, nev, np, &ritz[1], &bounds[1], &workl[1], ( ftnlen)2); } } if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, "_saup2: no." " of \"converged\" Ritz values at this iter.", (ftnlen)52); if (msglvl > 1) { kp[0] = *nev; kp[1] = *np; ivout_(&debug_1.logfil, &c__2, kp, &debug_1.ndigit, "_saup2: NEV" " and NP are", (ftnlen)22); svout_(&debug_1.logfil, nev, &ritz[*np + 1], &debug_1.ndigit, "_saup2: \"wanted\" Ritz values.", (ftnlen)29); svout_(&debug_1.logfil, nev, &bounds[*np + 1], &debug_1.ndigit, "_saup2: Ritz estimates of the \"wanted\" values ", ( ftnlen)46); } } if (*ishift == 0) { /* %-----------------------------------------------------% */ /* | User specified shifts: reverse communication to | */ /* | compute the shifts. They are returned in the first | */ /* | NP locations of WORKL. | */ /* %-----------------------------------------------------% */ ushift = TRUE_; *ido = 3; goto L9000; } L50: /* %------------------------------------% */ /* | Back from reverse communication; | */ /* | User specified shifts are returned | */ /* | in WORKL(1:*NP) | */ /* %------------------------------------% */ ushift = FALSE_; /* %---------------------------------------------------------% */ /* | Move the NP shifts to the first NP locations of RITZ to | */ /* | free up WORKL. This is for the non-exact shift case; | */ /* | in the exact shift case, ssgets already handles this. | */ /* %---------------------------------------------------------% */ if (*ishift == 0) { scopy_(np, &workl[1], &c__1, &ritz[1], &c__1); } if (msglvl > 2) { ivout_(&debug_1.logfil, &c__1, np, &debug_1.ndigit, "_saup2: The num" "ber of shifts to apply ", (ftnlen)38); svout_(&debug_1.logfil, np, &workl[1], &debug_1.ndigit, "_saup2: shi" "fts selected", (ftnlen)23); if (*ishift == 1) { svout_(&debug_1.logfil, np, &bounds[1], &debug_1.ndigit, "_saup2" ": corresponding Ritz estimates", (ftnlen)36); } } /* %---------------------------------------------------------% */ /* | Apply the NP0 implicit shifts by QR bulge chasing. | */ /* | Each shift is applied to the entire tridiagonal matrix. | */ /* | The first 2*N locations of WORKD are used as workspace. | */ /* | After ssapps is done, we have a Lanczos | */ /* | factorization of length NEV. | */ /* %---------------------------------------------------------% */ ssapps_(n, nev, np, &ritz[1], &v[v_offset], ldv, &h__[h_offset], ldh, & resid[1], &q[q_offset], ldq, &workd[1]); /* %---------------------------------------------% */ /* | Compute the B-norm of the updated residual. | */ /* | Keep B*RESID in WORKD(1:N) to be used in | */ /* | the first step of the next call to ssaitr. | */ /* %---------------------------------------------% */ cnorm = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[*n + 1], &c__1); ipntr[1] = *n + 1; ipntr[2] = 1; *ido = 2; /* %----------------------------------% */ /* | Exit in order to compute B*RESID | */ /* %----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[1], &c__1); } L100: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(1:N) := B*RESID | */ /* %----------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } if (*(unsigned char *)bmat == 'G') { rnorm = sdot_(n, &resid[1], &c__1, &workd[1], &c__1); rnorm = sqrt((dabs(rnorm))); } else if (*(unsigned char *)bmat == 'I') { rnorm = snrm2_(n, &resid[1], &c__1); } cnorm = FALSE_; /* L130: */ if (msglvl > 2) { svout_(&debug_1.logfil, &c__1, &rnorm, &debug_1.ndigit, "_saup2: B-n" "orm of residual for NEV factorization", (ftnlen)48); svout_(&debug_1.logfil, nev, &h__[(h_dim1 << 1) + 1], &debug_1.ndigit, "_saup2: main diagonal of compressed H matrix", (ftnlen)44); i__1 = *nev - 1; svout_(&debug_1.logfil, &i__1, &h__[h_dim1 + 2], &debug_1.ndigit, "_saup2: subdiagonal of compressed H matrix", (ftnlen)42); } goto L1000; /* %---------------------------------------------------------------% */ /* | | */ /* | E N D O F M A I N I T E R A T I O N L O O P | */ /* | | */ /* %---------------------------------------------------------------% */ L1100: *mxiter = iter; *nev = nconv; L1200: *ido = 99; /* %------------% */ /* | Error exit | */ /* %------------% */ second_(&t1); timing_1.tsaup2 = t1 - t0; L9000: return 0; /* %---------------% */ /* | End of ssaup2 | */ /* %---------------% */ } /* ssaup2_ */