static void compute_resnorms(Complex_Z *V, Complex_Z *W, Complex_Z *hVecs, double *hVals, int basisSize, double *blockNorms, int *iev, int left, int right, void *rwork, primme_params *primme) { int i; /* Loop variable */ int numResiduals; /* Number of residual vectors to be computed */ double *dwork = (double *) rwork; /* pointer casting rwork to double */ Complex_Z ztmp; /* temp var holding shift */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /* constants */ numResiduals = right - left + 1; /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */ /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec. */ /* Compute the Ritz vectors */ Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, V, primme->nLocal, hVecs, basisSize, tzero, &V[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute W*hVecs */ Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, W, primme->nLocal, hVecs, basisSize, tzero, &W[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute the residuals */ for (i=left; i <= right; i++) { { ztmp.r = -hVals[iev[i]]; ztmp.i = 0.0L; } Num_axpy_zprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } /* Compute the residual norms */ for (i=left; i <= right; i++) { ztmp = Num_dot_zprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)] , 1); dwork[i] = ztmp.r; } (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals, primme); for (i=left; i <= right; i++) { blockNorms[i] = sqrt(blockNorms[i]); } }
static void apply_projected_matrix(Complex_Z *v, double shift, Complex_Z *Q, int dimQ, Complex_Z *result, Complex_Z *rwork, primme_params *primme) { int ONE = 1; /* For passing it by reference in matrixMatvec */ Complex_Z ztmp; (*primme->matrixMatvec)(v, result, &ONE, primme); {ztmp.r = -shift; ztmp.i = 0.0L;} Num_axpy_zprimme(primme->nLocal, ztmp, v, 1, result, 1); if (dimQ > 0) apply_projector(Q, dimQ, result, rwork, primme); primme->stats.numMatvecs += 1; }
static void Olsen_preconditioner_block(Complex_Z *r, Complex_Z *x, int blockSize, Complex_Z *rwork, primme_params *primme) { int blockIndex, count; Complex_Z alpha; Complex_Z *Kinvx, *xKinvx, *xKinvr, *xKinvx_local, *xKinvr_local; Complex_Z ztmp; Complex_Z tzero = {+0.0e+00,+0.0e00}; //------------------------------------------------------------------ // Subdivide workspace //------------------------------------------------------------------ Kinvx = rwork; xKinvx_local = Kinvx + primme->nLocal*blockSize; xKinvr_local = xKinvx_local + blockSize; xKinvx = xKinvr_local + blockSize; xKinvr = xKinvx + blockSize; //------------------------------------------------------------------ // Compute K^{-1}x for block x. Kinvx memory requirement (blockSize*nLocal) //------------------------------------------------------------------ apply_preconditioner_block(x, Kinvx, blockSize, primme ); //------------------------------------------------------------------ // Compute local x^TK^{-1}x and x^TK^{-1}r = (K^{-1}x)^Tr for each vector //------------------------------------------------------------------ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { xKinvx_local[blockIndex] = Num_dot_zprimme(primme->nLocal, &x[primme->nLocal*blockIndex],1, &Kinvx[primme->nLocal*blockIndex],1); xKinvr_local[blockIndex] = Num_dot_zprimme(primme->nLocal, &Kinvx[primme->nLocal*blockIndex],1, &r[primme->nLocal*blockIndex],1); } count = 4*blockSize; (*primme->globalSumDouble)(xKinvx_local, xKinvx, &count, primme); //------------------------------------------------------------------ // Compute K^{-1}r //------------------------------------------------------------------ apply_preconditioner_block(r, x, blockSize, primme ); //------------------------------------------------------------------ // Compute K^(-1)r - ( xKinvr/xKinvx ) K^(-1)r for each vector //------------------------------------------------------------------ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { if (z_abs_primme(xKinvx[blockIndex]) > 0.0L) { ztmp.r = -xKinvr[blockIndex].r; ztmp.i = -xKinvr[blockIndex].i; z_div_primme(&alpha, &ztmp, &xKinvx[blockIndex]); } else alpha = tzero; Num_axpy_zprimme(primme->nLocal,alpha,&Kinvx[primme->nLocal*blockIndex], 1, &x[primme->nLocal*blockIndex],1); } //for } // of Olsen_preconditiner_block
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, int numLocked, int numConvergedStored, double *ritzVals, double *prevRitzVals, int *flags, int basisSize, double *blockNorms, int *iev, int blockSize, double eresTol, double machEps, double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, primme_params *primme) { int blockIndex; /* Loop index. Ranges from 0..blockSize-1. */ int ritzIndex; /* Ritz value index blockIndex corresponds to. */ /* Possible values range from 0..basisSize-1. */ int sortedIndex; /* Ritz value index in sortedRitzVals, blockIndex */ /* corresponds to. Range 0..numLocked+basisSize-1 */ int neededRsize; /* Needed size for rwork. If not enough return */ int linSolverRWorkSize; /* Size of the linSolverRWork array. */ int *ilev; /* Array of size blockSize. Maps the target Ritz */ /* values to their positions in the sortedEvals */ /* array. */ int sizeLprojector; /* Sizes of the various left/right projectors */ int sizeRprojectorQ; /* These will be 0/1/or numOrthConstr+numLocked */ int sizeRprojectorX; /* or numOrthConstr+numConvergedStored w/o locking*/ static int numPrevRitzVals = 0; /* Size of prevRitzVals */ int ret; /* Return code. */ Complex_Z *r, *x, *sol; /* Residual, Ritz vector, and correction. */ Complex_Z *linSolverRWork;/* Workspace needed by linear solver. */ double *sortedRitzVals; /* Sorted array of current and converged Ritz */ /* values. Size of array is numLocked+basisSize. */ double *blockOfShifts; /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/ double *approxOlsenEps; /* Shifts for approximate Olsen implementation */ Complex_Z *Kinvx; /* Workspace to store K^{-1}x */ Complex_Z *Lprojector; /* Q pointer for (I-Q*Q'). Usually points to evecs*/ Complex_Z *RprojectorQ; /* May point to evecs/evecsHat depending on skewQ */ Complex_Z *RprojectorX; /* May point to x/Kinvx depending on skewX */ Complex_Z xKinvx; /* Stores x'*K^{-1}x if needed */ double eval, shift, robustShift; /* robust shift values. */ Complex_Z tmpShift; /* Temp shift for daxpy */ //------------------------------------------------------------ // Subdivide the workspace with pointers, and figure out // the total amount of needed real workspace (neededRsize) //------------------------------------------------------------ /* needed worksize */ neededRsize = 0; Kinvx = rwork; /* Kinvx will have nonzero size if precond and both RightX and SkewX */ if (primme->correctionParams.precondition && primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { /* OLSEN's method requires a block, but JDQMR is vector by vector */ if (primme->correctionParams.maxInnerIterations == 0) { sol = Kinvx + primme->nLocal*blockSize; neededRsize = neededRsize + primme->nLocal*blockSize; } else { sol = Kinvx + primme->nLocal; neededRsize = neededRsize + primme->nLocal; } } else { sol = Kinvx + 0; } if (primme->correctionParams.maxInnerIterations == 0) { linSolverRWork = sol + 0; /* sol not needed for GD */ linSolverRWorkSize = 0; /* No inner solver used */ } else { linSolverRWork = sol + primme->nLocal; /* sol needed in innerJD */ neededRsize = neededRsize + primme->nLocal; linSolverRWorkSize = /* Inner solver worksize */ 4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals); neededRsize = neededRsize + linSolverRWorkSize; } sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize); blockOfShifts = sortedRitzVals + (numLocked+basisSize); approxOlsenEps = blockOfShifts + blockSize; neededRsize = neededRsize + numLocked+basisSize + 2*blockSize; if (neededRsize > rworkSize) { return(neededRsize); } // Subdivide also the integer work space ilev = iwork; // of size blockSize //------------------------------------------------------------ // Figuring out preconditioning shifts (robust, Olsen, etc) //------------------------------------------------------------ /* blockOfShifts will contain the preconditioning shifts: */ /* either Ritz values or robustShifts computed below. These shifts */ /* will be used in the correction equations or in inverting (K-sigma I) */ /* approxOlsenEps will contain error approximations for eigenavalues */ /* to be used for Olsen's method (when innerIterations =0). */ if (primme->locking) { /* Combine the sorted list of locked Ritz values with the sorted */ /* list of current Ritz values, ritzVals. The merging of the two */ /* lists lockedEvals and ritzVals is stored in sortedRitzVals. */ mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, sortedRitzVals, ilev, blockSize, primme); } else { /* Then the sorted evals are simply the ritzVals, targeted as iev */ sortedRitzVals = ritzVals; ilev = iev; } /*-----------------------------------------------------------------*/ /* For interior eigenpairs, use the user provided shifts */ /*-----------------------------------------------------------------*/ if (primme->target != primme_smallest && primme->target != primme_largest) { for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* user provided shifts */ else { /*-----------------------------------------------------------------*/ /* else it is primme_smallest or primme_largest */ /*-----------------------------------------------------------------*/ if (primme->correctionParams.robustShifts) { /*----------------------------------------------------*/ /* Subtract/add a robust shift from/to the Ritz value */ /*----------------------------------------------------*/ /* Find the robust shift for each block vector */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; eval = sortedRitzVals[sortedIndex]; robustShift = computeRobustShift(blockIndex, blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, sortedRitzVals, &approxOlsenEps[blockIndex], numLocked+basisSize, ilev, primme); /* Subtract/add the shift if looking for the smallest/largest */ /* eigenvalues, Do not go beyond the previous computed eigval */ if (primme->target == primme_smallest) { blockOfShifts[blockIndex] = eval - robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } else { blockOfShifts[blockIndex] = eval + robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } // robust shifting } /* for loop */ } /* endif robust shifts */ else { /*--------------------------------------------------------------*/ /* Otherwise, the shifts for both preconditioner and correction */ /* equation should be just the Ritz values. For Olsen's method, */ /* the shifts for r-eps*x, are chosen as the difference in Ritz */ /* value between successive iterations. */ /*--------------------------------------------------------------*/ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { ritzIndex = iev[blockIndex]; sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = ritzVals[ritzIndex]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* else no robust shifts */ } /* else primme_smallest or primme_largest */ /* Remember the previous ritz values*/ numPrevRitzVals = numLocked+basisSize; Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1); // Equip the primme struct with the blockOfShifts, in case the user // wants to precondition (K-sigma_i I)^{-1} with a different shift // for each vector primme->ShiftsForPreconditioner = blockOfShifts; //------------------------------------------------------------ // Generalized Davidson variants -- No inner iterations //------------------------------------------------------------ if (primme->correctionParams.maxInnerIterations == 0) { // This is Generalized Davidson or approximate Olsen's method. // Perform block preconditioning (with or without projections) r = &W[primme->nLocal*basisSize]; // All the block residuals x = &V[primme->nLocal*basisSize]; // All the block Ritz vectors if ( primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { // Compute exact Olsen's projected preconditioner. This is // expensive and rarely improves anything! Included for completeness. Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme); } else { if ( primme->correctionParams.projectors.RightX ) { // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { // Compute r_i = r_i - err_i * x_i {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;} Num_axpy_zprimme(primme->nLocal, tmpShift, &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1); } //for } // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) apply_preconditioner_block(r, x, blockSize, primme ); } } //------------------------------------------------------------ // JDQMR --- JD inner-outer variants //------------------------------------------------------------ else { // maxInnerIterations > 0 We perform inner-outer JDQMR. /* Solve the correction for each block vector. */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { r = &W[primme->nLocal*(basisSize+blockIndex)]; x = &V[primme->nLocal*(basisSize+blockIndex)]; /* Set up the left/right/skew projectors for JDQMR. */ /* The pointers Lprojector, Rprojector(Q/X) point to the */ /* appropriate arrays for use in the projection step */ setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, &Lprojector, &RprojectorQ, &RprojectorX, &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX, numLocked, numConvergedStored, primme); /* Map the index of the block vector to its corresponding eigenvalue */ /* index, and the shift for the correction equation. Also make the */ /* shift available to primme, in case (K-shift I)^-1 is needed */ ritzIndex = iev[blockIndex]; shift = blockOfShifts[blockIndex]; primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex]; ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX, sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, machEps, linSolverRWork, linSolverRWorkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve, ret, __FILE__, __LINE__, primme); return (INNER_SOLVE_FAILURE); } Num_zcopy_zprimme(primme->nLocal, sol, 1, &V[primme->nLocal*(basisSize+blockIndex)], 1); } // end for each block vector } // JDqmr variants return 0; }
static int apply_skew_projector(Complex_Z *Q, Complex_Z *Qhat, Complex_Z *UDU, int *ipivot, int numCols, Complex_Z *v, Complex_Z *rwork, primme_params *primme) { int count; Complex_Z ztmp; Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}, tmone = {-1.0e+00,+0.0e00}; if (numCols > 0) { /* there is a projector to be applied */ int ret; Complex_Z *overlaps; /* overlaps of v with columns of Q */ Complex_Z *workSpace; /* Used for computing local overlaps */ overlaps = rwork; workSpace = overlaps + numCols; /* --------------------------------------------------------*/ /* Treat the one vector case with BLAS 1 calls */ /* --------------------------------------------------------*/ if (numCols == 1) { /* Compute workspace = Q'*v */ overlaps[0] = dist_dot(Q, 1, v, 1, primme); /* Backsolve only if there is a skew projector */ if (UDU != NULL) { if ( z_eq_primme(UDU[0], tzero) ) { return UDUSOLVE_FAILURE; } z_div_primme(&overlaps[0], &overlaps[0], &UDU[0]); } /* Compute v=v-Qhat*overlaps */ ztmp.r = - overlaps[0].r; ztmp.i = - overlaps[0].i; Num_axpy_zprimme(primme->nLocal, ztmp, Qhat, 1, v, 1); } else { /* ------------------------------------------------------*/ /* More than one vectors. Use BLAS 2. */ /* ------------------------------------------------------*/ /* Compute workspace = Q'*v */ Num_gemv_zprimme("C", primme->nLocal, numCols, tpone, Q, primme->nLocal, v, 1, tzero, workSpace, 1); /* Global sum: overlaps = Q'*v */ // In Complex, the size of the array to globalSum is twice as large count = 2*numCols; (*primme->globalSumDouble)(workSpace, overlaps, &count, primme); /* --------------------------------------------*/ /* Backsolve only if there is a skew projector */ /* --------------------------------------------*/ if (UDU != NULL) { /* Solve (Q'Qhat)^{-1}*workSpace = overlaps = Q'*v for alpha by */ /* backsolving with the UDU decomposition. */ ret = UDUSolve_zprimme(UDU, ipivot, numCols, overlaps, workSpace); if (ret != 0) { primme_PushErrorMessage(Primme_apply_skew_projector, Primme_udusolve, ret, __FILE__, __LINE__, primme); return UDUSOLVE_FAILURE; } /* Compute v=v-Qhat*workspace */ Num_gemv_zprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, workSpace, 1, tpone, v, 1); } else { /* Compute v=v-Qhat*overlaps */ Num_gemv_zprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, overlaps, 1, tpone, v, 1); } // UDU==null } // numCols != 1 } // numCols > 0 return 0; }
int inner_solve_zprimme(Complex_Z *x, Complex_Z *r, double *rnorm, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, Complex_Z *xKinvx, Complex_Z *Lprojector, Complex_Z *RprojectorQ, Complex_Z *RprojectorX, int sizeLprojector, int sizeRprojectorQ, int sizeRprojectorX, Complex_Z *sol, double eval, double shift, double eresTol, double aNormEstimate, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int i; /* loop variable */ int workSpaceSize; /* Size of local work array. */ int numIts; /* Number of inner iterations */ int ret; /* Return value used for error checking. */ int maxIterations; /* The maximum # iterations allowed. Depends on primme */ Complex_Z *workSpace; /* Workspace needed by UDU routine */ /* QMR parameters */ Complex_Z *g, *d, *delta, *w; Complex_Z alpha_prev, beta, rho_prev, rho; Complex_Z ztmp; double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; /* Parameters used to dynamically update eigenpair */ Complex_Z Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev; Complex_Z eta; double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R; double Gamma_prev, Phi_prev; double Gamma, Phi; double gamma; /* The convergence criteria of the inner linear system must satisfy: */ /* || current residual || <= relativeTolerance * || initial residual || */ /* + absoluteTol */ double relativeTolerance; double absoluteTolerance; double LTolerance, ETolerance; /* Some constants */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /* -------------------------------------------*/ /* Subdivide the workspace into needed arrays */ /* -------------------------------------------*/ g = rwork; d = g + primme->nLocal; delta = d + primme->nLocal; w = delta + primme->nLocal; workSpace = w + primme->nLocal; // This needs at least 2*numOrth+NumEvals) workSpaceSize = rworkSize - (workSpace - rwork); /* -----------------------------------------*/ /* Set up convergence criteria by Tolerance */ /* -----------------------------------------*/ if (primme->aNorm <= 0.0L) { absoluteTolerance = aNormEstimate*machEps; eresTol = eresTol*aNormEstimate; } else { absoluteTolerance = primme->aNorm*machEps; } tau_prev = tau_init = *rnorm; /* Assumes zero initial guess */ LTolerance = eresTol; // Andreas: note that eigenresidual tol may not be achievable, because we // iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. if (primme->correctionParams.convTest == primme_adaptive) { ETolerance = max(eresTol/1.8L, absoluteTolerance); LTolerance = ETolerance; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) { LTolerance = max(eresTol/1.8L, absoluteTolerance); ETolerance = max(tau_init*0.1L, LTolerance); } else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) { relativeTolerance = pow(primme->correctionParams.relTolBase, (double)-primme->stats.numOuterIterations); LTolerance = relativeTolerance * tau_init + absoluteTolerance + eresTol; //printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n", //relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate); } /* --------------------------------------------------------*/ /* Set up convergence criteria by max number of iterations */ /* --------------------------------------------------------*/ /* compute first total number of remaining matvecs */ maxIterations = primme->maxMatvecs - primme->stats.numMatvecs; /* Perform primme.maxInnerIterations, but do not exceed total remaining */ if (primme->correctionParams.maxInnerIterations > 0) { maxIterations = min(primme->correctionParams.maxInnerIterations, maxIterations); } /* --------------------------------------------------------*/ /* Rest of initializations */ /* --------------------------------------------------------*/ /* Assume zero initial guess */ Num_zcopy_zprimme(primme->nLocal, r, 1, g, 1); ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, d, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); return APPLYPROJECTEDPRECONDITIONER_FAILURE; } Theta_prev = 0.0L; eval_prev = eval; rho_prev = dist_dot(g, 1, d, 1, primme); /* Initialize recurrences used to dynamically update the eigenpair */ Beta_prev = Delta_prev = Psi_prev = tzero; Gamma_prev = Phi_prev = 0.0L; /* other initializations */ for (i = 0; i < primme->nLocal; i++) { delta[i] = tzero; sol[i] = tzero; } numIts = 0; /*----------------------------------------------------------------------*/ /*------------------------ Begin Inner Loop ----------------------------*/ /*----------------------------------------------------------------------*/ while (numIts < maxIterations) { apply_projected_matrix(d, shift, Lprojector, sizeLprojector, w, workSpace, primme); ztmp = dist_dot(d, 1, w, 1, primme); sigma_prev = ztmp.r; if (sigma_prev == 0.0L) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev); } break; } zd_mult_primme(alpha_prev, rho_prev, 1.0L/sigma_prev); if (z_abs_primme(alpha_prev) < machEps || z_abs_primme(alpha_prev) > 1.0L/machEps){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev); } break; } ztmp.r = -alpha_prev.r; ztmp.i = -alpha_prev.i; Num_axpy_zprimme(primme->nLocal, ztmp, w, 1, g, 1); ztmp = dist_dot(g, 1, g, 1, primme); Theta = ztmp.r; Theta = sqrt(Theta); Theta = Theta/tau_prev; c = 1.0L/sqrt(1+Theta*Theta); tau = tau_prev*Theta*c; gamma = c*c*Theta_prev*Theta_prev; {ztmp.r = gamma; ztmp.i = 0.0L;} zd_mult_primme(eta, alpha_prev, c*c); Num_scal_zprimme(primme->nLocal, ztmp, delta, 1); Num_axpy_zprimme(primme->nLocal, eta, d, 1, delta, 1); Num_axpy_zprimme(primme->nLocal, tpone, delta, 1, sol, 1); numIts++; if (z_abs_primme(rho_prev) == 0.0L ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because abs(rho) %e\n", z_abs_primme(rho_prev)); } break; } if (tau < LTolerance) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance); } break; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance || primme->correctionParams.convTest == primme_adaptive) { /* --------------------------------------------------------*/ /* Adaptive stopping based on dynamic monitoring of eResid */ /* --------------------------------------------------------*/ /* Update the Ritz value and eigenresidual using the */ /* following recurrences. */ zd_mult_primme(Delta, Delta_prev, gamma); zz_mult_primme(ztmp, eta, rho_prev); z_add_primme(Delta, Delta, ztmp); z_sub_primme(Beta, Beta_prev, Delta); Phi = gamma*gamma*Phi_prev + z_abs_primme(eta)*z_abs_primme(eta)*sigma_prev; zd_mult_primme(Psi, Psi_prev, gamma); {ztmp.r = gamma*Phi_prev; ztmp.i = 0.0L;} z_add_primme(Psi, Psi, ztmp); Gamma = Gamma_prev + 2.0L*Psi.r + Phi; /* Perform the update: update the eigenvalue and the square of the */ /* residual norm. */ ztmp = dist_dot(sol, 1, sol, 1, primme); dot_sol = ztmp.r; eval_updated = shift + (eval - shift + 2*Beta.r + Gamma)/(1 + dot_sol); eres2_updated = (tau*tau)/(1 + dot_sol) + ((eval - shift)*(eval - shift) + z_abs_primme(Beta)*z_abs_primme(Beta) + 2.0L*(eval - shift)*Beta.r)/(1 + dot_sol) - (eval_updated - shift)*(eval_updated - shift); /* If numerical problems, let eres about the same as tau */ if (eres2_updated < 0){ eres_updated = sqrt( (tau*tau)/(1 + dot_sol) ); } else eres_updated = sqrt(eres2_updated); /* --------------------------------------------------------*/ /* Stopping criteria */ /* --------------------------------------------------------*/ R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol); if ( tau <= R*eres_updated || eres_updated <= tau*R ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < R eres \n"); } break; } if (primme->target == primme_smallest && eval_updated > eval_prev) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated > eval_prev\n"); } break; } else if (primme->target == primme_largest && eval_updated < eval_prev){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated < eval_prev\n"); } break; } if (eres_updated < ETolerance) { // tau < LTol has been checked if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated); } break; } eval_prev = eval_updated; if (primme->printLevel >= 4 && primme->procID == 0) { fprintf(primme->outputFile, "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats. numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated); fflush(primme->outputFile); } /* --------------------------------------------------------*/ } /* End of if adaptive JDQMR section */ /* --------------------------------------------------------*/ else if (primme->printLevel >= 4 && primme->procID == 0) { // Report for non adaptive inner iterations fprintf(primme->outputFile, "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs, primme_wTimer(0),tau); fflush(primme->outputFile); } if (numIts < maxIterations) { ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, w, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); ret = APPLYPROJECTEDPRECONDITIONER_FAILURE; break; } rho = dist_dot(g, 1, w, 1, primme); z_div_primme(&beta, &rho, &rho_prev); Num_scal_zprimme(primme->nLocal, beta, d, 1); Num_axpy_zprimme(primme->nLocal, tpone, w, 1, d, 1); rho_prev = rho; tau_prev = tau; Theta_prev = Theta; Delta_prev = Delta; Beta_prev = Beta; Phi_prev = Phi; Psi_prev = Psi; Gamma_prev = Gamma; } /* --------------------------------------------------------*/ } /* End of QMR main while loop */ /* --------------------------------------------------------*/ *rnorm = eres_updated; return 0; }