static void apply_preconditioner_block(Complex_Z *v, Complex_Z *result, int blockSize, primme_params *primme) { if (primme->correctionParams.precondition) { (*primme->applyPreconditioner)(v, result, &blockSize, primme); primme->stats.numPreconds += blockSize; } else { Num_zcopy_zprimme(primme->nLocal*blockSize, v, 1, result, 1); } }
static void restart_X(Complex_Z *X, Complex_Z *hVecs, int nLocal, int basisSize, int restartSize, Complex_Z *rwork, int rworkSize) { int i, k; /* Loop variables */ int AvailRows = min(rworkSize/restartSize, nLocal); Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; i = 0; while (i < nLocal) { /* Block matrix multiply */ Num_gemm_zprimme("N", "N", AvailRows, restartSize, basisSize, tpone, &X[i], nLocal, hVecs, basisSize, tzero, rwork, AvailRows ); /* Copy the result in the desired location of X */ for (k=0; k < restartSize; k++) { Num_zcopy_zprimme(AvailRows, &rwork[AvailRows*k],1, &X[i+nLocal*k], 1); } i = i+AvailRows; AvailRows = min(AvailRows, nLocal-i); } }
static int apply_projected_preconditioner(Complex_Z *v, Complex_Z *Q, Complex_Z *RprojectorQ, Complex_Z *x, Complex_Z *RprojectorX, int sizeRprojectorQ, int sizeRprojectorX, Complex_Z *xKinvx, Complex_Z *UDU, int *ipivot, Complex_Z *result, Complex_Z *rwork, primme_params *primme) { int ONE = 1; int ret; if (primme->correctionParams.precondition) { /* Place K^{-1}v in result */ (*primme->applyPreconditioner)(v, result, &ONE, primme); primme->stats.numPreconds += 1; } else { Num_zcopy_zprimme(primme->nLocal, v, 1, result, 1); } ret = apply_skew_projector(Q, RprojectorQ, UDU, ipivot, sizeRprojectorQ, result, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_apply_projected_preconditioner, Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme); return APPLYSKEWPROJECTOR_FAILURE; } apply_skew_projector(x, RprojectorX, xKinvx, ipivot, sizeRprojectorX, result, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_apply_projected_preconditioner, Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme); return APPLYSKEWPROJECTOR_FAILURE; } return 0; }
static void setup_JD_projectors(Complex_Z *x, Complex_Z *r, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *Kinvx, Complex_Z *xKinvx, Complex_Z **Lprojector, Complex_Z **RprojectorQ, Complex_Z **RprojectorX, int *sizeLprojector, int *sizeRprojectorQ, int *sizeRprojectorX, int numLocked, int numConverged, primme_params *primme) { int n, sizeEvecs; int ONE = 1; // In Complex, the size of the array to globalSum is twice as large int count = 2; Complex_Z xKinvx_local; Complex_Z tpone = {+1.0e+00,+0.0e00}; *sizeLprojector = 0; *sizeRprojectorQ = 0; *sizeRprojectorX = 0; *Lprojector = NULL; *RprojectorQ = NULL; *RprojectorX = NULL; n = primme->nLocal; if (primme->locking) sizeEvecs = primme->numOrthoConst+numLocked; else sizeEvecs = primme->numOrthoConst+numConverged; /* --------------------------------------------------------*/ /* Set up the left projector arrays. Make x adjacent to Q. */ /* --------------------------------------------------------*/ if (primme->correctionParams.projectors.LeftQ) { *sizeLprojector = sizeEvecs; *Lprojector = evecs; if (primme->correctionParams.projectors.LeftX) { Num_zcopy_zprimme(n, x, 1, &evecs[sizeEvecs*n], 1); *sizeLprojector = *sizeLprojector + 1; } } else { if (primme->correctionParams.projectors.LeftX) { *Lprojector = x; *sizeLprojector = 1; } } /* --------------------------------------------------------*/ /* Set up the right projector arrays. Q and x separately */ /* --------------------------------------------------------*/ /* ------------*/ /* First for Q */ /* ------------*/ if (primme->correctionParams.projectors.RightQ) { if (primme->correctionParams.precondition && primme->correctionParams.projectors.SkewQ) { *RprojectorQ = evecsHat; /* Use the K^(-1)evecs array */ } else { /* Right Q but not SkewQ */ *RprojectorQ = evecs; /* Use just the evecs array. */ } *sizeRprojectorQ = sizeEvecs; } else { /* if no RightQ projector */ *RprojectorQ = NULL; *sizeRprojectorQ = 0; } /* ------------*/ /* Then for x */ /* ------------*/ if (primme->correctionParams.projectors.RightX) { if (primme->correctionParams.precondition && primme->correctionParams.projectors.SkewX) { (*primme->applyPreconditioner)(x, Kinvx, &ONE, primme); primme->stats.numPreconds += 1; *RprojectorX = Kinvx; xKinvx_local = Num_dot_zprimme(primme->nLocal, x, 1, Kinvx, 1); (*primme->globalSumDouble)(&xKinvx_local, xKinvx, &count, primme); } else { *RprojectorX = x; *xKinvx = tpone; } *sizeRprojectorX = 1; } else { *RprojectorX = NULL; *sizeRprojectorX = 0; *xKinvx = tpone; } } /* setup_JD_projectors */
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, int numLocked, int numConvergedStored, double *ritzVals, double *prevRitzVals, int *flags, int basisSize, double *blockNorms, int *iev, int blockSize, double eresTol, double machEps, double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, primme_params *primme) { int blockIndex; /* Loop index. Ranges from 0..blockSize-1. */ int ritzIndex; /* Ritz value index blockIndex corresponds to. */ /* Possible values range from 0..basisSize-1. */ int sortedIndex; /* Ritz value index in sortedRitzVals, blockIndex */ /* corresponds to. Range 0..numLocked+basisSize-1 */ int neededRsize; /* Needed size for rwork. If not enough return */ int linSolverRWorkSize; /* Size of the linSolverRWork array. */ int *ilev; /* Array of size blockSize. Maps the target Ritz */ /* values to their positions in the sortedEvals */ /* array. */ int sizeLprojector; /* Sizes of the various left/right projectors */ int sizeRprojectorQ; /* These will be 0/1/or numOrthConstr+numLocked */ int sizeRprojectorX; /* or numOrthConstr+numConvergedStored w/o locking*/ static int numPrevRitzVals = 0; /* Size of prevRitzVals */ int ret; /* Return code. */ Complex_Z *r, *x, *sol; /* Residual, Ritz vector, and correction. */ Complex_Z *linSolverRWork;/* Workspace needed by linear solver. */ double *sortedRitzVals; /* Sorted array of current and converged Ritz */ /* values. Size of array is numLocked+basisSize. */ double *blockOfShifts; /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/ double *approxOlsenEps; /* Shifts for approximate Olsen implementation */ Complex_Z *Kinvx; /* Workspace to store K^{-1}x */ Complex_Z *Lprojector; /* Q pointer for (I-Q*Q'). Usually points to evecs*/ Complex_Z *RprojectorQ; /* May point to evecs/evecsHat depending on skewQ */ Complex_Z *RprojectorX; /* May point to x/Kinvx depending on skewX */ Complex_Z xKinvx; /* Stores x'*K^{-1}x if needed */ double eval, shift, robustShift; /* robust shift values. */ Complex_Z tmpShift; /* Temp shift for daxpy */ //------------------------------------------------------------ // Subdivide the workspace with pointers, and figure out // the total amount of needed real workspace (neededRsize) //------------------------------------------------------------ /* needed worksize */ neededRsize = 0; Kinvx = rwork; /* Kinvx will have nonzero size if precond and both RightX and SkewX */ if (primme->correctionParams.precondition && primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { /* OLSEN's method requires a block, but JDQMR is vector by vector */ if (primme->correctionParams.maxInnerIterations == 0) { sol = Kinvx + primme->nLocal*blockSize; neededRsize = neededRsize + primme->nLocal*blockSize; } else { sol = Kinvx + primme->nLocal; neededRsize = neededRsize + primme->nLocal; } } else { sol = Kinvx + 0; } if (primme->correctionParams.maxInnerIterations == 0) { linSolverRWork = sol + 0; /* sol not needed for GD */ linSolverRWorkSize = 0; /* No inner solver used */ } else { linSolverRWork = sol + primme->nLocal; /* sol needed in innerJD */ neededRsize = neededRsize + primme->nLocal; linSolverRWorkSize = /* Inner solver worksize */ 4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals); neededRsize = neededRsize + linSolverRWorkSize; } sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize); blockOfShifts = sortedRitzVals + (numLocked+basisSize); approxOlsenEps = blockOfShifts + blockSize; neededRsize = neededRsize + numLocked+basisSize + 2*blockSize; if (neededRsize > rworkSize) { return(neededRsize); } // Subdivide also the integer work space ilev = iwork; // of size blockSize //------------------------------------------------------------ // Figuring out preconditioning shifts (robust, Olsen, etc) //------------------------------------------------------------ /* blockOfShifts will contain the preconditioning shifts: */ /* either Ritz values or robustShifts computed below. These shifts */ /* will be used in the correction equations or in inverting (K-sigma I) */ /* approxOlsenEps will contain error approximations for eigenavalues */ /* to be used for Olsen's method (when innerIterations =0). */ if (primme->locking) { /* Combine the sorted list of locked Ritz values with the sorted */ /* list of current Ritz values, ritzVals. The merging of the two */ /* lists lockedEvals and ritzVals is stored in sortedRitzVals. */ mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, sortedRitzVals, ilev, blockSize, primme); } else { /* Then the sorted evals are simply the ritzVals, targeted as iev */ sortedRitzVals = ritzVals; ilev = iev; } /*-----------------------------------------------------------------*/ /* For interior eigenpairs, use the user provided shifts */ /*-----------------------------------------------------------------*/ if (primme->target != primme_smallest && primme->target != primme_largest) { for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* user provided shifts */ else { /*-----------------------------------------------------------------*/ /* else it is primme_smallest or primme_largest */ /*-----------------------------------------------------------------*/ if (primme->correctionParams.robustShifts) { /*----------------------------------------------------*/ /* Subtract/add a robust shift from/to the Ritz value */ /*----------------------------------------------------*/ /* Find the robust shift for each block vector */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; eval = sortedRitzVals[sortedIndex]; robustShift = computeRobustShift(blockIndex, blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, sortedRitzVals, &approxOlsenEps[blockIndex], numLocked+basisSize, ilev, primme); /* Subtract/add the shift if looking for the smallest/largest */ /* eigenvalues, Do not go beyond the previous computed eigval */ if (primme->target == primme_smallest) { blockOfShifts[blockIndex] = eval - robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } else { blockOfShifts[blockIndex] = eval + robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } // robust shifting } /* for loop */ } /* endif robust shifts */ else { /*--------------------------------------------------------------*/ /* Otherwise, the shifts for both preconditioner and correction */ /* equation should be just the Ritz values. For Olsen's method, */ /* the shifts for r-eps*x, are chosen as the difference in Ritz */ /* value between successive iterations. */ /*--------------------------------------------------------------*/ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { ritzIndex = iev[blockIndex]; sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = ritzVals[ritzIndex]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* else no robust shifts */ } /* else primme_smallest or primme_largest */ /* Remember the previous ritz values*/ numPrevRitzVals = numLocked+basisSize; Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1); // Equip the primme struct with the blockOfShifts, in case the user // wants to precondition (K-sigma_i I)^{-1} with a different shift // for each vector primme->ShiftsForPreconditioner = blockOfShifts; //------------------------------------------------------------ // Generalized Davidson variants -- No inner iterations //------------------------------------------------------------ if (primme->correctionParams.maxInnerIterations == 0) { // This is Generalized Davidson or approximate Olsen's method. // Perform block preconditioning (with or without projections) r = &W[primme->nLocal*basisSize]; // All the block residuals x = &V[primme->nLocal*basisSize]; // All the block Ritz vectors if ( primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { // Compute exact Olsen's projected preconditioner. This is // expensive and rarely improves anything! Included for completeness. Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme); } else { if ( primme->correctionParams.projectors.RightX ) { // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { // Compute r_i = r_i - err_i * x_i {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;} Num_axpy_zprimme(primme->nLocal, tmpShift, &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1); } //for } // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) apply_preconditioner_block(r, x, blockSize, primme ); } } //------------------------------------------------------------ // JDQMR --- JD inner-outer variants //------------------------------------------------------------ else { // maxInnerIterations > 0 We perform inner-outer JDQMR. /* Solve the correction for each block vector. */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { r = &W[primme->nLocal*(basisSize+blockIndex)]; x = &V[primme->nLocal*(basisSize+blockIndex)]; /* Set up the left/right/skew projectors for JDQMR. */ /* The pointers Lprojector, Rprojector(Q/X) point to the */ /* appropriate arrays for use in the projection step */ setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, &Lprojector, &RprojectorQ, &RprojectorX, &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX, numLocked, numConvergedStored, primme); /* Map the index of the block vector to its corresponding eigenvalue */ /* index, and the shift for the correction equation. Also make the */ /* shift available to primme, in case (K-shift I)^-1 is needed */ ritzIndex = iev[blockIndex]; shift = blockOfShifts[blockIndex]; primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex]; ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX, sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, machEps, linSolverRWork, linSolverRWorkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve, ret, __FILE__, __LINE__, primme); return (INNER_SOLVE_FAILURE); } Num_zcopy_zprimme(primme->nLocal, sol, 1, &V[primme->nLocal*(basisSize+blockIndex)], 1); } // end for each block vector } // JDqmr variants return 0; }
int inner_solve_zprimme(Complex_Z *x, Complex_Z *r, double *rnorm, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, Complex_Z *xKinvx, Complex_Z *Lprojector, Complex_Z *RprojectorQ, Complex_Z *RprojectorX, int sizeLprojector, int sizeRprojectorQ, int sizeRprojectorX, Complex_Z *sol, double eval, double shift, double eresTol, double aNormEstimate, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int i; /* loop variable */ int workSpaceSize; /* Size of local work array. */ int numIts; /* Number of inner iterations */ int ret; /* Return value used for error checking. */ int maxIterations; /* The maximum # iterations allowed. Depends on primme */ Complex_Z *workSpace; /* Workspace needed by UDU routine */ /* QMR parameters */ Complex_Z *g, *d, *delta, *w; Complex_Z alpha_prev, beta, rho_prev, rho; Complex_Z ztmp; double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; /* Parameters used to dynamically update eigenpair */ Complex_Z Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev; Complex_Z eta; double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R; double Gamma_prev, Phi_prev; double Gamma, Phi; double gamma; /* The convergence criteria of the inner linear system must satisfy: */ /* || current residual || <= relativeTolerance * || initial residual || */ /* + absoluteTol */ double relativeTolerance; double absoluteTolerance; double LTolerance, ETolerance; /* Some constants */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /* -------------------------------------------*/ /* Subdivide the workspace into needed arrays */ /* -------------------------------------------*/ g = rwork; d = g + primme->nLocal; delta = d + primme->nLocal; w = delta + primme->nLocal; workSpace = w + primme->nLocal; // This needs at least 2*numOrth+NumEvals) workSpaceSize = rworkSize - (workSpace - rwork); /* -----------------------------------------*/ /* Set up convergence criteria by Tolerance */ /* -----------------------------------------*/ if (primme->aNorm <= 0.0L) { absoluteTolerance = aNormEstimate*machEps; eresTol = eresTol*aNormEstimate; } else { absoluteTolerance = primme->aNorm*machEps; } tau_prev = tau_init = *rnorm; /* Assumes zero initial guess */ LTolerance = eresTol; // Andreas: note that eigenresidual tol may not be achievable, because we // iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. if (primme->correctionParams.convTest == primme_adaptive) { ETolerance = max(eresTol/1.8L, absoluteTolerance); LTolerance = ETolerance; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) { LTolerance = max(eresTol/1.8L, absoluteTolerance); ETolerance = max(tau_init*0.1L, LTolerance); } else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) { relativeTolerance = pow(primme->correctionParams.relTolBase, (double)-primme->stats.numOuterIterations); LTolerance = relativeTolerance * tau_init + absoluteTolerance + eresTol; //printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n", //relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate); } /* --------------------------------------------------------*/ /* Set up convergence criteria by max number of iterations */ /* --------------------------------------------------------*/ /* compute first total number of remaining matvecs */ maxIterations = primme->maxMatvecs - primme->stats.numMatvecs; /* Perform primme.maxInnerIterations, but do not exceed total remaining */ if (primme->correctionParams.maxInnerIterations > 0) { maxIterations = min(primme->correctionParams.maxInnerIterations, maxIterations); } /* --------------------------------------------------------*/ /* Rest of initializations */ /* --------------------------------------------------------*/ /* Assume zero initial guess */ Num_zcopy_zprimme(primme->nLocal, r, 1, g, 1); ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, d, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); return APPLYPROJECTEDPRECONDITIONER_FAILURE; } Theta_prev = 0.0L; eval_prev = eval; rho_prev = dist_dot(g, 1, d, 1, primme); /* Initialize recurrences used to dynamically update the eigenpair */ Beta_prev = Delta_prev = Psi_prev = tzero; Gamma_prev = Phi_prev = 0.0L; /* other initializations */ for (i = 0; i < primme->nLocal; i++) { delta[i] = tzero; sol[i] = tzero; } numIts = 0; /*----------------------------------------------------------------------*/ /*------------------------ Begin Inner Loop ----------------------------*/ /*----------------------------------------------------------------------*/ while (numIts < maxIterations) { apply_projected_matrix(d, shift, Lprojector, sizeLprojector, w, workSpace, primme); ztmp = dist_dot(d, 1, w, 1, primme); sigma_prev = ztmp.r; if (sigma_prev == 0.0L) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev); } break; } zd_mult_primme(alpha_prev, rho_prev, 1.0L/sigma_prev); if (z_abs_primme(alpha_prev) < machEps || z_abs_primme(alpha_prev) > 1.0L/machEps){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev); } break; } ztmp.r = -alpha_prev.r; ztmp.i = -alpha_prev.i; Num_axpy_zprimme(primme->nLocal, ztmp, w, 1, g, 1); ztmp = dist_dot(g, 1, g, 1, primme); Theta = ztmp.r; Theta = sqrt(Theta); Theta = Theta/tau_prev; c = 1.0L/sqrt(1+Theta*Theta); tau = tau_prev*Theta*c; gamma = c*c*Theta_prev*Theta_prev; {ztmp.r = gamma; ztmp.i = 0.0L;} zd_mult_primme(eta, alpha_prev, c*c); Num_scal_zprimme(primme->nLocal, ztmp, delta, 1); Num_axpy_zprimme(primme->nLocal, eta, d, 1, delta, 1); Num_axpy_zprimme(primme->nLocal, tpone, delta, 1, sol, 1); numIts++; if (z_abs_primme(rho_prev) == 0.0L ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because abs(rho) %e\n", z_abs_primme(rho_prev)); } break; } if (tau < LTolerance) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance); } break; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance || primme->correctionParams.convTest == primme_adaptive) { /* --------------------------------------------------------*/ /* Adaptive stopping based on dynamic monitoring of eResid */ /* --------------------------------------------------------*/ /* Update the Ritz value and eigenresidual using the */ /* following recurrences. */ zd_mult_primme(Delta, Delta_prev, gamma); zz_mult_primme(ztmp, eta, rho_prev); z_add_primme(Delta, Delta, ztmp); z_sub_primme(Beta, Beta_prev, Delta); Phi = gamma*gamma*Phi_prev + z_abs_primme(eta)*z_abs_primme(eta)*sigma_prev; zd_mult_primme(Psi, Psi_prev, gamma); {ztmp.r = gamma*Phi_prev; ztmp.i = 0.0L;} z_add_primme(Psi, Psi, ztmp); Gamma = Gamma_prev + 2.0L*Psi.r + Phi; /* Perform the update: update the eigenvalue and the square of the */ /* residual norm. */ ztmp = dist_dot(sol, 1, sol, 1, primme); dot_sol = ztmp.r; eval_updated = shift + (eval - shift + 2*Beta.r + Gamma)/(1 + dot_sol); eres2_updated = (tau*tau)/(1 + dot_sol) + ((eval - shift)*(eval - shift) + z_abs_primme(Beta)*z_abs_primme(Beta) + 2.0L*(eval - shift)*Beta.r)/(1 + dot_sol) - (eval_updated - shift)*(eval_updated - shift); /* If numerical problems, let eres about the same as tau */ if (eres2_updated < 0){ eres_updated = sqrt( (tau*tau)/(1 + dot_sol) ); } else eres_updated = sqrt(eres2_updated); /* --------------------------------------------------------*/ /* Stopping criteria */ /* --------------------------------------------------------*/ R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol); if ( tau <= R*eres_updated || eres_updated <= tau*R ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < R eres \n"); } break; } if (primme->target == primme_smallest && eval_updated > eval_prev) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated > eval_prev\n"); } break; } else if (primme->target == primme_largest && eval_updated < eval_prev){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated < eval_prev\n"); } break; } if (eres_updated < ETolerance) { // tau < LTol has been checked if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated); } break; } eval_prev = eval_updated; if (primme->printLevel >= 4 && primme->procID == 0) { fprintf(primme->outputFile, "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats. numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated); fflush(primme->outputFile); } /* --------------------------------------------------------*/ } /* End of if adaptive JDQMR section */ /* --------------------------------------------------------*/ else if (primme->printLevel >= 4 && primme->procID == 0) { // Report for non adaptive inner iterations fprintf(primme->outputFile, "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs, primme_wTimer(0),tau); fflush(primme->outputFile); } if (numIts < maxIterations) { ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, w, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); ret = APPLYPROJECTEDPRECONDITIONER_FAILURE; break; } rho = dist_dot(g, 1, w, 1, primme); z_div_primme(&beta, &rho, &rho_prev); Num_scal_zprimme(primme->nLocal, beta, d, 1); Num_axpy_zprimme(primme->nLocal, tpone, w, 1, d, 1); rho_prev = rho; tau_prev = tau; Theta_prev = Theta; Delta_prev = Delta; Beta_prev = Beta; Phi_prev = Phi; Psi_prev = Psi; Gamma_prev = Gamma; } /* --------------------------------------------------------*/ } /* End of QMR main while loop */ /* --------------------------------------------------------*/ *rnorm = eres_updated; return 0; }
static int combine_retained_vectors(double *hVals, int *flags, Complex_Z *hVecs, int basisSize, int *restartSize, int numPacked, Complex_Z *previousHVecs, int *numPrevRetained, double machEps, Complex_Z *rwork, primme_params *primme) { int i; /* Loop variable */ int indexOfPreviousVecs; /* The index within hVecs where the previous */ /* coefficients are inserted. */ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "combine: basisSize: %d restartSize: %d numPrevRetained: %d numPacked: %d \n", basisSize, *restartSize, *numPrevRetained, numPacked); } /* ------------------------------------------------------------------ */ /* Orthogonalize the coefficents from the previous iteration with the */ /* current coefficients. This may annihilate some of the previous */ /* vectors, if the have much overlap with the current vectors. Thus, */ /* numPrevRetained may be reduced. */ /* ------------------------------------------------------------------ */ *numPrevRetained = ortho_retained_vectors_zprimme(hVecs, basisSize, *restartSize, previousHVecs, *numPrevRetained, machEps, rwork); /* --------------------------------------------------------------------- */ /* If locking is engaged and there exist retained previous coefficent */ /* vectors, then move the last numPacked vectors forward numPrevRetained */ /* spaces. This will make room for the orthogonalized previous vectors. */ /* --------------------------------------------------------------------- */ if (primme->locking && *numPrevRetained > 0) { indexOfPreviousVecs = *restartSize - numPacked; /* WARNING: dcopy's -1 step is not implemented appropriately in Mac's * Veclib (and other libs). Perform vector by vector instead. Num_zcopy_zprimme(basisSize*numPacked, &hVecs[basisSize*(*restartSize-numPacked)], -1, &hVecs[basisSize*(*restartSize-numPacked+*numPrevRetained)], -1); */ for (i=1;i<=numPacked;i++) Num_zcopy_zprimme(basisSize, &hVecs[basisSize*(*restartSize-i)],1, &hVecs[basisSize*(*restartSize+*numPrevRetained-i)],1); /* Move the Ritz values and flag values forward as well */ for (i = numPacked-1; i >= 0; i--) { hVals[*restartSize-numPacked+*numPrevRetained+i] = hVals[*restartSize-numPacked+i]; flags[*restartSize-numPacked+*numPrevRetained+i] = flags[*restartSize-numPacked+i]; } } else { indexOfPreviousVecs = *restartSize; } /* ----------------------------------------------------------------*/ /* Copy the orthogonalized previous coefficents to the hVecs array */ /* ----------------------------------------------------------------*/ Num_zcopy_zprimme(basisSize*(*numPrevRetained), previousHVecs, 1, &hVecs[basisSize*indexOfPreviousVecs], 1); /* Initialize the Ritz and flag values */ for (i = 0; i < *numPrevRetained; i++) { hVals[indexOfPreviousVecs+i] = 0.0L; flags[indexOfPreviousVecs+i] = UNCONVERGED; } if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "numPrevRetained: %d restartSize: %d\n", *numPrevRetained, *restartSize); } /* Increase the restart size with the previous vectors added. */ *restartSize = *restartSize + *numPrevRetained; return indexOfPreviousVecs; }
static int dtr(int numLocked, Complex_Z *hVecs, double *hVals, int *flags, int basisSize, int numFree, int *iev, Complex_Z *rwork, primme_params *primme) { int i; /* Loop variable */ int l, lOpt, lMin; /* Determine how many left side vectors to retain */ int r, rOpt; /* Determine how many right side vectors to retain */ int maxIndex; /* basisSize - 1 */ int restartSize; /* The new restart size */ double currentRitzVal; /* The current Ritz value the solver is computing */ double newVal, optVal; /* Used to find the optimum gap ratio */ /* ---------------------------------------------------------------- */ /* Compute lOpt and rOpt with respect to the first Ritz value being */ /* targeted by the block. */ /* ---------------------------------------------------------------- */ currentRitzVal = hVals[iev[0]]; maxIndex = basisSize-1; /* If locking is engaged, then lMin must be large enough to retain */ /* the coefficient vector associated with a converged target. */ /* lMin should be no smaller than primme->minRestartSize. */ if (primme->locking) { lMin = 0; /* Determine the largest index of any converged but unlocked target */ /* Ritz vector. */ for (l = 0; l < basisSize; l++) { if ( (flags[l] == CONVERGED || flags[l] == PRACTICALLY_CONVERGED) && (numLocked + l < primme->numEvals)) { lMin = l; } } lMin = max(lMin, min(basisSize, primme->minRestartSize)); } else { lMin = min(basisSize, primme->minRestartSize); } lOpt = lMin; rOpt = 0; optVal = 0.0L; if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"DTR basisSize: %d\n", basisSize); } /* ---------------------------------------------------------------------- */ /* Compute lOpt and rOpt that maximize the function. */ /* maximize the function (basisSize-numFree-lMin-rMin)* */ /* sqrt((currentRitzVal - hVals[l+1])/ */ /* (hVals[l+1]-hVals[basisSize-1-r])) */ /* ---------------------------------------------------------------------- */ for (l = lMin; l < basisSize - numFree; l++) { for (r = 0; r < basisSize - l - numFree; r++) { if ((basisSize - l - r) % primme->maxBlockSize == 0) { newVal = (basisSize - l - r) * sqrt((currentRitzVal - hVals[l+1])/ (hVals[l+1]-hVals[maxIndex-r])); if (newVal > optVal) { optVal = newVal; lOpt = l; rOpt = r; } } } } restartSize = lOpt + rOpt; /* --------------------------------------------------------------- */ /* Swap the rOpt vectors from the right hand side so that they are */ /* contiguous with the vectors from the left hand side. */ /* --------------------------------------------------------------- */ i = basisSize - restartSize; Num_zcopy_zprimme(i*basisSize, &hVecs[basisSize*lOpt], 1, rwork, 1); Num_zcopy_zprimme(rOpt*basisSize, &hVecs[basisSize*(basisSize-rOpt)], 1, &hVecs[basisSize*lOpt], 1); Num_zcopy_zprimme(i*basisSize, rwork, 1, &hVecs[basisSize*restartSize], 1); /* Do the same with the eigenvalues of H */ Num_dcopy_primme(i, &hVals[lOpt], 1, (double *) rwork, 1); Num_dcopy_primme(rOpt, &hVals[(basisSize-rOpt)], 1, &hVals[lOpt], 1); Num_dcopy_primme(i, (double *) rwork, 1, &hVals[restartSize], 1); /* Set only those flags lower than restartSize. The rest will be reset */ for (i = 0; i < rOpt; i++) { flags[lOpt + i] = flags[basisSize-rOpt + i]; } if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"DTR restart size: %d L: %d R: %d\n", restartSize, lOpt, rOpt); } reset_flags_zprimme(flags, restartSize, primme->maxBasisSize); return restartSize; }
int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs, double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, int *numConvergedStored, int numLocked, int numGuesses, Complex_Z *previousHVecs, int numPrevRetained, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int numFree; /* The number of basis vectors to be left free */ int numPacked; /* The number of coefficient vectors moved to the */ /* end of the hVecs array. */ int restartSize; /* The number of vectors to restart with */ int indexOfPreviousVecs=0; /* Position within hVecs array the previous */ /* coefficient vectors will be stored */ int i, n, eStart; /* various variables */ int ret; /* Return value */ numPacked = 0; /* --------------------------------------------------------------------- */ /* If dynamic thick restarting is to be used, then determine the minimum */ /* number of free spaces to be maintained and call the DTR routine. */ /* The DTR routine will determine how many coefficient vectors from the */ /* left and right of H-spectrum to retain at restart. If DTR is not used */ /* then set the restart size to the minimum restart size. */ /* --------------------------------------------------------------------- */ if (primme->restartingParams.scheme == primme_dtr) { numFree = numPrevRetained+max(3, primme->maxBlockSize); restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, iev, rwork, primme); } else { restartSize = min(basisSize, primme->minRestartSize); } /* ----------------------------------------------------------------------- */ /* If locking is engaged, then swap coefficient vectors corresponding to */ /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/ /* This allows the converged Ritz vectors to be stored contiguously in */ /* memory after restart. This significantly reduces the amount of data */ /* movement the locking routine would have to perform otherwise. */ /* The following function also covers some limit cases where restartSize */ /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize */ /* ----------------------------------------------------------------------- */ if (primme->locking) { numPacked = pack_converged_coefficients(&restartSize, basisSize, &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme); } /* ----------------------------------------------------------------------- */ /* Restarting with a small number of coefficient vectors from the previous */ /* iteration can be retained to accelerate convergence. The previous */ /* coefficient vectors must be combined with the current coefficient */ /* vectors by first orthogonalizing the previous ones versus the current */ /* restartSize ones. The orthogonalized previous vectors are then */ /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs). */ /* ----------------------------------------------------------------------- */ if (numPrevRetained > 0) { indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs, basisSize, &restartSize, numPacked, previousHVecs, &numPrevRetained, machEps, rwork, primme); } /* -------------------------------------------------------- */ /* Restart V by replacing it with the current Ritz vectors. */ /* -------------------------------------------------------- */ restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ------------------------------------------------------------ */ /* Restart W by replacing it with W times the eigenvectors of H */ /* ------------------------------------------------------------ */ restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ---------------------------------------------------------------- */ /* Because we have replaced V by the Ritz vectors, V'*A*V should be */ /* diagonal with the Ritz values on the diagonal. The eigenvectors */ /* of the new matrix V'*A*V become the standard basis vectors. */ /* ---------------------------------------------------------------- */ ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, __LINE__, primme); return RESTART_H_FAILURE; } /* --------------------------------------------------------------------- */ /* If the user requires (I-QQ') projectors in JDQMR without locking, */ /* the converged eigenvectors are copied temporarily to evecs. There */ /* they stay locked for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/ /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and */ /* they will overwrite evecs at the end. */ /* We recommend against this type of usage. It's better to use locking. */ /* --------------------------------------------------------------------- */ /* Andreas NOTE: is done inefficiently for the moment. We should only */ /* add the recently converged. But we need to differentiate them */ /* from flags... */ if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && numConverged > 0 && (primme->correctionParams.projectors.LeftQ || primme->correctionParams.projectors.RightQ ) ) { n = primme->nLocal; *numConvergedStored = 0; eStart = primme->numOrthoConst; for (i=0;i<primme->numEvals;i++) { if (flags[i] == CONVERGED) { if (*numConvergedStored < numConverged) { Num_zcopy_zprimme(n, &V[i*n], 1, &evecs[(eStart+*numConvergedStored)*n], 1); (*numConvergedStored)++; } } /* if converged */ } /* for */ if (*numConvergedStored != numConverged) { if (primme->printLevel >= 1 && primme->procID == 0) { fprintf(primme->outputFile, "Flags and converged eigenpairs do not correspond %d %d\n", numConverged, *numConvergedStored); } return PSEUDOLOCK_FAILURE; } /* Update also the M = K^{-1}evecs and its udu factorization if needed */ if (UDU != NULL) { apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], numConverged, primme ); /* rwork must be maxEvecsSize*numEvals! */ update_projection_zprimme(evecs, evecsHat, M, eStart*n, primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme); ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if UDU factorization is needed */ } /* if this pseudo locking should take place */ return restartSize; }
int check_convergence_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *hVecs, double *hVals, int *flags, int basisSize, int *iev, int *ievMax, double *blockNorms, int *blockSize, int numConverged, int numLocked, Complex_Z *evecs, double tol, double maxConvTol, double aNormEstimate, Complex_Z *rwork, primme_params *primme) { int i; /* Loop variable */ int left, right; /* Range of block vectors to be checked for convergence */ int start; /* starting index in block of converged/tobeProject vecs*/ int numVacancies; /* Number of vacant positions between left and right */ int recentlyConverged; /* The number of Ritz values declared converged */ /* since the last iteration */ int numToProject; /* Number of vectors with potential accuracy problem*/ double attainableTol; /* Used in locking to check near convergence problem*/ /* -------------------------------------------- */ /* Tolerance based on our dynamic norm estimate */ /* -------------------------------------------- */ if (primme->aNorm <= 0.0L) { tol = tol * aNormEstimate; } /* ---------------------------------------------------------------------- */ /* If locking, set tol beyond which we need to check for accuracy problem */ /* ---------------------------------------------------------------------- */ if (primme->locking) { attainableTol = sqrt(primme->numOrthoConst+numLocked)*maxConvTol; } /* --------------------------------------------------------------- */ /* Compute each Ritz vector and its corresponding residual vector. */ /* The Ritz vector and residual are stored temporarily in V and W */ /* respectively. For each Ritz vector, determine if it has */ /* converged. If it has, try to replace it with one that hasn't. */ /* --------------------------------------------------------------- */ recentlyConverged = 0; left = 0; right = *blockSize - 1; numVacancies = 1; while (numVacancies > 0 && (numConverged + recentlyConverged) < primme->numEvals) { /* Consider the newly added vectors in the block and reset counters */ numVacancies = 0; numToProject = 0; /* Copy needed hvecs into the front of the work array. */ for (i=left; i <= right; i++) { Num_zcopy_zprimme(basisSize, &hVecs[basisSize*iev[i]], 1, &rwork[basisSize*(i-left)], 1); } /* ----------------------------------------------------------------- */ /* Compute the Ritz vectors, residuals, and norms for the next */ /* blockSize unconverged Ritz vectors. The Ritz vectors will be */ /* placed from V(0,lft) to V(0,rgt) and the residual vectors from */ /* W(0,lft) to W(0,rgt). */ /* ----------------------------------------------------------------- */ /* rwork must be maxBasisSize*maxBlockSize + maxBlockSize in size, */ /* maxBasisSize*maxBlockSize holds selected hVecs to facilitate */ /* blocking, and maxBlockSize to hold the residual norms */ /* ----------------------------------------------------------------- */ compute_resnorms(V, W, rwork, hVals, basisSize, blockNorms, iev, left, right, &rwork[basisSize*(right-left+1)], primme); if (primme->stats.numOuterIterations % 1 == 0 && 1) { print_residuals(hVals, blockNorms, numConverged, numLocked, iev, left, right, primme); } /* ----------------------------------------------------------------- */ /* Determine which Ritz vectors have converged < tol and flag them. */ /* ----------------------------------------------------------------- */ for (i=left; i <= right; i++) { /* ------------------------------------*/ /* If the vector is converged, flag it */ /* ------------------------------------*/ if (blockNorms[i] < tol) { flags[iev[i]] = CONVERGED; numVacancies++; if ((!primme->locking && iev[i] < primme->numEvals) || (primme->locking && ((numLocked + iev[i]) < primme->numEvals))) { recentlyConverged++; if (!primme->locking && primme->procID == 0 && primme->printLevel >= 2) { fprintf(primme->outputFile, "#Converged %d eval[ %d ]= %e norm %e Mvecs %d Time %g\n", numConverged+recentlyConverged, iev[i], hVals[iev[i]], blockNorms[i], primme->stats.numMatvecs,primme_wTimer(0)); fflush(primme->outputFile); } // printf } //if } //if converged /* ---------------------------------------------------------------- */ /* If locking there may be an accuracy problem close to convergence */ /* Check if there is danger and set these Ritz vecs for projection */ /* ---------------------------------------------------------------- */ else if (primme->locking && numLocked > 0 && blockNorms[i] < attainableTol ) { flags[iev[i]] = TO_BE_PROJECTED; numToProject++; } } //for /* ---------------------------------------------------------------- */ /* If some of the Ritz vectors in the block have converged, or need */ /* to be projected against evecs, move those flagged Ritz vectors */ /* and residuals towards the end of the block [left,right]. Also */ /* swap iev, and blockNorms for the targeted block. */ /* ---------------------------------------------------------------- */ if (numVacancies > 0 || numToProject > 0) { swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, primme->numOrthoConst + numLocked, *blockSize, left); } /* --------------------------------------------------------------- */ /* Project the TO_BE_PROJECTED residuals and check for practical */ /* convergence among them. Those practically converged evecs are */ /* swapped just before the converged ones at the end of the block. */ /* numVacancies and recentlyConverged are also updated */ /* --------------------------------------------------------------- */ if (numToProject > 0) { start = *blockSize - numVacancies - numToProject; check_practical_convergence(V, W, evecs, numLocked, basisSize, *blockSize, start, numToProject, iev, flags, blockNorms, tol, &recentlyConverged, &numVacancies, rwork, primme); } /* ---------------------------------------------------------------- */ /* Replace the vacancies, with as many unconverged vectors beyond */ /* ievMax as possible. If not enough are available reduce blockSize */ /* ---------------------------------------------------------------- */ if (numVacancies > 0) { replace_vectors(iev, flags, *blockSize, basisSize, numVacancies, &left, &right, ievMax); numVacancies = right - left + 1; *blockSize = left + numVacancies; } } // while there are vacancies return recentlyConverged; }
static void swap_UnconvVecs(Complex_Z *V, Complex_Z *W, int nLocal, int basisSize, int *iev, int *flags, double *blockNorms, int dimEvecs, int blockSize, int left) { int right; /* holds the right swapping position */ int temp; /* used to swap integers */ double dtemp; /* used to swap doubles */ /* Search from left to right within the block looking for flagged */ /* Ritz vectors. If a flagged one is found, find also an unconverged */ /* Ritz vector from the right side of the block. If the flagged was */ /* converged, replace it with the unconverged, otherwise if it was */ /* was flagged TO_BE_PROJECTED, swap it with the unconverged vector. */ while (left < blockSize) { /* If block vector left is unconverged, move left one vector. */ if (flags[iev[left]] == UNCONVERGED) { left++; } else { /* If block vector left is converged, find an unconverged */ /* vector somewhere between left+1 and blockSize. */ right = left + 1; /* If the end of the block has been reached, there are */ /* no more unconverged vectors left. If not, keep */ /* searching right for another unconverged vector to */ /* perform the replacement. */ while (right < blockSize && flags[iev[right]] != UNCONVERGED) { right++; } /* No unconverged block vector could be found */ if (right == blockSize) { return; } /* An unconverged Ritz vector was found and should */ /* replace or be swapped with block vector left. */ if (flags[iev[left]] != TO_BE_PROJECTED) { /* replace */ Num_zcopy_zprimme(nLocal, &V[nLocal*(basisSize+right)], 1, &V[nLocal*(basisSize+left)], 1); Num_zcopy_zprimme(nLocal, &W[nLocal*(basisSize+right)], 1, &W[nLocal*(basisSize+left)], 1); temp = iev[left]; iev[left] = iev[right]; iev[right] = temp; blockNorms[left] = blockNorms[right]; } else { /* swap */ Num_swap_zprimme(nLocal, &V[nLocal*(basisSize+left)], 1, &V[nLocal*(basisSize+right)], 1); Num_swap_zprimme(nLocal, &W[nLocal*(basisSize+left)], 1, &W[nLocal*(basisSize+right)], 1); temp = iev[left]; iev[left] = iev[right]; iev[right] = temp; dtemp = blockNorms[left]; blockNorms[left] = blockNorms[right]; blockNorms[right] = dtemp; } /* end of swaps */ left++; } // looking for replacement } //while left < blockSize }
static int init_block_krylov(Complex_Z *V, Complex_Z *W, int dv1, int dv2, Complex_Z *locked, int numLocked, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int i; /* Loop variables */ int numNewVectors; /* Number of vectors to be generated */ int ret; /* Return code. */ int ONE = 1; /* Used for passing it by reference in matrixmatvec */ numNewVectors = dv2 - dv1 + 1; /*----------------------------------------------------------------------*/ /* Generate a single Krylov space if there are only a few vectors to be */ /* generated, else generate a block Krylov space with */ /* primme->maxBlockSize as the block Size. */ /*----------------------------------------------------------------------*/ if (numNewVectors <= primme->maxBlockSize) { /* Create and orthogonalize the inital vectors */ Num_larnv_zprimme(2, primme->iseed,primme->nLocal,&V[primme->nLocal*dv1]); ret = ortho_zprimme(V, primme->nLocal, dv1, dv1, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } /* Generate the remainder of the Krylov space. */ for (i = dv1; i < dv2; i++) { (*primme->matrixMatvec) (&V[primme->nLocal*i], &V[primme->nLocal*(i+1)], &ONE, primme); Num_zcopy_zprimme(primme->nLocal, &V[primme->nLocal*(i+1)], 1, &W[primme->nLocal*i], 1); ret = ortho_zprimme(V, primme->nLocal, i+1, i+1, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } } primme->stats.numMatvecs += dv2-dv1; update_W_zprimme(V, W, dv2, 1, primme); } else { /*----------------------------------------------------------------------*/ /* Generate the initial vectors. */ /*----------------------------------------------------------------------*/ Num_larnv_zprimme(2, primme->iseed, primme->nLocal*primme->maxBlockSize, &V[primme->nLocal*dv1]); ret = ortho_zprimme(V, primme->nLocal, dv1, dv1+primme->maxBlockSize-1, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Generate the remaining vectors in the sequence */ for (i = dv1+primme->maxBlockSize; i <= dv2; i++) { (*primme->matrixMatvec)(&V[primme->nLocal*(i-primme->maxBlockSize)], &V[primme->nLocal*i], &ONE, primme); Num_zcopy_zprimme(primme->nLocal, &V[primme->nLocal*i], 1, &W[primme->nLocal*(i-primme->maxBlockSize)], 1); ret = ortho_zprimme(V, primme->nLocal, i, i, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } } primme->stats.numMatvecs += dv2-(dv1+primme->maxBlockSize)+1; update_W_zprimme(V, W, dv2-primme->maxBlockSize+1, primme->maxBlockSize, primme); } return 0; }
int init_basis_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, double machEps, Complex_Z *rwork, int rworkSize, int *basisSize, int *nextGuess, int *numGuesses, double *timeForMV, primme_params *primme) { int ret; /* Return value */ int currentSize; /*-----------------------------------------------------------------------*/ /* Orthogonalize the orthogonalization constraints provided by the user. */ /* If a preconditioner is given and inner iterations are to be */ /* performed, then initialize M. */ /*-----------------------------------------------------------------------*/ if (primme->numOrthoConst > 0) { ret = ortho_zprimme(evecs, primme->nLocal, 0, primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } /* Initialize evecsHat, M, and its factorization UDU,ipivot. This */ /* allows the orthogonalization constraints to be included in the */ /* projector (I-QQ'). Only needed if there is preconditioning, and */ /* JDqmr inner iterations with a right, skew projector. Only in */ /* that case, is UDU not NULL */ if (UDU != NULL) { (*primme->applyPreconditioner) (evecs, evecsHat, &primme->numOrthoConst, primme); primme->stats.numPreconds += primme->numOrthoConst; update_projection_zprimme(evecs, evecsHat, M, 0, primme->numOrthoConst+primme->numEvals, primme->numOrthoConst, rwork, primme); ret = UDUDecompose_zprimme(M, UDU, ipivot, primme->numOrthoConst, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if evecsHat and M=evecs'evecsHat, UDU are needed */ } /* if numOrthoCont >0 */ /*-----------------------------------------------------------------------*/ /* No locking */ /*-----------------------------------------------------------------------*/ if (!primme->locking) { /* Handle case when no initial guesses are provided by the user */ if (primme->initSize == 0) { ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { /* Handle case when some or all initial guesses are provided by */ /* the user */ /* Copy over the initial guesses provided by the user */ Num_zcopy_zprimme(primme->nLocal*primme->initSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); /* Orthonormalize the guesses provided by the user */ ret = ortho_zprimme(V, primme->nLocal, 0, primme->initSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_zprimme(V, W, 0, primme->initSize, primme); /* An insufficient number of initial guesses were provided by */ /* the user. Generate a block Krylov space to fill the */ /* remaining vacancies. */ if (primme->initSize < primme->minRestartSize) { ret = init_block_krylov(V, W, primme->initSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { *basisSize = primme->initSize; } } *numGuesses = 0; *nextGuess = 0; } else { /*-----------------------------------------------------------------------*/ /* Locking */ /*-----------------------------------------------------------------------*/ *numGuesses = primme->initSize; *nextGuess = primme->numOrthoConst; /* If some initial guesses are available, copy them to the basis */ /* and orthogonalize them against themselves and the orthogonalization */ /* constraints. */ if (primme->initSize > 0) { currentSize = min(primme->initSize, primme->minRestartSize); Num_zcopy_zprimme(primme->nLocal*currentSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); ret = ortho_zprimme(V, primme->nLocal, 0, currentSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_zprimme(V, W, 0, currentSize, primme); *numGuesses = *numGuesses - currentSize; *nextGuess = *nextGuess + currentSize; } else { currentSize = 0; } /* If an insufficient number of guesses was provided, then fill */ /* the remaining vacancies with a block Krylov space. */ if (currentSize < primme->minRestartSize) { ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } } *basisSize = primme->minRestartSize; } /* ----------------------------------------------------------- */ /* If time measurements are needed, waste one MV + one Precond */ /* Put dummy results in the first open space of W (currentSize)*/ /* ----------------------------------------------------------- */ if (primme->dynamicMethodSwitch) { currentSize = primme->nLocal*(*basisSize); ret = 1; *timeForMV = primme_wTimer(0); (*primme->matrixMatvec)(V, &W[currentSize], &ret, primme); *timeForMV = primme_wTimer(0) - *timeForMV; primme->stats.numMatvecs += 1; } return 0; }