int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, int numLocked, int numConvergedStored, double *ritzVals, double *prevRitzVals, int *flags, int basisSize, double *blockNorms, int *iev, int blockSize, double eresTol, double machEps, double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, primme_params *primme) { int blockIndex; /* Loop index. Ranges from 0..blockSize-1. */ int ritzIndex; /* Ritz value index blockIndex corresponds to. */ /* Possible values range from 0..basisSize-1. */ int sortedIndex; /* Ritz value index in sortedRitzVals, blockIndex */ /* corresponds to. Range 0..numLocked+basisSize-1 */ int neededRsize; /* Needed size for rwork. If not enough return */ int linSolverRWorkSize; /* Size of the linSolverRWork array. */ int *ilev; /* Array of size blockSize. Maps the target Ritz */ /* values to their positions in the sortedEvals */ /* array. */ int sizeLprojector; /* Sizes of the various left/right projectors */ int sizeRprojectorQ; /* These will be 0/1/or numOrthConstr+numLocked */ int sizeRprojectorX; /* or numOrthConstr+numConvergedStored w/o locking*/ static int numPrevRitzVals = 0; /* Size of prevRitzVals */ int ret; /* Return code. */ Complex_Z *r, *x, *sol; /* Residual, Ritz vector, and correction. */ Complex_Z *linSolverRWork;/* Workspace needed by linear solver. */ double *sortedRitzVals; /* Sorted array of current and converged Ritz */ /* values. Size of array is numLocked+basisSize. */ double *blockOfShifts; /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/ double *approxOlsenEps; /* Shifts for approximate Olsen implementation */ Complex_Z *Kinvx; /* Workspace to store K^{-1}x */ Complex_Z *Lprojector; /* Q pointer for (I-Q*Q'). Usually points to evecs*/ Complex_Z *RprojectorQ; /* May point to evecs/evecsHat depending on skewQ */ Complex_Z *RprojectorX; /* May point to x/Kinvx depending on skewX */ Complex_Z xKinvx; /* Stores x'*K^{-1}x if needed */ double eval, shift, robustShift; /* robust shift values. */ Complex_Z tmpShift; /* Temp shift for daxpy */ //------------------------------------------------------------ // Subdivide the workspace with pointers, and figure out // the total amount of needed real workspace (neededRsize) //------------------------------------------------------------ /* needed worksize */ neededRsize = 0; Kinvx = rwork; /* Kinvx will have nonzero size if precond and both RightX and SkewX */ if (primme->correctionParams.precondition && primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { /* OLSEN's method requires a block, but JDQMR is vector by vector */ if (primme->correctionParams.maxInnerIterations == 0) { sol = Kinvx + primme->nLocal*blockSize; neededRsize = neededRsize + primme->nLocal*blockSize; } else { sol = Kinvx + primme->nLocal; neededRsize = neededRsize + primme->nLocal; } } else { sol = Kinvx + 0; } if (primme->correctionParams.maxInnerIterations == 0) { linSolverRWork = sol + 0; /* sol not needed for GD */ linSolverRWorkSize = 0; /* No inner solver used */ } else { linSolverRWork = sol + primme->nLocal; /* sol needed in innerJD */ neededRsize = neededRsize + primme->nLocal; linSolverRWorkSize = /* Inner solver worksize */ 4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals); neededRsize = neededRsize + linSolverRWorkSize; } sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize); blockOfShifts = sortedRitzVals + (numLocked+basisSize); approxOlsenEps = blockOfShifts + blockSize; neededRsize = neededRsize + numLocked+basisSize + 2*blockSize; if (neededRsize > rworkSize) { return(neededRsize); } // Subdivide also the integer work space ilev = iwork; // of size blockSize //------------------------------------------------------------ // Figuring out preconditioning shifts (robust, Olsen, etc) //------------------------------------------------------------ /* blockOfShifts will contain the preconditioning shifts: */ /* either Ritz values or robustShifts computed below. These shifts */ /* will be used in the correction equations or in inverting (K-sigma I) */ /* approxOlsenEps will contain error approximations for eigenavalues */ /* to be used for Olsen's method (when innerIterations =0). */ if (primme->locking) { /* Combine the sorted list of locked Ritz values with the sorted */ /* list of current Ritz values, ritzVals. The merging of the two */ /* lists lockedEvals and ritzVals is stored in sortedRitzVals. */ mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, sortedRitzVals, ilev, blockSize, primme); } else { /* Then the sorted evals are simply the ritzVals, targeted as iev */ sortedRitzVals = ritzVals; ilev = iev; } /*-----------------------------------------------------------------*/ /* For interior eigenpairs, use the user provided shifts */ /*-----------------------------------------------------------------*/ if (primme->target != primme_smallest && primme->target != primme_largest) { for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* user provided shifts */ else { /*-----------------------------------------------------------------*/ /* else it is primme_smallest or primme_largest */ /*-----------------------------------------------------------------*/ if (primme->correctionParams.robustShifts) { /*----------------------------------------------------*/ /* Subtract/add a robust shift from/to the Ritz value */ /*----------------------------------------------------*/ /* Find the robust shift for each block vector */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; eval = sortedRitzVals[sortedIndex]; robustShift = computeRobustShift(blockIndex, blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, sortedRitzVals, &approxOlsenEps[blockIndex], numLocked+basisSize, ilev, primme); /* Subtract/add the shift if looking for the smallest/largest */ /* eigenvalues, Do not go beyond the previous computed eigval */ if (primme->target == primme_smallest) { blockOfShifts[blockIndex] = eval - robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } else { blockOfShifts[blockIndex] = eval + robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } // robust shifting } /* for loop */ } /* endif robust shifts */ else { /*--------------------------------------------------------------*/ /* Otherwise, the shifts for both preconditioner and correction */ /* equation should be just the Ritz values. For Olsen's method, */ /* the shifts for r-eps*x, are chosen as the difference in Ritz */ /* value between successive iterations. */ /*--------------------------------------------------------------*/ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { ritzIndex = iev[blockIndex]; sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = ritzVals[ritzIndex]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* else no robust shifts */ } /* else primme_smallest or primme_largest */ /* Remember the previous ritz values*/ numPrevRitzVals = numLocked+basisSize; Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1); // Equip the primme struct with the blockOfShifts, in case the user // wants to precondition (K-sigma_i I)^{-1} with a different shift // for each vector primme->ShiftsForPreconditioner = blockOfShifts; //------------------------------------------------------------ // Generalized Davidson variants -- No inner iterations //------------------------------------------------------------ if (primme->correctionParams.maxInnerIterations == 0) { // This is Generalized Davidson or approximate Olsen's method. // Perform block preconditioning (with or without projections) r = &W[primme->nLocal*basisSize]; // All the block residuals x = &V[primme->nLocal*basisSize]; // All the block Ritz vectors if ( primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { // Compute exact Olsen's projected preconditioner. This is // expensive and rarely improves anything! Included for completeness. Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme); } else { if ( primme->correctionParams.projectors.RightX ) { // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { // Compute r_i = r_i - err_i * x_i {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;} Num_axpy_zprimme(primme->nLocal, tmpShift, &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1); } //for } // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) apply_preconditioner_block(r, x, blockSize, primme ); } } //------------------------------------------------------------ // JDQMR --- JD inner-outer variants //------------------------------------------------------------ else { // maxInnerIterations > 0 We perform inner-outer JDQMR. /* Solve the correction for each block vector. */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { r = &W[primme->nLocal*(basisSize+blockIndex)]; x = &V[primme->nLocal*(basisSize+blockIndex)]; /* Set up the left/right/skew projectors for JDQMR. */ /* The pointers Lprojector, Rprojector(Q/X) point to the */ /* appropriate arrays for use in the projection step */ setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, &Lprojector, &RprojectorQ, &RprojectorX, &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX, numLocked, numConvergedStored, primme); /* Map the index of the block vector to its corresponding eigenvalue */ /* index, and the shift for the correction equation. Also make the */ /* shift available to primme, in case (K-shift I)^-1 is needed */ ritzIndex = iev[blockIndex]; shift = blockOfShifts[blockIndex]; primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex]; ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX, sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, machEps, linSolverRWork, linSolverRWorkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve, ret, __FILE__, __LINE__, primme); return (INNER_SOLVE_FAILURE); } Num_zcopy_zprimme(primme->nLocal, sol, 1, &V[primme->nLocal*(basisSize+blockIndex)], 1); } // end for each block vector } // JDqmr variants return 0; }
static void Olsen_preconditioner_block(Complex_Z *r, Complex_Z *x, int blockSize, Complex_Z *rwork, primme_params *primme) { int blockIndex, count; Complex_Z alpha; Complex_Z *Kinvx, *xKinvx, *xKinvr, *xKinvx_local, *xKinvr_local; Complex_Z ztmp; Complex_Z tzero = {+0.0e+00,+0.0e00}; //------------------------------------------------------------------ // Subdivide workspace //------------------------------------------------------------------ Kinvx = rwork; xKinvx_local = Kinvx + primme->nLocal*blockSize; xKinvr_local = xKinvx_local + blockSize; xKinvx = xKinvr_local + blockSize; xKinvr = xKinvx + blockSize; //------------------------------------------------------------------ // Compute K^{-1}x for block x. Kinvx memory requirement (blockSize*nLocal) //------------------------------------------------------------------ apply_preconditioner_block(x, Kinvx, blockSize, primme ); //------------------------------------------------------------------ // Compute local x^TK^{-1}x and x^TK^{-1}r = (K^{-1}x)^Tr for each vector //------------------------------------------------------------------ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { xKinvx_local[blockIndex] = Num_dot_zprimme(primme->nLocal, &x[primme->nLocal*blockIndex],1, &Kinvx[primme->nLocal*blockIndex],1); xKinvr_local[blockIndex] = Num_dot_zprimme(primme->nLocal, &Kinvx[primme->nLocal*blockIndex],1, &r[primme->nLocal*blockIndex],1); } count = 4*blockSize; (*primme->globalSumDouble)(xKinvx_local, xKinvx, &count, primme); //------------------------------------------------------------------ // Compute K^{-1}r //------------------------------------------------------------------ apply_preconditioner_block(r, x, blockSize, primme ); //------------------------------------------------------------------ // Compute K^(-1)r - ( xKinvr/xKinvx ) K^(-1)r for each vector //------------------------------------------------------------------ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { if (z_abs_primme(xKinvx[blockIndex]) > 0.0L) { ztmp.r = -xKinvr[blockIndex].r; ztmp.i = -xKinvr[blockIndex].i; z_div_primme(&alpha, &ztmp, &xKinvx[blockIndex]); } else alpha = tzero; Num_axpy_zprimme(primme->nLocal,alpha,&Kinvx[primme->nLocal*blockIndex], 1, &x[primme->nLocal*blockIndex],1); } //for } // of Olsen_preconditiner_block
int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs, double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, int *numConvergedStored, int numLocked, int numGuesses, Complex_Z *previousHVecs, int numPrevRetained, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int numFree; /* The number of basis vectors to be left free */ int numPacked; /* The number of coefficient vectors moved to the */ /* end of the hVecs array. */ int restartSize; /* The number of vectors to restart with */ int indexOfPreviousVecs=0; /* Position within hVecs array the previous */ /* coefficient vectors will be stored */ int i, n, eStart; /* various variables */ int ret; /* Return value */ numPacked = 0; /* --------------------------------------------------------------------- */ /* If dynamic thick restarting is to be used, then determine the minimum */ /* number of free spaces to be maintained and call the DTR routine. */ /* The DTR routine will determine how many coefficient vectors from the */ /* left and right of H-spectrum to retain at restart. If DTR is not used */ /* then set the restart size to the minimum restart size. */ /* --------------------------------------------------------------------- */ if (primme->restartingParams.scheme == primme_dtr) { numFree = numPrevRetained+max(3, primme->maxBlockSize); restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, iev, rwork, primme); } else { restartSize = min(basisSize, primme->minRestartSize); } /* ----------------------------------------------------------------------- */ /* If locking is engaged, then swap coefficient vectors corresponding to */ /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/ /* This allows the converged Ritz vectors to be stored contiguously in */ /* memory after restart. This significantly reduces the amount of data */ /* movement the locking routine would have to perform otherwise. */ /* The following function also covers some limit cases where restartSize */ /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize */ /* ----------------------------------------------------------------------- */ if (primme->locking) { numPacked = pack_converged_coefficients(&restartSize, basisSize, &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme); } /* ----------------------------------------------------------------------- */ /* Restarting with a small number of coefficient vectors from the previous */ /* iteration can be retained to accelerate convergence. The previous */ /* coefficient vectors must be combined with the current coefficient */ /* vectors by first orthogonalizing the previous ones versus the current */ /* restartSize ones. The orthogonalized previous vectors are then */ /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs). */ /* ----------------------------------------------------------------------- */ if (numPrevRetained > 0) { indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs, basisSize, &restartSize, numPacked, previousHVecs, &numPrevRetained, machEps, rwork, primme); } /* -------------------------------------------------------- */ /* Restart V by replacing it with the current Ritz vectors. */ /* -------------------------------------------------------- */ restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ------------------------------------------------------------ */ /* Restart W by replacing it with W times the eigenvectors of H */ /* ------------------------------------------------------------ */ restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ---------------------------------------------------------------- */ /* Because we have replaced V by the Ritz vectors, V'*A*V should be */ /* diagonal with the Ritz values on the diagonal. The eigenvectors */ /* of the new matrix V'*A*V become the standard basis vectors. */ /* ---------------------------------------------------------------- */ ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, __LINE__, primme); return RESTART_H_FAILURE; } /* --------------------------------------------------------------------- */ /* If the user requires (I-QQ') projectors in JDQMR without locking, */ /* the converged eigenvectors are copied temporarily to evecs. There */ /* they stay locked for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/ /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and */ /* they will overwrite evecs at the end. */ /* We recommend against this type of usage. It's better to use locking. */ /* --------------------------------------------------------------------- */ /* Andreas NOTE: is done inefficiently for the moment. We should only */ /* add the recently converged. But we need to differentiate them */ /* from flags... */ if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && numConverged > 0 && (primme->correctionParams.projectors.LeftQ || primme->correctionParams.projectors.RightQ ) ) { n = primme->nLocal; *numConvergedStored = 0; eStart = primme->numOrthoConst; for (i=0;i<primme->numEvals;i++) { if (flags[i] == CONVERGED) { if (*numConvergedStored < numConverged) { Num_zcopy_zprimme(n, &V[i*n], 1, &evecs[(eStart+*numConvergedStored)*n], 1); (*numConvergedStored)++; } } /* if converged */ } /* for */ if (*numConvergedStored != numConverged) { if (primme->printLevel >= 1 && primme->procID == 0) { fprintf(primme->outputFile, "Flags and converged eigenpairs do not correspond %d %d\n", numConverged, *numConvergedStored); } return PSEUDOLOCK_FAILURE; } /* Update also the M = K^{-1}evecs and its udu factorization if needed */ if (UDU != NULL) { apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], numConverged, primme ); /* rwork must be maxEvecsSize*numEvals! */ update_projection_zprimme(evecs, evecsHat, M, eStart*n, primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme); ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if UDU factorization is needed */ } /* if this pseudo locking should take place */ return restartSize; }