void primme_seq_globalSumDouble(void *sendBuf, void *recvBuf, int *count, primme_params *params) { Num_dcopy_primme(*count, (double *) sendBuf, 1, (double *) recvBuf, 1); }
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, int numLocked, int numConvergedStored, double *ritzVals, double *prevRitzVals, int *flags, int basisSize, double *blockNorms, int *iev, int blockSize, double eresTol, double machEps, double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, primme_params *primme) { int blockIndex; /* Loop index. Ranges from 0..blockSize-1. */ int ritzIndex; /* Ritz value index blockIndex corresponds to. */ /* Possible values range from 0..basisSize-1. */ int sortedIndex; /* Ritz value index in sortedRitzVals, blockIndex */ /* corresponds to. Range 0..numLocked+basisSize-1 */ int neededRsize; /* Needed size for rwork. If not enough return */ int linSolverRWorkSize; /* Size of the linSolverRWork array. */ int *ilev; /* Array of size blockSize. Maps the target Ritz */ /* values to their positions in the sortedEvals */ /* array. */ int sizeLprojector; /* Sizes of the various left/right projectors */ int sizeRprojectorQ; /* These will be 0/1/or numOrthConstr+numLocked */ int sizeRprojectorX; /* or numOrthConstr+numConvergedStored w/o locking*/ static int numPrevRitzVals = 0; /* Size of prevRitzVals */ int ret; /* Return code. */ Complex_Z *r, *x, *sol; /* Residual, Ritz vector, and correction. */ Complex_Z *linSolverRWork;/* Workspace needed by linear solver. */ double *sortedRitzVals; /* Sorted array of current and converged Ritz */ /* values. Size of array is numLocked+basisSize. */ double *blockOfShifts; /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/ double *approxOlsenEps; /* Shifts for approximate Olsen implementation */ Complex_Z *Kinvx; /* Workspace to store K^{-1}x */ Complex_Z *Lprojector; /* Q pointer for (I-Q*Q'). Usually points to evecs*/ Complex_Z *RprojectorQ; /* May point to evecs/evecsHat depending on skewQ */ Complex_Z *RprojectorX; /* May point to x/Kinvx depending on skewX */ Complex_Z xKinvx; /* Stores x'*K^{-1}x if needed */ double eval, shift, robustShift; /* robust shift values. */ Complex_Z tmpShift; /* Temp shift for daxpy */ //------------------------------------------------------------ // Subdivide the workspace with pointers, and figure out // the total amount of needed real workspace (neededRsize) //------------------------------------------------------------ /* needed worksize */ neededRsize = 0; Kinvx = rwork; /* Kinvx will have nonzero size if precond and both RightX and SkewX */ if (primme->correctionParams.precondition && primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { /* OLSEN's method requires a block, but JDQMR is vector by vector */ if (primme->correctionParams.maxInnerIterations == 0) { sol = Kinvx + primme->nLocal*blockSize; neededRsize = neededRsize + primme->nLocal*blockSize; } else { sol = Kinvx + primme->nLocal; neededRsize = neededRsize + primme->nLocal; } } else { sol = Kinvx + 0; } if (primme->correctionParams.maxInnerIterations == 0) { linSolverRWork = sol + 0; /* sol not needed for GD */ linSolverRWorkSize = 0; /* No inner solver used */ } else { linSolverRWork = sol + primme->nLocal; /* sol needed in innerJD */ neededRsize = neededRsize + primme->nLocal; linSolverRWorkSize = /* Inner solver worksize */ 4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals); neededRsize = neededRsize + linSolverRWorkSize; } sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize); blockOfShifts = sortedRitzVals + (numLocked+basisSize); approxOlsenEps = blockOfShifts + blockSize; neededRsize = neededRsize + numLocked+basisSize + 2*blockSize; if (neededRsize > rworkSize) { return(neededRsize); } // Subdivide also the integer work space ilev = iwork; // of size blockSize //------------------------------------------------------------ // Figuring out preconditioning shifts (robust, Olsen, etc) //------------------------------------------------------------ /* blockOfShifts will contain the preconditioning shifts: */ /* either Ritz values or robustShifts computed below. These shifts */ /* will be used in the correction equations or in inverting (K-sigma I) */ /* approxOlsenEps will contain error approximations for eigenavalues */ /* to be used for Olsen's method (when innerIterations =0). */ if (primme->locking) { /* Combine the sorted list of locked Ritz values with the sorted */ /* list of current Ritz values, ritzVals. The merging of the two */ /* lists lockedEvals and ritzVals is stored in sortedRitzVals. */ mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, sortedRitzVals, ilev, blockSize, primme); } else { /* Then the sorted evals are simply the ritzVals, targeted as iev */ sortedRitzVals = ritzVals; ilev = iev; } /*-----------------------------------------------------------------*/ /* For interior eigenpairs, use the user provided shifts */ /*-----------------------------------------------------------------*/ if (primme->target != primme_smallest && primme->target != primme_largest) { for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* user provided shifts */ else { /*-----------------------------------------------------------------*/ /* else it is primme_smallest or primme_largest */ /*-----------------------------------------------------------------*/ if (primme->correctionParams.robustShifts) { /*----------------------------------------------------*/ /* Subtract/add a robust shift from/to the Ritz value */ /*----------------------------------------------------*/ /* Find the robust shift for each block vector */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; eval = sortedRitzVals[sortedIndex]; robustShift = computeRobustShift(blockIndex, blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, sortedRitzVals, &approxOlsenEps[blockIndex], numLocked+basisSize, ilev, primme); /* Subtract/add the shift if looking for the smallest/largest */ /* eigenvalues, Do not go beyond the previous computed eigval */ if (primme->target == primme_smallest) { blockOfShifts[blockIndex] = eval - robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } else { blockOfShifts[blockIndex] = eval + robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } // robust shifting } /* for loop */ } /* endif robust shifts */ else { /*--------------------------------------------------------------*/ /* Otherwise, the shifts for both preconditioner and correction */ /* equation should be just the Ritz values. For Olsen's method, */ /* the shifts for r-eps*x, are chosen as the difference in Ritz */ /* value between successive iterations. */ /*--------------------------------------------------------------*/ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { ritzIndex = iev[blockIndex]; sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = ritzVals[ritzIndex]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* else no robust shifts */ } /* else primme_smallest or primme_largest */ /* Remember the previous ritz values*/ numPrevRitzVals = numLocked+basisSize; Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1); // Equip the primme struct with the blockOfShifts, in case the user // wants to precondition (K-sigma_i I)^{-1} with a different shift // for each vector primme->ShiftsForPreconditioner = blockOfShifts; //------------------------------------------------------------ // Generalized Davidson variants -- No inner iterations //------------------------------------------------------------ if (primme->correctionParams.maxInnerIterations == 0) { // This is Generalized Davidson or approximate Olsen's method. // Perform block preconditioning (with or without projections) r = &W[primme->nLocal*basisSize]; // All the block residuals x = &V[primme->nLocal*basisSize]; // All the block Ritz vectors if ( primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { // Compute exact Olsen's projected preconditioner. This is // expensive and rarely improves anything! Included for completeness. Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme); } else { if ( primme->correctionParams.projectors.RightX ) { // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { // Compute r_i = r_i - err_i * x_i {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;} Num_axpy_zprimme(primme->nLocal, tmpShift, &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1); } //for } // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) apply_preconditioner_block(r, x, blockSize, primme ); } } //------------------------------------------------------------ // JDQMR --- JD inner-outer variants //------------------------------------------------------------ else { // maxInnerIterations > 0 We perform inner-outer JDQMR. /* Solve the correction for each block vector. */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { r = &W[primme->nLocal*(basisSize+blockIndex)]; x = &V[primme->nLocal*(basisSize+blockIndex)]; /* Set up the left/right/skew projectors for JDQMR. */ /* The pointers Lprojector, Rprojector(Q/X) point to the */ /* appropriate arrays for use in the projection step */ setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, &Lprojector, &RprojectorQ, &RprojectorX, &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX, numLocked, numConvergedStored, primme); /* Map the index of the block vector to its corresponding eigenvalue */ /* index, and the shift for the correction equation. Also make the */ /* shift available to primme, in case (K-shift I)^-1 is needed */ ritzIndex = iev[blockIndex]; shift = blockOfShifts[blockIndex]; primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex]; ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX, sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, machEps, linSolverRWork, linSolverRWorkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve, ret, __FILE__, __LINE__, primme); return (INNER_SOLVE_FAILURE); } Num_zcopy_zprimme(primme->nLocal, sol, 1, &V[primme->nLocal*(basisSize+blockIndex)], 1); } // end for each block vector } // JDqmr variants return 0; }
static int dtr(int numLocked, Complex_Z *hVecs, double *hVals, int *flags, int basisSize, int numFree, int *iev, Complex_Z *rwork, primme_params *primme) { int i; /* Loop variable */ int l, lOpt, lMin; /* Determine how many left side vectors to retain */ int r, rOpt; /* Determine how many right side vectors to retain */ int maxIndex; /* basisSize - 1 */ int restartSize; /* The new restart size */ double currentRitzVal; /* The current Ritz value the solver is computing */ double newVal, optVal; /* Used to find the optimum gap ratio */ /* ---------------------------------------------------------------- */ /* Compute lOpt and rOpt with respect to the first Ritz value being */ /* targeted by the block. */ /* ---------------------------------------------------------------- */ currentRitzVal = hVals[iev[0]]; maxIndex = basisSize-1; /* If locking is engaged, then lMin must be large enough to retain */ /* the coefficient vector associated with a converged target. */ /* lMin should be no smaller than primme->minRestartSize. */ if (primme->locking) { lMin = 0; /* Determine the largest index of any converged but unlocked target */ /* Ritz vector. */ for (l = 0; l < basisSize; l++) { if ( (flags[l] == CONVERGED || flags[l] == PRACTICALLY_CONVERGED) && (numLocked + l < primme->numEvals)) { lMin = l; } } lMin = max(lMin, min(basisSize, primme->minRestartSize)); } else { lMin = min(basisSize, primme->minRestartSize); } lOpt = lMin; rOpt = 0; optVal = 0.0L; if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"DTR basisSize: %d\n", basisSize); } /* ---------------------------------------------------------------------- */ /* Compute lOpt and rOpt that maximize the function. */ /* maximize the function (basisSize-numFree-lMin-rMin)* */ /* sqrt((currentRitzVal - hVals[l+1])/ */ /* (hVals[l+1]-hVals[basisSize-1-r])) */ /* ---------------------------------------------------------------------- */ for (l = lMin; l < basisSize - numFree; l++) { for (r = 0; r < basisSize - l - numFree; r++) { if ((basisSize - l - r) % primme->maxBlockSize == 0) { newVal = (basisSize - l - r) * sqrt((currentRitzVal - hVals[l+1])/ (hVals[l+1]-hVals[maxIndex-r])); if (newVal > optVal) { optVal = newVal; lOpt = l; rOpt = r; } } } } restartSize = lOpt + rOpt; /* --------------------------------------------------------------- */ /* Swap the rOpt vectors from the right hand side so that they are */ /* contiguous with the vectors from the left hand side. */ /* --------------------------------------------------------------- */ i = basisSize - restartSize; Num_zcopy_zprimme(i*basisSize, &hVecs[basisSize*lOpt], 1, rwork, 1); Num_zcopy_zprimme(rOpt*basisSize, &hVecs[basisSize*(basisSize-rOpt)], 1, &hVecs[basisSize*lOpt], 1); Num_zcopy_zprimme(i*basisSize, rwork, 1, &hVecs[basisSize*restartSize], 1); /* Do the same with the eigenvalues of H */ Num_dcopy_primme(i, &hVals[lOpt], 1, (double *) rwork, 1); Num_dcopy_primme(rOpt, &hVals[(basisSize-rOpt)], 1, &hVals[lOpt], 1); Num_dcopy_primme(i, (double *) rwork, 1, &hVals[restartSize], 1); /* Set only those flags lower than restartSize. The rest will be reset */ for (i = 0; i < rOpt; i++) { flags[lOpt + i] = flags[basisSize-rOpt + i]; } if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"DTR restart size: %d L: %d R: %d\n", restartSize, lOpt, rOpt); } reset_flags_zprimme(flags, restartSize, primme->maxBasisSize); return restartSize; }