static void compute_resnorms(Complex_Z *V, Complex_Z *W, Complex_Z *hVecs, double *hVals, int basisSize, double *blockNorms, int *iev, int left, int right, void *rwork, primme_params *primme) { int i; /* Loop variable */ int numResiduals; /* Number of residual vectors to be computed */ double *dwork = (double *) rwork; /* pointer casting rwork to double */ Complex_Z ztmp; /* temp var holding shift */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /* constants */ numResiduals = right - left + 1; /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */ /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec. */ /* Compute the Ritz vectors */ Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, V, primme->nLocal, hVecs, basisSize, tzero, &V[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute W*hVecs */ Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, W, primme->nLocal, hVecs, basisSize, tzero, &W[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute the residuals */ for (i=left; i <= right; i++) { { ztmp.r = -hVals[iev[i]]; ztmp.i = 0.0L; } Num_axpy_zprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } /* Compute the residual norms */ for (i=left; i <= right; i++) { ztmp = Num_dot_zprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)] , 1); dwork[i] = ztmp.r; } (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals, primme); for (i=left; i <= right; i++) { blockNorms[i] = sqrt(blockNorms[i]); } }
static Complex_Z dist_dot(Complex_Z *x, int incx, Complex_Z *y, int incy, primme_params *primme) { Complex_Z temp, product; int count; temp = Num_dot_zprimme(primme->nLocal, x, incx, y, incy); // In Complex, the size of the array to globalSum is twice as large count = 2; (*primme->globalSumDouble)(&temp, &product, &count, primme); return product; }
static void setup_JD_projectors(Complex_Z *x, Complex_Z *r, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *Kinvx, Complex_Z *xKinvx, Complex_Z **Lprojector, Complex_Z **RprojectorQ, Complex_Z **RprojectorX, int *sizeLprojector, int *sizeRprojectorQ, int *sizeRprojectorX, int numLocked, int numConverged, primme_params *primme) { int n, sizeEvecs; int ONE = 1; // In Complex, the size of the array to globalSum is twice as large int count = 2; Complex_Z xKinvx_local; Complex_Z tpone = {+1.0e+00,+0.0e00}; *sizeLprojector = 0; *sizeRprojectorQ = 0; *sizeRprojectorX = 0; *Lprojector = NULL; *RprojectorQ = NULL; *RprojectorX = NULL; n = primme->nLocal; if (primme->locking) sizeEvecs = primme->numOrthoConst+numLocked; else sizeEvecs = primme->numOrthoConst+numConverged; /* --------------------------------------------------------*/ /* Set up the left projector arrays. Make x adjacent to Q. */ /* --------------------------------------------------------*/ if (primme->correctionParams.projectors.LeftQ) { *sizeLprojector = sizeEvecs; *Lprojector = evecs; if (primme->correctionParams.projectors.LeftX) { Num_zcopy_zprimme(n, x, 1, &evecs[sizeEvecs*n], 1); *sizeLprojector = *sizeLprojector + 1; } } else { if (primme->correctionParams.projectors.LeftX) { *Lprojector = x; *sizeLprojector = 1; } } /* --------------------------------------------------------*/ /* Set up the right projector arrays. Q and x separately */ /* --------------------------------------------------------*/ /* ------------*/ /* First for Q */ /* ------------*/ if (primme->correctionParams.projectors.RightQ) { if (primme->correctionParams.precondition && primme->correctionParams.projectors.SkewQ) { *RprojectorQ = evecsHat; /* Use the K^(-1)evecs array */ } else { /* Right Q but not SkewQ */ *RprojectorQ = evecs; /* Use just the evecs array. */ } *sizeRprojectorQ = sizeEvecs; } else { /* if no RightQ projector */ *RprojectorQ = NULL; *sizeRprojectorQ = 0; } /* ------------*/ /* Then for x */ /* ------------*/ if (primme->correctionParams.projectors.RightX) { if (primme->correctionParams.precondition && primme->correctionParams.projectors.SkewX) { (*primme->applyPreconditioner)(x, Kinvx, &ONE, primme); primme->stats.numPreconds += 1; *RprojectorX = Kinvx; xKinvx_local = Num_dot_zprimme(primme->nLocal, x, 1, Kinvx, 1); (*primme->globalSumDouble)(&xKinvx_local, xKinvx, &count, primme); } else { *RprojectorX = x; *xKinvx = tpone; } *sizeRprojectorX = 1; } else { *RprojectorX = NULL; *sizeRprojectorX = 0; *xKinvx = tpone; } } /* setup_JD_projectors */
static void Olsen_preconditioner_block(Complex_Z *r, Complex_Z *x, int blockSize, Complex_Z *rwork, primme_params *primme) { int blockIndex, count; Complex_Z alpha; Complex_Z *Kinvx, *xKinvx, *xKinvr, *xKinvx_local, *xKinvr_local; Complex_Z ztmp; Complex_Z tzero = {+0.0e+00,+0.0e00}; //------------------------------------------------------------------ // Subdivide workspace //------------------------------------------------------------------ Kinvx = rwork; xKinvx_local = Kinvx + primme->nLocal*blockSize; xKinvr_local = xKinvx_local + blockSize; xKinvx = xKinvr_local + blockSize; xKinvr = xKinvx + blockSize; //------------------------------------------------------------------ // Compute K^{-1}x for block x. Kinvx memory requirement (blockSize*nLocal) //------------------------------------------------------------------ apply_preconditioner_block(x, Kinvx, blockSize, primme ); //------------------------------------------------------------------ // Compute local x^TK^{-1}x and x^TK^{-1}r = (K^{-1}x)^Tr for each vector //------------------------------------------------------------------ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { xKinvx_local[blockIndex] = Num_dot_zprimme(primme->nLocal, &x[primme->nLocal*blockIndex],1, &Kinvx[primme->nLocal*blockIndex],1); xKinvr_local[blockIndex] = Num_dot_zprimme(primme->nLocal, &Kinvx[primme->nLocal*blockIndex],1, &r[primme->nLocal*blockIndex],1); } count = 4*blockSize; (*primme->globalSumDouble)(xKinvx_local, xKinvx, &count, primme); //------------------------------------------------------------------ // Compute K^{-1}r //------------------------------------------------------------------ apply_preconditioner_block(r, x, blockSize, primme ); //------------------------------------------------------------------ // Compute K^(-1)r - ( xKinvr/xKinvx ) K^(-1)r for each vector //------------------------------------------------------------------ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { if (z_abs_primme(xKinvx[blockIndex]) > 0.0L) { ztmp.r = -xKinvr[blockIndex].r; ztmp.i = -xKinvr[blockIndex].i; z_div_primme(&alpha, &ztmp, &xKinvx[blockIndex]); } else alpha = tzero; Num_axpy_zprimme(primme->nLocal,alpha,&Kinvx[primme->nLocal*blockIndex], 1, &x[primme->nLocal*blockIndex],1); } //for } // of Olsen_preconditiner_block
/******************************************************************************* * Subroutine check_practical_convergence() * * This function is called after swaping has pushed converged (C) * and to be projected (P) evecs at the end of blockSize. * Makes C, P contiguous, projects P, checks and finds practically * converged evecs (p), and makes p contiguous to C, updating * flags[], blockNorms[], and numVacancies. Eg: * * blockSize * |------CPCCPPCP| -> |------PPPPCCCC| -> |------PpPpCCCC| -> * |------PPppCCCC|, numVacancies = 6 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * INPUT ARRAYS AND PARAMETERS * --------------------------- * evecs The locked eigenvectors * numLocked The number of locked eigenvectors * basisSize Number of vectors in the basis * blockSize The number of block vectors * start Starting index in V,W of vectors converged or to be projected * numToProject The number of vectors to project. * tol The required convergence tolerance * rwork real work array of size: 2*maxEvecsSize*primme->maxBlockSize * primme Structure containing various solver parameters * * * OUTPUT ARRAYS AND PARAMETERS * ---------------------------- * V The basis vectors * W A*V * iev Indicates which Ritz value each block vector corresponds to * flags Indicates which Ritz pairs have converged * blockNorms The norms of the block vectors to be targeted * recentlyConverged Number of converged vectors in the whole basis V * = converged+practicallyConverged * numVacancies Number of Ritz values between left and right that were * declared converged or practicallyConverged. [left, right] * can be smaller than the whole V. See while-loop in * check_convergence. * left, right Indices indicating which vectors are to be replaced * ievMax Index of the next Ritz value to be targeted by the block ******************************************************************************/ static void check_practical_convergence(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, int numLocked, int basisSize, int blockSize, int start, int numToProject, int *iev, int *flags, double *blockNorms, double tol, int *recentlyConverged, int *numVacancies, Complex_Z *rwork, primme_params *primme) { int i, n, dimEvecs; int count; double normPr; double normDiff; Complex_Z *overlaps; /* Pointer in rwork to keep Q'*r */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}, tmone = {-1.0e+00,+0.0e00}; /* constants */ /* convenience variables */ n = primme->nLocal; dimEvecs = primme->numOrthoConst + numLocked; /* Subdivide rwork */ overlaps = rwork + dimEvecs*primme->maxBlockSize; /* --------------------------------------------------------------- */ /* Reset any TO_BE_PROJECTED flags back to UNCONVERGED, and swap */ /* again CONVERGED flags toward the end of the block. Now swapping */ /* occurs in the block basisSize+[start:blockSize]: */ /* [ . . . . . . . P P P P C C C C ] blockSize */ /* start^ ^start+numToProject */ /* --------------------------------------------------------------- */ for (i=start; i < blockSize; i++) if (flags[iev[i]] == TO_BE_PROJECTED) flags[iev[i]] = UNCONVERGED; if (*numVacancies > 0) swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); /* ------------------------------------------------------------------ */ /* Project the numToProject residuals agaist (I-evecs*evecs') */ /* ------------------------------------------------------------------ */ /* overlaps = evecs'*residuals */ Num_gemm_zprimme("C", "N", dimEvecs, numToProject, n, tpone, evecs, n, &W[(basisSize+start)*n], n, tzero, rwork, dimEvecs); // In Complex, the size of the array to globalSum is twice as large count = 2*(dimEvecs*numToProject); (*primme->globalSumDouble)(rwork, overlaps, &count, primme); /* residuals = residuals - evecs*overlaps */ Num_gemm_zprimme("N", "N", n, numToProject, dimEvecs, tmone, evecs, n, overlaps, dimEvecs, tpone, &W[(basisSize+start)*n], n); /* ------------------------------------------------------------------ */ /* Compute norms^2 of the projected res and the differences from res */ /* note: ||residual - (I-QQ')residual||=||Q'*r||=||overlaps|| */ /* ------------------------------------------------------------------ */ for (i=0; i < numToProject; i++) { /* || res - (I-QQ')res || */ rwork[i] = Num_dot_zprimme(dimEvecs, &overlaps[dimEvecs*i], 1, &overlaps[dimEvecs*i], 1); /* || (I-QQ')res || */ rwork[i+numToProject] = Num_dot_zprimme(n, &W[(basisSize+start+i)*n], 1, &W[(basisSize+start+i)*n], 1); } /* global sum ||overlaps|| and ||(I-QQ')r|| */ // In Complex, the size of the array to globalSum is twice as large count = 2*(2*numToProject); (*primme->globalSumDouble)(rwork, &rwork[count], &count, primme); /* ------------------------------------------------------------------ */ /* For each projected residual check whether there is an accuracy */ /* problem and, if so, declare it PRACTICALLY_CONVERGED to lock later.*/ /* normDiff is a lower bound to the attainable accuracy for this pair */ /* so problems exist only if normDiff > tol. Then, we stop if Tol is */ /* the geometric mean of normPr and r. */ /* ------------------------------------------------------------------ */ for (i=start; i < start+numToProject; i++) { normDiff = sqrt(rwork[i-start].r); normPr = sqrt(rwork[i-start+numToProject].r); //printf(" R Pr |R-Pr| %e %e %e \n", blockNorms[i],normPr,normDiff); if (normDiff >= tol && normPr < tol*tol/blockNorms[i]/2) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " PRACTICALLY_CONVERGED %d norm(I-QQt)r %e bound %e\n", iev[i],normPr,tol*tol/normDiff); fflush(primme->outputFile); } flags[iev[i]] = PRACTICALLY_CONVERGED; (*numVacancies)++; if (numLocked + iev[i] < primme->numEvals){ recentlyConverged++; } } // if practically converged } // for each projected residual /* ------------------------------------------------------------------ */ /* Finally swap all practically converged toward the end of the block */ /* to be contiguous with the converged ones. Swap all relevant arrays */ /* ------------------------------------------------------------------ */ start = blockSize - *numVacancies; swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); }