static void compute_resnorms(Complex_Z *V, Complex_Z *W, Complex_Z *hVecs, double *hVals, int basisSize, double *blockNorms, int *iev, int left, int right, void *rwork, primme_params *primme) { int i; /* Loop variable */ int numResiduals; /* Number of residual vectors to be computed */ double *dwork = (double *) rwork; /* pointer casting rwork to double */ Complex_Z ztmp; /* temp var holding shift */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /* constants */ numResiduals = right - left + 1; /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */ /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec. */ /* Compute the Ritz vectors */ Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, V, primme->nLocal, hVecs, basisSize, tzero, &V[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute W*hVecs */ Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, W, primme->nLocal, hVecs, basisSize, tzero, &W[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute the residuals */ for (i=left; i <= right; i++) { { ztmp.r = -hVals[iev[i]]; ztmp.i = 0.0L; } Num_axpy_zprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } /* Compute the residual norms */ for (i=left; i <= right; i++) { ztmp = Num_dot_zprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)] , 1); dwork[i] = ztmp.r; } (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals, primme); for (i=left; i <= right; i++) { blockNorms[i] = sqrt(blockNorms[i]); } }
static void compute_submatrix(Complex_Z *previousHVecs, int numPrevRetained, Complex_Z *H, int basisSize, int maxBasisSize, Complex_Z *subMatrix, Complex_Z *rwork) { Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; Num_symm_zprimme("L", "U", basisSize, numPrevRetained, tpone, H, maxBasisSize, previousHVecs, maxBasisSize, tzero, rwork, basisSize); Num_gemm_zprimme("C", "N", numPrevRetained, numPrevRetained, basisSize, tpone, previousHVecs, basisSize, rwork, basisSize, tzero, subMatrix, numPrevRetained); }
void update_projection_zprimme(Complex_Z *X, Complex_Z *Y, Complex_Z *Z, int numCols, int maxCols, int blockSize, Complex_Z *rwork, primme_params *primme) { int j; /* Loop variable */ int count; Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /* --------------------------------------------------------------------- */ /* Zero the work array to prevent floating point traps during all-reduce */ /* --------------------------------------------------------------------- */ for (j = 0; j < maxCols*blockSize; j++) { rwork[j] = tzero; } /* --------------------------------------------------------------------- */ /* Grow Z by blockSize number of rows and columns all at once */ /* --------------------------------------------------------------------- */ Num_gemm_zprimme("C", "N", numCols+blockSize, blockSize, primme->nLocal, tpone, X, primme->nLocal, &Y[primme->nLocal*numCols], primme->nLocal, tzero, rwork, maxCols); /* -------------------------------------------------------------- */ /* Alternative to the previous call: */ /* Compute next the additional rows of each new column vector. */ /* Only the upper triangular portion is computed and stored. */ /* -------------------------------------------------------------- */ /* for (j = numCols; j < numCols+blockSize; j++) { Num_gemv_zprimme("C", primme->nLocal, j-numCols+1, tpone, &X[primme->nLocal*numCols], primme->nLocal, &Y[primme->nLocal*j], 1, tzero, &rwork[maxCols*(j-numCols)+numCols], 1); } */ count = 2*maxCols*blockSize; (*primme->globalSumDouble)(rwork, &Z[maxCols*numCols], &count, primme); }
static void restart_X(Complex_Z *X, Complex_Z *hVecs, int nLocal, int basisSize, int restartSize, Complex_Z *rwork, int rworkSize) { int i, k; /* Loop variables */ int AvailRows = min(rworkSize/restartSize, nLocal); Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; i = 0; while (i < nLocal) { /* Block matrix multiply */ Num_gemm_zprimme("N", "N", AvailRows, restartSize, basisSize, tpone, &X[i], nLocal, hVecs, basisSize, tzero, rwork, AvailRows ); /* Copy the result in the desired location of X */ for (k=0; k < restartSize; k++) { Num_zcopy_zprimme(AvailRows, &rwork[AvailRows*k],1, &X[i+nLocal*k], 1); } i = i+AvailRows; AvailRows = min(AvailRows, nLocal-i); } }
/******************************************************************************* * Subroutine check_practical_convergence() * * This function is called after swaping has pushed converged (C) * and to be projected (P) evecs at the end of blockSize. * Makes C, P contiguous, projects P, checks and finds practically * converged evecs (p), and makes p contiguous to C, updating * flags[], blockNorms[], and numVacancies. Eg: * * blockSize * |------CPCCPPCP| -> |------PPPPCCCC| -> |------PpPpCCCC| -> * |------PPppCCCC|, numVacancies = 6 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * INPUT ARRAYS AND PARAMETERS * --------------------------- * evecs The locked eigenvectors * numLocked The number of locked eigenvectors * basisSize Number of vectors in the basis * blockSize The number of block vectors * start Starting index in V,W of vectors converged or to be projected * numToProject The number of vectors to project. * tol The required convergence tolerance * rwork real work array of size: 2*maxEvecsSize*primme->maxBlockSize * primme Structure containing various solver parameters * * * OUTPUT ARRAYS AND PARAMETERS * ---------------------------- * V The basis vectors * W A*V * iev Indicates which Ritz value each block vector corresponds to * flags Indicates which Ritz pairs have converged * blockNorms The norms of the block vectors to be targeted * recentlyConverged Number of converged vectors in the whole basis V * = converged+practicallyConverged * numVacancies Number of Ritz values between left and right that were * declared converged or practicallyConverged. [left, right] * can be smaller than the whole V. See while-loop in * check_convergence. * left, right Indices indicating which vectors are to be replaced * ievMax Index of the next Ritz value to be targeted by the block ******************************************************************************/ static void check_practical_convergence(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, int numLocked, int basisSize, int blockSize, int start, int numToProject, int *iev, int *flags, double *blockNorms, double tol, int *recentlyConverged, int *numVacancies, Complex_Z *rwork, primme_params *primme) { int i, n, dimEvecs; int count; double normPr; double normDiff; Complex_Z *overlaps; /* Pointer in rwork to keep Q'*r */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}, tmone = {-1.0e+00,+0.0e00}; /* constants */ /* convenience variables */ n = primme->nLocal; dimEvecs = primme->numOrthoConst + numLocked; /* Subdivide rwork */ overlaps = rwork + dimEvecs*primme->maxBlockSize; /* --------------------------------------------------------------- */ /* Reset any TO_BE_PROJECTED flags back to UNCONVERGED, and swap */ /* again CONVERGED flags toward the end of the block. Now swapping */ /* occurs in the block basisSize+[start:blockSize]: */ /* [ . . . . . . . P P P P C C C C ] blockSize */ /* start^ ^start+numToProject */ /* --------------------------------------------------------------- */ for (i=start; i < blockSize; i++) if (flags[iev[i]] == TO_BE_PROJECTED) flags[iev[i]] = UNCONVERGED; if (*numVacancies > 0) swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); /* ------------------------------------------------------------------ */ /* Project the numToProject residuals agaist (I-evecs*evecs') */ /* ------------------------------------------------------------------ */ /* overlaps = evecs'*residuals */ Num_gemm_zprimme("C", "N", dimEvecs, numToProject, n, tpone, evecs, n, &W[(basisSize+start)*n], n, tzero, rwork, dimEvecs); // In Complex, the size of the array to globalSum is twice as large count = 2*(dimEvecs*numToProject); (*primme->globalSumDouble)(rwork, overlaps, &count, primme); /* residuals = residuals - evecs*overlaps */ Num_gemm_zprimme("N", "N", n, numToProject, dimEvecs, tmone, evecs, n, overlaps, dimEvecs, tpone, &W[(basisSize+start)*n], n); /* ------------------------------------------------------------------ */ /* Compute norms^2 of the projected res and the differences from res */ /* note: ||residual - (I-QQ')residual||=||Q'*r||=||overlaps|| */ /* ------------------------------------------------------------------ */ for (i=0; i < numToProject; i++) { /* || res - (I-QQ')res || */ rwork[i] = Num_dot_zprimme(dimEvecs, &overlaps[dimEvecs*i], 1, &overlaps[dimEvecs*i], 1); /* || (I-QQ')res || */ rwork[i+numToProject] = Num_dot_zprimme(n, &W[(basisSize+start+i)*n], 1, &W[(basisSize+start+i)*n], 1); } /* global sum ||overlaps|| and ||(I-QQ')r|| */ // In Complex, the size of the array to globalSum is twice as large count = 2*(2*numToProject); (*primme->globalSumDouble)(rwork, &rwork[count], &count, primme); /* ------------------------------------------------------------------ */ /* For each projected residual check whether there is an accuracy */ /* problem and, if so, declare it PRACTICALLY_CONVERGED to lock later.*/ /* normDiff is a lower bound to the attainable accuracy for this pair */ /* so problems exist only if normDiff > tol. Then, we stop if Tol is */ /* the geometric mean of normPr and r. */ /* ------------------------------------------------------------------ */ for (i=start; i < start+numToProject; i++) { normDiff = sqrt(rwork[i-start].r); normPr = sqrt(rwork[i-start+numToProject].r); //printf(" R Pr |R-Pr| %e %e %e \n", blockNorms[i],normPr,normDiff); if (normDiff >= tol && normPr < tol*tol/blockNorms[i]/2) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " PRACTICALLY_CONVERGED %d norm(I-QQt)r %e bound %e\n", iev[i],normPr,tol*tol/normDiff); fflush(primme->outputFile); } flags[iev[i]] = PRACTICALLY_CONVERGED; (*numVacancies)++; if (numLocked + iev[i] < primme->numEvals){ recentlyConverged++; } } // if practically converged } // for each projected residual /* ------------------------------------------------------------------ */ /* Finally swap all practically converged toward the end of the block */ /* to be contiguous with the converged ones. Swap all relevant arrays */ /* ------------------------------------------------------------------ */ start = blockSize - *numVacancies; swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); }