static void compute_resnorms(double *V, double *W, double *hVecs, double *hVals, int basisSize, double *blockNorms, int *iev, int left, int right, void *rwork, primme_params *primme) { int i; /* Loop variable */ int numResiduals; /* Number of residual vectors to be computed */ double *dwork = (double *) rwork; /* pointer casting rwork to double */ double ztmp; /* temp var holding shift */ double tpone = +1.0e+00, tzero = +0.0e+00; /* constants */ numResiduals = right - left + 1; /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */ /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec. */ /* Compute the Ritz vectors */ Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, V, primme->nLocal, hVecs, basisSize, tzero, &V[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute W*hVecs */ Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, W, primme->nLocal, hVecs, basisSize, tzero, &W[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute the residuals */ for (i=left; i <= right; i++) { ztmp = -hVals[iev[i]]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } /* Compute the residual norms */ for (i=left; i <= right; i++) { dwork[i] = Num_dot_dprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals, primme); for (i=left; i <= right; i++) { blockNorms[i] = sqrt(blockNorms[i]); } }
/******************************************************************************* * Subroutine check_practical_convergence() * * This function is called after swaping has pushed converged (C) * and to be projected (P) evecs at the end of blockSize. * Makes C, P contiguous, projects P, checks and finds practically * converged evecs (p), and makes p contiguous to C, updating * flags[], blockNorms[], and numVacancies. Eg: * * blockSize * |------CPCCPPCP| -> |------PPPPCCCC| -> |------PpPpCCCC| -> * |------PPppCCCC|, numVacancies = 6 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * INPUT ARRAYS AND PARAMETERS * --------------------------- * evecs The locked eigenvectors * numLocked The number of locked eigenvectors * basisSize Number of vectors in the basis * blockSize The number of block vectors * start Starting index in V,W of vectors converged or to be projected * numToProject The number of vectors to project. * tol The required convergence tolerance * rwork real work array of size: 2*maxEvecsSize*primme->maxBlockSize * primme Structure containing various solver parameters * * * OUTPUT ARRAYS AND PARAMETERS * ---------------------------- * V The basis vectors * W A*V * iev Indicates which Ritz value each block vector corresponds to * flags Indicates which Ritz pairs have converged * blockNorms The norms of the block vectors to be targeted * recentlyConverged Number of converged vectors in the whole basis V * = converged+practicallyConverged * numVacancies Number of Ritz values between left and right that were * declared converged or practicallyConverged. [left, right] * can be smaller than the whole V. See while-loop in * check_convergence. * left, right Indices indicating which vectors are to be replaced * ievMax Index of the next Ritz value to be targeted by the block ******************************************************************************/ static void check_practical_convergence(double *V, double *W, double *evecs, int numLocked, int basisSize, int blockSize, int start, int numToProject, int *iev, int *flags, double *blockNorms, double tol, int *recentlyConverged, int *numVacancies, double *rwork, primme_params *primme) { int i, n, dimEvecs; int count; double normPr; double normDiff; double *overlaps; /* Pointer in rwork to keep Q'*r */ double tpone = +1.0e+00, tzero = +0.0e+00, tmone = -1.0e+00; /* constants */ /* convenience variables */ n = primme->nLocal; dimEvecs = primme->numOrthoConst + numLocked; /* Subdivide rwork */ overlaps = rwork + dimEvecs*primme->maxBlockSize; /* --------------------------------------------------------------- */ /* Reset any TO_BE_PROJECTED flags back to UNCONVERGED, and swap */ /* again CONVERGED flags toward the end of the block. Now swapping */ /* occurs in the block basisSize+[start:blockSize]: */ /* [ . . . . . . . P P P P C C C C ] blockSize */ /* start^ ^start+numToProject */ /* --------------------------------------------------------------- */ for (i=start; i < blockSize; i++) if (flags[iev[i]] == TO_BE_PROJECTED) flags[iev[i]] = UNCONVERGED; if (*numVacancies > 0) swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); /* ------------------------------------------------------------------ */ /* Project the numToProject residuals agaist (I-evecs*evecs') */ /* ------------------------------------------------------------------ */ /* overlaps = evecs'*residuals */ Num_gemm_dprimme("C", "N", dimEvecs, numToProject, n, tpone, evecs, n, &W[(basisSize+start)*n], n, tzero, rwork, dimEvecs); count = dimEvecs*numToProject; (*primme->globalSumDouble)(rwork, overlaps, &count, primme); /* residuals = residuals - evecs*overlaps */ Num_gemm_dprimme("N", "N", n, numToProject, dimEvecs, tmone, evecs, n, overlaps, dimEvecs, tpone, &W[(basisSize+start)*n], n); /* ------------------------------------------------------------------ */ /* Compute norms^2 of the projected res and the differences from res */ /* note: ||residual - (I-QQ')residual||=||Q'*r||=||overlaps|| */ /* ------------------------------------------------------------------ */ for (i=0; i < numToProject; i++) { /* || res - (I-QQ')res || */ rwork[i] = Num_dot_dprimme(dimEvecs, &overlaps[dimEvecs*i], 1, &overlaps[dimEvecs*i], 1); /* || (I-QQ')res || */ rwork[i+numToProject] = Num_dot_dprimme(n, &W[(basisSize+start+i)*n], 1, &W[(basisSize+start+i)*n], 1); } /* global sum ||overlaps|| and ||(I-QQ')r|| */ count = 2*numToProject; (*primme->globalSumDouble)(rwork, &rwork[count], &count, primme); /* ------------------------------------------------------------------ */ /* For each projected residual check whether there is an accuracy */ /* problem and, if so, declare it PRACTICALLY_CONVERGED to lock later.*/ /* normDiff is a lower bound to the attainable accuracy for this pair */ /* so problems exist only if normDiff > tol. Then, we stop if Tol is */ /* the geometric mean of normPr and r. */ /* ------------------------------------------------------------------ */ for (i=start; i < start+numToProject; i++) { normDiff = sqrt(rwork[i-start]); normPr = sqrt(rwork[i-start+numToProject]); /* printf(" R Pr |R-Pr| %e %e %e \n", blockNorms[i],normPr,normDiff); */ if (normDiff >= tol && normPr < tol*tol/blockNorms[i]/2) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " PRACTICALLY_CONVERGED %d norm(I-QQt)r %e bound %e\n", iev[i],normPr,tol*tol/normDiff); fflush(primme->outputFile); } flags[iev[i]] = PRACTICALLY_CONVERGED; (*numVacancies)++; if (numLocked + iev[i] < primme->numEvals){ recentlyConverged++; } } /* if practically converged */ } /* for each projected residual */ /* ------------------------------------------------------------------ */ /* Finally swap all practically converged toward the end of the block */ /* to be contiguous with the converged ones. Swap all relevant arrays */ /* ------------------------------------------------------------------ */ start = blockSize - *numVacancies; swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); }
int update_projection_dprimme(double *X, int ldX, double *Y, int ldY, double *Z, int ldZ, int nLocal, int numCols, int blockSize, double *rwork, int lrwork, int isSymmetric, primme_params *primme) { int count, count_doubles, m; double tpone = +1.0e+00, tzero = +0.0e+00; /* -------------------------- */ /* Return memory requirements */ /* -------------------------- */ if (X == NULL) { return (numCols+blockSize)*numCols*2 + (isSymmetric ? 0 : blockSize*numCols*2); } assert(ldX >= nLocal && ldY >= nLocal && ldZ >= numCols+blockSize); /* ------------ */ /* Quick return */ /* ------------ */ if (blockSize <= 0) return 0; /* --------------------------------------------------------------------- */ /* Grow Z by blockSize number of rows and columns all at once */ /* --------------------------------------------------------------------- */ m = numCols+blockSize; Num_gemm_dprimme("C", "N", m, blockSize, nLocal, tpone, X, ldX, &Y[ldY*numCols], ldY, tzero, &Z[ldZ*numCols], ldZ); /* -------------------------------------------------------------- */ /* Alternative to the previous call: */ /* Compute next the additional rows of each new column vector. */ /* Only the upper triangular portion is computed and stored. */ /* -------------------------------------------------------------- */ /* for (j = numCols; j < numCols+blockSize; j++) { Num_gemv_dprimme("C", primme->nLocal, j-numCols+1, tpone, &X[primme->nLocal*numCols], primme->nLocal, &Y[primme->nLocal*j], 1, tzero, &rwork[maxCols*(j-numCols)+numCols], 1); } */ if (!isSymmetric) { Num_gemm_dprimme("C", "N", blockSize, numCols, nLocal, tpone, &X[ldX*numCols], ldX, Y, ldY, tzero, &Z[numCols], ldZ); } if (primme->numProcs > 1 && isSymmetric) { /* --------------------------------------------------------------------- */ /* Reduce the upper triangular part of the new columns in Z. */ /* --------------------------------------------------------------------- */ Num_copy_trimatrix_compact_dprimme(&Z[ldZ*numCols], m, blockSize, ldZ, numCols, rwork, &count); assert(count*2 <= lrwork); count_doubles = count; primme->globalSumDouble(rwork, (double*)&rwork[count], &count_doubles, primme); Num_copy_compact_trimatrix_dprimme(&rwork[count], m, blockSize, numCols, &Z[ldZ*numCols], ldZ); } else if (primme->numProcs > 1 && !isSymmetric) { /* --------------------------------------------------------------------- */ /* Reduce Z(:,numCols:end) and Z(numCols:end,:). */ /* --------------------------------------------------------------------- */ Num_copy_matrix_dprimme(&Z[ldZ*numCols], m, blockSize, ldZ, rwork, m); Num_copy_matrix_dprimme(&Z[numCols], blockSize, numCols, ldZ, &rwork[m*blockSize], blockSize); count = m*blockSize+blockSize*numCols; assert(count*2 <= lrwork); count_doubles = count; primme->globalSumDouble(rwork, (double*)&rwork[count], &count_doubles, primme); Num_copy_matrix_dprimme(&rwork[count], m, blockSize, ldZ, &Z[ldZ*numCols], m); Num_copy_matrix_dprimme(&rwork[count+m*blockSize], blockSize, numCols, ldZ, &Z[numCols], blockSize); } return 0; }