static double dist_dot(double *x, int incx, double *y, int incy, primme_params *primme) { double temp, product; int count; temp = Num_dot_dprimme(primme->nLocal, x, incx, y, incy); count = 1; (*primme->globalSumDouble)(&temp, &product, &count, primme); return product; }
static void compute_resnorms(double *V, double *W, double *hVecs, double *hVals, int basisSize, double *blockNorms, int *iev, int left, int right, void *rwork, primme_params *primme) { int i; /* Loop variable */ int numResiduals; /* Number of residual vectors to be computed */ double *dwork = (double *) rwork; /* pointer casting rwork to double */ double ztmp; /* temp var holding shift */ double tpone = +1.0e+00, tzero = +0.0e+00; /* constants */ numResiduals = right - left + 1; /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */ /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec. */ /* Compute the Ritz vectors */ Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, V, primme->nLocal, hVecs, basisSize, tzero, &V[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute W*hVecs */ Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, W, primme->nLocal, hVecs, basisSize, tzero, &W[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute the residuals */ for (i=left; i <= right; i++) { ztmp = -hVals[iev[i]]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } /* Compute the residual norms */ for (i=left; i <= right; i++) { dwork[i] = Num_dot_dprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals, primme); for (i=left; i <= right; i++) { blockNorms[i] = sqrt(blockNorms[i]); } }
int lock_vectors_dprimme(double tol, double *aNormEstimate, double *maxConvTol, int *basisSize, int *numLocked, int *numGuesses, int *nextGuess, double *V, double *W, double *H, double *evecsHat, double *M, double *UDU, int *ipivot, double *hVals, double *hVecs, double *evecs, double *evals, int *perm, double machEps, double *resNorms, int *numPrevRitzVals, double *prevRitzVals, int *flag, double *rwork, int rworkSize, int *iwork, int *LockingProblem, primme_params *primme) { int i; /* Loop counter */ int numCandidates; /* Number of targeted Ritz vectors converged before */ /* restart. */ int newStart; /* Index in evecs where the locked vectors were added */ int numNewVectors; /* Number of vectors added to the basis to replace */ /* locked vectors. */ int candidate; /* Index of Ritz vector to be checked for convergence */ int numDeflated; /* The number of vectors actually locked */ int numReplaced; /* The number of locked vectors that were replaced by */ /* initial guesses. */ int numRecentlyLocked; /* Number of vectors locked. */ int evecsSize; /* The number of orthogonalization constraints plus */ /* the number of locked vectors. */ int ret; /* Used to store return values. */ int workinW; /* Flag whether an active W vector is used as tempwork*/ int entireSpace = (*basisSize+*numLocked >= primme->n); /* bool if entire*/ /* space is built, so current ritzvecs are accurate. */ double *norms, *tnorms; /* Array of residual norms, and temp array */ double attainableTol; /* Used to verify a practical convergence problem*/ double *residual; /* Stores residual vector */ double ztmp; /* temp variable */ /* ----------------------------------------*/ /* Assign temporary work space for residual*/ /* ----------------------------------------*/ if (*basisSize < primme->maxBasisSize) { /* compute residuals in the next open slot of W */ residual = &W[*basisSize*primme->nLocal]; workinW = 0; } else { /* This basiSize==maxBasisSize, immediately after restart, can only occur * if the basisSize + numLocked = n (at which case we lock everything) * OR if (numConverged + restartSize + numPrevRetain > basisSize), ie. * too many converged. Since we do not know which evec will be locked * we use the W[LAST] as temporary space, but only after W[LAST] has * been used to compute residual(LAST) -the while loop starts from LAST. * After all lockings, if the LAST evec was not locked, we must * recompute W[LAST]=Av. This matvec event is extremely infrequent */ residual = &W[(*basisSize-1)*primme->nLocal]; workinW = 1; } /* -------------------------------------*/ /* Set the tolerance, and attainableTol */ /* -------------------------------------*/ if (primme->aNorm <= 0.0L) { tol = tol * (*aNormEstimate); } attainableTol=max(tol,sqrt(primme->numOrthoConst+*numLocked)*(*maxConvTol)); /* -------------------------------------------------------- */ /* Determine how many Ritz vectors converged before restart */ /* -------------------------------------------------------- */ i = *basisSize - 1; while ((flag[i] == LOCK_IT ||flag[i] == UNCONDITIONAL_LOCK_IT) && i >= 0) { i--; } numCandidates = *basisSize - i - 1; if (numCandidates == 0) { return 0; } /* --------------------------------- */ /* Compute residuals and their norms */ /* --------------------------------- */ tnorms = (double *) rwork; norms = tnorms + numCandidates; for (i = *basisSize-1, candidate = numCandidates-1; i >= *basisSize-numCandidates; i--, candidate--) { Num_dcopy_dprimme(primme->nLocal, &W[primme->nLocal*i], 1, residual, 1); ztmp = -hVals[i]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*i],1,residual,1); tnorms[candidate] = Num_dot_dprimme(primme->nLocal,residual,1,residual,1); } /* Global sum the dot products */ (*primme->globalSumDouble)(tnorms, norms, &numCandidates, primme); numRecentlyLocked = 0; /* ------------------------------------------------------------------- */ /* Check the convergence of each residual norm. If the Ritz vector is */ /* converged, then lock it. */ /* ------------------------------------------------------------------- */ for (i = *basisSize - numCandidates, candidate = 0; i < *basisSize; i++, candidate++) { norms[candidate] = sqrt(norms[candidate]); /* If the vector has become (regularly or practically) unconverged, */ /* then flag it, else lock it and replace it with an initial guess, */ /* if one is available. Exception: If the entire space is spanned, */ /* we can't do better, so lock it. */ if ((flag[i]!=UNCONDITIONAL_LOCK_IT && norms[candidate] >= tol && !entireSpace ) || (flag[i]==UNCONDITIONAL_LOCK_IT && norms[candidate] >= attainableTol && !entireSpace )) { flag[i] = UNCONVERGED; } else { /* If an unconditional lock has become converged, show it and */ /* record the max converged tolerance accordingly */ if (norms[candidate]<tol) { flag[i]=LOCK_IT; *maxConvTol = max(*maxConvTol, tol); } else { *maxConvTol = max(*maxConvTol, norms[candidate]); *LockingProblem = 1; } if (primme->printLevel >= 2 && primme->procID == 0) { fprintf(primme->outputFile, "Lock epair[ %d ]= %e norm %.4e Mvecs %d Time %.4e Flag %d\n", *numLocked+1, hVals[i], norms[candidate], primme->stats.numMatvecs,primme_wTimer(0),flag[i]); fflush(primme->outputFile); } /* Copy the converged Ritz vector to the evecs array and */ /* insert the converged Ritz value in sorted order within */ /* the evals array. */ Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1, &evecs[primme->nLocal*(primme->numOrthoConst + *numLocked)], 1); insertionSort(hVals[i], evals, norms[candidate], resNorms, perm, *numLocked, primme); /* If there are any initial guesses remaining, then copy it */ /* into the basis, else flag the vector as locked so it may */ /* be discarded later. */ if (*numGuesses > 0) { Num_dcopy_dprimme(primme->nLocal, &evecs[primme->nLocal*(*nextGuess)], 1, &V[primme->nLocal*i], 1); flag[i] = INITIAL_GUESS; *numGuesses = *numGuesses - 1; *nextGuess = *nextGuess + 1; } else { flag[i] = LOCKED; } *numLocked = *numLocked + 1; numRecentlyLocked++; } } evecsSize = primme->numOrthoConst + *numLocked; /* -------------------------------------------------------------------- */ /* If a W vector was used as workspace for residual AND its evec has */ /* not been locked out, recompute it, W = A*v. This is rare. */ /* -------------------------------------------------------------------- */ if (workinW && flag[*basisSize-1] != LOCKED) { update_W_dprimme(V, W, *basisSize-1, 1, primme); } /* -------------------------------------------------------------------- */ /* Return IF all target Ritz vectors have been locked, ELSE update the */ /* evecsHat array by applying the preconditioner (if preconditioning is */ /* needed, and JDQMR with right, skew Q projector is applied */ /* -------------------------------------------------------------------- */ if (*numLocked >= primme->numEvals) { return 0; } else if (UDU != NULL) { /* Compute K^{-1}x for all newly locked eigenvectors */ newStart = primme->nLocal*(evecsSize - numRecentlyLocked); (*primme->applyPreconditioner)( &evecs[newStart], &evecsHat[newStart], &numRecentlyLocked, primme); primme->stats.numPreconds += numRecentlyLocked; /* Update the projection evecs'*evecsHat now that evecs and evecsHat */ /* have been expanded by numRecentlyLocked columns. Required */ /* workspace is numLocked*numEvals. The most ever needed would be */ /* maxBasisSize*numEvals. */ update_projection_dprimme(evecs, evecsHat, M, evecsSize-numRecentlyLocked, primme->numOrthoConst+primme->numEvals, numRecentlyLocked, rwork, primme); ret = UDUDecompose_dprimme(M, UDU, ipivot, evecsSize, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* --------------------------------------------------------------------- */ /* Swap, towards the end of the basis, vectors that were locked but not */ /* replaced by new initial guesses. */ /* --------------------------------------------------------------------- */ numDeflated = swap_flagVecs_toEnd(*basisSize, LOCKED, V, W, H, hVals, flag, primme); /* --------------------------------------------------------------------- */ /* Reduce the basis size by numDeflated and swap the new initial guesses */ /* towards the end of the basis. */ /* --------------------------------------------------------------------- */ numReplaced = swap_flagVecs_toEnd(*basisSize-numDeflated, INITIAL_GUESS, V, W, H, hVals, flag, primme); *basisSize = *basisSize - (numDeflated + numReplaced); if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "numDeflated: %d numReplaced: %d \ basisSize: %d\n", numDeflated, numReplaced, *basisSize); }
/******************************************************************************* * Subroutine check_practical_convergence() * * This function is called after swaping has pushed converged (C) * and to be projected (P) evecs at the end of blockSize. * Makes C, P contiguous, projects P, checks and finds practically * converged evecs (p), and makes p contiguous to C, updating * flags[], blockNorms[], and numVacancies. Eg: * * blockSize * |------CPCCPPCP| -> |------PPPPCCCC| -> |------PpPpCCCC| -> * |------PPppCCCC|, numVacancies = 6 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * INPUT ARRAYS AND PARAMETERS * --------------------------- * evecs The locked eigenvectors * numLocked The number of locked eigenvectors * basisSize Number of vectors in the basis * blockSize The number of block vectors * start Starting index in V,W of vectors converged or to be projected * numToProject The number of vectors to project. * tol The required convergence tolerance * rwork real work array of size: 2*maxEvecsSize*primme->maxBlockSize * primme Structure containing various solver parameters * * * OUTPUT ARRAYS AND PARAMETERS * ---------------------------- * V The basis vectors * W A*V * iev Indicates which Ritz value each block vector corresponds to * flags Indicates which Ritz pairs have converged * blockNorms The norms of the block vectors to be targeted * recentlyConverged Number of converged vectors in the whole basis V * = converged+practicallyConverged * numVacancies Number of Ritz values between left and right that were * declared converged or practicallyConverged. [left, right] * can be smaller than the whole V. See while-loop in * check_convergence. * left, right Indices indicating which vectors are to be replaced * ievMax Index of the next Ritz value to be targeted by the block ******************************************************************************/ static void check_practical_convergence(double *V, double *W, double *evecs, int numLocked, int basisSize, int blockSize, int start, int numToProject, int *iev, int *flags, double *blockNorms, double tol, int *recentlyConverged, int *numVacancies, double *rwork, primme_params *primme) { int i, n, dimEvecs; int count; double normPr; double normDiff; double *overlaps; /* Pointer in rwork to keep Q'*r */ double tpone = +1.0e+00, tzero = +0.0e+00, tmone = -1.0e+00; /* constants */ /* convenience variables */ n = primme->nLocal; dimEvecs = primme->numOrthoConst + numLocked; /* Subdivide rwork */ overlaps = rwork + dimEvecs*primme->maxBlockSize; /* --------------------------------------------------------------- */ /* Reset any TO_BE_PROJECTED flags back to UNCONVERGED, and swap */ /* again CONVERGED flags toward the end of the block. Now swapping */ /* occurs in the block basisSize+[start:blockSize]: */ /* [ . . . . . . . P P P P C C C C ] blockSize */ /* start^ ^start+numToProject */ /* --------------------------------------------------------------- */ for (i=start; i < blockSize; i++) if (flags[iev[i]] == TO_BE_PROJECTED) flags[iev[i]] = UNCONVERGED; if (*numVacancies > 0) swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); /* ------------------------------------------------------------------ */ /* Project the numToProject residuals agaist (I-evecs*evecs') */ /* ------------------------------------------------------------------ */ /* overlaps = evecs'*residuals */ Num_gemm_dprimme("C", "N", dimEvecs, numToProject, n, tpone, evecs, n, &W[(basisSize+start)*n], n, tzero, rwork, dimEvecs); count = dimEvecs*numToProject; (*primme->globalSumDouble)(rwork, overlaps, &count, primme); /* residuals = residuals - evecs*overlaps */ Num_gemm_dprimme("N", "N", n, numToProject, dimEvecs, tmone, evecs, n, overlaps, dimEvecs, tpone, &W[(basisSize+start)*n], n); /* ------------------------------------------------------------------ */ /* Compute norms^2 of the projected res and the differences from res */ /* note: ||residual - (I-QQ')residual||=||Q'*r||=||overlaps|| */ /* ------------------------------------------------------------------ */ for (i=0; i < numToProject; i++) { /* || res - (I-QQ')res || */ rwork[i] = Num_dot_dprimme(dimEvecs, &overlaps[dimEvecs*i], 1, &overlaps[dimEvecs*i], 1); /* || (I-QQ')res || */ rwork[i+numToProject] = Num_dot_dprimme(n, &W[(basisSize+start+i)*n], 1, &W[(basisSize+start+i)*n], 1); } /* global sum ||overlaps|| and ||(I-QQ')r|| */ count = 2*numToProject; (*primme->globalSumDouble)(rwork, &rwork[count], &count, primme); /* ------------------------------------------------------------------ */ /* For each projected residual check whether there is an accuracy */ /* problem and, if so, declare it PRACTICALLY_CONVERGED to lock later.*/ /* normDiff is a lower bound to the attainable accuracy for this pair */ /* so problems exist only if normDiff > tol. Then, we stop if Tol is */ /* the geometric mean of normPr and r. */ /* ------------------------------------------------------------------ */ for (i=start; i < start+numToProject; i++) { normDiff = sqrt(rwork[i-start]); normPr = sqrt(rwork[i-start+numToProject]); /* printf(" R Pr |R-Pr| %e %e %e \n", blockNorms[i],normPr,normDiff); */ if (normDiff >= tol && normPr < tol*tol/blockNorms[i]/2) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " PRACTICALLY_CONVERGED %d norm(I-QQt)r %e bound %e\n", iev[i],normPr,tol*tol/normDiff); fflush(primme->outputFile); } flags[iev[i]] = PRACTICALLY_CONVERGED; (*numVacancies)++; if (numLocked + iev[i] < primme->numEvals){ recentlyConverged++; } } /* if practically converged */ } /* for each projected residual */ /* ------------------------------------------------------------------ */ /* Finally swap all practically converged toward the end of the block */ /* to be contiguous with the converged ones. Swap all relevant arrays */ /* ------------------------------------------------------------------ */ start = blockSize - *numVacancies; swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, dimEvecs, blockSize, start); }