static void apply_projected_matrix(double *v, double shift, double *Q, int dimQ, double *result, double *rwork, primme_params *primme) { int ONE = 1; /* For passing it by reference in matrixMatvec */ (*primme->matrixMatvec)(v, result, &ONE, primme); Num_axpy_dprimme(primme->nLocal, -shift, v, 1, result, 1); if (dimQ > 0) apply_projector(Q, dimQ, result, rwork, primme); primme->stats.numMatvecs += 1; }
static void compute_resnorms(double *V, double *W, double *hVecs, double *hVals, int basisSize, double *blockNorms, int *iev, int left, int right, void *rwork, primme_params *primme) { int i; /* Loop variable */ int numResiduals; /* Number of residual vectors to be computed */ double *dwork = (double *) rwork; /* pointer casting rwork to double */ double ztmp; /* temp var holding shift */ double tpone = +1.0e+00, tzero = +0.0e+00; /* constants */ numResiduals = right - left + 1; /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */ /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec. */ /* Compute the Ritz vectors */ Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, V, primme->nLocal, hVecs, basisSize, tzero, &V[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute W*hVecs */ Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, tpone, W, primme->nLocal, hVecs, basisSize, tzero, &W[primme->nLocal*(basisSize+left)], primme->nLocal); /* Compute the residuals */ for (i=left; i <= right; i++) { ztmp = -hVals[iev[i]]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } /* Compute the residual norms */ for (i=left; i <= right; i++) { dwork[i] = Num_dot_dprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1); } (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals, primme); for (i=left; i <= right; i++) { blockNorms[i] = sqrt(blockNorms[i]); } }
static int apply_skew_projector(double *Q, double *Qhat, double *UDU, int *ipivot, int numCols, double *v, double *rwork, primme_params *primme) { int count; double tpone = +1.0e+00, tzero = +0.0e+00, tmone = -1.0e+00; if (numCols > 0) { /* there is a projector to be applied */ int ret; double *overlaps; /* overlaps of v with columns of Q */ double *workSpace; /* Used for computing local overlaps */ overlaps = rwork; workSpace = overlaps + numCols; /* --------------------------------------------------------*/ /* Treat the one vector case with BLAS 1 calls */ /* --------------------------------------------------------*/ if (numCols == 1) { /* Compute workspace = Q'*v */ overlaps[0] = dist_dot(Q, 1, v, 1, primme); /* Backsolve only if there is a skew projector */ if (UDU != NULL) { if (UDU[0] == 0.0L) { return UDUSOLVE_FAILURE; } overlaps[0] = overlaps[0]/UDU[0]; } /* Compute v=v-Qhat*overlaps */ Num_axpy_dprimme(primme->nLocal, -overlaps[0], Qhat, 1, v, 1); } else { /* ------------------------------------------------------*/ /* More than one vectors. Use BLAS 2. */ /* ------------------------------------------------------*/ /* Compute workspace = Q'*v */ Num_gemv_dprimme("C", primme->nLocal, numCols, tpone, Q, primme->nLocal, v, 1, tzero, workSpace, 1); /* Global sum: overlaps = Q'*v */ count = numCols; (*primme->globalSumDouble)(workSpace, overlaps, &count, primme); /* --------------------------------------------*/ /* Backsolve only if there is a skew projector */ /* --------------------------------------------*/ if (UDU != NULL) { /* Solve (Q'Qhat)^{-1}*workSpace = overlaps = Q'*v for alpha by */ /* backsolving with the UDU decomposition. */ ret = UDUSolve_dprimme(UDU, ipivot, numCols, overlaps, workSpace); if (ret != 0) { primme_PushErrorMessage(Primme_apply_skew_projector, Primme_udusolve, ret, __FILE__, __LINE__, primme); return UDUSOLVE_FAILURE; } /* Compute v=v-Qhat*workspace */ Num_gemv_dprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, workSpace, 1, tpone, v, 1); } else { /* Compute v=v-Qhat*overlaps */ Num_gemv_dprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, overlaps, 1, tpone, v, 1); } /* UDU==null */ } /* numCols != 1 */ } /* numCols > 0 */ return 0; }
int inner_solve_dprimme(double *x, double *r, double *rnorm, double *evecs, double *evecsHat, double *UDU, int *ipivot, double *xKinvx, double *Lprojector, double *RprojectorQ, double *RprojectorX, int sizeLprojector, int sizeRprojectorQ, int sizeRprojectorX, double *sol, double eval, double shift, double eresTol, double aNormEstimate, double machEps, double *rwork, int rworkSize, primme_params *primme) { int i; /* loop variable */ int workSpaceSize; /* Size of local work array. */ int numIts; /* Number of inner iterations */ int ret; /* Return value used for error checking. */ int maxIterations; /* The maximum # iterations allowed. Depends on primme */ double *workSpace; /* Workspace needed by UDU routine */ /* QMR parameters */ double *g, *d, *delta, *w, *ptmp; double alpha_prev, beta, rho_prev, rho; double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; double ztmp; /* Parameters used to dynamically update eigenpair */ double Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev, eta; double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R; double Gamma_prev, Phi_prev; double Gamma, Phi; double gamma; /* The convergence criteria of the inner linear system must satisfy: */ /* || current residual || <= relativeTolerance * || initial residual || */ /* + absoluteTol */ double relativeTolerance; double absoluteTolerance; double LTolerance, ETolerance; /* Some constants */ double tpone = +1.0e+00, tzero = +0.0e+00; /* -------------------------------------------*/ /* Subdivide the workspace into needed arrays */ /* -------------------------------------------*/ g = rwork; d = g + primme->nLocal; delta = d + primme->nLocal; w = delta + primme->nLocal; workSpace = w + primme->nLocal; /* This needs at least 2*numOrth+NumEvals) */ workSpaceSize = rworkSize - (workSpace - rwork); /* -----------------------------------------*/ /* Set up convergence criteria by Tolerance */ /* -----------------------------------------*/ if (primme->aNorm <= 0.0L) { absoluteTolerance = aNormEstimate*machEps; eresTol = eresTol*aNormEstimate; } else { absoluteTolerance = primme->aNorm*machEps; } tau_prev = tau_init = *rnorm; /* Assumes zero initial guess */ LTolerance = eresTol; /* Andreas: note that eigenresidual tol may not be achievable, because we */ /* iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. */ if (primme->correctionParams.convTest == primme_adaptive) { ETolerance = max(eresTol/1.8L, absoluteTolerance); LTolerance = ETolerance; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) { LTolerance = max(eresTol/1.8L, absoluteTolerance); ETolerance = max(tau_init*0.1L, LTolerance); } else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) { relativeTolerance = pow(primme->correctionParams.relTolBase, (double)-primme->stats.numOuterIterations); LTolerance = relativeTolerance * tau_init + absoluteTolerance + eresTol; /*printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n", */ /*relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate);*/ } /* --------------------------------------------------------*/ /* Set up convergence criteria by max number of iterations */ /* --------------------------------------------------------*/ /* compute first total number of remaining matvecs */ maxIterations = primme->maxMatvecs - primme->stats.numMatvecs; /* Perform primme.maxInnerIterations, but do not exceed total remaining */ if (primme->correctionParams.maxInnerIterations > 0) { maxIterations = min(primme->correctionParams.maxInnerIterations, maxIterations); } /* --------------------------------------------------------*/ /* Rest of initializations */ /* --------------------------------------------------------*/ /* Assume zero initial guess */ Num_dcopy_dprimme(primme->nLocal, r, 1, g, 1); ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, d, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); return APPLYPROJECTEDPRECONDITIONER_FAILURE; } Theta_prev = 0.0L; eval_prev = eval; rho_prev = dist_dot(g, 1, d, 1, primme); /* Initialize recurrences used to dynamically update the eigenpair */ Beta_prev = Delta_prev = Psi_prev = 0.0L; Gamma_prev = Phi_prev = 0.0L; /* other initializations */ for (i = 0; i < primme->nLocal; i++) { delta[i] = tzero; sol[i] = tzero; } numIts = 0; /*----------------------------------------------------------------------*/ /*------------------------ Begin Inner Loop ----------------------------*/ /*----------------------------------------------------------------------*/ while (numIts < maxIterations) { apply_projected_matrix(d, shift, Lprojector, sizeLprojector, w, workSpace, primme); sigma_prev = dist_dot(d, 1, w, 1, primme); if (sigma_prev == 0.0L) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev); } break; } alpha_prev = rho_prev/sigma_prev; if (fabs(alpha_prev) < machEps || fabs(alpha_prev) > 1.0L/machEps){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev); } break; } Num_axpy_dprimme(primme->nLocal, -alpha_prev, w, 1, g, 1); Theta = dist_dot(g, 1, g, 1, primme); Theta = sqrt(Theta); Theta = Theta/tau_prev; c = 1.0L/sqrt(1+Theta*Theta); tau = tau_prev*Theta*c; gamma = c*c*Theta_prev*Theta_prev; eta = alpha_prev*c*c; for (i = 0; i < primme->nLocal; i++) { delta[i] = gamma*delta[i] + eta*d[i]; sol[i] = delta[i]+sol[i]; } numIts++; if (fabs(rho_prev) == 0.0L ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because abs(rho) %e\n", fabs(rho_prev)); } break; } if (tau < LTolerance) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance); } break; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance || primme->correctionParams.convTest == primme_adaptive) { /* --------------------------------------------------------*/ /* Adaptive stopping based on dynamic monitoring of eResid */ /* --------------------------------------------------------*/ /* Update the Ritz value and eigenresidual using the */ /* following recurrences. */ Delta = gamma*Delta_prev + eta*rho_prev; Beta = Beta_prev - Delta; Phi = gamma*gamma*Phi_prev + eta*eta*sigma_prev; Psi = gamma*Psi_prev + gamma*Phi_prev; Gamma = Gamma_prev + 2.0L*Psi + Phi; /* Perform the update: update the eigenvalue and the square of the */ /* residual norm. */ dot_sol = dist_dot(sol, 1, sol, 1, primme); eval_updated = shift + (eval - shift + 2*Beta + Gamma)/(1 + dot_sol); eres2_updated = (tau*tau)/(1 + dot_sol) + ((eval - shift + Beta)*(eval - shift + Beta))/(1 + dot_sol) - (eval_updated - shift)*(eval_updated - shift); /* If numerical problems, let eres about the same as tau */ if (eres2_updated < 0){ eres_updated = sqrt( (tau*tau)/(1 + dot_sol) ); } else eres_updated = sqrt(eres2_updated); /* --------------------------------------------------------*/ /* Stopping criteria */ /* --------------------------------------------------------*/ R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol); if ( tau <= R*eres_updated || eres_updated <= tau*R ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < R eres \n"); } break; } if (primme->target == primme_smallest && eval_updated > eval_prev) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated > eval_prev\n"); } break; } else if (primme->target == primme_largest && eval_updated < eval_prev){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated < eval_prev\n"); } break; } if (eres_updated < ETolerance) { /* tau < LTol has been checked */ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated); } break; } eval_prev = eval_updated; if (primme->printLevel >= 4 && primme->procID == 0) { fprintf(primme->outputFile, "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats. numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated); fflush(primme->outputFile); } /* --------------------------------------------------------*/ } /* End of if adaptive JDQMR section */ /* --------------------------------------------------------*/ else if (primme->printLevel >= 4 && primme->procID == 0) { /* Report for non adaptive inner iterations */ fprintf(primme->outputFile, "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs, primme_wTimer(0),tau); fflush(primme->outputFile); } if (numIts < maxIterations) { ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, w, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); ret = APPLYPROJECTEDPRECONDITIONER_FAILURE; break; } rho = dist_dot(g, 1, w, 1, primme); beta = rho/rho_prev; Num_axpy_dprimme(primme->nLocal, beta, d, 1, w, 1); /* Alternate between w and d buffers in successive iterations * This saves a memory copy. */ ptmp = d; d = w; w = ptmp; rho_prev = rho; tau_prev = tau; Theta_prev = Theta; Delta_prev = Delta; Beta_prev = Beta; Phi_prev = Phi; Psi_prev = Psi; Gamma_prev = Gamma; } /* --------------------------------------------------------*/ } /* End of QMR main while loop */ /* --------------------------------------------------------*/ *rnorm = eres_updated; return 0; }
int lock_vectors_dprimme(double tol, double *aNormEstimate, double *maxConvTol, int *basisSize, int *numLocked, int *numGuesses, int *nextGuess, double *V, double *W, double *H, double *evecsHat, double *M, double *UDU, int *ipivot, double *hVals, double *hVecs, double *evecs, double *evals, int *perm, double machEps, double *resNorms, int *numPrevRitzVals, double *prevRitzVals, int *flag, double *rwork, int rworkSize, int *iwork, int *LockingProblem, primme_params *primme) { int i; /* Loop counter */ int numCandidates; /* Number of targeted Ritz vectors converged before */ /* restart. */ int newStart; /* Index in evecs where the locked vectors were added */ int numNewVectors; /* Number of vectors added to the basis to replace */ /* locked vectors. */ int candidate; /* Index of Ritz vector to be checked for convergence */ int numDeflated; /* The number of vectors actually locked */ int numReplaced; /* The number of locked vectors that were replaced by */ /* initial guesses. */ int numRecentlyLocked; /* Number of vectors locked. */ int evecsSize; /* The number of orthogonalization constraints plus */ /* the number of locked vectors. */ int ret; /* Used to store return values. */ int workinW; /* Flag whether an active W vector is used as tempwork*/ int entireSpace = (*basisSize+*numLocked >= primme->n); /* bool if entire*/ /* space is built, so current ritzvecs are accurate. */ double *norms, *tnorms; /* Array of residual norms, and temp array */ double attainableTol; /* Used to verify a practical convergence problem*/ double *residual; /* Stores residual vector */ double ztmp; /* temp variable */ /* ----------------------------------------*/ /* Assign temporary work space for residual*/ /* ----------------------------------------*/ if (*basisSize < primme->maxBasisSize) { /* compute residuals in the next open slot of W */ residual = &W[*basisSize*primme->nLocal]; workinW = 0; } else { /* This basiSize==maxBasisSize, immediately after restart, can only occur * if the basisSize + numLocked = n (at which case we lock everything) * OR if (numConverged + restartSize + numPrevRetain > basisSize), ie. * too many converged. Since we do not know which evec will be locked * we use the W[LAST] as temporary space, but only after W[LAST] has * been used to compute residual(LAST) -the while loop starts from LAST. * After all lockings, if the LAST evec was not locked, we must * recompute W[LAST]=Av. This matvec event is extremely infrequent */ residual = &W[(*basisSize-1)*primme->nLocal]; workinW = 1; } /* -------------------------------------*/ /* Set the tolerance, and attainableTol */ /* -------------------------------------*/ if (primme->aNorm <= 0.0L) { tol = tol * (*aNormEstimate); } attainableTol=max(tol,sqrt(primme->numOrthoConst+*numLocked)*(*maxConvTol)); /* -------------------------------------------------------- */ /* Determine how many Ritz vectors converged before restart */ /* -------------------------------------------------------- */ i = *basisSize - 1; while ((flag[i] == LOCK_IT ||flag[i] == UNCONDITIONAL_LOCK_IT) && i >= 0) { i--; } numCandidates = *basisSize - i - 1; if (numCandidates == 0) { return 0; } /* --------------------------------- */ /* Compute residuals and their norms */ /* --------------------------------- */ tnorms = (double *) rwork; norms = tnorms + numCandidates; for (i = *basisSize-1, candidate = numCandidates-1; i >= *basisSize-numCandidates; i--, candidate--) { Num_dcopy_dprimme(primme->nLocal, &W[primme->nLocal*i], 1, residual, 1); ztmp = -hVals[i]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*i],1,residual,1); tnorms[candidate] = Num_dot_dprimme(primme->nLocal,residual,1,residual,1); } /* Global sum the dot products */ (*primme->globalSumDouble)(tnorms, norms, &numCandidates, primme); numRecentlyLocked = 0; /* ------------------------------------------------------------------- */ /* Check the convergence of each residual norm. If the Ritz vector is */ /* converged, then lock it. */ /* ------------------------------------------------------------------- */ for (i = *basisSize - numCandidates, candidate = 0; i < *basisSize; i++, candidate++) { norms[candidate] = sqrt(norms[candidate]); /* If the vector has become (regularly or practically) unconverged, */ /* then flag it, else lock it and replace it with an initial guess, */ /* if one is available. Exception: If the entire space is spanned, */ /* we can't do better, so lock it. */ if ((flag[i]!=UNCONDITIONAL_LOCK_IT && norms[candidate] >= tol && !entireSpace ) || (flag[i]==UNCONDITIONAL_LOCK_IT && norms[candidate] >= attainableTol && !entireSpace )) { flag[i] = UNCONVERGED; } else { /* If an unconditional lock has become converged, show it and */ /* record the max converged tolerance accordingly */ if (norms[candidate]<tol) { flag[i]=LOCK_IT; *maxConvTol = max(*maxConvTol, tol); } else { *maxConvTol = max(*maxConvTol, norms[candidate]); *LockingProblem = 1; } if (primme->printLevel >= 2 && primme->procID == 0) { fprintf(primme->outputFile, "Lock epair[ %d ]= %e norm %.4e Mvecs %d Time %.4e Flag %d\n", *numLocked+1, hVals[i], norms[candidate], primme->stats.numMatvecs,primme_wTimer(0),flag[i]); fflush(primme->outputFile); } /* Copy the converged Ritz vector to the evecs array and */ /* insert the converged Ritz value in sorted order within */ /* the evals array. */ Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1, &evecs[primme->nLocal*(primme->numOrthoConst + *numLocked)], 1); insertionSort(hVals[i], evals, norms[candidate], resNorms, perm, *numLocked, primme); /* If there are any initial guesses remaining, then copy it */ /* into the basis, else flag the vector as locked so it may */ /* be discarded later. */ if (*numGuesses > 0) { Num_dcopy_dprimme(primme->nLocal, &evecs[primme->nLocal*(*nextGuess)], 1, &V[primme->nLocal*i], 1); flag[i] = INITIAL_GUESS; *numGuesses = *numGuesses - 1; *nextGuess = *nextGuess + 1; } else { flag[i] = LOCKED; } *numLocked = *numLocked + 1; numRecentlyLocked++; } } evecsSize = primme->numOrthoConst + *numLocked; /* -------------------------------------------------------------------- */ /* If a W vector was used as workspace for residual AND its evec has */ /* not been locked out, recompute it, W = A*v. This is rare. */ /* -------------------------------------------------------------------- */ if (workinW && flag[*basisSize-1] != LOCKED) { update_W_dprimme(V, W, *basisSize-1, 1, primme); } /* -------------------------------------------------------------------- */ /* Return IF all target Ritz vectors have been locked, ELSE update the */ /* evecsHat array by applying the preconditioner (if preconditioning is */ /* needed, and JDQMR with right, skew Q projector is applied */ /* -------------------------------------------------------------------- */ if (*numLocked >= primme->numEvals) { return 0; } else if (UDU != NULL) { /* Compute K^{-1}x for all newly locked eigenvectors */ newStart = primme->nLocal*(evecsSize - numRecentlyLocked); (*primme->applyPreconditioner)( &evecs[newStart], &evecsHat[newStart], &numRecentlyLocked, primme); primme->stats.numPreconds += numRecentlyLocked; /* Update the projection evecs'*evecsHat now that evecs and evecsHat */ /* have been expanded by numRecentlyLocked columns. Required */ /* workspace is numLocked*numEvals. The most ever needed would be */ /* maxBasisSize*numEvals. */ update_projection_dprimme(evecs, evecsHat, M, evecsSize-numRecentlyLocked, primme->numOrthoConst+primme->numEvals, numRecentlyLocked, rwork, primme); ret = UDUDecompose_dprimme(M, UDU, ipivot, evecsSize, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* --------------------------------------------------------------------- */ /* Swap, towards the end of the basis, vectors that were locked but not */ /* replaced by new initial guesses. */ /* --------------------------------------------------------------------- */ numDeflated = swap_flagVecs_toEnd(*basisSize, LOCKED, V, W, H, hVals, flag, primme); /* --------------------------------------------------------------------- */ /* Reduce the basis size by numDeflated and swap the new initial guesses */ /* towards the end of the basis. */ /* --------------------------------------------------------------------- */ numReplaced = swap_flagVecs_toEnd(*basisSize-numDeflated, INITIAL_GUESS, V, W, H, hVals, flag, primme); *basisSize = *basisSize - (numDeflated + numReplaced); if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "numDeflated: %d numReplaced: %d \ basisSize: %d\n", numDeflated, numReplaced, *basisSize); }