static void print_residuals(double *ritzValues, double *blockNorms, int numConverged, int numLocked, int *iev, int left, int right, primme_params *primme) { int i; /* Loop variable */ int found; /* Loop variable */ if (primme->printLevel >= 3 && primme->procID == 0) { if (primme->locking) found = numLocked; else found = numConverged; for (i=left; i <= right; i++) { fprintf(primme->outputFile, "OUT %d conv %d blk %d MV %d Sec %E EV %13E |r| %.3E\n", primme->stats.numOuterIterations, found, i, primme->stats.numMatvecs, primme_wTimer(0), ritzValues[iev[i]], blockNorms[i]); } fflush(primme->outputFile); } }
int inner_solve_dprimme(double *x, double *r, double *rnorm, double *evecs, double *evecsHat, double *UDU, int *ipivot, double *xKinvx, double *Lprojector, double *RprojectorQ, double *RprojectorX, int sizeLprojector, int sizeRprojectorQ, int sizeRprojectorX, double *sol, double eval, double shift, double eresTol, double aNormEstimate, double machEps, double *rwork, int rworkSize, primme_params *primme) { int i; /* loop variable */ int workSpaceSize; /* Size of local work array. */ int numIts; /* Number of inner iterations */ int ret; /* Return value used for error checking. */ int maxIterations; /* The maximum # iterations allowed. Depends on primme */ double *workSpace; /* Workspace needed by UDU routine */ /* QMR parameters */ double *g, *d, *delta, *w, *ptmp; double alpha_prev, beta, rho_prev, rho; double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; double ztmp; /* Parameters used to dynamically update eigenpair */ double Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev, eta; double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R; double Gamma_prev, Phi_prev; double Gamma, Phi; double gamma; /* The convergence criteria of the inner linear system must satisfy: */ /* || current residual || <= relativeTolerance * || initial residual || */ /* + absoluteTol */ double relativeTolerance; double absoluteTolerance; double LTolerance, ETolerance; /* Some constants */ double tpone = +1.0e+00, tzero = +0.0e+00; /* -------------------------------------------*/ /* Subdivide the workspace into needed arrays */ /* -------------------------------------------*/ g = rwork; d = g + primme->nLocal; delta = d + primme->nLocal; w = delta + primme->nLocal; workSpace = w + primme->nLocal; /* This needs at least 2*numOrth+NumEvals) */ workSpaceSize = rworkSize - (workSpace - rwork); /* -----------------------------------------*/ /* Set up convergence criteria by Tolerance */ /* -----------------------------------------*/ if (primme->aNorm <= 0.0L) { absoluteTolerance = aNormEstimate*machEps; eresTol = eresTol*aNormEstimate; } else { absoluteTolerance = primme->aNorm*machEps; } tau_prev = tau_init = *rnorm; /* Assumes zero initial guess */ LTolerance = eresTol; /* Andreas: note that eigenresidual tol may not be achievable, because we */ /* iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. */ if (primme->correctionParams.convTest == primme_adaptive) { ETolerance = max(eresTol/1.8L, absoluteTolerance); LTolerance = ETolerance; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) { LTolerance = max(eresTol/1.8L, absoluteTolerance); ETolerance = max(tau_init*0.1L, LTolerance); } else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) { relativeTolerance = pow(primme->correctionParams.relTolBase, (double)-primme->stats.numOuterIterations); LTolerance = relativeTolerance * tau_init + absoluteTolerance + eresTol; /*printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n", */ /*relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate);*/ } /* --------------------------------------------------------*/ /* Set up convergence criteria by max number of iterations */ /* --------------------------------------------------------*/ /* compute first total number of remaining matvecs */ maxIterations = primme->maxMatvecs - primme->stats.numMatvecs; /* Perform primme.maxInnerIterations, but do not exceed total remaining */ if (primme->correctionParams.maxInnerIterations > 0) { maxIterations = min(primme->correctionParams.maxInnerIterations, maxIterations); } /* --------------------------------------------------------*/ /* Rest of initializations */ /* --------------------------------------------------------*/ /* Assume zero initial guess */ Num_dcopy_dprimme(primme->nLocal, r, 1, g, 1); ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, d, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); return APPLYPROJECTEDPRECONDITIONER_FAILURE; } Theta_prev = 0.0L; eval_prev = eval; rho_prev = dist_dot(g, 1, d, 1, primme); /* Initialize recurrences used to dynamically update the eigenpair */ Beta_prev = Delta_prev = Psi_prev = 0.0L; Gamma_prev = Phi_prev = 0.0L; /* other initializations */ for (i = 0; i < primme->nLocal; i++) { delta[i] = tzero; sol[i] = tzero; } numIts = 0; /*----------------------------------------------------------------------*/ /*------------------------ Begin Inner Loop ----------------------------*/ /*----------------------------------------------------------------------*/ while (numIts < maxIterations) { apply_projected_matrix(d, shift, Lprojector, sizeLprojector, w, workSpace, primme); sigma_prev = dist_dot(d, 1, w, 1, primme); if (sigma_prev == 0.0L) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev); } break; } alpha_prev = rho_prev/sigma_prev; if (fabs(alpha_prev) < machEps || fabs(alpha_prev) > 1.0L/machEps){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev); } break; } Num_axpy_dprimme(primme->nLocal, -alpha_prev, w, 1, g, 1); Theta = dist_dot(g, 1, g, 1, primme); Theta = sqrt(Theta); Theta = Theta/tau_prev; c = 1.0L/sqrt(1+Theta*Theta); tau = tau_prev*Theta*c; gamma = c*c*Theta_prev*Theta_prev; eta = alpha_prev*c*c; for (i = 0; i < primme->nLocal; i++) { delta[i] = gamma*delta[i] + eta*d[i]; sol[i] = delta[i]+sol[i]; } numIts++; if (fabs(rho_prev) == 0.0L ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because abs(rho) %e\n", fabs(rho_prev)); } break; } if (tau < LTolerance) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance); } break; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance || primme->correctionParams.convTest == primme_adaptive) { /* --------------------------------------------------------*/ /* Adaptive stopping based on dynamic monitoring of eResid */ /* --------------------------------------------------------*/ /* Update the Ritz value and eigenresidual using the */ /* following recurrences. */ Delta = gamma*Delta_prev + eta*rho_prev; Beta = Beta_prev - Delta; Phi = gamma*gamma*Phi_prev + eta*eta*sigma_prev; Psi = gamma*Psi_prev + gamma*Phi_prev; Gamma = Gamma_prev + 2.0L*Psi + Phi; /* Perform the update: update the eigenvalue and the square of the */ /* residual norm. */ dot_sol = dist_dot(sol, 1, sol, 1, primme); eval_updated = shift + (eval - shift + 2*Beta + Gamma)/(1 + dot_sol); eres2_updated = (tau*tau)/(1 + dot_sol) + ((eval - shift + Beta)*(eval - shift + Beta))/(1 + dot_sol) - (eval_updated - shift)*(eval_updated - shift); /* If numerical problems, let eres about the same as tau */ if (eres2_updated < 0){ eres_updated = sqrt( (tau*tau)/(1 + dot_sol) ); } else eres_updated = sqrt(eres2_updated); /* --------------------------------------------------------*/ /* Stopping criteria */ /* --------------------------------------------------------*/ R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol); if ( tau <= R*eres_updated || eres_updated <= tau*R ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < R eres \n"); } break; } if (primme->target == primme_smallest && eval_updated > eval_prev) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated > eval_prev\n"); } break; } else if (primme->target == primme_largest && eval_updated < eval_prev){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated < eval_prev\n"); } break; } if (eres_updated < ETolerance) { /* tau < LTol has been checked */ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated); } break; } eval_prev = eval_updated; if (primme->printLevel >= 4 && primme->procID == 0) { fprintf(primme->outputFile, "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats. numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated); fflush(primme->outputFile); } /* --------------------------------------------------------*/ } /* End of if adaptive JDQMR section */ /* --------------------------------------------------------*/ else if (primme->printLevel >= 4 && primme->procID == 0) { /* Report for non adaptive inner iterations */ fprintf(primme->outputFile, "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs, primme_wTimer(0),tau); fflush(primme->outputFile); } if (numIts < maxIterations) { ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, w, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); ret = APPLYPROJECTEDPRECONDITIONER_FAILURE; break; } rho = dist_dot(g, 1, w, 1, primme); beta = rho/rho_prev; Num_axpy_dprimme(primme->nLocal, beta, d, 1, w, 1); /* Alternate between w and d buffers in successive iterations * This saves a memory copy. */ ptmp = d; d = w; w = ptmp; rho_prev = rho; tau_prev = tau; Theta_prev = Theta; Delta_prev = Delta; Beta_prev = Beta; Phi_prev = Phi; Psi_prev = Psi; Gamma_prev = Gamma; } /* --------------------------------------------------------*/ } /* End of QMR main while loop */ /* --------------------------------------------------------*/ *rnorm = eres_updated; return 0; }
int lock_vectors_dprimme(double tol, double *aNormEstimate, double *maxConvTol, int *basisSize, int *numLocked, int *numGuesses, int *nextGuess, double *V, double *W, double *H, double *evecsHat, double *M, double *UDU, int *ipivot, double *hVals, double *hVecs, double *evecs, double *evals, int *perm, double machEps, double *resNorms, int *numPrevRitzVals, double *prevRitzVals, int *flag, double *rwork, int rworkSize, int *iwork, int *LockingProblem, primme_params *primme) { int i; /* Loop counter */ int numCandidates; /* Number of targeted Ritz vectors converged before */ /* restart. */ int newStart; /* Index in evecs where the locked vectors were added */ int numNewVectors; /* Number of vectors added to the basis to replace */ /* locked vectors. */ int candidate; /* Index of Ritz vector to be checked for convergence */ int numDeflated; /* The number of vectors actually locked */ int numReplaced; /* The number of locked vectors that were replaced by */ /* initial guesses. */ int numRecentlyLocked; /* Number of vectors locked. */ int evecsSize; /* The number of orthogonalization constraints plus */ /* the number of locked vectors. */ int ret; /* Used to store return values. */ int workinW; /* Flag whether an active W vector is used as tempwork*/ int entireSpace = (*basisSize+*numLocked >= primme->n); /* bool if entire*/ /* space is built, so current ritzvecs are accurate. */ double *norms, *tnorms; /* Array of residual norms, and temp array */ double attainableTol; /* Used to verify a practical convergence problem*/ double *residual; /* Stores residual vector */ double ztmp; /* temp variable */ /* ----------------------------------------*/ /* Assign temporary work space for residual*/ /* ----------------------------------------*/ if (*basisSize < primme->maxBasisSize) { /* compute residuals in the next open slot of W */ residual = &W[*basisSize*primme->nLocal]; workinW = 0; } else { /* This basiSize==maxBasisSize, immediately after restart, can only occur * if the basisSize + numLocked = n (at which case we lock everything) * OR if (numConverged + restartSize + numPrevRetain > basisSize), ie. * too many converged. Since we do not know which evec will be locked * we use the W[LAST] as temporary space, but only after W[LAST] has * been used to compute residual(LAST) -the while loop starts from LAST. * After all lockings, if the LAST evec was not locked, we must * recompute W[LAST]=Av. This matvec event is extremely infrequent */ residual = &W[(*basisSize-1)*primme->nLocal]; workinW = 1; } /* -------------------------------------*/ /* Set the tolerance, and attainableTol */ /* -------------------------------------*/ if (primme->aNorm <= 0.0L) { tol = tol * (*aNormEstimate); } attainableTol=max(tol,sqrt(primme->numOrthoConst+*numLocked)*(*maxConvTol)); /* -------------------------------------------------------- */ /* Determine how many Ritz vectors converged before restart */ /* -------------------------------------------------------- */ i = *basisSize - 1; while ((flag[i] == LOCK_IT ||flag[i] == UNCONDITIONAL_LOCK_IT) && i >= 0) { i--; } numCandidates = *basisSize - i - 1; if (numCandidates == 0) { return 0; } /* --------------------------------- */ /* Compute residuals and their norms */ /* --------------------------------- */ tnorms = (double *) rwork; norms = tnorms + numCandidates; for (i = *basisSize-1, candidate = numCandidates-1; i >= *basisSize-numCandidates; i--, candidate--) { Num_dcopy_dprimme(primme->nLocal, &W[primme->nLocal*i], 1, residual, 1); ztmp = -hVals[i]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*i],1,residual,1); tnorms[candidate] = Num_dot_dprimme(primme->nLocal,residual,1,residual,1); } /* Global sum the dot products */ (*primme->globalSumDouble)(tnorms, norms, &numCandidates, primme); numRecentlyLocked = 0; /* ------------------------------------------------------------------- */ /* Check the convergence of each residual norm. If the Ritz vector is */ /* converged, then lock it. */ /* ------------------------------------------------------------------- */ for (i = *basisSize - numCandidates, candidate = 0; i < *basisSize; i++, candidate++) { norms[candidate] = sqrt(norms[candidate]); /* If the vector has become (regularly or practically) unconverged, */ /* then flag it, else lock it and replace it with an initial guess, */ /* if one is available. Exception: If the entire space is spanned, */ /* we can't do better, so lock it. */ if ((flag[i]!=UNCONDITIONAL_LOCK_IT && norms[candidate] >= tol && !entireSpace ) || (flag[i]==UNCONDITIONAL_LOCK_IT && norms[candidate] >= attainableTol && !entireSpace )) { flag[i] = UNCONVERGED; } else { /* If an unconditional lock has become converged, show it and */ /* record the max converged tolerance accordingly */ if (norms[candidate]<tol) { flag[i]=LOCK_IT; *maxConvTol = max(*maxConvTol, tol); } else { *maxConvTol = max(*maxConvTol, norms[candidate]); *LockingProblem = 1; } if (primme->printLevel >= 2 && primme->procID == 0) { fprintf(primme->outputFile, "Lock epair[ %d ]= %e norm %.4e Mvecs %d Time %.4e Flag %d\n", *numLocked+1, hVals[i], norms[candidate], primme->stats.numMatvecs,primme_wTimer(0),flag[i]); fflush(primme->outputFile); } /* Copy the converged Ritz vector to the evecs array and */ /* insert the converged Ritz value in sorted order within */ /* the evals array. */ Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1, &evecs[primme->nLocal*(primme->numOrthoConst + *numLocked)], 1); insertionSort(hVals[i], evals, norms[candidate], resNorms, perm, *numLocked, primme); /* If there are any initial guesses remaining, then copy it */ /* into the basis, else flag the vector as locked so it may */ /* be discarded later. */ if (*numGuesses > 0) { Num_dcopy_dprimme(primme->nLocal, &evecs[primme->nLocal*(*nextGuess)], 1, &V[primme->nLocal*i], 1); flag[i] = INITIAL_GUESS; *numGuesses = *numGuesses - 1; *nextGuess = *nextGuess + 1; } else { flag[i] = LOCKED; } *numLocked = *numLocked + 1; numRecentlyLocked++; } } evecsSize = primme->numOrthoConst + *numLocked; /* -------------------------------------------------------------------- */ /* If a W vector was used as workspace for residual AND its evec has */ /* not been locked out, recompute it, W = A*v. This is rare. */ /* -------------------------------------------------------------------- */ if (workinW && flag[*basisSize-1] != LOCKED) { update_W_dprimme(V, W, *basisSize-1, 1, primme); } /* -------------------------------------------------------------------- */ /* Return IF all target Ritz vectors have been locked, ELSE update the */ /* evecsHat array by applying the preconditioner (if preconditioning is */ /* needed, and JDQMR with right, skew Q projector is applied */ /* -------------------------------------------------------------------- */ if (*numLocked >= primme->numEvals) { return 0; } else if (UDU != NULL) { /* Compute K^{-1}x for all newly locked eigenvectors */ newStart = primme->nLocal*(evecsSize - numRecentlyLocked); (*primme->applyPreconditioner)( &evecs[newStart], &evecsHat[newStart], &numRecentlyLocked, primme); primme->stats.numPreconds += numRecentlyLocked; /* Update the projection evecs'*evecsHat now that evecs and evecsHat */ /* have been expanded by numRecentlyLocked columns. Required */ /* workspace is numLocked*numEvals. The most ever needed would be */ /* maxBasisSize*numEvals. */ update_projection_dprimme(evecs, evecsHat, M, evecsSize-numRecentlyLocked, primme->numOrthoConst+primme->numEvals, numRecentlyLocked, rwork, primme); ret = UDUDecompose_dprimme(M, UDU, ipivot, evecsSize, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* --------------------------------------------------------------------- */ /* Swap, towards the end of the basis, vectors that were locked but not */ /* replaced by new initial guesses. */ /* --------------------------------------------------------------------- */ numDeflated = swap_flagVecs_toEnd(*basisSize, LOCKED, V, W, H, hVals, flag, primme); /* --------------------------------------------------------------------- */ /* Reduce the basis size by numDeflated and swap the new initial guesses */ /* towards the end of the basis. */ /* --------------------------------------------------------------------- */ numReplaced = swap_flagVecs_toEnd(*basisSize-numDeflated, INITIAL_GUESS, V, W, H, hVals, flag, primme); *basisSize = *basisSize - (numDeflated + numReplaced); if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "numDeflated: %d numReplaced: %d \ basisSize: %d\n", numDeflated, numReplaced, *basisSize); }
int zprimme(double *evals, Complex_Z *evecs, double *resNorms, primme_params *primme) { int ret; int *perm; double machEps; /* ------------------ */ /* zero out the timer */ /* ------------------ */ primme_wTimer(1); /* ---------------------------- */ /* Clear previous error reports */ /* ---------------------------- */ primme_DeleteStackTrace(primme); /* ----------------------- */ /* Find machine precision */ /* ----------------------- */ machEps = Num_dlamch_primme("E"); /* ------------------ */ /* Set some defaults */ /* ------------------ */ primme_set_defaults(primme); /* -------------------------------------------------------------- */ /* If needed, we are ready to estimate required memory and return */ /* -------------------------------------------------------------- */ if (evals == NULL && evecs == NULL && resNorms == NULL) return allocate_workspace(primme, FALSE); /* ----------------------------------------------------- */ /* Reset random number seed if inappropriate for DLARENV */ /* Yields unique quadruples per proc if procID < 4096^3 */ /* ----------------------------------------------------- */ if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = primme->procID % 4096; if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = (int)(primme->procID/4096+1) % 4096; if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = (int)((primme->procID/4096)/4096+2) % 4096; if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096; /* ----------------------- */ /* Set default convTetFun */ /* ----------------------- */ if (!primme->convTestFun) { primme->convTestFun = convTestFunAbsolute; } /* ------------------------------------------------------- */ /* Check primme input data for bounds, correct values etc. */ /* ------------------------------------------------------- */ ret = check_input(evals, evecs, resNorms, primme); if (ret != 0) { primme_PushErrorMessage(Primme_zprimme, Primme_check_input, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ret; } /* ----------------------------------------------------------------------- */ /* Compute AND allocate memory requirements for main_iter and subordinates */ /* ----------------------------------------------------------------------- */ ret = allocate_workspace(primme, TRUE); if (ret != 0) { primme_PushErrorMessage(Primme_zprimme, Primme_allocate_workspace, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ALLOCATE_WORKSPACE_FAILURE; } /* --------------------------------------------------------- */ /* Allocate workspace that will be needed locally by zprimme */ /* --------------------------------------------------------- */ perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array"); if (perm == NULL) { primme_PushErrorMessage(Primme_zprimme, Primme_malloc, 0, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MALLOC_FAILURE; } /*----------------------------------------------------------------------*/ /* Call the solver */ /*----------------------------------------------------------------------*/ ret = main_iter_zprimme(evals, perm, evecs, resNorms, machEps, primme->intWork, primme->realWork, primme); if (ret < 0) { primme_PushErrorMessage(Primme_zprimme, Primme_main_iter, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MAIN_ITER_FAILURE; } /*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/ /* If locking is engaged, the converged Ritz vectors are stored in the */ /* order they converged. They must then be permuted so that they */ /* correspond to the sorted Ritz values in evals. */ /*----------------------------------------------------------------------*/ permute_vecs_zprimme(&evecs[primme->numOrthoConst], primme->nLocal, primme->initSize, primme->nLocal, perm, (Complex_Z*)primme->realWork, (int*)primme->intWork); free(perm); primme->stats.elapsedTime = primme_wTimer(0); return(0); }
int dprimme(double *evals, double *evecs, double *resNorms, primme_params *primme) { int ret; int *perm; double machEps; /* ------------------ */ /* zero out the timer */ /* ------------------ */ primme_wTimer(1); /* ---------------------------- */ /* Clear previous error reports */ /* ---------------------------- */ primme_DeleteStackTrace(primme); /* ----------------------- */ /* Find machine precision */ /* ----------------------- */ machEps = Num_dlamch_primme("E"); /* ----------------------------------------- */ /* Set some defaults for sequential programs */ /* ----------------------------------------- */ if (primme->numProcs == 1) { primme->nLocal = primme->n; primme->procID = 0; if (primme->globalSumDouble == NULL) primme->globalSumDouble = primme_seq_globalSumDouble; } /* --------------------------------------------------------------------- */ /* Decide on whether to use locking (hard locking), or not (soft locking)*/ /* --------------------------------------------------------------------- */ if (primme->target != primme_smallest && primme->target != primme_largest ) { /* Locking is necessary as interior Ritz values can cross shifts */ primme->locking = 1; } else { if (primme->locking == 0) { /* use locking when not enough vectors to restart with */ primme->locking = (primme->numEvals > primme->minRestartSize); } } /* -------------------------------------------------------------- */ /* If needed, we are ready to estimate required memory and return */ /* -------------------------------------------------------------- */ if (evals == NULL && evecs == NULL && resNorms == NULL) return allocate_workspace(primme, FALSE); /* ----------------------------------------------------- */ /* Reset random number seed if inappropriate for DLARENV */ /* Yields unique quadruples per proc if procID < 4096^3 */ /* ----------------------------------------------------- */ if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = primme->procID % 4096; if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = (int)(primme->procID/4096+1) % 4096; if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = (int)((primme->procID/4096)/4096+2) % 4096; if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096; /* ------------------------------------------------------- */ /* Check primme input data for bounds, correct values etc. */ /* ------------------------------------------------------- */ ret = check_input(evals, evecs, resNorms, primme); if (ret != 0) { primme_PushErrorMessage(Primme_dprimme, Primme_check_input, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ret; } /* ----------------------------------------------------------------------- */ /* Compute AND allocate memory requirements for main_iter and subordinates */ /* ----------------------------------------------------------------------- */ ret = allocate_workspace(primme, TRUE); if (ret != 0) { primme_PushErrorMessage(Primme_dprimme, Primme_allocate_workspace, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ALLOCATE_WORKSPACE_FAILURE; } /* --------------------------------------------------------- */ /* Allocate workspace that will be needed locally by dprimme */ /* --------------------------------------------------------- */ perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array"); if (perm == NULL) { primme_PushErrorMessage(Primme_dprimme, Primme_malloc, 0, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MALLOC_FAILURE; } /*----------------------------------------------------------------------*/ /* Call the solver */ /*----------------------------------------------------------------------*/ ret = main_iter_dprimme(evals, perm, evecs, resNorms, machEps, primme->intWork, primme->realWork, primme); if (ret < 0) { primme_PushErrorMessage(Primme_dprimme, Primme_main_iter, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MAIN_ITER_FAILURE; } /*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/ /* If locking is engaged, the converged Ritz vectors are stored in the */ /* order they converged. They must then be permuted so that they */ /* correspond to the sorted Ritz values in evals. */ /*----------------------------------------------------------------------*/ permute_evecs_dprimme(&evecs[primme->numOrthoConst], perm, (double *) primme->realWork, primme->numEvals, primme->nLocal); free(perm); primme->stats.elapsedTime = primme_wTimer(0); return(0); }
int check_convergence_dprimme(double *V, double *W, double *hVecs, double *hVals, int *flags, int basisSize, int *iev, int *ievMax, double *blockNorms, int *blockSize, int numConverged, int numLocked, double *evecs, double tol, double maxConvTol, double aNormEstimate, double *rwork, primme_params *primme) { int i; /* Loop variable */ int left, right; /* Range of block vectors to be checked for convergence */ int start; /* starting index in block of converged/tobeProject vecs*/ int numVacancies; /* Number of vacant positions between left and right */ int recentlyConverged; /* The number of Ritz values declared converged */ /* since the last iteration */ int numToProject; /* Number of vectors with potential accuracy problem*/ double attainableTol; /* Used in locking to check near convergence problem*/ /* -------------------------------------------- */ /* Tolerance based on our dynamic norm estimate */ /* -------------------------------------------- */ if (primme->aNorm <= 0.0L) { tol = tol * aNormEstimate; } /* ---------------------------------------------------------------------- */ /* If locking, set tol beyond which we need to check for accuracy problem */ /* ---------------------------------------------------------------------- */ if (primme->locking) { attainableTol = sqrt(primme->numOrthoConst+numLocked)*maxConvTol; } /* --------------------------------------------------------------- */ /* Compute each Ritz vector and its corresponding residual vector. */ /* The Ritz vector and residual are stored temporarily in V and W */ /* respectively. For each Ritz vector, determine if it has */ /* converged. If it has, try to replace it with one that hasn't. */ /* --------------------------------------------------------------- */ recentlyConverged = 0; left = 0; right = *blockSize - 1; numVacancies = 1; while (numVacancies > 0 && (numConverged + recentlyConverged) < primme->numEvals) { /* Consider the newly added vectors in the block and reset counters */ numVacancies = 0; numToProject = 0; /* Copy needed hvecs into the front of the work array. */ for (i=left; i <= right; i++) { Num_dcopy_dprimme(basisSize, &hVecs[basisSize*iev[i]], 1, &rwork[basisSize*(i-left)], 1); } /* ----------------------------------------------------------------- */ /* Compute the Ritz vectors, residuals, and norms for the next */ /* blockSize unconverged Ritz vectors. The Ritz vectors will be */ /* placed from V(0,lft) to V(0,rgt) and the residual vectors from */ /* W(0,lft) to W(0,rgt). */ /* ----------------------------------------------------------------- */ /* rwork must be maxBasisSize*maxBlockSize + maxBlockSize in size, */ /* maxBasisSize*maxBlockSize holds selected hVecs to facilitate */ /* blocking, and maxBlockSize to hold the residual norms */ /* ----------------------------------------------------------------- */ compute_resnorms(V, W, rwork, hVals, basisSize, blockNorms, iev, left, right, &rwork[basisSize*(right-left+1)], primme); print_residuals(hVals, blockNorms, numConverged, numLocked, iev, left, right, primme); /* ----------------------------------------------------------------- */ /* Determine which Ritz vectors have converged < tol and flag them. */ /* ----------------------------------------------------------------- */ for (i=left; i <= right; i++) { /* ------------------------------------*/ /* If the vector is converged, flag it */ /* ------------------------------------*/ if (blockNorms[i] < tol) { flags[iev[i]] = CONVERGED; numVacancies++; if ((!primme->locking && iev[i] < primme->numEvals) || (primme->locking && ((numLocked + iev[i]) < primme->numEvals))) { recentlyConverged++; if (!primme->locking && primme->procID == 0 && primme->printLevel >= 2) { fprintf(primme->outputFile, "#Converged %d eval[ %d ]= %e norm %e Mvecs %d Time %g\n", numConverged+recentlyConverged, iev[i], hVals[iev[i]], blockNorms[i], primme->stats.numMatvecs,primme_wTimer(0)); fflush(primme->outputFile); } /* printf */ } /*if */ } /*if converged */ /* ---------------------------------------------------------------- */ /* If locking there may be an accuracy problem close to convergence */ /* Check if there is danger and set these Ritz vecs for projection */ /* ---------------------------------------------------------------- */ else if (primme->locking && numLocked > 0 && blockNorms[i] < attainableTol ) { flags[iev[i]] = TO_BE_PROJECTED; numToProject++; } } /* for */ /* ---------------------------------------------------------------- */ /* If some of the Ritz vectors in the block have converged, or need */ /* to be projected against evecs, move those flagged Ritz vectors */ /* and residuals towards the end of the block [left,right]. Also */ /* swap iev, and blockNorms for the targeted block. */ /* ---------------------------------------------------------------- */ if (numVacancies > 0 || numToProject > 0) { swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, primme->numOrthoConst + numLocked, *blockSize, left); } /* --------------------------------------------------------------- */ /* Project the TO_BE_PROJECTED residuals and check for practical */ /* convergence among them. Those practically converged evecs are */ /* swapped just before the converged ones at the end of the block. */ /* numVacancies and recentlyConverged are also updated */ /* --------------------------------------------------------------- */ if (numToProject > 0) { start = *blockSize - numVacancies - numToProject; check_practical_convergence(V, W, evecs, numLocked, basisSize, *blockSize, start, numToProject, iev, flags, blockNorms, tol, &recentlyConverged, &numVacancies, rwork, primme); } /* ---------------------------------------------------------------- */ /* Replace the vacancies, with as many unconverged vectors beyond */ /* ievMax as possible. If not enough are available reduce blockSize */ /* ---------------------------------------------------------------- */ if (numVacancies > 0) { replace_vectors(iev, flags, *blockSize, basisSize, numVacancies, &left, &right, ievMax); numVacancies = right - left + 1; *blockSize = left + numVacancies; } } /* while there are vacancies */ return recentlyConverged; }
int init_basis_dprimme(double *V, double *W, double *evecs, double *evecsHat, double *M, double *UDU, int *ipivot, double machEps, double *rwork, int rworkSize, int *basisSize, int *nextGuess, int *numGuesses, double *timeForMV, primme_params *primme) { int ret; /* Return value */ int currentSize; /*-----------------------------------------------------------------------*/ /* Orthogonalize the orthogonalization constraints provided by the user. */ /* If a preconditioner is given and inner iterations are to be */ /* performed, then initialize M. */ /*-----------------------------------------------------------------------*/ if (primme->numOrthoConst > 0) { ret = ortho_dprimme(evecs, primme->nLocal, 0, primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } /* Initialize evecsHat, M, and its factorization UDU,ipivot. This */ /* allows the orthogonalization constraints to be included in the */ /* projector (I-QQ'). Only needed if there is preconditioning, and */ /* JDqmr inner iterations with a right, skew projector. Only in */ /* that case, is UDU not NULL */ if (UDU != NULL) { (*primme->applyPreconditioner) (evecs, evecsHat, &primme->numOrthoConst, primme); primme->stats.numPreconds += primme->numOrthoConst; update_projection_dprimme(evecs, evecsHat, M, 0, primme->numOrthoConst+primme->numEvals, primme->numOrthoConst, rwork, primme); ret = UDUDecompose_dprimme(M, UDU, ipivot, primme->numOrthoConst, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if evecsHat and M=evecs'evecsHat, UDU are needed */ } /* if numOrthoCont >0 */ /*-----------------------------------------------------------------------*/ /* No locking */ /*-----------------------------------------------------------------------*/ if (!primme->locking) { /* Handle case when no initial guesses are provided by the user */ if (primme->initSize == 0) { ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { /* Handle case when some or all initial guesses are provided by */ /* the user */ /* Copy over the initial guesses provided by the user */ Num_dcopy_dprimme(primme->nLocal*primme->initSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); /* Orthonormalize the guesses provided by the user */ ret = ortho_dprimme(V, primme->nLocal, 0, primme->initSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_dprimme(V, W, 0, primme->initSize, primme); /* An insufficient number of initial guesses were provided by */ /* the user. Generate a block Krylov space to fill the */ /* remaining vacancies. */ if (primme->initSize < primme->minRestartSize) { ret = init_block_krylov(V, W, primme->initSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { *basisSize = primme->initSize; } } *numGuesses = 0; *nextGuess = 0; } else { /*-----------------------------------------------------------------------*/ /* Locking */ /*-----------------------------------------------------------------------*/ *numGuesses = primme->initSize; *nextGuess = primme->numOrthoConst; /* If some initial guesses are available, copy them to the basis */ /* and orthogonalize them against themselves and the orthogonalization */ /* constraints. */ if (primme->initSize > 0) { currentSize = min(primme->initSize, primme->minRestartSize); Num_dcopy_dprimme(primme->nLocal*currentSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); ret = ortho_dprimme(V, primme->nLocal, 0, currentSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_dprimme(V, W, 0, currentSize, primme); *numGuesses = *numGuesses - currentSize; *nextGuess = *nextGuess + currentSize; } else { currentSize = 0; } /* If an insufficient number of guesses was provided, then fill */ /* the remaining vacancies with a block Krylov space. */ if (currentSize < primme->minRestartSize) { ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } } *basisSize = primme->minRestartSize; } /* ----------------------------------------------------------- */ /* If time measurements are needed, waste one MV + one Precond */ /* Put dummy results in the first open space of W (currentSize)*/ /* ----------------------------------------------------------- */ if (primme->dynamicMethodSwitch) { currentSize = primme->nLocal*(*basisSize); ret = 1; *timeForMV = primme_wTimer(0); (*primme->matrixMatvec)(V, &W[currentSize], &ret, primme); *timeForMV = primme_wTimer(0) - *timeForMV; primme->stats.numMatvecs += 1; } return 0; }