static int insert_submatrix(Complex_Z *H, double *hVals, Complex_Z *hVecs, int restartSize, Complex_Z *subMatrix, int numPrevRetained, int indexOfPreviousVecs, int rworkSize, Complex_Z *rwork, primme_params *primme) { int info; int i, j; double *doubleWork; /* ---------------------------------------------------------------------- */ /* Copy the submatrix into H with the upper right corner of the submatrix */ /* at H(indexOfPreviousVecs, indexOfPreviousVecs). */ /* ---------------------------------------------------------------------- */ for (j = indexOfPreviousVecs; j < indexOfPreviousVecs+numPrevRetained; j++) { for (i = indexOfPreviousVecs; i <= j; i++) { H[primme->maxBasisSize*j+i] = subMatrix[numPrevRetained*(j-indexOfPreviousVecs) +(i-indexOfPreviousVecs)]; } } /* ----------------------------------------- */ /* Solve the eigenproblrm for the submatrix. */ /* ----------------------------------------- */ /* -------------------------------------------------------------------- */ /* Assign also 3N double work space after the 2N complex rwork finishes */ /* -------------------------------------------------------------------- */ doubleWork = (double *) (rwork+ 2*numPrevRetained); Num_zheev_zprimme("V", "U", numPrevRetained, subMatrix, numPrevRetained, &hVals[indexOfPreviousVecs], rwork, rworkSize, doubleWork, &info); if (info != 0) { primme_PushErrorMessage(Primme_insert_submatrix, Primme_num_dsyev, info, __FILE__, __LINE__, primme); return NUM_DSYEV_FAILURE; } /* ---------------------------------------------------------------------- */ /* The eigenvectors of the submatrix are of dimension numPrevRetained */ /* and reside in the submatrix array after dsyev is called. The */ /* eigenvectors of the new H corresponding to the submatrix are easily */ /* constructed using the eigenvectors of the submatrix. */ /* ---------------------------------------------------------------------- */ for (j = indexOfPreviousVecs; j < indexOfPreviousVecs+numPrevRetained; j++) { for (i = indexOfPreviousVecs; i < indexOfPreviousVecs + numPrevRetained; i++) { hVecs[restartSize*j+i] = subMatrix[numPrevRetained*(j-indexOfPreviousVecs)+ (i-indexOfPreviousVecs)]; } } return 0; }
static int apply_projected_preconditioner(double *v, double *Q, double *RprojectorQ, double *x, double *RprojectorX, int sizeRprojectorQ, int sizeRprojectorX, double *xKinvx, double *UDU, int *ipivot, double *result, double *rwork, primme_params *primme) { int ONE = 1; int ret; if (primme->correctionParams.precondition) { /* Place K^{-1}v in result */ (*primme->applyPreconditioner)(v, result, &ONE, primme); primme->stats.numPreconds += 1; } else { Num_dcopy_dprimme(primme->nLocal, v, 1, result, 1); } ret = apply_skew_projector(Q, RprojectorQ, UDU, ipivot, sizeRprojectorQ, result, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_apply_projected_preconditioner, Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme); return APPLYSKEWPROJECTOR_FAILURE; } apply_skew_projector(x, RprojectorX, xKinvx, ipivot, sizeRprojectorX, result, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_apply_projected_preconditioner, Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme); return APPLYSKEWPROJECTOR_FAILURE; } return 0; }
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, int numLocked, int numConvergedStored, double *ritzVals, double *prevRitzVals, int *flags, int basisSize, double *blockNorms, int *iev, int blockSize, double eresTol, double machEps, double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, primme_params *primme) { int blockIndex; /* Loop index. Ranges from 0..blockSize-1. */ int ritzIndex; /* Ritz value index blockIndex corresponds to. */ /* Possible values range from 0..basisSize-1. */ int sortedIndex; /* Ritz value index in sortedRitzVals, blockIndex */ /* corresponds to. Range 0..numLocked+basisSize-1 */ int neededRsize; /* Needed size for rwork. If not enough return */ int linSolverRWorkSize; /* Size of the linSolverRWork array. */ int *ilev; /* Array of size blockSize. Maps the target Ritz */ /* values to their positions in the sortedEvals */ /* array. */ int sizeLprojector; /* Sizes of the various left/right projectors */ int sizeRprojectorQ; /* These will be 0/1/or numOrthConstr+numLocked */ int sizeRprojectorX; /* or numOrthConstr+numConvergedStored w/o locking*/ static int numPrevRitzVals = 0; /* Size of prevRitzVals */ int ret; /* Return code. */ Complex_Z *r, *x, *sol; /* Residual, Ritz vector, and correction. */ Complex_Z *linSolverRWork;/* Workspace needed by linear solver. */ double *sortedRitzVals; /* Sorted array of current and converged Ritz */ /* values. Size of array is numLocked+basisSize. */ double *blockOfShifts; /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/ double *approxOlsenEps; /* Shifts for approximate Olsen implementation */ Complex_Z *Kinvx; /* Workspace to store K^{-1}x */ Complex_Z *Lprojector; /* Q pointer for (I-Q*Q'). Usually points to evecs*/ Complex_Z *RprojectorQ; /* May point to evecs/evecsHat depending on skewQ */ Complex_Z *RprojectorX; /* May point to x/Kinvx depending on skewX */ Complex_Z xKinvx; /* Stores x'*K^{-1}x if needed */ double eval, shift, robustShift; /* robust shift values. */ Complex_Z tmpShift; /* Temp shift for daxpy */ //------------------------------------------------------------ // Subdivide the workspace with pointers, and figure out // the total amount of needed real workspace (neededRsize) //------------------------------------------------------------ /* needed worksize */ neededRsize = 0; Kinvx = rwork; /* Kinvx will have nonzero size if precond and both RightX and SkewX */ if (primme->correctionParams.precondition && primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { /* OLSEN's method requires a block, but JDQMR is vector by vector */ if (primme->correctionParams.maxInnerIterations == 0) { sol = Kinvx + primme->nLocal*blockSize; neededRsize = neededRsize + primme->nLocal*blockSize; } else { sol = Kinvx + primme->nLocal; neededRsize = neededRsize + primme->nLocal; } } else { sol = Kinvx + 0; } if (primme->correctionParams.maxInnerIterations == 0) { linSolverRWork = sol + 0; /* sol not needed for GD */ linSolverRWorkSize = 0; /* No inner solver used */ } else { linSolverRWork = sol + primme->nLocal; /* sol needed in innerJD */ neededRsize = neededRsize + primme->nLocal; linSolverRWorkSize = /* Inner solver worksize */ 4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals); neededRsize = neededRsize + linSolverRWorkSize; } sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize); blockOfShifts = sortedRitzVals + (numLocked+basisSize); approxOlsenEps = blockOfShifts + blockSize; neededRsize = neededRsize + numLocked+basisSize + 2*blockSize; if (neededRsize > rworkSize) { return(neededRsize); } // Subdivide also the integer work space ilev = iwork; // of size blockSize //------------------------------------------------------------ // Figuring out preconditioning shifts (robust, Olsen, etc) //------------------------------------------------------------ /* blockOfShifts will contain the preconditioning shifts: */ /* either Ritz values or robustShifts computed below. These shifts */ /* will be used in the correction equations or in inverting (K-sigma I) */ /* approxOlsenEps will contain error approximations for eigenavalues */ /* to be used for Olsen's method (when innerIterations =0). */ if (primme->locking) { /* Combine the sorted list of locked Ritz values with the sorted */ /* list of current Ritz values, ritzVals. The merging of the two */ /* lists lockedEvals and ritzVals is stored in sortedRitzVals. */ mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, sortedRitzVals, ilev, blockSize, primme); } else { /* Then the sorted evals are simply the ritzVals, targeted as iev */ sortedRitzVals = ritzVals; ilev = iev; } /*-----------------------------------------------------------------*/ /* For interior eigenpairs, use the user provided shifts */ /*-----------------------------------------------------------------*/ if (primme->target != primme_smallest && primme->target != primme_largest) { for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* user provided shifts */ else { /*-----------------------------------------------------------------*/ /* else it is primme_smallest or primme_largest */ /*-----------------------------------------------------------------*/ if (primme->correctionParams.robustShifts) { /*----------------------------------------------------*/ /* Subtract/add a robust shift from/to the Ritz value */ /*----------------------------------------------------*/ /* Find the robust shift for each block vector */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { sortedIndex = ilev[blockIndex]; eval = sortedRitzVals[sortedIndex]; robustShift = computeRobustShift(blockIndex, blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, sortedRitzVals, &approxOlsenEps[blockIndex], numLocked+basisSize, ilev, primme); /* Subtract/add the shift if looking for the smallest/largest */ /* eigenvalues, Do not go beyond the previous computed eigval */ if (primme->target == primme_smallest) { blockOfShifts[blockIndex] = eval - robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } else { blockOfShifts[blockIndex] = eval + robustShift; if (sortedIndex > 0) blockOfShifts[blockIndex] = min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); } // robust shifting } /* for loop */ } /* endif robust shifts */ else { /*--------------------------------------------------------------*/ /* Otherwise, the shifts for both preconditioner and correction */ /* equation should be just the Ritz values. For Olsen's method, */ /* the shifts for r-eps*x, are chosen as the difference in Ritz */ /* value between successive iterations. */ /*--------------------------------------------------------------*/ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { ritzIndex = iev[blockIndex]; sortedIndex = ilev[blockIndex]; blockOfShifts[blockIndex] = ritzVals[ritzIndex]; if (sortedIndex < numPrevRitzVals) { approxOlsenEps[blockIndex] = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); } else { approxOlsenEps[blockIndex] = blockNorms[blockIndex]; } } /* for loop */ } /* else no robust shifts */ } /* else primme_smallest or primme_largest */ /* Remember the previous ritz values*/ numPrevRitzVals = numLocked+basisSize; Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1); // Equip the primme struct with the blockOfShifts, in case the user // wants to precondition (K-sigma_i I)^{-1} with a different shift // for each vector primme->ShiftsForPreconditioner = blockOfShifts; //------------------------------------------------------------ // Generalized Davidson variants -- No inner iterations //------------------------------------------------------------ if (primme->correctionParams.maxInnerIterations == 0) { // This is Generalized Davidson or approximate Olsen's method. // Perform block preconditioning (with or without projections) r = &W[primme->nLocal*basisSize]; // All the block residuals x = &V[primme->nLocal*basisSize]; // All the block Ritz vectors if ( primme->correctionParams.projectors.RightX && primme->correctionParams.projectors.SkewX ) { // Compute exact Olsen's projected preconditioner. This is // expensive and rarely improves anything! Included for completeness. Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme); } else { if ( primme->correctionParams.projectors.RightX ) { // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { // Compute r_i = r_i - err_i * x_i {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;} Num_axpy_zprimme(primme->nLocal, tmpShift, &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1); } //for } // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) apply_preconditioner_block(r, x, blockSize, primme ); } } //------------------------------------------------------------ // JDQMR --- JD inner-outer variants //------------------------------------------------------------ else { // maxInnerIterations > 0 We perform inner-outer JDQMR. /* Solve the correction for each block vector. */ for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { r = &W[primme->nLocal*(basisSize+blockIndex)]; x = &V[primme->nLocal*(basisSize+blockIndex)]; /* Set up the left/right/skew projectors for JDQMR. */ /* The pointers Lprojector, Rprojector(Q/X) point to the */ /* appropriate arrays for use in the projection step */ setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, &Lprojector, &RprojectorQ, &RprojectorX, &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX, numLocked, numConvergedStored, primme); /* Map the index of the block vector to its corresponding eigenvalue */ /* index, and the shift for the correction equation. Also make the */ /* shift available to primme, in case (K-shift I)^-1 is needed */ ritzIndex = iev[blockIndex]; shift = blockOfShifts[blockIndex]; primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex]; ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX, sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, machEps, linSolverRWork, linSolverRWorkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve, ret, __FILE__, __LINE__, primme); return (INNER_SOLVE_FAILURE); } Num_zcopy_zprimme(primme->nLocal, sol, 1, &V[primme->nLocal*(basisSize+blockIndex)], 1); } // end for each block vector } // JDqmr variants return 0; }
static int apply_skew_projector(double *Q, double *Qhat, double *UDU, int *ipivot, int numCols, double *v, double *rwork, primme_params *primme) { int count; double tpone = +1.0e+00, tzero = +0.0e+00, tmone = -1.0e+00; if (numCols > 0) { /* there is a projector to be applied */ int ret; double *overlaps; /* overlaps of v with columns of Q */ double *workSpace; /* Used for computing local overlaps */ overlaps = rwork; workSpace = overlaps + numCols; /* --------------------------------------------------------*/ /* Treat the one vector case with BLAS 1 calls */ /* --------------------------------------------------------*/ if (numCols == 1) { /* Compute workspace = Q'*v */ overlaps[0] = dist_dot(Q, 1, v, 1, primme); /* Backsolve only if there is a skew projector */ if (UDU != NULL) { if (UDU[0] == 0.0L) { return UDUSOLVE_FAILURE; } overlaps[0] = overlaps[0]/UDU[0]; } /* Compute v=v-Qhat*overlaps */ Num_axpy_dprimme(primme->nLocal, -overlaps[0], Qhat, 1, v, 1); } else { /* ------------------------------------------------------*/ /* More than one vectors. Use BLAS 2. */ /* ------------------------------------------------------*/ /* Compute workspace = Q'*v */ Num_gemv_dprimme("C", primme->nLocal, numCols, tpone, Q, primme->nLocal, v, 1, tzero, workSpace, 1); /* Global sum: overlaps = Q'*v */ count = numCols; (*primme->globalSumDouble)(workSpace, overlaps, &count, primme); /* --------------------------------------------*/ /* Backsolve only if there is a skew projector */ /* --------------------------------------------*/ if (UDU != NULL) { /* Solve (Q'Qhat)^{-1}*workSpace = overlaps = Q'*v for alpha by */ /* backsolving with the UDU decomposition. */ ret = UDUSolve_dprimme(UDU, ipivot, numCols, overlaps, workSpace); if (ret != 0) { primme_PushErrorMessage(Primme_apply_skew_projector, Primme_udusolve, ret, __FILE__, __LINE__, primme); return UDUSOLVE_FAILURE; } /* Compute v=v-Qhat*workspace */ Num_gemv_dprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, workSpace, 1, tpone, v, 1); } else { /* Compute v=v-Qhat*overlaps */ Num_gemv_dprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, overlaps, 1, tpone, v, 1); } /* UDU==null */ } /* numCols != 1 */ } /* numCols > 0 */ return 0; }
int inner_solve_dprimme(double *x, double *r, double *rnorm, double *evecs, double *evecsHat, double *UDU, int *ipivot, double *xKinvx, double *Lprojector, double *RprojectorQ, double *RprojectorX, int sizeLprojector, int sizeRprojectorQ, int sizeRprojectorX, double *sol, double eval, double shift, double eresTol, double aNormEstimate, double machEps, double *rwork, int rworkSize, primme_params *primme) { int i; /* loop variable */ int workSpaceSize; /* Size of local work array. */ int numIts; /* Number of inner iterations */ int ret; /* Return value used for error checking. */ int maxIterations; /* The maximum # iterations allowed. Depends on primme */ double *workSpace; /* Workspace needed by UDU routine */ /* QMR parameters */ double *g, *d, *delta, *w, *ptmp; double alpha_prev, beta, rho_prev, rho; double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; double ztmp; /* Parameters used to dynamically update eigenpair */ double Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev, eta; double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R; double Gamma_prev, Phi_prev; double Gamma, Phi; double gamma; /* The convergence criteria of the inner linear system must satisfy: */ /* || current residual || <= relativeTolerance * || initial residual || */ /* + absoluteTol */ double relativeTolerance; double absoluteTolerance; double LTolerance, ETolerance; /* Some constants */ double tpone = +1.0e+00, tzero = +0.0e+00; /* -------------------------------------------*/ /* Subdivide the workspace into needed arrays */ /* -------------------------------------------*/ g = rwork; d = g + primme->nLocal; delta = d + primme->nLocal; w = delta + primme->nLocal; workSpace = w + primme->nLocal; /* This needs at least 2*numOrth+NumEvals) */ workSpaceSize = rworkSize - (workSpace - rwork); /* -----------------------------------------*/ /* Set up convergence criteria by Tolerance */ /* -----------------------------------------*/ if (primme->aNorm <= 0.0L) { absoluteTolerance = aNormEstimate*machEps; eresTol = eresTol*aNormEstimate; } else { absoluteTolerance = primme->aNorm*machEps; } tau_prev = tau_init = *rnorm; /* Assumes zero initial guess */ LTolerance = eresTol; /* Andreas: note that eigenresidual tol may not be achievable, because we */ /* iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. */ if (primme->correctionParams.convTest == primme_adaptive) { ETolerance = max(eresTol/1.8L, absoluteTolerance); LTolerance = ETolerance; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) { LTolerance = max(eresTol/1.8L, absoluteTolerance); ETolerance = max(tau_init*0.1L, LTolerance); } else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) { relativeTolerance = pow(primme->correctionParams.relTolBase, (double)-primme->stats.numOuterIterations); LTolerance = relativeTolerance * tau_init + absoluteTolerance + eresTol; /*printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n", */ /*relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate);*/ } /* --------------------------------------------------------*/ /* Set up convergence criteria by max number of iterations */ /* --------------------------------------------------------*/ /* compute first total number of remaining matvecs */ maxIterations = primme->maxMatvecs - primme->stats.numMatvecs; /* Perform primme.maxInnerIterations, but do not exceed total remaining */ if (primme->correctionParams.maxInnerIterations > 0) { maxIterations = min(primme->correctionParams.maxInnerIterations, maxIterations); } /* --------------------------------------------------------*/ /* Rest of initializations */ /* --------------------------------------------------------*/ /* Assume zero initial guess */ Num_dcopy_dprimme(primme->nLocal, r, 1, g, 1); ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, d, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); return APPLYPROJECTEDPRECONDITIONER_FAILURE; } Theta_prev = 0.0L; eval_prev = eval; rho_prev = dist_dot(g, 1, d, 1, primme); /* Initialize recurrences used to dynamically update the eigenpair */ Beta_prev = Delta_prev = Psi_prev = 0.0L; Gamma_prev = Phi_prev = 0.0L; /* other initializations */ for (i = 0; i < primme->nLocal; i++) { delta[i] = tzero; sol[i] = tzero; } numIts = 0; /*----------------------------------------------------------------------*/ /*------------------------ Begin Inner Loop ----------------------------*/ /*----------------------------------------------------------------------*/ while (numIts < maxIterations) { apply_projected_matrix(d, shift, Lprojector, sizeLprojector, w, workSpace, primme); sigma_prev = dist_dot(d, 1, w, 1, primme); if (sigma_prev == 0.0L) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev); } break; } alpha_prev = rho_prev/sigma_prev; if (fabs(alpha_prev) < machEps || fabs(alpha_prev) > 1.0L/machEps){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev); } break; } Num_axpy_dprimme(primme->nLocal, -alpha_prev, w, 1, g, 1); Theta = dist_dot(g, 1, g, 1, primme); Theta = sqrt(Theta); Theta = Theta/tau_prev; c = 1.0L/sqrt(1+Theta*Theta); tau = tau_prev*Theta*c; gamma = c*c*Theta_prev*Theta_prev; eta = alpha_prev*c*c; for (i = 0; i < primme->nLocal; i++) { delta[i] = gamma*delta[i] + eta*d[i]; sol[i] = delta[i]+sol[i]; } numIts++; if (fabs(rho_prev) == 0.0L ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile,"Exiting because abs(rho) %e\n", fabs(rho_prev)); } break; } if (tau < LTolerance) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance); } break; } else if (primme->correctionParams.convTest == primme_adaptive_ETolerance || primme->correctionParams.convTest == primme_adaptive) { /* --------------------------------------------------------*/ /* Adaptive stopping based on dynamic monitoring of eResid */ /* --------------------------------------------------------*/ /* Update the Ritz value and eigenresidual using the */ /* following recurrences. */ Delta = gamma*Delta_prev + eta*rho_prev; Beta = Beta_prev - Delta; Phi = gamma*gamma*Phi_prev + eta*eta*sigma_prev; Psi = gamma*Psi_prev + gamma*Phi_prev; Gamma = Gamma_prev + 2.0L*Psi + Phi; /* Perform the update: update the eigenvalue and the square of the */ /* residual norm. */ dot_sol = dist_dot(sol, 1, sol, 1, primme); eval_updated = shift + (eval - shift + 2*Beta + Gamma)/(1 + dot_sol); eres2_updated = (tau*tau)/(1 + dot_sol) + ((eval - shift + Beta)*(eval - shift + Beta))/(1 + dot_sol) - (eval_updated - shift)*(eval_updated - shift); /* If numerical problems, let eres about the same as tau */ if (eres2_updated < 0){ eres_updated = sqrt( (tau*tau)/(1 + dot_sol) ); } else eres_updated = sqrt(eres2_updated); /* --------------------------------------------------------*/ /* Stopping criteria */ /* --------------------------------------------------------*/ R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol); if ( tau <= R*eres_updated || eres_updated <= tau*R ) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, " tau < R eres \n"); } break; } if (primme->target == primme_smallest && eval_updated > eval_prev) { if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated > eval_prev\n"); } break; } else if (primme->target == primme_largest && eval_updated < eval_prev){ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eval_updated < eval_prev\n"); } break; } if (eres_updated < ETolerance) { /* tau < LTol has been checked */ if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated); } break; } eval_prev = eval_updated; if (primme->printLevel >= 4 && primme->procID == 0) { fprintf(primme->outputFile, "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats. numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated); fflush(primme->outputFile); } /* --------------------------------------------------------*/ } /* End of if adaptive JDQMR section */ /* --------------------------------------------------------*/ else if (primme->printLevel >= 4 && primme->procID == 0) { /* Report for non adaptive inner iterations */ fprintf(primme->outputFile, "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs, primme_wTimer(0),tau); fflush(primme->outputFile); } if (numIts < maxIterations) { ret = apply_projected_preconditioner(g, evecs, RprojectorQ, x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, xKinvx, UDU, ipivot, w, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_inner_solve, Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, primme); ret = APPLYPROJECTEDPRECONDITIONER_FAILURE; break; } rho = dist_dot(g, 1, w, 1, primme); beta = rho/rho_prev; Num_axpy_dprimme(primme->nLocal, beta, d, 1, w, 1); /* Alternate between w and d buffers in successive iterations * This saves a memory copy. */ ptmp = d; d = w; w = ptmp; rho_prev = rho; tau_prev = tau; Theta_prev = Theta; Delta_prev = Delta; Beta_prev = Beta; Phi_prev = Phi; Psi_prev = Psi; Gamma_prev = Gamma; } /* --------------------------------------------------------*/ } /* End of QMR main while loop */ /* --------------------------------------------------------*/ *rnorm = eres_updated; return 0; }
int lock_vectors_dprimme(double tol, double *aNormEstimate, double *maxConvTol, int *basisSize, int *numLocked, int *numGuesses, int *nextGuess, double *V, double *W, double *H, double *evecsHat, double *M, double *UDU, int *ipivot, double *hVals, double *hVecs, double *evecs, double *evals, int *perm, double machEps, double *resNorms, int *numPrevRitzVals, double *prevRitzVals, int *flag, double *rwork, int rworkSize, int *iwork, int *LockingProblem, primme_params *primme) { int i; /* Loop counter */ int numCandidates; /* Number of targeted Ritz vectors converged before */ /* restart. */ int newStart; /* Index in evecs where the locked vectors were added */ int numNewVectors; /* Number of vectors added to the basis to replace */ /* locked vectors. */ int candidate; /* Index of Ritz vector to be checked for convergence */ int numDeflated; /* The number of vectors actually locked */ int numReplaced; /* The number of locked vectors that were replaced by */ /* initial guesses. */ int numRecentlyLocked; /* Number of vectors locked. */ int evecsSize; /* The number of orthogonalization constraints plus */ /* the number of locked vectors. */ int ret; /* Used to store return values. */ int workinW; /* Flag whether an active W vector is used as tempwork*/ int entireSpace = (*basisSize+*numLocked >= primme->n); /* bool if entire*/ /* space is built, so current ritzvecs are accurate. */ double *norms, *tnorms; /* Array of residual norms, and temp array */ double attainableTol; /* Used to verify a practical convergence problem*/ double *residual; /* Stores residual vector */ double ztmp; /* temp variable */ /* ----------------------------------------*/ /* Assign temporary work space for residual*/ /* ----------------------------------------*/ if (*basisSize < primme->maxBasisSize) { /* compute residuals in the next open slot of W */ residual = &W[*basisSize*primme->nLocal]; workinW = 0; } else { /* This basiSize==maxBasisSize, immediately after restart, can only occur * if the basisSize + numLocked = n (at which case we lock everything) * OR if (numConverged + restartSize + numPrevRetain > basisSize), ie. * too many converged. Since we do not know which evec will be locked * we use the W[LAST] as temporary space, but only after W[LAST] has * been used to compute residual(LAST) -the while loop starts from LAST. * After all lockings, if the LAST evec was not locked, we must * recompute W[LAST]=Av. This matvec event is extremely infrequent */ residual = &W[(*basisSize-1)*primme->nLocal]; workinW = 1; } /* -------------------------------------*/ /* Set the tolerance, and attainableTol */ /* -------------------------------------*/ if (primme->aNorm <= 0.0L) { tol = tol * (*aNormEstimate); } attainableTol=max(tol,sqrt(primme->numOrthoConst+*numLocked)*(*maxConvTol)); /* -------------------------------------------------------- */ /* Determine how many Ritz vectors converged before restart */ /* -------------------------------------------------------- */ i = *basisSize - 1; while ((flag[i] == LOCK_IT ||flag[i] == UNCONDITIONAL_LOCK_IT) && i >= 0) { i--; } numCandidates = *basisSize - i - 1; if (numCandidates == 0) { return 0; } /* --------------------------------- */ /* Compute residuals and their norms */ /* --------------------------------- */ tnorms = (double *) rwork; norms = tnorms + numCandidates; for (i = *basisSize-1, candidate = numCandidates-1; i >= *basisSize-numCandidates; i--, candidate--) { Num_dcopy_dprimme(primme->nLocal, &W[primme->nLocal*i], 1, residual, 1); ztmp = -hVals[i]; Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*i],1,residual,1); tnorms[candidate] = Num_dot_dprimme(primme->nLocal,residual,1,residual,1); } /* Global sum the dot products */ (*primme->globalSumDouble)(tnorms, norms, &numCandidates, primme); numRecentlyLocked = 0; /* ------------------------------------------------------------------- */ /* Check the convergence of each residual norm. If the Ritz vector is */ /* converged, then lock it. */ /* ------------------------------------------------------------------- */ for (i = *basisSize - numCandidates, candidate = 0; i < *basisSize; i++, candidate++) { norms[candidate] = sqrt(norms[candidate]); /* If the vector has become (regularly or practically) unconverged, */ /* then flag it, else lock it and replace it with an initial guess, */ /* if one is available. Exception: If the entire space is spanned, */ /* we can't do better, so lock it. */ if ((flag[i]!=UNCONDITIONAL_LOCK_IT && norms[candidate] >= tol && !entireSpace ) || (flag[i]==UNCONDITIONAL_LOCK_IT && norms[candidate] >= attainableTol && !entireSpace )) { flag[i] = UNCONVERGED; } else { /* If an unconditional lock has become converged, show it and */ /* record the max converged tolerance accordingly */ if (norms[candidate]<tol) { flag[i]=LOCK_IT; *maxConvTol = max(*maxConvTol, tol); } else { *maxConvTol = max(*maxConvTol, norms[candidate]); *LockingProblem = 1; } if (primme->printLevel >= 2 && primme->procID == 0) { fprintf(primme->outputFile, "Lock epair[ %d ]= %e norm %.4e Mvecs %d Time %.4e Flag %d\n", *numLocked+1, hVals[i], norms[candidate], primme->stats.numMatvecs,primme_wTimer(0),flag[i]); fflush(primme->outputFile); } /* Copy the converged Ritz vector to the evecs array and */ /* insert the converged Ritz value in sorted order within */ /* the evals array. */ Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1, &evecs[primme->nLocal*(primme->numOrthoConst + *numLocked)], 1); insertionSort(hVals[i], evals, norms[candidate], resNorms, perm, *numLocked, primme); /* If there are any initial guesses remaining, then copy it */ /* into the basis, else flag the vector as locked so it may */ /* be discarded later. */ if (*numGuesses > 0) { Num_dcopy_dprimme(primme->nLocal, &evecs[primme->nLocal*(*nextGuess)], 1, &V[primme->nLocal*i], 1); flag[i] = INITIAL_GUESS; *numGuesses = *numGuesses - 1; *nextGuess = *nextGuess + 1; } else { flag[i] = LOCKED; } *numLocked = *numLocked + 1; numRecentlyLocked++; } } evecsSize = primme->numOrthoConst + *numLocked; /* -------------------------------------------------------------------- */ /* If a W vector was used as workspace for residual AND its evec has */ /* not been locked out, recompute it, W = A*v. This is rare. */ /* -------------------------------------------------------------------- */ if (workinW && flag[*basisSize-1] != LOCKED) { update_W_dprimme(V, W, *basisSize-1, 1, primme); } /* -------------------------------------------------------------------- */ /* Return IF all target Ritz vectors have been locked, ELSE update the */ /* evecsHat array by applying the preconditioner (if preconditioning is */ /* needed, and JDQMR with right, skew Q projector is applied */ /* -------------------------------------------------------------------- */ if (*numLocked >= primme->numEvals) { return 0; } else if (UDU != NULL) { /* Compute K^{-1}x for all newly locked eigenvectors */ newStart = primme->nLocal*(evecsSize - numRecentlyLocked); (*primme->applyPreconditioner)( &evecs[newStart], &evecsHat[newStart], &numRecentlyLocked, primme); primme->stats.numPreconds += numRecentlyLocked; /* Update the projection evecs'*evecsHat now that evecs and evecsHat */ /* have been expanded by numRecentlyLocked columns. Required */ /* workspace is numLocked*numEvals. The most ever needed would be */ /* maxBasisSize*numEvals. */ update_projection_dprimme(evecs, evecsHat, M, evecsSize-numRecentlyLocked, primme->numOrthoConst+primme->numEvals, numRecentlyLocked, rwork, primme); ret = UDUDecompose_dprimme(M, UDU, ipivot, evecsSize, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* --------------------------------------------------------------------- */ /* Swap, towards the end of the basis, vectors that were locked but not */ /* replaced by new initial guesses. */ /* --------------------------------------------------------------------- */ numDeflated = swap_flagVecs_toEnd(*basisSize, LOCKED, V, W, H, hVals, flag, primme); /* --------------------------------------------------------------------- */ /* Reduce the basis size by numDeflated and swap the new initial guesses */ /* towards the end of the basis. */ /* --------------------------------------------------------------------- */ numReplaced = swap_flagVecs_toEnd(*basisSize-numDeflated, INITIAL_GUESS, V, W, H, hVals, flag, primme); *basisSize = *basisSize - (numDeflated + numReplaced); if (primme->printLevel >= 5 && primme->procID == 0) { fprintf(primme->outputFile, "numDeflated: %d numReplaced: %d \ basisSize: %d\n", numDeflated, numReplaced, *basisSize); }
static int allocate_workspace(primme_params *primme, int allocate) { long int realWorkSize; /* Size of real work space. */ long int rworkByteSize; /* Size of all real data in bytes */ int dataSize; /* Number of Complex_Z positions allocated, excluding */ /* doubles (see doubleSize below) and work space. */ int doubleSize=0; /* Number of doubles allocated exclusively to the */ /* double arrays: hVals, prevRitzVals, blockNorms */ int maxEvecsSize; /* Maximum number of vectors in evecs and evecsHat */ int intWorkSize; /* Size of integer work space in bytes */ int initSize; /* Amount of work space required by init routine */ int orthoSize; /* Amount of work space required by ortho routine */ int convSize; /* Amount of work space required by converg. routine */ int restartSize; /* Amount of work space required by restart routine */ int solveCorSize; /* work space for solve_correction and inner_solve */ int solveHSize; /* work space for solve_H */ int mainSize; /* work space for main_iter */ Complex_Z *evecsHat=NULL;/* not NULL when evecsHat will be used */ Complex_Z t; /* dummy variable */ maxEvecsSize = primme->numOrthoConst + primme->numEvals; /* first determine real workspace */ /*----------------------------------------------------------------------*/ /* Compute the memory required by the main iteration data structures */ /*----------------------------------------------------------------------*/ dataSize = primme->nLocal*primme->maxBasisSize /* Size of V */ + primme->nLocal*primme->maxBasisSize /* Size of W */ + primme->maxBasisSize*primme->maxBasisSize /* Size of H */ + primme->maxBasisSize*primme->maxBasisSize /* Size of hVecs */ + primme->restartingParams.maxPrevRetain*primme->maxBasisSize; /* size of prevHVecs */ /*----------------------------------------------------------------------*/ /* Add memory for Harmonic or Refined projection */ /*----------------------------------------------------------------------*/ if (primme->projectionParams.projection == primme_proj_harmonic || primme->projectionParams.projection == primme_proj_refined) { dataSize += primme->nLocal*primme->maxBasisSize /* Size of Q */ + primme->maxBasisSize*primme->maxBasisSize /* Size of R */ + primme->maxBasisSize*primme->maxBasisSize; /* Size of hU */ doubleSize += primme->maxBasisSize; /* Size of hSVals */ } /*----------------------------------------------------------------------*/ /* Add also memory needed for JD skew projectors */ /*----------------------------------------------------------------------*/ if ( (primme->correctionParams.precondition && primme->correctionParams.maxInnerIterations != 0 && primme->correctionParams.projectors.RightQ && primme->correctionParams.projectors.SkewQ ) ) { dataSize = dataSize + + primme->nLocal*maxEvecsSize /* Size of evecsHat */ + maxEvecsSize*maxEvecsSize /* Size of M */ + maxEvecsSize*maxEvecsSize; /* Size of UDU */ evecsHat = &t; /* set not NULL */ } /*----------------------------------------------------------------------*/ /* Determine workspace required by init and its children */ /*----------------------------------------------------------------------*/ initSize = init_basis_zprimme(NULL, primme->nLocal, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, &primme->maxBasisSize, NULL, NULL, NULL, primme); /*----------------------------------------------------------------------*/ /* Determine orthogalization workspace with and without locking. */ /*----------------------------------------------------------------------*/ if (primme->locking) { orthoSize = ortho_zprimme(NULL, 0, NULL, 0, primme->maxBasisSize, primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, maxEvecsSize, primme->nLocal, NULL, 0.0, NULL, 0, primme); } else { orthoSize = ortho_zprimme(NULL, 0, NULL, 0, primme->maxBasisSize, primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, primme->numOrthoConst+1, primme->nLocal, NULL, 0.0, NULL, 0, primme); } /*----------------------------------------------------------------------*/ /* Determine workspace required by solve_H and its children */ /*----------------------------------------------------------------------*/ solveHSize = solve_H_zprimme(NULL, primme->maxBasisSize, 0, NULL, 0, NULL, 0, NULL, 0, NULL, NULL, 0, 0, NULL, NULL, primme); /*----------------------------------------------------------------------*/ /* Determine workspace required by solve_correction and its children */ /*----------------------------------------------------------------------*/ solveCorSize = solve_correction_zprimme(NULL, NULL, NULL, NULL, NULL, NULL, NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL, primme->maxBasisSize, NULL, NULL, primme->maxBlockSize, 1.0, 0.0, 1.0, NULL, NULL, 0, primme); /*----------------------------------------------------------------------*/ /* Determine workspace required by solve_H and its children */ /*----------------------------------------------------------------------*/ convSize = check_convergence_zprimme(NULL, primme->nLocal, 0, &t, 0, NULL, primme->numEvals, 0, 0, primme->maxBasisSize, NULL, NULL, NULL, 0.0, NULL, 0, NULL, primme); /*----------------------------------------------------------------------*/ /* Determine workspace required by restarting and its children */ /*----------------------------------------------------------------------*/ restartSize = restart_zprimme(NULL, NULL, primme->nLocal, primme->maxBasisSize, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, evecsHat, 0, NULL, 0, NULL, 0, NULL, NULL, NULL, &primme->numEvals, NULL, &primme->restartingParams.maxPrevRetain, primme->maxBasisSize, primme->initSize, NULL, &primme->maxBasisSize, NULL, primme->maxBasisSize, NULL, 0, NULL, 0, NULL, 0, 0, NULL, 0, 0, NULL, NULL, 0.0, NULL, 0, NULL, primme); /*----------------------------------------------------------------------*/ /* Determine workspace required by main_iter and its children */ /*----------------------------------------------------------------------*/ mainSize = max( update_projection_zprimme(NULL, 0, NULL, 0, NULL, 0, 0, 0, primme->maxBasisSize, NULL, 0, primme), prepare_candidates_zprimme(NULL, NULL, primme->nLocal, primme->maxBasisSize, 0, NULL, NULL, NULL, 0, NULL, NULL, primme->numEvals, primme->numEvals, NULL, 0, primme->maxBlockSize, NULL, primme->numEvals, NULL, NULL, 0.0, NULL, &primme->maxBlockSize, NULL, NULL, 0, NULL, primme)); /*----------------------------------------------------------------------*/ /* Workspace is reused in many functions. Allocate the max needed by any*/ /*----------------------------------------------------------------------*/ realWorkSize = Num_imax_primme(8, /* Workspace needed by init_basis */ initSize, /* Workspace needed by solve_correction and its child inner_solve */ solveCorSize, /* Workspace needed by function solve_H */ solveHSize, /* Workspace needed by function check_convergence */ convSize, /* Workspace needed by function restart*/ restartSize, /* Workspace needed by function verify_norms */ 2*primme->numEvals, /* maximum workspace needed by ortho */ orthoSize, /* maximum workspace for main */ mainSize); /*----------------------------------------------------------------------*/ /* The following size is always allocated as double */ /*----------------------------------------------------------------------*/ doubleSize += 4 /* Padding */ + primme->maxBasisSize /* Size of hVals */ + primme->numEvals+primme->maxBasisSize /* Size of prevRitzVals */ + primme->maxBlockSize; /* Size of blockNorms */ /*----------------------------------------------------------------------*/ /* Determine the integer workspace needed */ /*----------------------------------------------------------------------*/ intWorkSize = primme->maxBasisSize /* Size of flag */ + 2*primme->maxBlockSize /* Size of iev and ilev */ + maxEvecsSize /* Size of ipivot */ + 5*primme->maxBasisSize; /* Auxiliary permutation arrays */ /*----------------------------------------------------------------------*/ /* byte sizes: */ /*----------------------------------------------------------------------*/ rworkByteSize = (dataSize + realWorkSize)*sizeof(Complex_Z) + doubleSize*sizeof(double); /*----------------------------------------------------------------------*/ /* If only the amount of required workspace is needed return it in bytes*/ /*----------------------------------------------------------------------*/ if (!allocate) { primme->intWorkSize = intWorkSize*sizeof(int); primme->realWorkSize = rworkByteSize; return 1; } /*----------------------------------------------------------------------*/ /* Allocate the required workspace, if the user did not provide enough */ /*----------------------------------------------------------------------*/ if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) { if (primme->realWork != NULL) { free(primme->realWork); } primme->realWorkSize = rworkByteSize; primme->realWork = (void *) primme_valloc(rworkByteSize,"Real Alloc"); if (primme->printLevel >= 5) fprintf(primme->outputFile, "Allocating real workspace: %ld bytes\n", primme->realWorkSize); } if (primme->intWorkSize < intWorkSize*(int)sizeof(int) || primme->intWork==NULL) { if (primme->intWork != NULL) { free(primme->intWork); } primme->intWorkSize = intWorkSize*sizeof(int); primme->intWork= (int *)primme_valloc(primme->intWorkSize ,"Int Alloc"); if (primme->printLevel >= 5) fprintf(primme->outputFile, "Allocating integer workspace: %d bytes\n", primme->intWorkSize); } if (primme->intWork == NULL || primme->realWork == NULL) { primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0, __FILE__, __LINE__, primme); return MALLOC_FAILURE; } return 0; /***************************************************************************/ } /* end of allocate workspace
int zprimme(double *evals, Complex_Z *evecs, double *resNorms, primme_params *primme) { int ret; int *perm; double machEps; /* ------------------ */ /* zero out the timer */ /* ------------------ */ primme_wTimer(1); /* ---------------------------- */ /* Clear previous error reports */ /* ---------------------------- */ primme_DeleteStackTrace(primme); /* ----------------------- */ /* Find machine precision */ /* ----------------------- */ machEps = Num_dlamch_primme("E"); /* ------------------ */ /* Set some defaults */ /* ------------------ */ primme_set_defaults(primme); /* -------------------------------------------------------------- */ /* If needed, we are ready to estimate required memory and return */ /* -------------------------------------------------------------- */ if (evals == NULL && evecs == NULL && resNorms == NULL) return allocate_workspace(primme, FALSE); /* ----------------------------------------------------- */ /* Reset random number seed if inappropriate for DLARENV */ /* Yields unique quadruples per proc if procID < 4096^3 */ /* ----------------------------------------------------- */ if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = primme->procID % 4096; if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = (int)(primme->procID/4096+1) % 4096; if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = (int)((primme->procID/4096)/4096+2) % 4096; if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096; /* ----------------------- */ /* Set default convTetFun */ /* ----------------------- */ if (!primme->convTestFun) { primme->convTestFun = convTestFunAbsolute; } /* ------------------------------------------------------- */ /* Check primme input data for bounds, correct values etc. */ /* ------------------------------------------------------- */ ret = check_input(evals, evecs, resNorms, primme); if (ret != 0) { primme_PushErrorMessage(Primme_zprimme, Primme_check_input, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ret; } /* ----------------------------------------------------------------------- */ /* Compute AND allocate memory requirements for main_iter and subordinates */ /* ----------------------------------------------------------------------- */ ret = allocate_workspace(primme, TRUE); if (ret != 0) { primme_PushErrorMessage(Primme_zprimme, Primme_allocate_workspace, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ALLOCATE_WORKSPACE_FAILURE; } /* --------------------------------------------------------- */ /* Allocate workspace that will be needed locally by zprimme */ /* --------------------------------------------------------- */ perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array"); if (perm == NULL) { primme_PushErrorMessage(Primme_zprimme, Primme_malloc, 0, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MALLOC_FAILURE; } /*----------------------------------------------------------------------*/ /* Call the solver */ /*----------------------------------------------------------------------*/ ret = main_iter_zprimme(evals, perm, evecs, resNorms, machEps, primme->intWork, primme->realWork, primme); if (ret < 0) { primme_PushErrorMessage(Primme_zprimme, Primme_main_iter, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MAIN_ITER_FAILURE; } /*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/ /* If locking is engaged, the converged Ritz vectors are stored in the */ /* order they converged. They must then be permuted so that they */ /* correspond to the sorted Ritz values in evals. */ /*----------------------------------------------------------------------*/ permute_vecs_zprimme(&evecs[primme->numOrthoConst], primme->nLocal, primme->initSize, primme->nLocal, perm, (Complex_Z*)primme->realWork, (int*)primme->intWork); free(perm); primme->stats.elapsedTime = primme_wTimer(0); return(0); }
static int apply_skew_projector(Complex_Z *Q, Complex_Z *Qhat, Complex_Z *UDU, int *ipivot, int numCols, Complex_Z *v, Complex_Z *rwork, primme_params *primme) { int count; Complex_Z ztmp; Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}, tmone = {-1.0e+00,+0.0e00}; if (numCols > 0) { /* there is a projector to be applied */ int ret; Complex_Z *overlaps; /* overlaps of v with columns of Q */ Complex_Z *workSpace; /* Used for computing local overlaps */ overlaps = rwork; workSpace = overlaps + numCols; /* --------------------------------------------------------*/ /* Treat the one vector case with BLAS 1 calls */ /* --------------------------------------------------------*/ if (numCols == 1) { /* Compute workspace = Q'*v */ overlaps[0] = dist_dot(Q, 1, v, 1, primme); /* Backsolve only if there is a skew projector */ if (UDU != NULL) { if ( z_eq_primme(UDU[0], tzero) ) { return UDUSOLVE_FAILURE; } z_div_primme(&overlaps[0], &overlaps[0], &UDU[0]); } /* Compute v=v-Qhat*overlaps */ ztmp.r = - overlaps[0].r; ztmp.i = - overlaps[0].i; Num_axpy_zprimme(primme->nLocal, ztmp, Qhat, 1, v, 1); } else { /* ------------------------------------------------------*/ /* More than one vectors. Use BLAS 2. */ /* ------------------------------------------------------*/ /* Compute workspace = Q'*v */ Num_gemv_zprimme("C", primme->nLocal, numCols, tpone, Q, primme->nLocal, v, 1, tzero, workSpace, 1); /* Global sum: overlaps = Q'*v */ // In Complex, the size of the array to globalSum is twice as large count = 2*numCols; (*primme->globalSumDouble)(workSpace, overlaps, &count, primme); /* --------------------------------------------*/ /* Backsolve only if there is a skew projector */ /* --------------------------------------------*/ if (UDU != NULL) { /* Solve (Q'Qhat)^{-1}*workSpace = overlaps = Q'*v for alpha by */ /* backsolving with the UDU decomposition. */ ret = UDUSolve_zprimme(UDU, ipivot, numCols, overlaps, workSpace); if (ret != 0) { primme_PushErrorMessage(Primme_apply_skew_projector, Primme_udusolve, ret, __FILE__, __LINE__, primme); return UDUSOLVE_FAILURE; } /* Compute v=v-Qhat*workspace */ Num_gemv_zprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, workSpace, 1, tpone, v, 1); } else { /* Compute v=v-Qhat*overlaps */ Num_gemv_zprimme("N", primme->nLocal, numCols, tmone, Qhat, primme->nLocal, overlaps, 1, tpone, v, 1); } // UDU==null } // numCols != 1 } // numCols > 0 return 0; }
static int restart_H(Complex_Z *H, Complex_Z *hVecs, double *hVals, int restartSize, int basisSize, Complex_Z *previousHVecs, int numPrevRetained, int indexOfPreviousVecs, int rworkSize, Complex_Z *rwork, primme_params *primme) { int i, j; /* Loop variables */ int workSpaceSize; /* Workspace size needed by insert_submatrix */ int ret; /* Return value */ Complex_Z *subMatrix;/* Contains the submatrix previousHVecs'*H*previousHvecs*/ Complex_Z *workSpace;/* Workspace size needed */ Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}; /*constants*/ /* ---------------------------------------------------------------------- */ /* If coefficient vectors from the previous iteration were retained, then */ /* set up work space for computing the numPrevRetained x numPrevRetained */ /* submatrix, then compute the submatrix. */ /* ---------------------------------------------------------------------- */ if (numPrevRetained > 0) { subMatrix = rwork; workSpace = &rwork[numPrevRetained*numPrevRetained]; workSpaceSize = rworkSize - numPrevRetained*numPrevRetained; compute_submatrix(previousHVecs, numPrevRetained, H, basisSize, primme->maxBasisSize, subMatrix, workSpace); } /* ----------------------------------------------------------------- */ /* V*hVecs yields a diagonal matrix composed of the Ritz values of A */ /* with respect to V. Set H to a diagonal matrix with the Ritz */ /* values on the diagonal. */ /* ----------------------------------------------------------------- */ for (j=0; j < restartSize; j++) { for (i=0; i < j; i++) { H[primme->maxBasisSize*j+i] = tzero; } H[primme->maxBasisSize*j+j].r = hVals[j]; H[primme->maxBasisSize*j+j].i = 0.0L; } /* --------------------------------------------------------------------- */ /* Given the above H, we know the eigenvectors of H will be the standard */ /* basis vectors if no previous coefficient vectors are retained */ /* --------------------------------------------------------------------- */ for (j=0; j < restartSize; j++) { for (i=0; i < j; i++) { hVecs[restartSize*j+i] = tzero; hVecs[restartSize*i+j] = tzero; } hVecs[restartSize*j+j] = tpone; } /* ---------------------------------------------------------------------- */ /* If coefficient vectors from the previous iteration have been retained, */ /* then insert the computed overlap matrix into the restarted H and solve */ /* the resulting eigenproblem for the resulting H. */ /* ---------------------------------------------------------------------- */ if (numPrevRetained > 0) { ret = insert_submatrix(H, hVals, hVecs, restartSize, subMatrix, numPrevRetained, indexOfPreviousVecs, workSpaceSize, workSpace, primme); if (ret != 0) { primme_PushErrorMessage(Primme_restart_h, Primme_insert_submatrix, ret, __FILE__, __LINE__, primme); return INSERT_SUBMATRIX_FAILURE; } } return 0; }
int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs, double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, int *numConvergedStored, int numLocked, int numGuesses, Complex_Z *previousHVecs, int numPrevRetained, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int numFree; /* The number of basis vectors to be left free */ int numPacked; /* The number of coefficient vectors moved to the */ /* end of the hVecs array. */ int restartSize; /* The number of vectors to restart with */ int indexOfPreviousVecs=0; /* Position within hVecs array the previous */ /* coefficient vectors will be stored */ int i, n, eStart; /* various variables */ int ret; /* Return value */ numPacked = 0; /* --------------------------------------------------------------------- */ /* If dynamic thick restarting is to be used, then determine the minimum */ /* number of free spaces to be maintained and call the DTR routine. */ /* The DTR routine will determine how many coefficient vectors from the */ /* left and right of H-spectrum to retain at restart. If DTR is not used */ /* then set the restart size to the minimum restart size. */ /* --------------------------------------------------------------------- */ if (primme->restartingParams.scheme == primme_dtr) { numFree = numPrevRetained+max(3, primme->maxBlockSize); restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, iev, rwork, primme); } else { restartSize = min(basisSize, primme->minRestartSize); } /* ----------------------------------------------------------------------- */ /* If locking is engaged, then swap coefficient vectors corresponding to */ /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/ /* This allows the converged Ritz vectors to be stored contiguously in */ /* memory after restart. This significantly reduces the amount of data */ /* movement the locking routine would have to perform otherwise. */ /* The following function also covers some limit cases where restartSize */ /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize */ /* ----------------------------------------------------------------------- */ if (primme->locking) { numPacked = pack_converged_coefficients(&restartSize, basisSize, &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme); } /* ----------------------------------------------------------------------- */ /* Restarting with a small number of coefficient vectors from the previous */ /* iteration can be retained to accelerate convergence. The previous */ /* coefficient vectors must be combined with the current coefficient */ /* vectors by first orthogonalizing the previous ones versus the current */ /* restartSize ones. The orthogonalized previous vectors are then */ /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs). */ /* ----------------------------------------------------------------------- */ if (numPrevRetained > 0) { indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs, basisSize, &restartSize, numPacked, previousHVecs, &numPrevRetained, machEps, rwork, primme); } /* -------------------------------------------------------- */ /* Restart V by replacing it with the current Ritz vectors. */ /* -------------------------------------------------------- */ restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ------------------------------------------------------------ */ /* Restart W by replacing it with W times the eigenvectors of H */ /* ------------------------------------------------------------ */ restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ---------------------------------------------------------------- */ /* Because we have replaced V by the Ritz vectors, V'*A*V should be */ /* diagonal with the Ritz values on the diagonal. The eigenvectors */ /* of the new matrix V'*A*V become the standard basis vectors. */ /* ---------------------------------------------------------------- */ ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, __LINE__, primme); return RESTART_H_FAILURE; } /* --------------------------------------------------------------------- */ /* If the user requires (I-QQ') projectors in JDQMR without locking, */ /* the converged eigenvectors are copied temporarily to evecs. There */ /* they stay locked for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/ /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and */ /* they will overwrite evecs at the end. */ /* We recommend against this type of usage. It's better to use locking. */ /* --------------------------------------------------------------------- */ /* Andreas NOTE: is done inefficiently for the moment. We should only */ /* add the recently converged. But we need to differentiate them */ /* from flags... */ if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && numConverged > 0 && (primme->correctionParams.projectors.LeftQ || primme->correctionParams.projectors.RightQ ) ) { n = primme->nLocal; *numConvergedStored = 0; eStart = primme->numOrthoConst; for (i=0;i<primme->numEvals;i++) { if (flags[i] == CONVERGED) { if (*numConvergedStored < numConverged) { Num_zcopy_zprimme(n, &V[i*n], 1, &evecs[(eStart+*numConvergedStored)*n], 1); (*numConvergedStored)++; } } /* if converged */ } /* for */ if (*numConvergedStored != numConverged) { if (primme->printLevel >= 1 && primme->procID == 0) { fprintf(primme->outputFile, "Flags and converged eigenpairs do not correspond %d %d\n", numConverged, *numConvergedStored); } return PSEUDOLOCK_FAILURE; } /* Update also the M = K^{-1}evecs and its udu factorization if needed */ if (UDU != NULL) { apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], numConverged, primme ); /* rwork must be maxEvecsSize*numEvals! */ update_projection_zprimme(evecs, evecsHat, M, eStart*n, primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme); ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if UDU factorization is needed */ } /* if this pseudo locking should take place */ return restartSize; }
int dprimme(double *evals, double *evecs, double *resNorms, primme_params *primme) { int ret; int *perm; double machEps; /* ------------------ */ /* zero out the timer */ /* ------------------ */ primme_wTimer(1); /* ---------------------------- */ /* Clear previous error reports */ /* ---------------------------- */ primme_DeleteStackTrace(primme); /* ----------------------- */ /* Find machine precision */ /* ----------------------- */ machEps = Num_dlamch_primme("E"); /* ----------------------------------------- */ /* Set some defaults for sequential programs */ /* ----------------------------------------- */ if (primme->numProcs == 1) { primme->nLocal = primme->n; primme->procID = 0; if (primme->globalSumDouble == NULL) primme->globalSumDouble = primme_seq_globalSumDouble; } /* --------------------------------------------------------------------- */ /* Decide on whether to use locking (hard locking), or not (soft locking)*/ /* --------------------------------------------------------------------- */ if (primme->target != primme_smallest && primme->target != primme_largest ) { /* Locking is necessary as interior Ritz values can cross shifts */ primme->locking = 1; } else { if (primme->locking == 0) { /* use locking when not enough vectors to restart with */ primme->locking = (primme->numEvals > primme->minRestartSize); } } /* -------------------------------------------------------------- */ /* If needed, we are ready to estimate required memory and return */ /* -------------------------------------------------------------- */ if (evals == NULL && evecs == NULL && resNorms == NULL) return allocate_workspace(primme, FALSE); /* ----------------------------------------------------- */ /* Reset random number seed if inappropriate for DLARENV */ /* Yields unique quadruples per proc if procID < 4096^3 */ /* ----------------------------------------------------- */ if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = primme->procID % 4096; if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = (int)(primme->procID/4096+1) % 4096; if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = (int)((primme->procID/4096)/4096+2) % 4096; if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096; /* ------------------------------------------------------- */ /* Check primme input data for bounds, correct values etc. */ /* ------------------------------------------------------- */ ret = check_input(evals, evecs, resNorms, primme); if (ret != 0) { primme_PushErrorMessage(Primme_dprimme, Primme_check_input, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ret; } /* ----------------------------------------------------------------------- */ /* Compute AND allocate memory requirements for main_iter and subordinates */ /* ----------------------------------------------------------------------- */ ret = allocate_workspace(primme, TRUE); if (ret != 0) { primme_PushErrorMessage(Primme_dprimme, Primme_allocate_workspace, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ALLOCATE_WORKSPACE_FAILURE; } /* --------------------------------------------------------- */ /* Allocate workspace that will be needed locally by dprimme */ /* --------------------------------------------------------- */ perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array"); if (perm == NULL) { primme_PushErrorMessage(Primme_dprimme, Primme_malloc, 0, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MALLOC_FAILURE; } /*----------------------------------------------------------------------*/ /* Call the solver */ /*----------------------------------------------------------------------*/ ret = main_iter_dprimme(evals, perm, evecs, resNorms, machEps, primme->intWork, primme->realWork, primme); if (ret < 0) { primme_PushErrorMessage(Primme_dprimme, Primme_main_iter, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MAIN_ITER_FAILURE; } /*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/ /* If locking is engaged, the converged Ritz vectors are stored in the */ /* order they converged. They must then be permuted so that they */ /* correspond to the sorted Ritz values in evals. */ /*----------------------------------------------------------------------*/ permute_evecs_dprimme(&evecs[primme->numOrthoConst], perm, (double *) primme->realWork, primme->numEvals, primme->nLocal); free(perm); primme->stats.elapsedTime = primme_wTimer(0); return(0); }
static int allocate_workspace(primme_params *primme, int allocate) { long int realWorkSize; /* Size of real work space. */ long int rworkByteSize; /* Size of all real data in bytes */ int dataSize; /* Number of double positions allocated, excluding */ /* doubles (see doubleSize below) and work space. */ int doubleSize; /* Number of doubles allocated exclusively to the */ /* double arrays: hVals, prevRitzVals, blockNorms */ int maxEvecsSize; /* Maximum number of vectors in evecs and evecsHat */ int intWorkSize; /* Size of integer work space in bytes */ int orthoSize; /* Amount of work space required by ortho routine */ int solveCorSize; /* work space for solve_correction and inner_solve */ maxEvecsSize = primme->numOrthoConst + primme->numEvals; /* first determine real workspace */ /*----------------------------------------------------------------------*/ /* Compute the memory required by the main iteration data structures */ /*----------------------------------------------------------------------*/ dataSize = primme->nLocal*primme->maxBasisSize /* Size of V */ + primme->nLocal*primme->maxBasisSize /* Size of W */ + primme->maxBasisSize*primme->maxBasisSize /* Size of H */ + primme->maxBasisSize*primme->maxBasisSize /* Size of hVecs */ + primme->restartingParams.maxPrevRetain*primme->maxBasisSize; /* size of prevHVecs */ /*----------------------------------------------------------------------*/ /* Add also memory needed for JD skew projectors */ /*----------------------------------------------------------------------*/ if ( (primme->correctionParams.precondition && primme->correctionParams.maxInnerIterations != 0 && primme->correctionParams.projectors.RightQ && primme->correctionParams.projectors.SkewQ ) ) { dataSize = dataSize + + primme->nLocal*maxEvecsSize /* Size of evecsHat */ + maxEvecsSize*maxEvecsSize /* Size of M */ + maxEvecsSize*maxEvecsSize; /* Size of UDU */ } /*----------------------------------------------------------------------*/ /* Determine orthogalization workspace with and without locking. */ /*----------------------------------------------------------------------*/ if (primme->locking) { orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize, primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, maxEvecsSize, primme->nLocal, NULL, 0.0, NULL, 0, primme); } else { orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize, primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, primme->numOrthoConst+1, primme->nLocal, NULL, 0.0, NULL, 0, primme); } /*----------------------------------------------------------------------*/ /* Determine workspace required by solve_correction and its children */ /*----------------------------------------------------------------------*/ solveCorSize = solve_correction_dprimme(NULL, NULL, NULL, NULL, NULL, NULL, NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL, primme->maxBasisSize, NULL, NULL, primme->maxBlockSize, 1.0, 0.0, 1.0, NULL, NULL, 0, primme); /*----------------------------------------------------------------------*/ /* Workspace is reused in many functions. Allocate the max needed by any*/ /*----------------------------------------------------------------------*/ realWorkSize = Num_imax_primme(8, /* Workspace needed by init_basis */ Num_imax_primme(3, maxEvecsSize*primme->numOrthoConst, maxEvecsSize, orthoSize), /* Workspace needed by solve_correction and its child inner_solve */ solveCorSize, /* Workspace needed by function solve_H */ #ifdef ESSL 2*primme->maxBasisSize + primme->maxBasisSize*(primme->maxBasisSize + 1)/2, #else 3*primme->maxBasisSize, #endif /* Workspace needed by function check_convergence */ max(primme->maxBasisSize*primme->maxBlockSize + primme->maxBlockSize, 2*maxEvecsSize*primme->maxBlockSize), /* Workspace needed by function restart*/ primme->restartingParams.maxPrevRetain* primme->restartingParams.maxPrevRetain /* for submatrix of prev hvecs */ + Num_imax_primme(4, primme->maxBasisSize, 3*primme->restartingParams.maxPrevRetain, primme->maxBasisSize*primme->restartingParams.maxPrevRetain, primme->maxBasisSize*primme->maxBasisSize, /* for DTR copying */ maxEvecsSize*primme->numEvals), /*this one is for UDU w/o locking */ /* Workspace needed by function verify_norms */ 2*primme->numEvals, /* space needed by lock vectors (no need w/o lock but doesn't add any) */ (2*primme->maxBasisSize) + Num_imax_primme(3, maxEvecsSize*primme->maxBasisSize, orthoSize, 3*primme->maxBasisSize), /* maximum workspace needed by ortho */ orthoSize); /*----------------------------------------------------------------------*/ /* The following size is always alloced as double */ /*----------------------------------------------------------------------*/ doubleSize = primme->maxBasisSize /* Size of hVals */ + primme->numEvals+primme->maxBasisSize /* Size of prevRitzVals */ + primme->maxBlockSize; /* Size of blockNorms */ /*----------------------------------------------------------------------*/ /* Determine the integer workspace needed */ /*----------------------------------------------------------------------*/ intWorkSize = primme->maxBasisSize /* Size of flag */ + 2*primme->maxBlockSize /* Size of iev and ilev */ + maxEvecsSize /* Size of ipivot */ + 2*primme->maxBasisSize; /* Size of 2 perms in solve_H */ /*----------------------------------------------------------------------*/ /* byte sizes: */ /*----------------------------------------------------------------------*/ rworkByteSize = (dataSize + realWorkSize)*sizeof(double) + doubleSize*sizeof(double); /*----------------------------------------------------------------------*/ /* If only the amount of required workspace is needed return it in bytes*/ /*----------------------------------------------------------------------*/ if (!allocate) { primme->intWorkSize = intWorkSize*sizeof(int); primme->realWorkSize = rworkByteSize; return 1; } /*----------------------------------------------------------------------*/ /* Allocate the required workspace, if the user did not provide enough */ /*----------------------------------------------------------------------*/ if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) { if (primme->realWork != NULL) { free(primme->realWork); } primme->realWorkSize = rworkByteSize; primme->realWork = (void *) primme_valloc(rworkByteSize,"Real Alloc"); if (primme->printLevel >= 5) fprintf(primme->outputFile, "Allocating real workspace: %ld bytes\n", primme->realWorkSize); } if (primme->intWorkSize < intWorkSize*sizeof(int) || primme->intWork==NULL) { if (primme->intWork != NULL) { free(primme->intWork); } primme->intWorkSize = intWorkSize*sizeof(int); primme->intWork= (int *)primme_valloc(primme->intWorkSize ,"Int Alloc"); if (primme->printLevel >= 5) fprintf(primme->outputFile, "Allocating integer workspace: %d bytes\n", primme->intWorkSize); } if (primme->intWork == NULL || primme->realWork == NULL) { primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0, __FILE__, __LINE__, primme); return MALLOC_FAILURE; } return 0; /***************************************************************************/ } /* end of allocate workspace
static int init_block_krylov(double *V, double *W, int dv1, int dv2, double *locked, int numLocked, double machEps, double *rwork, int rworkSize, primme_params *primme) { int i; /* Loop variables */ int numNewVectors; /* Number of vectors to be generated */ int ret; /* Return code. */ int ONE = 1; /* Used for passing it by reference in matrixmatvec */ numNewVectors = dv2 - dv1 + 1; /*----------------------------------------------------------------------*/ /* Generate a single Krylov space if there are only a few vectors to be */ /* generated, else generate a block Krylov space with */ /* primme->maxBlockSize as the block Size. */ /*----------------------------------------------------------------------*/ if (numNewVectors <= primme->maxBlockSize) { /* Create and orthogonalize the inital vectors */ Num_larnv_dprimme(2, primme->iseed,primme->nLocal,&V[primme->nLocal*dv1]); ret = ortho_dprimme(V, primme->nLocal, dv1, dv1, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } /* Generate the remainder of the Krylov space. */ for (i = dv1; i < dv2; i++) { (*primme->matrixMatvec) (&V[primme->nLocal*i], &V[primme->nLocal*(i+1)], &ONE, primme); Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*(i+1)], 1, &W[primme->nLocal*i], 1); ret = ortho_dprimme(V, primme->nLocal, i+1, i+1, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } } primme->stats.numMatvecs += dv2-dv1; update_W_dprimme(V, W, dv2, 1, primme); } else { /*----------------------------------------------------------------------*/ /* Generate the initial vectors. */ /*----------------------------------------------------------------------*/ Num_larnv_dprimme(2, primme->iseed, primme->nLocal*primme->maxBlockSize, &V[primme->nLocal*dv1]); ret = ortho_dprimme(V, primme->nLocal, dv1, dv1+primme->maxBlockSize-1, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Generate the remaining vectors in the sequence */ for (i = dv1+primme->maxBlockSize; i <= dv2; i++) { (*primme->matrixMatvec)(&V[primme->nLocal*(i-primme->maxBlockSize)], &V[primme->nLocal*i], &ONE, primme); Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1, &W[primme->nLocal*(i-primme->maxBlockSize)], 1); ret = ortho_dprimme(V, primme->nLocal, i, i, locked, primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } } primme->stats.numMatvecs += dv2-(dv1+primme->maxBlockSize)+1; update_W_dprimme(V, W, dv2-primme->maxBlockSize+1, primme->maxBlockSize, primme); } return 0; }
int init_basis_dprimme(double *V, double *W, double *evecs, double *evecsHat, double *M, double *UDU, int *ipivot, double machEps, double *rwork, int rworkSize, int *basisSize, int *nextGuess, int *numGuesses, double *timeForMV, primme_params *primme) { int ret; /* Return value */ int currentSize; /*-----------------------------------------------------------------------*/ /* Orthogonalize the orthogonalization constraints provided by the user. */ /* If a preconditioner is given and inner iterations are to be */ /* performed, then initialize M. */ /*-----------------------------------------------------------------------*/ if (primme->numOrthoConst > 0) { ret = ortho_dprimme(evecs, primme->nLocal, 0, primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } /* Initialize evecsHat, M, and its factorization UDU,ipivot. This */ /* allows the orthogonalization constraints to be included in the */ /* projector (I-QQ'). Only needed if there is preconditioning, and */ /* JDqmr inner iterations with a right, skew projector. Only in */ /* that case, is UDU not NULL */ if (UDU != NULL) { (*primme->applyPreconditioner) (evecs, evecsHat, &primme->numOrthoConst, primme); primme->stats.numPreconds += primme->numOrthoConst; update_projection_dprimme(evecs, evecsHat, M, 0, primme->numOrthoConst+primme->numEvals, primme->numOrthoConst, rwork, primme); ret = UDUDecompose_dprimme(M, UDU, ipivot, primme->numOrthoConst, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if evecsHat and M=evecs'evecsHat, UDU are needed */ } /* if numOrthoCont >0 */ /*-----------------------------------------------------------------------*/ /* No locking */ /*-----------------------------------------------------------------------*/ if (!primme->locking) { /* Handle case when no initial guesses are provided by the user */ if (primme->initSize == 0) { ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { /* Handle case when some or all initial guesses are provided by */ /* the user */ /* Copy over the initial guesses provided by the user */ Num_dcopy_dprimme(primme->nLocal*primme->initSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); /* Orthonormalize the guesses provided by the user */ ret = ortho_dprimme(V, primme->nLocal, 0, primme->initSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_dprimme(V, W, 0, primme->initSize, primme); /* An insufficient number of initial guesses were provided by */ /* the user. Generate a block Krylov space to fill the */ /* remaining vacancies. */ if (primme->initSize < primme->minRestartSize) { ret = init_block_krylov(V, W, primme->initSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { *basisSize = primme->initSize; } } *numGuesses = 0; *nextGuess = 0; } else { /*-----------------------------------------------------------------------*/ /* Locking */ /*-----------------------------------------------------------------------*/ *numGuesses = primme->initSize; *nextGuess = primme->numOrthoConst; /* If some initial guesses are available, copy them to the basis */ /* and orthogonalize them against themselves and the orthogonalization */ /* constraints. */ if (primme->initSize > 0) { currentSize = min(primme->initSize, primme->minRestartSize); Num_dcopy_dprimme(primme->nLocal*currentSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); ret = ortho_dprimme(V, primme->nLocal, 0, currentSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_dprimme(V, W, 0, currentSize, primme); *numGuesses = *numGuesses - currentSize; *nextGuess = *nextGuess + currentSize; } else { currentSize = 0; } /* If an insufficient number of guesses was provided, then fill */ /* the remaining vacancies with a block Krylov space. */ if (currentSize < primme->minRestartSize) { ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } } *basisSize = primme->minRestartSize; } /* ----------------------------------------------------------- */ /* If time measurements are needed, waste one MV + one Precond */ /* Put dummy results in the first open space of W (currentSize)*/ /* ----------------------------------------------------------- */ if (primme->dynamicMethodSwitch) { currentSize = primme->nLocal*(*basisSize); ret = 1; *timeForMV = primme_wTimer(0); (*primme->matrixMatvec)(V, &W[currentSize], &ret, primme); *timeForMV = primme_wTimer(0) - *timeForMV; primme->stats.numMatvecs += 1; } return 0; }