int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs, double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, int *numConvergedStored, int numLocked, int numGuesses, Complex_Z *previousHVecs, int numPrevRetained, double machEps, Complex_Z *rwork, int rworkSize, primme_params *primme) { int numFree; /* The number of basis vectors to be left free */ int numPacked; /* The number of coefficient vectors moved to the */ /* end of the hVecs array. */ int restartSize; /* The number of vectors to restart with */ int indexOfPreviousVecs=0; /* Position within hVecs array the previous */ /* coefficient vectors will be stored */ int i, n, eStart; /* various variables */ int ret; /* Return value */ numPacked = 0; /* --------------------------------------------------------------------- */ /* If dynamic thick restarting is to be used, then determine the minimum */ /* number of free spaces to be maintained and call the DTR routine. */ /* The DTR routine will determine how many coefficient vectors from the */ /* left and right of H-spectrum to retain at restart. If DTR is not used */ /* then set the restart size to the minimum restart size. */ /* --------------------------------------------------------------------- */ if (primme->restartingParams.scheme == primme_dtr) { numFree = numPrevRetained+max(3, primme->maxBlockSize); restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, iev, rwork, primme); } else { restartSize = min(basisSize, primme->minRestartSize); } /* ----------------------------------------------------------------------- */ /* If locking is engaged, then swap coefficient vectors corresponding to */ /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/ /* This allows the converged Ritz vectors to be stored contiguously in */ /* memory after restart. 
This significantly reduces the amount of data */ /* movement the locking routine would have to perform otherwise. */ /* The following function also covers some limit cases where restartSize */ /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize */ /* ----------------------------------------------------------------------- */ if (primme->locking) { numPacked = pack_converged_coefficients(&restartSize, basisSize, &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme); } /* ----------------------------------------------------------------------- */ /* Restarting with a small number of coefficient vectors from the previous */ /* iteration can be retained to accelerate convergence. The previous */ /* coefficient vectors must be combined with the current coefficient */ /* vectors by first orthogonalizing the previous ones versus the current */ /* restartSize ones. The orthogonalized previous vectors are then */ /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs). */ /* ----------------------------------------------------------------------- */ if (numPrevRetained > 0) { indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs, basisSize, &restartSize, numPacked, previousHVecs, &numPrevRetained, machEps, rwork, primme); } /* -------------------------------------------------------- */ /* Restart V by replacing it with the current Ritz vectors. 
*/ /* -------------------------------------------------------- */ restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ------------------------------------------------------------ */ /* Restart W by replacing it with W times the eigenvectors of H */ /* ------------------------------------------------------------ */ restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize); /* ---------------------------------------------------------------- */ /* Because we have replaced V by the Ritz vectors, V'*A*V should be */ /* diagonal with the Ritz values on the diagonal. The eigenvectors */ /* of the new matrix V'*A*V become the standard basis vectors. */ /* ---------------------------------------------------------------- */ ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme); if (ret != 0) { primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, __LINE__, primme); return RESTART_H_FAILURE; } /* --------------------------------------------------------------------- */ /* If the user requires (I-QQ') projectors in JDQMR without locking, */ /* the converged eigenvectors are copied temporarily to evecs. There */ /* they stay locked for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/ /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and */ /* they will overwrite evecs at the end. */ /* We recommend against this type of usage. It's better to use locking. */ /* --------------------------------------------------------------------- */ /* Andreas NOTE: is done inefficiently for the moment. We should only */ /* add the recently converged. But we need to differentiate them */ /* from flags... 
*/ if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && numConverged > 0 && (primme->correctionParams.projectors.LeftQ || primme->correctionParams.projectors.RightQ ) ) { n = primme->nLocal; *numConvergedStored = 0; eStart = primme->numOrthoConst; for (i=0;i<primme->numEvals;i++) { if (flags[i] == CONVERGED) { if (*numConvergedStored < numConverged) { Num_zcopy_zprimme(n, &V[i*n], 1, &evecs[(eStart+*numConvergedStored)*n], 1); (*numConvergedStored)++; } } /* if converged */ } /* for */ if (*numConvergedStored != numConverged) { if (primme->printLevel >= 1 && primme->procID == 0) { fprintf(primme->outputFile, "Flags and converged eigenpairs do not correspond %d %d\n", numConverged, *numConvergedStored); } return PSEUDOLOCK_FAILURE; } /* Update also the M = K^{-1}evecs and its udu factorization if needed */ if (UDU != NULL) { apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], numConverged, primme ); /* rwork must be maxEvecsSize*numEvals! */ update_projection_zprimme(evecs, evecsHat, M, eStart*n, primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme); ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if UDU factorization is needed */ } /* if this pseudo locking should take place */ return restartSize; }
/*******************************************************************************
 * Function allocate_workspace - Compute the real and integer workspace the
 *    solver needs and, if requested, allocate it.
 *
 *    The real workspace is the sum of the persistent arrays (V, W, H, hVecs,
 *    prevHVecs, plus optional projection and skew-projector arrays), the
 *    largest scratch space reported by any of the queried routines, and a
 *    few arrays of doubles.
 *
 * PARAMETERS
 * ----------
 * primme     Solver parameters.  realWorkSize/intWorkSize (in bytes) are
 *            written; realWork/intWork are (re)allocated when allocate != 0
 *            and the existing buffers are missing or too small.
 * allocate   If zero, only the required byte sizes are stored in primme.
 *
 * RETURN VALUE
 * ------------
 *  1               allocate == 0 and the sizes were stored
 *  0               workspace is available
 *  MALLOC_FAILURE  either workspace pointer is NULL after allocation
 ******************************************************************************/
static int allocate_workspace(primme_params *primme, int allocate) {

   long int realWorkSize;  /* Size of real work space.                  */
   long int rworkByteSize; /* Size of all real data in bytes            */

   long int dataSize;   /* Number of Complex_Z positions allocated, excluding */
                        /* doubles (see doubleSize below) and work space.     */
                        /* Kept in long int: the nLocal*maxBasisSize terms    */
                        /* can exceed the range of int for large problems.    */
   int doubleSize=0;    /* Number of doubles allocated exclusively to the     */
                        /* double arrays: hVals, prevRitzVals, blockNorms     */
   int maxEvecsSize;    /* Maximum number of vectors in evecs and evecsHat    */
   int intWorkSize;     /* Number of ints in the integer work space           */
   int initSize;        /* Amount of work space required by init routine      */
   int orthoSize;       /* Amount of work space required by ortho routine     */
   int orthoLockedCols; /* Locked-vector count used in the ortho query        */
   int convSize;        /* Amount of work space required by converg. routine  */
   int restartSize;     /* Amount of work space required by restart routine   */
   int solveCorSize;    /* work space for solve_correction and inner_solve    */
   int solveHSize;      /* work space for solve_H                             */
   int mainSize;        /* work space for main_iter                           */
   Complex_Z *evecsHat=NULL;/* not NULL when evecsHat will be used            */
   Complex_Z t;         /* dummy variable; only its address is used           */

   /* Widened copies so that the size products below are done in long int */
   const long int nLocal = primme->nLocal;
   const long int maxBasis = primme->maxBasisSize;

   maxEvecsSize = primme->numOrthoConst + primme->numEvals;

   /* first determine real workspace */

   /*----------------------------------------------------------------------*/
   /* Compute the memory required by the main iteration data structures    */
   /*----------------------------------------------------------------------*/

   dataSize = nLocal*maxBasis                        /* Size of V            */
      + nLocal*maxBasis                              /* Size of W            */
      + maxBasis*maxBasis                            /* Size of H            */
      + maxBasis*maxBasis                            /* Size of hVecs        */
      + primme->restartingParams.maxPrevRetain*maxBasis;
                                                     /* size of prevHVecs    */

   /*----------------------------------------------------------------------*/
   /* Add memory for Harmonic or Refined projection                        */
   /*----------------------------------------------------------------------*/
   if (primme->projectionParams.projection == primme_proj_harmonic ||
         primme->projectionParams.projection == primme_proj_refined) {

      dataSize += nLocal*maxBasis                    /* Size of Q            */
         + maxBasis*maxBasis                         /* Size of R            */
         + maxBasis*maxBasis;                        /* Size of hU           */
      doubleSize += primme->maxBasisSize;            /* Size of hSVals       */
   }

   /*----------------------------------------------------------------------*/
   /* Add also memory needed for JD skew projectors                        */
   /*----------------------------------------------------------------------*/
   if ( (primme->correctionParams.precondition &&
         primme->correctionParams.maxInnerIterations != 0 &&
         primme->correctionParams.projectors.RightQ &&
         primme->correctionParams.projectors.SkewQ          ) ) {

      dataSize += nLocal*maxEvecsSize                /* Size of evecsHat     */
         + (long int)maxEvecsSize*maxEvecsSize       /* Size of M            */
         + (long int)maxEvecsSize*maxEvecsSize;      /* Size of UDU          */
      evecsHat = &t; /* set not NULL */
   }

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by init and its children                */
   /*----------------------------------------------------------------------*/

   initSize = init_basis_zprimme(NULL, primme->nLocal, 0, NULL, 0, NULL, 0,
         NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, &primme->maxBasisSize,
         NULL, NULL, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine orthogonalization workspace with and without locking.      */
   /* The two cases differ only in the number of locked vectors.           */
   /*----------------------------------------------------------------------*/

   if (primme->locking) {
      orthoLockedCols = maxEvecsSize;
   }
   else {
      orthoLockedCols = primme->numOrthoConst+1;
   }
   orthoSize = ortho_zprimme(NULL, 0, NULL, 0, primme->maxBasisSize,
      primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal,
      orthoLockedCols, primme->nLocal, NULL, 0.0, NULL, 0, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_H and its children             */
   /*----------------------------------------------------------------------*/

   solveHSize = solve_H_zprimme(NULL, primme->maxBasisSize, 0, NULL, 0, NULL,
         0, NULL, 0, NULL, NULL, 0, 0, NULL, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_correction and its children    */
   /*----------------------------------------------------------------------*/

   solveCorSize = solve_correction_zprimme(NULL, NULL, NULL, NULL, NULL, NULL,
         NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL, primme->maxBasisSize,
         NULL, NULL, primme->maxBlockSize, 1.0, 0.0, 1.0, NULL, NULL, 0,
         primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by check_convergence and its children   */
   /*----------------------------------------------------------------------*/

   convSize = check_convergence_zprimme(NULL, primme->nLocal, 0, &t, 0, NULL,
         primme->numEvals, 0, 0, primme->maxBasisSize, NULL, NULL, NULL, 0.0,
         NULL, 0, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by restarting and its children          */
   /*----------------------------------------------------------------------*/

   restartSize = restart_zprimme(NULL, NULL, primme->nLocal,
         primme->maxBasisSize, 0, NULL, NULL, NULL, NULL, NULL,
         NULL, NULL, NULL, NULL, NULL, evecsHat, 0, NULL, 0, NULL, 0, NULL,
         NULL, NULL, &primme->numEvals, NULL,
         &primme->restartingParams.maxPrevRetain, primme->maxBasisSize,
         primme->initSize, NULL, &primme->maxBasisSize, NULL,
         primme->maxBasisSize, NULL, 0, NULL, 0, NULL, 0, 0, NULL, 0, 0, NULL,
         NULL, 0.0, NULL, 0, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by main_iter and its children           */
   /*----------------------------------------------------------------------*/

   mainSize = max(
         update_projection_zprimme(NULL, 0, NULL, 0, NULL, 0, 0, 0,
            primme->maxBasisSize, NULL, 0, primme),
         prepare_candidates_zprimme(NULL, NULL, primme->nLocal,
            primme->maxBasisSize, 0, NULL, NULL, NULL, 0, NULL, NULL,
            primme->numEvals, primme->numEvals, NULL, 0, primme->maxBlockSize,
            NULL, primme->numEvals, NULL, NULL, 0.0, NULL,
            &primme->maxBlockSize, NULL, NULL, 0, NULL, primme));

   /*----------------------------------------------------------------------*/
   /* Workspace is reused in many functions. Allocate the max needed by any*/
   /*----------------------------------------------------------------------*/
   realWorkSize = Num_imax_primme(8,

      /* Workspace needed by init_basis */
      initSize,

      /* Workspace needed by solve_correction and its child inner_solve */
      solveCorSize,

      /* Workspace needed by function solve_H */
      solveHSize,

      /* Workspace needed by function check_convergence */
      convSize,

      /* Workspace needed by function restart*/
      restartSize,

      /* Workspace needed by function verify_norms */
      2*primme->numEvals,

      /* maximum workspace needed by ortho */
      orthoSize,

      /* maximum workspace for main */
      mainSize);

   /*----------------------------------------------------------------------*/
   /* The following size is always allocated as double                     */
   /*----------------------------------------------------------------------*/

   doubleSize += 4                                /* Padding              */
      + primme->maxBasisSize                      /* Size of hVals        */
      + primme->numEvals+primme->maxBasisSize     /* Size of prevRitzVals */
      + primme->maxBlockSize;                     /* Size of blockNorms   */

   /*----------------------------------------------------------------------*/
   /* Determine the integer workspace needed                               */
   /*----------------------------------------------------------------------*/

   intWorkSize = primme->maxBasisSize /* Size of flag               */
      + 2*primme->maxBlockSize        /* Size of iev and ilev       */
      + maxEvecsSize                  /* Size of ipivot             */
      + 5*primme->maxBasisSize;       /* Auxiliary permutation arrays */

   /*----------------------------------------------------------------------*/
   /* byte sizes:                                                          */
   /*----------------------------------------------------------------------*/

   rworkByteSize = (dataSize + realWorkSize)*sizeof(Complex_Z)
                                + doubleSize*sizeof(double);

   /*----------------------------------------------------------------------*/
   /* If only the amount of required workspace is needed return it in bytes*/
   /*----------------------------------------------------------------------*/

   if (!allocate) {
      primme->intWorkSize  = intWorkSize*sizeof(int);
      primme->realWorkSize = rworkByteSize;
      return 1;
   }

   /*----------------------------------------------------------------------*/
   /* Allocate the required workspace, if the user did not provide enough  */
   /* An existing buffer that is too small is released and replaced.       */
   /*----------------------------------------------------------------------*/
   if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) {
      if (primme->realWork != NULL) {
         free(primme->realWork);
      }
      primme->realWorkSize = rworkByteSize;
      primme->realWork = (void *) primme_valloc(rworkByteSize,"Real Alloc");
      if (primme->printLevel >= 5) {
         fprintf(primme->outputFile,
         "Allocating real workspace: %ld bytes\n", primme->realWorkSize);
      }
   }

   if (primme->intWorkSize < intWorkSize*(int)sizeof(int) ||
       primme->intWork==NULL) {
      if (primme->intWork != NULL) {
         free(primme->intWork);
      }
      primme->intWorkSize = intWorkSize*sizeof(int);
      primme->intWork= (int *)primme_valloc(primme->intWorkSize ,"Int Alloc");
      if (primme->printLevel >= 5) {
         fprintf(primme->outputFile,
         "Allocating integer workspace: %d bytes\n", primme->intWorkSize);
      }
   }

   /* Both buffers are checked together after the allocation attempts */
   if (primme->intWork == NULL || primme->realWork == NULL) {
      primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0,
         __FILE__, __LINE__, primme);
      return MALLOC_FAILURE;
   }

   return 0;

  /***************************************************************************/
} /* end of allocate workspace */
int init_basis_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, double machEps, Complex_Z *rwork, int rworkSize, int *basisSize, int *nextGuess, int *numGuesses, double *timeForMV, primme_params *primme) { int ret; /* Return value */ int currentSize; /*-----------------------------------------------------------------------*/ /* Orthogonalize the orthogonalization constraints provided by the user. */ /* If a preconditioner is given and inner iterations are to be */ /* performed, then initialize M. */ /*-----------------------------------------------------------------------*/ if (primme->numOrthoConst > 0) { ret = ortho_zprimme(evecs, primme->nLocal, 0, primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } /* Initialize evecsHat, M, and its factorization UDU,ipivot. This */ /* allows the orthogonalization constraints to be included in the */ /* projector (I-QQ'). Only needed if there is preconditioning, and */ /* JDqmr inner iterations with a right, skew projector. 
Only in */ /* that case, is UDU not NULL */ if (UDU != NULL) { (*primme->applyPreconditioner) (evecs, evecsHat, &primme->numOrthoConst, primme); primme->stats.numPreconds += primme->numOrthoConst; update_projection_zprimme(evecs, evecsHat, M, 0, primme->numOrthoConst+primme->numEvals, primme->numOrthoConst, rwork, primme); ret = UDUDecompose_zprimme(M, UDU, ipivot, primme->numOrthoConst, rwork, rworkSize, primme); if (ret != 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret, __FILE__, __LINE__, primme); return UDUDECOMPOSE_FAILURE; } } /* if evecsHat and M=evecs'evecsHat, UDU are needed */ } /* if numOrthoCont >0 */ /*-----------------------------------------------------------------------*/ /* No locking */ /*-----------------------------------------------------------------------*/ if (!primme->locking) { /* Handle case when no initial guesses are provided by the user */ if (primme->initSize == 0) { ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { /* Handle case when some or all initial guesses are provided by */ /* the user */ /* Copy over the initial guesses provided by the user */ Num_zcopy_zprimme(primme->nLocal*primme->initSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); /* Orthonormalize the guesses provided by the user */ ret = ortho_zprimme(V, primme->nLocal, 0, primme->initSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return 
ORTHO_FAILURE; } update_W_zprimme(V, W, 0, primme->initSize, primme); /* An insufficient number of initial guesses were provided by */ /* the user. Generate a block Krylov space to fill the */ /* remaining vacancies. */ if (primme->initSize < primme->minRestartSize) { ret = init_block_krylov(V, W, primme->initSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); /* Push an error message onto the stack trace if an error occured */ if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_KRYLOV_FAILURE; } *basisSize = primme->minRestartSize; } else { *basisSize = primme->initSize; } } *numGuesses = 0; *nextGuess = 0; } else { /*-----------------------------------------------------------------------*/ /* Locking */ /*-----------------------------------------------------------------------*/ *numGuesses = primme->initSize; *nextGuess = primme->numOrthoConst; /* If some initial guesses are available, copy them to the basis */ /* and orthogonalize them against themselves and the orthogonalization */ /* constraints. */ if (primme->initSize > 0) { currentSize = min(primme->initSize, primme->minRestartSize); Num_zcopy_zprimme(primme->nLocal*currentSize, &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1); ret = ortho_zprimme(V, primme->nLocal, 0, currentSize-1, evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, __FILE__, __LINE__, primme); return ORTHO_FAILURE; } update_W_zprimme(V, W, 0, currentSize, primme); *numGuesses = *numGuesses - currentSize; *nextGuess = *nextGuess + currentSize; } else { currentSize = 0; } /* If an insufficient number of guesses was provided, then fill */ /* the remaining vacancies with a block Krylov space. 
*/ if (currentSize < primme->minRestartSize) { ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1, evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme); if (ret < 0) { primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov, ret, __FILE__, __LINE__, primme); return INIT_BLOCK_KRYLOV_FAILURE; } } *basisSize = primme->minRestartSize; } /* ----------------------------------------------------------- */ /* If time measurements are needed, waste one MV + one Precond */ /* Put dummy results in the first open space of W (currentSize)*/ /* ----------------------------------------------------------- */ if (primme->dynamicMethodSwitch) { currentSize = primme->nLocal*(*basisSize); ret = 1; *timeForMV = primme_wTimer(0); (*primme->matrixMatvec)(V, &W[currentSize], &ret, primme); *timeForMV = primme_wTimer(0) - *timeForMV; primme->stats.numMatvecs += 1; } return 0; }