/*******************************************************************************
 * Function init_block_krylov - Fills columns dv1..dv2 of V with an
 *    orthonormalized (block) Krylov sequence started from random vectors, and
 *    fills the matching columns of W with the operator applied to them.
 *
 *    If at most primme->maxBlockSize vectors are requested, a single-vector
 *    Krylov sequence is built. Otherwise the first primme->maxBlockSize
 *    columns are random and every later column i is obtained by applying
 *    primme->matrixMatvec to column i - maxBlockSize.
 *
 * Parameters
 *    V          Basis; columns have leading dimension primme->nLocal.
 *               Columns dv1..dv2 are overwritten.
 *    W          Receives the matvec results for columns dv1..dv2 of V.
 *    dv1, dv2   First and last (inclusive) column indices to generate.
 *    locked     Passed to ortho_dprimme as the locked-vector array.
 *    numLocked  Number of vectors in locked, as passed to ortho_dprimme.
 *    machEps    Machine precision, forwarded to ortho_dprimme.
 *    rwork      Real work array forwarded to ortho_dprimme.
 *    rworkSize  Size of rwork, forwarded to ortho_dprimme.
 *    primme     Solver parameters; stats.numMatvecs is incremented here for
 *               the matvecs issued directly by this function.
 *
 * Return value
 *    0              on success
 *    ORTHO_FAILURE  if any ortho_dprimme call returns a negative value
 ******************************************************************************/
static int init_block_krylov(double *V, double *W, int dv1, int dv2,
   double *locked, int numLocked, double machEps, double *rwork,
   int rworkSize, primme_params *primme) {

   int i;               /* Loop variable                                     */
   int numNewVectors;   /* Number of vectors to be generated                 */
   int ret;             /* Return code                                       */
   int ONE = 1;         /* Block size passed by reference to matrixMatvec    */

   numNewVectors = dv2 - dv1 + 1;

   /*----------------------------------------------------------------------*/
   /* Generate a single Krylov space if there are only a few vectors to be */
   /* generated, else generate a block Krylov space with                   */
   /* primme->maxBlockSize as the block size.                              */
   /*----------------------------------------------------------------------*/

   if (numNewVectors <= primme->maxBlockSize) {

      /* Create and orthogonalize the initial vector.                      */
      /* NOTE(review): the leading 2 is presumably the LAPACK dlarnv       */
      /* distribution selector (uniform on (-1,1)) - confirm in wrapper.   */
      Num_larnv_dprimme(2, primme->iseed, primme->nLocal,
         &V[primme->nLocal*dv1]);
      ret = ortho_dprimme(V, primme->nLocal, dv1, dv1, locked,
         primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
         rwork, rworkSize, primme);

      if (ret < 0) {
         primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret,
                         __FILE__, __LINE__, primme);
         return ORTHO_FAILURE;
      }

      /* Generate the remainder of the Krylov space. The raw matvec result */
      /* is saved in W before the new column of V is orthogonalized.       */
      for (i = dv1; i < dv2; i++) {
         (*primme->matrixMatvec)
           (&V[primme->nLocal*i], &V[primme->nLocal*(i+1)], &ONE, primme);
         Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*(i+1)], 1,
            &W[primme->nLocal*i], 1);
         ret = ortho_dprimme(V, primme->nLocal, i+1, i+1, locked,
            primme->nLocal, numLocked, primme->nLocal, primme->iseed,
            machEps, rwork, rworkSize, primme);

         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho,
                            ret, __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
         }
      }

      primme->stats.numMatvecs += dv2-dv1;

      /* The last column has no successor, so its W column is still missing */
      update_W_dprimme(V, W, dv2, 1, primme);

   }
   else {

      /*-------------------------------------------------------------------*/
      /* Generate the initial block of random vectors.                     */
      /*-------------------------------------------------------------------*/

      Num_larnv_dprimme(2, primme->iseed, primme->nLocal*primme->maxBlockSize,
         &V[primme->nLocal*dv1]);
      ret = ortho_dprimme(V, primme->nLocal, dv1,
         dv1+primme->maxBlockSize-1, locked, primme->nLocal, numLocked,
         primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme);

      /* This result used to be silently overwritten by the loop below;    */
      /* a failure here must be reported like every other ortho failure.   */
      if (ret < 0) {
         primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret,
                         __FILE__, __LINE__, primme);
         return ORTHO_FAILURE;
      }

      /* Generate the remaining vectors in the sequence */
      for (i = dv1+primme->maxBlockSize; i <= dv2; i++) {
         (*primme->matrixMatvec)(&V[primme->nLocal*(i-primme->maxBlockSize)],
            &V[primme->nLocal*i], &ONE, primme);
         Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1,
            &W[primme->nLocal*(i-primme->maxBlockSize)], 1);

         ret = ortho_dprimme(V, primme->nLocal, i, i, locked,
            primme->nLocal, numLocked, primme->nLocal, primme->iseed,
            machEps, rwork, rworkSize, primme);

         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho,
                            ret, __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
         }
      }

      primme->stats.numMatvecs += dv2-(dv1+primme->maxBlockSize)+1;

      /* The last maxBlockSize columns have no successor in the sequence,  */
      /* so their W columns are computed here.                             */
      update_W_dprimme(V, W, dv2-primme->maxBlockSize+1, primme->maxBlockSize,
         primme);

   }

   return 0;
}
/*******************************************************************************
 * Function allocate_workspace - Computes the size of the real and integer
 *    workspace used by the solver and, if requested, allocates it.
 *
 * Parameters
 *    primme    Solver parameters. The sizes are derived from nLocal,
 *              maxBasisSize, maxBlockSize, numEvals, numOrthoConst,
 *              restartingParams.maxPrevRetain and the correction settings.
 *              On return realWorkSize and intWorkSize hold sizes in bytes.
 *              When allocate is nonzero, realWork / intWork are replaced by
 *              newly allocated buffers if they are NULL or too small; an
 *              existing too-small buffer is released with free().
 *    allocate  If zero, only the required sizes are stored in primme and
 *              nothing is allocated.
 *
 * Return value
 *    1               if only the sizes were requested (allocate == 0)
 *    0               on success
 *    MALLOC_FAILURE  if either workspace pointer is NULL after allocation
 ******************************************************************************/
static int allocate_workspace(primme_params *primme, int allocate) {

   long int realWorkSize;  /* Size of the shared real work space (doubles)  */
   long int rworkByteSize; /* Size of all real data in bytes                */
   int dataSize;           /* Number of double positions allocated,         */
                           /* excluding doubles (see doubleSize below) and  */
                           /* work space.                                   */
   int doubleSize;         /* Number of doubles allocated exclusively to    */
                           /* the double arrays: hVals, prevRitzVals,       */
                           /* blockNorms                                    */
   int maxEvecsSize;       /* Maximum number of vectors in evecs, evecsHat  */
   int intWorkSize;        /* Size of integer work space, counted in ints   */
   int intWorkByteSize;    /* Size of integer work space in bytes           */
   int orthoSize;          /* Amount of work space required by ortho        */
   int solveCorSize;       /* Work space for solve_correction, inner_solve  */

   maxEvecsSize = primme->numOrthoConst + primme->numEvals;

   /* first determine real workspace */

   /*----------------------------------------------------------------------*/
   /* Compute the memory required by the main iteration data structures    */
   /*----------------------------------------------------------------------*/

   dataSize = primme->nLocal*primme->maxBasisSize          /* Size of V     */
      + primme->nLocal*primme->maxBasisSize                /* Size of W     */
      + primme->maxBasisSize*primme->maxBasisSize          /* Size of H     */
      + primme->maxBasisSize*primme->maxBasisSize          /* Size of hVecs */
      + primme->restartingParams.maxPrevRetain*primme->maxBasisSize;
                                                       /* Size of prevHVecs */

   /*----------------------------------------------------------------------*/
   /* Add also memory needed for JD skew projectors                        */
   /*----------------------------------------------------------------------*/

   if (primme->correctionParams.precondition &&
       primme->correctionParams.maxInnerIterations != 0 &&
       primme->correctionParams.projectors.RightQ &&
       primme->correctionParams.projectors.SkewQ) {

      dataSize = dataSize
         + primme->nLocal*maxEvecsSize                  /* Size of evecsHat */
         + maxEvecsSize*maxEvecsSize                    /* Size of M        */
         + maxEvecsSize*maxEvecsSize;                   /* Size of UDU      */
   }

   /*----------------------------------------------------------------------*/
   /* Determine orthogonalization workspace with and without locking.      */
   /* The call is made with NULL arrays; its return value is used as the   */
   /* required workspace size.                                             */
   /*----------------------------------------------------------------------*/

   if (primme->locking) {
      orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal,
         maxEvecsSize, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }
   else {
      orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal,
         primme->numOrthoConst+1, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_correction and its children    */
   /*----------------------------------------------------------------------*/

   solveCorSize = solve_correction_dprimme(NULL, NULL, NULL, NULL, NULL,
      NULL, NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL,
      primme->maxBasisSize, NULL, NULL, primme->maxBlockSize,
      1.0, 0.0, 1.0, NULL, NULL, 0, primme);

   /*----------------------------------------------------------------------*/
   /* Workspace is reused in many functions. Allocate the max needed by any*/
   /*----------------------------------------------------------------------*/

   realWorkSize = Num_imax_primme(8,

      /* Workspace needed by init_basis */
      Num_imax_primme(3,
         maxEvecsSize*primme->numOrthoConst, maxEvecsSize, orthoSize),

      /* Workspace needed by solve_correction and its child inner_solve */
      solveCorSize,

      /* Workspace needed by function solve_H */
#ifdef ESSL
      2*primme->maxBasisSize +
         primme->maxBasisSize*(primme->maxBasisSize + 1)/2,
#else
      3*primme->maxBasisSize,
#endif

      /* Workspace needed by function check_convergence */
      max(primme->maxBasisSize*primme->maxBlockSize + primme->maxBlockSize,
          2*maxEvecsSize*primme->maxBlockSize),

      /* Workspace needed by function restart. The inner maximum is over   */
      /* FIVE candidates; the count used to be 4, which left the last one  */
      /* (UDU without locking) out of the maximum.                         */
      primme->restartingParams.maxPrevRetain*
      primme->restartingParams.maxPrevRetain   /* for submatrix of prev hvecs */
      + Num_imax_primme(5,
           primme->maxBasisSize,
           3*primme->restartingParams.maxPrevRetain,
           primme->maxBasisSize*primme->restartingParams.maxPrevRetain,
           primme->maxBasisSize*primme->maxBasisSize,  /* for DTR copying */
           maxEvecsSize*primme->numEvals),  /* this one is for UDU w/o locking */

      /* Workspace needed by function verify_norms */
      2*primme->numEvals,

      /* space needed by lock vectors (no need w/o lock but doesn't add any) */
      (2*primme->maxBasisSize) + Num_imax_primme(3,
          maxEvecsSize*primme->maxBasisSize, orthoSize,
          3*primme->maxBasisSize),

      /* maximum workspace needed by ortho */
      orthoSize);

   /*----------------------------------------------------------------------*/
   /* The following size is always allocated as double                     */
   /*----------------------------------------------------------------------*/

   doubleSize = primme->maxBasisSize               /* Size of hVals         */
      + primme->numEvals+primme->maxBasisSize      /* Size of prevRitzVals  */
      + primme->maxBlockSize;                      /* Size of blockNorms    */

   /*----------------------------------------------------------------------*/
   /* Determine the integer workspace needed                               */
   /*----------------------------------------------------------------------*/

   intWorkSize = primme->maxBasisSize       /* Size of flag                 */
      + 2*primme->maxBlockSize              /* Size of iev and ilev         */
      + maxEvecsSize                        /* Size of ipivot               */
      + 2*primme->maxBasisSize;             /* Size of 2 perms in solve_H   */

   /*----------------------------------------------------------------------*/
   /* byte sizes:                                                          */
   /*----------------------------------------------------------------------*/

   rworkByteSize = (dataSize + realWorkSize)*sizeof(double)
                 + doubleSize*sizeof(double);

   /* Kept as a signed int so the comparison against primme->intWorkSize   */
   /* below is not silently performed in unsigned arithmetic.              */
   intWorkByteSize = intWorkSize*(int)sizeof(int);

   /*----------------------------------------------------------------------*/
   /* If only the amount of required workspace is needed return it in bytes*/
   /*----------------------------------------------------------------------*/

   if (!allocate) {
      primme->intWorkSize  = intWorkByteSize;
      primme->realWorkSize = rworkByteSize;
      return 1;
   }

   /*----------------------------------------------------------------------*/
   /* Allocate the required workspace, if the user did not provide enough  */
   /*----------------------------------------------------------------------*/

   if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) {
      free(primme->realWork);   /* free(NULL) is a no-op */
      primme->realWorkSize = rworkByteSize;
      primme->realWork = (void *) primme_valloc(rworkByteSize, "Real Alloc");
      if (primme->printLevel >= 5) {
         fprintf(primme->outputFile,
            "Allocating real workspace: %ld bytes\n", primme->realWorkSize);
      }
   }

   if (primme->intWorkSize < intWorkByteSize || primme->intWork == NULL) {
      free(primme->intWork);    /* free(NULL) is a no-op */
      primme->intWorkSize = intWorkByteSize;
      primme->intWork = (int *) primme_valloc(primme->intWorkSize,
         "Int Alloc");
      if (primme->printLevel >= 5) {
         fprintf(primme->outputFile,
            "Allocating integer workspace: %d bytes\n", primme->intWorkSize);
      }
   }

   /* Both allocations are attempted before failure is reported */
   if (primme->intWork == NULL || primme->realWork == NULL) {
      primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0,
         __FILE__, __LINE__, primme);
      return MALLOC_FAILURE;
   }

   return 0;

} /* end of allocate_workspace */
/*******************************************************************************
 * Helper seed_basis_from_guesses - Copies the first `count` user guesses
 *    (stored in evecs right after the primme->numOrthoConst constraint
 *    vectors) into the leading columns of V, orthonormalizes them against
 *    the constraints and each other, and computes the matching columns of W.
 *
 * Return value
 *    0 on success; otherwise the negative value returned by ortho_dprimme
 *    (an error message has already been pushed on behalf of init_basis).
 ******************************************************************************/
static int seed_basis_from_guesses(double *V, double *W, double *evecs,
   int count, double machEps, double *rwork, int rworkSize,
   primme_params *primme)
{
   int status;

   Num_dcopy_dprimme(primme->nLocal*count,
      &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

   status = ortho_dprimme(V, primme->nLocal, 0, count-1, evecs,
      primme->nLocal, primme->numOrthoConst, primme->nLocal, primme->iseed,
      machEps, rwork, rworkSize, primme);
   if (status < 0) {
      primme_PushErrorMessage(Primme_init_basis, Primme_ortho, status,
         __FILE__, __LINE__, primme);
      return status;
   }

   update_W_dprimme(V, W, 0, count, primme);
   return 0;
}

/*******************************************************************************
 * Function init_basis_dprimme - Builds the initial search basis.
 *
 *    1. If orthogonalization constraints exist (primme->numOrthoConst > 0),
 *       they are orthonormalized in place in evecs. If UDU is not NULL, the
 *       preconditioner is applied to them (result in evecsHat), M is updated
 *       through update_projection_dprimme and factorized into UDU / ipivot.
 *    2. V is seeded with the user's initial guesses (if any) and completed
 *       with a block Krylov space up to primme->minRestartSize columns;
 *       W receives the corresponding matvec results.
 *    3. If primme->dynamicMethodSwitch is set, one extra matvec is timed.
 *
 * Parameters
 *    V, W         Basis and its image under the operator (output).
 *    evecs        Constraint vectors followed by the initial guesses.
 *    evecsHat     Preconditioned constraints (written only if UDU != NULL).
 *    M, UDU, ipivot  Projection matrix and its factorization (only touched
 *                 if UDU != NULL and there are constraints).
 *    machEps      Machine precision forwarded to the callees.
 *    rwork, rworkSize  Real work array and its size.
 *    basisSize    Output: number of columns placed in V.
 *    nextGuess    Output: index in evecs of the next unused guess
 *                 (0 without locking).
 *    numGuesses   Output: number of guesses not yet used (0 without locking).
 *    timeForMV    Output: wall time of one matvec; written only when
 *                 primme->dynamicMethodSwitch is set.
 *    primme       Solver parameters; statistics counters are updated.
 *
 * Return value
 *    0 on success, or one of ORTHO_FAILURE, UDUDECOMPOSE_FAILURE,
 *    INIT_BLOCK_KRYLOV_FAILURE, INIT_KRYLOV_FAILURE.
 ******************************************************************************/
int init_basis_dprimme(double *V, double *W, double *evecs,
   double *evecsHat, double *M, double *UDU, int *ipivot, double machEps,
   double *rwork, int rworkSize, int *basisSize, int *nextGuess,
   int *numGuesses, double *timeForMV, primme_params *primme)
{
   int status;    /* Result of the most recent callee                       */
   int seeded;    /* Number of user guesses copied into V (locking only)    */

   /*-----------------------------------------------------------------------*/
   /* User-supplied orthogonalization constraints                           */
   /*-----------------------------------------------------------------------*/
   if (primme->numOrthoConst > 0) {

      status = ortho_dprimme(evecs, primme->nLocal, 0,
         primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal,
         primme->iseed, machEps, rwork, rworkSize, primme);
      if (status < 0) {
         primme_PushErrorMessage(Primme_init_basis, Primme_ortho, status,
            __FILE__, __LINE__, primme);
         return ORTHO_FAILURE;
      }

      /* evecsHat, M and its factorization let the constraints take part   */
      /* in the projector (I-QQ'). The caller signals that they are needed */
      /* by passing a non-NULL UDU.                                        */
      if (UDU != NULL) {
         (*primme->applyPreconditioner)
            (evecs, evecsHat, &primme->numOrthoConst, primme);
         primme->stats.numPreconds += primme->numOrthoConst;

         update_projection_dprimme(evecs, evecsHat, M, 0,
            primme->numOrthoConst+primme->numEvals, primme->numOrthoConst,
            rwork, primme);

         status = UDUDecompose_dprimme(M, UDU, ipivot,
            primme->numOrthoConst, rwork, rworkSize, primme);
         if (status != 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose,
               status, __FILE__, __LINE__, primme);
            return UDUDECOMPOSE_FAILURE;
         }
      }
   }

   if (primme->locking) {

      /*--------------------------------------------------------------------*/
      /* Locking: at most minRestartSize guesses enter the basis now; the   */
      /* rest stay in evecs and are reported through numGuesses/nextGuess.  */
      /*--------------------------------------------------------------------*/
      *numGuesses = primme->initSize;
      *nextGuess  = primme->numOrthoConst;
      seeded      = 0;

      if (primme->initSize > 0) {
         seeded = primme->initSize;
         if (primme->minRestartSize < seeded) {
            seeded = primme->minRestartSize;
         }

         if (seed_basis_from_guesses(V, W, evecs, seeded, machEps, rwork,
                rworkSize, primme) < 0) {
            return ORTHO_FAILURE;
         }

         *numGuesses = *numGuesses - seeded;
         *nextGuess  = *nextGuess + seeded;
      }

      /* Not enough guesses: complete the basis with a block Krylov space */
      if (seeded < primme->minRestartSize) {
         status = init_block_krylov(V, W, seeded, primme->minRestartSize - 1,
            evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme);
         if (status < 0) {
            primme_PushErrorMessage(Primme_init_basis,
               Primme_init_block_krylov, status, __FILE__, __LINE__, primme);
            return INIT_BLOCK_KRYLOV_FAILURE;
         }
      }

      *basisSize = primme->minRestartSize;
   }
   else {

      /*--------------------------------------------------------------------*/
      /* No locking: every user guess goes into the basis.                  */
      /*--------------------------------------------------------------------*/
      if (primme->initSize != 0) {

         if (seed_basis_from_guesses(V, W, evecs, primme->initSize, machEps,
                rwork, rworkSize, primme) < 0) {
            return ORTHO_FAILURE;
         }

         if (primme->initSize >= primme->minRestartSize) {
            *basisSize = primme->initSize;
         }
         else {
            /* Too few guesses: fill the remaining vacancies */
            status = init_block_krylov(V, W, primme->initSize,
               primme->minRestartSize - 1, evecs, primme->numOrthoConst,
               machEps, rwork, rworkSize, primme);
            if (status < 0) {
               primme_PushErrorMessage(Primme_init_basis,
                  Primme_init_block_krylov, status, __FILE__, __LINE__,
                  primme);
               /* NOTE(review): the other two init_block_krylov failure    */
               /* paths return INIT_BLOCK_KRYLOV_FAILURE; this code is     */
               /* kept as found - confirm whether the difference is        */
               /* intentional.                                             */
               return INIT_KRYLOV_FAILURE;
            }
            *basisSize = primme->minRestartSize;
         }
      }
      else {
         /* No guesses at all: the whole basis is a block Krylov space */
         status = init_block_krylov(V, W, 0, primme->minRestartSize - 1,
            evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme);
         if (status < 0) {
            primme_PushErrorMessage(Primme_init_basis,
               Primme_init_block_krylov, status, __FILE__, __LINE__, primme);
            return INIT_BLOCK_KRYLOV_FAILURE;
         }
         *basisSize = primme->minRestartSize;
      }

      *numGuesses = 0;
      *nextGuess  = 0;
   }

   /* -------------------------------------------------------------------- */
   /* If time measurements are needed, spend one extra matvec on the first */
   /* column of V. The dummy result goes to the first unused column of W.  */
   /* NOTE(review): only a matvec is timed here, no preconditioner call.   */
   /* -------------------------------------------------------------------- */
   if (primme->dynamicMethodSwitch) {
      int oneVector = 1;
      int firstFree = primme->nLocal*(*basisSize);

      *timeForMV = primme_wTimer(0);
      (*primme->matrixMatvec)(V, &W[firstFree], &oneVector, primme);
      *timeForMV = primme_wTimer(0) - *timeForMV;
      primme->stats.numMatvecs += 1;
   }

   return 0;
}