Пример #1
0
/*******************************************************************************
 * Function init_block_krylov - Fills columns dv1..dv2 of V with an
 *    orthonormalized Krylov sequence and fills the matching columns of W.
 *
 *    When the number of requested vectors does not exceed
 *    primme->maxBlockSize, one random start vector is generated and every
 *    following column is obtained by applying primme->matrixMatvec to the
 *    previous column.  Otherwise primme->maxBlockSize random start vectors
 *    are generated and column i is obtained from column i - maxBlockSize.
 *    Each new column is orthogonalized by ortho_dprimme against the earlier
 *    columns of V and against `locked`.
 *
 * Parameters
 *    V          Basis; columns dv1..dv2 are written (leading dim nLocal).
 *    W          Receives the raw matvec results; the columns not produced
 *               inside the loops are filled by update_W_dprimme.
 *    dv1, dv2   First and last (inclusive) column index to generate.
 *    locked     Vectors passed to ortho_dprimme as the locked set.
 *    numLocked  Number of vectors in `locked`.
 *    machEps    Machine precision, forwarded to ortho_dprimme.
 *    rwork      Real workspace forwarded to ortho_dprimme.
 *    rworkSize  Size of rwork.
 *    primme     Solver parameters; stats.numMatvecs is incremented.
 *
 * Return value
 *    0              on success
 *    ORTHO_FAILURE  if any ortho_dprimme call returns a negative code
 ******************************************************************************/
static int init_block_krylov(double *V, double *W, int dv1, int dv2,
                             double *locked, int numLocked, double machEps, double *rwork,
                             int rworkSize, primme_params *primme) {

    int i;               /* Loop variables */
    int numNewVectors;   /* Number of vectors to be generated */
    int ret;             /* Return code.                      */
    int ONE = 1;         /* Used for passing it by reference in matrixmatvec */

    numNewVectors = dv2 - dv1 + 1;

    /*----------------------------------------------------------------------*/
    /* Generate a single Krylov space if there are only a few vectors to be */
    /* generated, else generate a block Krylov space with                   */
    /* primme->maxBlockSize as the block Size.                              */
    /*----------------------------------------------------------------------*/

    if (numNewVectors <= primme->maxBlockSize) {

        /* Create and orthogonalize the initial vector */

        Num_larnv_dprimme(2, primme->iseed,primme->nLocal,&V[primme->nLocal*dv1]);
        ret = ortho_dprimme(V, primme->nLocal, dv1, dv1, locked,
                            primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
                            rwork, rworkSize, primme);

        if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret,
                                    __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
        }

        /* Generate the remainder of the Krylov space. */

        for (i = dv1; i < dv2; i++) {
            (*primme->matrixMatvec)
            (&V[primme->nLocal*i], &V[primme->nLocal*(i+1)], &ONE, primme);
            /* Save A*V(:,i) in W before column i+1 is orthogonalized */
            Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*(i+1)], 1,
                              &W[primme->nLocal*i], 1);
            ret = ortho_dprimme(V, primme->nLocal, i+1, i+1, locked,
                                primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
                                rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho,
                                        ret, __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }
        }

        primme->stats.numMatvecs += dv2-dv1;
        /* The loop filled W(:,dv1..dv2-1); the last column is still missing */
        update_W_dprimme(V, W, dv2, 1, primme);

    }
    else {
        /*----------------------------------------------------------------------*/
        /* Generate the initial vectors.                                        */
        /*----------------------------------------------------------------------*/

        Num_larnv_dprimme(2, primme->iseed, primme->nLocal*primme->maxBlockSize,
                          &V[primme->nLocal*dv1]);
        ret = ortho_dprimme(V, primme->nLocal, dv1,
                            dv1+primme->maxBlockSize-1, locked, primme->nLocal, numLocked,
                            primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme);

        /* The start block must be checked like every other ortho call;     */
        /* continuing after a failure would build the sequence on bad data. */
        if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret,
                                    __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
        }

        /* Generate the remaining vectors in the sequence */

        for (i = dv1+primme->maxBlockSize; i <= dv2; i++) {
            (*primme->matrixMatvec)(&V[primme->nLocal*(i-primme->maxBlockSize)],
                                    &V[primme->nLocal*i], &ONE, primme);
            /* Save A*V(:,i-maxBlockSize) in W before column i is orthogonalized */
            Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1,
                              &W[primme->nLocal*(i-primme->maxBlockSize)], 1);

            ret = ortho_dprimme(V, primme->nLocal, i, i, locked,
                                primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
                                rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho,
                                        ret, __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }

        }

        primme->stats.numMatvecs += dv2-(dv1+primme->maxBlockSize)+1;
        /* The loop filled W up to column dv2-maxBlockSize; the trailing */
        /* maxBlockSize columns are still missing.                       */
        update_W_dprimme(V, W, dv2-primme->maxBlockSize+1, primme->maxBlockSize,
                         primme);

    }

    return 0;
}
Пример #2
0
/*******************************************************************************
 * Function allocate_workspace - Computes the real and integer workspace the
 *    solver needs and, if requested, makes sure primme->realWork and
 *    primme->intWork are at least that large.
 *
 * Parameters
 *    primme    Solver parameters. Sizes are derived from nLocal,
 *              maxBasisSize, maxBlockSize, numEvals, numOrthoConst,
 *              restartingParams.maxPrevRetain and the correction settings.
 *              On return realWorkSize / intWorkSize hold sizes in bytes.
 *    allocate  If zero, only the required sizes are stored in primme and
 *              nothing is allocated.  If nonzero, any missing or too-small
 *              buffer is (re)allocated with primme_valloc; an existing
 *              buffer that is too small is released with free() first.
 *
 * Return value
 *    1               size query only (allocate == 0)
 *    0               buffers are available
 *    MALLOC_FAILURE  either buffer is NULL after the allocation attempt
 ******************************************************************************/
static int allocate_workspace(primme_params *primme, int allocate) {

   long int realWorkSize;  /* Size of real work space.                  */
   long int rworkByteSize; /* Size of all real data in bytes            */

   long int dataSize; /* Number of double positions allocated, excluding */
                      /* doubles (see doubleSize below) and work space.  */
                      /* Kept in long: the nLocal*maxBasisSize products  */
                      /* can exceed the range of int for large problems. */
   int doubleSize;   /* Number of doubles allocated exclusively to the  */
                     /* double arrays: hVals, prevRitzVals, blockNorms  */
   int maxEvecsSize; /* Maximum number of vectors in evecs and evecsHat */
   int intWorkSize;  /* Size of integer work space (in ints; converted  */
                     /* to bytes when stored in primme)                 */
   int orthoSize;    /* Amount of work space required by ortho routine  */
   int solveCorSize; /* work space for solve_correction and inner_solve */

   maxEvecsSize = primme->numOrthoConst + primme->numEvals;

   /* first determine real workspace */

   /*----------------------------------------------------------------------*/
   /* Compute the memory required by the main iteration data structures    */
   /*----------------------------------------------------------------------*/

   dataSize = (long int)primme->nLocal*primme->maxBasisSize  /* Size of V  */
      + (long int)primme->nLocal*primme->maxBasisSize        /* Size of W  */
      + (long int)primme->maxBasisSize*primme->maxBasisSize  /* Size of H  */
      + (long int)primme->maxBasisSize*primme->maxBasisSize  /* hVecs      */
      + (long int)primme->restartingParams.maxPrevRetain*primme->maxBasisSize;
                                                   /* size of prevHVecs    */

   /*----------------------------------------------------------------------*/
   /* Add also memory needed for JD skew projectors                        */
   /*----------------------------------------------------------------------*/
   if ( (primme->correctionParams.precondition && 
         primme->correctionParams.maxInnerIterations != 0 &&
         primme->correctionParams.projectors.RightQ &&
         primme->correctionParams.projectors.SkewQ          ) ) {

      dataSize = dataSize
         + (long int)primme->nLocal*maxEvecsSize   /* Size of evecsHat     */ 
         + (long int)maxEvecsSize*maxEvecsSize     /* Size of M            */
         + (long int)maxEvecsSize*maxEvecsSize;    /* Size of UDU          */
   }

   /*----------------------------------------------------------------------*/
   /* Determine orthogonalization workspace with and without locking.      */
   /* ortho_dprimme is called with NULL arrays; its return value is used   */
   /* as the required workspace size.                                      */
   /*----------------------------------------------------------------------*/

   if (primme->locking) {
      orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, 
         maxEvecsSize, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }
   else {
      orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, 
         primme->numOrthoConst+1, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_correction and its children    */
   /*----------------------------------------------------------------------*/

   solveCorSize = solve_correction_dprimme(NULL, NULL, NULL, NULL, NULL, 
                  NULL, NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL, 
                  primme->maxBasisSize, NULL, NULL, primme->maxBlockSize, 
                  1.0, 0.0, 1.0, NULL, NULL, 0, primme);

   /*----------------------------------------------------------------------*/
   /* Workspace is reused in many functions. Allocate the max needed by any*/
   /*----------------------------------------------------------------------*/
   realWorkSize = Num_imax_primme(8,

      /* Workspace needed by init_basis */
      Num_imax_primme(3, 
         maxEvecsSize*primme->numOrthoConst, maxEvecsSize, orthoSize),

      /* Workspace needed by solve_correction and its child inner_solve */
      solveCorSize, 

      /* Workspace needed by function solve_H */
#ifdef ESSL
      2*primme->maxBasisSize +
         primme->maxBasisSize*(primme->maxBasisSize + 1)/2,
#else
      3*primme->maxBasisSize,
#endif
   
      /* Workspace needed by function check_convergence */ 
      max(primme->maxBasisSize*primme->maxBlockSize + primme->maxBlockSize,
                2*maxEvecsSize*primme->maxBlockSize),

      /* Workspace needed by function restart*/
      primme->restartingParams.maxPrevRetain*
      primme->restartingParams.maxPrevRetain  /* for submatrix of prev hvecs */
      + Num_imax_primme(4, primme->maxBasisSize, 
           3*primme->restartingParams.maxPrevRetain,
           primme->maxBasisSize*primme->restartingParams.maxPrevRetain,
           primme->maxBasisSize*primme->maxBasisSize,     /* for DTR copying */
           maxEvecsSize*primme->numEvals), /*this one is for UDU w/o locking */

      /* Workspace needed by function verify_norms */
      2*primme->numEvals,

      /* space needed by lock vectors (no need w/o lock but doesn't add any) */
      (2*primme->maxBasisSize) + Num_imax_primme(3, 
          maxEvecsSize*primme->maxBasisSize, orthoSize, 3*primme->maxBasisSize),

      /* maximum workspace needed by ortho */ 
      orthoSize);

   /*----------------------------------------------------------------------*/
   /* The following size is always alloced as double                       */
   /*----------------------------------------------------------------------*/

   doubleSize = primme->maxBasisSize               /* Size of hVals        */
      + primme->numEvals+primme->maxBasisSize      /* Size of prevRitzVals */
      + primme->maxBlockSize;                      /* Size of blockNorms   */

   /*----------------------------------------------------------------------*/
   /* Determine the integer workspace needed                               */
   /*----------------------------------------------------------------------*/

   intWorkSize = primme->maxBasisSize /* Size of flag               */
      + 2*primme->maxBlockSize        /* Size of iev and ilev       */
      + maxEvecsSize                  /* Size of ipivot             */
      + 2*primme->maxBasisSize;       /* Size of 2 perms in solve_H */

   /*----------------------------------------------------------------------*/
   /* byte sizes:                                                          */
   /*----------------------------------------------------------------------*/
   
   rworkByteSize = (dataSize + realWorkSize)*sizeof(double)
                                + doubleSize*sizeof(double); 

   /*----------------------------------------------------------------------*/
   /* If only the amount of required workspace is needed return it in bytes*/
   /*----------------------------------------------------------------------*/

   if (!allocate) {
      primme->intWorkSize  = intWorkSize*sizeof(int);
      primme->realWorkSize = rworkByteSize;
      return 1;
   }

   /*----------------------------------------------------------------------*/
   /* Allocate the required workspace, if the user did not provide enough  */
   /*----------------------------------------------------------------------*/
   if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) {
      /* A too-small existing buffer is released before the replacement */
      if (primme->realWork != NULL) {
         free(primme->realWork);
      }
      primme->realWorkSize = rworkByteSize;
      primme->realWork = (void *) primme_valloc(rworkByteSize,"Real Alloc");
      if (primme->printLevel >= 5) fprintf(primme->outputFile, 
         "Allocating real workspace: %ld bytes\n", primme->realWorkSize);
   }

   if (primme->intWorkSize < intWorkSize*sizeof(int) || primme->intWork==NULL) {
      if (primme->intWork != NULL) {
         free(primme->intWork);
      }
      primme->intWorkSize = intWorkSize*sizeof(int);
      primme->intWork= (int *)primme_valloc(primme->intWorkSize ,"Int Alloc");
      if (primme->printLevel >= 5) fprintf(primme->outputFile, 
         "Allocating integer workspace: %d bytes\n", primme->intWorkSize);
   }

   /* Both allocations are checked together, after both were attempted */
   if (primme->intWork == NULL || primme->realWork == NULL) {

      primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0, 
         __FILE__, __LINE__, primme);
      return MALLOC_FAILURE;
   }
      
   return 0;

  /***************************************************************************/
} /* end of allocate workspace */
Пример #3
0
/*******************************************************************************
 * Function init_basis_dprimme - Builds the initial basis V and the matching
 *    W, using the orthogonalization constraints and initial guesses stored
 *    in evecs, and filling any remaining columns up to
 *    primme->minRestartSize with a (block) Krylov sequence.
 *
 * Parameters
 *    V, W        Basis and its companion array; written by this function.
 *    evecs       Holds primme->numOrthoConst constraint vectors followed by
 *                primme->initSize initial guesses.  The constraint vectors
 *                are orthonormalized in place.
 *    evecsHat    Receives the preconditioned constraint vectors when UDU is
 *                not NULL.
 *    M, UDU, ipivot
 *                Projection matrix evecs'*evecsHat and its factorization;
 *                only touched when UDU is not NULL and numOrthoConst > 0.
 *    machEps     Machine precision, forwarded to the ortho routines.
 *    rwork, rworkSize
 *                Real workspace and its size.
 *    basisSize   Output: number of columns placed in V.
 *    nextGuess, numGuesses
 *                Output.  Without locking both are set to 0.  With locking,
 *                nextGuess is the index in evecs of the first guess not yet
 *                copied into V and numGuesses is how many guesses remain.
 *    timeForMV   Output: wall-clock time of one matrixMatvec call; written
 *                only when primme->dynamicMethodSwitch is set.
 *    primme      Solver parameters; statistics counters are updated.
 *
 * Return value
 *    0                          on success
 *    ORTHO_FAILURE              an ortho_dprimme call returned < 0
 *    UDUDECOMPOSE_FAILURE       UDUDecompose_dprimme returned nonzero
 *    INIT_BLOCK_KRYLOV_FAILURE  init_block_krylov returned < 0
 ******************************************************************************/
int init_basis_dprimme(double *V, double *W, double *evecs,
                       double *evecsHat, double *M, double *UDU, int *ipivot,
                       double machEps,  double *rwork, int rworkSize, int *basisSize,
                       int *nextGuess, int *numGuesses, double *timeForMV,
                       primme_params *primme) {

    int ret;          /* Return value                              */
    int currentSize;  /* Number of user guesses copied into V      */

    /*-----------------------------------------------------------------------*/
    /* Orthogonalize the orthogonalization constraints provided by the user. */
    /* If a preconditioner is given and inner iterations are to be           */
    /* performed, then initialize M.                                         */
    /*-----------------------------------------------------------------------*/

    if (primme->numOrthoConst > 0) {
        ret = ortho_dprimme(evecs, primme->nLocal, 0,
                            primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal,
                            primme->iseed, machEps, rwork, rworkSize, primme);

        /* Push an error message onto the stack trace if an error occurred */
        if (ret < 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                                    __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
        }

        /* Initialize evecsHat, M, and its factorization UDU,ipivot. This   */
        /* allows the orthogonalization constraints to be included in the   */
        /* projector (I-QQ'). Only needed if there is preconditioning, and  */
        /* JDqmr inner iterations with a right, skew projector. Only in     */
        /* that case, is UDU not NULL                                       */

        if (UDU != NULL) {

            (*primme->applyPreconditioner)
            (evecs, evecsHat, &primme->numOrthoConst, primme);
            primme->stats.numPreconds += primme->numOrthoConst;

            update_projection_dprimme(evecs, evecsHat, M, 0,
                                      primme->numOrthoConst+primme->numEvals, primme->numOrthoConst,
                                      rwork, primme);

            ret = UDUDecompose_dprimme(M, UDU, ipivot, primme->numOrthoConst,
                                       rwork, rworkSize, primme);

            if (ret != 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret,
                                        __FILE__, __LINE__, primme);
                return UDUDECOMPOSE_FAILURE;
            }

        }  /* if evecsHat and M=evecs'evecsHat, UDU are needed */

    }  /* if numOrthoConst > 0 */


    /*-----------------------------------------------------------------------*/
    /* No locking                                                            */
    /*-----------------------------------------------------------------------*/
    if (!primme->locking) {

        /* Handle case when no initial guesses are provided by the user */
        if (primme->initSize == 0) {

            ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs,
                                    primme->numOrthoConst, machEps, rwork, rworkSize, primme);

            /* Push an error message onto the stack trace if an error occurred */
            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov,
                                        ret, __FILE__, __LINE__, primme);
                return INIT_BLOCK_KRYLOV_FAILURE;
            }

            *basisSize = primme->minRestartSize;

        }
        else {
            /* Handle case when some or all initial guesses are provided by */
            /* the user                                                     */

            /* Copy over the initial guesses provided by the user; they are */
            /* stored in evecs right after the orthogonalization constraints */
            Num_dcopy_dprimme(primme->nLocal*primme->initSize,
                              &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

            /* Orthonormalize the guesses provided by the user */

            ret = ortho_dprimme(V, primme->nLocal, 0, primme->initSize-1,
                                evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal,
                                primme->iseed, machEps, rwork, rworkSize, primme);

            /* Push an error message onto the stack trace if an error occurred */
            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                                        __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }

            update_W_dprimme(V, W, 0, primme->initSize, primme);

            /* An insufficient number of initial guesses were provided by */
            /* the user.  Generate a block Krylov space to fill the       */
            /* remaining vacancies.                                       */

            if (primme->initSize < primme->minRestartSize) {

                ret = init_block_krylov(V, W, primme->initSize,
                                        primme->minRestartSize - 1, evecs, primme->numOrthoConst,
                                        machEps, rwork, rworkSize, primme);

                /* Push an error message onto the stack trace if an error  */
                /* occurred.  The code returned matches the other two      */
                /* init_block_krylov call sites in this function.          */
                if (ret < 0) {
                    primme_PushErrorMessage(Primme_init_basis,
                                            Primme_init_block_krylov, ret, __FILE__, __LINE__, primme);
                    return INIT_BLOCK_KRYLOV_FAILURE;
                }

                *basisSize = primme->minRestartSize;
            }
            else {
                *basisSize = primme->initSize;
            }

        }

        *numGuesses = 0;
        *nextGuess = 0;

    }
    else {
        /*-----------------------------------------------------------------------*/
        /* Locking                                                               */
        /*-----------------------------------------------------------------------*/

        *numGuesses = primme->initSize;
        *nextGuess = primme->numOrthoConst;

        /* If some initial guesses are available, copy them to the basis       */
        /* and orthogonalize them against themselves and the orthogonalization */
        /* constraints.                                                        */

        if (primme->initSize > 0) {
            /* At most minRestartSize guesses enter the basis now; the rest */
            /* stay in evecs and are tracked by nextGuess / numGuesses.      */
            currentSize = min(primme->initSize, primme->minRestartSize);
            Num_dcopy_dprimme(primme->nLocal*currentSize,
                              &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

            ret = ortho_dprimme(V, primme->nLocal, 0, currentSize-1, evecs,
                                primme->nLocal, primme->numOrthoConst, primme->nLocal,
                                primme->iseed, machEps, rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                                        __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }

            update_W_dprimme(V, W, 0, currentSize, primme);
            *numGuesses = *numGuesses - currentSize;
            *nextGuess = *nextGuess + currentSize;

        }
        else {
            currentSize = 0;
        }

        /* If an insufficient number of guesses was provided, then fill */
        /* the remaining vacancies with a block Krylov space.           */

        if (currentSize < primme->minRestartSize) {

            ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1,
                                    evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov,
                                        ret, __FILE__, __LINE__, primme);
                return INIT_BLOCK_KRYLOV_FAILURE;
            }

        }

        *basisSize = primme->minRestartSize;
    }

    /* ----------------------------------------------------------- */
    /* If time measurements are needed, spend one extra matvec and */
    /* time it.  The dummy result goes into the first column of W  */
    /* past the current basis.  Only the matvec is performed here. */
    /* ----------------------------------------------------------- */
    if (primme->dynamicMethodSwitch) {
        int oneVector = 1;  /* block size, passed by reference to matrixMatvec */
        int firstFree = primme->nLocal*(*basisSize); /* offset of scratch col  */

        *timeForMV = primme_wTimer(0);
        (*primme->matrixMatvec)(V, &W[firstFree], &oneVector, primme);
        *timeForMV = primme_wTimer(0) - *timeForMV;
        primme->stats.numMatvecs += 1;
    }

    return 0;
}