Exemplos de Num_gemm_dprimme em C++ (Cpp)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: convergence_d.c Projeto: nikoloutsa/primme

static void compute_resnorms(double *V, double *W, double *hVecs, 
   double *hVals, int basisSize, double *blockNorms, int *iev, int left, 
   int right, void *rwork, primme_params *primme) {

   int i;            /* Loop variable                             */
   int numResiduals; /* Number of residual vectors to be computed */
   double *dwork = (double *) rwork;  /* pointer casting rwork to double */
   double ztmp;     /* temp var holding shift                    */
   double tpone = +1.0e+00, tzero = +0.0e+00;       /* constants */

   numResiduals = right - left + 1;

   /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */
   /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec.   */

   /* Compute the Ritz vectors */

   Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, 
      tpone, V, primme->nLocal, hVecs, basisSize, tzero,
      &V[primme->nLocal*(basisSize+left)], primme->nLocal);

   /* Compute W*hVecs */

   Num_gemm_dprimme("N", "N", primme->nLocal, numResiduals, basisSize, 
      tpone, W, primme->nLocal, hVecs, basisSize, tzero,
      &W[primme->nLocal*(basisSize+left)], primme->nLocal);

   /* Compute the residuals */

   for (i=left; i <= right; i++) {
      ztmp = -hVals[iev[i]];
      Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)],
       1, &W[primme->nLocal*(basisSize+i)], 1);
   }

   /* Compute the residual norms */

   for (i=left; i <= right; i++) {
      dwork[i] = Num_dot_dprimme(primme->nLocal, 
      &W[primme->nLocal*(basisSize+i)], 1, &W[primme->nLocal*(basisSize+i)], 1);
   }
   
   (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals,
                              primme);

   for (i=left; i <= right; i++) {
      blockNorms[i] = sqrt(blockNorms[i]);
   }

}

Exemplo n.º 2

0

Exibir arquivo

Arquivo: convergence_d.c Projeto: nikoloutsa/primme

/*******************************************************************************
 * Subroutine check_practical_convergence()
 *
 *       This function is called after swaping has pushed converged (C)
 *       and to be projected (P) evecs at the end of blockSize.
 *       Makes C, P contiguous, projects P, checks and finds practically 
 *       converged evecs (p), and makes p contiguous to C, updating
 *       flags[], blockNorms[], and numVacancies. Eg:
 *
 *          blockSize 
 *       |------CPCCPPCP|  -> |------PPPPCCCC|  ->  |------PpPpCCCC| ->
 *       |------PPppCCCC|, numVacancies = 6
 *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                    
 * INPUT ARRAYS AND PARAMETERS
 * ---------------------------
 * evecs           The locked eigenvectors
 * numLocked       The number of locked eigenvectors
 * basisSize       Number of vectors in the basis
 * blockSize       The number of block vectors
 * start           Starting index in V,W of vectors converged or to be projected
 * numToProject    The number of vectors to project. 
 * tol             The required convergence tolerance
 * rwork           real work array of size: 2*maxEvecsSize*primme->maxBlockSize
 * primme          Structure containing various solver parameters
 *
 *
 * OUTPUT ARRAYS AND PARAMETERS
 * ----------------------------
 * V               The basis vectors
 * W               A*V
 * iev             Indicates which Ritz value each block vector corresponds to
 * flags           Indicates which Ritz pairs have converged
 * blockNorms      The norms of the block vectors to be targeted
 * recentlyConverged Number of converged vectors in the whole basis V
 *                   = converged+practicallyConverged 
 * numVacancies    Number of Ritz values between left and right that were
 *                 declared converged or practicallyConverged. [left, right]
 *                 can be smaller than the whole V. See while-loop in 
 *                 check_convergence.
 * left, right     Indices indicating which vectors are to be replaced
 * ievMax          Index of the next Ritz value to be targeted by the block
 ******************************************************************************/
static void check_practical_convergence(double *V, double *W, 
   double *evecs, int numLocked, int basisSize, int blockSize, int start, 
   int numToProject, int *iev, int *flags, double *blockNorms, double tol, 
   int *recentlyConverged, int *numVacancies, double *rwork, 
   primme_params *primme) {

   int i, n, dimEvecs;
   int count; 
   double normPr; 
   double normDiff;
   double *overlaps;  /* Pointer in rwork to keep Q'*r */
   double tpone = +1.0e+00, tzero = +0.0e+00, tmone = -1.0e+00; /* constants */

   /* convenience variables */
   n        = primme->nLocal;
   dimEvecs = primme->numOrthoConst + numLocked;

   /* Subdivide rwork */
   overlaps = rwork + dimEvecs*primme->maxBlockSize;

   /* --------------------------------------------------------------- */
   /* Reset any TO_BE_PROJECTED flags back to UNCONVERGED, and  swap  */
   /* again CONVERGED flags toward the end of the block. Now swapping */
   /* occurs in the block basisSize+[start:blockSize]:                */
   /*        [ . . . . . . . P P P P C C C C ]     blockSize          */
   /*                   start^       ^start+numToProject              */
   /* --------------------------------------------------------------- */

   for (i=start; i < blockSize; i++)
      if (flags[iev[i]] == TO_BE_PROJECTED)
         flags[iev[i]] = UNCONVERGED;

   if (*numVacancies > 0)
      swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, 
                   blockNorms, dimEvecs, blockSize, start);

   /* ------------------------------------------------------------------ */
   /* Project the numToProject residuals agaist (I-evecs*evecs')         */
   /* ------------------------------------------------------------------ */

   /* overlaps = evecs'*residuals */

   Num_gemm_dprimme("C", "N", dimEvecs, numToProject, n, tpone, evecs, n, 
                  &W[(basisSize+start)*n], n, tzero, rwork, dimEvecs);

   count = dimEvecs*numToProject;
   (*primme->globalSumDouble)(rwork, overlaps, &count, primme);

   /* residuals = residuals - evecs*overlaps */

   Num_gemm_dprimme("N", "N", n, numToProject, dimEvecs, tmone, evecs, n, 
                  overlaps, dimEvecs, tpone, &W[(basisSize+start)*n], n);

   /* ------------------------------------------------------------------ */
   /* Compute norms^2 of the projected res and the differences from res  */ 
   /* note: ||residual - (I-QQ')residual||=||Q'*r||=||overlaps||         */
   /* ------------------------------------------------------------------ */

   for (i=0; i < numToProject; i++) {
      /* || res - (I-QQ')res || */
      rwork[i] = Num_dot_dprimme(dimEvecs, &overlaps[dimEvecs*i], 1, 
                                &overlaps[dimEvecs*i], 1);
      /* || (I-QQ')res || */
      rwork[i+numToProject] = Num_dot_dprimme(n, &W[(basisSize+start+i)*n], 1,
                                &W[(basisSize+start+i)*n], 1);
   }
   /* global sum ||overlaps|| and ||(I-QQ')r|| */
   count = 2*numToProject;
   (*primme->globalSumDouble)(rwork, &rwork[count], &count, primme);

   /* ------------------------------------------------------------------ */
   /* For each projected residual check whether there is an accuracy     */
   /* problem and, if so, declare it PRACTICALLY_CONVERGED to lock later.*/
   /* normDiff is a lower bound to the attainable accuracy for this pair */
   /* so problems exist only if normDiff > tol. Then, we stop if Tol is  */
   /* the geometric mean of normPr and r.                                */
   /* ------------------------------------------------------------------ */

   for (i=start; i < start+numToProject; i++) {

      normDiff = sqrt(rwork[i-start]);
      normPr   = sqrt(rwork[i-start+numToProject]);

      /* printf(" R Pr |R-Pr| %e %e %e \n", blockNorms[i],normPr,normDiff); */

      if (normDiff >= tol && normPr < tol*tol/blockNorms[i]/2) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile, 
               " PRACTICALLY_CONVERGED %d norm(I-QQt)r %e bound %e\n",
                iev[i],normPr,tol*tol/normDiff);
                  fflush(primme->outputFile);
         }
         flags[iev[i]] = PRACTICALLY_CONVERGED;
         (*numVacancies)++;
         if (numLocked + iev[i] < primme->numEvals){
            recentlyConverged++;
         }
      } /* if practically converged */

   } /* for each projected residual */

   /* ------------------------------------------------------------------ */
   /* Finally swap all practically converged toward the end of the block */
   /* to be contiguous with the converged ones. Swap all relevant arrays */
   /* ------------------------------------------------------------------ */

   start = blockSize - *numVacancies;

   swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, 
                         dimEvecs, blockSize, start);

}

Exemplo n.º 3

0

Exibir arquivo

Arquivo: update_projection_d.c Projeto: SCM-NV/primme

int update_projection_dprimme(double *X, int ldX, double *Y, int ldY,
   double *Z, int ldZ, int nLocal, int numCols, int blockSize, double *rwork,
   int lrwork, int isSymmetric, primme_params *primme) {

   int count, count_doubles, m;
   double tpone = +1.0e+00, tzero = +0.0e+00;

   /* -------------------------- */
   /* Return memory requirements */
   /* -------------------------- */

   if (X == NULL) {
      return (numCols+blockSize)*numCols*2 + (isSymmetric ? 0 : blockSize*numCols*2);
   }

   assert(ldX >= nLocal && ldY >= nLocal && ldZ >= numCols+blockSize);

   /* ------------ */
   /* Quick return */
   /* ------------ */

   if (blockSize <= 0) return 0;

   /* --------------------------------------------------------------------- */
   /* Grow Z by blockSize number of rows and columns all at once            */
   /* --------------------------------------------------------------------- */

   m = numCols+blockSize;
   Num_gemm_dprimme("C", "N", m, blockSize, nLocal, tpone, 
      X, ldX, &Y[ldY*numCols], ldY, tzero, &Z[ldZ*numCols], ldZ);

   /* -------------------------------------------------------------- */
   /* Alternative to the previous call:                              */
   /*    Compute next the additional rows of each new column vector. */
   /*    Only the upper triangular portion is computed and stored.   */
   /* -------------------------------------------------------------- */

   /*
   for (j = numCols; j < numCols+blockSize; j++) {
      Num_gemv_dprimme("C", primme->nLocal, j-numCols+1, tpone,
         &X[primme->nLocal*numCols], primme->nLocal, &Y[primme->nLocal*j], 1, 
         tzero, &rwork[maxCols*(j-numCols)+numCols], 1);  
   }
   */

   if (!isSymmetric) {
      Num_gemm_dprimme("C", "N", blockSize, numCols, nLocal, tpone, 
            &X[ldX*numCols], ldX, Y, ldY, tzero, &Z[numCols], ldZ);
   }

   if (primme->numProcs > 1 && isSymmetric) {
      /* --------------------------------------------------------------------- */
      /* Reduce the upper triangular part of the new columns in Z.             */
      /* --------------------------------------------------------------------- */

      Num_copy_trimatrix_compact_dprimme(&Z[ldZ*numCols], m, blockSize, ldZ,
            numCols, rwork, &count);
      assert(count*2 <= lrwork);

      count_doubles = count;
      primme->globalSumDouble(rwork, (double*)&rwork[count], &count_doubles, primme);

      Num_copy_compact_trimatrix_dprimme(&rwork[count], m, blockSize, numCols,
            &Z[ldZ*numCols], ldZ);
   }
   else if (primme->numProcs > 1 && !isSymmetric) {
      /* --------------------------------------------------------------------- */
      /* Reduce Z(:,numCols:end) and Z(numCols:end,:).                         */
      /* --------------------------------------------------------------------- */

      Num_copy_matrix_dprimme(&Z[ldZ*numCols], m, blockSize, ldZ,
            rwork, m);
      Num_copy_matrix_dprimme(&Z[numCols], blockSize, numCols, ldZ,
            &rwork[m*blockSize], blockSize);
      count = m*blockSize+blockSize*numCols;
      assert(count*2 <= lrwork);

      count_doubles = count;
      primme->globalSumDouble(rwork, (double*)&rwork[count], &count_doubles, primme);

      Num_copy_matrix_dprimme(&rwork[count], m, blockSize, ldZ, &Z[ldZ*numCols], m);
      Num_copy_matrix_dprimme(&rwork[count+m*blockSize], blockSize, numCols, ldZ,
            &Z[numCols], blockSize);
   }

   return 0;
}