C++ (Cpp) Num_gemm_zprimme 예제들

예제 #1

0

파일 보기

파일: convergence_z.c 프로젝트: nikoloutsa/primme

static void compute_resnorms(Complex_Z *V, Complex_Z *W, Complex_Z *hVecs,
                             double *hVals, int basisSize, double *blockNorms, int *iev, int left,
                             int right, void *rwork, primme_params *primme) {

    int i;            /* Loop variable                             */
    int numResiduals; /* Number of residual vectors to be computed */
    double *dwork = (double *) rwork;  /* pointer casting rwork to double */
    Complex_Z ztmp;     /* temp var holding shift                    */
    Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};       /* constants */

    numResiduals = right - left + 1;

    /* We want to compute residuals r = Ax-hVal*x for the Ritz vectors x */
    /* Eqivalently, r = A*V*hVec - hval*V*hVec = W*hVec - hVal*V*hVec.   */

    /* Compute the Ritz vectors */

    Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize,
                     tpone, V, primme->nLocal, hVecs, basisSize, tzero,
                     &V[primme->nLocal*(basisSize+left)], primme->nLocal);

    /* Compute W*hVecs */

    Num_gemm_zprimme("N", "N", primme->nLocal, numResiduals, basisSize,
                     tpone, W, primme->nLocal, hVecs, basisSize, tzero,
                     &W[primme->nLocal*(basisSize+left)], primme->nLocal);

    /* Compute the residuals */

    for (i=left; i <= right; i++) {
        {
            ztmp.r = -hVals[iev[i]];
            ztmp.i = 0.0L;
        }
        Num_axpy_zprimme(primme->nLocal, ztmp, &V[primme->nLocal*(basisSize+i)],
                         1, &W[primme->nLocal*(basisSize+i)], 1);
    }

    /* Compute the residual norms */

    for (i=left; i <= right; i++) {
        ztmp = Num_dot_zprimme(primme->nLocal, &W[primme->nLocal*(basisSize+i)],
                               1, &W[primme->nLocal*(basisSize+i)] , 1);
        dwork[i] = ztmp.r;
    }

    (*primme->globalSumDouble)(&dwork[left], &blockNorms[left], &numResiduals,
                               primme);

    for (i=left; i <= right; i++) {
        blockNorms[i] = sqrt(blockNorms[i]);
    }

}

예제 #2

0

파일 보기

파일: restart_z.c 프로젝트: nikoloutsa/primme

static void compute_submatrix(Complex_Z *previousHVecs, int numPrevRetained, 
   Complex_Z *H, int basisSize, int maxBasisSize, Complex_Z *subMatrix, 
   Complex_Z *rwork) {

   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};

   Num_symm_zprimme("L", "U", basisSize, numPrevRetained, tpone, H, 
      maxBasisSize, previousHVecs, maxBasisSize, tzero, rwork, basisSize);
   
   Num_gemm_zprimme("C", "N", numPrevRetained, numPrevRetained, basisSize,
      tpone, previousHVecs, basisSize, rwork, basisSize, tzero, subMatrix, 
      numPrevRetained);
}

예제 #3

0

파일 보기

파일: update_projection_z.c 프로젝트: nikoloutsa/primme

void update_projection_zprimme(Complex_Z *X, Complex_Z *Y, Complex_Z *Z, 
   int numCols, int maxCols, int blockSize, Complex_Z *rwork, 
   primme_params *primme) {

   int j;    /* Loop variable  */ 
   int count;
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};

   /* --------------------------------------------------------------------- */
   /* Zero the work array to prevent floating point traps during all-reduce */
   /* --------------------------------------------------------------------- */

   for (j = 0; j < maxCols*blockSize; j++) {
      rwork[j] = tzero;
   }

   /* --------------------------------------------------------------------- */
   /* Grow Z by blockSize number of rows and columns all at once            */
   /* --------------------------------------------------------------------- */

   Num_gemm_zprimme("C", "N", numCols+blockSize, blockSize, primme->nLocal, tpone, 
      X, primme->nLocal, &Y[primme->nLocal*numCols], primme->nLocal, 
      tzero, rwork, maxCols);

   /* -------------------------------------------------------------- */
   /* Alternative to the previous call:                              */
   /*    Compute next the additional rows of each new column vector. */
   /*    Only the upper triangular portion is computed and stored.   */
   /* -------------------------------------------------------------- */

   /*
   for (j = numCols; j < numCols+blockSize; j++) {
      Num_gemv_zprimme("C", primme->nLocal, j-numCols+1, tpone,
         &X[primme->nLocal*numCols], primme->nLocal, &Y[primme->nLocal*j], 1, 
         tzero, &rwork[maxCols*(j-numCols)+numCols], 1);  
   }
   */
   
   count = 2*maxCols*blockSize;
   (*primme->globalSumDouble)(rwork, &Z[maxCols*numCols], &count, primme);
}

예제 #4

0

파일 보기

파일: restart_z.c 프로젝트: nikoloutsa/primme

static void restart_X(Complex_Z *X, Complex_Z *hVecs, int nLocal, 
   int basisSize, int restartSize, Complex_Z *rwork, int rworkSize) {

   int i, k;  /* Loop variables */
   int AvailRows = min(rworkSize/restartSize, nLocal);
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};
   i = 0;

   while (i < nLocal) {
      /* Block matrix multiply */
      Num_gemm_zprimme("N", "N", AvailRows, restartSize, basisSize, tpone,
         &X[i], nLocal, hVecs, basisSize, tzero, rwork, AvailRows );

      /* Copy the result in the desired location of X */
      for (k=0; k < restartSize; k++) {
         Num_zcopy_zprimme(AvailRows, &rwork[AvailRows*k],1, &X[i+nLocal*k], 1);
      }
      i = i+AvailRows;
      AvailRows = min(AvailRows, nLocal-i);
   }
}

예제 #5

0

파일 보기

파일: convergence_z.c 프로젝트: anygo/array4j

/*******************************************************************************
 * Subroutine check_practical_convergence()
 *
 *       This function is called after swaping has pushed converged (C)
 *       and to be projected (P) evecs at the end of blockSize.
 *       Makes C, P contiguous, projects P, checks and finds practically 
 *       converged evecs (p), and makes p contiguous to C, updating
 *       flags[], blockNorms[], and numVacancies. Eg:
 *
 *          blockSize 
 *       |------CPCCPPCP|  -> |------PPPPCCCC|  ->  |------PpPpCCCC| ->
 *       |------PPppCCCC|, numVacancies = 6
 *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                    
 * INPUT ARRAYS AND PARAMETERS
 * ---------------------------
 * evecs	   The locked eigenvectors
 * numLocked	   The number of locked eigenvectors
 * basisSize       Number of vectors in the basis
 * blockSize       The number of block vectors
 * start 	   Starting index in V,W of vectors converged or to be projected
 * numToProject	   The number of vectors to project. 
 * tol 		   The required convergence tolerance
 * rwork	   real work array of size: 2*maxEvecsSize*primme->maxBlockSize
 * primme          Structure containing various solver parameters
 *
 *
 * OUTPUT ARRAYS AND PARAMETERS
 * ----------------------------
 * V		   The basis vectors
 * W 		   A*V
 * iev             Indicates which Ritz value each block vector corresponds to
 * flags           Indicates which Ritz pairs have converged
 * blockNorms      The norms of the block vectors to be targeted
 * recentlyConverged Number of converged vectors in the whole basis V
 * 		     = converged+practicallyConverged 
 * numVacancies    Number of Ritz values between left and right that were
 *                 declared converged or practicallyConverged. [left, right]
 *                 can be smaller than the whole V. See while-loop in 
 *                 check_convergence.
 * left, right     Indices indicating which vectors are to be replaced
 * ievMax          Index of the next Ritz value to be targeted by the block
 ******************************************************************************/
static void check_practical_convergence(Complex_Z *V, Complex_Z *W, 
   Complex_Z *evecs, int numLocked, int basisSize, int blockSize, int start, 
   int numToProject, int *iev, int *flags, double *blockNorms, double tol, 
   int *recentlyConverged, int *numVacancies, Complex_Z *rwork, 
   primme_params *primme) {

   int i, n, dimEvecs;
   int count; 
   double normPr; 
   double normDiff;
   Complex_Z *overlaps;  /* Pointer in rwork to keep Q'*r */
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}, tmone = {-1.0e+00,+0.0e00}; /* constants */

   /* convenience variables */
   n        = primme->nLocal;
   dimEvecs = primme->numOrthoConst + numLocked;

   /* Subdivide rwork */
   overlaps = rwork + dimEvecs*primme->maxBlockSize;

   /* --------------------------------------------------------------- */
   /* Reset any TO_BE_PROJECTED flags back to UNCONVERGED, and  swap  */
   /* again CONVERGED flags toward the end of the block. Now swapping */
   /* occurs in the block basisSize+[start:blockSize]:                */
   /*        [ . . . . . . . P P P P C C C C ]     blockSize          */
   /*        	        start^       ^start+numToProject              */
   /* --------------------------------------------------------------- */

   for (i=start; i < blockSize; i++)
      if (flags[iev[i]] == TO_BE_PROJECTED)
         flags[iev[i]] = UNCONVERGED;

   if (*numVacancies > 0)
      swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, 
		   blockNorms, dimEvecs, blockSize, start);

   /* ------------------------------------------------------------------ */
   /* Project the numToProject residuals agaist (I-evecs*evecs')         */
   /* ------------------------------------------------------------------ */

   /* overlaps = evecs'*residuals */

   Num_gemm_zprimme("C", "N", dimEvecs, numToProject, n, tpone, evecs, n, 
		  &W[(basisSize+start)*n], n, tzero, rwork, dimEvecs);

   // In Complex, the size of the array to globalSum is twice as large
   count = 2*(dimEvecs*numToProject);
   (*primme->globalSumDouble)(rwork, overlaps, &count, primme);

   /* residuals = residuals - evecs*overlaps */

   Num_gemm_zprimme("N", "N", n, numToProject, dimEvecs, tmone, evecs, n, 
		  overlaps, dimEvecs, tpone, &W[(basisSize+start)*n], n);

   /* ------------------------------------------------------------------ */
   /* Compute norms^2 of the projected res and the differences from res  */ 
   /* note: ||residual - (I-QQ')residual||=||Q'*r||=||overlaps||         */
   /* ------------------------------------------------------------------ */

   for (i=0; i < numToProject; i++) {
      /* || res - (I-QQ')res || */
      rwork[i] = Num_dot_zprimme(dimEvecs, &overlaps[dimEvecs*i], 1, 
				&overlaps[dimEvecs*i], 1);
      /* || (I-QQ')res || */
      rwork[i+numToProject] = Num_dot_zprimme(n, &W[(basisSize+start+i)*n], 1,
				&W[(basisSize+start+i)*n], 1);
   }
   /* global sum ||overlaps|| and ||(I-QQ')r|| */
   // In Complex, the size of the array to globalSum is twice as large
   count = 2*(2*numToProject);
   (*primme->globalSumDouble)(rwork, &rwork[count], &count, primme);

   /* ------------------------------------------------------------------ */
   /* For each projected residual check whether there is an accuracy     */
   /* problem and, if so, declare it PRACTICALLY_CONVERGED to lock later.*/
   /* normDiff is a lower bound to the attainable accuracy for this pair */
   /* so problems exist only if normDiff > tol. Then, we stop if Tol is  */
   /* the geometric mean of normPr and r.                                */
   /* ------------------------------------------------------------------ */

   for (i=start; i < start+numToProject; i++) {

      normDiff = sqrt(rwork[i-start].r);
      normPr   = sqrt(rwork[i-start+numToProject].r);

      //printf(" R Pr |R-Pr| %e %e %e \n", blockNorms[i],normPr,normDiff); 

      if (normDiff >= tol && normPr < tol*tol/blockNorms[i]/2) {
	 if (primme->printLevel >= 5 && primme->procID == 0) {
	    fprintf(primme->outputFile, 
	       " PRACTICALLY_CONVERGED %d norm(I-QQt)r %e bound %e\n",
		iev[i],normPr,tol*tol/normDiff);
		  fflush(primme->outputFile);
	 }
	 flags[iev[i]] = PRACTICALLY_CONVERGED;
  	 (*numVacancies)++;
	 if (numLocked + iev[i] < primme->numEvals){
	    recentlyConverged++;
	 }
      } // if practically converged

   } // for each projected residual 

   /* ------------------------------------------------------------------ */
   /* Finally swap all practically converged toward the end of the block */
   /* to be contiguous with the converged ones. Swap all relevant arrays */
   /* ------------------------------------------------------------------ */

   start = blockSize - *numVacancies;

   swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, blockNorms, 
			 dimEvecs, blockSize, start);

}