Exemple #1
0
static void apply_preconditioner_block(Complex_Z *v, Complex_Z *result, 
		int blockSize, primme_params *primme) {
	 
   if (primme->correctionParams.precondition) {

      (*primme->applyPreconditioner)(v, result, &blockSize, primme);
      primme->stats.numPreconds += blockSize;
   }
   else {
      Num_zcopy_zprimme(primme->nLocal*blockSize, v, 1, result, 1);
   }

}
Exemple #2
0
static void restart_X(Complex_Z *X, Complex_Z *hVecs, int nLocal, 
   int basisSize, int restartSize, Complex_Z *rwork, int rworkSize) {

   int i, k;  /* Loop variables */
   int AvailRows = min(rworkSize/restartSize, nLocal);
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};
   i = 0;

   while (i < nLocal) {
      /* Block matrix multiply */
      Num_gemm_zprimme("N", "N", AvailRows, restartSize, basisSize, tpone,
         &X[i], nLocal, hVecs, basisSize, tzero, rwork, AvailRows );

      /* Copy the result in the desired location of X */
      for (k=0; k < restartSize; k++) {
         Num_zcopy_zprimme(AvailRows, &rwork[AvailRows*k],1, &X[i+nLocal*k], 1);
      }
      i = i+AvailRows;
      AvailRows = min(AvailRows, nLocal-i);
   }
}
Exemple #3
0
static int apply_projected_preconditioner(Complex_Z *v, Complex_Z *Q, 
   Complex_Z *RprojectorQ, Complex_Z *x, Complex_Z *RprojectorX, 
   int sizeRprojectorQ, int sizeRprojectorX, Complex_Z *xKinvx, 
   Complex_Z *UDU, int *ipivot, Complex_Z *result, Complex_Z *rwork, 
   primme_params *primme) {  

   int ONE = 1;
   int ret;

   if (primme->correctionParams.precondition) {
      /* Place K^{-1}v in result */
      (*primme->applyPreconditioner)(v, result, &ONE, primme);
      primme->stats.numPreconds += 1;
   }
   else {
      Num_zcopy_zprimme(primme->nLocal, v, 1, result, 1);
   }

   ret = apply_skew_projector(Q, RprojectorQ, UDU, ipivot, sizeRprojectorQ,
		           result, rwork, primme);
   if (ret != 0) {
         primme_PushErrorMessage(Primme_apply_projected_preconditioner, 
            Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme);
         return APPLYSKEWPROJECTOR_FAILURE;
   }

   apply_skew_projector(x, RprojectorX, xKinvx, ipivot, sizeRprojectorX,
		           result, rwork, primme);
   if (ret != 0) {
         primme_PushErrorMessage(Primme_apply_projected_preconditioner, 
            Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme);
         return APPLYSKEWPROJECTOR_FAILURE;
   }

   return 0;
}
Exemple #4
0
static void setup_JD_projectors(Complex_Z *x, Complex_Z *r, Complex_Z *evecs, 
   Complex_Z *evecsHat, Complex_Z *Kinvx, Complex_Z *xKinvx, 
   Complex_Z **Lprojector, Complex_Z **RprojectorQ, Complex_Z **RprojectorX, 
   int *sizeLprojector, int *sizeRprojectorQ, int *sizeRprojectorX, 
   int numLocked, int numConverged, primme_params *primme) {

   int n, sizeEvecs;
   int ONE = 1;
   // In Complex, the size of the array to globalSum is twice as large
   int count = 2;
   Complex_Z xKinvx_local;
   Complex_Z tpone = {+1.0e+00,+0.0e00};

   *sizeLprojector  = 0;
   *sizeRprojectorQ = 0;
   *sizeRprojectorX = 0;
   *Lprojector  = NULL;
   *RprojectorQ = NULL;
   *RprojectorX = NULL;

   n = primme->nLocal;
   if (primme->locking) 
      sizeEvecs = primme->numOrthoConst+numLocked;
   else
      sizeEvecs = primme->numOrthoConst+numConverged;
   
   /* --------------------------------------------------------*/
   /* Set up the left projector arrays. Make x adjacent to Q. */
   /* --------------------------------------------------------*/
   
   if (primme->correctionParams.projectors.LeftQ) {
   
         *sizeLprojector = sizeEvecs;
         *Lprojector = evecs;
         if (primme->correctionParams.projectors.LeftX) {
            Num_zcopy_zprimme(n, x, 1, &evecs[sizeEvecs*n], 1);
            *sizeLprojector = *sizeLprojector + 1;
         }
   }
   else {
      if (primme->correctionParams.projectors.LeftX) {
         *Lprojector = x;
         *sizeLprojector = 1;
      }
   }
      
   /* --------------------------------------------------------*/
   /* Set up the right projector arrays. Q and x separately   */
   /* --------------------------------------------------------*/
   
   /* ------------*/
   /* First for Q */
   /* ------------*/
   if (primme->correctionParams.projectors.RightQ) {
   
      if (primme->correctionParams.precondition    &&
	  primme->correctionParams.projectors.SkewQ) {
         *RprojectorQ = evecsHat;       /* Use the K^(-1)evecs array */
      }				       
      else {  /* Right Q but not SkewQ */
         *RprojectorQ = evecs;          /* Use just the evecs array. */
      }
      *sizeRprojectorQ = sizeEvecs;
   
   } 
   else { /* if no RightQ projector */
   
      *RprojectorQ = NULL;       
      *sizeRprojectorQ = 0;
   }
   
   /* ------------*/
   /* Then for x  */
   /* ------------*/
   if (primme->correctionParams.projectors.RightX) {
   
      if (primme->correctionParams.precondition   &&
	  primme->correctionParams.projectors.SkewX) {
         (*primme->applyPreconditioner)(x, Kinvx, &ONE, primme);
	 primme->stats.numPreconds += 1;
         *RprojectorX  = Kinvx;
         xKinvx_local = Num_dot_zprimme(primme->nLocal, x, 1, Kinvx, 1);
         (*primme->globalSumDouble)(&xKinvx_local, xKinvx, &count, primme);
      }      
      else {
         *RprojectorX = x;
         *xKinvx = tpone;
      }
      *sizeRprojectorX = 1;
   }
   else { 
         *RprojectorX = NULL;
         *sizeRprojectorX = 0;
         *xKinvx = tpone;
   }
         
} /* setup_JD_projectors */
Exemple #5
0
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, 
   Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, 
   int numLocked, int numConvergedStored, double *ritzVals, 
   double *prevRitzVals, int *flags, int basisSize, double *blockNorms, 
   int *iev, int blockSize, double eresTol, double machEps, 
   double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, 
   primme_params *primme) {

   int blockIndex;         /* Loop index.  Ranges from 0..blockSize-1.       */
   int ritzIndex;          /* Ritz value index blockIndex corresponds to.    */
                           /* Possible values range from 0..basisSize-1.     */
   int sortedIndex;        /* Ritz value index in sortedRitzVals, blockIndex */
   			   /* corresponds to. Range 0..numLocked+basisSize-1 */
   int neededRsize;        /* Needed size for rwork. If not enough return    */
   int linSolverRWorkSize; /* Size of the linSolverRWork array.              */
   int *ilev;              /* Array of size blockSize.  Maps the target Ritz */
                           /* values to their positions in the sortedEvals   */
                           /* array.                                         */
   int sizeLprojector;     /* Sizes of the various left/right projectors     */
   int sizeRprojectorQ;	   /* These will be 0/1/or numOrthConstr+numLocked   */
   int sizeRprojectorX;    /* or numOrthConstr+numConvergedStored w/o locking*/

   static int numPrevRitzVals = 0; /* Size of prevRitzVals                   */
   int ret;                /* Return code.                                   */
   Complex_Z *r, *x, *sol;  /* Residual, Ritz vector, and correction.         */
   Complex_Z *linSolverRWork;/* Workspace needed by linear solver.            */
   double *sortedRitzVals; /* Sorted array of current and converged Ritz     */
			   /* values.  Size of array is numLocked+basisSize. */
   double *blockOfShifts;  /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/
   double *approxOlsenEps; /* Shifts for approximate Olsen implementation    */
   Complex_Z *Kinvx;	   /* Workspace to store K^{-1}x                     */
   Complex_Z *Lprojector;   /* Q pointer for (I-Q*Q'). Usually points to evecs*/
   Complex_Z *RprojectorQ;  /* May point to evecs/evecsHat depending on skewQ */
   Complex_Z *RprojectorX;  /* May point to x/Kinvx depending on skewX        */

   Complex_Z xKinvx;                        /* Stores x'*K^{-1}x if needed    */
   double eval, shift, robustShift;       /* robust shift values.           */
   Complex_Z tmpShift;			  /* Temp shift for daxpy           */

   //------------------------------------------------------------
   // Subdivide the workspace with pointers, and figure out 
   // the total amount of needed real workspace (neededRsize)
   //------------------------------------------------------------

   /* needed worksize */
   neededRsize = 0;
   Kinvx       = rwork;
   /* Kinvx will have nonzero size if precond and both RightX and SkewX */
   if (primme->correctionParams.precondition &&        
       primme->correctionParams.projectors.RightX &&  
       primme->correctionParams.projectors.SkewX ) { 

      /* OLSEN's method requires a block, but JDQMR is vector by vector */
      if (primme->correctionParams.maxInnerIterations == 0) {    
	 sol = Kinvx + primme->nLocal*blockSize;
         neededRsize = neededRsize + primme->nLocal*blockSize;
      }
      else { 					         
         sol = Kinvx + primme->nLocal;
         neededRsize = neededRsize + primme->nLocal;
      }
   }
   else {
      sol = Kinvx + 0;
   }
   if (primme->correctionParams.maxInnerIterations == 0) {    
      linSolverRWork = sol + 0; 		  /* sol not needed for GD */
      linSolverRWorkSize = 0;			  /* No inner solver used  */
   }
   else {
      linSolverRWork = sol + primme->nLocal;      /* sol needed in innerJD */
      neededRsize = neededRsize + primme->nLocal;
      linSolverRWorkSize = 			  /* Inner solver worksize */
	      4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals);
      neededRsize = neededRsize + linSolverRWorkSize;
   }
   sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize);
   blockOfShifts  = sortedRitzVals + (numLocked+basisSize);
   approxOlsenEps = blockOfShifts  + blockSize;
   neededRsize = neededRsize + numLocked+basisSize + 2*blockSize;

   if (neededRsize > rworkSize) {
      return(neededRsize);
   }

   // Subdivide also the integer work space
   ilev = iwork;          // of size blockSize

   //------------------------------------------------------------
   //  Figuring out preconditioning shifts  (robust, Olsen, etc)
   //------------------------------------------------------------
   /* blockOfShifts will contain the preconditioning shifts:               */
   /* either Ritz values or robustShifts computed below. These shifts      */
   /* will be used in the correction equations or in inverting (K-sigma I) */
   /* approxOlsenEps will contain error approximations for eigenavalues    */
   /* to be used for Olsen's method (when innerIterations =0).             */
    
   if (primme->locking) {
      /* Combine the sorted list of locked Ritz values with the sorted  */
      /* list of current Ritz values, ritzVals.  The merging of the two */
      /* lists lockedEvals and ritzVals is stored in sortedRitzVals.    */

      mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, 
         	   sortedRitzVals, ilev, blockSize, primme);
   }
   else {
      /* Then the sorted evals are simply the ritzVals, targeted as iev */

      sortedRitzVals = ritzVals;
      ilev = iev;
   }

   /*-----------------------------------------------------------------*/
   /* For interior eigenpairs, use the user provided shifts           */
   /*-----------------------------------------------------------------*/

   if (primme->target != primme_smallest && primme->target != primme_largest) {

      for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
         sortedIndex = ilev[blockIndex];
         blockOfShifts[blockIndex] = 
	    primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ];
         if (sortedIndex < numPrevRitzVals) {
            approxOlsenEps[blockIndex] = 
            fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
         }  
         else {
            approxOlsenEps[blockIndex] = blockNorms[blockIndex];
         }  
      } /* for loop */

   } /* user provided shifts */
   else {    
   /*-----------------------------------------------------------------*/
   /* else it is primme_smallest or primme_largest                    */
   /*-----------------------------------------------------------------*/

      if (primme->correctionParams.robustShifts) { 
         /*----------------------------------------------------*/
         /* Subtract/add a robust shift from/to the Ritz value */
         /*----------------------------------------------------*/

         /* Find the robust shift for each block vector */
         for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
   
	    sortedIndex = ilev[blockIndex];
	    eval = sortedRitzVals[sortedIndex];
   
            robustShift = computeRobustShift(blockIndex, 
	      blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, 
	      sortedRitzVals, &approxOlsenEps[blockIndex], 
	      numLocked+basisSize, ilev, primme);
   
            /* Subtract/add the shift if looking for the smallest/largest  */
	    /* eigenvalues, Do not go beyond the previous computed eigval  */
       
            if (primme->target == primme_smallest) {
               blockOfShifts[blockIndex] = eval - robustShift;
	       if (sortedIndex > 0) blockOfShifts[blockIndex] = 
	          max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
	    }
	    else {
               blockOfShifts[blockIndex] = eval + robustShift;
	       if (sortedIndex > 0) blockOfShifts[blockIndex] = 
		  min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
            } // robust shifting 
   
         }  /* for loop */
   
      }  /* endif robust shifts */
      else {
         /*--------------------------------------------------------------*/
         /* Otherwise, the shifts for both preconditioner and correction */
         /* equation should be just the Ritz values. For Olsen's method, */
         /* the shifts for r-eps*x, are chosen as the difference in Ritz */
	 /* value between successive iterations.                         */
         /*--------------------------------------------------------------*/
   
         for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
            ritzIndex   =  iev[blockIndex];
	    sortedIndex = ilev[blockIndex];
            blockOfShifts[blockIndex] = ritzVals[ritzIndex];
 	    if (sortedIndex < numPrevRitzVals) {
               approxOlsenEps[blockIndex] = 
               fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
	    }
	    else {
               approxOlsenEps[blockIndex] = blockNorms[blockIndex]; 
	    }
         } /* for loop */
      } /* else no robust shifts */
   } /* else primme_smallest or primme_largest */

   
   /* Remember the previous ritz values*/

   numPrevRitzVals = numLocked+basisSize;
   Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1);

   // Equip the primme struct with the blockOfShifts, in case the user
   // wants to precondition (K-sigma_i I)^{-1} with a different shift 
   // for each vector

   primme->ShiftsForPreconditioner = blockOfShifts;

   //------------------------------------------------------------
   //  Generalized Davidson variants -- No inner iterations
   //------------------------------------------------------------
   if (primme->correctionParams.maxInnerIterations == 0) {
      // This is Generalized Davidson or approximate Olsen's method. 
      // Perform block preconditioning (with or without projections)
      
      r = &W[primme->nLocal*basisSize];    // All the block residuals
      x = &V[primme->nLocal*basisSize];    // All the block Ritz vectors
      
      if ( primme->correctionParams.projectors.RightX &&
	   primme->correctionParams.projectors.SkewX    ) {    
 	  // Compute exact Olsen's projected preconditioner. This is 
	  // expensive and rarely improves anything! Included for completeness.
	  
	  Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme);
      }
      else {
         if ( primme->correctionParams.projectors.RightX ) {   
            // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx 
	    // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r

    	    for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
	       // Compute r_i = r_i - err_i * x_i 
      	       {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;}
               Num_axpy_zprimme(primme->nLocal, tmpShift,
	       &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1);
	    } //for
	 }

	 // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) 

         apply_preconditioner_block(r, x, blockSize, primme );
      }
   }
   //------------------------------------------------------------
   //  JDQMR --- JD inner-outer variants
   //------------------------------------------------------------
   else {  // maxInnerIterations > 0  We perform inner-outer JDQMR.

      /* Solve the correction for each block vector. */

      for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {

         r = &W[primme->nLocal*(basisSize+blockIndex)];
         x = &V[primme->nLocal*(basisSize+blockIndex)];

	 /* Set up the left/right/skew projectors for JDQMR.        */
	 /* The pointers Lprojector, Rprojector(Q/X) point to the   */
	 /* appropriate arrays for use in the projection step       */

	 setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, 
   	    &Lprojector, &RprojectorQ, &RprojectorX, 
	    &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX,
   	    numLocked, numConvergedStored, primme);

         /* Map the index of the block vector to its corresponding eigenvalue */
         /* index, and the shift for the correction equation. Also make the   */
	 /* shift available to primme, in case (K-shift I)^-1 is needed       */

         ritzIndex = iev[blockIndex];
         shift = blockOfShifts[blockIndex];
         primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex];

         ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, 
	    evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, 
	    RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX,
	    sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, 
	    machEps, linSolverRWork, linSolverRWorkSize, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve,
			    ret, __FILE__, __LINE__, primme);
            return (INNER_SOLVE_FAILURE);
         }

         Num_zcopy_zprimme(primme->nLocal, sol, 1, 
            &V[primme->nLocal*(basisSize+blockIndex)], 1);

      } // end for each block vector
   } // JDqmr variants

   return 0;

}
Exemple #6
0
int inner_solve_zprimme(Complex_Z *x, Complex_Z *r, double *rnorm, 
   Complex_Z *evecs, Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, 
   Complex_Z *xKinvx, Complex_Z *Lprojector, Complex_Z *RprojectorQ, 
   Complex_Z *RprojectorX, int sizeLprojector, int sizeRprojectorQ, 
   int sizeRprojectorX, Complex_Z *sol, double eval, double shift, 
   double eresTol, double aNormEstimate, double machEps, Complex_Z *rwork, 
   int rworkSize, primme_params *primme) {

   int i;             /* loop variable                                      */
   int workSpaceSize; /* Size of local work array.                          */
   int numIts;        /* Number of inner iterations                         */
   int ret;           /* Return value used for error checking.              */
   int maxIterations; /* The maximum # iterations allowed. Depends on primme  */

   Complex_Z *workSpace; /* Workspace needed by UDU routine */

   /* QMR parameters */

   Complex_Z *g, *d, *delta, *w;
   Complex_Z alpha_prev, beta, rho_prev, rho;
   Complex_Z ztmp;
   double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; 

   /* Parameters used to dynamically update eigenpair */
   Complex_Z Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev;
   Complex_Z eta;

   double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R;
   double Gamma_prev, Phi_prev;
   double Gamma, Phi;
   double gamma;

   /* The convergence criteria of the inner linear system must satisfy:       */
   /* || current residual || <= relativeTolerance * || initial residual ||    */
   /*                                               + absoluteTol             */

   double relativeTolerance; 
   double absoluteTolerance;
   double LTolerance, ETolerance;

   /* Some constants 							      */
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};

   /* -------------------------------------------*/
   /* Subdivide the workspace into needed arrays */
   /* -------------------------------------------*/

   g      = rwork;
   d      = g + primme->nLocal;
   delta  = d + primme->nLocal;
   w      = delta + primme->nLocal;
   workSpace = w + primme->nLocal;  // This needs at least 2*numOrth+NumEvals)
   
   workSpaceSize = rworkSize - (workSpace - rwork);
   
   /* -----------------------------------------*/
   /* Set up convergence criteria by Tolerance */
   /* -----------------------------------------*/

   if (primme->aNorm <= 0.0L) {
      absoluteTolerance = aNormEstimate*machEps;
      eresTol = eresTol*aNormEstimate;
   }
   else {
      absoluteTolerance = primme->aNorm*machEps;
   }
   tau_prev = tau_init = *rnorm;       /* Assumes zero initial guess */
   LTolerance = eresTol;

   // Andreas: note that eigenresidual tol may not be achievable, because we
   // iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P.
   if (primme->correctionParams.convTest == primme_adaptive) {
      ETolerance = max(eresTol/1.8L, absoluteTolerance);
      LTolerance = ETolerance;
   }
   else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) {
      LTolerance = max(eresTol/1.8L, absoluteTolerance);
      ETolerance = max(tau_init*0.1L, LTolerance);
   }
   else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) {
      relativeTolerance = pow(primme->correctionParams.relTolBase, 
         (double)-primme->stats.numOuterIterations);
      LTolerance = relativeTolerance * tau_init 
	           + absoluteTolerance + eresTol;
     //printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n",
     //relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate);
   }
   
   /* --------------------------------------------------------*/
   /* Set up convergence criteria by max number of iterations */
   /* --------------------------------------------------------*/

   /* compute first total number of remaining matvecs */

   maxIterations = primme->maxMatvecs - primme->stats.numMatvecs;

   /* Perform primme.maxInnerIterations, but do not exceed total remaining */
   if (primme->correctionParams.maxInnerIterations > 0) {

      maxIterations = min(primme->correctionParams.maxInnerIterations, 
		          maxIterations);
   }

   /* --------------------------------------------------------*/
   /* Rest of initializations                                 */
   /* --------------------------------------------------------*/

   /* Assume zero initial guess */
   Num_zcopy_zprimme(primme->nLocal, r, 1, g, 1);

   ret = apply_projected_preconditioner(g, evecs, RprojectorQ, 
	   x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, 
	   xKinvx, UDU, ipivot, d, workSpace, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_inner_solve, 
         Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, 
         primme);
      return APPLYPROJECTEDPRECONDITIONER_FAILURE;
   }
      
   Theta_prev = 0.0L;
   eval_prev = eval;
   rho_prev = dist_dot(g, 1, d, 1, primme);
      
   /* Initialize recurrences used to dynamically update the eigenpair */

   Beta_prev = Delta_prev = Psi_prev = tzero;
   Gamma_prev = Phi_prev = 0.0L;

   /* other initializations */
   for (i = 0; i < primme->nLocal; i++) {
      delta[i] = tzero;
      sol[i] = tzero;
   }

   numIts = 0;
      
   /*----------------------------------------------------------------------*/
   /*------------------------ Begin Inner Loop ----------------------------*/
   /*----------------------------------------------------------------------*/

   while (numIts < maxIterations) {

      apply_projected_matrix(d, shift, Lprojector, sizeLprojector, 
		             w, workSpace, primme);
      ztmp = dist_dot(d, 1, w, 1, primme);
      sigma_prev = ztmp.r;

      if (sigma_prev == 0.0L) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev);
         }
         break;
      }

      zd_mult_primme(alpha_prev, rho_prev, 1.0L/sigma_prev);
      if (z_abs_primme(alpha_prev) < machEps || z_abs_primme(alpha_prev) > 1.0L/machEps){
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev);
         }
	 break;
      }

      ztmp.r = -alpha_prev.r;
      ztmp.i = -alpha_prev.i;
      Num_axpy_zprimme(primme->nLocal, ztmp, w, 1, g, 1);

      ztmp = dist_dot(g, 1, g, 1, primme);
      Theta = ztmp.r;
      Theta = sqrt(Theta);
      Theta = Theta/tau_prev;
      c = 1.0L/sqrt(1+Theta*Theta);
      tau = tau_prev*Theta*c;

      gamma = c*c*Theta_prev*Theta_prev;
      {ztmp.r = gamma; ztmp.i = 0.0L;}
      zd_mult_primme(eta, alpha_prev, c*c);
      Num_scal_zprimme(primme->nLocal, ztmp, delta, 1);
      Num_axpy_zprimme(primme->nLocal, eta, d, 1, delta, 1);
      Num_axpy_zprimme(primme->nLocal, tpone, delta, 1, sol, 1);
      numIts++;

      if (z_abs_primme(rho_prev) == 0.0L ) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because abs(rho) %e\n",
	       z_abs_primme(rho_prev));
         }
         break;
      }
      
      if (tau < LTolerance) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance);
         }
         break;
      }
      else if (primme->correctionParams.convTest == primme_adaptive_ETolerance
	    || primme->correctionParams.convTest == primme_adaptive) {
         /* --------------------------------------------------------*/
	 /* Adaptive stopping based on dynamic monitoring of eResid */
         /* --------------------------------------------------------*/

         /* Update the Ritz value and eigenresidual using the */
         /* following recurrences.                            */
      
         zd_mult_primme(Delta, Delta_prev, gamma);
	 zz_mult_primme(ztmp, eta, rho_prev);
	 z_add_primme(Delta, Delta, ztmp);
         z_sub_primme(Beta, Beta_prev, Delta);
         Phi = gamma*gamma*Phi_prev + z_abs_primme(eta)*z_abs_primme(eta)*sigma_prev;
         zd_mult_primme(Psi, Psi_prev, gamma);
	 {ztmp.r = gamma*Phi_prev; ztmp.i = 0.0L;}
	 z_add_primme(Psi, Psi, ztmp);
         Gamma = Gamma_prev + 2.0L*Psi.r + Phi;

         /* Perform the update: update the eigenvalue and the square of the  */
         /* residual norm.                                                   */
	 
         ztmp = dist_dot(sol, 1, sol, 1, primme);
	 dot_sol = ztmp.r;
         eval_updated = shift + (eval - shift + 2*Beta.r + Gamma)/(1 + dot_sol);
         eres2_updated = (tau*tau)/(1 + dot_sol) 
            + ((eval - shift)*(eval - shift) + z_abs_primme(Beta)*z_abs_primme(Beta)
	       + 2.0L*(eval - shift)*Beta.r)/(1 + dot_sol) 
	    - (eval_updated - shift)*(eval_updated - shift);

	 /* If numerical problems, let eres about the same as tau */
	 if (eres2_updated < 0){
            eres_updated = sqrt( (tau*tau)/(1 + dot_sol) );
	 }
	 else 
            eres_updated = sqrt(eres2_updated);

         /* --------------------------------------------------------*/
	 /* Stopping criteria                                       */
         /* --------------------------------------------------------*/

         R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol);
        
	 if ( tau <= R*eres_updated || eres_updated <= tau*R ) {
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, " tau < R eres \n");
            }
	    break;
	 }

	 if (primme->target == primme_smallest && eval_updated > eval_prev) {
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eval_updated > eval_prev\n");
            }
	    break;
	 }
	 else if (primme->target == primme_largest && eval_updated < eval_prev){
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eval_updated < eval_prev\n");
	    }
	    break;
	 }
	 
         if (eres_updated < ETolerance) {    // tau < LTol has been checked
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated);
            }
            break;
         }

         eval_prev = eval_updated;

         if (primme->printLevel >= 4 && primme->procID == 0) {
            fprintf(primme->outputFile,
           "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats.
	    numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated);
	    fflush(primme->outputFile);
         }

        /* --------------------------------------------------------*/
      } /* End of if adaptive JDQMR section                        */
        /* --------------------------------------------------------*/
      else if (primme->printLevel >= 4 && primme->procID == 0) {
        // Report for non adaptive inner iterations
        fprintf(primme->outputFile,
           "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs,
           primme_wTimer(0),tau);
	fflush(primme->outputFile);
      }

      if (numIts < maxIterations) {

	 ret = apply_projected_preconditioner(g, evecs, RprojectorQ, 
	   x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, 
	   xKinvx, UDU, ipivot, w, workSpace, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_inner_solve, 
               Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, 
               primme);
               ret = APPLYPROJECTEDPRECONDITIONER_FAILURE;
	       break;
         }
         rho = dist_dot(g, 1, w, 1, primme);
         z_div_primme(&beta, &rho, &rho_prev);
         Num_scal_zprimme(primme->nLocal, beta, d, 1);
         Num_axpy_zprimme(primme->nLocal, tpone, w, 1, d, 1);
      
         rho_prev = rho; 
         tau_prev = tau;
         Theta_prev = Theta;

         Delta_prev = Delta;
         Beta_prev = Beta;
         Phi_prev = Phi;
         Psi_prev = Psi;
         Gamma_prev = Gamma;
      }

     /* --------------------------------------------------------*/
   } /* End of QMR main while loop                              */
     /* --------------------------------------------------------*/

   *rnorm = eres_updated;
   return 0;
}
Exemple #7
0
static int combine_retained_vectors(double *hVals, int *flags, Complex_Z *hVecs,
   int basisSize, int *restartSize, int numPacked, Complex_Z *previousHVecs, 
   int *numPrevRetained, double machEps, Complex_Z *rwork, 
   primme_params *primme) {

   int i;                    /* Loop variable */
   int indexOfPreviousVecs;  /* The index within hVecs where the previous */
                             /* coefficients are inserted.                */

   if (primme->printLevel >= 5 && primme->procID == 0) {
      fprintf(primme->outputFile,
  "combine: basisSize: %d restartSize: %d numPrevRetained: %d numPacked: %d \n",
         basisSize, *restartSize, *numPrevRetained, numPacked);
   }

   /* ------------------------------------------------------------------ */
   /* Orthogonalize the coefficents from the previous iteration with the */
   /* current coefficients.  This may annihilate some of the previous    */
   /* vectors, if the have much overlap with the current vectors.  Thus, */
   /* numPrevRetained may be reduced.                                    */
   /* ------------------------------------------------------------------ */

   *numPrevRetained = ortho_retained_vectors_zprimme(hVecs, basisSize, 
      *restartSize, previousHVecs, *numPrevRetained, machEps, rwork);

   /* --------------------------------------------------------------------- */
   /* If locking is engaged and there exist retained previous coefficent    */
   /* vectors, then move the last numPacked vectors forward numPrevRetained */
   /* spaces.  This will make room for the orthogonalized previous vectors. */
   /* --------------------------------------------------------------------- */

   if (primme->locking && *numPrevRetained > 0) {
      indexOfPreviousVecs = *restartSize - numPacked;
      /* WARNING: dcopy's -1 step is not implemented appropriately in Mac's 
       * Veclib (and other libs).  Perform vector by vector instead.
      Num_zcopy_zprimme(basisSize*numPacked, 
         &hVecs[basisSize*(*restartSize-numPacked)], -1, 
         &hVecs[basisSize*(*restartSize-numPacked+*numPrevRetained)], -1);
      */
      for (i=1;i<=numPacked;i++) Num_zcopy_zprimme(basisSize,
                     &hVecs[basisSize*(*restartSize-i)],1,
                     &hVecs[basisSize*(*restartSize+*numPrevRetained-i)],1);

      /* Move the Ritz values and flag values forward as well */

      for (i = numPacked-1; i >= 0; i--) {
         hVals[*restartSize-numPacked+*numPrevRetained+i] = 
            hVals[*restartSize-numPacked+i];      
         flags[*restartSize-numPacked+*numPrevRetained+i] = 
            flags[*restartSize-numPacked+i];      
      }

   }
   else {
      indexOfPreviousVecs = *restartSize;
   }
   
   /* ----------------------------------------------------------------*/
   /* Copy the orthogonalized previous coefficents to the hVecs array */
   /* ----------------------------------------------------------------*/

   Num_zcopy_zprimme(basisSize*(*numPrevRetained), previousHVecs, 1, 
      &hVecs[basisSize*indexOfPreviousVecs], 1);

   /* Initialize the Ritz and flag values */
   for (i = 0; i < *numPrevRetained; i++) {
      hVals[indexOfPreviousVecs+i] = 0.0L;
      flags[indexOfPreviousVecs+i] = UNCONVERGED;
   }

   
   if (primme->printLevel >= 5 && primme->procID == 0) {
      fprintf(primme->outputFile, "numPrevRetained: %d restartSize: %d\n", 
         *numPrevRetained, *restartSize);
   }

   /* Increase the restart size with the previous vectors added. */

   *restartSize = *restartSize + *numPrevRetained;

   return indexOfPreviousVecs;
}
Exemple #8
0
static int dtr(int numLocked, Complex_Z *hVecs, double *hVals, int *flags, 
  int basisSize, int numFree, int *iev, Complex_Z *rwork, primme_params *primme)
{

   int i;                 /* Loop variable */
   int l, lOpt, lMin;     /* Determine how many left side vectors to retain   */
   int r, rOpt;           /* Determine how many right side vectors to retain  */
   int maxIndex;          /* basisSize - 1                                    */
   int restartSize;       /* The new restart size                             */
   double currentRitzVal; /* The current Ritz value the solver is computing   */
   double newVal, optVal; /* Used to find the optimum gap ratio               */

   /* ---------------------------------------------------------------- */
   /* Compute lOpt and rOpt with respect to the first Ritz value being */
   /* targeted by the block.                                           */
   /* ---------------------------------------------------------------- */

   currentRitzVal = hVals[iev[0]];
   maxIndex = basisSize-1;

   /* If locking is engaged, then lMin must be large enough to retain */
   /* the coefficient vector associated with a converged target.      */
   /* lMin should be no smaller than primme->minRestartSize.            */

   if (primme->locking) {

      lMin = 0;

      /* Determine the largest index of any converged but unlocked target */
      /* Ritz vector.                                                     */

      for (l = 0; l < basisSize; l++) {
         if (  (flags[l] == CONVERGED || flags[l] == PRACTICALLY_CONVERGED) 
             && (numLocked + l < primme->numEvals)) {
            lMin = l;
         }
      }

      lMin = max(lMin, min(basisSize, primme->minRestartSize));

   }
   else {
      lMin = min(basisSize, primme->minRestartSize);
   }

   
   lOpt = lMin;
   rOpt = 0;   
   optVal = 0.0L;

   if (primme->printLevel >= 5 && primme->procID == 0) {
      fprintf(primme->outputFile,"DTR basisSize: %d\n", basisSize);
   }

   /* ---------------------------------------------------------------------- */
   /* Compute lOpt and rOpt that maximize the function.                      */
   /* maximize the function (basisSize-numFree-lMin-rMin)*                   */
   /*                       sqrt((currentRitzVal - hVals[l+1])/              */
   /*                            (hVals[l+1]-hVals[basisSize-1-r]))          */
   /* ---------------------------------------------------------------------- */

   for (l = lMin; l < basisSize - numFree; l++) {
      for (r = 0; r < basisSize - l - numFree; r++)       {
         if ((basisSize - l - r) % primme->maxBlockSize == 0) {
            newVal = (basisSize - l - r)
                     * sqrt((currentRitzVal - hVals[l+1])/
                            (hVals[l+1]-hVals[maxIndex-r]));

            if (newVal > optVal) {
               optVal = newVal;
               lOpt = l;
               rOpt = r;
            }

         }
      }
   }


   restartSize = lOpt + rOpt;

   /* --------------------------------------------------------------- */
   /* Swap the rOpt vectors from the right hand side so that they are */
   /* contiguous with the vectors from the left hand side.            */
   /* --------------------------------------------------------------- */

   i = basisSize - restartSize; 

   Num_zcopy_zprimme(i*basisSize, &hVecs[basisSize*lOpt], 1, rwork, 1);
   Num_zcopy_zprimme(rOpt*basisSize, &hVecs[basisSize*(basisSize-rOpt)], 1,
      &hVecs[basisSize*lOpt], 1);
   Num_zcopy_zprimme(i*basisSize, rwork, 1, &hVecs[basisSize*restartSize], 1);

   /* Do the same with the eigenvalues of H */

   Num_dcopy_primme(i, &hVals[lOpt], 1, (double *) rwork, 1);
   Num_dcopy_primme(rOpt, &hVals[(basisSize-rOpt)], 1, &hVals[lOpt], 1);
   Num_dcopy_primme(i, (double *) rwork, 1, &hVals[restartSize], 1);

   /* Set only those flags lower than restartSize. The rest will be reset */
   for (i = 0; i < rOpt; i++) {
      flags[lOpt + i] = flags[basisSize-rOpt + i];
   }

   if (primme->printLevel >= 5 && primme->procID == 0) {
      fprintf(primme->outputFile,"DTR restart size: %d L: %d R: %d\n", 
         restartSize, lOpt, rOpt);
   }

   reset_flags_zprimme(flags, restartSize, primme->maxBasisSize);
   return restartSize;

}
Exemple #9
0
int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs,
   double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, 
   Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, 
   int *numConvergedStored, int numLocked, int numGuesses, 
   Complex_Z *previousHVecs, int numPrevRetained, double machEps, 
   Complex_Z *rwork, int rworkSize, primme_params *primme) {
  
   int numFree;             /* The number of basis vectors to be left free    */
   int numPacked;           /* The number of coefficient vectors moved to the */
                            /* end of the hVecs array.                        */
   int restartSize;         /* The number of vectors to restart with          */
   int indexOfPreviousVecs=0; /* Position within hVecs array the previous       */
                            /* coefficient vectors will be stored             */
   int i, n, eStart;        /* various variables                              */
   int ret;                 /* Return value                                   */

   numPacked = 0;

   /* --------------------------------------------------------------------- */
   /* If dynamic thick restarting is to be used, then determine the minimum */
   /* number of free spaces to be maintained and call the DTR routine.      */
   /* The DTR routine will determine how many coefficient vectors from the  */
   /* left and right of H-spectrum to retain at restart. If DTR is not used */
   /* then set the restart size to the minimum restart size.                */
   /* --------------------------------------------------------------------- */

   if (primme->restartingParams.scheme == primme_dtr) {
      numFree = numPrevRetained+max(3, primme->maxBlockSize);
      restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, 
                        iev, rwork, primme);
   }
   else {
      restartSize = min(basisSize, primme->minRestartSize);
   }

   /* ----------------------------------------------------------------------- */
   /* If locking is engaged, then swap coefficient vectors corresponding to   */
   /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/
   /* This allows the converged Ritz vectors to be stored contiguously in     */
   /* memory after restart.  This significantly reduces the amount of data    */
   /* movement the locking routine would have to perform otherwise.           */
   /* The following function also covers some limit cases where restartSize   */
   /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize  */
   /* ----------------------------------------------------------------------- */

   if (primme->locking) {
      numPacked = pack_converged_coefficients(&restartSize, basisSize, 
         &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme);
   }

   /* ----------------------------------------------------------------------- */
   /* Restarting with a small number of coefficient vectors from the previous */
   /* iteration can be retained to accelerate convergence.  The previous      */
   /* coefficient vectors must be combined with the current coefficient       */
   /* vectors by first orthogonalizing the previous ones versus the current   */
   /* restartSize ones.  The orthogonalized previous vectors are then         */
   /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs).          */
   /* ----------------------------------------------------------------------- */

   if (numPrevRetained > 0) {
      indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs,
         basisSize, &restartSize, numPacked, previousHVecs, 
         &numPrevRetained, machEps, rwork, primme);
   }

   /* -------------------------------------------------------- */
   /* Restart V by replacing it with the current Ritz vectors. */
   /* -------------------------------------------------------- */

   restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize);
   
   /* ------------------------------------------------------------ */
   /* Restart W by replacing it with W times the eigenvectors of H */
   /* ------------------------------------------------------------ */

   restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize);

   /* ---------------------------------------------------------------- */
   /* Because we have replaced V by the Ritz vectors, V'*A*V should be */
   /* diagonal with the Ritz values on the diagonal.  The eigenvectors */
   /* of the new matrix V'*A*V become the standard basis vectors.      */
   /* ---------------------------------------------------------------- */

   ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, 
      numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, 
         __LINE__, primme);
      return RESTART_H_FAILURE;
   }

   /* --------------------------------------------------------------------- */
   /* If the user requires (I-QQ') projectors in JDQMR without locking,     */
   /* the converged eigenvectors are copied temporarily to evecs. There     */
   /* they stay locked  for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/
   /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and   */
   /* they will overwrite evecs at the end.                                 */
   /* We recommend against this type of usage. It's better to use locking.  */
   /* --------------------------------------------------------------------- */

   /* Andreas NOTE: is done inefficiently for the moment. We should only */
   /* add the recently converged. But we need to differentiate them      */
   /* from flags...                                                      */

   if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && 
        numConverged > 0 &&
        (primme->correctionParams.projectors.LeftQ ||
         primme->correctionParams.projectors.RightQ )  ) {

       n = primme->nLocal;
       *numConvergedStored = 0;
       eStart = primme->numOrthoConst;

       for (i=0;i<primme->numEvals;i++) {
           if (flags[i] == CONVERGED) {
              if (*numConvergedStored < numConverged) {
                 Num_zcopy_zprimme(n, &V[i*n], 1, 
                              &evecs[(eStart+*numConvergedStored)*n], 1);
                 (*numConvergedStored)++;
              }
           } /* if converged */
       } /* for */
       if (*numConvergedStored != numConverged) {
          if (primme->printLevel >= 1 && primme->procID == 0) {
             fprintf(primme->outputFile, 
             "Flags and converged eigenpairs do not correspond %d %d\n",
                numConverged, *numConvergedStored);
          }
          return PSEUDOLOCK_FAILURE;
       }

      /* Update also the M = K^{-1}evecs and its udu factorization if needed */
      if (UDU != NULL) {

         apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], 
                                    numConverged, primme );
         /* rwork must be maxEvecsSize*numEvals! */
         update_projection_zprimme(evecs, evecsHat, M, eStart*n,
           primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme);

         ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, 
                         rwork, rworkSize, primme);
         if (ret != 0) {
            primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret,
               __FILE__, __LINE__, primme);
            return UDUDECOMPOSE_FAILURE;
         }
      } /* if UDU factorization is needed */
   } /* if this pseudo locking should take place */

   return restartSize;
}
Exemple #10
0
int check_convergence_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *hVecs, 
   double *hVals, int *flags, int basisSize, int *iev, int *ievMax, 
   double *blockNorms, int *blockSize, int numConverged, int numLocked, 
   Complex_Z *evecs, double tol, double maxConvTol, double aNormEstimate, 
   Complex_Z *rwork, primme_params *primme) {

   int i;             /* Loop variable                                        */
   int left, right;   /* Range of block vectors to be checked for convergence */
   int start;         /* starting index in block of converged/tobeProject vecs*/
   int numVacancies;  /* Number of vacant positions between left and right    */
   int recentlyConverged; /* The number of Ritz values declared converged     */
                          /* since the last iteration                         */
   int numToProject;      /* Number of vectors with potential accuracy problem*/
   double attainableTol;  /* Used in locking to check near convergence problem*/

   /* -------------------------------------------- */
   /* Tolerance based on our dynamic norm estimate */
   /* -------------------------------------------- */

   if (primme->aNorm <= 0.0L) {
      tol = tol * aNormEstimate;
   }

   /* ---------------------------------------------------------------------- */
   /* If locking, set tol beyond which we need to check for accuracy problem */
   /* ---------------------------------------------------------------------- */
   if (primme->locking) {
      attainableTol = sqrt(primme->numOrthoConst+numLocked)*maxConvTol;
   }   

      
   /* --------------------------------------------------------------- */
   /* Compute each Ritz vector and its corresponding residual vector. */
   /* The Ritz vector and residual are stored temporarily in V and W  */
   /* respectively.  For each Ritz vector, determine if it has        */
   /* converged.  If it has, try to replace it with one that hasn't.  */
   /* --------------------------------------------------------------- */

   recentlyConverged = 0;
   left = 0;
   right = *blockSize - 1;
   numVacancies = 1;

   while (numVacancies > 0 && 
          (numConverged + recentlyConverged) < primme->numEvals) {

      /* Consider the newly added vectors in the block and reset counters */
      numVacancies = 0;
      numToProject = 0;

      /* Copy needed hvecs into the front of the work array. */

      for (i=left; i <= right; i++) {
         Num_zcopy_zprimme(basisSize, &hVecs[basisSize*iev[i]], 1, 
            &rwork[basisSize*(i-left)], 1);
      }
            
      /* ----------------------------------------------------------------- */
      /* Compute the Ritz vectors, residuals, and norms for the next       */
      /* blockSize unconverged Ritz vectors.  The Ritz vectors will be     */
      /* placed from V(0,lft) to V(0,rgt) and the residual vectors from    */
      /* W(0,lft) to W(0,rgt).                                             */
      /* ----------------------------------------------------------------- */
      /* rwork must be maxBasisSize*maxBlockSize + maxBlockSize in size,   */
      /* maxBasisSize*maxBlockSize holds selected hVecs to facilitate      */
      /* blocking, and maxBlockSize to hold the residual norms             */
      /* ----------------------------------------------------------------- */

      compute_resnorms(V, W, rwork, hVals, basisSize, blockNorms,
         iev, left, right, &rwork[basisSize*(right-left+1)], primme);

      if (primme->stats.numOuterIterations % 1 == 0 && 1) {
          print_residuals(hVals, blockNorms, numConverged, numLocked, iev, 
	     left, right, primme);
      }

      /* ----------------------------------------------------------------- */
      /* Determine which Ritz vectors have converged < tol and flag them.  */
      /* ----------------------------------------------------------------- */

      for (i=left; i <= right; i++) {
       
         /* ------------------------------------*/
         /* If the vector is converged, flag it */
         /* ------------------------------------*/
         if (blockNorms[i] < tol) {
            flags[iev[i]] = CONVERGED;
            numVacancies++;

            if ((!primme->locking && iev[i] < primme->numEvals) || 
	       (primme->locking && ((numLocked + iev[i]) < primme->numEvals))) {

               recentlyConverged++;

	       if (!primme->locking && primme->procID == 0 && 
		   primme->printLevel >= 2) { fprintf(primme->outputFile, 
                  "#Converged %d eval[ %d ]= %e norm %e Mvecs %d Time %g\n",
	          numConverged+recentlyConverged, iev[i], hVals[iev[i]], 
		  blockNorms[i], primme->stats.numMatvecs,primme_wTimer(0));
		  fflush(primme->outputFile);
	       } // printf
            } //if 
         } //if converged
         /* ---------------------------------------------------------------- */
	 /* If locking there may be an accuracy problem close to convergence */
	 /* Check if there is danger and set these Ritz vecs for projection  */
         /* ---------------------------------------------------------------- */
	 else if (primme->locking && numLocked > 0 &&
		  blockNorms[i] < attainableTol ) {

            flags[iev[i]] = TO_BE_PROJECTED;
            numToProject++;
	 }

      } //for 

      /* ---------------------------------------------------------------- */
      /* If some of the Ritz vectors in the block have converged, or need */
      /* to be projected against evecs, move those flagged Ritz vectors   */
      /* and residuals towards the end of the block [left,right]. Also    */
      /* swap iev, and blockNorms for the targeted block.                 */
      /* ---------------------------------------------------------------- */
      if (numVacancies > 0 || numToProject > 0) {

         swap_UnconvVecs(V, W, primme->nLocal, basisSize, iev, flags, 
	    blockNorms, primme->numOrthoConst + numLocked, *blockSize, left);
      }
      /* --------------------------------------------------------------- */
      /* Project the TO_BE_PROJECTED residuals and check for practical   */
      /* convergence among them. Those practically converged evecs are   */
      /* swapped just before the converged ones at the end of the block. */
      /* numVacancies and recentlyConverged are also updated		 */
      /* --------------------------------------------------------------- */
      if (numToProject > 0) {

	 start = *blockSize - numVacancies - numToProject;

	 check_practical_convergence(V, W, evecs, numLocked, basisSize, 
  	    *blockSize, start, numToProject, iev, flags, blockNorms, tol, 
	    &recentlyConverged, &numVacancies, rwork, primme);
      }

      /* ---------------------------------------------------------------- */
      /* Replace the vacancies, with as many unconverged vectors beyond   */
      /* ievMax as possible. If not enough are available reduce blockSize */
      /* ---------------------------------------------------------------- */

      if (numVacancies > 0) {
         replace_vectors(iev, flags, *blockSize, basisSize, numVacancies, 
			 &left, &right, ievMax); 
         numVacancies = right - left + 1;
         *blockSize = left + numVacancies;
      }

   } // while there are vacancies

   return recentlyConverged;
}
Exemple #11
0
static void swap_UnconvVecs(Complex_Z *V, Complex_Z *W, int nLocal, 
   int basisSize, int *iev, int *flags, double *blockNorms, int dimEvecs, 
   int blockSize, int left) {

   int right; /* holds the right swapping position */
   int temp;  /* used to swap integers */
   double dtemp; /* used to swap doubles */

   /* Search from left to right within the block looking for flagged     */
   /* Ritz vectors.  If a flagged one is found, find also an unconverged */
   /* Ritz vector from the right side of the block. If the flagged was   */
   /* converged, replace it with the unconverged, otherwise if it was    */
   /* was flagged TO_BE_PROJECTED, swap it with the unconverged vector.  */

   while (left < blockSize) {

      /* If block vector left is unconverged, move left one vector. */ 

      if (flags[iev[left]] == UNCONVERGED) {
         left++;
      }
      else {

         /* If block vector left is converged, find an unconverged */
         /* vector somewhere between left+1 and blockSize.         */

         right = left + 1;
   
         /* If the end of the block has been reached, there are */
         /* no more unconverged vectors left.  If not, keep     */
         /* searching right for another unconverged vector to   */
         /* perform the replacement.                            */

         while (right < blockSize && flags[iev[right]] != UNCONVERGED) {
            right++;
         }

         /* No unconverged block vector could be found */

         if (right == blockSize) {
            return;
         }

         /* An unconverged Ritz vector was found and should */
         /* replace or be swapped with block vector left.   */

	 if (flags[iev[left]] != TO_BE_PROJECTED) { 
		/* replace */
            Num_zcopy_zprimme(nLocal, &V[nLocal*(basisSize+right)], 1,
               &V[nLocal*(basisSize+left)], 1);
            Num_zcopy_zprimme(nLocal, &W[nLocal*(basisSize+right)], 1,
               &W[nLocal*(basisSize+left)], 1);
            temp = iev[left];
            iev[left] = iev[right];
            iev[right] = temp;
	    blockNorms[left] = blockNorms[right];
	 }
	 else { /* swap */
      	    Num_swap_zprimme(nLocal, &V[nLocal*(basisSize+left)], 1, 
		              &V[nLocal*(basisSize+right)], 1);
      	    Num_swap_zprimme(nLocal, &W[nLocal*(basisSize+left)], 1, 
		      	      &W[nLocal*(basisSize+right)], 1);
            temp = iev[left];
            iev[left] = iev[right];
            iev[right] = temp;
	    dtemp = blockNorms[left];
	    blockNorms[left] = blockNorms[right];
	    blockNorms[right] = dtemp;
	 } /* end of swaps */

	 left++;

      }  // looking for replacement

   } //while left < blockSize
}
Exemple #12
0
static int init_block_krylov(Complex_Z *V, Complex_Z *W, int dv1, int dv2, 
   Complex_Z *locked, int numLocked, double machEps, Complex_Z *rwork, 
   int rworkSize, primme_params *primme) {

   int i;               /* Loop variables */
   int numNewVectors;   /* Number of vectors to be generated */
   int ret;             /* Return code.                      */  
   int ONE = 1;         /* Used for passing it by reference in matrixmatvec */
   
   numNewVectors = dv2 - dv1 + 1;

   /*----------------------------------------------------------------------*/
   /* Generate a single Krylov space if there are only a few vectors to be */
   /* generated, else generate a block Krylov space with                   */
   /* primme->maxBlockSize as the block Size.                              */ 
   /*----------------------------------------------------------------------*/

   if (numNewVectors <= primme->maxBlockSize) {

      /* Create and orthogonalize the inital vectors */

      Num_larnv_zprimme(2, primme->iseed,primme->nLocal,&V[primme->nLocal*dv1]);
      ret = ortho_zprimme(V, primme->nLocal, dv1, dv1, locked, 
         primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps, 
         rwork, rworkSize, primme);

      if (ret < 0) {
         primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret, 
            __FILE__, __LINE__, primme);
         return ORTHO_FAILURE;
      }

      /* Generate the remainder of the Krylov space. */

      for (i = dv1; i < dv2; i++) {
         (*primme->matrixMatvec)
           (&V[primme->nLocal*i], &V[primme->nLocal*(i+1)], &ONE, primme);
         Num_zcopy_zprimme(primme->nLocal, &V[primme->nLocal*(i+1)], 1,
            &W[primme->nLocal*i], 1);
         ret = ortho_zprimme(V, primme->nLocal, i+1, i+1, locked, 
            primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
            rwork, rworkSize, primme);
      
         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, 
                            ret, __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
         }
      }

      primme->stats.numMatvecs += dv2-dv1;
      update_W_zprimme(V, W, dv2, 1, primme);

   }
   else {
   /*----------------------------------------------------------------------*/
   /* Generate the initial vectors.                                        */
   /*----------------------------------------------------------------------*/

      Num_larnv_zprimme(2, primme->iseed, primme->nLocal*primme->maxBlockSize,
         &V[primme->nLocal*dv1]);
      ret = ortho_zprimme(V, primme->nLocal, dv1, 
         dv1+primme->maxBlockSize-1, locked, primme->nLocal, numLocked, 
         primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme);

      /* Generate the remaining vectors in the sequence */

      for (i = dv1+primme->maxBlockSize; i <= dv2; i++) {
         (*primme->matrixMatvec)(&V[primme->nLocal*(i-primme->maxBlockSize)], 
            &V[primme->nLocal*i], &ONE, primme);
         Num_zcopy_zprimme(primme->nLocal, &V[primme->nLocal*i], 1,
            &W[primme->nLocal*(i-primme->maxBlockSize)], 1);

         ret = ortho_zprimme(V, primme->nLocal, i, i, locked, 
            primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
            rwork, rworkSize, primme);

         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, 
                            ret, __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
         }

      }

      primme->stats.numMatvecs += dv2-(dv1+primme->maxBlockSize)+1;
      update_W_zprimme(V, W, dv2-primme->maxBlockSize+1, primme->maxBlockSize,
         primme);

   }
         
   return 0;
}
Exemple #13
0
int init_basis_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, 
   Complex_Z *evecsHat, Complex_Z *M, Complex_Z *UDU, int *ipivot, 
   double machEps,  Complex_Z *rwork, int rworkSize, int *basisSize, 
   int *nextGuess, int *numGuesses, double *timeForMV,
   primme_params *primme) {

   int ret;          /* Return value                              */
   int currentSize;

   /*-----------------------------------------------------------------------*/
   /* Orthogonalize the orthogonalization constraints provided by the user. */
   /* If a preconditioner is given and inner iterations are to be           */
   /* performed, then initialize M.                                         */
   /*-----------------------------------------------------------------------*/

   if (primme->numOrthoConst > 0) {
      ret = ortho_zprimme(evecs, primme->nLocal, 0, 
        primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal, 
        primme->iseed, machEps, rwork, rworkSize, primme);

      /* Push an error message onto the stack trace if an error occured */
      if (ret < 0) {
         primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, 
                         __FILE__, __LINE__, primme);
         return ORTHO_FAILURE;
      }

      /* Initialize evecsHat, M, and its factorization UDU,ipivot. This   */
      /* allows the orthogonalization constraints to be included in the   */
      /* projector (I-QQ'). Only needed if there is preconditioning, and  */
      /* JDqmr inner iterations with a right, skew projector. Only in     */
      /* that case, is UDU not NULL                                       */

      if (UDU != NULL) {

         (*primme->applyPreconditioner)
            (evecs, evecsHat, &primme->numOrthoConst, primme); 
         primme->stats.numPreconds += primme->numOrthoConst;

         update_projection_zprimme(evecs, evecsHat, M, 0, 
            primme->numOrthoConst+primme->numEvals, primme->numOrthoConst, 
            rwork, primme);

         ret = UDUDecompose_zprimme(M, UDU, ipivot, primme->numOrthoConst, 
            rwork, rworkSize, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret,
               __FILE__, __LINE__, primme);
            return UDUDECOMPOSE_FAILURE;
         }

      }  /* if evecsHat and M=evecs'evecsHat, UDU are needed */

   }  /* if numOrthoCont >0 */


   /*-----------------------------------------------------------------------*/
   /* No locking                                                            */
   /*-----------------------------------------------------------------------*/
   if (!primme->locking) {

      /* Handle case when no initial guesses are provided by the user */
      if (primme->initSize == 0) {

         ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs, 
            primme->numOrthoConst, machEps, rwork, rworkSize, primme); 

         /* Push an error message onto the stack trace if an error occured */
         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov,
                            ret, __FILE__, __LINE__, primme);
            return INIT_BLOCK_KRYLOV_FAILURE;
         }

         *basisSize = primme->minRestartSize;

      }
      else {
      /* Handle case when some or all initial guesses are provided by */ 
      /* the user                                                     */

         /* Copy over the initial guesses provided by the user */
         Num_zcopy_zprimme(primme->nLocal*primme->initSize, 
            &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

         /* Orthonormalize the guesses provided by the user */ 

         ret = ortho_zprimme(V, primme->nLocal, 0, primme->initSize-1, 
            evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal, 
            primme->iseed, machEps, rwork, rworkSize, primme);

         /* Push an error message onto the stack trace if an error occured */
         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret, 
                            __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
         }

         update_W_zprimme(V, W, 0, primme->initSize, primme);

         /* An insufficient number of initial guesses were provided by */
         /* the user.  Generate a block Krylov space to fill the       */
         /* remaining vacancies.                                       */

         if (primme->initSize < primme->minRestartSize) {

            ret = init_block_krylov(V, W, primme->initSize, 
               primme->minRestartSize - 1, evecs, primme->numOrthoConst, 
               machEps, rwork, rworkSize, primme);

            /* Push an error message onto the stack trace if an error occured */
            if (ret < 0) {
               primme_PushErrorMessage(Primme_init_basis, 
                  Primme_init_block_krylov, ret, __FILE__, __LINE__, primme);
               return INIT_KRYLOV_FAILURE;
            }

            *basisSize = primme->minRestartSize;
         }
         else {
            *basisSize = primme->initSize;
         }

      }

      *numGuesses = 0;
      *nextGuess = 0;

   }
   else {
   /*-----------------------------------------------------------------------*/
   /* Locking                                                               */
   /*-----------------------------------------------------------------------*/

      *numGuesses = primme->initSize;
      *nextGuess = primme->numOrthoConst;

      /* If some initial guesses are available, copy them to the basis       */
      /* and orthogonalize them against themselves and the orthogonalization */
      /* constraints.                                                        */

      if (primme->initSize > 0) {
         currentSize = min(primme->initSize, primme->minRestartSize);
         Num_zcopy_zprimme(primme->nLocal*currentSize, 
            &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

         ret = ortho_zprimme(V, primme->nLocal, 0, currentSize-1, evecs,
            primme->nLocal, primme->numOrthoConst, primme->nLocal,
            primme->iseed, machEps, rwork, rworkSize, primme);

         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                   __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
         }
      
         update_W_zprimme(V, W, 0, currentSize, primme);
         *numGuesses = *numGuesses - currentSize;
         *nextGuess = *nextGuess + currentSize;
         
      }
      else {
         currentSize = 0;
      }

      /* If an insufficient number of guesses was provided, then fill */
      /* the remaining vacancies with a block Krylov space.           */

      if (currentSize < primme->minRestartSize) {
         
         ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1,
            evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme);

         if (ret < 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov,
                            ret, __FILE__, __LINE__, primme);
            return INIT_BLOCK_KRYLOV_FAILURE;
         }

      }

      *basisSize = primme->minRestartSize;
   }

   /* ----------------------------------------------------------- */
   /* If time measurements are needed, waste one MV + one Precond */
   /* Put dummy results in the first open space of W (currentSize)*/
   /* ----------------------------------------------------------- */
   if (primme->dynamicMethodSwitch) {
      currentSize = primme->nLocal*(*basisSize);
      ret = 1;
      *timeForMV = primme_wTimer(0);
       (*primme->matrixMatvec)(V, &W[currentSize], &ret, primme);
      *timeForMV = primme_wTimer(0) - *timeForMV;
      primme->stats.numMatvecs += 1;
   }
      
   return 0;
}