Example #1
0
static int insert_submatrix(Complex_Z *H, double *hVals, Complex_Z *hVecs, 
   int restartSize, Complex_Z *subMatrix, int numPrevRetained, 
   int indexOfPreviousVecs, int rworkSize, Complex_Z *rwork, 
   primme_params *primme) {

   int info;
   int i, j;
   double *doubleWork;

   /* ---------------------------------------------------------------------- */
   /* Copy the submatrix into H with the upper right corner of the submatrix */
   /* at H(indexOfPreviousVecs, indexOfPreviousVecs).                        */
   /* ---------------------------------------------------------------------- */

   for (j = indexOfPreviousVecs; j < indexOfPreviousVecs+numPrevRetained; j++) {
      for (i = indexOfPreviousVecs; i <= j; i++) {
         H[primme->maxBasisSize*j+i] = 
         subMatrix[numPrevRetained*(j-indexOfPreviousVecs)
                              +(i-indexOfPreviousVecs)];
      }
   }

   /* ----------------------------------------- */
   /* Solve the eigenproblrm for the submatrix. */
   /* ----------------------------------------- */
   /* -------------------------------------------------------------------- */
   /* Assign also 3N double work space after the 2N complex rwork finishes */
   /* -------------------------------------------------------------------- */
   doubleWork = (double *) (rwork+ 2*numPrevRetained);

   Num_zheev_zprimme("V", "U", numPrevRetained, subMatrix, numPrevRetained, 
      &hVals[indexOfPreviousVecs], rwork, rworkSize, doubleWork, &info);

   if (info != 0) {
      primme_PushErrorMessage(Primme_insert_submatrix, Primme_num_dsyev, info, 
         __FILE__, __LINE__, primme);
      return NUM_DSYEV_FAILURE;
   }

   /* ---------------------------------------------------------------------- */
   /* The eigenvectors of the submatrix are of dimension numPrevRetained  */
   /* and reside in the submatrix array after dsyev is called.  The       */
   /* eigenvectors of the new H corresponding to the submatrix are easily */
   /* constructed using the eigenvectors of the submatrix.                */
   /* ---------------------------------------------------------------------- */

   for (j = indexOfPreviousVecs; j < indexOfPreviousVecs+numPrevRetained; j++) {
      for (i = indexOfPreviousVecs; 
           i < indexOfPreviousVecs + numPrevRetained; i++) 
      {
         hVecs[restartSize*j+i] = 
         subMatrix[numPrevRetained*(j-indexOfPreviousVecs)+
                   (i-indexOfPreviousVecs)];
      }
   }

   return 0;
} 
static int apply_projected_preconditioner(double *v, double *Q, 
   double *RprojectorQ, double *x, double *RprojectorX, 
   int sizeRprojectorQ, int sizeRprojectorX, double *xKinvx, 
   double *UDU, int *ipivot, double *result, double *rwork, 
   primme_params *primme) {  

   int ONE = 1;
   int ret;

   if (primme->correctionParams.precondition) {
      /* Place K^{-1}v in result */
      (*primme->applyPreconditioner)(v, result, &ONE, primme);
      primme->stats.numPreconds += 1;
   }
   else {
      Num_dcopy_dprimme(primme->nLocal, v, 1, result, 1);
   }

   ret = apply_skew_projector(Q, RprojectorQ, UDU, ipivot, sizeRprojectorQ,
                           result, rwork, primme);
   if (ret != 0) {
         primme_PushErrorMessage(Primme_apply_projected_preconditioner, 
            Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme);
         return APPLYSKEWPROJECTOR_FAILURE;
   }

   apply_skew_projector(x, RprojectorX, xKinvx, ipivot, sizeRprojectorX,
                           result, rwork, primme);
   if (ret != 0) {
         primme_PushErrorMessage(Primme_apply_projected_preconditioner, 
            Primme_apply_skew_projector, ret, __FILE__, __LINE__, primme);
         return APPLYSKEWPROJECTOR_FAILURE;
   }

   return 0;
}
Example #3
0
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, 
   Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, 
   int numLocked, int numConvergedStored, double *ritzVals, 
   double *prevRitzVals, int *flags, int basisSize, double *blockNorms, 
   int *iev, int blockSize, double eresTol, double machEps, 
   double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, 
   primme_params *primme) {

   int blockIndex;         /* Loop index.  Ranges from 0..blockSize-1.       */
   int ritzIndex;          /* Ritz value index blockIndex corresponds to.    */
                           /* Possible values range from 0..basisSize-1.     */
   int sortedIndex;        /* Ritz value index in sortedRitzVals, blockIndex */
   			   /* corresponds to. Range 0..numLocked+basisSize-1 */
   int neededRsize;        /* Needed size for rwork. If not enough return    */
   int linSolverRWorkSize; /* Size of the linSolverRWork array.              */
   int *ilev;              /* Array of size blockSize.  Maps the target Ritz */
                           /* values to their positions in the sortedEvals   */
                           /* array.                                         */
   int sizeLprojector;     /* Sizes of the various left/right projectors     */
   int sizeRprojectorQ;	   /* These will be 0/1/or numOrthConstr+numLocked   */
   int sizeRprojectorX;    /* or numOrthConstr+numConvergedStored w/o locking*/

   static int numPrevRitzVals = 0; /* Size of prevRitzVals                   */
   int ret;                /* Return code.                                   */
   Complex_Z *r, *x, *sol;  /* Residual, Ritz vector, and correction.         */
   Complex_Z *linSolverRWork;/* Workspace needed by linear solver.            */
   double *sortedRitzVals; /* Sorted array of current and converged Ritz     */
			   /* values.  Size of array is numLocked+basisSize. */
   double *blockOfShifts;  /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/
   double *approxOlsenEps; /* Shifts for approximate Olsen implementation    */
   Complex_Z *Kinvx;	   /* Workspace to store K^{-1}x                     */
   Complex_Z *Lprojector;   /* Q pointer for (I-Q*Q'). Usually points to evecs*/
   Complex_Z *RprojectorQ;  /* May point to evecs/evecsHat depending on skewQ */
   Complex_Z *RprojectorX;  /* May point to x/Kinvx depending on skewX        */

   Complex_Z xKinvx;                        /* Stores x'*K^{-1}x if needed    */
   double eval, shift, robustShift;       /* robust shift values.           */
   Complex_Z tmpShift;			  /* Temp shift for daxpy           */

   //------------------------------------------------------------
   // Subdivide the workspace with pointers, and figure out 
   // the total amount of needed real workspace (neededRsize)
   //------------------------------------------------------------

   /* needed worksize */
   neededRsize = 0;
   Kinvx       = rwork;
   /* Kinvx will have nonzero size if precond and both RightX and SkewX */
   if (primme->correctionParams.precondition &&        
       primme->correctionParams.projectors.RightX &&  
       primme->correctionParams.projectors.SkewX ) { 

      /* OLSEN's method requires a block, but JDQMR is vector by vector */
      if (primme->correctionParams.maxInnerIterations == 0) {    
	 sol = Kinvx + primme->nLocal*blockSize;
         neededRsize = neededRsize + primme->nLocal*blockSize;
      }
      else { 					         
         sol = Kinvx + primme->nLocal;
         neededRsize = neededRsize + primme->nLocal;
      }
   }
   else {
      sol = Kinvx + 0;
   }
   if (primme->correctionParams.maxInnerIterations == 0) {    
      linSolverRWork = sol + 0; 		  /* sol not needed for GD */
      linSolverRWorkSize = 0;			  /* No inner solver used  */
   }
   else {
      linSolverRWork = sol + primme->nLocal;      /* sol needed in innerJD */
      neededRsize = neededRsize + primme->nLocal;
      linSolverRWorkSize = 			  /* Inner solver worksize */
	      4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals);
      neededRsize = neededRsize + linSolverRWorkSize;
   }
   sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize);
   blockOfShifts  = sortedRitzVals + (numLocked+basisSize);
   approxOlsenEps = blockOfShifts  + blockSize;
   neededRsize = neededRsize + numLocked+basisSize + 2*blockSize;

   if (neededRsize > rworkSize) {
      return(neededRsize);
   }

   // Subdivide also the integer work space
   ilev = iwork;          // of size blockSize

   //------------------------------------------------------------
   //  Figuring out preconditioning shifts  (robust, Olsen, etc)
   //------------------------------------------------------------
   /* blockOfShifts will contain the preconditioning shifts:               */
   /* either Ritz values or robustShifts computed below. These shifts      */
   /* will be used in the correction equations or in inverting (K-sigma I) */
   /* approxOlsenEps will contain error approximations for eigenavalues    */
   /* to be used for Olsen's method (when innerIterations =0).             */
    
   if (primme->locking) {
      /* Combine the sorted list of locked Ritz values with the sorted  */
      /* list of current Ritz values, ritzVals.  The merging of the two */
      /* lists lockedEvals and ritzVals is stored in sortedRitzVals.    */

      mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, 
         	   sortedRitzVals, ilev, blockSize, primme);
   }
   else {
      /* Then the sorted evals are simply the ritzVals, targeted as iev */

      sortedRitzVals = ritzVals;
      ilev = iev;
   }

   /*-----------------------------------------------------------------*/
   /* For interior eigenpairs, use the user provided shifts           */
   /*-----------------------------------------------------------------*/

   if (primme->target != primme_smallest && primme->target != primme_largest) {

      for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
         sortedIndex = ilev[blockIndex];
         blockOfShifts[blockIndex] = 
	    primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ];
         if (sortedIndex < numPrevRitzVals) {
            approxOlsenEps[blockIndex] = 
            fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
         }  
         else {
            approxOlsenEps[blockIndex] = blockNorms[blockIndex];
         }  
      } /* for loop */

   } /* user provided shifts */
   else {    
   /*-----------------------------------------------------------------*/
   /* else it is primme_smallest or primme_largest                    */
   /*-----------------------------------------------------------------*/

      if (primme->correctionParams.robustShifts) { 
         /*----------------------------------------------------*/
         /* Subtract/add a robust shift from/to the Ritz value */
         /*----------------------------------------------------*/

         /* Find the robust shift for each block vector */
         for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
   
	    sortedIndex = ilev[blockIndex];
	    eval = sortedRitzVals[sortedIndex];
   
            robustShift = computeRobustShift(blockIndex, 
	      blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, 
	      sortedRitzVals, &approxOlsenEps[blockIndex], 
	      numLocked+basisSize, ilev, primme);
   
            /* Subtract/add the shift if looking for the smallest/largest  */
	    /* eigenvalues, Do not go beyond the previous computed eigval  */
       
            if (primme->target == primme_smallest) {
               blockOfShifts[blockIndex] = eval - robustShift;
	       if (sortedIndex > 0) blockOfShifts[blockIndex] = 
	          max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
	    }
	    else {
               blockOfShifts[blockIndex] = eval + robustShift;
	       if (sortedIndex > 0) blockOfShifts[blockIndex] = 
		  min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
            } // robust shifting 
   
         }  /* for loop */
   
      }  /* endif robust shifts */
      else {
         /*--------------------------------------------------------------*/
         /* Otherwise, the shifts for both preconditioner and correction */
         /* equation should be just the Ritz values. For Olsen's method, */
         /* the shifts for r-eps*x, are chosen as the difference in Ritz */
	 /* value between successive iterations.                         */
         /*--------------------------------------------------------------*/
   
         for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
            ritzIndex   =  iev[blockIndex];
	    sortedIndex = ilev[blockIndex];
            blockOfShifts[blockIndex] = ritzVals[ritzIndex];
 	    if (sortedIndex < numPrevRitzVals) {
               approxOlsenEps[blockIndex] = 
               fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
	    }
	    else {
               approxOlsenEps[blockIndex] = blockNorms[blockIndex]; 
	    }
         } /* for loop */
      } /* else no robust shifts */
   } /* else primme_smallest or primme_largest */

   
   /* Remember the previous ritz values*/

   numPrevRitzVals = numLocked+basisSize;
   Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1);

   // Equip the primme struct with the blockOfShifts, in case the user
   // wants to precondition (K-sigma_i I)^{-1} with a different shift 
   // for each vector

   primme->ShiftsForPreconditioner = blockOfShifts;

   //------------------------------------------------------------
   //  Generalized Davidson variants -- No inner iterations
   //------------------------------------------------------------
   if (primme->correctionParams.maxInnerIterations == 0) {
      // This is Generalized Davidson or approximate Olsen's method. 
      // Perform block preconditioning (with or without projections)
      
      r = &W[primme->nLocal*basisSize];    // All the block residuals
      x = &V[primme->nLocal*basisSize];    // All the block Ritz vectors
      
      if ( primme->correctionParams.projectors.RightX &&
	   primme->correctionParams.projectors.SkewX    ) {    
 	  // Compute exact Olsen's projected preconditioner. This is 
	  // expensive and rarely improves anything! Included for completeness.
	  
	  Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme);
      }
      else {
         if ( primme->correctionParams.projectors.RightX ) {   
            // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx 
	    // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r

    	    for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
	       // Compute r_i = r_i - err_i * x_i 
      	       {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;}
               Num_axpy_zprimme(primme->nLocal, tmpShift,
	       &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1);
	    } //for
	 }

	 // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) 

         apply_preconditioner_block(r, x, blockSize, primme );
      }
   }
   //------------------------------------------------------------
   //  JDQMR --- JD inner-outer variants
   //------------------------------------------------------------
   else {  // maxInnerIterations > 0  We perform inner-outer JDQMR.

      /* Solve the correction for each block vector. */

      for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {

         r = &W[primme->nLocal*(basisSize+blockIndex)];
         x = &V[primme->nLocal*(basisSize+blockIndex)];

	 /* Set up the left/right/skew projectors for JDQMR.        */
	 /* The pointers Lprojector, Rprojector(Q/X) point to the   */
	 /* appropriate arrays for use in the projection step       */

	 setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, 
   	    &Lprojector, &RprojectorQ, &RprojectorX, 
	    &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX,
   	    numLocked, numConvergedStored, primme);

         /* Map the index of the block vector to its corresponding eigenvalue */
         /* index, and the shift for the correction equation. Also make the   */
	 /* shift available to primme, in case (K-shift I)^-1 is needed       */

         ritzIndex = iev[blockIndex];
         shift = blockOfShifts[blockIndex];
         primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex];

         ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, 
	    evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, 
	    RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX,
	    sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, 
	    machEps, linSolverRWork, linSolverRWorkSize, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve,
			    ret, __FILE__, __LINE__, primme);
            return (INNER_SOLVE_FAILURE);
         }

         Num_zcopy_zprimme(primme->nLocal, sol, 1, 
            &V[primme->nLocal*(basisSize+blockIndex)], 1);

      } // end for each block vector
   } // JDqmr variants

   return 0;

}
static int apply_skew_projector(double *Q, double *Qhat, double *UDU, 
   int *ipivot, int numCols, double *v, double *rwork, 
   primme_params *primme) {

   int count;
   double tpone = +1.0e+00, tzero = +0.0e+00, tmone = -1.0e+00;

   if (numCols > 0) {    /* there is a projector to be applied */

      int ret;
      double *overlaps;  /* overlaps of v with columns of Q   */
      double *workSpace; /* Used for computing local overlaps */

      overlaps = rwork;
      workSpace = overlaps + numCols;

      /* --------------------------------------------------------*/
      /* Treat the one vector case with BLAS 1 calls             */
      /* --------------------------------------------------------*/
      if (numCols == 1) {
         /* Compute workspace = Q'*v */
         overlaps[0] = dist_dot(Q, 1, v, 1, primme);

         /* Backsolve only if there is a skew projector */
         if (UDU != NULL) {
            if (UDU[0] == 0.0L) {
               return UDUSOLVE_FAILURE;
            }
            overlaps[0] = overlaps[0]/UDU[0];
         }
         /* Compute v=v-Qhat*overlaps */
         Num_axpy_dprimme(primme->nLocal, -overlaps[0], Qhat, 1, v, 1);
      }
      else {
         /* ------------------------------------------------------*/
         /* More than one vectors. Use BLAS 2.                    */
         /* ------------------------------------------------------*/
         /* Compute workspace = Q'*v */
         Num_gemv_dprimme("C", primme->nLocal, numCols, tpone, Q, 
                      primme->nLocal, v, 1, tzero, workSpace, 1);

         /* Global sum: overlaps = Q'*v */
         count = numCols;
         (*primme->globalSumDouble)(workSpace, overlaps, &count, primme);   

         /* --------------------------------------------*/
         /* Backsolve only if there is a skew projector */
         /* --------------------------------------------*/
         if (UDU != NULL) {
            /* Solve (Q'Qhat)^{-1}*workSpace = overlaps = Q'*v for alpha by */
            /* backsolving  with the UDU decomposition.                 */
   
            ret = UDUSolve_dprimme(UDU, ipivot, numCols, overlaps, workSpace);
            if (ret != 0) {
               primme_PushErrorMessage(Primme_apply_skew_projector,
                  Primme_udusolve, ret, __FILE__, __LINE__, primme);
               return UDUSOLVE_FAILURE;
            }
            /* Compute v=v-Qhat*workspace */
            Num_gemv_dprimme("N", primme->nLocal, numCols, tmone, Qhat, 
                         primme->nLocal, workSpace, 1, tpone, v, 1);
         }
         else  {
            /* Compute v=v-Qhat*overlaps  */
            Num_gemv_dprimme("N", primme->nLocal, numCols, tmone, Qhat, 
                         primme->nLocal, overlaps, 1, tpone, v, 1);
         } /* UDU==null */
      } /* numCols != 1 */
   } /* numCols > 0 */

   return 0;
}
int inner_solve_dprimme(double *x, double *r, double *rnorm, 
   double *evecs, double *evecsHat, double *UDU, int *ipivot, 
   double *xKinvx, double *Lprojector, double *RprojectorQ, 
   double *RprojectorX, int sizeLprojector, int sizeRprojectorQ, 
   int sizeRprojectorX, double *sol, double eval, double shift, 
   double eresTol, double aNormEstimate, double machEps, double *rwork, 
   int rworkSize, primme_params *primme) {

   int i;             /* loop variable                                       */
   int workSpaceSize; /* Size of local work array.                           */
   int numIts;        /* Number of inner iterations                          */
   int ret;           /* Return value used for error checking.               */
   int maxIterations; /* The maximum # iterations allowed. Depends on primme */

   double *workSpace; /* Workspace needed by UDU routine */

   /* QMR parameters */

   double *g, *d, *delta, *w, *ptmp;
   double alpha_prev, beta, rho_prev, rho;
   double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; 
   double ztmp;

   /* Parameters used to dynamically update eigenpair */
   double Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev, eta;
   double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R;
   double Gamma_prev, Phi_prev;
   double Gamma, Phi;
   double gamma;

   /* The convergence criteria of the inner linear system must satisfy:       */
   /* || current residual || <= relativeTolerance * || initial residual ||    */
   /*                                               + absoluteTol             */

   double relativeTolerance; 
   double absoluteTolerance;
   double LTolerance, ETolerance;

   /* Some constants                                                          */
   double tpone = +1.0e+00, tzero = +0.0e+00;

   /* -------------------------------------------*/
   /* Subdivide the workspace into needed arrays */
   /* -------------------------------------------*/

   g      = rwork;
   d      = g + primme->nLocal;
   delta  = d + primme->nLocal;
   w      = delta + primme->nLocal;
   workSpace = w + primme->nLocal; /* This needs at least 2*numOrth+NumEvals) */
   
   workSpaceSize = rworkSize - (workSpace - rwork);
   
   /* -----------------------------------------*/
   /* Set up convergence criteria by Tolerance */
   /* -----------------------------------------*/

   if (primme->aNorm <= 0.0L) {
      absoluteTolerance = aNormEstimate*machEps;
      eresTol = eresTol*aNormEstimate;
   }
   else {
      absoluteTolerance = primme->aNorm*machEps;
   }
   tau_prev = tau_init = *rnorm;       /* Assumes zero initial guess */
   LTolerance = eresTol;

   /* Andreas: note that eigenresidual tol may not be achievable, because we */
   /* iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. */
   if (primme->correctionParams.convTest == primme_adaptive) {
      ETolerance = max(eresTol/1.8L, absoluteTolerance);
      LTolerance = ETolerance;
   }
   else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) {
      LTolerance = max(eresTol/1.8L, absoluteTolerance);
      ETolerance = max(tau_init*0.1L, LTolerance);
   }
   else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) {
      relativeTolerance = pow(primme->correctionParams.relTolBase, 
         (double)-primme->stats.numOuterIterations);
      LTolerance = relativeTolerance * tau_init 
                   + absoluteTolerance + eresTol;
   /*printf(" RL %e INI %e abso %e LToler %e aNormEstimate %e \n", */
   /*relativeTolerance, tau_init, absoluteTolerance,LTolerance,aNormEstimate);*/
   }
   
   /* --------------------------------------------------------*/
   /* Set up convergence criteria by max number of iterations */
   /* --------------------------------------------------------*/

   /* compute first total number of remaining matvecs */

   maxIterations = primme->maxMatvecs - primme->stats.numMatvecs;

   /* Perform primme.maxInnerIterations, but do not exceed total remaining */
   if (primme->correctionParams.maxInnerIterations > 0) {

      maxIterations = min(primme->correctionParams.maxInnerIterations, 
                          maxIterations);
   }

   /* --------------------------------------------------------*/
   /* Rest of initializations                                 */
   /* --------------------------------------------------------*/

   /* Assume zero initial guess */
   Num_dcopy_dprimme(primme->nLocal, r, 1, g, 1);

   ret = apply_projected_preconditioner(g, evecs, RprojectorQ, 
           x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, 
           xKinvx, UDU, ipivot, d, workSpace, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_inner_solve, 
         Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, 
         primme);
      return APPLYPROJECTEDPRECONDITIONER_FAILURE;
   }
      
   Theta_prev = 0.0L;
   eval_prev = eval;
   rho_prev = dist_dot(g, 1, d, 1, primme);
      
   /* Initialize recurrences used to dynamically update the eigenpair */

   Beta_prev = Delta_prev = Psi_prev = 0.0L;
   Gamma_prev = Phi_prev = 0.0L;

   /* other initializations */
   for (i = 0; i < primme->nLocal; i++) {
      delta[i] = tzero;
      sol[i] = tzero;
   }

   numIts = 0;
      
   /*----------------------------------------------------------------------*/
   /*------------------------ Begin Inner Loop ----------------------------*/
   /*----------------------------------------------------------------------*/

   while (numIts < maxIterations) {

      apply_projected_matrix(d, shift, Lprojector, sizeLprojector, 
                             w, workSpace, primme);
      sigma_prev = dist_dot(d, 1, w, 1, primme);

      if (sigma_prev == 0.0L) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev);
         }
         break;
      }

      alpha_prev = rho_prev/sigma_prev;
      if (fabs(alpha_prev) < machEps || fabs(alpha_prev) > 1.0L/machEps){
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev);
         }
         break;
      }

      Num_axpy_dprimme(primme->nLocal, -alpha_prev, w, 1, g, 1);

      Theta = dist_dot(g, 1, g, 1, primme);
      Theta = sqrt(Theta);
      Theta = Theta/tau_prev;
      c = 1.0L/sqrt(1+Theta*Theta);
      tau = tau_prev*Theta*c;

      gamma = c*c*Theta_prev*Theta_prev;
      eta = alpha_prev*c*c;
      for (i = 0; i < primme->nLocal; i++) {
          delta[i] = gamma*delta[i] + eta*d[i];
          sol[i] = delta[i]+sol[i];
      }
      numIts++;

      if (fabs(rho_prev) == 0.0L ) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because abs(rho) %e\n",
               fabs(rho_prev));
         }
         break;
      }
      
      if (tau < LTolerance) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance);
         }
         break;
      }
      else if (primme->correctionParams.convTest == primme_adaptive_ETolerance
            || primme->correctionParams.convTest == primme_adaptive) {
         /* --------------------------------------------------------*/
         /* Adaptive stopping based on dynamic monitoring of eResid */
         /* --------------------------------------------------------*/

         /* Update the Ritz value and eigenresidual using the */
         /* following recurrences.                            */
      
         Delta = gamma*Delta_prev + eta*rho_prev;
         Beta = Beta_prev - Delta;
         Phi = gamma*gamma*Phi_prev + eta*eta*sigma_prev;
         Psi = gamma*Psi_prev + gamma*Phi_prev;
         Gamma = Gamma_prev + 2.0L*Psi + Phi;
        
         /* Perform the update: update the eigenvalue and the square of the  */
         /* residual norm.                                                   */
         
         dot_sol = dist_dot(sol, 1, sol, 1, primme);
         eval_updated = shift + (eval - shift + 2*Beta + Gamma)/(1 + dot_sol);
         eres2_updated = (tau*tau)/(1 + dot_sol) + 
            ((eval - shift + Beta)*(eval - shift + Beta))/(1 + dot_sol) - 
            (eval_updated - shift)*(eval_updated - shift);

         /* If numerical problems, let eres about the same as tau */
         if (eres2_updated < 0){
            eres_updated = sqrt( (tau*tau)/(1 + dot_sol) );
         }
         else 
            eres_updated = sqrt(eres2_updated);

         /* --------------------------------------------------------*/
         /* Stopping criteria                                       */
         /* --------------------------------------------------------*/

         R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol);
        
         if ( tau <= R*eres_updated || eres_updated <= tau*R ) {
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, " tau < R eres \n");
            }
            break;
         }

         if (primme->target == primme_smallest && eval_updated > eval_prev) {
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eval_updated > eval_prev\n");
            }
            break;
         }
         else if (primme->target == primme_largest && eval_updated < eval_prev){
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eval_updated < eval_prev\n");
            }
            break;
         }
         
         if (eres_updated < ETolerance) {    /* tau < LTol has been checked */
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated);
            }
            break;
         }

         eval_prev = eval_updated;

         if (primme->printLevel >= 4 && primme->procID == 0) {
            fprintf(primme->outputFile,
           "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats.
            numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated);
            fflush(primme->outputFile);
         }

        /* --------------------------------------------------------*/
      } /* End of if adaptive JDQMR section                        */
        /* --------------------------------------------------------*/
      else if (primme->printLevel >= 4 && primme->procID == 0) {
        /* Report for non adaptive inner iterations */
        fprintf(primme->outputFile,
           "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs,
           primme_wTimer(0),tau);
        fflush(primme->outputFile);
      }

      if (numIts < maxIterations) {

         ret = apply_projected_preconditioner(g, evecs, RprojectorQ, 
            x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, 
            xKinvx, UDU, ipivot, w, workSpace, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_inner_solve, 
               Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, 
               primme);
               ret = APPLYPROJECTEDPRECONDITIONER_FAILURE;
               break;
         }
         rho = dist_dot(g, 1, w, 1, primme);
         beta = rho/rho_prev;
         Num_axpy_dprimme(primme->nLocal, beta, d, 1, w, 1);
         /* Alternate between w and d buffers in successive iterations
          * This saves a memory copy. */
         ptmp = d; d = w; w = ptmp;
      
         rho_prev = rho; 
         tau_prev = tau;
         Theta_prev = Theta;

         Delta_prev = Delta;
         Beta_prev = Beta;
         Phi_prev = Phi;
         Psi_prev = Psi;
         Gamma_prev = Gamma;
      }

     /* --------------------------------------------------------*/
   } /* End of QMR main while loop                              */
     /* --------------------------------------------------------*/

   *rnorm = eres_updated;
   return 0;
}
Example #6
0
int lock_vectors_dprimme(double tol, double *aNormEstimate, double *maxConvTol, 
   int *basisSize, int *numLocked, int *numGuesses, int *nextGuess,
   double *V, double *W, double *H, double *evecsHat, double *M, 
   double *UDU, int *ipivot, double *hVals, double *hVecs, 
   double *evecs, double *evals, int *perm, double machEps, 
   double *resNorms, int *numPrevRitzVals, double *prevRitzVals, 
   int *flag, double *rwork, int rworkSize, int *iwork, 
   int *LockingProblem, primme_params *primme) {

   int i;             /* Loop counter                                       */
   int numCandidates; /* Number of targeted Ritz vectors converged before   */
                      /* restart.                                           */
   int newStart;      /* Index in evecs where the locked vectors were added */
   int numNewVectors; /* Number of vectors added to the basis to replace    */
                      /* locked vectors.                                    */
   int candidate;     /* Index of Ritz vector to be checked for convergence */
   int numDeflated;   /* The number of vectors actually locked              */
   int numReplaced;   /* The number of locked vectors that were replaced by */
                      /* initial guesses.                                   */
   int numRecentlyLocked; /* Number of vectors locked.                      */
   int evecsSize;     /* The number of orthogonalization constraints plus   */
                      /* the number of locked vectors.                      */
   int ret;           /* Used to store return values.                       */
   int workinW;       /* Flag whether an active W vector is used as tempwork*/
   int entireSpace = (*basisSize+*numLocked >= primme->n); /* bool if entire*/
                      /* space is built, so current ritzvecs are accurate.  */

   double *norms, *tnorms; /* Array of residual norms, and temp array       */
   double attainableTol;   /* Used to verify a practical convergence problem*/
   double *residual;  /* Stores residual vector                             */
   double ztmp;       /* temp variable */

   /* ----------------------------------------*/
   /* Assign temporary work space for residual*/
   /* ----------------------------------------*/

   if (*basisSize < primme->maxBasisSize) {
      /* compute residuals in the next open slot of W */
      residual = &W[*basisSize*primme->nLocal];
      workinW = 0;
   }
   else {
      /* This basiSize==maxBasisSize, immediately after restart, can only occur
       * if the basisSize + numLocked = n (at which case we lock everything)
       * OR if (numConverged + restartSize + numPrevRetain > basisSize), ie.
       * too many converged. Since we do not know which evec will be locked
       * we use the W[LAST] as temporary space, but only after W[LAST] has 
       * been used to compute residual(LAST) -the while loop starts from LAST.
       * After all lockings, if the LAST evec was not locked, we must  
       * recompute W[LAST]=Av. This matvec event is extremely infrequent */
      residual = &W[(*basisSize-1)*primme->nLocal];
      workinW = 1;
   }

   /* -------------------------------------*/
   /* Set the tolerance, and attainableTol */
   /* -------------------------------------*/
   
   if (primme->aNorm <= 0.0L) {
      tol = tol * (*aNormEstimate);
   }
   attainableTol=max(tol,sqrt(primme->numOrthoConst+*numLocked)*(*maxConvTol));

   /* -------------------------------------------------------- */
   /* Determine how many Ritz vectors converged before restart */
   /* -------------------------------------------------------- */

   i = *basisSize - 1;
   while ((flag[i] == LOCK_IT ||flag[i] == UNCONDITIONAL_LOCK_IT) && i >= 0) {
      i--;
   }
      
   numCandidates = *basisSize - i - 1;

   if (numCandidates == 0) {
      return 0;
   }

   /* --------------------------------- */
   /* Compute residuals and their norms */
   /* --------------------------------- */

   tnorms = (double *) rwork;
   norms  = tnorms + numCandidates;

   for (i = *basisSize-1, candidate = numCandidates-1;  
      i >= *basisSize-numCandidates; i--, candidate--) {
      Num_dcopy_dprimme(primme->nLocal, &W[primme->nLocal*i], 1, residual, 1);
      ztmp = -hVals[i];
      Num_axpy_dprimme(primme->nLocal, ztmp, &V[primme->nLocal*i],1,residual,1);
      tnorms[candidate] = Num_dot_dprimme(primme->nLocal,residual,1,residual,1);
   }

   /* Global sum the dot products */
   (*primme->globalSumDouble)(tnorms, norms, &numCandidates, primme); 

   numRecentlyLocked = 0;

   /* ------------------------------------------------------------------- */
   /* Check the convergence of each residual norm.  If the Ritz vector is */
   /* converged, then lock it.                                            */
   /* ------------------------------------------------------------------- */

   for (i = *basisSize - numCandidates, candidate = 0; i < *basisSize; i++, 
      candidate++) {

      norms[candidate] = sqrt(norms[candidate]);

      /* If the vector has become (regularly or practically) unconverged, */
      /* then flag it, else lock it and replace it with an initial guess, */
      /* if one is available. Exception: If the entire space is spanned,  */
      /* we can't do better, so lock it.                                  */


      if ((flag[i]!=UNCONDITIONAL_LOCK_IT && norms[candidate] >= tol 
                                   && !entireSpace ) ||
          (flag[i]==UNCONDITIONAL_LOCK_IT && norms[candidate] >= attainableTol
                                   && !entireSpace )) {
         flag[i] = UNCONVERGED;
      }
      else {
        /* If an unconditional lock has become converged, show it and */
        /* record the max converged tolerance accordingly             */
        if (norms[candidate]<tol) {
           flag[i]=LOCK_IT;
           *maxConvTol = max(*maxConvTol, tol);
        }
        else {
           *maxConvTol = max(*maxConvTol, norms[candidate]);
           *LockingProblem = 1;
        }

         if (primme->printLevel >= 2 && primme->procID == 0) { 
            fprintf(primme->outputFile, 
            "Lock epair[ %d ]= %e norm %.4e Mvecs %d Time %.4e Flag %d\n",
                  *numLocked+1, hVals[i], norms[candidate], 
                   primme->stats.numMatvecs,primme_wTimer(0),flag[i]);
            fflush(primme->outputFile);
         }

         /* Copy the converged Ritz vector to the evecs array and  */
         /* insert the converged Ritz value in sorted order within */
         /* the evals array.                                       */

         Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1, 
            &evecs[primme->nLocal*(primme->numOrthoConst + *numLocked)], 1);
         insertionSort(hVals[i], evals, norms[candidate], resNorms, perm, 
            *numLocked, primme);

         /* If there are any initial guesses remaining, then copy it */
         /* into the basis, else flag the vector as locked so it may */
         /* be discarded later.                                      */

         if (*numGuesses > 0) {
            Num_dcopy_dprimme(primme->nLocal, 
               &evecs[primme->nLocal*(*nextGuess)], 1, &V[primme->nLocal*i], 1);
            flag[i] = INITIAL_GUESS;
            *numGuesses = *numGuesses - 1;
            *nextGuess = *nextGuess + 1;
         }
         else {
            flag[i] = LOCKED;
         }

         *numLocked = *numLocked + 1;
         numRecentlyLocked++;
            
      }
   }
      
   evecsSize = primme->numOrthoConst + *numLocked;

   /* -------------------------------------------------------------------- */
   /* If a W vector was used as workspace for residual AND its evec has    */
   /* not been locked out,  recompute it, W = A*v. This is rare.           */
   /* -------------------------------------------------------------------- */
      if (workinW && flag[*basisSize-1] != LOCKED) {
         update_W_dprimme(V, W, *basisSize-1, 1, primme);
      }
   
   /* -------------------------------------------------------------------- */
   /* Return IF all target Ritz vectors have been locked, ELSE update the  */
   /* evecsHat array by applying the preconditioner (if preconditioning is */
   /* needed, and JDQMR with right, skew Q projector is applied            */
   /* -------------------------------------------------------------------- */

   if (*numLocked >= primme->numEvals) {
      return 0;
   }
   else if (UDU != NULL) {

      /* Compute K^{-1}x for all newly locked eigenvectors */

      newStart = primme->nLocal*(evecsSize - numRecentlyLocked);
      (*primme->applyPreconditioner)( &evecs[newStart], &evecsHat[newStart], 
                                    &numRecentlyLocked, primme);
      primme->stats.numPreconds += numRecentlyLocked;

      /* Update the projection evecs'*evecsHat now that evecs and evecsHat   */
      /* have been expanded by numRecentlyLocked columns.  Required          */
      /* workspace is numLocked*numEvals.  The most ever needed would be     */
      /* maxBasisSize*numEvals.                                              */

      update_projection_dprimme(evecs, evecsHat, M, 
         evecsSize-numRecentlyLocked, primme->numOrthoConst+primme->numEvals, 
         numRecentlyLocked, rwork, primme);

      ret = UDUDecompose_dprimme(M, UDU, ipivot, evecsSize, rwork, 
         rworkSize, primme);

      if (ret != 0) {
         primme_PushErrorMessage(Primme_lock_vectors, Primme_ududecompose, ret,
            __FILE__, __LINE__, primme);
         return UDUDECOMPOSE_FAILURE;
      }

   }


   /* --------------------------------------------------------------------- */
   /* Swap, towards the end of the basis, vectors that were locked but not  */
   /* replaced by new initial guesses.                                      */
   /* --------------------------------------------------------------------- */

   numDeflated = swap_flagVecs_toEnd(*basisSize, LOCKED, V, W, H, hVals, flag, 
      primme);

   /* --------------------------------------------------------------------- */
   /* Reduce the basis size by numDeflated and swap the new initial guesses */
   /* towards the end of the basis.                                         */
   /* --------------------------------------------------------------------- */
  
   numReplaced = swap_flagVecs_toEnd(*basisSize-numDeflated, INITIAL_GUESS, 
      V, W, H, hVals, flag, primme);

   *basisSize = *basisSize - (numDeflated + numReplaced);

   if (primme->printLevel >= 5 && primme->procID == 0) {
      fprintf(primme->outputFile, "numDeflated: %d numReplaced: %d \
              basisSize: %d\n", numDeflated, numReplaced, *basisSize);
   }
Example #7
0
static int allocate_workspace(primme_params *primme, int allocate) {

   long int realWorkSize;  /* Size of real work space.                  */
   long int rworkByteSize; /* Size of all real data in bytes            */

   int dataSize;     /* Number of Complex_Z positions allocated, excluding */
                     /* doubles (see doubleSize below) and work space.  */
   int doubleSize=0; /* Number of doubles allocated exclusively to the  */
                     /* double arrays: hVals, prevRitzVals, blockNorms  */
   int maxEvecsSize; /* Maximum number of vectors in evecs and evecsHat */
   int intWorkSize;  /* Size of integer work space in bytes             */
   int initSize;     /* Amount of work space required by init routine   */
   int orthoSize;    /* Amount of work space required by ortho routine  */
   int convSize;     /* Amount of work space required by converg. routine */
   int restartSize;  /* Amount of work space required by restart routine */
   int solveCorSize; /* work space for solve_correction and inner_solve */
   int solveHSize;   /* work space for solve_H                          */
   int mainSize;     /* work space for main_iter                        */
   Complex_Z *evecsHat=NULL;/* not NULL when evecsHat will be used        */
   Complex_Z t;        /* dummy variable */

   maxEvecsSize = primme->numOrthoConst + primme->numEvals;
 
   /* first determine real workspace */

   /*----------------------------------------------------------------------*/
   /* Compute the memory required by the main iteration data structures    */
   /*----------------------------------------------------------------------*/

   dataSize = primme->nLocal*primme->maxBasisSize  /* Size of V            */
      + primme->nLocal*primme->maxBasisSize        /* Size of W            */
      + primme->maxBasisSize*primme->maxBasisSize  /* Size of H            */
      + primme->maxBasisSize*primme->maxBasisSize  /* Size of hVecs        */
      + primme->restartingParams.maxPrevRetain*primme->maxBasisSize;
                                                   /* size of prevHVecs    */

   /*----------------------------------------------------------------------*/
   /* Add memory for Harmonic or Refined projection                        */
   /*----------------------------------------------------------------------*/
   if (primme->projectionParams.projection == primme_proj_harmonic ||
         primme->projectionParams.projection == primme_proj_refined) {

      dataSize += primme->nLocal*primme->maxBasisSize    /* Size of Q      */
         + primme->maxBasisSize*primme->maxBasisSize     /* Size of R      */
         + primme->maxBasisSize*primme->maxBasisSize;    /* Size of hU     */
      doubleSize += primme->maxBasisSize;                /* Size of hSVals */
   }


   /*----------------------------------------------------------------------*/
   /* Add also memory needed for JD skew projectors                        */
   /*----------------------------------------------------------------------*/
   if ( (primme->correctionParams.precondition && 
         primme->correctionParams.maxInnerIterations != 0 &&
         primme->correctionParams.projectors.RightQ &&
         primme->correctionParams.projectors.SkewQ          ) ) {

      dataSize = dataSize + 
         + primme->nLocal*maxEvecsSize             /* Size of evecsHat     */ 
         + maxEvecsSize*maxEvecsSize               /* Size of M            */
         + maxEvecsSize*maxEvecsSize;              /* Size of UDU          */
      evecsHat = &t; /* set not NULL */
   }

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by init and its children                */
   /*----------------------------------------------------------------------*/

   initSize = init_basis_zprimme(NULL, primme->nLocal, 0, NULL, 0, NULL,
         0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, &primme->maxBasisSize,
         NULL, NULL, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine orthogalization workspace with and without locking.        */
   /*----------------------------------------------------------------------*/

   if (primme->locking) {
      orthoSize = ortho_zprimme(NULL, 0, NULL, 0, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, 
         maxEvecsSize, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }
   else {
      orthoSize = ortho_zprimme(NULL, 0, NULL, 0, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, 
         primme->numOrthoConst+1, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_H and its children             */
   /*----------------------------------------------------------------------*/

   solveHSize = solve_H_zprimme(NULL, primme->maxBasisSize, 0, NULL, 0, NULL, 0, NULL,
         0, NULL, NULL, 0, 0, NULL, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_correction and its children    */
   /*----------------------------------------------------------------------*/

   solveCorSize = solve_correction_zprimme(NULL, NULL, NULL, NULL, NULL, 
                  NULL, NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL, 
                  primme->maxBasisSize, NULL, NULL, primme->maxBlockSize, 
                  1.0, 0.0, 1.0, NULL, NULL, 0, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_H and its children             */
   /*----------------------------------------------------------------------*/

   convSize = check_convergence_zprimme(NULL, primme->nLocal, 0, &t, 0,
         NULL, primme->numEvals, 0, 0, primme->maxBasisSize, NULL, NULL,
         NULL, 0.0, NULL, 0, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by restarting and its children          */
   /*----------------------------------------------------------------------*/

   restartSize = restart_zprimme(NULL, NULL, primme->nLocal, primme->maxBasisSize, 0, NULL,
         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
         evecsHat, 0, NULL, 0, NULL, 0, NULL, NULL,
         NULL, &primme->numEvals, NULL, &primme->restartingParams.maxPrevRetain,
         primme->maxBasisSize, primme->initSize, NULL, &primme->maxBasisSize, NULL,
         primme->maxBasisSize, NULL, 0, NULL, 0, NULL,
         0, 0, NULL, 0, 0, NULL, NULL, 0.0,
         NULL, 0, NULL, primme);

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by main_iter and its children           */
   /*----------------------------------------------------------------------*/

   mainSize = max(
         update_projection_zprimme(NULL, 0, NULL, 0, NULL, 0, 0, 0,
            primme->maxBasisSize, NULL, 0, primme),
         prepare_candidates_zprimme(NULL, NULL, primme->nLocal, primme->maxBasisSize,
            0, NULL, NULL, NULL, 0, NULL, NULL, primme->numEvals, primme->numEvals,
            NULL, 0, primme->maxBlockSize, NULL, primme->numEvals, NULL, NULL, 0.0,
            NULL, &primme->maxBlockSize, NULL, NULL, 0, NULL, primme));
 
 
   /*----------------------------------------------------------------------*/
   /* Workspace is reused in many functions. Allocate the max needed by any*/
   /*----------------------------------------------------------------------*/
   realWorkSize = Num_imax_primme(8,

      /* Workspace needed by init_basis */
      initSize,

      /* Workspace needed by solve_correction and its child inner_solve */
      solveCorSize, 

      /* Workspace needed by function solve_H */
      solveHSize,
   
      /* Workspace needed by function check_convergence */ 
      convSize,

      /* Workspace needed by function restart*/
      restartSize,

      /* Workspace needed by function verify_norms */
      2*primme->numEvals,

      /* maximum workspace needed by ortho */ 
      orthoSize,

      /* maximum workspace for main */
      mainSize);

   /*----------------------------------------------------------------------*/
   /* The following size is always allocated as double                     */
   /*----------------------------------------------------------------------*/

   doubleSize += 4                                 /* Padding              */
      + primme->maxBasisSize                       /* Size of hVals        */
      + primme->numEvals+primme->maxBasisSize      /* Size of prevRitzVals */
      + primme->maxBlockSize;                      /* Size of blockNorms   */

   /*----------------------------------------------------------------------*/
   /* Determine the integer workspace needed                               */
   /*----------------------------------------------------------------------*/

   intWorkSize = primme->maxBasisSize /* Size of flag               */
      + 2*primme->maxBlockSize        /* Size of iev and ilev       */
      + maxEvecsSize                  /* Size of ipivot             */
      + 5*primme->maxBasisSize;       /* Auxiliary permutation arrays */

   /*----------------------------------------------------------------------*/
   /* byte sizes:                                                          */
   /*----------------------------------------------------------------------*/
   
   rworkByteSize = (dataSize + realWorkSize)*sizeof(Complex_Z)
                                + doubleSize*sizeof(double); 

   /*----------------------------------------------------------------------*/
   /* If only the amount of required workspace is needed return it in bytes*/
   /*----------------------------------------------------------------------*/

   if (!allocate) {
      primme->intWorkSize  = intWorkSize*sizeof(int);
      primme->realWorkSize = rworkByteSize;
      return 1;
   }

   /*----------------------------------------------------------------------*/
   /* Allocate the required workspace, if the user did not provide enough  */
   /*----------------------------------------------------------------------*/
   if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) {
      if (primme->realWork != NULL) {
         free(primme->realWork);
      }
      primme->realWorkSize = rworkByteSize;
      primme->realWork = (void *) primme_valloc(rworkByteSize,"Real Alloc");
      if (primme->printLevel >= 5) fprintf(primme->outputFile, 
         "Allocating real workspace: %ld bytes\n", primme->realWorkSize);
   }

   if (primme->intWorkSize < intWorkSize*(int)sizeof(int) || primme->intWork==NULL) {
      if (primme->intWork != NULL) {
         free(primme->intWork);
      }
      primme->intWorkSize = intWorkSize*sizeof(int);
      primme->intWork= (int *)primme_valloc(primme->intWorkSize ,"Int Alloc");
      if (primme->printLevel >= 5) fprintf(primme->outputFile, 
         "Allocating integer workspace: %d bytes\n", primme->intWorkSize);
   }

   if (primme->intWork == NULL || primme->realWork == NULL) {

      primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0, 
         __FILE__, __LINE__, primme);
      return MALLOC_FAILURE;
   }
      
   return 0;

  /***************************************************************************/
} /* end of allocate workspace
Example #8
0
int zprimme(double *evals, Complex_Z *evecs, double *resNorms, 
            primme_params *primme) {
      
   int ret;
   int *perm;
   double machEps;

   /* ------------------ */
   /* zero out the timer */
   /* ------------------ */
   primme_wTimer(1);

   /* ---------------------------- */
   /* Clear previous error reports */
   /* ---------------------------- */
   primme_DeleteStackTrace(primme);

   /* ----------------------- */
   /*  Find machine precision */
   /* ----------------------- */
   machEps = Num_dlamch_primme("E");

   /* ------------------ */
   /* Set some defaults  */
   /* ------------------ */
   primme_set_defaults(primme);

   /* -------------------------------------------------------------- */
   /* If needed, we are ready to estimate required memory and return */
   /* -------------------------------------------------------------- */
   if (evals == NULL && evecs == NULL && resNorms == NULL)
       return allocate_workspace(primme, FALSE);

   /* ----------------------------------------------------- */
   /* Reset random number seed if inappropriate for DLARENV */
   /* Yields unique quadruples per proc if procID < 4096^3  */
   /* ----------------------------------------------------- */

   if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = 
      primme->procID % 4096;
   if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = 
      (int)(primme->procID/4096+1) % 4096;
   if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = 
      (int)((primme->procID/4096)/4096+2) % 4096;
   if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = 
      (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096;

   /* ----------------------- */
   /* Set default convTetFun  */
   /* ----------------------- */

   if (!primme->convTestFun) {
      primme->convTestFun = convTestFunAbsolute;
   }

   /* ------------------------------------------------------- */
   /* Check primme input data for bounds, correct values etc. */
   /* ------------------------------------------------------- */

   ret = check_input(evals, evecs, resNorms, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_zprimme, Primme_check_input, ret,
                      __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return ret;
   }
   
   /* ----------------------------------------------------------------------- */
   /* Compute AND allocate memory requirements for main_iter and subordinates */
   /* ----------------------------------------------------------------------- */

   ret = allocate_workspace(primme, TRUE);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_zprimme, Primme_allocate_workspace, ret,
                      __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return ALLOCATE_WORKSPACE_FAILURE;
   }

   /* --------------------------------------------------------- */
   /* Allocate workspace that will be needed locally by zprimme */
   /* --------------------------------------------------------- */
   perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array");

   if (perm == NULL) {
      primme_PushErrorMessage(Primme_zprimme, Primme_malloc, 0, 
                      __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return MALLOC_FAILURE;
   }

   /*----------------------------------------------------------------------*/
   /* Call the solver                                                      */
   /*----------------------------------------------------------------------*/

   ret = main_iter_zprimme(evals, perm, evecs, resNorms, machEps, 
                   primme->intWork, primme->realWork, primme);

   if (ret < 0) {
      primme_PushErrorMessage(Primme_zprimme, Primme_main_iter, 
                      ret, __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return MAIN_ITER_FAILURE;
   }
   /*----------------------------------------------------------------------*/

   /*----------------------------------------------------------------------*/
   /* If locking is engaged, the converged Ritz vectors are stored in the  */
   /* order they converged.  They must then be permuted so that they       */
   /* correspond to the sorted Ritz values in evals.                       */
   /*----------------------------------------------------------------------*/

   permute_vecs_zprimme(&evecs[primme->numOrthoConst], primme->nLocal,
         primme->initSize, primme->nLocal, perm, (Complex_Z*)primme->realWork,
         (int*)primme->intWork);

   free(perm);

   primme->stats.elapsedTime = primme_wTimer(0);
   return(0);
}
Example #9
0
static int apply_skew_projector(Complex_Z *Q, Complex_Z *Qhat, Complex_Z *UDU, 
   int *ipivot, int numCols, Complex_Z *v, Complex_Z *rwork, 
   primme_params *primme) {

   int count;
   Complex_Z ztmp;
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00}, tmone = {-1.0e+00,+0.0e00};

   if (numCols > 0) {    /* there is a projector to be applied */

      int ret;
      Complex_Z *overlaps;  /* overlaps of v with columns of Q   */
      Complex_Z *workSpace; /* Used for computing local overlaps */

      overlaps = rwork;
      workSpace = overlaps + numCols;

      /* --------------------------------------------------------*/
      /* Treat the one vector case with BLAS 1 calls             */
      /* --------------------------------------------------------*/
      if (numCols == 1) {
         /* Compute workspace = Q'*v */
         overlaps[0] = dist_dot(Q, 1, v, 1, primme);

	 /* Backsolve only if there is a skew projector */
	 if (UDU != NULL) {
	    if ( z_eq_primme(UDU[0], tzero) ) {
	       return UDUSOLVE_FAILURE;
	    }
	    z_div_primme(&overlaps[0], &overlaps[0], &UDU[0]);
	 }
         /* Compute v=v-Qhat*overlaps */
	 ztmp.r = - overlaps[0].r;
	 ztmp.i = - overlaps[0].i;
	 Num_axpy_zprimme(primme->nLocal, ztmp, Qhat, 1, v, 1);
      }
      else {
         /* ------------------------------------------------------*/
         /* More than one vectors. Use BLAS 2.                    */
         /* ------------------------------------------------------*/
         /* Compute workspace = Q'*v */
         Num_gemv_zprimme("C", primme->nLocal, numCols, tpone, Q, 
		      primme->nLocal, v, 1, tzero, workSpace, 1);

         /* Global sum: overlaps = Q'*v */
         // In Complex, the size of the array to globalSum is twice as large
         count = 2*numCols;
         (*primme->globalSumDouble)(workSpace, overlaps, &count, primme);   

         /* --------------------------------------------*/
         /* Backsolve only if there is a skew projector */
         /* --------------------------------------------*/
         if (UDU != NULL) {
            /* Solve (Q'Qhat)^{-1}*workSpace = overlaps = Q'*v for alpha by */
	    /* backsolving  with the UDU decomposition.                 */
   
            ret = UDUSolve_zprimme(UDU, ipivot, numCols, overlaps, workSpace);
            if (ret != 0) {
               primme_PushErrorMessage(Primme_apply_skew_projector,
                  Primme_udusolve, ret, __FILE__, __LINE__, primme);
               return UDUSOLVE_FAILURE;
            }
            /* Compute v=v-Qhat*workspace */
            Num_gemv_zprimme("N", primme->nLocal, numCols, tmone, Qhat, 
			 primme->nLocal, workSpace, 1, tpone, v, 1);
	 }
         else  {
            /* Compute v=v-Qhat*overlaps  */
            Num_gemv_zprimme("N", primme->nLocal, numCols, tmone, Qhat, 
			 primme->nLocal, overlaps, 1, tpone, v, 1);
	 } // UDU==null
      } // numCols != 1
   } // numCols > 0

   return 0;
}
Example #10
0
static int restart_H(Complex_Z *H, Complex_Z *hVecs, double *hVals, 
   int restartSize, int basisSize, Complex_Z *previousHVecs, 
   int numPrevRetained, int indexOfPreviousVecs, int rworkSize, 
   Complex_Z *rwork, primme_params *primme) {

   int i, j;          /* Loop variables                                       */
   int workSpaceSize; /* Workspace size needed by insert_submatrix            */
   int ret;           /* Return value                                         */
   Complex_Z *subMatrix;/* Contains the submatrix previousHVecs'*H*previousHvecs*/
   Complex_Z *workSpace;/* Workspace size needed                              */
   Complex_Z tpone = {+1.0e+00,+0.0e00}, tzero = {+0.0e+00,+0.0e00};             /*constants*/
   
   /* ---------------------------------------------------------------------- */
   /* If coefficient vectors from the previous iteration were retained, then */
   /* set up work space for computing the numPrevRetained x numPrevRetained  */
   /* submatrix, then compute the submatrix.                                 */
   /* ---------------------------------------------------------------------- */

   if (numPrevRetained > 0) {
      subMatrix = rwork;
      workSpace = &rwork[numPrevRetained*numPrevRetained];
      workSpaceSize = rworkSize - numPrevRetained*numPrevRetained;
      compute_submatrix(previousHVecs, numPrevRetained, H, basisSize, 
         primme->maxBasisSize, subMatrix, workSpace);
   }
      
   /* ----------------------------------------------------------------- */
   /* V*hVecs yields a diagonal matrix composed of the Ritz values of A */
   /* with respect to V.  Set H to a diagonal matrix with the Ritz      */
   /* values on the diagonal.                                           */
   /* ----------------------------------------------------------------- */

   for (j=0; j < restartSize; j++) {
      for (i=0; i < j; i++) {
         H[primme->maxBasisSize*j+i] = tzero;
      }
      H[primme->maxBasisSize*j+j].r = hVals[j];
      H[primme->maxBasisSize*j+j].i = 0.0L;
   }

   /* --------------------------------------------------------------------- */
   /* Given the above H, we know the eigenvectors of H will be the standard */
   /* basis vectors if no previous coefficient vectors are retained         */
   /* --------------------------------------------------------------------- */

   for (j=0; j < restartSize; j++) {
      for (i=0; i < j; i++) {
          hVecs[restartSize*j+i] = tzero;
          hVecs[restartSize*i+j] = tzero;
      }
      hVecs[restartSize*j+j] = tpone;
   }      

   /* ---------------------------------------------------------------------- */
   /* If coefficient vectors from the previous iteration have been retained, */
   /* then insert the computed overlap matrix into the restarted H and solve */ 
   /* the resulting eigenproblem for the resulting H.                        */
   /* ---------------------------------------------------------------------- */

   if (numPrevRetained > 0) {
      ret = insert_submatrix(H, hVals, hVecs, restartSize, subMatrix, 
        numPrevRetained, indexOfPreviousVecs, workSpaceSize, workSpace, primme);

      if (ret != 0) {
         primme_PushErrorMessage(Primme_restart_h, Primme_insert_submatrix, 
            ret, __FILE__, __LINE__, primme);
         return INSERT_SUBMATRIX_FAILURE;
      }

   }

   return 0;
}
Example #11
0
int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs,
   double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, 
   Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, 
   int *numConvergedStored, int numLocked, int numGuesses, 
   Complex_Z *previousHVecs, int numPrevRetained, double machEps, 
   Complex_Z *rwork, int rworkSize, primme_params *primme) {
  
   int numFree;             /* The number of basis vectors to be left free    */
   int numPacked;           /* The number of coefficient vectors moved to the */
                            /* end of the hVecs array.                        */
   int restartSize;         /* The number of vectors to restart with          */
   int indexOfPreviousVecs=0; /* Position within hVecs array the previous       */
                            /* coefficient vectors will be stored             */
   int i, n, eStart;        /* various variables                              */
   int ret;                 /* Return value                                   */

   numPacked = 0;

   /* --------------------------------------------------------------------- */
   /* If dynamic thick restarting is to be used, then determine the minimum */
   /* number of free spaces to be maintained and call the DTR routine.      */
   /* The DTR routine will determine how many coefficient vectors from the  */
   /* left and right of H-spectrum to retain at restart. If DTR is not used */
   /* then set the restart size to the minimum restart size.                */
   /* --------------------------------------------------------------------- */

   if (primme->restartingParams.scheme == primme_dtr) {
      numFree = numPrevRetained+max(3, primme->maxBlockSize);
      restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, 
                        iev, rwork, primme);
   }
   else {
      restartSize = min(basisSize, primme->minRestartSize);
   }

   /* ----------------------------------------------------------------------- */
   /* If locking is engaged, then swap coefficient vectors corresponding to   */
   /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/
   /* This allows the converged Ritz vectors to be stored contiguously in     */
   /* memory after restart.  This significantly reduces the amount of data    */
   /* movement the locking routine would have to perform otherwise.           */
   /* The following function also covers some limit cases where restartSize   */
   /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize  */
   /* ----------------------------------------------------------------------- */

   if (primme->locking) {
      numPacked = pack_converged_coefficients(&restartSize, basisSize, 
         &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme);
   }

   /* ----------------------------------------------------------------------- */
   /* Restarting with a small number of coefficient vectors from the previous */
   /* iteration can be retained to accelerate convergence.  The previous      */
   /* coefficient vectors must be combined with the current coefficient       */
   /* vectors by first orthogonalizing the previous ones versus the current   */
   /* restartSize ones.  The orthogonalized previous vectors are then         */
   /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs).          */
   /* ----------------------------------------------------------------------- */

   if (numPrevRetained > 0) {
      indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs,
         basisSize, &restartSize, numPacked, previousHVecs, 
         &numPrevRetained, machEps, rwork, primme);
   }

   /* -------------------------------------------------------- */
   /* Restart V by replacing it with the current Ritz vectors. */
   /* -------------------------------------------------------- */

   restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize);
   
   /* ------------------------------------------------------------ */
   /* Restart W by replacing it with W times the eigenvectors of H */
   /* ------------------------------------------------------------ */

   restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize);

   /* ---------------------------------------------------------------- */
   /* Because we have replaced V by the Ritz vectors, V'*A*V should be */
   /* diagonal with the Ritz values on the diagonal.  The eigenvectors */
   /* of the new matrix V'*A*V become the standard basis vectors.      */
   /* ---------------------------------------------------------------- */

   ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, 
      numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, 
         __LINE__, primme);
      return RESTART_H_FAILURE;
   }

   /* --------------------------------------------------------------------- */
   /* If the user requires (I-QQ') projectors in JDQMR without locking,     */
   /* the converged eigenvectors are copied temporarily to evecs. There     */
   /* they stay locked  for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/
   /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and   */
   /* they will overwrite evecs at the end.                                 */
   /* We recommend against this type of usage. It's better to use locking.  */
   /* --------------------------------------------------------------------- */

   /* Andreas NOTE: is done inefficiently for the moment. We should only */
   /* add the recently converged. But we need to differentiate them      */
   /* from flags...                                                      */

   if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && 
        numConverged > 0 &&
        (primme->correctionParams.projectors.LeftQ ||
         primme->correctionParams.projectors.RightQ )  ) {

       n = primme->nLocal;
       *numConvergedStored = 0;
       eStart = primme->numOrthoConst;

       for (i=0;i<primme->numEvals;i++) {
           if (flags[i] == CONVERGED) {
              if (*numConvergedStored < numConverged) {
                 Num_zcopy_zprimme(n, &V[i*n], 1, 
                              &evecs[(eStart+*numConvergedStored)*n], 1);
                 (*numConvergedStored)++;
              }
           } /* if converged */
       } /* for */
       if (*numConvergedStored != numConverged) {
          if (primme->printLevel >= 1 && primme->procID == 0) {
             fprintf(primme->outputFile, 
             "Flags and converged eigenpairs do not correspond %d %d\n",
                numConverged, *numConvergedStored);
          }
          return PSEUDOLOCK_FAILURE;
       }

      /* Update also the M = K^{-1}evecs and its udu factorization if needed */
      if (UDU != NULL) {

         apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], 
                                    numConverged, primme );
         /* rwork must be maxEvecsSize*numEvals! */
         update_projection_zprimme(evecs, evecsHat, M, eStart*n,
           primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme);

         ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, 
                         rwork, rworkSize, primme);
         if (ret != 0) {
            primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret,
               __FILE__, __LINE__, primme);
            return UDUDECOMPOSE_FAILURE;
         }
      } /* if UDU factorization is needed */
   } /* if this pseudo locking should take place */

   return restartSize;
}
Example #12
0
int dprimme(double *evals, double *evecs, double *resNorms, 
            primme_params *primme) {
      
   int ret;
   int *perm;
   double machEps;

   /* ------------------ */
   /* zero out the timer */
   /* ------------------ */
   primme_wTimer(1);

   /* ---------------------------- */
   /* Clear previous error reports */
   /* ---------------------------- */
   primme_DeleteStackTrace(primme);

   /* ----------------------- */
   /*  Find machine precision */
   /* ----------------------- */
   machEps = Num_dlamch_primme("E");

   /* ----------------------------------------- */
   /* Set some defaults for sequential programs */
   /* ----------------------------------------- */
   if (primme->numProcs == 1) {
      primme->nLocal = primme->n;
      primme->procID = 0;
      if (primme->globalSumDouble == NULL) 
         primme->globalSumDouble = primme_seq_globalSumDouble;
   }

   /* --------------------------------------------------------------------- */
   /* Decide on whether to use locking (hard locking), or not (soft locking)*/
   /* --------------------------------------------------------------------- */
   if (primme->target != primme_smallest &&
       primme->target != primme_largest ) {
      /* Locking is necessary as interior Ritz values can cross shifts */
      primme->locking = 1;
   }
   else {
      if (primme->locking == 0) {
         /* use locking when not enough vectors to restart with */
         primme->locking = (primme->numEvals > primme->minRestartSize);   
      }
   }

   /* -------------------------------------------------------------- */
   /* If needed, we are ready to estimate required memory and return */
   /* -------------------------------------------------------------- */
   if (evals == NULL && evecs == NULL && resNorms == NULL)
       return allocate_workspace(primme, FALSE);

   /* ----------------------------------------------------- */
   /* Reset random number seed if inappropriate for DLARENV */
   /* Yields unique quadruples per proc if procID < 4096^3  */
   /* ----------------------------------------------------- */

   if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = 
      primme->procID % 4096;
   if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = 
      (int)(primme->procID/4096+1) % 4096;
   if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = 
      (int)((primme->procID/4096)/4096+2) % 4096;
   if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = 
      (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096;

   /* ------------------------------------------------------- */
   /* Check primme input data for bounds, correct values etc. */
   /* ------------------------------------------------------- */

   ret = check_input(evals, evecs, resNorms, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_dprimme, Primme_check_input, ret,
                      __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return ret;
   }
   
   /* ----------------------------------------------------------------------- */
   /* Compute AND allocate memory requirements for main_iter and subordinates */
   /* ----------------------------------------------------------------------- */

   ret = allocate_workspace(primme, TRUE);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_dprimme, Primme_allocate_workspace, ret,
                      __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return ALLOCATE_WORKSPACE_FAILURE;
   }

   /* --------------------------------------------------------- */
   /* Allocate workspace that will be needed locally by dprimme */
   /* --------------------------------------------------------- */
   perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array");

   if (perm == NULL) {
      primme_PushErrorMessage(Primme_dprimme, Primme_malloc, 0, 
                      __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return MALLOC_FAILURE;
   }

   /*----------------------------------------------------------------------*/
   /* Call the solver                                                      */
   /*----------------------------------------------------------------------*/

   ret = main_iter_dprimme(evals, perm, evecs, resNorms, machEps, 
                   primme->intWork, primme->realWork, primme);

   if (ret < 0) {
      primme_PushErrorMessage(Primme_dprimme, Primme_main_iter, 
                      ret, __FILE__, __LINE__, primme);
      primme->stats.elapsedTime = primme_wTimer(0);
      return MAIN_ITER_FAILURE;
   }
   /*----------------------------------------------------------------------*/

   /*----------------------------------------------------------------------*/
   /* If locking is engaged, the converged Ritz vectors are stored in the  */
   /* order they converged.  They must then be permuted so that they       */
   /* correspond to the sorted Ritz values in evals.                       */
   /*----------------------------------------------------------------------*/

   permute_evecs_dprimme(&evecs[primme->numOrthoConst], perm, 
        (double *) primme->realWork, primme->numEvals, primme->nLocal);

   free(perm);

   primme->stats.elapsedTime = primme_wTimer(0);
   return(0);
}
Example #13
0
static int allocate_workspace(primme_params *primme, int allocate) {

   long int realWorkSize;  /* Size of real work space.                  */
   long int rworkByteSize; /* Size of all real data in bytes            */

   int dataSize;     /* Number of double positions allocated, excluding */
                     /* doubles (see doubleSize below) and work space.  */
   int doubleSize;   /* Number of doubles allocated exclusively to the  */
                     /* double arrays: hVals, prevRitzVals, blockNorms  */
   int maxEvecsSize; /* Maximum number of vectors in evecs and evecsHat */
   int intWorkSize;  /* Size of integer work space in bytes             */
   int orthoSize;    /* Amount of work space required by ortho routine  */
   int solveCorSize; /* work space for solve_correction and inner_solve */

   maxEvecsSize = primme->numOrthoConst + primme->numEvals;

   /* first determine real workspace */

   /*----------------------------------------------------------------------*/
   /* Compute the memory required by the main iteration data structures    */
   /*----------------------------------------------------------------------*/

   dataSize = primme->nLocal*primme->maxBasisSize  /* Size of V            */
      + primme->nLocal*primme->maxBasisSize        /* Size of W            */
      + primme->maxBasisSize*primme->maxBasisSize  /* Size of H            */
      + primme->maxBasisSize*primme->maxBasisSize  /* Size of hVecs        */
      + primme->restartingParams.maxPrevRetain*primme->maxBasisSize;
                                                   /* size of prevHVecs    */

   /*----------------------------------------------------------------------*/
   /* Add also memory needed for JD skew projectors                        */
   /*----------------------------------------------------------------------*/
   if ( (primme->correctionParams.precondition && 
         primme->correctionParams.maxInnerIterations != 0 &&
         primme->correctionParams.projectors.RightQ &&
         primme->correctionParams.projectors.SkewQ          ) ) {

      dataSize = dataSize + 
         + primme->nLocal*maxEvecsSize             /* Size of evecsHat     */ 
         + maxEvecsSize*maxEvecsSize               /* Size of M            */
         + maxEvecsSize*maxEvecsSize;              /* Size of UDU          */
   }

   /*----------------------------------------------------------------------*/
   /* Determine orthogalization workspace with and without locking.        */
   /*----------------------------------------------------------------------*/

   if (primme->locking) {
      orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, 
         maxEvecsSize, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }
   else {
      orthoSize = ortho_dprimme(NULL, primme->nLocal, primme->maxBasisSize,
         primme->maxBasisSize+primme->maxBlockSize-1, NULL, primme->nLocal, 
         primme->numOrthoConst+1, primme->nLocal, NULL, 0.0, NULL, 0, primme);
   }

   /*----------------------------------------------------------------------*/
   /* Determine workspace required by solve_correction and its children    */
   /*----------------------------------------------------------------------*/

   solveCorSize = solve_correction_dprimme(NULL, NULL, NULL, NULL, NULL, 
                  NULL, NULL, maxEvecsSize, 0, NULL, NULL, NULL, NULL, 
                  primme->maxBasisSize, NULL, NULL, primme->maxBlockSize, 
                  1.0, 0.0, 1.0, NULL, NULL, 0, primme);

   /*----------------------------------------------------------------------*/
   /* Workspace is reused in many functions. Allocate the max needed by any*/
   /*----------------------------------------------------------------------*/
   realWorkSize = Num_imax_primme(8,

      /* Workspace needed by init_basis */
      Num_imax_primme(3, 
         maxEvecsSize*primme->numOrthoConst, maxEvecsSize, orthoSize),

      /* Workspace needed by solve_correction and its child inner_solve */
      solveCorSize, 

      /* Workspace needed by function solve_H */
#ifdef ESSL
      2*primme->maxBasisSize +
         primme->maxBasisSize*(primme->maxBasisSize + 1)/2,
#else
      3*primme->maxBasisSize,
#endif
   
      /* Workspace needed by function check_convergence */ 
      max(primme->maxBasisSize*primme->maxBlockSize + primme->maxBlockSize,
                2*maxEvecsSize*primme->maxBlockSize),

      /* Workspace needed by function restart*/
      primme->restartingParams.maxPrevRetain*
      primme->restartingParams.maxPrevRetain  /* for submatrix of prev hvecs */
      + Num_imax_primme(4, primme->maxBasisSize, 
           3*primme->restartingParams.maxPrevRetain,
           primme->maxBasisSize*primme->restartingParams.maxPrevRetain,
           primme->maxBasisSize*primme->maxBasisSize,     /* for DTR copying */
           maxEvecsSize*primme->numEvals), /*this one is for UDU w/o locking */

      /* Workspace needed by function verify_norms */
      2*primme->numEvals,

      /* space needed by lock vectors (no need w/o lock but doesn't add any) */
      (2*primme->maxBasisSize) + Num_imax_primme(3, 
          maxEvecsSize*primme->maxBasisSize, orthoSize, 3*primme->maxBasisSize),

      /* maximum workspace needed by ortho */ 
      orthoSize);

   /*----------------------------------------------------------------------*/
   /* The following size is always alloced as double                       */
   /*----------------------------------------------------------------------*/

   doubleSize = primme->maxBasisSize               /* Size of hVals        */
      + primme->numEvals+primme->maxBasisSize      /* Size of prevRitzVals */
      + primme->maxBlockSize;                      /* Size of blockNorms   */

   /*----------------------------------------------------------------------*/
   /* Determine the integer workspace needed                               */
   /*----------------------------------------------------------------------*/

   intWorkSize = primme->maxBasisSize /* Size of flag               */
      + 2*primme->maxBlockSize        /* Size of iev and ilev       */
      + maxEvecsSize                  /* Size of ipivot             */
      + 2*primme->maxBasisSize;       /* Size of 2 perms in solve_H */

   /*----------------------------------------------------------------------*/
   /* byte sizes:                                                          */
   /*----------------------------------------------------------------------*/
   
   rworkByteSize = (dataSize + realWorkSize)*sizeof(double)
                                + doubleSize*sizeof(double); 

   /*----------------------------------------------------------------------*/
   /* If only the amount of required workspace is needed return it in bytes*/
   /*----------------------------------------------------------------------*/

   if (!allocate) {
      primme->intWorkSize  = intWorkSize*sizeof(int);
      primme->realWorkSize = rworkByteSize;
      return 1;
   }

   /*----------------------------------------------------------------------*/
   /* Allocate the required workspace, if the user did not provide enough  */
   /*----------------------------------------------------------------------*/
   if (primme->realWorkSize < rworkByteSize || primme->realWork == NULL) {
      if (primme->realWork != NULL) {
         free(primme->realWork);
      }
      primme->realWorkSize = rworkByteSize;
      primme->realWork = (void *) primme_valloc(rworkByteSize,"Real Alloc");
      if (primme->printLevel >= 5) fprintf(primme->outputFile, 
         "Allocating real workspace: %ld bytes\n", primme->realWorkSize);
   }

   if (primme->intWorkSize < intWorkSize*sizeof(int) || primme->intWork==NULL) {
      if (primme->intWork != NULL) {
         free(primme->intWork);
      }
      primme->intWorkSize = intWorkSize*sizeof(int);
      primme->intWork= (int *)primme_valloc(primme->intWorkSize ,"Int Alloc");
      if (primme->printLevel >= 5) fprintf(primme->outputFile, 
         "Allocating integer workspace: %d bytes\n", primme->intWorkSize);
   }

   if (primme->intWork == NULL || primme->realWork == NULL) {

      primme_PushErrorMessage(Primme_allocate_workspace, Primme_malloc, 0, 
         __FILE__, __LINE__, primme);
      return MALLOC_FAILURE;
   }
      
   return 0;

  /***************************************************************************/
} /* end of allocate workspace
Example #14
0
static int init_block_krylov(double *V, double *W, int dv1, int dv2,
                             double *locked, int numLocked, double machEps, double *rwork,
                             int rworkSize, primme_params *primme) {

    int i;               /* Loop variables */
    int numNewVectors;   /* Number of vectors to be generated */
    int ret;             /* Return code.                      */
    int ONE = 1;         /* Used for passing it by reference in matrixmatvec */

    numNewVectors = dv2 - dv1 + 1;

    /*----------------------------------------------------------------------*/
    /* Generate a single Krylov space if there are only a few vectors to be */
    /* generated, else generate a block Krylov space with                   */
    /* primme->maxBlockSize as the block Size.                              */
    /*----------------------------------------------------------------------*/

    if (numNewVectors <= primme->maxBlockSize) {

        /* Create and orthogonalize the inital vectors */

        Num_larnv_dprimme(2, primme->iseed,primme->nLocal,&V[primme->nLocal*dv1]);
        ret = ortho_dprimme(V, primme->nLocal, dv1, dv1, locked,
                            primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
                            rwork, rworkSize, primme);

        if (ret < 0) {
            primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho, ret,
                                    __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
        }

        /* Generate the remainder of the Krylov space. */

        for (i = dv1; i < dv2; i++) {
            (*primme->matrixMatvec)
            (&V[primme->nLocal*i], &V[primme->nLocal*(i+1)], &ONE, primme);
            Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*(i+1)], 1,
                              &W[primme->nLocal*i], 1);
            ret = ortho_dprimme(V, primme->nLocal, i+1, i+1, locked,
                                primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
                                rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho,
                                        ret, __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }
        }

        primme->stats.numMatvecs += dv2-dv1;
        update_W_dprimme(V, W, dv2, 1, primme);

    }
    else {
        /*----------------------------------------------------------------------*/
        /* Generate the initial vectors.                                        */
        /*----------------------------------------------------------------------*/

        Num_larnv_dprimme(2, primme->iseed, primme->nLocal*primme->maxBlockSize,
                          &V[primme->nLocal*dv1]);
        ret = ortho_dprimme(V, primme->nLocal, dv1,
                            dv1+primme->maxBlockSize-1, locked, primme->nLocal, numLocked,
                            primme->nLocal, primme->iseed, machEps, rwork, rworkSize, primme);

        /* Generate the remaining vectors in the sequence */

        for (i = dv1+primme->maxBlockSize; i <= dv2; i++) {
            (*primme->matrixMatvec)(&V[primme->nLocal*(i-primme->maxBlockSize)],
                                    &V[primme->nLocal*i], &ONE, primme);
            Num_dcopy_dprimme(primme->nLocal, &V[primme->nLocal*i], 1,
                              &W[primme->nLocal*(i-primme->maxBlockSize)], 1);

            ret = ortho_dprimme(V, primme->nLocal, i, i, locked,
                                primme->nLocal, numLocked, primme->nLocal, primme->iseed, machEps,
                                rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_block_krylov, Primme_ortho,
                                        ret, __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }

        }

        primme->stats.numMatvecs += dv2-(dv1+primme->maxBlockSize)+1;
        update_W_dprimme(V, W, dv2-primme->maxBlockSize+1, primme->maxBlockSize,
                         primme);

    }

    return 0;
}
Example #15
0
int init_basis_dprimme(double *V, double *W, double *evecs,
                       double *evecsHat, double *M, double *UDU, int *ipivot,
                       double machEps,  double *rwork, int rworkSize, int *basisSize,
                       int *nextGuess, int *numGuesses, double *timeForMV,
                       primme_params *primme) {

    int ret;          /* Return value                              */
    int currentSize;

    /*-----------------------------------------------------------------------*/
    /* Orthogonalize the orthogonalization constraints provided by the user. */
    /* If a preconditioner is given and inner iterations are to be           */
    /* performed, then initialize M.                                         */
    /*-----------------------------------------------------------------------*/

    if (primme->numOrthoConst > 0) {
        ret = ortho_dprimme(evecs, primme->nLocal, 0,
                            primme->numOrthoConst - 1, NULL, 0, 0, primme->nLocal,
                            primme->iseed, machEps, rwork, rworkSize, primme);

        /* Push an error message onto the stack trace if an error occured */
        if (ret < 0) {
            primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                                    __FILE__, __LINE__, primme);
            return ORTHO_FAILURE;
        }

        /* Initialize evecsHat, M, and its factorization UDU,ipivot. This   */
        /* allows the orthogonalization constraints to be included in the   */
        /* projector (I-QQ'). Only needed if there is preconditioning, and  */
        /* JDqmr inner iterations with a right, skew projector. Only in     */
        /* that case, is UDU not NULL                                       */

        if (UDU != NULL) {

            (*primme->applyPreconditioner)
            (evecs, evecsHat, &primme->numOrthoConst, primme);
            primme->stats.numPreconds += primme->numOrthoConst;

            update_projection_dprimme(evecs, evecsHat, M, 0,
                                      primme->numOrthoConst+primme->numEvals, primme->numOrthoConst,
                                      rwork, primme);

            ret = UDUDecompose_dprimme(M, UDU, ipivot, primme->numOrthoConst,
                                       rwork, rworkSize, primme);

            if (ret != 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_ududecompose, ret,
                                        __FILE__, __LINE__, primme);
                return UDUDECOMPOSE_FAILURE;
            }

        }  /* if evecsHat and M=evecs'evecsHat, UDU are needed */

    }  /* if numOrthoCont >0 */


    /*-----------------------------------------------------------------------*/
    /* No locking                                                            */
    /*-----------------------------------------------------------------------*/
    if (!primme->locking) {

        /* Handle case when no initial guesses are provided by the user */
        if (primme->initSize == 0) {

            ret = init_block_krylov(V, W, 0, primme->minRestartSize - 1, evecs,
                                    primme->numOrthoConst, machEps, rwork, rworkSize, primme);

            /* Push an error message onto the stack trace if an error occured */
            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov,
                                        ret, __FILE__, __LINE__, primme);
                return INIT_BLOCK_KRYLOV_FAILURE;
            }

            *basisSize = primme->minRestartSize;

        }
        else {
            /* Handle case when some or all initial guesses are provided by */
            /* the user                                                     */

            /* Copy over the initial guesses provided by the user */
            Num_dcopy_dprimme(primme->nLocal*primme->initSize,
                              &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

            /* Orthonormalize the guesses provided by the user */

            ret = ortho_dprimme(V, primme->nLocal, 0, primme->initSize-1,
                                evecs, primme->nLocal, primme->numOrthoConst, primme->nLocal,
                                primme->iseed, machEps, rwork, rworkSize, primme);

            /* Push an error message onto the stack trace if an error occured */
            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                                        __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }

            update_W_dprimme(V, W, 0, primme->initSize, primme);

            /* An insufficient number of initial guesses were provided by */
            /* the user.  Generate a block Krylov space to fill the       */
            /* remaining vacancies.                                       */

            if (primme->initSize < primme->minRestartSize) {

                ret = init_block_krylov(V, W, primme->initSize,
                                        primme->minRestartSize - 1, evecs, primme->numOrthoConst,
                                        machEps, rwork, rworkSize, primme);

                /* Push an error message onto the stack trace if an error occured */
                if (ret < 0) {
                    primme_PushErrorMessage(Primme_init_basis,
                                            Primme_init_block_krylov, ret, __FILE__, __LINE__, primme);
                    return INIT_KRYLOV_FAILURE;
                }

                *basisSize = primme->minRestartSize;
            }
            else {
                *basisSize = primme->initSize;
            }

        }

        *numGuesses = 0;
        *nextGuess = 0;

    }
    else {
        /*-----------------------------------------------------------------------*/
        /* Locking                                                               */
        /*-----------------------------------------------------------------------*/

        *numGuesses = primme->initSize;
        *nextGuess = primme->numOrthoConst;

        /* If some initial guesses are available, copy them to the basis       */
        /* and orthogonalize them against themselves and the orthogonalization */
        /* constraints.                                                        */

        if (primme->initSize > 0) {
            currentSize = min(primme->initSize, primme->minRestartSize);
            Num_dcopy_dprimme(primme->nLocal*currentSize,
                              &evecs[primme->numOrthoConst*primme->nLocal], 1, V, 1);

            ret = ortho_dprimme(V, primme->nLocal, 0, currentSize-1, evecs,
                                primme->nLocal, primme->numOrthoConst, primme->nLocal,
                                primme->iseed, machEps, rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_ortho, ret,
                                        __FILE__, __LINE__, primme);
                return ORTHO_FAILURE;
            }

            update_W_dprimme(V, W, 0, currentSize, primme);
            *numGuesses = *numGuesses - currentSize;
            *nextGuess = *nextGuess + currentSize;

        }
        else {
            currentSize = 0;
        }

        /* If an insufficient number of guesses was provided, then fill */
        /* the remaining vacancies with a block Krylov space.           */

        if (currentSize < primme->minRestartSize) {

            ret = init_block_krylov(V, W, currentSize, primme->minRestartSize - 1,
                                    evecs, primme->numOrthoConst, machEps, rwork, rworkSize, primme);

            if (ret < 0) {
                primme_PushErrorMessage(Primme_init_basis, Primme_init_block_krylov,
                                        ret, __FILE__, __LINE__, primme);
                return INIT_BLOCK_KRYLOV_FAILURE;
            }

        }

        *basisSize = primme->minRestartSize;
    }

    /* ----------------------------------------------------------- */
    /* If time measurements are needed, waste one MV + one Precond */
    /* Put dummy results in the first open space of W (currentSize)*/
    /* ----------------------------------------------------------- */
    if (primme->dynamicMethodSwitch) {
        currentSize = primme->nLocal*(*basisSize);
        ret = 1;
        *timeForMV = primme_wTimer(0);
        (*primme->matrixMatvec)(V, &W[currentSize], &ret, primme);
        *timeForMV = primme_wTimer(0) - *timeForMV;
        primme->stats.numMatvecs += 1;
    }

    return 0;
}