예제 #1
0
int solve_correction_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *evecs, 
   Complex_Z *evecsHat, Complex_Z *UDU, int *ipivot, double *lockedEvals, 
   int numLocked, int numConvergedStored, double *ritzVals, 
   double *prevRitzVals, int *flags, int basisSize, double *blockNorms, 
   int *iev, int blockSize, double eresTol, double machEps, 
   double aNormEstimate, Complex_Z *rwork, int *iwork, int rworkSize, 
   primme_params *primme) {

   int blockIndex;         /* Loop index.  Ranges from 0..blockSize-1.       */
   int ritzIndex;          /* Ritz value index blockIndex corresponds to.    */
                           /* Possible values range from 0..basisSize-1.     */
   int sortedIndex;        /* Ritz value index in sortedRitzVals, blockIndex */
   			   /* corresponds to. Range 0..numLocked+basisSize-1 */
   int neededRsize;        /* Needed size for rwork. If not enough return    */
   int linSolverRWorkSize; /* Size of the linSolverRWork array.              */
   int *ilev;              /* Array of size blockSize.  Maps the target Ritz */
                           /* values to their positions in the sortedEvals   */
                           /* array.                                         */
   int sizeLprojector;     /* Sizes of the various left/right projectors     */
   int sizeRprojectorQ;	   /* These will be 0/1/or numOrthConstr+numLocked   */
   int sizeRprojectorX;    /* or numOrthConstr+numConvergedStored w/o locking*/

   static int numPrevRitzVals = 0; /* Size of prevRitzVals                   */
   int ret;                /* Return code.                                   */
   Complex_Z *r, *x, *sol;  /* Residual, Ritz vector, and correction.         */
   Complex_Z *linSolverRWork;/* Workspace needed by linear solver.            */
   double *sortedRitzVals; /* Sorted array of current and converged Ritz     */
			   /* values.  Size of array is numLocked+basisSize. */
   double *blockOfShifts;  /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/
   double *approxOlsenEps; /* Shifts for approximate Olsen implementation    */
   Complex_Z *Kinvx;	   /* Workspace to store K^{-1}x                     */
   Complex_Z *Lprojector;   /* Q pointer for (I-Q*Q'). Usually points to evecs*/
   Complex_Z *RprojectorQ;  /* May point to evecs/evecsHat depending on skewQ */
   Complex_Z *RprojectorX;  /* May point to x/Kinvx depending on skewX        */

   Complex_Z xKinvx;                        /* Stores x'*K^{-1}x if needed    */
   double eval, shift, robustShift;       /* robust shift values.           */
   Complex_Z tmpShift;			  /* Temp shift for daxpy           */

   //------------------------------------------------------------
   // Subdivide the workspace with pointers, and figure out 
   // the total amount of needed real workspace (neededRsize)
   //------------------------------------------------------------

   /* needed worksize */
   neededRsize = 0;
   Kinvx       = rwork;
   /* Kinvx will have nonzero size if precond and both RightX and SkewX */
   if (primme->correctionParams.precondition &&        
       primme->correctionParams.projectors.RightX &&  
       primme->correctionParams.projectors.SkewX ) { 

      /* OLSEN's method requires a block, but JDQMR is vector by vector */
      if (primme->correctionParams.maxInnerIterations == 0) {    
	 sol = Kinvx + primme->nLocal*blockSize;
         neededRsize = neededRsize + primme->nLocal*blockSize;
      }
      else { 					         
         sol = Kinvx + primme->nLocal;
         neededRsize = neededRsize + primme->nLocal;
      }
   }
   else {
      sol = Kinvx + 0;
   }
   if (primme->correctionParams.maxInnerIterations == 0) {    
      linSolverRWork = sol + 0; 		  /* sol not needed for GD */
      linSolverRWorkSize = 0;			  /* No inner solver used  */
   }
   else {
      linSolverRWork = sol + primme->nLocal;      /* sol needed in innerJD */
      neededRsize = neededRsize + primme->nLocal;
      linSolverRWorkSize = 			  /* Inner solver worksize */
	      4*primme->nLocal + 2*(primme->numOrthoConst+primme->numEvals);
      neededRsize = neededRsize + linSolverRWorkSize;
   }
   sortedRitzVals = (double *)(linSolverRWork + linSolverRWorkSize);
   blockOfShifts  = sortedRitzVals + (numLocked+basisSize);
   approxOlsenEps = blockOfShifts  + blockSize;
   neededRsize = neededRsize + numLocked+basisSize + 2*blockSize;

   if (neededRsize > rworkSize) {
      return(neededRsize);
   }

   // Subdivide also the integer work space
   ilev = iwork;          // of size blockSize

   //------------------------------------------------------------
   //  Figuring out preconditioning shifts  (robust, Olsen, etc)
   //------------------------------------------------------------
   /* blockOfShifts will contain the preconditioning shifts:               */
   /* either Ritz values or robustShifts computed below. These shifts      */
   /* will be used in the correction equations or in inverting (K-sigma I) */
   /* approxOlsenEps will contain error approximations for eigenavalues    */
   /* to be used for Olsen's method (when innerIterations =0).             */
    
   if (primme->locking) {
      /* Combine the sorted list of locked Ritz values with the sorted  */
      /* list of current Ritz values, ritzVals.  The merging of the two */
      /* lists lockedEvals and ritzVals is stored in sortedRitzVals.    */

      mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, 
         	   sortedRitzVals, ilev, blockSize, primme);
   }
   else {
      /* Then the sorted evals are simply the ritzVals, targeted as iev */

      sortedRitzVals = ritzVals;
      ilev = iev;
   }

   /*-----------------------------------------------------------------*/
   /* For interior eigenpairs, use the user provided shifts           */
   /*-----------------------------------------------------------------*/

   if (primme->target != primme_smallest && primme->target != primme_largest) {

      for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
         sortedIndex = ilev[blockIndex];
         blockOfShifts[blockIndex] = 
	    primme->targetShifts[ min(primme->numTargetShifts-1, sortedIndex) ];
         if (sortedIndex < numPrevRitzVals) {
            approxOlsenEps[blockIndex] = 
            fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
         }  
         else {
            approxOlsenEps[blockIndex] = blockNorms[blockIndex];
         }  
      } /* for loop */

   } /* user provided shifts */
   else {    
   /*-----------------------------------------------------------------*/
   /* else it is primme_smallest or primme_largest                    */
   /*-----------------------------------------------------------------*/

      if (primme->correctionParams.robustShifts) { 
         /*----------------------------------------------------*/
         /* Subtract/add a robust shift from/to the Ritz value */
         /*----------------------------------------------------*/

         /* Find the robust shift for each block vector */
         for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
   
	    sortedIndex = ilev[blockIndex];
	    eval = sortedRitzVals[sortedIndex];
   
            robustShift = computeRobustShift(blockIndex, 
	      blockNorms[blockIndex], prevRitzVals, numPrevRitzVals, 
	      sortedRitzVals, &approxOlsenEps[blockIndex], 
	      numLocked+basisSize, ilev, primme);
   
            /* Subtract/add the shift if looking for the smallest/largest  */
	    /* eigenvalues, Do not go beyond the previous computed eigval  */
       
            if (primme->target == primme_smallest) {
               blockOfShifts[blockIndex] = eval - robustShift;
	       if (sortedIndex > 0) blockOfShifts[blockIndex] = 
	          max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
	    }
	    else {
               blockOfShifts[blockIndex] = eval + robustShift;
	       if (sortedIndex > 0) blockOfShifts[blockIndex] = 
		  min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
            } // robust shifting 
   
         }  /* for loop */
   
      }  /* endif robust shifts */
      else {
         /*--------------------------------------------------------------*/
         /* Otherwise, the shifts for both preconditioner and correction */
         /* equation should be just the Ritz values. For Olsen's method, */
         /* the shifts for r-eps*x, are chosen as the difference in Ritz */
	 /* value between successive iterations.                         */
         /*--------------------------------------------------------------*/
   
         for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
            ritzIndex   =  iev[blockIndex];
	    sortedIndex = ilev[blockIndex];
            blockOfShifts[blockIndex] = ritzVals[ritzIndex];
 	    if (sortedIndex < numPrevRitzVals) {
               approxOlsenEps[blockIndex] = 
               fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
	    }
	    else {
               approxOlsenEps[blockIndex] = blockNorms[blockIndex]; 
	    }
         } /* for loop */
      } /* else no robust shifts */
   } /* else primme_smallest or primme_largest */

   
   /* Remember the previous ritz values*/

   numPrevRitzVals = numLocked+basisSize;
   Num_dcopy_primme(numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1);

   // Equip the primme struct with the blockOfShifts, in case the user
   // wants to precondition (K-sigma_i I)^{-1} with a different shift 
   // for each vector

   primme->ShiftsForPreconditioner = blockOfShifts;

   //------------------------------------------------------------
   //  Generalized Davidson variants -- No inner iterations
   //------------------------------------------------------------
   if (primme->correctionParams.maxInnerIterations == 0) {
      // This is Generalized Davidson or approximate Olsen's method. 
      // Perform block preconditioning (with or without projections)
      
      r = &W[primme->nLocal*basisSize];    // All the block residuals
      x = &V[primme->nLocal*basisSize];    // All the block Ritz vectors
      
      if ( primme->correctionParams.projectors.RightX &&
	   primme->correctionParams.projectors.SkewX    ) {    
 	  // Compute exact Olsen's projected preconditioner. This is 
	  // expensive and rarely improves anything! Included for completeness.
	  
	  Olsen_preconditioner_block(r, x, blockSize, Kinvx, primme);
      }
      else {
         if ( primme->correctionParams.projectors.RightX ) {   
            // Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx 
	    // is approximated by e: Kinvr-e*Kinvx=Kinv(r-e*x)=Kinv(I-ct*x*x')r

    	    for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
	       // Compute r_i = r_i - err_i * x_i 
      	       {tmpShift.r = -approxOlsenEps[blockIndex]; tmpShift.i = 0.0L;}
               Num_axpy_zprimme(primme->nLocal, tmpShift,
	       &x[primme->nLocal*blockIndex],1,&r[primme->nLocal*blockIndex],1);
	    } //for
	 }

	 // GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-ex) 

         apply_preconditioner_block(r, x, blockSize, primme );
      }
   }
   //------------------------------------------------------------
   //  JDQMR --- JD inner-outer variants
   //------------------------------------------------------------
   else {  // maxInnerIterations > 0  We perform inner-outer JDQMR.

      /* Solve the correction for each block vector. */

      for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {

         r = &W[primme->nLocal*(basisSize+blockIndex)];
         x = &V[primme->nLocal*(basisSize+blockIndex)];

	 /* Set up the left/right/skew projectors for JDQMR.        */
	 /* The pointers Lprojector, Rprojector(Q/X) point to the   */
	 /* appropriate arrays for use in the projection step       */

	 setup_JD_projectors(x, r, evecs, evecsHat, Kinvx, &xKinvx, 
   	    &Lprojector, &RprojectorQ, &RprojectorX, 
	    &sizeLprojector, &sizeRprojectorQ, &sizeRprojectorX,
   	    numLocked, numConvergedStored, primme);

         /* Map the index of the block vector to its corresponding eigenvalue */
         /* index, and the shift for the correction equation. Also make the   */
	 /* shift available to primme, in case (K-shift I)^-1 is needed       */

         ritzIndex = iev[blockIndex];
         shift = blockOfShifts[blockIndex];
         primme->ShiftsForPreconditioner = &blockOfShifts[blockIndex];

         ret = inner_solve_zprimme(x, r, &blockNorms[blockIndex], evecs, 
	    evecsHat, UDU, ipivot, &xKinvx, Lprojector, RprojectorQ, 
	    RprojectorX, sizeLprojector, sizeRprojectorQ, sizeRprojectorX,
	    sol, ritzVals[ritzIndex], shift, eresTol, aNormEstimate, 
	    machEps, linSolverRWork, linSolverRWorkSize, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_solve_correction, Primme_inner_solve,
			    ret, __FILE__, __LINE__, primme);
            return (INNER_SOLVE_FAILURE);
         }

         Num_zcopy_zprimme(primme->nLocal, sol, 1, 
            &V[primme->nLocal*(basisSize+blockIndex)], 1);

      } // end for each block vector
   } // JDqmr variants

   return 0;

}
예제 #2
0
static void Olsen_preconditioner_block(Complex_Z *r, Complex_Z *x,
		int blockSize, Complex_Z *rwork, primme_params *primme) {

   int blockIndex, count;
   Complex_Z alpha;
   Complex_Z *Kinvx, *xKinvx, *xKinvr, *xKinvx_local, *xKinvr_local;
   Complex_Z ztmp;
   Complex_Z tzero = {+0.0e+00,+0.0e00};

   //------------------------------------------------------------------
   // Subdivide workspace
   //------------------------------------------------------------------
   Kinvx = rwork;
   xKinvx_local = Kinvx + primme->nLocal*blockSize;
   xKinvr_local = xKinvx_local + blockSize;
   xKinvx       = xKinvr_local + blockSize;
   xKinvr       = xKinvx + blockSize;
          
   //------------------------------------------------------------------
   // Compute K^{-1}x for block x. Kinvx memory requirement (blockSize*nLocal)
   //------------------------------------------------------------------

   apply_preconditioner_block(x, Kinvx, blockSize, primme );

   //------------------------------------------------------------------
   // Compute local x^TK^{-1}x and x^TK^{-1}r = (K^{-1}x)^Tr for each vector
   //------------------------------------------------------------------

   for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
      xKinvx_local[blockIndex] =
        Num_dot_zprimme(primme->nLocal, &x[primme->nLocal*blockIndex],1, 
                           &Kinvx[primme->nLocal*blockIndex],1);
      xKinvr_local[blockIndex] =
        Num_dot_zprimme(primme->nLocal, &Kinvx[primme->nLocal*blockIndex],1,
                                   &r[primme->nLocal*blockIndex],1);
   }      
   count = 4*blockSize;
   (*primme->globalSumDouble)(xKinvx_local, xKinvx, &count, primme);

   //------------------------------------------------------------------
   // Compute K^{-1}r
   //------------------------------------------------------------------

   apply_preconditioner_block(r, x, blockSize, primme );

   //------------------------------------------------------------------
   // Compute K^(-1)r  - ( xKinvr/xKinvx ) K^(-1)r for each vector
   //------------------------------------------------------------------

   for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
      if (z_abs_primme(xKinvx[blockIndex]) > 0.0L) 
      {
	 ztmp.r = -xKinvr[blockIndex].r;
	 ztmp.i = -xKinvr[blockIndex].i;
         z_div_primme(&alpha, &ztmp, &xKinvx[blockIndex]);
      }
      else   
         alpha = tzero;

      Num_axpy_zprimme(primme->nLocal,alpha,&Kinvx[primme->nLocal*blockIndex],
		      		       1, &x[primme->nLocal*blockIndex],1);
   } //for

} // of Olsen_preconditiner_block
예제 #3
0
int restart_zprimme(Complex_Z *V, Complex_Z *W, Complex_Z *H, Complex_Z *hVecs,
   double *hVals, int *flags, int *iev, Complex_Z *evecs, Complex_Z *evecsHat, 
   Complex_Z *M, Complex_Z *UDU, int *ipivot, int basisSize, int numConverged, 
   int *numConvergedStored, int numLocked, int numGuesses, 
   Complex_Z *previousHVecs, int numPrevRetained, double machEps, 
   Complex_Z *rwork, int rworkSize, primme_params *primme) {
  
   int numFree;             /* The number of basis vectors to be left free    */
   int numPacked;           /* The number of coefficient vectors moved to the */
                            /* end of the hVecs array.                        */
   int restartSize;         /* The number of vectors to restart with          */
   int indexOfPreviousVecs=0; /* Position within hVecs array the previous       */
                            /* coefficient vectors will be stored             */
   int i, n, eStart;        /* various variables                              */
   int ret;                 /* Return value                                   */

   numPacked = 0;

   /* --------------------------------------------------------------------- */
   /* If dynamic thick restarting is to be used, then determine the minimum */
   /* number of free spaces to be maintained and call the DTR routine.      */
   /* The DTR routine will determine how many coefficient vectors from the  */
   /* left and right of H-spectrum to retain at restart. If DTR is not used */
   /* then set the restart size to the minimum restart size.                */
   /* --------------------------------------------------------------------- */

   if (primme->restartingParams.scheme == primme_dtr) {
      numFree = numPrevRetained+max(3, primme->maxBlockSize);
      restartSize = dtr(numLocked, hVecs, hVals, flags, basisSize, numFree, 
                        iev, rwork, primme);
   }
   else {
      restartSize = min(basisSize, primme->minRestartSize);
   }

   /* ----------------------------------------------------------------------- */
   /* If locking is engaged, then swap coefficient vectors corresponding to   */
   /* converged Ritz vectors to the end of the hVecs(:, restartSize) subarray.*/
   /* This allows the converged Ritz vectors to be stored contiguously in     */
   /* memory after restart.  This significantly reduces the amount of data    */
   /* movement the locking routine would have to perform otherwise.           */
   /* The following function also covers some limit cases where restartSize   */
   /* plus 'to be locked' and previous Ritz vectors may exceed the basisSize  */
   /* ----------------------------------------------------------------------- */

   if (primme->locking) {
      numPacked = pack_converged_coefficients(&restartSize, basisSize, 
         &numPrevRetained, numLocked, numGuesses, hVecs, hVals, flags, primme);
   }

   /* ----------------------------------------------------------------------- */
   /* Restarting with a small number of coefficient vectors from the previous */
   /* iteration can be retained to accelerate convergence.  The previous      */
   /* coefficient vectors must be combined with the current coefficient       */
   /* vectors by first orthogonalizing the previous ones versus the current   */
   /* restartSize ones.  The orthogonalized previous vectors are then         */
   /* inserted into the hVecs array at hVecs(:,indexOfPreviousVecs).          */
   /* ----------------------------------------------------------------------- */

   if (numPrevRetained > 0) {
      indexOfPreviousVecs = combine_retained_vectors(hVals, flags, hVecs,
         basisSize, &restartSize, numPacked, previousHVecs, 
         &numPrevRetained, machEps, rwork, primme);
   }

   /* -------------------------------------------------------- */
   /* Restart V by replacing it with the current Ritz vectors. */
   /* -------------------------------------------------------- */

   restart_X(V, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize);
   
   /* ------------------------------------------------------------ */
   /* Restart W by replacing it with W times the eigenvectors of H */
   /* ------------------------------------------------------------ */

   restart_X(W, hVecs, primme->nLocal, basisSize, restartSize, rwork,rworkSize);

   /* ---------------------------------------------------------------- */
   /* Because we have replaced V by the Ritz vectors, V'*A*V should be */
   /* diagonal with the Ritz values on the diagonal.  The eigenvectors */
   /* of the new matrix V'*A*V become the standard basis vectors.      */
   /* ---------------------------------------------------------------- */

   ret = restart_H(H, hVecs, hVals, restartSize, basisSize, previousHVecs, 
      numPrevRetained, indexOfPreviousVecs, rworkSize, rwork, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_restart, Primme_restart_h, ret, __FILE__, 
         __LINE__, primme);
      return RESTART_H_FAILURE;
   }

   /* --------------------------------------------------------------------- */
   /* If the user requires (I-QQ') projectors in JDQMR without locking,     */
   /* the converged eigenvectors are copied temporarily to evecs. There     */
   /* they stay locked  for use in (I-QQ') and (I-K^{-1}Q () Q') projectors.*/
   /* NOTE THIS IS NOT LOCKING! The Ritz vectors remain in the basis, and   */
   /* they will overwrite evecs at the end.                                 */
   /* We recommend against this type of usage. It's better to use locking.  */
   /* --------------------------------------------------------------------- */

   /* Andreas NOTE: is done inefficiently for the moment. We should only */
   /* add the recently converged. But we need to differentiate them      */
   /* from flags...                                                      */

   if (!primme->locking && primme->correctionParams.maxInnerIterations != 0 && 
        numConverged > 0 &&
        (primme->correctionParams.projectors.LeftQ ||
         primme->correctionParams.projectors.RightQ )  ) {

       n = primme->nLocal;
       *numConvergedStored = 0;
       eStart = primme->numOrthoConst;

       for (i=0;i<primme->numEvals;i++) {
           if (flags[i] == CONVERGED) {
              if (*numConvergedStored < numConverged) {
                 Num_zcopy_zprimme(n, &V[i*n], 1, 
                              &evecs[(eStart+*numConvergedStored)*n], 1);
                 (*numConvergedStored)++;
              }
           } /* if converged */
       } /* for */
       if (*numConvergedStored != numConverged) {
          if (primme->printLevel >= 1 && primme->procID == 0) {
             fprintf(primme->outputFile, 
             "Flags and converged eigenpairs do not correspond %d %d\n",
                numConverged, *numConvergedStored);
          }
          return PSEUDOLOCK_FAILURE;
       }

      /* Update also the M = K^{-1}evecs and its udu factorization if needed */
      if (UDU != NULL) {

         apply_preconditioner_block(&evecs[eStart*n], &evecsHat[eStart*n], 
                                    numConverged, primme );
         /* rwork must be maxEvecsSize*numEvals! */
         update_projection_zprimme(evecs, evecsHat, M, eStart*n,
           primme->numOrthoConst+primme->numEvals, numConverged, rwork, primme);

         ret = UDUDecompose_zprimme(M, UDU, ipivot, eStart+numConverged, 
                         rwork, rworkSize, primme);
         if (ret != 0) {
            primme_PushErrorMessage(Primme_lock_vectors,Primme_ududecompose,ret,
               __FILE__, __LINE__, primme);
            return UDUDECOMPOSE_FAILURE;
         }
      } /* if UDU factorization is needed */
   } /* if this pseudo locking should take place */

   return restartSize;
}