/*******************************************************************************
 * inner_solve_dprimme - Inner (QMR) iteration that builds a correction vector
 *    in sol, starting from a zero initial guess and the residual r.
 *
 * Each iteration applies the projected operator (apply_projected_matrix) and
 * the projected preconditioner (apply_projected_preconditioner). When
 * primme->correctionParams.convTest is primme_adaptive or
 * primme_adaptive_ETolerance, scalar recurrences are also maintained to
 * estimate the updated eigenvalue and eigenresidual norm, which drive extra
 * stopping tests.
 *
 * PARAMETERS
 * ----------
 * x, evecs, RprojectorQ, RprojectorX, sizeRprojectorQ, sizeRprojectorX,
 * xKinvx, UDU, ipivot
 *                 Forwarded unchanged to apply_projected_preconditioner.
 * r               Initial residual; copied into local storage, not modified.
 * rnorm           In: norm of r, used as the initial linear-system residual
 *                 norm (tau). Out: the last eigenresidual estimate computed by
 *                 the adaptive tests; left unchanged if no such estimate was
 *                 ever computed.
 * evecsHat        Not referenced by this function.
 * Lprojector, sizeLprojector
 *                 Forwarded, with shift, to apply_projected_matrix.
 * sol             Out: the accumulated correction (primme->nLocal entries).
 * eval            Current eigenvalue approximation used by the recurrences.
 * shift           Shift passed to apply_projected_matrix.
 * eresTol         Eigenresidual tolerance; scaled by aNormEstimate when
 *                 primme->aNorm <= 0.
 * aNormEstimate   Norm estimate used when primme->aNorm <= 0.
 * machEps         Machine precision; used for the absolute tolerance and for
 *                 the breakdown test on alpha.
 * rwork           Workspace: four vectors of primme->nLocal entries followed
 *                 by scratch space handed to the projected operators.
 * rworkSize       Length of rwork. It is not checked inside this function.
 * primme          Solver parameters and statistics.
 *
 * RETURN
 * ------
 * 0 on normal termination (convergence, breakdown or iteration limit);
 * APPLYPROJECTEDPRECONDITIONER_FAILURE if the preconditioner application
 * fails, either before or inside the iteration loop.
 ******************************************************************************/
int inner_solve_dprimme(double *x, double *r, double *rnorm, 
   double *evecs, double *evecsHat, double *UDU, int *ipivot, 
   double *xKinvx, double *Lprojector, double *RprojectorQ, 
   double *RprojectorX, int sizeLprojector, int sizeRprojectorQ, 
   int sizeRprojectorX, double *sol, double eval, double shift, 
   double eresTol, double aNormEstimate, double machEps, double *rwork, 
   int rworkSize, primme_params *primme) {

   int i;             /* loop variable                                       */
   int numIts;        /* Number of inner iterations                          */
   int ret;           /* Return value used for error checking.               */
   int maxIterations; /* The maximum # iterations allowed. Depends on primme */

   double *workSpace; /* Scratch space passed to the projected operators     */

   /* QMR parameters */

   double *g, *d, *delta, *w, *ptmp;
   double alpha_prev, beta, rho_prev, rho;
   double Theta_prev, Theta, c, sigma_prev, tau_init, tau_prev, tau; 

   /* Parameters used to dynamically update eigenpair */
   double Beta, Delta, Psi, Beta_prev, Delta_prev, Psi_prev, eta;
   double dot_sol, eval_updated, eval_prev, eres2_updated, eres_updated, R;
   double Gamma_prev, Phi_prev;
   double Gamma, Phi;
   double gamma;

   /* The convergence criteria of the inner linear system must satisfy:       */
   /* || current residual || <= relativeTolerance * || initial residual ||    */
   /*                                               + absoluteTol             */

   double relativeTolerance; 
   double absoluteTolerance;
   double LTolerance, ETolerance;

   /* Some constants                                                          */
   double tzero = +0.0e+00;

   /* -------------------------------------------*/
   /* Subdivide the workspace into needed arrays */
   /* -------------------------------------------*/

   g      = rwork;
   d      = g + primme->nLocal;
   delta  = d + primme->nLocal;
   w      = delta + primme->nLocal;
   workSpace = w + primme->nLocal; /* This needs at least 2*numOrth+NumEvals */
   
   /* -----------------------------------------*/
   /* Set up convergence criteria by Tolerance */
   /* -----------------------------------------*/

   if (primme->aNorm <= 0.0L) {
      absoluteTolerance = aNormEstimate*machEps;
      eresTol = eresTol*aNormEstimate;
   }
   else {
      absoluteTolerance = primme->aNorm*machEps;
   }
   tau_prev = tau_init = *rnorm;       /* Assumes zero initial guess */
   LTolerance = eresTol;

   /* The eigenresidual estimate is only produced by the adaptive tests.    */
   /* Seed it with the incoming norm so that *rnorm is left unchanged       */
   /* (instead of receiving an indeterminate value) when no estimate is     */
   /* ever computed.                                                        */
   eres_updated = *rnorm;

   /* Andreas: note that eigenresidual tol may not be achievable, because we */
   /* iterate on P(A-s)P not (A-s). But tau reflects linSys on P(A-s)P. */
   if (primme->correctionParams.convTest == primme_adaptive) {
      ETolerance = max(eresTol/1.8L, absoluteTolerance);
      LTolerance = ETolerance;
   }
   else if (primme->correctionParams.convTest == primme_adaptive_ETolerance) {
      LTolerance = max(eresTol/1.8L, absoluteTolerance);
      ETolerance = max(tau_init*0.1L, LTolerance);
   }
   else if (primme->correctionParams.convTest == primme_decreasing_LTolerance) {
      relativeTolerance = pow(primme->correctionParams.relTolBase, 
         (double)-primme->stats.numOuterIterations);
      LTolerance = relativeTolerance * tau_init 
                   + absoluteTolerance + eresTol;
   }
   
   /* --------------------------------------------------------*/
   /* Set up convergence criteria by max number of iterations */
   /* --------------------------------------------------------*/

   /* compute first total number of remaining matvecs */

   maxIterations = primme->maxMatvecs - primme->stats.numMatvecs;

   /* Perform primme.maxInnerIterations, but do not exceed total remaining */
   if (primme->correctionParams.maxInnerIterations > 0) {

      maxIterations = min(primme->correctionParams.maxInnerIterations, 
                          maxIterations);
   }

   /* --------------------------------------------------------*/
   /* Rest of initializations                                 */
   /* --------------------------------------------------------*/

   /* Assume zero initial guess */
   Num_dcopy_dprimme(primme->nLocal, r, 1, g, 1);

   ret = apply_projected_preconditioner(g, evecs, RprojectorQ, 
           x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, 
           xKinvx, UDU, ipivot, d, workSpace, primme);

   if (ret != 0) {
      primme_PushErrorMessage(Primme_inner_solve, 
         Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, 
         primme);
      return APPLYPROJECTEDPRECONDITIONER_FAILURE;
   }
      
   Theta_prev = 0.0L;
   eval_prev = eval;
   rho_prev = dist_dot(g, 1, d, 1, primme);
      
   /* Initialize recurrences used to dynamically update the eigenpair.      */
   /* The current values are zeroed as well: in the non-adaptive modes they */
   /* are never recomputed, yet they are still copied into the *_prev       */
   /* variables at the end of every iteration.                              */

   Beta_prev = Delta_prev = Psi_prev = 0.0L;
   Gamma_prev = Phi_prev = 0.0L;
   Beta = Delta = Psi = 0.0L;
   Gamma = Phi = 0.0L;

   /* other initializations */
   for (i = 0; i < primme->nLocal; i++) {
      delta[i] = tzero;
      sol[i] = tzero;
   }

   numIts = 0;
      
   /*----------------------------------------------------------------------*/
   /*------------------------ Begin Inner Loop ----------------------------*/
   /*----------------------------------------------------------------------*/

   while (numIts < maxIterations) {

      /* w = (projected, shifted operator) * d */
      apply_projected_matrix(d, shift, Lprojector, sizeLprojector, 
                             w, workSpace, primme);
      sigma_prev = dist_dot(d, 1, w, 1, primme);

      /* Breakdown: the step length alpha = rho/sigma is undefined */
      if (sigma_prev == 0.0L) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because SIGMA %e\n",sigma_prev);
         }
         break;
      }

      alpha_prev = rho_prev/sigma_prev;
      if (fabs(alpha_prev) < machEps || fabs(alpha_prev) > 1.0L/machEps){
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because ALPHA %e\n",alpha_prev);
         }
         break;
      }

      /* g = g - alpha*w : update the residual of the linear system */
      Num_axpy_dprimme(primme->nLocal, -alpha_prev, w, 1, g, 1);

      Theta = dist_dot(g, 1, g, 1, primme);
      Theta = sqrt(Theta);
      Theta = Theta/tau_prev;
      c = 1.0L/sqrt(1+Theta*Theta);
      tau = tau_prev*Theta*c;

      gamma = c*c*Theta_prev*Theta_prev;
      eta = alpha_prev*c*c;
      for (i = 0; i < primme->nLocal; i++) {
          delta[i] = gamma*delta[i] + eta*d[i];
          sol[i] = delta[i]+sol[i];
      }
      numIts++;

      /* rho is the denominator of beta below; stop before dividing by zero */
      if (fabs(rho_prev) == 0.0L ) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile,"Exiting because abs(rho) %e\n",
               fabs(rho_prev));
         }
         break;
      }
      
      if (tau < LTolerance) {
         if (primme->printLevel >= 5 && primme->procID == 0) {
            fprintf(primme->outputFile, " tau < LTol %e %e\n",tau, LTolerance);
         }
         break;
      }
      else if (primme->correctionParams.convTest == primme_adaptive_ETolerance
            || primme->correctionParams.convTest == primme_adaptive) {
         /* --------------------------------------------------------*/
         /* Adaptive stopping based on dynamic monitoring of eResid */
         /* --------------------------------------------------------*/

         /* Update the Ritz value and eigenresidual using the */
         /* following recurrences.                            */
      
         Delta = gamma*Delta_prev + eta*rho_prev;
         Beta = Beta_prev - Delta;
         Phi = gamma*gamma*Phi_prev + eta*eta*sigma_prev;
         Psi = gamma*Psi_prev + gamma*Phi_prev;
         Gamma = Gamma_prev + 2.0L*Psi + Phi;
        
         /* Perform the update: update the eigenvalue and the square of the  */
         /* residual norm.                                                   */
         
         dot_sol = dist_dot(sol, 1, sol, 1, primme);
         eval_updated = shift + (eval - shift + 2*Beta + Gamma)/(1 + dot_sol);
         eres2_updated = (tau*tau)/(1 + dot_sol) + 
            ((eval - shift + Beta)*(eval - shift + Beta))/(1 + dot_sol) - 
            (eval_updated - shift)*(eval_updated - shift);

         /* If numerical problems, let eres about the same as tau */
         if (eres2_updated < 0){
            eres_updated = sqrt( (tau*tau)/(1 + dot_sol) );
         }
         else {
            eres_updated = sqrt(eres2_updated);
         }

         /* --------------------------------------------------------*/
         /* Stopping criteria                                       */
         /* --------------------------------------------------------*/

         R = max(0.9878, sqrt(tau/tau_prev))*sqrt(1+dot_sol);
        
         if ( tau <= R*eres_updated || eres_updated <= tau*R ) {
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, " tau < R eres \n");
            }
            break;
         }

         /* Stop if the eigenvalue estimate moves away from the target end */
         if (primme->target == primme_smallest && eval_updated > eval_prev) {
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eval_updated > eval_prev\n");
            }
            break;
         }
         else if (primme->target == primme_largest && eval_updated < eval_prev){
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eval_updated < eval_prev\n");
            }
            break;
         }
         
         if (eres_updated < ETolerance) {    /* tau < LTol has been checked */
            if (primme->printLevel >= 5 && primme->procID == 0) {
               fprintf(primme->outputFile, "eres < eresTol %e \n",eres_updated);
            }
            break;
         }

         eval_prev = eval_updated;

         if (primme->printLevel >= 4 && primme->procID == 0) {
            fprintf(primme->outputFile,
           "INN MV %d Sec %e Eval %e Lin|r| %.3e EV|r| %.3e\n", primme->stats.
            numMatvecs, primme_wTimer(0), eval_updated, tau, eres_updated);
            fflush(primme->outputFile);
         }

        /* --------------------------------------------------------*/
      } /* End of if adaptive JDQMR section                        */
        /* --------------------------------------------------------*/
      else if (primme->printLevel >= 4 && primme->procID == 0) {
        /* Report for non adaptive inner iterations */
        fprintf(primme->outputFile,
           "INN MV %d Sec %e Lin|r| %e\n", primme->stats.numMatvecs,
           primme_wTimer(0),tau);
        fflush(primme->outputFile);
      }

      /* Prepare the next search direction only if another iteration runs */
      if (numIts < maxIterations) {

         ret = apply_projected_preconditioner(g, evecs, RprojectorQ, 
            x, RprojectorX, sizeRprojectorQ, sizeRprojectorX, 
            xKinvx, UDU, ipivot, w, workSpace, primme);

         if (ret != 0) {
            primme_PushErrorMessage(Primme_inner_solve, 
               Primme_apply_projected_preconditioner, ret, __FILE__, __LINE__, 
               primme);
            /* Report the failure to the caller, as the call made before  */
            /* the loop does, rather than falling through to return 0.    */
            return APPLYPROJECTEDPRECONDITIONER_FAILURE;
         }
         rho = dist_dot(g, 1, w, 1, primme);
         beta = rho/rho_prev;
         Num_axpy_dprimme(primme->nLocal, beta, d, 1, w, 1);
         /* Alternate between w and d buffers in successive iterations
          * This saves a memory copy. */
         ptmp = d; d = w; w = ptmp;
      
         rho_prev = rho; 
         tau_prev = tau;
         Theta_prev = Theta;

         Delta_prev = Delta;
         Beta_prev = Beta;
         Phi_prev = Phi;
         Psi_prev = Psi;
         Gamma_prev = Gamma;
      }

     /* --------------------------------------------------------*/
   } /* End of QMR main while loop                              */
     /* --------------------------------------------------------*/

   *rnorm = eres_updated;
   return 0;
}
/*
 * Updates v in place as  v = v - Qhat * y,  where y starts as the globally
 * summed overlaps Q'*v and, when UDU is not NULL, is additionally passed
 * through the UDU backsolve (a plain division by UDU[0] for one column).
 *
 * rwork is split into two consecutive arrays of numCols entries each.
 * Nothing is done when numCols <= 0.
 *
 * Returns 0 on success, or UDUSOLVE_FAILURE if UDU[0] is zero in the
 * one-column case or UDUSolve_dprimme reports an error.
 */
static int apply_skew_projector(double *Q, double *Qhat, double *UDU, 
   int *ipivot, int numCols, double *v, double *rwork, 
   primme_params *primme) {

   double one = +1.0e+00, zero = +0.0e+00, minusOne = -1.0e+00;
   double *coef;      /* global overlaps Q'*v                        */
   double *scratch;   /* local overlaps, then the backsolve result   */
   double *applied;   /* coefficients finally multiplied by Qhat     */
   int nSum;
   int status;

   /* No projector to apply */
   if (numCols <= 0) {
      return 0;
   }

   coef = rwork;
   scratch = rwork + numCols;

   /* Single column: BLAS 1 calls are enough */
   if (numCols == 1) {
      coef[0] = dist_dot(Q, 1, v, 1, primme);

      /* Scale only if there is a skew projector */
      if (UDU != NULL) {
         if (UDU[0] == 0.0L) {
            return UDUSOLVE_FAILURE;
         }
         coef[0] = coef[0]/UDU[0];
      }

      /* v = v - Qhat*coef */
      Num_axpy_dprimme(primme->nLocal, -coef[0], Qhat, 1, v, 1);
      return 0;
   }

   /* Several columns: local overlaps scratch = Q'*v with BLAS 2 */
   Num_gemv_dprimme("C", primme->nLocal, numCols, one, Q, 
                    primme->nLocal, v, 1, zero, scratch, 1);

   /* Global sum: coef = Q'*v */
   nSum = numCols;
   (*primme->globalSumDouble)(scratch, coef, &nSum, primme);

   /* Without a skew projector the overlaps are used directly */
   applied = coef;

   if (UDU != NULL) {
      /* Backsolve with the UDU decomposition; the result lands in scratch */
      status = UDUSolve_dprimme(UDU, ipivot, numCols, coef, scratch);
      if (status != 0) {
         primme_PushErrorMessage(Primme_apply_skew_projector,
            Primme_udusolve, status, __FILE__, __LINE__, primme);
         return UDUSOLVE_FAILURE;
      }
      applied = scratch;
   }

   /* v = v - Qhat*applied */
   Num_gemv_dprimme("N", primme->nLocal, numCols, minusOne, Qhat, 
                    primme->nLocal, applied, 1, one, v, 1);

   return 0;
}
/* Example #3 */
/* 0 */
/*
 * Complex (Complex_Z) variant. Updates v in place as  v = v - Qhat * y,
 * where y starts as the globally summed overlaps Q'*v and, when UDU is not
 * NULL, is additionally passed through the UDU backsolve (a division by
 * UDU[0] for one column).
 *
 * rwork is split into two consecutive arrays of numCols entries each.
 * Nothing is done when numCols <= 0.
 *
 * Returns 0 on success, or UDUSOLVE_FAILURE if UDU[0] equals zero in the
 * one-column case or UDUSolve_zprimme reports an error.
 */
static int apply_skew_projector(Complex_Z *Q, Complex_Z *Qhat, Complex_Z *UDU, 
   int *ipivot, int numCols, Complex_Z *v, Complex_Z *rwork, 
   primme_params *primme) {

   Complex_Z one = {+1.0e+00,+0.0e00};
   Complex_Z zero = {+0.0e+00,+0.0e00};
   Complex_Z minusOne = {-1.0e+00,+0.0e00};
   Complex_Z negCoef;    /* negated coefficient for the axpy call        */
   Complex_Z *coef;      /* global overlaps Q'*v                         */
   Complex_Z *scratch;   /* local overlaps, then the backsolve result    */
   Complex_Z *applied;   /* coefficients finally multiplied by Qhat      */
   int nSum;
   int status;

   /* No projector to apply */
   if (numCols <= 0) {
      return 0;
   }

   coef = rwork;
   scratch = rwork + numCols;

   /* Single column: BLAS 1 calls are enough */
   if (numCols == 1) {
      coef[0] = dist_dot(Q, 1, v, 1, primme);

      /* Scale only if there is a skew projector */
      if (UDU != NULL) {
         if ( z_eq_primme(UDU[0], zero) ) {
            return UDUSOLVE_FAILURE;
         }
         z_div_primme(&coef[0], &coef[0], &UDU[0]);
      }

      /* v = v - Qhat*coef */
      negCoef.r = - coef[0].r;
      negCoef.i = - coef[0].i;
      Num_axpy_zprimme(primme->nLocal, negCoef, Qhat, 1, v, 1);
      return 0;
   }

   /* Several columns: local overlaps scratch = Q'*v with BLAS 2 */
   Num_gemv_zprimme("C", primme->nLocal, numCols, one, Q, 
                    primme->nLocal, v, 1, zero, scratch, 1);

   /* Global sum: coef = Q'*v.                                         */
   /* The sum works on doubles, so a complex array counts twice as many */
   nSum = 2*numCols;
   (*primme->globalSumDouble)(scratch, coef, &nSum, primme);

   /* Without a skew projector the overlaps are used directly */
   applied = coef;

   if (UDU != NULL) {
      /* Backsolve with the UDU decomposition; the result lands in scratch */
      status = UDUSolve_zprimme(UDU, ipivot, numCols, coef, scratch);
      if (status != 0) {
         primme_PushErrorMessage(Primme_apply_skew_projector,
            Primme_udusolve, status, __FILE__, __LINE__, primme);
         return UDUSOLVE_FAILURE;
      }
      applied = scratch;
   }

   /* v = v - Qhat*applied */
   Num_gemv_zprimme("N", primme->nLocal, numCols, minusOne, Qhat, 
                    primme->nLocal, applied, 1, one, v, 1);

   return 0;
}