C++ (Cpp) KSP_PCApplyBAorAB Beispiele

Programmiersprache: C++ (Cpp)

Methode / Funktion: KSP_PCApplyBAorAB

Beispiele auf hotexamples.com: 11

C++ (Cpp) KSP_PCApplyBAorAB - 11 Beispiele gefunden. Dies sind die am besten bewerteten C++ (Cpp) Beispiele für die KSP_PCApplyBAorAB, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: pgmres.c Projekt: feelpp/debian-petsc

static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_PGMRES     *pgmres = (KSP_PGMRES*)(ksp->data);
  PetscReal      res_norm,res,newnorm;
  PetscErrorCode ierr;
  PetscInt       it     = 0,j,k;
  PetscBool      hapend = PETSC_FALSE;

  PetscFunctionBegin;
  if (itcount) *itcount = 0;
  ierr   = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr);
  res    = res_norm;
  *RS(0) = res_norm;

  /* check for the convergence */
  ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->rnorm = res;
  ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  pgmres->it = it-2;
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  if (!res) {
    ksp->reason = KSP_CONVERGED_ATOL;
    ierr        = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  for (; !ksp->reason; it++) {
    Vec Zcur,Znext;
    if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) {
      ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr);
    }
    /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */
    Zcur  = VEC_VV(it);         /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */
    Znext = VEC_VV(it+1);       /* This iteration will compute Znext, update with a deferred correction once we know how
                                 * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */

    if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */
      ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr);
    }

    if (it > 1) {               /* Complete the pending reduction */
      ierr           = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr);
      *HH(it-1,it-2) = newnorm;
    }
    if (it > 0) {               /* Finish the reduction computing the latest column of H */
      ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr);
    }

    if (it > 1) {
      /* normalize the base vector from two iterations ago, basis is complete up to here */
      ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr);

      ierr       = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr);
      pgmres->it = it-2;
      ksp->its++;
      ksp->rnorm = res;

      ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
      if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) {  /* Monitor if we are done or still iterating, but not before a restart. */
        ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
        ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
      }
      if (ksp->reason) break;
      /* Catch error in happy breakdown and signal convergence and break from loop */
      if (hapend) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }

      if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break;

      /* The it-2 column of H was not scaled when we computed Zcur, apply correction */
      ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr);
      /* And Znext computed in this iteration was computed using the under-scaled Zcur */
      ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr);

      /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */
      for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2);
      /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This
       * column is complete except for HH(it,it-1) which we won't know until the next iteration. */
      *HH(it-1,it-1) /= *HH(it-1,it-2);
    }

    if (it > 0) {
      PetscScalar *work;
      if (!pgmres->orthogwork) {ierr = PetscMalloc((pgmres->max_k + 2)*sizeof(PetscScalar),&pgmres->orthogwork);CHKERRQ(ierr);}
      work = pgmres->orthogwork;
      /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is
       *
       *   Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1]
       *
       * where
       *
       *   Z[j] = sum_{k=0}^j V[k] * H[k,j-1]
       *
       * substituting
       *
       *   Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1]
       *
       * rearranging the iteration space from row-column to column-row
       *
       *   Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1]
       *
       * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already
       * been transformed to upper triangular form.
       */
      for (k=0; k<it+1; k++) {
        work[k] = 0;
        for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1);
      }
      ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr);
      ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr);

      /* Orthogonalize Zcur against existing basis vectors. */
      for (k=0; k<it; k++) work[k] = -*HH(k,it-1);
      ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr);
      /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */
      /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */
      ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr);
    }

    /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */
    ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr);

    /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */
    ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr);
  }

  if (itcount) *itcount = it-1; /* Number of iterations actually completed. */

  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */
  /* Form the solution (or the solution so far) */
  ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #2

Datei anzeigen

Datei: tcqmr.c Projekt: firedrakeproject/petsc

static PetscErrorCode KSPSolve_TCQMR(KSP ksp)
{
  PetscReal      rnorm0,rnorm,dp1,Gamma;
  PetscScalar    theta,ep,cl1,sl1,cl,sl,sprod,tau_n1,f;
  PetscScalar    deltmp,rho,beta,eptmp,ta,s,c,tau_n,delta;
  PetscScalar    dp11,dp2,rhom1,alpha,tmp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ksp->its = 0;

  ierr = KSPInitialResidual(ksp,x,u,v,r,b);CHKERRQ(ierr);
  ierr = VecNorm(r,NORM_2,&rnorm0);CHKERRQ(ierr);          /*  rnorm0 = ||r|| */
  KSPCheckNorm(ksp,rnorm0);

  ierr = (*ksp->converged)(ksp,0,rnorm0,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) PetscFunctionReturn(0);

  ierr  = VecSet(um1,0.0);CHKERRQ(ierr);
  ierr  = VecCopy(r,u);CHKERRQ(ierr);
  rnorm = rnorm0;
  tmp   = 1.0/rnorm; ierr = VecScale(u,tmp);CHKERRQ(ierr);
  ierr  = VecSet(vm1,0.0);CHKERRQ(ierr);
  ierr  = VecCopy(u,v);CHKERRQ(ierr);
  ierr  = VecCopy(u,v0);CHKERRQ(ierr);
  ierr  = VecSet(pvec1,0.0);CHKERRQ(ierr);
  ierr  = VecSet(pvec2,0.0);CHKERRQ(ierr);
  ierr  = VecSet(p,0.0);CHKERRQ(ierr);
  theta = 0.0;
  ep    = 0.0;
  cl1   = 0.0;
  sl1   = 0.0;
  cl    = 0.0;
  sl    = 0.0;
  sprod = 1.0;
  tau_n1= rnorm0;
  f     = 1.0;
  Gamma = 1.0;
  rhom1 = 1.0;

  /*
   CALCULATE SQUARED LANCZOS  vectors
   */
  ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  while (!ksp->reason) {
    ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr);
    ksp->its++;

    ierr   = KSP_PCApplyBAorAB(ksp,u,y,vtmp);CHKERRQ(ierr); /* y = A*u */
    ierr   = VecDot(y,v0,&dp11);CHKERRQ(ierr);
    KSPCheckDot(ksp,dp11);
    ierr   = VecDot(u,v0,&dp2);CHKERRQ(ierr);
    alpha  = dp11 / dp2;                          /* alpha = v0'*y/v0'*u */
    deltmp = alpha;
    ierr   = VecCopy(y,z);CHKERRQ(ierr);
    ierr   = VecAXPY(z,-alpha,u);CHKERRQ(ierr); /* z = y - alpha u */
    ierr   = VecDot(u,v0,&rho);CHKERRQ(ierr);
    beta   = rho / (f*rhom1);
    rhom1  = rho;
    ierr   = VecCopy(z,utmp);CHKERRQ(ierr);    /* up1 = (A-alpha*I)*
                                                (z-2*beta*p) + f*beta*
                                                beta*um1 */
    ierr   = VecAXPY(utmp,-2.0*beta,p);CHKERRQ(ierr);
    ierr   = KSP_PCApplyBAorAB(ksp,utmp,up1,vtmp);CHKERRQ(ierr);
    ierr   = VecAXPY(up1,-alpha,utmp);CHKERRQ(ierr);
    ierr   = VecAXPY(up1,f*beta*beta,um1);CHKERRQ(ierr);
    ierr   = VecNorm(up1,NORM_2,&dp1);CHKERRQ(ierr);
    KSPCheckNorm(ksp,dp1);
    f      = 1.0 / dp1;
    ierr   = VecScale(up1,f);CHKERRQ(ierr);
    ierr   = VecAYPX(p,-beta,z);CHKERRQ(ierr);   /* p = f*(z-beta*p) */
    ierr   = VecScale(p,f);CHKERRQ(ierr);
    ierr   = VecCopy(u,um1);CHKERRQ(ierr);
    ierr   = VecCopy(up1,u);CHKERRQ(ierr);
    beta   = beta/Gamma;
    eptmp  = beta;
    ierr   = KSP_PCApplyBAorAB(ksp,v,vp1,vtmp);CHKERRQ(ierr);
    ierr   = VecAXPY(vp1,-alpha,v);CHKERRQ(ierr);
    ierr   = VecAXPY(vp1,-beta,vm1);CHKERRQ(ierr);
    ierr   = VecNorm(vp1,NORM_2,&Gamma);CHKERRQ(ierr);
    KSPCheckNorm(ksp,Gamma);
    ierr   = VecScale(vp1,1.0/Gamma);CHKERRQ(ierr);
    ierr   = VecCopy(v,vm1);CHKERRQ(ierr);
    ierr   = VecCopy(vp1,v);CHKERRQ(ierr);

    /*
       SOLVE  Ax = b
     */
    /* Apply last two Given's (Gl-1 and Gl) rotations to (beta,alpha,Gamma) */
    if (ksp->its > 2) {
      theta =  sl1*beta;
      eptmp = -cl1*beta;
    }
    if (ksp->its > 1) {
      ep     = -cl*eptmp + sl*alpha;
      deltmp = -sl*eptmp - cl*alpha;
    }
    if (PetscAbsReal(Gamma) > PetscAbsScalar(deltmp)) {
      ta = -deltmp / Gamma;
      s  = 1.0 / PetscSqrtScalar(1.0 + ta*ta);
      c  = s*ta;
    } else {
      ta = -Gamma/deltmp;
      c  = 1.0 / PetscSqrtScalar(1.0 + ta*ta);
      s  = c*ta;
    }

    delta = -c*deltmp + s*Gamma;
    tau_n = -c*tau_n1; tau_n1 = -s*tau_n1;
    ierr  = VecCopy(vm1,pvec);CHKERRQ(ierr);
    ierr  = VecAXPY(pvec,-theta,pvec2);CHKERRQ(ierr);
    ierr  = VecAXPY(pvec,-ep,pvec1);CHKERRQ(ierr);
    ierr  = VecScale(pvec,1.0/delta);CHKERRQ(ierr);
    ierr  = VecAXPY(x,tau_n,pvec);CHKERRQ(ierr);
    cl1   = cl; sl1 = sl; cl = c; sl = s;

    ierr = VecCopy(pvec1,pvec2);CHKERRQ(ierr);
    ierr = VecCopy(pvec,pvec1);CHKERRQ(ierr);

    /* Compute the upper bound on the residual norm r (See QMR paper p. 13) */
    sprod = sprod*PetscAbsScalar(s);
    rnorm = rnorm0 * PetscSqrtReal((PetscReal)ksp->its+2.0) * PetscRealPart(sprod);
    ierr  = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->its >= ksp->max_it) {
      if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
      break;
    }
  }
  ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr);
  ierr = KSPUnwindPreconditioner(ksp,x,vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #3

Datei anzeigen

Datei: rich.c Projekt: OpenCMISS-Dependencies/petsc

PetscErrorCode  KSPSolve_Richardson(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,maxit;
  PetscReal      rnorm = 0.0,abr;
  PetscScalar    scale,rdot;
  Vec            x,b,r,z,w = NULL,y = NULL;
  PetscInt       xs, ws;
  Mat            Amat,Pmat;
  KSP_Richardson *richardsonP = (KSP_Richardson*)ksp->data;
  PetscBool      exists,diagonalscale;

  PetscFunctionBegin;
  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  ksp->its = 0;

  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
  x    = ksp->vec_sol;
  b    = ksp->vec_rhs;
  ierr = VecGetSize(x,&xs);CHKERRQ(ierr);
  ierr = VecGetSize(ksp->work[0],&ws);CHKERRQ(ierr);
  if (xs != ws) {
    if (richardsonP->selfscale) {
      ierr = KSPSetWorkVecs(ksp,4);CHKERRQ(ierr);
    } else {
      ierr = KSPSetWorkVecs(ksp,2);CHKERRQ(ierr);
    }
  }
  r = ksp->work[0];
  z = ksp->work[1];
  if (richardsonP->selfscale) {
    w = ksp->work[2];
    y = ksp->work[3];
  }
  maxit = ksp->max_it;

  /* if user has provided fast Richardson code use that */
  ierr = PCApplyRichardsonExists(ksp->pc,&exists);CHKERRQ(ierr);
  if (exists && !ksp->numbermonitors && !ksp->transpose_solve & !ksp->nullsp) {
    PCRichardsonConvergedReason reason;
    ierr        = PCApplyRichardson(ksp->pc,b,x,r,ksp->rtol,ksp->abstol,ksp->divtol,maxit,ksp->guess_zero,&ksp->its,&reason);CHKERRQ(ierr);
    ksp->reason = (KSPConvergedReason)reason;
    PetscFunctionReturn(0);
  }

  scale = richardsonP->scale;

  if (!ksp->guess_zero) {                          /*   r <- b - A x     */
    ierr = KSP_MatMult(ksp,Amat,x,r);CHKERRQ(ierr);
    ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(b,r);CHKERRQ(ierr);
  }

  ksp->its = 0;
  if (richardsonP->selfscale) {
    ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr);         /*   z <- B r          */
    for (i=0; i<maxit; i++) {

      if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
        ierr       = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /*   rnorm <- r'*r     */
        ierr       = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr);
        ksp->rnorm = rnorm;
        ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr);
        ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
        if (ksp->reason) break;
      } else if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
        ierr       = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /*   rnorm <- z'*z     */
        ierr       = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr);
        ksp->rnorm = rnorm;
        ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr);
        ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
        if (ksp->reason) break;
      }
      ierr  = KSP_PCApplyBAorAB(ksp,z,y,w);CHKERRQ(ierr); /* y = BAz = BABr */
      ierr  = VecDotNorm2(z,y,&rdot,&abr);CHKERRQ(ierr);   /*   rdot = (Br)^T(BABR); abr = (BABr)^T (BABr) */
      scale = rdot/abr;
      ierr  = PetscInfo1(ksp,"Self-scale factor %g\n",(double)PetscRealPart(scale));CHKERRQ(ierr);
      ierr  = VecAXPY(x,scale,z);CHKERRQ(ierr);   /*   x  <- x + scale z */
      ierr  = VecAXPY(r,-scale,w);CHKERRQ(ierr);  /*  r <- r - scale*Az */
      ierr  = VecAXPY(z,-scale,y);CHKERRQ(ierr);  /*  z <- z - scale*y */
      ksp->its++;
    }
  } else {
    for (i=0; i<maxit; i++) {

      if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
        ierr       = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /*   rnorm <- r'*r     */
        ierr       = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr);
        ksp->rnorm = rnorm;
        ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr);
        ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
        if (ksp->reason) break;
      }

      ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr);    /*   z <- B r          */

      if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
        ierr       = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /*   rnorm <- z'*z     */
        ierr       = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr);
        ksp->rnorm = rnorm;
        ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr);
        ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
        if (ksp->reason) break;
      }

      ierr = VecAXPY(x,scale,z);CHKERRQ(ierr);    /*   x  <- x + scale z */
      ksp->its++;

      if (i+1 < maxit || ksp->normtype != KSP_NORM_NONE) {
        ierr = KSP_MatMult(ksp,Amat,x,r);CHKERRQ(ierr);      /*   r  <- b - Ax      */
        ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr);
      }
    }
  }
  if (!ksp->reason) {
    if (ksp->normtype != KSP_NORM_NONE) {
      if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
        ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr);     /*   rnorm <- r'*r     */
      } else {
        ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr);   /*   z <- B r          */
        ierr = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr);     /*   rnorm <- z'*z     */
      }
      ksp->rnorm = rnorm;
      ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr);
      ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr);
    }
    if (ksp->its >= ksp->max_it) {
      if (ksp->normtype != KSP_NORM_NONE) {
        ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
        if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
      } else {
        ksp->reason = KSP_CONVERGED_ITS;
      }
    }
  }
  PetscFunctionReturn(0);
}

Beispiel #4

Datei anzeigen

Datei: cgs.c Projekt: masa-ito/PETScToPoisson

static PetscErrorCode  KSPSolve_CGS(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    rho,rhoold,a,s,b;
  Vec            X,B,V,P,R,RP,T,Q,U,AUQ;
  PetscReal      dp = 0.0;
  PetscBool      diagonalscale;

  PetscFunctionBegin;
  /* not sure what residual norm it does use, should use for right preconditioning */

  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  X   = ksp->vec_sol;
  B   = ksp->vec_rhs;
  R   = ksp->work[0];
  RP  = ksp->work[1];
  V   = ksp->work[2];
  T   = ksp->work[3];
  Q   = ksp->work[4];
  P   = ksp->work[5];
  U   = ksp->work[6];
  AUQ = V;

  /* Compute initial preconditioned residual */
  ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr);

  /* Test for nothing to do */
  ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);
  if (ksp->normtype == KSP_NORM_NATURAL) dp *= dp;
  ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->its   = 0;
  ksp->rnorm = dp;
  ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
  ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) PetscFunctionReturn(0);

  /* Make the initial Rp == R */
  ierr = VecCopy(R,RP);CHKERRQ(ierr);
  /*  added for Fidap */
  /* Penalize Startup - Isaac Hasbani Trick for CGS
     Since most initial conditions result in a mostly 0 residual,
     we change all the 0 values in the vector RP to the maximum.
  */
  if (ksp->normtype == KSP_NORM_NATURAL) {
    PetscReal   vr0max;
    PetscScalar *tmp_RP=0;
    PetscInt    numnp  =0, *max_pos=0;
    ierr = VecMax(RP, max_pos, &vr0max);CHKERRQ(ierr);
    ierr = VecGetArray(RP, &tmp_RP);CHKERRQ(ierr);
    ierr = VecGetLocalSize(RP, &numnp);CHKERRQ(ierr);
    for (i=0; i<numnp; i++) {
      if (tmp_RP[i] == 0.0) tmp_RP[i] = vr0max;
    }
    ierr = VecRestoreArray(RP, &tmp_RP);CHKERRQ(ierr);
  }
  /*  end of addition for Fidap */

  /* Set the initial conditions */
  ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr);        /* rhoold = (r,rp)      */
  ierr = VecCopy(R,U);CHKERRQ(ierr);
  ierr = VecCopy(R,P);CHKERRQ(ierr);
  ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr);

  i = 0;
  do {

    ierr = VecDot(V,RP,&s);CHKERRQ(ierr);           /* s <- (v,rp)          */
    a    = rhoold / s;                               /* a <- rho / s         */
    ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr);      /* q <- u - a v         */
    ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr);      /* t <- u + q           */
    ierr = VecAXPY(X,a,T);CHKERRQ(ierr);           /* x <- x + a (u + q)   */
    ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,U);CHKERRQ(ierr);
    ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr);       /* r <- r - a K (u + q) */
    ierr = VecDot(R,RP,&rho);CHKERRQ(ierr);         /* rho <- (r,rp)        */
    if (ksp->normtype == KSP_NORM_NATURAL) {
      dp = PetscAbsScalar(rho);
    } else {
      ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);
    }

    ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its++;
    ksp->rnorm = dp;
    ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;

    b      = rho / rhoold;                           /* b <- rho / rhoold    */
    ierr   = VecWAXPY(U,b,Q,R);CHKERRQ(ierr);       /* u <- r + b q         */
    ierr   = VecAXPY(Q,b,P);CHKERRQ(ierr);
    ierr   = VecWAXPY(P,b,Q,U);CHKERRQ(ierr);       /* p <- u + b(q + b p)  */
    ierr   = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr);    /* v <- K p    */
    rhoold = rho;
    i++;
  } while (i<ksp->max_it);
  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;

  ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #5

Datei anzeigen

Datei: bcgsl.c Projekt: hansec/petsc

static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;
  ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
#  if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
#  else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
#  endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }


    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}

Beispiel #6

Datei anzeigen

Datei: gmres.c Projekt: feelpp/debian-petsc

PetscErrorCode KSPGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_GMRES      *gmres = (KSP_GMRES*)(ksp->data);
  PetscReal      res_norm,res,hapbnd,tt;
  PetscErrorCode ierr;
  PetscInt       it     = 0, max_k = gmres->max_k;
  PetscBool      hapend = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr);
  res     = res_norm;
  *GRS(0) = res_norm;

  /* check for the convergence */
  ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->rnorm = res;
  ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  gmres->it  = (it - 1);
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  if (!res) {
    if (itcount) *itcount = 0;
    ksp->reason = KSP_CONVERGED_ATOL;
    ierr        = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
    if (it) {
      ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
      ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
    }
    gmres->it = (it - 1);
    if (gmres->vv_allocated <= it + VEC_OFFSET + 1) {
      ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr);
    }
    ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);CHKERRQ(ierr);

    /* update hessenberg matrix and do Gram-Schmidt */
    ierr = (*gmres->orthog)(ksp,it);CHKERRQ(ierr);

    /* vv(i+1) . vv(i+1) */
    ierr = VecNormalize(VEC_VV(it+1),&tt);CHKERRQ(ierr);

    /* save the magnitude */
    *HH(it+1,it)  = tt;
    *HES(it+1,it) = tt;

    /* check for the happy breakdown */
    hapbnd = PetscAbsScalar(tt / *GRS(it));
    if (hapbnd > gmres->haptol) hapbnd = gmres->haptol;
    if (tt < hapbnd) {
      ierr   = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %14.12e tt = %14.12e\n",(double)hapbnd,(double)tt);CHKERRQ(ierr);
      hapend = PETSC_TRUE;
    }
    ierr = KSPGMRESUpdateHessenberg(ksp,it,hapend,&res);CHKERRQ(ierr);

    it++;
    gmres->it = (it-1);   /* For converged */
    ksp->its++;
    ksp->rnorm = res;
    if (ksp->reason) break;

    ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

    /* Catch error in happy breakdown and signal convergence and break from loop */
    if (hapend) {
      if (!ksp->reason) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }
    }
  }

  /* Monitor if we know that we will not return for a restart */
  if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
    ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  }

  if (itcount) *itcount = it;


  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */
  /* Form the solution (or the solution so far) */
  ierr = KSPGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #7

Datei anzeigen

Datei: ibcgs.c Projekt: Kun-Qu/petsc

static PetscErrorCode  KSPSolve_IBCGS(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,N;
  PetscReal      rnorm,rnormin = 0.0;
#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem 
     on the same number of processes  with different runs) we support computing the inner products using Intel's 80 bit arithematic
     rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
     and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.

     Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
     precision into a 16 byte space (the rest of the space is ignored)  */
  long double    insums[7],outsums[7];
#else
  PetscScalar    insums[7],outsums[7];
#endif
  PetscScalar    sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2;
  PetscScalar    taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
  const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
  PetscScalar    *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
  /* the rest do not have to keep n_1 values */
  PetscScalar    kappan, thetan, etan, gamman, betan, deltan;
  const PetscScalar *PETSC_RESTRICT tn;
  PetscScalar    *PETSC_RESTRICT sn;
  Vec            R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un;
  Mat            A;

  PetscFunctionBegin;
  if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors");

  ierr = PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr);
  Xn = ksp->vec_sol;ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Xn_1,PETSC_NULL);CHKERRQ(ierr);
  B  = ksp->vec_rhs;ierr = VecGetArrayRead(B,(const PetscScalar**)&b);ierr = VecRestoreArrayRead(B,PETSC_NULL);CHKERRQ(ierr);
  R0 = ksp->work[0];ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(R0,PETSC_NULL);CHKERRQ(ierr);
  Rn = ksp->work[1];ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Rn_1,PETSC_NULL);CHKERRQ(ierr);
  Un = ksp->work[2];ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Un_1,PETSC_NULL);CHKERRQ(ierr);
  F0 = ksp->work[3];ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(F0,PETSC_NULL);CHKERRQ(ierr);
  Vn = ksp->work[4];ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Vn_1,PETSC_NULL);CHKERRQ(ierr);
  Zn = ksp->work[5];ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Zn_1,PETSC_NULL);CHKERRQ(ierr);
  Qn = ksp->work[6];ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Qn_1,PETSC_NULL);CHKERRQ(ierr);
  Tn = ksp->work[7];ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Tn,PETSC_NULL);CHKERRQ(ierr);
  Sn = ksp->work[8];ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Sn,PETSC_NULL);CHKERRQ(ierr);

  /* r0 = rn_1 = b - A*xn_1; */
  /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr);
     ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */
  ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr);

  ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr);
  ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);   
  if (ksp->reason) PetscFunctionReturn(0);

  ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr);

  /* un_1 = A*rn_1; */
  ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr);
  
  /* f0   = A'*rn_1; */
  if (ksp->pc_side == PC_RIGHT) { /* B' A' */
    ierr = MatMultTranspose(A,R0,Tn);CHKERRQ(ierr);
    ierr = PCApplyTranspose(ksp->pc,Tn,F0);CHKERRQ(ierr);
  } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
    ierr = PCApplyTranspose(ksp->pc,R0,Tn);CHKERRQ(ierr);
    ierr = MatMultTranspose(A,Tn,F0);CHKERRQ(ierr);
  }

  /*qn_1 = vn_1 = zn_1 = 0.0; */
  ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr);
  ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr);
  ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr);

  sigman_2 = pin_1 = taun_1 = 0.0;

  /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
  ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); 

  /* sigman_1 = rn_1'un_1  */
  ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); 

  alphan_1 = omegan_1 = 1.0;

  for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) {
    rhon   = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1;
    /*    if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */
    if (ksp->its == 1) deltan = rhon;
    else deltan = rhon/taun_1;
    betan  = deltan/omegan_1;
    taun   = sigman_1 + betan*taun_1  - deltan*pin_1;
    if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its);
    alphan = rhon/taun;
    ierr = PetscLogFlops(15.0);

    /*  
        zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
        vn = un_1 + betan*vn_1 - deltan*qn_1
        sn = rn_1 - alphan*vn

       The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
    */
    ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr);
    tmp1 = (alphan/alphan_1)*betan;
    tmp2 = alphan*deltan;
    for (i=0; i<N; i++) {
      zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i];
      vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i];
      sn[i] = rn_1[i] - alphan*vn[i];
    }
    ierr = PetscLogFlops(3.0+11.0*N);
    ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr);

    /*
        qn = A*vn
    */
    ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr);

    /*
        tn = un_1 - alphan*qn
    */
    ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr);
      

    /*
        phin = r0'sn
        pin  = r0'qn
        gamman = f0'sn
        etan   = f0'tn
        thetan = sn'tn
        kappan = tn'tn
    */
    ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
    phin = pin = gamman = etan = thetan = kappan = 0.0;
    for (i=0; i<N; i++) {
      phin += r0[i]*sn[i];
      pin  += r0[i]*qn[i];
      gamman += f0[i]*sn[i];
      etan   += f0[i]*tn[i];
      thetan += sn[i]*tn[i];
      kappan += tn[i]*tn[i];
    }
    ierr = PetscLogFlops(12.0*N);
    ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
    insums[0] = phin;
    insums[1] = pin;
    insums[2] = gamman;
    insums[3] = etan;
    insums[4] = thetan;
    insums[5] = kappan;
    insums[6] = rnormin;
    ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
    if (ksp->lagnorm && ksp->its > 1) {
      ierr = MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    } else {
      ierr = MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    }
#else
    if (ksp->lagnorm && ksp->its > 1) {
      ierr = MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    } else {
      ierr = MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    }
#endif
    ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    phin     = outsums[0];
    pin      = outsums[1];
    gamman   = outsums[2];
    etan     = outsums[3];
    thetan   = outsums[4];
    kappan   = outsums[5];
    if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));

    if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its);
    if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its);
    omegan = thetan/kappan;
    sigman = gamman - omegan*etan;

    /*
        rn = sn - omegan*tn
        xn = xn_1 + zn + omegan*sn
    */
    ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr);
    rnormin = 0.0;
    for (i=0; i<N; i++) {
      rn[i]    = sn[i] - omegan*tn[i];
      rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]);
      xn[i]   += zn[i] + omegan*sn[i];
    }
    ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr);
    ierr = PetscLogFlops(7.0*N);
    ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr);

    if (!ksp->lagnorm && ksp->chknorm < ksp->its) {
      ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
      ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
      rnorm = PetscSqrtReal(rnorm);
    } 

    /* Test for convergence */
    ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);   
    if (ksp->reason) break;
 
    /* un = A*rn */
    ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr);   

    /* Update n-1 locations with n locations */
    sigman_2 = sigman_1;
    sigman_1 = sigman;
    pin_1    = pin;
    phin_1   = phin;
    alphan_1 = alphan;
    taun_1   = taun;
    omegan_1 = omegan;
  }
  if (ksp->its >= ksp->max_it) {
    ksp->reason = KSP_DIVERGED_ITS;
  }
  ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #8

Datei anzeigen

Datei: lgmres.c Projekt: ZJLi2013/petsc

PetscErrorCode KSPLGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_LGMRES     *lgmres = (KSP_LGMRES*)(ksp->data);
  PetscReal      res_norm, res;
  PetscReal      hapbnd, tt;
  PetscScalar    tmp;
  PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
  PetscErrorCode ierr;
  PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
  PetscInt       max_k  = lgmres->max_k; /* max approx space size */
  PetscInt       max_it = ksp->max_it;  /* max # of overall iterations for the method */

  /* LGMRES_MOD - new variables*/
  PetscInt    aug_dim = lgmres->aug_dim;
  PetscInt    spot    = 0;
  PetscInt    order   = 0;
  PetscInt    it_arnoldi;                /* number of arnoldi steps to take */
  PetscInt    it_total;                  /* total number of its to take (=approx space size)*/
  PetscInt    ii, jj;
  PetscReal   tmp_norm;
  PetscScalar inv_tmp_norm;
  PetscScalar *avec;

  PetscFunctionBegin;
  /* Number of pseudo iterations since last restart is the number
     of prestart directions */
  loc_it = 0;

  /* LGMRES_MOD: determine number of arnoldi steps to take */
  /* if approx_constant then we keep the space the same size even if
     we don't have the full number of aug vectors yet*/
  if (lgmres->approx_constant) it_arnoldi = max_k - lgmres->aug_ct;
  else it_arnoldi = max_k - aug_dim;

  it_total =  it_arnoldi + lgmres->aug_ct;

  /* initial residual is in VEC_VV(0)  - compute its norm*/
  ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
  res  = res_norm;

  /* first entry in right-hand-side of hessenberg system is just
     the initial residual norm */
  *GRS(0) = res_norm;

  /* check for the convergence */
  if (!res) {
    if (itcount) *itcount = 0;
    ksp->reason = KSP_CONVERGED_ATOL;

    ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* scale VEC_VV (the initial residual) */
  tmp = 1.0/res_norm; ierr = VecScale(VEC_VV(0),tmp);CHKERRQ(ierr);

  ksp->rnorm = res;


  /* note: (lgmres->it) is always set one less than (loc_it) It is used in
     KSPBUILDSolution_LGMRES, where it is passed to KSPLGMRESBuildSoln.
     Note that when KSPLGMRESBuildSoln is called from this function,
     (loc_it -1) is passed, so the two are equivalent */
  lgmres->it = (loc_it - 1);


  /* MAIN ITERATION LOOP BEGINNING*/


  /* keep iterating until we have converged OR generated the max number
     of directions OR reached the max number of iterations for the method */
  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

  while (!ksp->reason && loc_it < it_total && ksp->its < max_it) { /* LGMRES_MOD: changed to it_total */
    ierr       = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
    lgmres->it = (loc_it - 1);
    ierr       = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);

    /* see if more space is needed for work vectors */
    if (lgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
      ierr = KSPLGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
      /* (loc_it+1) is passed in as number of the first vector that should
          be allocated */
    }

    /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */
    if (loc_it < it_arnoldi) { /* Arnoldi */
      ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(loc_it),VEC_VV(1+loc_it),VEC_TEMP_MATOP);CHKERRQ(ierr);
    } else { /*aug step */
      order = loc_it - it_arnoldi + 1; /* which aug step */
      for (ii=0; ii<aug_dim; ii++) {
        if (lgmres->aug_order[ii] == order) {
          spot = ii;
          break; /* must have this because there will be duplicates before aug_ct = aug_dim */
        }
      }

      ierr = VecCopy(A_AUGVEC(spot), VEC_VV(1+loc_it));CHKERRQ(ierr);
      /*note: an alternate implementation choice would be to only save the AUGVECS and
        not A_AUGVEC and then apply the PC here to the augvec */
    }

    /* update hessenberg matrix and do Gram-Schmidt - new direction is in
       VEC_VV(1+loc_it)*/
    ierr = (*lgmres->orthog)(ksp,loc_it);CHKERRQ(ierr);

    /* new entry in hessenburg is the 2-norm of our new direction */
    ierr = VecNorm(VEC_VV(loc_it+1),NORM_2,&tt);CHKERRQ(ierr);

    *HH(loc_it+1,loc_it)  = tt;
    *HES(loc_it+1,loc_it) = tt;


    /* check for the happy breakdown */
    hapbnd = PetscAbsScalar(tt / *GRS(loc_it)); /* GRS(loc_it) contains the res_norm from the last iteration  */
    if (hapbnd > lgmres->haptol) hapbnd = lgmres->haptol;
    if (tt > hapbnd) {
      tmp  = 1.0/tt;
      ierr = VecScale(VEC_VV(loc_it+1),tmp);CHKERRQ(ierr); /* scale new direction by its norm */
    } else {
      ierr   = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);CHKERRQ(ierr);
      hapend = PETSC_TRUE;
    }

    /* Now apply rotations to new col of hessenberg (and right side of system),
       calculate new rotation, and get new residual norm at the same time*/
    ierr = KSPLGMRESUpdateHessenberg(ksp,loc_it,hapend,&res);CHKERRQ(ierr);
    if (ksp->reason) break;

    loc_it++;
    lgmres->it = (loc_it-1);   /* Add this here in case it has converged */

    ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its++;
    ksp->rnorm = res;
    ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);

    /* Catch error in happy breakdown and signal convergence and break from loop */
    if (hapend) {
      if (!ksp->reason) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }
    }
  }
  /* END OF ITERATION LOOP */
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);

  /* Monitor if we know that we will not return for a restart */
  if (ksp->reason || ksp->its >= max_it) {
    ierr = KSPMonitor(ksp, ksp->its, res);CHKERRQ(ierr);
  }

  if (itcount) *itcount = loc_it;

  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */

  /* Form the solution (or the solution so far) */
  /* Note: must pass in (loc_it-1) for iteration count so that KSPLGMRESBuildSoln
     properly navigates */

  ierr = KSPLGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);


  /* LGMRES_MOD collect aug vector and A*augvector for future restarts -
     only if we will be restarting (i.e. this cycle performed it_total
     iterations)  */
  if (!ksp->reason && ksp->its < max_it && aug_dim > 0) {

    /*AUG_TEMP contains the new augmentation vector (assigned in  KSPLGMRESBuildSoln) */
    if (!lgmres->aug_ct) {
      spot = 0;
      lgmres->aug_ct++;
    } else if (lgmres->aug_ct < aug_dim) {
      spot = lgmres->aug_ct;
      lgmres->aug_ct++;
    } else { /* truncate */
      for (ii=0; ii<aug_dim; ii++) {
        if (lgmres->aug_order[ii] == aug_dim) spot = ii;
      }
    }



    ierr = VecCopy(AUG_TEMP, AUGVEC(spot));CHKERRQ(ierr);
    /*need to normalize */
    ierr = VecNorm(AUGVEC(spot), NORM_2, &tmp_norm);CHKERRQ(ierr);

    inv_tmp_norm = 1.0/tmp_norm;

    ierr = VecScale(AUGVEC(spot),inv_tmp_norm);CHKERRQ(ierr);

    /*set new aug vector to order 1  - move all others back one */
    for (ii=0; ii < aug_dim; ii++) AUG_ORDER(ii)++;
    AUG_ORDER(spot) = 1;

    /*now add the A*aug vector to A_AUGVEC(spot)  - this is independ. of preconditioning type*/
    /* want V*H*y - y is in GRS, V is in VEC_VV and H is in HES */


    /* first do H+*y */
    avec = lgmres->hwork;
    ierr = PetscMemzero(avec,(it_total+1)*sizeof(*avec));CHKERRQ(ierr);
    for (ii=0; ii < it_total + 1; ii++) {
      for (jj=0; jj <= ii+1 && jj < it_total+1; jj++) {
        avec[jj] += *HES(jj ,ii) * *GRS(ii);
      }
    }

    /*now multiply result by V+ */
    ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);
    ierr = VecMAXPY(VEC_TEMP, it_total+1, avec, &VEC_VV(0));CHKERRQ(ierr); /*answer is in VEC_TEMP*/

    /*copy answer to aug location  and scale*/
    ierr = VecCopy(VEC_TEMP,  A_AUGVEC(spot));CHKERRQ(ierr);
    ierr = VecScale(A_AUGVEC(spot),inv_tmp_norm);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

Beispiel #9

Datei anzeigen

Datei: agmres.c Projekt: fengyuqi/petsc

static PetscErrorCode KSPAGMRESBuildBasis(KSP ksp)
{
  PetscErrorCode ierr;
  KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
  PetscReal      *Rshift = agmres->Rshift;
  PetscReal      *Ishift = agmres->Ishift;
  PetscReal      *Scale  = agmres->Scale;
  PetscInt       max_k   = agmres->max_k;
  PetscInt       KspSize = KSPSIZE;  /* if max_k == KspSizen then the basis should not be augmented */
  PetscInt       j       = 1;

  PetscFunctionBegin;
  ierr     = PetscLogEventBegin(KSP_AGMRESBuildBasis, ksp, 0,0,0);CHKERRQ(ierr);
  Scale[0] = 1.0;
  while (j <= max_k) {
    if (Ishift[j-1] == 0) {
      if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
        /* Apply the precond-matrix operators */
        ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
        /* Then apply deflation as a preconditioner */
        ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr);
      } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
        ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr);
        ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
      } else {
        ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
      }
      ierr = VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));CHKERRQ(ierr);
#if defined(KSP_AGMRES_NONORM)
      Scale[j] = 1.0;
#else
      ierr     = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); /* This step can be postponed until all vectors are built */
      ierr     = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr);
      Scale[j] = 1.0/Scale[j];
#endif

      agmres->matvecs += 1;
      j++;
    } else {
      if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
        /* Apply the precond-matrix operators */
        ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
        /* Then apply deflation as a preconditioner */
        ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr);
      } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
        ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr);
        ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
      } else {
        ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
      }
      ierr = VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));CHKERRQ(ierr);
#if defined(KSP_AGMRES_NONORM)
      Scale[j] = 1.0;
#else
      ierr     = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr);
      ierr     = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr);
      Scale[j] = 1.0/Scale[j];
#endif
      agmres->matvecs += 1;
      j++;
      if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
        /* Apply the precond-matrix operators */
        ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
        /* Then apply deflation as a preconditioner */
        ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr);
      } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
        ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr);
        ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
      } else {
        ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
      }
      ierr = VecAXPY(VEC_V(j), -Rshift[j-2], VEC_V(j-1));CHKERRQ(ierr);
      ierr = VecAXPY(VEC_V(j), Scale[j-2]*Ishift[j-2]*Ishift[j-2], VEC_V(j-2));CHKERRQ(ierr);
#if defined(KSP_AGMRES_NONORM)
      Scale[j] = 1.0;
#else
      ierr     = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr);
      Scale[j] = 1.0/Scale[j];
#endif
      agmres->matvecs += 1;
      j++;
    }
  }
  /* Augment the subspace with the eigenvectors*/
  while (j <= KspSize) {
    ierr = KSP_PCApplyBAorAB(ksp, agmres->U[j - max_k - 1], VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr);
#if defined(KSP_AGMRES_NONORM)
    Scale[j] = 1.0;
#else
    ierr     = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr);
    ierr     = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr);
    Scale[j] = 1.0/Scale[j];
#endif
    agmres->matvecs += 1;
    j++;
  }
  ierr = PetscLogEventEnd(KSP_AGMRESBuildBasis, ksp, 0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #10

Datei anzeigen

Datei: tfqmr.c Projekt: erdc-cm/petsc-dev

static PetscErrorCode  KSPSolve_TFQMR(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,m;
  PetscScalar    rho,rhoold,a,s,b,eta,etaold,psiold,cf;
  PetscReal      dp,dpold,w,dpest,tau,psi,cm;
  Vec            X,B,V,P,R,RP,T,T1,Q,U,D,AUQ;

  PetscFunctionBegin;
  X        = ksp->vec_sol;
  B        = ksp->vec_rhs;
  R        = ksp->work[0];
  RP       = ksp->work[1];
  V        = ksp->work[2];
  T        = ksp->work[3];
  Q        = ksp->work[4];
  P        = ksp->work[5];
  U        = ksp->work[6];
  D        = ksp->work[7];
  T1       = ksp->work[8];
  AUQ      = V;

  /* Compute initial preconditioned residual */
  ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr);

  /* Test for nothing to do */
  ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);
  ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
  ksp->rnorm  = dp;
  ksp->its    = 0;
  ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
  ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) PetscFunctionReturn(0);

  /* Make the initial Rp == R */
  ierr = VecCopy(R,RP);CHKERRQ(ierr);

  /* Set the initial conditions */
  etaold = 0.0;
  psiold = 0.0;
  tau    = dp;
  dpold  = dp;

  ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr);       /* rhoold = (r,rp)     */
  ierr = VecCopy(R,U);CHKERRQ(ierr);
  ierr = VecCopy(R,P);CHKERRQ(ierr);
  ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr);
  ierr = VecSet(D,0.0);CHKERRQ(ierr);

  i=0;
  do {
    ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
    ksp->its++;
    ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);
    ierr = VecDot(V,RP,&s);CHKERRQ(ierr);          /* s <- (v,rp)          */
    a = rhoold / s;                                 /* a <- rho / s         */
    ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr);  /* q <- u - a v         */
    ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr);     /* t <- u + q           */
    ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,T1);CHKERRQ(ierr);
    ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr);      /* r <- r - a K (u + q) */
    ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);
    for (m=0; m<2; m++) {
      if (!m) {
        w = PetscSqrtReal(dp*dpold);
      } else {
        w = dp;
      }
      psi = w / tau;
      cm  = 1.0 / PetscSqrtReal(1.0 + psi * psi);
      tau = tau * psi * cm;
      eta = cm * cm * a;
      cf  = psiold * psiold * etaold / a;
      if (!m) {
        ierr = VecAYPX(D,cf,U);CHKERRQ(ierr);
      } else {
        ierr = VecAYPX(D,cf,Q);CHKERRQ(ierr);
      }
      ierr = VecAXPY(X,eta,D);CHKERRQ(ierr);

      dpest = PetscSqrtReal(m + 1.0) * tau;
      ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
      ksp->rnorm                                    = dpest;
      ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);
      KSPLogResidualHistory(ksp,dpest);
      ierr = KSPMonitor(ksp,i+1,dpest);CHKERRQ(ierr);
      ierr = (*ksp->converged)(ksp,i+1,dpest,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) break;

      etaold = eta;
      psiold = psi;
    }
    if (ksp->reason) break;

    ierr = VecDot(R,RP,&rho);CHKERRQ(ierr);        /* rho <- (r,rp)       */
    b = rho / rhoold;                               /* b <- rho / rhoold   */
    ierr = VecWAXPY(U,b,Q,R);CHKERRQ(ierr);       /* u <- r + b q        */
    ierr = VecAXPY(Q,b,P);CHKERRQ(ierr);
    ierr = VecWAXPY(P,b,Q,U);CHKERRQ(ierr);       /* p <- u + b(q + b p) */
    ierr = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr); /* v <- K p  */

    rhoold = rho;
    dpold  = dp;

    i++;
  } while (i<ksp->max_it);
  if (i >= ksp->max_it) {
    ksp->reason = KSP_DIVERGED_ITS;
  }

  ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

Beispiel #11

Datei anzeigen

static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
    KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
    PetscScalar    alpha, beta, omega, sigma;
    PetscScalar    rho0, rho1;
    PetscReal      kappa0, kappaA, kappa1;
    PetscReal      ghat, epsilon, abstol;
    PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
    PetscTruth     bUpdateX;
    PetscTruth     bBombed = PETSC_FALSE;

    PetscInt       maxit;
    PetscInt       h, i, j, k, vi, ell;
    PetscBLASInt   ldMZ,bierr;

    PetscErrorCode ierr;

    PetscFunctionBegin;
    if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
    if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

    /* set up temporary vectors */
    vi = 0;
    ell = bcgsl->ell;
    bcgsl->vB    = ksp->work[vi];
    vi++;
    bcgsl->vRt   = ksp->work[vi];
    vi++;
    bcgsl->vTm   = ksp->work[vi];
    vi++;
    bcgsl->vvR   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vvU   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vXr   = ksp->work[vi];
    vi++;
    ldMZ = PetscBLASIntCast(ell+1);

    /* Prime the iterative solver */
    ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
    CHKERRQ(ierr);
    ierr = VecNorm(VVR[0], NORM_2, &zeta0);
    CHKERRQ(ierr);
    rnmax_computed = zeta0;
    rnmax_true = zeta0;

    ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (ksp->reason) {
        ierr = PetscObjectTakeAccess(ksp);
        CHKERRQ(ierr);
        ksp->its   = 0;
        ksp->rnorm = zeta0;
        ierr = PetscObjectGrantAccess(ksp);
        CHKERRQ(ierr);
        PetscFunctionReturn(0);
    }

    ierr = VecSet(VVU[0],0.0);
    CHKERRQ(ierr);
    alpha = 0.;
    rho0 = omega = 1;

    if (bcgsl->delta>0.0) {
        ierr = VecCopy(VX, VXR);
        CHKERRQ(ierr);
        ierr = VecSet(VX,0.0);
        CHKERRQ(ierr);
        ierr = VecCopy(VVR[0], VB);
        CHKERRQ(ierr);
    } else {
        ierr = VecCopy(ksp->vec_rhs, VB);
        CHKERRQ(ierr);
    }

    /* Life goes on */
    ierr = VecCopy(VVR[0], VRT);
    CHKERRQ(ierr);
    zeta = zeta0;

    ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
    CHKERRQ(ierr);

    for (k=0; k<maxit; k += bcgsl->ell) {
        ksp->its   = k;
        ksp->rnorm = zeta;

        KSPLogResidualHistory(ksp, zeta);
        KSPMonitor(ksp, ksp->its, zeta);

        ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
        CHKERRQ(ierr);
        if (ksp->reason) break;

        /* BiCG part */
        rho0 = -omega*rho0;
        nrm0 = zeta;
        for (j=0; j<bcgsl->ell; j++) {
            /* rho1 <- r_j' * r_tilde */
            ierr = VecDot(VVR[j], VRT, &rho1);
            CHKERRQ(ierr);
            if (rho1 == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            beta = alpha*(rho1/rho0);
            rho0 = rho1;
            for (i=0; i<=j; i++) {
                /* u_i <- r_i - beta*u_i */
                ierr = VecAYPX(VVU[i], -beta, VVR[i]);
                CHKERRQ(ierr);
            }
            /* u_{j+1} <- inv(K)*A*u_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecDot(VVU[j+1], VRT, &sigma);
            CHKERRQ(ierr);
            if (sigma == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            alpha = rho1/sigma;

            /* x <- x + alpha*u_0 */
            ierr = VecAXPY(VX, alpha, VVU[0]);
            CHKERRQ(ierr);

            for (i=0; i<=j; i++) {
                /* r_i <- r_i - alpha*u_{i+1} */
                ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);
                CHKERRQ(ierr);
            }

            /* r_{j+1} <- inv(K)*A*r_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecNorm(VVR[0], NORM_2, &nrm0);
            CHKERRQ(ierr);
            if (bcgsl->delta>0.0) {
                if (rnmax_computed<nrm0) rnmax_computed = nrm0;
                if (rnmax_true<nrm0) rnmax_true = nrm0;
            }

            /* NEW: check for early exit */
            ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
            CHKERRQ(ierr);
            if (ksp->reason) {
                ierr = PetscObjectTakeAccess(ksp);
                CHKERRQ(ierr);
                ksp->its   = k+j;
                ksp->rnorm = nrm0;
                ierr = PetscObjectGrantAccess(ksp);
                CHKERRQ(ierr);
                break;
            }
        }

        if (bBombed==PETSC_TRUE) break;

        /* Polynomial part */
        for(i = 0; i <= bcgsl->ell; ++i) {
            ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
            CHKERRQ(ierr);
        }
        /* Symmetrize MZa */
        for(i = 0; i <= bcgsl->ell; ++i) {
            for(j = i+1; j <= bcgsl->ell; ++j) {
                MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
            }
        }
        /* Copy MZa to MZb */
        ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
        CHKERRQ(ierr);

        if (!bcgsl->bConvex || bcgsl->ell==1) {
            PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

            AY0c[0] = -1;
            LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
            if (ierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
        } else {
            PetscBLASInt ione = 1;
            PetscScalar aone = 1.0, azero = 0.0;
            PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

            LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
            if (ierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
            AY0c[0] = -1;
            AY0c[bcgsl->ell] = 0.;

            ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

            AYlc[0] = 0.;
            AYlc[bcgsl->ell] = -1;

            BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

            kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

            /* round-off can cause negative kappa's */
            if (kappa0<0) kappa0 = -kappa0;
            kappa0 = sqrt(kappa0);

            kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

            kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            if (kappa1<0) kappa1 = -kappa1;
            kappa1 = sqrt(kappa1);

            if (kappa0!=0.0 && kappa1!=0.0) {
                if (kappaA<0.7*kappa0*kappa1) {
                    ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
                } else {
                    ghat = kappaA/(kappa1*kappa1);
                }
                for (i=0; i<=bcgsl->ell; i++) {
                    AY0c[i] = AY0c[i] - ghat* AYlc[i];
                }
            }
        }

        omega = AY0c[bcgsl->ell];
        for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
            omega = AY0c[h];
        }
        if (omega==0.0) {
            ksp->reason = KSP_DIVERGED_BREAKDOWN;
            break;
        }


        ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
        CHKERRQ(ierr);
        ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecNorm(VVR[0], NORM_2, &zeta);
        CHKERRQ(ierr);

        /* Accurate Update */
        if (bcgsl->delta>0.0) {
            if (rnmax_computed<zeta) rnmax_computed = zeta;
            if (rnmax_true<zeta) rnmax_true = zeta;

            bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
            if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
                /* r0 <- b-inv(K)*A*X */
                ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
                CHKERRQ(ierr);
                ierr = VecAYPX(VVR[0], -1.0, VB);
                CHKERRQ(ierr);
                rnmax_true = zeta;

                if (bUpdateX) {
                    ierr = VecAXPY(VXR,1.0,VX);
                    CHKERRQ(ierr);
                    ierr = VecSet(VX,0.0);
                    CHKERRQ(ierr);
                    ierr = VecCopy(VVR[0], VB);
                    CHKERRQ(ierr);
                    rnmax_computed = zeta;
                }
            }
        }
    }
    if (bcgsl->delta>0.0) {
        ierr = VecAXPY(VX,1.0,VXR);
        CHKERRQ(ierr);
    }

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
    PetscFunctionReturn(0);
}