Esempio n. 1
0
File: bcgsl.c Progetto: hansec/petsc
/*
   KSPSolve_BCGSL - Solve routine of the BCGSL Krylov method.

   Each outer iteration performs bcgsl->ell steps of the "BiCG part" followed
   by the "polynomial part", which combines the ell+1 residual vectors using
   coefficients obtained from a small dense system.  When bcgsl->delta > 0 an
   additional "accurate update" step recomputes the residual and periodically
   folds the current correction into a saved solution vector.

   Input Parameter:
.  ksp - the solver context; ksp->data is interpreted as a KSP_BCGSL

   Reads:  ksp->work, ksp->vec_rhs, ksp->converged, ksp->cnvP and the fields of
           bcgsl (ell, delta, bConvex, pinv, s, u, v, work, lwork, realwork).
   Writes: ksp->its, ksp->rnorm, ksp->reason and the work-vector aliases
           bcgsl->vB, vRt, vTm, vvR, vvU, vXr.

   Returns 0 on every explicit exit, including breakdowns, which are reported
   through ksp->reason.  Failures of the called PETSc routines go through
   CHKERRQ/SETERRQ (the usual PETSc error-propagation macros).

   NOTE(review): VX, VB, VRT, VTM, VXR, VVR, VVU, MZa, MZb, AY0c, AYlc and AYtc
   are not declared in this function; presumably they are file-level macros
   over ksp / bcgsl fields -- confirm against the rest of the file.  Because of
   that, the local names `ksp` and `bcgsl` must not be renamed.
*/
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  /* Layout inside ksp->work: three single vectors, two runs of ell+1 vectors
     (vvR and vvU), then one more single vector -- 2*ell+6 entries in total. */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;
  /* ldMZ = ell+1 is used below as the leading dimension of MZa/MZb */
  ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  /* Convergence test on the initial residual; any nonzero reason ends the solve */
  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  /* With delta > 0 the starting x is parked in VXR, VX restarts from zero and
     VB holds the initial residual; otherwise VB is a copy of the right-hand side. */
  if (bcgsl->delta>0.0) {
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  /* VRT (the vector every VecDot below is taken against) is the initial residual */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  /* Outer loop: the iteration counter advances by ell per pass */
  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    /* A negative reason returns at once (the VX += VXR step after the loop is
       skipped); a positive reason leaves the loop and falls through to it. */
    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      /* an exactly zero inner product is reported as a BiCG breakdown */
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      /* sigma is the divisor of alpha below, so zero is also a breakdown */
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      /* norm of the updated r_0; the running maxima are only tracked when delta > 0 */
      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      /* Only a negative reason returns here.  On a positive reason its/rnorm
         are recorded but the inner loop keeps running. */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    /* Fill entries 0..i of MZa at offset i*ldMZ with the VecMDot results of
       VVR[i] against VVR[0..i]; only one triangle is computed here. */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    /* The computed entry is conjugated in place and the same value is stored
       at its mirror position. */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    /* MZa is overwritten by the LAPACK calls below; MZb keeps the original */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      /* Solve the ell-by-ell system whose matrix starts at MZa[1+ldMZ] for
         AY0c[1..ell], with right-hand side MZb[1..ell]; AY0c[0] is fixed to -1. */
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
        /* pseudo-inverse route: SVD of the submatrix into bcgsl->s, u, v */
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
#  if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
#  else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
#  endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        /* largest entry of s, used to scale the cut-off */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        /* Accumulate AY0c[1+j] += (utb/s[i]) * v[j*bell+i] over every i with
           s[i] >= pinv_tol, where utb = sum_j MZb[1+j]*u[i*bell+j]. */
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
        /* Cholesky route: factor with potrf, then solve in place with potrs */
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        /* NOTE(review): bierr from potrs is not inspected */
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      /* bConvex with ell > 1: two solves of size ell-1 against the same
         factorization give AY0c and AYlc, which are then blended using ghat. */
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      /* first solve: right-hand side is MZb[1..ell-1]; end entries fixed to -1 and 0 */
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      /* second solve: right-hand side is MZb[1+ldMZ*ell ..]; end entries fixed to 0 and -1 */
      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      /* AYtc <- MZb*AY0c; kappa0 = sqrt(|AY0c . AYtc|), kappaA = AYlc . AYtc */
      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      /* AYtc <- MZb*AYlc; kappa1 = sqrt(|AYlc . AYtc|) */
      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      /* The blend is skipped when either kappa is zero (kappa1 is a divisor).
         ghat is kappaA/kappa1^2 unless kappaA < 0.7*kappa0*kappa1, in which
         case it is +/-0.7*kappa0/kappa1 with the sign of kappaA. */
      if (kappa0!=0.0 && kappa1!=0.0) {
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    /* omega is the first nonzero entry found scanning AY0c[ell] down to
       AY0c[1]; if all of them are zero the method has broken down. */
    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }


    /* x gets +AY0c[1..ell] times VVR[0..ell-1]; u_0 and r_0 get the negated
       coefficients times VVU[1..ell] / VVR[1..ell].  The sign of AY0c[1..ell]
       is flipped for those two calls and restored afterwards. */
    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      /* bUpdateX: the residual fell below delta*zeta0 after the running
         maximum had reached at least zeta0. */
      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        /* zeta here is still the norm taken before r0 was recomputed */
        rnmax_true = zeta;

        if (bUpdateX) {
          /* fold the current correction into VXR, restart VX from zero and
             make the fresh residual the new reference VB */
          ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  /* add the saved part of the solution back into x */
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  /* final convergence test; if it yields no reason, report iteration-limit divergence */
  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Esempio n. 2
0
/*
   KSPSolve_BCGSL - Solve routine of the BCGSL Krylov method.

   Each outer iteration performs bcgsl->ell steps of the "BiCG part" followed
   by the "polynomial part", which combines the ell+1 residual vectors using
   coefficients obtained from a small Cholesky solve.  When bcgsl->delta > 0 an
   "accurate update" step recomputes the residual and periodically folds the
   current correction into a saved solution vector.

   Input Parameter:
.  ksp - the solver context; ksp->data is interpreted as a KSP_BCGSL

   Writes ksp->its, ksp->rnorm, ksp->reason and the work-vector aliases in
   bcgsl.  Unsupported norm-type / preconditioner-side combinations raise
   PETSC_ERR_SUP.  Breakdowns are reported through ksp->reason and leave the
   iteration via `break`, so the tail of the function still runs.

   NOTE(review): VX, VB, VRT, VTM, VXR, VVR, VVU, MZa, MZb, AY0c, AYlc and AYtc
   are not declared in this function; presumably they are file-level macros
   over ksp / bcgsl fields -- confirm against the rest of the file.  Because of
   that, the local names `ksp` and `bcgsl` must not be renamed.
*/
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
    KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
    PetscScalar    alpha, beta, omega, sigma;
    PetscScalar    rho0, rho1;
    PetscReal      kappa0, kappaA, kappa1;
    PetscReal      ghat, epsilon, abstol;
    PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
    PetscTruth     bUpdateX;
    PetscTruth     bBombed = PETSC_FALSE;

    PetscInt       maxit;
    PetscInt       h, i, j, k, vi, ell;
    PetscBLASInt   ldMZ,bierr;

    PetscErrorCode ierr;

    PetscFunctionBegin;
    if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
    if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

    /* set up temporary vectors */
    /* Layout inside ksp->work: three single vectors, two runs of ell+1 vectors
       (vvR and vvU), then one more single vector -- 2*ell+6 entries in total. */
    vi = 0;
    ell = bcgsl->ell;
    bcgsl->vB    = ksp->work[vi];
    vi++;
    bcgsl->vRt   = ksp->work[vi];
    vi++;
    bcgsl->vTm   = ksp->work[vi];
    vi++;
    bcgsl->vvR   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vvU   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vXr   = ksp->work[vi];
    vi++;
    /* ldMZ = ell+1 is used below as the leading dimension of MZa/MZb */
    ldMZ = PetscBLASIntCast(ell+1);

    /* Prime the iterative solver */
    ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
    CHKERRQ(ierr);
    ierr = VecNorm(VVR[0], NORM_2, &zeta0);
    CHKERRQ(ierr);
    rnmax_computed = zeta0;
    rnmax_true = zeta0;

    /* Convergence test on the initial residual; any nonzero reason ends the solve */
    ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (ksp->reason) {
        ierr = PetscObjectTakeAccess(ksp);
        CHKERRQ(ierr);
        ksp->its   = 0;
        ksp->rnorm = zeta0;
        ierr = PetscObjectGrantAccess(ksp);
        CHKERRQ(ierr);
        PetscFunctionReturn(0);
    }

    ierr = VecSet(VVU[0],0.0);
    CHKERRQ(ierr);
    alpha = 0.;
    rho0 = omega = 1;

    /* With delta > 0 the starting x is parked in VXR, VX restarts from zero and
       VB holds the initial residual; otherwise VB is a copy of the right-hand side. */
    if (bcgsl->delta>0.0) {
        ierr = VecCopy(VX, VXR);
        CHKERRQ(ierr);
        ierr = VecSet(VX,0.0);
        CHKERRQ(ierr);
        ierr = VecCopy(VVR[0], VB);
        CHKERRQ(ierr);
    } else {
        ierr = VecCopy(ksp->vec_rhs, VB);
        CHKERRQ(ierr);
    }

    /* Life goes on */
    /* VRT (the vector every VecDot below is taken against) is the initial residual */
    ierr = VecCopy(VVR[0], VRT);
    CHKERRQ(ierr);
    zeta = zeta0;

    ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
    CHKERRQ(ierr);

    /* Outer loop: the iteration counter advances by ell per pass */
    for (k=0; k<maxit; k += bcgsl->ell) {
        ksp->its   = k;
        ksp->rnorm = zeta;

        KSPLogResidualHistory(ksp, zeta);
        KSPMonitor(ksp, ksp->its, zeta);

        ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
        CHKERRQ(ierr);
        if (ksp->reason) break;

        /* BiCG part */
        rho0 = -omega*rho0;
        nrm0 = zeta;
        for (j=0; j<bcgsl->ell; j++) {
            /* rho1 <- r_j' * r_tilde */
            ierr = VecDot(VVR[j], VRT, &rho1);
            CHKERRQ(ierr);
            /* an exactly zero inner product is reported as a BiCG breakdown */
            if (rho1 == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            beta = alpha*(rho1/rho0);
            rho0 = rho1;
            for (i=0; i<=j; i++) {
                /* u_i <- r_i - beta*u_i */
                ierr = VecAYPX(VVU[i], -beta, VVR[i]);
                CHKERRQ(ierr);
            }
            /* u_{j+1} <- inv(K)*A*u_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecDot(VVU[j+1], VRT, &sigma);
            CHKERRQ(ierr);
            /* sigma is the divisor of alpha below, so zero is also a breakdown */
            if (sigma == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            alpha = rho1/sigma;

            /* x <- x + alpha*u_0 */
            ierr = VecAXPY(VX, alpha, VVU[0]);
            CHKERRQ(ierr);

            for (i=0; i<=j; i++) {
                /* r_i <- r_i - alpha*u_{i+1} */
                ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);
                CHKERRQ(ierr);
            }

            /* r_{j+1} <- inv(K)*A*r_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
            CHKERRQ(ierr);

            /* norm of the updated r_0; the running maxima are only tracked when delta > 0 */
            ierr = VecNorm(VVR[0], NORM_2, &nrm0);
            CHKERRQ(ierr);
            if (bcgsl->delta>0.0) {
                if (rnmax_computed<nrm0) rnmax_computed = nrm0;
                if (rnmax_true<nrm0) rnmax_true = nrm0;
            }

            /* NEW: check for early exit */
            /* Any reason leaves the inner loop only; bBombed stays false, so
               the polynomial part below still runs for this pass. */
            ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
            CHKERRQ(ierr);
            if (ksp->reason) {
                ierr = PetscObjectTakeAccess(ksp);
                CHKERRQ(ierr);
                ksp->its   = k+j;
                ksp->rnorm = nrm0;
                ierr = PetscObjectGrantAccess(ksp);
                CHKERRQ(ierr);
                break;
            }
        }

        /* a BiCG breakdown inside the inner loop also ends the outer loop */
        if (bBombed==PETSC_TRUE) break;

        /* Polynomial part */
        /* Fill entries 0..i of MZa at offset i*ldMZ with the VecMDot results of
           VVR[i] against VVR[0..i]; only one triangle is computed here. */
        for(i = 0; i <= bcgsl->ell; ++i) {
            ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
            CHKERRQ(ierr);
        }
        /* Symmetrize MZa */
        /* The computed entry is conjugated in place and the same value is
           stored at its mirror position. */
        for(i = 0; i <= bcgsl->ell; ++i) {
            for(j = i+1; j <= bcgsl->ell; ++j) {
                MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
            }
        }
        /* Copy MZa to MZb */
        /* MZa is overwritten by the factorization below; MZb keeps the original */
        ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
        CHKERRQ(ierr);

        if (!bcgsl->bConvex || bcgsl->ell==1) {
            /* Solve the ell-by-ell system whose matrix starts at MZa[1+ldMZ]
               for AY0c[1..ell], with right-hand side MZb[1..ell]. */
            PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

            AY0c[0] = -1;
            LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
            /* The factorization status is returned in bierr, not ierr.  ierr is
               always 0 here (the preceding CHKERRQ passed), so testing it would
               never detect a failed factorization. */
            if (bierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
        } else {
            /* bConvex with ell > 1: two solves of size ell-1 against the same
               factorization give AY0c and AYlc, which are then blended using ghat. */
            PetscBLASInt ione = 1;
            PetscScalar aone = 1.0, azero = 0.0;
            PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

            LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
            /* as above: the LAPACK status lives in bierr */
            if (bierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            /* first solve: right-hand side is MZb[1..ell-1]; end entries fixed to -1 and 0 */
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
            AY0c[0] = -1;
            AY0c[bcgsl->ell] = 0.;

            /* second solve: right-hand side is MZb[1+ldMZ*ell ..]; end entries fixed to 0 and -1 */
            ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

            AYlc[0] = 0.;
            AYlc[bcgsl->ell] = -1;

            /* AYtc <- MZb*AY0c; kappa0 = sqrt(|AY0c . AYtc|), kappaA = AYlc . AYtc */
            BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

            kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

            /* round-off can cause negative kappa's */
            if (kappa0<0) kappa0 = -kappa0;
            kappa0 = sqrt(kappa0);

            kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            /* AYtc <- MZb*AYlc; kappa1 = sqrt(|AYlc . AYtc|) */
            BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

            kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            if (kappa1<0) kappa1 = -kappa1;
            kappa1 = sqrt(kappa1);

            /* The blend is skipped when either kappa is zero (kappa1 is a
               divisor).  ghat is kappaA/kappa1^2 unless kappaA <
               0.7*kappa0*kappa1, in which case it is +/-0.7*kappa0/kappa1
               with the sign of kappaA. */
            if (kappa0!=0.0 && kappa1!=0.0) {
                if (kappaA<0.7*kappa0*kappa1) {
                    ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
                } else {
                    ghat = kappaA/(kappa1*kappa1);
                }
                for (i=0; i<=bcgsl->ell; i++) {
                    AY0c[i] = AY0c[i] - ghat* AYlc[i];
                }
            }
        }

        /* omega is the first nonzero entry found scanning AY0c[ell] down to
           AY0c[1]; if all of them are zero the method has broken down. */
        omega = AY0c[bcgsl->ell];
        for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
            omega = AY0c[h];
        }
        if (omega==0.0) {
            ksp->reason = KSP_DIVERGED_BREAKDOWN;
            break;
        }


        /* x gets +AY0c[1..ell] times VVR[0..ell-1]; u_0 and r_0 get the negated
           coefficients times VVU[1..ell] / VVR[1..ell].  The sign of
           AY0c[1..ell] is flipped for those two calls and restored afterwards. */
        ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
        CHKERRQ(ierr);
        ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecNorm(VVR[0], NORM_2, &zeta);
        CHKERRQ(ierr);

        /* Accurate Update */
        if (bcgsl->delta>0.0) {
            if (rnmax_computed<zeta) rnmax_computed = zeta;
            if (rnmax_true<zeta) rnmax_true = zeta;

            /* bUpdateX: the residual fell below delta*zeta0 after the running
               maximum had reached at least zeta0. */
            bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
            if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
                /* r0 <- b-inv(K)*A*X */
                ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
                CHKERRQ(ierr);
                ierr = VecAYPX(VVR[0], -1.0, VB);
                CHKERRQ(ierr);
                /* zeta here is still the norm taken before r0 was recomputed */
                rnmax_true = zeta;

                if (bUpdateX) {
                    /* fold the current correction into VXR, restart VX from
                       zero and make the fresh residual the new reference VB */
                    ierr = VecAXPY(VXR,1.0,VX);
                    CHKERRQ(ierr);
                    ierr = VecSet(VX,0.0);
                    CHKERRQ(ierr);
                    ierr = VecCopy(VVR[0], VB);
                    CHKERRQ(ierr);
                    rnmax_computed = zeta;
                }
            }
        }
    }
    /* add the saved part of the solution back into x */
    if (bcgsl->delta>0.0) {
        ierr = VecAXPY(VX,1.0,VXR);
        CHKERRQ(ierr);
    }

    /* final convergence test; if it yields no reason, report iteration-limit divergence */
    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
    PetscFunctionReturn(0);
}