static PetscErrorCode KSPSolve_PGMRES(KSP ksp) { PetscErrorCode ierr; PetscInt its,itcount; KSP_PGMRES *pgmres = (KSP_PGMRES*)ksp->data; PetscBool guess_zero = ksp->guess_zero; PetscFunctionBegin; if (ksp->calc_sings && !pgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); itcount = 0; ksp->reason = KSP_CONVERGED_ITERATING; while (!ksp->reason) { ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); ierr = KSPPGMRESCycle(&its,ksp);CHKERRQ(ierr); itcount += its; if (itcount >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ } ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_AGMRES(KSP ksp) { PetscErrorCode ierr; PetscInt its; KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscBool guess_zero = ksp->guess_zero; PetscReal res_old, res; PetscInt test; PetscFunctionBegin; ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->reason = KSP_CONVERGED_ITERATING; if (!agmres->HasShifts) { /* Compute Shifts for the Newton basis */ ierr = KSPComputeShifts_DGMRES(ksp);CHKERRQ(ierr); } /* NOTE: At this step, the initial guess is not equal to zero since one cycle of the classical GMRES is performed to compute the shifts */ ierr = (*ksp->converged)(ksp,0,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason) { ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TMP,VEC_TMP_MATOP,VEC_V(0),ksp->vec_rhs);CHKERRQ(ierr); if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(0), VEC_TMP);CHKERRQ(ierr); ierr = VecCopy(VEC_TMP, VEC_V(0));CHKERRQ(ierr); agmres->matvecs += 1; } ierr = VecNormalize(VEC_V(0),&(ksp->rnorm));CHKERRQ(ierr); KSPCheckNorm(ksp,ksp->rnorm); res_old = ksp->rnorm; /* Record the residual norm to test if deflation is needed */ ksp->ops->buildsolution = KSPBuildSolution_AGMRES; ierr = KSPAGMRESCycle(&its,ksp);CHKERRQ(ierr); if (ksp->its >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } /* compute the eigenvectors to augment the subspace : use an adaptive strategy */ res = ksp->rnorm; if (!ksp->reason && agmres->neig > 0) { test = agmres->max_k * PetscLogReal(ksp->rtol/res) / PetscLogReal(res/res_old); /* estimate the remaining number of steps */ if ((test > agmres->smv*(ksp->max_it-ksp->its)) || agmres->force) { if (!agmres->force && ((test > agmres->bgv*(ksp->max_it-ksp->its)) && ((agmres->r + 1) < agmres->max_neig))) { agmres->neig += 1; /* Augment the number of eigenvalues to deflate if the convergence is too slow */ } ierr = KSPDGMRESComputeDeflationData_DGMRES(ksp,&agmres->neig);CHKERRQ(ierr); } } ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ } ksp->guess_zero = guess_zero; /* restore if user has provided nonzero initial guess */ PetscFunctionReturn(0); }
PetscErrorCode KSPSolve_LGMRES(KSP ksp) { PetscErrorCode ierr; PetscInt cycle_its; /* iterations done in a call to KSPLGMRESCycle */ PetscInt itcount; /* running total of iterations, incl. those in restarts */ KSP_LGMRES *lgmres = (KSP_LGMRES *)ksp->data; PetscBool guess_zero = ksp->guess_zero; PetscInt ii; /*LGMRES_MOD variable */ PetscFunctionBegin; if (ksp->calc_sings && !lgmres->Rsvd) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = 0; lgmres->aug_ct = 0; lgmres->matvecs = 0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); /* initialize */ itcount = 0; ksp->reason = KSP_CONVERGED_ITERATING; /*LGMRES_MOD*/ for (ii=0; ii<lgmres->aug_dim; ii++) { lgmres->aug_order[ii] = 0; } while (!ksp->reason) { /* calc residual - puts in VEC_VV(0) */ ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs); CHKERRQ(ierr); ierr = KSPLGMRESCycle(&cycle_its,ksp); CHKERRQ(ierr); itcount += cycle_its; if (itcount >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ } ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ PetscFunctionReturn(0); }
/* KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. Input Parameter: . ksp - the Krylov space object that was set to use pipefgmres Output Parameter: . outits - number of iterations used */ static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) { PetscErrorCode ierr; PetscInt its,itcount; KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; PetscBool guess_zero = ksp->guess_zero; PetscFunctionBegin; /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */ #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX)) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); #endif ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr); if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); itcount = 0; ksp->reason = KSP_CONVERGED_ITERATING; while (!ksp->reason) { ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); ierr = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr); itcount += its; if (itcount >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ } ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_TCQMR(KSP ksp) { PetscReal rnorm0,rnorm,dp1,Gamma; PetscScalar theta,ep,cl1,sl1,cl,sl,sprod,tau_n1,f; PetscScalar deltmp,rho,beta,eptmp,ta,s,c,tau_n,delta; PetscScalar dp11,dp2,rhom1,alpha,tmp; PetscErrorCode ierr; PetscFunctionBegin; ksp->its = 0; ierr = KSPInitialResidual(ksp,x,u,v,r,b);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&rnorm0);CHKERRQ(ierr); /* rnorm0 = ||r|| */ KSPCheckNorm(ksp,rnorm0); ierr = (*ksp->converged)(ksp,0,rnorm0,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecSet(um1,0.0);CHKERRQ(ierr); ierr = VecCopy(r,u);CHKERRQ(ierr); rnorm = rnorm0; tmp = 1.0/rnorm; ierr = VecScale(u,tmp);CHKERRQ(ierr); ierr = VecSet(vm1,0.0);CHKERRQ(ierr); ierr = VecCopy(u,v);CHKERRQ(ierr); ierr = VecCopy(u,v0);CHKERRQ(ierr); ierr = VecSet(pvec1,0.0);CHKERRQ(ierr); ierr = VecSet(pvec2,0.0);CHKERRQ(ierr); ierr = VecSet(p,0.0);CHKERRQ(ierr); theta = 0.0; ep = 0.0; cl1 = 0.0; sl1 = 0.0; cl = 0.0; sl = 0.0; sprod = 1.0; tau_n1= rnorm0; f = 1.0; Gamma = 1.0; rhom1 = 1.0; /* CALCULATE SQUARED LANCZOS vectors */ ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason) { ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ksp->its++; ierr = KSP_PCApplyBAorAB(ksp,u,y,vtmp);CHKERRQ(ierr); /* y = A*u */ ierr = VecDot(y,v0,&dp11);CHKERRQ(ierr); KSPCheckDot(ksp,dp11); ierr = VecDot(u,v0,&dp2);CHKERRQ(ierr); alpha = dp11 / dp2; /* alpha = v0'*y/v0'*u */ deltmp = alpha; ierr = VecCopy(y,z);CHKERRQ(ierr); ierr = VecAXPY(z,-alpha,u);CHKERRQ(ierr); /* z = y - alpha u */ ierr = VecDot(u,v0,&rho);CHKERRQ(ierr); beta = rho / (f*rhom1); rhom1 = rho; ierr = VecCopy(z,utmp);CHKERRQ(ierr); /* up1 = (A-alpha*I)* (z-2*beta*p) + f*beta* beta*um1 */ ierr = VecAXPY(utmp,-2.0*beta,p);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,utmp,up1,vtmp);CHKERRQ(ierr); ierr = VecAXPY(up1,-alpha,utmp);CHKERRQ(ierr); ierr = VecAXPY(up1,f*beta*beta,um1);CHKERRQ(ierr); ierr = VecNorm(up1,NORM_2,&dp1);CHKERRQ(ierr); KSPCheckNorm(ksp,dp1); f = 1.0 / dp1; ierr = VecScale(up1,f);CHKERRQ(ierr); ierr = VecAYPX(p,-beta,z);CHKERRQ(ierr); /* p = f*(z-beta*p) */ ierr = VecScale(p,f);CHKERRQ(ierr); ierr = VecCopy(u,um1);CHKERRQ(ierr); ierr = VecCopy(up1,u);CHKERRQ(ierr); beta = beta/Gamma; eptmp = beta; ierr = KSP_PCApplyBAorAB(ksp,v,vp1,vtmp);CHKERRQ(ierr); ierr = VecAXPY(vp1,-alpha,v);CHKERRQ(ierr); ierr = VecAXPY(vp1,-beta,vm1);CHKERRQ(ierr); ierr = VecNorm(vp1,NORM_2,&Gamma);CHKERRQ(ierr); KSPCheckNorm(ksp,Gamma); ierr = VecScale(vp1,1.0/Gamma);CHKERRQ(ierr); ierr = VecCopy(v,vm1);CHKERRQ(ierr); ierr = VecCopy(vp1,v);CHKERRQ(ierr); /* SOLVE Ax = b */ /* Apply last two Given's (Gl-1 and Gl) rotations to (beta,alpha,Gamma) */ if (ksp->its > 2) { theta = sl1*beta; eptmp = -cl1*beta; } if (ksp->its > 1) { ep = -cl*eptmp + sl*alpha; deltmp = -sl*eptmp - cl*alpha; } if (PetscAbsReal(Gamma) > PetscAbsScalar(deltmp)) { ta = -deltmp / Gamma; s = 1.0 / PetscSqrtScalar(1.0 + ta*ta); c = s*ta; } else { ta = -Gamma/deltmp; c = 1.0 / PetscSqrtScalar(1.0 + ta*ta); s = c*ta; } delta = -c*deltmp + s*Gamma; tau_n = -c*tau_n1; tau_n1 = -s*tau_n1; ierr = VecCopy(vm1,pvec);CHKERRQ(ierr); ierr = VecAXPY(pvec,-theta,pvec2);CHKERRQ(ierr); ierr = VecAXPY(pvec,-ep,pvec1);CHKERRQ(ierr); ierr = VecScale(pvec,1.0/delta);CHKERRQ(ierr); ierr = VecAXPY(x,tau_n,pvec);CHKERRQ(ierr); cl1 = cl; sl1 = sl; cl = c; sl = s; ierr = VecCopy(pvec1,pvec2);CHKERRQ(ierr); ierr = VecCopy(pvec,pvec1);CHKERRQ(ierr); /* Compute the upper bound on the residual norm r (See QMR paper p. 13) */ sprod = sprod*PetscAbsScalar(s); rnorm = rnorm0 * PetscSqrtReal((PetscReal)ksp->its+2.0) * PetscRealPart(sprod); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->its >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } } ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = KSPUnwindPreconditioner(ksp,x,vtmp);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_CGS(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscScalar rho,rhoold,a,s,b; Vec X,B,V,P,R,RP,T,Q,U,AUQ; PetscReal dp = 0.0; PetscBool diagonalscale; PetscFunctionBegin; /* not sure what residual norm it does use, should use for right preconditioning */ ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; RP = ksp->work[1]; V = ksp->work[2]; T = ksp->work[3]; Q = ksp->work[4]; P = ksp->work[5]; U = ksp->work[6]; AUQ = V; /* Compute initial preconditioned residual */ ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr); /* Test for nothing to do */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_NATURAL) dp *= dp; ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Make the initial Rp == R */ ierr = VecCopy(R,RP);CHKERRQ(ierr); /* added for Fidap */ /* Penalize Startup - Isaac Hasbani Trick for CGS Since most initial conditions result in a mostly 0 residual, we change all the 0 values in the vector RP to the maximum. */ if (ksp->normtype == KSP_NORM_NATURAL) { PetscReal vr0max; PetscScalar *tmp_RP=0; PetscInt numnp =0, *max_pos=0; ierr = VecMax(RP, max_pos, &vr0max);CHKERRQ(ierr); ierr = VecGetArray(RP, &tmp_RP);CHKERRQ(ierr); ierr = VecGetLocalSize(RP, &numnp);CHKERRQ(ierr); for (i=0; i<numnp; i++) { if (tmp_RP[i] == 0.0) tmp_RP[i] = vr0max; } ierr = VecRestoreArray(RP, &tmp_RP);CHKERRQ(ierr); } /* end of addition for Fidap */ /* Set the initial conditions */ ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr); /* rhoold = (r,rp) */ ierr = VecCopy(R,U);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr); i = 0; do { ierr = VecDot(V,RP,&s);CHKERRQ(ierr); /* s <- (v,rp) */ a = rhoold / s; /* a <- rho / s */ ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr); /* q <- u - a v */ ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr); /* t <- u + q */ ierr = VecAXPY(X,a,T);CHKERRQ(ierr); /* x <- x + a (u + q) */ ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,U);CHKERRQ(ierr); ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr); /* r <- r - a K (u + q) */ ierr = VecDot(R,RP,&rho);CHKERRQ(ierr); /* rho <- (r,rp) */ if (ksp->normtype == KSP_NORM_NATURAL) { dp = PetscAbsScalar(rho); } else { ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); } ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; b = rho / rhoold; /* b <- rho / rhoold */ ierr = VecWAXPY(U,b,Q,R);CHKERRQ(ierr); /* u <- r + b q */ ierr = VecAXPY(Q,b,P);CHKERRQ(ierr); ierr = VecWAXPY(P,b,Q,U);CHKERRQ(ierr); /* p <- u + b(q + b p) */ ierr = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr); /* v <- K p */ rhoold = rho; i++; } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL*) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscBool bUpdateX; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscScalar utb; PetscReal max_s, pinv_tol; PetscErrorCode ierr; PetscFunctionBegin; /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); else if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); } } /* Polynomial part */ for (i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr); } /* Symmetrize MZa */ for (i = 0; i <= bcgsl->ell; ++i) { for (j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell; ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr); AY0c[0] = -1; if (bcgsl->pinv) { #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable."); #else # if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr)); # else PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr)); # endif #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } /* Apply pseudo-inverse */ max_s = bcgsl->s[0]; for (i=1; i<bell; i++) { if (bcgsl->s[i] > max_s) { max_s = bcgsl->s[i]; } } /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */ pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON; ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0; i<bell; i++) { if (bcgsl->s[i] >= pinv_tol) { utb=0.; for (j=0; j<bell; j++) { utb += MZb[1+j]*bcgsl->u[i*bell+j]; } for (j=0; j<bell; j++) { AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i]; } } } } else { #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); } } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs; ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr)); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione)); kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione)); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = PetscSqrtReal(kappa0); kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione)); kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); if (kappa1<0) kappa1 = -kappa1; kappa1 = PetscSqrtReal(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h]; if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_IBCGS(KSP ksp) { PetscErrorCode ierr; PetscInt i,N; PetscReal rnorm,rnormin = 0.0; #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithematic rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits) and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE. Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double precision into a 16 byte space (the rest of the space is ignored) */ long double insums[7],outsums[7]; #else PetscScalar insums[7],outsums[7]; #endif PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2; PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan; const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un; PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn; /* the rest do not have to keep n_1 values */ PetscScalar kappan, thetan, etan, gamman, betan, deltan; const PetscScalar *PETSC_RESTRICT tn; PetscScalar *PETSC_RESTRICT sn; Vec R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un; Mat A; PetscFunctionBegin; if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors"); ierr = PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); Xn = ksp->vec_sol;ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Xn_1,PETSC_NULL);CHKERRQ(ierr); B = ksp->vec_rhs;ierr = VecGetArrayRead(B,(const PetscScalar**)&b);ierr = VecRestoreArrayRead(B,PETSC_NULL);CHKERRQ(ierr); R0 = ksp->work[0];ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(R0,PETSC_NULL);CHKERRQ(ierr); Rn = ksp->work[1];ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Rn_1,PETSC_NULL);CHKERRQ(ierr); Un = ksp->work[2];ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Un_1,PETSC_NULL);CHKERRQ(ierr); F0 = ksp->work[3];ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(F0,PETSC_NULL);CHKERRQ(ierr); Vn = ksp->work[4];ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Vn_1,PETSC_NULL);CHKERRQ(ierr); Zn = ksp->work[5];ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Zn_1,PETSC_NULL);CHKERRQ(ierr); Qn = ksp->work[6];ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Qn_1,PETSC_NULL);CHKERRQ(ierr); Tn = ksp->work[7];ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Tn,PETSC_NULL);CHKERRQ(ierr); Sn = ksp->work[8];ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Sn,PETSC_NULL);CHKERRQ(ierr); /* r0 = rn_1 = b - A*xn_1; */ /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr); ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */ ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr); ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr); /* un_1 = A*rn_1; */ ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr); /* f0 = A'*rn_1; */ if (ksp->pc_side == PC_RIGHT) { /* B' A' */ ierr = MatMultTranspose(A,R0,Tn);CHKERRQ(ierr); ierr = PCApplyTranspose(ksp->pc,Tn,F0);CHKERRQ(ierr); } else if (ksp->pc_side == PC_LEFT) { /* A' B' */ ierr = PCApplyTranspose(ksp->pc,R0,Tn);CHKERRQ(ierr); ierr = MatMultTranspose(A,Tn,F0);CHKERRQ(ierr); } /*qn_1 = vn_1 = zn_1 = 0.0; */ ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr); sigman_2 = pin_1 = taun_1 = 0.0; /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */ ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); /* sigman_1 = rn_1'un_1 */ ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); alphan_1 = omegan_1 = 1.0; for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) { rhon = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1; /* if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */ if (ksp->its == 1) deltan = rhon; else deltan = rhon/taun_1; betan = deltan/omegan_1; taun = sigman_1 + betan*taun_1 - deltan*pin_1; if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its); alphan = rhon/taun; ierr = PetscLogFlops(15.0); /* zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1 vn = un_1 + betan*vn_1 - deltan*qn_1 sn = rn_1 - alphan*vn The algorithm in the paper is missing the alphan/alphan_1 term in the zn update */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); tmp1 = (alphan/alphan_1)*betan; tmp2 = alphan*deltan; for (i=0; i<N; i++) { zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i]; vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i]; sn[i] = rn_1[i] - alphan*vn[i]; } ierr = PetscLogFlops(3.0+11.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); /* qn = A*vn */ ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr); /* tn = un_1 - alphan*qn */ ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr); /* phin = r0'sn pin = r0'qn gamman = f0'sn etan = f0'tn thetan = sn'tn kappan = tn'tn */ ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); phin = pin = gamman = etan = thetan = kappan = 0.0; for (i=0; i<N; i++) { phin += r0[i]*sn[i]; pin += r0[i]*qn[i]; gamman += f0[i]*sn[i]; etan += f0[i]*tn[i]; thetan += sn[i]*tn[i]; kappan += tn[i]*tn[i]; } ierr = PetscLogFlops(12.0*N); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = phin; insums[1] = pin; insums[2] = gamman; insums[3] = etan; insums[4] = thetan; insums[5] = kappan; insums[6] = rnormin; ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #else if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #endif ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); phin = outsums[0]; pin = outsums[1]; gamman = outsums[2]; etan = outsums[3]; thetan = outsums[4]; kappan = outsums[5]; if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6])); if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its); if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its); omegan = thetan/kappan; sigman = gamman - omegan*etan; /* rn = sn - omegan*tn xn = xn_1 + zn + omegan*sn */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); rnormin = 0.0; for (i=0; i<N; i++) { rn[i] = sn[i] - omegan*tn[i]; rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]); xn[i] += zn[i] + omegan*sn[i]; } ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr); ierr = PetscLogFlops(7.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); if (!ksp->lagnorm && ksp->chknorm < ksp->its) { ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); rnorm = PetscSqrtReal(rnorm); } /* Test for convergence */ ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* un = A*rn */ ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr); /* Update n-1 locations with n locations */ sigman_2 = sigman_1; sigman_1 = sigman; pin_1 = pin; phin_1 = phin; alphan_1 = alphan; taun_1 = taun; omegan_1 = omegan; } if (ksp->its >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_TFQMR(KSP ksp) { PetscErrorCode ierr; PetscInt i,m; PetscScalar rho,rhoold,a,s,b,eta,etaold,psiold,cf; PetscReal dp,dpold,w,dpest,tau,psi,cm; Vec X,B,V,P,R,RP,T,T1,Q,U,D,AUQ; PetscFunctionBegin; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; RP = ksp->work[1]; V = ksp->work[2]; T = ksp->work[3]; Q = ksp->work[4]; P = ksp->work[5]; U = ksp->work[6]; D = ksp->work[7]; T1 = ksp->work[8]; AUQ = V; /* Compute initial preconditioned residual */ ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr); /* Test for nothing to do */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->rnorm = dp; ksp->its = 0; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Make the initial Rp == R */ ierr = VecCopy(R,RP);CHKERRQ(ierr); /* Set the initial conditions */ etaold = 0.0; psiold = 0.0; tau = dp; dpold = dp; ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr); /* rhoold = (r,rp) */ ierr = VecCopy(R,U);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr); ierr = VecSet(D,0.0);CHKERRQ(ierr); i=0; do { ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->its++; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); ierr = VecDot(V,RP,&s);CHKERRQ(ierr); /* s <- (v,rp) */ a = rhoold / s; /* a <- rho / s */ ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr); /* q <- u - a v */ ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr); /* t <- u + q */ ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,T1);CHKERRQ(ierr); ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr); /* r <- r - a K (u + q) */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); for (m=0; m<2; m++) { if (!m) { w = PetscSqrtReal(dp*dpold); } else { w = dp; } psi = w / tau; cm = 1.0 / PetscSqrtReal(1.0 + psi * psi); tau = tau * psi * cm; eta = cm * cm * a; cf = psiold * psiold * etaold / a; if (!m) { ierr = VecAYPX(D,cf,U);CHKERRQ(ierr); } else { ierr = VecAYPX(D,cf,Q);CHKERRQ(ierr); } ierr = VecAXPY(X,eta,D);CHKERRQ(ierr); dpest = PetscSqrtReal(m + 1.0) * tau; ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->rnorm = dpest; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); KSPLogResidualHistory(ksp,dpest); ierr = KSPMonitor(ksp,i+1,dpest);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dpest,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; etaold = eta; psiold = psi; } if (ksp->reason) break; ierr = VecDot(R,RP,&rho);CHKERRQ(ierr); /* rho <- (r,rp) */ b = rho / rhoold; /* b <- rho / rhoold */ ierr = VecWAXPY(U,b,Q,R);CHKERRQ(ierr); /* u <- r + b q */ ierr = VecAXPY(Q,b,P);CHKERRQ(ierr); ierr = VecWAXPY(P,b,Q,U);CHKERRQ(ierr); /* p <- u + b(q + b p) */ ierr = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr); /* v <- K p */ rhoold = rho; dpold = dp; i++; } while (i<ksp->max_it); if (i >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat, epsilon, abstol; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscTruth bUpdateX; PetscTruth bBombed = PETSC_FALSE; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscErrorCode ierr; PetscFunctionBegin; if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL"); if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL"); if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL"); /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ldMZ = PetscBLASIntCast(ell+1); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0); CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0); CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB); CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT); CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit); CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; KSPLogResidualHistory(ksp, zeta); KSPMonitor(ksp, ksp->its, zeta); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1); CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]); CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM); CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma); CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]); CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]); CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0); CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); break; } } if (bBombed==PETSC_TRUE) break; /* Polynomial part */ for(i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]); CHKERRQ(ierr); } /* Symmetrize MZa */ for(i = 0; i <= bcgsl->ell; ++i) { for(j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar)); CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell); AY0c[0] = -1; LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1); LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione); kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = sqrt(kappa0); kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione); kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); if (kappa1<0) kappa1 = -kappa1; kappa1 = sqrt(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) { omega = AY0c[h]; } if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1); CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecNorm(VVR[0], NORM_2, &zeta); CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM); CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB); CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR); CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }