static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscReal res_norm,res,newnorm; PetscErrorCode ierr; PetscInt it = 0,j,k; PetscBool hapend = PETSC_FALSE; PetscFunctionBegin; if (itcount) *itcount = 0; ierr = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr); res = res_norm; *RS(0) = res_norm; /* check for the convergence */ ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = res; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); pgmres->it = it-2; ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); if (!res) { ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); for (; !ksp->reason; it++) { Vec Zcur,Znext; if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) { ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr); } /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */ Zcur = VEC_VV(it); /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */ Znext = VEC_VV(it+1); /* This iteration will compute Znext, update with a deferred correction once we know how * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */ if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */ ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr); } if (it > 1) { /* Complete the pending reduction */ ierr = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr); *HH(it-1,it-2) = newnorm; } if (it > 0) { /* Finish the reduction computing the latest column of H */ ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr); } if (it > 1) { /* normalize the base vector from two iterations ago, basis is complete up to here */ ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr); ierr = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr); pgmres->it = it-2; ksp->its++; ksp->rnorm = res; ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */ ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } if (ksp->reason) break; /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break; /* The it-2 column of H was not scaled when we computed Zcur, apply correction */ ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* And Znext computed in this iteration was computed using the under-scaled Zcur */ ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */ for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2); /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This * column is complete except for HH(it,it-1) which we won't know until the next iteration. */ *HH(it-1,it-1) /= *HH(it-1,it-2); } if (it > 0) { PetscScalar *work; if (!pgmres->orthogwork) {ierr = PetscMalloc((pgmres->max_k + 2)*sizeof(PetscScalar),&pgmres->orthogwork);CHKERRQ(ierr);} work = pgmres->orthogwork; /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is * * Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1] * * where * * Z[j] = sum_{k=0}^j V[k] * H[k,j-1] * * substituting * * Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1] * * rearranging the iteration space from row-column to column-row * * Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1] * * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already * been transformed to upper triangular form. */ for (k=0; k<it+1; k++) { work[k] = 0; for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1); } ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr); ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr); /* Orthogonalize Zcur against existing basis vectors. */ for (k=0; k<it; k++) work[k] = -*HH(k,it-1); ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr); /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */ /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */ ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr); } /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */ ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr); /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr); } if (itcount) *itcount = it-1; /* Number of iterations actually completed. */ /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_TCQMR(KSP ksp) { PetscReal rnorm0,rnorm,dp1,Gamma; PetscScalar theta,ep,cl1,sl1,cl,sl,sprod,tau_n1,f; PetscScalar deltmp,rho,beta,eptmp,ta,s,c,tau_n,delta; PetscScalar dp11,dp2,rhom1,alpha,tmp; PetscErrorCode ierr; PetscFunctionBegin; ksp->its = 0; ierr = KSPInitialResidual(ksp,x,u,v,r,b);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&rnorm0);CHKERRQ(ierr); /* rnorm0 = ||r|| */ KSPCheckNorm(ksp,rnorm0); ierr = (*ksp->converged)(ksp,0,rnorm0,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecSet(um1,0.0);CHKERRQ(ierr); ierr = VecCopy(r,u);CHKERRQ(ierr); rnorm = rnorm0; tmp = 1.0/rnorm; ierr = VecScale(u,tmp);CHKERRQ(ierr); ierr = VecSet(vm1,0.0);CHKERRQ(ierr); ierr = VecCopy(u,v);CHKERRQ(ierr); ierr = VecCopy(u,v0);CHKERRQ(ierr); ierr = VecSet(pvec1,0.0);CHKERRQ(ierr); ierr = VecSet(pvec2,0.0);CHKERRQ(ierr); ierr = VecSet(p,0.0);CHKERRQ(ierr); theta = 0.0; ep = 0.0; cl1 = 0.0; sl1 = 0.0; cl = 0.0; sl = 0.0; sprod = 1.0; tau_n1= rnorm0; f = 1.0; Gamma = 1.0; rhom1 = 1.0; /* CALCULATE SQUARED LANCZOS vectors */ ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason) { ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ksp->its++; ierr = KSP_PCApplyBAorAB(ksp,u,y,vtmp);CHKERRQ(ierr); /* y = A*u */ ierr = VecDot(y,v0,&dp11);CHKERRQ(ierr); KSPCheckDot(ksp,dp11); ierr = VecDot(u,v0,&dp2);CHKERRQ(ierr); alpha = dp11 / dp2; /* alpha = v0'*y/v0'*u */ deltmp = alpha; ierr = VecCopy(y,z);CHKERRQ(ierr); ierr = VecAXPY(z,-alpha,u);CHKERRQ(ierr); /* z = y - alpha u */ ierr = VecDot(u,v0,&rho);CHKERRQ(ierr); beta = rho / (f*rhom1); rhom1 = rho; ierr = VecCopy(z,utmp);CHKERRQ(ierr); /* up1 = (A-alpha*I)* (z-2*beta*p) + f*beta* beta*um1 */ ierr = VecAXPY(utmp,-2.0*beta,p);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,utmp,up1,vtmp);CHKERRQ(ierr); ierr = VecAXPY(up1,-alpha,utmp);CHKERRQ(ierr); ierr = VecAXPY(up1,f*beta*beta,um1);CHKERRQ(ierr); ierr = VecNorm(up1,NORM_2,&dp1);CHKERRQ(ierr); KSPCheckNorm(ksp,dp1); f = 1.0 / dp1; ierr = VecScale(up1,f);CHKERRQ(ierr); ierr = VecAYPX(p,-beta,z);CHKERRQ(ierr); /* p = f*(z-beta*p) */ ierr = VecScale(p,f);CHKERRQ(ierr); ierr = VecCopy(u,um1);CHKERRQ(ierr); ierr = VecCopy(up1,u);CHKERRQ(ierr); beta = beta/Gamma; eptmp = beta; ierr = KSP_PCApplyBAorAB(ksp,v,vp1,vtmp);CHKERRQ(ierr); ierr = VecAXPY(vp1,-alpha,v);CHKERRQ(ierr); ierr = VecAXPY(vp1,-beta,vm1);CHKERRQ(ierr); ierr = VecNorm(vp1,NORM_2,&Gamma);CHKERRQ(ierr); KSPCheckNorm(ksp,Gamma); ierr = VecScale(vp1,1.0/Gamma);CHKERRQ(ierr); ierr = VecCopy(v,vm1);CHKERRQ(ierr); ierr = VecCopy(vp1,v);CHKERRQ(ierr); /* SOLVE Ax = b */ /* Apply last two Given's (Gl-1 and Gl) rotations to (beta,alpha,Gamma) */ if (ksp->its > 2) { theta = sl1*beta; eptmp = -cl1*beta; } if (ksp->its > 1) { ep = -cl*eptmp + sl*alpha; deltmp = -sl*eptmp - cl*alpha; } if (PetscAbsReal(Gamma) > PetscAbsScalar(deltmp)) { ta = -deltmp / Gamma; s = 1.0 / PetscSqrtScalar(1.0 + ta*ta); c = s*ta; } else { ta = -Gamma/deltmp; c = 1.0 / PetscSqrtScalar(1.0 + ta*ta); s = c*ta; } delta = -c*deltmp + s*Gamma; tau_n = -c*tau_n1; tau_n1 = -s*tau_n1; ierr = VecCopy(vm1,pvec);CHKERRQ(ierr); ierr = VecAXPY(pvec,-theta,pvec2);CHKERRQ(ierr); ierr = VecAXPY(pvec,-ep,pvec1);CHKERRQ(ierr); ierr = VecScale(pvec,1.0/delta);CHKERRQ(ierr); ierr = VecAXPY(x,tau_n,pvec);CHKERRQ(ierr); cl1 = cl; sl1 = sl; cl = c; sl = s; ierr = VecCopy(pvec1,pvec2);CHKERRQ(ierr); ierr = VecCopy(pvec,pvec1);CHKERRQ(ierr); /* Compute the upper bound on the residual norm r (See QMR paper p. 13) */ sprod = sprod*PetscAbsScalar(s); rnorm = rnorm0 * PetscSqrtReal((PetscReal)ksp->its+2.0) * PetscRealPart(sprod); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->its >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } } ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = KSPUnwindPreconditioner(ksp,x,vtmp);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode KSPSolve_Richardson(KSP ksp) { PetscErrorCode ierr; PetscInt i,maxit; PetscReal rnorm = 0.0,abr; PetscScalar scale,rdot; Vec x,b,r,z,w = NULL,y = NULL; PetscInt xs, ws; Mat Amat,Pmat; KSP_Richardson *richardsonP = (KSP_Richardson*)ksp->data; PetscBool exists,diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); ksp->its = 0; ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); x = ksp->vec_sol; b = ksp->vec_rhs; ierr = VecGetSize(x,&xs);CHKERRQ(ierr); ierr = VecGetSize(ksp->work[0],&ws);CHKERRQ(ierr); if (xs != ws) { if (richardsonP->selfscale) { ierr = KSPSetWorkVecs(ksp,4);CHKERRQ(ierr); } else { ierr = KSPSetWorkVecs(ksp,2);CHKERRQ(ierr); } } r = ksp->work[0]; z = ksp->work[1]; if (richardsonP->selfscale) { w = ksp->work[2]; y = ksp->work[3]; } maxit = ksp->max_it; /* if user has provided fast Richardson code use that */ ierr = PCApplyRichardsonExists(ksp->pc,&exists);CHKERRQ(ierr); if (exists && !ksp->numbermonitors && !ksp->transpose_solve & !ksp->nullsp) { PCRichardsonConvergedReason reason; ierr = PCApplyRichardson(ksp->pc,b,x,r,ksp->rtol,ksp->abstol,ksp->divtol,maxit,ksp->guess_zero,&ksp->its,&reason);CHKERRQ(ierr); ksp->reason = (KSPConvergedReason)reason; PetscFunctionReturn(0); } scale = richardsonP->scale; if (!ksp->guess_zero) { /* r <- b - A x */ ierr = KSP_MatMult(ksp,Amat,x,r);CHKERRQ(ierr); ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); } else { ierr = VecCopy(b,r);CHKERRQ(ierr); } ksp->its = 0; if (richardsonP->selfscale) { ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr); /* z <- B r */ for (i=0; i<maxit; i++) { if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /* rnorm <- r'*r */ ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); ksp->rnorm = rnorm; ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; } else if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /* rnorm <- z'*z */ ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); ksp->rnorm = rnorm; ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; } ierr = KSP_PCApplyBAorAB(ksp,z,y,w);CHKERRQ(ierr); /* y = BAz = BABr */ ierr = VecDotNorm2(z,y,&rdot,&abr);CHKERRQ(ierr); /* rdot = (Br)^T(BABR); abr = (BABr)^T (BABr) */ scale = rdot/abr; ierr = PetscInfo1(ksp,"Self-scale factor %g\n",(double)PetscRealPart(scale));CHKERRQ(ierr); ierr = VecAXPY(x,scale,z);CHKERRQ(ierr); /* x <- x + scale z */ ierr = VecAXPY(r,-scale,w);CHKERRQ(ierr); /* r <- r - scale*Az */ ierr = VecAXPY(z,-scale,y);CHKERRQ(ierr); /* z <- z - scale*y */ ksp->its++; } } else { for (i=0; i<maxit; i++) { if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /* rnorm <- r'*r */ ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); ksp->rnorm = rnorm; ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; } ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr); /* z <- B r */ if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /* rnorm <- z'*z */ ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); ksp->rnorm = rnorm; ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; } ierr = VecAXPY(x,scale,z);CHKERRQ(ierr); /* x <- x + scale z */ ksp->its++; if (i+1 < maxit || ksp->normtype != KSP_NORM_NONE) { ierr = KSP_MatMult(ksp,Amat,x,r);CHKERRQ(ierr); /* r <- b - Ax */ ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); } } } if (!ksp->reason) { if (ksp->normtype != KSP_NORM_NONE) { if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /* rnorm <- r'*r */ } else { ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr); /* z <- B r */ ierr = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /* rnorm <- z'*z */ } ksp->rnorm = rnorm; ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); } if (ksp->its >= ksp->max_it) { if (ksp->normtype != KSP_NORM_NONE) { ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; } else { ksp->reason = KSP_CONVERGED_ITS; } } } PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_CGS(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscScalar rho,rhoold,a,s,b; Vec X,B,V,P,R,RP,T,Q,U,AUQ; PetscReal dp = 0.0; PetscBool diagonalscale; PetscFunctionBegin; /* not sure what residual norm it does use, should use for right preconditioning */ ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; RP = ksp->work[1]; V = ksp->work[2]; T = ksp->work[3]; Q = ksp->work[4]; P = ksp->work[5]; U = ksp->work[6]; AUQ = V; /* Compute initial preconditioned residual */ ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr); /* Test for nothing to do */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_NATURAL) dp *= dp; ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Make the initial Rp == R */ ierr = VecCopy(R,RP);CHKERRQ(ierr); /* added for Fidap */ /* Penalize Startup - Isaac Hasbani Trick for CGS Since most initial conditions result in a mostly 0 residual, we change all the 0 values in the vector RP to the maximum. */ if (ksp->normtype == KSP_NORM_NATURAL) { PetscReal vr0max; PetscScalar *tmp_RP=0; PetscInt numnp =0, *max_pos=0; ierr = VecMax(RP, max_pos, &vr0max);CHKERRQ(ierr); ierr = VecGetArray(RP, &tmp_RP);CHKERRQ(ierr); ierr = VecGetLocalSize(RP, &numnp);CHKERRQ(ierr); for (i=0; i<numnp; i++) { if (tmp_RP[i] == 0.0) tmp_RP[i] = vr0max; } ierr = VecRestoreArray(RP, &tmp_RP);CHKERRQ(ierr); } /* end of addition for Fidap */ /* Set the initial conditions */ ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr); /* rhoold = (r,rp) */ ierr = VecCopy(R,U);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr); i = 0; do { ierr = VecDot(V,RP,&s);CHKERRQ(ierr); /* s <- (v,rp) */ a = rhoold / s; /* a <- rho / s */ ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr); /* q <- u - a v */ ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr); /* t <- u + q */ ierr = VecAXPY(X,a,T);CHKERRQ(ierr); /* x <- x + a (u + q) */ ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,U);CHKERRQ(ierr); ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr); /* r <- r - a K (u + q) */ ierr = VecDot(R,RP,&rho);CHKERRQ(ierr); /* rho <- (r,rp) */ if (ksp->normtype == KSP_NORM_NATURAL) { dp = PetscAbsScalar(rho); } else { ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); } ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; b = rho / rhoold; /* b <- rho / rhoold */ ierr = VecWAXPY(U,b,Q,R);CHKERRQ(ierr); /* u <- r + b q */ ierr = VecAXPY(Q,b,P);CHKERRQ(ierr); ierr = VecWAXPY(P,b,Q,U);CHKERRQ(ierr); /* p <- u + b(q + b p) */ ierr = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr); /* v <- K p */ rhoold = rho; i++; } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL*) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscBool bUpdateX; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscScalar utb; PetscReal max_s, pinv_tol; PetscErrorCode ierr; PetscFunctionBegin; /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); else if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); } } /* Polynomial part */ for (i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr); } /* Symmetrize MZa */ for (i = 0; i <= bcgsl->ell; ++i) { for (j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell; ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr); AY0c[0] = -1; if (bcgsl->pinv) { #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable."); #else # if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr)); # else PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr)); # endif #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } /* Apply pseudo-inverse */ max_s = bcgsl->s[0]; for (i=1; i<bell; i++) { if (bcgsl->s[i] > max_s) { max_s = bcgsl->s[i]; } } /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */ pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON; ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0; i<bell; i++) { if (bcgsl->s[i] >= pinv_tol) { utb=0.; for (j=0; j<bell; j++) { utb += MZb[1+j]*bcgsl->u[i*bell+j]; } for (j=0; j<bell; j++) { AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i]; } } } } else { #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); } } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs; ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr)); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione)); kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione)); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = PetscSqrtReal(kappa0); kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione)); kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); if (kappa1<0) kappa1 = -kappa1; kappa1 = PetscSqrtReal(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h]; if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
PetscErrorCode KSPGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_GMRES *gmres = (KSP_GMRES*)(ksp->data); PetscReal res_norm,res,hapbnd,tt; PetscErrorCode ierr; PetscInt it = 0, max_k = gmres->max_k; PetscBool hapend = PETSC_FALSE; PetscFunctionBegin; ierr = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr); res = res_norm; *GRS(0) = res_norm; /* check for the convergence */ ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = res; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); gmres->it = (it - 1); ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); if (!res) { if (itcount) *itcount = 0; ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) { if (it) { ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } gmres->it = (it - 1); if (gmres->vv_allocated <= it + VEC_OFFSET + 1) { ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr); } ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);CHKERRQ(ierr); /* update hessenberg matrix and do Gram-Schmidt */ ierr = (*gmres->orthog)(ksp,it);CHKERRQ(ierr); /* vv(i+1) . vv(i+1) */ ierr = VecNormalize(VEC_VV(it+1),&tt);CHKERRQ(ierr); /* save the magnitude */ *HH(it+1,it) = tt; *HES(it+1,it) = tt; /* check for the happy breakdown */ hapbnd = PetscAbsScalar(tt / *GRS(it)); if (hapbnd > gmres->haptol) hapbnd = gmres->haptol; if (tt < hapbnd) { ierr = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %14.12e tt = %14.12e\n",(double)hapbnd,(double)tt);CHKERRQ(ierr); hapend = PETSC_TRUE; } ierr = KSPGMRESUpdateHessenberg(ksp,it,hapend,&res);CHKERRQ(ierr); it++; gmres->it = (it-1); /* For converged */ ksp->its++; ksp->rnorm = res; if (ksp->reason) break; ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (!ksp->reason) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } } } /* Monitor if we know that we will not return for a restart */ if (it && (ksp->reason || ksp->its >= ksp->max_it)) { ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } if (itcount) *itcount = it; /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ ierr = KSPGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_IBCGS(KSP ksp) { PetscErrorCode ierr; PetscInt i,N; PetscReal rnorm,rnormin = 0.0; #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithematic rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits) and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE. Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double precision into a 16 byte space (the rest of the space is ignored) */ long double insums[7],outsums[7]; #else PetscScalar insums[7],outsums[7]; #endif PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2; PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan; const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un; PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn; /* the rest do not have to keep n_1 values */ PetscScalar kappan, thetan, etan, gamman, betan, deltan; const PetscScalar *PETSC_RESTRICT tn; PetscScalar *PETSC_RESTRICT sn; Vec R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un; Mat A; PetscFunctionBegin; if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors"); ierr = PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); Xn = ksp->vec_sol;ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Xn_1,PETSC_NULL);CHKERRQ(ierr); B = ksp->vec_rhs;ierr = VecGetArrayRead(B,(const PetscScalar**)&b);ierr = VecRestoreArrayRead(B,PETSC_NULL);CHKERRQ(ierr); R0 = ksp->work[0];ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(R0,PETSC_NULL);CHKERRQ(ierr); Rn = ksp->work[1];ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Rn_1,PETSC_NULL);CHKERRQ(ierr); Un = ksp->work[2];ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Un_1,PETSC_NULL);CHKERRQ(ierr); F0 = ksp->work[3];ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(F0,PETSC_NULL);CHKERRQ(ierr); Vn = ksp->work[4];ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Vn_1,PETSC_NULL);CHKERRQ(ierr); Zn = ksp->work[5];ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Zn_1,PETSC_NULL);CHKERRQ(ierr); Qn = ksp->work[6];ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Qn_1,PETSC_NULL);CHKERRQ(ierr); Tn = ksp->work[7];ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Tn,PETSC_NULL);CHKERRQ(ierr); Sn = ksp->work[8];ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Sn,PETSC_NULL);CHKERRQ(ierr); /* r0 = rn_1 = b - A*xn_1; */ /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr); ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */ ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr); ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr); /* un_1 = A*rn_1; */ ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr); /* f0 = A'*rn_1; */ if (ksp->pc_side == PC_RIGHT) { /* B' A' */ ierr = MatMultTranspose(A,R0,Tn);CHKERRQ(ierr); ierr = PCApplyTranspose(ksp->pc,Tn,F0);CHKERRQ(ierr); } else if (ksp->pc_side == PC_LEFT) { /* A' B' */ ierr = PCApplyTranspose(ksp->pc,R0,Tn);CHKERRQ(ierr); ierr = MatMultTranspose(A,Tn,F0);CHKERRQ(ierr); } /*qn_1 = vn_1 = zn_1 = 0.0; */ ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr); sigman_2 = pin_1 = taun_1 = 0.0; /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */ ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); /* sigman_1 = rn_1'un_1 */ ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); alphan_1 = omegan_1 = 1.0; for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) { rhon = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1; /* if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */ if (ksp->its == 1) deltan = rhon; else deltan = rhon/taun_1; betan = deltan/omegan_1; taun = sigman_1 + betan*taun_1 - deltan*pin_1; if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its); alphan = rhon/taun; ierr = PetscLogFlops(15.0); /* zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1 vn = un_1 + betan*vn_1 - deltan*qn_1 sn = rn_1 - alphan*vn The algorithm in the paper is missing the alphan/alphan_1 term in the zn update */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); tmp1 = (alphan/alphan_1)*betan; tmp2 = alphan*deltan; for (i=0; i<N; i++) { zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i]; vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i]; sn[i] = rn_1[i] - alphan*vn[i]; } ierr = PetscLogFlops(3.0+11.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); /* qn = A*vn */ ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr); /* tn = un_1 - alphan*qn */ ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr); /* phin = r0'sn pin = r0'qn gamman = f0'sn etan = f0'tn thetan = sn'tn kappan = tn'tn */ ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); phin = pin = gamman = etan = thetan = kappan = 0.0; for (i=0; i<N; i++) { phin += r0[i]*sn[i]; pin += r0[i]*qn[i]; gamman += f0[i]*sn[i]; etan += f0[i]*tn[i]; thetan += sn[i]*tn[i]; kappan += tn[i]*tn[i]; } ierr = PetscLogFlops(12.0*N); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = phin; insums[1] = pin; insums[2] = gamman; insums[3] = etan; insums[4] = thetan; insums[5] = kappan; insums[6] = rnormin; ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #else if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #endif ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); phin = outsums[0]; pin = outsums[1]; gamman = outsums[2]; etan = outsums[3]; thetan = outsums[4]; kappan = outsums[5]; if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6])); if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its); if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its); omegan = thetan/kappan; sigman = gamman - omegan*etan; /* rn = sn - omegan*tn xn = xn_1 + zn + omegan*sn */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); rnormin = 0.0; for (i=0; i<N; i++) { rn[i] = sn[i] - omegan*tn[i]; rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]); xn[i] += zn[i] + omegan*sn[i]; } ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr); ierr = PetscLogFlops(7.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); if (!ksp->lagnorm && ksp->chknorm < ksp->its) { ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); rnorm = PetscSqrtReal(rnorm); } /* Test for convergence */ ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* un = A*rn */ ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr); /* Update n-1 locations with n locations */ sigman_2 = sigman_1; sigman_1 = sigman; pin_1 = pin; phin_1 = phin; alphan_1 = alphan; taun_1 = taun; omegan_1 = omegan; } if (ksp->its >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode KSPLGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_LGMRES *lgmres = (KSP_LGMRES*)(ksp->data); PetscReal res_norm, res; PetscReal hapbnd, tt; PetscScalar tmp; PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ PetscErrorCode ierr; PetscInt loc_it; /* local count of # of dir. in Krylov space */ PetscInt max_k = lgmres->max_k; /* max approx space size */ PetscInt max_it = ksp->max_it; /* max # of overall iterations for the method */ /* LGMRES_MOD - new variables*/ PetscInt aug_dim = lgmres->aug_dim; PetscInt spot = 0; PetscInt order = 0; PetscInt it_arnoldi; /* number of arnoldi steps to take */ PetscInt it_total; /* total number of its to take (=approx space size)*/ PetscInt ii, jj; PetscReal tmp_norm; PetscScalar inv_tmp_norm; PetscScalar *avec; PetscFunctionBegin; /* Number of pseudo iterations since last restart is the number of prestart directions */ loc_it = 0; /* LGMRES_MOD: determine number of arnoldi steps to take */ /* if approx_constant then we keep the space the same size even if we don't have the full number of aug vectors yet*/ if (lgmres->approx_constant) it_arnoldi = max_k - lgmres->aug_ct; else it_arnoldi = max_k - aug_dim; it_total = it_arnoldi + lgmres->aug_ct; /* initial residual is in VEC_VV(0) - compute its norm*/ ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); res = res_norm; /* first entry in right-hand-side of hessenberg system is just the initial residual norm */ *GRS(0) = res_norm; /* check for the convergence */ if (!res) { if (itcount) *itcount = 0; ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } /* scale VEC_VV (the initial residual) */ tmp = 1.0/res_norm; ierr = VecScale(VEC_VV(0),tmp);CHKERRQ(ierr); ksp->rnorm = res; /* note: (lgmres->it) is always set one less than (loc_it) It is used in KSPBUILDSolution_LGMRES, where it is passed to KSPLGMRESBuildSoln. Note that when KSPLGMRESBuildSoln is called from this function, (loc_it -1) is passed, so the two are equivalent */ lgmres->it = (loc_it - 1); /* MAIN ITERATION LOOP BEGINNING*/ /* keep iterating until we have converged OR generated the max number of directions OR reached the max number of iterations for the method */ ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason && loc_it < it_total && ksp->its < max_it) { /* LGMRES_MOD: changed to it_total */ ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); lgmres->it = (loc_it - 1); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); /* see if more space is needed for work vectors */ if (lgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { ierr = KSPLGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); /* (loc_it+1) is passed in as number of the first vector that should be allocated */ } /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ if (loc_it < it_arnoldi) { /* Arnoldi */ ierr = KSP_PCApplyBAorAB(ksp,VEC_VV(loc_it),VEC_VV(1+loc_it),VEC_TEMP_MATOP);CHKERRQ(ierr); } else { /*aug step */ order = loc_it - it_arnoldi + 1; /* which aug step */ for (ii=0; ii<aug_dim; ii++) { if (lgmres->aug_order[ii] == order) { spot = ii; break; /* must have this because there will be duplicates before aug_ct = aug_dim */ } } ierr = VecCopy(A_AUGVEC(spot), VEC_VV(1+loc_it));CHKERRQ(ierr); /*note: an alternate implementation choice would be to only save the AUGVECS and not A_AUGVEC and then apply the PC here to the augvec */ } /* update hessenberg matrix and do Gram-Schmidt - new direction is in VEC_VV(1+loc_it)*/ ierr = (*lgmres->orthog)(ksp,loc_it);CHKERRQ(ierr); /* new entry in hessenburg is the 2-norm of our new direction */ ierr = VecNorm(VEC_VV(loc_it+1),NORM_2,&tt);CHKERRQ(ierr); *HH(loc_it+1,loc_it) = tt; *HES(loc_it+1,loc_it) = tt; /* check for the happy breakdown */ hapbnd = PetscAbsScalar(tt / *GRS(loc_it)); /* GRS(loc_it) contains the res_norm from the last iteration */ if (hapbnd > lgmres->haptol) hapbnd = lgmres->haptol; if (tt > hapbnd) { tmp = 1.0/tt; ierr = VecScale(VEC_VV(loc_it+1),tmp);CHKERRQ(ierr); /* scale new direction by its norm */ } else { ierr = PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);CHKERRQ(ierr); hapend = PETSC_TRUE; } /* Now apply rotations to new col of hessenberg (and right side of system), calculate new rotation, and get new residual norm at the same time*/ ierr = KSPLGMRESUpdateHessenberg(ksp,loc_it,hapend,&res);CHKERRQ(ierr); if (ksp->reason) break; loc_it++; lgmres->it = (loc_it-1); /* Add this here in case it has converged */ ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = res; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (!ksp->reason) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } } } /* END OF ITERATION LOOP */ ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); /* Monitor if we know that we will not return for a restart */ if (ksp->reason || ksp->its >= max_it) { ierr = KSPMonitor(ksp, ksp->its, res);CHKERRQ(ierr); } if (itcount) *itcount = loc_it; /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ /* Note: must pass in (loc_it-1) for iteration count so that KSPLGMRESBuildSoln properly navigates */ ierr = KSPLGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); /* LGMRES_MOD collect aug vector and A*augvector for future restarts - only if we will be restarting (i.e. this cycle performed it_total iterations) */ if (!ksp->reason && ksp->its < max_it && aug_dim > 0) { /*AUG_TEMP contains the new augmentation vector (assigned in KSPLGMRESBuildSoln) */ if (!lgmres->aug_ct) { spot = 0; lgmres->aug_ct++; } else if (lgmres->aug_ct < aug_dim) { spot = lgmres->aug_ct; lgmres->aug_ct++; } else { /* truncate */ for (ii=0; ii<aug_dim; ii++) { if (lgmres->aug_order[ii] == aug_dim) spot = ii; } } ierr = VecCopy(AUG_TEMP, AUGVEC(spot));CHKERRQ(ierr); /*need to normalize */ ierr = VecNorm(AUGVEC(spot), NORM_2, &tmp_norm);CHKERRQ(ierr); inv_tmp_norm = 1.0/tmp_norm; ierr = VecScale(AUGVEC(spot),inv_tmp_norm);CHKERRQ(ierr); /*set new aug vector to order 1 - move all others back one */ for (ii=0; ii < aug_dim; ii++) AUG_ORDER(ii)++; AUG_ORDER(spot) = 1; /*now add the A*aug vector to A_AUGVEC(spot) - this is independ. of preconditioning type*/ /* want V*H*y - y is in GRS, V is in VEC_VV and H is in HES */ /* first do H+*y */ avec = lgmres->hwork; ierr = PetscMemzero(avec,(it_total+1)*sizeof(*avec));CHKERRQ(ierr); for (ii=0; ii < it_total + 1; ii++) { for (jj=0; jj <= ii+1 && jj < it_total+1; jj++) { avec[jj] += *HES(jj ,ii) * *GRS(ii); } } /*now multiply result by V+ */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); ierr = VecMAXPY(VEC_TEMP, it_total+1, avec, &VEC_VV(0));CHKERRQ(ierr); /*answer is in VEC_TEMP*/ /*copy answer to aug location and scale*/ ierr = VecCopy(VEC_TEMP, A_AUGVEC(spot));CHKERRQ(ierr); ierr = VecScale(A_AUGVEC(spot),inv_tmp_norm);CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPAGMRESBuildBasis(KSP ksp) { PetscErrorCode ierr; KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscReal *Rshift = agmres->Rshift; PetscReal *Ishift = agmres->Ishift; PetscReal *Scale = agmres->Scale; PetscInt max_k = agmres->max_k; PetscInt KspSize = KSPSIZE; /* if max_k == KspSizen then the basis should not be augmented */ PetscInt j = 1; PetscFunctionBegin; ierr = PetscLogEventBegin(KSP_AGMRESBuildBasis, ksp, 0,0,0);CHKERRQ(ierr); Scale[0] = 1.0; while (j <= max_k) { if (Ishift[j-1] == 0) { if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { /* Apply the precond-matrix operators */ ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* Then apply deflation as a preconditioner */ ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr); } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } else { ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } ierr = VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); /* This step can be postponed until all vectors are built */ ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; } else { if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { /* Apply the precond-matrix operators */ ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* Then apply deflation as a preconditioner */ ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr); } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } else { ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } ierr = VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { /* Apply the precond-matrix operators */ ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* Then apply deflation as a preconditioner */ ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr); } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } else { ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } ierr = VecAXPY(VEC_V(j), -Rshift[j-2], VEC_V(j-1));CHKERRQ(ierr); ierr = VecAXPY(VEC_V(j), Scale[j-2]*Ishift[j-2]*Ishift[j-2], VEC_V(j-2));CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; } } /* Augment the subspace with the eigenvectors*/ while (j <= KspSize) { ierr = KSP_PCApplyBAorAB(ksp, agmres->U[j - max_k - 1], VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; } ierr = PetscLogEventEnd(KSP_AGMRESBuildBasis, ksp, 0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_TFQMR(KSP ksp) { PetscErrorCode ierr; PetscInt i,m; PetscScalar rho,rhoold,a,s,b,eta,etaold,psiold,cf; PetscReal dp,dpold,w,dpest,tau,psi,cm; Vec X,B,V,P,R,RP,T,T1,Q,U,D,AUQ; PetscFunctionBegin; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; RP = ksp->work[1]; V = ksp->work[2]; T = ksp->work[3]; Q = ksp->work[4]; P = ksp->work[5]; U = ksp->work[6]; D = ksp->work[7]; T1 = ksp->work[8]; AUQ = V; /* Compute initial preconditioned residual */ ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr); /* Test for nothing to do */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->rnorm = dp; ksp->its = 0; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Make the initial Rp == R */ ierr = VecCopy(R,RP);CHKERRQ(ierr); /* Set the initial conditions */ etaold = 0.0; psiold = 0.0; tau = dp; dpold = dp; ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr); /* rhoold = (r,rp) */ ierr = VecCopy(R,U);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr); ierr = VecSet(D,0.0);CHKERRQ(ierr); i=0; do { ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->its++; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); ierr = VecDot(V,RP,&s);CHKERRQ(ierr); /* s <- (v,rp) */ a = rhoold / s; /* a <- rho / s */ ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr); /* q <- u - a v */ ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr); /* t <- u + q */ ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,T1);CHKERRQ(ierr); ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr); /* r <- r - a K (u + q) */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); for (m=0; m<2; m++) { if (!m) { w = PetscSqrtReal(dp*dpold); } else { w = dp; } psi = w / tau; cm = 1.0 / PetscSqrtReal(1.0 + psi * psi); tau = tau * psi * cm; eta = cm * cm * a; cf = psiold * psiold * etaold / a; if (!m) { ierr = VecAYPX(D,cf,U);CHKERRQ(ierr); } else { ierr = VecAYPX(D,cf,Q);CHKERRQ(ierr); } ierr = VecAXPY(X,eta,D);CHKERRQ(ierr); dpest = PetscSqrtReal(m + 1.0) * tau; ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->rnorm = dpest; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); KSPLogResidualHistory(ksp,dpest); ierr = KSPMonitor(ksp,i+1,dpest);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dpest,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; etaold = eta; psiold = psi; } if (ksp->reason) break; ierr = VecDot(R,RP,&rho);CHKERRQ(ierr); /* rho <- (r,rp) */ b = rho / rhoold; /* b <- rho / rhoold */ ierr = VecWAXPY(U,b,Q,R);CHKERRQ(ierr); /* u <- r + b q */ ierr = VecAXPY(Q,b,P);CHKERRQ(ierr); ierr = VecWAXPY(P,b,Q,U);CHKERRQ(ierr); /* p <- u + b(q + b p) */ ierr = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr); /* v <- K p */ rhoold = rho; dpold = dp; i++; } while (i<ksp->max_it); if (i >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat, epsilon, abstol; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscTruth bUpdateX; PetscTruth bBombed = PETSC_FALSE; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscErrorCode ierr; PetscFunctionBegin; if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL"); if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL"); if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL"); /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ldMZ = PetscBLASIntCast(ell+1); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0); CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0); CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB); CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT); CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit); CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; KSPLogResidualHistory(ksp, zeta); KSPMonitor(ksp, ksp->its, zeta); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1); CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]); CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM); CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma); CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]); CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]); CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0); CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); break; } } if (bBombed==PETSC_TRUE) break; /* Polynomial part */ for(i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]); CHKERRQ(ierr); } /* Symmetrize MZa */ for(i = 0; i <= bcgsl->ell; ++i) { for(j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar)); CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell); AY0c[0] = -1; LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1); LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione); kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = sqrt(kappa0); kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione); kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); if (kappa1<0) kappa1 = -kappa1; kappa1 = sqrt(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) { omega = AY0c[h]; } if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1); CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecNorm(VVR[0], NORM_2, &zeta); CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM); CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB); CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR); CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }