/*
   SNESNGMRESUpdateSubspace_Private - Stores a new (solution, residual) pair in slot
   ivec of the NGMRES history and refreshes the matching entries of Q.

   Input Parameters:
+  snes  - the nonlinear solver; its data pointer is read as a SNES_NGMRES context
.  ivec  - index of the history slot to overwrite
.  l     - current size of the stored subspace
.  F     - residual vector, copied into Fdot[ivec]
.  fnorm - norm of F, recorded in fnorms[ivec]
-  X     - solution vector, copied into Xdot[ivec]

   Notes:
   Q(i,j) is a macro defined elsewhere in this file; it is used here only as an
   assignable two-index accessor.

   Raises PETSC_ERR_ARG_WRONGSTATE when ivec > l.
*/
PetscErrorCode SNESNGMRESUpdateSubspace_Private(SNES snes,PetscInt ivec,PetscInt l,Vec F,PetscReal fnorm,Vec X)
{
  SNES_NGMRES    *ngmres = (SNES_NGMRES*) snes->data;
  Vec            *Fdot   = ngmres->Fdot;
  Vec            *Xdot   = ngmres->Xdot;
  PetscScalar    *xi     = ngmres->xi;
  PetscInt       i;
  PetscReal      nu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscInt must be printed with %D: it may be wider than int */
  if (ivec > l) SETERRQ2(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE,"Cannot update vector %D with space size %D!",ivec,l);

  /* record the new pair and its residual norm */
  ierr = VecCopy(F,Fdot[ivec]);CHKERRQ(ierr);
  ierr = VecCopy(X,Xdot[ivec]);CHKERRQ(ierr);

  ngmres->fnorms[ivec] = fnorm;

  if (l > 0) {
    /* one multi-dot against the first l stored residuals (Fdot[ivec] already holds F) */
    ierr = VecMDot(F,l,Fdot,xi);CHKERRQ(ierr);
    /* write the results into both row ivec and column ivec of Q */
    for (i = 0; i < l; i++) {
      Q(i,ivec) = xi[i];
      Q(ivec,i) = xi[i];
    }
  } else {
    /* empty subspace: the only entry is the squared residual norm */
    nu     = fnorm*fnorm;
    Q(0,0) = nu;
  }
  PetscFunctionReturn(0);
}
/*@C
   MatNullSpaceRemove - Strips every component lying in a null space out of a vector.

   Collective on MatNullSpace

   Input Parameters:
+  sp - the null space context (a NULL context means nothing is removed)
-  vec - the vector to be modified in place

   Level: advanced

.keywords: PC, null space, remove

.seealso: MatNullSpaceCreate(), MatNullSpaceDestroy(), MatNullSpaceSetFunction()
@*/
PetscErrorCode MatNullSpaceRemove(MatNullSpace sp,Vec vec)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* no context: nothing to do */
  if (!sp) PetscFunctionReturn(0);
  PetscValidHeaderSpecific(sp,MAT_NULLSPACE_CLASSID,1);
  PetscValidHeaderSpecific(vec,VEC_CLASSID,2);

  /* constant component: shift every entry by -(sum of entries)/N */
  if (sp->has_cnst) {
    PetscInt N;

    ierr = VecGetSize(vec,&N);CHKERRQ(ierr);
    if (N > 0) {
      PetscScalar sum;

      ierr = VecSum(vec,&sum);CHKERRQ(ierr);
      sum  = sum/((PetscScalar)(-1.0*N));
      ierr = VecShift(vec,sum);CHKERRQ(ierr);
    }
  }

  /* explicit basis vectors: vec <- vec - sum_k (vec . vecs[k]) vecs[k] */
  if (sp->n) {
    PetscScalar *coef = sp->alpha;
    PetscInt    k;

    ierr = VecMDot(vec,sp->n,sp->vecs,coef);CHKERRQ(ierr);
    for (k=0; k<sp->n; k++) coef[k] = -coef[k];
    ierr = VecMAXPY(vec,sp->n,coef,sp->vecs);CHKERRQ(ierr);
  }

  /* user-supplied removal callback, invoked last with its stored context */
  if (sp->remove) {
    ierr = (*sp->remove)(sp,vec,sp->rmctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc, char **argv) { PetscErrorCode ierr; Vec *V,t; PetscInt i,j,reps,n=15,k=6; PetscRandom rctx; PetscScalar *val_dot,*val_mdot,*tval_dot,*tval_mdot; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-k",&k,NULL);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Test with %D random vectors of length %D",k,n);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n",k,n);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&t);CHKERRQ(ierr); ierr = VecSetSizes(t,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(t);CHKERRQ(ierr); ierr = VecDuplicateVecs(t,k,&V);CHKERRQ(ierr); ierr = VecSetRandom(t,rctx);CHKERRQ(ierr); ierr = PetscMalloc1(k,&val_dot);CHKERRQ(ierr); ierr = PetscMalloc1(k,&val_mdot);CHKERRQ(ierr); ierr = PetscMalloc1(k,&tval_dot);CHKERRQ(ierr); ierr = PetscMalloc1(k,&tval_mdot);CHKERRQ(ierr); for (i=0; i<k; i++) { ierr = VecSetRandom(V[i],rctx);CHKERRQ(ierr); } for (reps=0; reps<20; reps++) { for (i=1; i<k; i++) { ierr = VecMDot(t,i,V,val_mdot);CHKERRQ(ierr); ierr = VecMTDot(t,i,V,tval_mdot);CHKERRQ(ierr); for (j=0;j<i;j++) { ierr = VecDot(t,V[j],&val_dot[j]);CHKERRQ(ierr); ierr = VecTDot(t,V[j],&tval_dot[j]);CHKERRQ(ierr); } /* Check result */ for (j=0;j<i;j++) { if (PetscAbsScalar(val_mdot[j] - val_dot[j])/PetscAbsScalar(val_dot[j]) > 1e-5) { ierr = PetscPrintf(PETSC_COMM_WORLD, "[TEST FAILED] i=%D, j=%D, val_mdot[j]=%g, val_dot[j]=%g\n",i,j,(double)PetscAbsScalar(val_mdot[j]), (double)PetscAbsScalar(val_dot[j]));CHKERRQ(ierr); break; } if (PetscAbsScalar(tval_mdot[j] - tval_dot[j])/PetscAbsScalar(tval_dot[j]) > 1e-5) { ierr = PetscPrintf(PETSC_COMM_WORLD, "[TEST FAILED] i=%D, j=%D, tval_mdot[j]=%g, tval_dot[j]=%g\n",i,j,(double)PetscAbsScalar(tval_mdot[j]), 
(double)PetscAbsScalar(tval_dot[j]));CHKERRQ(ierr); break; } } } } ierr = PetscPrintf(PETSC_COMM_WORLD,"Test completed successfully!\n",k,n);CHKERRQ(ierr); ierr = PetscFree(val_dot);CHKERRQ(ierr); ierr = PetscFree(val_mdot);CHKERRQ(ierr); ierr = PetscFree(tval_dot);CHKERRQ(ierr); ierr = PetscFree(tval_mdot);CHKERRQ(ierr); ierr = VecDestroyVecs(k,&V);CHKERRQ(ierr); ierr = VecDestroy(&t);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
void FETI_Operations::apply_RB_projection(Vec vec_in, Vec vec_out)
{
	// Both the null space vectors and the inverse matrix must have been prepared beforehand
	homemade_assert_msg(m_bNullVecsSet,"Null space vectors not set yet!");
	homemade_assert_msg(m_binvRITRIMatSet,"Null space matrices not set yet!");

	// Goal: vec_out = [ I - RC * (inv_RITRI_mat) * RC^t ] * vec_in

	// Two sequential work vectors of length 'm_null_nb_vecs'
	Vec rc_dots;
	Vec rc_weights;
	VecCreateSeq(PETSC_COMM_SELF,m_null_nb_vecs,&rc_dots);
	VecZeroEntries(rc_dots);
	VecDuplicate(rc_dots,&rc_weights);

	// Step 1: rc_dots = RC^t * vec_in
	//   VecMDot writes straight into the raw storage of rc_dots.
	//   (per the original author: this is where all the communication happens)
	PetscScalar *raw_values;
	VecGetArray(rc_dots,&raw_values);
	VecMDot(vec_in,m_null_nb_vecs,m_null_coupled_vecs,raw_values);
	VecRestoreArray(rc_dots,&raw_values);

	// Step 2: rc_weights = - inv_RITRI_mat * rc_dots
	//   Delegated to PETSC_MatMultScale_Bcast with a scale of -1. According to the
	//   original author, the product is evaluated on the first processor and then
	//   broadcast: doing it locally on every rank led to a
	//   "Value must the same in all processors" error in the VecMAXPY call below.
	PETSC_MatMultScale_Bcast(m_inv_RITRI_mat,rc_dots,rc_weights,-1);
	m_comm.barrier();

	// Step 3: vec_out = vec_in + sum ( rc_weights[i] * vec_RC[i] )
	//   (per the original author: no communication is expected here)
	VecCopy(vec_in,vec_out);

	VecGetArray(rc_weights,&raw_values);
	VecMAXPY(vec_out,m_null_nb_vecs,raw_values,m_null_coupled_vecs);
	VecRestoreArray(rc_weights,&raw_values);

	// Release the work vectors
	VecDestroy(&rc_dots);
	VecDestroy(&rc_weights);
}
/*
   KSPFischerGuessFormGuess_Method1 - Builds an initial guess x for right-hand side b
   from the stored vectors of the Fischer guess context.

   Input Parameters:
+  itg - the Fischer guess context; supplies curl, btilde, xtilde, alpha, monitor, ksp and guess
-  b   - the right-hand side

   Output Parameter:
.  x   - overwritten with sum_i alpha[i]*xtilde[i], where alpha[i] is the dot product of
         b with btilde[i]; the same vector is also copied into itg->guess

   Notes:
   When itg->monitor is set, the magnitudes of the alpha coefficients are printed on
   the communicator of itg->ksp.
*/
PetscErrorCode KSPFischerGuessFormGuess_Method1(KSPFischerGuess_Method1 *itg,Vec b,Vec x)
{
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  /* the index passed to the validation macros is the argument position in this signature */
  PetscValidPointer(itg,1);
  PetscValidHeaderSpecific(x,VEC_CLASSID,3);

  /* start from zero, then accumulate the combination of stored solutions */
  ierr = VecSet(x,0.0);CHKERRQ(ierr);
  ierr = VecMDot(b,itg->curl,itg->btilde,itg->alpha);CHKERRQ(ierr);
  if (itg->monitor) {
    ierr = PetscPrintf(((PetscObject)itg->ksp)->comm,"KSPFischerGuess alphas = ");CHKERRQ(ierr);
    for (i=0; i<itg->curl; i++) {
      ierr = PetscPrintf(((PetscObject)itg->ksp)->comm,"%g ",(double)PetscAbsScalar(itg->alpha[i]));CHKERRQ(ierr);
    }
    ierr = PetscPrintf(((PetscObject)itg->ksp)->comm,"\n");CHKERRQ(ierr);
  }

  ierr = VecMAXPY(x,itg->curl,itg->alpha,itg->xtilde);CHKERRQ(ierr);
  /* keep a copy of the guess in the context */
  ierr = VecCopy(x,itg->guess);CHKERRQ(ierr);
  /* Note: do not change the b right hand side as is done in the publication */
  PetscFunctionReturn(0);
}
/*
   KSPFischerGuessUpdate_Method1 - Adds the information of a newly computed solution x
   to the stored vectors of the Fischer guess context.

   Input Parameters:
+  itg - the Fischer guess context; btilde, xtilde, alpha and curl are updated
-  x   - the solution just computed

   Notes:
   When the space is full (curl == maxl) it is restarted from x alone: btilde[0] is
   the normalized product of the operator with x, xtilde[0] is x scaled by the same
   factor, and curl becomes 1.

   Otherwise the new direction is x itself (first entry) or x - guess (later entries).
   Its operator image is orthogonalized against the stored btilde vectors, the same
   combination is applied to the xtilde side, and the pair is normalized. If the
   orthogonalized image has zero norm the space is left at its current size and an
   informational message is logged.
*/
PetscErrorCode KSPFischerGuessUpdate_Method1(KSPFischerGuess_Method1 *itg,Vec x)
{
  PetscReal      norm;
  PetscErrorCode ierr;
  PetscInt       curl,i;   /* PetscInt, not int: curl is passed to VecMDot()/VecMAXPY() as a count */

  PetscFunctionBegin;
  /* validate itg before it is dereferenced; indices are argument positions */
  PetscValidPointer(itg,1);
  PetscValidHeaderSpecific(x,VEC_CLASSID,2);
  curl = itg->curl;

  if (curl == itg->maxl) {
    /* space is full: restart it with the single pair derived from x */
    ierr = KSP_MatMult(itg->ksp,itg->mat,x,itg->btilde[0]);CHKERRQ(ierr);
    ierr = VecNormalize(itg->btilde[0],&norm);CHKERRQ(ierr);
    ierr = VecCopy(x,itg->xtilde[0]);CHKERRQ(ierr);
    ierr = VecScale(itg->xtilde[0],1.0/norm);CHKERRQ(ierr);

    itg->curl = 1;
  } else {
    /* new direction: x for the first entry, x - guess afterwards */
    if (!curl) {
      ierr = VecCopy(x,itg->xtilde[curl]);CHKERRQ(ierr);
    } else {
      ierr = VecWAXPY(itg->xtilde[curl],-1.0,itg->guess,x);CHKERRQ(ierr);
    }
    ierr = KSP_MatMult(itg->ksp,itg->mat,itg->xtilde[curl],itg->btilde[curl]);CHKERRQ(ierr);

    /* orthogonalize btilde[curl] against btilde[0..curl-1]; mirror the update on xtilde */
    ierr = VecMDot(itg->btilde[curl],curl,itg->btilde,itg->alpha);CHKERRQ(ierr);
    for (i=0; i<curl; i++) itg->alpha[i] = -itg->alpha[i];
    ierr = VecMAXPY(itg->btilde[curl],curl,itg->alpha,itg->btilde);CHKERRQ(ierr);
    ierr = VecMAXPY(itg->xtilde[curl],curl,itg->alpha,itg->xtilde);CHKERRQ(ierr);

    ierr = VecNormalize(itg->btilde[curl],&norm);CHKERRQ(ierr);
    if (norm) {
      /* accept the new pair */
      ierr = VecScale(itg->xtilde[curl],1.0/norm);CHKERRQ(ierr);
      itg->curl++;
    } else {
      ierr = PetscInfo(itg->ksp,"Not increasing dimension of Fischer space because new direction is identical to previous\n");CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
void PetscNonlinearSolver<T>::build_mat_null_space(NonlinearImplicitSystem::ComputeVectorSubspace* computeSubspaceObject,
                                                   void (*computeSubspace)(std::vector<NumericVector<Number>*>&, sys_type&),
                                                   MatNullSpace *msp)
{
  // Builds a PETSc MatNullSpace from the basis produced by a user callback.
  //
  // computeSubspaceObject : functor filling the basis; used when non-null
  // computeSubspace       : plain function fallback, used otherwise
  // msp                   : receives the new null space, or PETSC_NULL when the
  //                         callback produced no vectors
  PetscErrorCode ierr;
  std::vector<NumericVector<Number>* > basis;

  if (computeSubspaceObject)
    (*computeSubspaceObject)(basis, this->system());
  else
    (*computeSubspace)(basis, this->system());

  *msp = PETSC_NULL;
  if (basis.empty())
    return;

  Vec *modes;
  PetscScalar *dots;
  PetscInt nmodes = basis.size();

  ierr = PetscMalloc2(nmodes,Vec,&modes,nmodes,PetscScalar,&dots);
  LIBMESH_CHKERRABORT(ierr);

  // Work on private copies so the vectors handed in by the callback stay untouched
  for (PetscInt m=0; m<nmodes; ++m)
    {
      Vec src = libmesh_cast_ptr<PetscVector<T>*>(basis[m])->vec();

      ierr = VecDuplicate(src, modes+m);
      LIBMESH_CHKERRABORT(ierr);

      ierr = VecCopy(src,modes[m]);
      LIBMESH_CHKERRABORT(ierr);
    }

  // Gram-Schmidt sweep: mode 0 is only normalized, every later mode is first
  // projected against all earlier ones and then normalized
  for (PetscInt m=0; m<nmodes; ++m)
    {
      if (m > 0)
        {
          ierr = VecMDot(modes[m],m,modes,dots);
          LIBMESH_CHKERRABORT(ierr);

          for (PetscInt p=0; p<m; ++p)
            dots[p] *= -1.;

          ierr = VecMAXPY(modes[m],m,dots,modes);
          LIBMESH_CHKERRABORT(ierr);
        }

      ierr = VecNormalize(modes[m],PETSC_NULL);
      LIBMESH_CHKERRABORT(ierr);
    }

  ierr = MatNullSpaceCreate(this->comm().get(), PETSC_FALSE, nmodes, modes, msp);
  LIBMESH_CHKERRABORT(ierr);

  // The local copies and the scratch arrays are released here
  for (PetscInt m=0; m<nmodes; ++m)
    {
      ierr = VecDestroy(modes+m);
      LIBMESH_CHKERRABORT(ierr);
    }

  ierr = PetscFree2(modes,dots);
  LIBMESH_CHKERRABORT(ierr);
}
/*@
   MatNullSpaceCreateRigidBody - create rigid body modes from coordinates

   Collective on Vec

   Input Argument:
.  coords - block of coordinates of each node, must have block size set

   Output Argument:
.  sp - the null space

   Level: advanced

   Notes:
   If you are solving an elasticity problem you should likely use this, in conjunction with MatSetNearNullspace(), to provide information that
   the PCGAMG preconditioner can use to construct a much more efficient preconditioner.

   If you are solving an elasticity problem with pure Neumann boundary conditions you can use this in conjunction with MatSetNullspace() to
   provide this information to the linear solver so it can handle the null space appropriately in the linear solution.

   The block size of coords is taken as the spatial dimension. Block size 1 yields the constant null space, block size 2 yields 3 modes
   and block size 3 yields 6 modes. Any other block size generates PETSC_ERR_ARG_OUTOFRANGE.

.seealso: MatNullSpaceCreate(), MatSetNearNullspace(), MatSetNullspace()
@*/
PetscErrorCode MatNullSpaceCreateRigidBody(Vec coords,MatNullSpace *sp)
{
  PetscErrorCode    ierr;
  const PetscScalar *x;
  PetscScalar       *v[6],dots[5];
  Vec               vec[6];
  PetscInt          n,N,dim,nmodes,i,j;
  PetscReal         sN;

  PetscFunctionBegin;
  ierr = VecGetBlockSize(coords,&dim);CHKERRQ(ierr);
  ierr = VecGetLocalSize(coords,&n);CHKERRQ(ierr);
  ierr = VecGetSize(coords,&N);CHKERRQ(ierr);
  /* convert entry counts into node counts */
  n   /= dim;
  N   /= dim;
  /* with N global nodes, a translation mode filled with sN has unit 2-norm */
  sN   = 1./PetscSqrtReal((PetscReal)N);
  switch (dim) {
  case 1:
    /* 1D: the only rigid body mode is the constant vector */
    ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coords),PETSC_TRUE,0,NULL,sp);CHKERRQ(ierr);
    break;
  case 2:
  case 3:
    nmodes = (dim == 2) ? 3 : 6;
    /* mode vectors share the layout and block size of coords */
    ierr   = VecCreate(PetscObjectComm((PetscObject)coords),&vec[0]);CHKERRQ(ierr);
    ierr   = VecSetSizes(vec[0],dim*n,dim*N);CHKERRQ(ierr);
    ierr   = VecSetBlockSize(vec[0],dim);CHKERRQ(ierr);
    ierr   = VecSetUp(vec[0]);CHKERRQ(ierr);
    for (i=1; i<nmodes; i++) {ierr = VecDuplicate(vec[0],&vec[i]);CHKERRQ(ierr);}
    for (i=0; i<nmodes; i++) {ierr = VecGetArray(vec[i],&v[i]);CHKERRQ(ierr);}
    ierr = VecGetArrayRead(coords,&x);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      if (dim == 2) {
        /* Translations */
        v[0][i*2+0] = sN;
        v[0][i*2+1] = 0.;
        v[1][i*2+0] = 0.;
        v[1][i*2+1] = sN;
        /* Rotations */
        v[2][i*2+0] = -x[i*2+1];
        v[2][i*2+1] = x[i*2+0];
      } else {
        /* Translations */
        v[0][i*3+0] = sN;
        v[0][i*3+1] = 0.;
        v[0][i*3+2] = 0.;
        v[1][i*3+0] = 0.;
        v[1][i*3+1] = sN;
        v[1][i*3+2] = 0.;
        v[2][i*3+0] = 0.;
        v[2][i*3+1] = 0.;
        v[2][i*3+2] = sN;
        /* Rotations */
        v[3][i*3+0] = x[i*3+1];
        v[3][i*3+1] = -x[i*3+0];
        v[3][i*3+2] = 0.;
        v[4][i*3+0] = 0.;
        v[4][i*3+1] = -x[i*3+2];
        v[4][i*3+2] = x[i*3+1];
        v[5][i*3+0] = x[i*3+2];
        v[5][i*3+1] = 0.;
        v[5][i*3+2] = -x[i*3+0];
      }
    }
    for (i=0; i<nmodes; i++) {ierr = VecRestoreArray(vec[i],&v[i]);CHKERRQ(ierr);}
    ierr = VecRestoreArrayRead(coords,&x);CHKERRQ(ierr);
    /* only the rotation modes (indices dim..nmodes-1) are processed here */
    for (i=dim; i<nmodes; i++) {
      /* Orthonormalize vec[i] against vec[0:i-1] */
      ierr = VecMDot(vec[i],i,vec,dots);CHKERRQ(ierr);
      for (j=0; j<i; j++) dots[j] *= -1.;
      ierr = VecMAXPY(vec[i],i,dots,vec);CHKERRQ(ierr);
      ierr = VecNormalize(vec[i],NULL);CHKERRQ(ierr);
    }
    ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coords),PETSC_FALSE,nmodes,vec,sp);CHKERRQ(ierr);
    /* the local handles are destroyed once the null space has been created from them */
    for (i=0; i<nmodes; i++) {ierr = VecDestroy(&vec[i]);CHKERRQ(ierr);}
    break;
  default:
    /* without this branch the routine would return success with *sp never written */
    SETERRQ1(PetscObjectComm((PetscObject)coords),PETSC_ERR_ARG_OUTOFRANGE,"Unsupported block size %D, coordinates must have block size 1, 2 or 3",dim);
  }
  PetscFunctionReturn(0);
}
/*
   SNESQNApply_LBFGS - Applies the limited-memory quasi-Newton update to produce the
   step direction Y from the current function value D.

   Input Parameters:
+  snes - the nonlinear solver; its data pointer is read as a SNES_QN context
.  it   - current iteration number; at most qn->m previous updates are used
.  X    - current solution
.  Xold - previous solution
.  D    - current function value
-  Dold - previous function value

   Output Parameter:
.  Y    - the resulting direction

   Notes:
   For it > 0 the differences D - Dold and X - Xold are stored in slot (it-1) % l of
   the dF and dX histories, where l = min(it, qn->m). The direction is then built by
   an outward recursion over the stored pairs, a scaling step (scalar scaling, or a
   linear solve with the Jacobian for SNES_QN_SCALE_JACOBIAN), and an inward recursion.

   With qn->singlereduction the inner products are gathered with VecMDot() and kept
   in H(i,j), a macro defined elsewhere in this file; otherwise individual VecDot()
   calls are made inside the recursions.

   If the Jacobian linear solve fails and the allowed number of linear solve failures
   is reached, snes->reason is set to SNES_DIVERGED_LINEAR_SOLVE and the routine
   returns early.
*/
PetscErrorCode SNESQNApply_LBFGS(SNES snes,PetscInt it,Vec Y,Vec X,Vec Xold,Vec D,Vec Dold)
{
  PetscErrorCode ierr;
  SNES_QN        *qn    = (SNES_QN*)snes->data;
  Vec            W      = snes->work[3];
  Vec            *dX    = qn->U;
  Vec            *dF    = qn->V;
  PetscScalar    *alpha = qn->alpha;
  PetscScalar    *beta  = qn->beta;
  PetscScalar    *dXtdF = qn->dXtdF;
  PetscScalar    *dFtdX = qn->dFtdX;
  PetscScalar    *YtdX  = qn->YtdX;

  /* ksp thing for jacobian scaling */
  KSPConvergedReason kspreason;
  MatStructure       flg = DIFFERENT_NONZERO_PATTERN;

  PetscInt    k,i,j,g,lits;
  PetscInt    m = qn->m;
  PetscScalar t;
  PetscInt    l = m;

  Mat jac,jac_pre;

  PetscFunctionBegin;
  /* fewer than m updates are available during the first iterations */
  if (it < m) l = it;
  if (it > 0) {
    /* store the newest differences in their circular slot */
    k    = (it - 1) % l;
    ierr = VecCopy(D, dF[k]);CHKERRQ(ierr);
    ierr = VecAXPY(dF[k], -1.0, Dold);CHKERRQ(ierr);
    ierr = VecCopy(X, dX[k]);CHKERRQ(ierr);
    ierr = VecAXPY(dX[k], -1.0, Xold);CHKERRQ(ierr);
    if (qn->singlereduction) {
      /* refresh row k and column k of H with two multi-dots */
      ierr = VecMDot(dF[k], l, dX, dXtdF);CHKERRQ(ierr);
      ierr = VecMDot(dX[k], l, dF, dFtdX);CHKERRQ(ierr);
      for (j = 0; j < l; j++) {
        H(k, j) = dFtdX[j];
        H(j, k) = dXtdF[j];
      }
      /* copy back over to make the computation of alpha and beta easier */
      for (j = 0; j < l; j++) {
        dXtdF[j] = H(j, j);
      }
    } else {
      ierr = VecDot(dX[k], dF[k], &dXtdF[k]);CHKERRQ(ierr);
    }
    if (qn->scale_type == SNES_QN_SCALE_SHANNO) {
      PetscReal dFtdF;
      ierr        = VecDotRealPart(dF[k],dF[k],&dFtdF);CHKERRQ(ierr);
      qn->scaling = PetscRealPart(dXtdF[k])/dFtdF;
    } else if (qn->scale_type == SNES_QN_SCALE_LINESEARCH) {
      ierr = SNESLineSearchGetLambda(snes->linesearch,&qn->scaling);CHKERRQ(ierr);
    }
  }

  ierr = VecCopy(D,Y);CHKERRQ(ierr);

  if (qn->singlereduction) {
    ierr = VecMDot(Y,l,dX,YtdX);CHKERRQ(ierr);
  }
  /* outward recursion starting at iteration k's update and working back */
  for (i=0;i<l;i++) {
    k = (it-i-1)%l;
    if (qn->singlereduction) {
      /* construct t = dX[k] dot Y as Y_0 dot dX[k] + sum(-alpha[j]dX[k]dF[j]) */
      t = YtdX[k];
      for (j=0;j<i;j++) {
        g  = (it-j-1)%l;
        t += -alpha[g]*H(g, k);
      }
      alpha[k] = t/H(k,k);
    } else {
      ierr     = VecDot(dX[k],Y,&t);CHKERRQ(ierr);
      alpha[k] = t/dXtdF[k];
    }
    if (qn->monitor) {
      /* PetscInt is printed with %D and the real value is cast to double for %e */
      ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(qn->monitor, "it: %D k: %D alpha: %14.12e\n", it, k, (double)PetscRealPart(alpha[k]));CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
    }
    ierr = VecAXPY(Y,-alpha[k],dF[k]);CHKERRQ(ierr);
  }

  if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) {
    /* scale by solving with the Jacobian: W receives the solve applied to Y */
    ierr = SNESGetJacobian(snes, &jac, &jac_pre, PETSC_NULL, PETSC_NULL);CHKERRQ(ierr);
    ierr = KSPSetOperators(snes->ksp,jac,jac_pre,flg);CHKERRQ(ierr);
    ierr = SNES_KSPSolve(snes,snes->ksp,Y,W);CHKERRQ(ierr);
    ierr = KSPGetConvergedReason(snes->ksp,&kspreason);CHKERRQ(ierr);
    if (kspreason < 0) {
      if (++snes->numLinearSolveFailures >= snes->maxLinearSolveFailures) {
        ierr         = PetscInfo2(snes,"iter=%D, number linear solve failures %D greater than current SNES allowed, stopping solve\n",snes->iter,snes->numLinearSolveFailures);CHKERRQ(ierr);
        snes->reason = SNES_DIVERGED_LINEAR_SOLVE;
        PetscFunctionReturn(0);
      }
    }
    ierr              = KSPGetIterationNumber(snes->ksp,&lits);CHKERRQ(ierr);
    snes->linear_its += lits;
    ierr              = VecCopy(W, Y);CHKERRQ(ierr);
  } else {
    ierr = VecScale(Y, qn->scaling);CHKERRQ(ierr);
  }

  if (qn->singlereduction) {
    ierr = VecMDot(Y,l,dF,YtdX);CHKERRQ(ierr);
  }
  /* inward recursion starting at the first update and working forward */
  for (i = 0; i < l; i++) {
    k = (it + i - l) % l;
    if (qn->singlereduction) {
      t = YtdX[k];
      for (j = 0; j < i; j++) {
        g  = (it + j - l) % l;
        t += (alpha[g] - beta[g])*H(k, g);
      }
      beta[k] = t / H(k, k);
    } else {
      ierr    = VecDot(dF[k], Y, &t);CHKERRQ(ierr);
      beta[k] = t / dXtdF[k];
    }
    ierr = VecAXPY(Y, (alpha[k] - beta[k]), dX[k]);CHKERRQ(ierr);
    if (qn->monitor) {
      ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(qn->monitor, "it: %D k: %D alpha - beta: %14.12e\n", it, k, (double)PetscRealPart(alpha[k] - beta[k]));CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
/*
   KSPSolve_CG - Runs the preconditioned conjugate gradient iteration on the system
   held by ksp.

   Input Parameter:
.  ksp - the Krylov solver context; solution, right-hand side, work vectors, norm type,
         iteration limit and the KSP_CG data are all read from it

   Notes:
   The solution vector ksp->vec_sol is updated in place and ksp->reason, ksp->its and
   ksp->rnorm are set. Diagonal scaling is not supported and generates PETSC_ERR_SUP.

   The monitored residual norm depends on ksp->normtype: preconditioned (norm of z),
   unpreconditioned (norm of r), natural (sqrt of |z'r|) or none (always 0).

   With cg->singlereduction two extra work vectors are used and the two inner products
   needed per iteration are obtained from one VecMDot() call, with p'w updated by a
   recurrence after the first iteration.

   VecXDot() selects VecDot() or VecTDot() according to cg->type.

   When eigenvalue/singular value estimates are requested (ksp->calc_sings), the
   entries of cg->e and cg->d are filled during the iteration and the maximum
   iteration count must not change in the meantime.
*/
PetscErrorCode KSPSolve_CG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,stored_max_it,eigs;
  PetscScalar    dpi = 0.0,a = 1.0,beta,betaold = 1.0,b = 0,*e = 0,*d = 0,delta,dpiold;
  PetscReal      dp  = 0.0;
  Vec            X,B,Z,R,P,S,W;
  KSP_CG         *cg;
  Mat            Amat,Pmat;
  PetscBool      diagonalscale;

  PetscFunctionBegin;
  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  cg            = (KSP_CG*)ksp->data;
  eigs          = ksp->calc_sings;
  stored_max_it = ksp->max_it;
  X             = ksp->vec_sol;
  B             = ksp->vec_rhs;
  R             = ksp->work[0];
  Z             = ksp->work[1];
  P             = ksp->work[2];
  if (cg->singlereduction) {
    S = ksp->work[3];
    W = ksp->work[4];
  } else {
    S = 0;                      /* unused */
    W = Z;
  }

#define VecXDot(x,y,a) (((cg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x,y,a) : VecTDot(x,y,a))

  if (eigs) {e = cg->e; d = cg->d; e[0] = 0.0; }
  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);

  ksp->its = 0;
  if (!ksp->guess_zero) {
    ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);            /*     r <- b - Ax                       */
    ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(B,R);CHKERRQ(ierr);                         /*     r <- b (x is 0)                   */
  }

  /* initial residual norm, in the norm requested by the user */
  switch (ksp->normtype) {
  case KSP_NORM_PRECONDITIONED:
    ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                 /*     z <- Br                           */
    ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);                /*    dp <- z'*z = e'*A'*B'*B*A'*e'      */
    break;
  case KSP_NORM_UNPRECONDITIONED:
    ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);                /*    dp <- r'*r = e'*A'*A*e             */
    break;
  case KSP_NORM_NATURAL:
    ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                 /*     z <- Br                           */
    if (cg->singlereduction) {
      ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr);
    }
    ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);                   /*  beta <- z'*r                         */
    if (PetscIsInfOrNanScalar(beta)) {
      if (ksp->errorifnotconverged) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to Nan or Inf inner product");
      else {
        ksp->reason = KSP_DIVERGED_NANORINF;
        PetscFunctionReturn(0);
      }
    }
    dp = PetscSqrtReal(PetscAbsScalar(beta));                  /*    dp <- r'*z = r'*B*r = e'*A'*B*A*e  */
    break;
  case KSP_NORM_NONE:
    dp = 0.0;
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
  }
  ierr       = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
  ierr       = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
  ksp->rnorm = dp;

  ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);      /* test for convergence */
  if (ksp->reason) PetscFunctionReturn(0);

  /* z and beta are still missing for the norm types that did not need them above */
  if (ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) {
    ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                 /*     z <- Br                           */
  }
  if (ksp->normtype != KSP_NORM_NATURAL) {
    if (cg->singlereduction) {
      ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr);
    }
    ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);                   /*  beta <- z'*r                         */
    if (PetscIsInfOrNanScalar(beta)) {
      if (ksp->errorifnotconverged) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to Nan or Inf inner product");
      else {
        ksp->reason = KSP_DIVERGED_NANORINF;
        PetscFunctionReturn(0);
      }
    }
  }

  i = 0;
  do {
    ksp->its = i+1;
    if (beta == 0.0) {
      ksp->reason = KSP_CONVERGED_ATOL;
      ierr        = PetscInfo(ksp,"converged due to beta = 0\n");CHKERRQ(ierr);
      break;
#if !defined(PETSC_USE_COMPLEX)
    } else if ((i > 0) && (beta*betaold < 0.0)) {
      ksp->reason = KSP_DIVERGED_INDEFINITE_PC;
      ierr        = PetscInfo(ksp,"diverging due to indefinite preconditioner\n");CHKERRQ(ierr);
      break;
#endif
    }
    if (!i) {
      ierr = VecCopy(Z,P);CHKERRQ(ierr);                       /*     p <- z                            */
      b    = 0.0;
    } else {
      b = beta/betaold;
      if (eigs) {
        if (ksp->max_it != stored_max_it) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Can not change maxit AND calculate eigenvalues");
        e[i] = PetscSqrtReal(PetscAbsScalar(b))/a;
      }
      ierr = VecAYPX(P,b,Z);CHKERRQ(ierr);                     /*     p <- z + b* p                     */
    }
    dpiold = dpi;
    if (!cg->singlereduction || !i) {
      ierr = KSP_MatMult(ksp,Amat,P,W);CHKERRQ(ierr);          /*     w <- Ap                           */
      ierr = VecXDot(P,W,&dpi);CHKERRQ(ierr);                  /*     dpi <- p'w                        */
    } else {
      ierr = VecAYPX(W,beta/betaold,S);CHKERRQ(ierr);          /*     w <- Ap                           */
      dpi  = delta - beta*beta*dpiold/(betaold*betaold);       /*     dpi <- p'w                        */
    }
    betaold = beta;
    if (PetscIsInfOrNanScalar(dpi)) {
      if (ksp->errorifnotconverged) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to Nan or Inf inner product");
      else {
        ksp->reason = KSP_DIVERGED_NANORINF;
        PetscFunctionReturn(0);
      }
    }

    if ((dpi == 0.0) || ((i > 0) && (PetscRealPart(dpi*dpiold) <= 0.0))) {
      ksp->reason = KSP_DIVERGED_INDEFINITE_MAT;
      ierr        = PetscInfo(ksp,"diverging due to indefinite or negative definite matrix\n");CHKERRQ(ierr);
      break;
    }
    a = beta/dpi;                                              /*     a = beta/p'w                      */
    if (eigs) d[i] = PetscSqrtReal(PetscAbsScalar(b))*e[i] + 1.0/a;
    ierr = VecAXPY(X,a,P);CHKERRQ(ierr);                       /*     x <- x + ap                       */
    ierr = VecAXPY(R,-a,W);CHKERRQ(ierr);                      /*     r <- r - aw                       */
    if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->chknorm < i+2) {
      ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);               /*     z <- Br                           */
      if (cg->singlereduction) {
        ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      }
      ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);              /*    dp <- z'*z                         */
    } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->chknorm < i+2) {
      ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);              /*    dp <- r'*r                         */
    } else if (ksp->normtype == KSP_NORM_NATURAL) {
      ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);               /*     z <- Br                           */
      if (cg->singlereduction) {
        PetscScalar tmp[2];
        Vec         vecs[2];
        vecs[0] = S; vecs[1] = R;
        ierr    = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
        ierr    = VecMDot(Z,2,vecs,tmp);CHKERRQ(ierr);
        delta   = tmp[0]; beta = tmp[1];
      } else {
        ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);               /*  beta <- r'*z                         */
      }
      if (PetscIsInfOrNanScalar(beta)) {
        if (ksp->errorifnotconverged) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to Nan or Inf inner product");
        else {
          ksp->reason = KSP_DIVERGED_NANORINF;
          PetscFunctionReturn(0);
        }
      }
      dp = PetscSqrtReal(PetscAbsScalar(beta));
    } else {
      dp = 0.0;
    }
    ksp->rnorm = dp;
    /* capture the return value so the error check tests this call, not a stale ierr */
    ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;

    /* complete z (and S) and beta for the next iteration when the norm computation above skipped them */
    if ((ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) || (ksp->chknorm >= i+2)) {
      ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);               /*     z <- Br                           */
      if (cg->singlereduction) {
        ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      }
    }
    if ((ksp->normtype != KSP_NORM_NATURAL) || (ksp->chknorm >= i+2)) {
      if (cg->singlereduction) {
        PetscScalar tmp[2];
        Vec         vecs[2];
        vecs[0] = S; vecs[1] = R;
        ierr    = VecMDot(Z,2,vecs,tmp);CHKERRQ(ierr);
        delta   = tmp[0]; beta = tmp[1];
      } else {
        ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);               /*  beta <- z'*r                         */
      }
      if (PetscIsInfOrNanScalar(beta)) {
        if (ksp->errorifnotconverged) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to Nan or Inf inner product");
        else {
          ksp->reason = KSP_DIVERGED_NANORINF;
          PetscFunctionReturn(0);
        }
      }
    }

    i++;
  } while (i<ksp->max_it);
  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
/*
   SNESQNApply_LBFGS - Applies the limited-memory quasi-Newton update to produce the
   step direction Y from the current function value D.

   Input Parameters:
+  snes - the nonlinear solver; its data pointer is read as a SNES_QN context
.  it   - current iteration number; at most qn->m previous updates are used
.  X    - current solution
.  Xold - previous solution
.  D    - current function value
-  Dold - previous function value

   Output Parameter:
.  Y    - the resulting direction

   Notes:
   Y starts as a copy of D. For it > 0 the differences D - Dold and X - Xold are
   stored in slot (it-1) % l of the dF and dX histories, where l = min(it, qn->m).
   The direction is then built by an outward recursion over the stored pairs, a
   scaling step (scalar scaling, or a KSPSolve() for SNES_QN_SCALE_JACOBIAN), and an
   inward recursion.

   With qn->singlereduction the three sets of inner products needed up front are
   posted with VecMDotBegin() and completed with VecMDotEnd(), and kept in H(i,j),
   a macro defined elsewhere in this file; otherwise individual VecDot() calls are
   made inside the recursions.
*/
PetscErrorCode SNESQNApply_LBFGS(SNES snes,PetscInt it,Vec Y,Vec X,Vec Xold,Vec D,Vec Dold)
{
  PetscErrorCode ierr;
  SNES_QN        *qn    = (SNES_QN*)snes->data;
  Vec            W      = snes->work[3];
  Vec            *dX    = qn->U;
  Vec            *dF    = qn->V;
  PetscScalar    *alpha = qn->alpha;
  PetscScalar    *beta  = qn->beta;
  PetscScalar    *dXtdF = qn->dXtdF;
  PetscScalar    *dFtdX = qn->dFtdX;
  PetscScalar    *YtdX  = qn->YtdX;

  /* ksp thing for Jacobian scaling */
  PetscInt    k,i,j,g,lits;
  PetscInt    m = qn->m;
  PetscScalar t;
  PetscInt    l = m;

  PetscFunctionBegin;
  /* fewer than m updates are available during the first iterations */
  if (it < m) l = it;
  ierr = VecCopy(D,Y);CHKERRQ(ierr);
  if (it > 0) {
    /* store the newest differences in their circular slot */
    k    = (it - 1) % l;
    ierr = VecCopy(D,dF[k]);CHKERRQ(ierr);
    ierr = VecAXPY(dF[k], -1.0, Dold);CHKERRQ(ierr);
    ierr = VecCopy(X, dX[k]);CHKERRQ(ierr);
    ierr = VecAXPY(dX[k], -1.0, Xold);CHKERRQ(ierr);
    if (qn->singlereduction) {
      /* post all three multi-dots before completing any of them */
      ierr = VecMDotBegin(dF[k],l,dX,dXtdF);CHKERRQ(ierr);
      ierr = VecMDotBegin(dX[k],l,dF,dFtdX);CHKERRQ(ierr);
      ierr = VecMDotBegin(Y,l,dX,YtdX);CHKERRQ(ierr);
      ierr = VecMDotEnd(dF[k],l,dX,dXtdF);CHKERRQ(ierr);
      ierr = VecMDotEnd(dX[k],l,dF,dFtdX);CHKERRQ(ierr);
      ierr = VecMDotEnd(Y,l,dX,YtdX);CHKERRQ(ierr);
      for (j = 0; j < l; j++) {
        H(k, j) = dFtdX[j];
        H(j, k) = dXtdF[j];
      }
      /* copy back over to make the computation of alpha and beta easier */
      for (j = 0; j < l; j++) dXtdF[j] = H(j, j);
    } else {
      ierr = VecDot(dX[k], dF[k], &dXtdF[k]);CHKERRQ(ierr);
    }
    if (qn->scale_type == SNES_QN_SCALE_LINESEARCH) {
      ierr = SNESLineSearchGetLambda(snes->linesearch,&qn->scaling);CHKERRQ(ierr);
    }
  }

  /* outward recursion starting at iteration k's update and working back */
  for (i=0; i<l; i++) {
    k = (it-i-1)%l;
    if (qn->singlereduction) {
      /* construct t = dX[k] dot Y as Y_0 dot dX[k] + sum(-alpha[j]dX[k]dF[j]) */
      t = YtdX[k];
      for (j=0; j<i; j++) {
        g  = (it-j-1)%l;
        t -= alpha[g]*H(k, g);
      }
      alpha[k] = t/H(k,k);
    } else {
      ierr     = VecDot(dX[k],Y,&t);CHKERRQ(ierr);
      alpha[k] = t/dXtdF[k];
    }
    if (qn->monitor) {
      /* PetscInt is printed with %D and the real value is cast to double for %e */
      ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(qn->monitor, "it: %D k: %D alpha: %14.12e\n", it, k, (double)PetscRealPart(alpha[k]));CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
    }
    ierr = VecAXPY(Y,-alpha[k],dF[k]);CHKERRQ(ierr);
  }

  if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) {
    /* scale through a linear solve: W receives the solve applied to Y */
    ierr = KSPSolve(snes->ksp,Y,W);CHKERRQ(ierr);
    SNESCheckKSPSolve(snes);
    ierr              = KSPGetIterationNumber(snes->ksp,&lits);CHKERRQ(ierr);
    snes->linear_its += lits;
    ierr              = VecCopy(W, Y);CHKERRQ(ierr);
  } else {
    ierr = VecScale(Y, qn->scaling);CHKERRQ(ierr);
  }

  if (qn->singlereduction) {
    ierr = VecMDot(Y,l,dF,YtdX);CHKERRQ(ierr);
  }
  /* inward recursion starting at the first update and working forward */
  for (i = 0; i < l; i++) {
    k = (it + i - l) % l;
    if (qn->singlereduction) {
      t = YtdX[k];
      for (j = 0; j < i; j++) {
        g  = (it + j - l) % l;
        t += (alpha[g] - beta[g])*H(g, k);
      }
      beta[k] = t / H(k, k);
    } else {
      ierr    = VecDot(dF[k], Y, &t);CHKERRQ(ierr);
      beta[k] = t / dXtdF[k];
    }
    ierr = VecAXPY(Y, (alpha[k] - beta[k]), dX[k]);CHKERRQ(ierr);
    if (qn->monitor) {
      ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(qn->monitor, "it: %D k: %D alpha - beta: %14.12e\n", it, k, (double)PetscRealPart(alpha[k] - beta[k]));CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { Vec x,y,w; /* vectors */ Vec *z; /* array of vectors */ PetscReal norm,v,v1,v2,maxval; PetscInt n = 20,maxind; PetscErrorCode ierr; PetscScalar one = 1.0,two = 2.0,three = 3.0,dots[3],dot; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); /* Create a vector, specifying only its global dimension. When using VecCreate(), VecSetSizes() and VecSetFromOptions(), the vector format (currently parallel, shared, or sequential) is determined at runtime. Also, the parallel partitioning of the vector is determined by PETSc at runtime. Routines for creating particular vector types directly are: VecCreateSeq() - uniprocessor vector VecCreateMPI() - distributed vector, where the user can determine the parallel partitioning VecCreateShared() - parallel vector that uses shared memory (available only on the SGI); otherwise, is the same as VecCreateMPI() With VecCreate(), VecSetSizes() and VecSetFromOptions() the option -vec_type mpi or -vec_type shared causes the particular type of vector to be formed. */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); /* Duplicate some work vectors (of the same format and partitioning as the initial vector). */ ierr = VecDuplicate(x,&y);CHKERRQ(ierr); ierr = VecDuplicate(x,&w);CHKERRQ(ierr); /* Duplicate more work vectors (of the same format and partitioning as the initial vector). Here we duplicate an array of vectors, which is often more convenient than duplicating individual ones. */ ierr = VecDuplicateVecs(x,3,&z);CHKERRQ(ierr); /* Set the vectors to entries to a constant value. */ ierr = VecSet(x,one);CHKERRQ(ierr); ierr = VecSet(y,two);CHKERRQ(ierr); ierr = VecSet(z[0],one);CHKERRQ(ierr); ierr = VecSet(z[1],two);CHKERRQ(ierr); ierr = VecSet(z[2],three);CHKERRQ(ierr); /* Demonstrate various basic vector routines. 
*/ ierr = VecDot(x,y,&dot);CHKERRQ(ierr); ierr = VecMDot(x,3,z,dots);CHKERRQ(ierr); /* Note: If using a complex numbers version of PETSc, then PETSC_USE_COMPLEX is defined in the makefiles; otherwise, (when using real numbers) it is undefined. */ ierr = PetscPrintf(PETSC_COMM_WORLD,"Vector length %D\n",n);CHKERRQ(ierr); ierr = VecMax(x,&maxind,&maxval);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"VecMax %g, VecInd %D\n",(double)maxval,maxind);CHKERRQ(ierr); ierr = VecMin(x,&maxind,&maxval);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"VecMin %g, VecInd %D\n",(double)maxval,maxind);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"All other values should be near zero\n");CHKERRQ(ierr); ierr = VecScale(x,two);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); v = norm-2.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecScale %g\n",(double)v);CHKERRQ(ierr); ierr = VecCopy(x,w);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); v = norm-2.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecCopy %g\n",(double)v);CHKERRQ(ierr); ierr = VecAXPY(y,three,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); v = norm-8.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecAXPY %g\n",(double)v);CHKERRQ(ierr); ierr = VecAYPX(y,two,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); v = norm-18.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecAYPX %g\n",(double)v);CHKERRQ(ierr); ierr = VecSwap(x,y);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); v = norm-2.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecSwap %g\n",(double)v);CHKERRQ(ierr); ierr = 
VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); v = norm-18.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecSwap %g\n",(double)v);CHKERRQ(ierr); ierr = VecWAXPY(w,two,x,y);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); v = norm-38.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecWAXPY %g\n",(double)v);CHKERRQ(ierr); ierr = VecPointwiseMult(w,y,x);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); v = norm-36.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecPointwiseMult %g\n",(double)v);CHKERRQ(ierr); ierr = VecPointwiseDivide(w,x,y);CHKERRQ(ierr); ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr); v = norm-9.0*PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecPointwiseDivide %g\n",(double)v);CHKERRQ(ierr); dots[0] = one; dots[1] = three; dots[2] = two; ierr = VecSet(x,one);CHKERRQ(ierr); ierr = VecMAXPY(x,3,dots,z);CHKERRQ(ierr); ierr = VecNorm(z[0],NORM_2,&norm);CHKERRQ(ierr); v = norm-PetscSqrtReal((PetscReal)n); if (v > -PETSC_SMALL && v < PETSC_SMALL) v = 0.0; ierr = VecNorm(z[1],NORM_2,&norm);CHKERRQ(ierr); v1 = norm-2.0*PetscSqrtReal((PetscReal)n); if (v1 > -PETSC_SMALL && v1 < PETSC_SMALL) v1 = 0.0; ierr = VecNorm(z[2],NORM_2,&norm);CHKERRQ(ierr); v2 = norm-3.0*PetscSqrtReal((PetscReal)n); if (v2 > -PETSC_SMALL && v2 < PETSC_SMALL) v2 = 0.0; ierr = PetscPrintf(PETSC_COMM_WORLD,"VecMAXPY %g %g %g \n",(double)v,(double)v1,(double)v2);CHKERRQ(ierr); /* Free work space. All PETSc objects should be destroyed when they are no longer needed. */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&w);CHKERRQ(ierr); ierr = VecDestroyVecs(3,&z);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
/*
   KSPSolve_CG - Preconditioned conjugate gradient solve instrumented with
   checksum-based error detection, single-error correction of W = A*P, and
   checkpoint/rollback of the iteration state.

   Input Parameter:
.  ksp - the Krylov solver context; ksp->data is read as a KSP_CG

   Notes:
   Two weighted checksums are carried for the vectors X, R, Z, P, W (and S in
   single-reduction mode): C1 holds all ones and C2 holds the global index, so
   checksum1(v) = sum_i v_i and checksum2(v) = sum_i i*v_i. The checksums are
   updated with the same recurrences as the vectors.

   - After each explicit product W = A*P the actual sums of W are compared with
     the predicted ones; if they differ by more than 1e-6 the entry at
     round(delta2/delta1) is corrected by delta1.
   - Every itv_d (10) iterations the sums of X and R are compared with their
     checksums; on a mismatch the state is rolled back to the last checkpoint.
     Otherwise, every itv_c*itv_d (20) iterations, X, P, betaold and the
     iteration number are checkpointed.

   Work vectors ksp->work[0..8] are used unconditionally and work[9..10] in
   single-reduction mode; the setup routine that allocates them is not part of
   this block.

   NOTE(review): artificial faults are injected into W at iteration 41 (entry
   100) and into X when the counter reaches 50 (entry 1000). These are kept, but
   are now skipped when the vector is too short to have that entry.

   NOTE(review): checksum(S) is only refreshed in the initial KSP_NORM_NATURAL
   setup and in the preconditioned-norm branch, so in single-reduction mode with
   other norm types the predicted checksums of W and R look stale. CKSS1/CKSS2
   are zero-initialized here to avoid reading indeterminate values, but that
   tracking should be confirmed/completed.
*/
PetscErrorCode KSPSolve_CG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,stored_max_it,eigs;
  PetscScalar    dpi = 0.0,a = 1.0,beta,betaold = 1.0,b = 0,*e = 0,*d = 0,delta,dpiold;
  PetscReal      dp  = 0.0;
  Vec            X,B,Z,R,P,S,W;
  KSP_CG         *cg;
  Mat            Amat,Pmat;
  PetscBool      diagonalscale;
  /* fault-tolerance state */
  PetscInt       itv_d, itv_c;                          /* detection interval, checkpoint interval (in units of itv_d) */
  PetscScalar    CKSX1,CKSZ1,CKSR1,CKSP1,CKSS1 = 0.0,CKSW1; /* checksums with weights C1 */
  PetscScalar    CKSX2,CKSZ2,CKSR2,CKSP2,CKSS2 = 0.0,CKSW2; /* checksums with weights C2 */
  Vec            CKSAmat1;
  Vec            CKSAmat2;
  Vec            C1,C2;                                 /* checksum weight vectors */
  PetscScalar    d1,d2;
  PetscScalar    sumX1,sumR1;
  PetscScalar    sumX2,sumR2;
  PetscScalar    sumW1,sumW2,delta1,delta2;
  Vec            CKPX,CKPP;                             /* checkpointed X and P */
  PetscScalar    CKPbetaold;
  PetscInt       CKPi;
  PetscBool      flag1 = PETSC_TRUE, flag2 = PETSC_TRUE; /* one-shot guards for the injected faults */
  PetscInt       pos;
  PetscScalar    v;
  VecScatter     ctx   = NULL;
  Vec            W_SEQ = NULL;
  PetscScalar    *warr;
  PetscInt       size,rlo,rhi;
  PetscMPIInt    rank;

  PetscFunctionBegin;
  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  cg            = (KSP_CG*)ksp->data;
  eigs          = ksp->calc_sings;
  stored_max_it = ksp->max_it;
  X             = ksp->vec_sol;
  B             = ksp->vec_rhs;
  R             = ksp->work[0];
  Z             = ksp->work[1];
  P             = ksp->work[2];
  CKPX          = ksp->work[3];
  CKPP          = ksp->work[4];
  CKSAmat1      = ksp->work[5];
  CKSAmat2      = ksp->work[6];
  C1            = ksp->work[7];
  C2            = ksp->work[8];

  /* only rank 0 of MPI_COMM_WORLD prints the diagnostic messages */
  ierr = MPI_Comm_rank(MPI_COMM_WORLD,&rank);CHKERRQ(ierr);

#define VecXDot(x,y,a) (((cg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x,y,a) : VecTDot(x,y,a))

  if (cg->singlereduction) {
    S = ksp->work[9];
    W = ksp->work[10];
  } else {
    S = 0;                      /* unused */
    W = Z;                      /* W shares storage with Z */
  }

  if (eigs) {e = cg->e; d = cg->d; e[0] = 0.0; }
  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);

  ksp->its = 0;
  if (!ksp->guess_zero) {
    ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);            /*     r <- b - Ax                       */
    ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(B,R);CHKERRQ(ierr);                         /*     r <- b (x is 0)                   */
  }

  /*
     Checksum weights: C1_i = 1, C2_i = i. Each rank fills the index range it
     owns in C1 (anything not local to C2 is moved during assembly), and both
     vectors are assembled before they are used.
  */
  ierr = VecGetSize(B,&size);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(C1,&rlo,&rhi);CHKERRQ(ierr);
  for (i=rlo; i<rhi; i++) {
    v    = 1.0;
    ierr = VecSetValues(C1,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
    v    = i;
    ierr = VecSetValues(C2,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(C1);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(C2);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(C1);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(C2);CHKERRQ(ierr);
  d1 = 1.0;
  d2 = 2.0;

  switch (ksp->normtype) {
  case KSP_NORM_PRECONDITIONED:
    ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                 /*     z <- Br                           */
    ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);                /*    dp <- z'*z = e'*A'*B'*B*A'*e'      */
    break;
  case KSP_NORM_UNPRECONDITIONED:
    ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);                /*    dp <- r'*r = e'*A'*A*e             */
    break;
  case KSP_NORM_NATURAL:
    ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                 /*     z <- Br                           */
    if (cg->singlereduction) {
      ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr);
      ierr = VecXDot(C1,S,&CKSS1);CHKERRQ(ierr);               /* Compute the initial checksum1(S)      */
      ierr = VecXDot(C2,S,&CKSS2);CHKERRQ(ierr);               /* Compute the initial checksum2(S)      */
    }
    ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);                   /*  beta <- z'*r                         */
    KSPCheckDot(ksp,beta);
    dp = PetscSqrtReal(PetscAbsScalar(beta));                  /*    dp <- r'*z = r'*B*r = e'*A'*B*A*e  */
    break;
  case KSP_NORM_NONE:
    dp = 0.0;
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
  }
  ierr       = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
  ierr       = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
  ksp->rnorm = dp;

  ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);  /* test for convergence */
  if (ksp->reason) PetscFunctionReturn(0);

  if (ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) {
    ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                 /*     z <- Br                           */
  }
  if (ksp->normtype != KSP_NORM_NATURAL) {
    if (cg->singlereduction) {
      ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr);
    }
    ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);                   /*  beta <- z'*r                         */
    KSPCheckDot(ksp,beta);
  }

  /* Checksum initialization */
  ierr = VecXDot(C1,X,&CKSX1);CHKERRQ(ierr);                   /* Compute the initial checksum1(X) */
  ierr = VecXDot(C1,W,&CKSW1);CHKERRQ(ierr);                   /* Compute the initial checksum1(W) */
  ierr = VecXDot(C1,R,&CKSR1);CHKERRQ(ierr);                   /* Compute the initial checksum1(R) */
  ierr = VecXDot(C1,Z,&CKSZ1);CHKERRQ(ierr);                   /* Compute the initial checksum1(Z) */
  ierr = VecXDot(C2,X,&CKSX2);CHKERRQ(ierr);                   /* Compute the initial checksum2(X) */
  ierr = VecXDot(C2,W,&CKSW2);CHKERRQ(ierr);                   /* Compute the initial checksum2(W) */
  ierr = VecXDot(C2,R,&CKSR2);CHKERRQ(ierr);                   /* Compute the initial checksum2(R) */
  ierr = VecXDot(C2,Z,&CKSZ2);CHKERRQ(ierr);                   /* Compute the initial checksum2(Z) */
  ierr = KSP_MatMultTranspose(ksp,Amat,C1,CKSAmat1);CHKERRQ(ierr);
  ierr = VecAXPY(CKSAmat1,-d1,C1);CHKERRQ(ierr);
  ierr = VecAXPY(CKSAmat1,-d2,C2);CHKERRQ(ierr);               /* Compute the initial checksum1(A) */
  ierr = KSP_MatMultTranspose(ksp,Amat,C2,CKSAmat2);CHKERRQ(ierr);
  ierr = VecAXPY(CKSAmat2,-d2,C1);CHKERRQ(ierr);
  ierr = VecAXPY(CKSAmat2,-d1,C2);CHKERRQ(ierr);               /* Compute the initial checksum2(A) */
  itv_c = 2;
  itv_d = 10;

  /*
     Initial checkpoint: a mismatch detected before the first periodic
     checkpoint then restarts from iteration 0 instead of reading checkpoint
     state that was never written. P is set from Z at iteration 0, so Z is what
     gets stored in the P slot.
  */
  ierr       = VecCopy(X,CKPX);CHKERRQ(ierr);
  ierr       = VecCopy(Z,CKPP);CHKERRQ(ierr);
  CKPbetaold = betaold;
  CKPi       = 0;

  i = 0;
  do {
    /* periodic detection / checkpoint */
    if ((i>0) && (i%itv_d == 0)) {
      ierr = VecXDot(C1,X,&sumX1);CHKERRQ(ierr);
      ierr = VecXDot(C1,R,&sumR1);CHKERRQ(ierr);
      if ((PetscAbsScalar(sumX1-CKSX1) > 1.0e-6) || (PetscAbsScalar(sumR1-CKSR1) > 1.0e-6)) {
        /* Rollback and Recovery */
        if (rank==0) printf ("Recovery start...\n");
        if (rank==0) printf ("Rollback from iteration-%d to iteration-%d\n",(int)i,(int)CKPi);
        betaold = CKPbetaold;                                  /* Recovery scalar betaold by checkpoint */
        i       = CKPi;                                        /* Recovery integer i by checkpoint */
        ierr    = VecCopy(CKPP,P);CHKERRQ(ierr);               /* Recovery vector P from checkpoint */
        ierr    = VecXDot(C1,P,&CKSP1);CHKERRQ(ierr);          /* Recovery checksum1(P) by P */
        ierr    = VecXDot(C2,P,&CKSP2);CHKERRQ(ierr);          /* Recovery checksum2(P) by P */
        ierr    = KSP_MatMult(ksp,Amat,P,W);CHKERRQ(ierr);     /* Recovery vector W by P */
        ierr    = VecXDot(P,W,&dpi);CHKERRQ(ierr);             /* Recovery scalar dpi by P and W */
        ierr    = VecCopy(CKPX,X);CHKERRQ(ierr);               /* Recovery vector X from checkpoint */
        ierr    = VecXDot(C1,X,&CKSX1);CHKERRQ(ierr);          /* Recovery checksum1(X) by X */
        ierr    = VecXDot(C2,X,&CKSX2);CHKERRQ(ierr);          /* Recovery checksum2(X) by X */
        ierr    = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);     /* Recovery vector R by X */
        ierr    = VecAYPX(R,-1.0,B);CHKERRQ(ierr);
        ierr    = VecXDot(C1,R,&CKSR1);CHKERRQ(ierr);          /* Recovery checksum1(R) by R */
        ierr    = VecXDot(C2,R,&CKSR2);CHKERRQ(ierr);          /* Recovery checksum2(R) by R */
        ierr    = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);          /* Recovery vector Z by R */
        ierr    = VecXDot(C1,Z,&CKSZ1);CHKERRQ(ierr);          /* Recovery checksum1(Z) by Z */
        ierr    = VecXDot(C2,Z,&CKSZ2);CHKERRQ(ierr);          /* Recovery checksum2(Z) by Z */
        ierr    = VecXDot(Z,R,&beta);CHKERRQ(ierr);            /* Recovery scalar beta by Z and R */
        if (rank==0) printf ("Recovery end.\n");
      } else if (i%(itv_c*itv_d) == 0) {
        if (rank==0) printf ("Checkpoint iteration-%d\n",(int)i);
        ierr       = VecCopy(X,CKPX);CHKERRQ(ierr);
        ierr       = VecCopy(P,CKPP);CHKERRQ(ierr);
        CKPbetaold = betaold;
        CKPi       = i;
      }
    }

    ksp->its = i+1;
    if (beta == 0.0) {
      ksp->reason = KSP_CONVERGED_ATOL;
      ierr        = PetscInfo(ksp,"converged due to beta = 0\n");CHKERRQ(ierr);
      break;
#if !defined(PETSC_USE_COMPLEX)
    } else if ((i > 0) && (beta*betaold < 0.0)) {
      ksp->reason = KSP_DIVERGED_INDEFINITE_PC;
      ierr        = PetscInfo(ksp,"diverging due to indefinite preconditioner\n");CHKERRQ(ierr);
      break;
#endif
    }
    if (!i) {
      ierr = VecCopy(Z,P);CHKERRQ(ierr);                       /*     p <- z                            */
      b    = 0.0;
      ierr = VecXDot(C1,P, &CKSP1);CHKERRQ(ierr);              /* Compute the initial checksum1(P) */
      ierr = VecXDot(C2,P, &CKSP2);CHKERRQ(ierr);              /* Compute the initial checksum2(P) */
    } else {
      b = beta/betaold;
      if (eigs) {
        if (ksp->max_it != stored_max_it) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Can not change maxit AND calculate eigenvalues");
        e[i] = PetscSqrtReal(PetscAbsScalar(b))/a;
      }
      ierr  = VecAYPX(P,b,Z);CHKERRQ(ierr);                    /*     p <- z + b* p                     */
      CKSP1 = CKSZ1 + b*CKSP1;                                 /* Update checksum1(P) = checksum1(Z) + b*checksum1(P); */
      CKSP2 = CKSZ2 + b*CKSP2;                                 /* Update checksum2(P) = checksum2(Z) + b*checksum2(P); */
    }
    dpiold = dpi;
    if (!cg->singlereduction || !i) {
      ierr  = KSP_MatMult(ksp,Amat,P,W);CHKERRQ(ierr);         /*     w <- Ap                           */  /* MVM */
      ierr  = VecXDot(P,W,&dpi);CHKERRQ(ierr);                 /*     dpi <- p'w                        */
      ierr  = VecXDot(CKSAmat1, P, &CKSW1);CHKERRQ(ierr);
      CKSW1 = CKSW1 + d1*CKSP1 + d2*CKSP2;                     /* Update checksum1(W) = checksum1(A)P + d1*checksum1(P) + d2*checksum2(P); */
      ierr  = VecXDot(CKSAmat2, P, &CKSW2);CHKERRQ(ierr);
      CKSW2 = CKSW2 + d2*CKSP1 + d1*CKSP2;                     /* Update checksum2(W) = checksum2(A)P + d2*checksum1(P) + d1*checksum2(P); */

      /* one-shot artificial fault in W; skipped when W has no such entry */
      if ((i==41)&&(flag2)) {
        pos = 100;
        v   = 1000;
        if (pos < size) {
          ierr = VecSetValue(W,pos,v,INSERT_VALUES);CHKERRQ(ierr);
          ierr = VecAssemblyBegin(W);CHKERRQ(ierr);
          ierr = VecAssemblyEnd(W);CHKERRQ(ierr);
          if (rank==0) printf ("Inject an error in %d-th element of vector W after MVM W=AP at iteration-%d\n", (int)pos,(int)i);
        }
        flag2 = PETSC_FALSE;
      }

      /* compare the actual sums of W with the predicted checksums */
      ierr   = VecXDot(C1,W,&sumW1);CHKERRQ(ierr);
      ierr   = VecXDot(C2,W,&sumW2);CHKERRQ(ierr);
      delta1 = sumW1 - CKSW1;
      delta2 = sumW2 - CKSW2;
      if (PetscAbsScalar(delta1) > 1.0e-6) {
        /* for a single corrupted entry p with error e: delta1 = e and delta2 = p*e */
        pos = (PetscInt)rint(PetscRealPart(delta2/delta1));
        if (pos < 0 || pos >= size) SETERRQ2(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Checksum mismatch in W maps to invalid entry %D (vector length %D)",pos,size);
        /* gather W on every rank to read the corrupted entry, then release the temporaries */
        ierr = VecScatterCreateToAll(W,&ctx,&W_SEQ);CHKERRQ(ierr);
        ierr = VecScatterBegin(ctx,W,W_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecScatterEnd(ctx,W,W_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(W_SEQ,&warr);CHKERRQ(ierr);
        v    = warr[pos];
        ierr = VecRestoreArray(W_SEQ,&warr);CHKERRQ(ierr);
        ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
        ierr = VecDestroy(&W_SEQ);CHKERRQ(ierr);
        v    = v - delta1;
        ierr = VecSetValues(W,1,&pos,&v,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecAssemblyBegin(W);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(W);CHKERRQ(ierr);
        if (rank==0) printf ("Correct an error of %d-th elements of vector W after MVM W=AP at iteration-%d\n", (int)pos, (int)i);
      }
    } else {
      ierr  = VecAYPX(W,beta/betaold,S);CHKERRQ(ierr);         /*     w <- Ap                           */
      dpi   = delta - beta*beta*dpiold/(betaold*betaold);      /*     dpi <- p'w                        */
      CKSW1 = beta/betaold*CKSW1 + CKSS1;                      /* Update checksum1(W) = checksum1(S) + beta/betaold*checksum1(W); */
      CKSW2 = beta/betaold*CKSW2 + CKSS2;                      /* Update checksum2(W) = checksum2(S) + beta/betaold*checksum2(W); */
    }
    betaold = beta;
    KSPCheckDot(ksp,beta);

    if ((dpi == 0.0) || ((i > 0) && (PetscRealPart(dpi*dpiold) <= 0.0))) {
      ksp->reason = KSP_DIVERGED_INDEFINITE_MAT;
      ierr        = PetscInfo(ksp,"diverging due to indefinite or negative definite matrix\n");CHKERRQ(ierr);
      break;
    }
    a = beta/dpi;                                              /*     a = beta/p'w                      */
    if (eigs) d[i] = PetscSqrtReal(PetscAbsScalar(b))*e[i] + 1.0/a;
    ierr  = VecAXPY(X,a,P);CHKERRQ(ierr);                      /*     x <- x + ap                       */
    CKSX1 = CKSX1 + a*CKSP1;                                   /* Update checksum1(X) = checksum1(X) + a*checksum1(P); */
    CKSX2 = CKSX2 + a*CKSP2;                                   /* Update checksum2(X) = checksum2(X) + a*checksum2(P); */

    ierr  = VecAXPY(R,-a,W);CHKERRQ(ierr);                     /*     r <- r - aw                       */
    CKSR1 = CKSR1 - a*CKSW1;                                   /* Update checksum1(R) = checksum1(R) - a*checksum1(W); */
    CKSR2 = CKSR2 - a*CKSW2;                                   /* Update checksum2(R) = checksum2(R) - a*checksum2(W); */

    if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->chknorm < i+2) {
      ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);               /*     z <- Br                           */
      ierr = VecXDot(C1,Z, &CKSZ1);CHKERRQ(ierr);              /* Update checksum1(Z) */
      ierr = VecXDot(C2,Z, &CKSZ2);CHKERRQ(ierr);              /* Update checksum2(Z) */
      if (cg->singlereduction) {
        ierr  = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);       /* MVM */
        ierr  = VecXDot(CKSAmat1, Z, &CKSS1);CHKERRQ(ierr);
        CKSS1 = CKSS1 + d1*CKSZ1 + d2*CKSZ2;                   /* Update checksum1(S) = checksum1(A)Z + d1*checksum1(Z) + d2*checksum2(Z); */
        ierr  = VecXDot(CKSAmat2, Z, &CKSS2);CHKERRQ(ierr);
        CKSS2 = CKSS2 + d2*CKSZ1 + d1*CKSZ2;                   /* Update checksum2(S) = checksum2(A)Z + d2*checksum1(Z) + d1*checksum2(Z); */
      }
      ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);              /*    dp <- z'*z                         */
    } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->chknorm < i+2) {
      ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);              /*    dp <- r'*r                         */
    } else if (ksp->normtype == KSP_NORM_NATURAL) {
      ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);               /*     z <- Br                           */
      ierr = VecXDot(C1,Z, &CKSZ1);CHKERRQ(ierr);              /* Update checksum1(Z) */
      ierr = VecXDot(C2,Z, &CKSZ2);CHKERRQ(ierr);              /* Update checksum2(Z) */
      if (cg->singlereduction) {
        PetscScalar tmp[2];
        Vec         vecs[2];
        vecs[0] = S; vecs[1] = R;
        ierr    = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
        ierr    = VecMDot(Z,2,vecs,tmp);CHKERRQ(ierr);
        delta   = tmp[0]; beta = tmp[1];
      } else {
        ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);               /*  beta <- r'*z                         */
      }
      KSPCheckDot(ksp,beta);
      dp = PetscSqrtReal(PetscAbsScalar(beta));
    } else {
      dp = 0.0;
    }
    ksp->rnorm = dp;
    /* the return code of KSPLogResidualHistory is captured so the following check is meaningful */
    ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;

    if ((ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) || (ksp->chknorm >= i+2)) {
      ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);               /*     z <- Br                           */
      ierr = VecXDot(C1,Z, &CKSZ1);CHKERRQ(ierr);              /* Update checksum1(Z) */
      ierr = VecXDot(C2,Z, &CKSZ2);CHKERRQ(ierr);              /* Update checksum2(Z) */
      if (cg->singlereduction) {
        ierr = KSP_MatMult(ksp,Amat,Z,S);CHKERRQ(ierr);
      }
    }
    if ((ksp->normtype != KSP_NORM_NATURAL) || (ksp->chknorm >= i+2)) {
      if (cg->singlereduction) {
        PetscScalar tmp[2];
        Vec         vecs[2];
        vecs[0] = S; vecs[1] = R;
        ierr    = VecMDot(Z,2,vecs,tmp);CHKERRQ(ierr);
        delta   = tmp[0]; beta = tmp[1];
      } else {
        ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr);               /*  beta <- z'*r                         */
      }
      KSPCheckDot(ksp,beta);
    }

    i++;

    /* one-shot artificial fault in X; skipped when X has no such entry */
    if ((i==50)&&(flag1)) {
      pos = 1000;
      v   = -1;
      if (pos < size) {
        ierr = VecSetValues(X,1,&pos,&v,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecAssemblyBegin(X);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(X);CHKERRQ(ierr);
        if (rank==0) printf ("Inject an error in vector X at the end of iteration-%d\n", (int)(i-1));
      }
      flag1 = PETSC_FALSE;
    }
  } while (i<ksp->max_it);

  /* final report: actual weighted sums next to the tracked checksums */
  ierr = VecXDot(C1,X,&sumX1);CHKERRQ(ierr);
  ierr = VecXDot(C1,R,&sumR1);CHKERRQ(ierr);
  ierr = VecXDot(C2,X,&sumX2);CHKERRQ(ierr);
  ierr = VecXDot(C2,R,&sumR2);CHKERRQ(ierr);
  if (rank==0) {
    printf ("sum1 of X = %f\n", (double)PetscRealPart(sumX1));
    printf ("checksum1(X) = %f\n", (double)PetscRealPart(CKSX1));
    printf ("sum2 of X = %f\n", (double)PetscRealPart(sumX2));
    printf ("checksum2(X) = %f\n", (double)PetscRealPart(CKSX2));
    printf ("sum1 of R = %f\n", (double)PetscRealPart(sumR1));
    printf ("checksum1(R) = %f\n", (double)PetscRealPart(CKSR1));
    printf ("sum2 of R = %f\n", (double)PetscRealPart(sumR2));
    printf ("checksum2(R) = %f\n", (double)PetscRealPart(CKSR2));
  }

  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  if (eigs) cg->ned = ksp->its;
  PetscFunctionReturn(0);
}
void FETI_Operations::set_null_space(const std::string& input_filename_base, int nb_of_vecs) { homemade_assert_msg(m_bNullVecsDimensionsSet,"Null vectors sizes not set yet!"); homemade_assert_msg(m_bScratchFolderSet,"Scratch folder not set yet!"); // Set the null vec arrays m_null_nb_vecs = nb_of_vecs; // Set the (dummy) coupling matrix pointer Mat * coupling_matrix; switch (m_RB_modes_system) { case RBModesSystem::MICRO : coupling_matrix = &m_C_R_micro; break; case RBModesSystem::MACRO : // TODO: Remove the error message below after the more general code was implemented homemade_error_msg("Option not implemented yet!"); coupling_matrix = &m_C_R_BIG; break; } // Matrix dimensions // M x N // coupling_matrix : n_med x n_sys // m_null_vecs : n_sys x nb_of_vecs ( R ) -> nb_of_vecs vectors of dim n_sys // m_null_coupled_vecs : n_med x nb_of_vecs ( RC ) -> nb_of_vecs vectors of dim n_coupl // Set the first nullspace vectors std::string input_filename = input_filename_base + "_0_n_" + std::to_string(m_null_nb_vecs) + ".petscvec"; VecCreate(m_comm.get(),&m_null_vecs[0]); VecSetSizes(m_null_vecs[0],m_null_vecs_N_local,m_null_vecs_N); read_PETSC_vector(m_null_vecs[0],input_filename, m_comm.get()); std::string output_filename = m_scratch_folder_path + "/rb_coupl_vector_0_n_" + std::to_string(m_null_nb_vecs) + ".petscvec"; VecCreate(m_comm.get(),&m_null_coupled_vecs[0]); VecSetSizes(m_null_coupled_vecs[0],m_C_RR_M_local,m_C_RR_M); VecSetFromOptions(m_null_coupled_vecs[0]); MatMult(*coupling_matrix,m_null_vecs[0],m_null_coupled_vecs[0]); write_PETSC_vector(m_null_coupled_vecs[0],output_filename,m_comm.rank(),m_comm.get()); // Read and calculate the rest of the nullspace vectors for(int iii = 1; iii < m_null_nb_vecs; ++iii) { input_filename = input_filename_base + "_" + std::to_string(iii) + "_n_" + std::to_string(m_null_nb_vecs) + ".petscvec"; VecDuplicate(m_null_vecs[0],&m_null_vecs[iii]); read_PETSC_vector(m_null_vecs[iii],input_filename, m_comm.get()); std::string 
output_filename = m_scratch_folder_path + "/rb_coupl_vector_" + std::to_string(iii) + "_n_" + std::to_string(m_null_nb_vecs) + ".petscvec"; VecDuplicate(m_null_coupled_vecs[0],&m_null_coupled_vecs[iii]); MatMult(*coupling_matrix,m_null_vecs[iii],m_null_coupled_vecs[iii]); write_PETSC_vector(m_null_coupled_vecs[iii],output_filename,m_comm.rank(),m_comm.get()); } // Build the LOCAL dense matrix std::vector<PetscScalar> dummy_vec_val(m_null_nb_vecs,0); std::vector<PetscInt> dummy_vec_row(m_null_nb_vecs,0); for(PetscInt iii = 0; iii < m_null_nb_vecs; ++iii) { dummy_vec_row[iii] = iii; } MatCreateSeqDense(PETSC_COMM_SELF,m_null_nb_vecs,m_null_nb_vecs,NULL,&m_RITRI_mat); for(PetscInt iii = 0; iii < m_null_nb_vecs; ++iii) { VecMDot(m_null_coupled_vecs[iii],m_null_nb_vecs,m_null_coupled_vecs,dummy_vec_val.data()); MatSetValues(m_RITRI_mat,m_null_nb_vecs,dummy_vec_row.data(),1,&iii,dummy_vec_val.data(),INSERT_VALUES); } MatAssemblyBegin(m_RITRI_mat,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(m_RITRI_mat,MAT_FINAL_ASSEMBLY); PETSC_invert_dense_matrix(m_RITRI_mat,m_inv_RITRI_mat); if(m_comm.rank() == 0) { write_PETSC_matrix(m_inv_RITRI_mat,m_scratch_folder_path + "/rb_inv_RITRI.petscmat",0,PETSC_COMM_SELF); } // Set up flag m_bNullVecsSet = true; m_binvRITRIMatSet = true; // Cleanup MatDestroy(&m_RITRI_mat); }
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA) { SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; PetscInt i,j; Vec *Fdot = ngmres->Fdot; Vec *Xdot = ngmres->Xdot; PetscScalar *beta = ngmres->beta; PetscScalar *xi = ngmres->xi; PetscScalar alph_total = 0.; PetscErrorCode ierr; PetscReal nu; Vec Y = snes->work[2]; PetscBool changed_y,changed_w; PetscFunctionBegin; nu = fMnorm*fMnorm; /* construct the right hand side and xi factors */ ierr = VecMDot(FM,l,Fdot,xi);CHKERRQ(ierr); for (i = 0; i < l; i++) beta[i] = nu - xi[i]; /* construct h */ for (j = 0; j < l; j++) { for (i = 0; i < l; i++) { H(i,j) = Q(i,j)-xi[i]-xi[j]+nu; } } if (l == 1) { /* simply set alpha[0] = beta[0] / H[0, 0] */ if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0); else beta[0] = 0.; } else { #if defined(PETSC_MISSING_LAPACK_GELSS) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine."); #else ierr = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr); ierr = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr); ngmres->info = 0; ngmres->rcond = -1.; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info)); #else PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info)); #endif ierr = PetscFPTrapPop();CHKERRQ(ierr); if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS"); if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge"); #endif } for (i=0; i<l; i++) { if 
(PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output"); } alph_total = 0.; for (i = 0; i < l; i++) alph_total += beta[i]; ierr = VecCopy(XM,XA);CHKERRQ(ierr); ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr); /* check the validity of the step */ ierr = VecCopy(XA,Y);CHKERRQ(ierr); ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr); ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr); if (!ngmres->approxfunc) {ierr = SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr);} else { ierr = VecCopy(FM,FA);CHKERRQ(ierr); ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*
  KSPSolve_BCGSL - Solve routine for a KSP whose data is a KSP_BCGSL context.

  Input Parameter:
    ksp - the solver context; ksp->data is used as a KSP_BCGSL and ksp->work
          supplies the temporary vectors

  Outline (as visible in the code below):
    - The work vectors are assigned in order: vB, vRt, vTm, then ell+1 vectors
      for vvR, ell+1 vectors for vvU, and finally vXr (2*ell+6 entries in all).
    - Each pass of the outer loop advances the iteration counter k by ell.  It
      runs ell steps of the "BiCG part", then the "Polynomial part", which
      builds the (ell+1)x(ell+1) matrix MZa of inner products of the vvR
      vectors and solves a small system with it (Cholesky potrf/potrs, or an
      SVD based pseudo-inverse when bcgsl->pinv is set) to obtain AY0c.
    - When bcgsl->delta > 0 the "Accurate Update" section occasionally
      recomputes the residual from VX and accumulates VX into VXR.

  On return ksp->reason holds the outcome; breakdowns set a KSP_DIVERGED_*
  reason and return 0 immediately.  PETSc errors propagate through
  CHKERRQ/SETERRQ.

  NOTE(review): VX, VB, VRT, VTM, VVR, VVU, VXR, MZa, MZb, AY0c, AYlc and AYtc
  are not declared in this function; presumably they are macros defined
  elsewhere in the file that expand to members of ksp/bcgsl - confirm.
*/
static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar alpha, beta, omega, sigma;
  PetscScalar rho0, rho1;
  PetscReal kappa0, kappaA, kappa1;
  PetscReal ghat;
  PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool bUpdateX;
  PetscInt maxit;
  PetscInt h, i, j, k, vi, ell;
  PetscBLASInt ldMZ,bierr;
  PetscScalar utb;
  PetscReal max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  /* vi walks through ksp->work; vvR and vvU each take ell+1 consecutive entries */
  vi = 0;
  ell = bcgsl->ell;
  bcgsl->vB = ksp->work[vi];
  vi++;
  bcgsl->vRt = ksp->work[vi];
  vi++;
  bcgsl->vTm = ksp->work[vi];
  vi++;
  bcgsl->vvR = ksp->work+vi;
  vi += ell+1;
  bcgsl->vvU = ksp->work+vi;
  vi += ell+1;
  bcgsl->vXr = ksp->work[vi];
  vi++;
  /* leading dimension of the small dense matrices MZa/MZb */
  ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true = zeta0;

  /* the initial residual may already satisfy (or fail) the convergence test */
  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its = 0;
    ksp->rnorm = zeta0;
    ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0 = omega = 1;

  if (bcgsl->delta>0.0) {
    /* keep the initial guess in VXR and iterate on a zero VX, with VB set to the initial residual */
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  /* VRT is a copy of the initial residual and is not modified afterwards in this routine */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    /* a negative reason returns at once; a positive one leaves the loop and reaches the final test below */
    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        /* beta below would be zero and alpha would vanish: report breakdown */
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        /* alpha = rho1/sigma would divide by zero */
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        /* track the largest residual norm seen so far for the accurate-update test */
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      /* only a negative reason returns here; with a positive reason the inner loop keeps going */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
        ksp->its = k+j;
        ksp->rnorm = nrm0;
        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    /* fill entries 0..i of the i-th ldMZ-strided slice of MZa with the results of VecMDot(r_i, r_0..r_i) */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    /* the stored half is conjugated in place and mirrored into the other half */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    /* MZa is overwritten by the factorizations below; MZb keeps the original entries */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
        /* SVD of the trailing bell x bell block of MZa (starting at entry [1+ldMZ]) */
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
# if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
# else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
# endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        /* singular values below pinv_tol are skipped; the others contribute (u_i . b)/s_i times entries of v */
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
        /* Cholesky factorization of the trailing bell x bell block, then solve with right-hand side MZb[1..ell] */
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      /* bConvex is set and ell > 1: factor the (ell-1) x (ell-1) interior block and solve for two right-hand sides */
      PetscBLASInt ione = 1;
      PetscScalar aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      /* first solve: right-hand side taken from MZb[1..ell-1]; result framed as AY0c = (-1, ..., 0) */
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0] = -1;
      AY0c[bcgsl->ell] = 0.;

      /* second solve: right-hand side taken from the last ldMZ-strided slice of MZb; result framed as AYlc = (0, ..., -1) */
      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0] = 0.;
      AYlc[bcgsl->ell] = -1;

      /* AYtc = MZb*AY0c; kappa0 = sqrt(|AY0c . AYtc|) */
      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      /* kappaA uses AYtc while it still holds MZb*AY0c; it must be read before the next gemv overwrites AYtc */
      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      /* AYtc = MZb*AYlc; kappa1 = sqrt(|AYlc . AYtc|) */
      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        /* choose ghat and combine the two solutions: AY0c <- AY0c - ghat*AYlc */
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    /* omega is the last coefficient; while it is zero, the loop re-reads AY0c[h] for h = ell down to 1 */
    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }

    /* x <- x + sum_{i=1..ell} AY0c[i]*r_{i-1} */
    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    /* flip the sign of the coefficients for the u_0 and r_0 updates, then restore it */
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          /* fold the current correction into VXR and restart from VX = 0 with VB = current residual */
          ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  /* with delta > 0 the solution was split as VXR + VX; recombine it */
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  /* final convergence test; if it reports nothing, the iteration limit was reached */
  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
/*@C MatNullSpaceCreate - Creates a data structure used to project vectors out of null spaces. Collective on MPI_Comm Input Parameters: + comm - the MPI communicator associated with the object . has_cnst - PETSC_TRUE if the null space contains the constant vector; otherwise PETSC_FALSE . n - number of vectors (excluding constant vector) in null space - vecs - the vectors that span the null space (excluding the constant vector); these vectors must be orthonormal. These vectors are NOT copied, so do not change them after this call. You should free the array that you pass in and destroy the vectors (this will reduce the reference count for them by one). Output Parameter: . SP - the null space context Level: advanced Notes: See MatNullSpaceSetFunction() as an alternative way of providing the null space information instead of setting vecs. If has_cnst is PETSC_TRUE you do not need to pass a constant vector in as a fourth argument to this routine, nor do you need to pass in a function that eliminates the constant function into MatNullSpaceSetFunction(). Users manual sections: . 
sec_singular .keywords: PC, null space, create .seealso: MatNullSpaceDestroy(), MatNullSpaceRemove(), MatSetNullSpace(), MatNullSpace, MatNullSpaceSetFunction() @*/ PetscErrorCode MatNullSpaceCreate(MPI_Comm comm,PetscBool has_cnst,PetscInt n,const Vec vecs[],MatNullSpace *SP) { MatNullSpace sp; PetscErrorCode ierr; PetscInt i; PetscFunctionBegin; if (n < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",n); if (n) PetscValidPointer(vecs,4); for (i=0; i<n; i++) PetscValidHeaderSpecific(vecs[i],VEC_CLASSID,4); PetscValidPointer(SP,5); if (n) { for (i=0; i<n; i++) { /* prevent the user from changes values in the vector */ ierr = VecLockPush(vecs[i]);CHKERRQ(ierr); } } #if defined(PETSC_USE_DEBUG) if (n) { PetscScalar *dots; for (i=0; i<n; i++) { PetscReal norm; ierr = VecNorm(vecs[i],NORM_2,&norm);CHKERRQ(ierr); if (PetscAbsReal(norm - 1) > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PetscObjectComm((PetscObject)vecs[i]),PETSC_ERR_ARG_WRONG,"Vector %D must have 2-norm of 1.0, it is %g",i,(double)norm); } if (has_cnst) { for (i=0; i<n; i++) { PetscScalar sum; ierr = VecSum(vecs[i],&sum);CHKERRQ(ierr); if (PetscAbsScalar(sum) > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PetscObjectComm((PetscObject)vecs[i]),PETSC_ERR_ARG_WRONG,"Vector %D must be orthogonal to constant vector, inner product is %g",i,(double)PetscAbsScalar(sum)); } } ierr = PetscMalloc1(n-1,&dots);CHKERRQ(ierr); for (i=0; i<n-1; i++) { PetscInt j; ierr = VecMDot(vecs[i],n-i-1,vecs+i+1,dots);CHKERRQ(ierr); for (j=0;j<n-i-1;j++) { if (PetscAbsScalar(dots[j]) > PETSC_SQRT_MACHINE_EPSILON) SETERRQ3(PetscObjectComm((PetscObject)vecs[i]),PETSC_ERR_ARG_WRONG,"Vector %D must be orthogonal to vector %D, inner product is %g",i,i+j+1,(double)PetscAbsScalar(dots[j])); } } PetscFree(dots);CHKERRQ(ierr); } #endif *SP = NULL; ierr = MatInitializePackage();CHKERRQ(ierr); ierr = PetscHeaderCreate(sp,MAT_NULLSPACE_CLASSID,"MatNullSpace","Null 
space","Mat",comm,MatNullSpaceDestroy,MatNullSpaceView);CHKERRQ(ierr); sp->has_cnst = has_cnst; sp->n = n; sp->vecs = 0; sp->alpha = 0; sp->remove = 0; sp->rmctx = 0; if (n) { ierr = PetscMalloc1(n,&sp->vecs);CHKERRQ(ierr); ierr = PetscMalloc1(n,&sp->alpha);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)sp,n*(sizeof(Vec)+sizeof(PetscScalar)));CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscObjectReference((PetscObject)vecs[i]);CHKERRQ(ierr); sp->vecs[i] = vecs[i]; } } *SP = sp; PetscFunctionReturn(0); }
/*
  KSPSolve_GCR_cycle - Performs one restart cycle of at most ctx->restart
  iterations on a KSP whose data is a KSP_GCR context.

  Input Parameter:
    ksp - the solver context; ksp->vec_sol is updated in place and ctx->R is
          used (and updated) as the residual

  Each iteration k:
    - optionally calls the user's ctx->modifypc hook,
    - forms s = B^{-1} r (PCApply) and v = A s (MatMult),
    - orthogonalizes v against VV[0..k-1] and applies the same combination to s
      using SS[0..k-1],
    - scales v and s by 1/||v|| and updates x += (r.v) s, r -= (r.v) v.

  The loop stops early when the convergence test sets ksp->reason or when
  ksp->its reaches ksp->max_it (reason KSP_CONVERGED_ITS).  ctx->n_restarts is
  incremented before returning.
*/
PetscErrorCode KSPSolve_GCR_cycle( KSP ksp )
{
  KSP_GCR        *ctx = (KSP_GCR*)ksp->data;
  PetscErrorCode ierr;
  PetscScalar    r_dot_v;
  Mat            A, B;
  PC             pc;
  Vec            s,v,r;
  /*
     The residual norm is only computed once ksp->its exceeds ksp->chknorm, but it
     is stored, logged and monitored on every iteration; give it a defined value
     so those uses never read an indeterminate variable.
  */
  PetscReal      norm_r = 0.0,nrm;
  PetscInt       k, i, restart;
  Vec            x;
  PetscReal      res;

  PetscFunctionBegin;
  restart = ctx->restart;
  ierr    = KSPGetPC( ksp, &pc );CHKERRQ(ierr);
  ierr    = KSPGetOperators( ksp, &A, &B, 0 );CHKERRQ(ierr);

  x = ksp->vec_sol;
  r = ctx->R;

  for ( k=0; k<restart; k++ ) {
    v = ctx->VV[k];
    s = ctx->SS[k];
    if (ctx->modifypc) {
      ierr = (*ctx->modifypc)(ksp,ksp->its,ksp->rnorm,ctx->modifypc_ctx);CHKERRQ(ierr);
    }

    ierr = PCApply( pc, r, s );CHKERRQ(ierr); /* s = B^{-1} r */
    ierr = MatMult( A, s, v );CHKERRQ(ierr);  /* v = A s */

    /* orthogonalize v against the previous directions; s gets the same combination */
    ierr = VecMDot( v,k, ctx->VV, ctx->val );CHKERRQ(ierr);
    for (i=0; i<k; i++) ctx->val[i] = -ctx->val[i];
    ierr = VecMAXPY(v,k,ctx->val,ctx->VV);CHKERRQ(ierr); /* v = v - sum_{i=0}^{k-1} alpha_i v_i */
    ierr = VecMAXPY(s,k,ctx->val,ctx->SS);CHKERRQ(ierr); /* s = s - sum_{i=0}^{k-1} alpha_i s_i */

    /* one reduction gives both r.v and v.v; normalize with ||v|| */
    ierr    = VecDotNorm2(r,v,&r_dot_v,&nrm);CHKERRQ(ierr);
    nrm     = PetscSqrtReal(nrm);
    r_dot_v = r_dot_v/nrm;
    ierr    = VecScale( v, 1.0/nrm );CHKERRQ(ierr);
    ierr    = VecScale( s, 1.0/nrm );CHKERRQ(ierr);
    ierr    = VecAXPY( x,  r_dot_v, s );CHKERRQ(ierr);
    ierr    = VecAXPY( r, -r_dot_v, v );CHKERRQ(ierr);
    if (ksp->its > ksp->chknorm ) {
      ierr = VecNorm( r, NORM_2, &norm_r );CHKERRQ(ierr);
    }
    /* update the local counter and the global counter */
    ksp->its++;
    res        = norm_r;
    ksp->rnorm = res;

    KSPLogResidualHistory(ksp,res);
    ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);

    if ( ksp->its > ksp->chknorm ) {
      ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) break;
    }

    if ( ksp->its >= ksp->max_it ) {
      ksp->reason = KSP_CONVERGED_ITS;
      break;
    }
  }
  ctx->n_restarts++;
  PetscFunctionReturn(0);
}
/*
  KSPSolve_BCGSL - Solve routine for a KSP whose data is a KSP_BCGSL context
  (older revision of the routine, using PetscTruth and value-returning
  PetscBLASIntCast()).

  Input Parameter:
    ksp - the solver context; ksp->data is used as a KSP_BCGSL and ksp->work
          supplies the temporary vectors

  Outline:
    - Rejects unsupported norm type / preconditioner side combinations.
    - Assigns the work vectors in order: vB, vRt, vTm, ell+1 vectors for vvR,
      ell+1 vectors for vvU, then vXr.
    - Each pass of the outer loop advances k by ell: ell "BiCG part" steps are
      followed by the "Polynomial part", which builds the (ell+1)x(ell+1)
      matrix MZa from inner products of the vvR vectors and solves a small
      system by Cholesky factorization (LAPACK potrf/potrs) to obtain AY0c.
    - When bcgsl->delta > 0 the "Accurate Update" section occasionally
      recomputes the residual from VX and accumulates VX into VXR.

  Breakdowns set ksp->reason and leave the outer loop (bBombed carries a
  breakdown out of the inner loop).

  NOTE(review): after any break the convergence test at the end of the routine
  is invoked again with the same &ksp->reason, so whether a breakdown reason
  survives depends on the converged callback - confirm.

  NOTE(review): VX, VB, VRT, VTM, VVR, VVU, VXR, MZa, MZb, AY0c, AYlc and AYtc
  are not declared here; presumably they are macros defined elsewhere in the
  file that expand to members of ksp/bcgsl.
*/
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat, epsilon, abstol;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscTruth     bUpdateX;
  PetscTruth     bBombed = PETSC_FALSE;

  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;

  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
  if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
  if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

  /* set up temporary vectors */
  /* vi walks through ksp->work; vvR and vvU each take ell+1 consecutive entries */
  vi = 0;
  ell = bcgsl->ell;
  bcgsl->vB    = ksp->work[vi]; vi++;
  bcgsl->vRt   = ksp->work[vi]; vi++;
  bcgsl->vTm   = ksp->work[vi]; vi++;
  bcgsl->vvR   = ksp->work+vi; vi += ell+1;
  bcgsl->vvU   = ksp->work+vi; vi += ell+1;
  bcgsl->vXr   = ksp->work[vi]; vi++;
  /* leading dimension of the small dense matrices MZa/MZb */
  ldMZ = PetscBLASIntCast(ell+1);

  /* Prime the iterative solver */
  ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs); CHKERRQ(ierr);
  ierr = VecNorm(VVR[0], NORM_2, &zeta0); CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true = zeta0;

  /* the initial residual may already satisfy (or fail) the convergence test */
  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
  if (ksp->reason) {
    ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecSet(VVU[0],0.0); CHKERRQ(ierr);
  alpha = 0.;
  rho0 = omega = 1;

  if (bcgsl->delta>0.0) {
    /* keep the initial guess in VXR and iterate on a zero VX, with VB set to the initial residual */
    ierr = VecCopy(VX, VXR); CHKERRQ(ierr);
    ierr = VecSet(VX,0.0); CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB); CHKERRQ(ierr);
  }

  /* Life goes on */
  /* VRT is a copy of the initial residual and is not modified afterwards in this routine */
  ierr = VecCopy(VVR[0], VRT); CHKERRQ(ierr);
  zeta = zeta0;

  /* only maxit is used below; epsilon and abstol just receive the other tolerances */
  ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit); CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    KSPLogResidualHistory(ksp, zeta);
    KSPMonitor(ksp, ksp->its, zeta);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
    if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1); CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        bBombed = PETSC_TRUE;
        break;
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]); CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM); CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma); CHKERRQ(ierr);
      if (sigma == 0.0) {
        /* alpha = rho1/sigma would divide by zero */
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        bBombed = PETSC_TRUE;
        break;
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]); CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]); CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM); CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0); CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        /* track the largest residual norm seen so far for the accurate-update test */
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr);
        ksp->its   = k+j;
        ksp->rnorm = nrm0;
        ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr);
        break;
      }
    }

    if (bBombed==PETSC_TRUE) break;

    /* Polynomial part */
    /* fill entries 0..i of the i-th ldMZ-strided slice of MZa with the results of VecMDot(r_i, r_0..r_i) */
    for(i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]); CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for(i = 0; i <= bcgsl->ell; ++i) {
      for(j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    /* MZa is overwritten by the factorization below; MZb keeps the original entries */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar)); CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

      AY0c[0] = -1;
      LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
      /* LAPACK reports failure through bierr; ierr is always 0 here and must not be tested */
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        bBombed = PETSC_TRUE;
        break;
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar)); CHKERRQ(ierr);
      LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
    } else {
      /* bConvex is set and ell > 1: factor the (ell-1) x (ell-1) interior block and solve for two right-hand sides */
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

      LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
      /* LAPACK reports failure through bierr; ierr is always 0 here and must not be tested */
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        bBombed = PETSC_TRUE;
        break;
      }
      /* first solve: right-hand side from MZb[1..ell-1]; result framed as AY0c = (-1, ..., 0) */
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr);
      LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
      AY0c[0] = -1;
      AY0c[bcgsl->ell] = 0.;

      /* second solve: right-hand side from the last ldMZ-strided slice of MZb; result framed as AYlc = (0, ..., -1) */
      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr);
      LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

      AYlc[0] = 0.;
      AYlc[bcgsl->ell] = -1;

      /* AYtc = MZb*AY0c; kappa0 = sqrt(|AY0c . AYtc|) */
      BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

      kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = sqrt(kappa0);

      /* kappaA uses AYtc while it still holds MZb*AY0c; read it before the next gemv overwrites AYtc */
      kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

      /* AYtc = MZb*AYlc; kappa1 = sqrt(|AYlc . AYtc|) */
      BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

      kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = sqrt(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        /* choose ghat and combine the two solutions: AY0c <- AY0c - ghat*AYlc */
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    /* omega is the last coefficient; while it is zero, the loop re-reads AY0c[h] for h = ell down to 1 */
    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
      omega = AY0c[h];
    }
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      break;
    }

    /* x <- x + sum_{i=1..ell} AY0c[i]*r_{i-1} */
    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR); CHKERRQ(ierr);
    /* flip the sign of the coefficients for the u_0 and r_0 updates, then restore it */
    for (i=1; i<=bcgsl->ell; i++) {
      AY0c[i] *= -1.0;
    }
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1); CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1); CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) {
      AY0c[i] *= -1.0;
    }
    ierr = VecNorm(VVR[0], NORM_2, &zeta); CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM); CHKERRQ(ierr);
        ierr = VecAYPX(VVR[0], -1.0, VB); CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          /* fold the current correction into VXR and restart from VX = 0 with VB = current residual */
          ierr = VecAXPY(VXR,1.0,VX); CHKERRQ(ierr);
          ierr = VecSet(VX,0.0); CHKERRQ(ierr);
          ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  /* with delta > 0 the solution was split as VXR + VX; recombine it */
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR); CHKERRQ(ierr);
  }

  /* final convergence test; if it reports nothing, the iteration limit was reached */
  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;

  PetscFunctionReturn(0);
}