/*@C PetscMemoryView - Shows the amount of memory currently being used in a communicator. Collective on PetscViewer Input Parameter: + viewer - the viewer that defines the communicator - message - string printed before values Options Database: + -malloc - have PETSc track how much memory it has allocated - -memory_view - during PetscFinalize() have this routine called Level: intermediate Concepts: memory usage .seealso: PetscMallocDump(), PetscMemoryGetCurrentUsage(), PetscMemorySetGetMaximumUsage() @*/ PetscErrorCode PetscMemoryView(PetscViewer viewer,const char message[]) { PetscLogDouble allocated,allocatedmax,resident,residentmax,gallocated,gallocatedmax,gresident,gresidentmax,maxgallocated,maxgallocatedmax,maxgresident,maxgresidentmax; PetscLogDouble mingallocated,mingallocatedmax,mingresident,mingresidentmax; PetscErrorCode ierr; MPI_Comm comm; PetscFunctionBegin; if (!viewer) viewer = PETSC_VIEWER_STDOUT_WORLD; ierr = PetscMallocGetCurrentUsage(&allocated);CHKERRQ(ierr); ierr = PetscMallocGetMaximumUsage(&allocatedmax);CHKERRQ(ierr); ierr = PetscMemoryGetCurrentUsage(&resident);CHKERRQ(ierr); ierr = PetscMemoryGetMaximumUsage(&residentmax);CHKERRQ(ierr); if (residentmax > 0) residentmax = PetscMax(resident,residentmax); ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,message);CHKERRQ(ierr); if (resident && residentmax && allocated) { ierr = MPI_Reduce(&residentmax,&gresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&residentmax,&maxgresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&residentmax,&mingresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) process memory: total %5.4e max %5.4e min %5.4e\n",gresidentmax,maxgresidentmax,mingresidentmax);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Current process memory: total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr); ierr = MPI_Reduce(&allocatedmax,&gallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocatedmax,&maxgallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocatedmax,&mingallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocatedmax,maxgallocatedmax,mingallocatedmax);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr); } else if (resident && residentmax) { ierr = MPI_Reduce(&residentmax,&gresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&residentmax,&maxgresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&residentmax,&mingresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) process memory: total %5.4e max %5.4e min %5.4e\n",gresidentmax,maxgresidentmax,mingresidentmax);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Current process memory: total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr); } else if (resident && allocated) { ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Current process memory: total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Run with -memory_view to get maximum memory usage\n");CHKERRQ(ierr); } else if (allocated) { ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr); ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"Run with -memory_view to get maximum memory usage\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"OS cannot compute process memory\n");CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(viewer,"Run with -malloc to get statistics on PetscMalloc() calls\nOS cannot compute process memory\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode TaoSolve_NTR(Tao tao) { TAO_NTR *tr = (TAO_NTR *)tao->data; PC pc; KSPConvergedReason ksp_reason; TaoConvergedReason reason; PetscReal fmin, ftrial, prered, actred, kappa, sigma, beta; PetscReal tau, tau_1, tau_2, tau_max, tau_min, max_radius; PetscReal f, gnorm; PetscReal delta; PetscReal norm_d; PetscErrorCode ierr; PetscInt iter = 0; PetscInt bfgsUpdates = 0; PetscInt needH; PetscInt i_max = 5; PetscInt j_max = 1; PetscInt i, j, N, n, its; PetscFunctionBegin; if (tao->XL || tao->XU || tao->ops->computebounds) { ierr = PetscPrintf(((PetscObject)tao)->comm,"WARNING: Variable bounds have been set but will be ignored by ntr algorithm\n");CHKERRQ(ierr); } tao->trust = tao->trust0; /* Modify the radius if it is too large or small */ tao->trust = PetscMax(tao->trust, tr->min_radius); tao->trust = PetscMin(tao->trust, tr->max_radius); if (NTR_PC_BFGS == tr->pc_type && !tr->M) { ierr = VecGetLocalSize(tao->solution,&n);CHKERRQ(ierr); ierr = VecGetSize(tao->solution,&N);CHKERRQ(ierr); ierr = MatCreateLMVM(((PetscObject)tao)->comm,n,N,&tr->M);CHKERRQ(ierr); ierr = MatLMVMAllocateVectors(tr->M,tao->solution);CHKERRQ(ierr); } /* Check convergence criteria */ ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &f, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); needH = 1; ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, 1.0, &reason);CHKERRQ(ierr); if (reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); /* Create vectors for the limited memory preconditioner */ if ((NTR_PC_BFGS == tr->pc_type) && (BFGS_SCALE_BFGS != tr->bfgs_scale_type)) { if (!tr->Diag) { ierr = VecDuplicate(tao->solution, &tr->Diag);CHKERRQ(ierr); } } switch(tr->ksp_type) { case NTR_KSP_NASH: ierr = KSPSetType(tao->ksp, KSPNASH);CHKERRQ(ierr); if (tao->ksp->ops->setfromoptions) { (*tao->ksp->ops->setfromoptions)(tao->ksp); } break; case NTR_KSP_STCG: ierr = KSPSetType(tao->ksp, KSPSTCG);CHKERRQ(ierr); if (tao->ksp->ops->setfromoptions) { (*tao->ksp->ops->setfromoptions)(tao->ksp); } break; default: ierr = KSPSetType(tao->ksp, KSPGLTR);CHKERRQ(ierr); if (tao->ksp->ops->setfromoptions) { (*tao->ksp->ops->setfromoptions)(tao->ksp); } break; } /* Modify the preconditioner to use the bfgs approximation */ ierr = KSPGetPC(tao->ksp, &pc);CHKERRQ(ierr); switch(tr->pc_type) { case NTR_PC_NONE: ierr = PCSetType(pc, PCNONE);CHKERRQ(ierr); if (pc->ops->setfromoptions) { (*pc->ops->setfromoptions)(pc); } break; case NTR_PC_AHESS: ierr = PCSetType(pc, PCJACOBI);CHKERRQ(ierr); if (pc->ops->setfromoptions) { (*pc->ops->setfromoptions)(pc); } ierr = PCJacobiSetUseAbs(pc);CHKERRQ(ierr); break; case NTR_PC_BFGS: ierr = PCSetType(pc, PCSHELL);CHKERRQ(ierr); if (pc->ops->setfromoptions) { (*pc->ops->setfromoptions)(pc); } ierr = PCShellSetName(pc, "bfgs");CHKERRQ(ierr); ierr = PCShellSetContext(pc, tr->M);CHKERRQ(ierr); ierr = PCShellSetApply(pc, MatLMVMSolveShell);CHKERRQ(ierr); break; default: /* Use the pc method set by pc_type */ break; } /* Initialize trust-region radius */ switch(tr->init_type) { case NTR_INIT_CONSTANT: /* Use the initial radius specified */ break; case NTR_INIT_INTERPOLATION: /* Use the initial radius specified */ max_radius = 0.0; for (j = 0; j < j_max; ++j) { fmin = f; sigma = 0.0; if (needH) { ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); needH = 0; } for (i = 0; i < i_max; ++i) { ierr = VecCopy(tao->solution, tr->W);CHKERRQ(ierr); ierr = VecAXPY(tr->W, -tao->trust/gnorm, tao->gradient);CHKERRQ(ierr); ierr = TaoComputeObjective(tao, tr->W, &ftrial);CHKERRQ(ierr); if (PetscIsInfOrNanReal(ftrial)) { tau = tr->gamma1_i; } else { if (ftrial < fmin) { fmin = ftrial; sigma = -tao->trust / gnorm; } ierr = MatMult(tao->hessian, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = VecDot(tao->gradient, tao->stepdirection, &prered);CHKERRQ(ierr); prered = tao->trust * (gnorm - 0.5 * tao->trust * prered / (gnorm * gnorm)); actred = f - ftrial; if ((PetscAbsScalar(actred) <= tr->epsilon) && (PetscAbsScalar(prered) <= tr->epsilon)) { kappa = 1.0; } else { kappa = actred / prered; } tau_1 = tr->theta_i * gnorm * tao->trust / (tr->theta_i * gnorm * tao->trust + (1.0 - tr->theta_i) * prered - actred); tau_2 = tr->theta_i * gnorm * tao->trust / (tr->theta_i * gnorm * tao->trust - (1.0 + tr->theta_i) * prered + actred); tau_min = PetscMin(tau_1, tau_2); tau_max = PetscMax(tau_1, tau_2); if (PetscAbsScalar(kappa - 1.0) <= tr->mu1_i) { /* Great agreement */ max_radius = PetscMax(max_radius, tao->trust); if (tau_max < 1.0) { tau = tr->gamma3_i; } else if (tau_max > tr->gamma4_i) { tau = tr->gamma4_i; } else { tau = tau_max; } } else if (PetscAbsScalar(kappa - 1.0) <= tr->mu2_i) { /* Good agreement */ max_radius = PetscMax(max_radius, tao->trust); if (tau_max < tr->gamma2_i) { tau = tr->gamma2_i; } else if (tau_max > tr->gamma3_i) { tau = tr->gamma3_i; } else { tau = tau_max; } } else { /* Not good agreement */ if (tau_min > 1.0) { tau = tr->gamma2_i; } else if (tau_max < tr->gamma1_i) { tau = tr->gamma1_i; } else if ((tau_min < tr->gamma1_i) && (tau_max >= 1.0)) { tau = tr->gamma1_i; } else if ((tau_1 >= tr->gamma1_i) && (tau_1 < 1.0) && ((tau_2 < tr->gamma1_i) || (tau_2 >= 1.0))) { tau = tau_1; } else if ((tau_2 >= tr->gamma1_i) && (tau_2 < 1.0) && ((tau_1 < tr->gamma1_i) || (tau_2 >= 1.0))) { tau = tau_2; } else { tau = tau_max; } } } tao->trust = tau * tao->trust; } if (fmin < f) { f = fmin; ierr = VecAXPY(tao->solution, sigma, tao->gradient);CHKERRQ(ierr); ierr = TaoComputeGradient(tao,tao->solution, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(tao->gradient, NORM_2, &gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); needH = 1; ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, 1.0, &reason);CHKERRQ(ierr); if (reason != TAO_CONTINUE_ITERATING) { PetscFunctionReturn(0); } } } tao->trust = PetscMax(tao->trust, max_radius); /* Modify the radius if it is too large or small */ tao->trust = PetscMax(tao->trust, tr->min_radius); tao->trust = PetscMin(tao->trust, tr->max_radius); break; default: /* Norm of the first direction will initialize radius */ tao->trust = 0.0; break; } /* Set initial scaling for the BFGS preconditioner This step is done after computing the initial trust-region radius since the function value may have decreased */ if (NTR_PC_BFGS == tr->pc_type) { if (f != 0.0) { delta = 2.0 * PetscAbsScalar(f) / (gnorm*gnorm); } else { delta = 2.0 / (gnorm*gnorm); } ierr = MatLMVMSetDelta(tr->M,delta);CHKERRQ(ierr); } /* Have not converged; continue with Newton method */ while (reason == TAO_CONTINUE_ITERATING) { ++iter; /* Compute the Hessian */ if (needH) { ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); needH = 0; } if (NTR_PC_BFGS == tr->pc_type) { if (BFGS_SCALE_AHESS == tr->bfgs_scale_type) { /* Obtain diagonal for the bfgs preconditioner */ ierr = MatGetDiagonal(tao->hessian, tr->Diag);CHKERRQ(ierr); ierr = VecAbs(tr->Diag);CHKERRQ(ierr); ierr = VecReciprocal(tr->Diag);CHKERRQ(ierr); ierr = MatLMVMSetScale(tr->M,tr->Diag);CHKERRQ(ierr); } /* Update the limited memory preconditioner */ ierr = MatLMVMUpdate(tr->M, tao->solution, tao->gradient);CHKERRQ(ierr); ++bfgsUpdates; } while (reason == TAO_CONTINUE_ITERATING) { ierr = KSPSetOperators(tao->ksp, tao->hessian, tao->hessian_pre);CHKERRQ(ierr); /* Solve the trust region subproblem */ if (NTR_KSP_NASH == tr->ksp_type) { ierr = KSPNASHSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = KSPNASHGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr); } else if (NTR_KSP_STCG == tr->ksp_type) { ierr = KSPSTCGSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = KSPSTCGGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr); } else { /* NTR_KSP_GLTR */ ierr = KSPGLTRSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = KSPGLTRGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr); } if (0.0 == tao->trust) { /* Radius was uninitialized; use the norm of the direction */ if (norm_d > 0.0) { tao->trust = norm_d; /* Modify the radius if it is too large or small */ tao->trust = PetscMax(tao->trust, tr->min_radius); tao->trust = PetscMin(tao->trust, tr->max_radius); } else { /* The direction was bad; set radius to default value and re-solve the trust-region subproblem to get a direction */ tao->trust = tao->trust0; /* Modify the radius if it is too large or small */ tao->trust = PetscMax(tao->trust, tr->min_radius); tao->trust = PetscMin(tao->trust, tr->max_radius); if (NTR_KSP_NASH == tr->ksp_type) { ierr = KSPNASHSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = KSPNASHGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr); } else if (NTR_KSP_STCG == tr->ksp_type) { ierr = KSPSTCGSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = KSPSTCGGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr); } else { /* NTR_KSP_GLTR */ ierr = KSPGLTRSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = KSPGLTRGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr); } if (norm_d == 0.0) SETERRQ(PETSC_COMM_SELF,1, "Initial direction zero"); } } ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr); ierr = KSPGetConvergedReason(tao->ksp, &ksp_reason);CHKERRQ(ierr); if ((KSP_DIVERGED_INDEFINITE_PC == ksp_reason) && (NTR_PC_BFGS == tr->pc_type) && (bfgsUpdates > 1)) { /* Preconditioner is numerically indefinite; reset the approximate if using BFGS preconditioning. */ if (f != 0.0) { delta = 2.0 * PetscAbsScalar(f) / (gnorm*gnorm); } else { delta = 2.0 / (gnorm*gnorm); } ierr = MatLMVMSetDelta(tr->M, delta);CHKERRQ(ierr); ierr = MatLMVMReset(tr->M);CHKERRQ(ierr); ierr = MatLMVMUpdate(tr->M, tao->solution, tao->gradient);CHKERRQ(ierr); bfgsUpdates = 1; } if (NTR_UPDATE_REDUCTION == tr->update_type) { /* Get predicted reduction */ if (NTR_KSP_NASH == tr->ksp_type) { ierr = KSPNASHGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr); } else if (NTR_KSP_STCG == tr->ksp_type) { ierr = KSPSTCGGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr); } else { /* gltr */ ierr = KSPGLTRGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr); } if (prered >= 0.0) { /* The predicted reduction has the wrong sign. This cannot happen in infinite precision arithmetic. Step should be rejected! */ tao->trust = tr->alpha1 * PetscMin(tao->trust, norm_d); } else { /* Compute trial step and function value */ ierr = VecCopy(tao->solution,tr->W);CHKERRQ(ierr); ierr = VecAXPY(tr->W, 1.0, tao->stepdirection);CHKERRQ(ierr); ierr = TaoComputeObjective(tao, tr->W, &ftrial);CHKERRQ(ierr); if (PetscIsInfOrNanReal(ftrial)) { tao->trust = tr->alpha1 * PetscMin(tao->trust, norm_d); } else { /* Compute and actual reduction */ actred = f - ftrial; prered = -prered; if ((PetscAbsScalar(actred) <= tr->epsilon) && (PetscAbsScalar(prered) <= tr->epsilon)) { kappa = 1.0; } else { kappa = actred / prered; } /* Accept or reject the step and update radius */ if (kappa < tr->eta1) { /* Reject the step */ tao->trust = tr->alpha1 * PetscMin(tao->trust, norm_d); } else { /* Accept the step */ if (kappa < tr->eta2) { /* Marginal bad step */ tao->trust = tr->alpha2 * PetscMin(tao->trust, norm_d); } else if (kappa < tr->eta3) { /* Reasonable step */ tao->trust = tr->alpha3 * tao->trust; } else if (kappa < tr->eta4) { /* Good step */ tao->trust = PetscMax(tr->alpha4 * norm_d, tao->trust); } else { /* Very good step */ tao->trust = PetscMax(tr->alpha5 * norm_d, tao->trust); } break; } } } } else { /* Get predicted reduction */ if (NTR_KSP_NASH == tr->ksp_type) { ierr = KSPNASHGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr); } else if (NTR_KSP_STCG == tr->ksp_type) { ierr = KSPSTCGGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr); } else { /* gltr */ ierr = KSPGLTRGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr); } if (prered >= 0.0) { /* The predicted reduction has the wrong sign. This cannot happen in infinite precision arithmetic. Step should be rejected! */ tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d); } else { ierr = VecCopy(tao->solution, tr->W);CHKERRQ(ierr); ierr = VecAXPY(tr->W, 1.0, tao->stepdirection);CHKERRQ(ierr); ierr = TaoComputeObjective(tao, tr->W, &ftrial);CHKERRQ(ierr); if (PetscIsInfOrNanReal(ftrial)) { tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d); } else { ierr = VecDot(tao->gradient, tao->stepdirection, &beta);CHKERRQ(ierr); actred = f - ftrial; prered = -prered; if ((PetscAbsScalar(actred) <= tr->epsilon) && (PetscAbsScalar(prered) <= tr->epsilon)) { kappa = 1.0; } else { kappa = actred / prered; } tau_1 = tr->theta * beta / (tr->theta * beta - (1.0 - tr->theta) * prered + actred); tau_2 = tr->theta * beta / (tr->theta * beta + (1.0 + tr->theta) * prered - actred); tau_min = PetscMin(tau_1, tau_2); tau_max = PetscMax(tau_1, tau_2); if (kappa >= 1.0 - tr->mu1) { /* Great agreement; accept step and update radius */ if (tau_max < 1.0) { tao->trust = PetscMax(tao->trust, tr->gamma3 * norm_d); } else if (tau_max > tr->gamma4) { tao->trust = PetscMax(tao->trust, tr->gamma4 * norm_d); } else { tao->trust = PetscMax(tao->trust, tau_max * norm_d); } break; } else if (kappa >= 1.0 - tr->mu2) { /* Good agreement */ if (tau_max < tr->gamma2) { tao->trust = tr->gamma2 * PetscMin(tao->trust, norm_d); } else if (tau_max > tr->gamma3) { tao->trust = PetscMax(tao->trust, tr->gamma3 * norm_d); } else if (tau_max < 1.0) { tao->trust = tau_max * PetscMin(tao->trust, norm_d); } else { tao->trust = PetscMax(tao->trust, tau_max * norm_d); } break; } else { /* Not good agreement */ if (tau_min > 1.0) { tao->trust = tr->gamma2 * PetscMin(tao->trust, norm_d); } else if (tau_max < tr->gamma1) { tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d); } else if ((tau_min < tr->gamma1) && (tau_max >= 1.0)) { tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d); } else if ((tau_1 >= tr->gamma1) && (tau_1 < 1.0) && ((tau_2 < tr->gamma1) || (tau_2 >= 1.0))) { tao->trust = tau_1 * PetscMin(tao->trust, norm_d); } else if ((tau_2 >= tr->gamma1) && (tau_2 < 1.0) && ((tau_1 < tr->gamma1) || (tau_2 >= 1.0))) { tao->trust = tau_2 * PetscMin(tao->trust, norm_d); } else { tao->trust = tau_max * PetscMin(tao->trust, norm_d); } } } } } /* The step computed was not good and the radius was decreased. Monitor the radius to terminate. */ ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, tao->trust, &reason);CHKERRQ(ierr); } /* The radius may have been increased; modify if it is too large */ tao->trust = PetscMin(tao->trust, tr->max_radius); if (reason == TAO_CONTINUE_ITERATING) { ierr = VecCopy(tr->W, tao->solution);CHKERRQ(ierr); f = ftrial; ierr = TaoComputeGradient(tao, tao->solution, tao->gradient); ierr = VecNorm(tao->gradient, NORM_2, &gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); needH = 1; ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, tao->trust, &reason);CHKERRQ(ierr); } } PetscFunctionReturn(0); }
static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscReal res_norm,res,newnorm; PetscErrorCode ierr; PetscInt it = 0,j,k; PetscBool hapend = PETSC_FALSE; PetscFunctionBegin; if (itcount) *itcount = 0; ierr = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr); res = res_norm; *RS(0) = res_norm; /* check for the convergence */ ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = res; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); pgmres->it = it-2; ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); if (!res) { ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); for (; !ksp->reason; it++) { Vec Zcur,Znext; if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) { ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr); } /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */ Zcur = VEC_VV(it); /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */ Znext = VEC_VV(it+1); /* This iteration will compute Znext, update with a deferred correction once we know how * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */ if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */ ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr); } if (it > 1) { /* Complete the pending reduction */ ierr = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr); *HH(it-1,it-2) = newnorm; } if (it > 0) { /* Finish the reduction computing the latest column of H */ ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr); } if (it > 1) { /* normalize the base vector from two iterations ago, basis is complete up to here */ ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr); ierr = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr); pgmres->it = it-2; ksp->its++; ksp->rnorm = res; ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */ ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr); } if (ksp->reason) break; /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break; /* The it-2 column of H was not scaled when we computed Zcur, apply correction */ ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* And Znext computed in this iteration was computed using the under-scaled Zcur */ ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr); /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */ for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2); /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This * column is complete except for HH(it,it-1) which we won't know until the next iteration. */ *HH(it-1,it-1) /= *HH(it-1,it-2); } if (it > 0) { PetscScalar *work; if (!pgmres->orthogwork) {ierr = PetscMalloc((pgmres->max_k + 2)*sizeof(PetscScalar),&pgmres->orthogwork);CHKERRQ(ierr);} work = pgmres->orthogwork; /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is * * Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1] * * where * * Z[j] = sum_{k=0}^j V[k] * H[k,j-1] * * substituting * * Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1] * * rearranging the iteration space from row-column to column-row * * Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1] * * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already * been transformed to upper triangular form. */ for (k=0; k<it+1; k++) { work[k] = 0; for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1); } ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr); ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr); /* Orthogonalize Zcur against existing basis vectors. */ for (k=0; k<it; k++) work[k] = -*HH(k,it-1); ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr); /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */ /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */ ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr); } /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */ ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr); /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr); } if (itcount) *itcount = it-1; /* Number of iterations actually completed. */ /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* IFunction - Evaluates nonlinear function, F(U). Input Parameters: . ts - the TS context . U - input vector . ptr - optional user-defined context, as set by SNESSetFunction() Output Parameter: . F - function vector */ PetscErrorCode IFunction(TS ts,PetscReal ftime,Vec U,Vec Udot,Vec F,void *ptr) { AppCtx *appctx = (AppCtx*)ptr; DM da; PetscErrorCode ierr; PetscInt i,Mx,xs,xm; PetscReal hx,sx; PetscScalar rho,c,rhoxx,cxx,cx,rhox,kcxrhox; Field *u,*f,*udot; Vec localU; PetscFunctionBegin; ierr = TSGetDM(ts,&da);CHKERRQ(ierr); ierr = DMGetLocalVector(da,&localU);CHKERRQ(ierr); ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); hx = 1.0/(PetscReal)(Mx-1); sx = 1.0/(hx*hx); /* Scatter ghost points to local vector,using the 2-step process DMGlobalToLocalBegin(),DMGlobalToLocalEnd(). By placing code between these two statements, computations can be done while messages are in transition. */ ierr = DMGlobalToLocalBegin(da,U,INSERT_VALUES,localU);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(da,U,INSERT_VALUES,localU);CHKERRQ(ierr); /* Get pointers to vector data */ ierr = DMDAVecGetArrayRead(da,localU,&u);CHKERRQ(ierr); ierr = DMDAVecGetArrayRead(da,Udot,&udot);CHKERRQ(ierr); ierr = DMDAVecGetArray(da,F,&f);CHKERRQ(ierr); /* Get local grid boundaries */ ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); if (!xs) { f[0].rho = udot[0].rho; /* u[0].rho - 0.0; */ f[0].c = udot[0].c; /* u[0].c - 1.0; */ xs++; xm--; } if (xs+xm == Mx) { f[Mx-1].rho = udot[Mx-1].rho; /* u[Mx-1].rho - 1.0; */ f[Mx-1].c = udot[Mx-1].c; /* u[Mx-1].c - 0.0; */ xm--; } /* Compute function over the locally owned part of the grid */ for (i=xs; i<xs+xm; i++) { rho = u[i].rho; rhoxx = (-2.0*rho + u[i-1].rho + u[i+1].rho)*sx; c = u[i].c; cxx = (-2.0*c + u[i-1].c + u[i+1].c)*sx; if (!appctx->upwind) { rhox = .5*(u[i+1].rho - u[i-1].rho)/hx; cx = .5*(u[i+1].c - u[i-1].c)/hx; kcxrhox = appctx->kappa*(cxx*rho + cx*rhox); } else { kcxrhox = appctx->kappa*((u[i+1].c - u[i].c)*u[i+1].rho - (u[i].c - u[i-1].c)*u[i].rho)*sx; } f[i].rho = udot[i].rho - appctx->epsilon*rhoxx + kcxrhox - appctx->mu*PetscAbsScalar(rho)*(1.0 - rho)*PetscMax(0,PetscRealPart(c - appctx->cstar)) + appctx->beta*rho; f[i].c = udot[i].c - appctx->delta*cxx + appctx->lambda*c + appctx->alpha*rho*c/(appctx->gamma + c); } /* Restore vectors */ ierr = DMDAVecRestoreArrayRead(da,localU,&u);CHKERRQ(ierr); ierr = DMDAVecRestoreArrayRead(da,Udot,&udot);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(da,F,&f);CHKERRQ(ierr); ierr = DMRestoreLocalVector(da,&localU);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **args) { MatType mtype = MATMPIAIJ; /* matrix format */ Mat A,B; /* matrix */ PetscViewer fd; /* viewer */ char file[PETSC_MAX_PATH_LEN]; /* input file name */ PetscBool flg,viewMats,viewIS,viewVecs; PetscInt ierr,*nlocal,m,n; PetscMPIInt rank,size; MatPartitioning part; IS is,isn; Vec xin, xout; VecScatter scat; PetscInitialize(&argc,&args,(char*)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL, "-view_mats", &viewMats);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL, "-view_is", &viewIS);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL, "-view_vecs", &viewVecs);CHKERRQ(ierr); /* Determine file from which we read the matrix */ ierr = PetscOptionsGetString(NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr); /* Open binary file. Note that we use FILE_MODE_READ to indicate reading from this file. */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); /* Load the matrix and vector; then destroy the viewer. */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetType(A,mtype);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&xin);CHKERRQ(ierr); ierr = VecLoad(xin,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); if (viewMats) { if (!rank) printf("Original matrix:\n"); ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); } if (viewVecs) { if (!rank) printf("Original vector:\n"); ierr = VecView(xin,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Partition the graph of the matrix */ ierr = MatPartitioningCreate(PETSC_COMM_WORLD,&part);CHKERRQ(ierr); ierr = MatPartitioningSetAdjacency(part,A);CHKERRQ(ierr); ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr); /* get new processor owner number of each vertex */ ierr = MatPartitioningApply(part,&is);CHKERRQ(ierr); if (viewIS) { if (!rank) printf("IS1 - new processor ownership:\n"); ierr = ISView(is,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* get new global number of each old global number */ ierr = ISPartitioningToNumbering(is,&isn);CHKERRQ(ierr); if (viewIS) { if (!rank) printf("IS2 - new global numbering:\n"); ierr = ISView(isn,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* get number of new vertices for each processor */ ierr = PetscMalloc(size*sizeof(PetscInt),&nlocal);CHKERRQ(ierr); ierr = ISPartitioningCount(is,size,nlocal);CHKERRQ(ierr); ierr = ISDestroy(&is);CHKERRQ(ierr); /* get old global number of each new global number */ ierr = ISInvertPermutation(isn,nlocal[rank],&is);CHKERRQ(ierr); ierr = PetscFree(nlocal);CHKERRQ(ierr); ierr = ISDestroy(&isn);CHKERRQ(ierr); ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr); if (viewIS) { if (!rank) printf("IS3=inv(IS2) - old global number of each new global number:\n"); ierr = ISView(is,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* move the matrix rows to the new processes they have been assigned to by the permutation */ ierr = ISSort(is);CHKERRQ(ierr); ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); /* move the vector rows to the new processes they have been assigned to */ ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DECIDE,&xout);CHKERRQ(ierr); ierr = VecScatterCreate(xin,is,xout,NULL,&scat);CHKERRQ(ierr); ierr = VecScatterBegin(scat,xin,xout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(scat,xin,xout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterDestroy(&scat);CHKERRQ(ierr); ierr = ISDestroy(&is);CHKERRQ(ierr); if (viewMats) { if (!rank) printf("Partitioned matrix:\n"); ierr = MatView(B,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); } if (viewVecs) { if (!rank) printf("Mapped vector:\n"); ierr = VecView(xout,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } { PetscInt rstart,i,*nzd,*nzo,nzl,nzmax = 0,*ncols,nrow,j; Mat J; const PetscInt *cols; const PetscScalar *vals; PetscScalar *nvals; ierr = MatGetOwnershipRange(B,&rstart,NULL);CHKERRQ(ierr); ierr = PetscMalloc(2*m*sizeof(PetscInt),&nzd);CHKERRQ(ierr); ierr = PetscMemzero(nzd,2*m*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMalloc(2*m*sizeof(PetscInt),&nzo);CHKERRQ(ierr); ierr = PetscMemzero(nzo,2*m*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = MatGetRow(B,i+rstart,&nzl,&cols,NULL);CHKERRQ(ierr); for (j=0; j<nzl; j++) { if (cols[j] >= rstart && cols[j] < rstart+n) { nzd[2*i] += 2; nzd[2*i+1] += 2; } else { nzo[2*i] += 2; nzo[2*i+1] += 2; } } nzmax = PetscMax(nzmax,nzd[2*i]+nzo[2*i]); ierr = MatRestoreRow(B,i+rstart,&nzl,&cols,NULL);CHKERRQ(ierr); } ierr = MatCreateAIJ(PETSC_COMM_WORLD,2*m,2*m,PETSC_DECIDE,PETSC_DECIDE,0,nzd,0,nzo,&J);CHKERRQ(ierr); ierr = PetscInfo(0,"Created empty Jacobian matrix\n");CHKERRQ(ierr); ierr = PetscFree(nzd);CHKERRQ(ierr); ierr = PetscFree(nzo);CHKERRQ(ierr); ierr = PetscMalloc2(nzmax,PetscInt,&ncols,nzmax,PetscScalar,&nvals);CHKERRQ(ierr); ierr = PetscMemzero(nvals,nzmax*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = MatGetRow(B,i+rstart,&nzl,&cols,&vals);CHKERRQ(ierr); for (j=0; j<nzl; j++) { ncols[2*j] = 2*cols[j]; ncols[2*j+1] = 2*cols[j]+1; } nrow = 2*(i+rstart); ierr = MatSetValues(J,1,&nrow,2*nzl,ncols,nvals,INSERT_VALUES);CHKERRQ(ierr); nrow = 2*(i+rstart) + 1; ierr = MatSetValues(J,1,&nrow,2*nzl,ncols,nvals,INSERT_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(B,i+rstart,&nzl,&cols,&vals);CHKERRQ(ierr); } ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (viewMats) { if (!rank) printf("Jacobian matrix structure:\n"); ierr = MatView(J,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); } ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = PetscFree2(ncols,nvals);CHKERRQ(ierr); } /* Free work space. All PETSc objects should be destroyed when they are no longer needed. */ ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = VecDestroy(&xin);CHKERRQ(ierr); ierr = VecDestroy(&xout);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/* data1, odata1 and odata2 are packed in the format (for communication): data[0] = is_max, no of is data[1] = size of is[0] ... data[is_max] = size of is[is_max-1] data[is_max + 1] = data(is[0]) ... data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i]) ... data2 is packed in the format (for creating output is[]): data[0] = is_max, no of is data[1] = size of is[0] ... data[is_max] = size of is[is_max-1] data[is_max + 1] = data(is[0]) ... data[is_max + 1 + Mbs*i) = data(is[i]) ... */ static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[]) { Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len,*iwork; const PetscInt *idx_i; PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i; PetscInt Mbs,i,j,k,*odata1,*odata2; PetscInt proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est; PetscInt proc_end=0,len_unused,nodata2; PetscInt ois_max; /* max no of is[] in each of processor */ char *t_p; MPI_Comm comm; MPI_Request *s_waits1,*s_waits2,r_req; MPI_Status *s_status,r_status; PetscBT *table; /* mark indices of this processor's is[] */ PetscBT table_i; PetscBT otable; /* mark indices of other processors' is[] */ PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; IS garray_local,garray_gl; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm); CHKERRQ(ierr); size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1); CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2); CHKERRQ(ierr); /* create tables used in step 1: table[i] - mark c->garray of proc [i] step 3: table[i] - mark indices of is[i] when whose=MINE table[0] - mark incideces of is[] when whose=OTHER */ len = PetscMax(is_max, size); CHKERRQ(ierr); ierr = PetscMalloc2(len,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,&t_p); CHKERRQ(ierr); for (i=0; i<len; i++) { table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; } ierr = MPIU_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm); CHKERRQ(ierr); /* 1. Send this processor's is[] to other processors */ /*---------------------------------------------------*/ /* allocate spaces */ ierr = PetscMalloc1(is_max,&n); CHKERRQ(ierr); len = 0; for (i=0; i<is_max; i++) { ierr = ISGetLocalSize(is[i],&n[i]); CHKERRQ(ierr); len += n[i]; } if (!len) { is_max = 0; } else { len += 1 + is_max; /* max length of data1 for one processor */ } ierr = PetscMalloc1(size*len+1,&data1); CHKERRQ(ierr); ierr = PetscMalloc1(size,&data1_start); CHKERRQ(ierr); for (i=0; i<size; i++) data1_start[i] = data1 + i*len; ierr = PetscMalloc4(size,&len_s,size,&btable,size,&iwork,size+1,&Bowners); CHKERRQ(ierr); /* gather c->garray from all processors */ ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local); CHKERRQ(ierr); ierr = ISAllGather(garray_local, &garray_gl); CHKERRQ(ierr); ierr = ISDestroy(&garray_local); CHKERRQ(ierr); ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm); CHKERRQ(ierr); Bowners[0] = 0; for (i=0; i<size; i++) Bowners[i+1] += Bowners[i]; if (is_max) { /* hash table ctable which maps c->row to proc_id) */ ierr = PetscMalloc1(Mbs,&ctable); CHKERRQ(ierr); for (proc_id=0,j=0; proc_id<size; proc_id++) { for (; j<C->rmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id; } /* hash tables marking c->garray */ ierr = ISGetIndices(garray_gl,&idx_i); CHKERRQ(ierr); for (i=0; i<size; i++) { table_i = table[i]; ierr = PetscBTMemzero(Mbs,table_i); CHKERRQ(ierr); for (j = Bowners[i]; j<Bowners[i+1]; j++) { /* go through B cols of proc[i]*/ ierr = PetscBTSet(table_i,idx_i[j]); CHKERRQ(ierr); } } ierr = ISRestoreIndices(garray_gl,&idx_i); CHKERRQ(ierr); } /* if (is_max) */ ierr = ISDestroy(&garray_gl); CHKERRQ(ierr); /* evaluate communication - mesg to who, length, and buffer space */ for (i=0; i<size; i++) len_s[i] = 0; /* header of data1 */ for (proc_id=0; proc_id<size; proc_id++) { iwork[proc_id] = 0; *data1_start[proc_id] = is_max; data1_start[proc_id]++; for (j=0; j<is_max; j++) { if (proc_id == rank) { *data1_start[proc_id] = n[j]; } else { *data1_start[proc_id] = 0; } data1_start[proc_id]++; } } for (i=0; i<is_max; i++) { ierr = ISGetIndices(is[i],&idx_i); CHKERRQ(ierr); for (j=0; j<n[i]; j++) { idx = idx_i[j]; *data1_start[rank] = idx; data1_start[rank]++; /* for local proccessing */ proc_end = ctable[idx]; for (proc_id=0; proc_id<=proc_end; proc_id++) { /* for others to process */ if (proc_id == rank) continue; /* done before this loop */ if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need for sending idx to [proc_id] */ *data1_start[proc_id] = idx; data1_start[proc_id]++; len_s[proc_id]++; } } /* update header data */ for (proc_id=0; proc_id<size; proc_id++) { if (proc_id== rank) continue; *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id]; iwork[proc_id] = len_s[proc_id]; } ierr = ISRestoreIndices(is[i],&idx_i); CHKERRQ(ierr); } nrqs = 0; nrqr = 0; for (i=0; i<size; i++) { data1_start[i] = data1 + i*len; if (len_s[i]) { nrqs++; len_s[i] += 1 + is_max; /* add no. of header msg */ } } for (i=0; i<is_max; i++) { ierr = ISDestroy(&is[i]); CHKERRQ(ierr); } ierr = PetscFree(n); CHKERRQ(ierr); ierr = PetscFree(ctable); CHKERRQ(ierr); /* Determine the number of messages to expect, their lengths, from from-ids */ ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&nrqr); CHKERRQ(ierr); ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1); CHKERRQ(ierr); /* Now post the sends */ ierr = PetscMalloc2(size,&s_waits1,size,&s_waits2); CHKERRQ(ierr); k = 0; for (proc_id=0; proc_id<size; proc_id++) { /* send data1 to processor [proc_id] */ if (len_s[proc_id]) { ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k); CHKERRQ(ierr); k++; } } /* 2. Receive other's is[] and process. Then send back */ /*-----------------------------------------------------*/ len = 0; for (i=0; i<nrqr; i++) { if (len_r1[i] > len) len = len_r1[i]; } ierr = PetscFree(len_r1); CHKERRQ(ierr); ierr = PetscFree(id_r1); CHKERRQ(ierr); for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0; ierr = PetscMalloc1(len+1,&odata1); CHKERRQ(ierr); ierr = PetscMalloc1(size,&odata2_ptr); CHKERRQ(ierr); ierr = PetscBTCreate(Mbs,&otable); CHKERRQ(ierr); len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */ len_est = 2*len_max; /* estimated space of storing is[] for all receiving messages */ ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr); nodata2 = 0; /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */ odata2_ptr[nodata2] = odata2; len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max */ k = 0; while (k < nrqr) { /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status); CHKERRQ(ierr); if (flag) { ierr = MPI_Get_count(&r_status,MPIU_INT,&len); CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req); CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status); CHKERRQ(ierr); /* Process messages */ /* make sure there is enough unused space in odata2 array */ if (len_unused < len_max) { /* allocate more space for odata2 */ ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable); CHKERRQ(ierr); len = 1 + odata2[0]; for (i=0; i<odata2[0]; i++) len += odata2[1 + i]; /* Send messages back */ ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k); CHKERRQ(ierr); k++; odata2 += len; len_unused -= len; len_s[proc_id] = len; /* num of messages sending back to [proc_id] by this proc */ } } ierr = PetscFree(odata1); CHKERRQ(ierr); ierr = PetscBTDestroy(&otable); CHKERRQ(ierr); /* 3. Do local work on this processor's is[] */ /*-------------------------------------------*/ /* make sure there is enough unused space in odata2(=data) array */ len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */ if (len_unused < len_max) { /* allocate more space for odata2 */ ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; } data = odata2; ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table); CHKERRQ(ierr); ierr = PetscFree(data1_start); CHKERRQ(ierr); /* 4. Receive work done on other processors, then merge */ /*------------------------------------------------------*/ /* get max number of messages that this processor expects to recv */ ierr = MPIU_Allreduce(len_s,iwork,size,MPI_INT,MPI_MAX,comm); CHKERRQ(ierr); ierr = PetscMalloc1(iwork[rank]+1,&data2); CHKERRQ(ierr); ierr = PetscFree4(len_s,btable,iwork,Bowners); CHKERRQ(ierr); k = 0; while (k < nrqs) { /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); CHKERRQ(ierr); if (flag) { ierr = MPI_Get_count(&r_status,MPIU_INT,&len); CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req); CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status); CHKERRQ(ierr); if (len > 1+is_max) { /* Add data2 into data */ data2_i = data2 + 1 + is_max; for (i=0; i<is_max; i++) { table_i = table[i]; data_i = data + 1 + is_max + Mbs*i; isz = data[1+i]; for (j=0; j<data2[1+i]; j++) { col = data2_i[j]; if (!PetscBTLookupSet(table_i,col)) data_i[isz++] = col; } data[1+i] = isz; if (i < is_max - 1) data2_i += data2[1+i]; } } k++; } } ierr = PetscFree(data2); CHKERRQ(ierr); ierr = PetscFree2(table,t_p); CHKERRQ(ierr); /* phase 1 sends are complete */ ierr = PetscMalloc1(size,&s_status); CHKERRQ(ierr); if (nrqs) { ierr = MPI_Waitall(nrqs,s_waits1,s_status); CHKERRQ(ierr); } ierr = PetscFree(data1); CHKERRQ(ierr); /* phase 2 sends are complete */ if (nrqr) { ierr = MPI_Waitall(nrqr,s_waits2,s_status); CHKERRQ(ierr); } ierr = PetscFree2(s_waits1,s_waits2); CHKERRQ(ierr); ierr = PetscFree(s_status); CHKERRQ(ierr); /* 5. Create new is[] */ /*--------------------*/ for (i=0; i<is_max; i++) { data_i = data + 1 + is_max + Mbs*i; ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,PETSC_COPY_VALUES,is+i); CHKERRQ(ierr); } for (k=0; k<=nodata2; k++) { ierr = PetscFree(odata2_ptr[k]); CHKERRQ(ierr); } ierr = PetscFree(odata2_ptr); CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode NEPSolve_RII(NEP nep) { PetscErrorCode ierr; Mat T=nep->function,Tp=nep->jacobian,Tsigma; Vec u,r=nep->work[0],delta=nep->work[1]; PetscScalar lambda,a1,a2; PetscReal relerr; PetscBool hascopy; KSPConvergedReason kspreason; PetscFunctionBegin; /* get initial approximation of eigenvalue and eigenvector */ ierr = NEPGetDefaultShift(nep,&lambda);CHKERRQ(ierr); if (!nep->nini) { ierr = BVSetRandomColumn(nep->V,0,nep->rand);CHKERRQ(ierr); } ierr = BVGetColumn(nep->V,0,&u);CHKERRQ(ierr); /* correct eigenvalue approximation: lambda = lambda - (u'*T*u)/(u'*Tp*u) */ ierr = NEPComputeFunction(nep,lambda,T,T);CHKERRQ(ierr); ierr = MatMult(T,u,r);CHKERRQ(ierr); ierr = VecDot(u,r,&a1);CHKERRQ(ierr); ierr = NEPApplyJacobian(nep,lambda,u,delta,r,Tp);CHKERRQ(ierr); ierr = VecDot(u,r,&a2);CHKERRQ(ierr); lambda = lambda - a1/a2; /* prepare linear solver */ ierr = MatDuplicate(T,MAT_COPY_VALUES,&Tsigma);CHKERRQ(ierr); ierr = KSPSetOperators(nep->ksp,Tsigma,Tsigma);CHKERRQ(ierr); /* Restart loop */ while (nep->reason == NEP_CONVERGED_ITERATING) { nep->its++; /* update preconditioner and set adaptive tolerance */ if (nep->lag && !(nep->its%nep->lag) && nep->its>2*nep->lag && relerr<1e-2) { ierr = MatHasOperation(T,MATOP_COPY,&hascopy);CHKERRQ(ierr); if (hascopy) { ierr = MatCopy(T,Tsigma,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDestroy(&Tsigma);CHKERRQ(ierr); ierr = MatDuplicate(T,MAT_COPY_VALUES,&Tsigma);CHKERRQ(ierr); } ierr = KSPSetOperators(nep->ksp,Tsigma,Tsigma);CHKERRQ(ierr); } if (!nep->cctol) { nep->ktol = PetscMax(nep->ktol/2.0,PETSC_MACHINE_EPSILON*10.0); ierr = KSPSetTolerances(nep->ksp,nep->ktol,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr); } /* form residual, r = T(lambda)*u */ ierr = NEPApplyFunction(nep,lambda,u,delta,r,T,T);CHKERRQ(ierr); /* convergence test */ ierr = VecNorm(r,NORM_2,&relerr);CHKERRQ(ierr); nep->errest[nep->nconv] = relerr; nep->eigr[nep->nconv] = lambda; if (relerr<=nep->rtol) { nep->nconv = nep->nconv + 1; nep->reason = NEP_CONVERGED_FNORM_RELATIVE; } ierr = NEPMonitor(nep,nep->its,nep->nconv,nep->eigr,nep->errest,1);CHKERRQ(ierr); if (!nep->nconv) { /* eigenvector correction: delta = T(sigma)\r */ ierr = NEP_KSPSolve(nep,r,delta);CHKERRQ(ierr); ierr = KSPGetConvergedReason(nep->ksp,&kspreason);CHKERRQ(ierr); if (kspreason<0) { ierr = PetscInfo1(nep,"iter=%D, linear solve failed, stopping solve\n",nep->its);CHKERRQ(ierr); nep->reason = NEP_DIVERGED_LINEAR_SOLVE; break; } /* update eigenvector: u = u - delta */ ierr = VecAXPY(u,-1.0,delta);CHKERRQ(ierr); /* normalize eigenvector */ ierr = VecNormalize(u,NULL);CHKERRQ(ierr); /* correct eigenvalue: lambda = lambda - (u'*T*u)/(u'*Tp*u) */ ierr = NEPApplyFunction(nep,lambda,u,delta,r,T,T);CHKERRQ(ierr); ierr = VecDot(u,r,&a1);CHKERRQ(ierr); ierr = NEPApplyJacobian(nep,lambda,u,delta,r,Tp);CHKERRQ(ierr); ierr = VecDot(u,r,&a2);CHKERRQ(ierr); lambda = lambda - a1/a2; } if (nep->its >= nep->max_it) nep->reason = NEP_DIVERGED_MAX_IT; } ierr = MatDestroy(&Tsigma);CHKERRQ(ierr); ierr = BVRestoreColumn(nep->V,0,&u);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ MatCreateSubMatrixBanded - Extract the banded subset B of A such that ||Vec(B)||_1 >= frac ||Vec(A)||_1 Input Parameters: + A - The matrix . kmax - The maximum half-bandwidth, so 2k+1 diagonals may be extracted - frac - The norm fraction for the extracted band Output Parameters: . B - The banded submatrix Level: intermediate .seealso: MatChop() @*/ PetscErrorCode MatCreateSubMatrixBanded(Mat A, PetscInt kmax, PetscReal frac, Mat *B) { Vec weight; PetscScalar *w, *newVals; PetscReal normA = 0.0, normB = 0.0; PetscInt rStart, rEnd, r; PetscInt *dnnz, *onnz, *newCols; PetscInt m, n, M, N, k, maxcols = 0; PetscErrorCode ierr; PetscFunctionBegin; /* Create weight vector */ ierr = MatGetVecs(A, NULL, &weight);CHKERRQ(ierr); ierr = VecSet(weight, 0.0);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); ierr = VecGetArray(weight, &w);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { const PetscScalar *vals; const PetscInt *cols; PetscInt ncols, c; ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); for (c = 0; c < ncols; ++c) { w[abs(r - cols[c])] += PetscAbsScalar(vals[c]); normA += PetscAbsScalar(vals[c]); } ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); } ierr = VecRestoreArray(weight, &w);CHKERRQ(ierr); /* Determine bandwidth */ ierr = PetscPrintf(PETSC_COMM_WORLD, "||Vec(A)||_1: %g\n", normA);CHKERRQ(ierr); ierr = VecGetArray(weight, &w);CHKERRQ(ierr); for (k = 0; k < kmax; ++k) { normB += w[k]; if (normB >= frac*normA) break; } ierr = VecRestoreArray(weight, &w);CHKERRQ(ierr); ierr = VecDestroy(&weight);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Bandwidth of %d%% band: %d frac: %g\n", (PetscInt) (frac*100), k, normB/normA);CHKERRQ(ierr); /* Extract band */ ierr = MatCreate(PetscObjectComm((PetscObject) A), B);CHKERRQ(ierr); ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); ierr = MatSetSizes(*B, m, n, M, N);CHKERRQ(ierr); ierr = PetscMalloc2(m,PetscInt,&dnnz,m,PetscInt,&onnz);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { const PetscScalar *vals; const PetscInt *cols; PetscInt ncols, c; dnnz[r-rStart] = onnz[r-rStart] = 0; ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); for (c = 0; c < ncols; ++c) { if (abs(cols[c] - r) > k) continue; if ((cols[c] >= rStart) && (cols[c] < rEnd)) ++dnnz[r-rStart]; else ++onnz[r-rStart]; } maxcols = PetscMax(ncols, maxcols); ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); } ierr = MatSetFromOptions(*B);CHKERRQ(ierr); ierr = MatXAIJSetPreallocation(*B, 1, dnnz, onnz, NULL, NULL);CHKERRQ(ierr); ierr = MatSetUp(*B);CHKERRQ(ierr); ierr = PetscMalloc2(maxcols,PetscInt,&newCols,maxcols,PetscScalar,&newVals);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { const PetscScalar *vals; const PetscInt *cols; PetscInt ncols, newcols, c; ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); for (c = 0, newcols = 0; c < ncols; ++c) { if (abs(cols[c] - r) > k) continue; newCols[newcols] = cols[c]; newVals[newcols] = vals[c]; ++newcols; if (newcols > maxcols) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Overran work space"); } ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); ierr = MatSetValues(*B, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree2(newCols, newVals);CHKERRQ(ierr); ierr = PetscFree2(dnnz, onnz);CHKERRQ(ierr); ierr = MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ MatLaplacian - Form the matrix Laplacian, with all values in the matrix less than the tolerance set to zero Input Parameters: + A - The matrix - tol - The zero tolerance Output Parameters: . L - The graph Laplacian matrix Level: intermediate .seealso: MatChop() @*/ PetscErrorCode MatLaplacian(Mat A, PetscReal tol, Mat *L) { PetscScalar *newVals; PetscInt *newCols; PetscInt rStart, rEnd, r, colMax = 0; PetscInt *dnnz, *onnz; PetscInt m, n, M, N; PetscErrorCode ierr; PetscFunctionBegin; ierr = MatCreate(PetscObjectComm((PetscObject) A), L);CHKERRQ(ierr); ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); ierr = MatSetSizes(*L, m, n, M, N);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); ierr = PetscMalloc2(m,PetscInt,&dnnz,m,PetscInt,&onnz);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { const PetscScalar *vals; const PetscInt *cols; PetscInt ncols, newcols, c; PetscBool hasdiag = PETSC_FALSE; dnnz[r-rStart] = onnz[r-rStart] = 0; ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); for (c = 0, newcols = 0; c < ncols; ++c) { if (cols[c] == r) { ++newcols; hasdiag = PETSC_TRUE; ++dnnz[r-rStart]; } else if (PetscAbsScalar(vals[c]) >= tol) { if ((cols[c] >= rStart) && (cols[c] < rEnd)) ++dnnz[r-rStart]; else ++onnz[r-rStart]; ++newcols; } } if (!hasdiag) {++newcols; ++dnnz[r-rStart];} colMax = PetscMax(colMax, newcols);CHKERRQ(ierr); ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); } ierr = MatSetFromOptions(*L);CHKERRQ(ierr); ierr = MatXAIJSetPreallocation(*L, 1, dnnz, onnz, NULL, NULL);CHKERRQ(ierr); ierr = MatSetUp(*L);CHKERRQ(ierr); ierr = PetscMalloc2(colMax,PetscInt,&newCols,colMax,PetscScalar,&newVals);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { const PetscScalar *vals; const PetscInt *cols; PetscInt ncols, newcols, c; PetscBool hasdiag = PETSC_FALSE; ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); for (c = 0, newcols = 0; c < ncols; ++c) { if (cols[c] == r) { newCols[newcols] = cols[c]; newVals[newcols] = dnnz[r-rStart]+onnz[r-rStart]-1; ++newcols; hasdiag = PETSC_TRUE; } else if (PetscAbsScalar(vals[c]) >= tol) { newCols[newcols] = cols[c]; newVals[newcols] = -1.0; ++newcols; } if (newcols > colMax) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Overran work space"); } if (!hasdiag) { newCols[newcols] = r; newVals[newcols] = dnnz[r-rStart]+onnz[r-rStart]-1; ++newcols; } ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); ierr = MatSetValues(*L, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree2(dnnz,onnz);CHKERRQ(ierr); ierr = MatAssemblyBegin(*L, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(*L, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode SVDSolve_TRLanczos(SVD svd) { PetscErrorCode ierr; SVD_TRLANCZOS *lanczos = (SVD_TRLANCZOS*)svd->data; PetscReal *alpha,*beta,lastbeta,norm; PetscScalar *Q,*swork=NULL,*w; PetscInt i,k,l,nv,ld; Mat U,VT; PetscBool conv; BVOrthogType orthog; PetscFunctionBegin; /* allocate working space */ ierr = DSGetLeadingDimension(svd->ds,&ld);CHKERRQ(ierr); ierr = BVGetOrthogonalization(svd->V,&orthog,NULL,NULL);CHKERRQ(ierr); ierr = PetscMalloc1(ld,&w);CHKERRQ(ierr); if (lanczos->oneside && orthog == BV_ORTHOG_CGS) { ierr = PetscMalloc1(svd->ncv+1,&swork);CHKERRQ(ierr); } /* normalize start vector */ if (!svd->nini) { ierr = BVSetRandomColumn(svd->V,0,svd->rand);CHKERRQ(ierr); ierr = BVNormColumn(svd->V,0,NORM_2,&norm);CHKERRQ(ierr); ierr = BVScaleColumn(svd->V,0,1.0/norm);CHKERRQ(ierr); } l = 0; while (svd->reason == SVD_CONVERGED_ITERATING) { svd->its++; /* inner loop */ nv = PetscMin(svd->nconv+svd->mpd,svd->ncv); ierr = BVSetActiveColumns(svd->V,svd->nconv,nv);CHKERRQ(ierr); ierr = BVSetActiveColumns(svd->U,svd->nconv,nv);CHKERRQ(ierr); ierr = DSGetArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr); beta = alpha + ld; if (lanczos->oneside) { if (orthog == BV_ORTHOG_MGS) { ierr = SVDOneSideTRLanczosMGS(svd,alpha,beta,svd->V,svd->U,svd->nconv,l,nv);CHKERRQ(ierr); } else { ierr = SVDOneSideTRLanczosCGS(svd,alpha,beta,svd->V,svd->U,svd->nconv,l,nv,swork);CHKERRQ(ierr); } } else { ierr = SVDTwoSideLanczos(svd,alpha,beta,svd->V,svd->U,svd->nconv+l,nv);CHKERRQ(ierr); } lastbeta = beta[nv-1]; ierr = DSRestoreArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr); ierr = BVScaleColumn(svd->V,nv,1.0/lastbeta);CHKERRQ(ierr); /* compute SVD of general matrix */ ierr = DSSetDimensions(svd->ds,nv,nv,svd->nconv,svd->nconv+l);CHKERRQ(ierr); if (l==0) { ierr = DSSetState(svd->ds,DS_STATE_INTERMEDIATE);CHKERRQ(ierr); } else { ierr = DSSetState(svd->ds,DS_STATE_RAW);CHKERRQ(ierr); } ierr = DSSolve(svd->ds,w,NULL);CHKERRQ(ierr); ierr = DSSort(svd->ds,w,NULL,NULL,NULL,NULL);CHKERRQ(ierr); /* compute error estimates */ k = 0; conv = PETSC_TRUE; ierr = DSGetArray(svd->ds,DS_MAT_U,&Q);CHKERRQ(ierr); ierr = DSGetArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr); beta = alpha + ld; for (i=svd->nconv;i<nv;i++) { svd->sigma[i] = PetscRealPart(w[i]); beta[i] = PetscRealPart(Q[nv-1+i*ld])*lastbeta; svd->errest[i] = PetscAbsScalar(beta[i]); if (svd->sigma[i] > svd->tol) svd->errest[i] /= svd->sigma[i]; if (conv) { if (svd->errest[i] < svd->tol) k++; else conv = PETSC_FALSE; } } ierr = DSRestoreArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr); ierr = DSRestoreArray(svd->ds,DS_MAT_U,&Q);CHKERRQ(ierr); /* check convergence and update l */ if (svd->its >= svd->max_it) svd->reason = SVD_DIVERGED_ITS; if (svd->nconv+k >= svd->nsv) svd->reason = SVD_CONVERGED_TOL; if (svd->reason != SVD_CONVERGED_ITERATING) l = 0; else l = PetscMax((nv-svd->nconv-k)/2,0); /* compute converged singular vectors and restart vectors */ ierr = DSGetMat(svd->ds,DS_MAT_VT,&VT);CHKERRQ(ierr); ierr = BVMultInPlaceTranspose(svd->V,VT,svd->nconv,svd->nconv+k+l);CHKERRQ(ierr); ierr = MatDestroy(&VT);CHKERRQ(ierr); ierr = DSGetMat(svd->ds,DS_MAT_U,&U);CHKERRQ(ierr); ierr = BVMultInPlace(svd->U,U,svd->nconv,svd->nconv+k+l);CHKERRQ(ierr); ierr = MatDestroy(&U);CHKERRQ(ierr); /* copy the last vector to be the next initial vector */ if (svd->reason == SVD_CONVERGED_ITERATING) { ierr = BVCopyColumn(svd->V,nv,svd->nconv+k+l);CHKERRQ(ierr); } svd->nconv += k; ierr = SVDMonitor(svd,svd->its,svd->nconv,svd->sigma,svd->errest,nv);CHKERRQ(ierr); } /* orthonormalize U columns in one side method */ if (lanczos->oneside) { for (i=0;i<svd->nconv;i++) { ierr = BVOrthogonalizeColumn(svd->U,i,NULL,&norm,NULL);CHKERRQ(ierr); ierr = BVScaleColumn(svd->U,i,1.0/norm);CHKERRQ(ierr); } } /* free working space */ ierr = PetscFree(w);CHKERRQ(ierr); if (swork) { ierr = PetscFree(swork);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/* FormFunctionLocal - Form the local residual F from the local input X Input Parameters: + dm - The mesh . X - Local input vector - user - The user context Output Parameter: . F - Local output vector Note: We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator, like a GPU, or vectorize on a multicore machine. .seealso: FormJacobianLocal() */ PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user) { const PetscInt debug = user->debug; const PetscInt dim = user->dim; PetscReal *coords, *v0, *J, *invJ, *detJ; PetscScalar *elemVec, *u; PetscInt cellDof = 0; PetscInt maxQuad = 0; PetscInt jacSize = dim*dim; PetscInt numCells, cStart, cEnd, c, field, d; PetscErrorCode ierr; PetscFunctionBeginUser; ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr); ierr = VecSet(F, 0.0);CHKERRQ(ierr); ierr = DMDAGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr); numCells = cEnd - cStart; for (field = 0; field < numFields; ++field) { cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents; maxQuad = PetscMax(maxQuad, user->q[field].numQuadPoints); } for (d = 0; d < dim; ++d) jacSize *= maxQuad; ierr = PetscMalloc3(dim,&coords,dim,&v0,jacSize,&J);CHKERRQ(ierr); ierr = PetscMalloc4(numCells*cellDof,&u,numCells*jacSize,&invJ,numCells*maxQuad,&detJ,numCells*cellDof,&elemVec);CHKERRQ(ierr); for (c = cStart; c < cEnd; ++c) { PetscScalar *x = NULL; PetscInt i; ierr = DMDAComputeCellGeometry(dm, c, &user->q[0], v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr); if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c); ierr = DMDAVecGetClosure(dm, NULL, X, c, &x);CHKERRQ(ierr); for (i = 0; i < cellDof; ++i) u[c*cellDof+i] = x[i]; } for (field = 0; field < numFields; ++field) { const PetscInt numQuadPoints = user->q[field].numQuadPoints; const PetscInt numBasisFuncs = user->q[field].numBasisFuncs; void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field]; void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field]; /* Conforming batches */ PetscInt blockSize = numBasisFuncs*numQuadPoints; PetscInt numBlocks = 1; PetscInt batchSize = numBlocks * blockSize; PetscInt numBatches = user->numBatches; PetscInt numChunks = numCells / (numBatches*batchSize); ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr); /* Remainder */ PetscInt numRemainder = numCells % (numBatches * batchSize); PetscInt offset = numCells - numRemainder; ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset], user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr); } for (c = cStart; c < cEnd; ++c) { if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);} ierr = DMDAVecSetClosure(dm, NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr); } ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr); ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr); if (user->showResidual) { PetscInt p; ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr); for (p = 0; p < user->numProcs; ++p) { if (p == user->rank) { Vec f; ierr = VecDuplicate(F, &f);CHKERRQ(ierr); ierr = VecCopy(F, f);CHKERRQ(ierr); ierr = VecChop(f, 1.0e-10);CHKERRQ(ierr); ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecDestroy(&f);CHKERRQ(ierr); } ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr); } } ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* DMPlexGetRawFaces_Internal - Gets groups of vertices that correspond to faces for the given cone */ PetscErrorCode DMPlexGetRawFaces_Internal(DM dm, PetscInt dim, PetscInt coneSize, const PetscInt cone[], PetscInt *numFaces, PetscInt *faceSize, const PetscInt *faces[]) { PetscInt *facesTmp; PetscInt maxConeSize, maxSupportSize; PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(dm, DM_CLASSID, 1); ierr = DMPlexGetMaxSizes(dm, &maxConeSize, &maxSupportSize);CHKERRQ(ierr); if (faces) {ierr = DMGetWorkArray(dm, PetscSqr(PetscMax(maxConeSize, maxSupportSize)), PETSC_INT, &facesTmp);CHKERRQ(ierr);} switch (dim) { case 1: switch (coneSize) { case 2: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; *faces = facesTmp; } if (numFaces) *numFaces = 2; if (faceSize) *faceSize = 1; break; default: SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim); } break; case 2: switch (coneSize) { case 3: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[1]; facesTmp[3] = cone[2]; facesTmp[4] = cone[2]; facesTmp[5] = cone[0]; *faces = facesTmp; } if (numFaces) *numFaces = 3; if (faceSize) *faceSize = 2; break; case 4: /* Vertices follow right hand rule */ if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[1]; facesTmp[3] = cone[2]; facesTmp[4] = cone[2]; facesTmp[5] = cone[3]; facesTmp[6] = cone[3]; facesTmp[7] = cone[0]; *faces = facesTmp; } if (numFaces) *numFaces = 4; if (faceSize) *faceSize = 2; break; default: SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim); } break; case 3: switch (coneSize) { case 3: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[1]; facesTmp[3] = cone[2]; facesTmp[4] = cone[2]; facesTmp[5] = cone[0]; *faces = facesTmp; } if (numFaces) *numFaces = 3; if (faceSize) *faceSize = 2; break; case 4: /* Vertices of first face follow right hand rule and normal points away from last vertex */ if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[2]; facesTmp[3] = cone[0]; facesTmp[4] = cone[3]; facesTmp[5] = cone[1]; facesTmp[6] = cone[0]; facesTmp[7] = cone[2]; facesTmp[8] = cone[3]; facesTmp[9] = cone[2]; facesTmp[10] = cone[1]; facesTmp[11] = cone[3]; *faces = facesTmp; } if (numFaces) *numFaces = 4; if (faceSize) *faceSize = 3; break; case 8: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[2]; facesTmp[3] = cone[3]; /* Bottom */ facesTmp[4] = cone[4]; facesTmp[5] = cone[5]; facesTmp[6] = cone[6]; facesTmp[7] = cone[7]; /* Top */ facesTmp[8] = cone[0]; facesTmp[9] = cone[3]; facesTmp[10] = cone[5]; facesTmp[11] = cone[4]; /* Front */ facesTmp[12] = cone[2]; facesTmp[13] = cone[1]; facesTmp[14] = cone[7]; facesTmp[15] = cone[6]; /* Back */ facesTmp[16] = cone[3]; facesTmp[17] = cone[2]; facesTmp[18] = cone[6]; facesTmp[19] = cone[5]; /* Right */ facesTmp[20] = cone[0]; facesTmp[21] = cone[4]; facesTmp[22] = cone[7]; facesTmp[23] = cone[1]; /* Left */ *faces = facesTmp; } if (numFaces) *numFaces = 6; if (faceSize) *faceSize = 4; break; default: SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim); } break; default: SETERRQ1(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Dimension %D not supported", dim); } PetscFunctionReturn(0); }
/* This interpolates faces for cells at some stratum */ static PetscErrorCode DMPlexInterpolateFaces_Internal(DM dm, PetscInt cellDepth, DM idm) { DMLabel subpointMap; PetscHashIJKL faceTable; PetscInt *pStart, *pEnd; PetscInt cellDim, depth, faceDepth = cellDepth, numPoints = 0, faceSizeAll = 0, face, c, d; PetscErrorCode ierr; PetscFunctionBegin; ierr = DMPlexGetDimension(dm, &cellDim);CHKERRQ(ierr); /* HACK: I need a better way to determine face dimension, or an alternative to GetFaces() */ ierr = DMPlexGetSubpointMap(dm, &subpointMap);CHKERRQ(ierr); if (subpointMap) ++cellDim; ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr); ++depth; ++cellDepth; cellDim -= depth - cellDepth; ierr = PetscMalloc2(depth+1,&pStart,depth+1,&pEnd);CHKERRQ(ierr); for (d = depth-1; d >= faceDepth; --d) { ierr = DMPlexGetDepthStratum(dm, d, &pStart[d+1], &pEnd[d+1]);CHKERRQ(ierr); } ierr = DMPlexGetDepthStratum(dm, -1, NULL, &pStart[faceDepth]);CHKERRQ(ierr); pEnd[faceDepth] = pStart[faceDepth]; for (d = faceDepth-1; d >= 0; --d) { ierr = DMPlexGetDepthStratum(dm, d, &pStart[d], &pEnd[d]);CHKERRQ(ierr); } if (pEnd[cellDepth] > pStart[cellDepth]) {ierr = DMPlexGetFaces_Internal(dm, cellDim, pStart[cellDepth], NULL, &faceSizeAll, NULL);CHKERRQ(ierr);} if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll); ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr); for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) { const PetscInt *cellFaces; PetscInt numCellFaces, faceSize, cf; ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll); for (cf = 0; cf < numCellFaces; ++cf) { const PetscInt *cellFace = &cellFaces[cf*faceSize]; PetscHashIJKLKey key; PetscHashIJKLIter missing, iter; if (faceSize == 2) { key.i = PetscMin(cellFace[0], cellFace[1]); key.j = PetscMax(cellFace[0], cellFace[1]); key.k = 0; key.l = 0; } else { key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0; ierr = PetscSortInt(faceSize, (PetscInt *) &key); } ierr = PetscHashIJKLPut(faceTable, key, &missing, &iter);CHKERRQ(ierr); if (missing) {ierr = PetscHashIJKLSet(faceTable, iter, face++);CHKERRQ(ierr);} } ierr = DMPlexRestoreFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); } pEnd[faceDepth] = face; ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr); /* Count new points */ for (d = 0; d <= depth; ++d) { numPoints += pEnd[d]-pStart[d]; } ierr = DMPlexSetChart(idm, 0, numPoints);CHKERRQ(ierr); /* Set cone sizes */ for (d = 0; d <= depth; ++d) { PetscInt coneSize, p; if (d == faceDepth) { for (p = pStart[d]; p < pEnd[d]; ++p) { /* I see no way to do this if we admit faces of different shapes */ ierr = DMPlexSetConeSize(idm, p, faceSizeAll);CHKERRQ(ierr); } } else if (d == cellDepth) { for (p = pStart[d]; p < pEnd[d]; ++p) { /* Number of cell faces may be different from number of cell vertices*/ ierr = DMPlexGetFaces_Internal(dm, cellDim, p, &coneSize, NULL, NULL);CHKERRQ(ierr); ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr); } } else { for (p = pStart[d]; p < pEnd[d]; ++p) { ierr = DMPlexGetConeSize(dm, p, &coneSize);CHKERRQ(ierr); ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr); } } } ierr = DMSetUp(idm);CHKERRQ(ierr); /* Get face cones from subsets of cell vertices */ if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll); ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr); for (d = depth; d > cellDepth; --d) { const PetscInt *cone; PetscInt p; for (p = pStart[d]; p < pEnd[d]; ++p) { ierr = DMPlexGetCone(dm, p, &cone);CHKERRQ(ierr); ierr = DMPlexSetCone(idm, p, cone);CHKERRQ(ierr); ierr = DMPlexGetConeOrientation(dm, p, &cone);CHKERRQ(ierr); ierr = DMPlexSetConeOrientation(idm, p, cone);CHKERRQ(ierr); } } for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) { const PetscInt *cellFaces; PetscInt numCellFaces, faceSize, cf; ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll); for (cf = 0; cf < numCellFaces; ++cf) { const PetscInt *cellFace = &cellFaces[cf*faceSize]; PetscHashIJKLKey key; PetscHashIJKLIter missing, iter; if (faceSize == 2) { key.i = PetscMin(cellFace[0], cellFace[1]); key.j = PetscMax(cellFace[0], cellFace[1]); key.k = 0; key.l = 0; } else { key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0; ierr = PetscSortInt(faceSize, (PetscInt *) &key); } ierr = PetscHashIJKLPut(faceTable, key, &missing, &iter);CHKERRQ(ierr); if (missing) { ierr = DMPlexSetCone(idm, face, cellFace);CHKERRQ(ierr); ierr = PetscHashIJKLSet(faceTable, iter, face);CHKERRQ(ierr); ierr = DMPlexInsertCone(idm, c, cf, face++);CHKERRQ(ierr); } else { const PetscInt *cone; PetscInt coneSize, ornt, i, j, f; ierr = PetscHashIJKLGet(faceTable, iter, &f);CHKERRQ(ierr); ierr = DMPlexInsertCone(idm, c, cf, f);CHKERRQ(ierr); /* Orient face: Do not allow reverse orientation at the first vertex */ ierr = DMPlexGetConeSize(idm, f, &coneSize);CHKERRQ(ierr); ierr = DMPlexGetCone(idm, f, &cone);CHKERRQ(ierr); if (coneSize != faceSize) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of face vertices %D for face %D should be %D", coneSize, f, faceSize); /* - First find the initial vertex */ for (i = 0; i < faceSize; ++i) if (cellFace[0] == cone[i]) break; /* - Try forward comparison */ for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+j)%faceSize]) break; if (j == faceSize) { if ((faceSize == 2) && (i == 1)) ornt = -2; else ornt = i; } else { /* - Try backward comparison */ for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+faceSize-j)%faceSize]) break; if (j == faceSize) { if (i == 0) ornt = -faceSize; else ornt = -i; } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not determine face orientation"); } ierr = DMPlexInsertConeOrientation(idm, c, cf, ornt);CHKERRQ(ierr); } } ierr = DMPlexRestoreFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); } if (face != pEnd[faceDepth]) SETERRQ2(PetscObjectComm((PetscObject) dm), PETSC_ERR_PLIB, "Invalid number of faces %D should be %D", face-pStart[faceDepth], pEnd[faceDepth]-pStart[faceDepth]); ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr); ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr); ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr); ierr = DMPlexSymmetrize(idm);CHKERRQ(ierr); ierr = DMPlexStratify(idm);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DMSetUp_ADDA(DM dm) { PetscErrorCode ierr; PetscInt s=1; /* stencil width, fixed to 1 at the moment */ PetscMPIInt rank,size; PetscInt i; PetscInt procsleft; PetscInt procsdimi; PetscInt ranki; PetscInt rpq; DM_ADDA *dd = (DM_ADDA*)dm->data; MPI_Comm comm; PetscInt *nodes,*procs,dim,dof; PetscBool *periodic; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); procs = dd->procs; nodes = dd->nodes; dim = dd->dim; dof = dd->dof; periodic = dd->periodic; /* check for validity */ procsleft = 1; for(i=0; i<dim; i++) { if (nodes[i] < procs[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in direction %d is too fine! %D nodes, %D processors", i, nodes[i], procs[i]); procsleft *= procs[i]; } if (procsleft != size) SETERRQ(comm,PETSC_ERR_PLIB, "Created or was provided with inconsistent distribution of processors"); /* find out local region */ ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lcs));CHKERRQ(ierr); ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lce));CHKERRQ(ierr); procsdimi=size; ranki=rank; for(i=0; i<dim; i++) { /* What is the number of processor for dimensions i+1, ..., dim-1? */ procsdimi /= procs[i]; /* these are all nodes that come before our region */ rpq = ranki / procsdimi; dd->lcs[i] = rpq * (nodes[i]/procs[i]); if( rpq + 1 < procs[i] ) { dd->lce[i] = (rpq + 1) * (nodes[i]/procs[i]); } else { /* last one gets all the rest */ dd->lce[i] = nodes[i]; } ranki = ranki - rpq*procsdimi; } /* compute local size */ dd->lsize=1; for(i=0; i<dim; i++) { dd->lsize *= (dd->lce[i]-dd->lcs[i]); } dd->lsize *= dof; /* find out ghost points */ ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lgs));CHKERRQ(ierr); ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lge));CHKERRQ(ierr); for(i=0; i<dim; i++) { if( periodic[i] ) { dd->lgs[i] = dd->lcs[i] - s; dd->lge[i] = dd->lce[i] + s; } else { dd->lgs[i] = PetscMax(dd->lcs[i] - s, 0); dd->lge[i] = PetscMin(dd->lce[i] + s, nodes[i]); } } /* compute local size with ghost points */ dd->lgsize=1; for(i=0; i<dim; i++) { dd->lgsize *= (dd->lge[i]-dd->lgs[i]); } dd->lgsize *= dof; /* create global and local prototype vector */ ierr = VecCreateMPIWithArray(comm,dd->dof,dd->lsize,PETSC_DECIDE,0,&(dd->global));CHKERRQ(ierr); #if ADDA_NEEDS_LOCAL_VECTOR /* local includes ghost points */ ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->lgsize,0,&(dd->local));CHKERRQ(ierr); #endif ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->refine));CHKERRQ(ierr); for(i=0; i<dim; i++) dd->refine[i] = 3; dd->dofrefine = 1; PetscFunctionReturn(0); }
static PetscErrorCode TaoLineSearchApply_MT(TaoLineSearch ls, Vec x, PetscReal *f, Vec g, Vec s) { PetscErrorCode ierr; TaoLineSearch_MT *mt; PetscReal xtrapf = 4.0; PetscReal finit, width, width1, dginit, fm, fxm, fym, dgm, dgxm, dgym; PetscReal dgx, dgy, dg, dg2, fx, fy, stx, sty, dgtest; PetscReal ftest1=0.0, ftest2=0.0; PetscInt i, stage1,n1,n2,nn1,nn2; PetscReal bstepmin1, bstepmin2, bstepmax; PetscBool g_computed=PETSC_FALSE; /* to prevent extra gradient computation */ PetscFunctionBegin; PetscValidHeaderSpecific(ls,TAOLINESEARCH_CLASSID,1); PetscValidHeaderSpecific(x,VEC_CLASSID,2); PetscValidScalarPointer(f,3); PetscValidHeaderSpecific(g,VEC_CLASSID,4); PetscValidHeaderSpecific(s,VEC_CLASSID,5); /* comm,type,size checks are done in interface TaoLineSearchApply */ mt = (TaoLineSearch_MT*)(ls->data); ls->reason = TAOLINESEARCH_CONTINUE_ITERATING; /* Check work vector */ if (!mt->work) { ierr = VecDuplicate(x,&mt->work);CHKERRQ(ierr); mt->x = x; ierr = PetscObjectReference((PetscObject)mt->x);CHKERRQ(ierr); } else if (x != mt->x) { ierr = VecDestroy(&mt->work);CHKERRQ(ierr); ierr = VecDuplicate(x,&mt->work);CHKERRQ(ierr); ierr = PetscObjectDereference((PetscObject)mt->x);CHKERRQ(ierr); mt->x = x; ierr = PetscObjectReference((PetscObject)mt->x);CHKERRQ(ierr); } if (ls->bounded) { /* Compute step length needed to make all variables equal a bound */ /* Compute the smallest steplength that will make one nonbinding variable equal the bound */ ierr = VecGetLocalSize(ls->upper,&n1);CHKERRQ(ierr); ierr = VecGetLocalSize(mt->x, &n2);CHKERRQ(ierr); ierr = VecGetSize(ls->upper,&nn1);CHKERRQ(ierr); ierr = VecGetSize(mt->x,&nn2);CHKERRQ(ierr); if (n1 != n2 || nn1 != nn2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Variable vector not compatible with bounds vector"); ierr = VecScale(s,-1.0);CHKERRQ(ierr); ierr = VecBoundGradientProjection(s,x,ls->lower,ls->upper,s);CHKERRQ(ierr); ierr = VecScale(s,-1.0);CHKERRQ(ierr); ierr = VecStepBoundInfo(x,s,ls->lower,ls->upper,&bstepmin1,&bstepmin2,&bstepmax);CHKERRQ(ierr); ls->stepmax = PetscMin(bstepmax,1.0e15); } ierr = VecDot(g,s,&dginit);CHKERRQ(ierr); if (PetscIsInfOrNanReal(dginit)) { ierr = PetscInfo1(ls,"Initial Line Search step * g is Inf or Nan (%g)\n",(double)dginit);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_INFORNAN; PetscFunctionReturn(0); } if (dginit >= 0.0) { ierr = PetscInfo1(ls,"Initial Line Search step * g is not descent direction (%g)\n",(double)dginit);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_FAILED_ASCENT; PetscFunctionReturn(0); } /* Initialization */ mt->bracket = 0; stage1 = 1; finit = *f; dgtest = ls->ftol * dginit; width = ls->stepmax - ls->stepmin; width1 = width * 2.0; ierr = VecCopy(x,mt->work);CHKERRQ(ierr); /* Variable dictionary: stx, fx, dgx - the step, function, and derivative at the best step sty, fy, dgy - the step, function, and derivative at the other endpoint of the interval of uncertainty step, f, dg - the step, function, and derivative at the current step */ stx = 0.0; fx = finit; dgx = dginit; sty = 0.0; fy = finit; dgy = dginit; ls->step=ls->initstep; for (i=0; i< ls->max_funcs; i++) { /* Set min and max steps to correspond to the interval of uncertainty */ if (mt->bracket) { ls->stepmin = PetscMin(stx,sty); ls->stepmax = PetscMax(stx,sty); } else { ls->stepmin = stx; ls->stepmax = ls->step + xtrapf * (ls->step - stx); } /* Force the step to be within the bounds */ ls->step = PetscMax(ls->step,ls->stepmin); ls->step = PetscMin(ls->step,ls->stepmax); /* If an unusual termination is to occur, then let step be the lowest point obtained thus far */ if ((stx!=0) && (((mt->bracket) && (ls->step <= ls->stepmin || ls->step >= ls->stepmax)) || ((mt->bracket) && (ls->stepmax - ls->stepmin <= ls->rtol * ls->stepmax)) || ((ls->nfeval+ls->nfgeval) >= ls->max_funcs - 1) || (mt->infoc == 0))) { ls->step = stx; } ierr = VecCopy(x,mt->work);CHKERRQ(ierr); ierr = VecAXPY(mt->work,ls->step,s);CHKERRQ(ierr); /* W = X + step*S */ if (ls->bounded) { ierr = VecMedian(ls->lower, mt->work, ls->upper, mt->work);CHKERRQ(ierr); } if (ls->usegts) { ierr = TaoLineSearchComputeObjectiveAndGTS(ls,mt->work,f,&dg);CHKERRQ(ierr); g_computed=PETSC_FALSE; } else { ierr = TaoLineSearchComputeObjectiveAndGradient(ls,mt->work,f,g);CHKERRQ(ierr); g_computed=PETSC_TRUE; if (ls->bounded) { ierr = VecDot(g,x,&dg);CHKERRQ(ierr); ierr = VecDot(g,mt->work,&dg2);CHKERRQ(ierr); dg = (dg2 - dg)/ls->step; } else { ierr = VecDot(g,s,&dg);CHKERRQ(ierr); } } if (0 == i) { ls->f_fullstep=*f; } if (PetscIsInfOrNanReal(*f) || PetscIsInfOrNanReal(dg)) { /* User provided compute function generated Not-a-Number, assume domain violation and set function value and directional derivative to infinity. */ *f = PETSC_INFINITY; dg = PETSC_INFINITY; } ftest1 = finit + ls->step * dgtest; if (ls->bounded) { ftest2 = finit + ls->step * dgtest * ls->ftol; } /* Convergence testing */ if (((*f - ftest1 <= 1.0e-10 * PetscAbsReal(finit)) && (PetscAbsReal(dg) + ls->gtol*dginit <= 0.0))) { ierr = PetscInfo(ls, "Line search success: Sufficient decrease and directional deriv conditions hold\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_SUCCESS; break; } /* Check Armijo if beyond the first breakpoint */ if (ls->bounded && (*f <= ftest2) && (ls->step >= bstepmin2)) { ierr = PetscInfo(ls,"Line search success: Sufficient decrease.\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_SUCCESS; break; } /* Checks for bad cases */ if (((mt->bracket) && (ls->step <= ls->stepmin||ls->step >= ls->stepmax)) || (!mt->infoc)) { ierr = PetscInfo(ls,"Rounding errors may prevent further progress. May not be a step satisfying\n");CHKERRQ(ierr); ierr = PetscInfo(ls,"sufficient decrease and curvature conditions. Tolerances may be too small.\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_OTHER; break; } if ((ls->step == ls->stepmax) && (*f <= ftest1) && (dg <= dgtest)) { ierr = PetscInfo1(ls,"Step is at the upper bound, stepmax (%g)\n",(double)ls->stepmax);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_UPPERBOUND; break; } if ((ls->step == ls->stepmin) && (*f >= ftest1) && (dg >= dgtest)) { ierr = PetscInfo1(ls,"Step is at the lower bound, stepmin (%g)\n",(double)ls->stepmin);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_LOWERBOUND; break; } if ((mt->bracket) && (ls->stepmax - ls->stepmin <= ls->rtol*ls->stepmax)){ ierr = PetscInfo1(ls,"Relative width of interval of uncertainty is at most rtol (%g)\n",(double)ls->rtol);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_RTOL; break; } /* In the first stage, we seek a step for which the modified function has a nonpositive value and nonnegative derivative */ if ((stage1) && (*f <= ftest1) && (dg >= dginit * PetscMin(ls->ftol, ls->gtol))) { stage1 = 0; } /* A modified function is used to predict the step only if we have not obtained a step for which the modified function has a nonpositive function value and nonnegative derivative, and if a lower function value has been obtained but the decrease is not sufficient */ if ((stage1) && (*f <= fx) && (*f > ftest1)) { fm = *f - ls->step * dgtest; /* Define modified function */ fxm = fx - stx * dgtest; /* and derivatives */ fym = fy - sty * dgtest; dgm = dg - dgtest; dgxm = dgx - dgtest; dgym = dgy - dgtest; /* if (dgxm * (ls->step - stx) >= 0.0) */ /* Update the interval of uncertainty and compute the new step */ ierr = Tao_mcstep(ls,&stx,&fxm,&dgxm,&sty,&fym,&dgym,&ls->step,&fm,&dgm);CHKERRQ(ierr); fx = fxm + stx * dgtest; /* Reset the function and */ fy = fym + sty * dgtest; /* gradient values */ dgx = dgxm + dgtest; dgy = dgym + dgtest; } else { /* Update the interval of uncertainty and compute the new step */ ierr = Tao_mcstep(ls,&stx,&fx,&dgx,&sty,&fy,&dgy,&ls->step,f,&dg);CHKERRQ(ierr); } /* Force a sufficient decrease in the interval of uncertainty */ if (mt->bracket) { if (PetscAbsReal(sty - stx) >= 0.66 * width1) ls->step = stx + 0.5*(sty - stx); width1 = width; width = PetscAbsReal(sty - stx); } } if ((ls->nfeval+ls->nfgeval) > ls->max_funcs) { ierr = PetscInfo2(ls,"Number of line search function evals (%D) > maximum (%D)\n",(ls->nfeval+ls->nfgeval),ls->max_funcs);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_MAXFCN; } /* Finish computations */ ierr = PetscInfo2(ls,"%D function evals in line search, step = %g\n",(ls->nfeval+ls->nfgeval),(double)ls->step);CHKERRQ(ierr); /* Set new solution vector and compute gradient if needed */ ierr = VecCopy(mt->work,x);CHKERRQ(ierr); if (!g_computed) { ierr = TaoLineSearchComputeGradient(ls,mt->work,g);CHKERRQ(ierr); } PetscFunctionReturn(0); }
int main(int argc,char **argv) { TS ts; /* time integrator */ SNES snes; /* nonlinear solver */ SNESLineSearch linesearch; /* line search */ Vec X; /* solution, residual vectors */ Mat J; /* Jacobian matrix */ PetscInt steps,maxsteps,mx; PetscErrorCode ierr; DM da; PetscReal ftime,dt; struct _User user; /* user-defined work context */ TSConvergedReason reason; PetscInitialize(&argc,&argv,(char*)0,help); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create distributed array (DMDA) to manage parallel grid and vectors - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = DMDACreate1d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,-11,2,2,NULL,&da);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Extract global vectors from DMDA; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = DMCreateGlobalVector(da,&X);CHKERRQ(ierr); /* Initialize user application context */ ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Advection-reaction options",""); { user.a[0] = 1; ierr = PetscOptionsReal("-a0","Advection rate 0","",user.a[0],&user.a[0],NULL);CHKERRQ(ierr); user.a[1] = 0; ierr = PetscOptionsReal("-a1","Advection rate 1","",user.a[1],&user.a[1],NULL);CHKERRQ(ierr); user.k[0] = 1e6; ierr = PetscOptionsReal("-k0","Reaction rate 0","",user.k[0],&user.k[0],NULL);CHKERRQ(ierr); user.k[1] = 2*user.k[0]; ierr = PetscOptionsReal("-k1","Reaction rate 1","",user.k[1],&user.k[1],NULL);CHKERRQ(ierr); user.s[0] = 0; ierr = PetscOptionsReal("-s0","Source 0","",user.s[0],&user.s[0],NULL);CHKERRQ(ierr); user.s[1] = 1; ierr = PetscOptionsReal("-s1","Source 1","",user.s[1],&user.s[1],NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create timestepping solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetDM(ts,da);CHKERRQ(ierr); ierr = TSSetType(ts,TSARKIMEX);CHKERRQ(ierr); ierr = TSSetRHSFunction(ts,NULL,FormRHSFunction,&user);CHKERRQ(ierr); ierr = TSSetIFunction(ts,NULL,FormIFunction,&user);CHKERRQ(ierr); ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr); ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr); ierr = TSSetIJacobian(ts,J,J,FormIJacobian,&user);CHKERRQ(ierr); /* A line search in the nonlinear solve can fail due to ill-conditioning unless an absolute tolerance is set. Since * this problem is linear, we deactivate the line search. For a linear problem, it is usually recommended to also use * SNESSetType(snes,SNESKSPONLY). */ ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr); ierr = SNESGetLineSearch(snes,&linesearch);CHKERRQ(ierr); ierr = SNESLineSearchSetType(linesearch,SNESLINESEARCHBASIC);CHKERRQ(ierr); ftime = 1.0; maxsteps = 10000; ierr = TSSetDuration(ts,maxsteps,ftime);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set initial conditions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = FormInitialSolution(ts,X,&user);CHKERRQ(ierr); ierr = TSSetSolution(ts,X);CHKERRQ(ierr); ierr = VecGetSize(X,&mx);CHKERRQ(ierr); dt = .1 * PetscMax(user.a[0],user.a[1]) / mx; /* Advective CFL, I don't know why it needs so much safety factor. */ ierr = TSSetInitialTimeStep(ts,0.0,dt);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetFromOptions(ts);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSolve(ts,X);CHKERRQ(ierr); ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr); ierr = TSGetTimeStepNumber(ts,&steps);CHKERRQ(ierr); ierr = TSGetConvergedReason(ts,&reason);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%s at time %G after %D steps\n",TSConvergedReasons[reason],ftime,steps);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = VecDestroy(&X);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/* @ TaoApply_Armijo - This routine performs a linesearch. It backtracks until the (nonmonotone) Armijo conditions are satisfied. Input Parameters: + tao - Tao context . X - current iterate (on output X contains new iterate, X + step*S) . S - search direction . f - merit function evaluated at X . G - gradient of merit function evaluated at X . W - work vector - step - initial estimate of step length Output parameters: + f - merit function evaluated at new iterate, X + step*S . G - gradient of merit function evaluated at new iterate, X + step*S . X - new iterate - step - final step length @ */ static PetscErrorCode TaoLineSearchApply_Armijo(TaoLineSearch ls, Vec x, PetscReal *f, Vec g, Vec s) { TaoLineSearch_ARMIJO *armP = (TaoLineSearch_ARMIJO *)ls->data; PetscErrorCode ierr; PetscInt i; PetscReal fact, ref, gdx; PetscInt idx; PetscBool g_computed=PETSC_FALSE; /* to prevent extra gradient computation */ PetscFunctionBegin; ls->reason = TAOLINESEARCH_CONTINUE_ITERATING; if (!armP->work) { ierr = VecDuplicate(x,&armP->work);CHKERRQ(ierr); armP->x = x; ierr = PetscObjectReference((PetscObject)armP->x);CHKERRQ(ierr); } else if (x != armP->x) { /* If x has changed, then recreate work */ ierr = VecDestroy(&armP->work);CHKERRQ(ierr); ierr = VecDuplicate(x,&armP->work);CHKERRQ(ierr); ierr = PetscObjectDereference((PetscObject)armP->x);CHKERRQ(ierr); armP->x = x; ierr = PetscObjectReference((PetscObject)armP->x);CHKERRQ(ierr); } /* Check linesearch parameters */ if (armP->alpha < 1) { ierr = PetscInfo1(ls,"Armijo line search error: alpha (%g) < 1\n", (double)armP->alpha);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->beta <= 0) || (armP->beta >= 1)) { ierr = PetscInfo1(ls,"Armijo line search error: beta (%g) invalid\n", (double)armP->beta);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->beta_inf <= 0) || (armP->beta_inf >= 1)) { ierr = PetscInfo1(ls,"Armijo line search error: beta_inf (%g) invalid\n", (double)armP->beta_inf);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->sigma <= 0) || (armP->sigma >= 0.5)) { ierr = PetscInfo1(ls,"Armijo line search error: sigma (%g) invalid\n", (double)armP->sigma);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if (armP->memorySize < 1) { ierr = PetscInfo1(ls,"Armijo line search error: memory_size (%D) < 1\n", armP->memorySize);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->referencePolicy != REFERENCE_MAX) && (armP->referencePolicy != REFERENCE_AVE) && (armP->referencePolicy != REFERENCE_MEAN)) { ierr = PetscInfo(ls,"Armijo line search error: reference_policy invalid\n");CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->replacementPolicy != REPLACE_FIFO) && (armP->replacementPolicy != REPLACE_MRU)) { ierr = PetscInfo(ls,"Armijo line search error: replacement_policy invalid\n");CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if (PetscIsInfOrNanReal(*f)) { ierr = PetscInfo(ls,"Armijo line search error: initial function inf or nan\n");CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } if (ls->reason != TAOLINESEARCH_CONTINUE_ITERATING) { PetscFunctionReturn(0); } /* Check to see of the memory has been allocated. If not, allocate the historical array and populate it with the initial function values. */ if (!armP->memory) { ierr = PetscMalloc1(armP->memorySize, &armP->memory );CHKERRQ(ierr); } if (!armP->memorySetup) { for (i = 0; i < armP->memorySize; i++) { armP->memory[i] = armP->alpha*(*f); } armP->current = 0; armP->lastReference = armP->memory[0]; armP->memorySetup=PETSC_TRUE; } /* Calculate reference value (MAX) */ ref = armP->memory[0]; idx = 0; for (i = 1; i < armP->memorySize; i++) { if (armP->memory[i] > ref) { ref = armP->memory[i]; idx = i; } } if (armP->referencePolicy == REFERENCE_AVE) { ref = 0; for (i = 0; i < armP->memorySize; i++) { ref += armP->memory[i]; } ref = ref / armP->memorySize; ref = PetscMax(ref, armP->memory[armP->current]); } else if (armP->referencePolicy == REFERENCE_MEAN) { ref = PetscMin(ref, 0.5*(armP->lastReference + armP->memory[armP->current])); } ierr = VecDot(g,s,&gdx);CHKERRQ(ierr); if (PetscIsInfOrNanReal(gdx)) { ierr = PetscInfo1(ls,"Initial Line Search step * g is Inf or Nan (%g)\n",(double)gdx);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_INFORNAN; PetscFunctionReturn(0); } if (gdx >= 0.0) { ierr = PetscInfo1(ls,"Initial Line Search step is not descent direction (g's=%g)\n",(double)gdx);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_FAILED_ASCENT; PetscFunctionReturn(0); } if (armP->nondescending) { fact = armP->sigma; } else { fact = armP->sigma * gdx; } ls->step = ls->initstep; while (ls->step >= ls->stepmin && (ls->nfeval+ls->nfgeval) < ls->max_funcs) { /* Calculate iterate */ ierr = VecCopy(x,armP->work);CHKERRQ(ierr); ierr = VecAXPY(armP->work,ls->step,s);CHKERRQ(ierr); if (ls->bounded) { ierr = VecMedian(ls->lower,armP->work,ls->upper,armP->work);CHKERRQ(ierr); } /* Calculate function at new iterate */ if (ls->hasobjective) { ierr = TaoLineSearchComputeObjective(ls,armP->work,f);CHKERRQ(ierr); g_computed=PETSC_FALSE; } else if (ls->usegts) { ierr = TaoLineSearchComputeObjectiveAndGTS(ls,armP->work,f,&gdx);CHKERRQ(ierr); g_computed=PETSC_FALSE; } else { ierr = TaoLineSearchComputeObjectiveAndGradient(ls,armP->work,f,g);CHKERRQ(ierr); g_computed=PETSC_TRUE; } if (ls->step == ls->initstep) { ls->f_fullstep = *f; } if (PetscIsInfOrNanReal(*f)) { ls->step *= armP->beta_inf; } else { /* Check descent condition */ if (armP->nondescending && *f <= ref - ls->step*fact*ref) break; if (!armP->nondescending && *f <= ref + ls->step*fact) { break; } ls->step *= armP->beta; } } /* Check termination */ if (PetscIsInfOrNanReal(*f)) { ierr = PetscInfo(ls, "Function is inf or nan.\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_FAILED_INFORNAN; } else if (ls->step < ls->stepmin) { ierr = PetscInfo(ls, "Step length is below tolerance.\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_RTOL; } else if ((ls->nfeval+ls->nfgeval) >= ls->max_funcs) { ierr = PetscInfo2(ls, "Number of line search function evals (%D) > maximum allowed (%D)\n",ls->nfeval+ls->nfgeval, ls->max_funcs);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_MAXFCN; } if (ls->reason) { PetscFunctionReturn(0); } /* Successful termination, update memory */ ls->reason = TAOLINESEARCH_SUCCESS; armP->lastReference = ref; if (armP->replacementPolicy == REPLACE_FIFO) { armP->memory[armP->current++] = *f; if (armP->current >= armP->memorySize) { armP->current = 0; } } else { armP->current = idx; armP->memory[idx] = *f; } /* Update iterate and compute gradient */ ierr = VecCopy(armP->work,x);CHKERRQ(ierr); if (!g_computed) { ierr = TaoLineSearchComputeGradient(ls, x, g);CHKERRQ(ierr); } ierr = PetscInfo2(ls, "%D function evals in line search, step = %g\n",ls->nfeval, (double)ls->step);CHKERRQ(ierr); PetscFunctionReturn(0); }
// Approximates continuous L^\infty and L^2 norms of error, normalized by L^\infty and L^2 norms of analytic solution PetscErrorCode OpIntegrateNorms(Op op,DM dm,Vec U,PetscReal *normInfty,PetscReal *normL2) { PetscErrorCode ierr; Vec X,Uloc; DM dmx; const PetscScalar *x,*u; const PetscReal *B,*D,*w3; PetscReal L[3]; struct {PetscReal error,u;} sumInfty={},sum2={}; PetscInt nelem,ne = op->ne,P,Q,P3,Q3; PetscFunctionBegin; ierr = PetscLogEventBegin(OP_IntegrateNorms,dm,U,0,0);CHKERRQ(ierr); ierr = DMFEGetTensorEval(dm,&P,&Q,&B,&D,NULL,NULL,&w3);CHKERRQ(ierr); P3 = P*P*P; Q3 = Q*Q*Q; ierr = DMFEGetUniformCoordinates(dm,L);CHKERRQ(ierr); ierr = DMGetCoordinateDM(dm,&dmx);CHKERRQ(ierr); ierr = DMGetCoordinatesLocal(dm,&X);CHKERRQ(ierr); ierr = DMGetLocalVector(dm,&Uloc);CHKERRQ(ierr); ierr = DMGlobalToLocalBegin(dm,U,INSERT_VALUES,Uloc);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(dm,U,INSERT_VALUES,Uloc);CHKERRQ(ierr); ierr = DMFEGetNumElements(dm,&nelem);CHKERRQ(ierr); ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr); ierr = VecGetArrayRead(Uloc,&u);CHKERRQ(ierr); for (PetscInt e=0; e<nelem; e+=ne) { PetscScalar ue[op->dof*P3*ne]_align,uq[op->dof][Q3][ne]_align,xe[3*P3*ne]_align,xq[3][Q3][ne]_align,dx[3][3][Q3][ne]_align,wdxdet[Q3][ne]_align; ierr = DMFEExtractElements(dmx,x,e,ne,xe);CHKERRQ(ierr); ierr = PetscMemzero(xq,sizeof xq);CHKERRQ(ierr); ierr = TensorContract(op->Tensor3,B,B,B,TENSOR_EVAL,xe,xq[0][0]);CHKERRQ(ierr); ierr = PetscMemzero(dx,sizeof dx);CHKERRQ(ierr); ierr = TensorContract(op->Tensor3,D,B,B,TENSOR_EVAL,xe,dx[0][0][0]);CHKERRQ(ierr); ierr = TensorContract(op->Tensor3,B,D,B,TENSOR_EVAL,xe,dx[1][0][0]);CHKERRQ(ierr); ierr = TensorContract(op->Tensor3,B,B,D,TENSOR_EVAL,xe,dx[2][0][0]);CHKERRQ(ierr); ierr = PointwiseJacobianInvert(ne,Q3,w3,dx,wdxdet);CHKERRQ(ierr); ierr = DMFEExtractElements(dm,u,e,ne,ue);CHKERRQ(ierr); ierr = PetscMemzero(uq,sizeof uq);CHKERRQ(ierr); ierr = TensorContract(op->TensorDOF,B,B,B,TENSOR_EVAL,ue,uq[0][0]);CHKERRQ(ierr); for (PetscInt i=0; i<Q3; i++) { for (PetscInt l=0; l<ne; l++) { PetscReal xx[] = {xq[0][i][l],xq[1][i][l],xq[2][i][l]}; PetscScalar uql[op->dof],fql[op->dof]; ierr = (op->PointwiseSolution)(op,xx,L,uql);CHKERRQ(ierr); ierr = (op->PointwiseForcing)(op,xx,L,fql);CHKERRQ(ierr); for (PetscInt d=0; d<op->dof; d++) { PetscReal error = uq[d][i][l] - uql[d]; sumInfty.error = PetscMax(sumInfty.error,PetscAbs(error)); sumInfty.u = PetscMax(sumInfty.u ,PetscAbs(uql[d])); sum2.error += PetscSqr(error) * wdxdet[i][l]; sum2.u += PetscSqr(uql[d]) * wdxdet[i][l]; } } } } ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(Uloc,&u);CHKERRQ(ierr); ierr = DMRestoreLocalVector(dm,&Uloc);CHKERRQ(ierr); ierr = MPI_Allreduce(MPI_IN_PLACE,(void*)&sumInfty,2,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)dm));CHKERRQ(ierr); ierr = MPI_Allreduce(MPI_IN_PLACE,(void*)&sum2,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)dm));CHKERRQ(ierr); *normInfty = sumInfty.error/sumInfty.u; *normL2 = PetscSqrtReal(sum2.error)/PetscSqrtReal(sum2.u); ierr = PetscLogEventEnd(OP_IntegrateNorms,dm,U,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode SNESLineSearchApply_NLEQERR(SNESLineSearch linesearch) { PetscBool changed_y,changed_w; PetscErrorCode ierr; Vec X,F,Y,W,G; SNES snes; PetscReal fnorm, xnorm, ynorm, gnorm, wnorm; PetscReal lambda, minlambda, stol; PetscViewer monitor; PetscInt max_its, count, snes_iteration; PetscReal theta, mudash, lambdadash; SNESLineSearch_NLEQERR *nleqerr = (SNESLineSearch_NLEQERR*)linesearch->data; KSPConvergedReason kspreason; PetscFunctionBegin; ierr = PetscCitationsRegister(NLEQERR_citation, &NLEQERR_cited);CHKERRQ(ierr); ierr = SNESLineSearchGetVecs(linesearch, &X, &F, &Y, &W, &G);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); ierr = SNESLineSearchGetLambda(linesearch, &lambda);CHKERRQ(ierr); ierr = SNESLineSearchGetSNES(linesearch, &snes);CHKERRQ(ierr); ierr = SNESLineSearchGetDefaultMonitor(linesearch, &monitor);CHKERRQ(ierr); ierr = SNESLineSearchGetTolerances(linesearch,&minlambda,NULL,NULL,NULL,NULL,&max_its);CHKERRQ(ierr); ierr = SNESGetTolerances(snes,NULL,NULL,&stol,NULL,NULL);CHKERRQ(ierr); /* reset the state of the Lipschitz estimates */ ierr = SNESGetIterationNumber(snes, &snes_iteration);CHKERRQ(ierr); if (!snes_iteration) { ierr = SNESLineSearchReset_NLEQERR(linesearch);CHKERRQ(ierr); } /* precheck */ ierr = SNESLineSearchPreCheck(linesearch,X,Y,&changed_y);CHKERRQ(ierr); ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_SUCCEEDED);CHKERRQ(ierr); ierr = VecNormBegin(Y, NORM_2, &ynorm);CHKERRQ(ierr); ierr = VecNormBegin(X, NORM_2, &xnorm);CHKERRQ(ierr); ierr = VecNormEnd(Y, NORM_2, &ynorm);CHKERRQ(ierr); ierr = VecNormEnd(X, NORM_2, &xnorm);CHKERRQ(ierr); /* Note: Y is *minus* the Newton step. For whatever reason PETSc doesn't solve with the minus on the RHS. */ if (ynorm == 0.0) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Initial direction and size is 0\n");CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = VecCopy(X,W);CHKERRQ(ierr); ierr = VecCopy(F,G);CHKERRQ(ierr); ierr = SNESLineSearchSetNorms(linesearch,xnorm,fnorm,ynorm);CHKERRQ(ierr); ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_FAILED_REDUCT);CHKERRQ(ierr); PetscFunctionReturn(0); } /* At this point, we've solved the Newton system for delta_x, and we assume that its norm is greater than the solution tolerance (otherwise we wouldn't be in here). So let's go ahead and estimate the Lipschitz constant. W contains bar_delta_x_prev at this point. */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: norm of Newton step: %14.12e\n", (double) ynorm);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } /* this needs information from a previous iteration, so can't do it on the first one */ if (nleqerr->norm_delta_x_prev > 0 && nleqerr->norm_bar_delta_x_prev > 0) { ierr = VecWAXPY(G, +1.0, Y, W);CHKERRQ(ierr); /* bar_delta_x - delta_x; +1 because Y is -delta_x */ ierr = VecNormBegin(G, NORM_2, &gnorm);CHKERRQ(ierr); ierr = VecNormEnd(G, NORM_2, &gnorm);CHKERRQ(ierr); nleqerr->mu_curr = nleqerr->lambda_prev * (nleqerr->norm_delta_x_prev * nleqerr->norm_bar_delta_x_prev) / (gnorm * ynorm); lambda = PetscMin(1.0, nleqerr->mu_curr); if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Lipschitz estimate: %14.12e; lambda: %14.12e\n", (double) nleqerr->mu_curr, (double) lambda);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } else { lambda = linesearch->damping; } /* The main while loop of the algorithm. At the end of this while loop, G should have the accepted new X in it. */ count = 0; while (PETSC_TRUE) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: entering iteration with lambda: %14.12e\n", lambda);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } /* Check that we haven't performed too many iterations */ count += 1; if (count >= max_its) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: maximum iterations reached\n");CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_FAILED_REDUCT);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Now comes the Regularity Test. */ if (lambda <= minlambda) { /* This isn't what is suggested by Deuflhard, but it works better in my experience */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: lambda has reached lambdamin, taking full Newton step\n");CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } lambda = 1.0; ierr = VecWAXPY(G, -lambda, Y, X);CHKERRQ(ierr); /* and clean up the state for next time */ ierr = SNESLineSearchReset_NLEQERR(linesearch);CHKERRQ(ierr); /* The clang static analyzer detected a problem here; once the loop is broken the values nleqerr->norm_delta_x_prev = ynorm; nleqerr->norm_bar_delta_x_prev = wnorm; are set, but wnorm has not even been computed. I don't know if this is the correct fix but by setting ynorm and wnorm to -1.0 at least the linesearch object is kept in the state set by the SNESLineSearchReset_NLEQERR() call above */ ynorm = wnorm = -1.0; break; } /* Compute new trial iterate */ ierr = VecWAXPY(W, -lambda, Y, X);CHKERRQ(ierr); ierr = SNESComputeFunction(snes, W, G);CHKERRQ(ierr); /* Solve linear system for bar_delta_x_curr: old Jacobian, new RHS. Note absence of minus sign, compared to Deuflhard, in keeping with PETSc convention */ ierr = KSPSolve(snes->ksp, G, W);CHKERRQ(ierr); ierr = KSPGetConvergedReason(snes->ksp, &kspreason);CHKERRQ(ierr); if (kspreason < 0) { ierr = PetscInfo(snes,"Solution for \\bar{delta x}^{k+1} failed.");CHKERRQ(ierr); } /* W now contains -bar_delta_x_curr. */ ierr = VecNorm(W, NORM_2, &wnorm);CHKERRQ(ierr); if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: norm of simplified Newton update: %14.12e\n", (double) wnorm);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } /* compute the monitoring quantities theta and mudash. */ theta = wnorm / ynorm; ierr = VecWAXPY(G, -(1.0 - lambda), Y, W);CHKERRQ(ierr); ierr = VecNorm(G, NORM_2, &gnorm);CHKERRQ(ierr); mudash = (0.5 * ynorm * lambda * lambda) / gnorm; /* Check for termination of the linesearch */ if (theta >= 1.0) { /* need to go around again with smaller lambda */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: monotonicity check failed, ratio: %14.12e\n", (double) theta);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } lambda = PetscMin(mudash, 0.5 * lambda); lambda = PetscMax(lambda, minlambda); /* continue through the loop, i.e. go back to regularity test */ } else { /* linesearch terminated */ lambdadash = PetscMin(1.0, mudash); if (lambdadash == 1.0 && lambda == 1.0 && wnorm <= stol) { /* store the updated state, X - Y - W, in G: I need to keep W for the next linesearch */ ierr = VecCopy(X, G);CHKERRQ(ierr); ierr = VecAXPY(G, -1.0, Y);CHKERRQ(ierr); ierr = VecAXPY(G, -1.0, W);CHKERRQ(ierr); break; } /* Deuflhard suggests to add the following: else if (lambdadash >= 4.0 * lambda) { lambda = lambdadash; } to continue through the loop, i.e. go back to regularity test. I deliberately exclude this, as I have practical experience of this getting stuck in infinite loops (on e.g. an Allen--Cahn problem). */ else { /* accept iterate without adding on, i.e. don't use bar_delta_x; again, I need to keep W for the next linesearch */ ierr = VecWAXPY(G, -lambda, Y, X);CHKERRQ(ierr); break; } } } if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, G);CHKERRQ(ierr); } /* W currently contains -bar_delta_u. Scale it so that it contains bar_delta_u. */ ierr = VecScale(W, -1.0);CHKERRQ(ierr); /* postcheck */ ierr = SNESLineSearchPostCheck(linesearch,X,Y,G,&changed_y,&changed_w);CHKERRQ(ierr); if (changed_y || changed_w) { ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_FAILED_USER);CHKERRQ(ierr); ierr = PetscInfo(snes,"Changing the search direction here doesn't make sense.\n");CHKERRQ(ierr); PetscFunctionReturn(0); } /* copy the solution and information from this iteration over */ nleqerr->norm_delta_x_prev = ynorm; nleqerr->norm_bar_delta_x_prev = wnorm; nleqerr->lambda_prev = lambda; ierr = VecCopy(G, X);CHKERRQ(ierr); ierr = SNESComputeFunction(snes, X, F);CHKERRQ(ierr); ierr = VecNorm(X, NORM_2, &xnorm);CHKERRQ(ierr); ierr = VecNorm(F, NORM_2, &fnorm);CHKERRQ(ierr); ierr = SNESLineSearchSetLambda(linesearch, lambda);CHKERRQ(ierr); ierr = SNESLineSearchSetNorms(linesearch, xnorm, fnorm, (ynorm < 0 ? PETSC_INFINITY : ynorm));CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode PCBDDCGraphComputeConnectedComponents(PCBDDCGraph graph) { PetscBool adapt_interface_reduced; MPI_Comm interface_comm; PetscMPIInt size; PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; /* compute connected components locally */ ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&interface_comm);CHKERRQ(ierr); ierr = PCBDDCGraphComputeConnectedComponentsLocal(graph);CHKERRQ(ierr); /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ ierr = MPI_Comm_size(interface_comm,&size);CHKERRQ(ierr); adapt_interface_reduced = PETSC_FALSE; if (size > 1) { PetscInt i; PetscBool adapt_interface = PETSC_FALSE; for (i=0;i<graph->n_subsets;i++) { /* We are not sure that on a given subset of the local interface, with two connected components, the latters be the same among sharing subdomains */ if (graph->subset_ncc[i] > 1) { adapt_interface = PETSC_TRUE; break; } } ierr = MPIU_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_BOOL,MPI_LOR,interface_comm);CHKERRQ(ierr); } if (graph->n_subsets && adapt_interface_reduced) { PetscBT subset_cc_adapt; MPI_Request *send_requests,*recv_requests; PetscInt *send_buffer,*recv_buffer; PetscInt sum_requests,start_of_recv,start_of_send; PetscInt *cum_recv_counts; PetscInt *labels; PetscInt ncc,cum_queue,mss,mns,j,k,s; PetscInt **refine_buffer=NULL,*private_labels = NULL; ierr = PetscMalloc1(graph->nvtxs,&labels);CHKERRQ(ierr); ierr = PetscMemzero(labels,graph->nvtxs*sizeof(*labels));CHKERRQ(ierr); for (i=0;i<graph->ncc;i++) for (j=graph->cptr[i];j<graph->cptr[i+1];j++) labels[graph->queue[j]] = i; /* allocate some space */ ierr = PetscMalloc1(graph->n_subsets+1,&cum_recv_counts);CHKERRQ(ierr); ierr = PetscMemzero(cum_recv_counts,(graph->n_subsets+1)*sizeof(*cum_recv_counts));CHKERRQ(ierr); /* first count how many neighbours per connected component I will receive from */ cum_recv_counts[0] = 0; for (i=0;i<graph->n_subsets;i++) cum_recv_counts[i+1] = cum_recv_counts[i]+graph->count[graph->subset_idxs[i][0]]; ierr = PetscMalloc1(cum_recv_counts[graph->n_subsets],&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc2(cum_recv_counts[graph->n_subsets],&send_requests,cum_recv_counts[graph->n_subsets],&recv_requests);CHKERRQ(ierr); for (i=0;i<cum_recv_counts[graph->n_subsets];i++) { send_requests[i] = MPI_REQUEST_NULL; recv_requests[i] = MPI_REQUEST_NULL; } /* exchange with my neighbours the number of my connected components on the subset of interface */ sum_requests = 0; for (i=0;i<graph->n_subsets;i++) { PetscMPIInt neigh,tag; PetscInt count,*neighs; count = graph->count[graph->subset_idxs[i][0]]; neighs = graph->neighbours_set[graph->subset_idxs[i][0]]; ierr = PetscMPIIntCast(2*graph->subset_ref_node[i],&tag);CHKERRQ(ierr); for (k=0;k<count;k++) { ierr = PetscMPIIntCast(neighs[k],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&graph->subset_ncc[i],1,MPIU_INT,neigh,tag,interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[sum_requests],1,MPIU_INT,neigh,tag,interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); sum_requests++; } } ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* determine the subsets I have to adapt (those having more than 1 cc) */ ierr = PetscBTCreate(graph->n_subsets,&subset_cc_adapt);CHKERRQ(ierr); ierr = PetscBTMemzero(graph->n_subsets,subset_cc_adapt);CHKERRQ(ierr); for (i=0;i<graph->n_subsets;i++) { if (graph->subset_ncc[i] > 1) { ierr = PetscBTSet(subset_cc_adapt,i);CHKERRQ(ierr); continue; } for (j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ if (recv_buffer[j] > 1) { ierr = PetscBTSet(subset_cc_adapt,i);CHKERRQ(ierr); break; } } } ierr = PetscFree(recv_buffer);CHKERRQ(ierr); /* determine send/recv buffers sizes */ j = 0; mss = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { j += graph->subset_size[i]; mss = PetscMax(graph->subset_size[i],mss); } } k = 0; mns = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { k += (cum_recv_counts[i+1]-cum_recv_counts[i])*graph->subset_size[i]; mns = PetscMax(cum_recv_counts[i+1]-cum_recv_counts[i],mns); } } ierr = PetscMalloc2(j,&send_buffer,k,&recv_buffer);CHKERRQ(ierr); /* fill send buffer (order matters: subset_idxs ordered by global ordering) */ j = 0; for (i=0;i<graph->n_subsets;i++) if (PetscBTLookup(subset_cc_adapt,i)) for (k=0;k<graph->subset_size[i];k++) send_buffer[j++] = labels[graph->subset_idxs[i][k]]; /* now exchange the data */ start_of_recv = 0; start_of_send = 0; sum_requests = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { PetscMPIInt neigh,tag; PetscInt size_of_send = graph->subset_size[i]; j = graph->subset_idxs[i][0]; ierr = PetscMPIIntCast(2*graph->subset_ref_node[i]+1,&tag);CHKERRQ(ierr); for (k=0;k<graph->count[j];k++) { ierr = PetscMPIIntCast(graph->neighbours_set[j][k],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neigh,tag,interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_send,MPIU_INT,neigh,tag,interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); start_of_recv += size_of_send; sum_requests++; } start_of_send += size_of_send; } } ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* refine connected components */ start_of_recv = 0; /* allocate some temporary space */ if (mss) { ierr = PetscMalloc1(mss,&refine_buffer);CHKERRQ(ierr); ierr = PetscMalloc2(mss*(mns+1),&refine_buffer[0],mss,&private_labels);CHKERRQ(ierr); } ncc = 0; cum_queue = 0; graph->cptr[0] = 0; for (i=0;i<graph->n_subsets;i++) { if (PetscBTLookup(subset_cc_adapt,i)) { PetscInt subset_counter = 0; PetscInt sharingprocs = cum_recv_counts[i+1]-cum_recv_counts[i]+1; /* count myself */ PetscInt buffer_size = graph->subset_size[i]; /* compute pointers */ for (j=1;j<buffer_size;j++) refine_buffer[j] = refine_buffer[j-1] + sharingprocs; /* analyze contributions from subdomains that share the i-th subset The stricture of refine_buffer is suitable to find intersections of ccs among sharingprocs. supposing the current subset is shared by 3 processes and has dimension 5 with global dofs 0,1,2,3,4 (local 0,4,3,1,2) sharing procs connected components: neigh 0: [0 1 4], [2 3], labels [4,7] (2 connected components) neigh 1: [0 1], [2 3 4], labels [3 2] (2 connected components) neigh 2: [0 4], [1], [2 3], labels [1 5 6] (3 connected components) refine_buffer will be filled as: [ 4, 3, 1; 4, 2, 1; 7, 2, 6; 4, 3, 5; 7, 2, 6; ]; The connected components in local ordering are [0], [1], [2 3], [4] */ /* fill temp_buffer */ for (k=0;k<buffer_size;k++) refine_buffer[k][0] = labels[graph->subset_idxs[i][k]]; for (j=0;j<sharingprocs-1;j++) { for (k=0;k<buffer_size;k++) refine_buffer[k][j+1] = recv_buffer[start_of_recv+k]; start_of_recv += buffer_size; } ierr = PetscMemzero(private_labels,buffer_size*sizeof(PetscInt));CHKERRQ(ierr); for (j=0;j<buffer_size;j++) { if (!private_labels[j]) { /* found a new cc */ PetscBool same_set; graph->cptr[ncc] = cum_queue; ncc++; subset_counter++; private_labels[j] = subset_counter; graph->queue[cum_queue++] = graph->subset_idxs[i][j]; for (k=j+1;k<buffer_size;k++) { /* check for other nodes in new cc */ same_set = PETSC_TRUE; for (s=0;s<sharingprocs;s++) { if (refine_buffer[j][s] != refine_buffer[k][s]) { same_set = PETSC_FALSE; break; } } if (same_set) { private_labels[k] = subset_counter; graph->queue[cum_queue++] = graph->subset_idxs[i][k]; } } } } graph->cptr[ncc] = cum_queue; graph->subset_ncc[i] = subset_counter; graph->queue_sorted = PETSC_FALSE; } else { /* this subset does not need to be adapted */ ierr = PetscMemcpy(graph->queue+cum_queue,graph->subset_idxs[i],graph->subset_size[i]*sizeof(PetscInt));CHKERRQ(ierr); ncc++; cum_queue += graph->subset_size[i]; graph->cptr[ncc] = cum_queue; } } graph->cptr[ncc] = cum_queue; graph->ncc = ncc; if (mss) { ierr = PetscFree2(refine_buffer[0],private_labels);CHKERRQ(ierr); ierr = PetscFree(refine_buffer);CHKERRQ(ierr); } ierr = PetscFree(labels);CHKERRQ(ierr); ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree2(send_requests,recv_requests);CHKERRQ(ierr); ierr = PetscFree2(send_buffer,recv_buffer);CHKERRQ(ierr); ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); ierr = PetscBTDestroy(&subset_cc_adapt);CHKERRQ(ierr); } /* Determine if we are in 2D or 3D */ if (!graph->twodimset) { PetscBool twodim = PETSC_TRUE; for (i=0;i<graph->ncc;i++) { PetscInt repdof = graph->queue[graph->cptr[i]]; PetscInt ccsize = graph->cptr[i+1]-graph->cptr[i]; if (graph->count[repdof] > 1 && ccsize > graph->custom_minimal_size) { twodim = PETSC_FALSE; break; } } ierr = MPIU_Allreduce(&twodim,&graph->twodim,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)graph->l2gmap));CHKERRQ(ierr); graph->twodimset = PETSC_TRUE; } PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) { Mat B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscErrorCode ierr; PetscInt bs = A->rmap->bs,*ai = a->i,*aj = a->j,n = A->rmap->N/bs,i,j,k; PetscInt *rowlengths,*rows,*cols,maxlen = 0,ncols; MatScalar *aa = a->a; PetscFunctionBegin; ierr = PetscMalloc1(n*bs,&rowlengths); CHKERRQ(ierr); for (i=0; i<n; i++) { maxlen = PetscMax(maxlen,(ai[i+1] - ai[i])); for (j=0; j<bs; j++) { rowlengths[i*bs+j] = bs*(ai[i+1] - ai[i]); } } ierr = MatCreate(PetscObjectComm((PetscObject)A),&B); CHKERRQ(ierr); ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N); CHKERRQ(ierr); ierr = MatSetType(B,MATSEQAIJ); CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(B,0,rowlengths); CHKERRQ(ierr); ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE); CHKERRQ(ierr); ierr = PetscFree(rowlengths); CHKERRQ(ierr); ierr = PetscMalloc1(bs,&rows); CHKERRQ(ierr); ierr = PetscMalloc1(bs*maxlen,&cols); CHKERRQ(ierr); for (i=0; i<n; i++) { for (j=0; j<bs; j++) { rows[j] = i*bs+j; } ncols = ai[i+1] - ai[i]; for (k=0; k<ncols; k++) { for (j=0; j<bs; j++) { cols[k*bs+j] = bs*(*aj) + j; } aj++; } ierr = MatSetValues(B,bs,rows,bs*ncols,cols,aa,INSERT_VALUES); CHKERRQ(ierr); aa += ncols*bs*bs; } ierr = PetscFree(cols); CHKERRQ(ierr); ierr = PetscFree(rows); CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); B->rmap->bs = A->rmap->bs; if (reuse == MAT_REUSE_MATRIX) { ierr = MatHeaderReplace(A,B); CHKERRQ(ierr); } else { *newmat = B; } PetscFunctionReturn(0); }
static PetscErrorCode TestCellShape(DM dm) { PetscMPIInt rank; PetscInt dim, c, cStart, cEnd, count = 0; ex1_stats_t stats, globalStats; PetscReal *J, *invJ, min = 0, max = 0, mean = 0, stdev = 0; MPI_Comm comm = PetscObjectComm((PetscObject)dm); DM dmCoarse; PetscErrorCode ierr; PetscFunctionBegin; stats.min = PETSC_MAX_REAL; stats.max = PETSC_MIN_REAL; stats.sum = stats.squaresum = 0.; stats.count = 0; ierr = DMGetDimension(dm,&dim);CHKERRQ(ierr); ierr = PetscMalloc2(dim * dim, &J, dim * dim, &invJ);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm,0,&cStart,&cEnd);CHKERRQ(ierr); for (c = cStart; c < cEnd; c++) { PetscInt i; PetscReal frobJ = 0., frobInvJ = 0., cond2, cond, detJ; ierr = DMPlexComputeCellGeometryAffineFEM(dm,c,NULL,J,invJ,&detJ);CHKERRQ(ierr); for (i = 0; i < dim * dim; i++) { frobJ += J[i] * J[i]; frobInvJ += invJ[i] * invJ[i]; } cond2 = frobJ * frobInvJ; cond = PetscSqrtReal(cond2); stats.min = PetscMin(stats.min,cond); stats.max = PetscMax(stats.max,cond); stats.sum += cond; stats.squaresum += cond2; stats.count++; } { PetscMPIInt blockLengths[2] = {4,1}; MPI_Aint blockOffsets[2] = {offsetof(ex1_stats_t,min),offsetof(ex1_stats_t,count)}; MPI_Datatype blockTypes[2] = {MPIU_REAL,MPIU_INT}, statType; MPI_Op statReduce; ierr = MPI_Type_create_struct(2,blockLengths,blockOffsets,blockTypes,&statType);CHKERRQ(ierr); ierr = MPI_Type_commit(&statType);CHKERRQ(ierr); ierr = MPI_Op_create(ex1_stats_reduce, PETSC_TRUE, &statReduce);CHKERRQ(ierr); ierr = MPI_Reduce(&stats,&globalStats,1,statType,statReduce,0,comm);CHKERRQ(ierr); ierr = MPI_Op_free(&statReduce);CHKERRQ(ierr); ierr = MPI_Type_free(&statType);CHKERRQ(ierr); } ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (!rank) { count = globalStats.count; min = globalStats.min; max = globalStats.max; mean = globalStats.sum / globalStats.count; stdev = PetscSqrtReal(globalStats.squaresum / globalStats.count - mean * mean); } ierr = PetscPrintf(comm,"Mesh with %d cells, shape condition numbers: min = %g, max = %g, mean = %g, stddev = %g\n", count, (double) min, (double) max, (double) mean, (double) stdev); ierr = PetscFree2(J,invJ);CHKERRQ(ierr); ierr = DMPlexGetCoarseDM(dm,&dmCoarse);CHKERRQ(ierr); if (dmCoarse) { ierr = TestCellShape(dmCoarse);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode EPSSetUp_XD(EPS eps) { PetscErrorCode ierr; EPS_DAVIDSON *data = (EPS_DAVIDSON*)eps->data; dvdDashboard *dvd = &data->ddb; dvdBlackboard b; PetscInt min_size_V,plusk,bs,initv,i,cX_in_proj,cX_in_impr,nmat; Mat A,B; KSP ksp; PetscBool t,ipB,ispositive,dynamic; HarmType_t harm; InitType_t init; PetscReal fix; PetscScalar target; PetscFunctionBegin; /* Setup EPS options and get the problem specification */ ierr = EPSXDGetBlockSize_XD(eps,&bs);CHKERRQ(ierr); if (bs <= 0) bs = 1; if (eps->ncv) { if (eps->ncv<eps->nev) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The value of ncv must be at least nev"); } else if (eps->mpd) eps->ncv = eps->mpd + eps->nev + bs; else if (eps->nev<500) eps->ncv = PetscMin(eps->n-bs,PetscMax(2*eps->nev,eps->nev+15))+bs; else eps->ncv = PetscMin(eps->n-bs,eps->nev+500)+bs; if (!eps->mpd) eps->mpd = eps->ncv; if (eps->mpd > eps->ncv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The mpd has to be less or equal than ncv"); if (eps->mpd < 2) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The mpd has to be greater than 2"); if (!eps->max_it) eps->max_it = PetscMax(100*eps->ncv,2*eps->n); if (!eps->which) eps->which = EPS_LARGEST_MAGNITUDE; if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Wrong value of eps->which"); if (!(eps->nev + bs <= eps->ncv)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The ncv has to be greater than nev plus blocksize"); if (eps->trueres) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"-eps_true_residual is temporally disable in this solver."); ierr = EPSXDGetRestart_XD(eps,&min_size_V,&plusk);CHKERRQ(ierr); if (!min_size_V) min_size_V = PetscMin(PetscMax(bs,5),eps->mpd/2); if (!(min_size_V+bs <= eps->mpd)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The value of minv must be less than mpd minus blocksize"); ierr = EPSXDGetInitialSize_XD(eps,&initv);CHKERRQ(ierr); if (eps->mpd < initv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The initv has to be less or equal than mpd"); /* Set STPrecond as the default ST */ if (!((PetscObject)eps->st)->type_name) { ierr = STSetType(eps->st,STPRECOND);CHKERRQ(ierr); } ierr = STPrecondSetKSPHasMat(eps->st,PETSC_FALSE);CHKERRQ(ierr); /* Change the default sigma to inf if necessary */ if (eps->which == EPS_LARGEST_MAGNITUDE || eps->which == EPS_LARGEST_REAL || eps->which == EPS_LARGEST_IMAGINARY) { ierr = STSetDefaultShift(eps->st,PETSC_MAX_REAL);CHKERRQ(ierr); } /* Davidson solvers only support STPRECOND */ ierr = STSetUp(eps->st);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)eps->st,STPRECOND,&t);CHKERRQ(ierr); if (!t) SETERRQ1(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"%s only works with precond spectral transformation", ((PetscObject)eps)->type_name); /* Setup problem specification in dvd */ ierr = STGetNumMatrices(eps->st,&nmat);CHKERRQ(ierr); ierr = STGetOperators(eps->st,0,&A);CHKERRQ(ierr); if (nmat>1) { ierr = STGetOperators(eps->st,1,&B);CHKERRQ(ierr); } ierr = EPSReset_XD(eps);CHKERRQ(ierr); ierr = PetscMemzero(dvd,sizeof(dvdDashboard));CHKERRQ(ierr); dvd->A = A; dvd->B = eps->isgeneralized? B : NULL; ispositive = eps->ispositive; dvd->sA = DVD_MAT_IMPLICIT | (eps->ishermitian? DVD_MAT_HERMITIAN : 0) | ((ispositive && !eps->isgeneralized) ? DVD_MAT_POS_DEF : 0); /* Asume -eps_hermitian means hermitian-definite in generalized problems */ if (!ispositive && !eps->isgeneralized && eps->ishermitian) ispositive = PETSC_TRUE; if (!eps->isgeneralized) dvd->sB = DVD_MAT_IMPLICIT | DVD_MAT_HERMITIAN | DVD_MAT_IDENTITY | DVD_MAT_UNITARY | DVD_MAT_POS_DEF; else dvd->sB = DVD_MAT_IMPLICIT | (eps->ishermitian? DVD_MAT_HERMITIAN : 0) | (ispositive? DVD_MAT_POS_DEF : 0); ipB = (dvd->B && data->ipB && DVD_IS(dvd->sB,DVD_MAT_HERMITIAN))?PETSC_TRUE:PETSC_FALSE; if (data->ipB && !ipB) data->ipB = PETSC_FALSE; dvd->correctXnorm = ipB; dvd->sEP = ((!eps->isgeneralized || (eps->isgeneralized && ipB))? DVD_EP_STD : 0) | (ispositive? DVD_EP_HERMITIAN : 0) | ((eps->problem_type == EPS_GHIEP && ipB) ? DVD_EP_INDEFINITE : 0); dvd->nev = eps->nev; dvd->which = eps->which; dvd->withTarget = PETSC_TRUE; switch (eps->which) { case EPS_TARGET_MAGNITUDE: case EPS_TARGET_IMAGINARY: dvd->target[0] = target = eps->target; dvd->target[1] = 1.0; break; case EPS_TARGET_REAL: dvd->target[0] = PetscRealPart(target = eps->target); dvd->target[1] = 1.0; break; case EPS_LARGEST_REAL: case EPS_LARGEST_MAGNITUDE: case EPS_LARGEST_IMAGINARY: /* TODO: think about this case */ dvd->target[0] = 1.0; dvd->target[1] = target = 0.0; break; case EPS_SMALLEST_MAGNITUDE: case EPS_SMALLEST_REAL: case EPS_SMALLEST_IMAGINARY: /* TODO: think about this case */ dvd->target[0] = target = 0.0; dvd->target[1] = 1.0; break; case EPS_WHICH_USER: ierr = STGetShift(eps->st,&target);CHKERRQ(ierr); dvd->target[0] = target; dvd->target[1] = 1.0; break; case EPS_ALL: SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported option: which == EPS_ALL"); break; default: SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported value of option 'which'"); } dvd->tol = eps->tol==PETSC_DEFAULT?SLEPC_DEFAULT_TOL:eps->tol; dvd->eps = eps; /* Setup the extraction technique */ if (!eps->extraction) { if (ipB || ispositive) eps->extraction = EPS_RITZ; else { switch (eps->which) { case EPS_TARGET_REAL: case EPS_TARGET_MAGNITUDE: case EPS_TARGET_IMAGINARY: case EPS_SMALLEST_MAGNITUDE: case EPS_SMALLEST_REAL: case EPS_SMALLEST_IMAGINARY: eps->extraction = EPS_HARMONIC; break; case EPS_LARGEST_REAL: case EPS_LARGEST_MAGNITUDE: case EPS_LARGEST_IMAGINARY: eps->extraction = EPS_HARMONIC_LARGEST; break; default: eps->extraction = EPS_RITZ; } } } switch (eps->extraction) { case EPS_RITZ: harm = DVD_HARM_NONE; break; case EPS_HARMONIC: harm = DVD_HARM_RR; break; case EPS_HARMONIC_RELATIVE: harm = DVD_HARM_RRR; break; case EPS_HARMONIC_RIGHT: harm = DVD_HARM_REIGS; break; case EPS_HARMONIC_LARGEST: harm = DVD_HARM_LEIGS; break; default: SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported extraction type"); } /* Setup the type of starting subspace */ ierr = EPSXDGetKrylovStart_XD(eps,&t);CHKERRQ(ierr); init = (!t)? DVD_INITV_CLASSIC : DVD_INITV_KRYLOV; /* Setup the presence of converged vectors in the projected problem and in the projector */ ierr = EPSXDGetWindowSizes_XD(eps,&cX_in_impr,&cX_in_proj);CHKERRQ(ierr); if (cX_in_impr>0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The option pwindow is temporally disable in this solver."); if (cX_in_proj>0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The option qwindow is temporally disable in this solver."); if (min_size_V <= cX_in_proj) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"minv has to be greater than qwindow"); if (bs > 1 && cX_in_impr > 0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported option: pwindow > 0 and bs > 1"); /* Get the fix parameter */ ierr = EPSXDGetFix_XD(eps,&fix);CHKERRQ(ierr); /* Get whether the stopping criterion is used */ ierr = EPSJDGetConstCorrectionTol_JD(eps,&dynamic);CHKERRQ(ierr); /* Preconfigure dvd */ ierr = STGetKSP(eps->st,&ksp);CHKERRQ(ierr); ierr = dvd_schm_basic_preconf(dvd,&b,eps->mpd,min_size_V,bs, initv, PetscAbs(eps->nini), plusk,harm, ksp,init,eps->trackall, data->ipB,cX_in_proj,cX_in_impr, data->scheme);CHKERRQ(ierr); /* Allocate memory */ ierr = EPSAllocateSolution(eps,0);CHKERRQ(ierr); /* Setup orthogonalization */ ierr = EPS_SetInnerProduct(eps);CHKERRQ(ierr); if (!(ipB && dvd->B)) { ierr = BVSetMatrix(eps->V,NULL,PETSC_FALSE);CHKERRQ(ierr); } for (i=0;i<eps->ncv;i++) eps->perm[i] = i; /* Configure dvd for a basic GD */ ierr = dvd_schm_basic_conf(dvd,&b,eps->mpd,min_size_V,bs, initv, PetscAbs(eps->nini),plusk, harm,dvd->withTarget, target,ksp, fix,init,eps->trackall, data->ipB,cX_in_proj,cX_in_impr,dynamic, data->scheme);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ DMPlexOrient - Give a consistent orientation to the input mesh Input Parameters: . dm - The DM Note: The orientation data for the DM are change in-place. $ This routine will fail for non-orientable surfaces, such as the Moebius strip. Level: advanced .seealso: DMCreate(), DMPLEX @*/ PetscErrorCode DMPlexOrient(DM dm) { MPI_Comm comm; PetscSF sf; const PetscInt *lpoints; const PetscSFNode *rpoints; PetscSFNode *rorntComp = NULL, *lorntComp = NULL; PetscInt *numNeighbors, **neighbors; PetscSFNode *nrankComp; PetscBool *match, *flipped; PetscBT seenCells, flippedCells, seenFaces; PetscInt *faceFIFO, fTop, fBottom, *cellComp, *faceComp; PetscInt numLeaves, numRoots, dim, h, cStart, cEnd, c, cell, fStart, fEnd, face, off, totNeighbors = 0; PetscMPIInt rank, size, numComponents, comp = 0; PetscBool flg, flg2; PetscViewer viewer = NULL, selfviewer = NULL; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view", &flg);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view_synchronized", &flg2);CHKERRQ(ierr); ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr); ierr = PetscSFGetGraph(sf, &numRoots, &numLeaves, &lpoints, &rpoints);CHKERRQ(ierr); /* Truth Table mismatch flips do action mismatch flipA ^ flipB action F 0 flips no F F F F 1 flip yes F T T F 2 flips no T F T T 0 flips yes T T F T 1 flip no T 2 flips yes */ ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMPlexGetVTKCellHeight(dm, &h);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, h, &cStart, &cEnd);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, h+1, &fStart, &fEnd);CHKERRQ(ierr); ierr = PetscBTCreate(cEnd - cStart, &seenCells);CHKERRQ(ierr); ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr); ierr = PetscBTCreate(cEnd - cStart, &flippedCells);CHKERRQ(ierr); ierr = PetscBTMemzero(cEnd - cStart, flippedCells);CHKERRQ(ierr); ierr = PetscBTCreate(fEnd - fStart, &seenFaces);CHKERRQ(ierr); ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr); ierr = PetscCalloc3(fEnd - fStart, &faceFIFO, cEnd-cStart, &cellComp, fEnd-fStart, &faceComp);CHKERRQ(ierr); /* OLD STYLE - Add an integer array over cells and faces (component) for connected component number Foreach component - Mark the initial cell as seen - Process component as usual - Set component for all seenCells - Wipe seenCells and seenFaces (flippedCells can stay) - Generate parallel adjacency for component using SF and seenFaces - Collect numComponents adj data from each proc to 0 - Build same serial graph - Use same solver - Use Scatterv to to send back flipped flags for each component - Negate flippedCells by component NEW STYLE - Create the adj on each process - Bootstrap to complete graph on proc 0 */ /* Loop over components */ for (cell = cStart; cell < cEnd; ++cell) cellComp[cell-cStart] = -1; do { /* Look for first unmarked cell */ for (cell = cStart; cell < cEnd; ++cell) if (cellComp[cell-cStart] < 0) break; if (cell >= cEnd) break; /* Initialize FIFO with first cell in component */ { const PetscInt *cone; PetscInt coneSize; fTop = fBottom = 0; ierr = DMPlexGetConeSize(dm, cell, &coneSize);CHKERRQ(ierr); ierr = DMPlexGetCone(dm, cell, &cone);CHKERRQ(ierr); for (c = 0; c < coneSize; ++c) { faceFIFO[fBottom++] = cone[c]; ierr = PetscBTSet(seenFaces, cone[c]-fStart);CHKERRQ(ierr); } ierr = PetscBTSet(seenCells, cell-cStart);CHKERRQ(ierr); } /* Consider each face in FIFO */ while (fTop < fBottom) { ierr = DMPlexCheckFace_Internal(dm, faceFIFO, &fTop, &fBottom, cStart, fStart, fEnd, seenCells, flippedCells, seenFaces);CHKERRQ(ierr); } /* Set component for cells and faces */ for (cell = 0; cell < cEnd-cStart; ++cell) { if (PetscBTLookup(seenCells, cell)) cellComp[cell] = comp; } for (face = 0; face < fEnd-fStart; ++face) { if (PetscBTLookup(seenFaces, face)) faceComp[face] = comp; } /* Wipe seenCells and seenFaces for next component */ ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr); ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr); ++comp; } while (1); numComponents = comp; if (flg) { PetscViewer v; ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for serial flipped cells:\n", rank);CHKERRQ(ierr); ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr); ierr = PetscViewerFlush(v);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr); } /* Now all subdomains are oriented, but we need a consistent parallel orientation */ if (numLeaves >= 0) { /* Store orientations of boundary faces*/ ierr = PetscCalloc2(numRoots,&rorntComp,numRoots,&lorntComp);CHKERRQ(ierr); for (face = fStart; face < fEnd; ++face) { const PetscInt *cone, *support, *ornt; PetscInt coneSize, supportSize; ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr); if (supportSize != 1) continue; ierr = DMPlexGetSupport(dm, face, &support);CHKERRQ(ierr); ierr = DMPlexGetCone(dm, support[0], &cone);CHKERRQ(ierr); ierr = DMPlexGetConeSize(dm, support[0], &coneSize);CHKERRQ(ierr); ierr = DMPlexGetConeOrientation(dm, support[0], &ornt);CHKERRQ(ierr); for (c = 0; c < coneSize; ++c) if (cone[c] == face) break; if (dim == 1) { /* Use cone position instead, shifted to -1 or 1 */ if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = 1-c*2; else rorntComp[face].rank = c*2-1; } else { if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = ornt[c] < 0 ? -1 : 1; else rorntComp[face].rank = ornt[c] < 0 ? 1 : -1; } rorntComp[face].index = faceComp[face-fStart]; } /* Communicate boundary edge orientations */ ierr = PetscSFBcastBegin(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr); } /* Get process adjacency */ ierr = PetscMalloc2(numComponents, &numNeighbors, numComponents, &neighbors);CHKERRQ(ierr); viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)dm)); if (flg2) {ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);} ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr); for (comp = 0; comp < numComponents; ++comp) { PetscInt l, n; numNeighbors[comp] = 0; ierr = PetscMalloc1(PetscMax(numLeaves, 0), &neighbors[comp]);CHKERRQ(ierr); /* I know this is p^2 time in general, but for bounded degree its alright */ for (l = 0; l < numLeaves; ++l) { const PetscInt face = lpoints[l]; /* Find a representative face (edge) separating pairs of procs */ if ((face >= fStart) && (face < fEnd) && (faceComp[face-fStart] == comp)) { const PetscInt rrank = rpoints[l].rank; const PetscInt rcomp = lorntComp[face].index; for (n = 0; n < numNeighbors[comp]; ++n) if ((rrank == rpoints[neighbors[comp][n]].rank) && (rcomp == lorntComp[lpoints[neighbors[comp][n]]].index)) break; if (n >= numNeighbors[comp]) { PetscInt supportSize; ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr); if (supportSize != 1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Boundary faces should see one cell, not %d", supportSize); if (flg) {ierr = PetscViewerASCIIPrintf(selfviewer, "[%d]: component %d, Found representative leaf %d (face %d) connecting to face %d on (%d, %d) with orientation %d\n", rank, comp, l, face, rpoints[l].index, rrank, rcomp, lorntComp[face].rank);CHKERRQ(ierr);} neighbors[comp][numNeighbors[comp]++] = l; } } } totNeighbors += numNeighbors[comp]; } ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (flg2) {ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);} ierr = PetscMalloc2(totNeighbors, &nrankComp, totNeighbors, &match);CHKERRQ(ierr); for (comp = 0, off = 0; comp < numComponents; ++comp) { PetscInt n; for (n = 0; n < numNeighbors[comp]; ++n, ++off) { const PetscInt face = lpoints[neighbors[comp][n]]; const PetscInt o = rorntComp[face].rank*lorntComp[face].rank; if (o < 0) match[off] = PETSC_TRUE; else if (o > 0) match[off] = PETSC_FALSE; else SETERRQ5(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid face %d (%d, %d) neighbor: %d comp: %d", face, rorntComp[face], lorntComp[face], neighbors[comp][n], comp); nrankComp[off].rank = rpoints[neighbors[comp][n]].rank; nrankComp[off].index = lorntComp[lpoints[neighbors[comp][n]]].index; } ierr = PetscFree(neighbors[comp]);CHKERRQ(ierr); } /* Collect the graph on 0 */ if (numLeaves >= 0) { Mat G; PetscBT seenProcs, flippedProcs; PetscInt *procFIFO, pTop, pBottom; PetscInt *N = NULL, *Noff; PetscSFNode *adj = NULL; PetscBool *val = NULL; PetscMPIInt *recvcounts = NULL, *displs = NULL, *Nc, p, o; PetscMPIInt size = 0; ierr = PetscCalloc1(numComponents, &flipped);CHKERRQ(ierr); if (!rank) {ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);} ierr = PetscCalloc4(size, &recvcounts, size+1, &displs, size, &Nc, size+1, &Noff);CHKERRQ(ierr); ierr = MPI_Gather(&numComponents, 1, MPI_INT, Nc, 1, MPI_INT, 0, comm);CHKERRQ(ierr); for (p = 0; p < size; ++p) { displs[p+1] = displs[p] + Nc[p]; } if (!rank) {ierr = PetscMalloc1(displs[size],&N);CHKERRQ(ierr);} ierr = MPI_Gatherv(numNeighbors, numComponents, MPIU_INT, N, Nc, displs, MPIU_INT, 0, comm);CHKERRQ(ierr); for (p = 0, o = 0; p < size; ++p) { recvcounts[p] = 0; for (c = 0; c < Nc[p]; ++c, ++o) recvcounts[p] += N[o]; displs[p+1] = displs[p] + recvcounts[p]; } if (!rank) {ierr = PetscMalloc2(displs[size], &adj, displs[size], &val);CHKERRQ(ierr);} ierr = MPI_Gatherv(nrankComp, totNeighbors, MPIU_2INT, adj, recvcounts, displs, MPIU_2INT, 0, comm);CHKERRQ(ierr); ierr = MPI_Gatherv(match, totNeighbors, MPIU_BOOL, val, recvcounts, displs, MPIU_BOOL, 0, comm);CHKERRQ(ierr); ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr); if (!rank) { for (p = 1; p <= size; ++p) {Noff[p] = Noff[p-1] + Nc[p-1];} if (flg) { PetscInt n; for (p = 0, off = 0; p < size; ++p) { for (c = 0; c < Nc[p]; ++c) { ierr = PetscPrintf(PETSC_COMM_SELF, "Proc %d Comp %d:\n", p, c);CHKERRQ(ierr); for (n = 0; n < N[Noff[p]+c]; ++n, ++off) { ierr = PetscPrintf(PETSC_COMM_SELF, " edge (%d, %d) (%d):\n", adj[off].rank, adj[off].index, val[off]);CHKERRQ(ierr); } } } } /* Symmetrize the graph */ ierr = MatCreate(PETSC_COMM_SELF, &G);CHKERRQ(ierr); ierr = MatSetSizes(G, Noff[size], Noff[size], Noff[size], Noff[size]);CHKERRQ(ierr); ierr = MatSetUp(G);CHKERRQ(ierr); for (p = 0, off = 0; p < size; ++p) { for (c = 0; c < Nc[p]; ++c) { const PetscInt r = Noff[p]+c; PetscInt n; for (n = 0; n < N[r]; ++n, ++off) { const PetscInt q = Noff[adj[off].rank] + adj[off].index; const PetscScalar o = val[off] ? 1.0 : 0.0; ierr = MatSetValues(G, 1, &r, 1, &q, &o, INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(G, 1, &q, 1, &r, &o, INSERT_VALUES);CHKERRQ(ierr); } } } ierr = MatAssemblyBegin(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscBTCreate(Noff[size], &seenProcs);CHKERRQ(ierr); ierr = PetscBTMemzero(Noff[size], seenProcs);CHKERRQ(ierr); ierr = PetscBTCreate(Noff[size], &flippedProcs);CHKERRQ(ierr); ierr = PetscBTMemzero(Noff[size], flippedProcs);CHKERRQ(ierr); ierr = PetscMalloc1(Noff[size], &procFIFO);CHKERRQ(ierr); pTop = pBottom = 0; for (p = 0; p < Noff[size]; ++p) { if (PetscBTLookup(seenProcs, p)) continue; /* Initialize FIFO with next proc */ procFIFO[pBottom++] = p; ierr = PetscBTSet(seenProcs, p);CHKERRQ(ierr); /* Consider each proc in FIFO */ while (pTop < pBottom) { const PetscScalar *ornt; const PetscInt *neighbors; PetscInt proc, nproc, seen, flippedA, flippedB, mismatch, numNeighbors, n; proc = procFIFO[pTop++]; flippedA = PetscBTLookup(flippedProcs, proc) ? 1 : 0; ierr = MatGetRow(G, proc, &numNeighbors, &neighbors, &ornt);CHKERRQ(ierr); /* Loop over neighboring procs */ for (n = 0; n < numNeighbors; ++n) { nproc = neighbors[n]; mismatch = PetscRealPart(ornt[n]) > 0.5 ? 0 : 1; seen = PetscBTLookup(seenProcs, nproc); flippedB = PetscBTLookup(flippedProcs, nproc) ? 1 : 0; if (mismatch ^ (flippedA ^ flippedB)) { if (seen) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Previously seen procs %d and %d do not match: Fault mesh is non-orientable", proc, nproc); if (!flippedB) { ierr = PetscBTSet(flippedProcs, nproc);CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Inconsistent mesh orientation: Fault mesh is non-orientable"); } else if (mismatch && flippedA && flippedB) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempt to flip already flipped cell: Fault mesh is non-orientable"); if (!seen) { procFIFO[pBottom++] = nproc; ierr = PetscBTSet(seenProcs, nproc);CHKERRQ(ierr); } } } } ierr = PetscFree(procFIFO);CHKERRQ(ierr); ierr = MatDestroy(&G);CHKERRQ(ierr); ierr = PetscFree2(adj, val);CHKERRQ(ierr); ierr = PetscBTDestroy(&seenProcs);CHKERRQ(ierr); } /* Scatter flip flags */ { PetscBool *flips = NULL; if (!rank) { ierr = PetscMalloc1(Noff[size], &flips);CHKERRQ(ierr); for (p = 0; p < Noff[size]; ++p) { flips[p] = PetscBTLookup(flippedProcs, p) ? PETSC_TRUE : PETSC_FALSE; if (flg && flips[p]) {ierr = PetscPrintf(comm, "Flipping Proc+Comp %d:\n", p);CHKERRQ(ierr);} } for (p = 0; p < size; ++p) { displs[p+1] = displs[p] + Nc[p]; } } ierr = MPI_Scatterv(flips, Nc, displs, MPIU_BOOL, flipped, numComponents, MPIU_BOOL, 0, comm);CHKERRQ(ierr); ierr = PetscFree(flips);CHKERRQ(ierr); } if (!rank) {ierr = PetscBTDestroy(&flippedProcs);CHKERRQ(ierr);} ierr = PetscFree(N);CHKERRQ(ierr); ierr = PetscFree4(recvcounts, displs, Nc, Noff);CHKERRQ(ierr); ierr = PetscFree2(nrankComp, match);CHKERRQ(ierr); /* Decide whether to flip cells in each component */ for (c = 0; c < cEnd-cStart; ++c) {if (flipped[cellComp[c]]) {ierr = PetscBTNegate(flippedCells, c);CHKERRQ(ierr);}} ierr = PetscFree(flipped);CHKERRQ(ierr); } if (flg) { PetscViewer v; ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for parallel flipped cells:\n", rank);CHKERRQ(ierr); ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr); ierr = PetscViewerFlush(v);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr); } /* Reverse flipped cells in the mesh */ for (c = cStart; c < cEnd; ++c) { if (PetscBTLookup(flippedCells, c-cStart)) { ierr = DMPlexReverseCell(dm, c);CHKERRQ(ierr); } } ierr = PetscBTDestroy(&seenCells);CHKERRQ(ierr); ierr = PetscBTDestroy(&flippedCells);CHKERRQ(ierr); ierr = PetscBTDestroy(&seenFaces);CHKERRQ(ierr); ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr); ierr = PetscFree2(rorntComp, lorntComp);CHKERRQ(ierr); ierr = PetscFree3(faceFIFO, cellComp, faceComp);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C MatCreateLMVM - Creates a limited memory matrix for lmvm algorithms. Collective on A Input Parameters: + comm - MPI Communicator . n - local size of vectors - N - global size of vectors Output Parameters: . A - New LMVM matrix Level: developer @*/ extern PetscErrorCode MatCreateLMVM(MPI_Comm comm, PetscInt n, PetscInt N, Mat *A) { MatLMVMCtx *ctx; PetscErrorCode ierr; PetscInt nhistory; PetscFunctionBegin; /* create data structure and populate with default values */ ierr = PetscNew(&ctx);CHKERRQ(ierr); ctx->lm=5; ctx->eps=0.0; ctx->limitType=MatLMVM_Limit_None; ctx->scaleType=MatLMVM_Scale_Broyden; ctx->rScaleType = MatLMVM_Rescale_Scalar; ctx->s_alpha = 1.0; ctx->r_alpha = 1.0; ctx->r_beta = 0.5; ctx->mu = 1.0; ctx->nu = 100.0; ctx->phi = 0.125; ctx->scalar_history = 1; ctx->rescale_history = 1; ctx->delta_min = 1e-7; ctx->delta_max = 100.0; /* Begin configuration */ ierr = PetscOptionsInt("-tao_lmm_vectors", "vectors to use for approximation", "", ctx->lm, &ctx->lm, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_limit_mu", "mu limiting factor", "", ctx->mu, &ctx->mu, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_limit_nu", "nu limiting factor", "", ctx->nu, &ctx->nu, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_broyden_phi", "phi factor for Broyden scaling", "", ctx->phi, &ctx->phi, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_scalar_alpha", "alpha factor for scalar scaling", "",ctx->s_alpha, &ctx->s_alpha, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_rescale_alpha", "alpha factor for rescaling diagonal", "", ctx->r_alpha, &ctx->r_alpha, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_rescale_beta", "beta factor for rescaling diagonal", "", ctx->r_beta, &ctx->r_beta, 0);CHKERRQ(ierr); ierr = PetscOptionsInt("-tao_lmm_scalar_history", "amount of history for scalar scaling", "", ctx->scalar_history, &ctx->scalar_history, 0);CHKERRQ(ierr); ierr = PetscOptionsInt("-tao_lmm_rescale_history", "amount of history for rescaling diagonal", "", ctx->rescale_history, &ctx->rescale_history, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_eps", "rejection tolerance", "", ctx->eps, &ctx->eps, 0);CHKERRQ(ierr); ierr = PetscOptionsEList("-tao_lmm_scale_type", "scale type", "", Scale_Table, MatLMVM_Scale_Types, Scale_Table[ctx->scaleType], &ctx->scaleType, 0);CHKERRQ(ierr); ierr = PetscOptionsEList("-tao_lmm_rescale_type", "rescale type", "", Rescale_Table, MatLMVM_Rescale_Types, Rescale_Table[ctx->rScaleType], &ctx->rScaleType, 0);CHKERRQ(ierr); ierr = PetscOptionsEList("-tao_lmm_limit_type", "limit type", "", Limit_Table, MatLMVM_Limit_Types, Limit_Table[ctx->limitType], &ctx->limitType, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_delta_min", "minimum delta value", "", ctx->delta_min, &ctx->delta_min, 0);CHKERRQ(ierr); ierr = PetscOptionsReal("-tao_lmm_delta_max", "maximum delta value", "", ctx->delta_max, &ctx->delta_max, 0);CHKERRQ(ierr); /* Complete configuration */ ctx->rescale_history = PetscMin(ctx->rescale_history, ctx->lm); ierr = PetscMalloc1(ctx->lm+1,&ctx->rho);CHKERRQ(ierr); ierr = PetscMalloc1(ctx->lm+1,&ctx->beta);CHKERRQ(ierr); nhistory = PetscMax(ctx->scalar_history,1); ierr = PetscMalloc1(nhistory,&ctx->yy_history);CHKERRQ(ierr); ierr = PetscMalloc1(nhistory,&ctx->ys_history);CHKERRQ(ierr); ierr = PetscMalloc1(nhistory,&ctx->ss_history);CHKERRQ(ierr); nhistory = PetscMax(ctx->rescale_history,1); ierr = PetscMalloc1(nhistory,&ctx->yy_rhistory);CHKERRQ(ierr); ierr = PetscMalloc1(nhistory,&ctx->ys_rhistory);CHKERRQ(ierr); ierr = PetscMalloc1(nhistory,&ctx->ss_rhistory);CHKERRQ(ierr); /* Finish initializations */ ctx->lmnow = 0; ctx->iter = 0; ctx->nupdates = 0; ctx->nrejects = 0; ctx->delta = 1.0; ctx->Gprev = 0; ctx->Xprev = 0; ctx->scale = 0; ctx->useScale = PETSC_FALSE; ctx->H0 = 0; ctx->useDefaultH0=PETSC_TRUE; ierr = MatCreateShell(comm, n, n, N, N, ctx, A);CHKERRQ(ierr); ierr = MatShellSetOperation(*A,MATOP_DESTROY,(void(*)(void))MatDestroy_LMVM);CHKERRQ(ierr); ierr = MatShellSetOperation(*A,MATOP_VIEW,(void(*)(void))MatView_LMVM);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode TaoSolve_TRON(Tao tao) { TAO_TRON *tron = (TAO_TRON *)tao->data; PetscErrorCode ierr; PetscInt iter=0,its; TaoConvergedReason reason = TAO_CONTINUE_ITERATING; TaoLineSearchConvergedReason ls_reason = TAOLINESEARCH_CONTINUE_ITERATING; PetscReal prered,actred,delta,f,f_new,rhok,gdx,xdiff,stepsize; PetscFunctionBegin; tron->pgstepsize=1.0; tao->trust = tao->trust0; /* Project the current point onto the feasible set */ ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr); ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&tron->f,tao->gradient);CHKERRQ(ierr); ierr = ISDestroy(&tron->Free_Local);CHKERRQ(ierr); ierr = VecWhichBetween(tao->XL,tao->solution,tao->XU,&tron->Free_Local);CHKERRQ(ierr); /* Project the gradient and calculate the norm */ ierr = VecBoundGradientProjection(tao->gradient,tao->solution, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(tao->gradient,NORM_2,&tron->gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(tron->f) || PetscIsInfOrNanReal(tron->gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf pr NaN"); if (tao->trust <= 0) { tao->trust=PetscMax(tron->gnorm*tron->gnorm,1.0); } tron->stepsize=tao->trust; ierr = TaoMonitor(tao, iter, tron->f, tron->gnorm, 0.0, tron->stepsize, &reason);CHKERRQ(ierr); while (reason==TAO_CONTINUE_ITERATING){ ierr = TronGradientProjections(tao,tron);CHKERRQ(ierr); f=tron->f; delta=tao->trust; tron->n_free_last = tron->n_free; ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); ierr = ISGetSize(tron->Free_Local, &tron->n_free);CHKERRQ(ierr); /* If no free variables */ if (tron->n_free == 0) { actred=0; PetscInfo(tao,"No free variables in tron iteration."); break; } /* use free_local to mask/submat gradient, hessian, stepdirection */ ierr = TaoVecGetSubVec(tao->gradient,tron->Free_Local,tao->subset_type,0.0,&tron->R);CHKERRQ(ierr); ierr = TaoVecGetSubVec(tao->gradient,tron->Free_Local,tao->subset_type,0.0,&tron->DXFree);CHKERRQ(ierr); ierr = VecSet(tron->DXFree,0.0);CHKERRQ(ierr); ierr = VecScale(tron->R, -1.0);CHKERRQ(ierr); ierr = TaoMatGetSubMat(tao->hessian, tron->Free_Local, tron->diag, tao->subset_type, &tron->H_sub);CHKERRQ(ierr); if (tao->hessian == tao->hessian_pre) { ierr = MatDestroy(&tron->Hpre_sub);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)(tron->H_sub));CHKERRQ(ierr); tron->Hpre_sub = tron->H_sub; } else { ierr = TaoMatGetSubMat(tao->hessian_pre, tron->Free_Local, tron->diag, tao->subset_type,&tron->Hpre_sub);CHKERRQ(ierr); } ierr = KSPReset(tao->ksp);CHKERRQ(ierr); ierr = KSPSetOperators(tao->ksp, tron->H_sub, tron->Hpre_sub);CHKERRQ(ierr); while (1) { /* Approximately solve the reduced linear system */ ierr = KSPSTCGSetRadius(tao->ksp,delta);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tron->R, tron->DXFree);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = VecSet(tao->stepdirection,0.0);CHKERRQ(ierr); /* Add dxfree matrix to compute step direction vector */ ierr = VecISAXPY(tao->stepdirection,tron->Free_Local,1.0,tron->DXFree);CHKERRQ(ierr); if (0) { PetscReal rhs,stepnorm; ierr = VecNorm(tron->R,NORM_2,&rhs);CHKERRQ(ierr); ierr = VecNorm(tron->DXFree,NORM_2,&stepnorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"|rhs|=%g\t|s|=%g\n",(double)rhs,(double)stepnorm);CHKERRQ(ierr); } ierr = VecDot(tao->gradient, tao->stepdirection, &gdx);CHKERRQ(ierr); ierr = PetscInfo1(tao,"Expected decrease in function value: %14.12e\n",(double)gdx);CHKERRQ(ierr); ierr = VecCopy(tao->solution, tron->X_New);CHKERRQ(ierr); ierr = VecCopy(tao->gradient, tron->G_New);CHKERRQ(ierr); stepsize=1.0;f_new=f; ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr); ierr = TaoLineSearchApply(tao->linesearch, tron->X_New, &f_new, tron->G_New, tao->stepdirection,&stepsize,&ls_reason);CHKERRQ(ierr);CHKERRQ(ierr); ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); ierr = MatMult(tao->hessian, tao->stepdirection, tron->Work);CHKERRQ(ierr); ierr = VecAYPX(tron->Work, 0.5, tao->gradient);CHKERRQ(ierr); ierr = VecDot(tao->stepdirection, tron->Work, &prered);CHKERRQ(ierr); actred = f_new - f; if (actred<0) { rhok=PetscAbs(-actred/prered); } else { rhok=0.0; } /* Compare actual improvement to the quadratic model */ if (rhok > tron->eta1) { /* Accept the point */ /* d = x_new - x */ ierr = VecCopy(tron->X_New, tao->stepdirection);CHKERRQ(ierr); ierr = VecAXPY(tao->stepdirection, -1.0, tao->solution);CHKERRQ(ierr); ierr = VecNorm(tao->stepdirection, NORM_2, &xdiff);CHKERRQ(ierr); xdiff *= stepsize; /* Adjust trust region size */ if (rhok < tron->eta2 ){ delta = PetscMin(xdiff,delta)*tron->sigma1; } else if (rhok > tron->eta4 ){ delta= PetscMin(xdiff,delta)*tron->sigma3; } else if (rhok > tron->eta3 ){ delta=PetscMin(xdiff,delta)*tron->sigma2; } ierr = VecBoundGradientProjection(tron->G_New,tron->X_New, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr); if (tron->Free_Local) { ierr = ISDestroy(&tron->Free_Local);CHKERRQ(ierr); } ierr = VecWhichBetween(tao->XL, tron->X_New, tao->XU, &tron->Free_Local);CHKERRQ(ierr); f=f_new; ierr = VecNorm(tao->gradient,NORM_2,&tron->gnorm);CHKERRQ(ierr); ierr = VecCopy(tron->X_New, tao->solution);CHKERRQ(ierr); ierr = VecCopy(tron->G_New, tao->gradient);CHKERRQ(ierr); break; } else if (delta <= 1e-30) { break; } else { delta /= 4.0; } } /* end linear solve loop */ tron->f=f; tron->actred=actred; tao->trust=delta; iter++; ierr = TaoMonitor(tao, iter, tron->f, tron->gnorm, 0.0, delta, &reason);CHKERRQ(ierr); } /* END MAIN LOOP */ PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_PIPEFCG_cycle(KSP ksp) { PetscErrorCode ierr; PetscInt i,j,k,idx,kdx,mi; KSP_PIPEFCG *pipefcg; PetscScalar alpha=0.0,gamma,*betas,*dots; PetscReal dp=0.0, delta,*eta,*etas; Vec B,R,Z,X,Qcurr,W,ZETAcurr,M,N,Pcurr,Scurr,*redux; Mat Amat,Pmat; PetscFunctionBegin; /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */ #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX)) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); #endif #define VecXDot(x,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDot (x,y,a) : VecTDot (x,y,a)) #define VecXDotBegin(x,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotBegin (x,y,a) : VecTDotBegin (x,y,a)) #define VecXDotEnd(x,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotEnd (x,y,a) : VecTDotEnd (x,y,a)) #define VecMXDot(x,n,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDot (x,n,y,a) : VecMTDot (x,n,y,a)) #define VecMXDotBegin(x,n,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotBegin (x,n,y,a) : VecMTDotBegin (x,n,y,a)) #define VecMXDotEnd(x,n,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotEnd (x,n,y,a) : VecMTDotEnd (x,n,y,a)) pipefcg = (KSP_PIPEFCG*)ksp->data; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; W = ksp->work[2]; M = ksp->work[3]; N = ksp->work[4]; redux = pipefcg->redux; dots = pipefcg->dots; etas = pipefcg->etas; betas = dots; /* dots takes the result of all dot products of which the betas are a subset */ ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); /* Compute cycle initial residual */ ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); /* r <- b - Ax */ ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- Br */ Pcurr = pipefcg->Pvecs[0]; Scurr = pipefcg->Svecs[0]; Qcurr = pipefcg->Qvecs[0]; ZETAcurr = pipefcg->ZETAvecs[0]; ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr); ierr = KSP_MatMult(ksp,Amat,Pcurr,Scurr);CHKERRQ(ierr); /* S = Ap */ ierr = VecCopy(Scurr,W);CHKERRQ(ierr); /* w = s = Az */ /* Initial state of pipelining intermediates */ redux[0] = R; redux[1] = W; ierr = VecMXDotBegin(Z,2,redux,dots);CHKERRQ(ierr); ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z));CHKERRQ(ierr); /* perform asynchronous reduction */ ierr = KSP_PCApply(ksp,W,M);CHKERRQ(ierr); /* m = B(w) */ ierr = KSP_MatMult(ksp,Amat,M,N);CHKERRQ(ierr); /* n = Am */ ierr = VecCopy(M,Qcurr);CHKERRQ(ierr); /* q = m */ ierr = VecCopy(N,ZETAcurr);CHKERRQ(ierr); /* zeta = n */ ierr = VecMXDotEnd(Z,2,redux,dots);CHKERRQ(ierr); gamma = dots[0]; delta = PetscRealPart(dots[1]); etas[0] = delta; alpha = gamma/delta; i = 0; do { ksp->its++; /* Update X, R, Z, W */ ierr = VecAXPY(X,+alpha,Pcurr);CHKERRQ(ierr); /* x <- x + alpha * pi */ ierr = VecAXPY(R,-alpha,Scurr);CHKERRQ(ierr); /* r <- r - alpha * si */ ierr = VecAXPY(Z,-alpha,Qcurr);CHKERRQ(ierr); /* z <- z - alpha * qi */ ierr = VecAXPY(W,-alpha,ZETAcurr);CHKERRQ(ierr); /* w <- w - alpha * zetai */ /* Compute norm for convergence check */ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e) */ break; case KSP_NORM_NATURAL: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e) */ break; case KSP_NORM_NONE: dp = 0.0; break; default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]); } /* Check for convergence */ ksp->rnorm = dp; KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* Computations of current iteration done */ ++i; /* If needbe, allocate a new chunk of vectors in P and C */ ierr = KSPAllocateVectors_PIPEFCG(ksp,i+1,pipefcg->vecb);CHKERRQ(ierr); /* Note that we wrap around and start clobbering old vectors */ idx = i % (pipefcg->mmax+1); Pcurr = pipefcg->Pvecs[idx]; Scurr = pipefcg->Svecs[idx]; Qcurr = pipefcg->Qvecs[idx]; ZETAcurr = pipefcg->ZETAvecs[idx]; eta = pipefcg->etas+idx; /* number of old directions to orthogonalize against */ switch(pipefcg->truncstrat){ case KSP_FCD_TRUNC_TYPE_STANDARD: mi = pipefcg->mmax; break; case KSP_FCD_TRUNC_TYPE_NOTAY: mi = ((i-1) % pipefcg->mmax)+1; break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unrecognized Truncation Strategy"); } /* Pick old p,s,q,zeta in a way suitable for VecMDot */ ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr); for(k=PetscMax(0,i-mi),j=0;k<i;++j,++k){ kdx = k % (pipefcg->mmax+1); pipefcg->Pold[j] = pipefcg->Pvecs[kdx]; pipefcg->Sold[j] = pipefcg->Svecs[kdx]; pipefcg->Qold[j] = pipefcg->Qvecs[kdx]; pipefcg->ZETAold[j] = pipefcg->ZETAvecs[kdx]; redux[j] = pipefcg->Svecs[kdx]; } redux[j] = R; /* If the above loop is not executed redux contains only R => all beta_k = 0, only gamma, delta != 0 */ redux[j+1] = W; ierr = VecMXDotBegin(Z,j+2,redux,betas);CHKERRQ(ierr); /* Start split reductions for beta_k = (z,s_k), gamma = (z,r), delta = (z,w) */ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z));CHKERRQ(ierr); /* perform asynchronous reduction */ ierr = VecWAXPY(N,-1.0,R,W);CHKERRQ(ierr); /* m = u + B(w-r): (a) ntmp = w-r */ ierr = KSP_PCApply(ksp,N,M);CHKERRQ(ierr); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */ ierr = VecAXPY(M,1.0,Z);CHKERRQ(ierr); /* m = u + B(w-r): (c) m = z + mtmp */ ierr = KSP_MatMult(ksp,Amat,M,N);CHKERRQ(ierr); /* n = Am */ ierr = VecMXDotEnd(Z,j+2,redux,betas);CHKERRQ(ierr); /* Finish split reductions */ gamma = betas[j]; delta = PetscRealPart(betas[j+1]); *eta = 0.; for(k=PetscMax(0,i-mi),j=0;k<i;++j,++k){ kdx = k % (pipefcg->mmax+1); betas[j] /= -etas[kdx]; /* betak /= etak */ *eta -= ((PetscReal)(PetscAbsScalar(betas[j])*PetscAbsScalar(betas[j]))) * etas[kdx]; /* etaitmp = -betaik^2 * etak */ } *eta += delta; /* etai = delta -betaik^2 * etak */ if(*eta < 0.) { pipefcg->norm_breakdown = PETSC_TRUE; ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr); break; } else { alpha= gamma/(*eta); /* alpha = gamma/etai */ } /* project out stored search directions using classical G-S */ ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr); ierr = VecCopy(W,Scurr);CHKERRQ(ierr); ierr = VecCopy(M,Qcurr);CHKERRQ(ierr); ierr = VecCopy(N,ZETAcurr);CHKERRQ(ierr); ierr = VecMAXPY(Pcurr ,j,betas,pipefcg->Pold);CHKERRQ(ierr); /* pi <- ui - sum_k beta_k p_k */ ierr = VecMAXPY(Scurr ,j,betas,pipefcg->Sold);CHKERRQ(ierr); /* si <- wi - sum_k beta_k s_k */ ierr = VecMAXPY(Qcurr ,j,betas,pipefcg->Qold);CHKERRQ(ierr); /* qi <- m - sum_k beta_k q_k */ ierr = VecMAXPY(ZETAcurr,j,betas,pipefcg->ZETAold);CHKERRQ(ierr); /* zetai <- n - sum_k beta_k zeta_k */ } while (ksp->its < ksp->max_it); PetscFunctionReturn(0); }
static PetscErrorCode Tao_mcstep(TaoLineSearch ls,PetscReal *stx,PetscReal *fx,PetscReal *dx,PetscReal *sty,PetscReal *fy,PetscReal *dy,PetscReal *stp,PetscReal *fp,PetscReal *dp) { TaoLineSearch_MT *mtP = (TaoLineSearch_MT *) ls->data; PetscReal gamma1, p, q, r, s, sgnd, stpc, stpf, stpq, theta; PetscInt bound; PetscFunctionBegin; /* Check the input parameters for errors */ mtP->infoc = 0; if (mtP->bracket && (*stp <= PetscMin(*stx,*sty) || (*stp >= PetscMax(*stx,*sty)))) SETERRQ(PETSC_COMM_SELF,1,"bad stp in bracket"); if (*dx * (*stp-*stx) >= 0.0) SETERRQ(PETSC_COMM_SELF,1,"dx * (stp-stx) >= 0.0"); if (ls->stepmax < ls->stepmin) SETERRQ(PETSC_COMM_SELF,1,"stepmax > stepmin"); /* Determine if the derivatives have opposite sign */ sgnd = *dp * (*dx / PetscAbsReal(*dx)); if (*fp > *fx) { /* Case 1: a higher function value. The minimum is bracketed. If the cubic step is closer to stx than the quadratic step, the cubic step is taken, else the average of the cubic and quadratic steps is taken. */ mtP->infoc = 1; bound = 1; theta = 3 * (*fx - *fp) / (*stp - *stx) + *dx + *dp; s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dx)); s = PetscMax(s,PetscAbsReal(*dp)); gamma1 = s*PetscSqrtScalar(PetscPowScalar(theta/s,2.0) - (*dx/s)*(*dp/s)); if (*stp < *stx) gamma1 = -gamma1; /* Can p be 0? Check */ p = (gamma1 - *dx) + theta; q = ((gamma1 - *dx) + gamma1) + *dp; r = p/q; stpc = *stx + r*(*stp - *stx); stpq = *stx + ((*dx/((*fx-*fp)/(*stp-*stx)+*dx))*0.5) * (*stp - *stx); if (PetscAbsReal(stpc-*stx) < PetscAbsReal(stpq-*stx)) { stpf = stpc; } else { stpf = stpc + 0.5*(stpq - stpc); } mtP->bracket = 1; } else if (sgnd < 0.0) { /* Case 2: A lower function value and derivatives of opposite sign. The minimum is bracketed. If the cubic step is closer to stx than the quadratic (secant) step, the cubic step is taken, else the quadratic step is taken. */ mtP->infoc = 2; bound = 0; theta = 3*(*fx - *fp)/(*stp - *stx) + *dx + *dp; s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dx)); s = PetscMax(s,PetscAbsReal(*dp)); gamma1 = s*PetscSqrtScalar(PetscPowScalar(theta/s,2.0) - (*dx/s)*(*dp/s)); if (*stp > *stx) gamma1 = -gamma1; p = (gamma1 - *dp) + theta; q = ((gamma1 - *dp) + gamma1) + *dx; r = p/q; stpc = *stp + r*(*stx - *stp); stpq = *stp + (*dp/(*dp-*dx))*(*stx - *stp); if (PetscAbsReal(stpc-*stp) > PetscAbsReal(stpq-*stp)) { stpf = stpc; } else { stpf = stpq; } mtP->bracket = 1; } else if (PetscAbsReal(*dp) < PetscAbsReal(*dx)) { /* Case 3: A lower function value, derivatives of the same sign, and the magnitude of the derivative decreases. The cubic step is only used if the cubic tends to infinity in the direction of the step or if the minimum of the cubic is beyond stp. Otherwise the cubic step is defined to be either stepmin or stepmax. The quadratic (secant) step is also computed and if the minimum is bracketed then the step closest to stx is taken, else the step farthest away is taken. */ mtP->infoc = 3; bound = 1; theta = 3*(*fx - *fp)/(*stp - *stx) + *dx + *dp; s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dx)); s = PetscMax(s,PetscAbsReal(*dp)); /* The case gamma1 = 0 only arises if the cubic does not tend to infinity in the direction of the step. */ gamma1 = s*PetscSqrtScalar(PetscMax(0.0,PetscPowScalar(theta/s,2.0) - (*dx/s)*(*dp/s))); if (*stp > *stx) gamma1 = -gamma1; p = (gamma1 - *dp) + theta; q = (gamma1 + (*dx - *dp)) + gamma1; r = p/q; if (r < 0.0 && gamma1 != 0.0) stpc = *stp + r*(*stx - *stp); else if (*stp > *stx) stpc = ls->stepmax; else stpc = ls->stepmin; stpq = *stp + (*dp/(*dp-*dx)) * (*stx - *stp); if (mtP->bracket) { if (PetscAbsReal(*stp-stpc) < PetscAbsReal(*stp-stpq)) { stpf = stpc; } else { stpf = stpq; } } else { if (PetscAbsReal(*stp-stpc) > PetscAbsReal(*stp-stpq)) { stpf = stpc; } else { stpf = stpq; } } } else { /* Case 4: A lower function value, derivatives of the same sign, and the magnitude of the derivative does not decrease. If the minimum is not bracketed, the step is either stpmin or stpmax, else the cubic step is taken. */ mtP->infoc = 4; bound = 0; if (mtP->bracket) { theta = 3*(*fp - *fy)/(*sty - *stp) + *dy + *dp; s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dy)); s = PetscMax(s,PetscAbsReal(*dp)); gamma1 = s*PetscSqrtScalar(PetscPowScalar(theta/s,2.0) - (*dy/s)*(*dp/s)); if (*stp > *sty) gamma1 = -gamma1; p = (gamma1 - *dp) + theta; q = ((gamma1 - *dp) + gamma1) + *dy; r = p/q; stpc = *stp + r*(*sty - *stp); stpf = stpc; } else if (*stp > *stx) { stpf = ls->stepmax; } else { stpf = ls->stepmin; } } /* Update the interval of uncertainty. This update does not depend on the new step or the case analysis above. */ if (*fp > *fx) { *sty = *stp; *fy = *fp; *dy = *dp; } else { if (sgnd < 0.0) { *sty = *stx; *fy = *fx; *dy = *dx; } *stx = *stp; *fx = *fp; *dx = *dp; } /* Compute the new step and safeguard it. */ stpf = PetscMin(ls->stepmax,stpf); stpf = PetscMax(ls->stepmin,stpf); *stp = stpf; if (mtP->bracket && bound) { if (*sty > *stx) { *stp = PetscMin(*stx+0.66*(*sty-*stx),*stp); } else { *stp = PetscMax(*stx+0.66*(*sty-*stx),*stp); } } PetscFunctionReturn(0); }
int main(int argc,char **args) { User user; Mat A,S; PetscScalar *data,diag = 1.3; PetscReal tol = PETSC_SMALL; PetscInt i,j,m = PETSC_DECIDE,n = PETSC_DECIDE,M = 17,N = 15,s1,s2; PetscInt test, ntest = 2; PetscMPIInt rank,size; PetscBool nc = PETSC_FALSE, cong; PetscBool ronl = PETSC_TRUE; PetscBool randomize = PETSC_FALSE; PetscBool keep = PETSC_FALSE; PetscBool testzerorows = PETSC_TRUE, testdiagscale = PETSC_TRUE, testgetdiag = PETSC_TRUE; PetscBool testshift = PETSC_TRUE, testscale = PETSC_TRUE, testdup = PETSC_TRUE, testreset = PETSC_TRUE; PetscErrorCode ierr; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-N",&N,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-ml",&m,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-nl",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-square_nc",&nc,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-rows_only",&ronl,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-randomize",&randomize,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_zerorows",&testzerorows,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_diagscale",&testdiagscale,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_getdiag",&testgetdiag,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_shift",&testshift,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_scale",&testscale,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_dup",&testdup,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-test_reset",&testreset,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-loop",&ntest,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetScalar(NULL,NULL,"-diag",&diag,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-keep",&keep,NULL);CHKERRQ(ierr); /* This tests square matrices with different row/col layout */ if (nc && size > 1) { M = PetscMax(PetscMax(N,M),1); N = M; m = n = 0; if (rank == 0) { m = M-1; n = 1; } else if (rank == 1) { m = 1; n = N-1; } } ierr = MatCreateDense(PETSC_COMM_WORLD,m,n,M,N,NULL,&A);CHKERRQ(ierr); ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&s1,NULL);CHKERRQ(ierr); s2 = 1; while (s2 < M) s2 *= 10; ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); for (j = 0; j < N; j++) { for (i = 0; i < m; i++) { data[j*m + i] = s2*j + i + s1 + 1; } } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatConvert(A,MATAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,keep);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)A,"initial");CHKERRQ(ierr); ierr = MatViewFromOptions(A,NULL,"-view_mat");CHKERRQ(ierr); ierr = PetscNew(&user);CHKERRQ(ierr); ierr = MatCreateShell(PETSC_COMM_WORLD,m,n,M,N,user,&S);CHKERRQ(ierr); ierr = MatShellSetOperation(S,MATOP_MULT,(void (*)(void))MatMult_User);CHKERRQ(ierr); ierr = MatShellSetOperation(S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_User);CHKERRQ(ierr); if (cong) { ierr = MatShellSetOperation(S,MATOP_GET_DIAGONAL,(void (*)(void))MatGetDiagonal_User);CHKERRQ(ierr); } ierr = MatDuplicate(A,MAT_COPY_VALUES,&user->B);CHKERRQ(ierr); /* Square and rows only scaling */ ronl = cong ? ronl : PETSC_TRUE; for (test = 0; test < ntest; test++) { PetscReal err; if (testzerorows) { Mat ST,B,C,BT,BTT; IS zr; Vec x = NULL, b1 = NULL, b2 = NULL; PetscInt *idxs = NULL, nr = 0; if (rank == (test%size)) { nr = 1; ierr = PetscMalloc1(nr,&idxs);CHKERRQ(ierr); if (test%2) { idxs[0] = (2*M - 1 - test/2)%M; } else { idxs[0] = (test/2)%M; } idxs[0] = PetscMax(idxs[0],0); } ierr = ISCreateGeneral(PETSC_COMM_WORLD,nr,idxs,PETSC_OWN_POINTER,&zr);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)zr,"ZR");CHKERRQ(ierr); ierr = ISViewFromOptions(zr,NULL,"-view_is");CHKERRQ(ierr); ierr = MatCreateVecs(A,&x,&b1);CHKERRQ(ierr); if (randomize) { ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); ierr = VecSetRandom(b1,NULL);CHKERRQ(ierr); } else { ierr = VecSet(x,11.4);CHKERRQ(ierr); ierr = VecSet(b1,-14.2);CHKERRQ(ierr); } ierr = VecDuplicate(b1,&b2);CHKERRQ(ierr); ierr = VecCopy(b1,b2);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)b1,"A_B1");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)b2,"A_B2");CHKERRQ(ierr); if (size > 1 && !cong) { /* MATMPIAIJ ZeroRows and ZeroRowsColumns are buggy in this case */ ierr = VecDestroy(&b1);CHKERRQ(ierr); } if (ronl) { ierr = MatZeroRowsIS(A,zr,diag,x,b1);CHKERRQ(ierr); ierr = MatZeroRowsIS(S,zr,diag,x,b2);CHKERRQ(ierr); } else { ierr = MatZeroRowsColumnsIS(A,zr,diag,x,b1);CHKERRQ(ierr); ierr = MatZeroRowsColumnsIS(S,zr,diag,x,b2);CHKERRQ(ierr); ierr = ISDestroy(&zr);CHKERRQ(ierr); /* Mix zerorows and zerorowscols */ nr = 0; idxs = NULL; if (!rank) { nr = 1; ierr = PetscMalloc1(nr,&idxs);CHKERRQ(ierr); if (test%2) { idxs[0] = (3*M - 2 - test/2)%M; } else { idxs[0] = (test/2+1)%M; } idxs[0] = PetscMax(idxs[0],0); } ierr = ISCreateGeneral(PETSC_COMM_WORLD,nr,idxs,PETSC_OWN_POINTER,&zr);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)zr,"ZR2");CHKERRQ(ierr); ierr = ISViewFromOptions(zr,NULL,"-view_is");CHKERRQ(ierr); ierr = MatZeroRowsIS(A,zr,diag*2.0+PETSC_SMALL,NULL,NULL);CHKERRQ(ierr); ierr = MatZeroRowsIS(S,zr,diag*2.0+PETSC_SMALL,NULL,NULL);CHKERRQ(ierr); } ierr = ISDestroy(&zr);CHKERRQ(ierr); if (b1) { Vec b; ierr = VecViewFromOptions(b1,NULL,"-view_b");CHKERRQ(ierr); ierr = VecViewFromOptions(b2,NULL,"-view_b");CHKERRQ(ierr); ierr = VecDuplicate(b1,&b);CHKERRQ(ierr); ierr = VecCopy(b1,b);CHKERRQ(ierr); ierr = VecAXPY(b,-1.0,b2);CHKERRQ(ierr); ierr = VecNorm(b,NORM_INFINITY,&err);CHKERRQ(ierr); if (err >= tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error b %g\n",test,(double)err);CHKERRQ(ierr); } ierr = VecDestroy(&b);CHKERRQ(ierr); } ierr = VecDestroy(&b1);CHKERRQ(ierr); ierr = VecDestroy(&b2);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = MatConvert(S,MATDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); ierr = MatCreateTranspose(S,&ST);CHKERRQ(ierr); ierr = MatComputeOperator(ST,MATDENSE,&BT);CHKERRQ(ierr); ierr = MatTranspose(BT,MAT_INITIAL_MATRIX,&BTT);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)B,"S");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)BTT,"STT");CHKERRQ(ierr); ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)C,"A");CHKERRQ(ierr); ierr = MatViewFromOptions(C,NULL,"-view_mat");CHKERRQ(ierr); ierr = MatViewFromOptions(B,NULL,"-view_mat");CHKERRQ(ierr); ierr = MatViewFromOptions(BTT,NULL,"-view_mat");CHKERRQ(ierr); ierr = MatAXPY(C,-1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatNorm(C,NORM_FROBENIUS,&err);CHKERRQ(ierr); if (err >= tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error mat mult %g\n",test,(double)err);CHKERRQ(ierr); } ierr = MatConvert(A,MATDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); ierr = MatAXPY(C,-1.0,BTT,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatNorm(C,NORM_FROBENIUS,&err);CHKERRQ(ierr); if (err >= tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error mat mult transpose %g\n",test,(double)err);CHKERRQ(ierr); } ierr = MatDestroy(&ST);CHKERRQ(ierr); ierr = MatDestroy(&BTT);CHKERRQ(ierr); ierr = MatDestroy(&BT);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); } if (testdiagscale) { /* MatDiagonalScale() */ Vec vr,vl; ierr = MatCreateVecs(A,&vr,&vl);CHKERRQ(ierr); if (randomize) { ierr = VecSetRandom(vr,NULL);CHKERRQ(ierr); ierr = VecSetRandom(vl,NULL);CHKERRQ(ierr); } else { ierr = VecSet(vr,test%2 ? 0.15 : 1.0/0.15);CHKERRQ(ierr); ierr = VecSet(vl,test%2 ? -1.2 : 1.0/-1.2);CHKERRQ(ierr); } ierr = MatDiagonalScale(A,vl,vr);CHKERRQ(ierr); ierr = MatDiagonalScale(S,vl,vr);CHKERRQ(ierr); ierr = VecDestroy(&vr);CHKERRQ(ierr); ierr = VecDestroy(&vl);CHKERRQ(ierr); } if (testscale) { /* MatScale() */ ierr = MatScale(A,test%2 ? 1.4 : 1.0/1.4);CHKERRQ(ierr); ierr = MatScale(S,test%2 ? 1.4 : 1.0/1.4);CHKERRQ(ierr); } if (testshift && cong) { /* MatShift() : MATSHELL shift is broken when row/cols layout are not congruent and left/right scaling have been applied */ ierr = MatShift(A,test%2 ? -77.5 : 77.5);CHKERRQ(ierr); ierr = MatShift(S,test%2 ? -77.5 : 77.5);CHKERRQ(ierr); } if (testgetdiag && cong) { /* MatGetDiagonal() */ Vec dA,dS; ierr = MatCreateVecs(A,&dA,NULL);CHKERRQ(ierr); ierr = MatCreateVecs(S,&dS,NULL);CHKERRQ(ierr); ierr = MatGetDiagonal(A,dA);CHKERRQ(ierr); ierr = MatGetDiagonal(S,dS);CHKERRQ(ierr); ierr = VecAXPY(dA,-1.0,dS);CHKERRQ(ierr); ierr = VecNorm(dA,NORM_INFINITY,&err);CHKERRQ(ierr); if (err >= tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error diag %g\n",test,(double)err);CHKERRQ(ierr); } ierr = VecDestroy(&dA);CHKERRQ(ierr); ierr = VecDestroy(&dS);CHKERRQ(ierr); } if (testdup && !test) { Mat A2, S2; ierr = MatDuplicate(A,MAT_COPY_VALUES,&A2);CHKERRQ(ierr); ierr = MatDuplicate(S,MAT_COPY_VALUES,&S2);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&S);CHKERRQ(ierr); A = A2; S = S2; } if (testreset && (ntest == 1 || test == ntest-2)) { /* reset MATSHELL */ ierr = MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* reset A */ ierr = MatCopy(user->B,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); } } ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&user->B);CHKERRQ(ierr); ierr = MatDestroy(&S);CHKERRQ(ierr); ierr = PetscFree(user);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
int main(int argc,char **argv) { TS ts; /* timestepping context */ Vec x,r; /* solution, residual vectors */ Mat J; /* Jacobian matrix */ AppCtx user; /* user-defined work context */ PetscInt its,N; /* iterations for convergence */ PetscErrorCode ierr; PetscReal param_max = 6.81,param_min = 0.,dt; PetscReal ftime; PetscMPIInt size; PetscInitialize(&argc,&argv,PETSC_NULL,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only"); user.mx = 4; user.my = 4; user.param = 6.0; /* Allow user to set the grid dimensions and nonlinearity parameter at run-time */ PetscOptionsGetInt(PETSC_NULL,"-mx",&user.mx,PETSC_NULL); PetscOptionsGetInt(PETSC_NULL,"-my",&user.my,PETSC_NULL); PetscOptionsGetReal(PETSC_NULL,"-param",&user.param,PETSC_NULL); if (user.param >= param_max || user.param <= param_min) SETERRQ(PETSC_COMM_SELF,1,"Parameter is out of range"); dt = .5/PetscMax(user.mx,user.my); ierr = PetscOptionsGetReal(PETSC_NULL,"-dt",&dt,PETSC_NULL);CHKERRQ(ierr); N = user.mx*user.my; /* Create vectors to hold the solution and function value */ ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&r);CHKERRQ(ierr); /* Create matrix to hold Jacobian. Preallocate 5 nonzeros per row in the sparse matrix. Note that this is not the optimal strategy; see the Performance chapter of the users manual for information on preallocating memory in sparse matrices. */ ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,5,0,&J);CHKERRQ(ierr); /* Create timestepper context */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); /* Tell the timestepper context where to compute solutions */ ierr = TSSetSolution(ts,x);CHKERRQ(ierr); /* Provide the call-back for the nonlinear function we are evaluating. Thus whenever the timestepping routines need the function they will call this routine. Note the final argument is the application context used by the call-back functions. */ ierr = TSSetRHSFunction(ts,PETSC_NULL,FormFunction,&user);CHKERRQ(ierr); /* Set the Jacobian matrix and the function used to compute Jacobians. */ ierr = TSSetRHSJacobian(ts,J,J,FormJacobian,&user);CHKERRQ(ierr); /* For the initial guess for the problem */ ierr = FormInitialGuess(x,&user); /* This indicates that we are using pseudo timestepping to find a steady state solution to the nonlinear problem. */ ierr = TSSetType(ts,TSPSEUDO);CHKERRQ(ierr); /* Set the initial time to start at (this is arbitrary for steady state problems; and the initial timestep given above */ ierr = TSSetInitialTimeStep(ts,0.0,dt);CHKERRQ(ierr); /* Set a large number of timesteps and final duration time to insure convergence to steady state. */ ierr = TSSetDuration(ts,1000,1.e12); /* Use the default strategy for increasing the timestep */ ierr = TSPseudoSetTimeStep(ts,TSPseudoDefaultTimeStep,0);CHKERRQ(ierr); /* Set any additional options from the options database. This includes all options for the nonlinear and linear solvers used internally the the timestepping routines. */ ierr = TSSetFromOptions(ts);CHKERRQ(ierr); ierr = TSSetUp(ts);CHKERRQ(ierr); /* Perform the solve. This is where the timestepping takes place. */ ierr = TSSolve(ts,x,&ftime);CHKERRQ(ierr); /* Get the number of steps */ ierr = TSGetTimeStepNumber(ts,&its);CHKERRQ(ierr); printf("Number of pseudo timesteps = %d final time %4.2e\n",(int)its,ftime); /* Free the data structures constructed above */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&r);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }