/*
   KSPSolve_SpecEst - Solve routine of the SpecEst wrapper KSP.

   If a spectral estimate is already available (spec->current), the solve is
   delegated to spec->kspcheap.  Otherwise the solve is performed with
   spec->kspest, after which the extreme singular values and the computed
   eigenvalues of that solve are used to configure spec->kspcheap
   (Chebyshev eigenvalue bounds and Richardson scale), and the estimate is
   marked current.

   Input Parameter:
.  ksp - the outer KSP; reads ksp->vec_rhs and writes ksp->vec_sol
*/
static PetscErrorCode KSPSolve_SpecEst(KSP ksp)
{
  PetscErrorCode ierr;
  KSP_SpecEst    *spec = (KSP_SpecEst*)ksp->data;

  PetscFunctionBegin;
  if (spec->current) {
    /* Estimate already computed: use the cheap solver directly */
    ierr = KSPSolve(spec->kspcheap,ksp->vec_rhs,ksp->vec_sol);CHKERRQ(ierr);
    ierr = KSPSpecEstPropagateUp(ksp,spec->kspcheap);CHKERRQ(ierr);
  } else {
    PetscInt  i,its,neig;
    PetscReal *real,*imag,rad = 0;

    ierr = KSPSolve(spec->kspest,ksp->vec_rhs,ksp->vec_sol);CHKERRQ(ierr);
    ierr = KSPSpecEstPropagateUp(ksp,spec->kspest);CHKERRQ(ierr);
    ierr = KSPComputeExtremeSingularValues(spec->kspest,&spec->max,&spec->min);CHKERRQ(ierr);

    /* Request at most one eigenvalue per iteration taken by the estimating solve */
    ierr = KSPGetIterationNumber(spec->kspest,&its);CHKERRQ(ierr);
    ierr = PetscMalloc2(its,PetscReal,&real,its,PetscReal,&imag);CHKERRQ(ierr);
    ierr = KSPComputeEigenvalues(spec->kspest,its,real,imag,&neig);CHKERRQ(ierr);
    for (i=0; i<neig; i++) {
      /* We would really like to compute w (nominally 1/radius) to minimize |1-wB|.  Empirically it
         is better to compute rad = |1-B| than rad = |B|.  There must be a cheap way to do better. */
      rad = PetscMax(rad,PetscRealPart(PetscSqrtScalar((PetscScalar)(PetscSqr(real[i]-1.) + PetscSqr(imag[i])))));
    }
    ierr = PetscFree2(real,imag);CHKERRQ(ierr);
    spec->radius = rad;

    ierr = KSPChebyshevSetEigenvalues(spec->kspcheap,spec->max*spec->maxfactor,spec->min*spec->minfactor);CHKERRQ(ierr);
    /* NOTE(review): rad stays 0 when no eigenvalues are returned (neig == 0), making this a division by zero -- confirm intended handling */
    ierr = KSPRichardsonSetScale(spec->kspcheap,spec->richfactor/spec->radius);CHKERRQ(ierr);
    ierr = PetscInfo3(ksp,"Estimated singular value min=%G max=%G, spectral radius=%G\n",spec->min,spec->max,spec->radius);CHKERRQ(ierr);
    spec->current = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
/*@C MatCheckCompressedRow - Determines whether the compressed row matrix format should be used. If the format is to be used, this routine creates Mat_CompressedRow struct. Compressed row format provides high performance routines by taking advantage of zero rows. Supported types are MATAIJ, MATBAIJ and MATSBAIJ. Collective Input Parameters: + A - the matrix . compressedrow - pointer to the struct Mat_CompressedRow . ai - row pointer used by seqaij and seqbaij . mbs - number of (block) rows represented by ai - ratio - ratio of (num of zero rows)/m, used to determine if the compressed row format should be used Notes: By default PETSc will not check for compressed rows on sequential matrices. Call MatSetOption(Mat,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE); before MatAssemblyBegin() to have it check. Developer Note: The reason this takes the compressedrow, ai and mbs arguments is because it is called by both the SeqAIJ and SEQBAIJ matrices and the values are not therefore obtained by directly taking the values from the matrix object. Level: developer @*/ PetscErrorCode MatCheckCompressedRow(Mat A,Mat_CompressedRow *compressedrow,PetscInt *ai,PetscInt mbs,PetscReal ratio) { PetscErrorCode ierr; PetscInt nrows,*cpi=PETSC_NULL,*ridx=PETSC_NULL,nz,i,row; PetscFunctionBegin; if (!compressedrow->check) PetscFunctionReturn(0); /* in case this is being reused, delete old space */ ierr = PetscFree2(compressedrow->i,compressedrow->rindex);CHKERRQ(ierr); compressedrow->i = PETSC_NULL; compressedrow->rindex = PETSC_NULL; /* compute number of zero rows */ nrows = 0; for (i=0; i<mbs; i++){ /* for each row */ nz = ai[i+1] - ai[i]; /* number of nonzeros */ if (nz == 0) nrows++; } /* if a large number of zero rows is found, use compressedrow data structure */ if (nrows < ratio*mbs) { compressedrow->use = PETSC_FALSE; ierr = PetscInfo3(A,"Found the ratio (num_zerorows %d)/(num_localrows %d) < %G. 
Do not use CompressedRow routines.\n",nrows,mbs,ratio);CHKERRQ(ierr); } else { compressedrow->use = PETSC_TRUE; ierr = PetscInfo3(A,"Found the ratio (num_zerorows %d)/(num_localrows %d) > %G. Use CompressedRow routines.\n",nrows,mbs,ratio);CHKERRQ(ierr); /* set compressed row format */ nrows = mbs - nrows; /* num of non-zero rows */ ierr = PetscMalloc2(nrows+1,PetscInt,&cpi,nrows,PetscInt,&ridx);CHKERRQ(ierr); row = 0; cpi[0] = 0; for (i=0; i<mbs; i++){ nz = ai[i+1] - ai[i]; if (nz == 0) continue; cpi[row+1] = ai[i+1]; /* compressed row pointer */ ridx[row++] = i; /* compressed row local index */ } compressedrow->nrows = nrows; compressedrow->i = cpi; compressedrow->rindex = ridx; } PetscFunctionReturn(0); }
/*
   TaoSolve_SSILS - Solve routine for the SSILS method.

   Each iteration solves a linear system with tao->jacobian for a step
   direction, falls back to the merit-function gradient (ssls->dpsi) when the
   computed direction fails the descent test, and then applies the line search
   along the negated direction.  Iteration stops when TaoMonitor() reports a
   reason other than TAO_CONTINUE_ITERATING.

   Input Parameter:
.  tao - the Tao context; tao->data must be a TAO_SSLS
*/
static PetscErrorCode TaoSolve_SSILS(Tao tao)
{
  TAO_SSLS                     *ssls = (TAO_SSLS *)tao->data;
  PetscReal                    psi, ndpsi, normd, innerd, t=0;
  PetscReal                    delta, rho;
  PetscInt                     iter=0,kspits;
  TaoConvergedReason           reason;
  TaoLineSearchConvergedReason ls_reason;
  PetscErrorCode               ierr;

  PetscFunctionBegin;
  /* Assume that Setup has been called!
     Set the structure for the Jacobian and create a linear solver. */
  delta = ssls->delta;
  rho   = ssls->rho;

  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr);
  ierr = TaoLineSearchSetObjectiveAndGradientRoutine(tao->linesearch,Tao_SSLS_FunctionGradient,tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetObjectiveRoutine(tao->linesearch,Tao_SSLS_Function,tao);CHKERRQ(ierr);

  /* Calculate the function value and fischer function value at the
     current iterate */
  ierr = TaoLineSearchComputeObjectiveAndGradient(tao->linesearch,tao->solution,&psi,ssls->dpsi);CHKERRQ(ierr);
  ierr = VecNorm(ssls->dpsi,NORM_2,&ndpsi);CHKERRQ(ierr);

  while (1) {
    ierr=PetscInfo3(tao, "iter: %D, merit: %g, ndpsi: %g\n",iter, (double)ssls->merit, (double)ndpsi);CHKERRQ(ierr);
    /* Check the termination criteria */
    ierr = TaoMonitor(tao,iter++,ssls->merit,ndpsi,0.0,t,&reason);CHKERRQ(ierr);
    if (reason!=TAO_CONTINUE_ITERATING) break;

    /* Calculate direction.  (Really negative of newton direction.  Therefore,
       rest of the code uses -d.) */
    ierr = KSPSetOperators(tao->ksp,tao->jacobian,tao->jacobian_pre);CHKERRQ(ierr);
    ierr = KSPSolve(tao->ksp,ssls->ff,tao->stepdirection);CHKERRQ(ierr);
    ierr = KSPGetIterationNumber(tao->ksp,&kspits);CHKERRQ(ierr);
    tao->ksp_its+=kspits;
    ierr = VecNorm(tao->stepdirection,NORM_2,&normd);CHKERRQ(ierr);
    ierr = VecDot(tao->stepdirection,ssls->dpsi,&innerd);CHKERRQ(ierr);

    /* Make sure that we have a descent direction */
    if (innerd <= delta*pow(normd, rho)) {
      ierr = PetscInfo(tao, "newton direction not descent\n");CHKERRQ(ierr);
      ierr = VecCopy(ssls->dpsi,tao->stepdirection);CHKERRQ(ierr);
      ierr = VecDot(tao->stepdirection,ssls->dpsi,&innerd);CHKERRQ(ierr);
    }

    ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr);
    innerd = -innerd;

    /* The return code of this call was previously overwritten unchecked */
    ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr);
    ierr = TaoLineSearchApply(tao->linesearch,tao->solution,&psi,ssls->dpsi,tao->stepdirection,&t,&ls_reason);CHKERRQ(ierr);
    ierr = VecNorm(ssls->dpsi,NORM_2,&ndpsi);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
static void CholmodErrorHandler(int status,const char *file,int line,const char *message) { PetscFunctionBegin; if (status > CHOLMOD_OK) { PetscInfo4(static_F,"CHOLMOD warning %d at %s:%d: %s",status,file,line,message); } else if (status == CHOLMOD_OK) { /* Documentation says this can happen, but why? */ PetscInfo3(static_F,"CHOLMOD OK at %s:%d: %s",file,line,message); } else { PetscErrorPrintf("CHOLMOD error %d at %s:%d: %s\n",status,file,line,message); } PetscFunctionReturnVoid(); }
static PetscErrorCode TSAdaptChoose_CFL(TSAdapt adapt,TS ts,PetscReal h,PetscInt *next_sc,PetscReal *next_h,PetscBool *accept,PetscReal *wlte) { TSAdapt_CFL *cfl = (TSAdapt_CFL*)adapt->data; PetscErrorCode ierr; PetscReal hcfl,cfltime; PetscInt stepno,ncandidates; const PetscInt *order; const PetscReal *ccfl; PetscFunctionBegin; ierr = TSGetTimeStepNumber(ts,&stepno);CHKERRQ(ierr); ierr = TSGetCFLTime(ts,&cfltime);CHKERRQ(ierr); ierr = TSAdaptCandidatesGet(adapt,&ncandidates,&order,NULL,&ccfl,NULL);CHKERRQ(ierr); hcfl = cfl->safety * cfltime * ccfl[0]; if (hcfl < adapt->dt_min) { ierr = PetscInfo4(adapt,"Cannot satisfy CFL constraint %g (with %g safety) at minimum time step %g with method coefficient %g, proceding anyway\n",(double)cfltime,(double)cfl->safety,(double)adapt->dt_min,(double)ccfl[0]);CHKERRQ(ierr); } if (h > cfltime * ccfl[0]) { if (cfl->always_accept) { ierr = PetscInfo3(adapt,"Step length %g with scheme of CFL coefficient %g did not satisfy user-provided CFL constraint %g, proceeding anyway\n",(double)h,(double)ccfl[0],(double)cfltime);CHKERRQ(ierr); } else { ierr = PetscInfo3(adapt,"Step length %g with scheme of CFL coefficient %g did not satisfy user-provided CFL constraint %g, step REJECTED\n",(double)h,(double)ccfl[0],(double)cfltime);CHKERRQ(ierr); *next_sc = 0; *next_h = PetscClipInterval(hcfl,adapt->dt_min,adapt->dt_max); *accept = PETSC_FALSE; } } *next_sc = 0; *next_h = PetscClipInterval(hcfl,adapt->dt_min,adapt->dt_max); *accept = PETSC_TRUE; *wlte = -1; /* Weighted local truncation error was not evaluated */ PetscFunctionReturn(0); }
/*@C
   PetscDrawSetSave - Saves images produced in a PetscDraw into a file

   Collective on PetscDraw

   Input Parameters:
+  draw      - the graphics context
.  filename  - name of the file, if .ext then uses name of draw object plus .ext using .ext to determine the image type
-  movieext  - if not NULL, produces a movie of all the images

   Options Database Command:
+  -draw_save <filename> - filename could be name.ext or .ext (where .ext determines the type of graphics file to save, for example .png)
.  -draw_save_movie <.ext> - saves a movie to filename.ext
.  -draw_save_final_image [optional filename] - saves the final image displayed in a window
-  -draw_save_single_file - saves each new image in the same file, normally each new image is saved in a new file with filename/filename_%d.ext

   Level: intermediate

   Concepts: X windows^graphics

   Notes:
    You should call this BEFORE creating your image and calling PetscDrawSave().
   The supported image types are .png, .gif, .jpg, and .ppm (PETSc chooses the default in that order).
   Support for .png images requires configure --with-libpng.
   Support for .gif images requires configure --with-giflib.
   Support for .jpg images requires configure --with-libjpeg.
   Support for .ppm images is built-in. The PPM format has no compression (640x480 pixels ~ 900 KiB).
   The ffmpeg utility must be in your path to make the movie.

   The extension is taken from the first '.' found in filename; the part before it is used as the
   base name and is truncated if it does not fit in PETSC_MAX_PATH_LEN characters.

.seealso: PetscDrawSetFromOptions(), PetscDrawCreate(), PetscDrawDestroy(), PetscDrawSetSaveFinalImage()
@*/
PetscErrorCode PetscDrawSetSave(PetscDraw draw,const char filename[],const char movieext[])
{
  const char     *savename = NULL;
  const char     *imageext = NULL;
  char           buf[PETSC_MAX_PATH_LEN];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(draw,PETSC_DRAW_CLASSID,1);
  if (filename) PetscValidCharPointer(filename,2);
  if (movieext) PetscValidCharPointer(movieext,3); /* movieext is the third argument */

  /* determine save filename and image extension */
  if (filename && filename[0]) {
    ierr = PetscStrchr(filename,'.',(char **)&imageext);CHKERRQ(ierr);
    if (!imageext) savename = filename;
    else if (imageext != filename) {
      size_t l1 = 0,l2 = 0,len;
      ierr = PetscStrlen(filename,&l1);CHKERRQ(ierr);
      ierr = PetscStrlen(imageext,&l2);CHKERRQ(ierr);
      /* length of the part before the extension, plus one; never copy more than buf can hold */
      len = l1-l2+1;
      if (len > sizeof(buf)) len = sizeof(buf);
      ierr = PetscStrncpy(buf,filename,len);CHKERRQ(ierr);
      savename = buf;
    }
    /* when filename is just ".ext" savename stays NULL and the object name is used below */
  }

  if (!savename) {ierr = PetscObjectGetName((PetscObject)draw,&savename);CHKERRQ(ierr);}
  ierr = PetscDrawImageCheckFormat(&imageext);CHKERRQ(ierr);
  if (movieext) {ierr = PetscDrawMovieCheckFormat(&movieext);CHKERRQ(ierr);}
  if (movieext) draw->savesinglefile = PETSC_FALSE; /* otherwise we cannot generate movies */

  if (draw->savesinglefile) {
    ierr = PetscInfo2(NULL,"Will save image to file %s%s\n",savename,imageext);CHKERRQ(ierr);
  } else {
    ierr = PetscInfo3(NULL,"Will save images to file %s/%s_%%d%s\n",savename,savename,imageext);CHKERRQ(ierr);
  }
  if (movieext) {
    ierr = PetscInfo2(NULL,"Will save movie to file %s%s\n",savename,movieext);CHKERRQ(ierr);
  }

  /* replace any previously stored save settings with copies of the new ones */
  draw->savefilecount = 0;
  ierr = PetscFree(draw->savefilename);CHKERRQ(ierr);
  ierr = PetscFree(draw->saveimageext);CHKERRQ(ierr);
  ierr = PetscFree(draw->savemovieext);CHKERRQ(ierr);
  ierr = PetscStrallocpy(savename,&draw->savefilename);CHKERRQ(ierr);
  ierr = PetscStrallocpy(imageext,&draw->saveimageext);CHKERRQ(ierr);
  ierr = PetscStrallocpy(movieext,&draw->savemovieext);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   SNESTR_Converged_Private - Convergence check used ONLY for the trust region
   tolerance.

   Sets *reason to SNES_CONVERGED_TR_DELTA when the trust region size
   neP->delta has dropped below xnorm*snes->deltatol, to
   SNES_DIVERGED_FUNCTION_COUNT when a non-negative snes->max_funcs has been
   reached, and to SNES_CONVERGED_ITERATING otherwise.  The arguments it,
   pnorm, fnorm and dummy are not used.
*/
static PetscErrorCode SNESTR_Converged_Private(SNES snes,PetscInt it,PetscReal xnorm,PetscReal pnorm,PetscReal fnorm,SNESConvergedReason *reason,void *dummy)
{
  PetscErrorCode ierr;
  SNES_NEWTONTR  *tr = (SNES_NEWTONTR*)snes->data;

  PetscFunctionBegin;
  *reason = SNES_CONVERGED_ITERATING;

  /* Trust region has collapsed relative to the size of the iterate */
  if (neP_delta_guard: 0) {}
  PetscFunctionReturn(0);
}
/*
   TSStep_Pseudo - Takes one pseudo-timestep.

   The candidate solution is kept in pseudo->update.  A nonlinear solve is
   attempted with the proposed step size; TSPseudoVerifyTimeStep() decides
   whether the result is acceptable and may propose a new step size.  At most
   ts->max_reject attempts are made.  On success the candidate is copied into
   ts->vec_sol and time, step size and step counter are advanced; otherwise
   ts->reason is set and the solution is left untouched.
*/
static PetscErrorCode TSStep_Pseudo(TS ts)
{
  PetscErrorCode      ierr;
  TS_Pseudo           *pseudo = (TS_Pseudo*)ts->data;
  SNESConvergedReason snesreason = SNES_CONVERGED_ITERATING;
  PetscReal           dt_next;
  PetscBool           accepted;
  PetscInt            nl_its,lin_its;
  PetscInt            nrejected = 0;

  PetscFunctionBegin;
  if (!ts->steps) pseudo->dt_initial = ts->time_step;

  /* Work on a copy so a rejected attempt leaves ts->vec_sol intact */
  ierr    = VecCopy(ts->vec_sol,pseudo->update);CHKERRQ(ierr);
  dt_next = ts->time_step;
  ierr    = TSPseudoComputeTimeStep(ts,&dt_next);CHKERRQ(ierr);

  while (nrejected < ts->max_reject) {
    ts->time_step = dt_next;

    ierr = TSPreStep(ts);CHKERRQ(ierr);
    ierr = TSPreStage(ts,ts->ptime+ts->time_step);CHKERRQ(ierr);
    ierr = SNESSolve(ts->snes,NULL,pseudo->update);CHKERRQ(ierr);
    ierr = SNESGetConvergedReason(ts->snes,&snesreason);CHKERRQ(ierr);
    ierr = SNESGetLinearSolveIterations(ts->snes,&lin_its);CHKERRQ(ierr);
    ierr = SNESGetIterationNumber(ts->snes,&nl_its);CHKERRQ(ierr);
    ierr = TSPostStage(ts,ts->ptime+ts->time_step,0,&(pseudo->update));CHKERRQ(ierr);

    /* Accumulate solver statistics for this attempt */
    ts->snes_its += nl_its;
    ts->ksp_its  += lin_its;
    ierr = PetscInfo3(ts,"step=%D, nonlinear solve iterations=%D, linear solve iterations=%D\n",ts->steps,nl_its,lin_its);CHKERRQ(ierr);

    /* The cached norm is stale now; the monitor has to recompute it */
    pseudo->fnorm = -1;

    ierr = TSPseudoVerifyTimeStep(ts,pseudo->update,&dt_next,&accepted);CHKERRQ(ierr);
    if (accepted) break;

    /* Attempt rejected: count it both locally and on the TS */
    nrejected++;
    ts->reject++;
  }

  /* A failed nonlinear solve counts against the allowed number of SNES failures */
  if (snesreason < 0 && ts->max_snes_failures > 0) {
    ts->num_snes_failures++;
    if (ts->num_snes_failures >= ts->max_snes_failures) {
      ts->reason = TS_DIVERGED_NONLINEAR_SOLVE;
      ierr = PetscInfo2(ts,"step=%D, nonlinear solve solve failures %D greater than current TS allowed, stopping solve\n",ts->steps,ts->num_snes_failures);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  /* Every allowed attempt was rejected */
  if (nrejected >= ts->max_reject) {
    ts->reason = TS_DIVERGED_STEP_REJECTED;
    ierr = PetscInfo2(ts,"step=%D, step rejections %D greater than current TS allowed, stopping solve\n",ts->steps,nrejected);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Commit the accepted candidate and advance */
  ierr = VecCopy(pseudo->update,ts->vec_sol);CHKERRQ(ierr);
  ts->ptime     += ts->time_step;
  ts->time_step  = dt_next;
  ts->steps++;
  PetscFunctionReturn(0);
}
/*@C SNESDefaultConverged - Convergence test of the solvers for systems of nonlinear equations (default). Collective on SNES Input Parameters: + snes - the SNES context . it - the iteration (0 indicates before any Newton steps) . xnorm - 2-norm of current iterate . snorm - 2-norm of current step . fnorm - 2-norm of function at current iterate - dummy - unused context Output Parameter: . reason - one of $ SNES_CONVERGED_FNORM_ABS - (fnorm < abstol), $ SNES_CONVERGED_SNORM_RELATIVE - (snorm < stol*xnorm), $ SNES_CONVERGED_FNORM_RELATIVE - (fnorm < rtol*fnorm0), $ SNES_DIVERGED_FUNCTION_COUNT - (nfct > maxf), $ SNES_DIVERGED_FNORM_NAN - (fnorm == NaN), $ SNES_CONVERGED_ITERATING - (otherwise), where + maxf - maximum number of function evaluations, set with SNESSetTolerances() . nfct - number of function evaluations, . abstol - absolute function norm tolerance, set with SNESSetTolerances() - rtol - relative function norm tolerance, set with SNESSetTolerances() Level: intermediate .keywords: SNES, nonlinear, default, converged, convergence .seealso: SNESSetConvergenceTest() @*/ PetscErrorCode SNESDefaultConverged(SNES snes,PetscInt it,PetscReal xnorm,PetscReal snorm,PetscReal fnorm,SNESConvergedReason *reason,void *dummy) { PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(snes,SNES_CLASSID,1); PetscValidPointer(reason,6); *reason = SNES_CONVERGED_ITERATING; if (!it) { /* set parameter for default relative tolerance convergence test */ snes->ttol = fnorm*snes->rtol; } if (PetscIsInfOrNanReal(fnorm)) { ierr = PetscInfo(snes,"Failed to converged, function norm is NaN\n");CHKERRQ(ierr); *reason = SNES_DIVERGED_FNORM_NAN; } else if (fnorm < snes->abstol) { ierr = PetscInfo2(snes,"Converged due to function norm %14.12e < %14.12e\n",(double)fnorm,(double)snes->abstol);CHKERRQ(ierr); *reason = SNES_CONVERGED_FNORM_ABS; } else if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo2(snes,"Exceeded maximum number of function evaluations: %D > 
%D\n",snes->nfuncs,snes->max_funcs);CHKERRQ(ierr); *reason = SNES_DIVERGED_FUNCTION_COUNT; } if (it && !*reason) { if (fnorm <= snes->ttol) { ierr = PetscInfo2(snes,"Converged due to function norm %14.12e < %14.12e (relative tolerance)\n",(double)fnorm,(double)snes->ttol);CHKERRQ(ierr); *reason = SNES_CONVERGED_FNORM_RELATIVE; } else if (snorm < snes->stol*xnorm) { ierr = PetscInfo3(snes,"Converged due to small update length: %14.12e < %14.12e * %14.12e\n",(double)snorm,(double)snes->stol,(double)xnorm);CHKERRQ(ierr); *reason = SNES_CONVERGED_SNORM_RELATIVE; } } PetscFunctionReturn(0); }
/*
   SNESSolve_NEWTONTR - Implements Newton's Method with a very simple trust region
   approach for solving systems of nonlinear equations.

   Input Parameter:
.  snes - the SNES context; snes->data must be a SNES_NEWTONTR

   Outline, as implemented below:
   - Errors out if variable bounds are set, since this solver does not support them.
   - Evaluates F(X) (unless an initial function value was supplied), initializes the
     trust region size delta from neP->delta0 (scaled by ||X|| when that is nonzero)
     and runs the convergence test once before iterating.
   - Unless -snes_tr_ksp_regular_convergence_test is set, installs
     SNESTR_KSPConverged_Private as the KSP convergence test.
   - Each outer iteration solves J Ytmp = F, then repeatedly tries the step
     (scaled back to length delta if it is longer), computes the ratio rho of actual
     to predicted reduction of the squared function norm, and resizes delta, until
     rho exceeds neP->sigma or a convergence test reports that no progress is possible.
   - Stores the final reason in snes->reason.
*/
static PetscErrorCode SNESSolve_NEWTONTR(SNES snes)
{
  SNES_NEWTONTR       *neP = (SNES_NEWTONTR*)snes->data;
  Vec                 X,F,Y,G,Ytmp;
  PetscErrorCode      ierr;
  PetscInt            maxits,i,lits;
  PetscReal           rho,fnorm,gnorm,gpnorm,xnorm=0,delta,nrm,ynorm,norm1;
  PetscScalar         cnorm;
  KSP                 ksp;
  SNESConvergedReason reason = SNES_CONVERGED_ITERATING;
  PetscBool           conv = PETSC_FALSE,breakout = PETSC_FALSE;

  PetscFunctionBegin;
  if (snes->xl || snes->xu || snes->ops->computevariablebounds) SETERRQ1(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE, "SNES solver %s does not support bounds", ((PetscObject)snes)->type_name);

  maxits = snes->max_its;               /* maximum number of iterations */
  X      = snes->vec_sol;               /* solution vector */
  F      = snes->vec_func;              /* residual vector */
  Y      = snes->work[0];               /* work vectors */
  G      = snes->work[1];
  Ytmp   = snes->work[2];

  ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
  snes->iter = 0;
  ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);

  /* Evaluate the initial residual unless vec_func_init_set says F already holds it; the flag is consumed here */
  if (!snes->vec_func_init_set) {
    ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr);          /* F(X) */
  } else snes->vec_func_init_set = PETSC_FALSE;

  ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr);             /* fnorm <- || F || */
  SNESCheckFunctionNorm(snes,fnorm);
  ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr);             /* xnorm <- || X || */
  ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
  snes->norm = fnorm;
  ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
  /* Initial trust region size: delta0 relative to ||X||, or delta0 itself when X is zero */
  delta      = xnorm ? neP->delta0*xnorm : neP->delta0;
  neP->delta = delta;
  ierr = SNESLogConvergenceHistory(snes,fnorm,0);CHKERRQ(ierr);
  ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr);

  /* test convergence */
  ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr);
  if (snes->reason) PetscFunctionReturn(0);

  /* Set the stopping criteria to use the More' trick. */
  ierr = PetscOptionsGetBool(((PetscObject)snes)->options,((PetscObject)snes)->prefix,"-snes_tr_ksp_regular_convergence_test",&conv,NULL);CHKERRQ(ierr);
  if (!conv) {
    SNES_TR_KSPConverged_Ctx *ctx;
    ierr      = SNESGetKSP(snes,&ksp);CHKERRQ(ierr);
    ierr      = PetscNew(&ctx);CHKERRQ(ierr);
    ctx->snes = snes;
    ierr      = KSPConvergedDefaultCreate(&ctx->ctx);CHKERRQ(ierr);
    /* ctx is handed to the KSP together with SNESTR_KSPConverged_Destroy as its destroy routine */
    ierr      = KSPSetConvergenceTest(ksp,SNESTR_KSPConverged_Private,ctx,SNESTR_KSPConverged_Destroy);CHKERRQ(ierr);
    ierr      = PetscInfo(snes,"Using Krylov convergence test SNESTR_KSPConverged_Private\n");CHKERRQ(ierr);
  }

  for (i=0; i<maxits; i++) {

    /* Call general purpose update function */
    if (snes->ops->update) {
      ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr);
    }

    /* Solve J Y = F, where J is Jacobian matrix */
    ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr);
    SNESCheckJacobianDomainerror(snes);
    ierr = KSPSetOperators(snes->ksp,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr);
    ierr = KSPSolve(snes->ksp,F,Ytmp);CHKERRQ(ierr);
    ierr = KSPGetIterationNumber(snes->ksp,&lits);CHKERRQ(ierr);

    snes->linear_its += lits;

    ierr  = PetscInfo2(snes,"iter=%D, linear solve iterations=%D\n",snes->iter,lits);CHKERRQ(ierr);
    ierr  = VecNorm(Ytmp,NORM_2,&nrm);CHKERRQ(ierr);
    norm1 = nrm;                        /* length of the unscaled step, restored at the top of every trial */
    while (1) {
      /* Each trial starts again from the unscaled step kept in Ytmp */
      ierr = VecCopy(Ytmp,Y);CHKERRQ(ierr);
      nrm  = norm1;

      /* Scale Y if need be and predict new value of F norm */
      if (nrm >= delta) {
        nrm    = delta/nrm;             /* nrm now holds the scaling factor */
        gpnorm = (1.0 - nrm)*fnorm;
        cnorm  = nrm;
        ierr   = PetscInfo1(snes,"Scaling direction by %g\n",(double)nrm);CHKERRQ(ierr);
        ierr   = VecScale(Y,cnorm);CHKERRQ(ierr);
        nrm    = gpnorm;
        ynorm  = delta;
      } else {
        gpnorm = 0.0;
        ierr   = PetscInfo(snes,"Direction is in Trust Region\n");CHKERRQ(ierr);
        ynorm  = nrm;
      }
      ierr = VecAYPX(Y,-1.0,X);CHKERRQ(ierr);            /* Y <- X - Y */
      ierr = VecCopy(X,snes->vec_sol_update);CHKERRQ(ierr);
      ierr = SNESComputeFunction(snes,Y,G);CHKERRQ(ierr); /*  G <- F(Y), residual at the trial point */
      ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr);      /* gnorm <- || g || */
      /* rho = (actual reduction)/(predicted reduction) of the squared norm; 0 when no reduction is predicted */
      if (fnorm == gpnorm) rho = 0.0;
      else rho = (fnorm*fnorm - gnorm*gnorm)/(fnorm*fnorm - gpnorm*gpnorm);

      /* Update size of trust region */
      if      (rho < neP->mu)  delta *= neP->delta1;
      else if (rho < neP->eta) delta *= neP->delta2;
      else                     delta *= neP->delta3;
      ierr = PetscInfo3(snes,"fnorm=%g, gnorm=%g, ynorm=%g\n",(double)fnorm,(double)gnorm,(double)ynorm);CHKERRQ(ierr);
      ierr = PetscInfo3(snes,"gpred=%g, rho=%g, delta=%g\n",(double)gpnorm,(double)rho,(double)delta);CHKERRQ(ierr);

      neP->delta = delta;
      if (rho > neP->sigma) break;      /* trial step accepted */
      ierr = PetscInfo(snes,"Trying again in smaller region\n");CHKERRQ(ierr);
      /* check to see if progress is hopeless */
      neP->itflag = PETSC_FALSE;
      ierr        = SNESTR_Converged_Private(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
      if (!reason) { ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr); }
      if (reason) {
        /* We're not progressing, so return with the current iterate */
        ierr     = SNESMonitor(snes,i+1,fnorm);CHKERRQ(ierr);
        breakout = PETSC_TRUE;
        break;
      }
      snes->numFailures++;
    }
    if (!breakout) {
      /* Update function and solution vectors */
      fnorm = gnorm;
      ierr  = VecCopy(G,F);CHKERRQ(ierr);
      ierr  = VecCopy(Y,X);CHKERRQ(ierr);
      /* Monitor convergence */
      ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
      snes->iter = i+1;
      snes->norm = fnorm;
      /* NOTE(review): xnorm is only recomputed further down, so this stores the norm of the previous iterate -- confirm intended */
      snes->xnorm = xnorm;
      snes->ynorm = ynorm;
      ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
      ierr = SNESLogConvergenceHistory(snes,snes->norm,lits);CHKERRQ(ierr);
      ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr);
      /* Test for convergence, xnorm = || X || */
      neP->itflag = PETSC_TRUE;
      /* The norm of X is not computed when the convergence test is SNESConvergedSkip */
      if (snes->ops->converged != SNESConvergedSkip) { ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr); }
      ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
      if (reason) break;
    } else break;
  }
  /* i == maxits only when the loop ran to completion without a break */
  if (i == maxits) {
    ierr = PetscInfo1(snes,"Maximum number of iterations has been reached: %D\n",maxits);CHKERRQ(ierr);
    if (!reason) reason = SNES_DIVERGED_MAX_IT;
  }
  ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
  snes->reason = reason;
  ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   TaoSolve_ASILS - Solve routine for the ASILS method.

   Each iteration partitions the variables into a fixed set (entries of
   asls->db within +/- asls->identifier) and its complement, the free set.
   The direction on the fixed set is obtained by a pointwise division, the
   direction on the free set by a linear solve with the reduced Jacobian plus
   a diagonal perturbation.  If the combined direction fails the descent test,
   the merit-function gradient asls->dpsi is used instead.  The line search is
   then applied along the negated direction.  Iteration stops when
   TaoMonitor() reports a reason other than TAO_CONTINUE_ITERATING.

   Input Parameter:
.  tao - the Tao context; tao->data must be a TAO_SSLS
*/
static PetscErrorCode TaoSolve_ASILS(Tao tao)
{
  TAO_SSLS                     *asls = (TAO_SSLS *)tao->data;
  PetscReal                    psi,ndpsi, normd, innerd, t=0;
  PetscInt                     iter=0, nf;
  PetscErrorCode               ierr;
  TaoConvergedReason           reason;
  TaoLineSearchConvergedReason ls_reason;

  PetscFunctionBegin;
  /* Assume that Setup has been called!
     Set the structure for the Jacobian and create a linear solver. */

  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetObjectiveAndGradientRoutine(tao->linesearch,Tao_ASLS_FunctionGradient,tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetObjectiveRoutine(tao->linesearch,Tao_SSLS_Function,tao);CHKERRQ(ierr);

  /* Calculate the function value and fischer function value at the
     current iterate */
  ierr = TaoLineSearchComputeObjectiveAndGradient(tao->linesearch,tao->solution,&psi,asls->dpsi);CHKERRQ(ierr);
  ierr = VecNorm(asls->dpsi,NORM_2,&ndpsi);CHKERRQ(ierr);

  while (1) {
    /* Check the termination criteria */
    ierr = PetscInfo3(tao,"iter %D, merit: %g, ||dpsi||: %g\n",iter, (double)asls->merit,  (double)ndpsi);CHKERRQ(ierr);
    ierr = TaoMonitor(tao, iter++, asls->merit, ndpsi, 0.0, t, &reason);CHKERRQ(ierr);
    if (TAO_CONTINUE_ITERATING != reason) break;

    /* We are going to solve a linear system of equations.  We need to
       set the tolerances for the solve so that we maintain an asymptotic
       rate of convergence that is superlinear.
       Note: these tolerances are for the reduced system.  We really need
       to make sure that the full system satisfies the full-space conditions.

       This rule gives superlinear asymptotic convergence
       asls->atol = min(0.5, asls->merit*sqrt(asls->merit));
       asls->rtol = 0.0;

       This rule gives quadratic asymptotic convergence
       asls->atol = min(0.5, asls->merit*asls->merit);
       asls->rtol = 0.0;

       Calculate a free and fixed set of variables.  The fixed set of
       variables are those for the d_b is approximately equal to zero.
       The definition of approximately changes as we approach the solution
       to the problem.

       No one rule is guaranteed to work in all cases.  The following
       definition is based on the norm of the Jacobian matrix.  If the
       norm is large, the tolerance becomes smaller. */
    ierr = MatNorm(tao->jacobian,NORM_1,&asls->identifier);CHKERRQ(ierr);
    asls->identifier = PetscMin(asls->merit, 1e-2) / (1 + asls->identifier);

    ierr = VecSet(asls->t1,-asls->identifier);CHKERRQ(ierr);
    ierr = VecSet(asls->t2, asls->identifier);CHKERRQ(ierr);

    ierr = ISDestroy(&asls->fixed);CHKERRQ(ierr);
    ierr = ISDestroy(&asls->free);CHKERRQ(ierr);
    ierr = VecWhichBetweenOrEqual(asls->t1, asls->db, asls->t2, &asls->fixed);CHKERRQ(ierr);
    ierr = ISComplementVec(asls->fixed,asls->t1, &asls->free);CHKERRQ(ierr);

    ierr = ISGetSize(asls->fixed,&nf);CHKERRQ(ierr);
    ierr = PetscInfo1(tao,"Number of fixed variables: %D\n", nf);CHKERRQ(ierr);

    /* We now have our partition.  Now calculate the direction in the
       fixed variable space. */
    /* The return codes of the next two calls were previously overwritten unchecked */
    ierr = TaoVecGetSubVec(asls->ff, asls->fixed, tao->subset_type, 0.0, &asls->r1);CHKERRQ(ierr);
    ierr = TaoVecGetSubVec(asls->da, asls->fixed, tao->subset_type, 1.0, &asls->r2);CHKERRQ(ierr);
    ierr = VecPointwiseDivide(asls->r1,asls->r1,asls->r2);CHKERRQ(ierr);
    ierr = VecSet(tao->stepdirection,0.0);CHKERRQ(ierr);
    ierr = VecISAXPY(tao->stepdirection, asls->fixed,1.0,asls->r1);CHKERRQ(ierr);

    /* Our direction in the Fixed Variable Set is fixed.  Calculate the
       information needed for the step in the Free Variable Set.  To
       do this, we need to know the diagonal perturbation and the
       right hand side. */

    ierr = TaoVecGetSubVec(asls->da, asls->free, tao->subset_type, 0.0, &asls->r1);CHKERRQ(ierr);
    ierr = TaoVecGetSubVec(asls->ff, asls->free, tao->subset_type, 0.0, &asls->r2);CHKERRQ(ierr);
    ierr = TaoVecGetSubVec(asls->db, asls->free, tao->subset_type, 1.0, &asls->r3);CHKERRQ(ierr);
    ierr = VecPointwiseDivide(asls->r1,asls->r1, asls->r3);CHKERRQ(ierr);
    ierr = VecPointwiseDivide(asls->r2,asls->r2, asls->r3);CHKERRQ(ierr);

    /* r1 is the diagonal perturbation
       r2 is the right hand side
       r3 is no longer needed

       Now need to modify r2 for our direction choice in the fixed
       variable set:  calculate t1 = J*d, take the reduced vector
       of t1 and modify r2. */

    ierr = MatMult(tao->jacobian, tao->stepdirection, asls->t1);CHKERRQ(ierr);
    ierr = TaoVecGetSubVec(asls->t1,asls->free,tao->subset_type,0.0,&asls->r3);CHKERRQ(ierr);
    ierr = VecAXPY(asls->r2, -1.0, asls->r3);CHKERRQ(ierr);

    /* Calculate the reduced problem matrix and the direction */
    if (!asls->w && (tao->subset_type == TAO_SUBSET_MASK || tao->subset_type == TAO_SUBSET_MATRIXFREE)) {
      ierr = VecDuplicate(tao->solution, &asls->w);CHKERRQ(ierr);
    }
    ierr = TaoMatGetSubMat(tao->jacobian, asls->free, asls->w, tao->subset_type,&asls->J_sub);CHKERRQ(ierr);
    if (tao->jacobian != tao->jacobian_pre) {
      ierr = TaoMatGetSubMat(tao->jacobian_pre, asls->free, asls->w, tao->subset_type, &asls->Jpre_sub);CHKERRQ(ierr);
    } else {
      /* Preconditioner matrix is the Jacobian itself: share J_sub and take a reference */
      ierr = MatDestroy(&asls->Jpre_sub);CHKERRQ(ierr);
      asls->Jpre_sub = asls->J_sub;
      ierr = PetscObjectReference((PetscObject)(asls->Jpre_sub));CHKERRQ(ierr);
    }
    ierr = MatDiagonalSet(asls->J_sub, asls->r1,ADD_VALUES);CHKERRQ(ierr);
    ierr = TaoVecGetSubVec(tao->stepdirection, asls->free, tao->subset_type, 0.0, &asls->dxfree);CHKERRQ(ierr);
    ierr = VecSet(asls->dxfree, 0.0);CHKERRQ(ierr);

    /* Calculate the reduced direction.  (Really negative of Newton
       direction.  Therefore, rest of the code uses -d.) */
    /* The return code of KSPReset was previously overwritten unchecked */
    ierr = KSPReset(tao->ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(tao->ksp, asls->J_sub, asls->Jpre_sub);CHKERRQ(ierr);
    ierr = KSPSolve(tao->ksp, asls->r2, asls->dxfree);CHKERRQ(ierr);

    /* Add the direction in the free variables back into the real direction. */
    ierr = VecISAXPY(tao->stepdirection, asls->free, 1.0,asls->dxfree);CHKERRQ(ierr);

    /* Check the real direction for descent and if not, use the negative
       gradient direction. */
    ierr = VecNorm(tao->stepdirection, NORM_2, &normd);CHKERRQ(ierr);
    ierr = VecDot(tao->stepdirection, asls->dpsi, &innerd);CHKERRQ(ierr);

    if (innerd <= asls->delta*pow(normd, asls->rho)) {
      ierr = PetscInfo1(tao,"Gradient direction: %5.4e.\n", (double)innerd);CHKERRQ(ierr);
      ierr = PetscInfo1(tao, "Iteration %D: newton direction not descent\n", iter);CHKERRQ(ierr);
      ierr = VecCopy(asls->dpsi, tao->stepdirection);CHKERRQ(ierr);
      ierr = VecDot(asls->dpsi, tao->stepdirection, &innerd);CHKERRQ(ierr);
    }

    ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr);
    innerd = -innerd;

    /* We now have a correct descent direction.  Apply a linesearch to
       find the new iterate. */
    ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
    ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &psi,asls->dpsi, tao->stepdirection, &t, &ls_reason);CHKERRQ(ierr);
    ierr = VecNorm(asls->dpsi, NORM_2, &ndpsi);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
   SNESSolve_TR - Newton iteration with a trust region on the step length.

   Each outer iteration solves J Y = F with the SNES's KSP, then repeatedly
   clips the step to the current trust radius `delta`, evaluates the residual
   at the trial point, and compares actual versus predicted reduction (rho).
   The radius is rescaled by neP->delta1/delta2/delta3 depending on rho, and
   the trial point is accepted once rho exceeds neP->sigma.

   Input Parameter:
.  snes - the nonlinear solver context; solution, residual and three work
          vectors are taken from it

   On return snes->reason holds the convergence/divergence reason.
*/
static PetscErrorCode SNESSolve_TR(SNES snes)
{
  SNES_TR             *neP = (SNES_TR*)snes->data;
  Vec                 X,F,Y,G,Ytmp;
  PetscErrorCode      ierr;
  PetscInt            maxits,i,lits;
  MatStructure        flg = DIFFERENT_NONZERO_PATTERN;
  PetscReal           rho,fnorm,gnorm,gpnorm,xnorm = 0,delta,ynorm,newton_norm;
  SNESConvergedReason reason = SNES_CONVERGED_ITERATING;
  PetscBool           regular_ksp_test = PETSC_FALSE,stalled = PETSC_FALSE;
  PetscBool           domainerror;

  PetscFunctionBegin;
  maxits = snes->max_its;   /* maximum number of iterations */
  X      = snes->vec_sol;   /* solution vector */
  F      = snes->vec_func;  /* residual vector */
  Y      = snes->work[0];   /* work vectors */
  G      = snes->work[1];
  Ytmp   = snes->work[2];

  ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr);
  snes->iter = 0;
  ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr);

  /* Initial residual: compute it unless the caller already supplied it */
  if (snes->vec_func_init_set) {
    snes->vec_func_init_set = PETSC_FALSE;
  } else {
    ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr);          /* F <- F(X) */
    ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr);
    if (domainerror) {
      snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN;
      PetscFunctionReturn(0);
    }
  }

  /* Initial residual norm: same idea, reuse a caller-supplied value if present */
  if (snes->norm_init_set) {
    fnorm               = snes->norm_init;
    snes->norm_init_set = PETSC_FALSE;
  } else {
    ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr);               /* fnorm <- || F || */
    if (PetscIsInfOrNanReal(fnorm)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"User provided compute function generated a Not-a-Number");
  }

  ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr);
  snes->norm = fnorm;
  ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr);

  /* Initial trust radius is proportional to the initial residual norm */
  delta      = neP->delta0*fnorm;
  neP->delta = delta;
  SNESLogConvHistory(snes,fnorm,0);
  ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr);

  /* set parameter for default relative tolerance convergence test */
  snes->ttol = fnorm*snes->rtol;

  /* test convergence */
  ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr);
  if (snes->reason) PetscFunctionReturn(0);

  /*
     Set the stopping criteria to use the More' trick, unless the user asked
     for the regular KSP convergence test on the command line.
  */
  ierr = PetscOptionsGetBool(PETSC_NULL,"-snes_tr_ksp_regular_convergence_test",&regular_ksp_test,PETSC_NULL);CHKERRQ(ierr);
  if (!regular_ksp_test) {
    KSP                      ksp;
    SNES_TR_KSPConverged_Ctx *ctx;

    ierr      = SNESGetKSP(snes,&ksp);CHKERRQ(ierr);
    ierr      = PetscNew(SNES_TR_KSPConverged_Ctx,&ctx);CHKERRQ(ierr);
    ctx->snes = snes;
    ierr      = KSPDefaultConvergedCreate(&ctx->ctx);CHKERRQ(ierr);
    ierr      = KSPSetConvergenceTest(ksp,SNES_TR_KSPConverged_Private,ctx,SNES_TR_KSPConverged_Destroy);CHKERRQ(ierr);
    ierr      = PetscInfo(snes,"Using Krylov convergence test SNES_TR_KSPConverged_Private\n");CHKERRQ(ierr);
  }

  for (i=0; i<maxits; i++) {

    /* Call general purpose update function */
    if (snes->ops->update) {
      ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr);
    }

    /* Solve J Y = F, where J is Jacobian matrix */
    ierr = SNESComputeJacobian(snes,X,&snes->jacobian,&snes->jacobian_pre,&flg);CHKERRQ(ierr);
    ierr = KSPSetOperators(snes->ksp,snes->jacobian,snes->jacobian_pre,flg);CHKERRQ(ierr);
    ierr = SNES_KSPSolve(snes,snes->ksp,F,Ytmp);CHKERRQ(ierr);
    ierr = KSPGetIterationNumber(snes->ksp,&lits);CHKERRQ(ierr);
    snes->linear_its += lits;
    ierr = PetscInfo2(snes,"iter=%D, linear solve iterations=%D\n",snes->iter,lits);CHKERRQ(ierr);
    ierr = VecNorm(Ytmp,NORM_2,&newton_norm);CHKERRQ(ierr);

    /* Retry the same linear-solve direction with a shrinking radius until it is accepted */
    for (;;) {
      PetscReal step_norm = newton_norm;

      ierr = VecCopy(Ytmp,Y);CHKERRQ(ierr);

      /* Scale Y if need be and predict new value of F norm */
      if (step_norm >= delta) {
        const PetscReal shrink = delta/step_norm;
        PetscScalar     yscale = shrink;

        gpnorm = (1.0 - shrink)*fnorm;
        ierr   = PetscInfo1(snes,"Scaling direction by %G\n",shrink);CHKERRQ(ierr);
        ierr   = VecScale(Y,yscale);CHKERRQ(ierr);
        ynorm  = delta;
      } else {
        gpnorm = 0.0;
        ierr   = PetscInfo(snes,"Direction is in Trust Region\n");CHKERRQ(ierr);
        ynorm  = step_norm;
      }

      ierr = VecAYPX(Y,-1.0,X);CHKERRQ(ierr);                    /* Y <- X - Y */
      ierr = VecCopy(X,snes->vec_sol_update);CHKERRQ(ierr);
      ierr = SNESComputeFunction(snes,Y,G);CHKERRQ(ierr);        /* G <- F(Y) */
      ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr);             /* gnorm <- || G || */

      /* Ratio of actual to predicted reduction; zero when no reduction was predicted */
      rho = (fnorm == gpnorm) ? 0.0 : (fnorm*fnorm - gnorm*gnorm)/(fnorm*fnorm - gpnorm*gpnorm);

      /* Update size of trust region */
      if (rho < neP->mu) {
        delta *= neP->delta1;
      } else if (rho < neP->eta) {
        delta *= neP->delta2;
      } else {
        delta *= neP->delta3;
      }
      ierr = PetscInfo3(snes,"fnorm=%G, gnorm=%G, ynorm=%G\n",fnorm,gnorm,ynorm);CHKERRQ(ierr);
      ierr = PetscInfo3(snes,"gpred=%G, rho=%G, delta=%G\n",gpnorm,rho,delta);CHKERRQ(ierr);
      neP->delta = delta;

      if (rho > neP->sigma) break;                               /* trial point accepted */

      ierr = PetscInfo(snes,"Trying again in smaller region\n");CHKERRQ(ierr);

      /* check to see if progress is hopeless */
      neP->itflag = PETSC_FALSE;
      ierr = SNES_TR_Converged_Private(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
      if (!reason) {
        ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
      }
      if (reason) {
        /* We're not progressing, so return with the current iterate */
        ierr    = SNESMonitor(snes,i+1,fnorm);CHKERRQ(ierr);
        stalled = PETSC_TRUE;
        break;
      }
      snes->numFailures++;
    }

    if (stalled) break;

    /* Update function and solution vectors */
    fnorm = gnorm;
    ierr  = VecCopy(G,F);CHKERRQ(ierr);
    ierr  = VecCopy(Y,X);CHKERRQ(ierr);

    /* Monitor convergence */
    ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr);
    snes->iter = i+1;
    snes->norm = fnorm;
    ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr);
    SNESLogConvHistory(snes,snes->norm,lits);
    ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr);

    /* Test for convergence, xnorm = || X || */
    neP->itflag = PETSC_TRUE;
    if (snes->ops->converged != SNESSkipConverged) {
      ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr);
    }
    ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
    if (reason) break;
  }

  if (i == maxits) {
    ierr = PetscInfo1(snes,"Maximum number of iterations has been reached: %D\n",maxits);CHKERRQ(ierr);
    if (!reason) reason = SNES_DIVERGED_MAX_IT;
  }

  ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr);
  snes->reason = reason;
  ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; PetscBool row_identity,col_identity,both_identity; IS isicol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,*ai=a->i,*aj=a->j; PetscInt *bi,*bj,*ajtmp; PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; PetscReal f; PetscInt nlnk,*lnk,k,**bi_ptr; PetscFreeSpaceList free_space=NULL,current_space=NULL; PetscBT lnkbt; PetscBool missing; PetscFunctionBegin; if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); bi[0] = bdiag[0] = 0; /* linked list for storing column indices of the active row */ nlnk = n + 1; ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); /* initial FreeSpace size is f*(ai[n]+1) */ f = info->fill; ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<n; i++) { /* copy previous fill into linked list */ nzi = 0; nnz = ai[r[i]+1] - ai[r[i]]; ajtmp = aj + ai[r[i]]; ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); nzi += nlnk; /* add pivot rows into linked list */ row = lnk[n]; while (row < i) { nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ ajtmp = bi_ptr[row] + nzbd; /* 
points to the entry next to the diagonal */ ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); nzi += nlnk; row = lnk[row]; } bi[i+1] = bi[i] + nzi; im[i] = nzi; /* mark bdiag */ nzbd = 0; nnz = nzi; k = lnk[n]; while (nnz-- && k < i) { nzbd++; k = lnk[k]; } bdiag[i] = bi[i] + nzbd; /* if free space is not available, make more free space */ if (current_space->local_remaining<nzi) { nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); reallocs++; } /* copy data into free space, then initialize lnk */ ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); bi_ptr[i] = current_space->array; current_space->array += nzi; current_space->local_used += nzi; current_space->local_remaining -= nzi; } #if defined(PETSC_USE_INFO) if (ai[n] != 0) { PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); } else { ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); } #endif ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); /* put together the new matrix */ ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); b = (Mat_SeqBAIJ*)(B)->data; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; b->singlemalloc = 
PETSC_FALSE; ierr = PetscMalloc1((bi[n]+1)*bs2,&b->a);CHKERRQ(ierr); b->j = bj; b->i = bi; b->diag = bdiag; b->free_diag = PETSC_TRUE; b->ilen = 0; b->imax = 0; b->row = isrow; b->col = iscol; b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->icol = isicol; ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); b->maxnz = b->nz = bi[n]; (B)->factortype = MAT_FACTOR_LU; (B)->info.factor_mallocs = reallocs; (B)->info.fill_ratio_given = f; if (ai[n] != 0) { (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); } else { (B)->info.fill_ratio_needed = 0.0; } ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); both_identity = (PetscBool) (row_identity && col_identity); ierr = MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);CHKERRQ(ierr); PetscFunctionReturn(0); }
// TaoSolve_SSFLS - Semismooth solver with a feasible line search.
//
// Projects the starting point into [l,u], installs the SSLS merit function,
// and then iterates: monitor/termination test, linear solve for a direction,
// descent check against the projected direction, and a line search.
//
// Input:
//   tao    - the TAO solver; solution, bounds and Jacobian are queried from it
//   solver - the TAO_SSLS context (work vectors, delta, rho, merit value)
//
// Returns 0 on success, otherwise the first nonzero error code from a callee.
// Assumes that Setup has been called.
static int TaoSolve_SSFLS(TAO_SOLVER tao, void *solver)
{
  TAO_SSLS *ssls = (TAO_SSLS *)solver;
  TaoVec *x, *l, *u, *ff, *dpsi, *d, *w;
  TaoMat *J;
  double psi, psi_full, ndpsi, normd, innerd, t=0;
  double delta, rho;
  int iter=0, info;
  TaoTerminateReason reason;
  TaoTruth flag;

  TaoFunctionBegin;

  // Set the structure for the Jacobian and create a linear solver.
  delta = ssls->delta;
  rho = ssls->rho;

  info = TaoGetSolution(tao, &x); CHKERRQ(info);
  l=ssls->xl;
  u=ssls->xu;
  info = TaoEvaluateVariableBounds(tao,l,u); CHKERRQ(info);
  info = x->Median(l,x,u); CHKERRQ(info);
  info = TaoGetJacobian(tao, &J); CHKERRQ(info);

  ff = ssls->ff;
  dpsi = ssls->dpsi;
  d = ssls->d;
  w = ssls->w;

  info = x->PointwiseMaximum(x, l); CHKERRQ(info);
  info = x->PointwiseMinimum(x, u); CHKERRQ(info);

  info = TaoSetMeritFunction(tao, Tao_SSLS_Function, Tao_SSLS_FunctionGradient,
                             TAO_NULL, TAO_NULL, TAO_NULL, ssls); CHKERRQ(info);

  // Calculate the function value and fischer function value at the
  // current iterate
  info = TaoComputeMeritFunctionGradient(tao, x, &psi, dpsi); CHKERRQ(info);
  info = dpsi->Norm2(&ndpsi); CHKERRQ(info);

  while (1) {
    info=PetscInfo3(tao, "TaoSolve_SSFLS: %d: merit: %5.4e, ndpsi: %5.4e\n", iter, ssls->merit, ndpsi);CHKERRQ(info);

    // Check the termination criteria (iter is incremented after being reported)
    info = TaoMonitor(tao,iter++,ssls->merit,ndpsi,0.0,t,&reason); CHKERRQ(info);
    if (reason!=TAO_CONTINUE_ITERATING) break;

    // Calculate direction.  (Really negative of newton direction.  Therefore,
    // rest of the code uses -d.)
    info = TaoPreLinearSolve(tao, J); CHKERRQ(info);
    info = TaoLinearSolve(tao, J, ff, d, &flag); CHKERRQ(info);

    // w = projection of -d; its norm and slope against dpsi drive the descent test
    info = w->CopyFrom(d); CHKERRQ(info);
    info = w->Negate(); CHKERRQ(info);
    info = w->BoundGradientProjection(w,l, x, u); CHKERRQ(info);

    info = w->Norm2(&normd); CHKERRQ(info);
    info = w->Dot(dpsi, &innerd); CHKERRQ(info);

    // Make sure that we have a descent direction
    if (innerd >= -delta*pow(normd, rho)) {
      info = PetscInfo1(tao, "TaoSolve_SSFLS: %d: newton direction not descent\n", iter); CHKERRQ(info);
      info = d->CopyFrom(dpsi); CHKERRQ(info);
      info = w->Dot(dpsi, &innerd); CHKERRQ(info);
    }

    info = d->Negate(); CHKERRQ(info);
    innerd = -innerd;

    t = 1;
    info = TaoLineSearchApply(tao, x, dpsi, d, w,
                              &psi, &psi_full, &t, &tao->lsflag); CHKERRQ(info);
    info = dpsi->Norm2(&ndpsi); CHKERRQ(info);
  }

  TaoFunctionReturn(0);
}
/*
   FiberField_AddToSendbufs - Sorts every local vertex and edge into the send
   buffer of the neighbor (index in the 3x3x3 neighborhood) that contains it.

   Input Parameter:
.  field - the fiber field; its verts, edges, localBounds, da and sendbufs
           are read, and sendbufs[] are cleared and refilled

   Vertices are placed by their own position. An edge is placed by the
   position of its vertex with the smaller ID. In an edge record, xID carries
   the edge ID, yIDs[0..1] the two vertex IDs and X.x the rest length l0.

   Errors (returned with a nonzero code so that callers' CHKERRQ sees them):
     PETSC_ERR_ARG_OUTOFRANGE - a vertex lies outside the 3x3x3 neighbor
                                region, or maps to MPI_PROC_NULL
     PETSC_ERR_PLIB           - an edge's vPO entry points at a vertex whose
                                ID does not match the edge's vID
*/
PetscErrorCode FiberField_AddToSendbufs( FiberField field )
{
  int i;
  int e;
  const int vlen = ArrayLength(field->verts);
  const int elen = ArrayLength(field->edges);
  const BoundingBox lbbox = field->localBounds;
  int neiIdx;
  VertexEdgeMPI *evmpi;
  iCoor n; // index in 3x3x3 array nei
  Vertex v;
  PetscMPIInt sendRank;
  const PetscMPIInt *neiRanks;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  ierr = DMDAGetNeighbors(field->da, &neiRanks); CHKERRQ(ierr);

  // clear send arrays
  for (i = 0; i < NUMNEI; i++) {
    ArraySetSize( field->sendbufs[i], 0);
  }

  // for each vert
  //   if outside nei, err
  //   else add vert to send list
  for (i = 0; i < vlen; i++) {
    ierr = ArrayGet( field->verts, i, &v ); CHKERRQ(ierr);
    PositionToNeiIdx( &lbbox, &v->X, &n, &neiIdx);

    // if vertex outside 3x3x3 nei, something went terribly wrong
    if (n.x < 0 || n.x > 2 ||
        n.y < 0 || n.y > 2 ||
        n.z < 0 || n.z > 2 ) {
      ierr = PetscInfo(0, "ERROR: Vertex outside 3x3x3 neighbor region\n"); CHKERRQ(ierr);
      ierr = PetscInfo1(0, "i = %d\n",i); CHKERRQ(ierr);
      ierr = PetscInfo3(0, "X = {%f, %f, %f}\n",v->X.x,v->X.y,v->X.z); CHKERRQ(ierr);
      ierr = PetscInfo3(0, "n = {%d, %d, %d}\n",n.x,n.y,n.z); CHKERRQ(ierr);
      ierr = PetscInfo(0, "ERROR: END MESSAGE\n"); CHKERRQ(ierr);
      SETERRQ(field->comm, PETSC_ERR_ARG_OUTOFRANGE, "Vertex outside 3x3x3 neighbor region");
    } else {
      // convert nei index to mpi rank
      sendRank = neiRanks[neiIdx];

      // in the edge case where a vertex leaves the global bounding box, abort
      // handle this case in the physics, not in the communication routine
      if ( sendRank == MPI_PROC_NULL) {
        ierr = PetscInfo(0, "ERROR: Vertex outside global bbox\n"); CHKERRQ(ierr);
        ierr = PetscInfo1(0, "i = %d\n",i); CHKERRQ(ierr);
        ierr = PetscInfo3(0, "X = {%f, %f, %f}\n",v->X.x,v->X.y,v->X.z); CHKERRQ(ierr);
        ierr = PetscInfo3(0, "n = {%d, %d, %d}\n",n.x,n.y,n.z); CHKERRQ(ierr);
        ierr = PetscInfo1(0, "neiIdx = %d\n",neiIdx); CHKERRQ(ierr);
        ierr = PetscInfo(0, "ERROR: END MESSAGE\n"); CHKERRQ(ierr);
        SETERRQ(field->comm, PETSC_ERR_ARG_OUTOFRANGE, "Vertex outside global bbox\n");
      }

      // add vertex to send list[rank]
      ierr = ArrayAppend( field->sendbufs[neiIdx], &evmpi); CHKERRQ(ierr);
      evmpi->xID = v->vID;
      evmpi->type= v->type;
      evmpi->X = v->X;
      evmpi->V = v->V;
      for (e = 0; e < MAXEDGES; e++) {
        evmpi->yIDs[e] = v->eID[e];
      }
    }
  }

  int min;
  int vPO;
  struct _Edge *edges = ArrayGetData(field->edges);
  struct _Vertex *vertsPO;
  ierr = FiberFieldGetVertexArrayPO( field, &vertsPO ); CHKERRQ(ierr);

  for (e = 0; e < elen; e++) {
    // the edge is 'owned' by the vertex with the smallest ID
    min = edges[e].vID[0] < edges[e].vID[1] ? 0 : 1;
    vPO = edges[e].vPO[min];
    v = &vertsPO[vPO];
    PositionToNeiIdx( &lbbox, &v->X, &n, &neiIdx);

    // the vPO lookup must land on the vertex the edge names
    if (v->vID != edges[e].vID[min] ) {
      ierr = PetscInfo1(0, "v->vID = %d\n", v->vID); CHKERRQ(ierr);
      ierr = PetscInfo1(0, "edges[e].vID[min] = %d\n", edges[e].vID[min]); CHKERRQ(ierr);
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Bad vertex");
    }

    ierr = ArrayAppend( field->sendbufs[neiIdx], &evmpi); CHKERRQ(ierr);
    evmpi->xID = edges[e].eID;
    evmpi->type = edges[e].type;
    evmpi->yIDs[0] = edges[e].vID[0];
    evmpi->yIDs[1] = edges[e].vID[1];
    evmpi->X.x = edges[e].l0;
  }

  PetscFunctionReturn(0);
}
/*
   KSPSolve_QCG - Conjugate gradient iteration restricted to the trust region
   ||s|| <= delta, applied to the symmetrically preconditioned system.

   Correspondence with the documentation: B = g = gradient, X = s = step.

   The iteration stops when (a) a direction of nonpositive curvature is found,
   (b) the trial iterate leaves the trust region, or (c) the KSP convergence
   test fires on the scaled residual. In cases (a) and (b) the step is moved
   to the trust-region boundary. On exit X is unscaled and pcgP->quadratic
   holds b'x + x'Ax/2.

   Note: This is not coded correctly for complex arithmetic!
*/
PetscErrorCode KSPSolve_QCG(KSP ksp)
{
  KSP_QCG        *pcgP = (KSP_QCG*)ksp->data;
  Mat            Amat,Pmat;
  Vec            W,WA,WA2,R,P,ASP,BS,X,B;
  PetscScalar    scale,beta,rntrn,step;
  PetscReal      qs1,qs2,xnorm,step1,step2,rnrm,btx,xtax;
  PetscReal      ptasp,rtr,wtasp,bstp;
  PetscReal      bsnrm;
  PetscErrorCode ierr;
  PetscInt       i,maxit;
  PC             pc = ksp->pc;
  PCSide         side;
  PetscBool      diagonalscale;

  PetscFunctionBegin;
  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
  if (ksp->transpose_solve) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Currently does not support transpose solve");

  ksp->its = 0;
  maxit    = ksp->max_it;

  /* Work vectors and the solution / right-hand side */
  WA  = ksp->work[0];
  R   = ksp->work[1];
  P   = ksp->work[2];
  ASP = ksp->work[3];
  BS  = ksp->work[4];
  W   = ksp->work[5];
  WA2 = ksp->work[6];
  X   = ksp->vec_sol;
  B   = ksp->vec_rhs;

  if (pcgP->delta <= 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Input error: delta <= 0");
  ierr = KSPGetPCSide(ksp,&side);CHKERRQ(ierr);
  if (side != PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Requires symmetric preconditioner!");

  /* Initialize variables */
  ierr = VecSet(W,0.0);CHKERRQ(ierr);  /* W = 0 */
  ierr = VecSet(X,0.0);CHKERRQ(ierr);  /* X = 0 */
  ierr = PCGetOperators(pc,&Amat,&Pmat);CHKERRQ(ierr);

  /* Compute:  BS = D^{-1} B */
  ierr = PCApplySymmetricLeft(pc,B,BS);CHKERRQ(ierr);

  ierr       = VecNorm(BS,NORM_2,&bsnrm);CHKERRQ(ierr);
  ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->its   = 0;
  ksp->rnorm = bsnrm;
  ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  ierr       = KSPLogResidualHistory(ksp,bsnrm);CHKERRQ(ierr);
  ierr       = KSPMonitor(ksp,0,bsnrm);CHKERRQ(ierr);
  ierr       = (*ksp->converged)(ksp,0,bsnrm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) PetscFunctionReturn(0);

  /* Compute the initial scaled direction and scaled residual: r = p = -bs */
  ierr = VecCopy(BS,R);CHKERRQ(ierr);
  ierr = VecScale(R,-1.0);CHKERRQ(ierr);
  ierr = VecCopy(R,P);CHKERRQ(ierr);
  ierr = VecDotRealPart(R,R,&rtr);CHKERRQ(ierr);

  for (i=0; i<=maxit; i++) {
    ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its++;
    ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);

    /* Compute:  asp = D^{-T}*A*D^{-1}*p  */
    ierr = PCApplySymmetricRight(pc,P,WA);CHKERRQ(ierr);
    ierr = KSP_MatMult(ksp,Amat,WA,WA2);CHKERRQ(ierr);
    ierr = PCApplySymmetricLeft(pc,WA2,ASP);CHKERRQ(ierr);

    /* Check for negative curvature */
    ierr = VecDotRealPart(P,ASP,&ptasp);CHKERRQ(ierr);
    if (ptasp <= 0.0) {

      /* Scaled negative curvature direction:  Compute a step so that
         ||w + step*p|| = delta and QS(w + step*p) is least */

      if (!i) {
        /* first iteration: w = 0, so just stretch p to the boundary */
        ierr  = VecCopy(P,X);CHKERRQ(ierr);
        ierr  = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr);
        scale = pcgP->delta / xnorm;
        ierr  = VecScale(X,scale);CHKERRQ(ierr);
      } else {
        /* Compute roots of quadratic and keep the one with the smaller model value */
        ierr = KSPQCGQuadraticRoots(W,P,pcgP->delta,&step1,&step2);CHKERRQ(ierr);
        ierr = VecDotRealPart(W,ASP,&wtasp);CHKERRQ(ierr);
        ierr = VecDotRealPart(BS,P,&bstp);CHKERRQ(ierr);
        ierr = VecCopy(W,X);CHKERRQ(ierr);
        qs1  = step1*(bstp + wtasp + .5*step1*ptasp);
        qs2  = step2*(bstp + wtasp + .5*step2*ptasp);
        if (qs1 <= qs2) {
          ierr = VecAXPY(X,step1,P);CHKERRQ(ierr);
        } else {
          ierr = VecAXPY(X,step2,P);CHKERRQ(ierr);
        }
      }
      pcgP->ltsnrm = pcgP->delta;                       /* convergence in direction of */
      ksp->reason  = KSP_CONVERGED_CG_NEG_CURVE;        /* negative curvature */
      if (!i) {
        ierr = PetscInfo1(ksp,"negative curvature: delta=%g\n",(double)pcgP->delta);CHKERRQ(ierr);
      } else {
        ierr = PetscInfo3(ksp,"negative curvature: step1=%g, step2=%g, delta=%g\n",(double)step1,(double)step2,(double)pcgP->delta);CHKERRQ(ierr);
      }

    } else {
      /* Compute step along p */
      step = rtr/ptasp;
      ierr = VecCopy(W,X);CHKERRQ(ierr);                /*  x = w  */
      ierr = VecAXPY(X,step,P);CHKERRQ(ierr);           /*  x <- step*p + x  */
      ierr = VecNorm(X,NORM_2,&pcgP->ltsnrm);CHKERRQ(ierr);

      if (pcgP->ltsnrm > pcgP->delta) {
        /* Since the trial iterate is outside the trust region,
           evaluate a constrained step along p so that
           ||w + step*p|| = delta
           The positive step is always better in this case. */
        if (!i) {
          scale = pcgP->delta / pcgP->ltsnrm;
          ierr  = VecScale(X,scale);CHKERRQ(ierr);
        } else {
          /* Compute roots of quadratic */
          ierr = KSPQCGQuadraticRoots(W,P,pcgP->delta,&step1,&step2);CHKERRQ(ierr);
          ierr = VecCopy(W,X);CHKERRQ(ierr);
          ierr = VecAXPY(X,step1,P);CHKERRQ(ierr);      /*  x <- step1*p + x  */
        }
        pcgP->ltsnrm = pcgP->delta;
        ksp->reason  = KSP_CONVERGED_CG_CONSTRAINED;    /* convergence along constrained step */
        if (!i) {
          ierr = PetscInfo1(ksp,"constrained step: delta=%g\n",(double)pcgP->delta);CHKERRQ(ierr);
        } else {
          ierr = PetscInfo3(ksp,"constrained step: step1=%g, step2=%g, delta=%g\n",(double)step1,(double)step2,(double)pcgP->delta);CHKERRQ(ierr);
        }

      } else {
        /* Evaluate the current step */
        ierr = VecCopy(X,W);CHKERRQ(ierr);              /* update interior iterate */
        ierr = VecAXPY(R,-step,ASP);CHKERRQ(ierr);      /* r <- -step*asp + r */
        ierr = VecNorm(R,NORM_2,&rnrm);CHKERRQ(ierr);

        ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
        ksp->rnorm = rnrm;
        ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        ierr       = KSPLogResidualHistory(ksp,rnrm);CHKERRQ(ierr);
        ierr       = KSPMonitor(ksp,i+1,rnrm);CHKERRQ(ierr);
        ierr       = (*ksp->converged)(ksp,i+1,rnrm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
        if (ksp->reason) {                              /* convergence for truncated step */
          ierr = PetscInfo3(ksp,"truncated step: step=%g, rnrm=%g, delta=%g\n",(double)PetscRealPart(step),(double)rnrm,(double)pcgP->delta);CHKERRQ(ierr);
        }
      }
    }

    if (ksp->reason) break;   /* Convergence has been attained */

    /* Compute a new AS-orthogonal direction */
    ierr = VecDot(R,R,&rntrn);CHKERRQ(ierr);
    beta = rntrn/rtr;
    ierr = VecAYPX(P,beta,R);CHKERRQ(ierr);             /*  p <- r + beta*p  */
    rtr  = PetscRealPart(rntrn);
  }
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;

  /* Unscale x */
  ierr = VecCopy(X,WA2);CHKERRQ(ierr);
  ierr = PCApplySymmetricRight(pc,WA2,X);CHKERRQ(ierr);

  /* Value of the quadratic model at the returned step */
  ierr = KSP_MatMult(ksp,Amat,X,WA);CHKERRQ(ierr);
  ierr = VecDotRealPart(B,X,&btx);CHKERRQ(ierr);
  ierr = VecDotRealPart(X,WA,&xtax);CHKERRQ(ierr);

  pcgP->quadratic = btx + .5*xtax;
  PetscFunctionReturn(0);
}
/*
   MatFDColoringSetUp_MPIXAIJ - Builds the per-color tables a MatFDColoring
   needs for an MPIAIJ or MPIBAIJ matrix.

   Input Parameters:
+  mat        - the parallel matrix (MPIBAIJ is detected; anything else is
                treated through the MPIAIJ data structures with bs = 1)
.  iscoloring - the coloring; one index set of columns per color
-  c          - the finite-difference coloring context to fill in

   For every color this records the local column list (c->columns[i],
   c->ncolumns[i]), the number of rows touched (c->nrows[i]) and, for each
   touched local row, an entry holding the row, (for htype 'd') the local
   column, and the address of the matrix value to be written. For htype 'd'
   with a global coloring a ghosted vector c->vscale is also created.
*/
PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb;
  const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL;
  PetscInt               nis=iscoloring->n,nctot,*cols;
  IS                     *isa;
  ISLocalToGlobalMapping map=mat->cmap->mapping;
  PetscInt               ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx;
  Mat                    A,B;
  PetscScalar            *A_val,*B_val,**valaddrhit;
  MatEntry               *Jentry;
  MatEntry2              *Jentry2;
  PetscBool              isBAIJ;
  PetscInt               bcols=c->bcols;
#if defined(PETSC_USE_CTABLE)
  PetscTable             colmap=NULL;
#else
  PetscInt               *colmap=NULL;     /* local col number of off-diag col */
#endif

  PetscFunctionBegin;
  if (ctype == IS_COLORING_GHOSTED) {
    if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
    ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);
  }

  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);
  if (isBAIJ) {
    Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
    Mat_SeqBAIJ *spA,*spB;
    A = baij->A;  spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a;
    B = baij->B;  spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a;
    nz = spA->nz + spB->nz; /* total nonzero entries of mat */
    if (!baij->colmap) {
      ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
    }
    /* pick up the map whether it was just built or already existed */
    colmap = baij->colmap;
    ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
    ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);

    if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') {  /* create vscale for storing dx */
      PetscInt    *garray;
      ierr = PetscMalloc1(B->cmap->n,&garray);CHKERRQ(ierr);
      /* expand the block-column ghost list into point-column indices */
      for (i=0; i<baij->B->cmap->n/bs; i++) {
        for (j=0; j<bs; j++) {
          garray[i*bs+j] = bs*baij->garray[i]+j;
        }
      }
      ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
      ierr = PetscFree(garray);CHKERRQ(ierr);
    }
  } else {
    Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data;
    Mat_SeqAIJ *spA,*spB;
    A = aij->A;  spA = (Mat_SeqAIJ*)A->data; A_val = spA->a;
    B = aij->B;  spB = (Mat_SeqAIJ*)B->data; B_val = spB->a;
    nz = spA->nz + spB->nz; /* total nonzero entries of mat */
    if (!aij->colmap) {
      /* Allow access to data structures of local part of matrix
       - creates aij->colmap which maps global column number to local number in part B */
      ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
    }
    /* pick up the map whether it was just built or already existed */
    colmap = aij->colmap;
    ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
    ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);

    bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

    if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
      ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr);
    }
  }

  m      = mat->rmap->n/bs;
  cstart = mat->cmap->rstart/bs;
  cend   = mat->cmap->rend/bs;

  ierr = PetscMalloc1(nis,&c->ncolumns);CHKERRQ(ierr);
  ierr = PetscMalloc1(nis,&c->columns);CHKERRQ(ierr);
  ierr = PetscMalloc1(nis,&c->nrows);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);

  if (c->htype[0] == 'd') {
    ierr = PetscMalloc1(nz,&Jentry);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr);
    c->matentry = Jentry;
  } else if (c->htype[0] == 'w') {
    ierr = PetscMalloc1(nz,&Jentry2);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr);
    c->matentry2 = Jentry2;
  } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported");

  ierr = PetscMalloc2(m+1,&rowhit,m+1,&valaddrhit);CHKERRQ(ierr);
  nz   = 0;   /* from here on nz counts entries written to Jentry/Jentry2 */
  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  for (i=0; i<nis; i++) { /* for each local color */
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);

    c->ncolumns[i] = n; /* local number of columns of this color on this process */
    if (n) {
      ierr = PetscMalloc1(n,&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL) {
      /* Determine nctot, the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,&ncolsonproc,size,&disp);CHKERRQ(ierr);

      /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get cols, the complete list of columns for this color on each process */
      ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr);
      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED) {
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");

    /* Mark all rows affected by these columns */
    ierr    = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr);
    bs2     = bs*bs;
    nrows_i = 0;
    for (j=0; j<nctot; j++) { /* loop over columns*/
      if (ctype == IS_COLORING_GHOSTED) {
        col = ltog[cols[j]];
      } else {
        col = cols[j];
      }
      if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
        row      = A_cj + A_ci[col-cstart];
        nrows    = A_ci[col-cstart+1] - A_ci[col-cstart];
        nrows_i += nrows;
        /* loop over the rows of this column of A, marking them in rowhit */
        for (k=0; k<nrows; k++) {
          /* set valaddrhit for part A */
          spidx            = bs2*spidxA[A_ci[col-cstart] + k];
          valaddrhit[*row] = &A_val[spidx];
          rowhit[*row++]   = col - cstart + 1; /* local column index */
        }
      } else { /* column is in B, off-diagonal block of mat */
#if defined(PETSC_USE_CTABLE)
        ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr);
        colb--;
#else
        colb = colmap[col] - 1; /* local column index */
#endif
        if (colb == -1) {
          nrows = 0;
        } else {
          colb  = colb/bs;
          row   = B_cj + B_ci[colb];
          nrows = B_ci[colb+1] - B_ci[colb];
        }
        nrows_i += nrows;
        /* loop over the rows of this column of B, marking them in rowhit */
        for (k=0; k<nrows; k++) {
          /* set valaddrhit for part B */
          spidx            = bs2*spidxB[B_ci[colb] + k];
          valaddrhit[*row] = &B_val[spidx];
          rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
        }
      }
    }
    c->nrows[i] = nrows_i;

    if (c->htype[0] == 'd') {
      for (j=0; j<m; j++) {
        if (rowhit[j]) {
          Jentry[nz].row     = j;              /* local row index */
          Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
          Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
          nz++;
        }
      }
    } else { /* c->htype == 'wp' */
      for (j=0; j<m; j++) {
        if (rowhit[j]) {
          Jentry2[nz].row     = j;              /* local row index */
          Jentry2[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
          nz++;
        }
      }
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
    ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr);
  }

  if (isBAIJ) {
    ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
    ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
    ierr = PetscMalloc1(bs*mat->rmap->n,&c->dy);CHKERRQ(ierr);
  } else {
    ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
    ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
  }

  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
  ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);

  if (ctype == IS_COLORING_GHOSTED) {
    ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);
  }
  ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode BSSCR_KSPNormInfConverged(KSP ksp,PetscInt n,PetscReal rnorm,KSPConvergedReason *reason,void *ctx) { PetscErrorCode ierr; KSPPWConvergedCtx *cctx = (KSPPWConvergedCtx*)ctx; KSPNormType normtype; PetscReal min, max, R_max, R_min, R_Ninf; Vec R, work, w1,w2; PetscFunctionBegin; PetscValidHeaderSpecific(ksp,KSP_COOKIE,1); PetscValidPointer(reason,4); *reason = KSP_CONVERGED_ITERATING; ierr = VecDuplicate(ksp->vec_rhs,&work);CHKERRQ(ierr); ierr = VecDuplicate(ksp->vec_rhs,&w1);CHKERRQ(ierr); ierr = VecDuplicate(ksp->vec_rhs,&w2);CHKERRQ(ierr); KSPBuildResidual( ksp, w1,w2, &R ); VecNorm( R, NORM_INFINITY, &R_Ninf ); //PetscPrintf( PETSC_COMM_WORLD, "Norm inf convergence %s\n ", ksp->prefix); cctx->pointwise_max = R_Ninf; ierr = KSPGetNormType(ksp,&normtype); CHKERRQ(ierr); if (normtype == KSP_NORM_NO) Stg_SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Use BSSCR_KSPSkipConverged() with KSPNormType of KSP_NORM_NO"); if (!cctx) Stg_SETERRQ(PETSC_ERR_ARG_NULL,"Convergence context must have been created with BSSCR_KSPDefaultConvergedCreate()"); if (!n) { /* if user gives initial guess need to compute norm of b */ if (!ksp->guess_zero && !cctx->initialrtol) { PetscReal snorm; if (ksp->normtype == KSP_NORM_UNPRECONDITIONED || ksp->pc_side == PC_RIGHT) { ierr = PetscInfo(ksp,"user has provided nonzero initial guess, computing 2-norm of RHS\n"); CHKERRQ(ierr); ierr = VecNorm(ksp->vec_rhs,NORM_INFINITY,&snorm);CHKERRQ(ierr); /* <- b'*b */ PetscPrintf( PETSC_COMM_WORLD, "Non Zero Guess; RHS - %g\n", snorm); } else { Vec z; if (!cctx->work) { ierr = VecDuplicate(ksp->vec_rhs,&cctx->work);CHKERRQ(ierr); } z = cctx->work; ierr = KSP_PCApply(ksp,ksp->vec_rhs,z);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = PetscInfo(ksp,"user has provided nonzero initial guess, computing 2-norm of preconditioned RHS\n");CHKERRQ(ierr); ierr = VecNorm(z,NORM_INFINITY,&snorm);CHKERRQ(ierr); /* dp <- b'*B'*B*b */ } else if (ksp->normtype == KSP_NORM_NATURAL) { PetscScalar norm; 
Vec bz; ierr = PetscInfo(ksp,"user has provided nonzero initial guess, computing natural norm of RHS\n");CHKERRQ(ierr); // ierr = VecDot(ksp->vec_rhs,z,&norm); // snorm = sqrt(PetscAbsScalar(norm)); /* dp <- b'*B*b */ VecDuplicate( z, &bz ); VecPointwiseMult( bz, ksp->vec_rhs, z ); ierr = VecNorm(bz,NORM_INFINITY,&snorm);CHKERRQ(ierr); Stg_VecDestroy(&bz); } } /* handle special case of zero RHS and nonzero guess */ if (!snorm) { ierr = PetscInfo(ksp,"Special case, user has provided nonzero initial guess and zero RHS\n");CHKERRQ(ierr); snorm = rnorm; } if (cctx->mininitialrtol) { ksp->rnorm0 = PetscMin(snorm,rnorm); } else { ksp->rnorm0 = snorm; } } else { ksp->rnorm0 = rnorm; } ksp->ttol = PetscMax(ksp->rtol*ksp->rnorm0,ksp->abstol); } // if (n <= ksp->chknorm) PetscFunctionReturn(0); if ( R_Ninf != R_Ninf ) { ierr = PetscInfo(ksp,"Linear solver has created a not a number (NaN) as the pointwise residual norm, declaring divergence \n");CHKERRQ(ierr); *reason = KSP_DIVERGED_NAN; } else if (R_Ninf <= ksp->ttol) { if (R_Ninf < ksp->abstol) { ierr = PetscInfo3(ksp,"Linear solver has converged. Pointwise residual %G is less than absolute tolerance %G at iteration %D\n",R_Ninf,ksp->abstol,n); CHKERRQ(ierr); *reason = KSP_CONVERGED_ATOL; } else { if (cctx->initialrtol) { ierr = PetscInfo4(ksp,"Linear solver has converged. Norm_infinity %G is less than relative tolerance %G times initial Norm_infinity %G at iteration %D\n",R_Ninf,ksp->rtol,ksp->rnorm0,n); CHKERRQ(ierr); } else { ierr = PetscInfo4(ksp,"Linear solver has converged. Norm_infinity %G is less than relative tolerance %G times initial norm_infinity right hand side %G at iteration %D\n",R_Ninf,ksp->rtol,ksp->rnorm0,n);CHKERRQ(ierr); } *reason = KSP_CONVERGED_RTOL; } } else if (R_Ninf >= ksp->divtol*ksp->rnorm0) { ierr = PetscInfo3(ksp,"Linear solver is diverging. 
Initial right hand size Norm_infinity value %G, current residual norm %G at iteration %D\n",ksp->rnorm0,R_Ninf,n);CHKERRQ(ierr); *reason = KSP_DIVERGED_DTOL; } /* trash all work vectors here */ Stg_VecDestroy(&work); Stg_VecDestroy(&w1); Stg_VecDestroy(&w2); PetscFunctionReturn(0); }
/*
   FluidFieldSetup - Assembles the fluid operator, creates the solution/work
   vectors and configures the nested solver stored in f->ksp.

   Solver layout built here:
     outer  : FGMRES with a Schur-complement PCFIELDSPLIT, splits "v" and "p"
     sub[1] : GMRES, no preconditioner, at most 4 iterations
     sub[0] : PREONLY with an additive PCFIELDSPLIT, block size 3 (3D) / 2 (2D)

   Input Parameter:
.  f - the fluid field; f->daV, f->daB, f->comm and f->is3D are read, and
       f->ga, f->rhs, f->vel, f->vel0, f->buf and f->ksp are created.

   Returns 0 on success or the first PETSc error code encountered.
*/
PetscErrorCode FluidFieldSetup( FluidField f )
{
  PetscLogDouble t1,t2;
  PetscErrorCode ierr;
  PC             pc;
  PetscInt       nVelP;    /* PetscInt, not int: PCFieldSplitGetSubKSP() writes a PetscInt */
  KSP            *kspVelP; /* array allocated by PETSc, released below with PetscFree()  */

  PetscFunctionBegin;
  // Assemble viscous matrices
  ierr = FluidFieldMatAssemble( f ); CHKERRQ(ierr);

  ierr = PetscInfo3( 0, "Lengths: %e %e %e\n", f->lens.x, f->lens.y, f->lens.z ); CHKERRQ(ierr);
  ierr = PetscInfo3( 0, "Size: %d %d %d\n", f->dims.x, f->dims.y, f->dims.z ); CHKERRQ(ierr);
  ierr = PetscInfo3( 0, "dx: %e %e %e\n", f->dh.x, f->dh.y, f->dh.z ); CHKERRQ(ierr);
  ierr = PetscTime(&t1); CHKERRQ(ierr);

  // Create vectors
  ierr = GACreate( f->daV, &f->ga); CHKERRQ(ierr);
  ierr = DMCreateGlobalVector(f->daV,&f->rhs); CHKERRQ(ierr);
  ierr = VecDuplicate(f->rhs,&f->vel); CHKERRQ(ierr);
  ierr = VecDuplicate(f->rhs,&f->vel0); CHKERRQ(ierr);
  ierr = DMCreateGlobalVector(f->daB,&f->buf); CHKERRQ(ierr);

  // Set up the outer solver
  ierr = KSPCreate(f->comm,&f->ksp); CHKERRQ(ierr);
  ierr = KSPSetOperators(f->ksp,f->mat,f->mat, SAME_PRECONDITIONER); CHKERRQ(ierr);
  ierr = KSPSetType(f->ksp,KSPFGMRES); CHKERRQ(ierr);
  ierr = KSPSetInitialGuessNonzero(f->ksp,PETSC_TRUE); CHKERRQ(ierr);
  ierr = KSPSetTolerances(f->ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT); CHKERRQ(ierr);
  ierr = KSPSetFromOptions(f->ksp);CHKERRQ(ierr);

  // Split pressure from velocity [ u v w | p ]
  ierr = KSPGetPC(f->ksp,&pc); CHKERRQ(ierr);
  ierr = PCSetType(pc, PCFIELDSPLIT); CHKERRQ(ierr);
  ierr = PCFieldSplitSetType(pc,PC_COMPOSITE_SCHUR); CHKERRQ(ierr);
  if( f->is3D ) {
    const PetscInt ufields[] = {U_FACE,V_FACE,W_FACE};
    const PetscInt pfields[] = {CELL_CENTER};
    ierr = PCFieldSplitSetBlockSize(pc,4); CHKERRQ(ierr);                  // [p u v w]
    ierr = PCFieldSplitSetFields(pc,"v",3,ufields,ufields); CHKERRQ(ierr); // [u v w]
    ierr = PCFieldSplitSetFields(pc,"p",1,pfields,pfields); CHKERRQ(ierr); // [ p ]
  } else {
    const PetscInt ufields[] = {U_FACE,V_FACE};
    const PetscInt pfields[] = {CELL_CENTER};
    ierr = PCFieldSplitSetBlockSize(pc,3); CHKERRQ(ierr);                  // [p u v]
    ierr = PCFieldSplitSetFields(pc,"v",2,ufields,ufields); CHKERRQ(ierr); // [u v]
    ierr = PCFieldSplitSetFields(pc,"p",1,pfields,pfields); CHKERRQ(ierr); // [ p ]
  }
  ierr = PCSetUp(pc); CHKERRQ(ierr);

  // Configure the two sub-solvers created by the Schur split
  ierr = PCFieldSplitGetSubKSP(pc,&nVelP,&kspVelP); CHKERRQ(ierr);
  ierr = KSPSetTolerances(kspVelP[1],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,4); CHKERRQ(ierr);
  ierr = KSPSetType(kspVelP[1],KSPGMRES); CHKERRQ(ierr);
  ierr = KSPGetPC(kspVelP[1],&pc); CHKERRQ(ierr);
  ierr = PCSetType(pc,PCNONE); CHKERRQ(ierr);
  ierr = KSPSetFromOptions(kspVelP[1]);CHKERRQ(ierr);

  // Split velocity [u v w] into component matrices [u], [v], [w]
  ierr = KSPSetType(kspVelP[0],KSPPREONLY); CHKERRQ(ierr);
  ierr = KSPGetPC(kspVelP[0],&pc); CHKERRQ(ierr);
  ierr = PCSetType(pc, PCFIELDSPLIT); CHKERRQ(ierr);
  ierr = PCFieldSplitSetType(pc,PC_COMPOSITE_ADDITIVE); CHKERRQ(ierr);
  ierr = PCFieldSplitSetBlockSize(pc,f->is3D?3:2); CHKERRQ(ierr);
  ierr = PCSetUp(pc); CHKERRQ(ierr);

  /* The caller owns the array returned by PCFieldSplitGetSubKSP() (only the
     array, not the KSP objects it points at); it was previously leaked. */
  ierr = PetscFree(kspVelP); CHKERRQ(ierr);

  /* Set solver for each velocity component
   *   Split component velocity as parallel blocks along processors
   *   Use direct solver for each block
   * TODO: use MG, w/FFT on coarse grid
   */
  ierr = PetscTime(&t2); CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"Finished Solver Setup: %f sec\n",t2-t1); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatPtAPSymbolic_MPIAIJ_MPIAIJ - Symbolic phase of C = P^T * A * P for
   MPIAIJ matrices: determines the nonzero structure of C, preallocates it and
   attaches the supporting data needed by the numeric phase.

   Input Parameters:
+  A    - MPIAIJ matrix; its local row and column ranges must match P's row range
.  P    - MPIAIJ matrix
-  fill - expected fill ratio, used to size the initial free-space lists

   Output Parameter:
.  C - the new, not yet assembled, parallel matrix

   Stages (in order):
     1. gather P_oth (off-process rows of P hit by A) and P_loc
     2. symbolic AP = A_diag*P_loc + A_off*P_oth          -> api/apj
     3. symbolic Co = (p->B)^T*AP, rows owned by others   -> coi/coj
     4. exchange coj then coi with the owning processes
     5. symbolic local C = (p->A)^T*AP merged with received rows -> pti/ptj
     6. create/preallocate Cmpi and hang ptap/merge off it
*/
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode      ierr;
  Mat                 Cmpi;
  Mat_PtAPMPI         *ptap;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  Mat_MPIAIJ          *a =(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c;
  Mat_SeqAIJ          *ad =(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ          *p_loc,*p_oth;
  PetscInt            *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pdti,*pdtj,*poti,*potj,*ptJ;
  PetscInt            *adi=ad->i,*aj,*aoi=ao->i,nnz;
  PetscInt            *lnk,*owners_co,*coi,*coj,i,k,pnz,row;
  PetscInt            am=A->rmap->n,pN=P->cmap->N,pm=P->rmap->n,pn=P->cmap->n;
  PetscBT             lnkbt;
  MPI_Comm            comm;
  PetscMPIInt         size,rank,tagi,tagj,*len_si,*len_s,*len_ri,icompleted=0;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            len,proc,*dnz,*onz,*owners;
  PetscInt            nzi,*pti,*ptj;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci;
  MPI_Request         *swaits,*rwaits;
  MPI_Status          *sstatus,rstatus;
  Mat_Merge_SeqsToMPI *merge;
  PetscInt            *api,*apj,*Jptr,apnz,*prmap=p->garray,pon,nspacedouble=0,j,ap_rmax=0;
  PetscReal           afill=1.0,afill_tmp;
  PetscInt            rmax;
#if defined(PTAP_PROFILE)
  PetscLogDouble      t0,t1,t2,t3,t4;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
#if defined(PTAP_PROFILE)
  ierr = PetscTime(&t0);CHKERRQ(ierr);
#endif

  /* check if matrix local sizes are compatible */
  if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
  }
  if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* create struct Mat_PtAPMPI and attach it to C later */
  ierr        = PetscNew(&ptap);CHKERRQ(ierr);
  ierr        = PetscNew(&merge);CHKERRQ(ierr);
  ptap->merge = merge;
  ptap->reuse = MAT_INITIAL_MATRIX;

  /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr);

  /* get P_loc by taking all local rows of P */
  ierr = MatMPIAIJGetLocalMat(P,MAT_INITIAL_MATRIX,&ptap->P_loc);CHKERRQ(ierr);

  p_loc  = (Mat_SeqAIJ*)(ptap->P_loc)->data;
  p_oth  = (Mat_SeqAIJ*)(ptap->P_oth)->data;
  pi_loc = p_loc->i; pj_loc = p_loc->j;
  pi_oth = p_oth->i; pj_oth = p_oth->j;
#if defined(PTAP_PROFILE)
  ierr = PetscTime(&t1);CHKERRQ(ierr);
#endif

  /* first, compute symbolic AP = A_loc*P = A_diag*P_loc + A_off*P_oth */
  /*-------------------------------------------------------------------*/
  ierr   = PetscMalloc1((am+1),&api);CHKERRQ(ierr);
  api[0] = 0;

  /* create and initialize a linked list */
  ierr = PetscLLCondensedCreate(pN,pN,&lnk,&lnkbt);CHKERRQ(ierr);

  /* Initial FreeSpace size is fill*(nnz(A) + nnz(P)) -OOM for ex56, np=8k on Intrepid! */
  ierr = PetscFreeSpaceGet((PetscInt)(fill*(adi[am]+aoi[am]+pi_loc[pm])),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  for (i=0; i<am; i++) {
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    aj  = ad->j + adi[i];
    for (j=0; j<nzi; j++) {
      row  = aj[j];
      pnz  = pi_loc[row+1] - pi_loc[row];
      Jptr = pj_loc + pi_loc[row];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr);
    }
    /* off-diagonal portion of A */
    nzi = aoi[i+1] - aoi[i];
    aj  = ao->j + aoi[i];
    for (j=0; j<nzi; j++) {
      row  = aj[j];
      pnz  = pi_oth[row+1] - pi_oth[row];
      Jptr = pj_oth + pi_oth[row];
      ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr);
    }
    apnz     = lnk[0];
    api[i+1] = api[i] + apnz;
    if (ap_rmax < apnz) ap_rmax = apnz;

    /* if free space is not available, double the total space in the list */
    if (current_space->local_remaining<apnz) {
      ierr = PetscFreeSpaceGet(apnz+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }

    /* Copy data into free space, then initialize lnk */
    ierr = PetscLLCondensedClean(pN,apnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr);

    current_space->array           += apnz;
    current_space->local_used      += apnz;
    current_space->local_remaining -= apnz;
  }

  /* Allocate space for apj, initialize apj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr      = PetscMalloc1((api[am]+1),&apj);CHKERRQ(ierr);
  ierr      = PetscFreeSpaceContiguous(&free_space,apj);CHKERRQ(ierr);
  /* the +1 keeps the denominator non-zero for empty operands */
  afill_tmp = (PetscReal)api[am]/(adi[am]+aoi[am]+pi_loc[pm]+1);
  if (afill_tmp > afill) afill = afill_tmp;

#if defined(PTAP_PROFILE)
  ierr = PetscTime(&t2);CHKERRQ(ierr);
#endif

  /* determine symbolic Co=(p->B)^T*AP - send to others */
  /*----------------------------------------------------*/
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&poti,&potj);CHKERRQ(ierr);

  /* then, compute symbolic Co = (p->B)^T*AP */
  pon    = (p->B)->cmap->n; /* total num of rows to be sent to other processors
                               >= (num of nonzero rows of C_seq) - pn */
  ierr   = PetscMalloc1((pon+1),&coi);CHKERRQ(ierr);
  coi[0] = 0;

  /* set initial free space to be fill*(nnz(p->B) + nnz(AP)) */
  nnz           = fill*(poti[pon] + api[am]);
  ierr          = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  for (i=0; i<pon; i++) {
    pnz = poti[i+1] - poti[i];
    ptJ = potj + poti[i];
    for (j=0; j<pnz; j++) {
      row  = ptJ[j]; /* row of AP == col of Pot */
      apnz = api[row+1] - api[row];
      Jptr = apj + api[row];
      /* add non-zero cols of AP into the sorted linked list lnk */
      ierr = PetscLLCondensedAddSorted(apnz,Jptr,lnk,lnkbt);CHKERRQ(ierr);
    }
    nnz = lnk[0];

    /* If free space is not available, double the total space in the list */
    if (current_space->local_remaining<nnz) {
      ierr = PetscFreeSpaceGet(nnz+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLCondensedClean(pN,nnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr);

    current_space->array           += nnz;
    current_space->local_used      += nnz;
    current_space->local_remaining -= nnz;

    coi[i+1] = coi[i] + nnz;
  }

  ierr      = PetscMalloc1((coi[pon]+1),&coj);CHKERRQ(ierr);
  ierr      = PetscFreeSpaceContiguous(&free_space,coj);CHKERRQ(ierr);
  afill_tmp = (PetscReal)coi[pon]/(poti[pon] + api[am]+1);
  if (afill_tmp > afill) afill = afill_tmp;
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->B,&poti,&potj);CHKERRQ(ierr);

  /* send j-array (coj) of Co to other processors */
  /*----------------------------------------------*/
  /* determine row ownership */
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  merge->rowmap->n  = pn;
  merge->rowmap->bs = 1;

  ierr   = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  ierr = PetscMalloc2(size,&len_si,size,&sstatus);CHKERRQ(ierr);
  ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&merge->len_s);CHKERRQ(ierr);

  len_s        = merge->len_s;
  merge->nsend = 0;

  ierr = PetscMalloc1((size+2),&owners_co);CHKERRQ(ierr);

  proc = 0;
  for (i=0; i<pon; i++) {
    /* prmap (p->garray) is walked in order, so proc only ever advances */
    while (prmap[i] >= owners[proc+1]) proc++;
    len_si[proc]++;  /* num of rows in Co to be sent to [proc] */
    len_s[proc] += coi[i+1] - coi[i];
  }

  len          = 0; /* max length of buf_si[] */
  owners_co[0] = 0;
  for (proc=0; proc<size; proc++) {
    owners_co[proc+1] = owners_co[proc] + len_si[proc];
    if (len_si[proc]) {
      merge->nsend++;
      len_si[proc] = 2*(len_si[proc] + 1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for coi and coj */
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv and Isend of coj */
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rwaits);CHKERRQ(ierr);
  ierr = PetscMalloc1((merge->nsend+1),&swaits);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners_co[proc];
    ierr = MPI_Isend(coj+coi[i],len_s[proc],MPIU_INT,proc,tagj,comm,swaits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of coj are complete */
  for (i=0; i<merge->nrecv; i++) {
    ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus);CHKERRQ(ierr);
  }
  ierr = PetscFree(rwaits);CHKERRQ(ierr);
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus);CHKERRQ(ierr);}

  /* send and recv coi */
  /*-------------------*/
  ierr   = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr   = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&rwaits);CHKERRQ(ierr);
  ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners_co[proc]; i<owners_co[proc+1]; i++) {
      nzi               = coi[i+1] - coi[i];
      buf_si_i[nrows+1] = buf_si_i[nrows] + nzi;  /* i-structure */
      buf_si[nrows+1]   = prmap[i] -owners[proc]; /* local row index */
      nrows++;
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,swaits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }
  i = merge->nrecv;
  while (i--) {
    ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus);CHKERRQ(ierr);
  }
  ierr = PetscFree(rwaits);CHKERRQ(ierr);
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus);CHKERRQ(ierr);}

  ierr = PetscFree2(len_si,sstatus);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(swaits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);

#if defined(PTAP_PROFILE)
  ierr = PetscTime(&t3);CHKERRQ(ierr);
#endif

  /* compute the local portion of C (mpi mat) */
  /*------------------------------------------*/
  ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&pdti,&pdtj);CHKERRQ(ierr);

  /* allocate pti array and free space for accumulating nonzero column info */
  ierr   = PetscMalloc1((pn+1),&pti);CHKERRQ(ierr);
  pti[0] = 0;

  /* set initial free space to be fill*(nnz(P) + nnz(AP)) */
  nnz           = fill*(pi_loc[pm] + api[am]);
  ierr          = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci);CHKERRQ(ierr);
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextci[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }
  /* NOTE: MatPreallocateInitialize()/MatPreallocateFinalize() are a matched
     pair; everything up to the Finalize call below must stay between them. */
  ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr);
  rmax = 0;
  for (i=0; i<pn; i++) {
    /* add pdt[i,:]*AP into lnk */
    pnz = pdti[i+1] - pdti[i];
    ptJ = pdtj + pdti[i];
    for (j=0; j<pnz; j++) {
      row  = ptJ[j];  /* row of AP == col of Pt */
      apnz = api[row+1] - api[row];
      Jptr = apj + api[row];
      /* add non-zero cols of AP into the sorted linked list lnk */
      ierr = PetscLLCondensedAddSorted(apnz,Jptr,lnk,lnkbt);CHKERRQ(ierr);
    }

    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        nzi  = *(nextci[k]+1) - *nextci[k];
        Jptr = buf_rj[k] + *nextci[k];
        ierr = PetscLLCondensedAddSorted(nzi,Jptr,lnk,lnkbt);CHKERRQ(ierr);
        nextrow[k]++; nextci[k]++;
      }
    }
    nnz = lnk[0];

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<nnz) {
      ierr = PetscFreeSpaceGet(nnz+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLCondensedClean(pN,nnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],nnz,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += nnz;
    current_space->local_used      += nnz;
    current_space->local_remaining -= nnz;

    pti[i+1] = pti[i] + nnz;
    if (nnz > rmax) rmax = nnz;
  }
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&pdti,&pdtj);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextci);CHKERRQ(ierr);

  ierr      = PetscMalloc1((pti[pn]+1),&ptj);CHKERRQ(ierr);
  ierr      = PetscFreeSpaceContiguous(&free_space,ptj);CHKERRQ(ierr);
  afill_tmp = (PetscReal)pti[pn]/(pi_loc[pm] + api[am]+1);
  if (afill_tmp > afill) afill = afill_tmp;
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix Cmpi */
  /*--------------------------------------*/
  ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmpi,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Cmpi,P->cmap->bs,P->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Cmpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  merge->bi        = pti;      /* Cseq->i */
  merge->bj        = ptj;      /* Cseq->j */
  merge->coi       = coi;      /* Co->i   */
  merge->coj       = coj;      /* Co->j   */
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->owners_co = owners_co;
  /* remember the stock MPIAIJ methods before they are overridden below */
  merge->destroy   = Cmpi->ops->destroy;
  merge->duplicate = Cmpi->ops->duplicate;

  /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */
  Cmpi->assembled      = PETSC_FALSE;
  Cmpi->ops->destroy   = MatDestroy_MPIAIJ_PtAP;
  Cmpi->ops->duplicate = MatDuplicate_MPIAIJ_MatPtAP;

  /* attach the supporting struct to Cmpi for reuse */
  c           = (Mat_MPIAIJ*)Cmpi->data;
  c->ptap     = ptap;
  ptap->api   = api;
  ptap->apj   = apj;
  ptap->rmax  = ap_rmax;
  *C          = Cmpi;

  /* flag 'scalable' determines which implementations to be used:
       0: do dense axpy in MatPtAPNumeric() - fast, but requires storage of a nonscalable dense array apa;
       1: do sparse axpy in MatPtAPNumeric() - might slow, uses a sparse array apa */
  /* set default scalable */
  ptap->scalable = PETSC_TRUE;

  ierr = PetscOptionsGetBool(((PetscObject)Cmpi)->prefix,"-matptap_scalable",&ptap->scalable,NULL);CHKERRQ(ierr);
  if (!ptap->scalable) {  /* Do dense axpy */
    ierr = PetscCalloc1(pN,&ptap->apa);CHKERRQ(ierr);
  } else {
    ierr = PetscCalloc1(ap_rmax+1,&ptap->apa);CHKERRQ(ierr);
  }

#if defined(PTAP_PROFILE)
  ierr = PetscTime(&t4);CHKERRQ(ierr);
  if (rank==1) {
    /* the return code is now captured; previously CHKERRQ tested a stale ierr */
    ierr = PetscPrintf(MPI_COMM_SELF,"  [%d] PtAPSymbolic %g/P + %g/AP + %g/comm + %g/PtAP = %g\n",rank,t1-t0,t2-t1,t3-t2,t4-t3,t4-t0);CHKERRQ(ierr);
  }
#endif

#if defined(PETSC_USE_INFO)
  if (pti[pn] != 0) {
    ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr);
    ierr = PetscInfo1(Cmpi,"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);CHKERRQ(ierr);
  } else {
    ierr = PetscInfo(Cmpi,"Empty matrix product\n");CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
/*
   maxIndSetAgg - Builds aggregates of the graph Gmat with a synchronous,
   maximal-independent-set (MIS) style sweep.

   Input Parameters:
+  perm        - index set giving the order in which local vertices are visited
.  Gmat        - graph matrix; its data is read as MPIAIJ when the type
                 compares equal to MATMPIAIJ, otherwise as SeqAIJ
-  strict_aggs - if true, aggregate members are recorded by global index and
                 the aggregate list holds local roots only; if false, members
                 are recorded by local index with ghosts stored at nloc+cpid

   Output Parameter:
.  a_locals_llist - if non-NULL, receives the newly created aggregate lists

   Notes:
   Each pass visits every local vertex that is still MIS_NOT_DONE.  A vertex is
   deferred while it has a not-done ghost neighbour whose global index is
   >= Iend; otherwise it is either dropped as a singleton or selected, in which
   case its not-done local neighbours are marked MIS_DELETED and appended to
   its aggregate.  In parallel the local states are then broadcast to the ghost
   copies, boundary vertices adjacent to a selected ghost are deleted, and the
   loop stops once the global count of unfinished vertices reaches zero.

   The result of the MATSEQAIJ type comparison (isAIJ) is not checked.
   MIS_NOT_DONE, MIS_DELETED and MIS_IS_SELECTED() are defined outside this
   function.
*/
PetscErrorCode maxIndSetAgg(IS perm,Mat Gmat,PetscBool strict_aggs,PetscCoarsenData **a_locals_llist)
{
  PetscErrorCode   ierr;
  Mat_SeqAIJ       *matA,*matB=NULL;
  Mat_MPIAIJ       *mpimat=NULL;
  MPI_Comm         comm;
  PetscInt         num_fine_ghosts,kk,n,ix,j,*idx,*ii,iter,Iend,my0,nremoved,gid,lid,cpid,lidj,sgid,t1,t2,slid,nDone,nselected=0,state,statej;
  PetscInt         *cpcol_gid,*cpcol_state,*lid_cprowID,*lid_gid,*cpcol_sel_gid,*icpcol_gid,*lid_state,*lid_parent_gid=NULL;
  PetscBool        *lid_removed;
  PetscBool        isMPI,isAIJ,isOK;
  const PetscInt   *perm_ix;
  const PetscInt   nloc = Gmat->rmap->n;
  PetscCoarsenData *agg_lists;
  PetscLayout      layout;
  PetscSF          sf;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Gmat,&comm); CHKERRQ(ierr);

  /* get submatrices: matA is the local (diagonal) block, matB the off-process block */
  ierr = PetscObjectTypeCompare((PetscObject)Gmat,MATMPIAIJ,&isMPI); CHKERRQ(ierr);
  if (isMPI) {
    mpimat = (Mat_MPIAIJ*)Gmat->data;
    matA   = (Mat_SeqAIJ*)mpimat->A->data;
    matB   = (Mat_SeqAIJ*)mpimat->B->data;
    /* force compressed storage of B (the loops below index matB->compressedrow directly) */
    ierr = MatCheckCompressedRow(mpimat->B,matB->nonzerorowcnt,&matB->compressedrow,matB->i,Gmat->rmap->n,-1.0); CHKERRQ(ierr);
  } else {
    /* NOTE(review): isAIJ is computed but never tested afterwards */
    ierr = PetscObjectTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isAIJ); CHKERRQ(ierr);
    matA = (Mat_SeqAIJ*)Gmat->data;
  }
  ierr = MatGetOwnershipRange(Gmat,&my0,&Iend); CHKERRQ(ierr);
  ierr = PetscMalloc1(nloc,&lid_gid); CHKERRQ(ierr); /* explicit array needed */
  if (mpimat) {
    /* local id -> global id, broadcast so that each ghost slot learns its global id */
    for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
      lid_gid[kk] = gid;
    }
    ierr = VecGetLocalSize(mpimat->lvec, &num_fine_ghosts); CHKERRQ(ierr);
    ierr = PetscMalloc1(num_fine_ghosts,&cpcol_gid); CHKERRQ(ierr);
    ierr = PetscMalloc1(num_fine_ghosts,&cpcol_state); CHKERRQ(ierr);
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)Gmat),&sf); CHKERRQ(ierr);
    ierr = MatGetLayouts(Gmat,&layout,NULL); CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,layout,num_fine_ghosts,NULL,PETSC_COPY_VALUES,mpimat->garray); CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(sf,MPIU_INT,lid_gid,cpcol_gid); CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_INT,lid_gid,cpcol_gid); CHKERRQ(ierr);
    for (kk=0; kk<num_fine_ghosts; kk++) {
      cpcol_state[kk]=MIS_NOT_DONE;
    }
  } else num_fine_ghosts = 0;

  ierr = PetscMalloc1(nloc, &lid_cprowID); CHKERRQ(ierr);
  ierr = PetscMalloc1(nloc, &lid_removed); CHKERRQ(ierr); /* explicit array needed */
  if (strict_aggs) {
    ierr = PetscMalloc1(nloc,&lid_parent_gid); CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&lid_state); CHKERRQ(ierr);

  /* has ghost nodes for !strict and uses local indexing (yuck) */
  ierr = PetscCDCreate(strict_aggs ? nloc : num_fine_ghosts+nloc, &agg_lists); CHKERRQ(ierr);
  if (a_locals_llist) *a_locals_llist = agg_lists;

  /* need an inverse map - locals */
  for (kk=0; kk<nloc; kk++) {
    lid_cprowID[kk] = -1;   /* -1: this local row has no entry in B's compressed-row list */
    lid_removed[kk] = PETSC_FALSE;
    if (strict_aggs) {
      lid_parent_gid[kk] = -1.0;   /* stored into a PetscInt array, i.e. -1 */
    }
    lid_state[kk] = MIS_NOT_DONE;
  }
  /* set index into compressed row 'lid_cprowID' */
  if (matB) {
    for (ix=0; ix<matB->compressedrow.nrows; ix++) {
      lid = matB->compressedrow.rindex[ix];
      lid_cprowID[lid] = ix;
    }
  }
  /* MIS */
  iter = nremoved = nDone = 0;
  ierr = ISGetIndices(perm, &perm_ix); CHKERRQ(ierr);
  /* the condition is always true; the loop is left only through the breaks at its end */
  while (nDone < nloc || PETSC_TRUE) { /* asynchronous not implemented */
    iter++;
    /* check all vertices */
    for (kk=0; kk<nloc; kk++) {
      lid   = perm_ix[kk];
      state = lid_state[lid];
      if (lid_removed[lid]) continue;
      if (state == MIS_NOT_DONE) {
        /* parallel test, delete if selected ghost */
        isOK = PETSC_TRUE;
        if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
          ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
          idx = matB->j + ii[ix];
          for (j=0; j<n; j++) {
            cpid   = idx[j]; /* compressed row ID in B mat */
            gid    = cpcol_gid[cpid];
            statej = cpcol_state[cpid];
            if (statej == MIS_NOT_DONE && gid >= Iend) { /* should be (pe>rank), use gid as pe proxy */
              isOK = PETSC_FALSE; /* can not delete */
              break;
            }
          }
        } /* parallel test */
        if (isOK) { /* select or remove this vertex */
          nDone++;
          /* check for singleton */
          ii = matA->i; n = ii[lid+1] - ii[lid];
          if (n < 2) {
            /* if I have any ghost adj then not a sing */
            ix = lid_cprowID[lid];
            if (ix==-1 || (matB->compressedrow.i[ix+1]-matB->compressedrow.i[ix])==0) {
              nremoved++;
              lid_removed[lid] = PETSC_TRUE;
              /* should select this because it is technically in the MIS but lets not */
              continue; /* one local adj (me) and no ghost - singleton */
            }
          }
          /* SELECTED state encoded with global index */
          lid_state[lid] = lid+my0; /* needed???? */
          nselected++;
          /* the root is the first member of its own aggregate */
          if (strict_aggs) {
            ierr = PetscCDAppendID(agg_lists, lid, lid+my0); CHKERRQ(ierr);
          } else {
            ierr = PetscCDAppendID(agg_lists, lid, lid); CHKERRQ(ierr);
          }
          /* delete local adj */
          idx = matA->j + ii[lid];
          for (j=0; j<n; j++) {
            lidj   = idx[j];
            statej = lid_state[lidj];
            if (statej == MIS_NOT_DONE) {
              nDone++;
              if (strict_aggs) {
                ierr = PetscCDAppendID(agg_lists, lid, lidj+my0); CHKERRQ(ierr);
              } else {
                ierr = PetscCDAppendID(agg_lists, lid, lidj); CHKERRQ(ierr);
              }
              lid_state[lidj] = MIS_DELETED;  /* delete this */
            }
          }
          /* delete ghost adj of lid - deleted ghost done later for strict_aggs */
          if (!strict_aggs) {
            if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
              ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
              idx = matB->j + ii[ix];
              for (j=0; j<n; j++) {
                cpid   = idx[j]; /* compressed row ID in B mat */
                statej = cpcol_state[cpid];
                if (statej == MIS_NOT_DONE) {
                  /* ghosts are addressed after the nloc local entries */
                  ierr = PetscCDAppendID(agg_lists, lid, nloc+cpid); CHKERRQ(ierr);
                }
              }
            }
          }
        } /* selected */
      } /* not done vertex */
    } /* vertex loop */

    /* update ghost states and count todos */
    if (mpimat) {
      /* scatter states, check for done */
      ierr = PetscSFBcastBegin(sf,MPIU_INT,lid_state,cpcol_state); CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf,MPIU_INT,lid_state,cpcol_state); CHKERRQ(ierr);
      ii   = matB->compressedrow.i;
      for (ix=0; ix<matB->compressedrow.nrows; ix++) {
        lid   = matB->compressedrow.rindex[ix]; /* local boundary node */
        state = lid_state[lid];
        if (state == MIS_NOT_DONE) {
          /* look at ghosts */
          n   = ii[ix+1] - ii[ix];
          idx = matB->j + ii[ix];
          for (j=0; j<n; j++) {
            cpid   = idx[j]; /* compressed row ID in B mat */
            statej = cpcol_state[cpid];
            if (MIS_IS_SELECTED(statej)) { /* lid is now deleted, do it */
              nDone++;
              lid_state[lid] = MIS_DELETED; /* delete this */
              if (!strict_aggs) {
                lidj = nloc + cpid;
                ierr = PetscCDAppendID(agg_lists, lidj, lid); CHKERRQ(ierr);
              } else {
                sgid = cpcol_gid[cpid];
                lid_parent_gid[lid] = sgid; /* keep track of proc that I belong to */
              }
              break;
            }
          }
        }
      }
      /* all done? */
      t1   = nloc - nDone;
      ierr = MPI_Allreduce(&t1, &t2, 1, MPIU_INT, MPI_SUM, comm); CHKERRQ(ierr); /* synchronous version */
      if (t2 == 0) break;
    } else break; /* all done */
  } /* outer parallel MIS loop */
  ierr = ISRestoreIndices(perm,&perm_ix); CHKERRQ(ierr);
  ierr = PetscInfo3(Gmat,"\t removed %D of %D vertices.  %D selected.\n",nremoved,nloc,nselected); CHKERRQ(ierr);

  /* tell adj who my lid_parent_gid vertices belong to - fill in agg_lists selected ghost lists */
  if (strict_aggs && matB) {
    /* need to copy this to free buffer -- should do this globally */
    ierr = PetscMalloc1(num_fine_ghosts, &cpcol_sel_gid); CHKERRQ(ierr);
    ierr = PetscMalloc1(num_fine_ghosts, &icpcol_gid); CHKERRQ(ierr);
    for (cpid=0; cpid<num_fine_ghosts; cpid++) icpcol_gid[cpid] = cpcol_gid[cpid];

    /* get proc of deleted ghost */
    ierr = PetscSFBcastBegin(sf,MPIU_INT,lid_parent_gid,cpcol_sel_gid); CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_INT,lid_parent_gid,cpcol_sel_gid); CHKERRQ(ierr);
    for (cpid=0; cpid<num_fine_ghosts; cpid++) {
      sgid = cpcol_sel_gid[cpid];
      gid  = icpcol_gid[cpid];
      if (sgid >= my0 && sgid < Iend) { /* I own this deleted */
        slid = sgid - my0;
        ierr = PetscCDAppendID(agg_lists, slid, gid); CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(icpcol_gid); CHKERRQ(ierr);
    ierr = PetscFree(cpcol_sel_gid); CHKERRQ(ierr);
  }
  if (mpimat) {
    ierr = PetscSFDestroy(&sf); CHKERRQ(ierr);
    ierr = PetscFree(cpcol_gid); CHKERRQ(ierr);
    ierr = PetscFree(cpcol_state); CHKERRQ(ierr);
  }
  ierr = PetscFree(lid_cprowID); CHKERRQ(ierr);
  ierr = PetscFree(lid_gid); CHKERRQ(ierr);
  ierr = PetscFree(lid_removed); CHKERRQ(ierr);
  if (strict_aggs) {
    ierr = PetscFree(lid_parent_gid); CHKERRQ(ierr);
  }
  ierr = PetscFree(lid_state); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy2 - Symbolic phase of C = P^T*A*P
   for sequential AIJ matrices.

   Input Parameters:
+  A    - SeqAIJ matrix
.  P    - SeqAIJ matrix
-  fill - expected fill ratio; the initial free space holds fill*nnz(A) entries

   Output Parameter:
.  C - new SeqAIJ matrix with the nonzero pattern of P^T*A*P and zeroed values;
       it owns the ci/cj/ca arrays allocated here

   Side effects: installs MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy2 as
   A->ops->ptapnumeric and prints the block sizes of C to PETSC_COMM_SELF.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy2(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode     ierr;
  PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0;
  PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N;
  PetscInt           i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar          *ca;
  PetscBT            lnkbt;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  /* one allocation, two views: [0,an) dense marker, [an,2*an] sparse list */
  ierr         = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
  ierr         = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be fill*nnz(A). */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  ierr          = PetscFreeSpaceGet((PetscInt)(fill*ai[am]),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0;j<ptnzi;j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0;j<ptanzi;j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk;
         cnzi grows by the nlnk value left behind by PetscLLAddSorted() */
      ierr  = PetscLLAddSorted(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<ptanzi;j++) {
      ptadenserow[ptasparserow[j]] = 0;
    }
    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /*        For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
  (*C)->rmap->bs = P->cmap->bs;
  (*C)->cmap->bs = P->cmap->bs;
  ierr = PetscPrintf(PETSC_COMM_SELF,"************%s C.bs=%d,%d\n",__FUNCT__,(*C)->rmap->bs,(*C)->cmap->bs);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)((*C)->data);
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;
  A->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy2; /* should use *C->ops until PtAP interface is updated to double dispatch as MatMatMult() */

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
#if defined(PETSC_USE_INFO)
  if (ci[pn] != 0) {
    PetscReal afill = ((PetscReal)ci[pn])/ai[am];
    if (afill < 1.0) afill = 1.0;
    ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr);
    ierr = PetscInfo1((*C),"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);CHKERRQ(ierr);
  } else {
    ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
/*@C PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator. Collective on MPI_Comm Input Parameters: . comm_in - Input communicator Output Parameters: + comm_out - Output communicator. May be comm_in. - first_tag - Tag available that has not already been used with this communicator (you may pass in NULL if you do not need a tag) PETSc communicators are just regular MPI communicators that keep track of which tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into a PETSc creation routine it will attach a private communicator for use in the objects communications. The internal MPI_Comm is used to perform all the MPI calls for PETSc, the outer MPI_Comm is a user level MPI_Comm that may be performing communication for the user or other library and so IS NOT used by PETSc. Level: developer Concepts: communicator^duplicate .seealso: PetscObjectGetNewTag(), PetscCommGetNewTag(), PetscCommDestroy() @*/ PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in,MPI_Comm *comm_out,PetscMPIInt *first_tag) { PetscErrorCode ierr; PetscCommCounter *counter; PetscMPIInt *maxval,flg; PetscFunctionBegin; ierr = PetscSpinlockLock(&PetscCommSpinLock);CHKERRQ(ierr); ierr = MPI_Attr_get(comm_in,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr); if (!flg) { /* this is NOT a PETSc comm */ union {MPI_Comm comm; void *ptr;} ucomm; /* check if this communicator has a PETSc communicator imbedded in it */ ierr = MPI_Attr_get(comm_in,Petsc_InnerComm_keyval,&ucomm,&flg);CHKERRQ(ierr); if (!flg) { /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */ ierr = MPI_Comm_dup(comm_in,comm_out);CHKERRQ(ierr); ierr = MPI_Attr_get(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Attr_get() is not returning a MPI_TAG_UB"); ierr = PetscNew(&counter);CHKERRQ(ierr); counter->tag = *maxval; counter->refcount = 0; 
counter->namecount = 0; ierr = MPI_Attr_put(*comm_out,Petsc_Counter_keyval,counter);CHKERRQ(ierr); ierr = PetscInfo3(0,"Duplicating a communicator %ld %ld max tags = %d\n",(long)comm_in,(long)*comm_out,*maxval);CHKERRQ(ierr); /* save PETSc communicator inside user communicator, so we can get it next time */ ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */ ierr = MPI_Attr_put(comm_in,Petsc_InnerComm_keyval,ucomm.ptr);CHKERRQ(ierr); ucomm.comm = comm_in; ierr = MPI_Attr_put(*comm_out,Petsc_OuterComm_keyval,ucomm.ptr);CHKERRQ(ierr); } else { *comm_out = ucomm.comm; /* pull out the inner MPI_Comm and hand it back to the caller */ ierr = MPI_Attr_get(*comm_out,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set"); ierr = PetscInfo2(0,"Using internal PETSc communicator %ld %ld\n",(long)comm_in,(long)*comm_out);CHKERRQ(ierr); } } else *comm_out = comm_in; #if defined(PETSC_USE_DEBUG) /* Hanging here means that some processes have called PetscCommDuplicate() and others have not. This likley means that a subset of processes in a MPI_Comm have attempted to create a PetscObject! ALL processes that share a communicator MUST shared objects created from that communicator. */ ierr = MPI_Barrier(comm_in);CHKERRQ(ierr); #endif if (counter->tag < 1) { ierr = PetscInfo1(0,"Out of tags for object, starting to recycle. 
Comm reference count %d\n",counter->refcount);CHKERRQ(ierr); ierr = MPI_Attr_get(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Attr_get() is not returning a MPI_TAG_UB"); counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */ } if (first_tag) *first_tag = counter->tag--; counter->refcount++; /* number of references to this comm */ ierr = PetscSpinlockUnlock(&PetscCommSpinLock);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*
   MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy - Symbolic phase of C = P^T*A*P for SeqAIJ
   matrices: computes the row pointers and column indices of C and creates C with a
   zero-filled value array.

   Input Parameters:
+  A    - SeqAIJ matrix
.  P    - SeqAIJ matrix; C has P->cmap->N rows and columns
-  fill - expected fill; the initial column-index workspace holds fill*(nnz(A)+nnz(P)) entries

   Output Parameter:
.  C - the new pn x pn matrix. It owns its i/j/a arrays, has its ptapnumeric operation
       set to MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy, and records the number of
       workspace reallocations and the given/needed fill ratios in its MatInfo.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode     ierr;
  PetscFreeSpaceList free_space=NULL,current_space=NULL;
  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
  PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0;
  PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N;
  PetscInt           i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
  MatScalar          *ca;
  PetscBT            lnkbt;
  PetscReal          afill;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ptJ  = ptj;

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(pn+1,&ci);CHKERRQ(ierr);
  ci[0] = 0;

  /* one allocation, two views: a dense marker array over the columns of A, */
  /* followed by the list of columns currently marked */
  ierr         = PetscCalloc1(2*an+1,&ptadenserow);CHKERRQ(ierr);
  ptasparserow = ptadenserow + an;

  /* create and initialize a linked list */
  nlnk = pn+1;
  ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* Set initial free space to be fill*(nnz(A)+ nnz(P)) */
  ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],pi[pm])),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0; i<pn; i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    /* Determine symbolic row of PtA: */
    for (j=0; j<ptnzi; j++) {
      arow = *ptJ++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0; k<anzj; k++) {
        if (!ptadenserow[ajj[k]]) {
          ptadenserow[ajj[k]]    = -1;
          ptasparserow[ptanzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
    ptaj = ptasparserow;
    cnzi = 0;
    for (j=0; j<ptanzi; j++) {
      prow = *ptaj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      /* add non-zero cols of P into the sorted linked list lnk */
      ierr  = PetscLLAddSorted(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      cnzi += nlnk;
    }

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }

    /* Copy data into free space, and zero out denserows */
    ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);

    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;

    /* Aside: Perhaps we should save the pta info for the numerical factorization. */
    /*        For now, we will recompute what is needed. */
    ci[i+1] = ci[i] + cnzi;
  }
  /* nnz is now stored in ci[pn], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc1(ci[pn]+1,&cj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* values start out as zeros */
  ierr = PetscCalloc1(ci[pn]+1,&ca);CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*C,PetscAbs(P->cmap->bs),PetscAbs(P->cmap->bs));CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ*)((*C)->data);
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;
  (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy;

  /* set MatInfo; the 1.e-5 keeps the denominator nonzero when A and P are both empty */
  afill = (PetscReal)ci[pn]/(ai[am]+pi[pm] + 1.e-5);
  if (afill < 1.0) afill = 1.0;
  c->maxnz                     = ci[pn];
  c->nz                        = ci[pn];
  (*C)->info.mallocs           = nspacedouble;
  (*C)->info.fill_ratio_given  = fill;
  (*C)->info.fill_ratio_needed = afill;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
#if defined(PETSC_USE_INFO)
  if (ci[pn] != 0) {
    ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill);CHKERRQ(ierr);
    ierr = PetscInfo1((*C),"Use MatPtAP(A,P,MatReuse,%g,&C) for best performance.\n",(double)afill);CHKERRQ(ierr);
  } else {
    ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
/*
   TSStep_Alpha - Takes one time step with the TS_Alpha integrator.

   Each attempt solves the nonlinear stage problem with the TS's SNES starting from
   the previous solution, then updates the V1 vector from V0, X0 and X1. When an
   adaptivity callback (th->adapt) is set it decides whether the attempt is accepted
   and proposes the next step size; rejected attempts are retried, at most
   ts->max_reject times in total. On success the solution, time, step size and step
   counter of ts are advanced; on failure ts->reason is set and ts is left at the
   old time.
*/
static PetscErrorCode TSStep_Alpha(TS ts)
{
  TS_Alpha            *alpha = (TS_Alpha*)ts->data;
  PetscInt            nl_its,lin_its,attempt;
  PetscReal           dt_next;
  SNESConvergedReason reason = SNES_CONVERGED_ITERATING;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  /* V0 is taken from the previous step, or is zero on the very first step */
  if (ts->steps != 0) {
    ierr = VecCopy(alpha->V1,alpha->V0);CHKERRQ(ierr);
  } else {
    ierr = VecSet(alpha->V0,0.0);CHKERRQ(ierr);
  }
  ierr = VecCopy(ts->vec_sol,alpha->X0);CHKERRQ(ierr);

  dt_next = ts->time_step;
  for (attempt=0; attempt<ts->max_reject; attempt++,ts->reject++) {
    PetscReal t1;
    PetscBool stepok;

    ts->time_step     = dt_next;
    alpha->stage_time = ts->ptime + alpha->Alpha_f*ts->time_step;
    alpha->shift      = alpha->Alpha_m/(alpha->Alpha_f*alpha->Gamma*ts->time_step);

    ierr = TSPreStep(ts);CHKERRQ(ierr);
    ierr = TSPreStage(ts,alpha->stage_time);CHKERRQ(ierr);

    /* predictor: start the nonlinear solve from the previous solution */
    ierr = VecCopy(alpha->X0,alpha->X1);CHKERRQ(ierr);
    /* solve R(X,V) = 0 */
    ierr = SNESSolve(ts->snes,PETSC_NULL,alpha->X1);CHKERRQ(ierr);
    /* V1 = (1-1/Gamma)*V0 + 1/(Gamma*dT)*(X1-X0) */
    ierr = VecWAXPY(alpha->V1,-1,alpha->X0,alpha->X1);CHKERRQ(ierr);
    ierr = VecAXPBY(alpha->V1,1-1/alpha->Gamma,1/(alpha->Gamma*ts->time_step),alpha->V0);CHKERRQ(ierr);

    /* a failed nonlinear solve ends the attempts unless an adaptor may shrink the step */
    ierr = SNESGetConvergedReason(ts->snes,&reason);CHKERRQ(ierr);
    if (reason < 0 && !alpha->adapt) break;

    /* accumulate solver statistics */
    ierr = SNESGetIterationNumber(ts->snes,&nl_its);CHKERRQ(ierr);
    ierr = SNESGetLinearSolveIterations(ts->snes,&lin_its);CHKERRQ(ierr);
    ts->snes_its += nl_its;
    ts->ksp_its  += lin_its;
    ierr = PetscInfo3(ts,"step=%D, nonlinear solve iterations=%D, linear solve iterations=%D\n",ts->steps,nl_its,lin_its);CHKERRQ(ierr);

    /* without an adaptor every step is accepted as is */
    if (!alpha->adapt) break;

    /* time step adaptivity: stepok enters as TRUE only on the first attempt */
    t1     = ts->ptime + ts->time_step;
    stepok = PETSC_FALSE;
    if (attempt == 0) stepok = PETSC_TRUE;
    ierr = alpha->adapt(ts,t1,alpha->X1,alpha->V1,&dt_next,&stepok,alpha->adaptctx);CHKERRQ(ierr);
    ierr = PetscInfo5(ts,"Step %D (t=%G,dt=%G) %s, next dt=%G\n",ts->steps,ts->ptime,ts->time_step,stepok?"accepted":"rejected",dt_next);CHKERRQ(ierr);
    if (stepok) break;
  }

  /* too many nonlinear solve failures */
  if (reason < 0 && ts->max_snes_failures > 0 && ++ts->num_snes_failures >= ts->max_snes_failures) {
    ts->reason = TS_DIVERGED_NONLINEAR_SOLVE;
    ierr = PetscInfo2(ts,"Step=%D, nonlinear solve solve failures %D greater than current TS allowed, stopping solve\n",ts->steps,ts->num_snes_failures);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* the loop ran out of attempts without an accepted step */
  if (attempt >= ts->max_reject) {
    ts->reason = TS_DIVERGED_STEP_REJECTED;
    ierr = PetscInfo2(ts,"Step=%D, step rejections %D greater than current TS allowed, stopping solve\n",ts->steps,attempt);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* commit the accepted step */
  ierr = VecCopy(alpha->X1,ts->vec_sol);CHKERRQ(ierr);
  ts->ptime    += ts->time_step;
  ts->time_step = dt_next;
  ts->steps++;
  PetscFunctionReturn(0);
}
/*@C KSPConvergedDefault - Determines convergence of the linear iterative solvers by default Collective on KSP Input Parameters: + ksp - iterative context . n - iteration number . rnorm - residual norm (may be estimated, depending on the method may be the preconditioned residual norm) - ctx - convergence context which must be created by KSPConvergedDefaultCreate() Output Parameter: + positive - if the iteration has converged; . negative - if residual norm exceeds divergence threshold; - 0 - otherwise. Notes: KSPConvergedDefault() reaches convergence when rnorm < MAX (rtol * rnorm_0, abstol); Divergence is detected if rnorm > dtol * rnorm_0, where: + rtol = relative tolerance, . abstol = absolute tolerance. . dtol = divergence tolerance, - rnorm_0 is the two norm of the right hand side. When initial guess is non-zero you can call KSPConvergedDefaultSetUIRNorm() to use the norm of (b - A*(initial guess)) as the starting point for relative norm convergence testing, that is as rnorm_0 Use KSPSetTolerances() to alter the defaults for rtol, abstol, dtol. Use KSPSetNormType() (or -ksp_norm_type <none,preconditioned,unpreconditioned,natural>) to change the norm used for computing rnorm The precise values of reason are macros such as KSP_CONVERGED_RTOL, which are defined in petscksp.h. This routine is used by KSP by default so the user generally never needs call it directly. Use KSPSetConvergenceTest() to provide your own test instead of using this one. 
Level: intermediate .keywords: KSP, default, convergence, residual .seealso: KSPSetConvergenceTest(), KSPSetTolerances(), KSPConvergedSkip(), KSPConvergedReason, KSPGetConvergedReason(), KSPConvergedDefaultSetUIRNorm(), KSPConvergedDefaultSetUMIRNorm(), KSPConvergedDefaultCreate(), KSPConvergedDefaultDestroy() @*/ PetscErrorCode KSPConvergedDefault(KSP ksp,PetscInt n,PetscReal rnorm,KSPConvergedReason *reason,void *ctx) { PetscErrorCode ierr; KSPConvergedDefaultCtx *cctx = (KSPConvergedDefaultCtx*) ctx; KSPNormType normtype; PetscFunctionBegin; PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); PetscValidPointer(reason,4); *reason = KSP_CONVERGED_ITERATING; ierr = KSPGetNormType(ksp,&normtype);CHKERRQ(ierr); if (normtype == KSP_NORM_NONE) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_WRONGSTATE,"Use KSPConvergedSkip() with KSPNormType of KSP_NORM_NONE"); if (!cctx) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_NULL,"Convergence context must have been created with KSPConvergedDefaultCreate()"); if (!n) { /* if user gives initial guess need to compute norm of b */ if (!ksp->guess_zero && !cctx->initialrtol) { PetscReal snorm; if (ksp->normtype == KSP_NORM_UNPRECONDITIONED || ksp->pc_side == PC_RIGHT) { ierr = PetscInfo(ksp,"user has provided nonzero initial guess, computing 2-norm of RHS\n");CHKERRQ(ierr); ierr = VecNorm(ksp->vec_rhs,NORM_2,&snorm);CHKERRQ(ierr); /* <- b'*b */ } else { Vec z; /* Should avoid allocating the z vector each time but cannot stash it in cctx because if KSPReset() is called the vector size might change */ ierr = VecDuplicate(ksp->vec_rhs,&z);CHKERRQ(ierr); ierr = KSP_PCApply(ksp,ksp->vec_rhs,z);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = PetscInfo(ksp,"user has provided nonzero initial guess, computing 2-norm of preconditioned RHS\n");CHKERRQ(ierr); ierr = VecNorm(z,NORM_2,&snorm);CHKERRQ(ierr); /* dp <- b'*B'*B*b */ } else if (ksp->normtype == KSP_NORM_NATURAL) { PetscScalar norm; ierr = 
PetscInfo(ksp,"user has provided nonzero initial guess, computing natural norm of RHS\n");CHKERRQ(ierr); ierr = VecDot(ksp->vec_rhs,z,&norm);CHKERRQ(ierr); snorm = PetscSqrtReal(PetscAbsScalar(norm)); /* dp <- b'*B*b */ } ierr = VecDestroy(&z);CHKERRQ(ierr); } /* handle special case of zero RHS and nonzero guess */ if (!snorm) { ierr = PetscInfo(ksp,"Special case, user has provided nonzero initial guess and zero RHS\n");CHKERRQ(ierr); snorm = rnorm; } if (cctx->mininitialrtol) ksp->rnorm0 = PetscMin(snorm,rnorm); else ksp->rnorm0 = snorm; } else { ksp->rnorm0 = rnorm; } ksp->ttol = PetscMax(ksp->rtol*ksp->rnorm0,ksp->abstol); } if (n <= ksp->chknorm) PetscFunctionReturn(0); if (PetscIsInfOrNanReal(rnorm)) { ierr = PetscInfo(ksp,"Linear solver has created a not a number (NaN) as the residual norm, declaring divergence \n");CHKERRQ(ierr); *reason = KSP_DIVERGED_NANORINF; } else if (rnorm <= ksp->ttol) { if (rnorm < ksp->abstol) { ierr = PetscInfo3(ksp,"Linear solver has converged. Residual norm %14.12e is less than absolute tolerance %14.12e at iteration %D\n",(double)rnorm,(double)ksp->abstol,n);CHKERRQ(ierr); *reason = KSP_CONVERGED_ATOL; } else { if (cctx->initialrtol) { ierr = PetscInfo4(ksp,"Linear solver has converged. Residual norm %14.12e is less than relative tolerance %14.12e times initial residual norm %14.12e at iteration %D\n",(double)rnorm,(double)ksp->rtol,(double)ksp->rnorm0,n);CHKERRQ(ierr); } else { ierr = PetscInfo4(ksp,"Linear solver has converged. Residual norm %14.12e is less than relative tolerance %14.12e times initial right hand side norm %14.12e at iteration %D\n",(double)rnorm,(double)ksp->rtol,(double)ksp->rnorm0,n);CHKERRQ(ierr); } *reason = KSP_CONVERGED_RTOL; } } else if (rnorm >= ksp->divtol*ksp->rnorm0) { ierr = PetscInfo3(ksp,"Linear solver is diverging. 
Initial right hand size norm %14.12e, current residual norm %14.12e at iteration %D\n",(double)ksp->rnorm0,(double)rnorm,n);CHKERRQ(ierr); *reason = KSP_DIVERGED_DTOL; } PetscFunctionReturn(0); }
/*
   SNESMatrixFreeMult2_Private - Default matrix-free form for Jacobian-vector
   product, y = F'(u)*a:
        y = (F(u + ha) - F(u)) /h,
   where F = nonlinear function, as set by SNESSetFunction()
         u = current iterate
         h = difference interval

   Input Parameters:
+  mat - shell matrix whose context is an MFCtx_Private
-  a   - vector to which the Jacobian is applied

   Output Parameter:
.  y - the finite-difference approximation of F'(u)*a; if the context holds a null
       space (ctx->sp) it is removed from y

   Notes:
   When ctx->need_h is not set the stored ctx->h is used. Otherwise h comes either
   from SNESDiffParameterCompute_More() (ctx->jorge) or from
        h = error_rel * (u'a) / ||a||_2^2,
   where u'a is first pushed away from zero to +/- umin*||a||_1, keeping its sign,
   so that the step does not become too small.
*/
PetscErrorCode SNESMatrixFreeMult2_Private(Mat mat,Vec a,Vec y)
{
  MFCtx_Private  *ctx;
  SNES           snes;
  PetscReal      h,norm,sum,umin,noise;
  PetscScalar    hs,dot;
  Vec            w,U,F;
  PetscErrorCode ierr,(*eval_fct)(SNES,Vec,Vec);
  MPI_Comm       comm;
  PetscInt       iter;

  PetscFunctionBegin;
  /* We log matrix-free matrix-vector products separately, so that we can
     separate the performance monitoring from the cases that use conventional
     storage.  We may eventually modify event logging to associate events
     with particular objects, hence alleviating the more general problem. */
  ierr = PetscLogEventBegin(MATMFFD_Mult,a,y,0,0);CHKERRQ(ierr);

  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MatShellGetContext(mat,(void**)&ctx);CHKERRQ(ierr);
  snes = ctx->snes;
  w    = ctx->w;
  umin = ctx->umin;

  ierr = SNESGetSolution(snes,&U);CHKERRQ(ierr);
  eval_fct = SNESComputeFunction;
  ierr = SNESGetFunction(snes,&F,NULL,NULL);CHKERRQ(ierr);

  /* Determine a "good" step size, h */
  if (ctx->need_h) {

    /* Use Jorge's method to compute h */
    if (ctx->jorge) {
      ierr = SNESDiffParameterCompute_More(snes,ctx->data,U,a,&noise,&h);CHKERRQ(ierr);

    /* Use the Brown/Saad method to compute h */
    } else {
      /* Compute error if desired */
      ierr = SNESGetIterationNumber(snes,&iter);CHKERRQ(ierr);
      if ((ctx->need_err) || ((ctx->compute_err_freq) && (ctx->compute_err_iter != iter) && (!((iter-1)%ctx->compute_err_freq)))) {
        /* Use Jorge's method to compute noise */
        ierr = SNESDiffParameterCompute_More(snes,ctx->data,U,a,&noise,&h);CHKERRQ(ierr);

        ctx->error_rel = PetscSqrtReal(noise);

        ierr = PetscInfo3(snes,"Using Jorge's noise: noise=%g, sqrt(noise)=%g, h_more=%g\n",(double)noise,(double)ctx->error_rel,(double)h);CHKERRQ(ierr);

        ctx->compute_err_iter = iter;
        ctx->need_err         = PETSC_FALSE;
      }

      /* split-phase reductions so the dot product and both norms can share communication */
      ierr = VecDotBegin(U,a,&dot);CHKERRQ(ierr);
      ierr = VecNormBegin(a,NORM_1,&sum);CHKERRQ(ierr);
      ierr = VecNormBegin(a,NORM_2,&norm);CHKERRQ(ierr);
      ierr = VecDotEnd(U,a,&dot);CHKERRQ(ierr);
      ierr = VecNormEnd(a,NORM_1,&sum);CHKERRQ(ierr);
      ierr = VecNormEnd(a,NORM_2,&norm);CHKERRQ(ierr);

      /* Safeguard for step sizes too small */
      if (sum == 0.0) {
        /* a is the zero vector: any finite h will do */
        dot  = 1.0;
        norm = 1.0;
      } else if (PetscAbsScalar(dot) < umin*sum && PetscRealPart(dot) >= 0.0) dot = umin*sum;
      /* Negative side of the safeguard. The test on the sign must use the real part:
         the former PetscAbsScalar(dot) < 0.0 could never hold, leaving small negative
         values of dot unprotected. */
      else if (PetscRealPart(dot) < 0.0 && PetscRealPart(dot) > -umin*sum) dot = -umin*sum;
      h = PetscRealPart(ctx->error_rel*dot/(norm*norm));
    }
  } else h = ctx->h;

  if (!ctx->jorge || !ctx->need_h) {ierr = PetscInfo1(snes,"h = %g\n",(double)h);CHKERRQ(ierr);}

  /* Evaluate function at F(u + ha) */
  hs   = h;
  ierr = VecWAXPY(w,hs,a,U);CHKERRQ(ierr);
  ierr = eval_fct(snes,w,y);CHKERRQ(ierr);
  /* y = (F(u + ha) - F(u))/h */
  ierr = VecAXPY(y,-1.0,F);CHKERRQ(ierr);
  ierr = VecScale(y,1.0/hs);CHKERRQ(ierr);
  if (ctx->sp) {ierr = MatNullSpaceRemove(ctx->sp,y);CHKERRQ(ierr);}

  ierr = PetscLogEventEnd(MATMFFD_Mult,a,y,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}