PetscErrorCode ComputeMatrix(KSP ksp, Mat J,Mat jac, void *ctx) { UserContext *user = (UserContext*)ctx; PetscErrorCode ierr; PetscInt i,j,mx,my,xm,ym,xs,ys,num, numi, numj; PetscScalar v[5],Hx,Hy,HydHx,HxdHy; MatStencil row, col[5]; DM da; PetscFunctionBeginUser; ierr = KSPGetDM(ksp,&da);CHKERRQ(ierr); ierr = DMDAGetInfo(da,0,&mx,&my,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr); Hx = 1.0 / (PetscReal)(mx); Hy = 1.0 / (PetscReal)(my); HxdHy = Hx/Hy; HydHx = Hy/Hx; ierr = DMDAGetCorners(da,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr); for (j=ys; j<ys+ym; j++) { for (i=xs; i<xs+xm; i++) { row.i = i; row.j = j; if (i==0 || j==0 || i==mx-1 || j==my-1) { if (user->bcType == DIRICHLET) { v[0] = 2.0*(HxdHy + HydHx); ierr = MatSetValuesStencil(jac,1,&row,1,&row,v,INSERT_VALUES);CHKERRQ(ierr); SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Dirichlet boundary conditions not supported !\n"); } else if (user->bcType == NEUMANN) { num = 0; numi=0; numj=0; if (j!=0) { v[num] = -HxdHy; col[num].i = i; col[num].j = j-1; num++; numj++; } if (i!=0) { v[num] = -HydHx; col[num].i = i-1; col[num].j = j; num++; numi++; } if (i!=mx-1) { v[num] = -HydHx; col[num].i = i+1; col[num].j = j; num++; numi++; } if (j!=my-1) { v[num] = -HxdHy; col[num].i = i; col[num].j = j+1; num++; numj++; } v[num] = (PetscReal)(numj)*HxdHy + (PetscReal)(numi)*HydHx; col[num].i = i; col[num].j = j; num++; ierr = MatSetValuesStencil(jac,1,&row,num,col,v,INSERT_VALUES);CHKERRQ(ierr); } } else { v[0] = -HxdHy; col[0].i = i; col[0].j = j-1; v[1] = -HydHx; col[1].i = i-1; col[1].j = j; v[2] = 2.0*(HxdHy + HydHx); col[2].i = i; col[2].j = j; v[3] = -HydHx; col[3].i = i+1; col[3].j = j; v[4] = -HxdHy; col[4].i = i; col[4].j = j+1; ierr = MatSetValuesStencil(jac,1,&row,5,col,v,INSERT_VALUES);CHKERRQ(ierr); } } } ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (user->bcType == NEUMANN) { MatNullSpace nullspace; ierr = MatNullSpaceCreate(PETSC_COMM_WORLD,PETSC_TRUE,0,0,&nullspace);CHKERRQ(ierr); ierr = MatSetNullSpace(J,nullspace);CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&nullspace);CHKERRQ(ierr); } PetscFunctionReturn(0); }

PetscInt main(PetscInt argc,char **args) { ptrdiff_t N0=256,N1=256,N2=256,N3=2,dim[4]; fftw_plan bplan,fplan; fftw_complex *out; double *in1,*in2; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; PetscInt i,j,indx,n1; PetscInt size,rank,n,N,*in,N_factor,NM; PetscScalar *data_fin,value1,one=1.57,zero=0.0; PetscScalar a,*x_arr,*y_arr,*z_arr,enorm; Vec fin,fout,fout1,ini,final; PetscRandom rnd; PetscErrorCode ierr; VecScatter vecscat,vecscat1; IS indx1,indx2; PetscInt *indx3,k,l,*indx4; PetscInt low,tempindx,tempindx1; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); PetscRandomCreate(PETSC_COMM_WORLD,&rnd); alloc_local = fftw_mpi_local_size_3d_transposed(N0,N1,N2/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); /* printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); */ printf("The value local_n0 is %ld from process %d\n",local_n0,rank); /* printf("The value local_0_start is %ld from process %d\n",local_0_start,rank);*/ /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank);*/ /* Allocate space for input and output arrays */ in1=(double*)fftw_malloc(sizeof(double)*alloc_local*2); in2=(double*)fftw_malloc(sizeof(double)*alloc_local*2); out=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); N=2*N0*N1*(N2/2+1);N_factor=N0*N1*N2; n=2*local_n0*N1*(N2/2+1);n1=local_n1*N0*2*N1; /* printf("The value N is %d from process %d\n",N,rank); */ /* printf("The value n is %d from process %d\n",n,rank); */ /* printf("The value n1 is %d from process %d\n",n1,rank); */ /* Creating data vector and accompanying array with VeccreateMPIWithArray */ ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr); /* VecGetSize(fin,&size); */ /* printf("The size is %d\n",size); */ VecSet(fin,one); VecSet(fout,zero); VecSet(fout1,zero); VecAssemblyBegin(fin); VecAssemblyEnd(fin); /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ VecGetArray(fin,&x_arr); VecGetArray(fout1,&z_arr); VecGetArray(fout,&y_arr); fplan=fftw_mpi_plan_dft_r2c_3d(N0,N1,N2,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); bplan=fftw_mpi_plan_dft_c2r_3d(N0,N1,N2,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); fftw_execute(fplan); fftw_execute(bplan); VecRestoreArray(fin,&x_arr); VecRestoreArray(fout1,&z_arr); VecRestoreArray(fout,&y_arr); /* a = 1.0/(PetscReal)N_factor; */ /* ierr = VecScale(fout1,a);CHKERRQ(ierr); */ VecCreate(PETSC_COMM_WORLD,&ini); VecCreate(PETSC_COMM_WORLD,&final); VecSetSizes(ini,local_n0*N1*N2,N_factor); VecSetSizes(final,local_n0*N1*N2,N_factor); /* VecSetSizes(ini,PETSC_DECIDE,N_factor); */ /* VecSetSizes(final,PETSC_DECIDE,N_factor); */ VecSetFromOptions(ini); VecSetFromOptions(final); if (N2%2==0) NM=N2+2; else NM=N2+1; ierr = VecGetOwnershipRange(fin,&low,NULL); printf("The local index is %d from %d\n",low,rank); ierr = PetscMalloc(sizeof(PetscInt)*local_n0*N1*N2,&indx3); ierr = PetscMalloc(sizeof(PetscInt)*local_n0*N1*N2,&indx4); for (i=0; i<local_n0; i++) { for (j=0;j<N1;j++) { for (k=0;k<N2;k++) { tempindx = i*N1*N2 + j*N2 + k; tempindx1 = i*N1*NM + j*NM + k; indx3[tempindx]=local_0_start*N1*N2+tempindx; indx4[tempindx]=low+tempindx1; } /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ } } VecGetValues(fin,local_n0*N1*N2,indx4,x_arr); VecSetValues(ini,local_n0*N1*N2,indx3,x_arr,INSERT_VALUES); VecAssemblyBegin(ini); VecAssemblyEnd(ini); VecGetValues(fout1,local_n0*N1*N2,indx4,y_arr); VecSetValues(final,local_n0*N1*N2,indx3,y_arr,INSERT_VALUES); VecAssemblyBegin(final); VecAssemblyEnd(final); printf("The local index value is %ld from %d",local_n0*N1*N2,rank); /* for (i=0;i<N0;i++) { for (j=0;j<N1;j++) { indx=i*N1*NM+j*NM; ISCreateStride(PETSC_COMM_WORLD,N2,indx,1,&indx1); indx=i*N1*N2+j*N2; ISCreateStride(PETSC_COMM_WORLD,N2,indx,1,&indx2); VecScatterCreate(fin,indx1,ini,indx2,&vecscat); VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterCreate(fout1,indx1,final,indx2,&vecscat1); VecScatterBegin(vecscat1,fout1,final,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat1,fout1,final,INSERT_VALUES,SCATTER_FORWARD); } } */ a = 1.0/(PetscReal)N_factor; ierr = VecScale(fout1,a);CHKERRQ(ierr); ierr = VecScale(final,a);CHKERRQ(ierr); VecAssemblyBegin(ini); VecAssemblyEnd(ini); VecAssemblyBegin(final); VecAssemblyEnd(final); /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr); ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",enorm);CHKERRQ(ierr); fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr); fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr); fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

static PetscErrorCode SNESMultiblockSetDefaults(SNES snes) { SNES_Multiblock *mb = (SNES_Multiblock *) snes->data; BlockDesc blocks = mb->blocks; PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; if (!blocks) { if (snes->dm) { PetscBool dmcomposite; ierr = PetscObjectTypeCompare((PetscObject) snes->dm, DMCOMPOSITE, &dmcomposite);CHKERRQ(ierr); if (dmcomposite) { PetscInt nDM; IS *fields; ierr = PetscInfo(snes,"Setting up physics based multiblock solver using the embedded DM\n");CHKERRQ(ierr); ierr = DMCompositeGetNumberDM(snes->dm, &nDM);CHKERRQ(ierr); ierr = DMCompositeGetGlobalISs(snes->dm, &fields);CHKERRQ(ierr); for (i = 0; i < nDM; ++i) { char name[8]; ierr = PetscSNPrintf(name, sizeof(name), "%D", i);CHKERRQ(ierr); ierr = SNESMultiblockSetIS(snes, name, fields[i]);CHKERRQ(ierr); ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); } ierr = PetscFree(fields);CHKERRQ(ierr); } } else { PetscBool flg = PETSC_FALSE; PetscBool stokes = PETSC_FALSE; if (mb->bs <= 0) { if (snes->jacobian_pre) { ierr = MatGetBlockSize(snes->jacobian_pre, &mb->bs);CHKERRQ(ierr); } else { mb->bs = 1; } } ierr = PetscOptionsGetBool(((PetscObject) snes)->prefix, "-snes_multiblock_default", &flg, PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject) snes)->prefix, "-snes_multiblock_detect_saddle_point", &stokes, PETSC_NULL);CHKERRQ(ierr); if (stokes) { IS zerodiags, rest; PetscInt nmin, nmax; ierr = MatGetOwnershipRange(snes->jacobian_pre, &nmin, &nmax);CHKERRQ(ierr); ierr = MatFindZeroDiagonals(snes->jacobian_pre, &zerodiags);CHKERRQ(ierr); ierr = ISComplement(zerodiags, nmin, nmax, &rest);CHKERRQ(ierr); ierr = SNESMultiblockSetIS(snes, "0", rest);CHKERRQ(ierr); ierr = SNESMultiblockSetIS(snes, "1", zerodiags);CHKERRQ(ierr); ierr = ISDestroy(&zerodiags);CHKERRQ(ierr); ierr = ISDestroy(&rest);CHKERRQ(ierr); } else { if (!flg) { /* Allow user to set fields from command line, if bs was known at the time of SNESSetFromOptions_Multiblock() then it is set there. This is not ideal because we should only have options set in XXSetFromOptions(). */ ierr = SNESMultiblockSetFieldsRuntime_Private(snes);CHKERRQ(ierr); if (mb->defined) {ierr = PetscInfo(snes, "Blocks defined using the options database\n");CHKERRQ(ierr);} } if (flg || !mb->defined) { ierr = PetscInfo(snes, "Using default splitting of fields\n");CHKERRQ(ierr); for (i = 0; i < mb->bs; ++i) { char name[8]; ierr = PetscSNPrintf(name, sizeof(name), "%D", i);CHKERRQ(ierr); ierr = SNESMultiblockSetFields(snes, name, 1, &i);CHKERRQ(ierr); } mb->defaultblocks = PETSC_TRUE; } } } } else if (mb->numBlocks == 1) { if (blocks->is) { IS is2; PetscInt nmin, nmax; ierr = MatGetOwnershipRange(snes->jacobian_pre, &nmin, &nmax);CHKERRQ(ierr); ierr = ISComplement(blocks->is, nmin, nmax, &is2);CHKERRQ(ierr); ierr = SNESMultiblockSetIS(snes, "1", is2);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); } else { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must provide at least two sets of fields to SNES multiblock"); } } if (mb->numBlocks < 2) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unhandled case, must have at least two blocks"); PetscFunctionReturn(0); }

int main(int argc,char **argv) { TS ts; /* time integration context */ Vec X; /* solution, residual vectors */ Mat J; /* Jacobian matrix */ PetscErrorCode ierr; PetscScalar *x; PetscReal ftime; PetscInt i,steps,nits,lits; PetscBool view_final; Ctx ctx; PetscInitialize(&argc,&argv,(char*)0,help); ctx.n = 3; ierr = PetscOptionsGetInt(NULL,NULL,"-n",&ctx.n,NULL);CHKERRQ(ierr); if (ctx.n < 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_OUTOFRANGE,"The dimension specified with -n must be at least 2"); view_final = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-view_final",&view_final,NULL);CHKERRQ(ierr); ctx.monitor_short = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-monitor_short",&ctx.monitor_short,NULL);CHKERRQ(ierr); /* Create Jacobian matrix data structure and state vector */ ierr = MatCreate(PETSC_COMM_WORLD,&J);CHKERRQ(ierr); ierr = MatSetSizes(J,PETSC_DECIDE,PETSC_DECIDE,ctx.n,ctx.n);CHKERRQ(ierr); ierr = MatSetFromOptions(J);CHKERRQ(ierr); ierr = MatSetUp(J);CHKERRQ(ierr); ierr = MatCreateVecs(J,&X,NULL);CHKERRQ(ierr); /* Create time integration context */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetType(ts,TSPSEUDO);CHKERRQ(ierr); ierr = TSSetIFunction(ts,NULL,FormIFunction,&ctx);CHKERRQ(ierr); ierr = TSSetIJacobian(ts,J,J,FormIJacobian,&ctx);CHKERRQ(ierr); ierr = TSSetDuration(ts,1000,1e14);CHKERRQ(ierr); ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,0.0,1e-3);CHKERRQ(ierr); ierr = TSMonitorSet(ts,MonitorObjective,&ctx,NULL);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Customize time integrator; set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetFromOptions(ts);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Evaluate initial guess; then solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecSet(X,0.0);CHKERRQ(ierr); ierr = VecGetArray(X,&x);CHKERRQ(ierr); #if 1 x[0] = 5.; x[1] = -5.; for (i=2; i<ctx.n; i++) x[i] = 5.; #else x[0] = 1.0; x[1] = 15.0; for (i=2; i<ctx.n; i++) x[i] = 10.0; #endif ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); ierr = TSSolve(ts,X);CHKERRQ(ierr); ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr); ierr = TSGetTimeStepNumber(ts,&steps);CHKERRQ(ierr); ierr = TSGetSNESIterations(ts,&nits);CHKERRQ(ierr); ierr = TSGetKSPIterations(ts,&lits);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Time integrator took (%D,%D,%D) iterations to reach final time %g\n",steps,nits,lits,(double)ftime);CHKERRQ(ierr); if (view_final) { ierr = VecView(X,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecDestroy(&X);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

static PetscErrorCode SNESLineSearchApply_BT(SNESLineSearch linesearch) { PetscBool changed_y,changed_w; PetscErrorCode ierr; Vec X,F,Y,W,G; SNES snes; PetscReal fnorm, xnorm, ynorm, gnorm; PetscReal lambda,lambdatemp,lambdaprev,minlambda,maxstep,initslope,alpha,stol; PetscReal t1,t2,a,b,d; PetscReal f; PetscReal g,gprev; PetscBool domainerror; PetscViewer monitor; PetscInt max_its,count; SNESLineSearch_BT *bt; Mat jac; PetscErrorCode (*objective)(SNES,Vec,PetscReal*,void*); PetscFunctionBegin; ierr = SNESLineSearchGetVecs(linesearch, &X, &F, &Y, &W, &G);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); ierr = SNESLineSearchGetLambda(linesearch, &lambda);CHKERRQ(ierr); ierr = SNESLineSearchGetSNES(linesearch, &snes);CHKERRQ(ierr); ierr = SNESLineSearchGetMonitor(linesearch, &monitor);CHKERRQ(ierr); ierr = SNESLineSearchGetTolerances(linesearch,&minlambda,&maxstep,NULL,NULL,NULL,&max_its);CHKERRQ(ierr); ierr = SNESGetTolerances(snes,NULL,NULL,&stol,NULL,NULL);CHKERRQ(ierr); ierr = SNESGetObjective(snes,&objective,NULL);CHKERRQ(ierr); bt = (SNESLineSearch_BT*)linesearch->data; alpha = bt->alpha; ierr = SNESGetJacobian(snes, &jac, NULL, NULL, NULL);CHKERRQ(ierr); if (!jac && !objective) SETERRQ(PetscObjectComm((PetscObject)linesearch), PETSC_ERR_USER, "SNESLineSearchBT requires a Jacobian matrix"); /* precheck */ ierr = SNESLineSearchPreCheck(linesearch,X,Y,&changed_y);CHKERRQ(ierr); ierr = SNESLineSearchSetSuccess(linesearch, PETSC_TRUE);CHKERRQ(ierr); ierr = VecNormBegin(Y, NORM_2, &ynorm);CHKERRQ(ierr); ierr = VecNormBegin(X, NORM_2, &xnorm);CHKERRQ(ierr); ierr = VecNormEnd(Y, NORM_2, &ynorm);CHKERRQ(ierr); ierr = VecNormEnd(X, NORM_2, &xnorm);CHKERRQ(ierr); if (ynorm == 0.0) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Initial direction and size is 0\n");CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = VecCopy(X,W);CHKERRQ(ierr); ierr = VecCopy(F,G);CHKERRQ(ierr); ierr = SNESLineSearchSetNorms(linesearch,xnorm,fnorm,ynorm);CHKERRQ(ierr); ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (ynorm > maxstep) { /* Step too big, so scale back */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Scaling step by %14.12e old ynorm %14.12e\n", (double)(maxstep/ynorm),(double)ynorm);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = VecScale(Y,maxstep/(ynorm));CHKERRQ(ierr); ynorm = maxstep; } /* if the SNES has an objective set, use that instead of the function value */ if (objective) { ierr = SNESComputeObjective(snes,X,&f);CHKERRQ(ierr); } else { f = fnorm*fnorm; } /* compute the initial slope */ if (objective) { /* slope comes from the function (assumed to be the gradient of the objective */ ierr = VecDotRealPart(Y,F,&initslope);CHKERRQ(ierr); } else { /* slope comes from the normal equations */ ierr = MatMult(jac,Y,W);CHKERRQ(ierr); ierr = VecDotRealPart(F,W,&initslope);CHKERRQ(ierr); if (initslope > 0.0) initslope = -initslope; if (initslope == 0.0) initslope = -1.0; } ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo(snes,"Exceeded maximum function evaluations, while checking full step length!\n");CHKERRQ(ierr); snes->reason = SNES_DIVERGED_FUNCTION_COUNT; ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (objective) { ierr = SNESComputeObjective(snes,W,&g);CHKERRQ(ierr); } else { ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } g = PetscSqr(gnorm); } if (PetscIsInfOrNanReal(g)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } if (!objective) { ierr = PetscInfo2(snes,"Initial fnorm %14.12e gnorm %14.12e\n", (double)fnorm, (double)gnorm);CHKERRQ(ierr); } if (.5*g <= .5*f + lambda*alpha*initslope) { /* Sufficient reduction or step tolerance convergence */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Using full step: fnorm %14.12e gnorm %14.12e\n", (double)fnorm, (double)gnorm);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Using full step: obj0 %14.12e obj %14.12e\n", (double)f, (double)g);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } else { /* Since the full step didn't work and the step is tiny, quit */ if (stol*xnorm > ynorm) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo2(monitor,"Aborted due to ynorm < stol*xnorm (%14.12e < %14.12e) and inadequate full step.\n",(double)ynorm,(double)stol*xnorm);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Fit points with quadratic */ lambdatemp = -initslope/(g - f - 2.0*lambda*initslope); lambdaprev = lambda; gprev = g; if (lambdatemp > .5*lambda) lambdatemp = .5*lambda; if (lambdatemp <= .1*lambda) lambda = .1*lambda; else lambda = lambdatemp; ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo1(snes,"Exceeded maximum function evaluations, while attempting quadratic backtracking! %D \n",snes->nfuncs);CHKERRQ(ierr); snes->reason = SNES_DIVERGED_FUNCTION_COUNT; ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (objective) { ierr = SNESComputeObjective(snes,W,&g);CHKERRQ(ierr); } else { ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) PetscFunctionReturn(0); if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } g = PetscSqr(gnorm); } if (PetscIsInfOrNanReal(g)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { ierr = PetscViewerASCIIPrintf(monitor," Line search: gnorm after quadratic fit %14.12e\n",(double)gnorm);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: obj after quadratic fit %14.12e\n",(double)g);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } if (.5*g < .5*f + lambda*alpha*initslope) { /* sufficient reduction */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratically determined step, lambda=%18.16e\n",(double)lambda);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } else { /* Fit points with cubic */ for (count = 0; count < max_its; count++) { if (lambda <= minlambda) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: unable to find good step length! After %D tries \n",count);CHKERRQ(ierr); if (!objective) { ierr = PetscViewerASCIIPrintf(monitor, " Line search: fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, minlambda=%18.16e, lambda=%18.16e, initial slope=%18.16e\n", (double)fnorm, (double)gnorm, (double)ynorm, (double)minlambda, (double)lambda, (double)initslope);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor, " Line search: obj(0)=%18.16e, obj=%18.16e, ynorm=%18.16e, minlambda=%18.16e, lambda=%18.16e, initial slope=%18.16e\n", (double)f, (double)g, (double)ynorm, (double)minlambda, (double)lambda, (double)initslope);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { t1 = .5*(g - f) - lambda*initslope; t2 = .5*(gprev - f) - lambdaprev*initslope; a = (t1/(lambda*lambda) - t2/(lambdaprev*lambdaprev))/(lambda-lambdaprev); b = (-lambdaprev*t1/(lambda*lambda) + lambda*t2/(lambdaprev*lambdaprev))/(lambda-lambdaprev); d = b*b - 3*a*initslope; if (d < 0.0) d = 0.0; if (a == 0.0) lambdatemp = -initslope/(2.0*b); else lambdatemp = (-b + PetscSqrtReal(d))/(3.0*a); } else if (linesearch->order == SNES_LINESEARCH_ORDER_QUADRATIC) { lambdatemp = -initslope/(g - f - 2.0*initslope); } else SETERRQ(PetscObjectComm((PetscObject)linesearch), PETSC_ERR_SUP, "unsupported line search order for type bt"); lambdaprev = lambda; gprev = g; if (lambdatemp > .5*lambda) lambdatemp = .5*lambda; if (lambdatemp <= .1*lambda) lambda = .1*lambda; else lambda = lambdatemp; ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes,W);CHKERRQ(ierr); } if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo1(snes,"Exceeded maximum function evaluations, while looking for good step length! %D \n",count);CHKERRQ(ierr); if (!objective) { ierr = PetscInfo5(snes,"fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, lambda=%18.16e, initial slope=%18.16e\n", (double)fnorm,(double)gnorm,(double)ynorm,(double)lambda,(double)initslope);CHKERRQ(ierr); } ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); snes->reason = SNES_DIVERGED_FUNCTION_COUNT; PetscFunctionReturn(0); } if (objective) { ierr = SNESComputeObjective(snes,W,&g);CHKERRQ(ierr); } else { ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } g = PetscSqr(gnorm); } if (PetscIsInfOrNanReal(gnorm)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } if (.5*g < .5*f + lambda*alpha*initslope) { /* is reduction enough? */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubically determined step, current gnorm %14.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratically determined step, current gnorm %14.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } else { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubically determined step, obj %14.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratically determined step, obj %14.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } break; } else if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubic step no good, shrinking lambda, current gnorm %12.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratic step no good, shrinking lambda, current gnorm %12.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } else { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubic step no good, shrinking lambda, obj %12.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratic step no good, shrinking lambda, obj %12.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } } } } /* postcheck */ ierr = SNESLineSearchPostCheck(linesearch,X,Y,W,&changed_y,&changed_w);CHKERRQ(ierr); if (changed_y) { ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } } if (changed_y || changed_w || objective) { /* recompute the function norm if the step has changed or the objective isn't the norm */ ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } ierr = VecNorm(Y,NORM_2,&ynorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(gnorm)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } } /* copy the solution over */ ierr = VecCopy(W, X);CHKERRQ(ierr); ierr = VecCopy(G, F);CHKERRQ(ierr); ierr = VecNorm(X, NORM_2, &xnorm);CHKERRQ(ierr); ierr = SNESLineSearchSetLambda(linesearch, lambda);CHKERRQ(ierr); ierr = SNESLineSearchSetNorms(linesearch, xnorm, gnorm, ynorm);CHKERRQ(ierr); PetscFunctionReturn(0); }

static PetscErrorCode KSPCGSolve_NASH(KSP ksp) { #if defined(PETSC_USE_COMPLEX) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "NASH is not available for complex systems"); #else KSPCG_NASH *cg = (KSPCG_NASH*)ksp->data; PetscErrorCode ierr; Mat Qmat, Mmat; Vec r, z, p, d; PC pc; PetscReal norm_r, norm_d, norm_dp1, norm_p, dMp; PetscReal alpha, beta, kappa, rz, rzm1; PetscReal rr, r2, step; PetscInt max_cg_its; PetscBool diagonalscale; PetscFunctionBegin; /***************************************************************************/ /* Check the arguments and parameters. */ /***************************************************************************/ ierr = PCGetDiagonalScale(ksp->pc, &diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name); if (cg->radius < 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "Input error: radius < 0"); /***************************************************************************/ /* Get the workspace vectors and initialize variables */ /***************************************************************************/ r2 = cg->radius * cg->radius; r = ksp->work[0]; z = ksp->work[1]; p = ksp->work[2]; d = ksp->vec_sol; pc = ksp->pc; ierr = PCGetOperators(pc, &Qmat, &Mmat);CHKERRQ(ierr); ierr = VecGetSize(d, &max_cg_its);CHKERRQ(ierr); max_cg_its = PetscMin(max_cg_its, ksp->max_it); ksp->its = 0; /***************************************************************************/ /* Initialize objective function and direction. */ /***************************************************************************/ cg->o_fcn = 0.0; ierr = VecSet(d, 0.0);CHKERRQ(ierr); /* d = 0 */ cg->norm_d = 0.0; /***************************************************************************/ /* Begin the conjugate gradient method. Check the right-hand side for */ /* numerical problems. The check for not-a-number and infinite values */ /* need be performed only once. */ /***************************************************************************/ ierr = VecCopy(ksp->vec_rhs, r);CHKERRQ(ierr); /* r = -grad */ ierr = VecDot(r, r, &rr);CHKERRQ(ierr); /* rr = r^T r */ KSPCheckDot(ksp,rr); /***************************************************************************/ /* Check the preconditioner for numerical problems and for positive */ /* definiteness. The check for not-a-number and infinite values need be */ /* performed only once. */ /***************************************************************************/ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T inv(M) r */ if (PetscIsInfOrNanScalar(rz)) { /*************************************************************************/ /* The preconditioner contains not-a-number or an infinite value. */ /* Return the gradient direction intersected with the trust region. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NANORINF; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: bad preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } if (rz < 0.0) { /*************************************************************************/ /* The preconditioner is indefinite. Because this is the first */ /* and we do not have a direction yet, we use the gradient step. Note */ /* that we cannot use the preconditioned norm when computing the step */ /* because the matrix is indefinite. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: indefinite preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute and log the residual. Check convergence because this */ /* initializes things, but do not terminate until at least one conjugate */ /* gradient iteration has been performed. */ /***************************************************************************/ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: norm_r = PetscSqrtReal(rr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.0; break; } ierr = KSPLogResidualHistory(ksp, norm_r);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); /***************************************************************************/ /* Compute the first direction and update the iteration. */ /***************************************************************************/ ierr = VecCopy(z, p);CHKERRQ(ierr); /* p = z */ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ++ksp->its; /***************************************************************************/ /* Check the matrix for numerical problems. */ /***************************************************************************/ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ if (PetscIsInfOrNanScalar(kappa)) { /*************************************************************************/ /* The matrix produced not-a-number or an infinite value. In this case, */ /* we must stop and use the gradient direction. This condition need */ /* only be checked once. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NANORINF; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: bad matrix: kappa=%g\n", (double)kappa);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Initialize variables for calculating the norm of the direction. */ /***************************************************************************/ dMp = 0.0; norm_d = 0.0; switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: norm_p = rz; break; default: ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /***************************************************************************/ /* Check for negative curvature. */ /***************************************************************************/ if (kappa <= 0.0) { /*************************************************************************/ /* In this case, the matrix is indefinite and we have encountered a */ /* direction of negative curvature. Because negative curvature occurs */ /* during the first step, we must follow a direction. */ /*************************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: negative curvature: kappa=%g\n", (double)kappa);CHKERRQ(ierr); if (cg->radius && norm_p > 0.0) { /***********************************************************************/ /* Follow direction of negative curvature to the boundary of the */ /* trust region. */ /***********************************************************************/ step = PetscSqrtReal(r2 / norm_p); cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /***********************************************************************/ /* Update objective function. */ /***********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else if (cg->radius) { /***********************************************************************/ /* The norm of the preconditioned direction is zero; use the gradient */ /* step. */ /***********************************************************************/ if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Run the conjugate gradient method until either the problem is solved, */ /* we encounter the boundary of the trust region, or the conjugate */ /* gradient method breaks down. */ /***************************************************************************/ while (1) { /*************************************************************************/ /* Know that kappa is nonzero, because we have not broken down, so we */ /* can compute the steplength. */ /*************************************************************************/ alpha = rz / kappa; /*************************************************************************/ /* Compute the steplength and check for intersection with the trust */ /* region. */ /*************************************************************************/ norm_dp1 = norm_d + alpha*(2.0*dMp + alpha*norm_p); if (cg->radius && norm_dp1 >= r2) { /***********************************************************************/ /* In this case, the matrix is positive definite as far as we know. */ /* However, the full step goes beyond the trust region. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_CONSTRAINED; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: constrained step: radius=%g\n", (double)cg->radius);CHKERRQ(ierr); if (norm_p > 0.0) { /*********************************************************************/ /* Follow the direction to the boundary of the trust region. */ /*********************************************************************/ step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p; cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /*********************************************************************/ /* Update objective function. */ /*********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else { /*********************************************************************/ /* The norm of the direction is zero; there is nothing to follow. */ /*********************************************************************/ } break; } /*************************************************************************/ /* Now we can update the direction and residual. */ /*************************************************************************/ ierr = VecAXPY(d, alpha, p);CHKERRQ(ierr); /* d = d + alpha p */ ierr = VecAXPY(r, -alpha, z);CHKERRQ(ierr); /* r = r - alpha Q p */ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: norm_d = norm_dp1; break; default: ierr = VecDot(d, d, &norm_d);CHKERRQ(ierr); break; } cg->norm_d = PetscSqrtReal(norm_d); /*************************************************************************/ /* Update objective function. */ /*************************************************************************/ cg->o_fcn -= 0.5 * alpha * rz; /*************************************************************************/ /* Check that the preconditioner appears positive semidefinite. */ /*************************************************************************/ rzm1 = rz; ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T z */ if (rz < 0.0) { /***********************************************************************/ /* The preconditioner is indefinite. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: cg indefinite preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); break; } /*************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute the residual and check for convergence. */ /*************************************************************************/ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(r, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.; break; } ierr = KSPLogResidualHistory(ksp, norm_r);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { /***********************************************************************/ /* The method has converged. */ /***********************************************************************/ ierr = PetscInfo2(ksp, "KSPCGSolve_NASH: truncated step: rnorm=%g, radius=%g\n", (double)norm_r, (double)cg->radius);CHKERRQ(ierr); break; } /*************************************************************************/ /* We have not converged yet. Check for breakdown. */ /*************************************************************************/ beta = rz / rzm1; if (PetscAbsReal(beta) <= 0.0) { /***********************************************************************/ /* Conjugate gradients has broken down. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_BREAKDOWN; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: breakdown: beta=%g\n", (double)beta);CHKERRQ(ierr); break; } /*************************************************************************/ /* Check iteration limit. */ /*************************************************************************/ if (ksp->its >= max_cg_its) { ksp->reason = KSP_DIVERGED_ITS; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: iterlim: its=%D\n", ksp->its);CHKERRQ(ierr); break; } /*************************************************************************/ /* Update p and the norms. */ /*************************************************************************/ ierr = VecAYPX(p, beta, z);CHKERRQ(ierr); /* p = z + beta p */ switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: dMp = beta*(dMp + alpha*norm_p); norm_p = beta*(rzm1 + beta*norm_p); break; default: ierr = VecDot(d, p, &dMp);CHKERRQ(ierr); ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /*************************************************************************/ /* Compute the new direction and update the iteration. */ /*************************************************************************/ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ ++ksp->its; /*************************************************************************/ /* Check for negative curvature. */ /*************************************************************************/ if (kappa <= 0.0) { /***********************************************************************/ /* In this case, the matrix is indefinite and we have encountered */ /* a direction of negative curvature. Stop at the base. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: negative curvature: kappa=%g\n", (double)kappa);CHKERRQ(ierr); break; } } PetscFunctionReturn(0); #endif }

static PetscErrorCode ISCopy_Block(IS is,IS isy) { IS_Block *is_block = (IS_Block*)is->data,*isy_block = (IS_Block*)isy->data; PetscErrorCode ierr; PetscFunctionBegin; if (is_block->n != isy_block->n || is_block->N != isy_block->N || is->bs != isy->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index sets incompatible"); isy_block->sorted = is_block->sorted; ierr = PetscMemcpy(isy_block->idx,is_block->idx,is_block->n*sizeof(PetscInt));CHKERRQ(ierr); PetscFunctionReturn(0); }

int main(int argc,char **args) { Mat A,B,C; PetscInt i,j,k,m = 3,n = 3,bs = 1; PetscErrorCode ierr; PetscMPIInt size; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr); /* adjust sizes by block size */ if (m%bs) m += bs-m%bs; if (n%bs) n += bs-n%bs; ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,m*n,m*n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetBlockSize(A,bs);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,m*n,m*n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); ierr = MatSetType(B,MATSEQBAIJ);CHKERRQ(ierr); ierr = MatSetUp(B);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_SELF,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,m*n,m*n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetBlockSize(C,bs);CHKERRQ(ierr); ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = MatSetOption(C,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); for (i=0; i<m; i++) { for (j=0; j<n; j++) { PetscScalar v = -1.0; PetscInt Ii = j + n*i,J; J = Ii - n; if (J>=0) { ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); } J = Ii + n; if (J<m*n) { ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); } J = Ii - 1; if (J>=0) { ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); } J = Ii + 1; if (J<m*n) { ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); } v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* test MatGetRowIJ for the three Mat types */ ierr = MatView(A,NULL);CHKERRQ(ierr); ierr = MatView(B,NULL);CHKERRQ(ierr); ierr = MatView(C,NULL);CHKERRQ(ierr); for (i=0;i<2;i++) { PetscInt shift = i; for (j=0;j<2;j++) { PetscBool symmetric = ((j>0) ? PETSC_FALSE : PETSC_TRUE); for (k=0;k<2;k++) { PetscBool compressed = ((k>0) ? PETSC_FALSE : PETSC_TRUE); ierr = DumpCSR(A,shift,symmetric,compressed);CHKERRQ(ierr); ierr = DumpCSR(B,shift,symmetric,compressed);CHKERRQ(ierr); ierr = DumpCSR(C,shift,symmetric,compressed);CHKERRQ(ierr); } } } ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

/* Mm_ratio - ration of grid lines between fine and coarse grids. */ int main(int argc,char **argv) { PetscErrorCode ierr; AppCtx user; PetscMPIInt size,rank; PetscInt m,n,M,N,i,nrows; PetscScalar one = 1.0; PetscReal fill=2.0; Mat A,P,R,C,PtAP; PetscScalar *array; PetscRandom rdm; PetscBool Test_3D=PETSC_FALSE,flg; const PetscInt *ia,*ja; ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); /* Get size of fine grids and coarse grids */ user.ratio = 2; user.coarse.mx = 4; user.coarse.my = 4; user.coarse.mz = 4; ierr = PetscOptionsGetInt(NULL,NULL,"-Mx",&user.coarse.mx,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-My",&user.coarse.my,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-Mz",&user.coarse.mz,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-ratio",&user.ratio,NULL);CHKERRQ(ierr); if (user.coarse.mz) Test_3D = PETSC_TRUE; user.fine.mx = user.ratio*(user.coarse.mx-1)+1; user.fine.my = user.ratio*(user.coarse.my-1)+1; user.fine.mz = user.ratio*(user.coarse.mz-1)+1; if (!rank) { if (!Test_3D) { ierr = PetscPrintf(PETSC_COMM_SELF,"coarse grids: %D %D; fine grids: %D %D\n",user.coarse.mx,user.coarse.my,user.fine.mx,user.fine.my);CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_SELF,"coarse grids: %D %D %D; fine grids: %D %D %D\n",user.coarse.mx,user.coarse.my,user.coarse.mz,user.fine.mx,user.fine.my,user.fine.mz);CHKERRQ(ierr); } } /* Set up distributed array for fine grid */ if (!Test_3D) { ierr = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.fine.mx,user.fine.my,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&user.fine.da);CHKERRQ(ierr); } else { ierr = DMDACreate3d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.fine.mx,user.fine.my,user.fine.mz,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE, 1,1,NULL,NULL,NULL,&user.fine.da);CHKERRQ(ierr); } ierr = DMSetFromOptions(user.fine.da);CHKERRQ(ierr); ierr = DMSetUp(user.fine.da);CHKERRQ(ierr); /* Create and set A at fine grids */ ierr = DMSetMatType(user.fine.da,MATAIJ);CHKERRQ(ierr); ierr = DMCreateMatrix(user.fine.da,&A);CHKERRQ(ierr); ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); /* set val=one to A (replace with random values!) */ ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); if (size == 1) { ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);CHKERRQ(ierr); if (flg) { ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0; i<ia[nrows]; i++) array[i] = one; ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); } ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);CHKERRQ(ierr); } else { Mat AA,AB; ierr = MatMPIAIJGetSeqAIJ(A,&AA,&AB,NULL);CHKERRQ(ierr); ierr = MatGetRowIJ(AA,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);CHKERRQ(ierr); if (flg) { ierr = MatSeqAIJGetArray(AA,&array);CHKERRQ(ierr); for (i=0; i<ia[nrows]; i++) array[i] = one; ierr = MatSeqAIJRestoreArray(AA,&array);CHKERRQ(ierr); } ierr = MatRestoreRowIJ(AA,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);CHKERRQ(ierr); ierr = MatGetRowIJ(AB,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);CHKERRQ(ierr); if (flg) { ierr = MatSeqAIJGetArray(AB,&array);CHKERRQ(ierr); for (i=0; i<ia[nrows]; i++) array[i] = one; ierr = MatSeqAIJRestoreArray(AB,&array);CHKERRQ(ierr); } ierr = MatRestoreRowIJ(AB,0,PETSC_FALSE,PETSC_FALSE,&nrows,&ia,&ja,&flg);CHKERRQ(ierr); } /* Set up distributed array for coarse grid */ if (!Test_3D) { ierr = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.coarse.mx,user.coarse.my,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&user.coarse.da);CHKERRQ(ierr); } else { ierr = DMDACreate3d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.coarse.mx,user.coarse.my,user.coarse.mz,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,NULL,&user.coarse.da);CHKERRQ(ierr); } ierr = DMSetFromOptions(user.coarse.da);CHKERRQ(ierr); ierr = DMSetUp(user.coarse.da);CHKERRQ(ierr); /* Create interpolation between the fine and coarse grids */ ierr = DMCreateInterpolation(user.coarse.da,user.fine.da,&P,NULL);CHKERRQ(ierr); /* Get R = P^T */ ierr = MatTranspose(P,MAT_INITIAL_MATRIX,&R);CHKERRQ(ierr); /* C = R*A*P */ ierr = MatMatMatMult(R,A,P,MAT_INITIAL_MATRIX,fill,&C);CHKERRQ(ierr); ierr = MatMatMatMult(R,A,P,MAT_REUSE_MATRIX,fill,&C);CHKERRQ(ierr); /* Test C == PtAP */ ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,fill,&PtAP);CHKERRQ(ierr); ierr = MatPtAP(A,P,MAT_REUSE_MATRIX,fill,&PtAP);CHKERRQ(ierr); ierr = MatEqual(C,PtAP,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Matrices are not equal"); ierr = MatDestroy(&PtAP);CHKERRQ(ierr); /* Clean up */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = DMDestroy(&user.fine.da);CHKERRQ(ierr); ierr = DMDestroy(&user.coarse.da);CHKERRQ(ierr); ierr = MatDestroy(&P);CHKERRQ(ierr); ierr = MatDestroy(&R);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

Example: mpiexec -n <np> ./ex130 -f <matrix binary file> -mat_solver_package 1 -mat_superlu_equil \n\n"; #include <petscmat.h> #undef __FUNCT__ #define __FUNCT__ "main" int main(int argc,char **args) { Mat A,F; Vec u,x,b; PetscErrorCode ierr; PetscMPIInt rank,nproc; PetscInt m,n,nfact,ipack=0; PetscReal norm,tol=1.e-12,Anorm; IS perm,iperm; MatFactorInfo info; PetscBool flg,testMatSolve=PETSC_TRUE; PetscViewer fd; /* viewer */ char file[PETSC_MAX_PATH_LEN]; /* input file name */ PetscInitialize(&argc,&args,(char*)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &nproc);CHKERRQ(ierr); /* Determine file from which we read the matrix A */ ierr = PetscOptionsGetString(NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,1,"Must indicate binary file with the -f option"); /* Load matrix A */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr); ierr = VecLoad(b,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n); ierr = MatNorm(A,NORM_INFINITY,&Anorm);CHKERRQ(ierr); /* Create vectors */ ierr = VecDuplicate(b,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ /* Test LU Factorization */ ierr = MatGetOrdering(A,MATORDERINGNATURAL,&perm,&iperm);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-mat_solver_package",&ipack,NULL);CHKERRQ(ierr); switch (ipack) { case 1: #if defined(PETSC_HAVE_SUPERLU) if (!rank) printf(" SUPERLU LU:\n"); ierr = MatGetFactor(A,MATSOLVERSUPERLU,MAT_FACTOR_LU,&F);CHKERRQ(ierr); break; #endif case 2: #if defined(PETSC_HAVE_MUMPS) if (!rank) printf(" MUMPS LU:\n"); ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr); { /* test mumps options */ PetscInt icntl_7 = 5; ierr = MatMumpsSetIcntl(F,7,icntl_7);CHKERRQ(ierr); } break; #endif default: if (!rank) printf(" PETSC LU:\n"); ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } info.fill = 5.0; ierr = MatLUFactorSymbolic(F,A,perm,iperm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 1; nfact++) { if (!rank) printf(" %d-the LU numfactorization \n",nfact); ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr); /* Test MatSolve() */ if (testMatSolve) { ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the residual */ ierr = MatMult(A,x,u);CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(u,NORM_INFINITY,&norm);CHKERRQ(ierr); if (norm > tol) { if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve: rel residual %g/%g = %g, LU numfact %d\n",norm,Anorm,norm/Anorm,nfact);CHKERRQ(ierr); } } } } /* Free data structures */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = ISDestroy(&perm);CHKERRQ(ierr); ierr = ISDestroy(&iperm);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

/*@C AOCreateMapping - Creates a basic application mapping using two integer arrays. Input Parameters: + comm - MPI communicator that is to share AO . napp - size of integer arrays . myapp - integer array that defines an ordering - mypetsc - integer array that defines another ordering (may be NULL to indicate the identity ordering) Output Parameter: . aoout - the new application mapping Options Database Key: . -ao_view : call AOView() at the conclusion of AOCreateMapping() Level: beginner Notes: the arrays myapp and mypetsc need NOT contain the all the integers 0 to napp-1, that is there CAN be "holes" in the indices. Use AOCreateBasic() or AOCreateBasicIS() if they do not have holes for better performance. .keywords: AO, create .seealso: AOCreateBasic(), AOCreateBasic(), AOCreateMappingIS(), AODestroy() @*/ PetscErrorCode AOCreateMapping(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout) { AO ao; AO_Mapping *aomap; PetscInt *allpetsc, *allapp; PetscInt *petscPerm, *appPerm; PetscInt *petsc; PetscMPIInt size, rank,*lens, *disp,nnapp; PetscInt N, start; PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; PetscValidPointer(aoout,5); *aoout = 0; ierr = AOInitializePackage();CHKERRQ(ierr); ierr = PetscHeaderCreate(ao, AO_CLASSID, "AO", "Application Ordering", "AO", comm, AODestroy, AOView);CHKERRQ(ierr); ierr = PetscNewLog(ao,&aomap);CHKERRQ(ierr); ierr = PetscMemcpy(ao->ops, &AOps, sizeof(AOps));CHKERRQ(ierr); ao->data = (void*) aomap; /* transmit all lengths to all processors */ ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscMalloc2(size, &lens,size,&disp);CHKERRQ(ierr); nnapp = napp; ierr = MPI_Allgather(&nnapp, 1, MPI_INT, lens, 1, MPI_INT, comm);CHKERRQ(ierr); N = 0; for (i = 0; i < size; i++) { disp[i] = N; N += lens[i]; } aomap->N = N; ao->N = N; ao->n = N; /* If mypetsc is 0 then use "natural" numbering */ if (!mypetsc) { start = disp[rank]; ierr = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr); for (i = 0; i < napp; i++) petsc[i] = start + i; } else { petsc = (PetscInt*)mypetsc; } /* get all indices on all processors */ ierr = PetscMalloc4(N, &allapp,N,&appPerm,N,&allpetsc,N,&petscPerm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)myapp, napp, MPIU_INT, allapp, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)petsc, napp, MPIU_INT, allpetsc, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = PetscFree2(lens,disp);CHKERRQ(ierr); /* generate a list of application and PETSc node numbers */ ierr = PetscMalloc4(N, &aomap->app,N,&aomap->appPerm,N,&aomap->petsc,N,&aomap->petscPerm);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)ao, 4*N * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < N; i++) { appPerm[i] = i; petscPerm[i] = i; } ierr = PetscSortIntWithPermutation(N, allpetsc, petscPerm);CHKERRQ(ierr); ierr = PetscSortIntWithPermutation(N, allapp, appPerm);CHKERRQ(ierr); /* Form sorted arrays of indices */ for (i = 0; i < N; i++) { aomap->app[i] = allapp[appPerm[i]]; aomap->petsc[i] = allpetsc[petscPerm[i]]; } /* Invert petscPerm[] into aomap->petscPerm[] */ for (i = 0; i < N; i++) aomap->petscPerm[petscPerm[i]] = i; /* Form map between aomap->app[] and aomap->petsc[] */ for (i = 0; i < N; i++) aomap->appPerm[i] = aomap->petscPerm[appPerm[i]]; /* Invert appPerm[] into allapp[] */ for (i = 0; i < N; i++) allapp[appPerm[i]] = i; /* Form map between aomap->petsc[] and aomap->app[] */ for (i = 0; i < N; i++) aomap->petscPerm[i] = allapp[petscPerm[i]]; #if defined(PETSC_USE_DEBUG) /* Check that the permutations are complementary */ for (i = 0; i < N; i++) { if (i != aomap->appPerm[aomap->petscPerm[i]]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Invalid ordering"); } #endif /* Cleanup */ if (!mypetsc) { ierr = PetscFree(petsc);CHKERRQ(ierr); } ierr = PetscFree4(allapp,appPerm,allpetsc,petscPerm);CHKERRQ(ierr); ierr = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr); *aoout = ao; PetscFunctionReturn(0); }

int main(int argc,char **argv) { Mat A; /* operator matrix */ EPS eps; /* eigenproblem solver context */ EPSType type; PetscReal tol; PetscInt nev,maxit,its; char filename[PETSC_MAX_PATH_LEN]; PetscViewer viewer; PetscBool flg; PetscErrorCode ierr; SlepcInitialize(&argc,&argv,(char*)0,help); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Load the operator matrix that defines the eigensystem, Ax=kx - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = PetscPrintf(PETSC_COMM_WORLD,"\nEigenproblem stored in file.\n\n"); CHKERRQ(ierr); ierr = PetscOptionsGetString(NULL,"-file",filename,PETSC_MAX_PATH_LEN,&flg); CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,1,"Must indicate a file name with the -file option"); #if defined(PETSC_USE_COMPLEX) ierr = PetscPrintf(PETSC_COMM_WORLD," Reading COMPLEX matrix from a binary file...\n"); CHKERRQ(ierr); #else ierr = PetscPrintf(PETSC_COMM_WORLD," Reading REAL matrix from a binary file...\n"); CHKERRQ(ierr); #endif ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,filename,FILE_MODE_READ,&viewer); CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A); CHKERRQ(ierr); ierr = MatSetFromOptions(A); CHKERRQ(ierr); ierr = MatLoad(A,viewer); CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the eigensolver and set various options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create eigensolver context */ ierr = EPSCreate(PETSC_COMM_WORLD,&eps); CHKERRQ(ierr); /* Set operators. In this case, it is a standard eigenvalue problem */ ierr = EPSSetOperators(eps,A,NULL); CHKERRQ(ierr); /* Set solver parameters at runtime */ ierr = EPSSetFromOptions(eps); CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve the eigensystem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = EPSSolve(eps); CHKERRQ(ierr); ierr = EPSGetIterationNumber(eps,&its); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Number of iterations of the method: %D\n",its); CHKERRQ(ierr); /* Optional: Get some information from the solver and display it */ ierr = EPSGetType(eps,&type); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Solution method: %s\n\n",type); CHKERRQ(ierr); ierr = EPSGetDimensions(eps,&nev,NULL,NULL); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Number of requested eigenvalues: %D\n",nev); CHKERRQ(ierr); ierr = EPSGetTolerances(eps,&tol,&maxit); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Stopping condition: tol=%.4g, maxit=%D\n",(double)tol,maxit); CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Display solution and clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = EPSPrintSolution(eps,NULL); CHKERRQ(ierr); ierr = EPSDestroy(&eps); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = SlepcFinalize(); return 0; }

int main(int argc, char **argv) { TS ts; /* time-stepping context */ Vec x; /* State vector */ Mat J; /* Jacobian matrix */ AppCtx user; /* user-defined context */ PetscErrorCode ierr; PetscReal ftime; PetscInt its; PetscMPIInt size; PetscInitialize(&argc, &argv, NULL, help); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size); if(size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This is a uniprocessor example only"); /* * Allow user to set the grid dimensions and the equations parameters */ user.nb_cells = 50; user.alpha = 10.; user.beta = 1.; user.rho_a = 1.; user.rho_h = 2.; user.mu_a = 2.; user.mu_h = 3.; user.D_a = 0.; user.D_h = 30.; ierr = PetscOptionsBegin(PETSC_COMM_WORLD, "", "Problem settings", "PROBLEM"); ierr = PetscOptionsInt("-nb_cells", "Number of cells", "ex42.c",user.nb_cells, &user.nb_cells,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-alpha", "Autocatalysis factor", "ex42.c",user.alpha, &user.alpha,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-beta", "Inhibition factor", "ex42.c",user.beta, &user.beta,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-rho_a", "Default production of the activator", "ex42.c",user.rho_a, &user.rho_a,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-mu_a", "Degradation rate of the activator", "ex42.c",user.mu_a, &user.mu_a,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-D_a", "Diffusion rate of the activator", "ex42.c",user.D_a, &user.D_a,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-rho_h", "Default production of the inhibitor", "ex42.c",user.rho_h, &user.rho_h,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-mu_h", "Degradation rate of the inhibitor", "ex42.c",user.mu_h, &user.mu_h,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-D_h", "Diffusion rate of the inhibitor", "ex42.c",user.D_h, &user.D_h,NULL);CHKERRQ(ierr); ierr = PetscOptionsEnd(); ierr = PetscPrintf(PETSC_COMM_WORLD, "nb_cells: %D\n", user.nb_cells);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "alpha: %5.5g\n", user.alpha);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "beta: %5.5g\n", user.beta);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "rho_a: %5.5g\n", user.rho_a);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "mu_a: %5.5g\n", user.mu_a);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "D_a: %5.5g\n", user.D_a);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "rho_h: %5.5g\n", user.rho_h);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "mu_h: %5.5g\n", user.mu_h);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "D_h: %5.5g\n", user.D_h);CHKERRQ(ierr); /* * Create vector to hold the solution */ ierr = VecCreateSeq(PETSC_COMM_WORLD, 2*user.nb_cells, &x);CHKERRQ(ierr); /* * Create time-stepper context */ ierr = TSCreate(PETSC_COMM_WORLD, &ts);CHKERRQ(ierr); ierr = TSSetProblemType(ts, TS_NONLINEAR);CHKERRQ(ierr); /* * Tell the time-stepper context where to compute the solution */ ierr = TSSetSolution(ts, x);CHKERRQ(ierr); /* * Allocate the jacobian matrix */ ierr = MatCreateSeqAIJ(PETSC_COMM_WORLD, 2*user.nb_cells, 2*user.nb_cells, 4, 0, &J);CHKERRQ(ierr); /* * Provide the call-back for the non-linear function we are evaluating. */ ierr = TSSetRHSFunction(ts, NULL, RHSFunction, &user);CHKERRQ(ierr); /* * Set the Jacobian matrix and the function user to compute Jacobians */ ierr = TSSetRHSJacobian(ts, J, J, RHSJacobian, &user);CHKERRQ(ierr); /* * Set the function checking the domain */ ierr = TSSetFunctionDomainError(ts, &DomainErrorFunction);CHKERRQ(ierr); /* * Initialize the problem with random values */ ierr = FormInitialState(x, &user);CHKERRQ(ierr); /* * Read the solver type from options */ ierr = TSSetType(ts, TSPSEUDO);CHKERRQ(ierr); /* * Set a large number of timesteps and final duration time to insure * convergenge to steady state */ ierr = TSSetDuration(ts, 5000, 1e12); /* * Set a larger number of potential errors */ ierr = TSSetMaxStepRejections(ts, 50);CHKERRQ(ierr); /* * Also start with a very small dt */ ierr = TSSetTimeStep(ts, 0.05);CHKERRQ(ierr); /* * Set a larger time step increment */ ierr = TSPseudoSetTimeStepIncrement(ts, 1.5);CHKERRQ(ierr); /* * Let the user personalise TS */ ierr = TSSetFromOptions(ts);CHKERRQ(ierr); /* * Set the context for the time stepper */ ierr = TSSetApplicationContext(ts, &user);CHKERRQ(ierr); /* * Setup the time stepper, ready for evaluation */ ierr = TSSetUp(ts);CHKERRQ(ierr); /* * Perform the solve. */ ierr = TSSolve(ts, x);CHKERRQ(ierr); ierr = TSGetSolveTime(ts, &ftime);CHKERRQ(ierr); ierr = TSGetTimeStepNumber(ts,&its);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Number of time steps = %D, final time: %4.2e\nResult:\n\n", its, (double)ftime);CHKERRQ(ierr); ierr = PrintSolution(x, &user);CHKERRQ(ierr); /* * Free the data structures */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

static PetscErrorCode InitializeDomainData(DomainData *dd) { PetscErrorCode ierr; PetscMPIInt sizes,rank; PetscInt factor; PetscFunctionBeginUser; dd->gcomm = PETSC_COMM_WORLD; ierr = MPI_Comm_size(dd->gcomm,&sizes);CHKERRQ(ierr); ierr = MPI_Comm_rank(dd->gcomm,&rank);CHKERRQ(ierr); /* test data passed in */ if (sizes<2) SETERRQ(dd->gcomm,PETSC_ERR_USER,"This is not a uniprocessor test"); /* Get informations from command line */ /* Processors/subdomains per dimension */ /* Default is 1d problem */ dd->npx = sizes; dd->npy = 0; dd->npz = 0; dd->dim = 1; ierr = PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);CHKERRQ(ierr); if (dd->npy) dd->dim++; if (dd->npz) dd->dim++; /* Number of elements per dimension */ /* Default is one element per subdomain */ dd->nex = dd->npx; dd->ney = dd->npy; dd->nez = dd->npz; ierr = PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);CHKERRQ(ierr); if (!dd->npy) { dd->ney=0; dd->nez=0; } if (!dd->npz) dd->nez=0; /* Spectral degree */ dd->p = 3; ierr = PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);CHKERRQ(ierr); /* pure neumann problem? */ dd->pure_neumann = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);CHKERRQ(ierr); /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */ dd->DBC_zerorows = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);CHKERRQ(ierr); if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE; dd->scalingfactor = 1.0; factor = 0.0; ierr = PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);CHKERRQ(ierr); /* checkerboard pattern */ dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank)); /* test data passed in */ if (dd->dim==1) { if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 1D must be equal to npx"); if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim"); } else if (dd->dim==2) { if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 2D must be equal to npx*npy"); if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim"); } else { if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 3D must be equal to npx*npy*npz"); if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim"); } PetscFunctionReturn(0); }

static PetscErrorCode gsi_via_bit_mask(gs_id *gs) { PetscInt i, nel, *elms; PetscInt t1; PetscInt **reduce; PetscInt *map; PetscErrorCode ierr; PetscFunctionBegin; /* totally local removes ... ct_bits == 0 */ get_ngh_buf(gs); if (gs->level) {set_pairwise(gs);} if (gs->max_left_over) {set_tree(gs);} /* intersection local and pairwise/tree? */ gs->num_local_total = gs->num_local; gs->gop_local_reduce = gs->local_reduce; gs->num_gop_local_reduce = gs->num_local_reduce; map = gs->companion; /* is there any local compression */ if (!gs->num_local) { gs->local_strength = NONE; gs->num_local_gop = 0; } else { /* ok find intersection */ map = gs->companion; reduce = gs->local_reduce; for (i=0, t1=0; i<gs->num_local; i++, reduce++) { if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) || ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) { t1++; if (gs->num_local_reduce[i]<=0) SETERRQ(PETSC_ERR_PLIB,"nobody in list?"); gs->num_local_reduce[i] *= -1; } **reduce=map[**reduce]; } /* intersection is empty */ if (!t1) { gs->local_strength = FULL; gs->num_local_gop = 0; } /* intersection not empty */ else { gs->local_strength = PARTIAL; ierr = SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR);CHKERRQ(ierr); gs->num_local_gop = t1; gs->num_local_total = gs->num_local; gs->num_local -= t1; gs->gop_local_reduce = gs->local_reduce; gs->num_gop_local_reduce = gs->num_local_reduce; for (i=0; i<t1; i++) { if (gs->num_gop_local_reduce[i]>=0) SETERRQ(PETSC_ERR_PLIB,"they aren't negative?"); gs->num_gop_local_reduce[i] *= -1; gs->local_reduce++; gs->num_local_reduce++; } gs->local_reduce++; gs->num_local_reduce++; } } elms = gs->pw_elm_list; nel = gs->len_pw_list; for (i=0; i<nel; i++) {elms[i] = map[elms[i]];} elms = gs->tree_map_in; nel = gs->tree_map_sz; for (i=0; i<nel; i++) {elms[i] = map[elms[i]];} /* clean up */ free((void*) gs->local_elms); free((void*) gs->companion); free((void*) gs->elms); free((void*) gs->ngh_buf); gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; PetscFunctionReturn(0); }

PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) { Mat_MPIDense *c = (Mat_MPIDense*)C->data; Mat A = c->A; Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat; PetscErrorCode ierr; PetscMPIInt rank,size,tag0,tag1,idex,end,i; PetscInt N = C->cmap->N,rstart = C->rmap->rstart,count; const PetscInt **irow,**icol,*irow_i; PetscInt *nrow,*ncol,*w1,*w3,*w4,*rtable,start; PetscInt **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc; PetscInt nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr; PetscInt is_no,jmax,**rmap,*rmap_i; PetscInt ctr_j,*sbuf1_j,*rbuf1_i; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *r_status1,*r_status2,*s_status1,*s_status2; MPI_Comm comm; PetscScalar **rbuf2,**sbuf2; PetscBool sorted; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); tag0 = ((PetscObject)C)->tag; size = c->size; rank = c->rank; m = C->rmap->N; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); /* Check if the col indices are sorted */ for (i=0; i<ismax; i++) { ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted"); ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); } ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr); for (i=0; i<ismax; i++) { ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); } /* Create hash table for the mapping :row -> proc*/ for (i=0,j=0; i<size; i++) { jmax = C->rmap->range[i+1]; for (; j<jmax; j++) rtable[j] = i; } /* evaluate communication - mesg to who,length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr); ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ for (i=0; i<ismax; i++) { ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ jmax = nrow[i]; irow_i = irow[i]; for (j=0; j<jmax; j++) { row = irow_i[j]; proc = rtable[row]; w4[proc]++; } for (j=0; j<size; j++) { if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;} } } nrqs = 0; /* no of outgoing messages */ msz = 0; /* total mesg length (for all procs) */ w1[2*rank] = 0; /* no mesg sent to self */ w3[rank] = 0; for (i=0; i<size; i++) { if (w1[2*i]) { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */ } ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ for (i=0,j=0; i<size; i++) { if (w1[2*i]) { pa[j] = i; j++; } } /* Each message would have a header = 1 + 2*(no of IS) + data */ for (i=0; i<nrqs; i++) { j = pa[i]; w1[2*j] += w1[2*j+1] + 2* w3[j]; msz += w1[2*j]; } /* Do a global reduction to determine how many messages to expect*/ ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr); /* Allocate memory for recv buffers . Make sure rbuf1[0] exists by adding 1 to the buffer length */ ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr); ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr); for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz; /* Post the receives */ ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr); for (i=0; i<nrqr; ++i) { ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr); } /* Allocate Memory for outgoing messages */ ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr); ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); { PetscInt *iptr = tmp,ict = 0; for (i=0; i<nrqs; i++) { j = pa[i]; iptr += ict; sbuf1[j] = iptr; ict = w1[2*j]; } } /* Form the outgoing messages */ /* Initialize the header space */ for (i=0; i<nrqs; i++) { j = pa[i]; sbuf1[j][0] = 0; ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); ptr[j] = sbuf1[j] + 2*w3[j] + 1; } /* Parse the isrow and copy data into outbuf */ for (i=0; i<ismax; i++) { ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); irow_i = irow[i]; jmax = nrow[i]; for (j=0; j<jmax; j++) { /* parse the indices of each IS */ row = irow_i[j]; proc = rtable[row]; if (proc != rank) { /* copy to the outgoing buf*/ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } } /* Update the headers for the current IS */ for (j=0; j<size; j++) { /* Can Optimise this loop too */ if ((ctr_j = ctr[j])) { sbuf1_j = sbuf1[j]; k = ++sbuf1_j[0]; sbuf1_j[2*k] = ctr_j; sbuf1_j[2*k-1] = i; } } } /* Now post the sends */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); for (i=0; i<nrqs; ++i) { j = pa[i]; ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); } /* Post recieves to capture the row_data from other procs */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr); for (i=0; i<nrqs; i++) { j = pa[i]; count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N; ierr = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr); ierr = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); } /* Receive messages(row_nos) and then, pack and send off the rowvalues to the correct processors */ ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr); { PetscScalar *sbuf2_i,*v_start; PetscInt s_proc; for (i=0; i<nrqr; ++i) { ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); s_proc = r_status1[i].MPI_SOURCE; /* send processor */ rbuf1_i = rbuf1[idex]; /* Actual message from s_proc */ /* no of rows = end - start; since start is array idex[], 0idex, whel end is length of the buffer - which is 1idex */ start = 2*rbuf1_i[0] + 1; ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); /* allocate memory sufficinet to hold all the row values */ ierr = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr); sbuf2_i = sbuf2[idex]; /* Now pack the data */ for (j=start; j<end; j++) { row = rbuf1_i[j] - rstart; v_start = a->v + row; for (k=0; k<N; k++) { sbuf2_i[0] = v_start[0]; sbuf2_i++; v_start += C->rmap->n; } } /* Now send off the data */ ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr); } } /* End Send-Recv of IS + row_numbers */ ierr = PetscFree(r_status1);CHKERRQ(ierr); ierr = PetscFree(r_waits1);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} ierr = PetscFree(s_status1);CHKERRQ(ierr); ierr = PetscFree(s_waits1);CHKERRQ(ierr); /* Create the submatrices */ if (scall == MAT_REUSE_MATRIX) { for (i=0; i<ismax; i++) { mat = (Mat_SeqDense*)(submats[i]->data); if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr); submats[i]->factortype = C->factortype; } } else { for (i=0; i<ismax; i++) { ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr); ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(submats[i],NULL);CHKERRQ(ierr); } } /* Assemble the matrices */ { PetscInt col; PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi; for (i=0; i<ismax; i++) { mat = (Mat_SeqDense*)submats[i]->data; mat_v = a->v; imat_v = mat->v; irow_i = irow[i]; m = nrow[i]; for (j=0; j<m; j++) { row = irow_i[j]; proc = rtable[row]; if (proc == rank) { row = row - rstart; mat_vi = mat_v + row; imat_vi = imat_v + j; for (k=0; k<ncol[i]; k++) { col = icol[i][k]; imat_vi[k*m] = mat_vi[col*C->rmap->n]; } } } } } /* Create row map-> This maps c->row to submat->row for each submat*/ /* this is a very expensive operation wrt memory usage */ ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr); ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr); ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr); for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N; for (i=0; i<ismax; i++) { rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; j<jmax; j++) { rmap_i[irow_i[j]] = j; } } /* Now Receive the row_values and assemble the rest of the matrix */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); { PetscInt is_max,tmp1,col,*sbuf1_i,is_sz; PetscScalar *rbuf2_i,*imat_v,*imat_vi; for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */ ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr); /* Now dig out the corresponding sbuf1, which contains the IS data_structure */ sbuf1_i = sbuf1[pa[i]]; is_max = sbuf1_i[0]; ct1 = 2*is_max+1; rbuf2_i = rbuf2[i]; for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */ is_no = sbuf1_i[2*j-1]; is_sz = sbuf1_i[2*j]; mat = (Mat_SeqDense*)submats[is_no]->data; imat_v = mat->v; rmap_i = rmap[is_no]; m = nrow[is_no]; for (k=0; k<is_sz; k++,rbuf2_i+=N) { /* For each row */ row = sbuf1_i[ct1]; ct1++; row = rmap_i[row]; imat_vi = imat_v + row; for (l=0; l<ncol[is_no]; l++) { /* For each col */ col = icol[is_no][l]; imat_vi[l*m] = rbuf2_i[col]; } } } } } /* End Send-Recv of row_values */ ierr = PetscFree(r_status2);CHKERRQ(ierr); ierr = PetscFree(r_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} ierr = PetscFree(s_status2);CHKERRQ(ierr); ierr = PetscFree(s_waits2);CHKERRQ(ierr); /* Restore the indices */ for (i=0; i<ismax; i++) { ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); } /* Destroy allocated memory */ ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr); ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr); ierr = PetscFree(pa);CHKERRQ(ierr); for (i=0; i<nrqs; ++i) { ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr); } ierr = PetscFree(rbuf2);CHKERRQ(ierr); ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr); ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); ierr = PetscFree(rbuf1);CHKERRQ(ierr); for (i=0; i<nrqr; ++i) { ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); } ierr = PetscFree(sbuf2);CHKERRQ(ierr); ierr = PetscFree(rmap[0]);CHKERRQ(ierr); ierr = PetscFree(rmap);CHKERRQ(ierr); for (i=0; i<ismax; i++) { ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } PetscFunctionReturn(0); }

int main(int argc,char **argv) { TS ts; /* nonlinear solver */ Vec x; /* solution, residual vectors */ Mat A; /* Jacobian matrix */ PetscInt steps; PetscReal ftime = 0.5; PetscBool monitor = PETSC_FALSE; PetscScalar *x_ptr; PetscMPIInt size; struct _n_User user; PetscErrorCode ierr; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Initialize program - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ PetscInitialize(&argc,&argv,NULL,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!"); /* Register user-specified ARKIMEX method */ ierr = RegisterMyARK2();CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ user.imex = PETSC_TRUE; user.next_output = 0.0; user.mu = 1.0e6; ierr = PetscOptionsGetBool(NULL,"-imex",&user.imex,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-monitor",&monitor,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-mu","Stiffness parameter","<1.0e6>",user.mu,&user.mu,NULL); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create necessary matrix and vectors, solve same ODE on every process - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,2,2);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatGetVecs(A,&x,NULL);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create timestepping solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetType(ts,TSBEULER);CHKERRQ(ierr); ierr = TSSetRHSFunction(ts,NULL,RHSFunction,&user);CHKERRQ(ierr); ierr = TSSetIFunction(ts,NULL,IFunction,&user);CHKERRQ(ierr); ierr = TSSetIJacobian(ts,A,A,IJacobian,&user);CHKERRQ(ierr); ierr = TSSetDuration(ts,PETSC_DEFAULT,ftime);CHKERRQ(ierr); if (monitor) { ierr = TSMonitorSet(ts,Monitor,&user,NULL);CHKERRQ(ierr); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set initial conditions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecGetArray(x,&x_ptr);CHKERRQ(ierr); x_ptr[0] = 2.0; x_ptr[1] = -6.666665432100101e-01; ierr = VecRestoreArray(x,&x_ptr);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,0.0,.001);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetFromOptions(ts);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSolve(ts,x);CHKERRQ(ierr); ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr); ierr = TSGetTimeStepNumber(ts,&steps);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"steps %D, ftime %g\n",steps,(double)ftime);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = PetscFinalize(); PetscFunctionReturn(0); }

/*@C PetscViewerASCIIPrintf - Prints to a file, only from the first processor in the PetscViewer Not Collective, but only first processor in set has any effect Input Parameters: + viewer - optained with PetscViewerASCIIOpen() - format - the usual printf() format string Level: developer Fortran Note: The call sequence is PetscViewerASCIIPrintf(PetscViewer, character(*), int ierr) from Fortran. That is, you can only pass a single character string from Fortran. Concepts: PetscViewerASCII^printing Concepts: printing^to file Concepts: printf .seealso: PetscPrintf(), PetscSynchronizedPrintf(), PetscViewerASCIIOpen(), PetscViewerASCIIPushTab(), PetscViewerASCIIPopTab(), PetscViewerASCIISynchronizedPrintf(), PetscViewerCreate(), PetscViewerDestroy(), PetscViewerSetType(), PetscViewerASCIIGetPointer(), PetscViewerASCIISynchronizedAllow() @*/ PetscErrorCode PetscViewerASCIIPrintf(PetscViewer viewer,const char format[],...) { PetscViewer_ASCII *ascii = (PetscViewer_ASCII*)viewer->data; PetscMPIInt rank; PetscInt tab,intab = ascii->tab; PetscErrorCode ierr; FILE *fd = ascii->fd; PetscBool iascii,issingleton = PETSC_FALSE; int err; PetscFunctionBegin; PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,1); PetscValidCharPointer(format,2); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); if (!iascii) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not ASCII PetscViewer"); if (ascii->bviewer) { viewer = ascii->bviewer; ascii = (PetscViewer_ASCII*)viewer->data; fd = ascii->fd; issingleton = PETSC_TRUE; } ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)viewer),&rank);CHKERRQ(ierr); if (!rank) { va_list Argp; tab = intab; while (tab--) { ierr = PetscFPrintf(PETSC_COMM_SELF,fd," ");CHKERRQ(ierr); } va_start(Argp,format); ierr = (*PetscVFPrintf)(fd,format,Argp);CHKERRQ(ierr); err = fflush(fd); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); if (petsc_history) { va_start(Argp,format); tab = intab; while (tab--) { ierr = PetscFPrintf(PETSC_COMM_SELF,petsc_history," ");CHKERRQ(ierr); } ierr = (*PetscVFPrintf)(petsc_history,format,Argp);CHKERRQ(ierr); err = fflush(petsc_history); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } va_end(Argp); } else if (issingleton) { char *string; va_list Argp; size_t fullLength; PrintfQueue next; ierr = PetscNew(&next);CHKERRQ(ierr); if (petsc_printfqueue) { petsc_printfqueue->next = next; petsc_printfqueue = next; } else { petsc_printfqueuebase = petsc_printfqueue = next; } petsc_printfqueuelength++; next->size = QUEUESTRINGSIZE; ierr = PetscCalloc1(next->size, &next->string);CHKERRQ(ierr); string = next->string; tab = intab; tab *= 2; while (tab--) { *string++ = ' '; } va_start(Argp,format); ierr = PetscVSNPrintf(string,next->size-2*ascii->tab,format,&fullLength,Argp);CHKERRQ(ierr); va_end(Argp); } PetscFunctionReturn(0); }

int main(int argc, char *argv[]) { double r,dt; int n; unsigned long i,myNumSim,totalNumSim,numdim; /* double payoff; */ double *vol, *St0, x, totalx; int np,myid; time_t start,stop; double *eps; himaInfo hinfo; PetscRandom ran; PetscErrorCode ierr; PetscBool flg; PetscInitialize(&argc,&argv,(char *)0,help); #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This example does not work for scalar type complex\n"); #endif time(&start); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&ran); CHKERRQ(ierr); #if defined(PETSC_HAVE_SPRNG) ierr = PetscRandomSetType(ran,PETSCSPRNG); CHKERRQ(ierr); #elif defined(PETSC_HAVE_RAND) ierr = PetscRandomSetType(ran,PETSCRAND); CHKERRQ(ierr); #endif ierr = PetscRandomSetFromOptions(ran); CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &np); CHKERRQ(ierr); /* number of nodes */ ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &myid); CHKERRQ(ierr); /* my ranking */ ierr = PetscOptionsHasName(PETSC_NULL, "-check_generators", &flg); CHKERRQ(ierr); if (flg) { ierr = PetscRandomGetValue(ran,(PetscScalar *)&r); ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] rval: %g\n",myid,r); ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD); } hinfo.n = 31; hinfo.r = 0.04; hinfo.dt = 1.0/12; /* a month as a period */ hinfo.totalNumSim = 1000; ierr = PetscOptionsGetInt(PETSC_NULL,"-num_of_stocks",&(hinfo.n),PETSC_NULL); CHKERRQ(ierr); if (hinfo.n <1 || hinfo.n > 31) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only 31 stocks listed in stock.txt. num_of_stocks %D must between 1 and 31",hinfo.n); ierr = PetscOptionsGetReal(PETSC_NULL,"-interest_rate",&(hinfo.r),PETSC_NULL); CHKERRQ(ierr); ierr = PetscOptionsGetReal(PETSC_NULL,"-time_interval",&(hinfo.dt),PETSC_NULL); CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-num_of_simulations",&(hinfo.totalNumSim),PETSC_NULL); CHKERRQ(ierr); n = hinfo.n; r = hinfo.r; dt = hinfo.dt; totalNumSim = hinfo.totalNumSim; vol = hinfo.vol = (double *)malloc(sizeof(double)*(2*n+1)); St0 = hinfo.St0 = hinfo.vol + n; ierr = readData(PETSC_COMM_WORLD,&hinfo); CHKERRQ(ierr); numdim = n*(n+1)/2; if (numdim%2 == 1) { numdim++; } eps = (double *)malloc(sizeof(double)*numdim); myNumSim = divWork(myid,totalNumSim,np); x = 0; for (i=0; i<myNumSim; i++) { stdNormalArray(eps,numdim,ran); x += basketPayoff(vol,St0,n,r,dt,eps); } ierr = MPI_Reduce(&x, &totalx, 1, MPI_DOUBLE, MPI_SUM,0,PETSC_COMM_WORLD); CHKERRQ(ierr); time(&stop); /* payoff = exp(-r*dt*n)*(totalx/totalNumSim); ierr = PetscPrintf(PETSC_COMM_WORLD,"Option price = $%.3f using %ds of %s computation with %d %s for %d stocks, %d trading period per year, %.2f%% interest rate\n", payoff,(int)(stop - start),"parallel",np,"processors",n,(int)(1/dt),r);CHKERRQ(ierr); */ free(vol); free(eps); ierr = PetscRandomDestroy(&ran); CHKERRQ(ierr); PetscFinalize(); return 0; }

/*@C PetscViewerASCIISynchronizedPrintf - Prints synchronized output to the specified file from several processors. Output of the first processor is followed by that of the second, etc. Not Collective, must call collective PetscViewerFlush() to get the results out Input Parameters: + viewer - the ASCII PetscViewer - format - the usual printf() format string Level: intermediate Notes: You must have previously called PetscViewerASCIISynchronizeAllow() to allow this routine to be called. Fortran Note: Can only print a single character* string .seealso: PetscSynchronizedPrintf(), PetscSynchronizedFlush(), PetscFPrintf(), PetscFOpen(), PetscViewerFlush(), PetscViewerASCIIGetPointer(), PetscViewerDestroy(), PetscViewerASCIIOpen(), PetscViewerASCIIPrintf(), PetscViewerASCIISynchronizedAllow() @*/ PetscErrorCode PetscViewerASCIISynchronizedPrintf(PetscViewer viewer,const char format[],...) { PetscViewer_ASCII *vascii = (PetscViewer_ASCII*)viewer->data; PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt tab = vascii->tab; MPI_Comm comm; FILE *fp; PetscBool iascii; int err; PetscFunctionBegin; PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,1); PetscValidCharPointer(format,2); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); if (!iascii) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not ASCII PetscViewer"); ierr = MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);CHKERRQ(ierr); if (size > 1 && !vascii->allowsynchronized) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"First call PetscViewerASCIISynchronizedAllow() to allow this call"); if (!viewer->ops->flush) PetscFunctionReturn(0); /* This viewer obtained via PetscViewerGetSubcomm_ASCII(), should not participate. */ ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); fp = vascii->fd; ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* First processor prints immediately to fp */ if (!rank) { va_list Argp; while (tab--) { ierr = PetscFPrintf(PETSC_COMM_SELF,fp," ");CHKERRQ(ierr); } va_start(Argp,format); ierr = (*PetscVFPrintf)(fp,format,Argp);CHKERRQ(ierr); err = fflush(fp); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); if (petsc_history) { va_start(Argp,format); ierr = (*PetscVFPrintf)(petsc_history,format,Argp);CHKERRQ(ierr); err = fflush(petsc_history); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } va_end(Argp); } else { /* other processors add to local queue */ char *string; va_list Argp; size_t fullLength; PrintfQueue next; ierr = PetscNew(&next);CHKERRQ(ierr); if (petsc_printfqueue) { petsc_printfqueue->next = next; petsc_printfqueue = next; } else { petsc_printfqueuebase = petsc_printfqueue = next; } petsc_printfqueuelength++; next->size = QUEUESTRINGSIZE; ierr = PetscMalloc1(next->size, &next->string);CHKERRQ(ierr); ierr = PetscMemzero(next->string,next->size);CHKERRQ(ierr); string = next->string; tab *= 2; while (tab--) { *string++ = ' '; } va_start(Argp,format); ierr = PetscVSNPrintf(string,next->size-2*vascii->tab,format,&fullLength,Argp);CHKERRQ(ierr); va_end(Argp); } PetscFunctionReturn(0); }

PetscErrorCode SNESSolve_FAS(SNES snes) { PetscErrorCode ierr; PetscInt i, maxits; Vec X, F; PetscReal fnorm; SNES_FAS *fas = (SNES_FAS *)snes->data,*ffas; DM dm; PetscBool isFine; PetscFunctionBegin; maxits = snes->max_its; /* maximum number of iterations */ snes->reason = SNES_CONVERGED_ITERATING; X = snes->vec_sol; F = snes->vec_func; ierr = SNESFASCycleIsFine(snes, &isFine);CHKERRQ(ierr); /*norm setup */ ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); if (!snes->vec_func_init_set) { ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } } else { snes->vec_func_init_set = PETSC_FALSE; } if (!snes->norm_init_set) { ierr = VecNorm(F, NORM_2, &fnorm);CHKERRQ(ierr); /* fnorm <- ||F|| */ if (PetscIsInfOrNanReal(fnorm)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Infinite or not-a-number generated in norm"); ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); } else { fnorm = snes->norm_init; snes->norm_init_set = PETSC_FALSE; } snes->norm = fnorm; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); SNESLogConvHistory(snes,fnorm,0); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* set parameter for default relative tolerance convergence test */ snes->ttol = fnorm*snes->rtol; /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); if (isFine) { /* propagate scale-dependent data up the hierarchy */ ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr); for (ffas=fas; ffas->next; ffas=(SNES_FAS*)ffas->next->data) { DM dmcoarse; ierr = SNESGetDM(ffas->next,&dmcoarse);CHKERRQ(ierr); ierr = DMRestrict(dm,ffas->restrct,ffas->rscale,ffas->inject,dmcoarse);CHKERRQ(ierr); dm = dmcoarse; } } for (i = 0; i < maxits; i++) { /* Call general purpose update function */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } if (fas->fastype == SNES_FAS_MULTIPLICATIVE) { ierr = SNESFASCycle_Multiplicative(snes, X);CHKERRQ(ierr); } else { ierr = SNESFASCycle_Additive(snes, X);CHKERRQ(ierr); } /* check for FAS cycle divergence */ if (snes->reason != SNES_CONVERGED_ITERATING) { PetscFunctionReturn(0); } /* Monitor convergence */ ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->iter = i+1; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); SNESLogConvHistory(snes,snes->norm,0); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* Test for convergence */ if (isFine) { ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,snes->norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) break; } } if (i == maxits) { ierr = PetscInfo1(snes, "Maximum number of iterations has been reached: %D\n", maxits);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; } PetscFunctionReturn(0); }

PetscErrorCode KSPSolve_QCG(KSP ksp) { /* Correpondence with documentation above: B = g = gradient, X = s = step Note: This is not coded correctly for complex arithmetic! */ KSP_QCG *pcgP = (KSP_QCG*)ksp->data; MatStructure pflag; Mat Amat,Pmat; Vec W,WA,WA2,R,P,ASP,BS,X,B; PetscScalar scal,beta,rntrn,step; PetscReal q1,q2,xnorm,step1,step2,rnrm,btx,xtax; PetscReal ptasp,rtr,wtasp,bstp; PetscReal dzero = 0.0,bsnrm; PetscErrorCode ierr; PetscInt i,maxit; PC pc = ksp->pc; PCSide side; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); if (ksp->transpose_solve) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Currently does not support transpose solve"); ksp->its = 0; maxit = ksp->max_it; WA = ksp->work[0]; R = ksp->work[1]; P = ksp->work[2]; ASP = ksp->work[3]; BS = ksp->work[4]; W = ksp->work[5]; WA2 = ksp->work[6]; X = ksp->vec_sol; B = ksp->vec_rhs; if (pcgP->delta <= dzero) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Input error: delta <= 0"); ierr = KSPGetPCSide(ksp,&side);CHKERRQ(ierr); if (side != PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Requires symmetric preconditioner!"); /* Initialize variables */ ierr = VecSet(W,0.0);CHKERRQ(ierr); /* W = 0 */ ierr = VecSet(X,0.0);CHKERRQ(ierr); /* X = 0 */ ierr = PCGetOperators(pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); /* Compute: BS = D^{-1} B */ ierr = PCApplySymmetricLeft(pc,B,BS);CHKERRQ(ierr); ierr = VecNorm(BS,NORM_2,&bsnrm);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = bsnrm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,bsnrm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,bsnrm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,bsnrm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Compute the initial scaled direction and scaled residual */ ierr = VecCopy(BS,R);CHKERRQ(ierr); ierr = VecScale(R,-1.0);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = VecDotRealPart(R,R,&rtr);CHKERRQ(ierr); for (i=0; i<=maxit; i++) { ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); /* Compute: asp = D^{-T}*A*D^{-1}*p */ ierr = PCApplySymmetricRight(pc,P,WA);CHKERRQ(ierr); ierr = MatMult(Amat,WA,WA2);CHKERRQ(ierr); ierr = PCApplySymmetricLeft(pc,WA2,ASP);CHKERRQ(ierr); /* Check for negative curvature */ ierr = VecDotRealPart(P,ASP,&ptasp);CHKERRQ(ierr); if (ptasp <= dzero) { /* Scaled negative curvature direction: Compute a step so that ||w + step*p|| = delta and QS(w + step*p) is least */ if (!i) { ierr = VecCopy(P,X);CHKERRQ(ierr); ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr); scal = pcgP->delta / xnorm; ierr = VecScale(X,scal);CHKERRQ(ierr); } else { /* Compute roots of quadratic */ ierr = KSPQCGQuadraticRoots(W,P,pcgP->delta,&step1,&step2);CHKERRQ(ierr); ierr = VecDotRealPart(W,ASP,&wtasp);CHKERRQ(ierr); ierr = VecDotRealPart(BS,P,&bstp);CHKERRQ(ierr); ierr = VecCopy(W,X);CHKERRQ(ierr); q1 = step1*(bstp + wtasp + .5*step1*ptasp); q2 = step2*(bstp + wtasp + .5*step2*ptasp); if (q1 <= q2) { ierr = VecAXPY(X,step1,P);CHKERRQ(ierr); } else { ierr = VecAXPY(X,step2,P);CHKERRQ(ierr); } } pcgP->ltsnrm = pcgP->delta; /* convergence in direction of */ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; /* negative curvature */ if (!i) { ierr = PetscInfo1(ksp,"negative curvature: delta=%G\n",pcgP->delta);CHKERRQ(ierr); } else { ierr = PetscInfo3(ksp,"negative curvature: step1=%G, step2=%G, delta=%G\n",step1,step2,pcgP->delta);CHKERRQ(ierr); } } else { /* Compute step along p */ step = rtr/ptasp; ierr = VecCopy(W,X);CHKERRQ(ierr); /* x = w */ ierr = VecAXPY(X,step,P);CHKERRQ(ierr); /* x <- step*p + x */ ierr = VecNorm(X,NORM_2,&pcgP->ltsnrm);CHKERRQ(ierr); if (pcgP->ltsnrm > pcgP->delta) { /* Since the trial iterate is outside the trust region, evaluate a constrained step along p so that ||w + step*p|| = delta The positive step is always better in this case. */ if (!i) { scal = pcgP->delta / pcgP->ltsnrm; ierr = VecScale(X,scal);CHKERRQ(ierr); } else { /* Compute roots of quadratic */ ierr = KSPQCGQuadraticRoots(W,P,pcgP->delta,&step1,&step2);CHKERRQ(ierr); ierr = VecCopy(W,X);CHKERRQ(ierr); ierr = VecAXPY(X,step1,P);CHKERRQ(ierr); /* x <- step1*p + x */ } pcgP->ltsnrm = pcgP->delta; ksp->reason = KSP_CONVERGED_CG_CONSTRAINED; /* convergence along constrained step */ if (!i) { ierr = PetscInfo1(ksp,"constrained step: delta=%G\n",pcgP->delta);CHKERRQ(ierr); } else { ierr = PetscInfo3(ksp,"constrained step: step1=%G, step2=%G, delta=%G\n",step1,step2,pcgP->delta);CHKERRQ(ierr); } } else { /* Evaluate the current step */ ierr = VecCopy(X,W);CHKERRQ(ierr); /* update interior iterate */ ierr = VecAXPY(R,-step,ASP);CHKERRQ(ierr); /* r <- -step*asp + r */ ierr = VecNorm(R,NORM_2,&rnrm);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = rnrm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rnrm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,rnrm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,rnrm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { /* convergence for */ ierr = PetscInfo3(ksp,"truncated step: step=%G, rnrm=%G, delta=%G\n",PetscRealPart(step),rnrm,pcgP->delta);CHKERRQ(ierr); } } } if (ksp->reason) break; /* Convergence has been attained */ else { /* Compute a new AS-orthogonal direction */ ierr = VecDot(R,R,&rntrn);CHKERRQ(ierr); beta = rntrn/rtr; ierr = VecAYPX(P,beta,R);CHKERRQ(ierr); /* p <- r + beta*p */ rtr = PetscRealPart(rntrn); } } if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; /* Unscale x */ ierr = VecCopy(X,WA2);CHKERRQ(ierr); ierr = PCApplySymmetricRight(pc,WA2,X);CHKERRQ(ierr); ierr = MatMult(Amat,X,WA);CHKERRQ(ierr); ierr = VecDotRealPart(B,X,&btx);CHKERRQ(ierr); ierr = VecDotRealPart(X,WA,&xtax);CHKERRQ(ierr); pcgP->quadratic = btx + .5*xtax; PetscFunctionReturn(0); }

static PetscErrorCode TaoSolve_BLMVM(Tao tao) { PetscErrorCode ierr; TAO_BLMVM *blmP = (TAO_BLMVM *)tao->data; TaoConvergedReason reason = TAO_CONTINUE_ITERATING; TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; PetscReal f, fold, gdx, gnorm; PetscReal stepsize = 1.0,delta; PetscFunctionBegin; /* Project initial point onto bounds */ ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr); ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); /* Check convergence criteria */ ierr = TaoComputeObjectiveAndGradient(tao, tao->solution,&f,blmP->unprojected_gradient);CHKERRQ(ierr); ierr = VecBoundGradientProjection(blmP->unprojected_gradient,tao->solution, tao->XL,tao->XU,tao->gradient);CHKERRQ(ierr); ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf pr NaN"); ierr = TaoMonitor(tao, tao->niter, f, gnorm, 0.0, stepsize, &reason);CHKERRQ(ierr); if (reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); /* Set initial scaling for the function */ if (f != 0.0) { delta = 2.0*PetscAbsScalar(f) / (gnorm*gnorm); } else { delta = 2.0 / (gnorm*gnorm); } ierr = MatLMVMSetDelta(blmP->M,delta);CHKERRQ(ierr); /* Set counter for gradient/reset steps */ blmP->grad = 0; blmP->reset = 0; /* Have not converged; continue with Newton method */ while (reason == TAO_CONTINUE_ITERATING) { /* Compute direction */ ierr = MatLMVMUpdate(blmP->M, tao->solution, tao->gradient);CHKERRQ(ierr); ierr = MatLMVMSolve(blmP->M, blmP->unprojected_gradient, tao->stepdirection);CHKERRQ(ierr); ierr = VecBoundGradientProjection(tao->stepdirection,tao->solution,tao->XL,tao->XU,tao->gradient);CHKERRQ(ierr); /* Check for success (descent direction) */ ierr = VecDot(blmP->unprojected_gradient, tao->gradient, &gdx);CHKERRQ(ierr); if (gdx <= 0) { /* Step is not descent or solve was not successful Use steepest descent direction (scaled) */ ++blmP->grad; if (f != 0.0) { delta = 2.0*PetscAbsScalar(f) / (gnorm*gnorm); } else { delta = 2.0 / (gnorm*gnorm); } ierr = MatLMVMSetDelta(blmP->M,delta);CHKERRQ(ierr); ierr = MatLMVMReset(blmP->M);CHKERRQ(ierr); ierr = MatLMVMUpdate(blmP->M, tao->solution, blmP->unprojected_gradient);CHKERRQ(ierr); ierr = MatLMVMSolve(blmP->M,blmP->unprojected_gradient, tao->stepdirection);CHKERRQ(ierr); } ierr = VecScale(tao->stepdirection,-1.0);CHKERRQ(ierr); /* Perform the linesearch */ fold = f; ierr = VecCopy(tao->solution, blmP->Xold);CHKERRQ(ierr); ierr = VecCopy(blmP->unprojected_gradient, blmP->Gold);CHKERRQ(ierr); ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr); ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &f, blmP->unprojected_gradient, tao->stepdirection, &stepsize, &ls_status);CHKERRQ(ierr); ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { /* Linesearch failed Reset factors and use scaled (projected) gradient step */ ++blmP->reset; f = fold; ierr = VecCopy(blmP->Xold, tao->solution);CHKERRQ(ierr); ierr = VecCopy(blmP->Gold, blmP->unprojected_gradient);CHKERRQ(ierr); if (f != 0.0) { delta = 2.0* PetscAbsScalar(f) / (gnorm*gnorm); } else { delta = 2.0/ (gnorm*gnorm); } ierr = MatLMVMSetDelta(blmP->M,delta);CHKERRQ(ierr); ierr = MatLMVMReset(blmP->M);CHKERRQ(ierr); ierr = MatLMVMUpdate(blmP->M, tao->solution, blmP->unprojected_gradient);CHKERRQ(ierr); ierr = MatLMVMSolve(blmP->M, blmP->unprojected_gradient, tao->stepdirection);CHKERRQ(ierr); ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr); /* This may be incorrect; linesearch has values fo stepmax and stepmin that should be reset. */ ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr); ierr = TaoLineSearchApply(tao->linesearch,tao->solution,&f, blmP->unprojected_gradient, tao->stepdirection, &stepsize, &ls_status);CHKERRQ(ierr); ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { tao->reason = TAO_DIVERGED_LS_FAILURE; break; } } /* Check for converged */ ierr = VecBoundGradientProjection(blmP->unprojected_gradient, tao->solution, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(tao->gradient, NORM_2, &gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Not-a-Number"); tao->niter++; ierr = TaoMonitor(tao, tao->niter, f, gnorm, 0.0, stepsize, &reason);CHKERRQ(ierr); } PetscFunctionReturn(0); }

int main(int argc,char **args) { Mat C; PetscInt i,j,m = 3,n = 3,Ii,J; PetscErrorCode ierr; PetscBool flg; PetscScalar v; IS perm,iperm; Vec x,u,b,y; PetscReal norm,tol=PETSC_SMALL; MatFactorInfo info; PetscMPIInt size; PetscInitialize(&argc,&args,(char*)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size); CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,1,"This is a uniprocessor example only!"); ierr = MatCreate(PETSC_COMM_WORLD,&C); CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n); CHKERRQ(ierr); ierr = MatSetFromOptions(C); CHKERRQ(ierr); ierr = MatSetUp(C); CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-symmetric",&flg); CHKERRQ(ierr); if (flg) { /* Treat matrix as symmetric only if we set this flag */ ierr = MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE); CHKERRQ(ierr); ierr = MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); CHKERRQ(ierr); } /* Create the matrix for the five point stencil, YET AGAIN */ for (i=0; i<m; i++) { for (j=0; j<n; j++) { v = -1.0; Ii = j + n*i; if (i>0) { J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES); CHKERRQ(ierr); } if (i<m-1) { J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES); CHKERRQ(ierr); } if (j>0) { J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES); CHKERRQ(ierr); } if (j<n-1) { J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES); CHKERRQ(ierr); } v = 4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,INSERT_VALUES); CHKERRQ(ierr); } } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatGetOrdering(C,MATORDERINGRCM,&perm,&iperm); CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); ierr = ISView(perm,PETSC_VIEWER_STDOUT_SELF); CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,m*n,&u); CHKERRQ(ierr); ierr = VecSet(u,1.0); CHKERRQ(ierr); ierr = VecDuplicate(u,&x); CHKERRQ(ierr); ierr = VecDuplicate(u,&b); CHKERRQ(ierr); ierr = VecDuplicate(u,&y); CHKERRQ(ierr); ierr = MatMult(C,u,b); CHKERRQ(ierr); ierr = VecCopy(b,y); CHKERRQ(ierr); ierr = VecScale(y,2.0); CHKERRQ(ierr); ierr = MatNorm(C,NORM_FROBENIUS,&norm); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Frobenius norm of matrix %g\n",(double)norm); CHKERRQ(ierr); ierr = MatNorm(C,NORM_1,&norm); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"One norm of matrix %g\n",(double)norm); CHKERRQ(ierr); ierr = MatNorm(C,NORM_INFINITY,&norm); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Infinity norm of matrix %g\n",(double)norm); CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info); CHKERRQ(ierr); info.fill = 2.0; info.dtcol = 0.0; info.zeropivot = 1.e-14; info.pivotinblocks = 1.0; ierr = MatLUFactor(C,perm,iperm,&info); CHKERRQ(ierr); /* Test MatSolve */ ierr = MatSolve(C,b,x); CHKERRQ(ierr); ierr = VecView(b,PETSC_VIEWER_STDOUT_SELF); CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_SELF); CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,u); CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm); CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve: Norm of error %g\n",(double)norm); CHKERRQ(ierr); } /* Test MatSolveAdd */ ierr = MatSolveAdd(C,b,y,x); CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,y); CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,u); CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm); CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolveAdd(): Norm of error %g\n",(double)norm); CHKERRQ(ierr); } ierr = ISDestroy(&perm); CHKERRQ(ierr); ierr = ISDestroy(&iperm); CHKERRQ(ierr); ierr = VecDestroy(&u); CHKERRQ(ierr); ierr = VecDestroy(&y); CHKERRQ(ierr); ierr = VecDestroy(&b); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = MatDestroy(&C); CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

int main(int argc,char **argv) { TS ts; SNES snes_alg; PetscErrorCode ierr; PetscMPIInt size; Userctx user; PetscViewer Xview,Ybusview; Vec X; Mat J; PetscInt i; ierr = PetscInitialize(&argc,&argv,"petscoptions",help);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Only for sequential runs"); user.neqs_gen = 9*ngen; /* # eqs. for generator subsystem */ user.neqs_net = 2*nbus; /* # eqs. for network subsystem */ user.neqs_pgrid = user.neqs_gen + user.neqs_net; /* Create indices for differential and algebraic equations */ PetscInt *idx2; ierr = PetscMalloc1(7*ngen,&idx2);CHKERRQ(ierr); for (i=0; i<ngen; i++) { idx2[7*i] = 9*i; idx2[7*i+1] = 9*i+1; idx2[7*i+2] = 9*i+2; idx2[7*i+3] = 9*i+3; idx2[7*i+4] = 9*i+6; idx2[7*i+5] = 9*i+7; idx2[7*i+6] = 9*i+8; } ierr = ISCreateGeneral(PETSC_COMM_WORLD,7*ngen,idx2,PETSC_COPY_VALUES,&user.is_diff);CHKERRQ(ierr); ierr = ISComplement(user.is_diff,0,user.neqs_pgrid,&user.is_alg);CHKERRQ(ierr); ierr = PetscFree(idx2);CHKERRQ(ierr); /* Read initial voltage vector and Ybus */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"X.bin",FILE_MODE_READ,&Xview);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"Ybus.bin",FILE_MODE_READ,&Ybusview);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&user.V0);CHKERRQ(ierr); ierr = VecSetSizes(user.V0,PETSC_DECIDE,user.neqs_net);CHKERRQ(ierr); ierr = VecLoad(user.V0,Xview);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&user.Ybus);CHKERRQ(ierr); ierr = MatSetSizes(user.Ybus,PETSC_DECIDE,PETSC_DECIDE,user.neqs_net,user.neqs_net);CHKERRQ(ierr); ierr = MatSetType(user.Ybus,MATBAIJ);CHKERRQ(ierr); /* ierr = MatSetBlockSize(user.Ybus,2);CHKERRQ(ierr); */ ierr = MatLoad(user.Ybus,Ybusview);CHKERRQ(ierr); /* Set run time options */ ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Transient stability fault options","");CHKERRQ(ierr); { user.tfaulton = 1.0; user.tfaultoff = 1.2; user.Rfault = 0.0001; user.faultbus = 8; ierr = PetscOptionsReal("-tfaulton","","",user.tfaulton,&user.tfaulton,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-tfaultoff","","",user.tfaultoff,&user.tfaultoff,NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-faultbus","","",user.faultbus,&user.faultbus,NULL);CHKERRQ(ierr); user.t0 = 0.0; user.tmax = 5.0; ierr = PetscOptionsReal("-t0","","",user.t0,&user.t0,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-tmax","","",user.tmax,&user.tmax,NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscViewerDestroy(&Xview);CHKERRQ(ierr); ierr = PetscViewerDestroy(&Ybusview);CHKERRQ(ierr); /* Create DMs for generator and network subsystems */ ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,user.neqs_gen,1,1,NULL,&user.dmgen);CHKERRQ(ierr); ierr = DMSetOptionsPrefix(user.dmgen,"dmgen_");CHKERRQ(ierr); ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,user.neqs_net,1,1,NULL,&user.dmnet);CHKERRQ(ierr); ierr = DMSetOptionsPrefix(user.dmnet,"dmnet_");CHKERRQ(ierr); /* Create a composite DM packer and add the two DMs */ ierr = DMCompositeCreate(PETSC_COMM_WORLD,&user.dmpgrid);CHKERRQ(ierr); ierr = DMSetOptionsPrefix(user.dmpgrid,"pgrid_");CHKERRQ(ierr); ierr = DMCompositeAddDM(user.dmpgrid,user.dmgen);CHKERRQ(ierr); ierr = DMCompositeAddDM(user.dmpgrid,user.dmnet);CHKERRQ(ierr); ierr = DMCreateGlobalVector(user.dmpgrid,&X);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&J);CHKERRQ(ierr); ierr = MatSetSizes(J,PETSC_DECIDE,PETSC_DECIDE,user.neqs_pgrid,user.neqs_pgrid);CHKERRQ(ierr); ierr = MatSetFromOptions(J);CHKERRQ(ierr); ierr = PreallocateJacobian(J,&user);CHKERRQ(ierr); /* Create matrix to save solutions at each time step */ user.stepnum = 0; ierr = MatCreateSeqDense(PETSC_COMM_SELF,user.neqs_pgrid+1,1002,NULL,&user.Sol);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create timestepping solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); ierr = TSSetEquationType(ts,TS_EQ_DAE_IMPLICIT_INDEX1);CHKERRQ(ierr); ierr = TSARKIMEXSetFullyImplicit(ts,PETSC_TRUE);CHKERRQ(ierr); ierr = TSSetIFunction(ts,NULL,(TSIFunction) IFunction,&user);CHKERRQ(ierr); ierr = TSSetIJacobian(ts,J,J,(TSIJacobian)IJacobian,&user);CHKERRQ(ierr); ierr = TSSetApplicationContext(ts,&user);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set initial conditions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = SetInitialGuess(X,&user);CHKERRQ(ierr); /* Just to set up the Jacobian structure */ Vec Xdot; ierr = VecDuplicate(X,&Xdot);CHKERRQ(ierr); ierr = IJacobian(ts,0.0,X,Xdot,0.0,J,J,&user);CHKERRQ(ierr); ierr = VecDestroy(&Xdot);CHKERRQ(ierr); /* Save initial solution */ PetscScalar *x,*mat; PetscInt idx=user.stepnum*(user.neqs_pgrid+1); ierr = MatDenseGetArray(user.Sol,&mat);CHKERRQ(ierr); ierr = VecGetArray(X,&x);CHKERRQ(ierr); mat[idx] = 0.0; ierr = PetscMemcpy(mat+idx+1,x,user.neqs_pgrid*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatDenseRestoreArray(user.Sol,&mat);CHKERRQ(ierr); ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); user.stepnum++; ierr = TSSetDuration(ts,1000,user.tfaulton);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,0.0,0.01);CHKERRQ(ierr); ierr = TSSetFromOptions(ts);CHKERRQ(ierr); ierr = TSSetPostStep(ts,SaveSolution);CHKERRQ(ierr); user.alg_flg = PETSC_FALSE; /* Prefault period */ ierr = TSSolve(ts,X);CHKERRQ(ierr); /* Create the nonlinear solver for solving the algebraic system */ /* Note that although the algebraic system needs to be solved only for Idq and V, we reuse the entire system including xgen. The xgen variables are held constant by setting their residuals to 0 and putting a 1 on the Jacobian diagonal for xgen rows */ Vec F_alg; ierr = VecDuplicate(X,&F_alg);CHKERRQ(ierr); ierr = SNESCreate(PETSC_COMM_WORLD,&snes_alg);CHKERRQ(ierr); ierr = SNESSetFunction(snes_alg,F_alg,AlgFunction,&user);CHKERRQ(ierr); ierr = MatZeroEntries(J);CHKERRQ(ierr); ierr = SNESSetJacobian(snes_alg,J,J,AlgJacobian,&user);CHKERRQ(ierr); ierr = SNESSetOptionsPrefix(snes_alg,"alg_");CHKERRQ(ierr); ierr = SNESSetFromOptions(snes_alg);CHKERRQ(ierr); /* Apply disturbance - resistive fault at user.faultbus */ /* This is done by adding shunt conductance to the diagonal location in the Ybus matrix */ PetscInt row_loc,col_loc; PetscScalar val; row_loc = 2*user.faultbus; col_loc = 2*user.faultbus+1; /* Location for G */ val = 1/user.Rfault; ierr = MatSetValues(user.Ybus,1,&row_loc,1,&col_loc,&val,ADD_VALUES);CHKERRQ(ierr); row_loc = 2*user.faultbus+1; col_loc = 2*user.faultbus; /* Location for G */ val = 1/user.Rfault; ierr = MatSetValues(user.Ybus,1,&row_loc,1,&col_loc,&val,ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(user.Ybus,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(user.Ybus,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); user.alg_flg = PETSC_TRUE; /* Solve the algebraic equations */ ierr = SNESSolve(snes_alg,NULL,X);CHKERRQ(ierr); /* Save fault-on solution */ idx = user.stepnum*(user.neqs_pgrid+1); ierr = MatDenseGetArray(user.Sol,&mat);CHKERRQ(ierr); ierr = VecGetArray(X,&x);CHKERRQ(ierr); mat[idx] = user.tfaulton; ierr = PetscMemcpy(mat+idx+1,x,user.neqs_pgrid*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatDenseRestoreArray(user.Sol,&mat);CHKERRQ(ierr); ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); user.stepnum++; /* Disturbance period */ ierr = TSSetDuration(ts,1000,user.tfaultoff);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,user.tfaulton,.01);CHKERRQ(ierr); user.alg_flg = PETSC_FALSE; ierr = TSSolve(ts,X);CHKERRQ(ierr); /* Remove the fault */ row_loc = 2*user.faultbus; col_loc = 2*user.faultbus+1; val = -1/user.Rfault; ierr = MatSetValues(user.Ybus,1,&row_loc,1,&col_loc,&val,ADD_VALUES);CHKERRQ(ierr); row_loc = 2*user.faultbus+1; col_loc = 2*user.faultbus; val = -1/user.Rfault; ierr = MatSetValues(user.Ybus,1,&row_loc,1,&col_loc,&val,ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(user.Ybus,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(user.Ybus,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatZeroEntries(J);CHKERRQ(ierr); user.alg_flg = PETSC_TRUE; /* Solve the algebraic equations */ ierr = SNESSolve(snes_alg,NULL,X);CHKERRQ(ierr); /* Save tfault off solution */ idx = user.stepnum*(user.neqs_pgrid+1); ierr = MatDenseGetArray(user.Sol,&mat);CHKERRQ(ierr); ierr = VecGetArray(X,&x);CHKERRQ(ierr); mat[idx] = user.tfaultoff; ierr = PetscMemcpy(mat+idx+1,x,user.neqs_pgrid*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatDenseRestoreArray(user.Sol,&mat);CHKERRQ(ierr); ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); user.stepnum++; /* Post-disturbance period */ ierr = TSSetDuration(ts,1000,user.tmax);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,user.tfaultoff,.01);CHKERRQ(ierr); user.alg_flg = PETSC_TRUE; ierr = TSSolve(ts,X);CHKERRQ(ierr); ierr = MatAssemblyBegin(user.Sol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(user.Sol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); Mat A; PetscScalar *amat; ierr = MatCreateSeqDense(PETSC_COMM_SELF,user.neqs_pgrid+1,user.stepnum,NULL,&A);CHKERRQ(ierr); ierr = MatDenseGetArray(user.Sol,&mat);CHKERRQ(ierr); ierr = MatDenseGetArray(A,&amat);CHKERRQ(ierr); ierr = PetscMemcpy(amat,mat,(user.stepnum*(user.neqs_pgrid+1))*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatDenseRestoreArray(A,&amat);CHKERRQ(ierr); ierr = MatDenseRestoreArray(user.Sol,&mat);CHKERRQ(ierr); PetscViewer viewer; ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"out.bin",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); ierr = MatView(A,viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = SNESDestroy(&snes_alg);CHKERRQ(ierr); ierr = VecDestroy(&F_alg);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = MatDestroy(&user.Ybus);CHKERRQ(ierr); ierr = MatDestroy(&user.Sol);CHKERRQ(ierr); ierr = VecDestroy(&X);CHKERRQ(ierr); ierr = VecDestroy(&user.V0);CHKERRQ(ierr); ierr = DMDestroy(&user.dmgen);CHKERRQ(ierr); ierr = DMDestroy(&user.dmnet);CHKERRQ(ierr); ierr = DMDestroy(&user.dmpgrid);CHKERRQ(ierr); ierr = ISDestroy(&user.is_diff);CHKERRQ(ierr); ierr = ISDestroy(&user.is_alg);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = PetscFinalize(); return(0); }

PetscInt main(PetscInt argc,char **args) { Mat A,A_dense,B; Vec *evecs; PetscBool flg,TestZHEEV=PETSC_TRUE,TestZHEEVX=PETSC_FALSE,TestZHEGV=PETSC_FALSE,TestZHEGVX=PETSC_FALSE; PetscErrorCode ierr; PetscBool isSymmetric; PetscScalar sigma,*arrayA,*arrayB,*evecs_array=NULL,*work; PetscReal *evals,*rwork; PetscMPIInt size; PetscInt m,i,j,nevs,il,iu,cklvl=2; PetscReal vl,vu,abstol=1.e-8; PetscBLASInt *iwork,*ifail,lwork,lierr,bn; PetscReal tols[2]; PetscInt nzeros[2],nz; PetscReal ratio; PetscScalar v,none = -1.0,sigma2,pfive = 0.5,*xa; PetscRandom rctx; PetscReal h2,sigma1 = 100.0; PetscInt dim,Ii,J,Istart,Iend,n = 6,its,use_random,one=1; PetscInitialize(&argc,&args,(char*)0,help); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,1,"This example requires complex numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); ierr = PetscOptionsHasName(NULL,NULL, "-test_zheevx", &flg);CHKERRQ(ierr); if (flg) { TestZHEEV = PETSC_FALSE; TestZHEEVX = PETSC_TRUE; } ierr = PetscOptionsHasName(NULL,NULL, "-test_zhegv", &flg);CHKERRQ(ierr); if (flg) { TestZHEEV = PETSC_FALSE; TestZHEGV = PETSC_TRUE; } ierr = PetscOptionsHasName(NULL,NULL, "-test_zhegvx", &flg);CHKERRQ(ierr); if (flg) { TestZHEEV = PETSC_FALSE; TestZHEGVX = PETSC_TRUE; } ierr = PetscOptionsGetReal(NULL,NULL,"-sigma1",&sigma1,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); dim = n*n; ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,dim,dim);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,NULL,"-norandom",&flg);CHKERRQ(ierr); if (flg) use_random = 0; else use_random = 1; if (use_random) { ierr = PetscRandomCreate(PETSC_COMM_SELF,&rctx);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); ierr = PetscRandomSetInterval(rctx,0.0,PETSC_i);CHKERRQ(ierr); } else { sigma2 = 10.0*PETSC_i; } h2 = 1.0/((n+1)*(n+1)); for (Ii=0; Ii<dim; Ii++) { v = -1.0; i = Ii/n; j = Ii - i*n; if (i>0) { J = Ii-n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (i<n-1) { J = Ii+n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (j>0) { J = Ii-1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (j<n-1) { J = Ii+1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (use_random) {ierr = PetscRandomGetValue(rctx,&sigma2);CHKERRQ(ierr);} v = 4.0 - sigma1*h2; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } /* make A complex Hermitian */ v = sigma2*h2; Ii = 0; J = 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); v = -sigma2*h2; ierr = MatSetValues(A,1,&J,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); if (use_random) {ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);} ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); m = n = dim; /* Check whether A is symmetric */ ierr = PetscOptionsHasName(NULL,NULL, "-check_symmetry", &flg);CHKERRQ(ierr); if (flg) { Mat Trans; ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Trans); ierr = MatEqual(A, Trans, &isSymmetric); if (!isSymmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"A must be symmetric"); ierr = MatDestroy(&Trans);CHKERRQ(ierr); } /* Convert aij matrix to MatSeqDense for LAPACK */ ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); if (flg) { ierr = MatDuplicate(A,MAT_COPY_VALUES,&A_dense);CHKERRQ(ierr); } else { ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&A_dense);CHKERRQ(ierr); } ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,dim,dim);CHKERRQ(ierr); ierr = MatSetType(B,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(B);CHKERRQ(ierr); v = 1.0; for (Ii=0; Ii<dim; Ii++) { ierr = MatSetValues(B,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } /* Solve standard eigenvalue problem: A*x = lambda*x */ /*===================================================*/ ierr = PetscBLASIntCast(2*n,&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); ierr = PetscMalloc1(n,&evals);CHKERRQ(ierr); ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); ierr = MatDenseGetArray(A_dense,&arrayA);CHKERRQ(ierr); if (TestZHEEV) { /* test zheev() */ printf(" LAPACKsyev: compute all %d eigensolutions...\n",m); ierr = PetscMalloc1(3*n-2,&rwork);CHKERRQ(ierr); LAPACKsyev_("V","U",&bn,arrayA,&bn,evals,work,&lwork,rwork,&lierr); ierr = PetscFree(rwork);CHKERRQ(ierr); evecs_array = arrayA; nevs = m; il =1; iu=m; } if (TestZHEEVX) { il = 1; ierr = PetscBLASIntCast((0.2*m),&iu);CHKERRQ(ierr); printf(" LAPACKsyevx: compute %d to %d-th eigensolutions...\n",il,iu); ierr = PetscMalloc1(m*n+1,&evecs_array);CHKERRQ(ierr); ierr = PetscMalloc1(7*n+1,&rwork);CHKERRQ(ierr); ierr = PetscMalloc1(5*n+1,&iwork);CHKERRQ(ierr); ierr = PetscMalloc1(n+1,&ifail);CHKERRQ(ierr); /* in the case "I", vl and vu are not referenced */ vl = 0.0; vu = 8.0; LAPACKsyevx_("V","I","U",&bn,arrayA,&bn,&vl,&vu,&il,&iu,&abstol,&nevs,evals,evecs_array,&n,work,&lwork,rwork,iwork,ifail,&lierr); ierr = PetscFree(iwork);CHKERRQ(ierr); ierr = PetscFree(ifail);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); } if (TestZHEGV) { printf(" LAPACKsygv: compute all %d eigensolutions...\n",m); ierr = PetscMalloc1(3*n+1,&rwork);CHKERRQ(ierr); ierr = MatDenseGetArray(B,&arrayB);CHKERRQ(ierr); LAPACKsygv_(&one,"V","U",&bn,arrayA,&bn,arrayB,&bn,evals,work,&lwork,rwork,&lierr); evecs_array = arrayA; nevs = m; il = 1; iu=m; ierr = MatDenseRestoreArray(B,&arrayB);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); } if (TestZHEGVX) { il = 1; ierr = PetscBLASIntCast((0.2*m),&iu);CHKERRQ(ierr); printf(" LAPACKsygv: compute %d to %d-th eigensolutions...\n",il,iu); ierr = PetscMalloc1(m*n+1,&evecs_array);CHKERRQ(ierr); ierr = PetscMalloc1(6*n+1,&iwork);CHKERRQ(ierr); ifail = iwork + 5*n; ierr = PetscMalloc1(7*n+1,&rwork);CHKERRQ(ierr); ierr = MatDenseGetArray(B,&arrayB);CHKERRQ(ierr); vl = 0.0; vu = 8.0; LAPACKsygvx_(&one,"V","I","U",&bn,arrayA,&bn,arrayB,&bn,&vl,&vu,&il,&iu,&abstol,&nevs,evals,evecs_array,&n,work,&lwork,rwork,iwork,ifail,&lierr); ierr = MatDenseRestoreArray(B,&arrayB);CHKERRQ(ierr); ierr = PetscFree(iwork);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); } ierr = MatDenseRestoreArray(A_dense,&arrayA);CHKERRQ(ierr); if (nevs <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED, "nev=%d, no eigensolution has found", nevs); /* View evals */ ierr = PetscOptionsHasName(NULL,NULL, "-eig_view", &flg);CHKERRQ(ierr); if (flg) { printf(" %d evals: \n",nevs); for (i=0; i<nevs; i++) printf("%d %g\n",i+il,(double)evals[i]); } /* Check residuals and orthogonality */ ierr = PetscMalloc1(nevs+1,&evecs);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecCreate(PETSC_COMM_SELF,&evecs[i]);CHKERRQ(ierr); ierr = VecSetSizes(evecs[i],PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(evecs[i]);CHKERRQ(ierr); ierr = VecPlaceArray(evecs[i],evecs_array+i*n);CHKERRQ(ierr); } tols[0] = 1.e-8; tols[1] = 1.e-8; ierr = CkEigenSolutions(cklvl,A,il-1,iu-1,evals,evecs,tols);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecDestroy(&evecs[i]);CHKERRQ(ierr);} ierr = PetscFree(evecs);CHKERRQ(ierr); /* Free work space. */ if (TestZHEEVX || TestZHEGVX) { ierr = PetscFree(evecs_array);CHKERRQ(ierr); } ierr = PetscFree(evals);CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); ierr = MatDestroy(&A_dense);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscErrorCode KSPSolve_CG(KSP ksp) { PetscErrorCode ierr; PetscInt i,stored_max_it,eigs; PetscScalar dpi = 0.0,a = 1.0,beta,betaold = 1.0,b = 0,*e = 0,*d = 0,delta,dpiold; PetscReal dp = 0.0; Vec X,B,Z,R,P,S,W; KSP_CG *cg; Mat Amat,Pmat; MatStructure pflag; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale); CHKERRQ(ierr); if (diagonalscale) SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); cg = (KSP_CG*)ksp->data; eigs = ksp->calc_sings; stored_max_it = ksp->max_it; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; P = ksp->work[2]; if (cg->singlereduction) { S = ksp->work[3]; W = ksp->work[4]; } else { S = 0; /* unused */ W = Z; } #define VecXDot(x,y,a) (((cg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x,y,a) : VecTDot(x,y,a)) if (eigs) { e = cg->e; d = cg->d; e[0] = 0.0; } ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag); CHKERRQ(ierr); ksp->its = 0; if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R); CHKERRQ(ierr); /* r <- b - Ax */ ierr = VecAYPX(R,-1.0,B); CHKERRQ(ierr); } else { ierr = VecCopy(B,R); CHKERRQ(ierr); /* r <- b (x is 0) */ } switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = KSP_PCApply(ksp,R,Z); CHKERRQ(ierr); /* z <- Br */ ierr = VecNorm(Z,NORM_2,&dp); CHKERRQ(ierr); /* dp <- z'*z = e'*A'*B'*B*A'*e' */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(R,NORM_2,&dp); CHKERRQ(ierr); /* dp <- r'*r = e'*A'*A*e */ break; case KSP_NORM_NATURAL: ierr = KSP_PCApply(ksp,R,Z); CHKERRQ(ierr); /* z <- Br */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S); CHKERRQ(ierr); ierr = VecXDot(Z,S,&delta); CHKERRQ(ierr); } ierr = VecXDot(Z,R,&beta); CHKERRQ(ierr); /* beta <- z'*r */ if (PetscIsInfOrNanScalar(beta)) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_FP,"Infinite or not-a-number generated in dot product"); dp = PetscSqrtReal(PetscAbsScalar(beta)); /* dp <- r'*z = r'*B*r = e'*A'*B*A*e */ break; case KSP_NORM_NONE: dp = 0.0; break; default: SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]); } KSPLogResidualHistory(ksp,dp); ierr = KSPMonitor(ksp,0,dp); CHKERRQ(ierr); ksp->rnorm = dp; ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) PetscFunctionReturn(0); if (ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) { ierr = KSP_PCApply(ksp,R,Z); CHKERRQ(ierr); /* z <- Br */ } if (ksp->normtype != KSP_NORM_NATURAL) { if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S); CHKERRQ(ierr); ierr = VecXDot(Z,S,&delta); CHKERRQ(ierr); } ierr = VecXDot(Z,R,&beta); CHKERRQ(ierr); /* beta <- z'*r */ if (PetscIsInfOrNanScalar(beta)) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_FP,"Infinite or not-a-number generated in dot product"); } i = 0; do { ksp->its = i+1; if (beta == 0.0) { ksp->reason = KSP_CONVERGED_ATOL; ierr = PetscInfo(ksp,"converged due to beta = 0\n"); CHKERRQ(ierr); break; #if !defined(PETSC_USE_COMPLEX) } else if ((i > 0) && (beta*betaold < 0.0)) { ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo(ksp,"diverging due to indefinite preconditioner\n"); CHKERRQ(ierr); break; #endif } if (!i) { ierr = VecCopy(Z,P); CHKERRQ(ierr); /* p <- z */ b = 0.0; } else { b = beta/betaold; if (eigs) { if (ksp->max_it != stored_max_it) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Can not change maxit AND calculate eigenvalues"); e[i] = PetscSqrtReal(PetscAbsScalar(b))/a; } ierr = VecAYPX(P,b,Z); CHKERRQ(ierr); /* p <- z + b* p */ } dpiold = dpi; if (!cg->singlereduction || !i) { ierr = KSP_MatMult(ksp,Amat,P,W); CHKERRQ(ierr); /* w <- Ap */ ierr = VecXDot(P,W,&dpi); CHKERRQ(ierr); /* dpi <- p'w */ } else { ierr = VecAYPX(W,beta/betaold,S); CHKERRQ(ierr); /* w <- Ap */ dpi = delta - beta*beta*dpiold/(betaold*betaold); /* dpi <- p'w */ } betaold = beta; if (PetscIsInfOrNanScalar(dpi)) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_FP,"Infinite or not-a-number generated in dot product"); if ((dpi == 0.0) || ((i > 0) && (PetscRealPart(dpi*dpiold) <= 0.0))) { ksp->reason = KSP_DIVERGED_INDEFINITE_MAT; ierr = PetscInfo(ksp,"diverging due to indefinite or negative definite matrix\n"); CHKERRQ(ierr); break; } a = beta/dpi; /* a = beta/p'w */ if (eigs) { d[i] = PetscSqrtReal(PetscAbsScalar(b))*e[i] + 1.0/a; } ierr = VecAXPY(X,a,P); CHKERRQ(ierr); /* x <- x + ap */ ierr = VecAXPY(R,-a,W); CHKERRQ(ierr); /* r <- r - aw */ if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->chknorm < i+2) { ierr = KSP_PCApply(ksp,R,Z); CHKERRQ(ierr); /* z <- Br */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S); CHKERRQ(ierr); } ierr = VecNorm(Z,NORM_2,&dp); CHKERRQ(ierr); /* dp <- z'*z */ } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->chknorm < i+2) { ierr = VecNorm(R,NORM_2,&dp); CHKERRQ(ierr); /* dp <- r'*r */ } else if (ksp->normtype == KSP_NORM_NATURAL) { ierr = KSP_PCApply(ksp,R,Z); CHKERRQ(ierr); /* z <- Br */ if (cg->singlereduction) { PetscScalar tmp[2]; Vec vecs[2]; vecs[0] = S; vecs[1] = R; ierr = KSP_MatMult(ksp,Amat,Z,S); CHKERRQ(ierr); /*ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr); ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); */ /* beta <- r'*z */ ierr = VecMDot(Z,2,vecs,tmp); CHKERRQ(ierr); delta = tmp[0]; beta = tmp[1]; } else { ierr = VecXDot(Z,R,&beta); CHKERRQ(ierr); /* beta <- r'*z */ } if (PetscIsInfOrNanScalar(beta)) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_FP,"Infinite or not-a-number generated in dot product"); dp = PetscSqrtReal(PetscAbsScalar(beta)); } else { dp = 0.0; } ksp->rnorm = dp; KSPLogResidualHistory(ksp,dp); ierr = KSPMonitor(ksp,i+1,dp); CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) break; if ((ksp->normtype != KSP_NORM_PRECONDITIONED && (ksp->normtype != KSP_NORM_NATURAL)) || (ksp->chknorm >= i+2)) { ierr = KSP_PCApply(ksp,R,Z); CHKERRQ(ierr); /* z <- Br */ if (cg->singlereduction) { ierr = KSP_MatMult(ksp,Amat,Z,S); CHKERRQ(ierr); } } if ((ksp->normtype != KSP_NORM_NATURAL) || (ksp->chknorm >= i+2)) { if (cg->singlereduction) { PetscScalar tmp[2]; Vec vecs[2]; vecs[0] = S; vecs[1] = R; /* ierr = VecXDot(Z,R,&beta);CHKERRQ(ierr); */ /* beta <- z'*r */ /* ierr = VecXDot(Z,S,&delta);CHKERRQ(ierr);*/ ierr = VecMDot(Z,2,vecs,tmp); CHKERRQ(ierr); delta = tmp[0]; beta = tmp[1]; } else { ierr = VecXDot(Z,R,&beta); CHKERRQ(ierr); /* beta <- z'*r */ } if (PetscIsInfOrNanScalar(beta)) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_FP,"Infinite or not-a-number generated in dot product"); } i++; } while (i<ksp->max_it); if (i >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } PetscFunctionReturn(0); }

PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) { const char *bcTypes[3] = {"neumann", "dirichlet", "none"}; const char *runTypes[3] = {"full", "test", "perf"}; const char *coeffTypes[4] = {"none", "analytic", "field", "nonlinear"}; PetscInt bc, run, coeff; PetscBool flg; PetscErrorCode ierr; PetscFunctionBeginUser; options->debug = 0; options->runType = RUN_FULL; options->dim = 2; options->filename[0] = '\0'; options->interpolate = PETSC_FALSE; options->refinementLimit = 0.0; options->refinementUniform = PETSC_FALSE; options->refinementRounds = 1; options->bcType = DIRICHLET; options->variableCoefficient = COEFF_NONE; options->jacobianMF = PETSC_FALSE; options->showInitial = PETSC_FALSE; options->showSolution = PETSC_FALSE; options->restart = PETSC_FALSE; options->check = PETSC_FALSE; options->checkpoint = NULL; ierr = PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");CHKERRQ(ierr); ierr = PetscOptionsInt("-debug", "The debugging level", "ex12.c", options->debug, &options->debug, NULL);CHKERRQ(ierr); run = options->runType; ierr = PetscOptionsEList("-run_type", "The run type", "ex12.c", runTypes, 3, runTypes[options->runType], &run, NULL);CHKERRQ(ierr); options->runType = (RunType) run; ierr = PetscOptionsInt("-dim", "The topological mesh dimension", "ex12.c", options->dim, &options->dim, NULL);CHKERRQ(ierr); spatialDim = options->dim; ierr = PetscOptionsString("-f", "Exodus.II filename to read", "ex12.c", options->filename, options->filename, sizeof(options->filename), &flg);CHKERRQ(ierr); #if !defined(PETSC_HAVE_EXODUSII) if (flg) SETERRQ(comm, PETSC_ERR_ARG_WRONG, "This option requires ExodusII support. Reconfigure using --download-exodusii"); #endif ierr = PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex12.c", options->interpolate, &options->interpolate, NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex12.c", options->refinementLimit, &options->refinementLimit, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-refinement_uniform", "Uniformly refine the mesh", "ex52.c", options->refinementUniform, &options->refinementUniform, NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-refinement_rounds", "The number of uniform refinements", "ex52.c", options->refinementRounds, &options->refinementRounds, NULL);CHKERRQ(ierr); ierr = PetscStrcpy(options->partitioner, "chaco");CHKERRQ(ierr); ierr = PetscOptionsString("-partitioner", "The graph partitioner", "pflotran.cxx", options->partitioner, options->partitioner, 2048, NULL);CHKERRQ(ierr); bc = options->bcType; ierr = PetscOptionsEList("-bc_type","Type of boundary condition","ex12.c",bcTypes,3,bcTypes[options->bcType],&bc,NULL);CHKERRQ(ierr); options->bcType = (BCType) bc; coeff = options->variableCoefficient; ierr = PetscOptionsEList("-variable_coefficient","Type of variable coefficent","ex12.c",coeffTypes,4,coeffTypes[options->variableCoefficient],&coeff,NULL);CHKERRQ(ierr); options->variableCoefficient = (CoeffType) coeff; ierr = PetscOptionsBool("-jacobian_mf", "Calculate the action of the Jacobian on the fly", "ex12.c", options->jacobianMF, &options->jacobianMF, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-show_initial", "Output the initial guess for verification", "ex12.c", options->showInitial, &options->showInitial, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-show_solution", "Output the solution for verification", "ex12.c", options->showSolution, &options->showSolution, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-restart", "Read in the mesh and solution from a file", "ex12.c", options->restart, &options->restart, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-check", "Compare with default integration routines", "ex12.c", options->check, &options->check, NULL);CHKERRQ(ierr); ierr = PetscOptionsEnd(); ierr = PetscLogEventRegister("CreateMesh", DM_CLASSID, &options->createMeshEvent);CHKERRQ(ierr); if (options->restart) { ierr = PetscViewerCreate(comm, &options->checkpoint);CHKERRQ(ierr); ierr = PetscViewerSetType(options->checkpoint, PETSCVIEWERHDF5);CHKERRQ(ierr); ierr = PetscViewerFileSetMode(options->checkpoint, FILE_MODE_READ);CHKERRQ(ierr); ierr = PetscViewerFileSetName(options->checkpoint, options->filename);CHKERRQ(ierr); } PetscFunctionReturn(0); }

PetscErrorCode SNESSetUp_Multiblock(SNES snes) { SNES_Multiblock *mb = (SNES_Multiblock *) snes->data; BlockDesc blocks; PetscInt i, numBlocks; PetscErrorCode ierr; PetscFunctionBegin; /* ierr = SNESDefaultGetWork(snes, 1);CHKERRQ(ierr); */ ierr = SNESMultiblockSetDefaults(snes);CHKERRQ(ierr); numBlocks = mb->numBlocks; blocks = mb->blocks; /* Create ISs */ if (!mb->issetup) { PetscInt ccsize, rstart, rend, nslots, bs; PetscBool sorted; mb->issetup = PETSC_TRUE; bs = mb->bs; ierr = MatGetOwnershipRange(snes->jacobian_pre, &rstart, &rend);CHKERRQ(ierr); ierr = MatGetLocalSize(snes->jacobian_pre, PETSC_NULL, &ccsize);CHKERRQ(ierr); nslots = (rend - rstart)/bs; for (i = 0; i < numBlocks; ++i) { if (mb->defaultblocks) { ierr = ISCreateStride(((PetscObject) snes)->comm, nslots, rstart+i, numBlocks, &blocks->is);CHKERRQ(ierr); } else if (!blocks->is) { if (blocks->nfields > 1) { PetscInt *ii, j, k, nfields = blocks->nfields, *fields = blocks->fields; ierr = PetscMalloc(nfields*nslots*sizeof(PetscInt), &ii);CHKERRQ(ierr); for (j = 0; j < nslots; ++j) { for (k = 0; k < nfields; ++k) { ii[nfields*j + k] = rstart + bs*j + fields[k]; } } ierr = ISCreateGeneral(((PetscObject) snes)->comm, nslots*nfields, ii, PETSC_OWN_POINTER, &blocks->is);CHKERRQ(ierr); } else { ierr = ISCreateStride(((PetscObject) snes)->comm, nslots, rstart+blocks->fields[0], bs, &blocks->is);CHKERRQ(ierr); } } ierr = ISSorted(blocks->is, &sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Fields must be sorted when creating split"); blocks = blocks->next; } } #if 0 /* Create matrices */ ilink = jac->head; if (!jac->pmat) { ierr = PetscMalloc(nsplit*sizeof(Mat),&jac->pmat);CHKERRQ(ierr); for (i=0; i<nsplit; i++) { ierr = MatGetSubMatrix(pc->pmat,ilink->is,ilink->is,MAT_INITIAL_MATRIX,&jac->pmat[i]);CHKERRQ(ierr); ilink = ilink->next; } } else { for (i=0; i<nsplit; i++) { ierr = MatGetSubMatrix(pc->pmat,ilink->is,ilink->is,MAT_REUSE_MATRIX,&jac->pmat[i]);CHKERRQ(ierr); ilink = ilink->next; } } if (jac->realdiagonal) { ilink = jac->head; if (!jac->mat) { ierr = PetscMalloc(nsplit*sizeof(Mat),&jac->mat);CHKERRQ(ierr); for (i=0; i<nsplit; i++) { ierr = MatGetSubMatrix(pc->mat,ilink->is,ilink->is,MAT_INITIAL_MATRIX,&jac->mat[i]);CHKERRQ(ierr); ilink = ilink->next; } } else { for (i=0; i<nsplit; i++) { if (jac->mat[i]) {ierr = MatGetSubMatrix(pc->mat,ilink->is,ilink->is,MAT_REUSE_MATRIX,&jac->mat[i]);CHKERRQ(ierr);} ilink = ilink->next; } } } else { jac->mat = jac->pmat; } #endif #if 0 if (jac->type != PC_COMPOSITE_ADDITIVE && jac->type != PC_COMPOSITE_SCHUR) { /* extract the rows of the matrix associated with each field: used for efficient computation of residual inside algorithm */ ilink = jac->head; if (!jac->Afield) { ierr = PetscMalloc(nsplit*sizeof(Mat),&jac->Afield);CHKERRQ(ierr); for (i=0; i<nsplit; i++) { ierr = MatGetSubMatrix(pc->mat,ilink->is,PETSC_NULL,MAT_INITIAL_MATRIX,&jac->Afield[i]);CHKERRQ(ierr); ilink = ilink->next; } } else { for (i=0; i<nsplit; i++) { ierr = MatGetSubMatrix(pc->mat,ilink->is,PETSC_NULL,MAT_REUSE_MATRIX,&jac->Afield[i]);CHKERRQ(ierr); ilink = ilink->next; } } } #endif if (mb->type == PC_COMPOSITE_SCHUR) { #if 0 IS ccis; PetscInt rstart,rend; if (nsplit != 2) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_INCOMP,"To use Schur complement preconditioner you must have exactly 2 fields"); /* When extracting off-diagonal submatrices, we take complements from this range */ ierr = MatGetOwnershipRangeColumn(pc->mat,&rstart,&rend);CHKERRQ(ierr); /* need to handle case when one is resetting up the preconditioner */ if (jac->schur) { ilink = jac->head; ierr = ISComplement(ilink->is,rstart,rend,&ccis);CHKERRQ(ierr); ierr = MatGetSubMatrix(pc->mat,ilink->is,ccis,MAT_REUSE_MATRIX,&jac->B);CHKERRQ(ierr); ierr = ISDestroy(&ccis);CHKERRQ(ierr); ilink = ilink->next; ierr = ISComplement(ilink->is,rstart,rend,&ccis);CHKERRQ(ierr); ierr = MatGetSubMatrix(pc->mat,ilink->is,ccis,MAT_REUSE_MATRIX,&jac->C);CHKERRQ(ierr); ierr = ISDestroy(&ccis);CHKERRQ(ierr); ierr = MatSchurComplementUpdate(jac->schur,jac->mat[0],jac->pmat[0],jac->B,jac->C,jac->pmat[1],pc->flag);CHKERRQ(ierr); ierr = KSPSetOperators(jac->kspschur,jac->schur,FieldSplitSchurPre(jac),pc->flag);CHKERRQ(ierr); } else { KSP ksp; char schurprefix[256]; /* extract the A01 and A10 matrices */ ilink = jac->head; ierr = ISComplement(ilink->is,rstart,rend,&ccis);CHKERRQ(ierr); ierr = MatGetSubMatrix(pc->mat,ilink->is,ccis,MAT_INITIAL_MATRIX,&jac->B);CHKERRQ(ierr); ierr = ISDestroy(&ccis);CHKERRQ(ierr); ilink = ilink->next; ierr = ISComplement(ilink->is,rstart,rend,&ccis);CHKERRQ(ierr); ierr = MatGetSubMatrix(pc->mat,ilink->is,ccis,MAT_INITIAL_MATRIX,&jac->C);CHKERRQ(ierr); ierr = ISDestroy(&ccis);CHKERRQ(ierr); /* Use mat[0] (diagonal block of the real matrix) preconditioned by pmat[0] */ ierr = MatCreateSchurComplement(jac->mat[0],jac->pmat[0],jac->B,jac->C,jac->mat[1],&jac->schur);CHKERRQ(ierr); /* set tabbing and options prefix of KSP inside the MatSchur */ ierr = MatSchurComplementGetKSP(jac->schur,&ksp);CHKERRQ(ierr); ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,2);CHKERRQ(ierr); ierr = PetscSNPrintf(schurprefix,sizeof(schurprefix),"%sfieldsplit_%s_",((PetscObject)pc)->prefix?((PetscObject)pc)->prefix:"",jac->head->splitname);CHKERRQ(ierr); ierr = KSPSetOptionsPrefix(ksp,schurprefix);CHKERRQ(ierr); ierr = MatSetFromOptions(jac->schur);CHKERRQ(ierr); ierr = KSPCreate(((PetscObject)pc)->comm,&jac->kspschur);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->kspschur);CHKERRQ(ierr); ierr = PetscObjectIncrementTabLevel((PetscObject)jac->kspschur,(PetscObject)pc,1);CHKERRQ(ierr); ierr = KSPSetOperators(jac->kspschur,jac->schur,FieldSplitSchurPre(jac),DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); if (jac->schurpre == PC_FIELDSPLIT_SCHUR_PRE_SELF) { PC pc; ierr = KSPGetPC(jac->kspschur,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr); /* Note: This is bad if there exist preconditioners for MATSCHURCOMPLEMENT */ } ierr = PetscSNPrintf(schurprefix,sizeof(schurprefix),"%sfieldsplit_%s_",((PetscObject)pc)->prefix?((PetscObject)pc)->prefix:"",ilink->splitname);CHKERRQ(ierr); ierr = KSPSetOptionsPrefix(jac->kspschur,schurprefix);CHKERRQ(ierr); /* really want setfromoptions called in PCSetFromOptions_FieldSplit(), but it is not ready yet */ ierr = KSPSetFromOptions(jac->kspschur);CHKERRQ(ierr); ierr = PetscMalloc2(2,Vec,&jac->x,2,Vec,&jac->y);CHKERRQ(ierr); ierr = MatGetVecs(jac->pmat[0],&jac->x[0],&jac->y[0]);CHKERRQ(ierr); ierr = MatGetVecs(jac->pmat[1],&jac->x[1],&jac->y[1]);CHKERRQ(ierr); ilink = jac->head; ilink->x = jac->x[0]; ilink->y = jac->y[0]; ilink = ilink->next; ilink->x = jac->x[1]; ilink->y = jac->y[1]; } #endif } else { /* Set up the individual SNESs */ blocks = mb->blocks; i = 0; while (blocks) { /*TODO: Set these correctly */ /*ierr = SNESSetFunction(blocks->snes, blocks->x, func);CHKERRQ(ierr);*/ /*ierr = SNESSetJacobian(blocks->snes, blocks->x, jac);CHKERRQ(ierr);*/ ierr = VecDuplicate(blocks->snes->vec_sol, &blocks->x);CHKERRQ(ierr); /* really want setfromoptions called in SNESSetFromOptions_Multiblock(), but it is not ready yet */ ierr = SNESSetFromOptions(blocks->snes);CHKERRQ(ierr); ierr = SNESSetUp(blocks->snes);CHKERRQ(ierr); blocks = blocks->next; i++; } } /* Compute scatter contexts needed by multiplicative versions and non-default splits */ if (!mb->blocks->sctx) { Vec xtmp; blocks = mb->blocks; ierr = MatGetVecs(snes->jacobian_pre, &xtmp, PETSC_NULL);CHKERRQ(ierr); while(blocks) { ierr = VecScatterCreate(xtmp, blocks->is, blocks->x, PETSC_NULL, &blocks->sctx);CHKERRQ(ierr); blocks = blocks->next; } ierr = VecDestroy(&xtmp);CHKERRQ(ierr); } PetscFunctionReturn(0); }

/*@C PetscFinalize - Checks for options to be called at the conclusion of the program. MPI_Finalize() is called only if the user had not called MPI_Init() before calling PetscInitialize(). Collective on PETSC_COMM_WORLD Options Database Keys: + -options_table - Calls PetscOptionsView() . -options_left - Prints unused options that remain in the database . -objects_dump [all] - Prints list of objects allocated by the user that have not been freed, the option all cause all outstanding objects to be listed . -mpidump - Calls PetscMPIDump() . -malloc_dump - Calls PetscMallocDump() . -malloc_info - Prints total memory usage - -malloc_log - Prints summary of memory usage Level: beginner Note: See PetscInitialize() for more general runtime options. .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscEnd() @*/ PetscErrorCode PetscFinalize(void) { PetscErrorCode ierr; PetscMPIInt rank; PetscInt nopt; PetscBool flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE; #if defined(PETSC_HAVE_AMS) PetscBool flg = PETSC_FALSE; #endif #if defined(PETSC_USE_LOG) char mname[PETSC_MAX_PATH_LEN]; #endif PetscFunctionBegin; if (!PetscInitializeCalled) { printf("PetscInitialize() must be called before PetscFinalize()\n"); PetscFunctionReturn(PETSC_ERR_ARG_WRONGSTATE); } ierr = PetscInfo(NULL,"PetscFinalize() called\n");CHKERRQ(ierr); #if defined(PETSC_SERIALIZE_FUNCTIONS) ierr = PetscFPTDestroy();CHKERRQ(ierr); #endif #if defined(PETSC_HAVE_AMS) ierr = PetscOptionsGetBool(NULL,"-options_gui",&flg,NULL);CHKERRQ(ierr); if (flg) { ierr = PetscOptionsAMSDestroy();CHKERRQ(ierr); } #endif #if defined(PETSC_HAVE_SERVER) flg1 = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-server",&flg1,NULL);CHKERRQ(ierr); if (flg1) { /* this is a crude hack, but better than nothing */ ierr = PetscPOpen(PETSC_COMM_WORLD,NULL,"pkill -9 petscwebserver","r",NULL);CHKERRQ(ierr); } #endif ierr = PetscHMPIFinalize();CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-malloc_info",&flg2,NULL);CHKERRQ(ierr); if (!flg2) { flg2 = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-memory_info",&flg2,NULL);CHKERRQ(ierr); } if (flg2) { ierr = PetscMemoryShowUsage(PETSC_VIEWER_STDOUT_WORLD,"Summary of Memory Usage in PETSc\n");CHKERRQ(ierr); } #if defined(PETSC_USE_LOG) flg1 = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-get_total_flops",&flg1,NULL);CHKERRQ(ierr); if (flg1) { PetscLogDouble flops = 0; ierr = MPI_Reduce(&petsc_TotalFlops,&flops,1,MPI_DOUBLE,MPI_SUM,0,PETSC_COMM_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Total flops over all processors %g\n",flops);CHKERRQ(ierr); } #endif #if defined(PETSC_USE_LOG) #if defined(PETSC_HAVE_MPE) mname[0] = 0; ierr = PetscOptionsGetString(NULL,"-log_mpe",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); if (flg1) { if (mname[0]) {ierr = PetscLogMPEDump(mname);CHKERRQ(ierr);} else {ierr = PetscLogMPEDump(0);CHKERRQ(ierr);} } #endif mname[0] = 0; ierr = PetscOptionsGetString(NULL,"-log_summary",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); if (flg1) { PetscViewer viewer; if (mname[0]) { ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,mname,&viewer);CHKERRQ(ierr); ierr = PetscLogView(viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); } else { viewer = PETSC_VIEWER_STDOUT_WORLD; ierr = PetscLogView(viewer);CHKERRQ(ierr); } } mname[0] = 0; ierr = PetscOptionsGetString(NULL,"-log_summary_python",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); if (flg1) { PetscViewer viewer; if (mname[0]) { ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,mname,&viewer);CHKERRQ(ierr); ierr = PetscLogViewPython(viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); } else { viewer = PETSC_VIEWER_STDOUT_WORLD; ierr = PetscLogViewPython(viewer);CHKERRQ(ierr); } } ierr = PetscOptionsGetString(NULL,"-log_detailed",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); if (flg1) { if (mname[0]) {ierr = PetscLogPrintDetailed(PETSC_COMM_WORLD,mname);CHKERRQ(ierr);} else {ierr = PetscLogPrintDetailed(PETSC_COMM_WORLD,0);CHKERRQ(ierr);} } mname[0] = 0; ierr = PetscOptionsGetString(NULL,"-log_all",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); ierr = PetscOptionsGetString(NULL,"-log",mname,PETSC_MAX_PATH_LEN,&flg2);CHKERRQ(ierr); if (flg1 || flg2) { if (mname[0]) PetscLogDump(mname); else PetscLogDump(0); } #endif /* Free all objects registered with PetscObjectRegisterDestroy() such as PETSC_VIEWER_XXX_(). */ ierr = PetscObjectRegisterDestroyAll();CHKERRQ(ierr); { PetscThreadComm tcomm_world; ierr = PetscGetThreadCommWorld(&tcomm_world);CHKERRQ(ierr); /* Free global thread communicator */ ierr = PetscThreadCommDestroy(&tcomm_world);CHKERRQ(ierr); } #if defined(PETSC_USE_DEBUG) ierr = PetscStackDestroy();CHKERRQ(ierr); #endif flg1 = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-no_signal_handler",&flg1,NULL);CHKERRQ(ierr); if (!flg1) { ierr = PetscPopSignalHandler();CHKERRQ(ierr);} flg1 = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-mpidump",&flg1,NULL);CHKERRQ(ierr); if (flg1) { ierr = PetscMPIDump(stdout);CHKERRQ(ierr); } flg1 = PETSC_FALSE; flg2 = PETSC_FALSE; /* preemptive call to avoid listing this option in options table as unused */ ierr = PetscOptionsHasName(NULL,"-malloc_dump",&flg1);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-objects_dump",&flg1);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-options_table",&flg2,NULL);CHKERRQ(ierr); if (flg2) { PetscViewer viewer; ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr); ierr = PetscOptionsView(viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); } /* to prevent PETSc -options_left from warning */ ierr = PetscOptionsHasName(NULL,"-nox",&flg1);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-nox_warning",&flg1);CHKERRQ(ierr); if (!PetscHMPIWorker) { /* worker processes skip this because they do not usually process options */ flg3 = PETSC_FALSE; /* default value is required */ ierr = PetscOptionsGetBool(NULL,"-options_left",&flg3,&flg1);CHKERRQ(ierr); ierr = PetscOptionsAllUsed(&nopt);CHKERRQ(ierr); if (flg3) { if (!flg2) { /* have not yet printed the options */ PetscViewer viewer; ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr); ierr = PetscOptionsView(viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); } if (!nopt) { ierr = PetscPrintf(PETSC_COMM_WORLD,"There are no unused options.\n");CHKERRQ(ierr); } else if (nopt == 1) { ierr = PetscPrintf(PETSC_COMM_WORLD,"There is one unused database option. It is:\n");CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_WORLD,"There are %D unused database options. They are:\n",nopt);CHKERRQ(ierr); } } #if defined(PETSC_USE_DEBUG) if (nopt && !flg3 && !flg1) { ierr = PetscPrintf(PETSC_COMM_WORLD,"WARNING! There are options you set that were not used!\n");CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"WARNING! could be spelling mistake, etc!\n");CHKERRQ(ierr); ierr = PetscOptionsLeft();CHKERRQ(ierr); } else if (nopt && flg3) { #else if (nopt && flg3) { #endif ierr = PetscOptionsLeft();CHKERRQ(ierr); } } /* List all objects the user may have forgot to free */ ierr = PetscOptionsHasName(NULL,"-objects_dump",&flg1);CHKERRQ(ierr); if (flg1) { MPI_Comm local_comm; char string[64]; ierr = PetscOptionsGetString(NULL,"-objects_dump",string,64,NULL);CHKERRQ(ierr); ierr = MPI_Comm_dup(MPI_COMM_WORLD,&local_comm);CHKERRQ(ierr); ierr = PetscSequentialPhaseBegin_Private(local_comm,1);CHKERRQ(ierr); ierr = PetscObjectsDump(stdout,(string[0] == 'a') ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); ierr = PetscSequentialPhaseEnd_Private(local_comm,1);CHKERRQ(ierr); ierr = MPI_Comm_free(&local_comm);CHKERRQ(ierr); } PetscObjectsCounts = 0; PetscObjectsMaxCounts = 0; ierr = PetscFree(PetscObjects);CHKERRQ(ierr); #if defined(PETSC_USE_LOG) ierr = PetscLogDestroy();CHKERRQ(ierr); #endif /* Destroy any packages that registered a finalize */ ierr = PetscRegisterFinalizeAll();CHKERRQ(ierr); /* Destroy all the function registration lists created */ ierr = PetscFinalize_DynamicLibraries();CHKERRQ(ierr); /* Print PetscFunctionLists that have not been properly freed ierr = PetscFunctionListPrintAll();CHKERRQ(ierr); */ if (petsc_history) { ierr = PetscCloseHistoryFile(&petsc_history);CHKERRQ(ierr); petsc_history = 0; } ierr = PetscInfoAllow(PETSC_FALSE,NULL);CHKERRQ(ierr); { char fname[PETSC_MAX_PATH_LEN]; FILE *fd; int err; fname[0] = 0; ierr = PetscOptionsGetString(NULL,"-malloc_dump",fname,250,&flg1);CHKERRQ(ierr); flg2 = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-malloc_test",&flg2,NULL);CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG) if (PETSC_RUNNING_ON_VALGRIND) flg2 = PETSC_FALSE; #else flg2 = PETSC_FALSE; /* Skip reporting for optimized builds regardless of -malloc_test */ #endif if (flg1 && fname[0]) { char sname[PETSC_MAX_PATH_LEN]; sprintf(sname,"%s_%d",fname,rank); fd = fopen(sname,"w"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open log file: %s",sname); ierr = PetscMallocDump(fd);CHKERRQ(ierr); err = fclose(fd); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); } else if (flg1 || flg2) { MPI_Comm local_comm; ierr = MPI_Comm_dup(MPI_COMM_WORLD,&local_comm);CHKERRQ(ierr); ierr = PetscSequentialPhaseBegin_Private(local_comm,1);CHKERRQ(ierr); ierr = PetscMallocDump(stdout);CHKERRQ(ierr); ierr = PetscSequentialPhaseEnd_Private(local_comm,1);CHKERRQ(ierr); ierr = MPI_Comm_free(&local_comm);CHKERRQ(ierr); } } { char fname[PETSC_MAX_PATH_LEN]; FILE *fd = NULL; fname[0] = 0; ierr = PetscOptionsGetString(NULL,"-malloc_log",fname,250,&flg1);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-malloc_log_threshold",&flg2);CHKERRQ(ierr); if (flg1 && fname[0]) { int err; if (!rank) { fd = fopen(fname,"w"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open log file: %s",fname); } ierr = PetscMallocDumpLog(fd);CHKERRQ(ierr); if (fd) { err = fclose(fd); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); } } else if (flg1 || flg2) { ierr = PetscMallocDumpLog(stdout);CHKERRQ(ierr); } } /* Can be destroyed only after all the options are used */ ierr = PetscOptionsDestroy();CHKERRQ(ierr); PetscGlobalArgc = 0; PetscGlobalArgs = 0; #if defined(PETSC_USE_REAL___FLOAT128) ierr = MPI_Type_free(&MPIU___FLOAT128);CHKERRQ(ierr); #if defined(PETSC_HAVE_COMPLEX) ierr = MPI_Type_free(&MPIU___COMPLEX128);CHKERRQ(ierr); #endif ierr = MPI_Op_free(&MPIU_MAX);CHKERRQ(ierr); ierr = MPI_Op_free(&MPIU_MIN);CHKERRQ(ierr); #endif #if defined(PETSC_HAVE_COMPLEX) #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX) ierr = MPI_Type_free(&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr); ierr = MPI_Type_free(&MPIU_C_COMPLEX);CHKERRQ(ierr); #endif #endif #if (defined(PETSC_HAVE_COMPLEX) && !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)) || defined(PETSC_USE_REAL___FLOAT128) ierr = MPI_Op_free(&MPIU_SUM);CHKERRQ(ierr); #endif ierr = MPI_Type_free(&MPIU_2SCALAR);CHKERRQ(ierr); #if defined(PETSC_USE_64BIT_INDICES) || !defined(MPI_2INT) ierr = MPI_Type_free(&MPIU_2INT);CHKERRQ(ierr); #endif ierr = MPI_Op_free(&PetscMaxSum_Op);CHKERRQ(ierr); ierr = MPI_Op_free(&PetscADMax_Op);CHKERRQ(ierr); ierr = MPI_Op_free(&PetscADMin_Op);CHKERRQ(ierr); /* Destroy any known inner MPI_Comm's and attributes pointing to them Note this will not destroy any new communicators the user has created. If all PETSc objects were not destroyed those left over objects will have hanging references to the MPI_Comms that were freed; but that is ok because those PETSc objects will never be used again */ { PetscCommCounter *counter; PetscMPIInt flg; MPI_Comm icomm; void *ptr; ierr = MPI_Attr_get(PETSC_COMM_SELF,Petsc_InnerComm_keyval,&ptr,&flg);CHKERRQ(ierr); if (flg) { /* Use PetscMemcpy() because casting from pointer to integer of different size is not allowed with some compilers */ ierr = PetscMemcpy(&icomm,&ptr,sizeof(MPI_Comm));CHKERRQ(ierr); ierr = MPI_Attr_get(icomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory"); ierr = MPI_Attr_delete(PETSC_COMM_SELF,Petsc_InnerComm_keyval);CHKERRQ(ierr); ierr = MPI_Attr_delete(icomm,Petsc_Counter_keyval);CHKERRQ(ierr); ierr = MPI_Comm_free(&icomm);CHKERRQ(ierr); } ierr = MPI_Attr_get(PETSC_COMM_WORLD,Petsc_InnerComm_keyval,&ptr,&flg);CHKERRQ(ierr); if (flg) { /* Use PetscMemcpy() because casting from pointer to integer of different size is not allowed with some compilers */ ierr = PetscMemcpy(&icomm,&ptr,sizeof(MPI_Comm));CHKERRQ(ierr); ierr = MPI_Attr_get(icomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_CORRUPT,"Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory"); ierr = MPI_Attr_delete(PETSC_COMM_WORLD,Petsc_InnerComm_keyval);CHKERRQ(ierr); ierr = MPI_Attr_delete(icomm,Petsc_Counter_keyval);CHKERRQ(ierr); ierr = MPI_Comm_free(&icomm);CHKERRQ(ierr); } } ierr = MPI_Keyval_free(&Petsc_Counter_keyval);CHKERRQ(ierr); ierr = MPI_Keyval_free(&Petsc_InnerComm_keyval);CHKERRQ(ierr); ierr = MPI_Keyval_free(&Petsc_OuterComm_keyval);CHKERRQ(ierr); #if defined(PETSC_HAVE_CUDA) { PetscInt p; for (p = 0; p < PetscGlobalSize; ++p) { if (p == PetscGlobalRank) cublasShutdown(); ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); } } #endif if (PetscBeganMPI) { #if defined(PETSC_HAVE_MPI_FINALIZED) PetscMPIInt flag; ierr = MPI_Finalized(&flag);CHKERRQ(ierr); if (flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Finalize() has already been called, even though MPI_Init() was called by PetscInitialize()"); #endif ierr = MPI_Finalize();CHKERRQ(ierr); } /* Note: In certain cases PETSC_COMM_WORLD is never MPI_Comm_free()ed because the communicator has some outstanding requests on it. Specifically if the flag PETSC_HAVE_BROKEN_REQUEST_FREE is set (for IBM MPI implementation). See src/vec/utils/vpscat.c. Due to this the memory allocated in PetscCommDuplicate() is never freed as it should be. Thus one may obtain messages of the form [ 1] 8 bytes PetscCommDuplicate() line 645 in src/sys/mpiu.c indicating the memory was not freed. */ ierr = PetscMallocClear();CHKERRQ(ierr); PetscInitializeCalled = PETSC_FALSE; PetscFinalizeCalled = PETSC_TRUE; PetscFunctionReturn(ierr); } #if defined(PETSC_MISSING_LAPACK_lsame_) PETSC_EXTERN int lsame_(char *a,char *b) { if (*a == *b) return 1; if (*a + 32 == *b) return 1; if (*a - 32 == *b) return 1; return 0; } #endif #if defined(PETSC_MISSING_LAPACK_lsame) PETSC_EXTERN int lsame(char *a,char *b) { if (*a == *b) return 1; if (*a + 32 == *b) return 1; if (*a - 32 == *b) return 1; return 0; }