/* FormFunctionGradient - Evaluates function and corresponding gradient. Input Parameters: . tao - the Tao context . X - input vector . userCtx - optional user-defined context, as set by TaoSetFunctionGradient() Output Parameters: . fcn - the newly evaluated function . G - vector containing the newly evaluated gradient */ PetscErrorCode FormFunctionGradient(Tao tao,Vec X,PetscReal *fcn,Vec G,void *userCtx) { AppCtx *user = (AppCtx *) userCtx; PetscErrorCode ierr; PetscInt i,j,row; PetscInt mx=user->mx, my=user->my; PetscReal rhx=mx+1, rhy=my+1; PetscReal hx=1.0/(mx+1),hy=1.0/(my+1), hydhx=hy/hx, hxdhy=hx/hy, area=0.5*hx*hy, ft=0; PetscReal f1,f2,f3,f4,f5,f6,d1,d2,d3,d4,d5,d6,d7,d8,xc,xl,xr,xt,xb,xlt,xrb; PetscReal df1dxc,df2dxc,df3dxc,df4dxc,df5dxc,df6dxc; PetscReal zero=0.0; PetscReal *g, *x; ierr = VecSet(G, zero);CHKERRQ(ierr); ierr = VecGetArray(X,&x);CHKERRQ(ierr); ierr = VecGetArray(G,&g);CHKERRQ(ierr); /* Compute function over the locally owned part of the mesh */ for (j=0; j<my; j++){ for (i=0; i< mx; i++){ row=(j)*mx + (i); xc = x[row]; xlt=xrb=xl=xr=xb=xt=xc; if (i==0){ /* left side */ xl= user->left[j+1]; xlt = user->left[j+2]; } else { xl = x[row-1]; } if (j==0){ /* bottom side */ xb=user->bottom[i+1]; xrb = user->bottom[i+2]; } else { xb = x[row-mx]; } if (i+1 == mx){ /* right side */ xr=user->right[j+1]; xrb = user->right[j]; } else { xr = x[row+1]; } if (j+1==0+my){ /* top side */ xt=user->top[i+1]; xlt = user->top[i]; }else { xt = x[row+mx]; } if (i>0 && j+1<my){ xlt = x[row-1+mx]; } if (j>0 && i+1<mx){ xrb = x[row+1-mx]; } d1 = (xc-xl); d2 = (xc-xr); d3 = (xc-xt); d4 = (xc-xb); d5 = (xr-xrb); d6 = (xrb-xb); d7 = (xlt-xl); d8 = (xt-xlt); df1dxc = d1*hydhx; df2dxc = ( d1*hydhx + d4*hxdhy ); df3dxc = d3*hxdhy; df4dxc = ( d2*hydhx + d3*hxdhy ); df5dxc = d2*hydhx; df6dxc = d4*hxdhy; d1 *= rhx; d2 *= rhx; d3 *= rhy; d4 *= rhy; d5 *= rhy; d6 *= rhx; d7 *= rhy; d8 *= rhx; f1 = PetscSqrtScalar( 1.0 + d1*d1 + d7*d7); f2 = PetscSqrtScalar( 1.0 + d1*d1 + d4*d4); f3 = PetscSqrtScalar( 1.0 + d3*d3 + d8*d8); f4 = PetscSqrtScalar( 1.0 + d3*d3 + d2*d2); f5 = PetscSqrtScalar( 1.0 + d2*d2 + d5*d5); f6 = PetscSqrtScalar( 1.0 + d4*d4 + d6*d6); ft = ft + (f2 + f4); df1dxc /= f1; df2dxc /= f2; df3dxc /= f3; df4dxc /= f4; df5dxc /= f5; df6dxc /= f6; g[row] = (df1dxc+df2dxc+df3dxc+df4dxc+df5dxc+df6dxc )/2.0; } } for (j=0; j<my; j++){ /* left side */ d3=(user->left[j+1] - user->left[j+2])*rhy; d2=(user->left[j+1] - x[j*mx])*rhx; ft = ft+PetscSqrtScalar( 1.0 + d3*d3 + d2*d2); } for (i=0; i<mx; i++){ /* bottom */ d2=(user->bottom[i+1]-user->bottom[i+2])*rhx; d3=(user->bottom[i+1]-x[i])*rhy; ft = ft+PetscSqrtScalar( 1.0 + d3*d3 + d2*d2); } for (j=0; j< my; j++){ /* right side */ d1=(x[(j+1)*mx-1]-user->right[j+1])*rhx; d4=(user->right[j]-user->right[j+1])*rhy; ft = ft+PetscSqrtScalar( 1.0 + d1*d1 + d4*d4); } for (i=0; i<mx; i++){ /* top side */ d1=(x[(my-1)*mx + i] - user->top[i+1])*rhy; d4=(user->top[i+1] - user->top[i])*rhx; ft = ft+PetscSqrtScalar( 1.0 + d1*d1 + d4*d4); } /* Bottom left corner */ d1=(user->left[0]-user->left[1])*rhy; d2=(user->bottom[0]-user->bottom[1])*rhx; ft +=PetscSqrtScalar( 1.0 + d1*d1 + d2*d2); /* Top right corner */ d1=(user->right[my+1] - user->right[my])*rhy; d2=(user->top[mx+1] - user->top[mx])*rhx; ft +=PetscSqrtScalar( 1.0 + d1*d1 + d2*d2); (*fcn)=ft*area; /* Restore vectors */ ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); ierr = VecRestoreArray(G,&g);CHKERRQ(ierr); ierr = PetscLogFlops(67*mx*my);CHKERRQ(ierr); return 0; }

int main(int argc,char **argv) { Vec p; PetscScalar *x_ptr; PetscErrorCode ierr; PetscMPIInt size; AppCtx ctx; Vec lowerb,upperb; Tao tao; TaoConvergedReason reason; KSP ksp; PC pc; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Initialize program - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ PetscInitialize(&argc,&argv,NULL,help); PetscFunctionBeginUser; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!"); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Swing equation options","");CHKERRQ(ierr); { ctx.beta = 2; ctx.c = 10000.0; ctx.u_s = 1.0; ctx.omega_s = 1.0; ctx.omega_b = 120.0*PETSC_PI; ctx.H = 5.0; ierr = PetscOptionsScalar("-Inertia","","",ctx.H,&ctx.H,NULL);CHKERRQ(ierr); ctx.D = 5.0; ierr = PetscOptionsScalar("-D","","",ctx.D,&ctx.D,NULL);CHKERRQ(ierr); ctx.E = 1.1378; ctx.V = 1.0; ctx.X = 0.545; ctx.Pmax = ctx.E*ctx.V/ctx.X;; ierr = PetscOptionsScalar("-Pmax","","",ctx.Pmax,&ctx.Pmax,NULL);CHKERRQ(ierr); ctx.Pm = 0.4; ierr = PetscOptionsScalar("-Pm","","",ctx.Pm,&ctx.Pm,NULL);CHKERRQ(ierr); ctx.tf = 0.1; ctx.tcl = 0.2; ierr = PetscOptionsReal("-tf","Time to start fault","",ctx.tf,&ctx.tf,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-tcl","Time to end fault","",ctx.tcl,&ctx.tcl,NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); /* Create TAO solver and set desired solution method */ ierr = TaoCreate(PETSC_COMM_WORLD,&tao);CHKERRQ(ierr); ierr = TaoSetType(tao,TAOBLMVM);CHKERRQ(ierr); /* Optimization starts */ /* Set initial solution guess */ ierr = VecCreateSeq(PETSC_COMM_WORLD,1,&p);CHKERRQ(ierr); ierr = VecGetArray(p,&x_ptr);CHKERRQ(ierr); x_ptr[0] = ctx.Pm; ierr = VecRestoreArray(p,&x_ptr);CHKERRQ(ierr); ierr = TaoSetInitialVector(tao,p);CHKERRQ(ierr); /* Set routine for function and gradient evaluation */ ierr = TaoSetObjectiveRoutine(tao,FormFunction,(void *)&ctx);CHKERRQ(ierr); ierr = TaoSetGradientRoutine(tao,TaoDefaultComputeGradient,(void *)&ctx);CHKERRQ(ierr); /* Set bounds for the optimization */ ierr = VecDuplicate(p,&lowerb);CHKERRQ(ierr); ierr = VecDuplicate(p,&upperb);CHKERRQ(ierr); ierr = VecGetArray(lowerb,&x_ptr);CHKERRQ(ierr); x_ptr[0] = 0.; ierr = VecRestoreArray(lowerb,&x_ptr);CHKERRQ(ierr); ierr = VecGetArray(upperb,&x_ptr);CHKERRQ(ierr); x_ptr[0] = 1.1;; ierr = VecRestoreArray(upperb,&x_ptr);CHKERRQ(ierr); ierr = TaoSetVariableBounds(tao,lowerb,upperb); /* Check for any TAO command line options */ ierr = TaoSetFromOptions(tao);CHKERRQ(ierr); ierr = TaoGetKSP(tao,&ksp);CHKERRQ(ierr); if (ksp) { ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr); } ierr = TaoSetTolerances(tao,1e-15,1e-15,1e-15,1e-15,1e-15); /* SOLVE THE APPLICATION */ ierr = TaoSolve(tao); CHKERRQ(ierr); /* Get information on termination */ ierr = TaoGetConvergedReason(tao,&reason);CHKERRQ(ierr); if (reason <= 0){ ierr=PetscPrintf(MPI_COMM_WORLD, "Try another method! \n");CHKERRQ(ierr); } ierr = VecView(p,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Free TAO data structures */ ierr = TaoDestroy(&tao);CHKERRQ(ierr); ierr = VecDestroy(&p);CHKERRQ(ierr); ierr = VecDestroy(&lowerb);CHKERRQ(ierr); ierr = VecDestroy(&upperb);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

int main(int argc,char **argv) { SNES snes; /* nonlinear solver context */ KSP ksp; /* linear solver context */ PC pc; /* preconditioner context */ Vec x,r; /* solution, residual vectors */ Mat J; /* Jacobian matrix */ PetscErrorCode ierr; PetscInt its; PetscMPIInt size,rank; PetscScalar pfive = .5,*xx; PetscBool flg; AppCtx user; /* user-defined work context */ IS isglobal,islocal; PetscInitialize(&argc,&argv,(char *)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create nonlinear solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create matrix and vector data structures; set corresponding routines - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create vectors for solution and nonlinear function */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,2);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&r);CHKERRQ(ierr); if (size > 1){ ierr = VecCreateSeq(PETSC_COMM_SELF,2,&user.xloc);CHKERRQ(ierr); ierr = VecDuplicate(user.xloc,&user.rloc);CHKERRQ(ierr); /* Create the scatter between the global x and local xloc */ ierr = ISCreateStride(MPI_COMM_SELF,2,0,1,&islocal);CHKERRQ(ierr); ierr = ISCreateStride(MPI_COMM_SELF,2,0,1,&isglobal);CHKERRQ(ierr); ierr = VecScatterCreate(x,isglobal,user.xloc,islocal,&user.scatter);CHKERRQ(ierr); ierr = ISDestroy(&isglobal);CHKERRQ(ierr); ierr = ISDestroy(&islocal);CHKERRQ(ierr); } /* Create Jacobian matrix data structure */ ierr = MatCreate(PETSC_COMM_WORLD,&J);CHKERRQ(ierr); ierr = MatSetSizes(J,PETSC_DECIDE,PETSC_DECIDE,2,2);CHKERRQ(ierr); ierr = MatSetFromOptions(J);CHKERRQ(ierr); ierr = MatSetUp(J);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL,"-hard",&flg);CHKERRQ(ierr); if (!flg) { /* Set function evaluation routine and vector. */ ierr = SNESSetFunction(snes,r,FormFunction1,&user);CHKERRQ(ierr); /* Set Jacobian matrix data structure and Jacobian evaluation routine */ ierr = SNESSetJacobian(snes,J,J,FormJacobian1,PETSC_NULL);CHKERRQ(ierr); } else { if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This case is a uniprocessor example only!"); ierr = SNESSetFunction(snes,r,FormFunction2,PETSC_NULL);CHKERRQ(ierr); ierr = SNESSetJacobian(snes,J,J,FormJacobian2,PETSC_NULL);CHKERRQ(ierr); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Customize nonlinear solver; set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Set linear solver defaults for this problem. By extracting the KSP, KSP, and PC contexts from the SNES context, we can then directly call any KSP, KSP, and PC routines to set various options. */ ierr = SNESGetKSP(snes,&ksp);CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr); ierr = KSPSetTolerances(ksp,1.e-4,PETSC_DEFAULT,PETSC_DEFAULT,20);CHKERRQ(ierr); /* Set SNES/KSP/KSP/PC runtime options, e.g., -snes_view -snes_monitor -ksp_type <ksp> -pc_type <pc> These options will override those specified above as long as SNESSetFromOptions() is called _after_ any other customization routines. */ ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Evaluate initial guess; then solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (!flg) { ierr = VecSet(x,pfive);CHKERRQ(ierr); } else { ierr = VecGetArray(x,&xx);CHKERRQ(ierr); xx[0] = 2.0; xx[1] = 3.0; ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); } /* Note: The user should initialize the vector, x, with the initial guess for the nonlinear solver prior to calling SNESSolve(). In particular, to employ an initial guess of zero, the user should explicitly set this vector to zero by calling VecSet(). */ ierr = SNESSolve(snes,PETSC_NULL,x);CHKERRQ(ierr); ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); if (flg) { Vec f; ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = SNESGetFunction(snes,&f,0,0);CHKERRQ(ierr); ierr = VecView(r,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } ierr = PetscPrintf(PETSC_COMM_WORLD,"number of SNES iterations = %D\n",its);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&r);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = SNESDestroy(&snes);CHKERRQ(ierr); if (size > 1){ ierr = VecDestroy(&user.xloc);CHKERRQ(ierr); ierr = VecDestroy(&user.rloc);CHKERRQ(ierr); ierr = VecScatterDestroy(&user.scatter);CHKERRQ(ierr); } ierr = PetscFinalize(); return 0; }

int main(int argc, char **argv) { PetscInitialize(&argc, &argv, "wave.opt", help); int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); double startTime, endTime; int Ns = 32; unsigned int dof = 1; // double dtratio = 1.0; DA da; // Underlying DA Vec rho; // density - elemental scalar Vec nu; // Lame parameter - lambda - elemental scalar std::vector < std::vector<Vec> > fBasis; // the scalar activation - nodal scalar // std::vector<Vec> truth; // the ground truth. // Initial conditions Vec initialDisplacement; Vec initialVelocity; timeInfo ti; // get Ns CHKERRQ ( PetscOptionsGetInt(0,"-Ns", &Ns,0) ); double t0 = 0.0; double dt = 1.0/(Ns); double t1 = 1.0; double nuVal = 1.0; double beta = 0.0001; int numParams = 5; CHKERRQ ( PetscOptionsGetInt(0,"-nump",&numParams,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-t0",&t0,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-t1",&t1,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-dt",&dt,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-nu",&nuVal,0) ); CHKERRQ ( PetscOptionsGetScalar(0,"-beta",&beta,0) ); // CHKERRQ ( PetscOptionsGetString(PETSC_NULL, "-pn", problemName, PETSC_MAX_PATH_LEN-1, PETSC_NULL)); // Time info for timestepping ti.start = t0; ti.stop = t1; ti.step = dt; if (!rank) { std::cout << "Problem size is " << Ns+1 << " spatially and NT = " << (int)ceil(1.0/dt) << std::endl << std::endl; std::cout << "Number of parameters is " << numParams << std::endl; } // create DA CHKERRQ ( DACreate3d ( PETSC_COMM_WORLD, DA_NONPERIODIC, DA_STENCIL_BOX, Ns+1, Ns+1, Ns+1, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, 0, 0, 0, &da) ); massMatrix *Mass = new massMatrix(feMat::PETSC); // Mass Matrix stiffnessMatrix *Stiffness = new stiffnessMatrix(feMat::PETSC); // Stiffness matrix waveDamping *Damping = new waveDamping(feMat::PETSC); // Damping Matrix fdynamicVector *Force = new fdynamicVector(feVec::PETSC); // Force Vector // create vectors CHKERRQ( DACreateGlobalVector(da, &rho) ); CHKERRQ( DACreateGlobalVector(da, &nu) ); CHKERRQ( DACreateGlobalVector(da, &initialDisplacement) ); CHKERRQ( DACreateGlobalVector(da, &initialVelocity) ); // Set initial conditions CHKERRQ( VecSet ( initialDisplacement, 0.0) ); CHKERRQ( VecSet ( initialVelocity, 0.0) ); VecZeroEntries( nu ); VecZeroEntries( rho ); CHKERRQ( VecSet ( nu, nuVal) ); CHKERRQ( VecSet ( rho, 1.0) ); int x, y, z, m, n, p; int mx,my,mz; CHKERRQ( DAGetCorners(da, &x, &y, &z, &m, &n, &p) ); CHKERRQ( DAGetInfo(da,0, &mx, &my, &mz, 0,0,0,0,0,0,0) ); double acx,acy,acz; double hx = 1.0/((double)Ns); // allocate for temporary buffers ... // unsigned int elemSize = Ns*Ns*Ns; // std::cout << "Elem size is " << elemSize << std::endl; // unsigned int nodeSize = (Ns+1)*(Ns+1)*(Ns+1); // Now set the activation ... unsigned int numSteps = (unsigned int)(ceil(( ti.stop - ti.start)/ti.step)); // Vec tauVec; // PetscScalar ***tauArray; unsigned int paramTimeSteps = (unsigned int)(ceil(( (double)(numSteps))/ ((double)(2*numParams)) )); /* for (int b=0; b<numParams; b++) { std::vector<Vec> tau; unsigned int tBegin = paramTimeSteps*b; unsigned int tEnd = tBegin + numSteps/2; // paramTimeSteps*(b+2); // std::cout << "For param " << b << ": Time step range is " << tBegin << " -> " << tEnd << std::endl; for (unsigned int t=0; t<numSteps+1; t++) { double newTime = (dt*(t-tBegin)*numSteps)/((double)(paramTimeSteps)); // double fff = 0.0; CHKERRQ( DACreateGlobalVector(da, &tauVec) ); CHKERRQ( VecSet( tauVec, 0.0)); if ( (t>=tBegin) && (t<=tEnd)) { CHKERRQ(DAVecGetArray(da, tauVec, &tauArray)); for (int k = z; k < z + p ; k++) { for (int j = y; j < y + n; j++) { for (int i = x; i < x + m; i++) { acx = (i)*hx; acy = (j)*hx; acz = (k)*hx; tauArray[k][j][i] = sin(M_PI*newTime)*cos(2*M_PI*acx)*cos(2*M_PI*acy)*cos(2*M_PI*acz); } } } CHKERRQ( DAVecRestoreArray ( da, tauVec, &tauArray ) ); } tau.push_back(tauVec); } fBasis.push_back(tau); } */ // std::cout << "Finished setting basis" << std::endl; /* // Set initial velocity ... CHKERRQ(DAVecGetArray(da, initialVelocity, &solArray)); for (int k = z; k < z + p ; k++) { for (int j = y; j < y + n; j++) { for (int i = x; i < x + m; i++) { acx = (i)*hx; acy = (j)*hx; acz = (k)*hx; solArray[k][j][i] = M_PI*cos(2*M_PI*acx)*cos(2*M_PI*acy)*cos(2*M_PI*acz); } } } CHKERRQ( DAVecRestoreArray ( da, initialVelocity, &solArray ) ); */ std::vector<Vec> newF; Vec alpha; PetscScalar *avec; VecCreateSeq(PETSC_COMM_SELF, numParams, &alpha); /* VecCreate(PETSC_COMM_WORLD, &alpha); VecSetSizes(alpha, numParams, PETSC_DECIDE); VecSetFromOptions(alpha); */ VecGetArray(alpha, &avec); for (int j=0; j<numParams; j++) avec[j] = 0.5 + 0.5*j; VecRestoreArray(alpha, &avec); // getForces(alpha, fBasis, newF); getForces(alpha, newF, da, ti, numParams); // Setup Matrices and Force Vector ... Mass->setProblemDimensions(1.0, 1.0, 1.0); Mass->setDA(da); Mass->setDof(dof); Mass->setNuVec(rho); Stiffness->setProblemDimensions(1.0, 1.0, 1.0); Stiffness->setDA(da); Stiffness->setDof(dof); Stiffness->setNuVec(nu); Damping->setAlpha(0.0); Damping->setBeta(0.00075); Damping->setMassMatrix(Mass); Damping->setStiffnessMatrix(Stiffness); Damping->setDA(da); Damping->setDof(dof); // Force Vector Force->setProblemDimensions(1.0,1.0,1.0); Force->setDA(da); Force->setFDynamic(newF); Force->setTimeInfo(&ti); // Newmark time stepper ... newmark *ts = new newmark; ts->setMassMatrix(Mass); ts->setDampingMatrix(Damping); ts->setStiffnessMatrix(Stiffness); ts->damp(false); ts->setTimeFrames(1); ts->storeVec(true); ts->setAdjoint(false); ts->setForceVector(Force); ts->setInitialDisplacement(initialDisplacement); ts->setInitialVelocity(initialVelocity); ts->setTimeInfo(&ti); ts->setAdjoint(false); // set if adjoint or forward ts->init(); // initialize IMPORTANT // if (!rank) // std::cout << RED"Starting initial forward solve"NRM << std::endl; ts->solve();// solve // if (!rank) // std::cout << GRN"Finished with initial forward solve"NRM << std::endl; std::vector<Vec> solvec = ts->getSolution(); // Now lets check the error ... // Vec nr; // concatenateVecs(solvec, nr); // VecDestroy(nr); // VecDestroy(gt); // std::cout << std::endl; /************* * INVERSE * *************/ // True solution is tau ... we want to recover it. // The observations in this case are, solvec /* Set very initial guess for the inverse problem*/ // Now can clear memory ... /* for (int i=0; i<newF.size(); i++) { if (newF[i] != NULL) { VecDestroy(newF[i]); } } newF.clear(); for (int i=0; i<solvec.size(); i++) { if (solvec[i] != NULL) { VecDestroy(solvec[i]); } } solvec.clear(); ts->destroy(); VecDestroy(rho); VecDestroy(nu); VecDestroy(initialDisplacement); VecDestroy(initialVelocity); VecDestroy(alpha); DADestroy(da); PetscFinalize(); return 0; */ Vec gt, nr; concatenateVecs(solvec, gt); Vec guess; VecDuplicate(alpha, &guess); VecZeroEntries(guess); // VecDuplicate(guess, &Out); // VecZeroEntries(Out); // double norm; /* PetscRandom rctx; PetscRandomCreate(PETSC_COMM_WORLD, &rctx); PetscRandomSetFromOptions(rctx); VecSetRandom(guess, rctx); VecNorm(guess, NORM_2, &norm); PetscPrintf(0, "guess norm = %g\n", norm); */ // double errnorm; // double exsolnorm; // Inverse solver set up // std::cout << RED"Setting up Inverse Solver"NRM << std::endl; parametricWaveInverse* hyperInv = new parametricWaveInverse; // std::cout << GRN"Finished setting up Inverse Solver"NRM << std::endl; hyperInv->setTimeStepper(ts); // set the timestepper hyperInv->setForwardInitialConditions(initialDisplacement, initialVelocity); // std::cout << RED"Setting initial guess"NRM << std::endl; // hyperInv->setInitialGuess(truth);// set the initial guess hyperInv->setInitialGuess(guess);// set the initial guess // std::cout << GRN"Done setting initial guess"NRM << std::endl; hyperInv->setRegularizationParameter(beta); // set the regularization paramter hyperInv->setAdjoints(solvec); // set the data for the problem // hyperInv->setForceBasis(fBasis); hyperInv->setNumberOfParameter(numParams); // std::cout << RED"Initializing Inverse Solver"NRM << std::endl; hyperInv->init(); // initialize the inverse solver // if (!rank) // std::cout << RED"Starting Inverse Solve"NRM << std::endl; startTime = MPI_Wtime(); hyperInv->solve(); // solve endTime = MPI_Wtime(); // if (!rank) // std::cout << GRN"FINISHED HESSIAN SOLVE"NRM << std::endl; hyperInv->getCurrentControl(guess); // get the solution hyperInv->destroy(); /* for (int i=0; i<solvec.size(); i++) { if (solvec[i] != NULL) { VecDestroy(solvec[i]); } } solvec.clear(); */ // VecView(guess, 0); if (!rank) std::cout << std::endl << "Error Norms " << std::endl; Vec Err; double gtNorm, solNorm, errNorm; VecDuplicate(guess, &Err); VecWAXPY(Err, -1.0, guess, alpha); VecNorm(alpha, NORM_2, >Norm); VecNorm(guess, NORM_2, &solNorm); VecNorm(Err, NORM_2, &errNorm); if (!rank) { std::cout << "The norms are " << gtNorm << ", " << solNorm << ", " << errNorm << std::endl; std::cout << "Relative error is " << errNorm/gtNorm << std::endl; } // Now we shall do another forward solve ... getForces(guess, newF, da, ti, numParams); Force->setFDynamic(newF); ts->setInitialDisplacement(initialDisplacement); ts->setInitialVelocity(initialVelocity); ts->setAdjoint(false); ts->clearMonitor(); ts->solve(); std::vector<Vec> solvec2 = ts->getSolution(); ts->destroy(); concatenateVecs(solvec2, nr); // Now can clear memory ... for (int i=0; i<solvec2.size(); i++) { if (solvec2[i] != NULL) { VecDestroy(solvec2[i]); } } solvec2.clear(); // Now can clear memory ... for (int i=0; i<newF.size(); i++) { if (newF[i] != NULL) { VecDestroy(newF[i]); } } newF.clear(); /* for (unsigned int i=0; i<truth.size(); i++) { VecNorm(truth[i], NORM_2, >Norm); VecNorm(solvec[i], NORM_2, &solNorm); VecAXPY(solvec[i], -1.0, truth[i]); VecNorm(solvec[i], NORM_2, &errNorm); PetscPrintf(0, "Ground truth at timestep %d is %g, %g, %g\n", i, gtNorm, solNorm, errNorm); // PetscPrintf(0, "Relative Error at timestep %d is %g\n", i, errNorm/gtNorm); } */ VecNorm(gt, NORM_2, >Norm); VecAXPY(nr, -1.0, gt); VecNorm(nr, NORM_2, &errNorm); if (!rank) std::cout << "Total Relative error on state is " << errNorm/gtNorm << std::endl; if (!rank) std::cout << "Wall time is " << endTime - startTime << std::endl; VecDestroy(gt); VecDestroy(nr); VecDestroy(Err); VecDestroy(alpha); VecDestroy(guess); VecDestroy(rho); VecDestroy(nu); VecDestroy(initialDisplacement); VecDestroy(initialVelocity); DADestroy(da); PetscFinalize(); }

/* FormInitialGuess - Forms initial approximation. Input Parameters: user - user-defined application context X - vector Output Parameter: X - vector */ int FormInitialGuess(AppCtx *user,Vec X) { int i,j,row,mx,my,ierr; PetscReal one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc; PetscScalar *x; /* Process 0 has to wait for all other processes to get here before proceeding to write in the shared vector */ ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr); if (user->rank) { /* All the non-busy processors have to wait here for process 0 to finish evaluating the function; otherwise they will start using the vector values before they have been computed */ ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr); return 0; } mx = user->mx; my = user->my; lambda = user->param; hx = one/(PetscReal)(mx-1); hy = one/(PetscReal)(my-1); sc = hx*hy*lambda; hxdhy = hx/hy; hydhx = hy/hx; temp1 = lambda/(lambda + one); /* Get a pointer to vector data. - For default PETSc vectors, VecGetArray() returns a pointer to the data array. Otherwise, the routine is implementation dependent. - You MUST call VecRestoreArray() when you no longer need access to the array. */ ierr = VecGetArray(X,&x);CHKERRQ(ierr); /* Compute initial guess over the locally owned part of the grid */ #pragma arl(4) #pragma distinct (*x,*f) #pragma no side effects (sqrt) for (j=0; j<my; j++) { temp = (PetscReal)(PetscMin(j,my-j-1))*hy; for (i=0; i<mx; i++) { row = i + j*mx; if (i == 0 || j == 0 || i == mx-1 || j == my-1) { x[row] = 0.0; continue; } x[row] = temp1*sqrt(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp)); } } /* Restore vector */ ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr); return 0; }

void Stokes_SLE_PenaltySolver_MakePenalty( Stokes_SLE_PenaltySolver* self, Stokes_SLE* sle, Vec* _penalty ) { Vec fVec = sle->fForceVec->vector, hVec = sle->hForceVec->vector, penalty, lambda; Mat kMat = sle->kStiffMat->matrix; FeMesh *mesh = sle->kStiffMat->rowVariable->feMesh; FeVariable *velField = sle->kStiffMat->rowVariable; SolutionVector* uVec = sle->uSolnVec; FeEquationNumber *eqNum = uVec->eqNum; IArray *inc; PetscScalar *lambdaVals, lambdaMin, *penaltyVals; int numDofs, numLocalElems, nodeCur, numLocalNodes, rank, eq; SolutionVector *solVec = sle->uSolnVec; double *velBackup; Vec vecBackup; int ii, jj, kk; MPI_Comm_rank( MPI_COMM_WORLD, &rank ); numDofs = Mesh_GetDimSize( mesh ); numLocalElems = FeMesh_GetElementLocalSize( mesh ); numLocalNodes = FeMesh_GetNodeLocalSize( mesh ); velBackup = (double*)malloc( numLocalNodes*numDofs*sizeof(double) ); for( ii = 0; ii < numLocalNodes; ii++ ) FeVariable_GetValueAtNode( velField, ii, velBackup + ii*numDofs ); VecDuplicate( hVec, &penalty ); VecGetArray( penalty, &penaltyVals ); VecDuplicate( fVec, &lambda ); MatGetDiagonal( kMat, lambda ); { PetscInt idx; PetscReal min, max; VecMin( lambda, &idx, &min ); VecMax( lambda, &idx, &max ); if( rank == 0 ) { printf( "LAMBDA RANGE:\n" ); printf( " MIN: %e\n", min ); printf( " MAX: %e\n", max ); } } vecBackup = solVec->vector; solVec->vector = lambda; SolutionVector_UpdateSolutionOntoNodes( solVec ); inc = IArray_New(); lambdaVals = (double*)malloc( numDofs*sizeof(double) ); for( ii = 0; ii < numLocalElems; ii++ ) { lambdaMin = DBL_MAX; FeMesh_GetElementNodes( mesh, ii, inc ); for( jj = 0; jj < inc->size; jj++ ) { nodeCur = inc->ptr[jj]; FeVariable_GetValueAtNode( velField, nodeCur, lambdaVals ); for( kk = 0; kk < numDofs; kk++ ) { eq = eqNum->mapNodeDof2Eq[nodeCur][kk]; if( eq == -1 ) continue; /* eq = *(int*)STreeMap_Map( eqNum->ownedMap, &eq ); VecGetValues( lambda, 1, &eq, &lambdaVal ); */ if( lambdaVals[kk] < 0.0 ) printf( "%g\n", lambdaVals[kk] ); if( lambdaVals[kk] < lambdaMin ) lambdaMin = lambdaVals[kk]; } } penaltyVals[ii] = lambdaMin; } if( lambdaVals ) free( lambdaVals ); Stg_Class_Delete( inc ); solVec->vector = vecBackup; for( ii = 0; ii < numLocalNodes; ii++ ) FeVariable_SetValueAtNode( velField, ii, velBackup + ii*numDofs ); if( velBackup ) free( velBackup ); FeVariable_SyncShadowValues( velField ); Stg_VecDestroy(&lambda ); VecRestoreArray( penalty, &penaltyVals ); VecAssemblyBegin( penalty ); VecAssemblyEnd( penalty ); { PetscInt idx; PetscReal min, max; VecMin( penalty, &idx, &min ); VecMax( penalty, &idx, &max ); if( rank == 0 ) { printf( "SEMI-PENALTY RANGE:\n" ); printf( " MIN: %e\n", min ); printf( " MAX: %e\n", max ); } } *_penalty = penalty; }

PetscErrorCode MatSolve_SeqSpooles(Mat A,Vec b,Vec x) { Mat_Spooles *lu = (Mat_Spooles*)A->spptr; PetscScalar *array; DenseMtx *mtxY, *mtxX ; PetscErrorCode ierr; PetscInt irow,neqns=A->cmap->n,nrow=A->rmap->n,*iv; #if defined(PETSC_USE_COMPLEX) double x_real,x_imag; #else double *entX; #endif PetscFunctionBegin; mtxY = DenseMtx_new(); DenseMtx_init(mtxY, lu->options.typeflag, 0, 0, nrow, 1, 1, nrow); /* column major */ ierr = VecGetArray(b,&array);CHKERRQ(ierr); if (lu->options.useQR) { /* copy b to mtxY */ for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, irow, 0, *array++); #else DenseMtx_setComplexEntry(mtxY, irow, 0, PetscRealPart(array[irow]), PetscImaginaryPart(array[irow])); #endif } else { /* copy permuted b to mtxY */ iv = IV_entries(lu->oldToNewIV); for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++); #else DenseMtx_setComplexEntry(mtxY,*iv++,0,PetscRealPart(array[irow]),PetscImaginaryPart(array[irow])); #endif } ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); mtxX = DenseMtx_new(); DenseMtx_init(mtxX, lu->options.typeflag, 0, 0, neqns, 1, 1, neqns); if (lu->options.useQR) { FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n right hand side matrix after permutation");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution matrix in new ordering");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* permute solution into original ordering, then copy to x */ DenseMtx_permuteRows(mtxX, lu->newToOldIV); ierr = VecGetArray(x,&array);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) entX = DenseMtx_entries(mtxX); DVcopy(neqns, array, entX); #else for (irow=0; irow<nrow; irow++){ DenseMtx_complexEntry(mtxX,irow,0,&x_real,&x_imag); array[irow] = x_real+x_imag*PETSC_i; } #endif ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); /* free memory */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); PetscFunctionReturn(0); }

int main(int argc,char **args) { typedef enum {RANDOM, CONSTANT, TANH, NUM_FUNCS} FuncType; const char *funcNames[NUM_FUNCS] = {"random", "constant", "tanh"}; PetscMPIInt size; int n = 10,N,Ny,ndim=4,i,dim[4],DIM; Vec x,y,z; PetscScalar s; PetscRandom rdm; PetscReal enorm; PetscInt func = RANDOM; FuncType function = RANDOM; PetscBool view = PETSC_FALSE; PetscErrorCode ierr; PetscScalar *x_array,*y_array,*z_array; fftw_plan fplan,bplan; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This is a uniprocessor example only!"); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex142");CHKERRQ(ierr); ierr = PetscOptionsEList("-function", "Function type", "ex142", funcNames, NUM_FUNCS, funcNames[function], &func, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-vec_view draw", "View the functions", "ex142", view, &view, NULL);CHKERRQ(ierr); function = (FuncType) func; ierr = PetscOptionsEnd();CHKERRQ(ierr); for (DIM = 0; DIM < ndim; DIM++) { dim[DIM] = n; /* size of real space vector in DIM-dimension */ } ierr = PetscRandomCreate(PETSC_COMM_SELF, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); for (DIM = 1; DIM < 5; DIM++) { /* create vectors of length N=dim[0]*dim[1]* ...*dim[DIM-1] */ /*----------------------------------------------------------*/ N = Ny = 1; for (i = 0; i < DIM-1; i++) { N *= dim[i]; } Ny = N; Ny *= 2*(dim[DIM-1]/2 + 1); /* add padding elements to output vector y */ N *= dim[DIM-1]; ierr = PetscPrintf(PETSC_COMM_SELF, "\n %d-D: FFTW on vector of size %d \n",DIM,N);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,Ny,&y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = VecDuplicate(x,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); /* Set fftw plan */ /*----------------------------------*/ ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); ierr = VecGetArray(z,&z_array);CHKERRQ(ierr); unsigned int flags = FFTW_ESTIMATE; /*or FFTW_MEASURE */ /* The data in the in/out arrays is overwritten during FFTW_MEASURE planning, so such planning should be done before the input is initialized by the user. */ ierr = PetscPrintf(PETSC_COMM_SELF,"DIM: %d, N %d, Ny %d\n",DIM,N,Ny);CHKERRQ(ierr); switch (DIM) { case 1: fplan = fftw_plan_dft_r2c_1d(dim[0], (double*)x_array, (fftw_complex*)y_array, flags); bplan = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex*)y_array, (double*)z_array, flags); break; case 2: fplan = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array, (fftw_complex*)y_array,flags); bplan = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)y_array,(double*)z_array,flags); break; case 3: fplan = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array, (fftw_complex*)y_array,flags); bplan = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)y_array,(double*)z_array,flags); break; default: fplan = fftw_plan_dft_r2c(DIM,(int*)dim,(double*)x_array, (fftw_complex*)y_array,flags); bplan = fftw_plan_dft_c2r(DIM,(int*)dim,(fftw_complex*)y_array,(double*)z_array,flags); break; } ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(z,&z_array);CHKERRQ(ierr); /* Initialize Real space vector x: The data in the in/out arrays is overwritten during FFTW_MEASURE planning, so planning should be done before the input is initialized by the user. --------------------------------------------------------*/ if (function == RANDOM) { ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); } else if (function == CONSTANT) { ierr = VecSet(x, 1.0);CHKERRQ(ierr); } else if (function == TANH) { ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); for (i = 0; i < N; ++i) { x_array[i] = tanh((i - N/2.0)*(10.0/N)); } ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); } if (view) { ierr = VecView(x, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* FFT - also test repeated transformation */ /*-------------------------------------------*/ ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); ierr = VecGetArray(z,&z_array);CHKERRQ(ierr); for (i=0; i<4; i++) { /* FFTW_FORWARD */ fftw_execute(fplan); /* FFTW_BACKWARD: destroys its input array 'y_array' even for out-of-place transforms! */ fftw_execute(bplan); } ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(z,&z_array);CHKERRQ(ierr); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ /*------------------------------------------------------------------*/ s = 1.0/(PetscReal)N; ierr = VecScale(z,s);CHKERRQ(ierr); if (view) {ierr = VecView(x, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);} if (view) {ierr = VecView(z, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-11) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr); } /* free spaces */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); } ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }

PetscErrorCode SetInitialGuess(Vec X,AppCtx *user) { PetscErrorCode ierr; PetscInt nele,nen,n,i; const PetscInt *ele; Vec coords, rand1, rand2; const PetscScalar *_coords; PetscScalar x[3],y[3]; PetscInt idx[3]; PetscScalar *xx,*w1,*w2,*u1,*u2,*u3; PetscViewer view_out; PetscFunctionBeginUser; /* Get ghosted coordinates */ ierr = DMGetCoordinatesLocal(user->da,&coords);CHKERRQ(ierr); ierr = VecDuplicate(user->u1,&rand1); ierr = VecDuplicate(user->u1,&rand2); ierr = VecSetRandom(rand1,NULL); ierr = VecSetRandom(rand2,NULL); ierr = VecGetLocalSize(X,&n);CHKERRQ(ierr); ierr = VecGetArrayRead(coords,&_coords);CHKERRQ(ierr); ierr = VecGetArray(X,&xx);CHKERRQ(ierr); ierr = VecGetArray(user->work1,&w1); ierr = VecGetArray(user->work2,&w2); ierr = VecGetArray(user->u1,&u1); ierr = VecGetArray(user->u2,&u2); ierr = VecGetArray(user->u3,&u3); /* Get local element info */ ierr = DMDAGetElements(user->da,&nele,&nen,&ele);CHKERRQ(ierr); for (i=0; i < nele; i++) { idx[0] = ele[3*i]; idx[1] = ele[3*i+1]; idx[2] = ele[3*i+2]; x[0] = _coords[2*idx[0]]; y[0] = _coords[2*idx[0]+1]; x[1] = _coords[2*idx[1]]; y[1] = _coords[2*idx[1]+1]; x[2] = _coords[2*idx[2]]; y[2] = _coords[2*idx[2]+1]; PetscScalar vals1[3],vals2[3],valsrand[3]; PetscInt r; for (r=0; r<3; r++) { valsrand[r]=5*x[r]*(1-x[r])*y[r]*(1-y[r]); if (x[r]>=0.5 && y[r]>=0.5) { vals1[r]=0.75; vals2[r]=0.0; } if (x[r]>=0.5 && y[r]<0.5) { vals1[r]=0.0; vals2[r]=0.0; } if (x[r]<0.5 && y[r]>=0.5) { vals1[r]=0.0; vals2[r]=0.75; } if (x[r]<0.5 && y[r]<0.5) { vals1[r]=0.75; vals2[r]=0.0; } } ierr = VecSetValues(user->work1,3,idx,vals1,INSERT_VALUES);CHKERRQ(ierr); ierr = VecSetValues(user->work2,3,idx,vals2,INSERT_VALUES);CHKERRQ(ierr); ierr = VecSetValues(user->work3,3,idx,valsrand,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(user->work1);CHKERRQ(ierr); ierr = VecAssemblyEnd(user->work1);CHKERRQ(ierr); ierr = VecAssemblyBegin(user->work2);CHKERRQ(ierr); ierr = VecAssemblyEnd(user->work2);CHKERRQ(ierr); ierr = VecAssemblyBegin(user->work3);CHKERRQ(ierr); ierr = VecAssemblyEnd(user->work3);CHKERRQ(ierr); ierr = VecAXPY(user->work1,1.0,user->work3);CHKERRQ(ierr); ierr = VecAXPY(user->work2,1.0,user->work3);CHKERRQ(ierr); for (i=0; i<n/4; i++) { xx[4*i] = w1[i]; if (xx[4*i]>1) xx[4*i]=1; xx[4*i+1] = w2[i]; if (xx[4*i+1]>1) xx[4*i+1]=1; if (xx[4*i]+xx[4*i+1]>1) xx[4*i+1] = 1.0 - xx[4*i]; xx[4*i+2] = 1.0 - xx[4*i] - xx[4*i+1]; xx[4*i+3] = 0.0; u1[i] = xx[4*i]; u2[i] = xx[4*i+1]; u3[i] = xx[4*i+2]; } ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"file_initial",FILE_MODE_WRITE,&view_out);CHKERRQ(ierr); ierr = VecView(user->u1,view_out);CHKERRQ(ierr); ierr = VecView(user->u2,view_out);CHKERRQ(ierr); ierr = VecView(user->u3,view_out);CHKERRQ(ierr); PetscViewerDestroy(&view_out); ierr = DMDARestoreElements(user->da,&nele,&nen,&ele);CHKERRQ(ierr); ierr = VecRestoreArrayRead(coords,&_coords);CHKERRQ(ierr); ierr = VecRestoreArray(X,&xx);CHKERRQ(ierr); ierr = VecRestoreArray(user->work2,&w1);CHKERRQ(ierr); ierr = VecRestoreArray(user->work4,&w2);CHKERRQ(ierr); ierr = VecRestoreArray(user->u1,&u1);CHKERRQ(ierr); ierr = VecRestoreArray(user->u2,&u2);CHKERRQ(ierr); ierr = VecRestoreArray(user->u3,&u3);CHKERRQ(ierr); ierr = VecDestroy(&rand1);CHKERRQ(ierr); ierr = VecDestroy(&rand2);CHKERRQ(ierr); PetscFunctionReturn(0); }

int main(int argc,char **argv) { PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt rstart,rend,i,k,N,numPoints=1000000; PetscScalar dummy,result=0,h=1.0/numPoints,*xarray; Vec x,xend; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); /* Create a parallel vector. Here we set up our x vector which will be given values below. The xend vector is a dummy vector to find the value of the elements at the endpoints for use in the trapezoid rule. */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,numPoints);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecGetSize(x,&N);CHKERRQ(ierr); ierr = VecSet(x,result);CHKERRQ(ierr); ierr = VecDuplicate(x,&xend);CHKERRQ(ierr); result = 0.5; if (!rank) { i = 0; ierr = VecSetValues(xend,1,&i,&result,INSERT_VALUES);CHKERRQ(ierr); } if (rank == size-1) { i = N-1; ierr = VecSetValues(xend,1,&i,&result,INSERT_VALUES);CHKERRQ(ierr); } /* Assemble vector, using the 2-step process: VecAssemblyBegin(), VecAssemblyEnd() Computations can be done while messages are in transition by placing code between these two statements. */ ierr = VecAssemblyBegin(xend);CHKERRQ(ierr); ierr = VecAssemblyEnd(xend);CHKERRQ(ierr); /* Set the x vector elements. i*h will return 0 for i=0 and 1 for i=N-1. The function evaluated (2x/(1+x^2)) is defined above. Each evaluation is put into the local array of the vector without message passing. */ ierr = VecGetOwnershipRange(x,&rstart,&rend);CHKERRQ(ierr); ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); k = 0; for (i=rstart; i<rend; i++) { xarray[k] = i*h; xarray[k] = func(xarray[k]); k++; } ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); /* Evaluates the integral. First the sum of all the points is taken. That result is multiplied by the step size for the trapezoid rule. Then half the value at each endpoint is subtracted, this is part of the composite trapezoid rule. */ ierr = VecSum(x,&result);CHKERRQ(ierr); result = result*h; ierr = VecDot(x,xend,&dummy);CHKERRQ(ierr); result = result-h*dummy; /* Return the value of the integral. */ ierr = PetscPrintf(PETSC_COMM_WORLD,"ln(2) is %g\n",(double)result);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&xend);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

static PetscErrorCode PCSetUp_SVD(PC pc) { #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates."); #else PC_SVD *jac = (PC_SVD*)pc->data; PetscErrorCode ierr; PetscScalar *a,*u,*v,*d,*work; PetscBLASInt nb,lwork; PetscInt i,n; PetscMPIInt size; PetscFunctionBegin; ierr = MatDestroy(&jac->A);CHKERRQ(ierr); ierr = MPI_Comm_size(((PetscObject)pc->pmat)->comm,&size);CHKERRQ(ierr); if (size > 1) { Mat redmat; PetscInt M; ierr = MatGetSize(pc->pmat,&M,NULL);CHKERRQ(ierr); ierr = MatGetRedundantMatrix(pc->pmat,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr); ierr = MatConvert(redmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr); ierr = MatDestroy(&redmat);CHKERRQ(ierr); } else { ierr = MatConvert(pc->pmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr); } if (!jac->diag) { /* assume square matrices */ ierr = MatGetVecs(jac->A,&jac->diag,&jac->work);CHKERRQ(ierr); } if (!jac->U) { ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->U);CHKERRQ(ierr); ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->Vt);CHKERRQ(ierr); } ierr = MatGetSize(pc->pmat,&n,NULL);CHKERRQ(ierr); ierr = PetscBLASIntCast(n,&nb);CHKERRQ(ierr); lwork = 5*nb; ierr = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); ierr = MatDenseGetArray(jac->A,&a);CHKERRQ(ierr); ierr = MatDenseGetArray(jac->U,&u);CHKERRQ(ierr); ierr = MatDenseGetArray(jac->Vt,&v);CHKERRQ(ierr); ierr = VecGetArray(jac->diag,&d);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) { PetscBLASInt lierr; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&nb,&nb,a,&nb,d,u,&nb,v,&nb,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"gesv() error %d",lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex"); #endif ierr = MatDenseRestoreArray(jac->A,&a);CHKERRQ(ierr); ierr = MatDenseRestoreArray(jac->U,&u);CHKERRQ(ierr); ierr = MatDenseRestoreArray(jac->Vt,&v);CHKERRQ(ierr); for (i=n-1; i>=0; i--) if (PetscRealPart(d[i]) > jac->zerosing) break; jac->nzero = n-1-i; if (jac->monitor) { ierr = PetscViewerASCIIAddTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: condition number %14.12e, %D of %D singular values are (nearly) zero\n",(double)PetscRealPart(d[0]/d[n-1]),jac->nzero,n);CHKERRQ(ierr); if (n >= 10) { /* print 5 smallest and 5 largest */ ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: smallest singular values: %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[n-1]),(double)PetscRealPart(d[n-2]),(double)PetscRealPart(d[n-3]),(double)PetscRealPart(d[n-4]),(double)PetscRealPart(d[n-5]));CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: largest singular values : %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[4]),(double)PetscRealPart(d[3]),(double)PetscRealPart(d[2]),(double)PetscRealPart(d[1]),(double)PetscRealPart(d[0]));CHKERRQ(ierr); } else { /* print all singular values */ char buf[256],*p; size_t left = sizeof(buf),used; PetscInt thisline; for (p=buf,i=n-1,thisline=1; i>=0; i--,thisline++) { ierr = PetscSNPrintfCount(p,left," %14.12e",&used,(double)PetscRealPart(d[i]));CHKERRQ(ierr); left -= used; p += used; if (thisline > 4 || i==0) { ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: singular values:%s\n",buf);CHKERRQ(ierr); p = buf; thisline = 0; } } } ierr = PetscViewerASCIISubtractTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr); } ierr = PetscInfo2(pc,"Largest and smallest singular values %14.12e %14.12e\n",(double)PetscRealPart(d[0]),(double)PetscRealPart(d[n-1]));CHKERRQ(ierr); for (i=0; i<n-jac->nzero; i++) d[i] = 1.0/d[i]; for (; i<n; i++) d[i] = 0.0; if (jac->essrank > 0) for (i=0; i<n-jac->nzero-jac->essrank; i++) d[i] = 0.0; /* Skip all but essrank eigenvalues */ ierr = PetscInfo1(pc,"Number of zero or nearly singular values %D\n",jac->nzero);CHKERRQ(ierr); ierr = VecRestoreArray(jac->diag,&d);CHKERRQ(ierr); #if defined(foo) { PetscViewer viewer; ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"joe",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); ierr = MatView(jac->A,viewer);CHKERRQ(ierr); ierr = MatView(jac->U,viewer);CHKERRQ(ierr); ierr = MatView(jac->Vt,viewer);CHKERRQ(ierr); ierr = VecView(jac->diag,viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); PetscFunctionReturn(0); #endif }

int main(int argc,char **argv) { PetscMPIInt rank,size; PetscInt nlocal = 6,nghost = 2,ifrom[2],i,rstart,rend; PetscErrorCode ierr; PetscBool flg,flg2; PetscScalar value,*array,*tarray=0; Vec lx,gx,gxs; PetscInitialize(&argc,&argv,(char*)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 2) SETERRQ(PETSC_COMM_SELF,1,"Must run example with two processors\n"); /* Construct a two dimensional graph connecting nlocal degrees of freedom per processor. From this we will generate the global indices of needed ghost values For simplicity we generate the entire graph on each processor: in real application the graph would stored in parallel, but this example is only to demonstrate the management of ghost padding with VecCreateGhost(). In this example we consider the vector as representing degrees of freedom in a one dimensional grid with periodic boundary conditions. ----Processor 1--------- ----Processor 2 -------- 0 1 2 3 4 5 6 7 8 9 10 11 |----| |-------------------------------------------------| */ if (!rank) { ifrom[0] = 11; ifrom[1] = 6; } else { ifrom[0] = 0; ifrom[1] = 5; } /* Create the vector with two slots for ghost points. Note that both the local vector (lx) and the global vector (gx) share the same array for storing vector values. */ ierr = PetscOptionsHasName(NULL,"-allocate",&flg);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-vecmpisetghost",&flg2);CHKERRQ(ierr); if (flg) { ierr = PetscMalloc1((nlocal+nghost),&tarray);CHKERRQ(ierr); ierr = VecCreateGhostWithArray(PETSC_COMM_WORLD,nlocal,PETSC_DECIDE,nghost,ifrom,tarray,&gxs);CHKERRQ(ierr); } else if (flg2) { ierr = VecCreate(PETSC_COMM_WORLD,&gxs);CHKERRQ(ierr); ierr = VecSetType(gxs,VECMPI);CHKERRQ(ierr); ierr = VecSetSizes(gxs,nlocal,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecMPISetGhost(gxs,nghost,ifrom);CHKERRQ(ierr); } else { ierr = VecCreateGhost(PETSC_COMM_WORLD,nlocal,PETSC_DECIDE,nghost,ifrom,&gxs);CHKERRQ(ierr); } /* Test VecDuplicate() */ ierr = VecDuplicate(gxs,&gx);CHKERRQ(ierr); ierr = VecDestroy(&gxs);CHKERRQ(ierr); /* Access the local representation */ ierr = VecGhostGetLocalForm(gx,&lx);CHKERRQ(ierr); /* Set the values from 0 to 12 into the "global" vector */ ierr = VecGetOwnershipRange(gx,&rstart,&rend);CHKERRQ(ierr); for (i=rstart; i<rend; i++) { value = (PetscScalar) i; ierr = VecSetValues(gx,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(gx);CHKERRQ(ierr); ierr = VecAssemblyEnd(gx);CHKERRQ(ierr); ierr = VecGhostUpdateBegin(gx,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGhostUpdateEnd(gx,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Print out each vector, including the ghost padding region. */ ierr = VecGetArray(lx,&array);CHKERRQ(ierr); for (i=0; i<nlocal+nghost; i++) { ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%D %g\n",i,(double)PetscRealPart(array[i]));CHKERRQ(ierr); } ierr = VecRestoreArray(lx,&array);CHKERRQ(ierr); ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(gx,&lx);CHKERRQ(ierr); ierr = VecDestroy(&gx);CHKERRQ(ierr); if (flg) {ierr = PetscFree(tarray);CHKERRQ(ierr);} ierr = PetscFinalize(); return 0; }

static PetscErrorCode PCApply_Kaczmarz(PC pc,Vec x,Vec y) { PC_Kaczmarz *jac = (PC_Kaczmarz*)pc->data; PetscInt xs,xe,ys,ye,ncols,i,j; const PetscInt *cols; const PetscScalar *vals; PetscErrorCode ierr; PetscScalar r; PetscReal anrm; PetscScalar *xarray,*yarray; PetscReal lambda=jac->lambda; PetscFunctionBegin; ierr = MatGetOwnershipRange(pc->pmat,&xs,&xe);CHKERRQ(ierr); ierr = MatGetOwnershipRangeColumn(pc->pmat,&ys,&ye);CHKERRQ(ierr); ierr = VecSet(y,0.);CHKERRQ(ierr); ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); for (i=xs;i<xe;i++) { /* get the maximum row width and row norms */ ierr = MatGetRow(pc->pmat,i,&ncols,&cols,&vals);CHKERRQ(ierr); r = xarray[i-xs]; anrm = 0.; for (j=0;j<ncols;j++) { if (cols[j] >= ys && cols[j] < ye) { r -= yarray[cols[j]-ys]*vals[j]; } anrm += PetscRealPart(PetscSqr(vals[j])); } if (anrm > 0.) { for (j=0;j<ncols;j++) { if (cols[j] >= ys && cols[j] < ye) { yarray[cols[j]-ys] += vals[j]*lambda*r/anrm; } } } ierr = MatRestoreRow(pc->pmat,i,&ncols,&cols,&vals);CHKERRQ(ierr); } if (jac->symmetric) { for (i=xe-1;i>=xs;i--) { ierr = MatGetRow(pc->pmat,i,&ncols,&cols,&vals);CHKERRQ(ierr); r = xarray[i-xs]; anrm = 0.; for (j=0;j<ncols;j++) { if (cols[j] >= ys && cols[j] < ye) { r -= yarray[cols[j]-ys]*vals[j]; } anrm += PetscRealPart(PetscSqr(vals[j])); } if (anrm > 0.) { for (j=0;j<ncols;j++) { if (cols[j] >= ys && cols[j] < ye) { yarray[cols[j]-ys] += vals[j]*lambda*r/anrm; } } } ierr = MatRestoreRow(pc->pmat,i,&ncols,&cols,&vals);CHKERRQ(ierr); } } ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); PetscFunctionReturn(0); }

/* QuadraticH - Evaluates the Hessian matrix. Input Parameters: . user - user-defined context, as set by TaoSetHessian() . X - input vector Output Parameter: . H - Hessian matrix */ PetscErrorCode QuadraticH(AppCtx *user, Vec X, Mat Hessian) { PetscErrorCode ierr; PetscInt i,j,k,row; PetscInt mx=user->mx, my=user->my; PetscInt col[7]; PetscReal hx=1.0/(mx+1), hy=1.0/(my+1), hydhx=hy/hx, hxdhy=hx/hy; PetscReal rhx=mx+1, rhy=my+1; PetscReal f1,f2,f3,f4,f5,f6,d1,d2,d3,d4,d5,d6,d7,d8,xc,xl,xr,xt,xb,xlt,xrb; PetscReal hl,hr,ht,hb,hc,htl,hbr; PetscReal *x, v[7]; /* Get pointers to vector data */ ierr = VecGetArray(X,&x);CHKERRQ(ierr); /* Initialize matrix entries to zero */ ierr = MatZeroEntries(Hessian); CHKERRQ(ierr); /* Set various matrix options */ ierr = MatSetOption(Hessian,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* Compute Hessian over the locally owned part of the mesh */ for (i=0; i< mx; i++){ for (j=0; j<my; j++){ row=(j)*mx + (i); xc = x[row]; xlt=xrb=xl=xr=xb=xt=xc; /* Left side */ if (i==0){ xl= user->left[j+1]; xlt = user->left[j+2]; } else { xl = x[row-1]; } if (j==0){ xb=user->bottom[i+1]; xrb = user->bottom[i+2]; } else { xb = x[row-mx]; } if (i+1 == mx){ xr=user->right[j+1]; xrb = user->right[j]; } else { xr = x[row+1]; } if (j+1==my){ xt=user->top[i+1]; xlt = user->top[i]; }else { xt = x[row+mx]; } if (i>0 && j+1<my){ xlt = x[row-1+mx]; } if (j>0 && i+1<mx){ xrb = x[row+1-mx]; } d1 = (xc-xl)*rhx; d2 = (xc-xr)*rhx; d3 = (xc-xt)*rhy; d4 = (xc-xb)*rhy; d5 = (xrb-xr)*rhy; d6 = (xrb-xb)*rhx; d7 = (xlt-xl)*rhy; d8 = (xlt-xt)*rhx; f1 = PetscSqrtScalar( 1.0 + d1*d1 + d7*d7); f2 = PetscSqrtScalar( 1.0 + d1*d1 + d4*d4); f3 = PetscSqrtScalar( 1.0 + d3*d3 + d8*d8); f4 = PetscSqrtScalar( 1.0 + d3*d3 + d2*d2); f5 = PetscSqrtScalar( 1.0 + d2*d2 + d5*d5); f6 = PetscSqrtScalar( 1.0 + d4*d4 + d6*d6); hl = (-hydhx*(1.0+d7*d7)+d1*d7)/(f1*f1*f1)+(-hydhx*(1.0+d4*d4)+d1*d4)/(f2*f2*f2); hr = (-hydhx*(1.0+d5*d5)+d2*d5)/(f5*f5*f5)+(-hydhx*(1.0+d3*d3)+d2*d3)/(f4*f4*f4); ht = (-hxdhy*(1.0+d8*d8)+d3*d8)/(f3*f3*f3)+(-hxdhy*(1.0+d2*d2)+d2*d3)/(f4*f4*f4); hb = (-hxdhy*(1.0+d6*d6)+d4*d6)/(f6*f6*f6)+(-hxdhy*(1.0+d1*d1)+d1*d4)/(f2*f2*f2); hbr = -d2*d5/(f5*f5*f5) - d4*d6/(f6*f6*f6); htl = -d1*d7/(f1*f1*f1) - d3*d8/(f3*f3*f3); hc = hydhx*(1.0+d7*d7)/(f1*f1*f1) + hxdhy*(1.0+d8*d8)/(f3*f3*f3) + hydhx*(1.0+d5*d5)/(f5*f5*f5) + hxdhy*(1.0+d6*d6)/(f6*f6*f6) + (hxdhy*(1.0+d1*d1)+hydhx*(1.0+d4*d4)-2*d1*d4)/(f2*f2*f2) + (hxdhy*(1.0+d2*d2)+hydhx*(1.0+d3*d3)-2*d2*d3)/(f4*f4*f4); hl*=0.5; hr*=0.5; ht*=0.5; hb*=0.5; hbr*=0.5; htl*=0.5; hc*=0.5; k=0; if (j>0){ v[k]=hb; col[k]=row - mx; k++; } if (j>0 && i < mx -1){ v[k]=hbr; col[k]=row - mx+1; k++; } if (i>0){ v[k]= hl; col[k]=row - 1; k++; } v[k]= hc; col[k]=row; k++; if (i < mx-1 ){ v[k]= hr; col[k]=row+1; k++; } if (i>0 && j < my-1 ){ v[k]= htl; col[k] = row+mx-1; k++; } if (j < my-1 ){ v[k]= ht; col[k] = row+mx; k++; } /* Set matrix values using local numbering, which was defined earlier, in the main routine. */ ierr = MatSetValues(Hessian,1,&row,k,col,v,INSERT_VALUES);CHKERRQ(ierr); } } /* Restore vectors */ ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); /* Assemble the matrix */ ierr = MatAssemblyBegin(Hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscLogFlops(199*mx*my);CHKERRQ(ierr); return 0; }

void UniqueVec::restoreVec(PetscScalar *local_array) { VecRestoreArray(*data_.get(), &local_array); }

/*@ MatNullSpaceCreateRigidBody - create rigid body modes from coordinates Collective on Vec Input Argument: . coords - block of coordinates of each node, must have block size set Output Argument: . sp - the null space Level: advanced Notes: If you are solving an elasticity problems you should likely use this, in conjunction with ee MatSetNearNullspace(), to provide information that the PCGAMG preconditioner can use to construct a much more efficient preconditioner. If you are solving an elasticity problem with pure Neumann boundary conditions you can use this in conjunction with MatSetNullspace() to provide this information to the linear solver so it can handle the null space appropriately in the linear solution. .seealso: MatNullSpaceCreate(), MatSetNearNullspace(), MatSetNullspace() @*/ PetscErrorCode MatNullSpaceCreateRigidBody(Vec coords,MatNullSpace *sp) { PetscErrorCode ierr; const PetscScalar *x; PetscScalar *v[6],dots[5]; Vec vec[6]; PetscInt n,N,dim,nmodes,i,j; PetscReal sN; PetscFunctionBegin; ierr = VecGetBlockSize(coords,&dim);CHKERRQ(ierr); ierr = VecGetLocalSize(coords,&n);CHKERRQ(ierr); ierr = VecGetSize(coords,&N);CHKERRQ(ierr); n /= dim; N /= dim; sN = 1./PetscSqrtReal((PetscReal)N); switch (dim) { case 1: ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coords),PETSC_TRUE,0,NULL,sp);CHKERRQ(ierr); break; case 2: case 3: nmodes = (dim == 2) ? 3 : 6; ierr = VecCreate(PetscObjectComm((PetscObject)coords),&vec[0]);CHKERRQ(ierr); ierr = VecSetSizes(vec[0],dim*n,dim*N);CHKERRQ(ierr); ierr = VecSetBlockSize(vec[0],dim);CHKERRQ(ierr); ierr = VecSetUp(vec[0]);CHKERRQ(ierr); for (i=1; i<nmodes; i++) {ierr = VecDuplicate(vec[0],&vec[i]);CHKERRQ(ierr);} for (i=0; i<nmodes; i++) {ierr = VecGetArray(vec[i],&v[i]);CHKERRQ(ierr);} ierr = VecGetArrayRead(coords,&x);CHKERRQ(ierr); for (i=0; i<n; i++) { if (dim == 2) { v[0][i*2+0] = sN; v[0][i*2+1] = 0.; v[1][i*2+0] = 0.; v[1][i*2+1] = sN; /* Rotations */ v[2][i*2+0] = -x[i*2+1]; v[2][i*2+1] = x[i*2+0]; } else { v[0][i*3+0] = sN; v[0][i*3+1] = 0.; v[0][i*3+2] = 0.; v[1][i*3+0] = 0.; v[1][i*3+1] = sN; v[1][i*3+2] = 0.; v[2][i*3+0] = 0.; v[2][i*3+1] = 0.; v[2][i*3+2] = sN; v[3][i*3+0] = x[i*3+1]; v[3][i*3+1] = -x[i*3+0]; v[3][i*3+2] = 0.; v[4][i*3+0] = 0.; v[4][i*3+1] = -x[i*3+2]; v[4][i*3+2] = x[i*3+1]; v[5][i*3+0] = x[i*3+2]; v[5][i*3+1] = 0.; v[5][i*3+2] = -x[i*3+0]; } } for (i=0; i<nmodes; i++) {ierr = VecRestoreArray(vec[i],&v[i]);CHKERRQ(ierr);} ierr = VecRestoreArrayRead(coords,&x);CHKERRQ(ierr); for (i=dim; i<nmodes; i++) { /* Orthonormalize vec[i] against vec[0:i-1] */ ierr = VecMDot(vec[i],i,vec,dots);CHKERRQ(ierr); for (j=0; j<i; j++) dots[j] *= -1.; ierr = VecMAXPY(vec[i],i,dots,vec);CHKERRQ(ierr); ierr = VecNormalize(vec[i],NULL);CHKERRQ(ierr); } ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coords),PETSC_FALSE,nmodes,vec,sp);CHKERRQ(ierr); for (i=0; i<nmodes; i++) {ierr = VecDestroy(&vec[i]);CHKERRQ(ierr);} } PetscFunctionReturn(0); }

/*@C SNESComputeJacobianDefault - Computes the Jacobian using finite differences. Collective on SNES Input Parameters: + x1 - compute Jacobian at this point - ctx - application's function context, as set with SNESSetFunction() Output Parameters: + J - Jacobian matrix (not altered in this routine) - B - newly computed Jacobian matrix to use with preconditioner (generally the same as J) Options Database Key: + -snes_fd - Activates SNESComputeJacobianDefault() . -snes_test_err - Square root of function error tolerance, default square root of machine epsilon (1.e-8 in double, 3.e-4 in single) - -mat_fd_type - Either wp or ds (see MATMFFD_WP or MATMFFD_DS) Notes: This routine is slow and expensive, and is not currently optimized to take advantage of sparsity in the problem. Although SNESComputeJacobianDefault() is not recommended for general use in large-scale applications, It can be useful in checking the correctness of a user-provided Jacobian. An alternative routine that uses coloring to exploit matrix sparsity is SNESComputeJacobianDefaultColor(). Level: intermediate .keywords: SNES, finite differences, Jacobian .seealso: SNESSetJacobian(), SNESComputeJacobianDefaultColor(), MatCreateSNESMF() @*/ PetscErrorCode SNESComputeJacobianDefault(SNES snes,Vec x1,Mat J,Mat B,void *ctx) { Vec j1a,j2a,x2; PetscErrorCode ierr; PetscInt i,N,start,end,j,value,root; PetscScalar dx,*y,*xx,wscale; PetscReal amax,epsilon = PETSC_SQRT_MACHINE_EPSILON; PetscReal dx_min = 1.e-16,dx_par = 1.e-1,unorm; MPI_Comm comm; PetscErrorCode (*eval_fct)(SNES,Vec,Vec)=0; PetscBool assembled,use_wp = PETSC_TRUE,flg; const char *list[2] = {"ds","wp"}; PetscMPIInt size; const PetscInt *ranges; PetscFunctionBegin; ierr = PetscOptionsGetReal(((PetscObject)snes)->prefix,"-snes_test_err",&epsilon,0);CHKERRQ(ierr); eval_fct = SNESComputeFunction; ierr = PetscObjectGetComm((PetscObject)x1,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MatAssembled(B,&assembled);CHKERRQ(ierr); if (assembled) { ierr = MatZeroEntries(B);CHKERRQ(ierr); } if (!snes->nvwork) { snes->nvwork = 3; ierr = VecDuplicateVecs(x1,snes->nvwork,&snes->vwork);CHKERRQ(ierr); ierr = PetscLogObjectParents(snes,snes->nvwork,snes->vwork);CHKERRQ(ierr); } j1a = snes->vwork[0]; j2a = snes->vwork[1]; x2 = snes->vwork[2]; ierr = VecGetSize(x1,&N);CHKERRQ(ierr); ierr = VecGetOwnershipRange(x1,&start,&end);CHKERRQ(ierr); ierr = (*eval_fct)(snes,x1,j1a);CHKERRQ(ierr); ierr = PetscOptionsEList("-mat_fd_type","Algorithm to compute difference parameter","SNESComputeJacobianDefault",list,2,"wp",&value,&flg);CHKERRQ(ierr); if (flg && !value) use_wp = PETSC_FALSE; if (use_wp) { ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); } /* Compute Jacobian approximation, 1 column at a time. x1 = current iterate, j1a = F(x1) x2 = perturbed iterate, j2a = F(x2) */ for (i=0; i<N; i++) { ierr = VecCopy(x1,x2);CHKERRQ(ierr); if (i>= start && i<end) { ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); if (use_wp) dx = 1.0 + unorm; else dx = xx[i-start]; ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); if (PetscAbsScalar(dx) < dx_min) dx = (PetscRealPart(dx) < 0. ? -1. : 1.) * dx_par; dx *= epsilon; wscale = 1.0/dx; ierr = VecSetValues(x2,1,&i,&dx,ADD_VALUES);CHKERRQ(ierr); } else { wscale = 0.0; } ierr = VecAssemblyBegin(x2);CHKERRQ(ierr); ierr = VecAssemblyEnd(x2);CHKERRQ(ierr); ierr = (*eval_fct)(snes,x2,j2a);CHKERRQ(ierr); ierr = VecAXPY(j2a,-1.0,j1a);CHKERRQ(ierr); /* Communicate scale=1/dx_i to all processors */ ierr = VecGetOwnershipRanges(x1,&ranges);CHKERRQ(ierr); root = size; for (j=size-1; j>-1; j--) { root--; if (i>=ranges[j]) break; } ierr = MPI_Bcast(&wscale,1,MPIU_SCALAR,root,comm);CHKERRQ(ierr); ierr = VecScale(j2a,wscale);CHKERRQ(ierr); ierr = VecNorm(j2a,NORM_INFINITY,&amax);CHKERRQ(ierr); amax *= 1.e-14; ierr = VecGetArray(j2a,&y);CHKERRQ(ierr); for (j=start; j<end; j++) { if (PetscAbsScalar(y[j-start]) > amax || j == i) { ierr = MatSetValues(B,1,&j,1,&i,y+j-start,INSERT_VALUES);CHKERRQ(ierr); } } ierr = VecRestoreArray(j2a,&y);CHKERRQ(ierr); } ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (B != J) { ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } PetscFunctionReturn(0); }

int main(int argc,char **argv) { PetscErrorCode ierr; DM dm; Vec vec,vecLocal1,vecLocal2; PetscScalar *a,***a1,***a2,expected; PetscInt startx,starty,nx,ny,i,j,d,is,js,dof0,dof1,dof2,dofTotal,stencilWidth,Nx,Ny; DMBoundaryType boundaryTypex,boundaryTypey; PetscMPIInt rank; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); dof0 = 1; dof1 = 1; dof2 = 1; stencilWidth = 2; ierr = DMStagCreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,4,4,PETSC_DECIDE,PETSC_DECIDE,dof0,dof1,dof2,DMSTAG_STENCIL_BOX,stencilWidth,NULL,NULL,&dm);CHKERRQ(ierr); ierr = DMSetFromOptions(dm);CHKERRQ(ierr); ierr = DMSetUp(dm);CHKERRQ(ierr); ierr = DMStagGetDOF(dm,&dof0,&dof1,&dof2,NULL);CHKERRQ(ierr); dofTotal = dof0 + 2*dof1 + dof2; ierr = DMStagGetStencilWidth(dm,&stencilWidth);CHKERRQ(ierr); ierr = DMCreateLocalVector(dm,&vecLocal1);CHKERRQ(ierr); ierr = VecDuplicate(vecLocal1,&vecLocal2);CHKERRQ(ierr); ierr = DMCreateGlobalVector(dm,&vec);CHKERRQ(ierr); ierr = VecSet(vec,1.0);CHKERRQ(ierr); ierr = VecSet(vecLocal1,0.0);CHKERRQ(ierr); ierr = DMGlobalToLocalBegin(dm,vec,INSERT_VALUES,vecLocal1);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(dm,vec,INSERT_VALUES,vecLocal1);CHKERRQ(ierr); ierr = DMStagGetCorners(dm,&startx,&starty,NULL,&nx,&ny,NULL,NULL,NULL,NULL);CHKERRQ(ierr); ierr = DMStagVecGetArrayDOFRead(dm,vecLocal1,&a1);CHKERRQ(ierr); ierr = DMStagVecGetArrayDOF(dm,vecLocal2,&a2);CHKERRQ(ierr); for (j=starty; j<starty + ny; ++j) { for (i=startx; i<startx + nx; ++i) { for (d=0; d<dofTotal; ++d) { if (a1[j][i][d] != 1.0) { PetscPrintf(PETSC_COMM_SELF,"[%d] Unexpected value %g (expecting %g)\n",rank,a1[j][i][d],1.0);CHKERRQ(ierr); } a2[j][i][d] = 0.0; for (js = -stencilWidth; js <= stencilWidth; ++js) { for (is = -stencilWidth; is <= stencilWidth; ++is) { a2[j][i][d] += a1[j+js][i+is][d]; } } } } } ierr = DMStagVecRestoreArrayDOFRead(dm,vecLocal1,&a1);CHKERRQ(ierr); ierr = DMStagVecRestoreArrayDOF(dm,vecLocal2,&a2);CHKERRQ(ierr); DMLocalToGlobalBegin(dm,vecLocal2,INSERT_VALUES,vec);CHKERRQ(ierr); DMLocalToGlobalEnd(dm,vecLocal2,INSERT_VALUES,vec);CHKERRQ(ierr); /* For the all-periodic case, all values are the same . Otherwise, just check the local version */ ierr = DMStagGetBoundaryTypes(dm,&boundaryTypex,&boundaryTypey,NULL);CHKERRQ(ierr); if (boundaryTypex == DM_BOUNDARY_PERIODIC && boundaryTypey == DM_BOUNDARY_PERIODIC) { ierr = VecGetArray(vec,&a);CHKERRQ(ierr); expected = 1.0; for(d=0;d<2;++d) expected *= (2*stencilWidth+1); for (i=0; i<ny*nx*dofTotal; ++i) { if (a[i] != expected) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] Unexpected value %g (expecting %g)\n",rank,a[i],expected);CHKERRQ(ierr); } } ierr = VecRestoreArray(vec,&a);CHKERRQ(ierr); } else { ierr = DMStagVecGetArrayDOFRead(dm,vecLocal2,&a2);CHKERRQ(ierr); ierr = DMStagGetGlobalSizes(dm,&Nx,&Ny,NULL);CHKERRQ(ierr); if (stencilWidth > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Non-periodic check implemented assuming stencilWidth = 1"); for (j=starty; j<starty + ny; ++j) { for (i=startx; i<startx + nx; ++i) { PetscInt dd,extra[2]; PetscBool bnd[2]; bnd[0] = (PetscBool)((i == 0 || i == Nx-1) && boundaryTypex != DM_BOUNDARY_PERIODIC); bnd[1] = (PetscBool)((j == 0 || j == Ny-1) && boundaryTypey != DM_BOUNDARY_PERIODIC); extra[0] = i == Nx-1 && boundaryTypex != DM_BOUNDARY_PERIODIC ? 1 : 0; extra[1] = j == Ny-1 && boundaryTypey != DM_BOUNDARY_PERIODIC ? 1 : 0; { /* vertices */ PetscScalar expected = 1.0; for (dd=0;dd<2;++dd) expected *= (bnd[dd] ? stencilWidth + 1 + extra[dd] : 2*stencilWidth + 1); for (d=0; d<dof0; ++d) { if (a2[j][i][d] != expected) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] Element (%D,%D)[%D] Unexpected value %g (expecting %g)\n",rank,i,j,d,a2[j][i][d],expected);CHKERRQ(ierr); } } } { /* down edges */ PetscScalar expected = (bnd[1] ? stencilWidth + 1 + extra[1] : 2*stencilWidth + 1); expected *= ((bnd[0] ? 1 : 2) * stencilWidth + 1); for (d=dof0; d<dof0+dof1; ++d) { if (a2[j][i][d] != expected) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] Element (%D,%D)[%D] Unexpected value %g (expecting %g)\n",rank,i,j,d,a2[j][i][d],expected);CHKERRQ(ierr); } } } { /* left edges */ PetscScalar expected = (bnd[0] ? stencilWidth + 1 + extra[0] : 2*stencilWidth + 1); expected *= ((bnd[1] ? 1 : 2) * stencilWidth + 1); for (d=dof0+dof1; d<dof0+2*dof1; ++d) { if (a2[j][i][d] != expected) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] Element (%D,%D)[%D] Unexpected value %g (expecting %g)\n",rank,i,j,d,a2[j][i][d],expected);CHKERRQ(ierr); } } } { /* elements */ PetscScalar expected = 1.0; for (dd=0;dd<2;++dd) expected *= ((bnd[dd] ? 1 : 2) * stencilWidth + 1); for (d=dofTotal-dof2; d<dofTotal; ++d) { if (a2[j][i][d] != expected) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] Element (%D,%D)[%D] Unexpected value %g (expecting %g)\n",rank,i,j,d,a2[j][i][d],expected);CHKERRQ(ierr); } } } } } ierr = DMStagVecRestoreArrayDOFRead(dm,vecLocal2,&a2);CHKERRQ(ierr); } ierr = VecDestroy(&vec);CHKERRQ(ierr); ierr = VecDestroy(&vecLocal1);CHKERRQ(ierr); ierr = VecDestroy(&vecLocal2);CHKERRQ(ierr); ierr = DMDestroy(&dm);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

int main(int argc, char *args[]) { PFLOTRANMesh data; Mat Adj; /* The adjacency matrix of the mesh */ PetscInt bs = 3; PetscScalar values[9],*cc; PetscMPIInt size; PetscInt i; PetscErrorCode ierr; PetscViewer binaryviewer; Vec cellCenters; PetscViewer hdf5viewer; hid_t file_id, dataset_id, dataspace_id; herr_t status; PetscFunctionBegin; ierr = PetscInitialize(&argc, &args, (char *) 0, help); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size > 1) SETERRQ(PETSC_ERR_SUP,"This preprocessor runs only on one process"); /* Open Glenn's file */ ierr = PetscViewerCreate(PETSC_COMM_SELF, &hdf5viewer);CHKERRQ(ierr); ierr = PetscViewerSetType(hdf5viewer, PETSC_VIEWER_HDF5);CHKERRQ(ierr); ierr = PetscViewerFileSetMode(hdf5viewer, FILE_MODE_READ);CHKERRQ(ierr); ierr = PetscViewerFileSetName(hdf5viewer, "mesh.h5");CHKERRQ(ierr); ierr = PetscViewerHDF5GetFileId(hdf5viewer, &file_id);CHKERRQ(ierr); /* get number of cells and then number of edges */ dataset_id = H5Dopen(file_id, "/Cells/Natural IDs"); dataspace_id = H5Dget_space(dataset_id); status = H5Sget_simple_extent_dims(dataspace_id, &data.numCells, NULL);if (status < 0) SETERRQ(PETSC_ERR_LIB,"Bad dimension"); status = H5Sclose(dataspace_id);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Areas"); dataspace_id = H5Dget_space(dataset_id); status = H5Sget_simple_extent_dims(dataspace_id, &data.numFaces, NULL);if (status < 0) SETERRQ(PETSC_ERR_LIB,"Bad dimension"); status = H5Sclose(dataspace_id);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); ierr = PetscPrintf(PETSC_COMM_SELF, "Number of cells %D Number of faces %D \n",(PetscInt)data.numCells,(PetscInt)data.numFaces);CHKERRQ(ierr); /* read face data */ ierr = PetscMalloc5(data.numFaces,double,&data.faceAreas,data.numFaces,int,&data.downCells,data.numFaces,double,&data.downX,data.numFaces,double,&data.downY,data.numFaces,double,&data.downZ);CHKERRQ(ierr); dataset_id = H5Dopen(file_id, "/Connections/Areas"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.faceAreas);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Cell IDs"); status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downCells);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance X"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downX);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance Y"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downY);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance Z"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downZ);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); ierr = PetscMalloc4(data.numFaces,int,&data.upCells,data.numFaces,double,&data.upX,data.numFaces,double,&data.upY,data.numFaces,double,&data.upZ);CHKERRQ(ierr); dataset_id = H5Dopen(file_id, "/Connections/Upwind Cell IDs"); status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upCells);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance X"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upX);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance Y"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upY);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance Z"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upZ);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); // Put face data into matrix ierr = MatCreate(PETSC_COMM_WORLD, &Adj);CHKERRQ(ierr); ierr = MatSetSizes(Adj, data.numCells*bs, data.numCells*bs, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetFromOptions(Adj);CHKERRQ(ierr); ierr = MatSetType(Adj,MATSEQBAIJ);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(Adj, bs, 6,PETSC_NULL);CHKERRQ(ierr); //ierr = MatSetType(Adj,MATSEQAIJ);CHKERRQ(ierr); //ierr = MatSeqAIJSetPreallocation(Adj, 6,PETSC_NULL);CHKERRQ(ierr); for(i = 0; i < data.numFaces; ++i) { values[0] = data.faceAreas[i]; values[1] = data.downCells[i]; values[2] = data.downX[i]; values[3] = data.downY[i]; values[4] = data.downZ[i]; values[5] = data.upCells[i]; values[6] = data.upX[i]; values[7] = data.upY[i]; values[8] = data.upZ[i]; ierr = MatSetValuesBlocked(Adj, 1, &data.downCells[i], 1, &data.upCells[i], values, INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValuesBlocked(Adj, 1, &data.upCells[i], 1, &data.downCells[i], values, INSERT_VALUES);CHKERRQ(ierr); //ierr = MatSetValues(Adj, 1, &data.downCells[i], 1, &data.upCells[i], values, INSERT_VALUES);CHKERRQ(ierr); //ierr = MatSetValues(Adj, 1, &data.upCells[i], 1, &data.downCells[i], values, INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(Adj, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Adj, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree5(data.faceAreas, data.downCells, data.downX, data.downY, data.downZ);CHKERRQ(ierr); ierr = PetscFree4(data.upCells, data.upX, data.upY, data.upZ);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"mesh.petsc", FILE_MODE_WRITE,&binaryviewer);CHKERRQ(ierr); ierr = MatView(Adj, binaryviewer);CHKERRQ(ierr); ierr = MatDestroy(Adj);CHKERRQ(ierr); /* read cell information */ ierr = PetscMalloc5(data.numCells,int,&data.cellIds,data.numCells,double,&data.cellVols,data.numCells,double,&data.cellX,data.numCells,double,&data.cellY,data.numCells,double,&data.cellZ);CHKERRQ(ierr); dataset_id = H5Dopen(file_id, "/Cells/Natural IDs"); status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellIds);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/Volumes"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellVols);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/X-Coordinates"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellX);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/Y-Coordinates"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellY);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); dataset_id = H5Dopen(file_id, "/Cells/Z-Coordinates"); status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellZ);CHKERRQ(status); status = H5Dclose(dataset_id);CHKERRQ(status); ierr = PetscViewerDestroy(hdf5viewer);CHKERRQ(ierr); /* put cell information into vectors */ ierr = VecCreateSeq(PETSC_COMM_SELF,3*data.numCells,&cellCenters);CHKERRQ(ierr); ierr = VecSetBlockSize(cellCenters,3);CHKERRQ(ierr); ierr = VecGetArray(cellCenters,&cc);CHKERRQ(ierr); for (i=0; i<data.numCells; i++) { cc[3*i] = data.cellX[i]; cc[3*i+1] = data.cellY[i]; cc[3*i+2] = data.cellZ[i]; } ierr = VecRestoreArray(cellCenters,&cc);CHKERRQ(ierr); ierr = VecView(cellCenters,binaryviewer);CHKERRQ(ierr); ierr = VecGetArray(cellCenters,&cc);CHKERRQ(ierr); for (i=0; i<data.numCells; i++) { cc[3*i] = data.cellIds[i]; cc[3*i+1] = data.cellVols[i]; cc[3*i+2] = 0.0; } ierr = VecRestoreArray(cellCenters,&cc);CHKERRQ(ierr); ierr = VecView(cellCenters,binaryviewer);CHKERRQ(ierr); ierr = PetscFree5(data.cellIds, data.cellVols, data.cellX, data.cellY, data.cellZ);CHKERRQ(ierr); ierr = VecDestroy(cellCenters); ierr = PetscViewerDestroy(binaryviewer);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); PetscFunctionReturn(0); }

static PetscErrorCode KSPSolve_CGS(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscScalar rho,rhoold,a,s,b; Vec X,B,V,P,R,RP,T,Q,U,AUQ; PetscReal dp = 0.0; PetscBool diagonalscale; PetscFunctionBegin; /* not sure what residual norm it does use, should use for right preconditioning */ ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; RP = ksp->work[1]; V = ksp->work[2]; T = ksp->work[3]; Q = ksp->work[4]; P = ksp->work[5]; U = ksp->work[6]; AUQ = V; /* Compute initial preconditioned residual */ ierr = KSPInitialResidual(ksp,X,V,T,R,B);CHKERRQ(ierr); /* Test for nothing to do */ ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); KSPCheckNorm(ksp,dp); if (ksp->normtype == KSP_NORM_NATURAL) dp *= dp; ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Make the initial Rp == R */ ierr = VecCopy(R,RP);CHKERRQ(ierr); /* added for Fidap */ /* Penalize Startup - Isaac Hasbani Trick for CGS Since most initial conditions result in a mostly 0 residual, we change all the 0 values in the vector RP to the maximum. */ if (ksp->normtype == KSP_NORM_NATURAL) { PetscReal vr0max; PetscScalar *tmp_RP=0; PetscInt numnp =0, *max_pos=0; ierr = VecMax(RP, max_pos, &vr0max);CHKERRQ(ierr); ierr = VecGetArray(RP, &tmp_RP);CHKERRQ(ierr); ierr = VecGetLocalSize(RP, &numnp);CHKERRQ(ierr); for (i=0; i<numnp; i++) { if (tmp_RP[i] == 0.0) tmp_RP[i] = vr0max; } ierr = VecRestoreArray(RP, &tmp_RP);CHKERRQ(ierr); } /* end of addition for Fidap */ /* Set the initial conditions */ ierr = VecDot(R,RP,&rhoold);CHKERRQ(ierr); /* rhoold = (r,rp) */ ierr = VecCopy(R,U);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp,P,V,T);CHKERRQ(ierr); i = 0; do { ierr = VecDot(V,RP,&s);CHKERRQ(ierr); /* s <- (v,rp) */ KSPCheckDot(ksp,s); a = rhoold / s; /* a <- rho / s */ ierr = VecWAXPY(Q,-a,V,U);CHKERRQ(ierr); /* q <- u - a v */ ierr = VecWAXPY(T,1.0,U,Q);CHKERRQ(ierr); /* t <- u + q */ ierr = VecAXPY(X,a,T);CHKERRQ(ierr); /* x <- x + a (u + q) */ ierr = KSP_PCApplyBAorAB(ksp,T,AUQ,U);CHKERRQ(ierr); ierr = VecAXPY(R,-a,AUQ);CHKERRQ(ierr); /* r <- r - a K (u + q) */ ierr = VecDot(R,RP,&rho);CHKERRQ(ierr); /* rho <- (r,rp) */ KSPCheckDot(ksp,rho); if (ksp->normtype == KSP_NORM_NATURAL) { dp = PetscAbsScalar(rho); } else { ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr); KSPCheckNorm(ksp,dp); } ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; b = rho / rhoold; /* b <- rho / rhoold */ ierr = VecWAXPY(U,b,Q,R);CHKERRQ(ierr); /* u <- r + b q */ ierr = VecAXPY(Q,b,P);CHKERRQ(ierr); ierr = VecWAXPY(P,b,Q,U);CHKERRQ(ierr); /* p <- u + b(q + b p) */ ierr = KSP_PCApplyBAorAB(ksp,P,V,Q);CHKERRQ(ierr); /* v <- K p */ rhoold = rho; i++; } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; ierr = KSPUnwindPreconditioner(ksp,X,T);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode SetInitialGuess(Vec X,AppCtx *user) { PetscErrorCode ierr; PetscInt n,i,Mda; PetscScalar *xx,*cv_p,*wv_p,*eta_p; PetscViewer view_out; /* needed for the void growth case */ PetscScalar xmid,cv_v=1.0,cv_m=user->Sv*user->cv0,eta_v=1.0,eta_m=0.0,h,lambda; PetscInt nele,nen,idx[2]; const PetscInt *ele; PetscScalar x[2]; Vec coords; const PetscScalar *_coords; PetscScalar xwidth = user->xmax - user->xmin; PetscFunctionBeginUser; ierr = VecGetLocalSize(X,&n);CHKERRQ(ierr); ierr = DMDAGetInfo(user->da2,NULL,&Mda,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); ierr = DMGetCoordinatesLocal(user->da2,&coords);CHKERRQ(ierr); ierr = VecGetArrayRead(coords,&_coords);CHKERRQ(ierr); if (user->periodic) h = (user->xmax-user->xmin)/Mda; else h = (user->xmax-user->xmin)/(Mda-1.0); xmid = (user->xmax + user->xmin)/2.0; lambda = 4.0*h; ierr = DMDAGetElements(user->da2,&nele,&nen,&ele);CHKERRQ(ierr); for (i=0; i < nele; i++) { idx[0] = ele[2*i]; idx[1] = ele[2*i+1]; x[0] = _coords[idx[0]]; x[1] = _coords[idx[1]]; PetscInt k; PetscScalar vals_DDcv[2],vals_cv[2],vals_eta[2],s,hhr,r; for (k=0; k < 2; k++) { s = PetscAbs(x[k] - xmid); if (s <= xwidth*(5.0/64.0)) { vals_cv[k] = cv_v; vals_eta[k] = eta_v; vals_DDcv[k] = 0.0; } else if (s> xwidth*(5.0/64.0) && s<= xwidth*(7.0/64.0)) { /*r = (s - xwidth*(6.0/64.0))/(0.5*lambda);*/ r = (s - xwidth*(6.0/64.0))/(xwidth/64.0); hhr = 0.25*(-r*r*r + 3*r + 2); vals_cv[k] = cv_m + (1.0 - hhr)*(cv_v - cv_m); vals_eta[k] = eta_m + (1.0 - hhr)*(eta_v - eta_m); vals_DDcv[k] = (cv_v - cv_m)*r*6.0/(lambda*lambda); } else { vals_cv[k] = cv_m; vals_eta[k] = eta_m; vals_DDcv[k] = 0.0; } } ierr = VecSetValuesLocal(user->cv,2,idx,vals_cv,INSERT_VALUES);CHKERRQ(ierr); ierr = VecSetValuesLocal(user->eta,2,idx,vals_eta,INSERT_VALUES);CHKERRQ(ierr); ierr = VecSetValuesLocal(user->work2,2,idx,vals_DDcv,INSERT_VALUES);CHKERRQ(ierr); } ierr = DMDARestoreElements(user->da2,&nele,&nen,&ele);CHKERRQ(ierr); ierr = VecRestoreArrayRead(coords,&_coords);CHKERRQ(ierr); ierr = VecAssemblyBegin(user->cv);CHKERRQ(ierr); ierr = VecAssemblyEnd(user->cv);CHKERRQ(ierr); ierr = VecAssemblyBegin(user->eta);CHKERRQ(ierr); ierr = VecAssemblyEnd(user->eta);CHKERRQ(ierr); ierr = VecAssemblyBegin(user->work2);CHKERRQ(ierr); ierr = VecAssemblyEnd(user->work2);CHKERRQ(ierr); ierr = DPsi(user);CHKERRQ(ierr); ierr = VecCopy(user->DPsiv,user->wv);CHKERRQ(ierr); ierr = VecAXPY(user->wv,-2.0*user->kav,user->work2);CHKERRQ(ierr); ierr = VecGetArray(X,&xx);CHKERRQ(ierr); ierr = VecGetArray(user->wv,&wv_p);CHKERRQ(ierr); ierr = VecGetArray(user->cv,&cv_p);CHKERRQ(ierr); ierr = VecGetArray(user->eta,&eta_p);CHKERRQ(ierr); for (i=0; i<n/3; i++) { xx[3*i] =wv_p[i]; xx[3*i+1]=cv_p[i]; xx[3*i+2]=eta_p[i]; } ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"file_initial",FILE_MODE_WRITE,&view_out);CHKERRQ(ierr); ierr = VecView(user->wv,view_out);CHKERRQ(ierr); ierr = VecView(user->cv,view_out);CHKERRQ(ierr); ierr = VecView(user->eta,view_out);CHKERRQ(ierr); ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr); ierr = VecRestoreArray(X,&xx);CHKERRQ(ierr); ierr = VecRestoreArray(user->wv,&wv_p);CHKERRQ(ierr); ierr = VecRestoreArray(user->cv,&cv_p);CHKERRQ(ierr); ierr = VecRestoreArray(user->eta,&eta_p);CHKERRQ(ierr); PetscFunctionReturn(0); }

PetscErrorCode ComputeJacobian_LS(DM dm, Vec locX, PetscInt cell, PetscScalar CellValues[], void *ctx) { User user = (User) ctx; Physics phys = user->model->physics; PetscInt dof = phys->dof; const PetscScalar *facegeom, *cellgeom,*x; PetscErrorCode ierr; DM dmFace, dmCell; DM dmGrad = user->dmGrad; PetscInt fStart, fEnd, face, cStart; Vec locGrad, locGradLimiter, Grad; /*here the localGradLimiter refers to the gradient that has been multiplied by the limiter function. The locGradLimiter is used to construct the uL and uR, and the locGrad is used to caculate the diffusion term*/ Vec TempVec; /*a temperal vec for the vector restore*/ PetscFunctionBeginUser; ierr = VecGetDM(user->facegeom,&dmFace);CHKERRQ(ierr); ierr = VecGetDM(user->cellgeom,&dmCell);CHKERRQ(ierr); ierr = DMGetGlobalVector(dmGrad,&Grad);CHKERRQ(ierr); ierr = VecDuplicate(Grad, &TempVec);CHKERRQ(ierr); ierr = VecCopy(Grad, TempVec);CHKERRQ(ierr); /*Backup the original vector and use it to restore the value of dmGrad, because I do not want to change the values of the cell gradient*/ ierr = VecGetArrayRead(user->facegeom,&facegeom);CHKERRQ(ierr); ierr = VecGetArrayRead(user->cellgeom,&cellgeom);CHKERRQ(ierr); ierr = VecGetArrayRead(locX,&x);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, 0, &cStart, NULL);CHKERRQ(ierr); { PetscScalar *grad; ierr = VecGetArray(Grad,&grad);CHKERRQ(ierr); /* Limit interior gradients. Using cell-based loop because it generalizes better to vector limiters. */ const PetscInt *faces; PetscInt numFaces,f; PetscReal *cellPhi; /* Scalar limiter applied to each component separately */ const PetscScalar *cx; const CellGeom *cg; PetscScalar *cgrad; PetscInt i; ierr = PetscMalloc(phys->dof*sizeof(PetscScalar),&cellPhi);CHKERRQ(ierr); ierr = DMPlexGetConeSize(dm,cell,&numFaces);CHKERRQ(ierr); ierr = DMPlexGetCone(dm,cell,&faces);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dm,cell,x,&cx);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dmCell,cell,cellgeom,&cg);CHKERRQ(ierr); ierr = DMPlexPointGlobalRef(dmGrad,cell,grad,&cgrad);CHKERRQ(ierr); /* Limiter will be minimum value over all neighbors */ for (i=0; i<dof; i++) { cellPhi[i] = PETSC_MAX_REAL; } for (f=0; f<numFaces; f++) { const PetscScalar *ncx; const CellGeom *ncg; const PetscInt *fcells; PetscInt face = faces[f],ncell; PetscScalar v[DIM]; PetscBool ghost; ierr = IsExteriorGhostFace(dm,face,&ghost);CHKERRQ(ierr); if (ghost) continue; ierr = DMPlexGetSupport(dm,face,&fcells);CHKERRQ(ierr); ncell = cell == fcells[0] ? fcells[1] : fcells[0]; /*The expression (x ? y : z) has the value of y if x is nonzero, z otherwise */ ierr = DMPlexPointLocalRead(dm,ncell,x,&ncx);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dmCell,ncell,cellgeom,&ncg);CHKERRQ(ierr); Waxpy2(-1, cg->centroid, ncg->centroid, v); for (i=0; i<dof; i++) { /* We use the symmetric slope limited form of Berger, Aftosmis, and Murman 2005 */ PetscScalar phi,flim = 0.5 * (ncx[i] - cx[i]) / Dot2(&cgrad[i*DIM],v); phi = (*user->LimitGrad)(flim); cellPhi[i] = PetscMin(cellPhi[i],phi); } } /* Apply limiter to gradient */ for (i=0; i<dof; i++) Scale2(cellPhi[i],&cgrad[i*DIM],&cgrad[i*DIM]); ierr = PetscFree(cellPhi);CHKERRQ(ierr); ierr = VecRestoreArray(Grad,&grad);CHKERRQ(ierr); } ierr = DMGetLocalVector(dmGrad,&locGradLimiter);CHKERRQ(ierr); ierr = DMGlobalToLocalBegin(dmGrad,Grad,INSERT_VALUES,locGradLimiter);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(dmGrad,Grad,INSERT_VALUES,locGradLimiter);CHKERRQ(ierr); ierr = VecCopy(TempVec, Grad);CHKERRQ(ierr);/*Restore the vector*/ ierr = DMGetLocalVector(dmGrad,&locGrad);CHKERRQ(ierr); ierr = DMGlobalToLocalBegin(dmGrad,Grad,INSERT_VALUES,locGrad);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(dmGrad,Grad,INSERT_VALUES,locGrad);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(dmGrad,&Grad);CHKERRQ(ierr); ierr = VecDestroy(&TempVec);CHKERRQ(ierr); { const PetscScalar *grad, *gradlimiter; ierr = VecGetArrayRead(locGrad,&grad);CHKERRQ(ierr); ierr = VecGetArrayRead(locGradLimiter,&gradlimiter);CHKERRQ(ierr); for (face=fStart; face<fEnd; face++) { const PetscInt *cells; PetscInt ghost,i,j; PetscScalar *fluxcon, *fluxdiff, *fx[2]; const FaceGeom *fg; const CellGeom *cg[2]; const PetscScalar *cx[2],*cgrad[2], *cgradlimiter[2]; PetscScalar *uL, *uR; PetscReal FaceArea; ierr = PetscMalloc(phys->dof * phys->dof * sizeof(PetscScalar), &fluxcon);CHKERRQ(ierr); /*For the convection terms*/ ierr = PetscMalloc(phys->dof * phys->dof * sizeof(PetscScalar), &fluxdiff);CHKERRQ(ierr); /*For the diffusion terms*/ ierr = PetscMalloc(phys->dof * sizeof(PetscScalar), &uL);CHKERRQ(ierr); ierr = PetscMalloc(phys->dof * sizeof(PetscScalar), &uR);CHKERRQ(ierr); fx[0] = uL; fx[1] = uR; ierr = DMPlexGetLabelValue(dm, "ghost", face, &ghost);CHKERRQ(ierr); if (ghost >= 0) continue; ierr = DMPlexGetSupport(dm, face, &cells);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dmFace,face,facegeom,&fg);CHKERRQ(ierr); for (i=0; i<2; i++) { PetscScalar dx[DIM]; ierr = DMPlexPointLocalRead(dmCell,cells[i],cellgeom,&cg[i]);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dm,cells[i],x,&cx[i]);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dmGrad,cells[i],gradlimiter,&cgradlimiter[i]);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dmGrad,cells[i],grad,&cgrad[i]);CHKERRQ(ierr); Waxpy2(-1,cg[i]->centroid,fg->centroid,dx); for (j=0; j<dof; j++) { fx[i][j] = cx[i][j] + Dot2(cgradlimiter[i],dx); } /*fx[0] and fx[1] are the value of the variables on the left and right side of the face, respectively, that is u_L and u_R.*/ } ierr = RiemannSolver_Rusanov_Jacobian(user, cgrad[0], cgrad[1], fg->centroid, cg[0]->centroid, cg[1]->centroid, fg->normal, fx[0], fx[1], fluxcon, fluxdiff);CHKERRQ(ierr); ierr = DMPlexComputeCellGeometryFVM(dm, face, &FaceArea, NULL, NULL);CHKERRQ(ierr); /*Compute the face area*/ for (i=0; i<phys->dof; i++) { for (j=0; j<phys->dof; j++) { if(cells[0]<user->cEndInterior) CellValues[cells[0]*dof*dof + i*dof + j] -= cells[0]*1.0; if(cells[1]<user->cEndInterior) CellValues[cells[1]*dof*dof + i*dof + j] += cells[1]*1.2; } } // ierr = PetscPrintf(PETSC_COMM_WORLD,"\n");CHKERRQ(ierr); ierr = PetscFree(fluxcon);CHKERRQ(ierr); ierr = PetscFree(fluxdiff);CHKERRQ(ierr); ierr = PetscFree(uL);CHKERRQ(ierr); ierr = PetscFree(uR);CHKERRQ(ierr); } ierr = VecRestoreArrayRead(locGrad,&grad);CHKERRQ(ierr); ierr = VecRestoreArrayRead(locGradLimiter,&gradlimiter);CHKERRQ(ierr); } ierr = VecRestoreArrayRead(user->facegeom,&facegeom);CHKERRQ(ierr); ierr = VecRestoreArrayRead(user->cellgeom,&cellgeom);CHKERRQ(ierr); ierr = VecRestoreArrayRead(locX,&x);CHKERRQ(ierr); ierr = DMRestoreLocalVector(dmGrad,&locGradLimiter);CHKERRQ(ierr); ierr = DMRestoreLocalVector(dmGrad,&locGrad);CHKERRQ(ierr); PetscFunctionReturn(0); }

void getForces(Vec params, std::vector<Vec> &forces, DA da, timeInfo ti, int numParams) { #ifdef __DEBUG__ std::cout << RED"Entering "NRM << __func__ << std::endl; #endif // Generate the force vector based on the current parameters ... // F = Sum { B_i a_i} PetscScalar * pVec; VecGetArray(params, &pVec); // Clear the Forces for (unsigned int i=0; i<forces.size(); i++) { if (forces[i] != NULL) { VecDestroy(forces[i]); } } forces.clear(); unsigned int numSteps = (unsigned int)(ceil(( ti.stop - ti.start)/ti.step)); // create and initialize to 0 for (unsigned int i=0; i<numSteps+1; i++) { Vec tmp; DACreateGlobalVector(da, &tmp); VecZeroEntries(tmp); forces.push_back(tmp); } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PetscScalar ***tauArray; unsigned int paramTimeSteps = (unsigned int)(ceil(( (double)(numSteps))/ ((double)(2*numParams)) )); double acx,acy,acz; int x, y, z, m, n, p; int mx,my,mz; DAGetCorners(da, &x, &y, &z, &m, &n, &p); DAGetInfo(da,0, &mx, &my, &mz, 0,0,0,0,0,0,0); double hx = 1.0/(mx-1.0); for (int b=0; b<numParams; b++) { std::vector<Vec> tau; unsigned int tBegin = paramTimeSteps*b; unsigned int tEnd = tBegin + numSteps/2; // paramTimeSteps*(b+2); // std::cout << "For param " << b << ": Time step range is " << tBegin << " -> " << tEnd << std::endl; for (unsigned int t=0; t<numSteps+1; t++) { double newTime = (ti.step*(t-tBegin)*numSteps)/((double)(paramTimeSteps)); if ( (t>=tBegin) && (t<=tEnd)) { DAVecGetArray(da, forces[t], &tauArray); for (int k = z; k < z + p ; k++) { for (int j = y; j < y + n; j++) { for (int i = x; i < x + m; i++) { acx = (i)*hx; acy = (j)*hx; acz = (k)*hx; tauArray[k][j][i] += pVec[b]*sin(M_PI*newTime)*cos(2*M_PI*acx)*cos(2*M_PI*acy)*cos(2*M_PI*acz); } } } DAVecRestoreArray ( da, forces[t], &tauArray ) ; } } } VecRestoreArray(params, &pVec); #ifdef __DEBUG__ // Get the norms of the forces ... just to be safe .. double fNorm1, fNorm2; for (unsigned int i=0; i<forces.size(); i++) { VecNorm(forces[i], NORM_INFINITY, &fNorm1); VecNorm(forces[i], NORM_2, &fNorm2); PetscPrintf(0, "Force Norms at timestep %d are %g and %g\n", i, fNorm1, fNorm2); } #endif #ifdef __DEBUG__ std::cout << GRN"Leaving "NRM << __func__ << std::endl; #endif }

/** Compute the gadient of the cell center gradient obtained by the least-square method */ PetscErrorCode GradientGradientJacobian(DM dm, Vec locX, PetscScalar elemMat[], void *ctx) { User user = (User) ctx; Physics phys = user->model->physics; PetscInt dof = phys->dof; const PetscScalar *facegeom, *cellgeom,*x; PetscErrorCode ierr; DM dmFace, dmCell; DM dmGrad = user->dmGrad; PetscInt fStart, fEnd, face, cStart; Vec Grad; /*here the localGradLimiter refers to the gradient that has been multiplied by the limiter function. The locGradLimiter is used to construct the uL and uR, and the locGrad is used to caculate the diffusion term*/ Vec TempVec; /*a temperal vec for the vector restore*/ PetscFunctionBeginUser; ierr = VecGetDM(user->facegeom,&dmFace);CHKERRQ(ierr); ierr = VecGetDM(user->cellgeom,&dmCell);CHKERRQ(ierr); ierr = DMGetGlobalVector(dmGrad,&Grad);CHKERRQ(ierr); ierr = VecZeroEntries(Grad);CHKERRQ(ierr); ierr = VecDuplicate(Grad, &TempVec);CHKERRQ(ierr); ierr = VecCopy(Grad, TempVec);CHKERRQ(ierr); ierr = VecGetArrayRead(user->facegeom,&facegeom);CHKERRQ(ierr); ierr = VecGetArrayRead(user->cellgeom,&cellgeom);CHKERRQ(ierr); ierr = VecGetArrayRead(locX,&x);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, 0, &cStart, NULL);CHKERRQ(ierr); { PetscScalar *grad; ierr = VecGetArray(TempVec,&grad);CHKERRQ(ierr); /* Reconstruct gradients */ for (face=fStart; face<fEnd; face++) { const PetscInt *cells; const PetscScalar *cx[2]; const FaceGeom *fg; PetscScalar *cgrad[2]; PetscInt i,j; PetscBool ghost; ierr = IsExteriorGhostFace(dm,face,&ghost);CHKERRQ(ierr); if (ghost) continue; ierr = DMPlexGetSupport(dm,face,&cells);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dmFace,face,facegeom,&fg);CHKERRQ(ierr); for (i=0; i<2; i++) { ierr = DMPlexPointLocalRead(dm,cells[i],x,&cx[i]);CHKERRQ(ierr); ierr = DMPlexPointGlobalRef(dmGrad,cells[i],grad,&cgrad[i]);CHKERRQ(ierr); } for (i=0; i<dof; i++) { PetscScalar delta = cx[1][i] - cx[0][i]; for (j=0; j<DIM; j++) { if (cgrad[0]) cgrad[0][i*DIM+j] += fg->grad[0][j] * delta; if (cgrad[1]) cgrad[1][i*DIM+j] -= fg->grad[1][j] * delta; } } for (i=0; i<phys->dof; i++) { for (j=0; j<phys->dof; j++) { if(cells[0]<user->cEndInterior) elemMat[cells[0]*dof*dof + i*dof + j] -= cells[0]*1.0; if(cells[1]<user->cEndInterior) elemMat[cells[1]*dof*dof + i*dof + j] += cells[1]*1.2; } } } ierr = VecRestoreArray(TempVec,&grad);CHKERRQ(ierr); } ierr = DMRestoreGlobalVector(dmGrad,&Grad);CHKERRQ(ierr); ierr = VecRestoreArrayRead(user->facegeom,&facegeom);CHKERRQ(ierr); ierr = VecRestoreArrayRead(user->cellgeom,&cellgeom);CHKERRQ(ierr); ierr = VecRestoreArrayRead(locX,&x);CHKERRQ(ierr); PetscFunctionReturn(0); }

/* FormFunction - Evaluates nonlinear function, F(x). Input Parameters: . snes - the SNES context . X - input vector . ptr - optional user-defined context, as set by SNESSetFunction() Output Parameter: . F - function vector */ int FormFunction(SNES snes,Vec X,Vec F,void *ptr) { AppCtx *user = (AppCtx*)ptr; int ierr,i,j,row,mx,my; PetscReal two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc; PetscScalar u,uxx,uyy,*x,*f; /* Process 0 has to wait for all other processes to get here before proceeding to write in the shared vector */ ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr); if (user->rank) { /* All the non-busy processors have to wait here for process 0 to finish evaluating the function; otherwise they will start using the vector values before they have been computed */ ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr); return 0; } mx = user->mx; my = user->my; lambda = user->param; hx = one/(PetscReal)(mx-1); hy = one/(PetscReal)(my-1); sc = hx*hy*lambda; hxdhy = hx/hy; hydhx = hy/hx; /* Get pointers to vector data */ ierr = VecGetArray(X,&x);CHKERRQ(ierr); ierr = VecGetArray(F,&f);CHKERRQ(ierr); /* The next line tells the SGI compiler that x and f contain no overlapping regions and thus it can use addition optimizations. */ #pragma arl(4) #pragma distinct (*x,*f) #pragma no side effects (exp) /* Compute function over the entire grid */ for (j=0; j<my; j++) { for (i=0; i<mx; i++) { row = i + j*mx; if (i == 0 || j == 0 || i == mx-1 || j == my-1) { f[row] = x[row]; continue; } u = x[row]; uxx = (two*u - x[row-1] - x[row+1])*hydhx; uyy = (two*u - x[row-mx] - x[row+mx])*hxdhy; f[row] = uxx + uyy - sc*exp(u); } } /* Restore vectors */ ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); ierr = VecRestoreArray(F,&f);CHKERRQ(ierr); ierr = PetscLogFlops(11.0*(mx-2)*(my-2))CHKERRQ(ierr); ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr); return 0; }

PetscErrorCode ApplyBC(DM dm, PetscReal time, Vec locX, User user) { const char *name = "Face Sets"; /*Set up in the function DMPlexCreateExodus. is the side set*/ DM dmFace; IS idIS; const PetscInt *ids; PetscScalar *x; const PetscScalar *facegeom; PetscInt numFS, fs; PetscErrorCode ierr; PetscMPIInt rank; PetscFunctionBeginUser; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = VecGetDM(user->facegeom,&dmFace);CHKERRQ(ierr); ierr = DMPlexGetLabelIdIS(dm, name, &idIS);CHKERRQ(ierr); // ISView(idIS, PETSC_VIEWER_STDOUT_SELF); if (!idIS) PetscFunctionReturn(0); ierr = ISGetLocalSize(idIS, &numFS);CHKERRQ(ierr); ierr = ISGetIndices(idIS, &ids);CHKERRQ(ierr); ierr = VecGetArrayRead(user->facegeom, &facegeom);CHKERRQ(ierr); ierr = VecGetArray(locX, &x);CHKERRQ(ierr); for (fs = 0; fs < numFS; ++fs) { IS faceIS; const PetscInt *faces; PetscInt numFaces, f; ierr = DMPlexGetStratumIS(dm, name, ids[fs], &faceIS);CHKERRQ(ierr); ierr = ISGetLocalSize(faceIS, &numFaces);CHKERRQ(ierr); ierr = ISGetIndices(faceIS, &faces);CHKERRQ(ierr); for (f = 0; f < numFaces; ++f) { // PetscPrintf(PETSC_COMM_SELF, "rank[%d]: ids[%d] = %d, faceIS[%d] = %d, numFaces = %d\n", rank, fs, ids[fs], f, faces[f], numFaces); const PetscInt face = faces[f], *cells; const PetscScalar *xI; /*Inner point*/ PetscScalar *xG; /*Ghost point*/ const FaceGeom *fg; ierr = DMPlexPointLocalRead(dmFace, face, facegeom, &fg);CHKERRQ(ierr); ierr = DMPlexGetSupport(dm, face, &cells);CHKERRQ(ierr); ierr = DMPlexPointLocalRead(dm, cells[0], x, &xI);CHKERRQ(ierr); ierr = DMPlexPointLocalRef(dm, cells[1], x, &xG);CHKERRQ(ierr); if (ids[fs]==1){ //PetscPrintf(PETSC_COMM_SELF, "Set Inlfow Boundary Condition! \n"); ierr = BoundaryInflow(time, fg->centroid, fg->normal, xI, xG, user);CHKERRQ(ierr); // DM dmCell; // const PetscScalar *cellgeom; // const CellGeom *cgL, *cgR; // ierr = VecGetDM(user->cellgeom,&dmCell);CHKERRQ(ierr); // ierr = VecGetArrayRead(user->cellgeom, &cellgeom);CHKERRQ(ierr); // ierr = DMPlexPointLocalRead(dmCell, cells[0], cellgeom, &cgL);CHKERRQ(ierr); // ierr = DMPlexPointLocalRead(dmCell, cells[1], cellgeom, &cgR);CHKERRQ(ierr); // ierr = PetscPrintf(PETSC_COMM_WORLD,"cells[0] = (%f, %f, %f), cells[1] = (%f, %f, %f)\n",cgL->centroid[0], cgL->centroid[1], cgL->centroid[2],cgR->centroid[0], cgR->centroid[1], cgR->centroid[2]);CHKERRQ(ierr); }else if (ids[fs]==2){ //PetscPrintf(PETSC_COMM_SELF, "Set Outlfow Boundary Condition! \n"); ierr = BoundaryOutflow(time, fg->centroid, fg->normal, xI, xG, user);CHKERRQ(ierr); }else if (ids[fs]==3){ //PetscPrintf(PETSC_COMM_SELF, "Set Wall Boundary Condition! \n"); ierr = BoundaryWallflow(time, fg->centroid, fg->normal, xI, xG, user);CHKERRQ(ierr); }else { SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Wrong type of boundary condition setup!!! \n The set up of the boundary should be: 1 for the inflow, 2 for the outflow, and 3 for the wallflow"); } } // PetscPrintf(PETSC_COMM_SELF, " \n"); ierr = ISRestoreIndices(faceIS, &faces);CHKERRQ(ierr); ierr = ISDestroy(&faceIS);CHKERRQ(ierr); } ierr = VecRestoreArray(locX, &x);CHKERRQ(ierr); ierr = VecRestoreArrayRead(user->facegeom,&facegeom);CHKERRQ(ierr); ierr = ISRestoreIndices(idIS, &ids);CHKERRQ(ierr); ierr = ISDestroy(&idIS);CHKERRQ(ierr); PetscFunctionReturn(0); }

/* FormFunction - Evaluates the function and corresponding gradient. Input Parameters: tao - the Tao context X - the input vector ptr - optional user-defined context, as set by TaoSetObjectiveAndGradientRoutine() Output Parameters: f - the newly evaluated function */ PetscErrorCode FormFunction(Tao tao,Vec P,PetscReal *f,void *ctx0) { AppCtx *ctx = (AppCtx*)ctx0; TS ts; Vec U; /* solution will be stored here */ Mat A; /* Jacobian matrix */ Mat Jacp; /* Jacobian matrix */ PetscErrorCode ierr; PetscInt n = 2; PetscReal ftime; PetscInt steps; PetscScalar *u; PetscScalar *x_ptr,*y_ptr; Vec lambda[1],q,mu[1]; ierr = VecGetArray(P,&x_ptr);CHKERRQ(ierr); ctx->Pm = x_ptr[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create necessary matrix and vectors - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatCreateVecs(A,&U,NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&Jacp);CHKERRQ(ierr); ierr = MatSetSizes(Jacp,PETSC_DECIDE,PETSC_DECIDE,2,1);CHKERRQ(ierr); ierr = MatSetFromOptions(Jacp);CHKERRQ(ierr); ierr = MatSetUp(Jacp);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create timestepping solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); ierr = TSSetType(ts,TSBEULER);CHKERRQ(ierr); ierr = TSSetIFunction(ts,NULL,(TSIFunction) IFunction,ctx);CHKERRQ(ierr); ierr = TSSetIJacobian(ts,A,A,(TSIJacobian)IJacobian,ctx);CHKERRQ(ierr); ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_MATCHSTEP);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set initial conditions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecGetArray(U,&u);CHKERRQ(ierr); u[0] = PetscAsinScalar(ctx->Pm/ctx->Pmax); u[1] = 1.0; ierr = VecRestoreArray(U,&u);CHKERRQ(ierr); ierr = TSSetSolution(ts,U);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Save trajectory of solution so that TSAdjointSolve() may be used - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetSaveTrajectory(ts);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set solver options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetDuration(ts,PETSC_DEFAULT,10.0);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,0.0,.01);CHKERRQ(ierr); ierr = TSSetFromOptions(ts);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSolve(ts,U);CHKERRQ(ierr); ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr); ierr = TSGetTimeStepNumber(ts,&steps);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Adjoint model starts here - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatCreateVecs(A,&lambda[0],NULL);CHKERRQ(ierr); /* Set initial conditions for the adjoint integration */ ierr = VecGetArray(lambda[0],&y_ptr);CHKERRQ(ierr); y_ptr[0] = 0.0; y_ptr[1] = 0.0; ierr = VecRestoreArray(lambda[0],&y_ptr);CHKERRQ(ierr); ierr = MatCreateVecs(Jacp,&mu[0],NULL);CHKERRQ(ierr); ierr = VecGetArray(mu[0],&x_ptr);CHKERRQ(ierr); x_ptr[0] = -1.0; ierr = VecRestoreArray(mu[0],&x_ptr);CHKERRQ(ierr); ierr = TSAdjointSetCostGradients(ts,1,lambda,mu);CHKERRQ(ierr); ierr = TSAdjointSetRHSJacobian(ts,Jacp,RHSJacobianP,ctx);CHKERRQ(ierr); ierr = TSAdjointSetCostIntegrand(ts,1,(PetscErrorCode (*)(TS,PetscReal,Vec,Vec,void*))CostIntegrand, (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDYFunction, (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDPFunction,ctx);CHKERRQ(ierr); ierr = TSAdjointSolve(ts);CHKERRQ(ierr); ierr = TSAdjointGetCostIntegral(ts,&q);CHKERRQ(ierr); ierr = ComputeSensiP(lambda[0],mu[0],ctx);CHKERRQ(ierr); ierr = VecGetArray(q,&x_ptr);CHKERRQ(ierr); *f = -ctx->Pm + x_ptr[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&Jacp);CHKERRQ(ierr); ierr = VecDestroy(&U);CHKERRQ(ierr); ierr = VecDestroy(&lambda[0]);CHKERRQ(ierr); ierr = VecDestroy(&mu[0]);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); return 0; }

int main(int argc,char **argv) { TS ts; /* ODE integrator */ Vec U; /* solution will be stored here */ Mat A; /* Jacobian matrix */ PetscErrorCode ierr; PetscMPIInt size; PetscInt n = 2; AppCtx ctx; PetscScalar *u; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Initialize program - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Only for sequential runs"); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create necessary matrix and vectors - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatCreateVecs(A,&U,NULL);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Reaction options","");CHKERRQ(ierr); { ctx.omega_s = 1.0; ierr = PetscOptionsScalar("-omega_s","","",ctx.omega_s,&ctx.omega_s,NULL);CHKERRQ(ierr); ctx.H = 1.0; ierr = PetscOptionsScalar("-H","","",ctx.H,&ctx.H,NULL);CHKERRQ(ierr); ctx.E = 1.0; ierr = PetscOptionsScalar("-E","","",ctx.E,&ctx.E,NULL);CHKERRQ(ierr); ctx.V = 1.0; ierr = PetscOptionsScalar("-V","","",ctx.V,&ctx.V,NULL);CHKERRQ(ierr); ctx.X = 1.0; ierr = PetscOptionsScalar("-X","","",ctx.X,&ctx.X,NULL);CHKERRQ(ierr); ierr = VecGetArray(U,&u);CHKERRQ(ierr); u[0] = 1; u[1] = .7; ierr = VecRestoreArray(U,&u);CHKERRQ(ierr); ierr = PetscOptionsGetVec(NULL,NULL,"-initial",U,NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&ctx.rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(ctx.rand);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create timestepping solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); ierr = TSSetType(ts,TSROSW);CHKERRQ(ierr); ierr = TSSetIFunction(ts,NULL,(TSIFunction) IFunction,&ctx);CHKERRQ(ierr); ierr = TSSetIJacobian(ts,A,A,(TSIJacobian)IJacobian,&ctx);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set initial conditions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetSolution(ts,U);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set solver options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSetDuration(ts,100000,2000.0);CHKERRQ(ierr); ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER);CHKERRQ(ierr); ierr = TSSetInitialTimeStep(ts,0.0,.001);CHKERRQ(ierr); ierr = TSSetFromOptions(ts);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = TSSolve(ts,U);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = VecDestroy(&U);CHKERRQ(ierr); ierr = TSDestroy(&ts);CHKERRQ(ierr); ierr = PetscRandomDestroy(&ctx.rand);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }

/* PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing in polynomial eigenproblems. */ PetscErrorCode PEPBuildDiagonalScaling(PEP pep) { PetscErrorCode ierr; PetscInt it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl; const PetscInt *cidx,*ridx; Mat M,*T,A; PetscMPIInt n; PetscBool cont=PETSC_TRUE,flg=PETSC_FALSE; PetscScalar *array,*Dr,*Dl,t; PetscReal l2,d,*rsum,*aux,*csum,w=1.0; MatStructure str; MatInfo info; PetscFunctionBegin; l2 = 2*PetscLogReal(2.0); nmat = pep->nmat; ierr = PetscMPIIntCast(pep->n,&n); ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr); ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr); for (k=0;k<nmat;k++) { ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr); } /* Form local auxiliar matrix M */ ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types"); ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr); if (cont) { ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr); flg = PETSC_TRUE; } else { ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr); } ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); for (k=1;k<nmat;k++) { if (flg) { ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); } else { if (str==SAME_NONZERO_PATTERN) { ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr); } } ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); w *= pep->slambda*pep->slambda*pep->sfactor; ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr); if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) { ierr = MatDestroy(&A);CHKERRQ(ierr); } } ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices"); ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr); ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr); ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr); ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr); ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr); ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) { /* Search non-zero columns outsize lst-lend */ if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j]; /* Local column sums */ aux[cidx[j]] += PetscAbsScalar(array[j]); } for (it=0;it<pep->sits && cont;it++) { emaxl = 0; eminl = 0; /* Column sum */ if (it>0) { /* it=0 has been already done*/ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]); ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); } ierr = MPI_Allreduce(aux,csum,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)pep->Dr)); /* Update Dr */ for (j=lst;j<lend;j++) { d = PetscLogReal(csum[j])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dr[j-lst] *= d; aux[j] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } for (j=0;j<nc;j++) { d = PetscLogReal(csum[cols[j]])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); aux[cols[j]] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } /* Scale M */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (j=0;j<nz;j++) { array[j] *= aux[cidx[j]]; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Row sum */ ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nr;i++) { for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]); /* Update Dl */ d = PetscLogReal(rsum[i])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dl[i] *= d; /* Scale M */ for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Compute global max and min */ ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl)); ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl)); if (emax<=emin+2) cont = PETSC_FALSE; } ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr); /* Free memory*/ ierr = MatDestroy(&M);CHKERRQ(ierr); ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr); ierr = PetscFree(T);CHKERRQ(ierr); PetscFunctionReturn(0); }

static PetscErrorCode KSPSolve_IBCGS(KSP ksp) { PetscErrorCode ierr; PetscInt i,N; PetscReal rnorm,rnormin = 0.0; #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithematic rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits) and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE. Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double precision into a 16 byte space (the rest of the space is ignored) */ long double insums[7],outsums[7]; #else PetscScalar insums[7],outsums[7]; #endif PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2; PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan; const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un; PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn; /* the rest do not have to keep n_1 values */ PetscScalar kappan, thetan, etan, gamman, betan, deltan; const PetscScalar *PETSC_RESTRICT tn; PetscScalar *PETSC_RESTRICT sn; Vec R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un; Mat A; PetscFunctionBegin; if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors"); ierr = PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); Xn = ksp->vec_sol;ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Xn_1,PETSC_NULL);CHKERRQ(ierr); B = ksp->vec_rhs;ierr = VecGetArrayRead(B,(const PetscScalar**)&b);ierr = VecRestoreArrayRead(B,PETSC_NULL);CHKERRQ(ierr); R0 = ksp->work[0];ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(R0,PETSC_NULL);CHKERRQ(ierr); Rn = ksp->work[1];ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Rn_1,PETSC_NULL);CHKERRQ(ierr); Un = ksp->work[2];ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Un_1,PETSC_NULL);CHKERRQ(ierr); F0 = ksp->work[3];ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(F0,PETSC_NULL);CHKERRQ(ierr); Vn = ksp->work[4];ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Vn_1,PETSC_NULL);CHKERRQ(ierr); Zn = ksp->work[5];ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Zn_1,PETSC_NULL);CHKERRQ(ierr); Qn = ksp->work[6];ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Qn_1,PETSC_NULL);CHKERRQ(ierr); Tn = ksp->work[7];ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Tn,PETSC_NULL);CHKERRQ(ierr); Sn = ksp->work[8];ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Sn,PETSC_NULL);CHKERRQ(ierr); /* r0 = rn_1 = b - A*xn_1; */ /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr); ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */ ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr); ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr); /* un_1 = A*rn_1; */ ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr); /* f0 = A'*rn_1; */ if (ksp->pc_side == PC_RIGHT) { /* B' A' */ ierr = MatMultTranspose(A,R0,Tn);CHKERRQ(ierr); ierr = PCApplyTranspose(ksp->pc,Tn,F0);CHKERRQ(ierr); } else if (ksp->pc_side == PC_LEFT) { /* A' B' */ ierr = PCApplyTranspose(ksp->pc,R0,Tn);CHKERRQ(ierr); ierr = MatMultTranspose(A,Tn,F0);CHKERRQ(ierr); } /*qn_1 = vn_1 = zn_1 = 0.0; */ ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr); sigman_2 = pin_1 = taun_1 = 0.0; /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */ ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); /* sigman_1 = rn_1'un_1 */ ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); alphan_1 = omegan_1 = 1.0; for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) { rhon = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1; /* if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */ if (ksp->its == 1) deltan = rhon; else deltan = rhon/taun_1; betan = deltan/omegan_1; taun = sigman_1 + betan*taun_1 - deltan*pin_1; if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its); alphan = rhon/taun; ierr = PetscLogFlops(15.0); /* zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1 vn = un_1 + betan*vn_1 - deltan*qn_1 sn = rn_1 - alphan*vn The algorithm in the paper is missing the alphan/alphan_1 term in the zn update */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); tmp1 = (alphan/alphan_1)*betan; tmp2 = alphan*deltan; for (i=0; i<N; i++) { zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i]; vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i]; sn[i] = rn_1[i] - alphan*vn[i]; } ierr = PetscLogFlops(3.0+11.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); /* qn = A*vn */ ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr); /* tn = un_1 - alphan*qn */ ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr); /* phin = r0'sn pin = r0'qn gamman = f0'sn etan = f0'tn thetan = sn'tn kappan = tn'tn */ ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); phin = pin = gamman = etan = thetan = kappan = 0.0; for (i=0; i<N; i++) { phin += r0[i]*sn[i]; pin += r0[i]*qn[i]; gamman += f0[i]*sn[i]; etan += f0[i]*tn[i]; thetan += sn[i]*tn[i]; kappan += tn[i]*tn[i]; } ierr = PetscLogFlops(12.0*N); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = phin; insums[1] = pin; insums[2] = gamman; insums[3] = etan; insums[4] = thetan; insums[5] = kappan; insums[6] = rnormin; ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #else if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #endif ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); phin = outsums[0]; pin = outsums[1]; gamman = outsums[2]; etan = outsums[3]; thetan = outsums[4]; kappan = outsums[5]; if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6])); if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its); if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its); omegan = thetan/kappan; sigman = gamman - omegan*etan; /* rn = sn - omegan*tn xn = xn_1 + zn + omegan*sn */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); rnormin = 0.0; for (i=0; i<N; i++) { rn[i] = sn[i] - omegan*tn[i]; rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]); xn[i] += zn[i] + omegan*sn[i]; } ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr); ierr = PetscLogFlops(7.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); if (!ksp->lagnorm && ksp->chknorm < ksp->its) { ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); rnorm = PetscSqrtReal(rnorm); } /* Test for convergence */ ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* un = A*rn */ ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr); /* Update n-1 locations with n locations */ sigman_2 = sigman_1; sigman_1 = sigman; pin_1 = pin; phin_1 = phin; alphan_1 = alphan; taun_1 = taun; omegan_1 = omegan; } if (ksp->its >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr); PetscFunctionReturn(0); }