int main(int argc,char **argv) { PetscErrorCode ierr; PetscBool view = PETSC_FALSE, viewsoln = PETSC_FALSE, noprealloc = PETSC_FALSE; char root[256] = "", nodesname[256], issname[256], solnname[256]; UM mesh; unfemCtx user; SNES snes; KSP ksp; PC pc; Mat A; Vec r, u, uexact; double err, h_max; PetscInitialize(&argc,&argv,NULL,help); ierr = PetscLogStageRegister("Read mesh ", &user.readstage); CHKERRQ(ierr); //STRIP ierr = PetscLogStageRegister("Set-up ", &user.setupstage); CHKERRQ(ierr); //STRIP ierr = PetscLogStageRegister("Solver ", &user.solverstage); CHKERRQ(ierr); //STRIP ierr = PetscLogStageRegister("Residual eval ", &user.resstage); CHKERRQ(ierr); //STRIP ierr = PetscLogStageRegister("Jacobian eval ", &user.jacstage); CHKERRQ(ierr); //STRIP user.quaddeg = 1; user.solncase = 0; ierr = PetscOptionsBegin(PETSC_COMM_WORLD, "un_", "options for unfem", ""); CHKERRQ(ierr); ierr = PetscOptionsInt("-case", "exact solution cases: 0=linear, 1=nonlinear, 2=nonhomoNeumann, 3=chapter3, 4=koch", "unfem.c",user.solncase,&(user.solncase),NULL); CHKERRQ(ierr); ierr = PetscOptionsString("-mesh", "file name root of mesh stored in PETSc binary with .vec,.is extensions", "unfem.c",root,root,sizeof(root),NULL); CHKERRQ(ierr); ierr = PetscOptionsInt("-quaddeg", "quadrature degree (1,2,3)", "unfem.c",user.quaddeg,&(user.quaddeg),NULL); CHKERRQ(ierr); ierr = PetscOptionsBool("-view", "view loaded nodes and elements at stdout", "unfem.c",view,&view,NULL); CHKERRQ(ierr); ierr = PetscOptionsBool("-view_solution", "view solution u(x,y) to binary file; uses root name of mesh plus .soln\nsee petsc2tricontour.py to view graphically", "unfem.c",viewsoln,&viewsoln,NULL); CHKERRQ(ierr); ierr = PetscOptionsBool("-noprealloc", "do not perform preallocation before matrix assembly", "unfem.c",noprealloc,&noprealloc,NULL); CHKERRQ(ierr); ierr = PetscOptionsEnd(); CHKERRQ(ierr); // set parameters and exact solution user.a_fcn = &a_lin; user.f_fcn = &f_lin; user.uexact_fcn = &uexact_lin; user.gD_fcn = &gD_lin; user.gN_fcn = &gN_lin; switch (user.solncase) { case 0 : break; case 1 : user.a_fcn = &a_nonlin; user.f_fcn = &f_nonlin; break; case 2 : user.gN_fcn = &gN_linneu; break; case 3 : user.a_fcn = &a_square; user.f_fcn = &f_square; user.uexact_fcn = &uexact_square; user.gD_fcn = &gD_square; user.gN_fcn = NULL; // seg fault if ever called break; case 4 : user.a_fcn = &a_koch; user.f_fcn = &f_koch; user.uexact_fcn = NULL; user.gD_fcn = &gD_koch; user.gN_fcn = NULL; // seg fault if ever called break; default : SETERRQ(PETSC_COMM_WORLD,1,"other solution cases not implemented"); } // determine filenames strcpy(nodesname, root); strncat(nodesname, ".vec", 4); strcpy(issname, root); strncat(issname, ".is", 3); //STARTMAININITIAL PetscLogStagePush(user.readstage); //STRIP // read mesh object of type UM ierr = UMInitialize(&mesh); CHKERRQ(ierr); ierr = UMReadNodes(&mesh,nodesname); CHKERRQ(ierr); ierr = UMReadISs(&mesh,issname); CHKERRQ(ierr); ierr = UMStats(&mesh, &h_max, NULL, NULL, NULL); CHKERRQ(ierr); if (view) { //STRIP PetscViewer stdoutviewer; //STRIP ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&stdoutviewer); CHKERRQ(ierr); //STRIP ierr = UMViewASCII(&mesh,stdoutviewer); CHKERRQ(ierr); //STRIP } //STRIP user.mesh = &mesh; PetscLogStagePop(); //STRIP // configure Vecs and SNES PetscLogStagePush(user.setupstage); //STRIP ierr = VecCreate(PETSC_COMM_WORLD,&r); CHKERRQ(ierr); ierr = VecSetSizes(r,PETSC_DECIDE,mesh.N); CHKERRQ(ierr); ierr = VecSetFromOptions(r); CHKERRQ(ierr); ierr = VecDuplicate(r,&u); CHKERRQ(ierr); ierr = VecSet(u,0.0); CHKERRQ(ierr); ierr = SNESCreate(PETSC_COMM_WORLD,&snes); CHKERRQ(ierr); ierr = SNESSetFunction(snes,r,FormFunction,&user); CHKERRQ(ierr); // reset default KSP and PC ierr = SNESGetKSP(snes,&ksp); CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPCG); CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc); CHKERRQ(ierr); ierr = PCSetType(pc,PCICC); CHKERRQ(ierr); // setup matrix for Picard iteration, including preallocation ierr = MatCreate(PETSC_COMM_WORLD,&A); CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,mesh.N,mesh.N); CHKERRQ(ierr); ierr = MatSetFromOptions(A); CHKERRQ(ierr); ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE); CHKERRQ(ierr); if (noprealloc) { ierr = MatSetUp(A); CHKERRQ(ierr); } else { ierr = Preallocation(A,&user); CHKERRQ(ierr); } ierr = SNESSetJacobian(snes,A,A,FormPicard,&user); CHKERRQ(ierr); ierr = SNESSetFromOptions(snes); CHKERRQ(ierr); PetscLogStagePop(); //STRIP // solve PetscLogStagePush(user.solverstage); //STRIP ierr = SNESSolve(snes,NULL,u);CHKERRQ(ierr); PetscLogStagePop(); //STRIP //ENDMAININITIAL if (viewsoln) { strcpy(solnname, root); strncat(solnname, ".soln", 5); ierr = UMViewSolutionBinary(&mesh,solnname,u); CHKERRQ(ierr); } if (user.uexact_fcn) { // measure error relative to exact solution ierr = VecDuplicate(r,&uexact); CHKERRQ(ierr); ierr = FillExact(uexact,&user); CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,uexact); CHKERRQ(ierr); // u <- u + (-1.0) uexact ierr = VecNorm(u,NORM_INFINITY,&err); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "case %d result for N=%d nodes with h = %.3e : |u-u_ex|_inf = %g\n", user.solncase,mesh.N,h_max,err); CHKERRQ(ierr); VecDestroy(&uexact); } else { ierr = PetscPrintf(PETSC_COMM_WORLD, "case %d completed for N=%d nodes with h = %.3e (no exact solution)\n", user.solncase,mesh.N,h_max); CHKERRQ(ierr); } // clean-up SNESDestroy(&snes); MatDestroy(&A); VecDestroy(&u); VecDestroy(&r); UMDestroy(&mesh); PetscFinalize(); return 0; }
static PetscErrorCode KSPSolve_FBCGSR(KSP ksp) { PetscErrorCode ierr; PetscInt i,j,N; PetscScalar tau,sigma,alpha,omega,beta; PetscReal rho; PetscScalar xi1,xi2,xi3,xi4; Vec X,B,P,P2,RP,R,V,S,T,S2; PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; PetscScalar insums[4],outsums[4]; KSP_BCGS *bcgs = (KSP_BCGS*)ksp->data; PC pc; Mat mat; PetscFunctionBegin; if (!ksp->vec_rhs->petscnative) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Only coded for PETSc vectors"); ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); X = ksp->vec_sol; B = ksp->vec_rhs; P2 = ksp->work[0]; /* The followings are involved in modified inner product calculations and vector updates */ RP = ksp->work[1]; ierr = VecGetArray(RP,(PetscScalar**)&rp);CHKERRQ(ierr); ierr = VecRestoreArray(RP,NULL);CHKERRQ(ierr); R = ksp->work[2]; ierr = VecGetArray(R,(PetscScalar**)&r);CHKERRQ(ierr); ierr = VecRestoreArray(R,NULL);CHKERRQ(ierr); P = ksp->work[3]; ierr = VecGetArray(P,(PetscScalar**)&p);CHKERRQ(ierr); ierr = VecRestoreArray(P,NULL);CHKERRQ(ierr); V = ksp->work[4]; ierr = VecGetArray(V,(PetscScalar**)&v);CHKERRQ(ierr); ierr = VecRestoreArray(V,NULL);CHKERRQ(ierr); S = ksp->work[5]; ierr = VecGetArray(S,(PetscScalar**)&s);CHKERRQ(ierr); ierr = VecRestoreArray(S,NULL);CHKERRQ(ierr); T = ksp->work[6]; ierr = VecGetArray(T,(PetscScalar**)&t);CHKERRQ(ierr); ierr = VecRestoreArray(T,NULL);CHKERRQ(ierr); S2 = ksp->work[7]; ierr = VecGetArray(S2,(PetscScalar**)&s2);CHKERRQ(ierr); ierr = VecRestoreArray(S2,NULL);CHKERRQ(ierr); /* Only supports right preconditioning */ if (ksp->pc_side != PC_RIGHT) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"KSP fbcgsr does not support %s",PCSides[ksp->pc_side]); if (!ksp->guess_zero) { if (!bcgs->guess) { ierr = VecDuplicate(X,&bcgs->guess);CHKERRQ(ierr); } ierr = VecCopy(X,bcgs->guess);CHKERRQ(ierr); } else { ierr = VecSet(X,0.0);CHKERRQ(ierr); } /* Compute initial residual */ ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetUp(pc);CHKERRQ(ierr); ierr = PCGetOperators(pc,&mat,NULL);CHKERRQ(ierr); if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,mat,X,P2);CHKERRQ(ierr); /* P2 is used as temporary storage */ ierr = VecCopy(B,R);CHKERRQ(ierr); ierr = VecAXPY(R,-1.0,P2);CHKERRQ(ierr); } else { ierr = VecCopy(B,R);CHKERRQ(ierr); } /* Test for nothing to do */ ierr = VecNorm(R,NORM_2,&rho);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = rho; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rho);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rho);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rho,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Initialize iterates */ ierr = VecCopy(R,RP);CHKERRQ(ierr); /* rp <- r */ ierr = VecCopy(R,P);CHKERRQ(ierr); /* p <- r */ /* Big loop */ for (i=0; i<ksp->max_it; i++) { /* matmult and pc */ ierr = KSP_PCApply(ksp,P,P2);CHKERRQ(ierr); /* p2 <- K p */ ierr = KSP_MatMult(ksp,mat,P2,V);CHKERRQ(ierr); /* v <- A p2 */ /* inner prodcuts */ if (i==0) { tau = rho*rho; ierr = VecDot(V,RP,&sigma);CHKERRQ(ierr); /* sigma <- (v,rp) */ } else { ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); tau = sigma = 0.0; for (j=0; j<N; j++) { tau += r[j]*rp[j]; /* tau <- (r,rp) */ sigma += v[j]*rp[j]; /* sigma <- (v,rp) */ } ierr = PetscLogFlops(4.0*N);CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = tau; insums[1] = sigma; ierr = PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);CHKERRQ(ierr); ierr = MPIU_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);CHKERRQ(ierr); tau = outsums[0]; sigma = outsums[1]; } /* scalar update */ alpha = tau / sigma; /* vector update */ ierr = VecWAXPY(S,-alpha,V,R);CHKERRQ(ierr); /* s <- r - alpha v */ /* matmult and pc */ ierr = KSP_PCApply(ksp,S,S2);CHKERRQ(ierr); /* s2 <- K s */ ierr = KSP_MatMult(ksp,mat,S2,T);CHKERRQ(ierr); /* t <- A s2 */ /* inner prodcuts */ ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); xi1 = xi2 = xi3 = xi4 = 0.0; for (j=0; j<N; j++) { xi1 += s[j]*s[j]; /* xi1 <- (s,s) */ xi2 += t[j]*s[j]; /* xi2 <- (t,s) */ xi3 += t[j]*t[j]; /* xi3 <- (t,t) */ xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */ } ierr = PetscLogFlops(8.0*N);CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = xi1; insums[1] = xi2; insums[2] = xi3; insums[3] = xi4; ierr = PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);CHKERRQ(ierr); ierr = MPIU_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);CHKERRQ(ierr); xi1 = outsums[0]; xi2 = outsums[1]; xi3 = outsums[2]; xi4 = outsums[3]; /* test denominator */ if (xi3 == 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Divide by zero"); if (sigma == 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Divide by zero"); /* scalar updates */ omega = xi2 / xi3; beta = -xi4 / sigma; rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ /* vector updates */ ierr = VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2);CHKERRQ(ierr); /* x <- alpha * p2 + omega * s2 + x */ /* convergence test */ ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = rho; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rho);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,rho);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,rho,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* vector updates */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); for (j=0; j<N; j++) { r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ } ierr = PetscLogFlops(6.0*N);CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); } if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
PetscErrorCode KSPSolve_SYMMLQ(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscScalar alpha,beta,ibeta,betaold,beta1,ceta = 0,ceta_oold = 0.0, ceta_old = 0.0,ceta_bar; PetscScalar c = 1.0,cold=1.0,s=0.0,sold=0.0,coold,soold,rho0,rho1,rho2,rho3; PetscScalar dp = 0.0; PetscReal np,s_prod; Vec X,B,R,Z,U,V,W,UOLD,VOLD,Wbar; Mat Amat,Pmat; KSP_SYMMLQ *symmlq = (KSP_SYMMLQ*)ksp->data; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; Z = ksp->work[1]; U = ksp->work[2]; V = ksp->work[3]; W = ksp->work[4]; UOLD = ksp->work[5]; VOLD = ksp->work[6]; Wbar = ksp->work[7]; ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); ksp->its = 0; ierr = VecSet(UOLD,0.0);CHKERRQ(ierr); /* u_old <- zeros; */ ierr = VecCopy(UOLD,VOLD);CHKERRQ(ierr); /* v_old <- u_old; */ ierr = VecCopy(UOLD,W);CHKERRQ(ierr); /* w <- u_old; */ ierr = VecCopy(UOLD,Wbar);CHKERRQ(ierr); /* w_bar <- u_old; */ if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); /* r <- b - A*x */ ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); } else { ierr = VecCopy(B,R);CHKERRQ(ierr); /* r <- b (x is 0) */ } ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- B*r */ ierr = VecDot(R,Z,&dp);CHKERRQ(ierr); /* dp = r'*z; */ KSPCheckDot(ksp,dp); if (PetscAbsScalar(dp) < symmlq->haptol) { ierr = PetscInfo2(ksp,"Detected happy breakdown %g tolerance %g\n",(double)PetscAbsScalar(dp),(double)symmlq->haptol);CHKERRQ(ierr); ksp->rnorm = 0.0; /* what should we really put here? */ ksp->reason = KSP_CONVERGED_HAPPY_BREAKDOWN; /* bugfix proposed by Lourens ([email protected]) */ PetscFunctionReturn(0); } #if !defined(PETSC_USE_COMPLEX) if (dp < 0.0) { ksp->reason = KSP_DIVERGED_INDEFINITE_PC; PetscFunctionReturn(0); } #endif dp = PetscSqrtScalar(dp); beta = dp; /* beta <- sqrt(r'*z) */ beta1 = beta; s_prod = PetscAbsScalar(beta1); ierr = VecCopy(R,V);CHKERRQ(ierr); /* v <- r; */ ierr = VecCopy(Z,U);CHKERRQ(ierr); /* u <- z; */ ibeta = 1.0 / beta; ierr = VecScale(V,ibeta);CHKERRQ(ierr); /* v <- ibeta*v; */ ierr = VecScale(U,ibeta);CHKERRQ(ierr); /* u <- ibeta*u; */ ierr = VecCopy(U,Wbar);CHKERRQ(ierr); /* w_bar <- u; */ ierr = VecNorm(Z,NORM_2,&np);CHKERRQ(ierr); /* np <- ||z|| */ KSPCheckNorm(ksp,np); ierr = KSPLogResidualHistory(ksp,np);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,np);CHKERRQ(ierr); ksp->rnorm = np; ierr = (*ksp->converged)(ksp,0,np,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) PetscFunctionReturn(0); i = 0; ceta = 0.; do { ksp->its = i+1; /* Update */ if (ksp->its > 1) { ierr = VecCopy(V,VOLD);CHKERRQ(ierr); /* v_old <- v; */ ierr = VecCopy(U,UOLD);CHKERRQ(ierr); /* u_old <- u; */ ierr = VecCopy(R,V);CHKERRQ(ierr); ierr = VecScale(V,1.0/beta);CHKERRQ(ierr); /* v <- ibeta*r; */ ierr = VecCopy(Z,U);CHKERRQ(ierr); ierr = VecScale(U,1.0/beta);CHKERRQ(ierr); /* u <- ibeta*z; */ ierr = VecCopy(Wbar,W);CHKERRQ(ierr); ierr = VecScale(W,c);CHKERRQ(ierr); ierr = VecAXPY(W,s,U);CHKERRQ(ierr); /* w <- c*w_bar + s*u; (w_k) */ ierr = VecScale(Wbar,-s);CHKERRQ(ierr); ierr = VecAXPY(Wbar,c,U);CHKERRQ(ierr); /* w_bar <- -s*w_bar + c*u; (w_bar_(k+1)) */ ierr = VecAXPY(X,ceta,W);CHKERRQ(ierr); /* x <- x + ceta * w; (xL_k) */ ceta_oold = ceta_old; ceta_old = ceta; } /* Lanczos */ ierr = KSP_MatMult(ksp,Amat,U,R);CHKERRQ(ierr); /* r <- Amat*u; */ ierr = VecDot(U,R,&alpha);CHKERRQ(ierr); /* alpha <- u'*r; */ ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr); /* z <- B*r; */ ierr = VecAXPY(R,-alpha,V);CHKERRQ(ierr); /* r <- r - alpha* v; */ ierr = VecAXPY(Z,-alpha,U);CHKERRQ(ierr); /* z <- z - alpha* u; */ ierr = VecAXPY(R,-beta,VOLD);CHKERRQ(ierr); /* r <- r - beta * v_old; */ ierr = VecAXPY(Z,-beta,UOLD);CHKERRQ(ierr); /* z <- z - beta * u_old; */ betaold = beta; /* beta_k */ ierr = VecDot(R,Z,&dp);CHKERRQ(ierr); /* dp <- r'*z; */ KSPCheckDot(ksp,dp); if (PetscAbsScalar(dp) < symmlq->haptol) { ierr = PetscInfo2(ksp,"Detected happy breakdown %g tolerance %g\n",(double)PetscAbsScalar(dp),(double)symmlq->haptol);CHKERRQ(ierr); dp = 0.0; } #if !defined(PETSC_USE_COMPLEX) if (dp < 0.0) { ksp->reason = KSP_DIVERGED_INDEFINITE_PC; break; } #endif beta = PetscSqrtScalar(dp); /* beta = sqrt(dp); */ /* QR factorization */ coold = cold; cold = c; soold = sold; sold = s; rho0 = cold * alpha - coold * sold * betaold; /* gamma_bar */ rho1 = PetscSqrtScalar(rho0*rho0 + beta*beta); /* gamma */ rho2 = sold * alpha + coold * cold * betaold; /* delta */ rho3 = soold * betaold; /* epsilon */ /* Givens rotation: [c -s; s c] (different from the Reference!) */ c = rho0 / rho1; s = beta / rho1; if (ksp->its==1) ceta = beta1/rho1; else ceta = -(rho2*ceta_old + rho3*ceta_oold)/rho1; s_prod = s_prod*PetscAbsScalar(s); if (c == 0.0) np = s_prod*1.e16; else np = s_prod/PetscAbsScalar(c); /* residual norm for xc_k (CGNORM) */ ksp->rnorm = np; ierr = KSPLogResidualHistory(ksp,np);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,np);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,np,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) break; i++; } while (i<ksp->max_it); /* move to the CG point: xc_(k+1) */ if (c == 0.0) ceta_bar = ceta*1.e15; else ceta_bar = ceta/c; ierr = VecAXPY(X,ceta_bar,Wbar);CHKERRQ(ierr); /* x <- x + ceta_bar*w_bar */ if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
/*------------------------------------------------------------*/ static PetscErrorCode TaoSolve_NM(Tao tao) { PetscErrorCode ierr; TAO_NelderMead *nm = (TAO_NelderMead*)tao->data; TaoConvergedReason reason; PetscReal *x; PetscInt i; Vec Xmur=nm->Xmur, Xmue=nm->Xmue, Xmuc=nm->Xmuc, Xbar=nm->Xbar; PetscReal fr,fe,fc; PetscInt shrink; PetscInt low,high; PetscFunctionBegin; nm->nshrink = 0; nm->nreflect = 0; nm->nincontract = 0; nm->noutcontract = 0; nm->nexpand = 0; if (tao->XL || tao->XU || tao->ops->computebounds) { ierr = PetscPrintf(((PetscObject)tao)->comm,"WARNING: Variable bounds have been set but will be ignored by NelderMead algorithm\n");CHKERRQ(ierr); } ierr = VecCopy(tao->solution,nm->simplex[0]);CHKERRQ(ierr); ierr = TaoComputeObjective(tao,nm->simplex[0],&nm->f_values[0]);CHKERRQ(ierr); nm->indices[0]=0; for (i=1;i<nm->N+1;i++){ ierr = VecCopy(tao->solution,nm->simplex[i]);CHKERRQ(ierr); ierr = VecGetOwnershipRange(nm->simplex[i],&low,&high);CHKERRQ(ierr); if (i-1 >= low && i-1 < high) { ierr = VecGetArray(nm->simplex[i],&x);CHKERRQ(ierr); x[i-1-low] += nm->lamda; ierr = VecRestoreArray(nm->simplex[i],&x);CHKERRQ(ierr); } ierr = TaoComputeObjective(tao,nm->simplex[i],&nm->f_values[i]);CHKERRQ(ierr); nm->indices[i] = i; } /* Xbar = (Sum of all simplex vectors - worst vector)/N */ ierr = NelderMeadSort(nm);CHKERRQ(ierr); ierr = VecSet(Xbar,0.0);CHKERRQ(ierr); for (i=0;i<nm->N;i++) { ierr = VecAXPY(Xbar,1.0,nm->simplex[nm->indices[i]]);CHKERRQ(ierr); } ierr = VecScale(Xbar,nm->oneOverN);CHKERRQ(ierr); reason = TAO_CONTINUE_ITERATING; while (1) { shrink = 0; ierr = VecCopy(nm->simplex[nm->indices[0]],tao->solution);CHKERRQ(ierr); ierr = TaoMonitor(tao,tao->niter++,nm->f_values[nm->indices[0]],nm->f_values[nm->indices[nm->N]]-nm->f_values[nm->indices[0]],0.0,1.0,&reason);CHKERRQ(ierr); if (reason != TAO_CONTINUE_ITERATING) break; /* x(mu) = (1 + mu)Xbar - mu*X_N+1 */ ierr = VecAXPBYPCZ(Xmur,1+nm->mu_r,-nm->mu_r,0,Xbar,nm->simplex[nm->indices[nm->N]]);CHKERRQ(ierr); ierr = TaoComputeObjective(tao,Xmur,&fr);CHKERRQ(ierr); if (nm->f_values[nm->indices[0]] <= fr && fr < nm->f_values[nm->indices[nm->N-1]]) { /* reflect */ nm->nreflect++; ierr = PetscInfo(0,"Reflect\n");CHKERRQ(ierr); ierr = NelderMeadReplace(nm,nm->indices[nm->N],Xmur,fr);CHKERRQ(ierr); } else if (fr < nm->f_values[nm->indices[0]]) { /* expand */ nm->nexpand++; ierr = PetscInfo(0,"Expand\n");CHKERRQ(ierr); ierr = VecAXPBYPCZ(Xmue,1+nm->mu_e,-nm->mu_e,0,Xbar,nm->simplex[nm->indices[nm->N]]);CHKERRQ(ierr); ierr = TaoComputeObjective(tao,Xmue,&fe);CHKERRQ(ierr); if (fe < fr) { ierr = NelderMeadReplace(nm,nm->indices[nm->N],Xmue,fe);CHKERRQ(ierr); } else { ierr = NelderMeadReplace(nm,nm->indices[nm->N],Xmur,fr);CHKERRQ(ierr); } } else if (nm->f_values[nm->indices[nm->N-1]] <= fr && fr < nm->f_values[nm->indices[nm->N]]) { /* outside contraction */ nm->noutcontract++; ierr = PetscInfo(0,"Outside Contraction\n");CHKERRQ(ierr); ierr = VecAXPBYPCZ(Xmuc,1+nm->mu_oc,-nm->mu_oc,0,Xbar,nm->simplex[nm->indices[nm->N]]);CHKERRQ(ierr); ierr = TaoComputeObjective(tao,Xmuc,&fc);CHKERRQ(ierr); if (fc <= fr) { ierr = NelderMeadReplace(nm,nm->indices[nm->N],Xmuc,fc);CHKERRQ(ierr); } else shrink=1; } else { /* inside contraction */ nm->nincontract++; ierr = PetscInfo(0,"Inside Contraction\n");CHKERRQ(ierr); ierr = VecAXPBYPCZ(Xmuc,1+nm->mu_ic,-nm->mu_ic,0,Xbar,nm->simplex[nm->indices[nm->N]]);CHKERRQ(ierr); ierr = TaoComputeObjective(tao,Xmuc,&fc);CHKERRQ(ierr); if (fc < nm->f_values[nm->indices[nm->N]]) { ierr = NelderMeadReplace(nm,nm->indices[nm->N],Xmuc,fc);CHKERRQ(ierr); } else shrink = 1; } if (shrink) { nm->nshrink++; ierr = PetscInfo(0,"Shrink\n");CHKERRQ(ierr); for (i=1;i<nm->N+1;i++) { ierr = VecAXPBY(nm->simplex[nm->indices[i]],1.5,-0.5,nm->simplex[nm->indices[0]]);CHKERRQ(ierr); ierr = TaoComputeObjective(tao,nm->simplex[nm->indices[i]], &nm->f_values[nm->indices[i]]);CHKERRQ(ierr); } ierr = VecAXPBY(Xbar,1.5*nm->oneOverN,-0.5,nm->simplex[nm->indices[0]]);CHKERRQ(ierr); /* Add last vector's fraction of average */ ierr = VecAXPY(Xbar,nm->oneOverN,nm->simplex[nm->indices[nm->N]]);CHKERRQ(ierr); ierr = NelderMeadSort(nm);CHKERRQ(ierr); /* Subtract new last vector from average */ ierr = VecAXPY(Xbar,-nm->oneOverN,nm->simplex[nm->indices[nm->N]]);CHKERRQ(ierr); } } PetscFunctionReturn(0); }
int main(int argc,char **argv) { SNES snes; /* nonlinear solver context */ KSP ksp; /* linear solver context */ PC pc; /* preconditioner context */ Vec x,r; /* solution, residual vectors */ Mat J; /* Jacobian matrix */ PetscErrorCode ierr; PetscInt its; PetscMPIInt size; PetscScalar pfive = .5,*xx; PetscBool flg; PetscInitialize(&argc,&argv,(char*)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!"); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create nonlinear solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create matrix and vector data structures; set corresponding routines - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create vectors for solution and nonlinear function */ ierr = VecCreateSeq(PETSC_COMM_SELF,2,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&r);CHKERRQ(ierr); /* Create Jacobian matrix data structure */ ierr = MatCreate(PETSC_COMM_SELF,&J);CHKERRQ(ierr); ierr = MatSetSizes(J,PETSC_DECIDE,PETSC_DECIDE,2,2);CHKERRQ(ierr); ierr = MatSetFromOptions(J);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-hard",&flg);CHKERRQ(ierr); if (!flg) { /* Set function evaluation routine and vector. */ ierr = SNESSetFunction(snes,r,FormFunction1,NULL);CHKERRQ(ierr); /* Set Jacobian matrix data structure and Jacobian evaluation routine */ ierr = SNESSetJacobian(snes,J,J,FormJacobian1,NULL);CHKERRQ(ierr); } else { ierr = SNESSetFunction(snes,r,FormFunction2,NULL);CHKERRQ(ierr); ierr = SNESSetJacobian(snes,J,J,FormJacobian2,NULL);CHKERRQ(ierr); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Customize nonlinear solver; set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Set linear solver defaults for this problem. By extracting the KSP, KSP, and PC contexts from the SNES context, we can then directly call any KSP, KSP, and PC routines to set various options. */ ierr = SNESGetKSP(snes,&ksp);CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr); ierr = KSPSetTolerances(ksp,1.e-4,PETSC_DEFAULT,PETSC_DEFAULT,20);CHKERRQ(ierr); /* Set SNES/KSP/KSP/PC runtime options, e.g., -snes_view -snes_monitor -ksp_type <ksp> -pc_type <pc> These options will override those specified above as long as SNESSetFromOptions() is called _after_ any other customization routines. */ ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Evaluate initial guess; then solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (!flg) { ierr = VecSet(x,pfive);CHKERRQ(ierr); } else { ierr = VecGetArray(x,&xx);CHKERRQ(ierr); xx[0] = 2.0; xx[1] = 3.0; ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); } /* Note: The user should initialize the vector, x, with the initial guess for the nonlinear solver prior to calling SNESSolve(). In particular, to employ an initial guess of zero, the user should explicitly set this vector to zero by calling VecSet(). */ ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); if (flg) { Vec f; ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = SNESGetFunction(snes,&f,0,0);CHKERRQ(ierr); ierr = VecView(r,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } ierr = PetscPrintf(PETSC_COMM_SELF,"number of SNES iterations = %D\n\n",its);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&r);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = SNESDestroy(&snes);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode TaoSolve_GPCG(Tao tao) { TAO_GPCG *gpcg = (TAO_GPCG *)tao->data; PetscErrorCode ierr; PetscInt its; PetscReal actred,f,f_new,gnorm,gdx,stepsize,xtb; PetscReal xtHx; TaoConvergedReason reason = TAO_CONTINUE_ITERATING; TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; PetscFunctionBegin; ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr); ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); /* Using f = .5*x'Hx + x'b + c and g=Hx + b, compute b,c */ ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&f,tao->gradient);CHKERRQ(ierr); ierr = VecCopy(tao->gradient, gpcg->B);CHKERRQ(ierr); ierr = MatMult(tao->hessian,tao->solution,gpcg->Work);CHKERRQ(ierr); ierr = VecDot(gpcg->Work, tao->solution, &xtHx);CHKERRQ(ierr); ierr = VecAXPY(gpcg->B,-1.0,gpcg->Work);CHKERRQ(ierr); ierr = VecDot(gpcg->B,tao->solution,&xtb);CHKERRQ(ierr); gpcg->c=f-xtHx/2.0-xtb; if (gpcg->Free_Local) { ierr = ISDestroy(&gpcg->Free_Local);CHKERRQ(ierr); } ierr = VecWhichBetween(tao->XL,tao->solution,tao->XU,&gpcg->Free_Local);CHKERRQ(ierr); /* Project the gradient and calculate the norm */ ierr = VecCopy(tao->gradient,gpcg->G_New);CHKERRQ(ierr); ierr = VecBoundGradientProjection(tao->gradient,tao->solution,tao->XL,tao->XU,gpcg->PG);CHKERRQ(ierr); ierr = VecNorm(gpcg->PG,NORM_2,&gpcg->gnorm);CHKERRQ(ierr); tao->step=1.0; gpcg->f = f; /* Check Stopping Condition */ ierr=TaoMonitor(tao,tao->niter,f,gpcg->gnorm,0.0,tao->step,&reason);CHKERRQ(ierr); while (reason == TAO_CONTINUE_ITERATING){ tao->ksp_its=0; ierr = GPCGGradProjections(tao);CHKERRQ(ierr); ierr = ISGetSize(gpcg->Free_Local,&gpcg->n_free);CHKERRQ(ierr); f=gpcg->f; gnorm=gpcg->gnorm; ierr = KSPReset(tao->ksp);CHKERRQ(ierr); if (gpcg->n_free > 0){ /* Create a reduced linear system */ ierr = VecDestroy(&gpcg->R);CHKERRQ(ierr); ierr = VecDestroy(&gpcg->DXFree);CHKERRQ(ierr); ierr = TaoVecGetSubVec(tao->gradient,gpcg->Free_Local, tao->subset_type, 0.0, &gpcg->R);CHKERRQ(ierr); ierr = VecScale(gpcg->R, -1.0);CHKERRQ(ierr); ierr = TaoVecGetSubVec(tao->stepdirection,gpcg->Free_Local,tao->subset_type, 0.0, &gpcg->DXFree);CHKERRQ(ierr); ierr = VecSet(gpcg->DXFree,0.0);CHKERRQ(ierr); ierr = TaoMatGetSubMat(tao->hessian, gpcg->Free_Local, gpcg->Work, tao->subset_type, &gpcg->Hsub);CHKERRQ(ierr); if (tao->hessian_pre == tao->hessian) { ierr = MatDestroy(&gpcg->Hsub_pre);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)gpcg->Hsub);CHKERRQ(ierr); gpcg->Hsub_pre = gpcg->Hsub; } else { ierr = TaoMatGetSubMat(tao->hessian, gpcg->Free_Local, gpcg->Work, tao->subset_type, &gpcg->Hsub_pre);CHKERRQ(ierr); } ierr = KSPReset(tao->ksp);CHKERRQ(ierr); ierr = KSPSetOperators(tao->ksp,gpcg->Hsub,gpcg->Hsub_pre);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp,gpcg->R,gpcg->DXFree);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; tao->ksp_tot_its+=its; ierr = VecSet(tao->stepdirection,0.0);CHKERRQ(ierr); ierr = VecISAXPY(tao->stepdirection,gpcg->Free_Local,1.0,gpcg->DXFree);CHKERRQ(ierr); ierr = VecDot(tao->stepdirection,tao->gradient,&gdx);CHKERRQ(ierr); ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr); f_new=f; ierr = TaoLineSearchApply(tao->linesearch,tao->solution,&f_new,tao->gradient,tao->stepdirection,&stepsize,&ls_status);CHKERRQ(ierr); actred = f_new - f; /* Evaluate the function and gradient at the new point */ ierr = VecBoundGradientProjection(tao->gradient,tao->solution,tao->XL,tao->XU, gpcg->PG);CHKERRQ(ierr); ierr = VecNorm(gpcg->PG, NORM_2, &gnorm);CHKERRQ(ierr); f=f_new; ierr = ISDestroy(&gpcg->Free_Local);CHKERRQ(ierr); ierr = VecWhichBetween(tao->XL,tao->solution,tao->XU,&gpcg->Free_Local);CHKERRQ(ierr); } else { actred = 0; gpcg->step=1.0; /* if there were no free variables, no cg method */ } tao->niter++; ierr = TaoMonitor(tao,tao->niter,f,gnorm,0.0,gpcg->step,&reason);CHKERRQ(ierr); gpcg->f=f;gpcg->gnorm=gnorm; gpcg->actred=actred; if (reason!=TAO_CONTINUE_ITERATING) break; } /* END MAIN LOOP */ PetscFunctionReturn(0); }
static PetscErrorCode KSPCGSolve_NASH(KSP ksp) { #if defined(PETSC_USE_COMPLEX) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "NASH is not available for complex systems"); #else KSPCG_NASH *cg = (KSPCG_NASH*)ksp->data; PetscErrorCode ierr; Mat Qmat, Mmat; Vec r, z, p, d; PC pc; PetscReal norm_r, norm_d, norm_dp1, norm_p, dMp; PetscReal alpha, beta, kappa, rz, rzm1; PetscReal rr, r2, step; PetscInt max_cg_its; PetscBool diagonalscale; PetscFunctionBegin; /***************************************************************************/ /* Check the arguments and parameters. */ /***************************************************************************/ ierr = PCGetDiagonalScale(ksp->pc, &diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name); if (cg->radius < 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "Input error: radius < 0"); /***************************************************************************/ /* Get the workspace vectors and initialize variables */ /***************************************************************************/ r2 = cg->radius * cg->radius; r = ksp->work[0]; z = ksp->work[1]; p = ksp->work[2]; d = ksp->vec_sol; pc = ksp->pc; ierr = PCGetOperators(pc, &Qmat, &Mmat);CHKERRQ(ierr); ierr = VecGetSize(d, &max_cg_its);CHKERRQ(ierr); max_cg_its = PetscMin(max_cg_its, ksp->max_it); ksp->its = 0; /***************************************************************************/ /* Initialize objective function and direction. */ /***************************************************************************/ cg->o_fcn = 0.0; ierr = VecSet(d, 0.0);CHKERRQ(ierr); /* d = 0 */ cg->norm_d = 0.0; /***************************************************************************/ /* Begin the conjugate gradient method. Check the right-hand side for */ /* numerical problems. The check for not-a-number and infinite values */ /* need be performed only once. */ /***************************************************************************/ ierr = VecCopy(ksp->vec_rhs, r);CHKERRQ(ierr); /* r = -grad */ ierr = VecDot(r, r, &rr);CHKERRQ(ierr); /* rr = r^T r */ KSPCheckDot(ksp,rr); /***************************************************************************/ /* Check the preconditioner for numerical problems and for positive */ /* definiteness. The check for not-a-number and infinite values need be */ /* performed only once. */ /***************************************************************************/ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T inv(M) r */ if (PetscIsInfOrNanScalar(rz)) { /*************************************************************************/ /* The preconditioner contains not-a-number or an infinite value. */ /* Return the gradient direction intersected with the trust region. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NANORINF; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: bad preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } if (rz < 0.0) { /*************************************************************************/ /* The preconditioner is indefinite. Because this is the first */ /* and we do not have a direction yet, we use the gradient step. Note */ /* that we cannot use the preconditioned norm when computing the step */ /* because the matrix is indefinite. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: indefinite preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute and log the residual. Check convergence because this */ /* initializes things, but do not terminate until at least one conjugate */ /* gradient iteration has been performed. */ /***************************************************************************/ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: norm_r = PetscSqrtReal(rr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.0; break; } ierr = KSPLogResidualHistory(ksp, norm_r);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); /***************************************************************************/ /* Compute the first direction and update the iteration. */ /***************************************************************************/ ierr = VecCopy(z, p);CHKERRQ(ierr); /* p = z */ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ++ksp->its; /***************************************************************************/ /* Check the matrix for numerical problems. */ /***************************************************************************/ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ if (PetscIsInfOrNanScalar(kappa)) { /*************************************************************************/ /* The matrix produced not-a-number or an infinite value. In this case, */ /* we must stop and use the gradient direction. This condition need */ /* only be checked once. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NANORINF; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: bad matrix: kappa=%g\n", (double)kappa);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Initialize variables for calculating the norm of the direction. */ /***************************************************************************/ dMp = 0.0; norm_d = 0.0; switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: norm_p = rz; break; default: ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /***************************************************************************/ /* Check for negative curvature. */ /***************************************************************************/ if (kappa <= 0.0) { /*************************************************************************/ /* In this case, the matrix is indefinite and we have encountered a */ /* direction of negative curvature. Because negative curvature occurs */ /* during the first step, we must follow a direction. */ /*************************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: negative curvature: kappa=%g\n", (double)kappa);CHKERRQ(ierr); if (cg->radius && norm_p > 0.0) { /***********************************************************************/ /* Follow direction of negative curvature to the boundary of the */ /* trust region. */ /***********************************************************************/ step = PetscSqrtReal(r2 / norm_p); cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /***********************************************************************/ /* Update objective function. */ /***********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else if (cg->radius) { /***********************************************************************/ /* The norm of the preconditioned direction is zero; use the gradient */ /* step. */ /***********************************************************************/ if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Run the conjugate gradient method until either the problem is solved, */ /* we encounter the boundary of the trust region, or the conjugate */ /* gradient method breaks down. */ /***************************************************************************/ while (1) { /*************************************************************************/ /* Know that kappa is nonzero, because we have not broken down, so we */ /* can compute the steplength. */ /*************************************************************************/ alpha = rz / kappa; /*************************************************************************/ /* Compute the steplength and check for intersection with the trust */ /* region. */ /*************************************************************************/ norm_dp1 = norm_d + alpha*(2.0*dMp + alpha*norm_p); if (cg->radius && norm_dp1 >= r2) { /***********************************************************************/ /* In this case, the matrix is positive definite as far as we know. */ /* However, the full step goes beyond the trust region. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_CONSTRAINED; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: constrained step: radius=%g\n", (double)cg->radius);CHKERRQ(ierr); if (norm_p > 0.0) { /*********************************************************************/ /* Follow the direction to the boundary of the trust region. */ /*********************************************************************/ step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p; cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /*********************************************************************/ /* Update objective function. */ /*********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else { /*********************************************************************/ /* The norm of the direction is zero; there is nothing to follow. */ /*********************************************************************/ } break; } /*************************************************************************/ /* Now we can update the direction and residual. */ /*************************************************************************/ ierr = VecAXPY(d, alpha, p);CHKERRQ(ierr); /* d = d + alpha p */ ierr = VecAXPY(r, -alpha, z);CHKERRQ(ierr); /* r = r - alpha Q p */ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: norm_d = norm_dp1; break; default: ierr = VecDot(d, d, &norm_d);CHKERRQ(ierr); break; } cg->norm_d = PetscSqrtReal(norm_d); /*************************************************************************/ /* Update objective function. */ /*************************************************************************/ cg->o_fcn -= 0.5 * alpha * rz; /*************************************************************************/ /* Check that the preconditioner appears positive semidefinite. */ /*************************************************************************/ rzm1 = rz; ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T z */ if (rz < 0.0) { /***********************************************************************/ /* The preconditioner is indefinite. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: cg indefinite preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); break; } /*************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute the residual and check for convergence. */ /*************************************************************************/ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(r, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.; break; } ierr = KSPLogResidualHistory(ksp, norm_r);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { /***********************************************************************/ /* The method has converged. */ /***********************************************************************/ ierr = PetscInfo2(ksp, "KSPCGSolve_NASH: truncated step: rnorm=%g, radius=%g\n", (double)norm_r, (double)cg->radius);CHKERRQ(ierr); break; } /*************************************************************************/ /* We have not converged yet. Check for breakdown. */ /*************************************************************************/ beta = rz / rzm1; if (PetscAbsReal(beta) <= 0.0) { /***********************************************************************/ /* Conjugate gradients has broken down. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_BREAKDOWN; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: breakdown: beta=%g\n", (double)beta);CHKERRQ(ierr); break; } /*************************************************************************/ /* Check iteration limit. */ /*************************************************************************/ if (ksp->its >= max_cg_its) { ksp->reason = KSP_DIVERGED_ITS; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: iterlim: its=%D\n", ksp->its);CHKERRQ(ierr); break; } /*************************************************************************/ /* Update p and the norms. */ /*************************************************************************/ ierr = VecAYPX(p, beta, z);CHKERRQ(ierr); /* p = z + beta p */ switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: dMp = beta*(dMp + alpha*norm_p); norm_p = beta*(rzm1 + beta*norm_p); break; default: ierr = VecDot(d, p, &dMp);CHKERRQ(ierr); ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /*************************************************************************/ /* Compute the new direction and update the iteration. */ /*************************************************************************/ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ ++ksp->its; /*************************************************************************/ /* Check for negative curvature. */ /*************************************************************************/ if (kappa <= 0.0) { /***********************************************************************/ /* In this case, the matrix is indefinite and we have encountered */ /* a direction of negative curvature. Stop at the base. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: negative curvature: kappa=%g\n", (double)kappa);CHKERRQ(ierr); break; } } PetscFunctionReturn(0); #endif }
PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx fetidpmat_ctx ) { PetscErrorCode ierr; PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; PCBDDCGraph mat_graph=pcbddc->mat_graph; Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; MPI_Comm comm; Mat ScalingMat; Vec lambda_global; IS IS_l2g_lambda; PetscBool skip_node,fully_redundant; PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; PetscMPIInt rank,size,buf_size,neigh; PetscScalar scalar_value; PetscInt *vertex_indices; PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; PetscInt *aux_sums,*cols_B_delta,*l2g_indices; PetscScalar *array,*scaling_factors,*vals_B_delta; PetscInt *aux_local_numbering_2; /* For communication of scaling factors */ PetscInt *ptrs_buffer,neigh_position; PetscScalar **all_factors,*send_buffer,*recv_buffer; MPI_Request *send_reqs,*recv_reqs; /* tests */ Vec test_vec; PetscBool test_fetidp; PetscViewer viewer; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)(fetidpmat_ctx->pc),&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* Default type of lagrange multipliers is non-redundant */ fully_redundant = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); /* Evaluate local and global number of lagrange multipliers */ ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); n_local_lambda = 0; partial_sum = 0; n_boundary_dofs = 0; s = 0; /* Get Vertices used to define the BDDC */ ierr = PCBDDCGetPrimalVerticesLocalIdx(fetidpmat_ctx->pc,&n_vertices,&vertex_indices);CHKERRQ(ierr); dual_size = pcis->n_B-n_vertices; ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_2);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<pcis->n;i++){ j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ if ( j > 0 ) { n_boundary_dofs++; } skip_node = PETSC_FALSE; if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ skip_node = PETSC_TRUE; s++; } if (j < 1) { skip_node = PETSC_TRUE; } if ( !skip_node ) { if (fully_redundant) { /* fully redundant set of lagrange multipliers */ n_lambda_for_dof = (j*(j+1))/2; } else { n_lambda_for_dof = j; } n_local_lambda += j; /* needed to evaluate global number of lagrange multipliers */ array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ /* store some data needed */ dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; aux_local_numbering_1[partial_sum] = i; aux_local_numbering_2[partial_sum] = n_lambda_for_dof; partial_sum++; } } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); fetidpmat_ctx->n_lambda = (PetscInt)PetscRealPart(scalar_value); /* compute global ordering of lagrange multipliers and associate l2g map */ ierr = PCBDDCSubsetNumbering(comm,matis->mapping,partial_sum,aux_local_numbering_1,aux_local_numbering_2,&i,&aux_global_numbering);CHKERRQ(ierr); if (i != fetidpmat_ctx->n_lambda) { SETERRQ3(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Error in %s: global number of multipliers mismatch! (%d!=%d)\n",__FUNCT__,fetidpmat_ctx->n_lambda,i); } ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); /* init data for scaling factors exchange */ partial_sum = 0; j = 0; ierr = PetscMalloc1(pcis->n_neigh,&ptrs_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&send_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&recv_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&all_factors);CHKERRQ(ierr); ptrs_buffer[0]=0; for (i=1;i<pcis->n_neigh;i++) { partial_sum += pcis->n_shared[i]; ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; } ierr = PetscMalloc1(partial_sum,&send_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&all_factors[0]);CHKERRQ(ierr); for (i=0;i<pcis->n-1;i++) { j = mat_graph->count[i]; all_factors[i+1]=all_factors[i]+j; } /* scatter B scaling to N vec */ ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* communications */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; } ierr = PetscMPIIntCast(ptrs_buffer[i]-ptrs_buffer[i-1],&buf_size);CHKERRQ(ierr); ierr = PetscMPIIntCast(pcis->neigh[i],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&send_reqs[i-1]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* put values in correct places */ for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { k = pcis->shared[i][j]; neigh_position = 0; while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; } } ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree(send_reqs);CHKERRQ(ierr); ierr = PetscFree(recv_reqs);CHKERRQ(ierr); ierr = PetscFree(send_buffer);CHKERRQ(ierr); ierr = PetscFree(recv_buffer);CHKERRQ(ierr); ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); /* Compute B and B_delta (local actions) */ ierr = PetscMalloc1(pcis->n_neigh,&aux_sums);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&l2g_indices);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&vals_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&cols_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&scaling_factors);CHKERRQ(ierr); n_global_lambda=0; partial_sum=0; for (i=0;i<dual_size;i++) { n_global_lambda = aux_global_numbering[i]; j = mat_graph->count[aux_local_numbering_1[i]]; aux_sums[0]=0; for (s=1;s<j;s++) { aux_sums[s]=aux_sums[s-1]+j-s+1; } array = all_factors[aux_local_numbering_1[i]]; n_neg_values = 0; while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values] < rank) {n_neg_values++;} n_pos_values = j - n_neg_values; if (fully_redundant) { for (s=0;s<n_neg_values;s++) { l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=-1.0; scaling_factors[partial_sum+s]=array[s]; } for (s=0;s<n_pos_values;s++) { l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s+n_neg_values]=1.0; scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; } partial_sum += j; } else { /* l2g_indices and default cols and vals of B_delta */ for (s=0;s<j;s++) { l2g_indices [partial_sum+s]=n_global_lambda+s; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=0.0; } /* B_delta */ if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ vals_B_delta [partial_sum+n_neg_values-1]=-1.0; } if ( n_neg_values < j ) { /* there's a rank next to me to the right */ vals_B_delta [partial_sum+n_neg_values]=1.0; } /* scaling as in Klawonn-Widlund 1999*/ for (s=0;s<n_neg_values;s++) { scalar_value = 0.0; for (k=0;k<s+1;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s] = -scalar_value; } for (s=0;s<n_pos_values;s++) { scalar_value = 0.0; for (k=s+n_neg_values;k<j;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s+n_neg_values] = scalar_value; } partial_sum += j; } } ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); ierr = PetscFree(aux_sums);CHKERRQ(ierr); ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); ierr = PetscFree(all_factors);CHKERRQ(ierr); /* Local to global mapping of fetidpmat */ ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); /* Create local part of B_delta */ ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (fully_redundant) { ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); } else { ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = PetscFree(scaling_factors);CHKERRQ(ierr); ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); /* Create some vectors needed by fetidp */ ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); test_fetidp = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); if (test_fetidp && !pcbddc->use_deluxe_scaling) { PetscReal real_value; ierr = PetscViewerASCIIGetStdout(comm,&viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); if (fully_redundant) { ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST A/B: Test numbering of global lambda dofs */ /******************************************************************/ ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecNorm(test_vec,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (fully_redundant) { ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,PetscRealPart(scalar_value)-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } /******************************************************************/ /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ /* This is the meaning of the B matrix */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ /* E_D = R_D^TR */ /* P_D = B_{D,delta}^T B_{delta} */ /* eq.44 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ /* compute a random vector in \widetilde{W} */ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* store w for final comparison */ ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Average operator E_D : results stored in pcis->vec2_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec2_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* test E_D=I-P_D */ scalar_value = 1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ /* eq.48 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* scaling */ ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (!fully_redundant) { /******************************************************************/ /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); } } /* final cleanup */ ierr = PetscFree(vertex_indices);CHKERRQ(ierr); ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscMPIInt rank,nproc; PetscInt rstart,rend,i,k,N,numPoints=1000000; PetscScalar dummy,result=0,h=1.0/numPoints,*xarray; Vec x,xend; PetscInitialize(&argc,&argv,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&nproc);CHKERRQ(ierr); /* Create a parallel vector. Here we set up our x vector which will be given values below. The xend vector is a dummy vector to find the value of the elements at the endpoints for use in the trapezoid rule. */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,numPoints);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecGetSize(x,&N);CHKERRQ(ierr); ierr = VecSet(x,result);CHKERRQ(ierr); ierr = VecDuplicate(x,&xend);CHKERRQ(ierr); result=0.5; if (rank == 0){ i=0; ierr = VecSetValues(xend,1,&i,&result,INSERT_VALUES);CHKERRQ(ierr); } else if (rank == nproc){ i=N-1; ierr = VecSetValues(xend,1,&i,&result,INSERT_VALUES);CHKERRQ(ierr); } /* Assemble vector, using the 2-step process: VecAssemblyBegin(), VecAssemblyEnd() Computations can be done while messages are in transition by placing code between these two statements. */ ierr = VecAssemblyBegin(xend);CHKERRQ(ierr); ierr = VecAssemblyEnd(xend);CHKERRQ(ierr); /* Set the x vector elements. i*h will return 0 for i=0 and 1 for i=N-1. The function evaluated (2x/(1+x^2)) is defined above. Each evaluation is put into the local array of the vector without message passing. */ ierr = VecGetOwnershipRange(x,&rstart,&rend);CHKERRQ(ierr); ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); k = 0; for (i=rstart; i<rend; i++){ xarray[k] = i*h; xarray[k] = func(xarray[k]); k++; } ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); /* Evaluates the integral. First the sum of all the points is taken. That result is multiplied by the step size for the trapezoid rule. Then half the value at each endpoint is subtracted, this is part of the composite trapezoid rule. */ ierr = VecSum(x,&result);CHKERRQ(ierr); result = result*h; ierr = VecDot(x,xend,&dummy);CHKERRQ(ierr); result = result-h*dummy; /* Return the value of the integral. */ ierr = PetscPrintf(PETSC_COMM_WORLD,"ln(2) is %G\n",result);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&xend);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc,char **args) { Mat C; int i,m = 5,rank,size,N,start,end,M; int ierr,idx[4]; PetscScalar Ke[16]; PetscReal h; Vec u,b; KSP ksp; MatNullSpace nullsp; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); N = (m+1)*(m+1); /* dimension of matrix */ M = m*m; /* number of elements */ h = 1.0/m; /* mesh width */ ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); /* Create stiffness matrix */ ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank); end = start + M/size + ((M%size) > rank); /* Assemble matrix */ ierr = FormElementStiffness(h*h,Ke); /* element stiffness for Laplacian */ for (i=start; i<end; i++) { /* location of lower left corner of element */ /* node numbers for the four corners of element */ idx[0] = (m+1)*(i/m) + (i % m); idx[1] = idx[0]+1; idx[2] = idx[1] + m + 1; idx[3] = idx[2] - 1; ierr = MatSetValues(C,4,idx,4,idx,Ke,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Create right-hand-side and solution vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr); ierr = VecSetSizes(u,PETSC_DECIDE,N);CHKERRQ(ierr); ierr = VecSetFromOptions(u);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)u,"Approx. Solution");CHKERRQ(ierr); ierr = VecDuplicate(u,&b);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)b,"Right hand side");CHKERRQ(ierr); ierr = VecSet(b,1.0);CHKERRQ(ierr); ierr = VecSetValue(b,0,1.2,ADD_VALUES);CHKERRQ(ierr); ierr = VecSet(u,0.0);CHKERRQ(ierr); /* Solve linear system */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); ierr = MatNullSpaceCreate(PETSC_COMM_WORLD,PETSC_TRUE,0,NULL,&nullsp);CHKERRQ(ierr); /* The KSP solver will remove this nullspace from the solution at each iteration */ ierr = MatSetNullSpace(C,nullsp);CHKERRQ(ierr); /* The KSP solver will remove from the right hand side any portion in this nullspace, thus making the linear system consistent. */ ierr = MatSetTransposeNullSpace(C,nullsp);CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&nullsp);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,u);CHKERRQ(ierr); /* Free work space */ ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
int main(int argc,char **argv) { PetscMPIInt rank; PetscErrorCode ierr; PetscInt M = 10,N = 8,m = PETSC_DECIDE; PetscInt s=2,w=2,n = PETSC_DECIDE,nloc,l,i,j,kk; PetscInt Xs,Xm,Ys,Ym,iloc,*iglobal,*ltog; PetscInt *lx = PETSC_NULL,*ly = PETSC_NULL; PetscBool testorder = PETSC_FALSE,flg; DMDABoundaryType bx = DMDA_BOUNDARY_NONE,by= DMDA_BOUNDARY_NONE; DM da; PetscViewer viewer; Vec local,global; PetscScalar value; DMDAStencilType st = DMDA_STENCIL_BOX; AO ao; ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); ierr = PetscViewerDrawOpen(PETSC_COMM_WORLD,0,"",300,0,400,400,&viewer);CHKERRQ(ierr); /* Readoptions */ ierr = PetscOptionsGetInt(PETSC_NULL,"-NX",&M,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-NY",&N,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-m",&m,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-n",&n,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-s",&s,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-w",&w,PETSC_NULL);CHKERRQ(ierr); flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-xperiodic",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) bx = DMDA_BOUNDARY_PERIODIC; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-yperiodic",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) by = DMDA_BOUNDARY_PERIODIC; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-xghosted",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) bx = DMDA_BOUNDARY_GHOSTED; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-yghosted",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) by = DMDA_BOUNDARY_GHOSTED; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-star",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) st = DMDA_STENCIL_STAR; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-box",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) st = DMDA_STENCIL_BOX; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-testorder",&testorder,PETSC_NULL);CHKERRQ(ierr); /* Test putting two nodes in x and y on each processor, exact last processor in x and y gets the rest. */ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-distribute",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) { if (m == PETSC_DECIDE) SETERRQ(PETSC_COMM_WORLD,1,"Must set -m option with -distribute option"); ierr = PetscMalloc(m*sizeof(PetscInt),&lx);CHKERRQ(ierr); for (i=0; i<m-1; i++) { lx[i] = 4;} lx[m-1] = M - 4*(m-1); if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_WORLD,1,"Must set -n option with -distribute option"); ierr = PetscMalloc(n*sizeof(PetscInt),&ly);CHKERRQ(ierr); for (i=0; i<n-1; i++) { ly[i] = 2;} ly[n-1] = N - 2*(n-1); } /* Create distributed array and get vectors */ ierr = DMDACreate2d(PETSC_COMM_WORLD,bx,by,st,M,N,m,n,w,s,lx,ly,&da);CHKERRQ(ierr); ierr = PetscFree(lx);CHKERRQ(ierr); ierr = PetscFree(ly);CHKERRQ(ierr); ierr = DMView(da,viewer);CHKERRQ(ierr); ierr = DMCreateGlobalVector(da,&global);CHKERRQ(ierr); ierr = DMCreateLocalVector(da,&local);CHKERRQ(ierr); /* Set global vector; send ghost points to local vectors */ value = 1; ierr = VecSet(global,value);CHKERRQ(ierr); ierr = DMGlobalToLocalBegin(da,global,INSERT_VALUES,local);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(da,global,INSERT_VALUES,local);CHKERRQ(ierr); /* Scale local vectors according to processor rank; pass to global vector */ ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); value = rank; ierr = VecScale(local,value);CHKERRQ(ierr); ierr = DMLocalToGlobalBegin(da,local,INSERT_VALUES,global);CHKERRQ(ierr); ierr = DMLocalToGlobalEnd(da,local,INSERT_VALUES,global);CHKERRQ(ierr); if (!testorder) { /* turn off printing when testing ordering mappings */ ierr = PetscPrintf (PETSC_COMM_WORLD,"\nGlobal Vectors:\n");CHKERRQ(ierr); ierr = VecView(global,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf (PETSC_COMM_WORLD,"\n\n");CHKERRQ(ierr); } /* Send ghost points to local vectors */ ierr = DMGlobalToLocalBegin(da,global,INSERT_VALUES,local);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(da,global,INSERT_VALUES,local);CHKERRQ(ierr); flg = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-local_print",&flg,PETSC_NULL);CHKERRQ(ierr); if (flg) { PetscViewer sviewer; ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"\nLocal Vector: processor %d\n",rank);CHKERRQ(ierr); ierr = PetscViewerGetSingleton(PETSC_VIEWER_STDOUT_WORLD,&sviewer);CHKERRQ(ierr); ierr = VecView(local,sviewer);CHKERRQ(ierr); ierr = PetscViewerRestoreSingleton(PETSC_VIEWER_STDOUT_WORLD,&sviewer);CHKERRQ(ierr); } /* Tests mappings betweeen application/PETSc orderings */ if (testorder) { ierr = DMDAGetGhostCorners(da,&Xs,&Ys,PETSC_NULL,&Xm,&Ym,PETSC_NULL);CHKERRQ(ierr); ierr = DMDAGetGlobalIndices(da,&nloc,<og);CHKERRQ(ierr); ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); ierr = PetscMalloc(nloc*sizeof(PetscInt),&iglobal);CHKERRQ(ierr); /* Set iglobal to be global indices for each processor's local and ghost nodes, using the DMDA ordering of grid points */ kk = 0; for (j=Ys; j<Ys+Ym; j++) { for (i=Xs; i<Xs+Xm; i++) { iloc = w*((j-Ys)*Xm + i-Xs); for (l=0; l<w; l++) { iglobal[kk++] = ltog[iloc+l]; } } } /* Map this to the application ordering (which for DMDAs is just the natural ordering that would be used for 1 processor, numbering most rapidly by x, then y) */ ierr = AOPetscToApplication(ao,nloc,iglobal);CHKERRQ(ierr); /* Then map the application ordering back to the PETSc DMDA ordering */ ierr = AOApplicationToPetsc(ao,nloc,iglobal);CHKERRQ(ierr); /* Verify the mappings */ kk=0; for (j=Ys; j<Ys+Ym; j++) { for (i=Xs; i<Xs+Xm; i++) { iloc = w*((j-Ys)*Xm + i-Xs); for (l=0; l<w; l++) { if (iglobal[kk] != ltog[iloc+l]) { ierr = PetscFPrintf(PETSC_COMM_SELF,stdout,"[%d] Problem with mapping: j=%D, i=%D, l=%D, petsc1=%D, petsc2=%D\n", rank,j,i,l,ltog[iloc+l],iglobal[kk]);} kk++; } } } ierr = PetscFree(iglobal);CHKERRQ(ierr); } /* Free memory */ ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); ierr = VecDestroy(&local);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
-m # : the size of the vectors\n \ -n # : the numer of indices (with n<=m)\n \ -toFirst # : the starting index of the output vector for strided scatters\n \ -toStep # : the step size into the output vector for strided scatters\n \ -fromFirst # : the starting index of the input vector for strided scatters\n\ -fromStep # : the step size into the input vector for strided scatters\n\n"; int main(int argc, char * argv[]) { Vec X,Y; PetscInt m,n,i,n1,n2; PetscInt toFirst,toStep,fromFirst,fromStep; PetscInt *idx,*idy; PetscBool flg; IS toISStrided,fromISStrided,toISGeneral,fromISGeneral; VecScatter vscatSStoSS,vscatSStoSG,vscatSGtoSS,vscatSGtoSG; ScatterMode mode; InsertMode addv; PetscErrorCode ierr; ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,&flg);CHKERRQ(ierr); if (!flg) m = 100; ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,&flg);CHKERRQ(ierr); if (!flg) n = 30; ierr = PetscOptionsGetInt(NULL,NULL,"-toFirst",&toFirst,&flg);CHKERRQ(ierr); if (!flg) toFirst = 3; ierr = PetscOptionsGetInt(NULL,NULL,"-toStep",&toStep,&flg);CHKERRQ(ierr); if (!flg) toStep = 3; ierr = PetscOptionsGetInt(NULL,NULL,"-fromFirst",&fromFirst,&flg);CHKERRQ(ierr); if (!flg) fromFirst = 2; ierr = PetscOptionsGetInt(NULL,NULL,"-fromStep",&fromStep,&flg);CHKERRQ(ierr); if (!flg) fromStep = 2; if (n>m) { ierr = PetscPrintf(PETSC_COMM_WORLD,"The vector sizes are %D. The number of elements being scattered is %D\n",m,n);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Adjust the parameters such that m>=n\n");CHKERRQ(ierr); } else if (toFirst+(n-1)*toStep >=m) { ierr = PetscPrintf(PETSC_COMM_WORLD,"The vector sizes are %D. The number of elements being scattered is %D\n",m,n);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"For the Strided Scatter, toFirst=%D and toStep=%D.\n",toFirst,toStep);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"This produces an index (toFirst+(n-1)*toStep)>=m\n");CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Adjust the parameterrs accordingly with -m, -n, -toFirst, or -toStep\n");CHKERRQ(ierr); } else if (fromFirst+(n-1)*fromStep>=m) { ierr = PetscPrintf(PETSC_COMM_WORLD,"The vector sizes are %D. The number of elements being scattered is %D\n",m,n);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"For the Strided Scatter, fromFirst=%D and fromStep=%D.\n",fromFirst,toStep);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"This produces an index (fromFirst+(n-1)*fromStep)>=m\n");CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Adjust the parameterrs accordingly with -m, -n, -fromFirst, or -fromStep\n");CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_WORLD,"m=%D\tn=%D\tfromFirst=%D\tfromStep=%D\ttoFirst=%D\ttoStep=%D\n",m,n,fromFirst,fromStep,toFirst,toStep);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"fromFirst+(n-1)*fromStep=%D\ttoFirst+(n-1)*toStep=%D\n",fromFirst+(n-1)*fromStep,toFirst+(n-1)*toStep);CHKERRQ(ierr); /* Build the vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&Y);CHKERRQ(ierr); ierr = VecSetSizes(Y,m,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&X);CHKERRQ(ierr); ierr = VecSetSizes(X,m,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(Y);CHKERRQ(ierr); ierr = VecSetFromOptions(X);CHKERRQ(ierr); ierr = VecSet(X,2.0);CHKERRQ(ierr); ierr = VecSet(Y,1.0);CHKERRQ(ierr); /* Build the strided index sets */ ierr = ISCreate(PETSC_COMM_WORLD,&toISStrided);CHKERRQ(ierr); ierr = ISCreate(PETSC_COMM_WORLD,&fromISStrided);CHKERRQ(ierr); ierr = ISSetType(toISStrided, ISSTRIDE);CHKERRQ(ierr); ierr = ISSetType(fromISStrided, ISSTRIDE);CHKERRQ(ierr); ierr = ISStrideSetStride(fromISStrided,n,fromFirst,fromStep);CHKERRQ(ierr); ierr = ISStrideSetStride(toISStrided,n,toFirst,toStep);CHKERRQ(ierr); /* Build the general index sets */ ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); ierr = PetscMalloc1(n,&idy);CHKERRQ(ierr); for (i=0; i<n; i++) { idx[i] = i % m; idy[i] = (i+m) % m; } n1 = n; n2 = n; ierr = PetscSortRemoveDupsInt(&n1,idx);CHKERRQ(ierr); ierr = PetscSortRemoveDupsInt(&n2,idy);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_WORLD,n1,idx,PETSC_COPY_VALUES,&toISGeneral);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_WORLD,n2,idy,PETSC_COPY_VALUES,&fromISGeneral);CHKERRQ(ierr); /* set the mode and the insert/add parameter */ mode = SCATTER_FORWARD; addv = ADD_VALUES; /* VecScatter : Seq Strided to Seq Strided */ ierr = VecScatterCreate(X,fromISStrided,Y,toISStrided,&vscatSStoSS);CHKERRQ(ierr); ierr = VecScatterBegin(vscatSStoSS,X,Y,addv,mode);CHKERRQ(ierr); ierr = VecScatterEnd(vscatSStoSS,X,Y,addv,mode);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscatSStoSS);CHKERRQ(ierr); /* VecScatter : Seq General to Seq Strided */ ierr = VecScatterCreate(Y,fromISGeneral,X,toISStrided,&vscatSGtoSS);CHKERRQ(ierr); ierr = VecScatterBegin(vscatSGtoSS,Y,X,addv,mode);CHKERRQ(ierr); ierr = VecScatterEnd(vscatSGtoSS,Y,X,addv,mode);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscatSGtoSS);CHKERRQ(ierr); /* VecScatter : Seq General to Seq General */ ierr = VecScatterCreate(X,fromISGeneral,Y,toISGeneral,&vscatSGtoSG);CHKERRQ(ierr); ierr = VecScatterBegin(vscatSGtoSG,X,Y,addv,mode);CHKERRQ(ierr); ierr = VecScatterEnd(vscatSGtoSG,X,Y,addv,mode);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscatSGtoSG);CHKERRQ(ierr); /* VecScatter : Seq Strided to Seq General */ ierr = VecScatterCreate(Y,fromISStrided,X,toISGeneral,&vscatSStoSG);CHKERRQ(ierr); ierr = VecScatterBegin(vscatSStoSG,Y,X,addv,mode);CHKERRQ(ierr); ierr = VecScatterEnd(vscatSStoSG,Y,X,addv,mode);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscatSStoSG);CHKERRQ(ierr); /* view the results */ ierr = VecView(Y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Cleanup */ ierr = VecDestroy(&X);CHKERRQ(ierr); ierr = VecDestroy(&Y);CHKERRQ(ierr); ierr = ISDestroy(&toISStrided);CHKERRQ(ierr); ierr = ISDestroy(&fromISStrided);CHKERRQ(ierr); ierr = ISDestroy(&toISGeneral);CHKERRQ(ierr); ierr = ISDestroy(&fromISGeneral);CHKERRQ(ierr); ierr = PetscFree(idx);CHKERRQ(ierr); ierr = PetscFree(idy);CHKERRQ(ierr); } ierr = PetscFinalize(); return ierr; }
int main(int argc,char **argv) { Mat A,B,C,D,mat[4]; ST st; Vec v,w; STType type; PetscScalar value[3],sigma; PetscInt n=10,i,Istart,Iend,col[3]; PetscBool FirstBlock=PETSC_FALSE,LastBlock=PETSC_FALSE; PetscErrorCode ierr; SlepcInitialize(&argc,&argv,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n1-D Laplacian plus diagonal, n=%D\n\n",n);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the operator matrix for the 1-D Laplacian - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(B);CHKERRQ(ierr); ierr = MatSetUp(B);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&D);CHKERRQ(ierr); ierr = MatSetSizes(D,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(D);CHKERRQ(ierr); ierr = MatSetUp(D);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); if (Istart==0) FirstBlock=PETSC_TRUE; if (Iend==n) LastBlock=PETSC_TRUE; value[0]=-1.0; value[1]=2.0; value[2]=-1.0; for (i=(FirstBlock? Istart+1: Istart); i<(LastBlock? Iend-1: Iend); i++) { col[0]=i-1; col[1]=i; col[2]=i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValue(B,i,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } if (LastBlock) { i=n-1; col[0]=n-2; col[1]=n-1; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValue(B,i,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } if (FirstBlock) { i=0; col[0]=0; col[1]=1; value[0]=2.0; value[1]=-1.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValue(B,i,i,-1.0,INSERT_VALUES);CHKERRQ(ierr); } for (i=Istart;i<Iend;i++) { ierr = MatSetValue(C,i,n-i-1,1.0,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValue(D,i,i,i*.1,INSERT_VALUES);CHKERRQ(ierr); if (i==0) { ierr = MatSetValue(D,0,n-1,1.0,INSERT_VALUES);CHKERRQ(ierr); } if (i==n-1) { ierr = MatSetValue(D,n-1,0,1.0,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatGetVecs(A,&v,&w);CHKERRQ(ierr); ierr = VecSet(v,1.0);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the spectral transformation object - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = STCreate(PETSC_COMM_WORLD,&st);CHKERRQ(ierr); mat[0] = A; mat[1] = B; mat[2] = C; mat[3] = D; ierr = STSetOperators(st,4,mat);CHKERRQ(ierr); ierr = STSetFromOptions(st);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Apply the transformed operator for several ST's - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* shift, sigma=0.0 */ ierr = STSetUp(st);CHKERRQ(ierr); ierr = STGetType(st,&type);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"ST type %s\n",type);CHKERRQ(ierr); for (i=0;i<4;i++) { ierr = STMatMult(st,i,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"k= %D\n",i);CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); } ierr = STMatSolve(st,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"solve\n");CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); /* shift, sigma=0.1 */ sigma = 0.1; ierr = STSetShift(st,sigma);CHKERRQ(ierr); ierr = STGetShift(st,&sigma);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"With shift=%g\n",(double)PetscRealPart(sigma));CHKERRQ(ierr); for (i=0;i<4;i++) { ierr = STMatMult(st,i,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"k= %D\n",i);CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); } ierr = STMatSolve(st,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"solve\n");CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); /* sinvert, sigma=0.1 */ ierr = STPostSolve(st);CHKERRQ(ierr); ierr = STSetType(st,STSINVERT);CHKERRQ(ierr); ierr = STGetType(st,&type);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"ST type %s\n",type);CHKERRQ(ierr); ierr = STGetShift(st,&sigma);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"With shift=%g\n",(double)PetscRealPart(sigma));CHKERRQ(ierr); for (i=0;i<4;i++) { ierr = STMatMult(st,i,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"k= %D\n",i);CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); } ierr = STMatSolve(st,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"solve\n");CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); /* sinvert, sigma=-0.5 */ sigma = -0.5; ierr = STSetShift(st,sigma);CHKERRQ(ierr); ierr = STGetShift(st,&sigma);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"With shift=%g\n",(double)PetscRealPart(sigma));CHKERRQ(ierr); for (i=0;i<4;i++) { ierr = STMatMult(st,i,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"k= %D\n",i);CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); } ierr = STMatSolve(st,v,w); ierr = PetscPrintf(PETSC_COMM_WORLD,"solve\n");CHKERRQ(ierr); ierr = VecView(w,NULL);CHKERRQ(ierr); ierr = STDestroy(&st);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&D);CHKERRQ(ierr); ierr = VecDestroy(&v);CHKERRQ(ierr); ierr = VecDestroy(&w);CHKERRQ(ierr); ierr = SlepcFinalize(); return 0; }
//STARTBDRYRESIDUALS PetscErrorCode FormFunction(SNES snes, Vec u, Vec F, void *ctx) { PetscErrorCode ierr; unfemCtx *user = (unfemCtx*)ctx; const int *abfn, *ae, *as, *abfs, *en, deg = user->quaddeg - 1; const Node *aloc; const double *au; double *aF, unode[3], gradu[2], gradpsi[3][2], uquad[4], aquad[4], fquad[4], dx, dy, dx1, dx2, dy1, dy2, detJ, ls, xmid, ymid, sint, xx, yy, sum; int n, p, na, nb, k, l, q; PetscLogStagePush(user->resstage); //STRIP ierr = VecGetArrayRead(u,&au); CHKERRQ(ierr); ierr = VecSet(F,0.0); CHKERRQ(ierr); ierr = VecGetArray(F,&aF); CHKERRQ(ierr); ierr = UMGetNodeCoordArrayRead(user->mesh,&aloc); CHKERRQ(ierr); // Dirichlet node residuals ierr = ISGetIndices(user->mesh->bfn,&abfn); CHKERRQ(ierr); for (n = 0; n < user->mesh->N; n++) { if (abfn[n] == 2) // node is Dirichlet aF[n] = au[n] - user->gD_fcn(aloc[n].x,aloc[n].y); } // Neumann segment contributions ierr = ISGetIndices(user->mesh->s,&as); CHKERRQ(ierr); ierr = ISGetIndices(user->mesh->bfs,&abfs); CHKERRQ(ierr); for (p = 0; p < user->mesh->P; p++) { if (abfs[p] == 1) { // segment is Neumann na = as[2*p+0]; // nodes at end of segment nb = as[2*p+1]; // length of segment dx = aloc[na].x-aloc[nb].x; dy = aloc[na].y-aloc[nb].y; ls = sqrt(dx * dx + dy * dy); // midpoint rule; psi_na=psi_nb=0.5 at midpoint of segment xmid = 0.5*(aloc[na].x+aloc[nb].x); ymid = 0.5*(aloc[na].y+aloc[nb].y); sint = 0.5 * user->gN_fcn(xmid,ymid) * ls; // nodes at end of segment could be Dirichlet if (abfn[na] != 2) aF[na] -= sint; if (abfn[nb] != 2) aF[nb] -= sint; } } ierr = ISRestoreIndices(user->mesh->s,&as); CHKERRQ(ierr); ierr = ISRestoreIndices(user->mesh->bfs,&abfs); CHKERRQ(ierr); //ENDBDRYRESIDUALS //STARTELEMENTRESIDUALS // element contributions ierr = ISGetIndices(user->mesh->e,&ae); CHKERRQ(ierr); for (k = 0; k < user->mesh->K; k++) { en = ae + 3*k; // en[0], en[1], en[2] are nodes of element k // geometry of element dx1 = aloc[en[1]].x - aloc[en[0]].x; dx2 = aloc[en[2]].x - aloc[en[0]].x; dy1 = aloc[en[1]].y - aloc[en[0]].y; dy2 = aloc[en[2]].y - aloc[en[0]].y; detJ = dx1 * dy2 - dx2 * dy1; // gradients of hat functions for (l = 0; l < 3; l++) { gradpsi[l][0] = ( dy2 * dchi[l][0] - dy1 * dchi[l][1]) / detJ; gradpsi[l][1] = (-dx2 * dchi[l][0] + dx1 * dchi[l][1]) / detJ; } // u and grad u on element gradu[0] = 0.0; gradu[1] = 0.0; for (l = 0; l < 3; l++) { if (abfn[en[l]] == 2) unode[l] = user->gD_fcn(aloc[en[l]].x,aloc[en[l]].y); else unode[l] = au[en[l]]; gradu[0] += unode[l] * gradpsi[l][0]; gradu[1] += unode[l] * gradpsi[l][1]; } // function values at quadrature points on element for (q = 0; q < Q[deg]; q++) { uquad[q] = eval(unode,xi[deg][q],eta[deg][q]); xx = aloc[en[0]].x + dx1 * xi[deg][q] + dx2 * eta[deg][q]; yy = aloc[en[0]].y + dy1 * xi[deg][q] + dy2 * eta[deg][q]; aquad[q] = user->a_fcn(uquad[q],xx,yy); fquad[q] = user->f_fcn(uquad[q],xx,yy); } // residual contribution for each node of element for (l = 0; l < 3; l++) { if (abfn[en[l]] < 2) { // if NOT a Dirichlet node sum = 0.0; for (q = 0; q < Q[deg]; q++) sum += w[deg][q] * ( aquad[q] * InnerProd(gradu,gradpsi[l]) - fquad[q] * chi(l,xi[deg][q],eta[deg][q]) ); aF[en[l]] += fabs(detJ) * sum; } } } ierr = ISRestoreIndices(user->mesh->e,&ae); CHKERRQ(ierr); ierr = ISRestoreIndices(user->mesh->bfn,&abfn); CHKERRQ(ierr); ierr = UMRestoreNodeCoordArrayRead(user->mesh,&aloc); CHKERRQ(ierr); ierr = VecRestoreArrayRead(u,&au); CHKERRQ(ierr); ierr = VecRestoreArray(F,&aF); CHKERRQ(ierr); PetscLogStagePop(); //STRIP return 0; }
int main(int argc,char **args) { const ptrdiff_t N0=2056,N1=2056; fftw_plan bplan,fplan; fftw_complex *out; double *in1,*in2; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; PetscInt i,j; PetscMPIInt size,rank; int n,N,N_factor,NM; PetscScalar one=2.0,zero=0.5; PetscScalar two=4.0,three=8.0,four=16.0; PetscScalar a,*x_arr,*y_arr,*z_arr,enorm; Vec fin,fout,fout1; Vec ini,final; PetscRandom rnd; PetscErrorCode ierr; PetscInt *indx3,tempindx,low,*indx4,tempindx1; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rnd);CHKERRQ(ierr); alloc_local = fftw_mpi_local_size_2d_transposed(N0,N1/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); #if defined(DEBUGGING) printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); printf("The value local_n0 is %ld from process %d\n",local_n0,rank); printf("The value local_0_start is %ld from process %d\n",local_0_start,rank); /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */ /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */ #endif /* Allocate space for input and output arrays */ in1=(double*)fftw_malloc(sizeof(double)*alloc_local*2); in2=(double*)fftw_malloc(sizeof(double)*alloc_local*2); out=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); N = 2*N0*(N1/2+1); N_factor = N0*N1; n = 2*local_n0*(N1/2+1); /* printf("The value N is %d from process %d\n",N,rank); */ /* printf("The value n is %d from process %d\n",n,rank); */ /* printf("The value n1 is %d from process %d\n",n1,rank);*/ /* Creating data vector and accompanying array with VeccreateMPIWithArray */ ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr); /* Set the vector with random data */ ierr = VecSet(fin,zero);CHKERRQ(ierr); /* for (i=0;i<N0*N1;i++) */ /* { */ /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */ /* } */ /* VecSet(fin,one); */ i =0; ierr = VecSetValues(fin,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); i =1; ierr = VecSetValues(fin,1,&i,&two,INSERT_VALUES);CHKERRQ(ierr); i =4; ierr = VecSetValues(fin,1,&i,&three,INSERT_VALUES);CHKERRQ(ierr); i =5; ierr = VecSetValues(fin,1,&i,&four,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(fin);CHKERRQ(ierr); ierr = VecAssemblyEnd(fin);CHKERRQ(ierr); ierr = VecSet(fout,zero);CHKERRQ(ierr); ierr = VecSet(fout1,zero);CHKERRQ(ierr); /* Get the meaningful portion of array */ ierr = VecGetArray(fin,&x_arr);CHKERRQ(ierr); ierr = VecGetArray(fout1,&z_arr);CHKERRQ(ierr); ierr = VecGetArray(fout,&y_arr);CHKERRQ(ierr); fplan=fftw_mpi_plan_dft_r2c_2d(N0,N1,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); bplan=fftw_mpi_plan_dft_c2r_2d(N0,N1,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); fftw_execute(fplan); fftw_execute(bplan); ierr = VecRestoreArray(fin,&x_arr); ierr = VecRestoreArray(fout1,&z_arr); ierr = VecRestoreArray(fout,&y_arr); /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecCreate(PETSC_COMM_WORLD,&ini);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&final);CHKERRQ(ierr); ierr = VecSetSizes(ini,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetSizes(final,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetFromOptions(ini);CHKERRQ(ierr); ierr = VecSetFromOptions(final);CHKERRQ(ierr); if (N1%2==0) { NM = N1+2; } else { NM = N1+1; } /*printf("The Value of NM is %d",NM); */ ierr = VecGetOwnershipRange(fin,&low,NULL); /*printf("The local index is %d from %d\n",low,rank); */ ierr = PetscMalloc1(local_n0*N1,&indx3); ierr = PetscMalloc1(local_n0*N1,&indx4); for (i=0;i<local_n0;i++) { for (j=0;j<N1;j++) { tempindx = i*N1 + j; tempindx1 = i*NM + j; indx3[tempindx]=local_0_start*N1+tempindx; indx4[tempindx]=low+tempindx1; /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ } } ierr = PetscMalloc2(local_n0*N1,&x_arr,local_n0*N1,&y_arr);CHKERRQ(ierr); /* arr must be allocated for VecGetValues() */ ierr = VecGetValues(fin,local_n0*N1,indx4,(PetscScalar*)x_arr);CHKERRQ(ierr); ierr = VecSetValues(ini,local_n0*N1,indx3,x_arr,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(ini);CHKERRQ(ierr); ierr = VecAssemblyEnd(ini);CHKERRQ(ierr); ierr = VecGetValues(fout1,local_n0*N1,indx4,y_arr); ierr = VecSetValues(final,local_n0*N1,indx3,y_arr,INSERT_VALUES); ierr = VecAssemblyBegin(final); ierr = VecAssemblyEnd(final); ierr = PetscFree2(x_arr,y_arr);CHKERRQ(ierr); /* VecScatter vecscat; IS indx1,indx2; for (i=0;i<N0;i++) { indx = i*NM; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1); indx = i*N1; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2); VecScatterCreate(fin,indx1,ini,indx2,&vecscat); VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); } */ a = 1.0/(PetscReal)N_factor; ierr = VecScale(fout1,a);CHKERRQ(ierr); ierr = VecScale(final,a);CHKERRQ(ierr); /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */ /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr); ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-10) { ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",enorm);CHKERRQ(ierr); } /* Execute fftw with function fftw_execute and destory it after execution */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr); fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr); fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr); ierr = VecDestroy(&ini);CHKERRQ(ierr); ierr = VecDestroy(&final);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rnd);CHKERRQ(ierr); ierr = PetscFree(indx3);CHKERRQ(ierr); ierr = PetscFree(indx4);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
PetscInt main(PetscInt argc,char **args) { typedef enum {RANDOM, CONSTANT, TANH, NUM_FUNCS} FuncType; const char *funcNames[NUM_FUNCS] = {"random", "constant", "tanh"}; Mat A, AA; PetscMPIInt size; PetscInt N,i, stencil=1,dof=3; PetscInt dim[3] = {10,10,10}, ndim = 3; Vec coords,x,y,z,xx, yy, zz; Vec xxsplit[DOF], yysplit[DOF], zzsplit[DOF]; PetscReal h[3]; PetscScalar s; PetscRandom rdm; PetscReal norm, enorm; PetscInt func; FuncType function = TANH; DM da, da1, coordsda; PetscBool view_x = PETSC_FALSE, view_y = PETSC_FALSE, view_z = PETSC_FALSE; PetscErrorCode ierr; ierr = PetscInitialize(&argc,&args,(char *)0,help);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This is a uniprocessor example only!"); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, PETSC_NULL, "USFFT Options", "ex27");CHKERRQ(ierr); ierr = PetscOptionsEList("-function", "Function type", "ex27", funcNames, NUM_FUNCS, funcNames[function], &func, PETSC_NULL);CHKERRQ(ierr); function = (FuncType) func; ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-view_x",&view_x,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-view_y",&view_y,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(PETSC_NULL,"-view_z",&view_z,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetIntArray(PETSC_NULL,"-dim",dim,&ndim,PETSC_NULL);CHKERRQ(ierr); // DMDA with the correct fiber dimension ierr = DMDACreate3d(PETSC_COMM_SELF,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR, dim[0], dim[1], dim[2], PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, dof, stencil, PETSC_NULL, PETSC_NULL, PETSC_NULL, &da); CHKERRQ(ierr); // DMDA with fiber dimension 1 for split fields ierr = DMDACreate3d(PETSC_COMM_SELF,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR, dim[0], dim[1], dim[2], PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, stencil, PETSC_NULL, PETSC_NULL, PETSC_NULL, &da1); CHKERRQ(ierr); // Coordinates ierr = DMDAGetCoordinateDA(da, &coordsda); ierr = DMGetGlobalVector(coordsda, &coords);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) coords, "Grid coordinates");CHKERRQ(ierr); for(i = 0, N = 1; i < 3; i++) { h[i] = 1.0/dim[i]; PetscScalar *a; ierr = VecGetArray(coords, &a);CHKERRQ(ierr); PetscInt j,k,n = 0; for(i = 0; i < 3; ++i) { for(j = 0; j < dim[i]; ++j){ for(k = 0; k < 3; ++k) { a[n] = j*h[i]; // coordinate along the j-th point in the i-th dimension ++n; } } } ierr = VecRestoreArray(coords, &a);CHKERRQ(ierr); } ierr = DMDASetCoordinates(da, coords);CHKERRQ(ierr); ierr = VecDestroy(&coords);CHKERRQ(ierr); // Work vectors ierr = DMGetGlobalVector(da, &x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &xx);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) xx, "Real space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "USFFT frequency space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &yy);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) yy, "FFTW frequency space vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "USFFT reconstructed vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da, &zz);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) zz, "FFTW reconstructed vector");CHKERRQ(ierr); // Split vectors for FFTW for(int ii = 0; ii < 3; ++ii) { ierr = DMGetGlobalVector(da1, &xxsplit[ii]);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) xxsplit[ii], "Real space split vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da1, &yysplit[ii]);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) yysplit[ii], "FFTW frequency space split vector");CHKERRQ(ierr); ierr = DMGetGlobalVector(da1, &zzsplit[ii]);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) zzsplit[ii], "FFTW reconstructed split vector");CHKERRQ(ierr); } ierr = PetscPrintf(PETSC_COMM_SELF, "%3-D: USFFT on vector of ");CHKERRQ(ierr); for(i = 0, N = 1; i < 3; i++) { ierr = PetscPrintf(PETSC_COMM_SELF, "dim[%d] = %d ",i,dim[i]);CHKERRQ(ierr); N *= dim[i]; } ierr = PetscPrintf(PETSC_COMM_SELF, "; total size %d \n",N);CHKERRQ(ierr); if (function == RANDOM) { ierr = PetscRandomCreate(PETSC_COMM_SELF, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); } else if (function == CONSTANT) { ierr = VecSet(x, 1.0);CHKERRQ(ierr); } else if (function == TANH) { PetscScalar *a; ierr = VecGetArray(x, &a);CHKERRQ(ierr); PetscInt j,k = 0; for(i = 0; i < 3; ++i) { for(j = 0; j < dim[i]; ++j) { a[k] = tanh((j - dim[i]/2.0)*(10.0/dim[i])); ++k; } } ierr = VecRestoreArray(x, &a);CHKERRQ(ierr); } if(view_x) { ierr = VecView(x, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } ierr = VecCopy(x,xx);CHKERRQ(ierr); // Split xx ierr = VecStrideGatherAll(xx,xxsplit, INSERT_VALUES);CHKERRQ(ierr); //YES! 'Gather' means 'split' (or maybe 'scatter'?)! ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|x|_2 = %g\n",norm);CHKERRQ(ierr); /* create USFFT object */ ierr = MatCreateSeqUSFFT(da,da,&A);CHKERRQ(ierr); /* create FFTW object */ ierr = MatCreateSeqFFTW(PETSC_COMM_SELF,3,dim,&AA);CHKERRQ(ierr); /* apply USFFT and FFTW FORWARD "preemptively", so the fftw_plans can be reused on different vectors */ ierr = MatMult(A,x,z);CHKERRQ(ierr); for(int ii = 0; ii < 3; ++ii) { ierr = MatMult(AA,xxsplit[ii],zzsplit[ii]);CHKERRQ(ierr); } // Now apply USFFT and FFTW forward several (3) times for (i=0; i<3; ++i){ ierr = MatMult(A,x,y);CHKERRQ(ierr); for(int ii = 0; ii < 3; ++ii) { ierr = MatMult(AA,xxsplit[ii],yysplit[ii]);CHKERRQ(ierr); } ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); for(int ii = 0; ii < 3; ++ii) { ierr = MatMult(AA,yysplit[ii],zzsplit[ii]);CHKERRQ(ierr); } } // Unsplit yy ierr = VecStrideScatterAll(yysplit, yy, INSERT_VALUES);CHKERRQ(ierr); //YES! 'Scatter' means 'collect' (or maybe 'gather'?)! // Unsplit zz ierr = VecStrideScatterAll(zzsplit, zz, INSERT_VALUES);CHKERRQ(ierr); //YES! 'Scatter' means 'collect' (or maybe 'gather'?)! if(view_y) { ierr = PetscPrintf(PETSC_COMM_WORLD, "y = \n");CHKERRQ(ierr); ierr = VecView(y, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "yy = \n");CHKERRQ(ierr); ierr = VecView(yy, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } if(view_z) { ierr = PetscPrintf(PETSC_COMM_WORLD, "z = \n");CHKERRQ(ierr); ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "zz = \n");CHKERRQ(ierr); ierr = VecView(zz, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* compare x and z. USFFT computes an unnormalized DFT, thus z = N*x */ s = 1.0/(PetscReal)N; ierr = VecScale(z,s);CHKERRQ(ierr); ierr = VecAXPY(x,-1.0,z);CHKERRQ(ierr); ierr = VecNorm(x,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|x-z| = %g\n",enorm);CHKERRQ(ierr); /* compare xx and zz. FFTW computes an unnormalized DFT, thus zz = N*x */ s = 1.0/(PetscReal)N; ierr = VecScale(zz,s);CHKERRQ(ierr); ierr = VecAXPY(xx,-1.0,zz);CHKERRQ(ierr); ierr = VecNorm(xx,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|xx-zz| = %g\n",enorm);CHKERRQ(ierr); /* compare y and yy: USFFT and FFTW results*/ ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); ierr = VecAXPY(y,-1.0,yy);CHKERRQ(ierr); ierr = VecNorm(y,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|y|_2 = %g\n",norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|y-yy| = %g\n",enorm);CHKERRQ(ierr); /* compare z and zz: USFFT and FFTW results*/ ierr = VecNorm(z,NORM_2,&norm);CHKERRQ(ierr); ierr = VecAXPY(z,-1.0,zz);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|z|_2 = %g\n",norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF, "|z-zz| = %g\n",enorm);CHKERRQ(ierr); /* free spaces */ ierr = DMRestoreGlobalVector(da,&x);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&xx);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&y);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&yy);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&z);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&zz);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode KSPSolve_IBCGS(KSP ksp) { PetscErrorCode ierr; PetscInt i,N; PetscReal rnorm,rnormin = 0.0; #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithematic rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits) and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE. Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double precision into a 16 byte space (the rest of the space is ignored) */ long double insums[7],outsums[7]; #else PetscScalar insums[7],outsums[7]; #endif PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2; PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan; const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un; PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn; /* the rest do not have to keep n_1 values */ PetscScalar kappan, thetan, etan, gamman, betan, deltan; const PetscScalar *PETSC_RESTRICT tn; PetscScalar *PETSC_RESTRICT sn; Vec R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un; Mat A; PetscFunctionBegin; if (!ksp->vec_rhs->petscnative) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Only coded for PETSc vectors"); #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) /* since 80 bit long doubls do not fill the upper bits, we fill them initially so that valgrind won't detect MPI_Allreduce() with uninitialized data */ ierr = PetscMemzero(insums,sizeof(insums));CHKERRQ(ierr); ierr = PetscMemzero(insums,sizeof(insums));CHKERRQ(ierr); #endif ierr = PCGetOperators(ksp->pc,&A,NULL);CHKERRQ(ierr); ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); Xn = ksp->vec_sol; ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr); ierr = VecRestoreArray(Xn_1,NULL);CHKERRQ(ierr); B = ksp->vec_rhs; ierr = VecGetArrayRead(B,(const PetscScalar**)&b);CHKERRQ(ierr); ierr = VecRestoreArrayRead(B,NULL);CHKERRQ(ierr); R0 = ksp->work[0]; ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr); ierr = VecRestoreArrayRead(R0,NULL);CHKERRQ(ierr); Rn = ksp->work[1]; ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr); ierr = VecRestoreArray(Rn_1,NULL);CHKERRQ(ierr); Un = ksp->work[2]; ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr); ierr = VecRestoreArrayRead(Un_1,NULL);CHKERRQ(ierr); F0 = ksp->work[3]; ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr); ierr = VecRestoreArrayRead(F0,NULL);CHKERRQ(ierr); Vn = ksp->work[4]; ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr); ierr = VecRestoreArray(Vn_1,NULL);CHKERRQ(ierr); Zn = ksp->work[5]; ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr); ierr = VecRestoreArray(Zn_1,NULL);CHKERRQ(ierr); Qn = ksp->work[6]; ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr); ierr = VecRestoreArrayRead(Qn_1,NULL);CHKERRQ(ierr); Tn = ksp->work[7]; ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr); ierr = VecRestoreArrayRead(Tn,NULL);CHKERRQ(ierr); Sn = ksp->work[8]; ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr); ierr = VecRestoreArrayRead(Sn,NULL);CHKERRQ(ierr); /* r0 = rn_1 = b - A*xn_1; */ /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr); ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */ ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr); ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr); /* un_1 = A*rn_1; */ ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr); /* f0 = A'*rn_1; */ if (ksp->pc_side == PC_RIGHT) { /* B' A' */ ierr = KSP_MatMultTranspose(ksp,A,R0,Tn);CHKERRQ(ierr); ierr = KSP_PCApplyTranspose(ksp,Tn,F0);CHKERRQ(ierr); } else if (ksp->pc_side == PC_LEFT) { /* A' B' */ ierr = KSP_PCApplyTranspose(ksp,R0,Tn);CHKERRQ(ierr); ierr = KSP_MatMultTranspose(ksp,A,Tn,F0);CHKERRQ(ierr); } /*qn_1 = vn_1 = zn_1 = 0.0; */ ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr); sigman_2 = pin_1 = taun_1 = 0.0; /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */ ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); /* sigman_1 = rn_1'un_1 */ ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); alphan_1 = omegan_1 = 1.0; for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) { rhon = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1; if (ksp->its == 1) deltan = rhon; else deltan = rhon/taun_1; betan = deltan/omegan_1; taun = sigman_1 + betan*taun_1 - deltan*pin_1; if (taun == 0.0) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to taun is zero, iteration %D",ksp->its); else { ksp->reason = KSP_DIVERGED_NANORINF; PetscFunctionReturn(0); } } alphan = rhon/taun; ierr = PetscLogFlops(15.0);CHKERRQ(ierr); /* zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1 vn = un_1 + betan*vn_1 - deltan*qn_1 sn = rn_1 - alphan*vn The algorithm in the paper is missing the alphan/alphan_1 term in the zn update */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); tmp1 = (alphan/alphan_1)*betan; tmp2 = alphan*deltan; for (i=0; i<N; i++) { zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i]; vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i]; sn[i] = rn_1[i] - alphan*vn[i]; } ierr = PetscLogFlops(3.0+11.0*N);CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); /* qn = A*vn */ ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr); /* tn = un_1 - alphan*qn */ ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr); /* phin = r0'sn pin = r0'qn gamman = f0'sn etan = f0'tn thetan = sn'tn kappan = tn'tn */ ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); phin = pin = gamman = etan = thetan = kappan = 0.0; for (i=0; i<N; i++) { phin += r0[i]*sn[i]; pin += r0[i]*qn[i]; gamman += f0[i]*sn[i]; etan += f0[i]*tn[i]; thetan += sn[i]*tn[i]; kappan += tn[i]*tn[i]; } ierr = PetscLogFlops(12.0*N);CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = phin; insums[1] = pin; insums[2] = gamman; insums[3] = etan; insums[4] = thetan; insums[5] = kappan; insums[6] = rnormin; ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) if (ksp->lagnorm && ksp->its > 1) { ierr = MPIU_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); } else { ierr = MPIU_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); } #else if (ksp->lagnorm && ksp->its > 1) { ierr = MPIU_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); } else { ierr = MPIU_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); } #endif ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); phin = outsums[0]; pin = outsums[1]; gamman = outsums[2]; etan = outsums[3]; thetan = outsums[4]; kappan = outsums[5]; if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6])); if (kappan == 0.0) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to kappan is zero, iteration %D",ksp->its); else { ksp->reason = KSP_DIVERGED_NANORINF; PetscFunctionReturn(0); } } if (thetan == 0.0) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to thetan is zero, iteration %D",ksp->its); else { ksp->reason = KSP_DIVERGED_NANORINF; PetscFunctionReturn(0); } } omegan = thetan/kappan; sigman = gamman - omegan*etan; /* rn = sn - omegan*tn xn = xn_1 + zn + omegan*sn */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); rnormin = 0.0; for (i=0; i<N; i++) { rn[i] = sn[i] - omegan*tn[i]; rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]); xn[i] += zn[i] + omegan*sn[i]; } ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr); ierr = PetscLogFlops(7.0*N);CHKERRQ(ierr); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); if (!ksp->lagnorm && ksp->chknorm < ksp->its) { ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); ierr = MPIU_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr); rnorm = PetscSqrtReal(rnorm); } /* Test for convergence */ ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* un = A*rn */ ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr); /* Update n-1 locations with n locations */ sigman_2 = sigman_1; sigman_1 = sigman; pin_1 = pin; phin_1 = phin; alphan_1 = alphan; taun_1 = taun; omegan_1 = omegan; } if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode FormGradient(SNES snes, Vec X, Vec G,void *ctx) { AppCtx *user=(AppCtx*)ctx; PetscErrorCode info; PetscInt i,j,k,kk; PetscInt row[5],col[5]; PetscInt nx,ny,xs,xm,ys,ym; PetscReal one=1.0, two=2.0, six=6.0,pi=4.0*atan(1.0); PetscReal hx,hy,hxhy,hxhx,hyhy; PetscReal xi,v[5]; PetscReal ecc=user->ecc, trule1,trule2,trule3,trule4,trule5,trule6; PetscReal vmiddle, vup, vdown, vleft, vright; PetscReal tt; PetscReal **x,**g; PetscReal zero=0.0; Vec localX; PetscFunctionBeginUser; nx = user->nx; ny = user->ny; hx = two*pi/(nx+1.0); hy = two*user->b/(ny+1.0); hxhy = hx*hy; hxhx = one/(hx*hx); hyhy = one/(hy*hy); info = VecSet(G, zero);CHKERRQ(info); /* Get local vector */ info = DMGetLocalVector(user->da,&localX);CHKERRQ(info); /* Get ghoist points */ info = DMGlobalToLocalBegin(user->da,X,INSERT_VALUES,localX);CHKERRQ(info); info = DMGlobalToLocalEnd(user->da,X,INSERT_VALUES,localX);CHKERRQ(info); /* Get pointer to vector data */ info = DMDAVecGetArray(user->da,localX,&x);CHKERRQ(info); info = DMDAVecGetArray(user->da,G,&g);CHKERRQ(info); info = DMDAGetCorners(user->da,&xs,&ys,NULL,&xm,&ym,NULL);CHKERRQ(info); for (i=xs; i< xs+xm; i++) { xi = (i+1)*hx; trule1 = hxhy*(p(xi,ecc) + p(xi+hx,ecc) + p(xi,ecc)) / six; /* L(i,j) */ trule2 = hxhy*(p(xi,ecc) + p(xi-hx,ecc) + p(xi,ecc)) / six; /* U(i,j) */ trule3 = hxhy*(p(xi,ecc) + p(xi+hx,ecc) + p(xi+hx,ecc)) / six; /* U(i+1,j) */ trule4 = hxhy*(p(xi,ecc) + p(xi-hx,ecc) + p(xi-hx,ecc)) / six; /* L(i-1,j) */ trule5 = trule1; /* L(i,j-1) */ trule6 = trule2; /* U(i,j+1) */ vdown = -(trule5+trule2)*hyhy; vleft = -hxhx*(trule2+trule4); vright = -hxhx*(trule1+trule3); vup = -hyhy*(trule1+trule6); vmiddle = (hxhx)*(trule1+trule2+trule3+trule4)+hyhy*(trule1+trule2+trule5+trule6); for (j=ys; j<ys+ym; j++) { v[0]=0; v[1]=0; v[2]=0; v[3]=0; v[4]=0; k=0; if (j > 0) { v[k]=vdown; row[k] = i; col[k] = j-1; k++; } if (i > 0) { v[k]= vleft; row[k] = i-1; col[k] = j; k++; } v[k]= vmiddle; row[k] = i; col[k] = j; k++; if (i+1 < nx) { v[k]= vright; row[k] = i+1; col[k] = j; k++; } if (j+1 < ny) { v[k]= vup; row[k] = i; col[k] = j+1; k++; } tt=0; for (kk=0; kk<k; kk++) tt+=v[kk]*x[col[kk]][row[kk]]; g[j][i] = tt; } } /* Restore vectors */ info = DMDAVecRestoreArray(user->da,localX, &x);CHKERRQ(info); info = DMDAVecRestoreArray(user->da,G, &g);CHKERRQ(info); info = DMRestoreLocalVector(user->da,&localX);CHKERRQ(info); info = VecAXPY(G, one, user->B);CHKERRQ(info); info = PetscLogFlops((91 + 10*ym) * xm);CHKERRQ(info); PetscFunctionReturn(0); }
int main(int argc,char **args) { Vec x,y,u,s1,s2; Mat A,sA,sB; PetscRandom rctx; PetscReal r1,r2,rnorm,tol=1.e-10; PetscScalar one=1.0, neg_one=-1.0, value[3], four=4.0,alpha=0.1; PetscInt n,col[3],n1,block,row,i,j,i2,j2,Ii,J,rstart,rend,bs=1,mbs=16,d_nz=3,o_nz=3,prob=2; PetscErrorCode ierr; PetscMPIInt size,rank; PetscBool flg; const MatType type; PetscInitialize(&argc,&args,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-mbs",&mbs,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-bs",&bs,PETSC_NULL);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); n = mbs*bs; /* Assemble MPISBAIJ matrix sA */ ierr = MatCreate(PETSC_COMM_WORLD,&sA);CHKERRQ(ierr); ierr = MatSetSizes(sA,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetType(sA,MATSBAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(sA);CHKERRQ(ierr); ierr = MatGetType(sA,&type);CHKERRQ(ierr); /* printf(" mattype: %s\n",type); */ ierr = MatMPISBAIJSetPreallocation(sA,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr); ierr = MatSeqSBAIJSetPreallocation(sA,bs,d_nz,PETSC_NULL);CHKERRQ(ierr); ierr = MatSetOption(sA,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); if (bs == 1){ if (prob == 1){ /* tridiagonal matrix */ value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<n-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = n - 1; col[0]=0; col[1] = n - 2; col[2] = n - 1; value[0]= 0.1; value[1]=-1; value[2]=2; ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0; col[0] = 0; col[1] = 1; col[2]=n-1; value[0] = 2.0; value[1] = -1.0; value[2]=0.1; ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } else if (prob ==2){ /* matrix for the five point stencil */ n1 = (int) PetscSqrtReal((PetscReal)n); if (n1*n1 != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n must be a perfect square of n1"); for (i=0; i<n1; i++) { for (j=0; j<n1; j++) { Ii = j + n1*i; if (i>0) {J = Ii - n1; ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} if (i<n1-1) {J = Ii + n1; ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} if (j<n1-1) {J = Ii + 1; ierr = MatSetValues(sA,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} ierr = MatSetValues(sA,1,&Ii,1,&Ii,&four,INSERT_VALUES);CHKERRQ(ierr); } } } } /* end of if (bs == 1) */ else { /* bs > 1 */ for (block=0; block<n/bs; block++){ /* diagonal blocks */ value[0] = -1.0; value[1] = 4.0; value[2] = -1.0; for (i=1+block*bs; i<bs-1+block*bs; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(sA,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = bs - 1+block*bs; col[0] = bs - 2+block*bs; col[1] = bs - 1+block*bs; value[0]=-1.0; value[1]=4.0; ierr = MatSetValues(sA,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0+block*bs; col[0] = 0+block*bs; col[1] = 1+block*bs; value[0]=4.0; value[1] = -1.0; ierr = MatSetValues(sA,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } /* off-diagonal blocks */ value[0]=-1.0; for (i=0; i<(n/bs-1)*bs; i++){ col[0]=i+bs; ierr = MatSetValues(sA,1,&i,1,col,value,INSERT_VALUES);CHKERRQ(ierr); col[0]=i; row=i+bs; ierr = MatSetValues(sA,1,&row,1,col,value,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(sA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(sA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Test MatView() */ /* ierr = MatView(sA, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = MatView(sA, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); */ /* Assemble MPIBAIJ matrix A */ ierr = MatCreateBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,n,n,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); if (bs == 1){ if (prob == 1){ /* tridiagonal matrix */ value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<n-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = n - 1; col[0]=0; col[1] = n - 2; col[2] = n - 1; value[0]= 0.1; value[1]=-1; value[2]=2; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0; col[0] = 0; col[1] = 1; col[2]=n-1; value[0] = 2.0; value[1] = -1.0; value[2]=0.1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } else if (prob ==2){ /* matrix for the five point stencil */ n1 = (int) PetscSqrtReal((PetscReal)n); for (i=0; i<n1; i++) { for (j=0; j<n1; j++) { Ii = j + n1*i; if (i>0) {J = Ii - n1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} if (i<n1-1) {J = Ii + n1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} if (j<n1-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&neg_one,INSERT_VALUES);CHKERRQ(ierr);} ierr = MatSetValues(A,1,&Ii,1,&Ii,&four,INSERT_VALUES);CHKERRQ(ierr); } } } } /* end of if (bs == 1) */ else { /* bs > 1 */ for (block=0; block<n/bs; block++){ /* diagonal blocks */ value[0] = -1.0; value[1] = 4.0; value[2] = -1.0; for (i=1+block*bs; i<bs-1+block*bs; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = bs - 1+block*bs; col[0] = bs - 2+block*bs; col[1] = bs - 1+block*bs; value[0]=-1.0; value[1]=4.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0+block*bs; col[0] = 0+block*bs; col[1] = 1+block*bs; value[0]=4.0; value[1] = -1.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } /* off-diagonal blocks */ value[0]=-1.0; for (i=0; i<(n/bs-1)*bs; i++){ col[0]=i+bs; ierr = MatSetValues(A,1,&i,1,col,value,INSERT_VALUES);CHKERRQ(ierr); col[0]=i; row=i+bs; ierr = MatSetValues(A,1,&row,1,col,value,INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Test MatGetSize(), MatGetLocalSize() */ ierr = MatGetSize(sA, &i,&j); ierr = MatGetSize(A, &i2,&j2); i -= i2; j -= j2; if (i || j) { PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatGetSize()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } ierr = MatGetLocalSize(sA, &i,&j); ierr = MatGetLocalSize(A, &i2,&j2); i2 -= i; j2 -= j; if (i2 || j2) { PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatGetLocalSize()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } /* vectors */ /*--------------------*/ /* i is obtained from MatGetLocalSize() */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,i,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); ierr = VecDuplicate(x,&s1);CHKERRQ(ierr); ierr = VecDuplicate(x,&s2);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); ierr = VecSetRandom(x,rctx);CHKERRQ(ierr); ierr = VecSet(u,one);CHKERRQ(ierr); /* Test MatNorm() */ ierr = MatNorm(A,NORM_FROBENIUS,&r1);CHKERRQ(ierr); ierr = MatNorm(sA,NORM_FROBENIUS,&r2);CHKERRQ(ierr); rnorm = PetscAbsScalar(r1-r2)/r2; if (rnorm > tol && !rank){ PetscPrintf(PETSC_COMM_SELF,"Error: MatNorm_FROBENIUS(), Anorm=%16.14e, sAnorm=%16.14e bs=%D\n",r1,r2,bs); } ierr = MatNorm(A,NORM_INFINITY,&r1);CHKERRQ(ierr); ierr = MatNorm(sA,NORM_INFINITY,&r2);CHKERRQ(ierr); rnorm = PetscAbsScalar(r1-r2)/r2; if (rnorm > tol && !rank){ PetscPrintf(PETSC_COMM_WORLD,"Error: MatNorm_INFINITY(), Anorm=%16.14e, sAnorm=%16.14e bs=%D\n",r1,r2,bs); } ierr = MatNorm(A,NORM_1,&r1);CHKERRQ(ierr); ierr = MatNorm(sA,NORM_1,&r2);CHKERRQ(ierr); rnorm = PetscAbsScalar(r1-r2)/r2; if (rnorm > tol && !rank){ PetscPrintf(PETSC_COMM_WORLD,"Error: MatNorm_1(), Anorm=%16.14e, sAnorm=%16.14e bs=%D\n",r1,r2,bs); } /* Test MatGetOwnershipRange() */ ierr = MatGetOwnershipRange(sA,&rstart,&rend);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&i2,&j2);CHKERRQ(ierr); i2 -= rstart; j2 -= rend; if (i2 || j2) { PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MaGetOwnershipRange()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } /* Test MatDiagonalScale() */ ierr = MatDiagonalScale(A,x,x);CHKERRQ(ierr); ierr = MatDiagonalScale(sA,x,x);CHKERRQ(ierr); ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatDiagonalScale"); /* Test MatGetDiagonal(), MatScale() */ ierr = MatGetDiagonal(A,s1);CHKERRQ(ierr); ierr = MatGetDiagonal(sA,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&r1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&r2);CHKERRQ(ierr); r1 -= r2; if (r1<-tol || r1>tol) { PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatDiagonalScale() or MatGetDiagonal(), r1=%G \n",rank,r1); PetscSynchronizedFlush(PETSC_COMM_WORLD); } ierr = MatScale(A,alpha);CHKERRQ(ierr); ierr = MatScale(sA,alpha);CHKERRQ(ierr); /* Test MatGetRowMaxAbs() */ ierr = MatGetRowMaxAbs(A,s1,PETSC_NULL);CHKERRQ(ierr); ierr = MatGetRowMaxAbs(sA,s2,PETSC_NULL);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&r1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&r2);CHKERRQ(ierr); r1 -= r2; if (r1<-tol || r1>tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: MatGetRowMaxAbs() \n");CHKERRQ(ierr); } /* Test MatMult(), MatMultAdd() */ ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg){ PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatMult() or MatScale()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } ierr = MatMultAddEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg){ PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatMultAdd()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } /* Test MatMultTranspose(), MatMultTransposeAdd() */ for (i=0; i<10; i++) { ierr = VecSetRandom(x,rctx);CHKERRQ(ierr); ierr = MatMultTranspose(A,x,s1);CHKERRQ(ierr); ierr = MatMultTranspose(sA,x,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&r1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&r2);CHKERRQ(ierr); r1 -= r2; if (r1<-tol || r1>tol) { PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatMult() or MatScale(), err=%G\n",rank,r1); PetscSynchronizedFlush(PETSC_COMM_WORLD); } } for (i=0; i<10; i++) { ierr = VecSetRandom(x,rctx);CHKERRQ(ierr); ierr = VecSetRandom(y,rctx);CHKERRQ(ierr); ierr = MatMultTransposeAdd(A,x,y,s1);CHKERRQ(ierr); ierr = MatMultTransposeAdd(sA,x,y,s2);CHKERRQ(ierr); ierr = VecNorm(s1,NORM_1,&r1);CHKERRQ(ierr); ierr = VecNorm(s2,NORM_1,&r2);CHKERRQ(ierr); r1 -= r2; if (r1<-tol || r1>tol) { PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatMultAdd(), err=%G \n",rank,r1); PetscSynchronizedFlush(PETSC_COMM_WORLD); } } /* ierr = MatView(sA, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ /* ierr = MatView(sA, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); */ /* Test MatDuplicate() */ ierr = MatDuplicate(sA,MAT_COPY_VALUES,&sB);CHKERRQ(ierr); ierr = MatEqual(sA,sB,&flg);CHKERRQ(ierr); if (!flg){ PetscPrintf(PETSC_COMM_WORLD," Error in MatDuplicate(), sA != sB \n");CHKERRQ(ierr); } ierr = MatMultEqual(sA,sB,5,&flg);CHKERRQ(ierr); if (!flg){ PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatDuplicate() or MatMult()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } ierr = MatMultAddEqual(sA,sB,5,&flg);CHKERRQ(ierr); if (!flg){ PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], Error: MatDuplicate() or MatMultAdd(()\n",rank); PetscSynchronizedFlush(PETSC_COMM_WORLD); } ierr = MatDestroy(&sB);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&s1);CHKERRQ(ierr); ierr = VecDestroy(&s2);CHKERRQ(ierr); ierr = MatDestroy(&sA);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc, char **argv) { PetscErrorCode info; /* used to check for functions returning nonzeros */ Vec x; /* variables vector */ Vec xl,xu; /* lower and upper bound on variables */ PetscBool flg; /* A return variable when checking for user options */ SNESConvergedReason reason; AppCtx user; /* user-defined work context */ SNES snes; Vec r; PetscReal zero=0.0,thnd=1000; /* Initialize PETSC */ PetscInitialize(&argc, &argv,(char*)0,help); #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This example does not work for scalar type complex\n"); #endif /* Set the default values for the problem parameters */ user.nx = 50; user.ny = 50; user.ecc = 0.1; user.b = 10.0; /* Check for any command line arguments that override defaults */ info = PetscOptionsGetReal(NULL,"-ecc",&user.ecc,&flg);CHKERRQ(info); info = PetscOptionsGetReal(NULL,"-b",&user.b,&flg);CHKERRQ(info); /* A two dimensional distributed array will help define this problem, which derives from an elliptic PDE on two dimensional domain. From the distributed array, Create the vectors. */ info = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR,-50,-50,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&user.da);CHKERRQ(info); info = DMDAGetInfo(user.da,PETSC_IGNORE,&user.nx,&user.ny,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(info); PetscPrintf(PETSC_COMM_WORLD,"\n---- Journal Bearing Problem -----\n"); PetscPrintf(PETSC_COMM_WORLD,"mx: %d, my: %d, ecc: %4.3f, b:%3.1f \n", user.nx,user.ny,user.ecc,user.b); /* Extract global and local vectors from DA; the vector user.B is used solely as work space for the evaluation of the function, gradient, and Hessian. Duplicate for remaining vectors that are the same types. */ info = DMCreateGlobalVector(user.da,&x);CHKERRQ(info); /* Solution */ info = VecDuplicate(x,&user.B);CHKERRQ(info); /* Linear objective */ info = VecDuplicate(x,&r);CHKERRQ(info); /* Create matrix user.A to store quadratic, Create a local ordering scheme. */ info = DMCreateMatrix(user.da,MATAIJ,&user.A);CHKERRQ(info); /* User defined function -- compute linear term of quadratic */ info = ComputeB(&user);CHKERRQ(info); /* Create nonlinear solver context */ info = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(info); /* Set function evaluation and Jacobian evaluation routines */ info = SNESSetFunction(snes,r,FormGradient,&user);CHKERRQ(info); info = SNESSetJacobian(snes,user.A,user.A,FormHessian,&user);CHKERRQ(info); /* Set the initial solution guess */ info = VecSet(x, zero);CHKERRQ(info); info = SNESSetFromOptions(snes);CHKERRQ(info); /* Set variable bounds */ info = VecDuplicate(x,&xl);CHKERRQ(info); info = VecDuplicate(x,&xu);CHKERRQ(info); info = VecSet(xl,zero);CHKERRQ(info); info = VecSet(xu,thnd);CHKERRQ(info); info = SNESVISetVariableBounds(snes,xl,xu);CHKERRQ(info); /* Solve the application */ info = SNESSolve(snes,NULL,x);CHKERRQ(info); info = SNESGetConvergedReason(snes,&reason);CHKERRQ(info); if (reason <= 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"The SNESVI solver did not converge, adjust some parameters, or check the function evaluation routines\n"); /* Free memory */ info = VecDestroy(&x);CHKERRQ(info); info = VecDestroy(&xl);CHKERRQ(info); info = VecDestroy(&xu);CHKERRQ(info); info = VecDestroy(&r);CHKERRQ(info); info = MatDestroy(&user.A);CHKERRQ(info); info = VecDestroy(&user.B);CHKERRQ(info); info = DMDestroy(&user.da);CHKERRQ(info); info = SNESDestroy(&snes);CHKERRQ(info); info = PetscFinalize(); return 0; }
/* Compute cyclicly eigenvalue */ PetscErrorCode Arnoldi(com_lsa * com, Mat * A, Vec *v){ EPS eps; /* eigensolver context */ char load_path[PETSC_MAX_PATH_LEN],export_path[PETSC_MAX_PATH_LEN]; PetscInt end,first,validated; PetscErrorCode ierr; /* eigenvalues number is set to 100, can be changed if needed we choosed to fix it because mallocs weren't working properly */ PetscScalar eigenvalues[1000], ei, er; PetscReal re,im,vnorm; PetscInt eigen_nb,j,i,size,one=1, taille; Vec initialv,nullv,*vs; PetscBool flag,data_load,data_export,continuous_export,load_any; int exit_type=0, counter = 0, l; int sos_type = 911; Vec vecteur_initial; PetscViewer viewer; PetscBool need_new_init = PETSC_FALSE, exit = PETSC_FALSE; sprintf(load_path,"./arnoldi.bin"); sprintf(export_path,"./arnoldi.bin"); PetscViewerCreate(PETSC_COMM_WORLD,&viewer); // PetscViewerSetType(viewer,PETSCVIEWERBINARY); // if (skippheader) { PetscViewerBinarySetSkipHeader(viewer,PETSC_TRUE); } // PetscViewerFileSetMode(viewer,FILE_MODE_WRITE); // PetscViewerBinarySetUseMPIIO(viewer,PETSC_TRUE); // PetscViewerFileSetName(viewer,"arnoldidbg.txt"); /* create the eigensolver */ ierr=EPSCreate(PETSC_COMM_WORLD,&eps);CHKERRQ(ierr); /* set the matrix operator */ ierr=EPSSetOperators(eps,*A,PETSC_NULL); /* set options */ ierr=EPSSetType(eps,EPSARNOLDI); ierr=EPSSetFromOptions(eps);CHKERRQ(ierr); /* duplicate vector properties */ ierr=VecDuplicate(*v,&initialv);CHKERRQ(ierr); ierr=VecDuplicate(*v,&nullv);CHKERRQ(ierr); ierr=VecSet(nullv,(PetscScalar)0.0);CHKERRQ(ierr); /* ierr=VecSet(initialv,(PetscScalar)1.0);CHKERRQ(ierr);*/ ierr=VecSetRandom(initialv,PETSC_NULL);//initialize initial vector to random ierr=VecGetSize(initialv,&size);CHKERRQ(ierr); ierr=PetscOptionsGetInt(PETSC_NULL,"-ksp_ls_eigen",&eigen_nb,&flag);CHKERRQ(ierr); if(!flag) eigen_nb=EIGEN_ALL; ierr=PetscOptionsGetString(PETSC_NULL,"-ksp_arnoldi_load",load_path,PETSC_MAX_PATH_LEN,&data_load);CHKERRQ(ierr); ierr=PetscOptionsGetString(PETSC_NULL,"-ksp_arnoldi_export",export_path,PETSC_MAX_PATH_LEN,&data_export);CHKERRQ(ierr); ierr=PetscOptionsHasName(PETSC_NULL,"-ksp_arnoldi_load_any",&load_any);CHKERRQ(ierr); ierr=PetscOptionsHasName(PETSC_NULL,"-ksp_arnoldi_cexport",&continuous_export);CHKERRQ(ierr); if(load_any) PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi loading default data file\n"); PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi path in= %s out= %s\n",load_path,export_path); PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi allocating buffer of %d for invariant subspace\n",eigen_nb*2); vs=malloc(size*sizeof(Vec)); for(i=0;i<size;i++){ ierr=VecDuplicate(*v,&vs[i]);CHKERRQ(ierr); } ierr=VecDuplicate(initialv,&vecteur_initial);CHKERRQ(ierr); /* vecteur_initial = malloc(size * sizeof(PetscScalar));*/ // setting_out_vec_sizes( com, v); end=0; first=1; validated=1; while(!end){ /*check if the program need to exit */ if(exit == PETSC_TRUE) break; /* check if we received an exit message from Father*/ if(!mpi_lsa_com_type_recv(com,&exit_type)){ if(exit_type==666){ end=1; PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi Sending Exit message\n"); mpi_lsa_com_type_send(com,&exit_type); break; } } /* check if we received some data from GMRES */ if(!mpi_lsa_com_vec_recv(com, &initialv)){ VecGetSize(initialv, &taille); /* printf(" ========= %d I RECEIVED %d DATA FROM GMRES ============\n",com->rank_world, taille);*/ /* ierr = VecCopy(vecteur_initial, initialv);*/ } /* */ /* if(!mpi_lsa_com_array_recv(com, &taille, vecteur_initial)){*/ /* // VecGetSize(initialv, &taille);*/ /* printf(" ========= %d I RECEIVED %d DATA FROM GMRES ============\n",com->rank_world, taille);*/ /* for (i = 0; i < taille; i++)*/ /* PetscPrintf(PETSC_COMM_WORLD,"==== > arnoldi %d [%d] = %e\n",com->rank_world, i, vecteur_initial[i]);*/ /* } */ for(j=0;j<eigen_nb;j++){ eigenvalues[j]=(PetscScalar)0.0; } //FIXME: refactoriser les if suivants + flags file read, c'est très très moche if(data_load&&load_any){ load_any=PETSC_FALSE; data_load=PETSC_TRUE; } ierr = VecAssemblyBegin(initialv);CHKERRQ(ierr); ierr = VecAssemblyEnd(initialv);CHKERRQ(ierr); if(!(data_load^=load_any)){ ierr=EPSSetInitialSpace(eps,1,&initialv);CHKERRQ(ierr); } else { /* PetscPrintf(PETSC_COMM_WORLD,"==== > I AM LOADING DATA FROM FILE\n");*/ /* PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi Reading file %s\n",load_path);*/ ierr=readBinaryVecArray(load_path,(int*)one,&initialv);CHKERRQ(ierr); data_load=PETSC_FALSE; load_any=PETSC_FALSE; ierr=EPSSetInitialSpace(eps,1,&initialv);CHKERRQ(ierr); /* PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi Has Read file %s\n",load_path);*/ } ierr=EPSSolve(eps);CHKERRQ(ierr); /*construct new initial vector*/ ierr=EPSGetInvariantSubspace(eps, vs);CHKERRQ(ierr); ++counter; /* get the number of guessed eigenvalues */ ierr=EPSGetConverged(eps,&eigen_nb);CHKERRQ(ierr); /* #ifdef DEBUG*/ /* PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi %d converged eigenvalues\n",eigen_nb);*/ /* #endif*/ /* send them */ for(j=0;j<eigen_nb;j++){ //EPSGetValue(eps,j,&er,&ei); //ierr = EPSGetEigenpair(eps,j,&er,&ei,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = EPSGetEigenvalue(eps,j,&er,&ei);CHKERRQ(ierr); #ifdef PETSC_USE_COMPLEX re=PetscRealPart(er); im=PetscImaginaryPart(er); #else re=er; im=ei; #endif eigenvalues[j]=(PetscScalar)re+PETSC_i*(PetscScalar)im; // #ifdef DEBUG if(im!=0.0) PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi %d/%d val : %e %e\n",j,eigen_nb,re,im); else PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi %d/%d val : %e\n",j,eigen_nb,er); // #endif } /* ierr=VecGetSize(initialv,&taille);CHKERRQ(ierr);*/ /* PetscPrintf(PETSC_COMM_WORLD,"==== > OUR INITIALV IS OF SIZE %d\n", taille);*/ /* vecteur_initial = realloc(vecteur_initial,taille); */ /* ierr=VecGetArray(initialv, &vecteur_initial);CHKERRQ(ierr);*/ /* for (i = 0; i < taille; i++)*/ /* PetscPrintf(PETSC_COMM_WORLD,"==== > initialv[%d] = %e\n", i, vecteur_initial[i]);*/ /* ierr= VecRestoreArray(initialv, &vecteur_initial);CHKERRQ(ierr);*/ if( eigen_nb != 0){ /* #ifdef DEBUG*/ /* PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi Sending to LS\n");*/ /* #endif*/ /* send the data array */ mpi_lsa_com_array_send(com, &eigen_nb, eigenvalues); /*construct new initial vector*/ /* ierr=EPSGetInvariantSubspace(eps, vs);CHKERRQ(ierr);*/ ierr=VecCopy(vs[0],initialv);CHKERRQ(ierr); for(j=1;j<eigen_nb;j++){ ierr=VecAYPX(initialv,(PetscScalar)1.0,vs[j]); } ierr=VecNorm(initialv,NORM_2,&vnorm);CHKERRQ(ierr); ierr=VecAYPX(initialv,(PetscScalar)(1.0/vnorm),nullv);CHKERRQ(ierr); if(continuous_export){ /* ierr=writeBinaryVecArray(data_export?export_path:"./arnoldi.bin", 1, &initialv);*/ } if(!mpi_lsa_com_type_recv(com,&exit_type)){ if(exit_type==666){ end=1; PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi Sending Exit message\n"); mpi_lsa_com_type_send(com,&exit_type); need_new_init = PETSC_FALSE; exit = PETSC_TRUE; break; } } }else{ need_new_init = PETSC_TRUE; PetscPrintf(PETSC_COMM_WORLD, "!!! Arnoldi has not converged so we change the initial vector !!!\n"); while(need_new_init){ // this was my first try to solve the poblem but it doesn't work better /*ierr=VecSetRandom(initialv,PETSC_NULL);CHKERRQ(ier);*/ // Now the best to do i think is to develop a kind of help from GMRES // Arnoldi when no convergence observed will send a msg to GMRES like an SOS //and GMRES will send a vector wich will be used to generate a new initial vector that make arnoldi converge **I HOPE ** //need_new_init = PETSC_TRUE // mpi_lsa_com_type_send(com,&sos_type); // here we send the message // ierr=VecDuplicate(initialv,&vec_tmp_receive); //PetscPrintf(PETSC_COMM_WORLD, "!!! Arnoldi has not converged so we change the initial vector !!!\n"); /* check if there's an incoming message */ if(!mpi_lsa_com_vec_recv(com, &initialv)){ /* if(!mpi_lsa_com_array_recv(com, &taille, vecteur_initial)){*/ /* printf(" ========= I RECEIVED SOME DATA FROM GMRES ============\n");*/ /* ierr = VecCopy(vecteur_initial, initialv);*/ need_new_init = PETSC_FALSE; }else{ if(!mpi_lsa_com_type_recv(com,&exit_type)){ if(exit_type==666){ end=1; /* PetscPrintf(PETSC_COMM_WORLD,"*} Arnoldi Sending Exit message\n");*/ mpi_lsa_com_type_send(com,&exit_type); need_new_init = PETSC_FALSE; exit = PETSC_TRUE; break; } } } //goto checking; //return 1; } if(exit == PETSC_TRUE) break; } // i will check it later } /* if(data_export){*/ /* ierr=writeBinaryVecArray(export_path, 1, &initialv);*/ /* }*/ for(i=0;i<eigen_nb;i++){ ierr=VecDestroy(&(vs[i]));CHKERRQ(ierr); } /* ierr=PetscFree(vs);CHKERRQ(ierr);*/ /* and destroy the eps */ ierr=EPSDestroy(&eps);CHKERRQ(ierr); ierr=VecDestroy(&initialv);CHKERRQ(ierr); ierr=VecDestroy(&nullv);CHKERRQ(ierr); return 0; }
PetscErrorCode PCMGMCycle_Private(PC pc,PC_MG_Levels **mglevelsin,PCRichardsonConvergedReason *reason) { PC_MG *mg = (PC_MG*)pc->data; PC_MG_Levels *mgc,*mglevels = *mglevelsin; PetscErrorCode ierr; PetscInt cycles = (mglevels->level == 1) ? 1 : (PetscInt) mglevels->cycles; PetscFunctionBegin; if (mglevels->eventsmoothsolve) { ierr = PetscLogEventBegin(mglevels->eventsmoothsolve,0,0,0,0); CHKERRQ(ierr); } ierr = KSPSolve(mglevels->smoothd,mglevels->b,mglevels->x); CHKERRQ(ierr); /* pre-smooth */ if (mglevels->eventsmoothsolve) { ierr = PetscLogEventEnd(mglevels->eventsmoothsolve,0,0,0,0); CHKERRQ(ierr); } if (mglevels->level) { /* not the coarsest grid */ if (mglevels->eventresidual) { ierr = PetscLogEventBegin(mglevels->eventresidual,0,0,0,0); CHKERRQ(ierr); } ierr = (*mglevels->residual)(mglevels->A,mglevels->b,mglevels->x,mglevels->r); CHKERRQ(ierr); if (mglevels->eventresidual) { ierr = PetscLogEventEnd(mglevels->eventresidual,0,0,0,0); CHKERRQ(ierr); } /* if on finest level and have convergence criteria set */ if (mglevels->level == mglevels->levels-1 && mg->ttol && reason) { PetscReal rnorm; ierr = VecNorm(mglevels->r,NORM_2,&rnorm); CHKERRQ(ierr); if (rnorm <= mg->ttol) { if (rnorm < mg->abstol) { *reason = PCRICHARDSON_CONVERGED_ATOL; ierr = PetscInfo2(pc,"Linear solver has converged. Residual norm %G is less than absolute tolerance %G\n",rnorm,mg->abstol); CHKERRQ(ierr); } else { *reason = PCRICHARDSON_CONVERGED_RTOL; ierr = PetscInfo2(pc,"Linear solver has converged. Residual norm %G is less than relative tolerance times initial residual norm %G\n",rnorm,mg->ttol); CHKERRQ(ierr); } PetscFunctionReturn(0); } } mgc = *(mglevelsin - 1); if (mglevels->eventinterprestrict) { ierr = PetscLogEventBegin(mglevels->eventinterprestrict,0,0,0,0); CHKERRQ(ierr); } ierr = MatRestrict(mglevels->restrct,mglevels->r,mgc->b); CHKERRQ(ierr); if (mglevels->eventinterprestrict) { ierr = PetscLogEventEnd(mglevels->eventinterprestrict,0,0,0,0); CHKERRQ(ierr); } ierr = VecSet(mgc->x,0.0); CHKERRQ(ierr); while (cycles--) { ierr = PCMGMCycle_Private(pc,mglevelsin-1,reason); CHKERRQ(ierr); } if (mglevels->eventinterprestrict) { ierr = PetscLogEventBegin(mglevels->eventinterprestrict,0,0,0,0); CHKERRQ(ierr); } ierr = MatInterpolateAdd(mglevels->interpolate,mgc->x,mglevels->x,mglevels->x); CHKERRQ(ierr); if (mglevels->eventinterprestrict) { ierr = PetscLogEventEnd(mglevels->eventinterprestrict,0,0,0,0); CHKERRQ(ierr); } if (mglevels->eventsmoothsolve) { ierr = PetscLogEventBegin(mglevels->eventsmoothsolve,0,0,0,0); CHKERRQ(ierr); } ierr = KSPSolve(mglevels->smoothu,mglevels->b,mglevels->x); CHKERRQ(ierr); /* post smooth */ if (mglevels->eventsmoothsolve) { ierr = PetscLogEventEnd(mglevels->eventsmoothsolve,0,0,0,0); CHKERRQ(ierr); } } PetscFunctionReturn(0); }
int main(int argc,char **argv) { SNES snes; /* nonlinear solver context */ Vec x,r; /* solution, residual vectors */ Mat J; /* Jacobian matrix */ PetscErrorCode ierr; PetscScalar *xx; PetscInt i,max_snes_solves = 20,snes_steps_per_solve = 2,criteria_reduce = 1; Ctx ctx; SNESConvergedReason reason; PetscInitialize(&argc,&argv,(char*)0,help); ctx.n = 0; ierr = PetscOptionsGetInt(NULL,"-n",&ctx.n,NULL);CHKERRQ(ierr); ctx.p = 0; ierr = PetscOptionsGetInt(NULL,"-p",&ctx.p,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-max_snes_solves",&max_snes_solves,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-snes_steps_per_solve",&snes_steps_per_solve,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-criteria_reduce",&criteria_reduce,NULL);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create nonlinear solver context - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create matrix and vector data structures; set corresponding routines - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create vectors for solution and nonlinear function */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,2+ctx.n+ctx.p);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&r);CHKERRQ(ierr); /* Create Jacobian matrix data structure */ ierr = MatCreate(PETSC_COMM_WORLD,&J);CHKERRQ(ierr); ierr = MatSetSizes(J,PETSC_DECIDE,PETSC_DECIDE,2+ctx.p+ctx.n,2+ctx.p+ctx.n);CHKERRQ(ierr); ierr = MatSetFromOptions(J);CHKERRQ(ierr); /* Set function evaluation routine and vector. */ ierr = SNESSetFunction(snes,r,FormFunction1,(void*)&ctx);CHKERRQ(ierr); /* Set Jacobian matrix data structure and Jacobian evaluation routine */ ierr = SNESSetJacobian(snes,J,J,FormJacobian1,(void*)&ctx);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Customize nonlinear solver; set runtime options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Evaluate initial guess; then solve nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecSet(x,0.0);CHKERRQ(ierr); ierr = VecGetArray(x,&xx);CHKERRQ(ierr); xx[0] = -1.2; for (i=1; i<ctx.p+2; i++) xx[i] = 1.0; ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); /* Note: The user should initialize the vector, x, with the initial guess for the nonlinear solver prior to calling SNESSolve(). In particular, to employ an initial guess of zero, the user should explicitly set this vector to zero by calling VecSet(). */ ierr = SNESMonitorSet(snes,MonitorRange,0,0);CHKERRQ(ierr); ierr = SNESSetTolerances(snes,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,snes_steps_per_solve,PETSC_DEFAULT);CHKERRQ(ierr); for (i=0; i<max_snes_solves; i++) { ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes,&reason);CHKERRQ(ierr); if (reason && reason != SNES_DIVERGED_MAX_IT) break; if (CountGood > criteria_reduce) { ierr = SolveSubproblem(snes);CHKERRQ(ierr); CountGood = 0; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Free work space. All PETSc objects should be destroyed when they are no longer needed. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&r);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); ierr = SNESDestroy(&snes);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/* ‘ункци¤ рисовани¤. * ј–√”ћ≈Ќ“џ: * - геометрический объект: * ap6GEOM *G; * ¬ќ«¬–јўј≈ћќ≈ «Ќј„≈Ќ»≈: Ќет. */ VOID AP6_GeomDraw( ap6GEOM *G ) { INT i, loc; MATR WVP, MatrWorldInvTrans = {{{0}}}; VEC V; UINT SaveProgId; /* вычислили матрицы преобразовани¤ */ WVP = MatrMulMatr(AP6_Anim.MatrWorld, MatrMulMatr(AP6_Anim.MatrView, AP6_Anim.MatrProjection)); MatrWorldInvTrans = MatrTranspose(MatrInverse(AP6_Anim.MatrWorld)); /* отладочный вывод */ glLoadMatrixf(WVP.A[0]); /* ѕерезагружаем шейдера раз в секунду */ if (G->ProgName[0] != 0) { if (G->ProgTime + 1.0 < AP6_Anim.GlobalTime) { static CHAR Buf[2][MAX_STR]; AP6_ShadProgClose(G->ProgId); sprintf(Buf[0], "SHADERS\\%s.vert", G->ProgName); sprintf(Buf[1], "SHADERS\\%s.frag", G->ProgName); G->ProgTime = AP6_Anim.GlobalTime; G->ProgId = AP6_ShadProgInit(Buf[0], Buf[1]); } } /* сохран¤ем шейдера */ if (G->ProgId != 0) { SaveProgId = AP6_ShaderProg; AP6_ShaderProg = G->ProgId; } /* выбор программы шейдеров вывода примитивов */ glUseProgram(AP6_ShaderProg); loc = glGetUniformLocation(AP6_ShaderProg, "MatrWVP"); if (loc != -1) glUniformMatrix4fv(loc, 1, FALSE, WVP.A[0]); loc = glGetUniformLocation(AP6_ShaderProg, "MatrWorldInverseTranspose"); if (loc != -1) glUniformMatrix4fv(loc, 1, FALSE, MatrWorldInvTrans.A[0]); loc = glGetUniformLocation(AP6_ShaderProg, "MatrWorld"); if (loc != -1) glUniformMatrix4fv(loc, 1, FALSE, AP6_Anim.MatrWorld.A[0]); loc = glGetUniformLocation(AP6_ShaderProg, "MatrView"); if (loc != -1) glUniformMatrix4fv(loc, 1, FALSE, AP6_Anim.MatrView.A[0]); loc = glGetUniformLocation(AP6_ShaderProg, "Time"); if (loc != -1) glUniform1f(loc, AP6_Anim.Time); V = VecSet(-AP6_Anim.MatrView.A[0][2], -AP6_Anim.MatrView.A[1][2], -AP6_Anim.MatrView.A[2][2]); loc = glGetUniformLocation(AP6_ShaderProg, "ViewDir"); if (loc != -1) glUniform3fv(loc, 1, &V.X); V = VecSet(AP6_Anim.MatrView.A[3][0], AP6_Anim.MatrView.A[3][1], AP6_Anim.MatrView.A[3][2]); loc = glGetUniformLocation(AP6_ShaderProg, "ViewPos"); if (loc != -1) glUniform3fv(loc, 1, &V.X); loc = glGetUniformLocation(AP6_ShaderProg, "NumOfParts"); if (loc != -1) glUniform1f(loc, G->NumOfPrims); for (i = 0; i < G->NumOfPrims; i++) { INT mtl = G->Prims[i].Mtl; loc = glGetUniformLocation(AP6_ShaderProg, "PartNo"); if (loc != -1) glUniform1f(loc, i); /* подготавливаем материал */ if (G->Mtls != NULL && mtl >= 0 && mtl < G->NumOfMtls) { INT loc; if (G->Mtls[mtl].TexNo == 0 && G->Mtls[mtl].MapD[0] != 0) { INT j; IMAGE Img; ImageLoad(&Img, G->Mtls[mtl].MapD); /* получаем свободный номер текстуры */ glGenTextures(1, &G->Mtls[mtl].TexNo); /* делаем ее активной */ glBindTexture(GL_TEXTURE_2D, G->Mtls[mtl].TexNo); for (j = 0; j < Img.W * Img.H; j++) Img.Bits[j] |= 0xFF000000; /* отправл¤ем картинку в видеопам¤ть */ gluBuild2DMipmaps(GL_TEXTURE_2D, 4, Img.W, Img.H, GL_BGRA_EXT, GL_UNSIGNED_BYTE, Img.Bits); /* ѕараметры вывода */ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glBindTexture(GL_TEXTURE_2D, 0); } /* передаем параметры */ if (G->Mtls[mtl].TexNo != 0) { loc = glGetUniformLocation(AP6_ShaderProg, "IsTextureUsed"); if (loc != -1) glUniform1f(loc, 1); loc = glGetUniformLocation(AP6_ShaderProg, "DrawTexture"); if (loc != -1) glUniform1i(loc, 0); //glEnable(GL_TEXTURE_2D); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, G->Mtls[mtl].TexNo); //glActiveTexture(GL_TEXTURE1); //glBindTexture(GL_TEXTURE_2D, G->Mtls[mtl].TexNo); } else { loc = glGetUniformLocation(AP6_ShaderProg, "IsTextureUsed"); if (loc != -1) glUniform1f(loc, 0); } loc = glGetUniformLocation(AP6_ShaderProg, "Ka"); if (loc != -1) glUniform3fv(loc, 1, &G->Mtls[mtl].Ka.X); loc = glGetUniformLocation(AP6_ShaderProg, "Kd"); if (loc != -1) glUniform3fv(loc, 1, &G->Mtls[mtl].Kd.X); loc = glGetUniformLocation(AP6_ShaderProg, "Ks"); if (loc != -1) glUniform3fv(loc, 1, &G->Mtls[mtl].Ks.X); loc = glGetUniformLocation(AP6_ShaderProg, "Phong"); if (loc != -1) glUniform1f(loc, G->Mtls[mtl].Phong); loc = glGetUniformLocation(AP6_ShaderProg, "Trans"); if (loc != -1) glUniform1f(loc, G->Mtls[mtl].Trans); } loc = glGetUniformLocation(AP6_ShaderProg, "PartNo"); if (loc != -1) glUniform1f(loc, i); AP6_PrimDraw(G->Prims + i); glDisable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, 0); } /* восстанавливаем шейдера */ if (G->ProgId != 0) AP6_ShaderProg = SaveProgId; } /* End of 'AP6_GeomDraw' function */
int main(int argc,char **argv) { PetscErrorCode ierr; DM da,*subda; PetscInt i; PetscMPIInt size,rank; Vec v; Vec slvec,sgvec; IS *ois,*iis; VecScatter oscata; VecScatter *iscat,*oscat,*gscat; DMDALocalInfo info; ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); /* Create distributed array and get vectors */ ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR,-4,-4,PETSC_DECIDE,PETSC_DECIDE,2,1,PETSC_NULL,PETSC_NULL,&da);CHKERRQ(ierr); ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr); ierr = DMCreateDomainDecomposition(da,PETSC_NULL,PETSC_NULL,&iis,&ois,&subda);CHKERRQ(ierr); ierr = DMCreateDomainDecompositionScatters(da,1,subda,&iscat,&oscat,&gscat);CHKERRQ(ierr); { DMDALocalInfo subinfo; MatStencil lower,upper; IS patchis; Vec smallvec; Vec largevec; VecScatter patchscat; ierr = DMDAGetLocalInfo(subda[0],&subinfo);CHKERRQ(ierr); lower.i = subinfo.xs; lower.j = subinfo.ys; lower.k = subinfo.zs; upper.i = subinfo.xs + subinfo.xm; upper.j = subinfo.ys + subinfo.ym; upper.k = subinfo.zs + subinfo.zm; /* test the patch IS as a thing to scatter to/from */ ierr = DMDACreatePatchIS(da,&lower,&upper,&patchis);CHKERRQ(ierr); ierr = ISView(patchis,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = DMGetGlobalVector(da,&largevec);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_SELF,&smallvec);CHKERRQ(ierr); ierr = VecSetSizes(smallvec,2*(upper.i - lower.i)*(upper.j - lower.j),PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(smallvec);CHKERRQ(ierr); ierr = VecScatterCreate(largevec,patchis,smallvec,PETSC_NULL,&patchscat);CHKERRQ(ierr); ierr = VecSet(smallvec,1.);CHKERRQ(ierr); ierr = VecScatterBegin(patchscat,smallvec,largevec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(patchscat,smallvec,largevec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecView(largevec,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = VecDestroy(&smallvec);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&largevec);CHKERRQ(ierr); ierr = ISDestroy(&patchis);CHKERRQ(ierr); ierr = VecScatterDestroy(&patchscat);CHKERRQ(ierr); } /* view the various parts */ for (i = 0; i < size; i++) { if (i == rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"Processor %d: \n",i);CHKERRQ(ierr); ierr = DMView(subda[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); } ierr = DMGetLocalVector(subda[0],&slvec);CHKERRQ(ierr); ierr = DMGetGlobalVector(subda[0],&sgvec);CHKERRQ(ierr); ierr = DMGetGlobalVector(da,&v);CHKERRQ(ierr); /* test filling outer between the big DM and the small ones with the IS scatter*/ ierr = VecScatterCreate(v,ois[0],sgvec,PETSC_NULL,&oscata);CHKERRQ(ierr); ierr = FillLocalSubdomain(subda[0],sgvec);CHKERRQ(ierr); ierr = VecScatterBegin(oscata,sgvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(oscata,sgvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* test the local-to-local scatter */ /* fill up the local subdomain and then add them together */ ierr = FillLocalSubdomain(da,v);CHKERRQ(ierr); ierr = VecScatterBegin(gscat[0],v,slvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(gscat[0],v,slvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecView(v,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* test ghost scattering backwards */ ierr = VecSet(v,0);CHKERRQ(ierr); ierr = VecScatterBegin(gscat[0],slvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(gscat[0],slvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecView(v,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* test overlap scattering backwards */ ierr = DMLocalToGlobalBegin(subda[0],slvec,ADD_VALUES,sgvec);CHKERRQ(ierr); ierr = DMLocalToGlobalEnd(subda[0],slvec,ADD_VALUES,sgvec);CHKERRQ(ierr); ierr = VecSet(v,0);CHKERRQ(ierr); ierr = VecScatterBegin(oscat[0],sgvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(oscat[0],sgvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecView(v,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* test interior scattering backwards */ ierr = VecSet(v,0);CHKERRQ(ierr); ierr = VecScatterBegin(iscat[0],sgvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(iscat[0],sgvec,v,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecView(v,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* test matrix allocation */ for (i = 0; i < size; i++) { if (i == rank) { Mat m; ierr = PetscPrintf(PETSC_COMM_SELF,"Processor %d: \n",i);CHKERRQ(ierr); ierr = DMCreateMatrix(subda[0],"mpiaij",&m);CHKERRQ(ierr); ierr = MatView(m,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = MatDestroy(&m);CHKERRQ(ierr); } ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); } ierr = DMRestoreLocalVector(subda[0],&slvec);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(subda[0],&sgvec);CHKERRQ(ierr); ierr = DMRestoreGlobalVector(da,&v);CHKERRQ(ierr); ierr = DMDestroy(&subda[0]);CHKERRQ(ierr); ierr = ISDestroy(&ois[0]);CHKERRQ(ierr); ierr = ISDestroy(&iis[0]);CHKERRQ(ierr); ierr = VecScatterDestroy(&iscat[0]);CHKERRQ(ierr); ierr = VecScatterDestroy(&oscat[0]);CHKERRQ(ierr); ierr = VecScatterDestroy(&gscat[0]);CHKERRQ(ierr); ierr = VecScatterDestroy(&oscata);CHKERRQ(ierr); ierr = PetscFree(iscat);CHKERRQ(ierr); ierr = PetscFree(oscat);CHKERRQ(ierr); ierr = PetscFree(gscat);CHKERRQ(ierr); ierr = PetscFree(oscata);CHKERRQ(ierr); ierr = PetscFree(subda);CHKERRQ(ierr); ierr = PetscFree(ois);CHKERRQ(ierr); ierr = PetscFree(iis);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode TaoSolve_BQPIP(Tao tao) { TAO_BQPIP *qp = (TAO_BQPIP*)tao->data; PetscErrorCode ierr; PetscInt iter=0,its; PetscReal d1,d2,ksptol,sigma; PetscReal sigmamu; PetscReal dstep,pstep,step=0; PetscReal gap[4]; TaoConvergedReason reason; PetscFunctionBegin; qp->dobj = 0.0; qp->pobj = 1.0; qp->gap = 10.0; qp->rgap = 1.0; qp->mu = 1.0; qp->sigma = 1.0; qp->dinfeas = 1.0; qp->psteplength = 0.0; qp->dsteplength = 0.0; /* Tighten infinite bounds, things break when we don't do this -- see test_bqpip.c */ ierr = VecSet(qp->XU,1.0e20);CHKERRQ(ierr); ierr = VecSet(qp->XL,-1.0e20);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->XL,qp->XL,tao->XL);CHKERRQ(ierr); ierr = VecPointwiseMin(qp->XU,qp->XU,tao->XU);CHKERRQ(ierr); ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&qp->c,qp->C0);CHKERRQ(ierr); ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); ierr = MatMult(tao->hessian, tao->solution, qp->Work);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->Work, &d1);CHKERRQ(ierr); ierr = VecAXPY(qp->C0, -1.0, qp->Work);CHKERRQ(ierr); ierr = VecDot(qp->C0, tao->solution, &d2);CHKERRQ(ierr); qp->c -= (d1/2.0+d2); ierr = MatGetDiagonal(tao->hessian, qp->HDiag);CHKERRQ(ierr); ierr = QPIPSetInitialPoint(qp,tao);CHKERRQ(ierr); ierr = QPIPComputeResidual(qp,tao);CHKERRQ(ierr); /* Enter main loop */ while (1){ /* Check Stopping Condition */ ierr = TaoMonitor(tao,iter++,qp->pobj,PetscSqrtScalar(qp->gap + qp->dinfeas), qp->pinfeas, step, &reason);CHKERRQ(ierr); if (reason != TAO_CONTINUE_ITERATING) break; /* Dual Infeasibility Direction should already be in the right hand side from computing the residuals */ ierr = QPIPComputeNormFromCentralPath(qp,&d1);CHKERRQ(ierr); if (iter > 0 && (qp->rnorm>5*qp->mu || d1*d1>qp->m*qp->mu*qp->mu) ) { sigma=1.0;sigmamu=qp->mu; sigma=0.0;sigmamu=0; } else { sigma=0.0;sigmamu=0; } ierr = VecSet(qp->DZ, sigmamu);CHKERRQ(ierr); ierr = VecSet(qp->DS, sigmamu);CHKERRQ(ierr); if (sigmamu !=0){ ierr = VecPointwiseDivide(qp->DZ, qp->DZ, qp->G);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->DS, qp->DS, qp->T);CHKERRQ(ierr); ierr = VecCopy(qp->DZ,qp->RHS2);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->DS);CHKERRQ(ierr); } else { ierr = VecZeroEntries(qp->RHS2);CHKERRQ(ierr); } /* Compute the Primal Infeasiblitiy RHS and the Diagonal Matrix to be added to H and store in Work */ ierr = VecPointwiseDivide(qp->DiagAxpy, qp->Z, qp->G);CHKERRQ(ierr); ierr = VecPointwiseMult(qp->GZwork, qp->DiagAxpy, qp->R3);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS, -1.0, qp->GZwork);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->TSwork, qp->S, qp->T);CHKERRQ(ierr); ierr = VecAXPY(qp->DiagAxpy, 1.0, qp->TSwork);CHKERRQ(ierr); ierr = VecPointwiseMult(qp->TSwork, qp->TSwork, qp->R5);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS, -1.0, qp->TSwork);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->RHS);CHKERRQ(ierr); /* Determine the solving tolerance */ ksptol = qp->mu/10.0; ksptol = PetscMin(ksptol,0.001); ierr = MatDiagonalSet(tao->hessian, qp->DiagAxpy, ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = KSPSetOperators(tao->ksp, tao->hessian, tao->hessian_pre);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, qp->RHS, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = VecScale(qp->DiagAxpy, -1.0);CHKERRQ(ierr); ierr = MatDiagonalSet(tao->hessian, qp->DiagAxpy, ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = VecScale(qp->DiagAxpy, -1.0);CHKERRQ(ierr); ierr = QPComputeStepDirection(qp,tao);CHKERRQ(ierr); ierr = QPStepLength(qp); CHKERRQ(ierr); /* Calculate New Residual R1 in Work vector */ ierr = MatMult(tao->hessian, tao->stepdirection, qp->RHS2);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->DS);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, -1.0, qp->DZ);CHKERRQ(ierr); ierr = VecAYPX(qp->RHS2, qp->dsteplength, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(qp->RHS2, NORM_2, &qp->dinfeas);CHKERRQ(ierr); ierr = VecDot(qp->DZ, qp->DG, gap);CHKERRQ(ierr); ierr = VecDot(qp->DS, qp->DT, gap+1);CHKERRQ(ierr); qp->rnorm=(qp->dinfeas+qp->psteplength*qp->pinfeas)/(qp->m+qp->n); pstep = qp->psteplength; dstep = qp->dsteplength; step = PetscMin(qp->psteplength,qp->dsteplength); sigmamu= ( pstep*pstep*(gap[0]+gap[1]) + (1 - pstep + pstep*sigma)*qp->gap )/qp->m; if (qp->predcorr && step < 0.9){ if (sigmamu < qp->mu){ sigmamu=sigmamu/qp->mu; sigmamu=sigmamu*sigmamu*sigmamu; } else {sigmamu = 1.0;} sigmamu = sigmamu*qp->mu; /* Compute Corrector Step */ ierr = VecPointwiseMult(qp->DZ, qp->DG, qp->DZ);CHKERRQ(ierr); ierr = VecScale(qp->DZ, -1.0);CHKERRQ(ierr); ierr = VecShift(qp->DZ, sigmamu);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->DZ, qp->DZ, qp->G);CHKERRQ(ierr); ierr = VecPointwiseMult(qp->DS, qp->DS, qp->DT);CHKERRQ(ierr); ierr = VecScale(qp->DS, -1.0);CHKERRQ(ierr); ierr = VecShift(qp->DS, sigmamu);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->DS, qp->DS, qp->T);CHKERRQ(ierr); ierr = VecCopy(qp->DZ, qp->RHS2);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, -1.0, qp->DS);CHKERRQ(ierr); ierr = VecAXPY(qp->RHS2, 1.0, qp->RHS);CHKERRQ(ierr); /* Approximately solve the linear system */ ierr = MatDiagonalSet(tao->hessian, qp->DiagAxpy, ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, qp->RHS2, tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; ierr = MatDiagonalSet(tao->hessian, qp->HDiag, INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tao->hessian,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = QPComputeStepDirection(qp,tao);CHKERRQ(ierr); ierr = QPStepLength(qp);CHKERRQ(ierr); } /* End Corrector step */ /* Take the step */ pstep = qp->psteplength; dstep = qp->dsteplength; ierr = VecAXPY(qp->Z, dstep, qp->DZ);CHKERRQ(ierr); ierr = VecAXPY(qp->S, dstep, qp->DS);CHKERRQ(ierr); ierr = VecAXPY(tao->solution, dstep, tao->stepdirection);CHKERRQ(ierr); ierr = VecAXPY(qp->G, dstep, qp->DG);CHKERRQ(ierr); ierr = VecAXPY(qp->T, dstep, qp->DT);CHKERRQ(ierr); /* Compute Residuals */ ierr = QPIPComputeResidual(qp,tao);CHKERRQ(ierr); /* Evaluate quadratic function */ ierr = MatMult(tao->hessian, tao->solution, qp->Work);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->Work, &d1);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->C0, &d2);CHKERRQ(ierr); ierr = VecDot(qp->G, qp->Z, gap);CHKERRQ(ierr); ierr = VecDot(qp->T, qp->S, gap+1);CHKERRQ(ierr); qp->pobj=d1/2.0 + d2+qp->c; /* Compute the duality gap */ qp->gap = (gap[0]+gap[1]); qp->dobj = qp->pobj - qp->gap; if (qp->m>0) qp->mu=qp->gap/(qp->m); qp->rgap=qp->gap/( PetscAbsReal(qp->dobj) + PetscAbsReal(qp->pobj) + 1.0 ); } /* END MAIN LOOP */ PetscFunctionReturn(0); }
int main(int argc,char **args) { Mat mat; /* matrix */ Vec b,ustar,u; /* vectors (RHS, exact solution, approx solution) */ PC pc; /* PC context */ KSP ksp; /* KSP context */ PetscErrorCode ierr; PetscInt n = 10,i,its,col[3]; PetscScalar value[3]; PCType pcname; KSPType kspname; PetscReal norm,tol=1000.*PETSC_MACHINE_EPSILON; PetscInitialize(&argc,&args,(char*)0,help); /* Create and initialize vectors */ ierr = VecCreateSeq(PETSC_COMM_SELF,n,&b);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n,&ustar);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n,&u);CHKERRQ(ierr); ierr = VecSet(ustar,1.0);CHKERRQ(ierr); ierr = VecSet(u,0.0);CHKERRQ(ierr); /* Create and assemble matrix */ ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,3,NULL,&mat);CHKERRQ(ierr); value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<n-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(mat,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = n - 1; col[0] = n - 2; col[1] = n - 1; ierr = MatSetValues(mat,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0; col[0] = 0; col[1] = 1; value[0] = 2.0; value[1] = -1.0; ierr = MatSetValues(mat,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Compute right-hand-side vector */ ierr = MatMult(mat,ustar,b);CHKERRQ(ierr); /* Create PC context and set up data structures */ ierr = PCCreate(PETSC_COMM_WORLD,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr); ierr = PCSetFromOptions(pc);CHKERRQ(ierr); ierr = PCSetOperators(pc,mat,mat);CHKERRQ(ierr); ierr = PCSetUp(pc);CHKERRQ(ierr); /* Create KSP context and set up data structures */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = PCSetOperators(pc,mat,mat);CHKERRQ(ierr); ierr = KSPSetPC(ksp,pc);CHKERRQ(ierr); ierr = KSPSetUp(ksp);CHKERRQ(ierr); /* Solve the problem */ ierr = KSPGetType(ksp,&kspname);CHKERRQ(ierr); ierr = PCGetType(pc,&pcname);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"Running %s with %s preconditioning\n",kspname,pcname);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,u);CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,ustar);CHKERRQ(ierr); ierr = VecNorm(u,NORM_2,&norm); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_SELF,"2 norm of error %g Number of iterations %D\n",(double)norm,its);CHKERRQ(ierr); } /* Free data structures */ ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&ustar);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&mat);CHKERRQ(ierr); ierr = PCDestroy(&pc);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode QPIPSetInitialPoint(TAO_BQPIP *qp, Tao tao) { PetscErrorCode ierr; PetscReal two=2.0,p01=1; PetscReal gap1,gap2,fff,mu; PetscFunctionBegin; /* Compute function, Gradient R=Hx+b, and Hessian */ ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); ierr = VecMedian(qp->XL, tao->solution, qp->XU, tao->solution);CHKERRQ(ierr); ierr = MatMult(tao->hessian, tao->solution, tao->gradient);CHKERRQ(ierr); ierr = VecCopy(qp->C0, qp->Work);CHKERRQ(ierr); ierr = VecAXPY(qp->Work, 0.5, tao->gradient);CHKERRQ(ierr); ierr = VecAXPY(tao->gradient, 1.0, qp->C0);CHKERRQ(ierr); ierr = VecDot(tao->solution, qp->Work, &fff);CHKERRQ(ierr); qp->pobj = fff + qp->c; /* Initialize Primal Vectors */ /* T = XU - X; G = X - XL */ ierr = VecCopy(qp->XU, qp->T);CHKERRQ(ierr); ierr = VecAXPY(qp->T, -1.0, tao->solution);CHKERRQ(ierr); ierr = VecCopy(tao->solution, qp->G);CHKERRQ(ierr); ierr = VecAXPY(qp->G, -1.0, qp->XL);CHKERRQ(ierr); ierr = VecSet(qp->GZwork, p01);CHKERRQ(ierr); ierr = VecSet(qp->TSwork, p01);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->G, qp->G, qp->GZwork);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->T, qp->T, qp->TSwork);CHKERRQ(ierr); /* Initialize Dual Variable Vectors */ ierr = VecCopy(qp->G, qp->Z);CHKERRQ(ierr); ierr = VecReciprocal(qp->Z);CHKERRQ(ierr); ierr = VecCopy(qp->T, qp->S);CHKERRQ(ierr); ierr = VecReciprocal(qp->S);CHKERRQ(ierr); ierr = MatMult(tao->hessian, qp->Work, qp->RHS);CHKERRQ(ierr); ierr = VecAbs(qp->RHS);CHKERRQ(ierr); ierr = VecSet(qp->Work, p01);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->RHS, qp->RHS, qp->Work);CHKERRQ(ierr); ierr = VecPointwiseDivide(qp->RHS, tao->gradient, qp->RHS);CHKERRQ(ierr); ierr = VecNorm(qp->RHS, NORM_1, &gap1);CHKERRQ(ierr); mu = PetscMin(10.0,(gap1+10.0)/qp->m); ierr = VecScale(qp->S, mu);CHKERRQ(ierr); ierr = VecScale(qp->Z, mu);CHKERRQ(ierr); ierr = VecSet(qp->TSwork, p01);CHKERRQ(ierr); ierr = VecSet(qp->GZwork, p01);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->S, qp->S, qp->TSwork);CHKERRQ(ierr); ierr = VecPointwiseMax(qp->Z, qp->Z, qp->GZwork);CHKERRQ(ierr); qp->mu=0;qp->dinfeas=1.0;qp->pinfeas=1.0; while ( (qp->dinfeas+qp->pinfeas)/(qp->m+qp->n) >= qp->mu ){ ierr = VecScale(qp->G, two);CHKERRQ(ierr); ierr = VecScale(qp->Z, two);CHKERRQ(ierr); ierr = VecScale(qp->S, two);CHKERRQ(ierr); ierr = VecScale(qp->T, two);CHKERRQ(ierr); ierr = QPIPComputeResidual(qp,tao);CHKERRQ(ierr); ierr = VecCopy(tao->solution, qp->R3);CHKERRQ(ierr); ierr = VecAXPY(qp->R3, -1.0, qp->G);CHKERRQ(ierr); ierr = VecAXPY(qp->R3, -1.0, qp->XL);CHKERRQ(ierr); ierr = VecCopy(tao->solution, qp->R5);CHKERRQ(ierr); ierr = VecAXPY(qp->R5, 1.0, qp->T);CHKERRQ(ierr); ierr = VecAXPY(qp->R5, -1.0, qp->XU);CHKERRQ(ierr); ierr = VecNorm(qp->R3, NORM_INFINITY, &gap1);CHKERRQ(ierr); ierr = VecNorm(qp->R5, NORM_INFINITY, &gap2);CHKERRQ(ierr); qp->pinfeas=PetscMax(gap1,gap2); /* Compute the duality gap */ ierr = VecDot(qp->G, qp->Z, &gap1);CHKERRQ(ierr); ierr = VecDot(qp->T, qp->S, &gap2);CHKERRQ(ierr); qp->gap = (gap1+gap2); qp->dobj = qp->pobj - qp->gap; if (qp->m>0) qp->mu=qp->gap/(qp->m); else qp->mu=0.0; qp->rgap=qp->gap/( PetscAbsReal(qp->dobj) + PetscAbsReal(qp->pobj) + 1.0 ); } PetscFunctionReturn(0); }
int main(int argc,char **args) { Mat C; PetscMPIInt rank,size; PetscInt i,m = 5,N,start,end,M,its; PetscScalar val,Ke[16],r[4]; PetscReal x,y,h,norm; PetscErrorCode ierr; PetscInt idx[4],count,*rows; Vec u,ustar,b; KSP ksp; PetscInitialize(&argc,&args,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-m",&m,PETSC_NULL);CHKERRQ(ierr); N = (m+1)*(m+1); /* dimension of matrix */ M = m*m; /* number of elements */ h = 1.0/m; /* mesh width */ ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); /* Create stiffness matrix */ ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank); end = start + M/size + ((M%size) > rank); /* Assemble matrix */ ierr = FormElementStiffness(h*h,Ke); /* element stiffness for Laplacian */ for (i=start; i<end; i++) { /* location of lower left corner of element */ x = h*(i % m); y = h*(i/m); /* node numbers for the four corners of element */ idx[0] = (m+1)*(i/m) + (i % m); idx[1] = idx[0]+1; idx[2] = idx[1] + m + 1; idx[3] = idx[2] - 1; ierr = MatSetValues(C,4,idx,4,idx,Ke,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Create right-hand-side and solution vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr); ierr = VecSetSizes(u,PETSC_DECIDE,N);CHKERRQ(ierr); ierr = VecSetFromOptions(u);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)u,"Approx. Solution");CHKERRQ(ierr); ierr = VecDuplicate(u,&b);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)b,"Right hand side");CHKERRQ(ierr); ierr = VecDuplicate(b,&ustar);CHKERRQ(ierr); ierr = VecSet(u,0.0);CHKERRQ(ierr); ierr = VecSet(b,0.0);CHKERRQ(ierr); /* Assemble right-hand-side vector */ for (i=start; i<end; i++) { /* location of lower left corner of element */ x = h*(i % m); y = h*(i/m); /* node numbers for the four corners of element */ idx[0] = (m+1)*(i/m) + (i % m); idx[1] = idx[0]+1; idx[2] = idx[1] + m + 1; idx[3] = idx[2] - 1; ierr = FormElementRhs(x,y,h*h,r);CHKERRQ(ierr); ierr = VecSetValues(b,4,idx,r,ADD_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(b);CHKERRQ(ierr); ierr = VecAssemblyEnd(b);CHKERRQ(ierr); /* Modify matrix and right-hand-side for Dirichlet boundary conditions */ ierr = PetscMalloc(4*m*sizeof(PetscInt),&rows);CHKERRQ(ierr); for (i=0; i<m+1; i++) { rows[i] = i; /* bottom */ rows[3*m - 1 +i] = m*(m+1) + i; /* top */ } count = m+1; /* left side */ for (i=m+1; i<m*(m+1); i+= m+1) { rows[count++] = i; } count = 2*m; /* left side */ for (i=2*m+1; i<m*(m+1); i+= m+1) { rows[count++] = i; } for (i=0; i<4*m; i++) { x = h*(rows[i] % (m+1)); y = h*(rows[i]/(m+1)); val = y; ierr = VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);CHKERRQ(ierr); ierr = VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatZeroRows(C,4*m,rows,1.0);CHKERRQ(ierr); ierr = PetscFree(rows);CHKERRQ(ierr); ierr = VecAssemblyBegin(u);CHKERRQ(ierr); ierr = VecAssemblyEnd(u);CHKERRQ(ierr); ierr = VecAssemblyBegin(b);CHKERRQ(ierr); ierr = VecAssemblyEnd(b);CHKERRQ(ierr); { Mat A; ierr = MatConvert(C,MATSAME,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); ierr = MatDestroy(C);CHKERRQ(ierr); ierr = MatConvert(A,MATSAME,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); } /* Solve linear system */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,u);CHKERRQ(ierr); /* Check error */ ierr = VecGetOwnershipRange(ustar,&start,&end);CHKERRQ(ierr); for (i=start; i<end; i++) { x = h*(i % (m+1)); y = h*(i/(m+1)); val = y; ierr = VecSetValues(ustar,1,&i,&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(ustar);CHKERRQ(ierr); ierr = VecAssemblyEnd(ustar);CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,ustar);CHKERRQ(ierr); ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %A Iterations %D\n",norm*h,its);CHKERRQ(ierr); /* Free work space */ ierr = KSPDestroy(ksp);CHKERRQ(ierr); ierr = VecDestroy(ustar);CHKERRQ(ierr); ierr = VecDestroy(u);CHKERRQ(ierr); ierr = VecDestroy(b);CHKERRQ(ierr); ierr = MatDestroy(C);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
int main(int argc, char *argv[]) { PetscViewer viewer; Vec u; PetscScalar v; int VECTOR_GENERATE, VECTOR_READ; int i, m = 10, rank, size, low, high, ldim, iglobal; int ierr; ierr = PetscInitialize(&argc, &argv, NULL, help);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL, "-m", &m, NULL);CHKERRQ(ierr); /* PART 1: Generate vector, then write it to Mathematica */ ierr = PetscLogEventRegister("Generate Vector", VEC_CLASSID,&VECTOR_GENERATE);CHKERRQ(ierr); ierr = PetscLogEventBegin(VECTOR_GENERATE, 0, 0, 0, 0);CHKERRQ(ierr); /* Generate vector */ ierr = VecCreate(PETSC_COMM_WORLD, &u);CHKERRQ(ierr); ierr = VecSetSizes(u, PETSC_DECIDE, m);CHKERRQ(ierr); ierr = VecSetFromOptions(u);CHKERRQ(ierr); ierr = VecGetOwnershipRange(u, &low, &high);CHKERRQ(ierr); ierr = VecGetLocalSize(u, &ldim);CHKERRQ(ierr); for (i = 0; i < ldim; i++) { iglobal = i + low; v = (PetscScalar) (i + 100*rank); ierr = VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(u);CHKERRQ(ierr); ierr = VecAssemblyEnd(u);CHKERRQ(ierr); ierr = VecView(u, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "writing vector to Mathematica...\n");CHKERRQ(ierr); #if 0 ierr = PetscViewerMathematicaOpen(PETSC_COMM_WORLD, 8000, "192.168.119.1", "Connect", &viewer);CHKERRQ(ierr); ierr = VecView(u, viewer);CHKERRQ(ierr); #else ierr = VecView(u, PETSC_VIEWER_MATHEMATICA_WORLD);CHKERRQ(ierr); #endif v = 0.0; ierr = VecSet(u,v);CHKERRQ(ierr); ierr = PetscLogEventEnd(VECTOR_GENERATE, 0, 0, 0, 0);CHKERRQ(ierr); /* All processors wait until test vector has been dumped */ ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); ierr = PetscSleep(10);CHKERRQ(ierr); /* PART 2: Read in vector in from Mathematica */ ierr = PetscLogEventRegister("Read Vector", VEC_CLASSID,&VECTOR_READ);CHKERRQ(ierr); ierr = PetscLogEventBegin(VECTOR_READ, 0, 0, 0, 0);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "reading vector from Mathematica...\n");CHKERRQ(ierr); /* Read new vector in binary format */ #if 0 ierr = PetscViewerMathematicaGetVector(viewer, u);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); #else ierr = PetscViewerMathematicaGetVector(PETSC_VIEWER_MATHEMATICA_WORLD, u);CHKERRQ(ierr); #endif ierr = PetscLogEventEnd(VECTOR_READ, 0, 0, 0, 0);CHKERRQ(ierr); ierr = VecView(u, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Free data structures */ ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }