int main(int argc,char **argv) { Mat A[NMAT]; /* problem matrices */ PEP pep; /* polynomial eigenproblem solver context */ PetscInt n=128,nlocal,k,Istart,Iend,i,j,start_ct,end_ct; PetscReal w=9.92918,a=0.0,b=2.0,h,deltasq; PetscReal nref[NL],K2[NL],q[NL],*md,*supd,*subd; PetscScalar v,alpha; PetscBool terse; PetscErrorCode ierr; PetscLogDouble time1,time2; SlepcInitialize(&argc,&argv,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); n = (n/4)*4; ierr = PetscPrintf(PETSC_COMM_WORLD,"\nPlanar waveguide, n=%D\n\n",n+1);CHKERRQ(ierr); h = (b-a)/n; nlocal = (n/4)-1; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set waveguide parameters used in construction of matrices - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* refractive indices in each layer */ nref[0] = 1.5; nref[1] = 1.66; nref[2] = 1.6; nref[3] = 1.53; nref[4] = 1.66; nref[5] = 1.0; for (i=0;i<NL;i++) K2[i] = w*w*nref[i]*nref[i]; deltasq = K2[0] - K2[NL-1]; for (i=0;i<NL;i++) q[i] = K2[i] - (K2[0] + K2[NL-1])/2; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the polynomial matrices - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* initialize matrices */ for (i=0;i<NMAT;i++) { ierr = MatCreate(PETSC_COMM_WORLD,&A[i]);CHKERRQ(ierr); ierr = MatSetSizes(A[i],PETSC_DECIDE,PETSC_DECIDE,n+1,n+1);CHKERRQ(ierr); ierr = MatSetFromOptions(A[i]);CHKERRQ(ierr); ierr = MatSetUp(A[i]);CHKERRQ(ierr); } ierr = MatGetOwnershipRange(A[0],&Istart,&Iend);CHKERRQ(ierr); /* A0 */ alpha = (h/6)*(deltasq*deltasq/16); for (i=Istart;i<Iend;i++) { v = 4.0; if (i==0 || i==n) v = 2.0; ierr = MatSetValue(A[0],i,i,v*alpha,INSERT_VALUES);CHKERRQ(ierr); if (i>0) { ierr = MatSetValue(A[0],i,i-1,alpha,INSERT_VALUES);CHKERRQ(ierr); } if (i<n) { ierr = MatSetValue(A[0],i,i+1,alpha,INSERT_VALUES);CHKERRQ(ierr); } } /* A1 */ if (Istart==0) { ierr = MatSetValue(A[1],0,0,-deltasq/4,INSERT_VALUES);CHKERRQ(ierr); } if 
(Iend==n+1) { ierr = MatSetValue(A[1],n,n,deltasq/4,INSERT_VALUES);CHKERRQ(ierr); } /* A2 */ alpha = 1.0/h; for (i=Istart;i<Iend;i++) { v = 2.0; if (i==0 || i==n) v = 1.0; ierr = MatSetValue(A[2],i,i,v*alpha,ADD_VALUES);CHKERRQ(ierr); if (i>0) { ierr = MatSetValue(A[2],i,i-1,-alpha,ADD_VALUES);CHKERRQ(ierr); } if (i<n) { ierr = MatSetValue(A[2],i,i+1,-alpha,ADD_VALUES);CHKERRQ(ierr); } } ierr = PetscMalloc3(n+1,&md,n+1,&supd,n+1,&subd);CHKERRQ(ierr); md[0] = 2.0*q[1]; supd[1] = q[1]; subd[0] = q[1]; for (k=1;k<=NL-2;k++) { end_ct = k*(nlocal+1); start_ct = end_ct-nlocal; for (j=start_ct;j<end_ct;j++) { md[j] = 4*q[k]; supd[j+1] = q[k]; subd[j] = q[k]; } if (k < 4) { /* interface points */ md[end_ct] = 4*(q[k] + q[k+1])/2.0; supd[end_ct+1] = q[k+1]; subd[end_ct] = q[k+1]; } } md[n] = 2*q[NL-2]; supd[n] = q[NL-2]; subd[n] = q[NL-2]; alpha = -h/6.0; for (i=Istart;i<Iend;i++) { ierr = MatSetValue(A[2],i,i,md[i]*alpha,ADD_VALUES);CHKERRQ(ierr); if (i>0) { ierr = MatSetValue(A[2],i,i-1,subd[i-1]*alpha,ADD_VALUES);CHKERRQ(ierr); } if (i<n) { ierr = MatSetValue(A[2],i,i+1,supd[i+1]*alpha,ADD_VALUES);CHKERRQ(ierr); } } ierr = PetscFree3(md,supd,subd);CHKERRQ(ierr); /* A3 */ if (Istart==0) { ierr = MatSetValue(A[3],0,0,1.0,INSERT_VALUES);CHKERRQ(ierr); } if (Iend==n+1) { ierr = MatSetValue(A[3],n,n,1.0,INSERT_VALUES);CHKERRQ(ierr); } /* A4 */ alpha = (h/6); for (i=Istart;i<Iend;i++) { v = 4.0; if (i==0 || i==n) v = 2.0; ierr = MatSetValue(A[4],i,i,v*alpha,INSERT_VALUES);CHKERRQ(ierr); if (i>0) { ierr = MatSetValue(A[4],i,i-1,alpha,INSERT_VALUES);CHKERRQ(ierr); } if (i<n) { ierr = MatSetValue(A[4],i,i+1,alpha,INSERT_VALUES);CHKERRQ(ierr); } } /* assemble matrices */ for (i=0;i<NMAT;i++) { ierr = MatAssemblyBegin(A[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } for (i=0;i<NMAT;i++) { ierr = MatAssemblyEnd(A[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the eigensolver and solve the problem - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - */ ierr = PEPCreate(PETSC_COMM_WORLD,&pep);CHKERRQ(ierr); ierr = PEPSetOperators(pep,NMAT,A);CHKERRQ(ierr); ierr = PEPSetFromOptions(pep);CHKERRQ(ierr); ierr = PetscTime(&time1); CHKERRQ(ierr); ierr = PEPSolve(pep);CHKERRQ(ierr); ierr = PetscTime(&time2); CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Display solution and clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* show detailed info unless -terse option is given by user */ ierr = PetscOptionsHasName(NULL,"-terse",&terse);CHKERRQ(ierr); if (terse) { ierr = PEPErrorView(pep,PEP_ERROR_BACKWARD,NULL);CHKERRQ(ierr); } else { ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); ierr = PEPReasonView(pep,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PEPErrorView(pep,PEP_ERROR_BACKWARD,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } ierr = PetscPrintf(PETSC_COMM_WORLD,"Time: %g\n\n\n",time2-time1);CHKERRQ(ierr); ierr = PEPDestroy(&pep);CHKERRQ(ierr); for (i=0;i<NMAT;i++) { ierr = MatDestroy(&A[i]);CHKERRQ(ierr); } ierr = SlepcFinalize();CHKERRQ(ierr); return 0; }
static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata) { PetscErrorCode ierr; PetscReal *M,si; PetscScalar x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk; PetscBLASInt pm1,lierr; PetscInt i,j,n,k,s,r,q,ii,jj,p=dd.p; PetscInt xloc,yloc,zloc,xyloc,xyzloc; PetscFunctionBeginUser; /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */ ierr = PetscMalloc1(p+1,&glldata->zGL);CHKERRQ(ierr); ierr = PetscMemzero(glldata->zGL,(p+1)*sizeof(*glldata->zGL));CHKERRQ(ierr); glldata->zGL[0]=-1.0; glldata->zGL[p]= 1.0; if (p > 1) { if (p == 2) glldata->zGL[1]=0.0; else { ierr = PetscMalloc1(p-1,&M);CHKERRQ(ierr); for (i=0; i<p-1; i++) { si = (PetscReal)(i+1.0); M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5))); } pm1 = p-1; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STERF Lapack routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); ierr = PetscFree(M);CHKERRQ(ierr); } } /* Weights for 1D quadrature */ ierr = PetscMalloc1(p+1,&glldata->rhoGL);CHKERRQ(ierr); glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0)); glldata->rhoGL[p]=glldata->rhoGL[0]; z2 = -1; /* Dummy value to avoid -Wmaybe-initialized */ for (i=1; i<p; i++) { x = glldata->zGL[i]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0 = z1; z1 = z2; } glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2); } /* Auxiliary mat for laplacian */ ierr = PetscMalloc1(p+1,&glldata->A);CHKERRQ(ierr); ierr = PetscMalloc1((p+1)*(p+1),&glldata->A[0]);CHKERRQ(ierr); for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1; for (j=1; j<p; j++) { x =glldata->zGL[j]; z0=1.0; z1=x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpj=z2; for (r=1; r<p; r++) { if (r == j) { glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj); } else { x = glldata->zGL[r]; z0 = 1.0; z1 = x; 
for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpr = z2; glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r])); } } } for (j=1; j<p+1; j++) { x = glldata->zGL[j]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpj = z2; glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j])); glldata->A[0][j]=glldata->A[j][0]; } for (j=0; j<p; j++) { x = glldata->zGL[j]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpj=z2; glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j])); glldata->A[j][p]=glldata->A[p][j]; } glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0; glldata->A[p][p]=glldata->A[0][0]; /* compute element matrix */ xloc = p+1; yloc = p+1; zloc = p+1; if (dd.dim<2) yloc=1; if (dd.dim<3) zloc=1; xyloc = xloc*yloc; xyzloc = xloc*yloc*zloc; ierr = MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);CHKERRQ(ierr); ierr = MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);CHKERRQ(ierr); ierr = MatSetType(glldata->elem_mat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL);CHKERRQ(ierr); /* overestimated */ ierr = MatZeroEntries(glldata->elem_mat);CHKERRQ(ierr); ierr = MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (k=0; k<zloc; k++) { if (dd.dim>2) rhoGLk=glldata->rhoGL[k]; else rhoGLk=1.0; for (j=0; j<yloc; j++) { if (dd.dim>1) rhoGLj=glldata->rhoGL[j]; else rhoGLj=1.0; for (i=0; i<xloc; i++) { ii = k*xyloc+j*xloc+i; s = k; r = j; for (q=0; q<xloc; q++) { jj = s*xyloc+r*xloc+q; ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);CHKERRQ(ierr); } if (dd.dim>1) { s=k; q=i; for (r=0; r<yloc; r++) { jj = s*xyloc+r*xloc+q; ierr = 
MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);CHKERRQ(ierr); } } if (dd.dim>2) { r=j; q=i; for (s=0; s<zloc; s++) { jj = s*xyloc+r*xloc+q; ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);CHKERRQ(ierr); } } } } } ierr = MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); #if DEBUG { Vec lvec,rvec; PetscReal norm; ierr = MatCreateVecs(glldata->elem_mat,&lvec,&rvec);CHKERRQ(ierr); ierr = VecSet(lvec,1.0);CHKERRQ(ierr); ierr = MatMult(glldata->elem_mat,lvec,rvec);CHKERRQ(ierr); ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr); printf("Test null space of elem mat % 1.14e\n",norm); ierr = VecDestroy(&lvec);CHKERRQ(ierr); ierr = VecDestroy(&rvec);CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
/*
   Jacobian of the right-hand side, written into B and assembled.

   The state vector X interleaves two fields per cell: entry 2*c holds "a"
   and entry 2*c+1 holds "h". Each row gets the two local (reaction)
   derivatives plus one entry per existing neighbouring cell with value D_a
   (rows of a) or D_h (rows of h); the diagonal is decreased by the same
   amount for every neighbour. When J is a different matrix from B it is
   assembled as well, but no values are inserted into it here.

   ts and t are not used. ptr must point to the AppCtx holding the
   parameters. Returns 0 or a PETSc error code.
*/
PetscErrorCode RHSJacobian(TS ts, PetscReal t, Vec X, Mat J, Mat B, void *ptr)
{
  AppCtx            *user = (AppCtx*)ptr;
  PetscInt          ncells, cell, nbr, side, nnz;
  PetscReal         alpha, beta;
  PetscReal         mu_a, D_a;
  PetscReal         mu_h, D_h;
  PetscReal         a, h;
  const PetscScalar *x;
  PetscScalar       va[4], vh[4];
  PetscInt          ca[4], ch[4], rowa, rowh;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ncells = user->nb_cells;
  alpha  = user->alpha;
  beta   = user->beta;
  mu_a   = user->mu_a;
  D_a    = user->D_a;
  mu_h   = user->mu_h;
  D_h    = user->D_h;

  ierr = VecGetArrayRead(X, &x);CHKERRQ(ierr);
  for (cell = 0; cell < ncells; ++cell) {
    rowa = 2*cell;
    rowh = 2*cell+1;
    a    = x[rowa];
    h    = x[rowh];

    /* slot 0 of each row is its diagonal entry, slot 1 the coupling to the other field */
    ca[0] = rowa;
    va[0] = 2*alpha*a / (1.+beta*h) - mu_a;
    ca[1] = rowh;
    va[1] = -beta*alpha*a*a / ((1.+beta*h)*(1.+beta*h));

    ch[0] = rowh;
    vh[0] = -mu_h;
    ch[1] = rowa;
    vh[1] = 2*alpha*a;

    /* left neighbour first, then right neighbour; cells outside [0,ncells) are skipped */
    nnz = 2;
    for (side = -1; side <= 1; side += 2) {
      nbr = cell + side;
      if (nbr < 0 || nbr >= ncells) continue;
      ca[nnz] = 2*nbr;
      ch[nnz] = 2*nbr+1;
      va[nnz] = D_a;
      vh[nnz] = D_h;
      va[0]  -= D_a;
      vh[0]  -= D_h;
      nnz++;
    }

    ierr = MatSetValues(B, 1, &rowa, nnz, ca, va, INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValues(B, 1, &rowh, nnz, ch, vh, INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecRestoreArrayRead(X, &x);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (J != B) {
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
   Assembles the 5-point finite-difference matrix into jac on the 2D grid
   of the DM attached to ksp.

   Interior rows get the full stencil. On boundary rows the behaviour
   depends on user->bcType:
     - NEUMANN  : only the neighbours that exist are coupled, and the
                  diagonal is the sum of the coupling weights actually used;
     - DIRICHLET: a diagonal entry is inserted and then an error
                  (PETSC_ERR_SUP) is raised, i.e. this case is not supported;
     - any other value leaves the boundary row untouched.
   For NEUMANN a null space created with the "has constant" flag set is
   attached to J after assembly.

   ctx must point to the UserContext. Returns 0 or a PETSc error code.
*/
PetscErrorCode ComputeMatrix(KSP ksp, Mat J,Mat jac, void *ctx)
{
  UserContext    *user = (UserContext*)ctx;
  PetscErrorCode ierr;
  PetscInt       gi,gj,mx,my,xm,ym,xs,ys,nnz,nx,ny;
  PetscScalar    v[5],Hx,Hy,HydHx,HxdHy;
  MatStencil     row, col[5];
  DM             da;
  PetscBool      onBoundary;

  PetscFunctionBeginUser;
  ierr  = KSPGetDM(ksp,&da);CHKERRQ(ierr);
  ierr  = DMDAGetInfo(da,0,&mx,&my,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
  Hx    = 1.0 / (PetscReal)(mx);
  Hy    = 1.0 / (PetscReal)(my);
  HxdHy = Hx/Hy;
  HydHx = Hy/Hx;
  ierr  = DMDAGetCorners(da,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);

  for (gj=ys; gj<ys+ym; gj++) {
    for (gi=xs; gi<xs+xm; gi++) {
      row.i = gi;
      row.j = gj;
      onBoundary = (PetscBool)(gi==0 || gj==0 || gi==mx-1 || gj==my-1);

      if (!onBoundary) {
        /* interior point: full stencil */
        v[0] = -HxdHy;              col[0].i = gi;   col[0].j = gj-1;
        v[1] = -HydHx;              col[1].i = gi-1; col[1].j = gj;
        v[2] = 2.0*(HxdHy + HydHx); col[2].i = gi;   col[2].j = gj;
        v[3] = -HydHx;              col[3].i = gi+1; col[3].j = gj;
        v[4] = -HxdHy;              col[4].i = gi;   col[4].j = gj+1;
        ierr = MatSetValuesStencil(jac,1,&row,5,col,v,INSERT_VALUES);CHKERRQ(ierr);
        continue;
      }

      if (user->bcType == DIRICHLET) {
        v[0] = 2.0*(HxdHy + HydHx);
        ierr = MatSetValuesStencil(jac,1,&row,1,&row,v,INSERT_VALUES);CHKERRQ(ierr);
        SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Dirichlet boundary conditions not supported !\n");
      } else if (user->bcType == NEUMANN) {
        /* nx / ny count the neighbours kept in the x / y direction */
        nnz = 0;
        nx  = 0;
        ny  = 0;
        if (gj!=0) {
          v[nnz] = -HxdHy;
          col[nnz].i = gi;
          col[nnz].j = gj-1;
          nnz++;
          ny++;
        }
        if (gi!=0) {
          v[nnz] = -HydHx;
          col[nnz].i = gi-1;
          col[nnz].j = gj;
          nnz++;
          nx++;
        }
        if (gi!=mx-1) {
          v[nnz] = -HydHx;
          col[nnz].i = gi+1;
          col[nnz].j = gj;
          nnz++;
          nx++;
        }
        if (gj!=my-1) {
          v[nnz] = -HxdHy;
          col[nnz].i = gi;
          col[nnz].j = gj+1;
          nnz++;
          ny++;
        }
        /* diagonal goes last */
        v[nnz] = (PetscReal)(ny)*HxdHy + (PetscReal)(nx)*HydHx;
        col[nnz].i = gi;
        col[nnz].j = gj;
        nnz++;
        ierr = MatSetValuesStencil(jac,1,&row,nnz,col,v,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (user->bcType == NEUMANN) {
    MatNullSpace nullspace;

    ierr = MatNullSpaceCreate(PETSC_COMM_WORLD,PETSC_TRUE,0,0,&nullspace);CHKERRQ(ierr);
    ierr = MatSetNullSpace(J,nullspace);CHKERRQ(ierr);
    ierr = MatNullSpaceDestroy(&nullspace);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
   Numeric phase of the triple product RAB = R*A*B, where R and A are accessed
   through their Mat_SeqAIJ row structure (i, j, a arrays) and B and RAB through
   the raw arrays returned by MatDenseGetArray().

   The product is formed column by column without storing A*B: for a group of
   columns, C = A*B(:,cols) is written into work, then R*C is written into the
   matching columns of RAB. Columns are handled four at a time, with a second
   loop for the remaining ones.

   work must provide room for at least 4*am scalars, am being the local number
   of rows of A (four consecutive columns of length am are written into it).
   B is indexed with consecutive columns bm entries apart and RAB with columns
   rm entries apart, i.e. a leading dimension equal to the local row count is
   assumed for both.

   Returns immediately with 0 when RAB has no rows or no columns; otherwise
   raises PETSC_ERR_ARG_SIZ if the local dimensions of R, A, B and RAB do not
   match.
*/
PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work)
{
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data;
  PetscErrorCode ierr;
  PetscScalar    *b,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
  MatScalar      *aa,*ra;
  PetscInt       cn=B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n;
  PetscInt       am2=2*am,am3=3*am,bm4=4*bm;
  PetscScalar    *d,*c,*c2,*c3,*c4;
  PetscInt       *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n;
  PetscInt       rm2=2*rm,rm3=3*rm,colrm;

  PetscFunctionBegin;
  /* nothing to compute for an empty result; note this happens before the size checks */
  if (!dm || !dn) PetscFunctionReturn(0);
  if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm);
  if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am);
  if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n);
  if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n);

  ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
  ierr = MatDenseGetArray(RAB,&d);CHKERRQ(ierr);
  /* b1..b4 point at four consecutive columns of B; c..c4 at four consecutive columns of the work buffer */
  b1   = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
  c    = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am;
  /* the bound cn-4 (not cn-3) leaves between one and four columns for the clean-up loop below when cn > 0 */
  for (col=0; col<cn-4; col += 4){  /* over columns of C */
    for (i=0; i<am; i++) {        /* over rows of A in those columns */
      r1 = r2 = r3 = r4 = 0.0;
      n   = ai[i+1] - ai[i];      /* number of stored entries in row i of A */
      aj  = a->j + ai[i];
      aa  = a->a + ai[i];
      for (j=0; j<n; j++) {
        /* aa and aj are advanced only in the last statement, after all four columns used the current entry */
        r1 += (*aa)*b1[*aj];
        r2 += (*aa)*b2[*aj];
        r3 += (*aa)*b3[*aj];
        r4 += (*aa++)*b4[*aj++];
      }
      c[i]       = r1;
      c[am  + i] = r2;
      c[am2 + i] = r3;
      c[am3 + i] = r4;
    }
    /* move on to the next group of four columns of B */
    b1 += bm4;
    b2 += bm4;
    b3 += bm4;
    b4 += bm4;

    /* RAB[:,col] = R*C[:,col] */
    colrm = col*rm;               /* offset of column col in the RAB array */
    for (i=0; i<rm; i++) {        /* over rows of R in those columns */
      r1 = r2 = r3 = r4 = 0.0;
      n   = r->i[i+1] - r->i[i];  /* number of stored entries in row i of R */
      rj  = r->j + r->i[i];
      ra  = r->a + r->i[i];
      for (j=0; j<n; j++) {
        r1 += (*ra)*c[*rj];
        r2 += (*ra)*c2[*rj];
        r3 += (*ra)*c3[*rj];
        r4 += (*ra++)*c4[*rj++];
      }
      d[colrm + i]       = r1;
      d[colrm + rm + i]  = r2;
      d[colrm + rm2 + i] = r3;
      d[colrm + rm3 + i] = r4;
    }
  }
  /* remaining columns, one at a time; b1 already points at column col of B */
  for (;col<cn; col++){     /* over extra columns of C */
    for (i=0; i<am; i++) {  /* over rows of A in those columns */
      r1 = 0.0;
      n   = a->i[i+1] - a->i[i];
      aj  = a->j + a->i[i];
      aa  = a->a + a->i[i];
      for (j=0; j<n; j++) {
        r1 += (*aa++)*b1[*aj++];
      }
      c[i]     = r1;
    }
    b1 += bm;

    for (i=0; i<rm; i++) {  /* over rows of R in those columns */
      r1 = 0.0;
      n   = r->i[i+1] - r->i[i];
      rj  = r->j + r->i[i];
      ra  = r->a + r->i[i];
      for (j=0; j<n; j++) {
        r1 += (*ra++)*c[*rj++];
      }
      d[col*rm + i]     = r1;
    }
  }
  /* one multiply and one add per stored entry of A and of R, for each of the cn columns */
  ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr);

  ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(RAB,&d);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc,char **args) { const int iM11=0, iM12=1, iL11=2, iL22=3, iL21=4, ix=5; Mat M11,M12,L11,L22,L21; /* matrix */ Vec x,y; /* input and output vectors */ Vec omg1,omg2,omg3,omg4; /* temporary vectors for the operation y=Ax */ KSP ksp; /* linear solver context */ PetscViewer fd[5]; /* viewer */ char file[6][PETSC_MAX_PATH_LEN]; /* input file name */ PetscErrorCode ierr; PetscInt M, N; /* number of rows and columns of the GLOBAL matrices (they should be the same) */ PetscInt m, n; /* number of rows and columns of the LOCAL matrices */ PetscInt istart, iend; /* ownership row range of the process using GLOBAL indexes */ PetscScalar one=1.0; PetscMPIInt rank, size; PetscBool flg[6]; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD, "Number of processes size=%d\n", size); // ************ READ MATRICES AND VECTOR x FROM INPUT ***** // M11 and M12 ierr = PetscOptionsGetString(PETSC_NULL,"-m11",file[iM11],PETSC_MAX_PATH_LEN,&flg[iM11]);CHKERRQ(ierr); ierr = PetscOptionsGetString(PETSC_NULL,"-m12",file[iM12],PETSC_MAX_PATH_LEN,&flg[iM12]);CHKERRQ(ierr); // L11 L22 and L21 ierr = PetscOptionsGetString(PETSC_NULL,"-l11",file[iL11],PETSC_MAX_PATH_LEN,&flg[iL11]);CHKERRQ(ierr); ierr = PetscOptionsGetString(PETSC_NULL,"-l22",file[iL22],PETSC_MAX_PATH_LEN,&flg[iL22]);CHKERRQ(ierr); ierr = PetscOptionsGetString(PETSC_NULL,"-l21",file[iL21],PETSC_MAX_PATH_LEN,&flg[iL21]);CHKERRQ(ierr); // All of the matrix have to be defined by the user. // If the user don't specify none of them, it will generate laplacian matrices. 
if (flg[iM11] && flg[iM12] && flg[iL11] && flg[iL22] && flg[iL21]){ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file[iM11],FILE_MODE_READ,\ &fd[iM11]);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file[iM12],FILE_MODE_READ,\ &fd[iM12]);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file[iL11],FILE_MODE_READ,\ &fd[iL11]);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file[iL22],FILE_MODE_READ,\ &fd[iL22]);CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file[iL21],FILE_MODE_READ,\ &fd[iL21]);CHKERRQ(ierr); // Load the matrix and vector; then destroy the viewer. // M11 and M12 ierr = MatCreate(PETSC_COMM_WORLD,&M11);CHKERRQ(ierr); ierr = MatSetType(M11,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(M11);CHKERRQ(ierr); ierr = MatLoad(M11,fd[iM11]);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&M12);CHKERRQ(ierr); ierr = MatSetType(M12,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(M12);CHKERRQ(ierr); ierr = MatLoad(M12,fd[iM12]);CHKERRQ(ierr); // L11 L22 and L21 ierr = MatCreate(PETSC_COMM_WORLD,&L11);CHKERRQ(ierr); ierr = MatSetType(L11,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(L11);CHKERRQ(ierr); ierr = MatLoad(L11,fd[iL11]);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&L22);CHKERRQ(ierr); ierr = MatSetType(L22,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(L22);CHKERRQ(ierr); ierr = MatLoad(L22,fd[iL22]);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&L21);CHKERRQ(ierr); ierr = MatSetType(L21,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(L21);CHKERRQ(ierr); ierr = MatLoad(L21,fd[iL21]);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd[iM11]);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd[iM12]);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd[iL11]);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd[iL22]);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd[iL21]);CHKERRQ(ierr); } else if(!flg[iM11] && !flg[iM12] && !flg[iL11] && !flg[iL22] && !flg[iL21]){ // ******************* CREATING FAKE MATRICES 
***************** PetscInt i,col[3]; M = N = 100; PetscScalar value[3]; ierr = MatCreate(PETSC_COMM_WORLD,&M11);CHKERRQ(ierr); ierr = MatSetSizes(M11,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(M11);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&M12);CHKERRQ(ierr); ierr = MatSetSizes(M12,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(M12);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&L11);CHKERRQ(ierr); ierr = MatSetSizes(L11,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(L11);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&L22);CHKERRQ(ierr); ierr = MatSetSizes(L22,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(L22);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&L21);CHKERRQ(ierr); ierr = MatSetSizes(L21,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(L21);CHKERRQ(ierr); // Set values for them value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=1; i<M-1; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(M11,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(M12,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L11,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L22,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L21,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } i = N - 1; col[0] = N - 2; col[1] = N - 1; ierr = MatSetValues(M11,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(M12,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L11,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L22,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L21,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); i = 0; col[0] = 0; col[1] = 1; value[0] = 2.0; value[1] = -1.0; ierr = MatSetValues(M11,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = 
MatSetValues(M12,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L11,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L22,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(L21,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(M11,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(M11,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(M12,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(M12,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(L11,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(L11,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(L22,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(L22,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(L21,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(L21,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); // ******************** END CREATING FAKE MATRICES ************* } else{ SETERRQ(PETSC_COMM_WORLD,1,"You must either indicate the ascii file for each matrix M11 M12 L11 L22 L21 using the option -m11 -m12 .... or without any of these options."); PetscFinalize(); return 1; } // Get some information about the partitioning of the matrix ierr = MatGetSize(M11,&M,&N);CHKERRQ(ierr); printf("Global dimension of the matrix M11 M=%d N=%d\n",M,N); ierr = MatGetLocalSize(M11,&m,&n); printf("Local dimension of the matrix M11 m=%d n=%d\n",m,n); ierr = MatGetOwnershipRange(M11,&istart,&iend); printf("Ownership range of the rows for process %d istart=%d iend=%d\n",rank,istart,iend); // Read vector x from input. If it's not specified by the user, the vector x will be a unitary vector. 
ierr = PetscOptionsGetString(PETSC_NULL,"-x",file[ix],PETSC_MAX_PATH_LEN,&flg[ix]);CHKERRQ(ierr); if(!flg[ix]){ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,M);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecSet(x,one);CHKERRQ(ierr); /*ierr = VecAssemblyBegin(x);CHKERRQ(ierr);*/ /*ierr = VecAssemblyEnd(x);CHKERRQ(ierr);*/ } else{ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file[ix],FILE_MODE_READ,&fd[ix]);CHKERRQ(ierr); ierr = VecLoad(x,fd[ix]);CHKERRQ(ierr); } ierr = PetscObjectSetName((PetscObject) x, "The input vector");CHKERRQ(ierr); // ************ END READ MATRICES AND VECTOR x FROM INPUT ***** // Create the temporary vectors and y ierr = VecDuplicate(x,&y);CHKERRQ(ierr); ierr = VecDuplicate(x,&omg1);CHKERRQ(ierr); ierr = VecDuplicate(x,&omg2);CHKERRQ(ierr); ierr = VecDuplicate(x,&omg3);CHKERRQ(ierr); ierr = VecDuplicate(x,&omg4);CHKERRQ(ierr); // ****************** COMPUTE y=Ax ******************* // Set the Krylov object ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); // Set operators. Here the matrix that defines the linear system // also serves as the preconditioning matrix. ierr = KSPSetOperators(ksp,L22,L22,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); // Set runtime options, e.g., // -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol> // These options will override those specified above as long as // KSPSetFromOptions() is called _after_ any other customization // routines. 
ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); // Multiplication y = A*x // omg1 = M11*x // omg2 = L21*x // L22*omg3 = omg2 // omg4 = omg1 + M12*omg3 // L11*y = omg4 ierr = MatMult(M11, x, omg1);CHKERRQ(ierr); ierr = MatMult(L21, x, omg2);CHKERRQ(ierr); ierr = KSPSolve(ksp,omg2,omg3);CHKERRQ(ierr); ierr = MatMult(M12, omg3, omg4);CHKERRQ(ierr); ierr = VecAXPY(omg4, one, omg1);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,L11,L11,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPSolve(ksp,omg4,y);CHKERRQ(ierr); // ****************** END COMPUTE y=Ax ************************** /*ierr = VecView(y, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);*/ PetscBool test = PETSC_FALSE; ierr = PetscOptionsGetBool(PETSC_NULL,"-test",&test,PETSC_NULL);CHKERRQ(ierr); if(test){ // ******************** TESTING ********************************* // the testing doesn't work if the number of process are more than one // because the type of the matrices must be different from matmpiaij. Let's try matseqaij Mat L11_inv, L22_inv, I; Mat A; Mat M11_d; Vec y2; PetscInt i; PetscScalar val; // Create identity matrix ierr = MatCreate(PETSC_COMM_WORLD,&I);CHKERRQ(ierr); ierr = MatSetType(I, MATDENSE); ierr = MatSetSizes(I,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(I);CHKERRQ(ierr); val = 1.0; for (i=0; i<M; i++) { ierr = MatSetValues(I,1,&i,1,&i,&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(I,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(I,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); // Create L11_inv ierr = MatCreate(PETSC_COMM_WORLD,&L11_inv);CHKERRQ(ierr); ierr = MatSetType(L11_inv, MATDENSE); ierr = MatSetSizes(L11_inv,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(L11_inv);CHKERRQ(ierr); ierr = MatAssemblyBegin(L11_inv,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(L11_inv,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); // Create L22_inv ierr = MatCreate(PETSC_COMM_WORLD,&L22_inv);CHKERRQ(ierr); ierr = MatSetType(L22_inv, MATDENSE); ierr 
= MatSetSizes(L22_inv,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(L22_inv);CHKERRQ(ierr); ierr = MatAssemblyBegin(L22_inv,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(L22_inv,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); // Create A ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); // Define M11_d ierr = MatCreate(PETSC_COMM_WORLD,&M11_d);CHKERRQ(ierr); ierr = MatSetType(M11_d, MATDENSE); ierr = MatSetSizes(M11_d,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetFromOptions(M11_d);CHKERRQ(ierr); ierr = MatAssemblyBegin(M11_d,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(M11_d,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); // Calculate A = L11^{-1}*(M11 + M12*L22^{-1}*L21) IS perm, iperm; MatFactorInfo info; ierr = MatGetOrdering(L11,MATORDERINGNATURAL,&perm,&iperm);CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info); CHKERRQ(ierr); ierr = MatLUFactor(L11, perm, iperm, &info); CHKERRQ(ierr); ierr = MatMatSolve(L11, I, L11_inv);CHKERRQ(ierr); // TODO try to convert L11_inv to be sparse such as matseqaij // ierr = MatView(L11_inv, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = MatGetOrdering(L22,MATORDERINGNATURAL,&perm,&iperm);CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info); CHKERRQ(ierr); ierr = MatLUFactor(L22, perm, iperm, &info); CHKERRQ(ierr); ierr = MatMatSolve(L22, I, L22_inv);CHKERRQ(ierr); ierr = MatMatMult(M12, L22_inv, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &A);CHKERRQ(ierr); ierr = MatMatMult(A, L21, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &A);CHKERRQ(ierr); ierr = MatConvert(M11, MATDENSE, MAT_INITIAL_MATRIX, &M11_d); ierr = MatAXPY(A,1.0,M11_d,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatMatMult(L11_inv, A, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &A);CHKERRQ(ierr); ierr = 
VecDuplicate(x,&y2);CHKERRQ(ierr); ierr = MatMult(A, x, y2);CHKERRQ(ierr); /*ierr = VecView(y2, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);*/ // Check the error PetscReal norm; ierr = VecAXPY(y2,-1.0,y);CHKERRQ(ierr); ierr = VecNorm(y2,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %A\n", norm);CHKERRQ(ierr); // ******************** END TESTING ***************************** } PetscFinalize(); return 0; }
int main(int argc,char **args) { Mat C; PetscErrorCode ierr; PetscInt N = 2,rowidx,colidx; Vec u,b,r; KSP ksp; PetscReal norm; PetscMPIInt rank,size; PetscScalar v; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); /* create stiffness matrix C = [1 2; 2 3] */ ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); if (rank == 0){ rowidx = 0; colidx = 0; v = 1.0; ierr = MatSetValues(C,1,&rowidx,1,&colidx,&v,INSERT_VALUES);CHKERRQ(ierr); rowidx = 0; colidx = 1; v = 2.0; ierr = MatSetValues(C,1,&rowidx,1,&colidx,&v,INSERT_VALUES);CHKERRQ(ierr); rowidx = 1; colidx = 0; v = 2.0; ierr = MatSetValues(C,1,&rowidx,1,&colidx,&v,INSERT_VALUES);CHKERRQ(ierr); rowidx = 1; colidx = 1; v = 3.0; ierr = MatSetValues(C,1,&rowidx,1,&colidx,&v,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* create right hand side and solution */ ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr); ierr = VecSetSizes(u,PETSC_DECIDE,N);CHKERRQ(ierr); ierr = VecSetFromOptions(u);CHKERRQ(ierr); ierr = VecDuplicate(u,&b);CHKERRQ(ierr); ierr = VecDuplicate(u,&r);CHKERRQ(ierr); ierr = VecSet(u,0.0);CHKERRQ(ierr); ierr = VecSet(b,1.0);CHKERRQ(ierr); /* solve linear system C*u = b */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,u);CHKERRQ(ierr); /* check residual r = C*u - b */ ierr = MatMult(C,u,r);CHKERRQ(ierr); ierr = VecAXPY(r,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"|| C*u - b|| = %G\n",norm);CHKERRQ(ierr); /* solve C^T*u = b twice */ 
ierr = KSPSolveTranspose(ksp,b,u);CHKERRQ(ierr); /* check residual r = C^T*u - b */ ierr = MatMultTranspose(C,u,r);CHKERRQ(ierr); ierr = VecAXPY(r,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"|| C^T*u - b|| = %G\n",norm);CHKERRQ(ierr); ierr = KSPSolveTranspose(ksp,b,u);CHKERRQ(ierr); ierr = MatMultTranspose(C,u,r);CHKERRQ(ierr); ierr = VecAXPY(r,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"|| C^T*u - b|| = %G\n",norm);CHKERRQ(ierr); /* solve C*u = b again */ ierr = KSPSolve(ksp,b,u);CHKERRQ(ierr); ierr = MatMult(C,u,r);CHKERRQ(ierr); ierr = VecAXPY(r,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(r,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"|| C*u - b|| = %G\n",norm);CHKERRQ(ierr); ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&r);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc,char **args) { PetscErrorCode ierr; PetscInitialize(&argc,&args,(char *)0,help); PetscInt m = 10; /* default number of rows and columns IN GRID, but matrix is (m^2) x (m^2) */ ierr = PetscOptionsGetInt(PETSC_NULL,"-m",&m,PETSC_NULL);CHKERRQ(ierr); MPI_Comm com = PETSC_COMM_WORLD; PetscMPIInt rank, size; ierr = MPI_Comm_rank(com, &rank); CHKERRQ(ierr); ierr = MPI_Comm_size(com, &size); CHKERRQ(ierr); /* create m x m two-dimensional grid for periodic boundary condition problem */ DA da2; PetscInt dof=1, stencilwidth=1; ierr = DACreate2d(com, DA_XYPERIODIC, DA_STENCIL_STAR, m,m,PETSC_DECIDE,PETSC_DECIDE, dof,stencilwidth,PETSC_NULL,PETSC_NULL,&da2); CHKERRQ(ierr); /* get da2-managed Vecs */ Vec x,b,u; ierr = DACreateGlobalVector(da2,&x); CHKERRQ(ierr); ierr = VecDuplicate(x,&b); CHKERRQ(ierr); ierr = VecDuplicate(x,&u); CHKERRQ(ierr); Mat A; ierr = DAGetMatrix(da2, MATMPIAIJ, &A); CHKERRQ(ierr); /* alternative call below is not quite same as result from DAGetMatrix(), because of nonzero allocation; the Mat ownership ranges are same */ /* ierr = MatCreateMPIAIJ(com, mlocal, mlocal, m*m, m*m, 5, PETSC_NULL, 4, PETSC_NULL, &A); CHKERRQ(ierr) */ ierr = MatSetFromOptions(A);CHKERRQ(ierr); /* report on ownership range */ PetscInt rstart,rend,mlocal; ierr = VecGetOwnershipRange(x,&rstart,&rend);CHKERRQ(ierr); ierr = VecGetLocalSize(x,&mlocal);CHKERRQ(ierr); PetscInt A_rstart,A_rend; ierr = MatGetOwnershipRange(A,&A_rstart,&A_rend);CHKERRQ(ierr); if ((rstart != A_rstart) || (rend != A_rend)) { ierr = PetscPrintf(com, "Vec and Mat ownership ranges different!!! 
ending ...\n"); CHKERRQ(ierr); PetscEnd(); } else { ierr = PetscSynchronizedPrintf(com, "rank=%d has Vec and Mat ownership: mlocal=%d, rstart=%d, rend=%d\n", rank,mlocal,rstart,rend); CHKERRQ(ierr); } PetscSynchronizedFlush(com); /* get local part of grid */ PetscInt xm,ym,xs,ys; DAGetCorners(da2,&xs,&ys,0,&xm,&ym,0); /* report on local part of grid */ ierr = PetscSynchronizedPrintf(com, "rank=%d has da2-managed-Vec local ranges: xs=%d, xm=%d, ys=%d, ym=%d\n", rank,xs,xm,ys,ym); CHKERRQ(ierr); PetscSynchronizedFlush(com); /* set up linear system */ PetscScalar **barr, **uarr; /* RHS and exact soln, resp. */ DAVecGetArray(da2, b, &barr); DAVecGetArray(da2, u, &uarr); PetscScalar dx = 1.0/(double)m, dy = dx, pi = 3.14159265358979; PetscScalar xi,yj; PetscInt diag=0,north=1,east=2,south=3,west=4; PetscScalar vals[5] = {-4.0 + dx * dx, 1.0, 1.0, 1.0, 1.0}; MatStencil row, col[5]; /* these are not "stencils" at all, but local grid to global indices helpers */ PetscInt i,j,num; for (j=ys; j<ys+ym; j++) { for(i=xs; i<xs+xm; i++) { /* entries of matrix A */ row.i = i; row.j = j; row.c = 0; /* dof = 1 so first component; note row.k is for 3d DAs */ for (num=0; num<5; num++) col[num].c = 0; /* set diag first, then go through stencil neighbors */ col[diag].i = i; col[diag].j = j; col[north].i = i; col[north].j = j+1; col[east].i = i+1; col[east].j = j; col[south].i = i; col[south].j = j-1; col[west].i = i-1; col[west].j = j; ierr = MatSetValuesStencil(A,1,&row,5,col,vals,INSERT_VALUES); CHKERRQ(ierr); /* entries of vectors: exact solution u and right-hand-side b */ xi = (double)i * dx; yj = (double)j * dy; uarr[j][i] = sin(2.0 * pi * xi) * cos(4.0 * pi * yj); barr[j][i] = (1.0 - 20.0 * pi * pi) * uarr[j][i]; barr[j][i] *= dx * dx; } } DAVecRestoreArray(da2, b, &barr); DAVecRestoreArray(da2, u, &uarr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = VecAssemblyBegin(b); CHKERRQ(ierr); ierr = 
VecAssemblyEnd(b); CHKERRQ(ierr); ierr = VecAssemblyBegin(u); CHKERRQ(ierr); ierr = VecAssemblyEnd(u); CHKERRQ(ierr); /* uncomment for dense view; -mat_view default format is good enough for most purposes PetscViewer viewer; PetscViewerCreate(com, &viewer); PetscViewerSetType(viewer, PETSC_VIEWER_ASCII); PetscViewerSetFormat(viewer, PETSC_VIEWER_ASCII_DENSE); MatView(A,viewer); PetscViewerDestroy(viewer); */ /* setup solver context now that Mat and Vec are assembled */ KSP ksp; ierr = KSPCreate(com,&ksp);CHKERRQ(ierr); /* Set "operators". Here the matrix that defines the linear system also serves as the preconditioning matrix. But we do not assert a relationship between their nonzero patterns.(???) */ ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* Following is optional; parameters could be set at runtime. */ ierr = KSPSetTolerances(ksp,1.e-7,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT); CHKERRQ(ierr); /* Set runtime options, e.g., -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol> */ ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); /* Solve linear system */ ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); /* Compute and report the error (and the iteration count and reason). 
*/ PetscScalar norminf, normtwo, neg_one=-1.0; PetscInt its; KSPConvergedReason reason; ierr = VecAXPY(x,neg_one,u);CHKERRQ(ierr); // x = x - u ierr = VecNorm(x,NORM_INFINITY,&norminf);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&normtwo);CHKERRQ(ierr); // discrete norm normtwo *= dx * dy; // integral norm ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); ierr = KSPGetConvergedReason(ksp,&reason); CHKERRQ(ierr); ierr = PetscPrintf(com, "Error norms ||err||_inf = %.3e, ||err||_2 = %.3e;\n" "Iterations = %d; Reason = %d\n", norminf, normtwo, its, (int) reason);CHKERRQ(ierr); /* destroy */ ierr = KSPDestroy(ksp);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = VecDestroy(u);CHKERRQ(ierr); ierr = VecDestroy(b);CHKERRQ(ierr); ierr = DADestroy(da2);CHKERRQ(ierr); /* Always call PetscFinalize() before exiting a program. */ ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
/*
   MatGetSubMatrices_MPIDense_Local - Extracts ismax sequential dense
   submatrices from the parallel dense matrix C.

   Input:
     C       - the parallel dense matrix
     ismax   - number of (row,column) index-set pairs
     isrow   - row index sets, one per submatrix; each must be sorted
     iscol   - column index sets, one per submatrix; each must be sorted
     scall   - MAT_REUSE_MATRIX to refill the existing submats[] (their sizes
               must match), anything else to create them
   Output:
     submats - the ismax submatrices, created on PETSC_COMM_SELF

   Outline: every process tells the owners which of their rows it needs
   (messages with tag0), the owners answer with the complete rows (messages
   with tag1), and each process then copies the requested columns of its own
   rows and of the received rows into its submatrices.
*/
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscTruth     sorted;

  PetscFunctionBegin;
  comm = ((PetscObject)C)->comm;
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;
  m    = C->rmap->N;

  /* Get a second tag so the row-value messages cannot be confused with the row-index messages */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check that both the row and the column index sets are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  /* irow/icol hold the index arrays, nrow/ncol their lengths; rtable has one entry per global row */
  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create the lookup table for the mapping: global row -> owning process,
     filled from the ownership ranges C->rmap->range[] */
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) {
      rtable[j] = i;
    }
  }

  /* Evaluate communication - message to whom, length of message, and buffer space
     required. Based on this, buffers are allocated, and data copied into them.
       w1[2*p]   - number of integers to send to process p
       w1[2*p+1] - 1 if a message goes to process p, otherwise 0
       w3[p]     - number of index sets that reference rows of process p
       w4[p]     - per-index-set row count for process p (scratch) */
  ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
    }
  }

  nrqs       = 0; /* no of outgoing messages */
  msz        = 0; /* total mesg length (for all procs) */
  w1[2*rank] = 0; /* no mesg sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i]) { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /* pa[] lists the processes that get a message */
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    w1[2*j] += w1[2*j+1] + 2* w3[j];
    msz     += w1[2*j];
  }

  /* Do a global reduction to determine how many messages to expect (nrqr) and the buffer size to use for each (bsz) */
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate memory for recv buffers. The pointer array gets nrqr+1 slots so that
     rbuf1[0] can be assigned even when nrqr is 0; all buffers share one allocation. */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate Memory for outgoing messages; tmp is the shared storage that
     sbuf1[] points into, ptr[] are write cursors, ctr[] per-IS counters */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    /* carve tmp into consecutive pieces of w1[2*j] integers, one per destination */
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[2*j];
    }
  }

  /* Form the outgoing messages */
  /* Initialize the header space: slot 0 counts the index sets recorded so far,
     the next 2*w3[j] slots are zeroed, and ptr[j] points at the first data slot */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf*/
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS: header pair k of a message holds
       (index-set number, number of rows requested from that index set) */
    for (j=0; j<size; j++) { /* Can Optimise this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* Now post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post receives to capture the row_data from other procs: N scalars for
     every row index requested (message length minus the header) */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j     = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr  = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr  = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive messages(row_nos) and then, pack and send off the rowvalues
     to the correct processors */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);
  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt    s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr    = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE; /* send processor */
      rbuf1_i = rbuf1[idex]; /* Actual message from s_proc */
      /* The requested row numbers occupy rbuf1_i[start..end-1]: start is the
         first position after the header and end is the received message length */
      start = 2*rbuf1_i[0] + 1;
      ierr  = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      ierr    = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i = sbuf2[idex];
      /* Now pack the data: walk the local array a->v along one row, stepping
         C->rmap->n entries from one column to the next */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }

  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices, or check and zero the ones being reused */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense *)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) {
        SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      }
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
      submats[i]->factor = C->factor;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],PETSC_NULL);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices: first the rows this process owns itself */
  {
    PetscInt    col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j] ;
        proc = rtable[row];
        if (proc == rank) {
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          /* entry (j,k) of the submatrix sits at offset j + k*m */
          for (k=0; k<ncol[i]; k++) {
            col          = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        }
      }
    }
  }

  /* Create row map: rmap[i][r] is the row of submatrix i that global row r of C goes to */
  /* this is a very expensive operation wrt memory usage (ismax * C->rmap->N integers) */
  ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now Receive the row_values and assemble the rest of the matrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;

    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* Now dig out the corresponding sbuf1, which contains the IS data_structure;
         the reply holds the rows in the order in which they were requested */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max+1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no  = sbuf1_i[2*j-1];
        is_sz  = sbuf1_i[2*j];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) {  /* For each row */
          row     = sbuf1_i[ct1]; ct1++;
          row     = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col          = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }

  /* End Send-Recv of row_values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);

  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);

  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*@ KSPComputeEigenvaluesExplicitly - Computes all of the eigenvalues of the preconditioned operator using LAPACK. Collective on KSP Input Parameter: + ksp - iterative context obtained from KSPCreate() - n - size of arrays r and c Output Parameters: + r - real part of computed eigenvalues, provided by user with a dimension at least of n - c - complex part of computed eigenvalues, provided by user with a dimension at least of n Notes: This approach is very slow but will generally provide accurate eigenvalue estimates. This routine explicitly forms a dense matrix representing the preconditioned operator, and thus will run only for relatively small problems, say n < 500. Many users may just want to use the monitoring routine KSPMonitorSingularValue() (which can be set with option -ksp_monitor_singular_value) to print the singular values at each iteration of the linear solve. The preconditoner operator, rhs vector, solution vectors should be set before this routine is called. i.e use KSPSetOperators(),KSPSolve() or KSPSetOperators() Level: advanced .keywords: KSP, compute, eigenvalues, explicitly .seealso: KSPComputeEigenvalues(), KSPMonitorSingularValue(), KSPComputeExtremeSingularValues(), KSPSetOperators(), KSPSolve() @*/ PetscErrorCode KSPComputeEigenvaluesExplicitly(KSP ksp,PetscInt nmax,PetscReal r[],PetscReal c[]) { Mat BA; PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm; PetscScalar *array; Mat A; PetscInt m,row,nz,i,n,dummy; const PetscInt *cols; const PetscScalar *vals; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); ierr = KSPComputeExplicitOperator(ksp,&BA);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MatGetSize(BA,&n,&n);CHKERRQ(ierr); if (size > 1) { /* assemble matrix on first processor */ ierr = MatCreate(PetscObjectComm((PetscObject)ksp),&A);CHKERRQ(ierr); if (!rank) { ierr = MatSetSizes(A,n,n,n,n);CHKERRQ(ierr); } else { ierr = 
MatSetSizes(A,0,0,n,n);CHKERRQ(ierr); } ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)BA,(PetscObject)A);CHKERRQ(ierr); ierr = MatGetOwnershipRange(BA,&row,&dummy);CHKERRQ(ierr); ierr = MatGetLocalSize(BA,&m,&dummy);CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = MatGetRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); row++; } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr); } else { ierr = MatDenseGetArray(BA,&array);CHKERRQ(ierr); } #if defined(PETSC_HAVE_ESSL) /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ if (!rank) { PetscScalar sdummy,*cwork; PetscReal *work,*realpart; PetscBLASInt clen,idummy,lwork,bn,zero = 0; PetscInt *perm; #if !defined(PETSC_USE_COMPLEX) clen = n; #else clen = 2*n; #endif ierr = PetscMalloc1(clen,&cwork);CHKERRQ(ierr); idummy = -1; /* unused */ ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); lwork = 5*n; ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); ierr = PetscMalloc1(n,&realpart);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,array,&bn,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); ierr = PetscFPTrapPop();CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); /* For now we stick with the convention of storing the real and imaginary components of evalues separately. But is this what we really want? 
*/ ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) for (i=0; i<n; i++) { realpart[i] = cwork[2*i]; perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = cwork[2*perm[i]]; c[i] = cwork[2*perm[i]+1]; } #else for (i=0; i<n; i++) { realpart[i] = PetscRealPart(cwork[i]); perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(cwork[perm[i]]); c[i] = PetscImaginaryPart(cwork[perm[i]]); } #endif ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(realpart);CHKERRQ(ierr); ierr = PetscFree(cwork);CHKERRQ(ierr); } #elif !defined(PETSC_USE_COMPLEX) if (!rank) { PetscScalar *work; PetscReal *realpart,*imagpart; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc2(n,&realpart,n,&imagpart);CHKERRQ(ierr); ierr = PetscMalloc1(5*n,&work);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt bn; ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,array,&bn,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); for (i=0; i<n; i++) perm[i] = i; ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree2(realpart,imagpart);CHKERRQ(ierr); } #else if (!rank) { PetscScalar *work,*eigs; PetscReal *rwork; PetscBLASInt idummy,lwork; 
PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc1(5*n,&work);CHKERRQ(ierr); ierr = PetscMalloc1(2*n,&rwork);CHKERRQ(ierr); ierr = PetscMalloc1(n,&eigs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt nb; ierr = PetscBLASIntCast(n,&nb);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&nb,array,&nb,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,rwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); for (i=0; i<n; i++) perm[i] = i; for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[perm[i]]); c[i] = PetscImaginaryPart(eigs[perm[i]]); } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(eigs);CHKERRQ(ierr); } #endif if (size > 1) { ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); } else { ierr = MatDenseRestoreArray(BA,&array);CHKERRQ(ierr); } ierr = MatDestroy(&BA);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **args) { Vec x, b, u; /* approx solution, RHS, exact solution */ Mat A; /* linear system matrix */ KSP ksp; /* linear solver context */ PC pc; /* preconditioner context */ PetscReal norm; /* norm of solution error */ PetscErrorCode ierr; PetscInt i,n = 10,col[3],its,rstart,rend,nlocal; PetscScalar neg_one = -1.0,one = 1.0,value[3]; PetscBool TEST_PROCEDURAL=PETSC_FALSE; PetscInitialize(&argc,&args,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-procedural",&TEST_PROCEDURAL,NULL);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the matrix and right-hand-side vector that define the linear system, Ax = b. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create vectors. Note that we form 1 vector from scratch and then duplicate as needed. For this simple case let PETSc decide how many elements of the vector are stored on each processor. The second argument to VecSetSizes() below causes PETSc to decide. */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* Identify the starting and ending mesh points on each processor for the interior part of the mesh. We let PETSc decide above. */ ierr = VecGetOwnershipRange(x,&rstart,&rend);CHKERRQ(ierr); ierr = VecGetLocalSize(x,&nlocal);CHKERRQ(ierr); /* Create a tridiagonal matrix. 
See ../tutorials/ex23.c */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,nlocal,nlocal,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); /* Assemble matrix */ if (!rstart) { rstart = 1; i = 0; col[0] = 0; col[1] = 1; value[0] = 2.0; value[1] = -1.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } if (rend == n) { rend = n-1; i = n-1; col[0] = n-2; col[1] = n-1; value[0] = -1.0; value[1] = 2.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } /* Set entries corresponding to the mesh interior */ value[0] = -1.0; value[1] = 2.0; value[2] = -1.0; for (i=rstart; i<rend; i++) { col[0] = i-1; col[1] = i; col[2] = i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Set exact solution; then compute right-hand-side vector. */ ierr = VecSet(u,one);CHKERRQ(ierr); ierr = MatMult(A,u,b);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the linear solver and set various options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* Set linear solver defaults for this problem (optional). - By extracting the KSP and PC contexts from the KSP context, we can then directly call any KSP and PC routines to set various options. 
- The following statements are optional; all of these parameters could alternatively be specified at runtime via KSPSetFromOptions(); */ if (TEST_PROCEDURAL) { /* Example of runtime options: '-pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type bjacobi' */ PetscMPIInt size,rank,subsize; Mat A_redundant; KSP innerksp; PC innerpc; MPI_Comm subcomm; ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCREDUNDANT);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); if (size < 3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "Num of processes %d must greater than 2",size); ierr = PCRedundantSetNumber(pc,size-2);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); /* Get subcommunicator and redundant matrix */ ierr = KSPSetUp(ksp);CHKERRQ(ierr); ierr = PCRedundantGetKSP(pc,&innerksp);CHKERRQ(ierr); ierr = KSPGetPC(innerksp,&innerpc);CHKERRQ(ierr); ierr = PCGetOperators(innerpc,NULL,&A_redundant,NULL);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)A_redundant,&subcomm);CHKERRQ(ierr); ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); if (subsize==1 && !rank) { printf("A_redundant:\n"); ierr = MatView(A_redundant,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } } else { ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); } /* Solve linear system */ ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(x,neg_one,u);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); if (norm > 1.e-14) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %G, Iterations %D\n",norm,its);CHKERRQ(ierr); } /* Free work space. */ ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*
   FormJacobianLocal - Evaluates Jacobian matrix.

   Input Parameters:
.  info - DMDA local info; this routine reads dof, mx, my, xs, ys, xm, ym and da
.  x    - local array of Field values, indexed as x[j][i]
.  A    - matrix that is only assembled here (and only when it differs from jac)
.  user - application context; alpha and lambda are read from it

   Output Parameter:
.  jac  - matrix that is zeroed, filled and assembled; a null space is attached

   Notes:
   The routine loops over the cells whose lower-left corner (i,j) lies in
   [xs, xs+xm-1) x [ys, ys+ym-1).  For each of the four corner nodes it either
   inserts identity rows for components 0 and 1 (when the node satisfies the
   boundary test written in the code) or appends the node's rows to rows[].
   Component 2 is handled the same way only when PRES_BC is defined; otherwise
   its row is always appended.

   JLocal is used as a row-major array with row stride dof*4; it is passed to
   MatSetValuesStencil() as a numRows x (4*dof) block.

   NOTE(review): JLocal[144], rows[4*3] and cols[4*3] look sized for dof == 3;
   confirm that info->dof is always 3 here.
   NOTE(review): MatAssemblyBegin()/MatAssemblyEnd() with MAT_FLUSH_ASSEMBLY are
   called only inside the boundary branches; confirm this is acceptable when
   running on more than one process.
*/
PetscErrorCode FormJacobianLocal(DMDALocalInfo *info, Field **x, Mat A,Mat jac, AppCtx *user)
{
  Field          uLocal[4];
  PetscScalar    JLocal[144];
  MatStencil     rows[4*3], cols[4*3], ident;
  /* local node numbers (0..3, counter-clockwise from (i,j)) used by the lower and upper element loops */
  PetscInt       lowerRow[3] = {0, 1, 3};
  PetscInt       upperRow[3] = {2, 3, 1};
  PetscInt       hasLower[3], hasUpper[3], velocityRows[4], pressureRows[4];
  PetscScalar    alpha,lambda,hx,hy,hxhy,detJInv,G[4],sc,one = 1.0;
  PetscInt       i,j,k,l,numRows,dof = info->dof;
  PetscErrorCode ierr;
  MatNullSpace   nullspace;
  Vec            N;

  PetscFunctionBeginUser;
  alpha   = user->alpha;
  lambda  = user->lambda;
  hx      = 1.0/(PetscReal)(info->mx-1);
  hy      = 1.0/(PetscReal)(info->my-1);
  sc      = hx*hy*lambda;
  hxhy    = hx*hy;
  detJInv = hxhy;

  /* 2x2 coefficient array stored row-major; the off-diagonal entries are zero */
  G[0] = (1.0/(hx*hx)) * detJInv;
  G[1] = 0.0;
  G[2] = G[1];
  G[3] = (1.0/(hy*hy)) * detJInv;
  for (k = 0; k < 4; k++) {
    /* printf("G[%d] = %g\n", k, G[k]);*/
  }

  ierr = MatZeroEntries(jac);CHKERRQ(ierr);
  /*
     Compute entries for the locally owned part of the Jacobian.
      - Currently, all PETSc parallel matrix formats are partitioned by
        contiguous chunks of rows across the processors.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Here, we set all entries for a particular row at once.
      - We can set matrix entries using either
        MatSetValuesLocal() or MatSetValues(), as discussed above.
  */
#define NOT_PRES_BC 1
  for (j=info->ys; j<info->ys+info->ym-1; j++) {
    for (i=info->xs; i<info->xs+info->xm-1; i++) {
      /* start each cell with an empty element matrix and no rows selected */
      ierr    = PetscMemzero(JLocal, 144 * sizeof(PetscScalar));CHKERRQ(ierr);
      numRows = 0;
      /* Lower element */
      uLocal[0] = x[j][i];
      uLocal[1] = x[j][i+1];
      uLocal[2] = x[j+1][i+1];
      uLocal[3] = x[j+1][i];
      /* i,j */
      if (i == 0 || j == 0) {
        hasLower[0] = 0;
        /* flush before and after, since the identity entries use INSERT_VALUES while the element matrix uses ADD_VALUES */
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ident.i = i; ident.j = j; ident.c = 0;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
        ident.i = i; ident.j = j; ident.c = 1;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PRES_BC)
        ident.i = i; ident.j = j; ident.c = 2;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#endif
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      } else {
        hasLower[0]     = 1;
        /* remember where this node's component-0 row sits in rows[]; component 1 follows it */
        velocityRows[0] = numRows;
        rows[numRows].i = i; rows[numRows].j = j; rows[numRows].c = 0;
        numRows++;
        rows[numRows].i = i; rows[numRows].j = j; rows[numRows].c = 1;
        numRows++;
#if defined(PRES_BC)
        pressureRows[0] = numRows;
        rows[numRows].i = i; rows[numRows].j = j; rows[numRows].c = 2;
        numRows++;
#endif
      }
#if !defined(PRES_BC)
      /* without PRES_BC the component-2 row is appended for every node, boundary or not */
      pressureRows[0] = numRows;
      rows[numRows].i = i; rows[numRows].j = j; rows[numRows].c = 2;
      numRows++;
#endif
      cols[0*dof+0].i = i; cols[0*dof+0].j = j; cols[0*dof+0].c = 0;
      cols[0*dof+1].i = i; cols[0*dof+1].j = j; cols[0*dof+1].c = 1;
      cols[0*dof+2].i = i; cols[0*dof+2].j = j; cols[0*dof+2].c = 2;
      /* i+1,j */
      if ((i == info->mx-2) || (j == 0)) {
        hasLower[1] = 0;
        hasUpper[2] = 0;
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ident.i = i+1; ident.j = j; ident.c = 0;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
        ident.i = i+1; ident.j = j; ident.c = 1;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PRES_BC)
        ident.i = i+1; ident.j = j; ident.c = 2;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#endif
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      } else {
        hasLower[1]     = 1;
        hasUpper[2]     = 1;
        velocityRows[1] = numRows;
        rows[numRows].i = i+1; rows[numRows].j = j; rows[numRows].c = 0;
        numRows++;
        rows[numRows].i = i+1; rows[numRows].j = j; rows[numRows].c = 1;
        numRows++;
#if defined(PRES_BC)
        pressureRows[1] = numRows;
        rows[numRows].i = i+1; rows[numRows].j = j; rows[numRows].c = 2;
        numRows++;
#endif
      }
#if !defined(PRES_BC)
      pressureRows[1] = numRows;
      rows[numRows].i = i+1; rows[numRows].j = j; rows[numRows].c = 2;
      numRows++;
#endif
      cols[1*dof+0].i = i+1; cols[1*dof+0].j = j; cols[1*dof+0].c = 0;
      cols[1*dof+1].i = i+1; cols[1*dof+1].j = j; cols[1*dof+1].c = 1;
      cols[1*dof+2].i = i+1; cols[1*dof+2].j = j; cols[1*dof+2].c = 2;
      /* i+1,j+1 */
      if ((i == info->mx-2) || (j == info->my-2)) {
        hasUpper[0] = 0;
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ident.i = i+1; ident.j = j+1; ident.c = 0;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
        ident.i = i+1; ident.j = j+1; ident.c = 1;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PRES_BC)
        ident.i = i+1; ident.j = j+1; ident.c = 2;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#endif
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      } else {
        hasUpper[0]     = 1;
        velocityRows[2] = numRows;
        rows[numRows].i = i+1; rows[numRows].j = j+1; rows[numRows].c = 0;
        numRows++;
        rows[numRows].i = i+1; rows[numRows].j = j+1; rows[numRows].c = 1;
        numRows++;
#if defined(PRES_BC)
        pressureRows[2] = numRows;
        rows[numRows].i = i+1; rows[numRows].j = j+1; rows[numRows].c = 2;
        numRows++;
#endif
      }
#if !defined(PRES_BC)
      pressureRows[2] = numRows;
      rows[numRows].i = i+1; rows[numRows].j = j+1; rows[numRows].c = 2;
      numRows++;
#endif
      cols[2*dof+0].i = i+1; cols[2*dof+0].j = j+1; cols[2*dof+0].c = 0;
      cols[2*dof+1].i = i+1; cols[2*dof+1].j = j+1; cols[2*dof+1].c = 1;
      cols[2*dof+2].i = i+1; cols[2*dof+2].j = j+1; cols[2*dof+2].c = 2;
      /* i,j+1 */
      if ((i == 0) || (j == info->my-2)) {
        hasLower[2] = 0;
        hasUpper[1] = 0;
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ident.i = i; ident.j = j+1; ident.c = 0;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
        ident.i = i; ident.j = j+1; ident.c = 1;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PRES_BC)
        ident.i = i; ident.j = j+1; ident.c = 2;
        ierr = MatSetValuesStencil(jac,1,&ident,1,&ident,&one,INSERT_VALUES);CHKERRQ(ierr);
#endif
        ierr = MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      } else {
        hasLower[2]     = 1;
        hasUpper[1]     = 1;
        velocityRows[3] = numRows;
        rows[numRows].i = i; rows[numRows].j = j+1; rows[numRows].c = 0;
        numRows++;
        rows[numRows].i = i; rows[numRows].j = j+1; rows[numRows].c = 1;
        numRows++;
#if defined(PRES_BC)
        pressureRows[3] = numRows;
        rows[numRows].i = i; rows[numRows].j = j+1; rows[numRows].c = 2;
        numRows++;
#endif
      }
#if !defined(PRES_BC)
      pressureRows[3] = numRows;
      rows[numRows].i = i; rows[numRows].j = j+1; rows[numRows].c = 2;
      numRows++;
#endif
      cols[3*dof+0].i = i; cols[3*dof+0].j = j+1; cols[3*dof+0].c = 0;
      cols[3*dof+1].i = i; cols[3*dof+1].j = j+1; cols[3*dof+1].c = 1;
      cols[3*dof+2].i = i; cols[3*dof+2].j = j+1; cols[3*dof+2].c = 2;

      /* Lower Element */
      for (k = 0; k < 3; k++) {
#if defined(PRES_BC)
        if (!hasLower[k]) continue;
#endif
        for (l = 0; l < 3; l++) {
          /* Divergence */
          JLocal[pressureRows[lowerRow[k]]*dof*4 + lowerRow[l]*dof+0] += hx*Divergence[(k*2+0)*3 + l];
          JLocal[pressureRows[lowerRow[k]]*dof*4 + lowerRow[l]*dof+1] += hy*Divergence[(k*2+1)*3 + l];
          /*        JLocal[pressureRows[lowerRow[k]]*dof*4 + lowerRow[l]*dof+2] += Identity[k*3 + l]; */
        }
        /* velocityRows[] is only set for nodes that were appended to rows[], so skip the others */
        if (!hasLower[k]) continue;
        for (l = 0; l < 3; l++) {
          /* Laplacian */
          JLocal[velocityRows[lowerRow[k]]*dof*4     + lowerRow[l]*dof+0] += alpha*(G[0]*Kref[(k*2*3 + l)*2]+G[1]*Kref[(k*2*3 + l)*2+1]+G[2]*Kref[((k*2+1)*3 + l)*2]+G[3]*Kref[((k*2+1)*3 + l)*2+1]);
          JLocal[(velocityRows[lowerRow[k]]+1)*dof*4 + lowerRow[l]*dof+1] += alpha*(G[0]*Kref[(k*2*3 + l)*2]+G[1]*Kref[(k*2*3 + l)*2+1]+G[2]*Kref[((k*2+1)*3 + l)*2]+G[3]*Kref[((k*2+1)*3 + l)*2+1]);
          /*        JLocal[velocityRows[lowerRow[k]]*dof*4     + lowerRow[l]*dof+0] += Identity[k*3 + l]; */
          /*        JLocal[(velocityRows[lowerRow[k]]+1)*dof*4 + lowerRow[l]*dof+1] += Identity[k*3 + l]; */
          /* Gradient */
          JLocal[velocityRows[lowerRow[k]]*dof*4     + lowerRow[l]*dof+2] += hx*Gradient[(k*2+0)*3 + l];
          JLocal[(velocityRows[lowerRow[k]]+1)*dof*4 + lowerRow[l]*dof+2] += hy*Gradient[(k*2+1)*3 + l];
        }
      }
      /* Upper Element */
      for (k = 0; k < 3; k++) {
#if defined(PRES_BC)
        if (!hasUpper[k]) continue;
#endif
        for (l = 0; l < 3; l++) {
          /* Divergence */
          JLocal[pressureRows[upperRow[k]]*dof*4 + upperRow[l]*dof+0] += hx*Divergence[(k*2+0)*3 + l];
          JLocal[pressureRows[upperRow[k]]*dof*4 + upperRow[l]*dof+1] += hy*Divergence[(k*2+1)*3 + l];
          /*        JLocal[pressureRows[upperRow[k]]*dof*4 + upperRow[l]*dof+2] += Identity[k*3 + l]; */
        }
        if (!hasUpper[k]) continue;
        for (l = 0; l < 3; l++) {
          /* Laplacian */
          JLocal[velocityRows[upperRow[k]]*dof*4     + upperRow[l]*dof+0] += alpha*(G[0]*Kref[(k*2*3 + l)*2]+G[1]*Kref[(k*2*3 + l)*2+1]+G[2]*Kref[((k*2+1)*3 + l)*2]+G[3]*Kref[((k*2+1)*3 + l)*2+1]);
          JLocal[(velocityRows[upperRow[k]]+1)*dof*4 + upperRow[l]*dof+1] += alpha*(G[0]*Kref[(k*2*3 + l)*2]+G[1]*Kref[(k*2*3 + l)*2+1]+G[2]*Kref[((k*2+1)*3 + l)*2]+G[3]*Kref[((k*2+1)*3 + l)*2+1]);
          /* Gradient */
          JLocal[velocityRows[upperRow[k]]*dof*4     + upperRow[l]*dof+2] += hx*Gradient[(k*2+0)*3 + l];
          JLocal[(velocityRows[upperRow[k]]+1)*dof*4 + upperRow[l]*dof+2] += hy*Gradient[(k*2+1)*3 + l];
        }
      }

      /* let the helper update JLocal using the four corner values and the scale -|sc| */
      ierr = nonlinearJacobian(-1.0*PetscAbsScalar(sc), uLocal, JLocal);CHKERRQ(ierr);
      /* printf("Element matrix for (%d, %d)\n", i, j);*/
      /* printf("   col  ");*/
      for (l = 0; l < 4*3; l++) {
        /* printf("(%d, %d, %d) ", cols[l].i, cols[l].j, cols[l].c);*/
      }
      /* printf("\n");*/
      for (k = 0; k < numRows; k++) {
        /* printf("row (%d, %d, %d): ", rows[k].i, rows[k].j, rows[k].c);*/
        for (l = 0; l < 4; l++) {
          /* printf("%9.6g %9.6g %9.6g ", JLocal[k*dof*4 + l*dof+0], JLocal[k*dof*4 + l*dof+1], JLocal[k*dof*4 + l*dof+2]);*/
        }
        /* printf("\n");*/
      }
      /* add the numRows x (4*dof) element block for this cell */
      ierr = MatSetValuesStencil(jac,numRows,rows,4*dof,cols, JLocal,ADD_VALUES);CHKERRQ(ierr);
    }
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd().
  */
  ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (A != jac) {
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  /*
     Tell the matrix we will never add a new nonzero location to the
     matrix. If we do, it will generate an error.
  */
  ierr = MatSetOption(jac,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);

  /* build a one-vector null space from CreateNullSpace() and attach it to jac */
  ierr = CreateNullSpace(info->da,&N);CHKERRQ(ierr);
  ierr = MatNullSpaceCreate(PETSC_COMM_WORLD,PETSC_FALSE,1,&N,&nullspace);CHKERRQ(ierr);
  ierr = VecDestroy(&N);CHKERRQ(ierr);
  ierr = MatSetNullSpace(jac,nullspace);CHKERRQ(ierr);
  ierr = MatNullSpaceDestroy(&nullspace);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatGetMultiProcBlock_MPIBAIJ - Builds (or refills) on subComm a MATMPIBAIJ
   matrix that shares the diagonal block of mat and keeps only those
   off-diagonal block columns that are owned by ranks belonging to subComm.

   Input Parameters:
.  mat     - the MPIBAIJ matrix to extract from
.  subComm - the sub-communicator the new matrix lives on
.  scall   - MAT_INITIAL_MATRIX creates *subMat; any other value reuses it

   Output Parameter:
.  subMat  - the extracted matrix
*/
PetscErrorCode MatGetMultiProcBlock_MPIBAIJ(Mat mat, MPI_Comm subComm, MatReuse scall,Mat *subMat)
{
  Mat_MPIBAIJ    *mpi  = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *offd = (Mat_SeqBAIJ*)mpi->B->data;
  PetscInt       bs    = mat->rmap->bs;
  PetscMPIInt    worldSize,worldRank,subSize,subMe,subRank,rank;
  PetscMPIInt    *subToWorld;
  PetscInt       *colMap,*rowNnz,*rowIdx,*colIdx;
  PetscInt       nbColsB,nbRowsB,brow,p,bcol,firstRow,firstCol,r,c;
  PetscScalar    *block,*src;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&worldSize);CHKERRQ(ierr);
  ierr = MPI_Comm_size(subComm,&subSize);CHKERRQ(ierr);

  /* create the subMat object with the relevant layout */
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(subComm,subMat);CHKERRQ(ierr);
    ierr = MatSetType(*subMat,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(*subMat,mat->rmap->n,mat->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*subMat,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);

    /* the row and column layouts must be set up before preallocation */
    ierr = PetscLayoutSetBlockSize((*subMat)->rmap,mat->rmap->bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetBlockSize((*subMat)->cmap,mat->cmap->bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp((*subMat)->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp((*subMat)->cmap);CHKERRQ(ierr);
  }

  /* subToWorld[s] = rank in mat's communicator of the process that is rank s in subComm */
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&worldRank);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(subComm,&subMe);CHKERRQ(ierr);
  ierr = PetscMalloc1(subSize,&subToWorld);CHKERRQ(ierr);
  ierr = MPI_Allgather(&worldRank,1,MPI_INT,subToWorld,1,MPI_INT,subComm);CHKERRQ(ierr);

  /*
     Walk garray (blocked column indices of the off-diagonal part).  A column
     owned by a rank outside subComm keeps the value 0 (discarded); otherwise
     colMap holds its new blocked column index plus one.
  */
  nbColsB = mpi->B->cmap->n/bs;
  ierr    = PetscMalloc1(mpi->B->cmap->n/bs,&colMap);CHKERRQ(ierr);
  ierr    = PetscMemzero(colMap,mpi->B->cmap->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
  for (p=0; p<nbColsB; p++) {
    bcol = mpi->garray[p];
    for (subRank=0; subRank<subSize; subRank++) {
      rank = subToWorld[subRank];
      if (bcol < mat->cmap->range[rank]/bs || bcol >= mat->cmap->range[rank+1]/bs) continue;
      colMap[p] = (((*subMat)->cmap->range[subRank]- mat->cmap->range[rank])/bs + bcol + 1);
      break;
    }
  }

  nbRowsB = mpi->B->rmap->n/bs;
  if (scall == MAT_INITIAL_MATRIX) {
    /* count the surviving off-diagonal blocks per block row for preallocation */
    ierr = PetscMalloc1(mpi->B->rmap->n/bs,&rowNnz);CHKERRQ(ierr);
    ierr = PetscMemzero(rowNnz,mpi->B->rmap->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
    for (brow=0; brow<nbRowsB; brow++) {
      for (p=offd->i[brow]; p<offd->i[brow+1]; p++) {
        if (colMap[offd->j[p]]) rowNnz[brow]++;
      }
    }
    ierr = MatMPIBAIJSetPreallocation(*(subMat),bs,0,NULL,0,rowNnz);CHKERRQ(ierr);

    /* share the diagonal block of mat with the new matrix */
    ierr = MatDestroy(&((Mat_MPIBAIJ*)((*subMat)->data))->A);CHKERRQ(ierr);

    ((Mat_MPIBAIJ*)((*subMat)->data))->A = mpi->A;

    ierr = PetscObjectReference((PetscObject)mpi->A);CHKERRQ(ierr);
  } else if (((Mat_MPIBAIJ*)(*subMat)->data)->A != mpi->A) {
    /* NOTE(review): the previous diagonal block gets an extra reference here rather than a release - confirm this is intended */
    PetscObject previous = (PetscObject)((Mat_MPIBAIJ*)((*subMat)->data))->A;

    ierr = PetscObjectReference((PetscObject)previous);CHKERRQ(ierr);

    ((Mat_MPIBAIJ*)((*subMat)->data))->A = mpi->A;

    ierr = PetscObjectReference((PetscObject)mpi->A);CHKERRQ(ierr);
  }

  /* copy every surviving off-diagonal block of mat into subMat */
  ierr = PetscMalloc3(bs,&rowIdx,bs,&colIdx,bs*bs,&block);CHKERRQ(ierr);
  for (brow=0; brow<nbRowsB; brow++) {
    firstRow = (*subMat)->rmap->range[subMe] + brow*bs;
    for (p=offd->i[brow]; p<offd->i[brow+1]; p++) {
      firstCol = colMap[offd->j[p]];
      if (!firstCol) continue;
      firstCol--;      /* undo the +1 stored in colMap */
      firstCol *= bs;
      for (r=0; r<bs; r++) {
        rowIdx[r] = firstRow + r;
        colIdx[r] = firstCol + r;
      }
      /* the stored block is column-oriented; MatSetValues() is fed a row-oriented block */
      src = offd->a + p*bs*bs;
      for (c=0; c<bs; c++) {
        for (r=0; r<bs; r++) {
          block[c+r*bs] = src[c*bs+r];
        }
      }
      ierr = MatSetValues(*subMat,bs,rowIdx,bs,colIdx,block,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*subMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*subMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* release the temporary work arrays */
  ierr = PetscFree3(rowIdx,colIdx,block);CHKERRQ(ierr);
  ierr = PetscFree(subToWorld);CHKERRQ(ierr);
  ierr = PetscFree(colMap);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscFree(rowNnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Matrix C is copied from ~petsc/src/ksp/ksp/examples/tutorials/ex5.c\n\n"; /* Example: ./ex132 -mat_view_info */ #include <petscmat.h> #undef __FUNCT__ #define __FUNCT__ "main" int main(int argc,char **args) { Mat C,C1,C2; /* matrix */ PetscScalar v; PetscInt Ii,J,Istart,Iend; PetscErrorCode ierr; PetscInt i,j,m = 3,n = 2; PetscMPIInt size,rank; PetscBool mat_nonsymmetric = PETSC_FALSE; MatInfo info; PetscInitialize(&argc,&args,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-m",&m,PETSC_NULL);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); n = 2*size; /* Set flag if we are doing a nonsymmetric problem; the default is symmetric. */ ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_nonsym",&mat_nonsymmetric,PETSC_NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(C,5,PETSC_NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(C,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; i = Ii/n; j = Ii - i*n; if (i>0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES); } /* Make the matrix nonsymmetric if desired */ if (mat_nonsymmetric) { for (Ii=Istart; Ii<Iend; Ii++) { v = -1.5; i = Ii/n; if (i>1) {J = Ii-n-1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} } } else { ierr = MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr); } ierr = 
MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* First, create C1 = 2.0*C1 + C, C1 has less non-zeros than C */ ierr = PetscPrintf(PETSC_COMM_WORLD, "\ncreate C1 = 2.0*C1 + C, C1 has less non-zeros than C \n"); ierr = MatCreate(PETSC_COMM_WORLD,&C1);CHKERRQ(ierr); ierr = MatSetSizes(C1,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C1);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(C1,1,PETSC_NULL);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { ierr = MatSetValues(C1,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," MatAXPY(C1,2.0,C,DIFFERENT_NONZERO_PATTERN)...\n"); ierr = MatAXPY(C1,2.0,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatGetInfo(C1,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," C1: nz_allocated = %g; nz_used = %g; nz_unneeded = %g\n",info.nz_allocated,info.nz_used, info.nz_unneeded); /* Secondly, create C2 = 2.0*C2 + C, C2 has non-zero pattern of C2 + C */ ierr = PetscPrintf(PETSC_COMM_WORLD, "\ncreate C2 = 2.0*C2 + C, C2 has non-zero pattern of C2 + C \n"); ierr = MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&C2);CHKERRQ(ierr); /* ierr = MatCreate(PETSC_COMM_WORLD,&C2);CHKERRQ(ierr); ierr = MatSetSizes(C2,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); ierr = MatSetFromOptions(C2);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(C2,5,PETSC_NULL);CHKERRQ(ierr); */ for (Ii=Istart; Ii<Iend; Ii++) { v = 1.0; ierr = MatSetValues(C2,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); printf(" MatAXPY(C2,2.0,C,SUBSET_NONZERO_PATTERN)...\n"); ierr = MatAXPY(C2,2.0,C,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatGetInfo(C2,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); ierr = 
PetscPrintf(PETSC_COMM_WORLD," C2: nz_allocated = %g; nz_used = %g; nz_unneeded = %g\n",info.nz_allocated,info.nz_used, info.nz_unneeded); ierr = MatDestroy(&C1);CHKERRQ(ierr); ierr = MatDestroy(&C2);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*
   InitializeLaplaceMatrix - Zeroes M and fills it over the locally owned part
   of the DA grid.

   Grid points inside the border band (low or high side in any direction) get
   the value 1.0 added on the diagonal.  All other points get, per direction,
   the three-point second-difference entries 1/h^2, -2/h^2, 1/h^2.

   dmmg supplies the DA (dmmg->dm) and the State (dmmg->user); J is not used.
   Always returns 0; the return codes of the PETSc calls are not inspected.
*/
PetscErrorCode InitializeLaplaceMatrix(DMMG dmmg,Mat J,Mat M)
{
  State       *state;
  PData       pdata;
  DA          grid;
  int         first[3], count[3], pt[3], gsize[3], dim, border;
  int         inLowBand, inHighBand;
  MatStencil  col[3],row;
  PetscScalar v[3], vb=1.0;
  real        h[3], L, zpad;

  PetscPrintf(PETSC_COMM_WORLD,"Assembling Matrix...");

  state = (State*) dmmg->user;
  grid  = (DA)dmmg->dm;
  pdata = state->PD;

  MatZeroEntries(M);

  L    = pdata->interval[1]-pdata->interval[0];
  zpad = state->zpad;

  DAGetInfo(grid,0,&(gsize[0]),&(gsize[1]),&(gsize[2]),0,0,0,0,0,0,0);
  PetscPrintf(PETSC_COMM_WORLD,"%d %d %d \n", gsize[0], gsize[1], gsize[2]);

  FOR_DIM
    h[dim] = L/gsize[dim];

  /* width of the border band, measured in grid cells of direction 0 */
  border = (int) ceil( ((pdata->interval[1]-pdata->interval[0])-(2.*zpad))/h[0]/2. );

  /* Get local portion of the grid */
  DAGetCorners(grid, &(first[0]), &(first[1]), &(first[2]), &(count[0]), &(count[1]), &(count[2]));

  /* loop over local portion of grid */
  for(pt[2]=first[2]; pt[2]<first[2]+count[2]; pt[2]++) {
    for(pt[1]=first[1]; pt[1]<first[1]+count[1]; pt[1]++) {
      for(pt[0]=first[0]; pt[0]<first[0]+count[0]; pt[0]++) {
        /* start with every stencil entry (and the row) sitting on the current point */
        FOR_DIM
          {
            col[dim].i=pt[0];
            col[dim].j=pt[1];
            col[dim].k=pt[2];
            row.i=pt[0];
            row.j=pt[1];
            row.k=pt[2];
          }

        inLowBand  = (pt[0] <= border+1 || pt[1] <= border+1 || pt[2] <= border+1);
        inHighBand = (pt[0]>=gsize[0]-1-border || pt[1]>=gsize[1]-1-border || pt[2]>=gsize[2]-1-border);

        /* Boundary */
        if (inLowBand || inHighBand) {
          MatSetValuesStencil(M,1,&row,1,col+1,&vb,ADD_VALUES);
          continue;
        }

        FOR_DIM
          {
            /* move the outer two stencil entries one step apart along this direction */
            if (dim == 0)      { col[0].i -= 1; col[2].i += 1; }
            else if (dim == 1) { col[0].j -= 1; col[2].j += 1; }
            else if (dim == 2) { col[0].k -= 1; col[2].k += 1; }

            /* values to enter */
            v[0]=1.0/SQR(h[dim]);
            v[1]=-2.0/SQR(h[dim]);
            v[2]=+1.0/SQR(h[dim]);
            MatSetValuesStencil(M,1,&row,3,col,v,ADD_VALUES);

            /* put the stencil entries back on the current point for the next direction */
            if (dim == 0)      { col[0].i += 1; col[2].i -= 1; }
            else if (dim == 1) { col[0].j += 1; col[2].j -= 1; }
            else if (dim == 2) { col[0].k += 1; col[2].k -= 1; }
          }
      }
    }
  }

  MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
  PetscPrintf(PETSC_COMM_WORLD,"done.\n");

  return 0;
}
void NRSolver :: applyConstraintsToStiffness(SparseMtrx *k) { if ( this->smConstraintVersion == k->giveVersion() ) { return; } #if 0 #ifdef __PETSC_MODULE if ( solverType == ST_Petsc ) { PetscScalar diagVal = 1.0; if ( k->giveType() != SMT_PetscMtrx ) { OOFEM_ERROR("NRSolver :: applyConstraintsToStiffness: PetscSparseMtrx Expected"); } PetscSparseMtrx *lhs = ( PetscSparseMtrx * ) k; if ( !prescribedEgsIS_defined ) { IntArray eqs; #ifdef __PARALLEL_MODE Natural2GlobalOrdering *n2lpm = engngModel->givePetscContext(1)->giveN2Gmap(); int s = prescribedEqs.giveSize(); eqs.resize(s); for ( int i = 1; i <= s; i++ ) { eqs.at(i) = n2lpm->giveNewEq( prescribedEqs.at(i) ); } ISCreateGeneral(PETSC_COMM_WORLD, s, eqs.givePointer(), & prescribedEgsIS); //ISView(prescribedEgsIS,PETSC_VIEWER_STDOUT_WORLD); #else eqs.resize(numberOfPrescribedDofs); for ( int i = 1; i <= numberOfPrescribedDofs; i++ ) { eqs.at(i) = prescribedEqs.at(i) - 1; } ISCreateGeneral(PETSC_COMM_SELF, numberOfPrescribedDofs, eqs.givePointer(), & prescribedEgsIS); //ISView(prescribedEgsIS,PETSC_VIEWER_STDOUT_SELF); #endif prescribedEgsIS_defined = true; } //MatView(*(lhs->giveMtrx()),PETSC_VIEWER_STDOUT_WORLD); MatZeroRows(* ( lhs->giveMtrx() ), prescribedEgsIS, & diagVal); //MatView(*(lhs->giveMtrx()),PETSC_VIEWER_STDOUT_WORLD); if ( numberOfPrescribedDofs ) { this->smConstraintVersion = k->giveVersion(); } return; } #endif // __PETSC_MODULE #else #ifdef __PETSC_MODULE if ( solverType == ST_Petsc ) { if ( k->giveType() != SMT_PetscMtrx ) { OOFEM_ERROR("NRSolver :: applyConstraintsToStiffness: PetscSparseMtrx Expected"); } PetscSparseMtrx *lhs = ( PetscSparseMtrx * ) k; Vec diag; PetscScalar *ptr; int eq; PetscContext *parallel_context = engngModel->givePetscContext(this->domain->giveNumber()); parallel_context->createVecGlobal(& diag); MatGetDiagonal(* lhs->giveMtrx(), diag); VecGetArray(diag, & ptr); for ( int i = 1; i <= numberOfPrescribedDofs; i++ ) { eq = prescribedEqs.at(i) - 1; MatSetValue(* ( 
lhs->giveMtrx() ), eq, eq, ptr [ eq ] * 1.e6, INSERT_VALUES); } MatAssemblyBegin(* lhs->giveMtrx(), MAT_FINAL_ASSEMBLY); MatAssemblyEnd(* lhs->giveMtrx(), MAT_FINAL_ASSEMBLY); VecRestoreArray(diag, & ptr); VecDestroy(&diag); if ( numberOfPrescribedDofs ) { this->smConstraintVersion = k->giveVersion(); } return; } #endif // __PETSC_MODULE #endif for ( int i = 1; i <= numberOfPrescribedDofs; i++ ) { k->at( prescribedEqs.at(i), prescribedEqs.at(i) ) *= 1.e6; } if ( numberOfPrescribedDofs ) { this->smConstraintVersion = k->giveVersion(); } }
int main(int argc, char * argv[]) { typedef MPI::HGeometryForest<DIM,DOW> forest_t; typedef MPI::BirdView<forest_t> ir_mesh_t; typedef FEMSpace<double,DIM,DOW> fe_space_t; typedef MPI::DOF::GlobalIndex<forest_t, fe_space_t> global_index_t; PetscInitialize(&argc, &argv, (char *)NULL, help); forest_t forest(PETSC_COMM_WORLD); forest.readMesh(argv[1]); ir_mesh_t ir_mesh(forest); int round = 0; if (argc >= 3) round = atoi(argv[2]); ir_mesh.globalRefine(round); ir_mesh.semiregularize(); ir_mesh.regularize(false); setenv("AFEPACK_TEMPLATE_PATH", "/usr/local/AFEPack/template/triangle", 1); TemplateGeometry<DIM> tri; tri.readData("triangle.tmp_geo"); CoordTransform<DIM,DIM> tri_ct; tri_ct.readData("triangle.crd_trs"); TemplateDOF<DIM> tri_td(tri); tri_td.readData("triangle.1.tmp_dof"); BasisFunctionAdmin<double,DIM,DIM> tri_bf(tri_td); tri_bf.readData("triangle.1.bas_fun"); std::vector<TemplateElement<double,DIM,DIM> > tmp_ele(1); tmp_ele[0].reinit(tri, tri_td, tri_ct, tri_bf); RegularMesh<DIM,DOW>& mesh = ir_mesh.regularMesh(); fe_space_t fem_space(mesh, tmp_ele); u_int n_ele = mesh.n_geometry(DIM); fem_space.element().resize(n_ele); for (int i = 0;i < n_ele;i ++) { fem_space.element(i).reinit(fem_space, i, 0); } fem_space.buildElement(); fem_space.buildDof(); fem_space.buildDofBoundaryMark(); std::cout << "Building global indices ... " << std::flush; global_index_t global_index(forest, fem_space); global_index.build(); std::cout << "OK!" << std::endl; std::cout << "Building the linear system ... 
" << std::flush; Mat A; Vec x, b; MatCreateMPIAIJ(PETSC_COMM_WORLD, global_index.n_primary_dof(), global_index.n_primary_dof(), PETSC_DECIDE, PETSC_DECIDE, 0, PETSC_NULL, 0, PETSC_NULL, &A); VecCreateMPI(PETSC_COMM_WORLD, global_index.n_primary_dof(), PETSC_DECIDE, &b); fe_space_t::ElementIterator the_ele = fem_space.beginElement(), end_ele = fem_space.endElement(); for (;the_ele != end_ele;++ the_ele) { double vol = the_ele->templateElement().volume(); const QuadratureInfo<DIM>& qi = the_ele->findQuadratureInfo(5); std::vector<Point<DIM> > q_pnt = the_ele->local_to_global(qi.quadraturePoint()); int n_q_pnt = qi.n_quadraturePoint(); std::vector<double> jac = the_ele->local_to_global_jacobian(qi.quadraturePoint()); std::vector<std::vector<double> > bas_val = the_ele->basis_function_value(q_pnt); std::vector<std::vector<std::vector<double> > > bas_grad = the_ele->basis_function_gradient(q_pnt); const std::vector<int>& ele_dof = the_ele->dof(); u_int n_ele_dof = ele_dof.size(); FullMatrix<double> ele_mat(n_ele_dof, n_ele_dof); Vector<double> ele_rhs(n_ele_dof); for (u_int l = 0;l < n_q_pnt;++ l) { double JxW = vol*jac[l]*qi.weight(l); double f_val = _f_(q_pnt[l]); for (u_int i = 0;i < n_ele_dof;++ i) { for (u_int j = 0;j < n_ele_dof;++ j) { ele_mat(i, j) += JxW*(bas_val[i][l]*bas_val[j][l] + innerProduct(bas_grad[i][l], bas_grad[j][l])); } ele_rhs(i) += JxW*f_val*bas_val[i][l]; } } /** * 此处将单元矩阵和单元载荷先计算好,然后向全局的矩阵和载荷向量上 * 集中,可以提高效率。 */ std::vector<int> indices(n_ele_dof); for (u_int i = 0;i < n_ele_dof;++ i) { indices[i] = global_index(ele_dof[i]); } MatSetValues(A, n_ele_dof, &indices[0], n_ele_dof, &indices[0], &ele_mat(0,0), ADD_VALUES); VecSetValues(b, n_ele_dof, &indices[0], &ele_rhs(0), ADD_VALUES); } MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); VecAssemblyBegin(b); MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); VecAssemblyEnd(b); VecDuplicate(b, &x); std::cout << "OK!" 
<< std::endl; KSP solver; KSPCreate(PETSC_COMM_WORLD, &solver); KSPSetOperators(solver, A, A, SAME_NONZERO_PATTERN); KSPSetType(solver, KSPCG); KSPSetFromOptions(solver); KSPSolve(solver, b, x); if (forest.rank() == 0) { KSPConvergedReason reason; KSPGetConvergedReason(solver,&reason); if (reason == KSP_DIVERGED_INDEFINITE_PC) { printf("\nDivergence because of indefinite preconditioner;\n"); printf("Run the executable again but with -pc_ilu_shift option.\n"); } else if (reason<0) { printf("\nOther kind of divergence: this should not happen.\n"); } else { PetscInt its; KSPGetIterationNumber(solver,&its); printf("\nConvergence in %d iterations.\n",(int)its); } printf("\n"); } MatDestroy(A); VecDestroy(b); KSPDestroy(solver); /// 准备解函数 FEMFunction<double,DIM> u_h(fem_space); Vec X; VecCreateSeqWithArray(PETSC_COMM_SELF, global_index.n_local_dof(), &u_h(0), &X); /// 将 PETSc 解出来的向量取出到有限元函数 u_h 中来 std::vector<int> primary_idx(global_index.n_primary_dof()); global_index.build_primary_index(&primary_idx[0]); IS is; ISCreateGeneralWithArray(forest.communicator(), global_index.n_local_dof(), &global_index(0), &is); VecScatter scatter; VecScatterCreate(x, is, X, PETSC_NULL, &scatter); VecScatterBegin(scatter, x, X, INSERT_VALUES, SCATTER_FORWARD); VecScatterEnd(scatter, x, X, INSERT_VALUES, SCATTER_FORWARD); /// 清理 PETSc 的变量 VecDestroy(x); VecDestroy(X); VecScatterDestroy(scatter); ISDestroy(is); char filename[1024]; sprintf(filename, "u_h%d.dx", forest.rank()); u_h.writeOpenDXData(filename); PetscFinalize(); return 0; }
PetscErrorCode FormIJacobian(TS ts,PetscReal t,Vec U,Vec Udot,PetscReal a,Mat *J,Mat *Jpre,MatStructure *str,void *ctx) { PetscErrorCode ierr; PetscInt i,rstart,rend,Mx; PetscReal hx,sx; AppCtx *user = (AppCtx*)ctx; DM da; MatStencil col[3],row; PetscInt nc; PetscScalar vals[3]; PetscFunctionBeginUser; ierr = TSGetDM(ts,&da); CHKERRQ(ierr); ierr = MatGetOwnershipRange(*Jpre,&rstart,&rend); CHKERRQ(ierr); ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE, PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE); hx = 1.0/(PetscReal)(Mx-1); sx = 1.0/(hx*hx); for (i=rstart; i<rend; i++) { nc = 0; row.i = i; if (user->boundary == 0 && (i == 0 || i == Mx-1)) { col[nc].i = i; vals[nc++] = 1.0; } else if (user->boundary > 0 && i == 0) { /* Left Neumann */ col[nc].i = i; vals[nc++] = 1.0; col[nc].i = i+1; vals[nc++] = -1.0; } else if (user->boundary > 0 && i == Mx-1) { /* Right Neumann */ col[nc].i = i-1; vals[nc++] = -1.0; col[nc].i = i; vals[nc++] = 1.0; } else { /* Interior */ col[nc].i = i-1; vals[nc++] = -1.0*sx; col[nc].i = i; vals[nc++] = 2.0*sx + a; col[nc].i = i+1; vals[nc++] = -1.0*sx; } ierr = MatSetValuesStencil(*Jpre,1,&row,nc,col,vals,INSERT_VALUES); CHKERRQ(ierr); } ierr = MatAssemblyBegin(*Jpre,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(*Jpre,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); if (*J != *Jpre) { ierr = MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); } if (user->viewJacobian) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Jpre:\n"); CHKERRQ(ierr); ierr = MatView(*Jpre,PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*
   FormJacobian - Evaluates Jacobian matrix.

   Input Parameters:
.  snes - the SNES context (not used here)
.  x    - input vector
.  ctx  - ApplicationCtx; the fields da and h are read

   Output Parameters:
.  jac - Jacobian matrix; filled and assembled here
.  B   - optionally different preconditioning matrix (not touched here)

   Rows written:
     first and last global grid point :  [ 1 ]
     every other locally owned point i:  [ d, -2*d + 2*x[i], d ]  with d = 1/h^2
*/
PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
{
  ApplicationCtx *user = (ApplicationCtx*) ctx;
  DM             da    = user->da;
  PetscScalar    *sol,invh2,entries[3],unit = 1.0;
  PetscInt       row,cols[3],npoints,first,count,end;
  PetscErrorCode ierr;

  PetscFunctionBeginUser;
  /* access the entries of x through the DMDA, read-only */
  ierr = DMDAVecGetArrayRead(da,x,&sol);CHKERRQ(ierr);
  /* locally owned index range */
  ierr = DMDAGetCorners(da,&first,NULL,NULL,&count,NULL,NULL);CHKERRQ(ierr);
  /* global number of grid points */
  ierr = DMDAGetInfo(da,NULL,&npoints,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);

  /* Boundary rows are the identity; shrink the local range so that only
     interior points remain for the loop below. */
  if (first == 0) {             /* left boundary */
    row  = 0;
    ierr = MatSetValues(jac,1,&row,1,&row,&unit,INSERT_VALUES);CHKERRQ(ierr);
    first++;
    count--;
  }
  if (first+count == npoints) { /* right boundary */
    row  = npoints-1;
    ierr = MatSetValues(jac,1,&row,1,&row,&unit,INSERT_VALUES);CHKERRQ(ierr);
    count--;
  }

  /* Interior grid points: all three entries of a row are set in one call. */
  invh2 = 1.0/(user->h*user->h);
  end   = first+count;
  for (row=first; row<end; row++) {
    cols[0]    = row - 1;
    cols[1]    = row;
    cols[2]    = row + 1;
    entries[0] = invh2;
    entries[1] = -2.0*invh2 + 2.0*sol[row];
    entries[2] = invh2;
    ierr = MatSetValues(jac,1,&row,3,cols,entries,INSERT_VALUES);CHKERRQ(ierr);
  }

  /* Two-step assembly; the vector array is handed back between the two
     assembly calls. */
  ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArrayRead(da,x,&sol);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   WholeMSurfHessian - Evaluates the Hessian over the whole grid.

   Input:
     daapplication - TAO application object (not used here)
     da            - distributed array
     X             - the current point
     ptr           - user-defined application context (AppCtx); hx, hy and
                     area are read from it

   Output:
     H - Hessian at X

   For every grid cell a symmetric 4x4 block is built from a lower-triangle
   and an upper-triangle contribution and added to H with local indices.
   Corner numbering inside a cell: 0 is (0,0); 1 is (1,0); 2 is (0,1); 3 is (1,1).
*/
static int WholeMSurfHessian(TAO_APPLICATION daapplication, DA da, Vec X, Mat H, void *ptr)
{
  AppCtx     *user = (AppCtx*)ptr;
  Vec        ghosted;
  int        info;
  PetscInt   i, j, r, c, node[4];
  PetscInt   xs, xm, gxs, gxm, xe, ys, ym, gys, gym, ye;
  double     cellH[4][4];
  double     **u;
  double     hx, hy, area, byhxhx, byhyhy;
  double     dvdx, dvdy, flow, fup;
  double     areadivf, areadivf3;
  PetscTruth wasAssembled;

  hx     = user->hx;
  hy     = user->hy;
  area   = user->area;
  byhxhx = 1.0 / (hx * hx);
  byhyhy = 1.0 / (hy * hy);

  info = DAGetLocalVector(da, &ghosted); CHKERRQ(info);

  /* clear old values, but only if the matrix has been assembled before */
  info = MatAssembled(H,&wasAssembled); CHKERRQ(info);
  if (wasAssembled) {
    info = MatZeroEntries(H); CHKERRQ(info);
  }

  info = DAGlobalToLocalBegin(da, X, INSERT_VALUES, ghosted); CHKERRQ(info);
  info = DAGlobalToLocalEnd(da, X, INSERT_VALUES, ghosted); CHKERRQ(info);
  info = DAVecGetArray(da, ghosted, (void**)&u); CHKERRQ(info);

  info = DAGetCorners(da, &xs, &ys, TAO_NULL, &xm, &ym, TAO_NULL); CHKERRQ(info);
  info = DAGetGhostCorners(da, &gxs, &gys, TAO_NULL, &gxm, &gym, TAO_NULL); CHKERRQ(info);

  /* cells run from the first owned point up to, but excluding, the last ghosted point */
  xe = gxs + gxm - 1;
  ye = gys + gym - 1;

  for (j = ys; j < ye; j++) {
    for (i = xs; i < xe; i++) {
      /* lower triangle contribution */
      dvdx = (u[j][i] - u[j][i+1]) / hx;
      dvdy = (u[j][i] - u[j+1][i]) / hy;
      flow = sqrt( 1 + dvdx * dvdx + dvdy * dvdy );
      dvdx = dvdx / hx;
      dvdy = dvdy / hy;
      areadivf  = area / flow;
      areadivf3 = areadivf / (flow * flow);

      cellH[0][0] = areadivf * (byhxhx + byhyhy) - areadivf3 * (dvdx + dvdy) * (dvdx + dvdy);
      cellH[0][1] = areadivf * (-byhxhx) + areadivf3 * (dvdx + dvdy) * (dvdx);
      cellH[0][2] = areadivf * (-byhyhy) + areadivf3 * (dvdx + dvdy) * (dvdy);
      cellH[0][3] = 0.0;
      cellH[1][1] = areadivf * byhxhx - areadivf3 * dvdx * dvdx;
      cellH[1][2] = areadivf3 * (-dvdx) * dvdy;
      cellH[2][2] = areadivf * byhyhy - areadivf3 * dvdy * dvdy;

      /* upper triangle contribution */
      dvdx = (u[j+1][i+1] - u[j+1][i]) / hx;
      dvdy = (u[j+1][i+1] - u[j][i+1]) / hy;
      fup  = sqrt( 1 + dvdx * dvdx + dvdy * dvdy );
      dvdx = dvdx / hx;
      dvdy = dvdy / hy;
      areadivf  = area / fup;
      areadivf3 = areadivf / (fup * fup);

      cellH[1][1] += areadivf * byhyhy - areadivf3 * dvdy * dvdy;
      cellH[1][2] += areadivf3 * (-dvdy) * dvdx;
      cellH[2][2] += areadivf * byhxhx - areadivf3 * (dvdx * dvdx);
      cellH[1][3] = areadivf * (-byhyhy) + areadivf3 * (dvdx + dvdy) * dvdy;
      cellH[2][3] = areadivf * (-byhxhx) + areadivf3 * (dvdx + dvdy) * dvdx;
      cellH[3][3] = areadivf * (byhxhx + byhyhy) - areadivf3 * (dvdx + dvdy) * (dvdx + dvdy);

      /* mirror the upper part of the block into the lower part */
      for (r = 1; r < 4; r++) {
        for (c = 0; c < r; c++) {
          cellH[r][c] = cellH[c][r];
        }
      }

      /* local (ghosted) indices of the four cell corners */
      node[0] = (j-gys) * gxm + (i-gxs);
      node[1] = node[0] + 1;
      node[2] = node[0] + gxm;
      node[3] = node[2] + 1;
      info = MatSetValuesLocal(H,4,node,4,node,(PetscScalar*)cellH,ADD_VALUES); CHKERRQ(info);
    }
  }

  info = DAVecRestoreArray(da, ghosted, (void**)&u); CHKERRQ(info);

  info = MatAssemblyBegin(H, MAT_FINAL_ASSEMBLY); CHKERRQ(info);
  info = MatAssemblyEnd(H, MAT_FINAL_ASSEMBLY); CHKERRQ(info);
  info = MatSetOption(H, MAT_SYMMETRIC, PETSC_TRUE); CHKERRQ(info);

  info = DARestoreLocalVector(da, &ghosted); CHKERRQ(info);

  info = PetscLogFlops((xe-xs) * (ye-ys) * 83 + 4); CHKERRQ(info);
  return 0;
} /* WholeMSurfHessian */
int main(int argc,char **args) { Mat C; PetscScalar v,none = -1.0; PetscInt i,j,Ii,J,Istart,Iend,N,m = 4,n = 4,its,k; PetscErrorCode ierr; PetscMPIInt size,rank; PetscReal err_norm,res_norm,err_tol=1.e-7,res_tol=1.e-6; Vec x,b,u,u_tmp; PetscRandom r; PC pc; KSP ksp; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-m",&m,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-n",&n,PETSC_NULL);CHKERRQ(ierr); N = m*n; /* Generate matrix */ ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = MatGetOwnershipRange(C,&Istart,&Iend);CHKERRQ(ierr); for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; i = Ii/n; j = Ii - i*n; if (i>0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* a shift can make C indefinite. Preconditioners LU, ILU (for BAIJ format) and ICC may fail */ /* ierr = MatShift(C,alpha);CHKERRQ(ierr); */ /* ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ /* Setup and solve for system */ /* Create vectors. 
*/ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,N);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); ierr = VecDuplicate(x,&u_tmp);CHKERRQ(ierr); /* Set exact solution u; then compute right-hand-side vector b. */ ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); ierr = VecSetRandom(u,r);CHKERRQ(ierr); ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); ierr = MatMult(C,u,b);CHKERRQ(ierr); for (k=0; k<3; k++){ if (k == 0){ /* CG */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n CG: \n");CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPCG);CHKERRQ(ierr); } else if (k == 1){ /* MINRES */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n MINRES: \n");CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPMINRES);CHKERRQ(ierr); } else { /* SYMMLQ */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n SYMMLQ: \n");CHKERRQ(ierr); ierr = KSPSetType(ksp,KSPSYMMLQ);CHKERRQ(ierr); } ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); /* ierr = PCSetType(pc,PCICC);CHKERRQ(ierr); */ ierr = PCSetType(pc,PCJACOBI);CHKERRQ(ierr); ierr = KSPSetTolerances(ksp,1.e-7,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr); /* Set runtime options, e.g., -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol> These options will override those specified above as long as KSPSetFromOptions() is called _after_ any other customization routines. 
*/ ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); /* Solve linear system; */ ierr = KSPSetUp(ksp);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); /* Check error */ ierr = VecCopy(u,u_tmp);CHKERRQ(ierr); ierr = VecAXPY(u_tmp,none,x);CHKERRQ(ierr); ierr = VecNorm(u_tmp,NORM_2,&err_norm);CHKERRQ(ierr); ierr = MatMult(C,x,u_tmp);CHKERRQ(ierr); ierr = VecAXPY(u_tmp,none,b);CHKERRQ(ierr); ierr = VecNorm(u_tmp,NORM_2,&res_norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of iterations = %3D\n",its);CHKERRQ(ierr); if (res_norm > res_tol){ ierr = PetscPrintf(PETSC_COMM_WORLD,"Residual norm %G;",res_norm);CHKERRQ(ierr); } if (err_norm > err_tol){ ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm %G.\n",err_norm);CHKERRQ(ierr); } ierr = KSPDestroy(&ksp);CHKERRQ(ierr); } /* Free work space. All PETSc objects should be destroyed when they are no longer needed. */ ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&u_tmp);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc,char **args) { Mat C; Vec s,u,w,x,y,z; PetscErrorCode ierr; PetscInt i,j,m = 8,n,rstart,rend,vstart,vend; PetscScalar one = 1.0,negone = -1.0,v,alpha=0.1; PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; PetscBool flg; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_COMMON);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); n = m; ierr = PetscOptionsHasName(NULL,NULL,"-rectA",&flg);CHKERRQ(ierr); if (flg) n += 2; ierr = PetscOptionsHasName(NULL,NULL,"-rectB",&flg);CHKERRQ(ierr); if (flg) n -= 2; /* ---------- Assemble matrix and vectors ----------- */ ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,m);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&z);CHKERRQ(ierr); ierr = VecDuplicate(x,&w);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&y);CHKERRQ(ierr); ierr = VecSetSizes(y,PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(y);CHKERRQ(ierr); ierr = VecDuplicate(y,&u);CHKERRQ(ierr); ierr = VecDuplicate(y,&s);CHKERRQ(ierr); ierr = VecGetOwnershipRange(y,&vstart,&vend);CHKERRQ(ierr); /* Assembly */ for (i=rstart; i<rend; i++) { v = 100*(i+1); ierr = VecSetValues(z,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); for (j=0; j<n; j++) { v = 10*(i+1)+j+1; ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); } } /* Flush off proc Vec values and do more assembly */ ierr = VecAssemblyBegin(z);CHKERRQ(ierr); for (i=vstart; i<vend; i++) { v = one*((PetscReal)i); ierr = VecSetValues(y,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); v = 100.0*i; ierr = VecSetValues(u,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); } /* 
Flush off proc Mat values and do more assembly */ ierr = MatAssemblyBegin(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); for (i=rstart; i<rend; i++) { for (j=0; j<n; j++) { v = 10*(i+1)+j+1; ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); } } /* Try overlap Coomunication with the next stage XXXSetValues */ ierr = VecAssemblyEnd(z);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); CHKMEMQ; /* The Assembly for the second Stage */ ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = VecAssemblyBegin(y);CHKERRQ(ierr); ierr = VecAssemblyEnd(y);CHKERRQ(ierr); ierr = MatScale(C,alpha);CHKERRQ(ierr); ierr = VecAssemblyBegin(u);CHKERRQ(ierr); ierr = VecAssemblyEnd(u);CHKERRQ(ierr); /* ------------ Test MatMult(), MatMultAdd() ---------- */ ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMult()\n");CHKERRQ(ierr); CHKMEMQ; ierr = MatMult(C,y,x);CHKERRQ(ierr); CHKMEMQ; ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMultAdd()\n");CHKERRQ(ierr); ierr = MatMultAdd(C,y,z,w);CHKERRQ(ierr); ierr = VecAXPY(x,one,z);CHKERRQ(ierr); ierr = VecAXPY(x,negone,w);CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error difference = %g\n",(double)norm);CHKERRQ(ierr); } /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */ for (i=rstart; i<rend; i++) { v = one*((PetscReal)i); ierr = VecSetValues(x,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMultTranspose()\n");CHKERRQ(ierr); ierr = MatMultTranspose(C,x,y);CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMultTransposeAdd()\n");CHKERRQ(ierr); ierr = MatMultTransposeAdd(C,x,u,s);CHKERRQ(ierr); ierr = 
VecAXPY(y,one,u);CHKERRQ(ierr); ierr = VecAXPY(y,negone,s);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error difference = %g\n",(double)norm);CHKERRQ(ierr); } /* -------------------- Test MatGetDiagonal() ------------------ */ ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatGetDiagonal(), MatDiagonalScale()\n");CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecSet(x,one);CHKERRQ(ierr); ierr = MatGetDiagonal(C,x);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); for (i=vstart; i<vend; i++) { v = one*((PetscReal)(i+1)); ierr = VecSetValues(y,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); } /* -------------------- Test () MatDiagonalScale ------------------ */ ierr = PetscOptionsHasName(NULL,NULL,"-test_diagonalscale",&flg);CHKERRQ(ierr); if (flg) { ierr = MatDiagonalScale(C,x,y);CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Free data structures */ ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&s);CHKERRQ(ierr); ierr = VecDestroy(&w);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
/*
   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ - Fills in the numerical values of
   C = P*A*P^T for sequential AIJ matrices.

   Input:
     A - square SeqAIJ matrix
     P - SeqAIJ matrix whose column count equals the size of A
     C - SeqAIJ matrix whose nonzero pattern (i,j arrays) has already been
         computed; its value array is overwritten

   For each row i of P the routine forms the sparse row (P*A)(i,:) in a
   dense work array, then evaluates one sparse inner product with a row of
   P for every precomputed nonzero of row i of C.

   Returns 0 on success, PETSC_ERR_ARG_SIZ for incompatible dimensions.
*/
PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
  PetscInt       *ci=c->i,*cj=c->j;
  PetscInt       an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
  PetscInt       i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
  MatScalar      *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;

  PetscFunctionBegin;
  /* This error checking should be unnecessary if the symbolic was performed */
  if (pm!=cm) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
  if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
  if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
  if (pm!=cn) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);

  /* Set up timers */
  ierr = PetscLogEventBegin(MAT_Applypapt_numeric,A,P,C,0);CHKERRQ(ierr);

  /* C's values are accumulated from scratch */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  /*
     Work space, all of length an:
       paa      - dense values of the current row of P*A
       paj      - column indices present in the current row of P*A
       pajdense - nonzero marker per column of the current row of P*A
     Each array is zeroed on its own: PetscMalloc3() does not promise that
     the three arrays form one contiguous, unpadded block, so a single
     PetscMemzero() spanning all of them starting at paa is not safe.
  */
  ierr = PetscMalloc3(an,MatScalar,&paa,an,PetscInt,&paj,an,PetscInt,&pajdense);CHKERRQ(ierr);
  ierr = PetscMemzero(paa,an*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(paj,an*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(pajdense,an*sizeof(PetscInt));CHKERRQ(ierr);

  for (i=0;i<pm;i++) {
    /* Form sparse row of P*A */
    pnzi  = pi[i+1] - pi[i];
    panzj = 0;
    for (j=0;j<pnzi;j++) {
      /* pj and pa walk through P's nonzeros in storage order */
      arow = *pj++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      aaj  = aa + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!pajdense[ajj[k]]) {
          /* first time this column shows up in the current row */
          pajdense[ajj[k]] = -1;
          paj[panzj++]     = ajj[k];
        }
        paa[ajj[k]] += (*pa)*aaj[k];
      }
      flops += 2*anzj;
      pa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr);

    /* Compute P*A*P^T using sparse inner products. */
    /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
    cnzi = ci[i+1] - ci[i];
    for (j=0;j<cnzi;j++) {
      /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
      ptcol = *cj++;
      ptnzj = pi[ptcol+1] - pi[ptcol];
      ptj   = pjj + pi[ptcol];
      ptaj  = pta + pi[ptcol];
      sum   = 0.;
      k1    = 0;
      k2    = 0;
      /* merge the two index lists, multiplying where they coincide */
      while ((k1<panzj) && (k2<ptnzj)) {
        if (paj[k1]==ptj[k2]) {
          sum += paa[paj[k1++]]*ptaj[k2++];
        } else if (paj[k1] < ptj[k2]) {
          k1++;
        } else /* if (paj[k1] > ptj[k2]) */ {
          k2++;
        }
      }
      *ca++ = sum;
    }

    /* Zero the current row info for P*A */
    for (j=0;j<panzj;j++) {
      paa[paj[j]]      = 0.;
      pajdense[paj[j]] = 0;
    }
  }

  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree3(paa,paj,pajdense);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Applypapt_numeric,A,P,C,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* FormJacobianLocal - Evaluates Jacobian matrix. */ PetscErrorCode FormJacobianLocal(DMDALocalInfo *info,PetscScalar **x,Mat jac,AppCtx *user) { MatStencil col[5], row; PetscScalar D, K, A, v[5], hx, hy, hxdhy, hydhx, ux, uy; PetscReal normGradZ; PetscInt i, j,k; PetscErrorCode ierr; PetscFunctionBeginUser; D = user->D; K = user->K; hx = 1.0/(PetscReal)(info->mx-1); hy = 1.0/(PetscReal)(info->my-1); hxdhy = hx/hy; hydhx = hy/hx; /* Compute entries for the locally owned part of the Jacobian. - Currently, all PETSc parallel matrix formats are partitioned by contiguous chunks of rows across the processors. - Each processor needs to insert only elements that it owns locally (but any non-local elements will be sent to the appropriate processor during matrix assembly). - Here, we set all entries for a particular row at once. - We can set matrix entries either using either MatSetValuesLocal() or MatSetValues(), as discussed above. */ for (j=info->ys; j<info->ys+info->ym; j++) { for (i=info->xs; i<info->xs+info->xm; i++) { row.j = j; row.i = i; if (i == 0 || j == 0 || i == info->mx-1 || j == info->my-1) { /* boundary points */ v[0] = 1.0; ierr = MatSetValuesStencil(jac,1,&row,1,&row,v,INSERT_VALUES);CHKERRQ(ierr); } else { /* interior grid points */ ux = (x[j][i+1] - x[j][i])/hx; uy = (x[j+1][i] - x[j][i])/hy; normGradZ = PetscRealPart(sqrt(ux*ux + uy*uy)); /* PetscPrintf(PETSC_COMM_SELF, "i: %d j: %d normGradZ: %g\n", i, j, normGradZ); */ if (normGradZ < 1.0e-8) normGradZ = 1.0e-8; A = funcA(x[j][i], user); v[0] = -D*hxdhy; col[0].j = j - 1; col[0].i = i; v[1] = -D*hydhx; col[1].j = j; col[1].i = i-1; v[2] = D*2.0*(hydhx + hxdhy) + K*(funcADer(x[j][i], user)*normGradZ - A/normGradZ)*hx*hy; col[2].j = row.j; col[2].i = row.i; v[3] = -D*hydhx + K*A*hx*hy/(2.0*normGradZ); col[3].j = j; col[3].i = i+1; v[4] = -D*hxdhy + K*A*hx*hy/(2.0*normGradZ); col[4].j = j + 1; col[4].i = i; for (k = 0; k < 5; ++k) { if (PetscIsInfOrNanScalar(v[k])) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FP, 
"Invalid residual: %g", PetscRealPart(v[k])); } ierr = MatSetValuesStencil(jac,1,&row,5,col,v,INSERT_VALUES);CHKERRQ(ierr); } } } /* Assemble matrix, using the 2-step process: MatAssemblyBegin(), MatAssemblyEnd(). */ ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Tell the matrix we will never add a new nonzero location to the matrix. If we do, it will generate an error. */ ierr = MatSetOption(jac,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*
   ComputeSubdomainMatrix - Assembles the sequential matrix of one subdomain
   by adding the element matrix glldata.elem_mat into every element of the
   subdomain.

   Input:
     dd      - domain description (polynomial degree p, dimension dim, local
               node and element counts, boundary-condition flags)
     glldata - GLL data; only glldata.elem_mat is read here

   Output:
     local_mat - newly created subdomain matrix; the caller owns it

   When none of dd.pure_neumann, dd.DBC_zerorows and dd.ipx is set, the
   elements with ie == 0 use a submatrix of the element matrix from which
   the nodes with local x-index 0 have been removed, and the remaining
   elements are shifted by one node in x.
*/
static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
{
  PetscErrorCode    ierr;
  PetscInt          elem_rows,elem_cols,localsize,nz;
  PetscInt          nxloc,nyloc,nzloc,nelx,nely,nelz;
  PetscInt          ie,je,ke,i,j,k,ig,jg,kg,pos,shift,row,ncols;
  PetscInt          *indexg,*colsg;
  const PetscInt    *cols;
  const PetscScalar *vals;
  Mat               subdomain_mat,elem_mat_DBC=0,src;
  IS                keepIS;
  /* true when the Dirichlet nodes are eliminated from the element matrix */
  PetscBool         strip_dbc = (PetscBool)(!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx);

  PetscFunctionBeginUser;
  /* work arrays sized by the number of rows of the element matrix */
  ierr = MatGetSize(glldata.elem_mat,&elem_rows,&elem_cols);CHKERRQ(ierr);
  ierr = PetscMalloc1(elem_rows,&indexg);CHKERRQ(ierr);
  ierr = PetscMalloc1(elem_rows,&colsg);CHKERRQ(ierr);

  /* get submatrix of elem_mat without dirichlet nodes */
  if (strip_dbc) {
    nxloc = dd.p+1;
    nyloc = 1;
    nzloc = 1;
    if (dd.dim>1) nyloc = dd.p+1;
    if (dd.dim>2) nzloc = dd.p+1;
    pos = 0;
    for (k=0;k<nzloc;k++) {
      for (j=0;j<nyloc;j++) {
        /* i starts at 1: the nodes with i == 0 are left out */
        for (i=1;i<nxloc;i++) {
          indexg[pos++] = k*nxloc*nyloc+j*nxloc+i;
        }
      }
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF,pos,indexg,PETSC_COPY_VALUES,&keepIS);CHKERRQ(ierr);
    ierr = MatGetSubMatrix(glldata.elem_mat,keepIS,keepIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);CHKERRQ(ierr);
    ierr = ISDestroy(&keepIS);CHKERRQ(ierr);
  }

  /* Assemble subdomain matrix */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;
  ierr = MatCreate(PETSC_COMM_SELF,&subdomain_mat);CHKERRQ(ierr);
  ierr = MatSetSizes(subdomain_mat,localsize,localsize,localsize,localsize);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(subdomain_mat,"subdomain_");CHKERRQ(ierr);
  /* set local matrices type: here we use SEQSBAIJ primarily for testing purpose */
  /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
  if (dd.DBC_zerorows && !dd.ipx) {
    /* in this case, we need to zero out some of the rows, so use seqaij */
    ierr = MatSetType(subdomain_mat,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(subdomain_mat,MATSEQSBAIJ);CHKERRQ(ierr);
  }
  ierr = MatSetFromOptions(subdomain_mat);CHKERRQ(ierr);

  /* very overestimated nonzeros per row; both preallocation calls are issued */
  nz   = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);
  ierr = MatSeqAIJSetPreallocation(subdomain_mat,nz,NULL);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(subdomain_mat,1,nz,NULL);CHKERRQ(ierr);
  ierr = MatSetOption(subdomain_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);

  /* nodes per element and elements per subdomain in y and z, collapsed to 1
     in the directions that do not exist for this dimension */
  nyloc = dd.p+1;
  nzloc = dd.p+1;
  nelz  = dd.nez_l;
  nely  = dd.ney_l;
  nelx  = dd.nex_l;
  if (dd.dim < 3) {
    nzloc = 1;
    nelz  = 1;
  }
  if (dd.dim < 2) {
    nyloc = 1;
    nely  = 1;
  }

  for (ke=0; ke<nelz; ke++) {
    for (je=0; je<nely; je++) {
      for (ie=0; ie<nelx; ie++) {
        /* customize element accounting for BC */
        nxloc = dd.p+1;
        shift = 0;
        src   = glldata.elem_mat;
        if (strip_dbc) {
          if (ie == 0) {
            nxloc = dd.p;
            src   = elem_mat_DBC;
          } else {
            shift = -1;
          }
        }

        /* local to the element/global to the subdomain indexing */
        for (k=0; k<nzloc; k++) {
          kg = ke*dd.p+k;
          for (j=0; j<nyloc; j++) {
            jg = je*dd.p+j;
            for (i=0; i<nxloc; i++) {
              ig  = ie*dd.p+i+shift;
              pos = k*nxloc*nyloc+j*nxloc+i;
              indexg[pos] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
            }
          }
        }

        /* Set values: add each element row, with columns mapped to subdomain numbering */
        for (row=0; row<nxloc*nyloc*nzloc; row++) {
          ierr = MatGetRow(src,row,&ncols,&cols,&vals);CHKERRQ(ierr);
          for (k=0; k<ncols; k++) colsg[k] = indexg[cols[k]];
          ierr = MatSetValues(subdomain_mat,1,&indexg[row],ncols,colsg,vals,ADD_VALUES);CHKERRQ(ierr);
          ierr = MatRestoreRow(src,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = PetscFree(indexg);CHKERRQ(ierr);
  ierr = PetscFree(colsg);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(subdomain_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (subdomain_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if DEBUG
  {
    /* print the infinity norm of the matrix applied to the vector of ones */
    Vec       lvec,rvec;
    PetscReal norm;

    ierr = MatCreateVecs(subdomain_mat,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(subdomain_mat,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    printf("Test null space of local mat % 1.14e\n",norm);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  *local_mat = subdomain_mat;
  ierr = MatDestroy(&elem_mat_DBC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc,char **args) { Mat C; PetscErrorCode ierr; PetscInt i,m = 2,N,M,its,idx[4],count,*rows; PetscScalar val,Ke[16],r[4]; PetscReal x,y,h,norm,tol=1.e-14; Vec u,ustar,b; KSP ksp; PetscInitialize(&argc,&args,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-m",&m,PETSC_NULL);CHKERRQ(ierr); N = (m+1)*(m+1); /* dimension of matrix */ M = m*m; /* number of elements */ h = 1.0/m; /* mesh width */ /* create stiffness matrix */ ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,9,PETSC_NULL,&C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); /* forms the element stiffness for the Laplacian */ ierr = FormElementStiffness(h*h,Ke);CHKERRQ(ierr); for (i=0; i<M; i++) { /* location of lower left corner of element */ x = h*(i % m); y = h*(i/m); /* node numbers for the four corners of element */ idx[0] = (m+1)*(i/m) + (i % m); idx[1] = idx[0]+1; idx[2] = idx[1] + m + 1; idx[3] = idx[2] - 1; ierr = MatSetValues(C,4,idx,4,idx,Ke,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* create right hand side and solution */ ierr = VecCreateSeq(PETSC_COMM_SELF,N,&u);CHKERRQ(ierr); ierr = VecDuplicate(u,&b);CHKERRQ(ierr); ierr = VecDuplicate(b,&ustar);CHKERRQ(ierr); ierr = VecSet(u,0.0);CHKERRQ(ierr); ierr = VecSet(b,0.0);CHKERRQ(ierr); for (i=0; i<M; i++) { /* location of lower left corner of element */ x = h*(i % m); y = h*(i/m); /* node numbers for the four corners of element */ idx[0] = (m+1)*(i/m) + (i % m); idx[1] = idx[0]+1; idx[2] = idx[1] + m + 1; idx[3] = idx[2] - 1; ierr = FormElementRhs(x,y,h*h,r);CHKERRQ(ierr); ierr = VecSetValues(b,4,idx,r,ADD_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(b);CHKERRQ(ierr); ierr = VecAssemblyEnd(b);CHKERRQ(ierr); /* modify matrix and rhs for Dirichlet boundary conditions */ ierr = PetscMalloc((4*m+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); for (i=0; i<m+1; i++) { rows[i] = i; /* bottom */ rows[3*m - 1 +i] = m*(m+1) + i; /* top */ } 
count = m+1; /* left side */ for (i=m+1; i<m*(m+1); i+= m+1) { rows[count++] = i; } count = 2*m; /* left side */ for (i=2*m+1; i<m*(m+1); i+= m+1) { rows[count++] = i; } for (i=0; i<4*m; i++) { x = h*(rows[i] % (m+1)); y = h*(rows[i]/(m+1)); val = y; ierr = VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);CHKERRQ(ierr); ierr = VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatZeroRows(C,4*m,rows,1.0,0,0);CHKERRQ(ierr); ierr = PetscFree(rows);CHKERRQ(ierr); ierr = VecAssemblyBegin(u);CHKERRQ(ierr); ierr = VecAssemblyEnd(u);CHKERRQ(ierr); ierr = VecAssemblyBegin(b);CHKERRQ(ierr); ierr = VecAssemblyEnd(b);CHKERRQ(ierr); /* solve linear system */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,C,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); ierr = KSPSolve(ksp,b,u);CHKERRQ(ierr); /* check error */ for (i=0; i<N; i++) { x = h*(i % (m+1)); y = h*(i/(m+1)); val = y; ierr = VecSetValues(ustar,1,&i,&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(ustar);CHKERRQ(ierr); ierr = VecAssemblyEnd(ustar);CHKERRQ(ierr); ierr = VecAXPY(u,-1.0,ustar);CHKERRQ(ierr); ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); if (norm > tol){ ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %G Iterations %D\n",norm*h,its);CHKERRQ(ierr); } ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&ustar);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*
   ComputeMatrix - Builds the MATIS matrix handed to BDDC.

   Input:
     dd - domain description (communicator, global sizes, scaling factor,
          DBC_zerorows flag)

   Output:
     A  - the assembled MATIS matrix; the caller owns it

   Steps: GLL quadrature data -> subdomain matrix -> local-to-global map ->
   MATIS object holding the scaled subdomain matrix. With dd.DBC_zerorows
   the Dirichlet rows are zeroed (diagonal 1.0) and the symmetric/SPD hints
   are set on the local matrix according to whether this subdomain has any
   Dirichlet dofs; otherwise both hints are set to true on the global matrix.
*/
static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
{
  PetscErrorCode         ierr;
  GLLData                gll;
  Mat                    subdomain_mat = 0,matis = 0;
  ISLocalToGlobalMapping l2g = 0;
  IS                     dirichletIS = 0;
  PetscInt               nglobal = dd.xm*dd.ym*dd.zm;

  PetscFunctionBeginUser;
  /* Compute some stuff of Gauss-Legendre-Lobatto quadrature rule */
  ierr = GLLStuffs(dd,&gll);CHKERRQ(ierr);
  /* Compute matrix of subdomain Neumann problem */
  ierr = ComputeSubdomainMatrix(dd,gll,&subdomain_mat);CHKERRQ(ierr);
  /* Compute global mapping of local dofs */
  ierr = ComputeMapping(dd,&l2g);CHKERRQ(ierr);
  /* Create MATIS object needed by BDDC */
  ierr = MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,nglobal,nglobal,l2g,NULL,&matis);CHKERRQ(ierr);
  /* Set local subdomain matrices into MATIS object */
  ierr = MatScale(subdomain_mat,dd.scalingfactor);CHKERRQ(ierr);
  ierr = MatISSetLocalMat(matis,subdomain_mat);CHKERRQ(ierr);
  /* Call assembly functions */
  ierr = MatAssemblyBegin(matis,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(matis,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!dd.DBC_zerorows) {
    /* safe to set the options for the global matrices (they will be communicated to the matis local matrices) */
    ierr = MatSetOption(matis,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatSetOption(matis,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  } else {
    PetscInt  dirsize;
    PetscBool untouched;

    ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(subdomain_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatZeroRowsLocalIS(matis,dirichletIS,1.0,NULL,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(dirichletIS,&dirsize);CHKERRQ(ierr);
    /* giving hints to local and global matrices could be useful for the BDDC:
       the local matrix is flagged symmetric/SPD only if no row was zeroed here */
    untouched = dirsize ? PETSC_FALSE : PETSC_TRUE;
    ierr = MatSetOption(subdomain_mat,MAT_SYMMETRIC,untouched);CHKERRQ(ierr);
    ierr = MatSetOption(subdomain_mat,MAT_SPD,untouched);CHKERRQ(ierr);
    ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
  }

#if DEBUG
  {
    /* print the infinity norm of the matrix applied to the vector of ones */
    Vec       lvec,rvec;
    PetscReal norm;

    ierr = MatCreateVecs(matis,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(matis,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    printf("Test null space of global mat % 1.14e\n",norm);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  /* free allocated workspace */
  ierr = PetscFree(gll.zGL);CHKERRQ(ierr);
  ierr = PetscFree(gll.rhoGL);CHKERRQ(ierr);
  ierr = PetscFree(gll.A[0]);CHKERRQ(ierr);
  ierr = PetscFree(gll.A);CHKERRQ(ierr);
  ierr = MatDestroy(&gll.elem_mat);CHKERRQ(ierr);
  ierr = MatDestroy(&subdomain_mat);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
  /* give back the pointer to the MATIS object */
  *A = matis;
  PetscFunctionReturn(0);
}
// Completes the final assembly of the wrapped PETSc matrix.
// Sets the newValues flag before calling MatAssemblyEnd() and returns
// that call's result unchanged.
int PetscSparseMtrx :: assembleEnd()
{
    this->newValues = true;
    const int status = MatAssemblyEnd(this->mtrx, MAT_FINAL_ASSEMBLY);
    return status;
}
int main(int argc,char **argv) { Mat A; /* problem matrix */ EPS eps; /* eigenproblem solver context */ EPSType type; PetscReal error,tol,re,im; PetscScalar kr,ki,value[3]; Vec xr,xi; PetscInt n=30,i,Istart,Iend,col[3],nev,maxit,its,nconv; PetscBool FirstBlock=PETSC_FALSE,LastBlock=PETSC_FALSE; PetscErrorCode ierr; SlepcInitialize(&argc,&argv,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n1-D Laplacian Eigenproblem, n=%D\n\n",n);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the operator matrix that defines the eigensystem, Ax=kx - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); if (Istart==0) FirstBlock=PETSC_TRUE; if (Iend==n) LastBlock=PETSC_TRUE; value[0]=-1.0; value[1]=2.0; value[2]=-1.0; for (i=(FirstBlock? Istart+1: Istart); i<(LastBlock? 
Iend-1: Iend); i++) { col[0]=i-1; col[1]=i; col[2]=i+1; ierr = MatSetValues(A,1,&i,3,col,value,INSERT_VALUES);CHKERRQ(ierr); } if (LastBlock) { i=n-1; col[0]=n-2; col[1]=n-1; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } if (FirstBlock) { i=0; col[0]=0; col[1]=1; value[0]=2.0; value[1]=-1.0; ierr = MatSetValues(A,1,&i,2,col,value,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatGetVecs(A,NULL,&xr);CHKERRQ(ierr); ierr = MatGetVecs(A,NULL,&xi);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create the eigensolver and set various options - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Create eigensolver context */ ierr = EPSCreate(PETSC_COMM_WORLD,&eps);CHKERRQ(ierr); /* Set operators. In this case, it is a standard eigenvalue problem */ ierr = EPSSetOperators(eps,A,NULL);CHKERRQ(ierr); ierr = EPSSetProblemType(eps,EPS_HEP);CHKERRQ(ierr); /* Set solver parameters at runtime */ ierr = EPSSetFromOptions(eps);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve the eigensystem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = EPSSolve(eps);CHKERRQ(ierr); /* Optional: Get some information from the solver and display it */ ierr = EPSGetIterationNumber(eps,&its);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Number of iterations of the method: %D\n",its);CHKERRQ(ierr); ierr = EPSGetType(eps,&type);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Solution method: %s\n\n",type);CHKERRQ(ierr); ierr = EPSGetDimensions(eps,&nev,NULL,NULL);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Number of requested eigenvalues: %D\n",nev);CHKERRQ(ierr); ierr = EPSGetTolerances(eps,&tol,&maxit);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Stopping condition: tol=%.4g, 
maxit=%D\n",(double)tol,maxit);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Display solution and clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Get number of converged approximate eigenpairs */ ierr = EPSGetConverged(eps,&nconv);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Number of converged eigenpairs: %D\n\n",nconv);CHKERRQ(ierr); if (nconv>0) { /* Display eigenvalues and relative errors */ ierr = PetscPrintf(PETSC_COMM_WORLD, " k ||Ax-kx||/||kx||\n" " ----------------- ------------------\n");CHKERRQ(ierr); for (i=0;i<nconv;i++) { /* Get converged eigenpairs: i-th eigenvalue is stored in kr (real part) and ki (imaginary part) */ ierr = EPSGetEigenpair(eps,i,&kr,&ki,xr,xi);CHKERRQ(ierr); /* Compute the relative error associated to each eigenpair */ ierr = EPSComputeRelativeError(eps,i,&error);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) re = PetscRealPart(kr); im = PetscImaginaryPart(kr); #else re = kr; im = ki; #endif if (im!=0.0) { ierr = PetscPrintf(PETSC_COMM_WORLD," %9f%+9f j %12g\n",(double)re,(double)im,(double)error);CHKERRQ(ierr); } else { ierr = PetscPrintf(PETSC_COMM_WORLD," %12f %12g\n",(double)re,(double)error);CHKERRQ(ierr); } } ierr = PetscPrintf(PETSC_COMM_WORLD,"\n");CHKERRQ(ierr); } /* Free work space */ ierr = EPSDestroy(&eps);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = VecDestroy(&xr);CHKERRQ(ierr); ierr = VecDestroy(&xi);CHKERRQ(ierr); ierr = SlepcFinalize(); return 0; }
int main(int argc,char **args) { Mat A,Atrans,sA,*submatA,*submatsA; PetscErrorCode ierr; PetscMPIInt size,rank; PetscInt bs=1,mbs=10,ov=1,i,j,k,*rows,*cols,nd=2,*idx,rstart,rend,sz,M,N,Mbs; PetscScalar *vals,rval,one=1.0; IS *is1,*is2; PetscRandom rand; PetscBool flg,TestOverlap,TestSubMat,TestAllcols; PetscLogStage stages[2]; PetscInt vid = -1; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_mbs",&mbs,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-ov",&ov,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-nd",&nd,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-view_id",&vid,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL, "-test_overlap", &TestOverlap);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL, "-test_submat", &TestSubMat);CHKERRQ(ierr); ierr = PetscOptionsHasName(PETSC_NULL, "-test_allcols", &TestAllcols);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,mbs*bs,mbs*bs,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(A,bs,PETSC_DEFAULT,PETSC_NULL); ierr = MatMPIBAIJSetPreallocation(A,bs,PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); ierr = MatGetSize(A,&M,&N); Mbs = M/bs; ierr = PetscMalloc(bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); ierr = PetscMalloc(bs*sizeof(PetscInt),&cols);CHKERRQ(ierr); ierr = PetscMalloc(bs*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr); ierr = PetscMalloc(M*sizeof(PetscScalar),&idx);CHKERRQ(ierr); /* Now set blocks of 
values */ for (j=0; j<bs*bs; j++) vals[j] = 0.0; for (i=0; i<Mbs; i++){ cols[0] = i*bs; rows[0] = i*bs; for (j=1; j<bs; j++) { rows[j] = rows[j-1]+1; cols[j] = cols[j-1]+1; } ierr = MatSetValues(A,bs,rows,bs,cols,vals,ADD_VALUES);CHKERRQ(ierr); } /* second, add random blocks */ for (i=0; i<20*bs; i++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); cols[0] = bs*(PetscInt)(PetscRealPart(rval)*Mbs); ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); rows[0] = rstart + bs*(PetscInt)(PetscRealPart(rval)*mbs); for (j=1; j<bs; j++) { rows[j] = rows[j-1]+1; cols[j] = cols[j-1]+1; } for (j=0; j<bs*bs; j++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); vals[j] = rval; } ierr = MatSetValues(A,bs,rows,bs,cols,vals,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* make A a symmetric matrix: A <- A^T + A */ ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Atrans);CHKERRQ(ierr); ierr = MatAXPY(A,one,Atrans,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatDestroy(&Atrans);CHKERRQ(ierr); ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Atrans); ierr = MatEqual(A, Atrans, &flg); if (flg) { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } else { SETERRQ(PETSC_COMM_SELF,1,"A+A^T is non-symmetric"); } ierr = MatDestroy(&Atrans);CHKERRQ(ierr); /* create a SeqSBAIJ matrix sA (= A) */ ierr = MatConvert(A,MATSBAIJ,MAT_INITIAL_MATRIX,&sA);CHKERRQ(ierr); if (vid >= 0 && vid < size){ if (!rank) printf("A: \n"); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); if (!rank) printf("sA: \n"); ierr = MatView(sA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Test sA==A through MatMult() */ ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG ,"Error in MatConvert(): A != sA"); /* Test MatIncreaseOverlap() */ ierr = PetscMalloc(nd*sizeof(IS **),&is1);CHKERRQ(ierr); ierr = PetscMalloc(nd*sizeof(IS 
**),&is2);CHKERRQ(ierr); for (i=0; i<nd; i++) { if (!TestAllcols){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); sz = (PetscInt)((0.5+0.2*PetscRealPart(rval))*mbs); /* 0.5*mbs < sz < 0.7*mbs */ for (j=0; j<sz; j++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); idx[j*bs] = bs*(PetscInt)(PetscRealPart(rval)*Mbs); for (k=1; k<bs; k++) idx[j*bs+k] = idx[j*bs]+k; } ierr = ISCreateGeneral(PETSC_COMM_SELF,sz*bs,idx,PETSC_COPY_VALUES,is1+i);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,sz*bs,idx,PETSC_COPY_VALUES,is2+i);CHKERRQ(ierr); if (rank == vid){ ierr = PetscPrintf(PETSC_COMM_SELF," [%d] IS sz[%d]: %d\n",rank,i,sz);CHKERRQ(ierr); ierr = ISView(is2[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } } else { /* Test all rows and colums */ sz = M; ierr = ISCreateStride(PETSC_COMM_SELF,sz,0,1,is1+i);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,sz,0,1,is2+i);CHKERRQ(ierr); if (rank == vid){ PetscBool colflag; ierr = ISIdentity(is2[i],&colflag);CHKERRQ(ierr); printf("[%d] is2[%d], colflag %d\n",rank,i,colflag); ierr = ISView(is2[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } } } ierr = PetscLogStageRegister("MatOv_SBAIJ",&stages[0]); ierr = PetscLogStageRegister("MatOv_BAIJ",&stages[1]); /* Test MatIncreaseOverlap */ if (TestOverlap){ ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); ierr = MatIncreaseOverlap(sA,nd,is2,ov);CHKERRQ(ierr); ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr); ierr = MatIncreaseOverlap(A,nd,is1,ov);CHKERRQ(ierr); ierr = PetscLogStagePop();CHKERRQ(ierr); if (rank == vid){ printf("\n[%d] IS from BAIJ:\n",rank); ierr = ISView(is1[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); printf("\n[%d] IS from SBAIJ:\n",rank); ierr = ISView(is2[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } for (i=0; i<nd; ++i) { ierr = ISEqual(is1[i],is2[i],&flg);CHKERRQ(ierr); if (!flg ){ if (rank == 0){ ierr = ISSort(is1[i]);CHKERRQ(ierr); /* ISView(is1[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */ ierr = 
ISSort(is2[i]);CHKERRQ(ierr); /* ISView(is2[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */ } SETERRQ1(PETSC_COMM_SELF,1,"i=%D, is1 != is2",i); } } } /* Test MatGetSubmatrices */ if (TestSubMat){ for(i = 0; i < nd; ++i) { ierr = ISSort(is1[i]); CHKERRQ(ierr); ierr = ISSort(is2[i]); CHKERRQ(ierr); } ierr = MatGetSubMatrices(A,nd,is1,is1,MAT_INITIAL_MATRIX,&submatA);CHKERRQ(ierr); ierr = MatGetSubMatrices(sA,nd,is2,is2,MAT_INITIAL_MATRIX,&submatsA);CHKERRQ(ierr); ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A != sA"); /* Now test MatGetSubmatrices with MAT_REUSE_MATRIX option */ ierr = MatGetSubMatrices(A,nd,is1,is1,MAT_REUSE_MATRIX,&submatA);CHKERRQ(ierr); ierr = MatGetSubMatrices(sA,nd,is2,is2,MAT_REUSE_MATRIX,&submatsA);CHKERRQ(ierr); ierr = MatMultEqual(A,sA,10,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetSubmatrices(): A != sA"); for (i=0; i<nd; ++i) { ierr = MatDestroy(&submatA[i]);CHKERRQ(ierr); ierr = MatDestroy(&submatsA[i]);CHKERRQ(ierr); } ierr = PetscFree(submatA);CHKERRQ(ierr); ierr = PetscFree(submatsA);CHKERRQ(ierr); } /* Free allocated memory */ for (i=0; i<nd; ++i) { ierr = ISDestroy(&is1[i]);CHKERRQ(ierr); ierr = ISDestroy(&is2[i]);CHKERRQ(ierr); } ierr = PetscFree(is1);CHKERRQ(ierr); ierr = PetscFree(is2);CHKERRQ(ierr); ierr = PetscFree(idx);CHKERRQ(ierr); ierr = PetscFree(rows);CHKERRQ(ierr); ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscFree(vals);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&sA);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }