/* get adjacencies due to point-to-point constraints that can't be found with DMPlexGetAdjacency() */
static PetscErrorCode DMPlexComputeAnchorAdjacencies(DM dm, PetscSection section, PetscSection sectionGlobal, PetscSection *anchorSectionAdj, PetscInt *anchorAdj[])
{
  PetscInt       pStart, pEnd;
  PetscSection   adjSec, aSec;
  IS             aIS;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSectionCreate(PetscObjectComm((PetscObject)section),&adjSec);CHKERRQ(ierr);
  ierr = PetscSectionGetChart(section,&pStart,&pEnd);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(adjSec,pStart,pEnd);CHKERRQ(ierr);
  ierr = DMPlexGetAnchors(dm,&aSec,&aIS);CHKERRQ(ierr);
  if (aSec) {
    const PetscInt *anchors;
    PetscInt        p, q, a, aSize, *offsets, aStart, aEnd, *inverse, iSize, *adj, adjSize;
    PetscInt       *tmpAdjP = NULL, *tmpAdjQ = NULL;
    PetscSection    inverseSec;

    /* invert the constraint-to-anchor map */
    ierr = PetscSectionCreate(PetscObjectComm((PetscObject)aSec),&inverseSec);CHKERRQ(ierr);
    ierr = PetscSectionSetChart(inverseSec,pStart,pEnd);CHKERRQ(ierr);
    ierr = ISGetLocalSize(aIS,&aSize);CHKERRQ(ierr);
    ierr = ISGetIndices(aIS,&anchors);CHKERRQ(ierr);
    for (p = 0; p < aSize; p++) {
      PetscInt a = anchors[p];

      ierr = PetscSectionAddDof(inverseSec,a,1);CHKERRQ(ierr);
    }
    ierr = PetscSectionSetUp(inverseSec);CHKERRQ(ierr);
    ierr = PetscSectionGetStorageSize(inverseSec,&iSize);CHKERRQ(ierr);
    ierr = PetscMalloc1(iSize,&inverse);CHKERRQ(ierr);
    ierr = PetscCalloc1(pEnd-pStart,&offsets);CHKERRQ(ierr);
    ierr = PetscSectionGetChart(aSec,&aStart,&aEnd);CHKERRQ(ierr);
    for (p = aStart; p < aEnd; p++) {
      PetscInt dof, off;

      ierr = PetscSectionGetDof(aSec,p,&dof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(aSec,p,&off);CHKERRQ(ierr);
      for (q = 0; q < dof; q++) {
        PetscInt iOff;

        a    = anchors[off + q];
        ierr = PetscSectionGetOffset(inverseSec,a,&iOff);CHKERRQ(ierr);
        inverse[iOff + offsets[a-pStart]++] = p;
      }
    }
    ierr = ISRestoreIndices(aIS,&anchors);CHKERRQ(ierr);
    ierr = PetscFree(offsets);CHKERRQ(ierr);

    /* construct anchorAdj and adjSec
     *
     * loop over anchors:
     *   construct anchor adjacency
     *   loop over constrained:
     *     construct constrained adjacency
     *     if not in anchor adjacency, add to dofs
     * setup adjSec, allocate anchorAdj
     * loop over anchors:
     *   construct anchor adjacency
     *   loop over constrained:
     *     construct constrained adjacency
     *     if not in anchor adjacency
     *       if not already in list, put in list
     *   sort, unique, reduce dof count
     * optional: compactify
     */
    for (p = pStart; p < pEnd; p++) {
      PetscInt iDof, iOff, i, r, s, numAdjP = PETSC_DETERMINE;

      ierr = PetscSectionGetDof(inverseSec,p,&iDof);CHKERRQ(ierr);
      if (!iDof) continue;
      ierr = PetscSectionGetOffset(inverseSec,p,&iOff);CHKERRQ(ierr);
      ierr = DMPlexGetAdjacency(dm,p,&numAdjP,&tmpAdjP);CHKERRQ(ierr);
      for (i = 0; i < iDof; i++) {
        PetscInt iNew = 0, qAdj, qAdjDof, qAdjCDof, numAdjQ = PETSC_DETERMINE;

        q    = inverse[iOff + i];
        ierr = DMPlexGetAdjacency(dm,q,&numAdjQ,&tmpAdjQ);CHKERRQ(ierr);
        for (r = 0; r < numAdjQ; r++) {
          qAdj = tmpAdjQ[r];
          if ((qAdj < pStart) || (qAdj >= pEnd)) continue;
          for (s = 0; s < numAdjP; s++) {
            if (qAdj == tmpAdjP[s]) break;
          }
          if (s < numAdjP) continue;
          ierr  = PetscSectionGetDof(section,qAdj,&qAdjDof);CHKERRQ(ierr);
          ierr  = PetscSectionGetConstraintDof(section,qAdj,&qAdjCDof);CHKERRQ(ierr);
          iNew += qAdjDof - qAdjCDof;
        }
        ierr = PetscSectionAddDof(adjSec,p,iNew);CHKERRQ(ierr);
      }
    }
    ierr = PetscSectionSetUp(adjSec);CHKERRQ(ierr);
    ierr = PetscSectionGetStorageSize(adjSec,&adjSize);CHKERRQ(ierr);
    ierr = PetscMalloc1(adjSize,&adj);CHKERRQ(ierr);
    for (p = pStart; p < pEnd; p++) {
      PetscInt iDof, iOff, i, r, s, aOff, aOffOrig, aDof, numAdjP = PETSC_DETERMINE;

      ierr = PetscSectionGetDof(inverseSec,p,&iDof);CHKERRQ(ierr);
      if (!iDof) continue;
      ierr = PetscSectionGetOffset(inverseSec,p,&iOff);CHKERRQ(ierr);
      ierr = DMPlexGetAdjacency(dm,p,&numAdjP,&tmpAdjP);CHKERRQ(ierr);
      ierr = PetscSectionGetDof(adjSec,p,&aDof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(adjSec,p,&aOff);CHKERRQ(ierr);
      aOffOrig = aOff;
      for (i = 0; i < iDof; i++) {
        PetscInt qAdj, qAdjDof, qAdjCDof, qAdjOff, nd, numAdjQ = PETSC_DETERMINE;

        q    = inverse[iOff + i];
        ierr = DMPlexGetAdjacency(dm,q,&numAdjQ,&tmpAdjQ);CHKERRQ(ierr);
        for (r = 0; r < numAdjQ; r++) {
          qAdj = tmpAdjQ[r];
          if ((qAdj < pStart) || (qAdj >= pEnd)) continue;
          for (s = 0; s < numAdjP; s++) {
            if (qAdj == tmpAdjP[s]) break;
          }
          if (s < numAdjP) continue;
          ierr = PetscSectionGetDof(section,qAdj,&qAdjDof);CHKERRQ(ierr);
          ierr = PetscSectionGetConstraintDof(section,qAdj,&qAdjCDof);CHKERRQ(ierr);
          ierr = PetscSectionGetOffset(sectionGlobal,qAdj,&qAdjOff);CHKERRQ(ierr);
          for (nd = 0; nd < qAdjDof-qAdjCDof; ++nd) {
            adj[aOff++] = (qAdjOff < 0 ? -(qAdjOff+1) : qAdjOff) + nd;
          }
        }
      }
      ierr = PetscSortRemoveDupsInt(&aDof,&adj[aOffOrig]);CHKERRQ(ierr);
      ierr = PetscSectionSetDof(adjSec,p,aDof);CHKERRQ(ierr);
    }
    *anchorAdj = adj;

    /* clean up */
    ierr = PetscSectionDestroy(&inverseSec);CHKERRQ(ierr);
    ierr = PetscFree(inverse);CHKERRQ(ierr);
    ierr = PetscFree(tmpAdjP);CHKERRQ(ierr);
    ierr = PetscFree(tmpAdjQ);CHKERRQ(ierr);
  } else {
    *anchorAdj = NULL;
    ierr = PetscSectionSetUp(adjSec);CHKERRQ(ierr);
  }
  *anchorSectionAdj = adjSec;
  PetscFunctionReturn(0);
}
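/* A minimal standalone sketch (not part of the routine above) of the section-inversion idiom it
   uses: given a PetscSection `sec` mapping points to lists of targets stored in `targets`, build
   the inverse map with a counting pass, PetscSectionSetUp(), and an offset-tracking fill pass.
   The names `sec`, `targets`, `invSec`, and `inv` are illustrative assumptions. */
static PetscErrorCode InvertSectionMap(PetscSection sec, const PetscInt targets[], PetscInt start, PetscInt end, PetscSection *invSec, PetscInt *inv[])
{
  PetscInt       p, q, iSize, *offsets, *inverse;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSectionCreate(PetscObjectComm((PetscObject)sec),invSec);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(*invSec,start,end);CHKERRQ(ierr);
  /* pass 1: count how many sources point at each target */
  for (p = start; p < end; p++) {
    PetscInt dof, off;

    ierr = PetscSectionGetDof(sec,p,&dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(sec,p,&off);CHKERRQ(ierr);
    for (q = 0; q < dof; q++) {ierr = PetscSectionAddDof(*invSec,targets[off+q],1);CHKERRQ(ierr);}
  }
  ierr = PetscSectionSetUp(*invSec);CHKERRQ(ierr);
  ierr = PetscSectionGetStorageSize(*invSec,&iSize);CHKERRQ(ierr);
  ierr = PetscMalloc1(iSize,&inverse);CHKERRQ(ierr);
  ierr = PetscCalloc1(end-start,&offsets);CHKERRQ(ierr);
  /* pass 2: fill, tracking a running offset per target */
  for (p = start; p < end; p++) {
    PetscInt dof, off;

    ierr = PetscSectionGetDof(sec,p,&dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(sec,p,&off);CHKERRQ(ierr);
    for (q = 0; q < dof; q++) {
      PetscInt a = targets[off+q], iOff;

      ierr = PetscSectionGetOffset(*invSec,a,&iOff);CHKERRQ(ierr);
      inverse[iOff + offsets[a-start]++] = p;
    }
  }
  ierr = PetscFree(offsets);CHKERRQ(ierr);
  *inv = inverse;
  PetscFunctionReturn(0);
}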
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
        if (!data) {
          /* one based table */
          ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
    while (tpos) {
      ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
      gid--; lid--;
      garray[lid] = gid;
    }
    ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<ec; i++) {
      ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }
    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    /* create local vector that is used to scatter into */
    ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
  } else {
    ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr);
  }

  /* create two temporary index sets for building the scatter/gather */
  ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  if (aij->Mvctx_mpi1_flg) {
    ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr);
  } else {
    ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  }
  aij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
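/* A minimal standalone sketch (assumed names, not PETSc API) of the mark-and-compact column
   renumbering used in the non-CTABLE branch above: given CSR row pointers ia[] and global column
   indices ja[] drawn from ncols possible columns, collect the distinct columns into garray[] and
   rewrite ja[] in place to local indices into garray[]. */
static PetscErrorCode CompactColumns(PetscInt nrows, const PetscInt ia[], PetscInt ja[], PetscInt ncols, PetscInt *nlocal, PetscInt *garray_out[])
{
  PetscInt       *indices, *garray, i, j, ec = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCalloc1(ncols,&indices);CHKERRQ(ierr);
  for (i = 0; i < nrows; i++) {                   /* pass 1: mark columns that occur */
    for (j = ia[i]; j < ia[i+1]; j++) {
      if (!indices[ja[j]]) ec++;
      indices[ja[j]] = 1;
    }
  }
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i = 0; i < ncols; i++) if (indices[i]) garray[ec++] = i; /* sorted by construction */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;              /* global -> local map */
  for (i = 0; i < nrows; i++) {                   /* pass 2: rewrite to local indices */
    for (j = ia[i]; j < ia[i+1]; j++) ja[j] = indices[ja[j]];
  }
  ierr = PetscFree(indices);CHKERRQ(ierr);
  *nlocal = ec; *garray_out = garray;
  PetscFunctionReturn(0);
}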
int main(int argc,char **args)
{
  ptrdiff_t      N0=256,N1=256,N2=256,N3=2,dim[4];
  fftw_plan      bplan,fplan;
  fftw_complex   *out;
  double         *in1,*in2;
  ptrdiff_t      alloc_local,local_n0,local_0_start;
  ptrdiff_t      local_n1,local_1_start;
  PetscInt       i,j,indx,n1;
  PetscMPIInt    size,rank;
  PetscInt       n,N,*in,N_factor,NM;
  PetscScalar    *data_fin,value1,one=1.57,zero=0.0;
  PetscScalar    a,*x_arr,*y_arr,*z_arr;
  PetscReal      enorm;
  Vec            fin,fout,fout1,ini,final;
  PetscRandom    rnd;
  PetscErrorCode ierr;
  VecScatter     vecscat,vecscat1;
  IS             indx1,indx2;
  PetscInt       *indx3,k,l,*indx4;
  PetscInt       low,tempindx,tempindx1;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
#if defined(PETSC_USE_COMPLEX)
  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This example requires real numbers. Your current scalar type is complex");
#endif
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rnd);CHKERRQ(ierr);

  alloc_local = fftw_mpi_local_size_3d_transposed(N0,N1,N2/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
  /* printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); */
  printf("The value local_n0 is %ld from process %d\n",local_n0,rank);
  /* printf("The value local_0_start is %ld from process %d\n",local_0_start,rank); */
  /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */
  /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */

  /* Allocate space for input and output arrays */
  in1 = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
  in2 = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
  out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);

  N        = 2*N0*N1*(N2/2+1);
  N_factor = N0*N1*N2;
  n        = 2*local_n0*N1*(N2/2+1);
  n1       = local_n1*N0*2*N1;

  /* printf("The value N is %d from process %d\n",N,rank); */
  /* printf("The value n is %d from process %d\n",n,rank); */
  /* printf("The value n1 is %d from process %d\n",n1,rank); */

  /* Create data vectors and accompanying arrays with VecCreateMPIWithArray() */
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr);

  /* VecGetSize(fin,&size); */
  /* printf("The size is %d\n",size); */

  ierr = VecSet(fin,one);CHKERRQ(ierr);
  ierr = VecSet(fout,zero);CHKERRQ(ierr);
  ierr = VecSet(fout1,zero);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(fin);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(fin);CHKERRQ(ierr);
  /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */

  ierr = VecGetArray(fin,&x_arr);CHKERRQ(ierr);
  ierr = VecGetArray(fout1,&z_arr);CHKERRQ(ierr);
  ierr = VecGetArray(fout,&y_arr);CHKERRQ(ierr);

  fplan = fftw_mpi_plan_dft_r2c_3d(N0,N1,N2,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE);
  bplan = fftw_mpi_plan_dft_c2r_3d(N0,N1,N2,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE);

  fftw_execute(fplan);
  fftw_execute(bplan);

  ierr = VecRestoreArray(fin,&x_arr);CHKERRQ(ierr);
  ierr = VecRestoreArray(fout1,&z_arr);CHKERRQ(ierr);
  ierr = VecRestoreArray(fout,&y_arr);CHKERRQ(ierr);

  /* a = 1.0/(PetscReal)N_factor; */
  /* ierr = VecScale(fout1,a);CHKERRQ(ierr); */
  ierr = VecCreate(PETSC_COMM_WORLD,&ini);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_WORLD,&final);CHKERRQ(ierr);
  ierr = VecSetSizes(ini,local_n0*N1*N2,N_factor);CHKERRQ(ierr);
  ierr = VecSetSizes(final,local_n0*N1*N2,N_factor);CHKERRQ(ierr);
  /* VecSetSizes(ini,PETSC_DECIDE,N_factor); */
  /* VecSetSizes(final,PETSC_DECIDE,N_factor); */
  ierr = VecSetFromOptions(ini);CHKERRQ(ierr);
  ierr = VecSetFromOptions(final);CHKERRQ(ierr);

  if (N2%2==0) NM = N2+2;
  else         NM = N2+1;

  ierr = VecGetOwnershipRange(fin,&low,NULL);CHKERRQ(ierr);
  printf("The local index is %d from %d\n",low,rank);
  ierr = PetscMalloc1(local_n0*N1*N2,&indx3);CHKERRQ(ierr);
  ierr = PetscMalloc1(local_n0*N1*N2,&indx4);CHKERRQ(ierr);
  for (i=0; i<local_n0; i++) {
    for (j=0; j<N1; j++) {
      for (k=0; k<N2; k++) {
        tempindx  = i*N1*N2 + j*N2 + k;
        tempindx1 = i*N1*NM + j*NM + k;

        indx3[tempindx] = local_0_start*N1*N2 + tempindx;
        indx4[tempindx] = low + tempindx1;
      }
      /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */
      /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */
    }
  }

  ierr = VecGetValues(fin,local_n0*N1*N2,indx4,x_arr);CHKERRQ(ierr);
  ierr = VecSetValues(ini,local_n0*N1*N2,indx3,x_arr,INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(ini);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(ini);CHKERRQ(ierr);

  ierr = VecGetValues(fout1,local_n0*N1*N2,indx4,y_arr);CHKERRQ(ierr);
  ierr = VecSetValues(final,local_n0*N1*N2,indx3,y_arr,INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(final);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(final);CHKERRQ(ierr);

  printf("The local index value is %ld from %d\n",local_n0*N1*N2,rank);

  /*
  for (i=0; i<N0; i++) {
    for (j=0; j<N1; j++) {
      indx = i*N1*NM + j*NM;
      ISCreateStride(PETSC_COMM_WORLD,N2,indx,1,&indx1);
      indx = i*N1*N2 + j*N2;
      ISCreateStride(PETSC_COMM_WORLD,N2,indx,1,&indx2);
      VecScatterCreate(fin,indx1,ini,indx2,&vecscat);
      VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterCreate(fout1,indx1,final,indx2,&vecscat1);
      VecScatterBegin(vecscat1,fout1,final,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat1,fout1,final,INSERT_VALUES,SCATTER_FORWARD);
    }
  }
  */

  a    = 1.0/(PetscReal)N_factor;
  ierr = VecScale(fout1,a);CHKERRQ(ierr);
  ierr = VecScale(final,a);CHKERRQ(ierr);

  ierr = VecAssemblyBegin(ini);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(ini);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(final);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(final);CHKERRQ(ierr);
  /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */

  ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr);
  ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",(double)enorm);CHKERRQ(ierr);

  /* Free workspace */
  fftw_destroy_plan(fplan);
  fftw_destroy_plan(bplan);
  fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr);
  fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr);
  fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr);
  ierr = PetscFree(indx3);CHKERRQ(ierr);
  ierr = PetscFree(indx4);CHKERRQ(ierr);
  ierr = VecDestroy(&ini);CHKERRQ(ierr);
  ierr = VecDestroy(&final);CHKERRQ(ierr);
  ierr = PetscRandomDestroy(&rnd);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
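/* A minimal sketch of the FFTW r2c padded-layout index map used above: real data for an
   N0 x N1 x N2 in-place transform is stored with the last dimension padded to NM = 2*(N2/2+1),
   so the logical entry (i,j,k) lives at i*N1*NM + j*NM + k while the unpadded ordering is
   i*N1*N2 + j*N2 + k. (Illustrative helper; not part of the example above.) */
static inline ptrdiff_t PaddedIndex(ptrdiff_t i,ptrdiff_t j,ptrdiff_t k,ptrdiff_t N1,ptrdiff_t N2)
{
  ptrdiff_t NM = 2*(N2/2+1); /* equals N2+2 for even N2, N2+1 for odd N2, as computed above */
  return i*N1*NM + j*NM + k;
}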
int main(int argc, char **argv)
{
  PetscErrorCode ierr;
  PetscSF        sf;
  Vec            A,Aout;
  Vec            B,Bout;
  PetscScalar    *bufA;
  PetscScalar    *bufAout;
  PetscScalar    *bufB;
  PetscScalar    *bufBout;
  PetscMPIInt    rank, size;
  PetscInt       nroots, nleaves;
  PetscInt       i;
  PetscInt       *ilocal;
  PetscSFNode    *iremote;

  ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);

  if (size != 2) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Only coded for two MPI processes\n");

  ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);

  nleaves = 2;
  nroots  = 1;
  ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
  for (i = 0; i<nleaves; i++) {
    ilocal[i] = i;
  }
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  if (rank == 0) {
    iremote[0].rank  = 0;
    iremote[0].index = 0;
    iremote[1].rank  = 1;
    iremote[1].index = 0;
  } else {
    iremote[0].rank  = 1;
    iremote[0].index = 0;
    iremote[1].rank  = 0;
    iremote[1].index = 0;
  }
  ierr = PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = VecCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = VecSetSizes(A,2,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(A);CHKERRQ(ierr);
  ierr = VecSetUp(A);CHKERRQ(ierr);

  ierr = VecDuplicate(A,&B);CHKERRQ(ierr);
  ierr = VecDuplicate(A,&Aout);CHKERRQ(ierr);
  ierr = VecDuplicate(A,&Bout);CHKERRQ(ierr);
  ierr = VecGetArray(A,&bufA);CHKERRQ(ierr);
  ierr = VecGetArray(B,&bufB);CHKERRQ(ierr);
  for (i=0; i<2; i++) {
    bufA[i] = (PetscScalar)rank;
    bufB[i] = (PetscScalar)(rank) + 10.0;
  }
  ierr = VecRestoreArray(A,&bufA);CHKERRQ(ierr);
  ierr = VecRestoreArray(B,&bufB);CHKERRQ(ierr);

  ierr = VecGetArrayRead(A,(const PetscScalar**)&bufA);CHKERRQ(ierr);
  ierr = VecGetArrayRead(B,(const PetscScalar**)&bufB);CHKERRQ(ierr);
  ierr = VecGetArray(Aout,&bufAout);CHKERRQ(ierr);
  ierr = VecGetArray(Bout,&bufBout);CHKERRQ(ierr);

  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,(const void*)bufA,(void*)bufAout);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,(const void*)bufB,(void*)bufBout);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,(const void*)bufA,(void*)bufAout);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,(const void*)bufB,(void*)bufBout);CHKERRQ(ierr);

  ierr = VecRestoreArrayRead(A,(const PetscScalar**)&bufA);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(B,(const PetscScalar**)&bufB);CHKERRQ(ierr);
  ierr = VecRestoreArray(Aout,&bufAout);CHKERRQ(ierr);
  ierr = VecRestoreArray(Bout,&bufBout);CHKERRQ(ierr);

  ierr = VecView(Aout,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = VecView(Bout,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = VecDestroy(&A);CHKERRQ(ierr);
  ierr = VecDestroy(&B);CHKERRQ(ierr);
  ierr = VecDestroy(&Aout);CHKERRQ(ierr);
  ierr = VecDestroy(&Bout);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
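/* For contrast with the root-to-leaf broadcast above, a hedged sketch of the dual operation:
   accumulating leaf values back into the roots with PetscSFReduceBegin/End. Assumes the same
   star-forest graph and conforming root/leaf buffers; the helper name is illustrative. */
static PetscErrorCode SumLeavesToRoots(PetscSF sf,const PetscScalar *leafdata,PetscScalar *rootdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}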
/* Version for when blocks are 4 by 4 */
PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat C,Mat A,const MatFactorInfo *info)
{
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
  IS             perm = b->row;
  PetscErrorCode ierr;
  const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
  PetscInt       i,j,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
  MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
  MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
  PetscBool      pivotinblocks = b->pivotinblocks;
  PetscReal      shift = info->shiftamount;
  PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

  PetscFunctionBegin;
  /* initialization */
  allowzeropivot = PetscNot(A->erroriffailure);
  ierr  = PetscCalloc1(16*mbs,&rtmp);CHKERRQ(ierr);
  ierr  = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
  il[0] = 0;
  for (i=0; i<mbs; i++) jl[i] = mbs;

  ierr = PetscMalloc2(16,&dk,16,&uik);CHKERRQ(ierr);
  ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);

  /* check permutation */
  if (!a->permute) {
    ai = a->i; aj = a->j; aa = a->a;
  } else {
    ai   = a->inew; aj = a->jnew;
    ierr = PetscMalloc1(16*ai[mbs],&aa);CHKERRQ(ierr);
    ierr = PetscMemcpy(aa,a->a,16*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = PetscMalloc1(ai[mbs],&a2anew);CHKERRQ(ierr);
    ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      jmin = ai[i]; jmax = ai[i+1];
      for (j=jmin; j<jmax; j++) {
        while (a2anew[j] != j) {
          k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
          for (k1=0; k1<16; k1++) {
            dk[k1]      = aa[k*16+k1];
            aa[k*16+k1] = aa[j*16+k1];
            aa[j*16+k1] = dk[k1];
          }
        }
        /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
        if (i > aj[j]) {
          /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
          ap = aa + j*16;                     /* ptr to the beginning of j-th block of aa */
          for (k=0; k<16; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
          for (k=0; k<4; k++) {               /* j-th block of aa <- dk^T */
            for (k1=0; k1<4; k1++) *ap++ = dk[k + 4*k1];
          }
        }
      }
    }
    ierr = PetscFree(a2anew);CHKERRQ(ierr);
  }

  /* for each row k */
  for (k = 0; k<mbs; k++) {
    /* initialize k-th row with elements nonzero in row perm(k) of A */
    jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
    if (jmin < jmax) {
      ap = aa + jmin*16;
      for (j = jmin; j < jmax; j++) {
        vj       = perm_ptr[aj[j]]; /* block col. index */
        rtmp_ptr = rtmp + vj*16;
        for (i=0; i<16; i++) *rtmp_ptr++ = *ap++;
      }
    }

    /* modify k-th row by adding in those rows i with U(i,k) != 0 */
    ierr = PetscMemcpy(dk,rtmp+k*16,16*sizeof(MatScalar));CHKERRQ(ierr);
    i = jl[k]; /* first row to be added to k_th row */

    while (i < mbs) {
      nexti = jl[i]; /* next row to be added to k_th row */

      /* compute multiplier */
      ili = il[i]; /* index of first nonzero element in U(i,k:mbs-1) */

      /* uik = -inv(Di)*U_bar(i,k) */
      diag = ba + i*16;
      u    = ba + ili*16;

      uik[0] = -(diag[0]*u[0] + diag[4]*u[1] + diag[8]*u[2] + diag[12]*u[3]);
      uik[1] = -(diag[1]*u[0] + diag[5]*u[1] + diag[9]*u[2] + diag[13]*u[3]);
      uik[2] = -(diag[2]*u[0] + diag[6]*u[1] + diag[10]*u[2]+ diag[14]*u[3]);
      uik[3] = -(diag[3]*u[0] + diag[7]*u[1] + diag[11]*u[2]+ diag[15]*u[3]);

      uik[4] = -(diag[0]*u[4] + diag[4]*u[5] + diag[8]*u[6] + diag[12]*u[7]);
      uik[5] = -(diag[1]*u[4] + diag[5]*u[5] + diag[9]*u[6] + diag[13]*u[7]);
      uik[6] = -(diag[2]*u[4] + diag[6]*u[5] + diag[10]*u[6]+ diag[14]*u[7]);
      uik[7] = -(diag[3]*u[4] + diag[7]*u[5] + diag[11]*u[6]+ diag[15]*u[7]);

      uik[8] = -(diag[0]*u[8] + diag[4]*u[9] + diag[8]*u[10] + diag[12]*u[11]);
      uik[9] = -(diag[1]*u[8] + diag[5]*u[9] + diag[9]*u[10] + diag[13]*u[11]);
      uik[10]= -(diag[2]*u[8] + diag[6]*u[9] + diag[10]*u[10]+ diag[14]*u[11]);
      uik[11]= -(diag[3]*u[8] + diag[7]*u[9] + diag[11]*u[10]+ diag[15]*u[11]);

      uik[12]= -(diag[0]*u[12] + diag[4]*u[13] + diag[8]*u[14] + diag[12]*u[15]);
      uik[13]= -(diag[1]*u[12] + diag[5]*u[13] + diag[9]*u[14] + diag[13]*u[15]);
      uik[14]= -(diag[2]*u[12] + diag[6]*u[13] + diag[10]*u[14]+ diag[14]*u[15]);
      uik[15]= -(diag[3]*u[12] + diag[7]*u[13] + diag[11]*u[14]+ diag[15]*u[15]);

      /* update D(k) += -U(i,k)^T * U_bar(i,k) */
      dk[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3];
      dk[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3];
      dk[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3];
      dk[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3];

      dk[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7];
      dk[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7];
      dk[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7];
      dk[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7];

      dk[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11];
      dk[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11];
      dk[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11];
      dk[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11];

      dk[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15];
      dk[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15];
      dk[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15];
      dk[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15];

      ierr = PetscLogFlops(64.0*4.0);CHKERRQ(ierr);

      /* update -U(i,k) */
      ierr = PetscMemcpy(ba+ili*16,uik,16*sizeof(MatScalar));CHKERRQ(ierr);

      /* add multiple of row i to k-th row ... */
      jmin = ili + 1; jmax = bi[i+1];
      if (jmin < jmax) {
        for (j=jmin; j<jmax; j++) {
          /* rtmp += -U(i,k)^T * U_bar(i,j) */
          rtmp_ptr = rtmp + bj[j]*16;
          u        = ba + j*16;

          rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3];
          rtmp_ptr[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3];
          rtmp_ptr[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3];
          rtmp_ptr[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3];

          rtmp_ptr[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7];
          rtmp_ptr[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7];
          rtmp_ptr[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7];
          rtmp_ptr[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7];

          rtmp_ptr[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11];
          rtmp_ptr[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11];
          rtmp_ptr[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11];
          rtmp_ptr[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11];

          rtmp_ptr[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15];
          rtmp_ptr[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15];
          rtmp_ptr[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15];
          rtmp_ptr[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15];
        }
        ierr = PetscLogFlops(2.0*64.0*(jmax-jmin));CHKERRQ(ierr);

        /* ... add i to row list for next nonzero entry */
        il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
        j     = bj[jmin];
        jl[i] = jl[j]; jl[j] = i; /* update jl */
      }
      i = nexti;
    }

    /* save nonzero entries in k-th row of U ... */

    /* invert diagonal block */
    diag = ba+k*16;
    ierr = PetscMemcpy(diag,dk,16*sizeof(MatScalar));CHKERRQ(ierr);
    if (pivotinblocks) {
      ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
      if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    } else {
      ierr = PetscKernel_A_gets_inverse_A_4_nopivot(diag);CHKERRQ(ierr);
    }

    jmin = bi[k]; jmax = bi[k+1];
    if (jmin < jmax) {
      for (j=jmin; j<jmax; j++) {
        vj       = bj[j]; /* block col. index of U */
        u        = ba + j*16;
        rtmp_ptr = rtmp + vj*16;
        for (k1=0; k1<16; k1++) {
          *u++        = *rtmp_ptr;
          *rtmp_ptr++ = 0.0;
        }
      }

      /* ... add k to row list for first nonzero entry in k-th row */
      il[k] = jmin;
      i     = bj[jmin];
      jl[k] = jl[i]; jl[i] = k;
    }
  }

  ierr = PetscFree(rtmp);CHKERRQ(ierr);
  ierr = PetscFree2(il,jl);CHKERRQ(ierr);
  ierr = PetscFree2(dk,uik);CHKERRQ(ierr);
  if (a->permute) {
    ierr = PetscFree(aa);CHKERRQ(ierr);
  }

  ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);

  C->ops->solve          = MatSolve_SeqSBAIJ_4_inplace;
  C->ops->solvetranspose = MatSolve_SeqSBAIJ_4_inplace;
  C->assembled           = PETSC_TRUE;
  C->preallocated        = PETSC_TRUE;

  ierr = PetscLogFlops(1.3333*64*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
  PetscFunctionReturn(0);
}
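/* Compact, loop-based restatements (illustrative only, assumed helper names) of the unrolled
   4x4 block kernels above. Blocks are stored column-major: entry (r,c) of a block sits at index
   r + 4*c. The factorization computes W = -D * U (with D already holding the inverted diagonal
   block) and then accumulates S += W^T * U; the unrolled code is these loops flattened. */
static void Kernel4x4_W_gets_minus_D_times_U(const MatScalar D[16],const MatScalar U[16],MatScalar W[16])
{
  PetscInt r,c,t;
  for (c=0; c<4; c++) {
    for (r=0; r<4; r++) {
      MatScalar s = 0.0;
      for (t=0; t<4; t++) s += D[r + 4*t]*U[t + 4*c]; /* (D*U)(r,c) */
      W[r + 4*c] = -s;
    }
  }
}

static void Kernel4x4_S_pluseq_Wt_times_U(const MatScalar W[16],const MatScalar U[16],MatScalar S[16])
{
  PetscInt r,c,t;
  for (c=0; c<4; c++) {
    for (r=0; r<4; r++) {
      MatScalar s = 0.0;
      for (t=0; t<4; t++) s += W[t + 4*r]*U[t + 4*c]; /* (W^T*U)(r,c) */
      S[r + 4*c] += s;
    }
  }
}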
int main(int argc,char **argv)
{
  TS                ts;    /* time integrator */
  TSAdapt           adapt;
  Vec               X;     /* solution vector */
  Mat               J;     /* Jacobian matrix */
  PetscInt          steps,maxsteps,ncells,xs,xm,i;
  PetscErrorCode    ierr;
  PetscReal         ftime,dt;
  char              chemfile[PETSC_MAX_PATH_LEN] = "chem.inp",thermofile[PETSC_MAX_PATH_LEN] = "therm.dat";
  struct _User      user;
  TSConvergedReason reason;
  PetscBool         showsolutions = PETSC_FALSE;
  char              **snames,*names;
  Vec               lambda; /* used with TSAdjoint for sensitivities */

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Chemistry solver options","");CHKERRQ(ierr);
  ierr = PetscOptionsString("-chem","CHEMKIN input file","",chemfile,chemfile,sizeof(chemfile),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsString("-thermo","NASA thermo input file","",thermofile,thermofile,sizeof(thermofile),NULL);CHKERRQ(ierr);
  user.pressure = 1.01325e5; /* Pascal */
  ierr = PetscOptionsReal("-pressure","Pressure of reaction [Pa]","",user.pressure,&user.pressure,NULL);CHKERRQ(ierr);
  user.Tini = 1550;
  ierr = PetscOptionsReal("-Tini","Initial temperature [K]","",user.Tini,&user.Tini,NULL);CHKERRQ(ierr);
  user.diffus = 100;
  ierr = PetscOptionsReal("-diffus","Diffusion constant","",user.diffus,&user.diffus,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-draw_solution","Plot the solution for each cell","",showsolutions,&showsolutions,NULL);CHKERRQ(ierr);
  user.diffusion = PETSC_TRUE;
  ierr = PetscOptionsBool("-diffusion","Have diffusion","",user.diffusion,&user.diffusion,NULL);CHKERRQ(ierr);
  user.reactions = PETSC_TRUE;
  ierr = PetscOptionsBool("-reactions","Have reactions","",user.reactions,&user.reactions,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = TC_initChem(chemfile, thermofile, 0, 1.0);TCCHKERRQ(ierr);
  user.Nspec = TC_getNspec();
  user.Nreac = TC_getNreac();

  ierr    = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,-1,user.Nspec+1,1,NULL,&user.dm);CHKERRQ(ierr);
  ierr    = DMDAGetInfo(user.dm,NULL,&ncells,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
  user.dx = 1.0/ncells; /* Set the coordinates of the cell centers; note final ghost cell is at x coordinate 1.0 */
  ierr    = DMDASetUniformCoordinates(user.dm,0.0,1.0,0.0,1.0,0.0,1.0);CHKERRQ(ierr);

  /* set the names of each field in the DMDA based on the species name */
  ierr = PetscMalloc1((user.Nspec+1)*LENGTHOFSPECNAME,&names);CHKERRQ(ierr);
  ierr = PetscStrcpy(names,"Temp");CHKERRQ(ierr);
  ierr = TC_getSnames(user.Nspec,names+LENGTHOFSPECNAME);TCCHKERRQ(ierr);
  ierr = PetscMalloc1((user.Nspec+2),&snames);CHKERRQ(ierr);
  for (i=0; i<user.Nspec+1; i++) snames[i] = names+i*LENGTHOFSPECNAME;
  snames[user.Nspec+1] = NULL;
  ierr = DMDASetFieldNames(user.dm,(const char * const *)snames);CHKERRQ(ierr);
  ierr = PetscFree(snames);CHKERRQ(ierr);
  ierr = PetscFree(names);CHKERRQ(ierr);

  ierr = DMCreateMatrix(user.dm,&J);CHKERRQ(ierr);
  ierr = DMCreateGlobalVector(user.dm,&X);CHKERRQ(ierr);

  ierr = PetscMalloc3(user.Nspec+1,&user.tchemwork,PetscSqr(user.Nspec+1),&user.Jdense,user.Nspec+1,&user.rows);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create timestepping solver context
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr);
  ierr = TSSetDM(ts,user.dm);CHKERRQ(ierr);
  ierr = TSSetType(ts,TSARKIMEX);CHKERRQ(ierr);
  ierr = TSARKIMEXSetFullyImplicit(ts,PETSC_TRUE);CHKERRQ(ierr);
  ierr = TSARKIMEXSetType(ts,TSARKIMEX4);CHKERRQ(ierr);
  ierr = TSSetRHSFunction(ts,NULL,FormRHSFunction,&user);CHKERRQ(ierr);
  ierr = TSSetRHSJacobian(ts,J,J,FormRHSJacobian,&user);CHKERRQ(ierr);

  ftime    = 1.0;
  maxsteps = 10000;
  ierr     = TSSetDuration(ts,maxsteps,ftime);CHKERRQ(ierr);
  ierr     = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set initial conditions
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = FormInitialSolution(ts,X,&user);CHKERRQ(ierr);
  ierr = TSSetSolution(ts,X);CHKERRQ(ierr);
  dt   = 1e-10; /* Initial time step */
  ierr = TSSetInitialTimeStep(ts,0.0,dt);CHKERRQ(ierr);
  ierr = TSGetAdapt(ts,&adapt);CHKERRQ(ierr);
  ierr = TSAdaptSetStepLimits(adapt,1e-12,1e-4);CHKERRQ(ierr); /* Also available with -ts_adapt_dt_min/-ts_adapt_dt_max */
  ierr = TSSetMaxSNESFailures(ts,-1);CHKERRQ(ierr);            /* Retry step an unlimited number of times */

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Pass information to graphical monitoring routine
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  if (showsolutions) {
    ierr = DMDAGetCorners(user.dm,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
    for (i=xs;i<xs+xm;i++) {
      ierr = MonitorCell(ts,&user,i);CHKERRQ(ierr);
    }
  }

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set runtime options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = TSSetFromOptions(ts);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set final conditions for sensitivities
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = DMCreateGlobalVector(user.dm,&lambda);CHKERRQ(ierr);
  ierr = TSSetCostGradients(ts,1,&lambda,NULL);CHKERRQ(ierr);
  ierr = VecSetValue(lambda,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(lambda);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(lambda);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Solve ODE
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = TSSolve(ts,X);CHKERRQ(ierr);
  ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr);
  ierr = TSGetTimeStepNumber(ts,&steps);CHKERRQ(ierr);
  ierr = TSGetConvergedReason(ts,&reason);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"%s at time %g after %D steps\n",TSConvergedReasons[reason],(double)ftime,steps);CHKERRQ(ierr);

  {
    Vec                max;
    const char * const *names;
    PetscInt           i;
    const PetscReal    *bmax;

    ierr = TSMonitorEnvelopeGetBounds(ts,&max,NULL);CHKERRQ(ierr);
    if (max) {
      ierr = TSMonitorLGGetVariableNames(ts,&names);CHKERRQ(ierr);
      if (names) {
        ierr = VecGetArrayRead(max,&bmax);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_SELF,"Species - maximum mass fraction\n");CHKERRQ(ierr);
        for (i=1; i<user.Nspec; i++) {
          if (bmax[i] > .01) {ierr = PetscPrintf(PETSC_COMM_SELF,"%s %g\n",names[i],bmax[i]);CHKERRQ(ierr);}
        }
        ierr = VecRestoreArrayRead(max,&bmax);CHKERRQ(ierr);
      }
    }
  }

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  TC_reset();
  ierr = DMDestroy(&user.dm);CHKERRQ(ierr);
  ierr = MatDestroy(&J);CHKERRQ(ierr);
  ierr = VecDestroy(&X);CHKERRQ(ierr);
  ierr = VecDestroy(&lambda);CHKERRQ(ierr);
  ierr = TSDestroy(&ts);CHKERRQ(ierr);
  ierr = PetscFree3(user.tchemwork,user.Jdense,user.rows);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
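/* The example registers FormRHSFunction/FormRHSJacobian without showing them here; a hedged
   skeleton of the expected TSRHSFunction callback signature (body elided; the `_Skeleton`
   suffix and the use of `struct _User` as the context are illustrative assumptions): */
static PetscErrorCode FormRHSFunction_Skeleton(TS ts,PetscReal t,Vec X,Vec F,void *ptr)
{
  struct _User   *user = (struct _User*)ptr; /* context pointer passed via TSSetRHSFunction() above */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (void)ts; (void)t; (void)X; (void)user;
  ierr = VecZeroEntries(F);CHKERRQ(ierr); /* placeholder: the real routine fills F with reaction/diffusion terms */
  PetscFunctionReturn(0);
}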
PetscErrorCode PCGAMGCreateGraph(const Mat Amat, Mat *a_Gmat)
{
  PetscErrorCode ierr;
  PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  PetscMPIInt    rank, size;
  MPI_Comm       comm;
  Mat            Gmat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr);
  ierr = MatGetSize(Amat, &MM, &NN);CHKERRQ(ierr);
  ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr);
  nloc = (Iend-Istart)/bs;

#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
#endif

  if (bs > 1) {
    const PetscScalar *vals;
    const PetscInt    *idx;
    PetscInt          *d_nnz, *o_nnz;

    /* count nnz; there is sparsity in here so this might not be enough */
    ierr = PetscMalloc1(nloc, &d_nnz);CHKERRQ(ierr);
    ierr = PetscMalloc1(nloc, &o_nnz);CHKERRQ(ierr);
    for (Ii = Istart, jj = 0; Ii < Iend; Ii += bs, jj++) {
      d_nnz[jj] = 0;
      for (kk=0; kk<bs; kk++) {
        ierr = MatGetRow(Amat,Ii+kk,&ncols,0,0);CHKERRQ(ierr);
        if (ncols > d_nnz[jj]) {
          d_nnz[jj] = ncols; /* very pessimistic but could be too low in theory */
          o_nnz[jj] = ncols;
          if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
          if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
        }
        ierr = MatRestoreRow(Amat,Ii+kk,&ncols,0,0);CHKERRQ(ierr);
      }
    }

    /* get scalar copy (norms) of matrix -- AIJ specific!!! */
    ierr = MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE,0, d_nnz, 0, o_nnz, &Gmat);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(o_nnz);CHKERRQ(ierr);

    for (Ii = Istart; Ii < Iend; Ii++) {
      PetscInt dest_row = Ii/bs;
      ierr = MatGetRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
      for (jj=0; jj<ncols; jj++) {
        PetscInt    dest_col = idx[jj]/bs;
        PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
        ierr = MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    /* just copy scalar matrix - abs() not taken here but scaled later */
    ierr = MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);CHKERRQ(ierr);
  }

#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
#endif

  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}
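/* Hedged usage sketch for the routine above (the caller name is illustrative): collapse a
   blocked operator to its scalar connection graph, use the graph for coarsening, release it. */
static PetscErrorCode BuildAndUseGraph(Mat Amat)
{
  Mat            Gmat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGAMGCreateGraph(Amat,&Gmat);CHKERRQ(ierr);
  /* ... filter/symmetrize Gmat and run the MIS/aggregation pass on it ... */
  ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}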
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       *indices,n;
  const PetscInt *nindices;
  PetscMPIInt    rank;
  IS             is;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  /*
     Create an index set with 5 entries. Each processor creates
     its own index set with its own list of integers.
  */
  ierr = PetscMalloc1(5,&indices);CHKERRQ(ierr);
  indices[0] = rank + 1;
  indices[1] = rank + 2;
  indices[2] = rank + 3;
  indices[3] = rank + 4;
  indices[4] = rank + 5;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,5,indices,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);

  /*
     Note that ISCreateGeneral() has made a copy of the indices
     so we may (and generally should) free indices[]
  */
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* Print the index set to stdout */
  ierr = ISView(is,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);

  /* Get the number of indices in the set */
  ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr);

  /* Get the indices in the index set */
  ierr = ISGetIndices(is,&nindices);CHKERRQ(ierr);

  /*
     Now any code that needs access to the list of integers
     has access to it here through nindices[].
  */
  ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] First index %D\n",rank,nindices[0]);CHKERRQ(ierr);

  /* Once we no longer need access to the indices they should be returned to the system */
  ierr = ISRestoreIndices(is,&nindices);CHKERRQ(ierr);

  /* One should destroy any PETSc object once one is completely done with it */
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
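/* The same per-process index set [rank+1, ..., rank+5] can be expressed without an explicit
   array using a strided IS; a minimal alternative sketch (helper name is illustrative): */
static PetscErrorCode MakeStrideIS(PetscMPIInt rank,IS *is)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* 5 indices, first index rank+1, stride 1 */
  ierr = ISCreateStride(PETSC_COMM_SELF,5,rank+1,1,is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}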
/*@C
   PetscBinaryRead - Reads from a binary file.

   Not Collective

   Input Parameters:
+  fd - the file descriptor
.  num - the maximum number of items to read
-  type - the type of items to read (PETSC_INT, PETSC_REAL, PETSC_SCALAR, etc.)

   Output Parameters:
+  data - the buffer
-  count - the number of items read, optional

   Level: developer

   Notes:
   If count is not provided and the number of items read is less than
   the maximum number of items to read, then this routine errors.

   PetscBinaryRead() uses byte swapping to work on all machines; the files
   are ALWAYS written using big-endian ordering. On little-endian machines the numbers
   are converted to the little-endian format when they are read in from the file.
   When PETSc is configured with --with-64bit-indices the integers are written to the file as
   64-bit integers; this means they can only be read back in when the option
   --with-64bit-indices is used.

   Concepts: files^reading binary
   Concepts: binary files^reading

.seealso: PetscBinaryWrite(), PetscBinaryOpen(), PetscBinaryClose(), PetscViewerBinaryGetDescriptor(), PetscBinarySynchronizedWrite(),
          PetscBinarySynchronizedRead(), PetscBinarySynchronizedSeek()
@*/
PetscErrorCode PetscBinaryRead(int fd,void *data,PetscInt num,PetscInt *count,PetscDataType type)
{
  size_t         typesize, m = (size_t) num, n = 0, maxblock = 65536;
  char           *p = (char*)data;
#if defined(PETSC_USE_REAL___FLOAT128)
  PetscBool      readdouble = PETSC_FALSE;
  double         *pdouble;
#endif
  void           *ptmp = data;
  char           *fname = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (count) *count = 0;
  if (num < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to read a negative amount of data %D",num);
  if (!num) PetscFunctionReturn(0);

  if (type == PETSC_FUNCTION) {
    m     = 64;
    type  = PETSC_CHAR;
    fname = (char*)malloc(m*sizeof(char));
    p     = (char*)fname;
    ptmp  = (void*)fname;
    if (!fname) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Cannot allocate space for function name");
  }
  if (type == PETSC_BIT_LOGICAL) m = PetscBTLength(m);

  ierr = PetscDataTypeGetSize(type,&typesize);CHKERRQ(ierr);

#if defined(PETSC_USE_REAL___FLOAT128)
  ierr = PetscOptionsGetBool(NULL,NULL,"-binary_read_double",&readdouble,NULL);CHKERRQ(ierr);
  /* If using __float128 precision we still read in doubles from file */
  if ((type == PETSC_REAL || type == PETSC_COMPLEX) && readdouble) {
    PetscInt cnt = num * ((type == PETSC_REAL) ? 1 : 2);
    ierr = PetscMalloc1(cnt,&pdouble);CHKERRQ(ierr);
    p    = (char*)pdouble;
    typesize /= 2;
  }
#endif

  m *= typesize;

  while (m) {
    size_t len = (m < maxblock) ? m : maxblock;
    int    ret = (int)read(fd,p,len);
    if (ret < 0 && errno == EINTR) continue;
    if (!ret && len > 0) break; /* Proxy for EOF */
    if (ret < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_READ,"Error reading from file, errno %d",errno);
    m -= ret;
    p += ret;
    n += ret;
  }
  if (m && !count) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_READ,"Read past end of file");

  num = (PetscInt)(n/typesize); /* Should we require `n % typesize == 0` ? */
  if (count) *count = num;      /* TODO: This is most likely wrong for PETSC_BIT_LOGICAL */

#if defined(PETSC_USE_REAL___FLOAT128)
  if ((type == PETSC_REAL || type == PETSC_COMPLEX) && readdouble) {
    PetscInt  i, cnt = num * ((type == PETSC_REAL) ? 1 : 2);
    PetscReal *preal = (PetscReal*)data;
    if (!PetscBinaryBigEndian()) {ierr = PetscByteSwapDouble(pdouble,cnt);CHKERRQ(ierr);}
    for (i=0; i<cnt; i++) preal[i] = pdouble[i];
    ierr = PetscFree(pdouble);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
#endif

  if (!PetscBinaryBigEndian()) {ierr = PetscByteSwap(ptmp,type,num);CHKERRQ(ierr);}

  if (type == PETSC_FUNCTION) {
#if defined(PETSC_SERIALIZE_FUNCTIONS)
    ierr = PetscDLSym(NULL,fname,(void**)data);CHKERRQ(ierr);
#else
    *(void**)data = NULL;
#endif
    free(fname);
  }
  PetscFunctionReturn(0);
}
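/* A hedged usage sketch for the routine above: read up to `n` PetscInt values from a PETSc
   binary file via the companion routines named in the docstring. The helper name and the idea
   of passing a caller-allocated buffer are illustrative assumptions. */
static PetscErrorCode ReadIntsFromFile(const char filename[],PetscInt n,PetscInt buf[])
{
  int            fd;
  PetscInt       count;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscBinaryOpen(filename,FILE_MODE_READ,&fd);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,buf,n,&count,PETSC_INT);CHKERRQ(ierr); /* count reports how many items were actually read */
  ierr = PetscBinaryClose(fd);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}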
PetscErrorCode PCBDDCNullSpaceAssembleCorrection(PC pc, PetscBool isdir, IS local_dofs)
{
  PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
  PC_IS                   *pcis = (PC_IS*)pc->data;
  Mat_IS                  *matis = (Mat_IS*)pc->pmat->data;
  KSP                     local_ksp;
  PC                      newpc;
  NullSpaceCorrection_ctx shell_ctx;
  Mat                     local_mat,local_pmat,small_mat,inv_small_mat;
  Vec                     work1,work2;
  const Vec               *nullvecs;
  VecScatter              scatter_ctx;
  IS                      is_aux;
  MatFactorInfo           matinfo;
  PetscScalar             *basis_mat,*Kbasis_mat,*array,*array_mat;
  PetscScalar             one = 1.0,zero = 0.0, m_one = -1.0;
  PetscInt                basis_dofs,basis_size,nnsp_size,i,k;
  PetscBool               nnsp_has_cnst;
  PetscErrorCode          ierr;

  PetscFunctionBegin;
  /* Infer the local solver */
  ierr = ISGetSize(local_dofs,&basis_dofs);CHKERRQ(ierr);
  if (isdir) {
    /* Dirichlet solver */
    local_ksp = pcbddc->ksp_D;
  } else {
    /* Neumann solver */
    local_ksp = pcbddc->ksp_R;
  }
  ierr = KSPGetOperators(local_ksp,&local_mat,&local_pmat);CHKERRQ(ierr);

  /* Get null space vecs */
  ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);CHKERRQ(ierr);
  basis_size = nnsp_size;
  if (nnsp_has_cnst) basis_size++;

  if (basis_dofs) {
    /* Create shell ctx */
    ierr = PetscNew(&shell_ctx);CHKERRQ(ierr);

    /* Create work vectors in shell context */
    ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);CHKERRQ(ierr);
    ierr = VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);CHKERRQ(ierr);
    ierr = VecSetType(shell_ctx->work_small_1,VECSEQ);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);CHKERRQ(ierr);
    ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);CHKERRQ(ierr);
    ierr = VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);CHKERRQ(ierr);
    ierr = VecSetType(shell_ctx->work_full_1,VECSEQ);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);CHKERRQ(ierr);

    /* Allocate workspace */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat);CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);CHKERRQ(ierr);
    ierr = MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr);
    ierr = MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr);

    /* Restrict local null space on selected dofs (Dirichlet or Neumann) and compute matrices N and K*N */
    ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);CHKERRQ(ierr);
  }
  for (k=0;k<nnsp_size;k++) {
    ierr = VecScatterBegin(matis->rctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(matis->rctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (basis_dofs) {
      ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr);
      ierr = VecResetArray(work1);CHKERRQ(ierr);
      ierr = VecResetArray(work2);CHKERRQ(ierr);
    }
  }
  if (basis_dofs) {
    if (nnsp_has_cnst) {
      ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = VecSet(work1,one);CHKERRQ(ierr);
      ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr);
      ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr);
      ierr = VecResetArray(work1);CHKERRQ(ierr);
      ierr = VecResetArray(work2);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&work1);CHKERRQ(ierr);
    ierr = VecDestroy(&work2);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr);

    /* Assemble another Mat object in shell context */
    ierr = MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);CHKERRQ(ierr);
    ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);CHKERRQ(ierr);
    ierr = MatLUFactor(small_mat,is_aux,is_aux,&matinfo);CHKERRQ(ierr);
    ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
    ierr = PetscMalloc1(basis_size*basis_size,&array_mat);CHKERRQ(ierr);
    for (k=0;k<basis_size;k++) {
      ierr = VecSet(shell_ctx->work_small_1,zero);CHKERRQ(ierr);
      ierr = VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecAssemblyBegin(shell_ctx->work_small_1);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(shell_ctx->work_small_1);CHKERRQ(ierr);
      ierr = MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);CHKERRQ(ierr);
      ierr = VecGetArrayRead(shell_ctx->work_small_2,(const PetscScalar**)&array);CHKERRQ(ierr);
      for (i=0;i<basis_size;i++) array_mat[i*basis_size+k] = array[i];
      ierr = VecRestoreArrayRead(shell_ctx->work_small_2,(const PetscScalar**)&array);CHKERRQ(ierr);
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);CHKERRQ(ierr);
    ierr = MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);CHKERRQ(ierr);
    ierr = PetscFree(array_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&inv_small_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&small_mat);CHKERRQ(ierr);
    ierr = MatScale(shell_ctx->Kbasis_mat,m_one);CHKERRQ(ierr);

    /* Rebuild local PC */
    ierr = KSPGetPC(local_ksp,&shell_ctx->local_pc);CHKERRQ(ierr);
    ierr = PetscObjectReference((PetscObject)shell_ctx->local_pc);CHKERRQ(ierr);
    ierr = PCCreate(PETSC_COMM_SELF,&newpc);CHKERRQ(ierr);
    ierr = PCSetOperators(newpc,local_mat,local_mat);CHKERRQ(ierr);
    ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr);
    ierr = PCShellSetContext(newpc,shell_ctx);CHKERRQ(ierr);
    ierr = PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);CHKERRQ(ierr);
    ierr = PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);CHKERRQ(ierr);
    ierr = PCSetUp(newpc);CHKERRQ(ierr);
    ierr = KSPSetPC(local_ksp,newpc);CHKERRQ(ierr);
    ierr = PCDestroy(&newpc);CHKERRQ(ierr);
    ierr = KSPSetUp(local_ksp);CHKERRQ(ierr);
  }

  /* test */
  if (pcbddc->dbg_flag && basis_dofs) {
    KSP       check_ksp;
    PC        check_pc;
    Mat       test_mat;
    Vec       work3;
    PetscReal test_err,lambda_min,lambda_max;
    PetscBool setsym,issym=PETSC_FALSE;
    PetscInt  tabs;

    ierr = PetscViewerASCIIGetTab(pcbddc->dbg_viewer,&tabs);CHKERRQ(ierr);
    ierr = KSPGetPC(local_ksp,&check_pc);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr);
    ierr = VecDuplicate(shell_ctx->work_full_1,&work3);CHKERRQ(ierr);
    ierr = VecSetRandom(shell_ctx->work_small_1,NULL);CHKERRQ(ierr);
    ierr = MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);CHKERRQ(ierr);
    ierr = VecCopy(work1,work2);CHKERRQ(ierr);
    ierr = MatMult(local_mat,work1,work3);CHKERRQ(ierr);
    ierr = PCApply(check_pc,work3,work1);CHKERRQ(ierr);
    ierr = VecAXPY(work1,m_one,work2);CHKERRQ(ierr);
    ierr = VecNorm(work1,NORM_INFINITY,&test_err);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIIUseTabs(pcbddc->dbg_viewer,PETSC_FALSE);CHKERRQ(ierr);
    if (isdir) {
      ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Dirichlet ");CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Neumann ");CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"solver is :%1.14e\n",test_err);CHKERRQ(ierr);
    ierr = PetscViewerASCIISetTab(pcbddc->dbg_viewer,tabs);CHKERRQ(ierr);
    ierr = PetscViewerASCIIUseTabs(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);CHKERRQ(ierr);
    ierr = MatShift(test_mat,one);CHKERRQ(ierr);
    ierr = MatNorm(test_mat,NORM_INFINITY,&test_err);CHKERRQ(ierr);
    ierr = MatDestroy(&test_mat);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err);CHKERRQ(ierr);

    /* Create ksp object suitable for extreme eigenvalues' estimation */
    ierr = KSPCreate(PETSC_COMM_SELF,&check_ksp);CHKERRQ(ierr);
    ierr = KSPSetErrorIfNotConverged(check_ksp,pc->erroriffailure);CHKERRQ(ierr);
    ierr = KSPSetOperators(check_ksp,local_mat,local_mat);CHKERRQ(ierr);
    ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
    if (issym) {
      ierr = KSPSetType(check_ksp,KSPCG);CHKERRQ(ierr);
    }
    ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
    ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
    ierr = VecSetRandom(work1,NULL);CHKERRQ(ierr);
    ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr);
    ierr = KSPSolve(check_ksp,work2,work2);CHKERRQ(ierr);
    ierr = VecAXPY(work2,m_one,work1);CHKERRQ(ierr);
    ierr = VecNorm(work2,NORM_INFINITY,&test_err);CHKERRQ(ierr);
    ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
    ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max);CHKERRQ(ierr);
    ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
    ierr = VecDestroy(&work1);CHKERRQ(ierr);
    ierr = VecDestroy(&work2);CHKERRQ(ierr);
    ierr = VecDestroy(&work3);CHKERRQ(ierr);
  }
  /* all processes should call this, even the void ones */
  if (pcbddc->dbg_flag) {
    ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode PCBDDCNullSpaceAssembleCoarse(PC pc, Mat coarse_mat, MatNullSpace *CoarseNullSpace)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  MatNullSpace   tempCoarseNullSpace = NULL;
  const Vec      *nsp_vecs;
  Vec            *coarse_nsp_vecs,local_vec,local_primal_vec,wcoarse_vec,wcoarse_rhs;
  PetscInt       nsp_size,coarse_nsp_size,i;
  PetscBool      nsp_has_cnst;
  PetscReal      test_null;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  coarse_nsp_size = 0;
  coarse_nsp_vecs = 0;
  ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr);
  if (coarse_mat) {
    ierr = PetscMalloc1(nsp_size+1,&coarse_nsp_vecs);CHKERRQ(ierr);
    for (i=0;i<nsp_size+1;i++) {
      ierr = MatCreateVecs(coarse_mat,&coarse_nsp_vecs[i],NULL);CHKERRQ(ierr);
    }
    if (pcbddc->dbg_flag) {
      ierr = MatCreateVecs(coarse_mat,&wcoarse_vec,&wcoarse_rhs);CHKERRQ(ierr);
    }
  }
  ierr = MatCreateVecs(pcbddc->ConstraintMatrix,&local_vec,&local_primal_vec);CHKERRQ(ierr);
  if (nsp_has_cnst) {
    ierr = VecSet(local_vec,1.0);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->ConstraintMatrix,local_vec,local_primal_vec);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (coarse_mat) {
      PetscScalar       *array_out;
      const PetscScalar *array_in;
      PetscInt          lsize;

      if (pcbddc->dbg_flag) {
        PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
        ierr = MatMult(coarse_mat,wcoarse_vec,wcoarse_rhs);CHKERRQ(ierr);
        ierr = VecNorm(wcoarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr);
        ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
      }
      ierr = VecGetLocalSize(pcbddc->coarse_vec,&lsize);CHKERRQ(ierr);
      ierr = VecGetArrayRead(pcbddc->coarse_vec,&array_in);CHKERRQ(ierr);
      ierr = VecGetArray(coarse_nsp_vecs[coarse_nsp_size],&array_out);CHKERRQ(ierr);
      ierr = PetscMemcpy(array_out,array_in,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(coarse_nsp_vecs[coarse_nsp_size],&array_out);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(pcbddc->coarse_vec,&array_in);CHKERRQ(ierr);
      coarse_nsp_size++;
    }
  }
  for (i=0;i<nsp_size;i++) {
    ierr = VecScatterBegin(matis->rctx,nsp_vecs[i],local_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(matis->rctx,nsp_vecs[i],local_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->ConstraintMatrix,local_vec,local_primal_vec);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,local_primal_vec,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (coarse_mat) {
      PetscScalar       *array_out;
      const PetscScalar *array_in;
      PetscInt          lsize;

      if (pcbddc->dbg_flag) {
        PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
        ierr = MatMult(coarse_mat,wcoarse_vec,wcoarse_rhs);CHKERRQ(ierr);
        ierr = VecNorm(wcoarse_rhs,NORM_2,&test_null);CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr);
        ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
      }
      ierr = VecGetLocalSize(pcbddc->coarse_vec,&lsize);CHKERRQ(ierr);
      ierr = VecGetArrayRead(pcbddc->coarse_vec,&array_in);CHKERRQ(ierr);
      ierr = VecGetArray(coarse_nsp_vecs[coarse_nsp_size],&array_out);CHKERRQ(ierr);
      ierr = PetscMemcpy(array_out,array_in,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(coarse_nsp_vecs[coarse_nsp_size],&array_out);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(pcbddc->coarse_vec,&array_in);CHKERRQ(ierr);
      coarse_nsp_size++;
    }
  }
  if (coarse_nsp_size > 0) {
    ierr = PCBDDCOrthonormalizeVecs(coarse_nsp_size,coarse_nsp_vecs);CHKERRQ(ierr);
    ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&tempCoarseNullSpace);CHKERRQ(ierr);
    for (i=0;i<nsp_size+1;i++) {
      ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr);
    }
  }
  if (coarse_mat) {
    ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr);
    if (pcbddc->dbg_flag) {
      ierr = VecDestroy(&wcoarse_vec);CHKERRQ(ierr);
      ierr = VecDestroy(&wcoarse_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
  ierr = VecDestroy(&local_primal_vec);CHKERRQ(ierr);
  *CoarseNullSpace = tempCoarseNullSpace;
  PetscFunctionReturn(0);
}
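/* Hedged usage sketch for the routine above (caller name is illustrative): attach the assembled
   coarse null space to the coarse operator so coarse solves project it out. */
static PetscErrorCode AttachCoarseNullSpace(PC pc,Mat coarse_mat)
{
  MatNullSpace   nsp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&nsp);CHKERRQ(ierr);
  if (nsp) {
    ierr = MatSetNullSpace(coarse_mat,nsp);CHKERRQ(ierr);
    ierr = MatNullSpaceDestroy(&nsp);CHKERRQ(ierr); /* Mat holds its own reference */
  }
  PetscFunctionReturn(0);
}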
/* PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing in polynomial eigenproblems. */ PetscErrorCode PEPBuildDiagonalScaling(PEP pep) { PetscErrorCode ierr; PetscInt it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl; const PetscInt *cidx,*ridx; Mat M,*T,A; PetscMPIInt n; PetscBool cont=PETSC_TRUE,flg=PETSC_FALSE; PetscScalar *array,*Dr,*Dl,t; PetscReal l2,d,*rsum,*aux,*csum,w=1.0; MatStructure str; MatInfo info; PetscFunctionBegin; l2 = 2*PetscLogReal(2.0); nmat = pep->nmat; ierr = PetscMPIIntCast(pep->n,&n); ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr); ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr); for (k=0;k<nmat;k++) { ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr); } /* Form local auxiliar matrix M */ ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types"); ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr); if (cont) { ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr); flg = PETSC_TRUE; } else { ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr); } ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); for (k=1;k<nmat;k++) { if (flg) { ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); } else { if (str==SAME_NONZERO_PATTERN) { ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr); } } ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); w *= pep->slambda*pep->slambda*pep->sfactor; ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr); if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) { ierr = MatDestroy(&A);CHKERRQ(ierr); } } ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices"); ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr); ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr); ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr); ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr); ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr); ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) { /* Search non-zero columns outsize lst-lend */ if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j]; /* Local column sums */ aux[cidx[j]] += PetscAbsScalar(array[j]); } for (it=0;it<pep->sits && cont;it++) { emaxl = 0; eminl = 0; /* Column sum */ if (it>0) { /* it=0 has been already done*/ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]); ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); } ierr = 
/* Update Dr */ for (j=lst;j<lend;j++) { d = PetscLogReal(csum[j])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dr[j-lst] *= d; aux[j] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } for (j=0;j<nc;j++) { d = PetscLogReal(csum[cols[j]])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); aux[cols[j]] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } /* Scale M */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (j=0;j<nz;j++) { array[j] *= aux[cidx[j]]; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Row sum */ ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nr;i++) { for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]); /* Update Dl */ d = PetscLogReal(rsum[i])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dl[i] *= d; /* Scale M */ for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Compute global max and min */ ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl));CHKERRQ(ierr); ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl));CHKERRQ(ierr); if (emax<=emin+2) cont = PETSC_FALSE; } ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr); /* Free memory */ ierr = MatDestroy(&M);CHKERRQ(ierr); ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr); ierr = PetscFree(T);CHKERRQ(ierr); PetscFunctionReturn(0); }
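/* Hedged sketch (not a library routine) of the rounding used in the Dl/Dr updates above. The
   entries of M hold squared magnitudes, so l2 = 2*log(2) = log(4) and d = log(sum)/l2 is
   log_4(sum); the returned scale 2^e therefore satisfies (2^e)^2 = 4^e, which approximates
   1/sum, i.e. 2^e is a power-of-two approximation of 1/sqrt(sum). Assumes <petscsys.h>. */
static PetscReal ScaleToPowerOfTwo(PetscReal sum)
{
  PetscReal l2 = 2*PetscLogReal(2.0);
  PetscReal d  = PetscLogReal(sum)/l2;
  PetscInt  e  = -(PetscInt)((d < 0) ? (d-0.5) : (d+0.5)); /* round to nearest integer */
  return PetscPowReal(2.0,(PetscReal)e);
}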
PetscErrorCode MatISGetMPIXAIJ_IS(Mat mat, MatReuse reuse, Mat *M) { Mat_IS *matis = (Mat_IS*)(mat->data); Mat local_mat; /* info on mat */ PetscInt bs,rows,cols,lrows,lcols; PetscInt local_rows,local_cols; PetscBool isdense,issbaij,isseqaij; PetscMPIInt nsubdomains; /* values insertion */ PetscScalar *array; /* work */ PetscErrorCode ierr; PetscFunctionBegin; /* get info from mat */ ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); if (nsubdomains == 1) { if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&(*M));CHKERRQ(ierr); } else { ierr = MatCopy(matis->A,*M,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } PetscFunctionReturn(0); } ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); ierr = MatGetLocalSize(mat,&lrows,&lcols);CHKERRQ(ierr); ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); if (reuse == MAT_INITIAL_MATRIX) { MatType new_mat_type; PetscBool issbaij_red; /* determining new matrix type */ ierr = MPIU_Allreduce(&issbaij,&issbaij_red,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); if (issbaij_red) { new_mat_type = MATSBAIJ; } else { if (bs>1) { new_mat_type = MATBAIJ; } else { new_mat_type = MATAIJ; } } ierr = MatCreate(PetscObjectComm((PetscObject)mat),M);CHKERRQ(ierr); ierr = MatSetSizes(*M,lrows,lcols,rows,cols);CHKERRQ(ierr); ierr = MatSetBlockSize(*M,bs);CHKERRQ(ierr); ierr = MatSetType(*M,new_mat_type);CHKERRQ(ierr); ierr = MatISSetMPIXAIJPreallocation_Private(mat,*M,PETSC_FALSE);CHKERRQ(ierr); } else { PetscInt mbs,mrows,mcols,mlrows,mlcols; /* some checks */ ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); ierr = MatGetLocalSize(*M,&mlrows,&mlcols);CHKERRQ(ierr); if (mrows != rows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); if (mcols != cols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); if (mlrows != lrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of local rows (%d != %d)",lrows,mlrows); if (mlcols != lcols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of local cols (%d != %d)",lcols,mlcols); if (mbs != bs) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. 
Wrong block size (%d != %d)",bs,mbs); ierr = MatZeroEntries(*M);CHKERRQ(ierr); } if (issbaij) { ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&local_mat);CHKERRQ(ierr); } else { ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); local_mat = matis->A; } /* Set values */ ierr = MatSetLocalToGlobalMapping(*M,mat->rmap->mapping,mat->cmap->mapping);CHKERRQ(ierr); if (isdense) { /* special case for dense local matrices */ PetscInt i,*dummy_rows,*dummy_cols; if (local_rows != local_cols) { ierr = PetscMalloc2(local_rows,&dummy_rows,local_cols,&dummy_cols);CHKERRQ(ierr); for (i=0;i<local_rows;i++) dummy_rows[i] = i; for (i=0;i<local_cols;i++) dummy_cols[i] = i; } else { ierr = PetscMalloc1(local_rows,&dummy_rows);CHKERRQ(ierr); for (i=0;i<local_rows;i++) dummy_rows[i] = i; dummy_cols = dummy_rows; } ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); ierr = MatDenseGetArray(local_mat,&array);CHKERRQ(ierr); ierr = MatSetValuesLocal(*M,local_rows,dummy_rows,local_cols,dummy_cols,array,ADD_VALUES);CHKERRQ(ierr); ierr = MatDenseRestoreArray(local_mat,&array);CHKERRQ(ierr); if (dummy_rows != dummy_cols) { ierr = PetscFree2(dummy_rows,dummy_cols);CHKERRQ(ierr); } else { ierr = PetscFree(dummy_rows);CHKERRQ(ierr); } } else if (isseqaij) { PetscInt i,nvtxs,*xadj,*adjncy; PetscBool done; ierr = MatGetRowIJ(local_mat,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); if (!done) SETERRQ1(PetscObjectComm((PetscObject)local_mat),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); ierr = MatSeqAIJGetArray(local_mat,&array);CHKERRQ(ierr); for (i=0;i<nvtxs;i++) { ierr = MatSetValuesLocal(*M,1,&i,xadj[i+1]-xadj[i],adjncy+xadj[i],array+xadj[i],ADD_VALUES);CHKERRQ(ierr); } ierr = MatRestoreRowIJ(local_mat,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); if (!done) SETERRQ1(PetscObjectComm((PetscObject)local_mat),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); ierr = MatSeqAIJRestoreArray(local_mat,&array);CHKERRQ(ierr); } else { /* very basic values insertion for all other matrix types */ PetscInt i; for (i=0;i<local_rows;i++) { PetscInt j; const PetscInt *local_indices_cols; ierr = MatGetRow(local_mat,i,&j,&local_indices_cols,(const PetscScalar**)&array);CHKERRQ(ierr); ierr = MatSetValuesLocal(*M,1,&i,j,local_indices_cols,array,ADD_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(local_mat,i,&j,&local_indices_cols,(const PetscScalar**)&array);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatDestroy(&local_mat);CHKERRQ(ierr); ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (isdense) { ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); } PetscFunctionReturn(0); }
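/* Hedged usage sketch for the routine above, assuming it is reached through the public entry
   point MatISGetMPIXAIJ() of this PETSc version; A is a placeholder for an assembled MATIS
   matrix. The first call allocates the global matrix; later calls with MAT_REUSE_MATRIX refill
   it after the local matrices change. */
Mat B;
ierr = MatISGetMPIXAIJ(A,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
/* ... update values in the local matrices of A and reassemble ... */
ierr = MatISGetMPIXAIJ(A,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
ierr = MatDestroy(&B);CHKERRQ(ierr);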
PETSC_EXTERN PetscErrorCode MatISSetMPIXAIJPreallocation_Private(Mat A, Mat B, PetscBool maxreduce) { Mat_IS *matis = (Mat_IS*)(A->data); PetscInt *my_dnz,*my_onz,*dnz,*onz,*mat_ranges,*row_ownership; const PetscInt *global_indices_r,*global_indices_c; PetscInt i,j,bs,rows,cols; PetscInt lrows,lcols; PetscInt local_rows,local_cols; PetscMPIInt nsubdomains; PetscBool isdense,issbaij; PetscErrorCode ierr; PetscFunctionBegin; ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&nsubdomains);CHKERRQ(ierr); ierr = MatGetSize(A,&rows,&cols);CHKERRQ(ierr); ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetIndices(A->rmap->mapping,&global_indices_r);CHKERRQ(ierr); if (A->rmap->mapping != A->cmap->mapping) { ierr = ISLocalToGlobalMappingGetIndices(A->cmap->mapping,&global_indices_c);CHKERRQ(ierr); } else { global_indices_c = global_indices_r; } if (issbaij) { ierr = MatGetRowUpperTriangular(matis->A);CHKERRQ(ierr); } /* An SF reduce is needed to sum up properly on shared rows. Note that generally preallocation is not exact, since it overestimates nonzeros */ if (!matis->sf) { /* setup SF if not yet created and allocate rootdata and leafdata */ ierr = MatISComputeSF_Private(A);CHKERRQ(ierr); } ierr = MatGetLocalSize(A,&lrows,&lcols);CHKERRQ(ierr); ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)A),lrows,lcols,dnz,onz);CHKERRQ(ierr); /* All processes need to compute the entire row ownership */ ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); ierr = MatGetOwnershipRanges(A,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); for (i=0;i<nsubdomains;i++) { for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { row_ownership[j] = i; } } /* my_dnz and my_onz contain the exact contribution to the preallocation from each local mat; they will then be summed up properly.
This way, preallocation is always sufficient */ ierr = PetscCalloc2(local_rows,&my_dnz,local_rows,&my_onz);CHKERRQ(ierr); /* preallocation as a MATAIJ */ if (isdense) { /* special case for dense local matrices */ for (i=0;i<local_rows;i++) { PetscInt index_row = global_indices_r[i]; for (j=i;j<local_rows;j++) { PetscInt owner = row_ownership[index_row]; PetscInt index_col = global_indices_c[j]; if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ my_dnz[i] += 1; } else { /* offdiag block */ my_onz[i] += 1; } /* same as before, interchanging rows and cols */ if (i != j) { owner = row_ownership[index_col]; if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { my_dnz[j] += 1; } else { my_onz[j] += 1; } } } } } else { /* TODO: this could be optimized using MatGetRowIJ */ for (i=0;i<local_rows;i++) { const PetscInt *cols; PetscInt ncols,index_row = global_indices_r[i]; ierr = MatGetRow(matis->A,i,&ncols,&cols,NULL);CHKERRQ(ierr); for (j=0;j<ncols;j++) { PetscInt owner = row_ownership[index_row]; PetscInt index_col = global_indices_c[cols[j]]; if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ my_dnz[i] += 1; } else { /* offdiag block */ my_onz[i] += 1; } /* same as before, interchanging rows and cols */ if (issbaij && index_col != index_row) { owner = row_ownership[index_col]; if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { my_dnz[cols[j]] += 1; } else { my_onz[cols[j]] += 1; } } } ierr = MatRestoreRow(matis->A,i,&ncols,&cols,NULL);CHKERRQ(ierr); } } ierr = ISLocalToGlobalMappingRestoreIndices(A->rmap->mapping,&global_indices_r);CHKERRQ(ierr); if (global_indices_c != global_indices_r) { ierr = ISLocalToGlobalMappingRestoreIndices(A->cmap->mapping,&global_indices_c);CHKERRQ(ierr); } ierr = PetscFree(row_ownership);CHKERRQ(ierr); /* Reduce my_dnz and my_onz */ if (maxreduce) { ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_dnz,dnz,MPI_MAX);CHKERRQ(ierr); ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_dnz,dnz,MPI_MAX);CHKERRQ(ierr); ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_onz,onz,MPI_MAX);CHKERRQ(ierr); ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_onz,onz,MPI_MAX);CHKERRQ(ierr); } else { ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_dnz,dnz,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_dnz,dnz,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,my_onz,onz,MPI_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,my_onz,onz,MPI_SUM);CHKERRQ(ierr); } ierr = PetscFree2(my_dnz,my_onz);CHKERRQ(ierr); /* Resize preallocation if overestimated */ for (i=0;i<lrows;i++) { dnz[i] = PetscMin(dnz[i],lcols); onz[i] = PetscMin(onz[i],cols-lcols); } /* set preallocation */ ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr); for (i=0;i<lrows/bs;i++) { dnz[i] = dnz[i*bs]/bs; onz[i] = onz[i*bs]/bs; } ierr = MatMPIBAIJSetPreallocation(B,bs,0,dnz,0,onz);CHKERRQ(ierr); ierr = MatMPISBAIJSetPreallocation(B,bs,0,dnz,0,onz);CHKERRQ(ierr); ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); if (issbaij) { ierr = MatRestoreRowUpperTriangular(matis->A);CHKERRQ(ierr); } PetscFunctionReturn(0); }
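/* Hedged fragment sketching the preallocation idiom used above: dnz/onz count the couplings in
   the diagonal and off-diagonal blocks of each local row, and the MatPreallocateInitialize()/
   MatPreallocateFinalize() macros manage their allocation. comm, B, lrows and lcols are
   placeholders for an existing communicator, matrix and local sizes. */
PetscInt *dnz,*onz;
ierr = MatPreallocateInitialize(comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
/* ... fill dnz[i] (couplings in locally owned columns) and onz[i] (couplings off-process) ... */
ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);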
PetscErrorCode ComputeMatrix(DM da,Mat B) { PetscErrorCode ierr; PetscInt i,j,k,mx,my,mz,xm,ym,zm,xs,ys,zs,dof,k1,k2,k3; PetscScalar *v,*v_neighbor,Hx,Hy,Hz,HxHydHz,HyHzdHx,HxHzdHy; MatStencil row,col; PetscFunctionBeginUser; ierr = DMDAGetInfo(da,0,&mx,&my,&mz,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); /* For simplicity, this example only works on mx=my=mz */ if (mx != my || mx != mz) SETERRQ3(PETSC_COMM_SELF,1,"This example only works with mx %D = my %D = mz %D\n",mx,my,mz); Hx = 1.0 / (PetscReal)(mx-1); Hy = 1.0 / (PetscReal)(my-1); Hz = 1.0 / (PetscReal)(mz-1); HxHydHz = Hx*Hy/Hz; HxHzdHy = Hx*Hz/Hy; HyHzdHx = Hy*Hz/Hx; ierr = PetscMalloc1(2*dof*dof+1,&v);CHKERRQ(ierr); v_neighbor = v + dof*dof; ierr = PetscMemzero(v,(2*dof*dof+1)*sizeof(PetscScalar));CHKERRQ(ierr); k3 = 0; for (k1=0; k1<dof; k1++) { for (k2=0; k2<dof; k2++) { if (k1 == k2) { v[k3] = 2.0*(HxHydHz + HxHzdHy + HyHzdHx); v_neighbor[k3] = -HxHydHz; } else { v[k3] = k1/(dof*dof); v_neighbor[k3] = k2/(dof*dof); } k3++; } } ierr = DMDAGetCorners(da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr); for (k=zs; k<zs+zm; k++) { for (j=ys; j<ys+ym; j++) { for (i=xs; i<xs+xm; i++) { row.i = i; row.j = j; row.k = k; if (i==0 || j==0 || k==0 || i==mx-1 || j==my-1 || k==mz-1) { /* boundary points */ ierr = MatSetValuesBlockedStencil(B,1,&row,1,&row,v,INSERT_VALUES);CHKERRQ(ierr); } else { /* interior points */ /* center */ col.i = i; col.j = j; col.k = k; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr); /* x neighbors */ col.i = i-1; col.j = j; col.k = k; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v_neighbor,INSERT_VALUES);CHKERRQ(ierr); col.i = i+1; col.j = j; col.k = k; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v_neighbor,INSERT_VALUES);CHKERRQ(ierr); /* y neighbors */ col.i = i; col.j = j-1; col.k = k; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v_neighbor,INSERT_VALUES);CHKERRQ(ierr); col.i = i; col.j = j+1; col.k = k; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v_neighbor,INSERT_VALUES);CHKERRQ(ierr); /* z neighbors */ col.i = i; col.j = j; col.k = k-1; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v_neighbor,INSERT_VALUES);CHKERRQ(ierr); col.i = i; col.j = j; col.k = k+1; ierr = MatSetValuesBlockedStencil(B,1,&row,1,&col,v_neighbor,INSERT_VALUES);CHKERRQ(ierr); } } } } ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree(v);CHKERRQ(ierr); PetscFunctionReturn(0); }
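/* Hedged driver fragment for the assembly routine above: the matrix is created from the DMDA so
   that MatSetValuesBlockedStencil() has the stencil-to-index mapping it needs; da is assumed to
   be an existing 3D DMDA with dof components per node. */
Mat B;
ierr = DMCreateMatrix(da,&B);CHKERRQ(ierr);
ierr = ComputeMatrix(da,B);CHKERRQ(ierr);
/* ... use B, e.g. KSPSetOperators(ksp,B,B) ... */
ierr = MatDestroy(&B);CHKERRQ(ierr);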
/*@ PetscBinaryWrite - Writes to a binary file. Not Collective Input Parameters: + fd - the file . p - the buffer . n - the number of items to write . type - the type of items to write (PETSC_INT, PETSC_DOUBLE or PETSC_SCALAR) - istemp - PETSC_FALSE if buffer data should be preserved, PETSC_TRUE otherwise. Level: advanced Notes: PetscBinaryWrite() uses byte swapping to work on all machines; the files are written using big-endian ordering. On little-endian machines the numbers are converted to the big-endian format when they are written to disk. When PETSc is configured with --with-64bit-indices the integers are written to the file as 64 bit integers; this means they can only be read back in when the option --with-64bit-indices is used. If running with __float128 precision the output is in __float128 unless one uses the -binary_write_double option The buffer p should be a read-write buffer, not static data. This way, byte-swapping is done in-place, and then the buffer is written to the file. This routine restores the original contents of the buffer, after it is written to the file. This is done by byte-swapping in-place the second time. If the flag istemp is set to PETSC_TRUE, the second byte-swapping operation is not done, thus saving some computation, but the buffer is left corrupted. Because byte-swapping may be done on the values in data it cannot be declared const Concepts: files^writing binary Concepts: binary files^writing .seealso: PetscBinaryRead(), PetscBinaryOpen(), PetscBinaryClose(), PetscViewerBinaryGetDescriptor(), PetscBinarySynchronizedWrite(), PetscBinarySynchronizedRead(), PetscBinarySynchronizedSeek() @*/ PetscErrorCode PetscBinaryWrite(int fd,void *p,PetscInt n,PetscDataType type,PetscBool istemp) { char *pp = (char*)p; int err,wsize; size_t m = (size_t)n,maxblock=65536; PetscErrorCode ierr; void *ptmp = p; char *fname = NULL; #if defined(PETSC_USE_REAL___FLOAT128) PetscBool writedouble = PETSC_FALSE; double *ppp; PetscReal *pv; PetscInt i; #endif PetscDataType wtype = type; PetscFunctionBegin; if (n < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to write a negative amount of data %D",n); if (!n) PetscFunctionReturn(0); if (type == PETSC_FUNCTION) { #if defined(PETSC_SERIALIZE_FUNCTIONS) const char *fnametmp; #endif m = 64; fname = (char*)malloc(m*sizeof(char)); if (!fname) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Cannot allocate space for function name"); #if defined(PETSC_SERIALIZE_FUNCTIONS) if (n > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only binary view a single function at a time"); ierr = PetscFPTFind(*(void**)p,&fnametmp);CHKERRQ(ierr); ierr = PetscStrncpy(fname,fnametmp,m);CHKERRQ(ierr); #else ierr = PetscStrncpy(fname,"",m);CHKERRQ(ierr); #endif wtype = PETSC_CHAR; pp = (char*)fname; ptmp = (void*)fname; } #if defined(PETSC_USE_REAL___FLOAT128) ierr = PetscOptionsGetBool(NULL,NULL,"-binary_write_double",&writedouble,NULL);CHKERRQ(ierr); /* If using __float128 precision we still write in doubles to file */ if ((type == PETSC_SCALAR || type == PETSC_REAL) && writedouble) { wtype = PETSC_DOUBLE; ierr = PetscMalloc1(n,&ppp);CHKERRQ(ierr); pv = (PetscReal*)pp; for (i=0; i<n; i++) { ppp[i] = (double) pv[i]; } pp = (char*)ppp; ptmp = (char*)ppp; } #endif if (wtype == PETSC_INT) m *= sizeof(PetscInt); else if (wtype == PETSC_SCALAR) m *= sizeof(PetscScalar); else if (wtype == PETSC_REAL) m *= sizeof(PetscReal); else if (wtype == PETSC_DOUBLE) m *= sizeof(double); else if (wtype == PETSC_FLOAT) m *= sizeof(float); else if (wtype == 
PETSC_SHORT) m *= sizeof(short); else if (wtype == PETSC_LONG) m *= sizeof(long); else if (wtype == PETSC_CHAR) m *= sizeof(char); else if (wtype == PETSC_ENUM) m *= sizeof(PetscEnum); else if (wtype == PETSC_BOOL) m *= sizeof(PetscBool); else if (wtype == PETSC_INT64) m *= sizeof(PetscInt64); else if (wtype == PETSC_BIT_LOGICAL) m = PetscBTLength(m)*sizeof(char); else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Unknown type"); if (!PetscBinaryBigEndian()) {ierr = PetscByteSwap(ptmp,wtype,n);CHKERRQ(ierr);} while (m) { wsize = (m < maxblock) ? m : maxblock; err = write(fd,pp,wsize); if (err < 0 && errno == EINTR) continue; if (err != wsize) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing to file total size %d err %d wsize %d",(int)n,(int)err,(int)wsize); m -= wsize; pp += wsize; } if (!istemp) { if (!PetscBinaryBigEndian()) {ierr = PetscByteSwap(ptmp,wtype,n);CHKERRQ(ierr);} } if (type == PETSC_FUNCTION) { free(fname); } #if defined(PETSC_USE_REAL___FLOAT128) if ((type == PETSC_SCALAR || type == PETSC_REAL) && writedouble) { ierr = PetscFree(ppp);CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
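/* Hedged usage sketch for PetscBinaryWrite(): write three PetscInt to a file named "data.bin"
   (a placeholder name). istemp is PETSC_FALSE, so vals is restored after the in-place byte swap,
   as described in the notes above. */
#include <petscsys.h>
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       vals[3] = {1,2,3};

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = PetscBinaryOpen("data.bin",FILE_MODE_WRITE,&fd);CHKERRQ(ierr);
  ierr = PetscBinaryWrite(fd,vals,3,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
  ierr = PetscBinaryClose(fd);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}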
static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) { PC_BDDC *pcbddc=(PC_BDDC*)pc->data; PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; PetscScalar *matdata,*matdata2; PetscInt i,max_subset_size,cum,cum2; const PetscInt *idxs; PetscBool newsetup = PETSC_FALSE; PetscErrorCode ierr; PetscFunctionBegin; if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs"); if (!sub_schurs->n_subs) PetscFunctionReturn(0); /* Allocate arrays for subproblems */ if (!deluxe_ctx->seq_n) { deluxe_ctx->seq_n = sub_schurs->n_subs; ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr); newsetup = PETSC_TRUE; } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs); /* the change of basis is just a reference to sub_schurs->change (if any) */ deluxe_ctx->change = sub_schurs->change; deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; /* Create objects for deluxe */ max_subset_size = 0; for (i=0;i<sub_schurs->n_subs;i++) { PetscInt subset_size; ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); max_subset_size = PetscMax(subset_size,max_subset_size); } if (newsetup) { ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr); } cum = cum2 = 0; ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); for (i=0;i<deluxe_ctx->seq_n;i++) { PetscInt subset_size; ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); if (newsetup) { IS sub; /* work vectors */ ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr); /* scatters */ ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr); ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr); ierr = ISDestroy(&sub);CHKERRQ(ierr); } /* S_E_j */ ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); /* \sum_k S^k_E_j */ ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr); ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr); if (sub_schurs->is_hermitian) { ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr); } else { ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr); } if (pcbddc->deluxe_singlemat) { Mat X,Y; if (!sub_schurs->is_hermitian) { ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr); } else { ierr = 
PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); X = deluxe_ctx->seq_mat[i]; } ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr); if (!sub_schurs->is_hermitian) { ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); } else { ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr); } ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr); ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr); ierr = MatDestroy(&X);CHKERRQ(ierr); if (deluxe_ctx->change) { Mat C,CY; if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis"); ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr); ierr = MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr); ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); ierr = MatDestroy(&CY);CHKERRQ(ierr); } ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr); deluxe_ctx->seq_mat[i] = Y; } cum += subset_size; cum2 += subset_size*subset_size; } ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr); ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr); ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr); if (pcbddc->deluxe_singlemat) { deluxe_ctx->change = NULL; deluxe_ctx->change_with_qr = PETSC_FALSE; } if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { for (i=0;i<deluxe_ctx->seq_n;i++) { if (newsetup) { PC pc; ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr); } ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr); } } PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscMPIInt rank; PetscErrorCode ierr; PetscInt M = 10,N = 8,m = PETSC_DECIDE; PetscInt s =2,w=2,n = PETSC_DECIDE,nloc,l,i,j,kk; PetscInt Xs,Xm,Ys,Ym,iloc,*iglobal; const PetscInt *ltog; PetscInt *lx = NULL,*ly = NULL; PetscBool testorder = PETSC_FALSE,flg; DMBoundaryType bx = DM_BOUNDARY_NONE,by= DM_BOUNDARY_NONE; DM da; PetscViewer viewer; Vec local,global; PetscScalar value; DMDAStencilType st = DMDA_STENCIL_BOX; AO ao; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = PetscViewerDrawOpen(PETSC_COMM_WORLD,0,"",300,0,400,400,&viewer);CHKERRQ(ierr); /* Read options */ ierr = PetscOptionsGetInt(NULL,NULL,"-NX",&M,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-NY",&N,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-s",&s,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-w",&w,NULL);CHKERRQ(ierr); flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-xperiodic",&flg,NULL);CHKERRQ(ierr); if (flg) bx = DM_BOUNDARY_PERIODIC; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-yperiodic",&flg,NULL);CHKERRQ(ierr); if (flg) by = DM_BOUNDARY_PERIODIC; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-xghosted",&flg,NULL);CHKERRQ(ierr); if (flg) bx = DM_BOUNDARY_GHOSTED; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-yghosted",&flg,NULL);CHKERRQ(ierr); if (flg) by = DM_BOUNDARY_GHOSTED; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-star",&flg,NULL);CHKERRQ(ierr); if (flg) st = DMDA_STENCIL_STAR; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-box",&flg,NULL);CHKERRQ(ierr); if (flg) st = DMDA_STENCIL_BOX; flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-testorder",&testorder,NULL);CHKERRQ(ierr); /* Test putting two nodes in x and y on each processor, except the last processor in x and y, which gets the rest. 
*/ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-distribute",&flg,NULL);CHKERRQ(ierr); if (flg) { if (m == PETSC_DECIDE) SETERRQ(PETSC_COMM_WORLD,1,"Must set -m option with -distribute option"); ierr = PetscMalloc1(m,&lx);CHKERRQ(ierr); for (i=0; i<m-1; i++) { lx[i] = 4;} lx[m-1] = M - 4*(m-1); if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_WORLD,1,"Must set -n option with -distribute option"); ierr = PetscMalloc1(n,&ly);CHKERRQ(ierr); for (i=0; i<n-1; i++) { ly[i] = 2;} ly[n-1] = N - 2*(n-1); } /* Create distributed array and get vectors */ ierr = DMDACreate2d(PETSC_COMM_WORLD,bx,by,st,M,N,m,n,w,s,lx,ly,&da);CHKERRQ(ierr); ierr = PetscFree(lx);CHKERRQ(ierr); ierr = PetscFree(ly);CHKERRQ(ierr); ierr = DMView(da,viewer);CHKERRQ(ierr); ierr = DMCreateGlobalVector(da,&global);CHKERRQ(ierr); ierr = DMCreateLocalVector(da,&local);CHKERRQ(ierr); /* Set global vector; send ghost points to local vectors */ value = 1; ierr = VecSet(global,value);CHKERRQ(ierr); ierr = DMGlobalToLocalBegin(da,global,INSERT_VALUES,local);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(da,global,INSERT_VALUES,local);CHKERRQ(ierr); /* Scale local vectors according to processor rank; pass to global vector */ ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); value = rank; ierr = VecScale(local,value);CHKERRQ(ierr); ierr = DMLocalToGlobalBegin(da,local,INSERT_VALUES,global);CHKERRQ(ierr); ierr = DMLocalToGlobalEnd(da,local,INSERT_VALUES,global);CHKERRQ(ierr); if (!testorder) { /* turn off printing when testing ordering mappings */ ierr = PetscPrintf(PETSC_COMM_WORLD,"\nGlobal Vectors:\n");CHKERRQ(ierr); ierr = VecView(global,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"\n\n");CHKERRQ(ierr); } /* Send ghost points to local vectors */ ierr = DMGlobalToLocalBegin(da,global,INSERT_VALUES,local);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd(da,global,INSERT_VALUES,local);CHKERRQ(ierr); flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-local_print",&flg,NULL);CHKERRQ(ierr); if (flg) { PetscViewer sviewer; ierr = PetscViewerASCIIPushSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"\nLocal Vector: processor %d\n",rank);CHKERRQ(ierr); ierr = PetscViewerGetSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); ierr = VecView(local,sviewer);CHKERRQ(ierr); ierr = PetscViewerRestoreSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); ierr = PetscViewerFlush(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Tests mappings between application/PETSc orderings */ if (testorder) { ISLocalToGlobalMapping ltogm; ierr = DMGetLocalToGlobalMapping(da,&ltogm);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetSize(ltogm,&nloc);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetIndices(ltogm,&ltog);CHKERRQ(ierr); ierr = DMDAGetGhostCorners(da,&Xs,&Ys,NULL,&Xm,&Ym,NULL);CHKERRQ(ierr); ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); ierr = PetscMalloc1(nloc,&iglobal);CHKERRQ(ierr); /* Set iglobal to be global indices for each processor's local and ghost nodes, using the DMDA ordering of grid points */ kk = 0; for (j=Ys; j<Ys+Ym; j++) { for (i=Xs; i<Xs+Xm; i++) { iloc = w*((j-Ys)*Xm + i-Xs); for (l=0; l<w; l++) { iglobal[kk++] = ltog[iloc+l]; } } } /* Map this to the application ordering (which for DMDAs is just the natural ordering that would be used for 1 processor, numbering most rapidly by x, then y) */ ierr = AOPetscToApplication(ao,nloc,iglobal);CHKERRQ(ierr);
/* Then map the application ordering back to the PETSc DMDA ordering */ ierr = AOApplicationToPetsc(ao,nloc,iglobal);CHKERRQ(ierr); /* Verify the mappings */ kk=0; for (j=Ys; j<Ys+Ym; j++) { for (i=Xs; i<Xs+Xm; i++) { iloc = w*((j-Ys)*Xm + i-Xs); for (l=0; l<w; l++) { if (iglobal[kk] != ltog[iloc+l]) { ierr = PetscFPrintf(PETSC_COMM_SELF,stdout,"[%d] Problem with mapping: j=%D, i=%D, l=%D, petsc1=%D, petsc2=%D\n",rank,j,i,l,ltog[iloc+l],iglobal[kk]);CHKERRQ(ierr); } kk++; } } } ierr = PetscFree(iglobal);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingRestoreIndices(ltogm,&ltog);CHKERRQ(ierr); } /* Free memory */ ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); ierr = VecDestroy(&local);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,const PetscReal vfilter,const PetscBool symm,const PetscInt verbose) { PetscErrorCode ierr; PetscInt Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc; PetscMPIInt rank, size; Mat Gmat = *a_Gmat, tGmat, matTrans; MPI_Comm comm; const PetscScalar *vals; const PetscInt *idx; PetscInt *d_nnz, *o_nnz; Vec diag; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MatGetOwnershipRange(Gmat, &Istart, &Iend);CHKERRQ(ierr); nloc = Iend - Istart; ierr = MatGetSize(Gmat, &MM, &NN);CHKERRQ(ierr); #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); #endif /* scale Gmat so filter works */ ierr = MatGetVecs(Gmat, &diag, 0);CHKERRQ(ierr); ierr = MatGetDiagonal(Gmat, diag);CHKERRQ(ierr); ierr = VecReciprocal(diag);CHKERRQ(ierr); ierr = VecSqrtAbs(diag);CHKERRQ(ierr); ierr = MatDiagonalScale(Gmat, diag, diag);CHKERRQ(ierr); ierr = VecDestroy(&diag);CHKERRQ(ierr); if (symm) { ierr = MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);CHKERRQ(ierr); } /* filter - dup zeros out matrix */ ierr = PetscMalloc1(nloc, &d_nnz);CHKERRQ(ierr); ierr = PetscMalloc1(nloc, &o_nnz);CHKERRQ(ierr); for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) { ierr = MatGetRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); d_nnz[jj] = ncols; o_nnz[jj] = ncols; ierr = MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); if (symm) { ierr = MatGetRow(matTrans,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); d_nnz[jj] += ncols; o_nnz[jj] += ncols; ierr = MatRestoreRow(matTrans,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); } if (d_nnz[jj] > nloc) d_nnz[jj] = nloc; if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc; } ierr = MatCreateAIJ(comm, nloc, nloc, MM, MM, 0, d_nnz, 0, o_nnz, &tGmat);CHKERRQ(ierr); ierr = PetscFree(d_nnz);CHKERRQ(ierr); ierr = PetscFree(o_nnz);CHKERRQ(ierr); if (symm) { ierr = MatDestroy(&matTrans);CHKERRQ(ierr); } for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) { ierr = MatGetRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); for (jj=0; jj<ncols; jj++,nnz0++) { PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); if (PetscRealPart(sv) > vfilter) { nnz1++; if (symm) { sv *= 0.5; ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValues(tGmat,1,&idx[jj],1,&Ii,&sv,ADD_VALUES);CHKERRQ(ierr); } else { ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);CHKERRQ(ierr); } } } ierr = MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); } ierr = MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); #endif if (verbose) { if (verbose == 1) { ierr = PetscPrintf(comm,"\t[%d]%s %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%d)\n",rank,__FUNCT__, 100.*(double)nnz1/(double)nnz0,vfilter,(double)nnz0/(double)nloc,MM);CHKERRQ(ierr); } else { PetscInt nnz[2],out[2]; nnz[0] = nnz0; nnz[1] = nnz1; ierr = MPI_Allreduce(nnz, out, 2, MPIU_INT, MPI_SUM, comm);CHKERRQ(ierr); ierr = PetscPrintf(comm,"\t[%d]%s %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%d)\n",rank,__FUNCT__, 100.*(double)out[1]/(double)out[0],vfilter,(double)out[0]/(double)MM,MM);CHKERRQ(ierr); } } ierr = MatDestroy(&Gmat);CHKERRQ(ierr); *a_Gmat = tGmat; PetscFunctionReturn(0); }
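/* Hedged usage sketch: the vfilter threshold applied above is normally chosen through the GAMG
   options interface (or -pc_gamg_threshold on the command line); ksp is a placeholder for an
   existing KSP, and the exact PCGAMGSetThreshold() signature may differ between PETSc versions. */
PC pc;
ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
ierr = PCSetType(pc,PCGAMG);CHKERRQ(ierr);
ierr = PCGAMGSetThreshold(pc,0.05);CHKERRQ(ierr); /* drop relative couplings below 0.05 */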
/*@C TSSetEventHandler - Sets a monitoring function used for detecting events Logically Collective on TS Input Parameters: + ts - the TS context obtained from TSCreate() . nevents - number of local events . direction - direction of zero crossing to be detected. -1 => Zero crossing in negative direction, +1 => Zero crossing in positive direction, 0 => both ways (one for each event) . terminate - flag to indicate whether time stepping should be terminated after event is detected (one for each event) . eventhandler - event monitoring routine . postevent - [optional] post-event function - ctx - [optional] user-defined context for private data for the event monitor and post event routine (use NULL if no context is desired) Calling sequence of eventhandler: PetscErrorCode PetscEventHandler(TS ts,PetscReal t,Vec U,PetscScalar fvalue[],void* ctx) Input Parameters: + ts - the TS context . t - current time . U - current iterate - ctx - [optional] context passed with eventhandler Output parameters: . fvalue - function value of events at time t Calling sequence of postevent: PetscErrorCode PostEvent(TS ts,PetscInt nevents_zero,PetscInt events_zero[],PetscReal t,Vec U,PetscBool forwardsolve,void* ctx) Input Parameters: + ts - the TS context . nevents_zero - number of local events whose event function is zero . events_zero - indices of local events which have reached zero . t - current time . U - current solution . forwardsolve - Flag to indicate whether TS is doing a forward solve (1) or adjoint solve (0) - ctx - the context passed with eventhandler Level: intermediate .keywords: TS, event, set .seealso: TSCreate(), TSSetTimeStep(), TSSetConvergedReason() @*/ PetscErrorCode TSSetEventHandler(TS ts,PetscInt nevents,PetscInt direction[],PetscBool terminate[],PetscErrorCode (*eventhandler)(TS,PetscReal,Vec,PetscScalar[],void*),PetscErrorCode (*postevent)(TS,PetscInt,PetscInt[],PetscReal,Vec,PetscBool,void*),void *ctx) { PetscErrorCode ierr; TSEvent event; PetscInt i; PetscBool flg; #if defined PETSC_USE_REAL_SINGLE PetscReal tol=1e-4; #else PetscReal tol=1e-6; #endif PetscFunctionBegin; PetscValidHeaderSpecific(ts,TS_CLASSID,1); if (nevents) { PetscValidIntPointer(direction,3); PetscValidIntPointer(terminate,4); } ierr = PetscNewLog(ts,&event);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->fvalue);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->fvalue_prev);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->fvalue_right);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->zerocrossing);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->side);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->direction);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->terminate);CHKERRQ(ierr); ierr = PetscMalloc1(nevents,&event->vtol);CHKERRQ(ierr); for (i=0; i < nevents; i++) { event->direction[i] = direction[i]; event->terminate[i] = terminate[i]; event->zerocrossing[i] = PETSC_FALSE; event->side[i] = 0; } ierr = PetscMalloc1(nevents,&event->events_zero);CHKERRQ(ierr); event->nevents = nevents; event->eventhandler = eventhandler; event->postevent = postevent; event->ctx = ctx; event->recsize = 8; /* Initial size of the recorder */ ierr = PetscOptionsBegin(((PetscObject)ts)->comm,((PetscObject)ts)->prefix,"TS Event options","TS");CHKERRQ(ierr); { ierr = PetscOptionsReal("-ts_event_tol","Scalar event tolerance for zero crossing check","TSSetEventTolerances",tol,&tol,NULL);CHKERRQ(ierr); ierr = PetscOptionsName("-ts_event_monitor","Print choices made by event handler","",&flg);CHKERRQ(ierr); ierr = 
PetscOptionsInt("-ts_event_recorder_initial_size","Initial size of event recorder","",event->recsize,&event->recsize,NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscMalloc1(event->recsize,&event->recorder.time);CHKERRQ(ierr); ierr = PetscMalloc1(event->recsize,&event->recorder.stepnum);CHKERRQ(ierr); ierr = PetscMalloc1(event->recsize,&event->recorder.nevents);CHKERRQ(ierr); ierr = PetscMalloc1(event->recsize,&event->recorder.eventidx);CHKERRQ(ierr); for (i=0; i < event->recsize; i++) { ierr = PetscMalloc1(event->nevents,&event->recorder.eventidx[i]);CHKERRQ(ierr); } /* Initialize the event recorder */ event->recorder.ctr = 0; for (i=0; i < event->nevents; i++) event->vtol[i] = tol; if (flg) {ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"stdout",&event->monitor);CHKERRQ(ierr);} ierr = TSEventDestroy(&ts->event);CHKERRQ(ierr); ts->event = event; ts->event->refct = 1; PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscMPIInt rank,size,*toranks,*fromranks,nto,nfrom; PetscInt i,n; PetscBool verbose,build_twosided_f; Unit *todata,*fromdata; MPI_Datatype dtype; PetscInitialize(&argc,&argv,(char*)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); verbose = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-verbose",&verbose,NULL);CHKERRQ(ierr); build_twosided_f = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-build_twosided_f",&build_twosided_f,NULL);CHKERRQ(ierr); for (i=1,nto=0; i<size; i*=2) nto++; ierr = PetscMalloc2(nto,&todata,nto,&toranks);CHKERRQ(ierr); for (n=0,i=1; i<size; n++,i*=2) { toranks[n] = (rank+i) % size; todata[n].rank = (rank+i) % size; todata[n].value = (PetscScalar)rank; todata[n].ok[0] = 'o'; todata[n].ok[1] = 'k'; todata[n].ok[2] = 0; } if (verbose) { for (i=0; i<nto; i++) { ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] TO %d: {%D, %g, \"%s\"}\n",rank,toranks[i],todata[i].rank,(double)PetscRealPart(todata[i].value),todata[i].ok);CHKERRQ(ierr); } ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); } ierr = MakeDatatype(&dtype);CHKERRQ(ierr); if (build_twosided_f) { struct FCtx fctx; PetscMPIInt *todummy,*fromdummy; fctx.rank = rank; fctx.nto = nto; fctx.toranks = toranks; fctx.todata = todata; ierr = PetscSegBufferCreate(sizeof(Unit),1,&fctx.seg);CHKERRQ(ierr); ierr = PetscMalloc1(nto,&todummy);CHKERRQ(ierr); for (i=0; i<nto; i++) todummy[i] = rank; ierr = PetscCommBuildTwoSidedF(PETSC_COMM_WORLD,1,MPI_INT,nto,toranks,todummy,&nfrom,&fromranks,&fromdummy,2,FSend,FRecv,&fctx);CHKERRQ(ierr); ierr = PetscFree(todummy);CHKERRQ(ierr); ierr = PetscFree(fromdummy);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(fctx.seg,&fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&fctx.seg);CHKERRQ(ierr); } else { ierr = PetscCommBuildTwoSided(PETSC_COMM_WORLD,1,dtype,nto,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr); } ierr = MPI_Type_free(&dtype);CHKERRQ(ierr); if (verbose) { PetscInt *iranks,*iperm; ierr = PetscMalloc2(nfrom,&iranks,nfrom,&iperm);CHKERRQ(ierr); for (i=0; i<nfrom; i++) { iranks[i] = fromranks[i]; iperm[i] = i; } /* Receive ordering is non-deterministic in general, so sort to make verbose output deterministic. 
*/ ierr = PetscSortIntWithPermutation(nfrom,iranks,iperm);CHKERRQ(ierr); for (i=0; i<nfrom; i++) { PetscInt ip = iperm[i]; ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] FROM %d: {%D, %g, \"%s\"}\n",rank,fromranks[ip],fromdata[ip].rank,(double)PetscRealPart(fromdata[ip].value),fromdata[ip].ok);CHKERRQ(ierr); } ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); ierr = PetscFree2(iranks,iperm);CHKERRQ(ierr); } if (nto != nfrom) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] From ranks %d does not match To ranks %d",rank,nto,nfrom); for (i=1; i<size; i*=2) { PetscMPIInt expected_rank = (rank-i+size)%size; PetscBool flg; for (n=0; n<nfrom; n++) { if (expected_rank == fromranks[n]) goto found; } SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"[%d] Could not find expected from rank %d",rank,expected_rank); found: if (PetscRealPart(fromdata[n].value) != expected_rank) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got data %g from rank %d",rank,(double)PetscRealPart(fromdata[n].value),expected_rank); ierr = PetscStrcmp(fromdata[n].ok,"ok",&flg);CHKERRQ(ierr); if (!flg) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got string %s from rank %d",rank,fromdata[n].ok,expected_rank); } ierr = PetscFree2(todata,toranks);CHKERRQ(ierr); ierr = PetscFree(fromdata);CHKERRQ(ierr); ierr = PetscFree(fromranks);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscMPIInt size,rank; PetscInt n = 5,i,*blks,bs = 1,m = 2; PetscScalar value; Vec x,y; IS is1,is2; VecScatter ctx = 0; PetscViewer sviewer; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); /* create two vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,PETSC_DECIDE,size*bs*n);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); /* create two index sets */ if (rank < size-1) m = n + 2; else m = n; ierr = PetscMalloc1(m,&blks);CHKERRQ(ierr); blks[0] = n*rank; for (i=1; i<m; i++) blks[i] = blks[i-1] + 1; ierr = ISCreateBlock(PETSC_COMM_SELF,bs,m,blks,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); ierr = PetscFree(blks);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,bs*m,&y);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,bs*m,0,1,&is2);CHKERRQ(ierr); /* each processor inserts the entire vector */ /* this is redundant but tests assembly */ for (i=0; i<bs*n*size; i++) { value = (PetscScalar) i; ierr = VecSetValues(x,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecScatterCreate(x,is1,y,is2,&ctx);CHKERRQ(ierr); ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(PETSC_VIEWER_STDOUT_WORLD,"----\n");CHKERRQ(ierr); ierr = PetscViewerGetSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); ierr = VecView(y,sviewer);CHKERRQ(ierr); fflush(stdout); ierr = PetscViewerRestoreSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); ierr = PetscViewerFlush(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
PetscErrorCode PCISSetUp(PC pc) { PC_IS *pcis = (PC_IS*)(pc->data); Mat_IS *matis; PetscErrorCode ierr; PetscBool flg,issbaij; Vec counter; PetscFunctionBegin; ierr = PetscObjectTypeCompare((PetscObject)pc->pmat,MATIS,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONG,"Preconditioner type of Neumann-Neumann requires matrix of type MATIS"); matis = (Mat_IS*)pc->pmat->data; pcis->pure_neumann = matis->pure_neumann; /* get info on mapping */ ierr = PetscObjectReference((PetscObject)matis->mapping);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingDestroy(&pcis->mapping);CHKERRQ(ierr); pcis->mapping = matis->mapping; ierr = ISLocalToGlobalMappingGetSize(pcis->mapping,&pcis->n);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetInfo(pcis->mapping,&(pcis->n_neigh),&(pcis->neigh),&(pcis->n_shared),&(pcis->shared));CHKERRQ(ierr); /* Creating local and global index sets for interior and interface nodes. */ { PetscInt n_I; PetscInt *idx_I_local,*idx_B_local,*idx_I_global,*idx_B_global; PetscInt *array; PetscInt i,j; /* Identifying interior and interface nodes, in local numbering */ ierr = PetscMalloc1(pcis->n,&array);CHKERRQ(ierr); ierr = PetscMemzero(array,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); for (i=0;i<pcis->n_neigh;i++) for (j=0;j<pcis->n_shared[i];j++) array[pcis->shared[i][j]] += 1; ierr = PetscMalloc1(pcis->n,&idx_I_local);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&idx_B_local);CHKERRQ(ierr); for (i=0, pcis->n_B=0, n_I=0; i<pcis->n; i++) { if (!array[i]) { idx_I_local[n_I] = i; n_I++; } else { idx_B_local[pcis->n_B] = i; pcis->n_B++; } } /* Getting the global numbering */ idx_B_global = idx_I_local + n_I; /* Just avoiding allocating extra memory, since we have vacant space */ idx_I_global = idx_B_local + pcis->n_B; ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcis->n_B,idx_B_local,idx_B_global);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(pcis->mapping,n_I, idx_I_local,idx_I_global);CHKERRQ(ierr); /* Creating the index sets. */ ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n_B,idx_B_local,PETSC_COPY_VALUES, &pcis->is_B_local);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n_B,idx_B_global,PETSC_COPY_VALUES,&pcis->is_B_global);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,n_I,idx_I_local,PETSC_COPY_VALUES, &pcis->is_I_local);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,n_I,idx_I_global,PETSC_COPY_VALUES,&pcis->is_I_global);CHKERRQ(ierr); /* Freeing memory and restoring arrays */ ierr = PetscFree(idx_B_local);CHKERRQ(ierr); ierr = PetscFree(idx_I_local);CHKERRQ(ierr); ierr = PetscFree(array);CHKERRQ(ierr); } /* Extracting the blocks A_II, A_BI, A_IB and A_BB from A. 
If the numbering is such that interior nodes come before the interface ones, we have A = [ A_II A_IB ; A_BI A_BB ] */ ierr = MatGetSubMatrix(matis->A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_II);CHKERRQ(ierr); ierr = MatGetSubMatrix(matis->A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); if (!issbaij) { ierr = MatGetSubMatrix(matis->A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); ierr = MatGetSubMatrix(matis->A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); } else { Mat newmat; ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); ierr = MatDestroy(&newmat);CHKERRQ(ierr); } /* Creating work vectors and arrays */ ierr = VecDuplicate(matis->x,&pcis->vec1_N);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,pcis->n-pcis->n_B,&pcis->vec1_D);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&pcis->vec2_D);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&pcis->vec3_D);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&pcis->vec4_D);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,pcis->n_B,&pcis->vec1_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_B,&pcis->vec2_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_B,&pcis->vec3_B);CHKERRQ(ierr); ierr = MatCreateVecs(pc->pmat,&pcis->vec1_global,0);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&pcis->work_N);CHKERRQ(ierr); /* Creating the scatter contexts */ ierr = VecScatterCreate(pcis->vec1_global,pcis->is_I_global,pcis->vec1_D,(IS)0,&pcis->global_to_D);CHKERRQ(ierr); ierr = VecScatterCreate(pcis->vec1_N,pcis->is_B_local,pcis->vec1_B,(IS)0,&pcis->N_to_B);CHKERRQ(ierr); ierr = VecScatterCreate(pcis->vec1_global,pcis->is_B_global,pcis->vec1_B,(IS)0,&pcis->global_to_B);CHKERRQ(ierr); /* Creating scaling "matrix" D */ ierr = PetscOptionsGetBool(((PetscObject)pc)->prefix,"-pc_is_use_stiffness_scaling",&pcis->use_stiffness_scaling,NULL);CHKERRQ(ierr); if (!pcis->D) { ierr = VecDuplicate(pcis->vec1_B,&pcis->D);CHKERRQ(ierr); if (!pcis->use_stiffness_scaling) { ierr = VecSet(pcis->D,pcis->scaling_factor);CHKERRQ(ierr); } else { ierr = MatGetDiagonal(matis->A,pcis->vec1_N);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); } } ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr); ierr = MatCreateVecs(pc->pmat,&counter,0);CHKERRQ(ierr); /* temporary auxiliary vector */ ierr = VecSet(counter,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,counter,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,counter,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,counter,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,counter,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); ierr = VecDestroy(&counter);CHKERRQ(ierr); /* 
See historical note 01, at the bottom of this file. */ /* Creating the KSP contexts for the local Dirichlet and Neumann problems. */ if (pcis->computesolvers) { PC pc_ctx; /* Dirichlet */ ierr = KSPCreate(PETSC_COMM_SELF,&pcis->ksp_D);CHKERRQ(ierr); ierr = PetscObjectIncrementTabLevel((PetscObject)pcis->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); ierr = KSPSetOperators(pcis->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); ierr = KSPSetOptionsPrefix(pcis->ksp_D,"is_localD_");CHKERRQ(ierr); ierr = KSPGetPC(pcis->ksp_D,&pc_ctx);CHKERRQ(ierr); ierr = PCSetType(pc_ctx,PCLU);CHKERRQ(ierr); ierr = KSPSetType(pcis->ksp_D,KSPPREONLY);CHKERRQ(ierr); ierr = KSPSetFromOptions(pcis->ksp_D);CHKERRQ(ierr); /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */ ierr = KSPSetUp(pcis->ksp_D);CHKERRQ(ierr); /* Neumann */ ierr = KSPCreate(PETSC_COMM_SELF,&pcis->ksp_N);CHKERRQ(ierr); ierr = PetscObjectIncrementTabLevel((PetscObject)pcis->ksp_N,(PetscObject)pc,1);CHKERRQ(ierr); ierr = KSPSetOperators(pcis->ksp_N,matis->A,matis->A);CHKERRQ(ierr); ierr = KSPSetOptionsPrefix(pcis->ksp_N,"is_localN_");CHKERRQ(ierr); ierr = KSPGetPC(pcis->ksp_N,&pc_ctx);CHKERRQ(ierr); ierr = PCSetType(pc_ctx,PCLU);CHKERRQ(ierr); ierr = KSPSetType(pcis->ksp_N,KSPPREONLY);CHKERRQ(ierr); ierr = KSPSetFromOptions(pcis->ksp_N);CHKERRQ(ierr); { PetscBool damp_fixed = PETSC_FALSE, remove_nullspace_fixed = PETSC_FALSE, set_damping_factor_floating = PETSC_FALSE, not_damp_floating = PETSC_FALSE, not_remove_nullspace_floating = PETSC_FALSE; PetscReal fixed_factor, floating_factor; ierr = PetscOptionsGetReal(((PetscObject)pc_ctx)->prefix,"-pc_is_damp_fixed",&fixed_factor,&damp_fixed);CHKERRQ(ierr); if (!damp_fixed) fixed_factor = 0.0; ierr = PetscOptionsGetBool(((PetscObject)pc_ctx)->prefix,"-pc_is_damp_fixed",&damp_fixed,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject)pc_ctx)->prefix,"-pc_is_remove_nullspace_fixed",&remove_nullspace_fixed,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetReal(((PetscObject)pc_ctx)->prefix,"-pc_is_set_damping_factor_floating", &floating_factor,&set_damping_factor_floating);CHKERRQ(ierr); if (!set_damping_factor_floating) floating_factor = 0.0; ierr = PetscOptionsGetBool(((PetscObject)pc_ctx)->prefix,"-pc_is_set_damping_factor_floating",&set_damping_factor_floating,NULL);CHKERRQ(ierr); if (!set_damping_factor_floating) floating_factor = 1.e-12; ierr = PetscOptionsGetBool(((PetscObject)pc_ctx)->prefix,"-pc_is_not_damp_floating",&not_damp_floating,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(((PetscObject)pc_ctx)->prefix,"-pc_is_not_remove_nullspace_floating",&not_remove_nullspace_floating,NULL);CHKERRQ(ierr); if (pcis->pure_neumann) { /* floating subdomain */ if (!(not_damp_floating)) { ierr = PCFactorSetShiftType(pc_ctx,MAT_SHIFT_NONZERO);CHKERRQ(ierr); ierr = PCFactorSetShiftAmount(pc_ctx,floating_factor);CHKERRQ(ierr); } if (!(not_remove_nullspace_floating)) { MatNullSpace nullsp; ierr = MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nullsp);CHKERRQ(ierr); ierr = KSPSetNullSpace(pcis->ksp_N,nullsp);CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&nullsp);CHKERRQ(ierr); } } else { /* fixed subdomain */ if (damp_fixed) { ierr = PCFactorSetShiftType(pc_ctx,MAT_SHIFT_NONZERO);CHKERRQ(ierr); ierr = PCFactorSetShiftAmount(pc_ctx,fixed_factor);CHKERRQ(ierr); } if (remove_nullspace_fixed) { MatNullSpace nullsp; ierr = MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nullsp);CHKERRQ(ierr); ierr = KSPSetNullSpace(pcis->ksp_N,nullsp);CHKERRQ(ierr); ierr 
= MatNullSpaceDestroy(&nullsp);CHKERRQ(ierr); } } } /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */ ierr = KSPSetUp(pcis->ksp_N);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@C PetscProcessTree - Prepares tree data to be displayed graphically Not Collective Input Parameters: + n - number of values . mask - indicates those entries in the tree, location 0 is always masked - parentid - indicates the parent of each entry Output Parameters: + Nlevels - the number of levels . Level - for each node tells its level . Levelcnts - the number of nodes on each level . Idbylevel - a list of ids on each of the levels, first level followed by second etc - Column - for each id tells its column index Level: developer Notes: This code is not currently used .seealso: PetscSortReal(), PetscSortIntWithPermutation() @*/ PetscErrorCode PetscProcessTree(PetscInt n,const PetscBool mask[],const PetscInt parentid[],PetscInt *Nlevels,PetscInt **Level,PetscInt **Levelcnt,PetscInt **Idbylevel,PetscInt **Column) { PetscInt i,j,cnt,nmask = 0,nlevels = 0,*level,*levelcnt,levelmax = 0,*workid,*workparentid,tcnt = 0,*idbylevel,*column; PetscErrorCode ierr; PetscBool done = PETSC_FALSE; PetscFunctionBegin; if (!mask[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mask of 0th location must be set"); for (i=0; i<n; i++) { if (mask[i]) continue; if (parentid[i] == i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Node labeled as own parent"); if (parentid[i] && mask[parentid[i]]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Parent is masked"); } for (i=0; i<n; i++) { if (!mask[i]) nmask++; } /* determine the level in the tree of each node */ ierr = PetscCalloc1(n,&level);CHKERRQ(ierr); level[0] = 1; while (!done) { done = PETSC_TRUE; for (i=0; i<n; i++) { if (mask[i]) continue; if (!level[i] && level[parentid[i]]) level[i] = level[parentid[i]] + 1; else if (!level[i]) done = PETSC_FALSE; } } for (i=0; i<n; i++) { level[i]--; nlevels = PetscMax(nlevels,level[i]); } /* count the number of nodes on each level and its max */ ierr = PetscCalloc1(nlevels,&levelcnt);CHKERRQ(ierr); for (i=0; i<n; i++) { if (mask[i]) continue; levelcnt[level[i]-1]++; } for (i=0; i<nlevels;i++) levelmax = PetscMax(levelmax,levelcnt[i]); /* for each level sort the ids by the parent id */ ierr = PetscMalloc2(levelmax,&workid,levelmax,&workparentid);CHKERRQ(ierr); ierr = PetscMalloc1(nmask,&idbylevel);CHKERRQ(ierr); for (j=1; j<=nlevels;j++) { cnt = 0; for (i=0; i<n; i++) { if (mask[i]) continue; if (level[i] != j) continue; workid[cnt] = i; workparentid[cnt++] = parentid[i]; } /* PetscIntView(cnt,workparentid,0); PetscIntView(cnt,workid,0); ierr = PetscSortIntWithArray(cnt,workparentid,workid);CHKERRQ(ierr); PetscIntView(cnt,workparentid,0); PetscIntView(cnt,workid,0);*/ ierr = PetscMemcpy(idbylevel+tcnt,workid,cnt*sizeof(PetscInt));CHKERRQ(ierr); tcnt += cnt; } if (tcnt != nmask) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inconsistent count of unmasked nodes"); ierr = PetscFree2(workid,workparentid);CHKERRQ(ierr); /* for each node list its column */ ierr = PetscMalloc1(n,&column);CHKERRQ(ierr); cnt = 0; for (j=0; j<nlevels; j++) { for (i=0; i<levelcnt[j]; i++) { column[idbylevel[cnt++]] = i; } } *Nlevels = nlevels; *Level = level; *Levelcnt = levelcnt; *Idbylevel = idbylevel; *Column = column; PetscFunctionReturn(0); }
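/* Hedged usage sketch for PetscProcessTree(): a five-entry tree where location 0 is masked (as
   the documentation requires), node 1 is the root, nodes 2 and 3 are its children, and node 4
   is a child of node 3. The caller owns and frees the output arrays. */
PetscBool mask[5]     = {PETSC_TRUE,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE};
PetscInt  parentid[5] = {0,0,1,1,3};
PetscInt  nlevels,*level,*levelcnt,*idbylevel,*column;
ierr = PetscProcessTree(5,mask,parentid,&nlevels,&level,&levelcnt,&idbylevel,&column);CHKERRQ(ierr);
/* here nlevels == 3: node 1 on level 1, nodes 2 and 3 on level 2, node 4 on level 3 */
ierr = PetscFree(level);CHKERRQ(ierr);
ierr = PetscFree(levelcnt);CHKERRQ(ierr);
ierr = PetscFree(idbylevel);CHKERRQ(ierr);
ierr = PetscFree(column);CHKERRQ(ierr);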
/* Takes the local part of an already assembled MPIAIJ matrix and disassembles it. This is to allow new nonzeros into the matrix that require more communication in the matrix vector multiply. Thus certain data-structures must be rebuilt. Kind of slow! But that's what application programmers get when they are sloppy. */ PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; Mat B = aij->B,Bnew; Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; PetscErrorCode ierr; PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; PetscScalar v; PetscFunctionBegin; /* free stuff related to matrix-vec multiply */ ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); if (aij->colmap) { #if defined(PETSC_USE_CTABLE) ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); #else ierr = PetscFree(aij->colmap);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); #endif } /* make sure that B is assembled so we can access its values */ ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* invent new B and copy stuff over */ ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); for (i=0; i<m; i++) { nz[i] = Baij->i[i+1] - Baij->i[i]; } ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); if (Baij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */ ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; } /* Ensure that B's nonzerostate is monotonically increasing. Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call? */ Bnew->nonzerostate = B->nonzerostate; ierr = PetscFree(nz);CHKERRQ(ierr); for (i=0; i<m; i++) { for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { col = garray[Baij->j[ct]]; v = Baij->a[ct++]; ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); } } ierr = PetscFree(aij->garray);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); aij->B = Bnew; A->was_assembled = PETSC_FALSE; PetscFunctionReturn(0); }
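/* Sketch of the scenario this routine exists for (hypothetical two-rank example; after a first assembly the off-process block B stores compact column indices into garray, so inserting into a previously unseen off-process column means the assembly machinery must disassemble B before it can be rebuilt with the wider communication pattern):

     ierr = MatSetValues(A,1,&row,1,&col_seen,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col_new_offproc,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   .. internally reaches MatDisAssemble_MPIAIJ() ..
*/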
/* * Increase overlap for the sub-matrix across the sub-communicator * sub-matrix could be a graph or a numerical matrix * */ PetscErrorCode MatIncreaseOverlapSplit_Single(Mat mat,IS *is,PetscInt ov) { PetscInt i,nindx,*indices_sc,*indices_ov,localsize,*localsizes_sc,localsize_tmp; PetscInt *indices_ov_rd,nroots,nleaves,*localoffsets,*indices_recv,*sources_sc,*sources_sc_rd; const PetscInt *indices; PetscMPIInt srank,ssize,issamecomm,k,grank; IS is_sc,allis_sc,partitioning; MPI_Comm gcomm,dcomm,scomm; PetscSF sf; PetscSFNode *remote; Mat *smat; MatPartitioning part; PetscErrorCode ierr; PetscFunctionBegin; /* get the sub-communicator before calling the individual MatIncreaseOverlap(), since the sub-communicator may be changed */ ierr = PetscObjectGetComm((PetscObject)(*is),&dcomm);CHKERRQ(ierr); /* make a copy before the original one is deleted */ ierr = PetscCommDuplicate(dcomm,&scomm,NULL);CHKERRQ(ierr); /* get the global communicator, on which mat should be a global matrix */ ierr = PetscObjectGetComm((PetscObject)mat,&gcomm);CHKERRQ(ierr); /* increase overlap on each individual subdomain */ ierr = (*mat->ops->increaseoverlap)(mat,1,is,ov);CHKERRQ(ierr); /* compare communicators */ ierr = MPI_Comm_compare(gcomm,scomm,&issamecomm);CHKERRQ(ierr); /* if the sub-communicator is the same as the global communicator, the user does not want to use a sub-communicator */ if(issamecomm == MPI_IDENT || issamecomm == MPI_CONGRUENT) PetscFunctionReturn(0); /* if the sub-communicator is PETSC_COMM_SELF, the user also does not care about the sub-communicator */ ierr = MPI_Comm_compare(scomm,PETSC_COMM_SELF,&issamecomm);CHKERRQ(ierr); if(issamecomm == MPI_IDENT || issamecomm == MPI_CONGRUENT){PetscFunctionReturn(0);} /* local rank and size in the sub-communicator */ ierr = MPI_Comm_rank(scomm,&srank);CHKERRQ(ierr); ierr = MPI_Comm_size(scomm,&ssize);CHKERRQ(ierr); ierr = MPI_Comm_rank(gcomm,&grank);CHKERRQ(ierr); /* create a new IS based on the sub-communicator, since the old IS is often based on PETSC_COMM_SELF */ ierr = ISGetLocalSize(*is,&nindx);CHKERRQ(ierr); ierr = PetscCalloc1(nindx,&indices_sc);CHKERRQ(ierr); ierr = ISGetIndices(*is,&indices);CHKERRQ(ierr); ierr = PetscMemcpy(indices_sc,indices,sizeof(PetscInt)*nindx);CHKERRQ(ierr); ierr = ISRestoreIndices(*is,&indices);CHKERRQ(ierr); /* no longer needed */ ierr = ISDestroy(is);CHKERRQ(ierr); /* create an index set based on the sub-communicator */ ierr = ISCreateGeneral(scomm,nindx,indices_sc,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr); /* gather all indices within the sub-communicator */ ierr = ISAllGather(is_sc,&allis_sc);CHKERRQ(ierr); ierr = ISDestroy(&is_sc);CHKERRQ(ierr); /* gather local sizes */ ierr = PetscMalloc1(ssize,&localsizes_sc);CHKERRQ(ierr); /* get individual local sizes for all index sets */ ierr = MPI_Gather(&nindx,1,MPIU_INT,localsizes_sc,1,MPIU_INT,0,scomm);CHKERRQ(ierr); /* only the root does these computations */ if(!srank){ /* get the local size of the big index set */ ierr = ISGetLocalSize(allis_sc,&localsize);CHKERRQ(ierr); ierr = PetscCalloc2(localsize,&indices_ov,localsize,&sources_sc);CHKERRQ(ierr); ierr = PetscCalloc2(localsize,&indices_ov_rd,localsize,&sources_sc_rd);CHKERRQ(ierr); ierr = ISGetIndices(allis_sc,&indices);CHKERRQ(ierr); ierr = PetscMemcpy(indices_ov,indices,sizeof(PetscInt)*localsize);CHKERRQ(ierr); ierr = ISRestoreIndices(allis_sc,&indices);CHKERRQ(ierr); /* no longer needed */ ierr = ISDestroy(&allis_sc);CHKERRQ(ierr); /* assign corresponding sources */ localsize_tmp = 0; for(k=0; k<ssize; k++){ for(i=0; i<localsizes_sc[k]; i++){ sources_sc[localsize_tmp++] = k; } } /* record where the indices come from */ ierr = PetscSortIntWithArray(localsize,indices_ov,sources_sc);CHKERRQ(ierr); /* count local sizes for the reduced indices */ ierr = PetscMemzero(localsizes_sc,sizeof(PetscInt)*ssize);CHKERRQ(ierr); /* initialize the first entry */ if(localsize){ indices_ov_rd[0] = indices_ov[0]; sources_sc_rd[0] = sources_sc[0]; localsizes_sc[sources_sc[0]]++; } localsize_tmp = 1; /* remove duplicate integers */ for(i=1; i<localsize; i++){ if(indices_ov[i] != indices_ov[i-1]){ indices_ov_rd[localsize_tmp] = indices_ov[i]; sources_sc_rd[localsize_tmp++] = sources_sc[i]; localsizes_sc[sources_sc[i]]++; } } ierr = PetscFree2(indices_ov,sources_sc);CHKERRQ(ierr); ierr = PetscCalloc1(ssize+1,&localoffsets);CHKERRQ(ierr); for(k=0; k<ssize; k++){ localoffsets[k+1] = localoffsets[k] + localsizes_sc[k]; } /* construct a star forest to send the data back */ nleaves = localoffsets[ssize]; ierr = PetscMemzero(localoffsets,(ssize+1)*sizeof(PetscInt));CHKERRQ(ierr); nroots = localsizes_sc[srank]; ierr = PetscCalloc1(nleaves,&remote);CHKERRQ(ierr); for(i=0; i<nleaves; i++){ remote[i].rank = sources_sc_rd[i]; remote[i].index = localoffsets[sources_sc_rd[i]]++; } ierr = PetscFree(localoffsets);CHKERRQ(ierr); }else{ ierr = ISDestroy(&allis_sc);CHKERRQ(ierr); /* allocate a 'zero' pointer */ ierr = PetscCalloc1(0,&remote);CHKERRQ(ierr); nleaves = 0; indices_ov_rd = NULL; sources_sc_rd = NULL; } /* scatter sizes to everybody */ ierr = MPI_Scatter(localsizes_sc,1, MPIU_INT,&nroots,1, MPIU_INT,0,scomm);CHKERRQ(ierr); /* free memory */ ierr = PetscFree(localsizes_sc);CHKERRQ(ierr); ierr = PetscCalloc1(nroots,&indices_recv);CHKERRQ(ierr); /* send the data back to everybody */ ierr = PetscSFCreate(scomm,&sf);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPIU_INT,indices_ov_rd,indices_recv,MPIU_REPLACE);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sf,MPIU_INT,indices_ov_rd,indices_recv,MPIU_REPLACE);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); /* free memory */ ierr = PetscFree2(indices_ov_rd,sources_sc_rd);CHKERRQ(ierr); /* create an index set */ ierr = ISCreateGeneral(scomm,nroots,indices_recv,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr); /* construct a parallel submatrix */ ierr = MatGetSubMatricesMPI(mat,1,&is_sc,&is_sc,MAT_INITIAL_MATRIX,&smat);CHKERRQ(ierr); /* no longer needed */ ierr = ISDestroy(&allis_sc);CHKERRQ(ierr); /* create a partitioner to repartition the sub-matrix */ ierr = MatPartitioningCreate(scomm,&part);CHKERRQ(ierr); ierr = MatPartitioningSetAdjacency(part,smat[0]);CHKERRQ(ierr); #if defined(PETSC_HAVE_PARMETIS) /* if a ParMETIS installation exists, try to use ParMETIS because its repartitioning routine likely works better */ ierr = MatPartitioningSetType(part,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); /* use the repartition function instead of the partition function */ ierr = MatPartitioningParmetisSetRepartition(part);CHKERRQ(ierr); #else /* otherwise at least provide a default partitioner to rebalance the computation */ ierr = MatPartitioningSetType(part,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); #endif /* the user can select any partitioner via the options database */ ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr); /* apply the partition */ ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr); ierr =
MatPartitioningDestroy(&part);CHKERRQ(ierr); ierr = MatDestroy(&(smat[0]));CHKERRQ(ierr); ierr = PetscFree(smat);CHKERRQ(ierr); /* get local rows including overlap */ ierr = ISBuildTwoSided(partitioning,is_sc,is);CHKERRQ(ierr); /* destroy */ ierr = ISDestroy(&is_sc);CHKERRQ(ierr); ierr = ISDestroy(&partitioning);CHKERRQ(ierr); ierr = PetscCommDestroy(&scomm);CHKERRQ(ierr); PetscFunctionReturn(0); }
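/* Standalone sketch of the root's sort-and-deduplicate-with-provenance step used above (hypothetical data; PetscSortIntWithArray() permutes the second array alongside the first):

     PetscInt idx[6] = {5,2,9,2,5,7};   .. gathered indices ..
     PetscInt src[6] = {0,0,0,1,1,1};   .. rank each index came from ..
     PetscInt n = 6, nrd = 1, i;
     ierr = PetscSortIntWithArray(n,idx,src);CHKERRQ(ierr);
     for (i = 1; i < n; i++) { if (idx[i] != idx[i-1]) { idx[nrd] = idx[i]; src[nrd++] = src[i]; } }
     .. nrd == 4 and idx == {2,5,7,9}; each surviving value keeps one source rank, and which duplicate's source survives depends on the sort order ..
*/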
/* Makes the communication map symmetric */ PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new) { Mat A; PetscInt i,j,nc; PetscInt n_, *proc_neighbours_; PetscInt rank_i_; PetscMPIInt size, rank_i; PetscScalar *vals; const PetscInt *cols; const PetscScalar *red_vals; PetscMPIInt _n_new, *_proc_neighbours_new; PetscErrorCode ierr; PetscFunctionBegin; n_ = n; ierr = PetscMalloc1(n_, &proc_neighbours_);CHKERRQ(ierr); for (i = 0; i < n_; ++i) { proc_neighbours_[i] = proc_neighbours[i]; } ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr); rank_i_ = rank_i; ierr = MatCreate(comm,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr); ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr); ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr); /* Build original map */ ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr); for (i = 0; i < n_; ++i) { vals[i] = 1.0; } ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); /* Now force all other connections if they are not already there */ /* It's more efficient to do them all at once */ for (i = 0; i < n_; ++i) { vals[i] = 2.0; } ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ if ((n_new != NULL) && (proc_neighbours_new != NULL)) { ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr); _n_new = (PetscMPIInt) nc; ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr); for (j = 0; j < nc; ++j) { _proc_neighbours_new[j] = (PetscMPIInt)cols[j]; } ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr); *n_new = (PetscMPIInt)_n_new; *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new; } ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFree(vals);CHKERRQ(ierr); ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr); ierr = MPI_Barrier(comm);CHKERRQ(ierr); PetscFunctionReturn(0); }
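/* Usage sketch (hypothetical 3-rank example): if rank 0 initially lists neighbours {1}, rank 1 lists nothing and rank 2 lists {1}, the completed (symmetric) map returned on rank 1 is {0,2}:

     PetscMPIInt n_new, *neigh_new;
     ierr = _DataExCompleteCommunicationMap(comm,n,neigh,&n_new,&neigh_new);CHKERRQ(ierr);
     .. use neigh_new; the caller owns the returned array ..
     ierr = PetscFree(neigh_new);CHKERRQ(ierr);
*/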
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners) { PetscErrorCode ierr; PetscMPIInt size = stash->size,tag1=stash->tag1,tag2=stash->tag2; PetscInt *owner,*start,*nprocs,nsends,nreceives; PetscInt nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx; PetscScalar *rvalues,*svalues; MPI_Comm comm = stash->comm; MPI_Request *send_waits,*recv_waits; PetscFunctionBegin; /* first count number of contributors to each processor */ ierr = PetscCalloc1(2*size,&nprocs);CHKERRQ(ierr); ierr = PetscMalloc1(stash->n,&owner);CHKERRQ(ierr); j = 0; lastidx = -1; for (i=0; i<stash->n; i++) { /* if indices are NOT locally sorted, need to start search at the beginning */ if (lastidx > (idx = stash->idx[i])) j = 0; lastidx = idx; for (; j<size; j++) { if (idx >= owners[j] && idx < owners[j+1]) { nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break; } } } nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; /* inform other processors of number of messages and max length */ ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr); /* post receives: since we don't know how long each individual message is we allocate the largest needed buffer for each receive. Potentially this is a lot of wasted space. */ ierr = PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);CHKERRQ(ierr); ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); for (i=0,count=0; i<nreceives; i++) { ierr = MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr); ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr); } /* do sends: start[i] gives the starting index in svalues for data going to the ith processor */ ierr = PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);CHKERRQ(ierr); ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); ierr = PetscMalloc1(size,&start);CHKERRQ(ierr); /* use 2 sends per target rank: the first with the values, the second with the indices */ start[0] = 0; for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2]; for (i=0; i<stash->n; i++) { j = owner[i]; if (bs == 1) svalues[start[j]] = stash->array[i]; else { ierr = PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr); } sindices[start[j]] = stash->idx[i]; start[j]++; } start[0] = 0; for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2]; for (i=0,count=0; i<size; i++) { if (nprocs[2*i+1]) { ierr = MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); } } ierr = PetscFree(owner);CHKERRQ(ierr); ierr = PetscFree(start);CHKERRQ(ierr); /* This memory is reused in scatter end for a different purpose */ for (i=0; i<2*size; i++) nprocs[i] = -1; stash->nprocs = nprocs; stash->svalues = svalues; stash->sindices = sindices; stash->rvalues = rvalues; stash->rindices = rindices; stash->nsends = nsends; stash->nrecvs = nreceives; stash->send_waits = send_waits; stash->recv_waits = recv_waits; stash->rmax = nmax; PetscFunctionReturn(0); }
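/* Sketch of the send-count encoding built above (hypothetical 3-rank layout): nprocs holds size pairs, where nprocs[2*j] is the number of stash entries destined for rank j and nprocs[2*j+1] is 1 if any message goes to rank j at all, e.g.

     stash->idx == {12,3,14} with owners == {0,5,10,15}   .. ownership ranges [0,5), [5,10), [10,15) ..
     => owner == {2,0,2}, nprocs == {1,1, 0,0, 2,1}, nsends == 2

   so two value/index message pairs go out: one to rank 0 and one to rank 2. */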
static PetscErrorCode MatPartitioningApply_PTScotch_Private(MatPartitioning part, PetscBool useND, IS *partitioning) { MPI_Comm pcomm,comm; MatPartitioning_PTScotch *scotch = (MatPartitioning_PTScotch*)part->data; PetscErrorCode ierr; PetscMPIInt rank; Mat mat = part->adj; Mat_MPIAdj *adj = (Mat_MPIAdj*)mat->data; PetscBool flg,distributed; PetscBool proc_weight_flg; PetscInt i,j,p,bs=1,nold; PetscInt *NDorder = NULL; PetscReal *vwgttab,deltval; SCOTCH_Num *locals,*velotab,*veloloctab,*edloloctab,vertlocnbr,edgelocnbr,nparts=part->n; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)part,&pcomm);CHKERRQ(ierr); /* Duplicate the communicator to be sure that PTSCOTCH attribute caching does not interfere with PETSc. */ ierr = MPI_Comm_dup(pcomm,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIADJ,&flg);CHKERRQ(ierr); if (!flg) { /* bs indicates if the converted matrix is "reduced" from the original and hence the resulting partition results need to be stretched to match the original matrix */ nold = mat->rmap->n; ierr = MatConvert(mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr); if (mat->rmap->n > 0) bs = nold/mat->rmap->n; adj = (Mat_MPIAdj*)mat->data; } proc_weight_flg = PETSC_TRUE; ierr = PetscOptionsGetBool(NULL, NULL, "-mat_partitioning_ptscotch_proc_weight", &proc_weight_flg, NULL);CHKERRQ(ierr); ierr = PetscMalloc1(mat->rmap->n+1,&locals);CHKERRQ(ierr); if (useND) { #if defined(PETSC_HAVE_SCOTCH_PARMETIS_V3_NODEND) PetscInt *sizes, *seps, log2size, subd, *level, base = 0; PetscMPIInt size; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); log2size = PetscLog2Real(size); subd = PetscPowInt(2,log2size); if (subd != size) SETERRQ(comm,PETSC_ERR_SUP,"Only power of 2 communicator sizes"); ierr = PetscMalloc1(mat->rmap->n,&NDorder);CHKERRQ(ierr); ierr = PetscMalloc3(2*size,&sizes,4*size,&seps,size,&level);CHKERRQ(ierr); SCOTCH_ParMETIS_V3_NodeND(mat->rmap->range,adj->i,adj->j,&base,NULL,NDorder,sizes,&comm); ierr = MatPartitioningSizesToSep_Private(subd,sizes,seps,level);CHKERRQ(ierr); for (i=0;i<mat->rmap->n;i++) { PetscInt loc; ierr = PetscFindInt(NDorder[i],2*subd,seps,&loc);CHKERRQ(ierr); if (loc < 0) { loc = -(loc+1); if (loc%2) { /* part of subdomain */ locals[i] = loc/2; } else { ierr = PetscFindInt(NDorder[i],2*(subd-1),seps+2*subd,&loc);CHKERRQ(ierr); loc = loc < 0 ? -(loc+1)/2 : loc/2; locals[i] = level[loc]; } } else locals[i] = loc/2; } ierr = PetscFree3(sizes,seps,level);CHKERRQ(ierr); #else SETERRQ(pcomm,PETSC_ERR_SUP,"Need libptscotchparmetis.a compiled with -DSCOTCH_METIS_PREFIX"); #endif } else { velotab = NULL; if (proc_weight_flg) { ierr = PetscMalloc1(nparts,&vwgttab);CHKERRQ(ierr); ierr = PetscMalloc1(nparts,&velotab);CHKERRQ(ierr); for (j=0; j<nparts; j++) { if (part->part_weights) vwgttab[j] = part->part_weights[j]*nparts; else vwgttab[j] = 1.0; } for (i=0; i<nparts; i++) { deltval = PetscAbsReal(vwgttab[i]-PetscFloorReal(vwgttab[i]+0.5)); if (deltval>0.01) { for (j=0; j<nparts; j++) vwgttab[j] /= deltval; } } for (i=0; i<nparts; i++) velotab[i] = (SCOTCH_Num)(vwgttab[i] + 0.5); ierr = PetscFree(vwgttab);CHKERRQ(ierr); } vertlocnbr = mat->rmap->range[rank+1] - mat->rmap->range[rank]; edgelocnbr = adj->i[vertlocnbr]; veloloctab = part->vertex_weights; edloloctab = adj->values; /* detect whether all vertices are located at the same process in original graph (check the bound before indexing the range array) */ for (p = 0; p < nparts && !mat->rmap->range[p+1]; ++p); distributed = (mat->rmap->range[p+1] == mat->rmap->N) ?
PETSC_FALSE : PETSC_TRUE; if (distributed) { SCOTCH_Arch archdat; SCOTCH_Dgraph grafdat; SCOTCH_Dmapping mappdat; SCOTCH_Strat stradat; ierr = SCOTCH_dgraphInit(&grafdat,comm);CHKERRQ(ierr); ierr = SCOTCH_dgraphBuild(&grafdat,0,vertlocnbr,vertlocnbr,adj->i,adj->i+1,veloloctab, NULL,edgelocnbr,edgelocnbr,adj->j,NULL,edloloctab);CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG) ierr = SCOTCH_dgraphCheck(&grafdat);CHKERRQ(ierr); #endif ierr = SCOTCH_archInit(&archdat);CHKERRQ(ierr); ierr = SCOTCH_stratInit(&stradat);CHKERRQ(ierr); ierr = SCOTCH_stratDgraphMapBuild(&stradat,scotch->strategy,nparts,nparts,scotch->imbalance);CHKERRQ(ierr); if (velotab) { ierr = SCOTCH_archCmpltw(&archdat,nparts,velotab);CHKERRQ(ierr); } else { ierr = SCOTCH_archCmplt( &archdat,nparts);CHKERRQ(ierr); } ierr = SCOTCH_dgraphMapInit(&grafdat,&mappdat,&archdat,locals);CHKERRQ(ierr); ierr = SCOTCH_dgraphMapCompute(&grafdat,&mappdat,&stradat);CHKERRQ(ierr); SCOTCH_dgraphMapExit(&grafdat,&mappdat); SCOTCH_archExit(&archdat); SCOTCH_stratExit(&stradat); SCOTCH_dgraphExit(&grafdat); } else if (rank == p) { SCOTCH_Graph grafdat; SCOTCH_Strat stradat; ierr = SCOTCH_graphInit(&grafdat);CHKERRQ(ierr); ierr = SCOTCH_graphBuild(&grafdat,0,vertlocnbr,adj->i,adj->i+1,veloloctab,NULL,edgelocnbr,adj->j,edloloctab);CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG) ierr = SCOTCH_graphCheck(&grafdat);CHKERRQ(ierr); #endif ierr = SCOTCH_stratInit(&stradat);CHKERRQ(ierr); ierr = SCOTCH_stratGraphMapBuild(&stradat,scotch->strategy,nparts,scotch->imbalance);CHKERRQ(ierr); ierr = SCOTCH_graphPart(&grafdat,nparts,&stradat,locals);CHKERRQ(ierr); SCOTCH_stratExit(&stradat); SCOTCH_graphExit(&grafdat); } ierr = PetscFree(velotab);CHKERRQ(ierr); } ierr = MPI_Comm_free(&comm);CHKERRQ(ierr); if (bs > 1) { PetscInt *newlocals; ierr = PetscMalloc1(bs*mat->rmap->n,&newlocals);CHKERRQ(ierr); for (i=0;i<mat->rmap->n;i++) { for (j=0;j<bs;j++) { newlocals[bs*i+j] = locals[i]; } } ierr = PetscFree(locals);CHKERRQ(ierr); ierr = ISCreateGeneral(pcomm,bs*mat->rmap->n,newlocals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr); } else { ierr = ISCreateGeneral(pcomm,mat->rmap->n,locals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr); } if (useND) { IS ndis; if (bs > 1) { ierr = ISCreateBlock(pcomm,bs,mat->rmap->n,NDorder,PETSC_OWN_POINTER,&ndis);CHKERRQ(ierr); } else { ierr = ISCreateGeneral(pcomm,mat->rmap->n,NDorder,PETSC_OWN_POINTER,&ndis);CHKERRQ(ierr); } ierr = ISSetPermutation(ndis);CHKERRQ(ierr); ierr = PetscObjectCompose((PetscObject)(*partitioning),"_petsc_matpartitioning_ndorder",(PetscObject)ndis);CHKERRQ(ierr); ierr = ISDestroy(&ndis);CHKERRQ(ierr); } if (!flg) { ierr = MatDestroy(&mat);CHKERRQ(ierr); } PetscFunctionReturn(0); }
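/* Caller-side sketch (assumes an adjacency matrix Adj of type MATMPIADJ already exists; with -mat_partitioning_type ptscotch, MatPartitioningApply() dispatches to the routine above):

     MatPartitioning part;
     IS              is;
     ierr = MatPartitioningCreate(PetscObjectComm((PetscObject)Adj),&part);CHKERRQ(ierr);
     ierr = MatPartitioningSetAdjacency(part,Adj);CHKERRQ(ierr);
     ierr = MatPartitioningSetType(part,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr);
     ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
     ierr = MatPartitioningApply(part,&is);CHKERRQ(ierr);   .. is maps each local vertex to its target part ..
     ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
     ierr = ISDestroy(&is);CHKERRQ(ierr);
*/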
PetscErrorCode DMPlexPreallocateOperator(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix) { MPI_Comm comm; MatType mtype; PetscSF sf, sfDof, sfAdj; PetscSection leafSectionAdj, rootSectionAdj, sectionAdj, anchorSectionAdj; PetscInt nroots, nleaves, l, p; const PetscInt *leaves; const PetscSFNode *remotes; PetscInt dim, pStart, pEnd, numDof, globalOffStart, globalOffEnd, numCols; PetscInt *tmpAdj = NULL, *adj, *rootAdj, *anchorAdj = NULL, *cols, *remoteOffsets; PetscInt adjSize; PetscLayout rLayout; PetscInt locRows, rStart, rEnd, r; PetscMPIInt size; PetscBool doCommLocal, doComm, debug = PETSC_FALSE, isSymBlock, isSymSeqBlock, isSymMPIBlock; PetscBool useAnchors; PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(dm, DM_CLASSID, 1); PetscValidHeaderSpecific(section, PETSC_SECTION_CLASSID, 3); PetscValidHeaderSpecific(sectionGlobal, PETSC_SECTION_CLASSID, 4); PetscValidHeaderSpecific(A, MAT_CLASSID, 9); if (dnz) PetscValidPointer(dnz,5); if (onz) PetscValidPointer(onz,6); if (dnzu) PetscValidPointer(dnzu,7); if (onzu) PetscValidPointer(onzu,8); ierr = PetscLogEventBegin(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL, "-dm_view_preallocation", &debug, NULL);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr); ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); doCommLocal = (size > 1) && (nroots >= 0) ? PETSC_TRUE : PETSC_FALSE; ierr = MPI_Allreduce(&doCommLocal, &doComm, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr); /* Create dof SF based on point SF */ if (debug) { ierr = PetscPrintf(comm, "Input Section for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSectionView(section, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Input Global Section for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSectionView(sectionGlobal, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Input SF for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSFView(sf, NULL);CHKERRQ(ierr); } ierr = PetscSFCreateRemoteOffsets(sf, section, section, &remoteOffsets);CHKERRQ(ierr); ierr = PetscSFCreateSectionSF(sf, section, remoteOffsets, section, &sfDof);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Dof SF for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSFView(sfDof, NULL);CHKERRQ(ierr); } /* Create section for dof adjacency (dof ==> # adj dof) */ ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(section, &numDof);CHKERRQ(ierr); ierr = PetscSectionCreate(comm, &leafSectionAdj);CHKERRQ(ierr); ierr = PetscSectionSetChart(leafSectionAdj, 0, numDof);CHKERRQ(ierr); ierr = PetscSectionCreate(comm, &rootSectionAdj);CHKERRQ(ierr); ierr = PetscSectionSetChart(rootSectionAdj, 0, numDof);CHKERRQ(ierr); /* Fill in the ghost dofs on the interface */ ierr = PetscSFGetGraph(sf, NULL, &nleaves, &leaves, &remotes);CHKERRQ(ierr); /* use constraints in finding adjacency in this routine */ ierr = DMPlexGetAdjacencyUseAnchors(dm,&useAnchors);CHKERRQ(ierr); ierr = DMPlexSetAdjacencyUseAnchors(dm,PETSC_TRUE);CHKERRQ(ierr); /* section - maps points to (# dofs, local dofs) sectionGlobal - maps points to (# dofs, global dofs) leafSectionAdj - maps unowned local dofs to # adj dofs rootSectionAdj - maps owned local dofs 
to # adj dofs adj - adj global dofs indexed by leafSectionAdj rootAdj - adj global dofs indexed by rootSectionAdj sf - describes shared points across procs sfDof - describes shared dofs across procs sfAdj - describes shared adjacent dofs across procs ** The bootstrapping process involves six rounds with a similar structure, each visiting the neighbors of every point. (0). If there are point-to-point constraints, add the adjacencies of constrained points to anchors in anchorAdj (This is done in DMPlexComputeAnchorAdjacencies()) 1. Visit unowned points on interface, count adjacencies placing in leafSectionAdj Reduce those counts to rootSectionAdj (now redundantly counting some interface points) 2. Visit owned points on interface, count adjacencies placing in rootSectionAdj Create sfAdj connecting rootSectionAdj and leafSectionAdj 3. Visit unowned points on interface, write adjacencies to adj Gather adj to rootAdj (note that there is redundancy in rootAdj when multiple procs find the same adjacencies) 4. Visit owned points on interface, write adjacencies to rootAdj Remove redundancy in rootAdj ** The last two traversals use transitive closure 5. Visit all owned points in the subdomain, count dofs for each point (sectionAdj) Allocate memory addressed by sectionAdj (cols) 6. Visit all owned points in the subdomain, insert dof adjacencies into cols ** Knowing all the column adjacencies, check ownership and sum into dnz and onz */ ierr = DMPlexComputeAnchorAdjacencies(dm,section,sectionGlobal,&anchorSectionAdj,&anchorAdj);CHKERRQ(ierr); for (l = 0; l < nleaves; ++l) { PetscInt dof, off, d, q, anDof; PetscInt p = leaves[l], numAdj = PETSC_DETERMINE; if ((p < pStart) || (p >= pEnd)) continue; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(leafSectionAdj, d, ndof-ncdof);CHKERRQ(ierr); } } ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); if (anDof) { for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(leafSectionAdj, d, anDof);CHKERRQ(ierr); } } } ierr = PetscSectionSetUp(leafSectionAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency Section for Preallocation on Leaves:\n");CHKERRQ(ierr); ierr = PetscSectionView(leafSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Get maximum remote adjacency sizes for owned dofs on interface (roots) */ if (doComm) { ierr = PetscSFReduceBegin(sfDof, MPIU_INT, leafSectionAdj->atlasDof, rootSectionAdj->atlasDof, MPI_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sfDof, MPIU_INT, leafSectionAdj->atlasDof, rootSectionAdj->atlasDof, MPI_SUM);CHKERRQ(ierr); } if (debug) { ierr = PetscPrintf(comm, "Adjacency Section for Preallocation on Roots:\n");CHKERRQ(ierr); ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Add in local adjacency sizes for owned dofs on interface (roots) */ for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, adof, dof, off, d, q, anDof; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); if (!dof) continue; ierr =
PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr); if (adof <= 0) continue; ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(rootSectionAdj, d, ndof-ncdof);CHKERRQ(ierr); } } ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); if (anDof) { for (d = off; d < off+dof; ++d) { ierr = PetscSectionAddDof(rootSectionAdj, d, anDof);CHKERRQ(ierr); } } } ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency Section for Preallocation on Roots after local additions:\n");CHKERRQ(ierr); ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Create adj SF based on dof SF */ ierr = PetscSFCreateRemoteOffsets(sfDof, rootSectionAdj, leafSectionAdj, &remoteOffsets);CHKERRQ(ierr); ierr = PetscSFCreateSectionSF(sfDof, rootSectionAdj, remoteOffsets, leafSectionAdj, &sfAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency SF for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSFView(sfAdj, NULL);CHKERRQ(ierr); } ierr = PetscSFDestroy(&sfDof);CHKERRQ(ierr); /* Create leaf adjacency */ ierr = PetscSectionSetUp(leafSectionAdj);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(leafSectionAdj, &adjSize);CHKERRQ(ierr); ierr = PetscCalloc1(adjSize, &adj);CHKERRQ(ierr); for (l = 0; l < nleaves; ++l) { PetscInt dof, off, d, q, anDof, anOff; PetscInt p = leaves[l], numAdj = PETSC_DETERMINE; if ((p < pStart) || (p >= pEnd)) continue; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { PetscInt aoff, i = 0; ierr = PetscSectionGetOffset(leafSectionAdj, d, &aoff);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, ngoff, nd; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr); for (nd = 0; nd < ndof-ncdof; ++nd) { adj[aoff+i] = (ngoff < 0 ?
-(ngoff+1) : ngoff) + nd; ++i; } } for (q = 0; q < anDof; q++) { adj[aoff+i] = anchorAdj[anOff+q]; ++i; } } } /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Leaf adjacency indices\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, adj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Gather adjacent indices to root */ ierr = PetscSectionGetStorageSize(rootSectionAdj, &adjSize);CHKERRQ(ierr); ierr = PetscMalloc1(adjSize, &rootAdj);CHKERRQ(ierr); for (r = 0; r < adjSize; ++r) rootAdj[r] = -1; if (doComm) { ierr = PetscSFGatherBegin(sfAdj, MPIU_INT, adj, rootAdj);CHKERRQ(ierr); ierr = PetscSFGatherEnd(sfAdj, MPIU_INT, adj, rootAdj);CHKERRQ(ierr); } ierr = PetscSFDestroy(&sfAdj);CHKERRQ(ierr); ierr = PetscFree(adj);CHKERRQ(ierr); /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Root adjacency indices after gather\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Add in local adjacency indices for owned dofs on interface (roots) */ for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, adof, dof, off, d, q, anDof, anOff; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); if (!dof) continue; ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr); if (adof <= 0) continue; ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { PetscInt adof, aoff, i; ierr = PetscSectionGetDof(rootSectionAdj, d, &adof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(rootSectionAdj, d, &aoff);CHKERRQ(ierr); i = adof-1; for (q = 0; q < anDof; q++) { rootAdj[aoff+i] = anchorAdj[anOff+q]; --i; } for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, ngoff, nd; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr); for (nd = 0; nd < ndof-ncdof; ++nd) { rootAdj[aoff+i] = ngoff < 0 ?
-(ngoff+1)+nd : ngoff+nd; --i; } } } } /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Root adjacency indices\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Compress indices */ ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt dof, cdof, off, d; PetscInt adof, aoff; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); if (!dof) continue; ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr); if (adof <= 0) continue; for (d = off; d < off+dof-cdof; ++d) { ierr = PetscSectionGetDof(rootSectionAdj, d, &adof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(rootSectionAdj, d, &aoff);CHKERRQ(ierr); ierr = PetscSortRemoveDupsInt(&adof, &rootAdj[aoff]);CHKERRQ(ierr); ierr = PetscSectionSetDof(rootSectionAdj, d, adof);CHKERRQ(ierr); } } /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Adjacency Section for Preallocation on Roots after compression:\n");CHKERRQ(ierr); ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Root adjacency indices after compression\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Build adjacency section: Maps global indices to sets of adjacent global indices */ ierr = PetscSectionGetOffsetRange(sectionGlobal, &globalOffStart, &globalOffEnd);CHKERRQ(ierr); ierr = PetscSectionCreate(comm, &sectionAdj);CHKERRQ(ierr); ierr = PetscSectionSetChart(sectionAdj, globalOffStart, globalOffEnd);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, dof, cdof, off, goff, d, q, anDof; PetscBool found = PETSC_TRUE; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr); for (d = 0; d < dof-cdof; ++d) { PetscInt ldof, rdof; ierr = PetscSectionGetDof(leafSectionAdj, off+d, &ldof);CHKERRQ(ierr); ierr = PetscSectionGetDof(rootSectionAdj, off+d, &rdof);CHKERRQ(ierr); if (ldof > 0) { /* We do not own this point */ } else if (rdof > 0) { ierr = PetscSectionSetDof(sectionAdj, goff+d, rdof);CHKERRQ(ierr); } else { found = PETSC_FALSE; } } if (found) continue; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr); ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, noff; if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, padj, &noff);CHKERRQ(ierr); for (d = goff; d < goff+dof-cdof; ++d) { ierr = PetscSectionAddDof(sectionAdj, d, ndof-ncdof);CHKERRQ(ierr); } } ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); if (anDof) { for (d = goff; d < goff+dof-cdof; ++d) { ierr = PetscSectionAddDof(sectionAdj, d, anDof);CHKERRQ(ierr); } } } ierr =
PetscSectionSetUp(sectionAdj);CHKERRQ(ierr); if (debug) { ierr = PetscPrintf(comm, "Adjacency Section for Preallocation:\n");CHKERRQ(ierr); ierr = PetscSectionView(sectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* Get adjacent indices */ ierr = PetscSectionGetStorageSize(sectionAdj, &numCols);CHKERRQ(ierr); ierr = PetscMalloc1(numCols, &cols);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt numAdj = PETSC_DETERMINE, dof, cdof, off, goff, d, q, anDof, anOff; PetscBool found = PETSC_TRUE; ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr); for (d = 0; d < dof-cdof; ++d) { PetscInt ldof, rdof; ierr = PetscSectionGetDof(leafSectionAdj, off+d, &ldof);CHKERRQ(ierr); ierr = PetscSectionGetDof(rootSectionAdj, off+d, &rdof);CHKERRQ(ierr); if (ldof > 0) { /* We do not own this point */ } else if (rdof > 0) { PetscInt aoff, roff; ierr = PetscSectionGetOffset(sectionAdj, goff+d, &aoff);CHKERRQ(ierr); ierr = PetscSectionGetOffset(rootSectionAdj, off+d, &roff);CHKERRQ(ierr); ierr = PetscMemcpy(&cols[aoff], &rootAdj[roff], rdof * sizeof(PetscInt));CHKERRQ(ierr); } else { found = PETSC_FALSE; } } if (found) continue; ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr); ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr); for (d = goff; d < goff+dof-cdof; ++d) { PetscInt adof, aoff, i = 0; ierr = PetscSectionGetDof(sectionAdj, d, &adof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionAdj, d, &aoff);CHKERRQ(ierr); for (q = 0; q < numAdj; ++q) { const PetscInt padj = tmpAdj[q]; PetscInt ndof, ncdof, ngoff, nd; const PetscInt *ncind; /* Adjacent points may not be in the section chart */ if ((padj < pStart) || (padj >= pEnd)) continue; ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr); ierr = PetscSectionGetConstraintIndices(section, padj, &ncind);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr); for (nd = 0; nd < ndof-ncdof; ++nd, ++i) { cols[aoff+i] = ngoff < 0 ? 
-(ngoff+1)+nd : ngoff+nd; } } for (q = 0; q < anDof; q++, i++) { cols[aoff+i] = anchorAdj[anOff + q]; } if (i != adof) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of entries %D != %D for dof %D (point %D)", i, adof, d, p); } } ierr = PetscSectionDestroy(&anchorSectionAdj);CHKERRQ(ierr); ierr = PetscSectionDestroy(&leafSectionAdj);CHKERRQ(ierr); ierr = PetscSectionDestroy(&rootSectionAdj);CHKERRQ(ierr); ierr = PetscFree(anchorAdj);CHKERRQ(ierr); ierr = PetscFree(rootAdj);CHKERRQ(ierr); ierr = PetscFree(tmpAdj);CHKERRQ(ierr); /* Debugging */ if (debug) { IS tmp; ierr = PetscPrintf(comm, "Column indices\n");CHKERRQ(ierr); ierr = ISCreateGeneral(comm, numCols, cols, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr); ierr = ISView(tmp, NULL);CHKERRQ(ierr); ierr = ISDestroy(&tmp);CHKERRQ(ierr); } /* Create allocation vectors from adjacency graph */ ierr = MatGetLocalSize(A, &locRows, NULL);CHKERRQ(ierr); ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)A), &rLayout);CHKERRQ(ierr); ierr = PetscLayoutSetLocalSize(rLayout, locRows);CHKERRQ(ierr); ierr = PetscLayoutSetBlockSize(rLayout, 1);CHKERRQ(ierr); ierr = PetscLayoutSetUp(rLayout);CHKERRQ(ierr); ierr = PetscLayoutGetRange(rLayout, &rStart, &rEnd);CHKERRQ(ierr); ierr = PetscLayoutDestroy(&rLayout);CHKERRQ(ierr); /* Only loop over blocks of rows */ if (rStart%bs || rEnd%bs) SETERRQ3(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid layout [%D, %D) for matrix, must be divisible by block size %D", rStart, rEnd, bs); for (r = rStart/bs; r < rEnd/bs; ++r) { const PetscInt row = r*bs; PetscInt numCols, cStart, c; ierr = PetscSectionGetDof(sectionAdj, row, &numCols);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionAdj, row, &cStart);CHKERRQ(ierr); for (c = cStart; c < cStart+numCols; ++c) { if ((cols[c] >= rStart*bs) && (cols[c] < rEnd*bs)) { ++dnz[r-rStart]; if (cols[c] >= row) ++dnzu[r-rStart]; } else { ++onz[r-rStart]; if (cols[c] >= row) ++onzu[r-rStart]; } } } if (bs > 1) { for (r = 0; r < locRows/bs; ++r) { dnz[r] /= bs; onz[r] /= bs; dnzu[r] /= bs; onzu[r] /= bs; } } /* Set matrix pattern */ ierr = MatXAIJSetPreallocation(A, bs, dnz, onz, dnzu, onzu);CHKERRQ(ierr); ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); /* Check for symmetric storage */ ierr = MatGetType(A, &mtype);CHKERRQ(ierr); ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr); ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr); ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr); if (isSymBlock || isSymSeqBlock || isSymMPIBlock) {ierr = MatSetOption(A, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);} /* Fill matrix with zeros */ if (fillMatrix) { PetscScalar *values; PetscInt maxRowLen = 0; for (r = rStart; r < rEnd; ++r) { PetscInt len; ierr = PetscSectionGetDof(sectionAdj, r, &len);CHKERRQ(ierr); maxRowLen = PetscMax(maxRowLen, len); } ierr = PetscCalloc1(maxRowLen, &values);CHKERRQ(ierr); for (r = rStart; r < rEnd; ++r) { PetscInt numCols, cStart; ierr = PetscSectionGetDof(sectionAdj, r, &numCols);CHKERRQ(ierr); ierr = PetscSectionGetOffset(sectionAdj, r, &cStart);CHKERRQ(ierr); ierr = MatSetValues(A, 1, &r, numCols, &cols[cStart], values, INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(values);CHKERRQ(ierr); ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } /* restore original useAnchors */ ierr = DMPlexSetAdjacencyUseAnchors(dm,useAnchors);CHKERRQ(ierr); ierr =
PetscSectionDestroy(&sectionAdj);CHKERRQ(ierr); ierr = PetscFree(cols);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
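/* Caller-side sketch for DMPlexPreallocateOperator() (assumes dm already carries the local and global sections; array lengths follow the locRows/bs convention used above):

     PetscInt *dnz,*onz,*dnzu,*onzu,locRows,bs = 1;
     ierr = MatGetLocalSize(A,&locRows,NULL);CHKERRQ(ierr);
     ierr = PetscCalloc4(locRows/bs,&dnz,locRows/bs,&onz,locRows/bs,&dnzu,locRows/bs,&onzu);CHKERRQ(ierr);
     ierr = DMPlexPreallocateOperator(dm,bs,section,sectionGlobal,dnz,onz,dnzu,onzu,A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = PetscFree4(dnz,onz,dnzu,onzu);CHKERRQ(ierr);
*/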