PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype a,MPI_Datatype b,PetscBool *match) { PetscErrorCode ierr; MPI_Datatype atype,btype; PetscMPIInt aintcount,aaddrcount,atypecount,acombiner; PetscMPIInt bintcount,baddrcount,btypecount,bcombiner; PetscBool freeatype, freebtype; PetscFunctionBegin; ierr = MPIPetsc_Type_unwrap(a,&atype,&freeatype);CHKERRQ(ierr); ierr = MPIPetsc_Type_unwrap(b,&btype,&freebtype);CHKERRQ(ierr); *match = PETSC_FALSE; if (atype == btype) { *match = PETSC_TRUE; goto free_types; } ierr = MPI_Type_get_envelope(atype,&aintcount,&aaddrcount,&atypecount,&acombiner);CHKERRQ(ierr); ierr = MPI_Type_get_envelope(btype,&bintcount,&baddrcount,&btypecount,&bcombiner);CHKERRQ(ierr); if (acombiner == bcombiner && aintcount == bintcount && aaddrcount == baddrcount && atypecount == btypecount && (aintcount > 0 || aaddrcount > 0 || atypecount > 0)) { PetscMPIInt *aints,*bints; MPI_Aint *aaddrs,*baddrs; MPI_Datatype *atypes,*btypes; PetscInt i; PetscBool same; ierr = PetscMalloc6(aintcount,&aints,bintcount,&bints,aaddrcount,&aaddrs,baddrcount,&baddrs,atypecount,&atypes,btypecount,&btypes);CHKERRQ(ierr); ierr = MPI_Type_get_contents(atype,aintcount,aaddrcount,atypecount,aints,aaddrs,atypes);CHKERRQ(ierr); ierr = MPI_Type_get_contents(btype,bintcount,baddrcount,btypecount,bints,baddrs,btypes);CHKERRQ(ierr); ierr = PetscMemcmp(aints,bints,aintcount*sizeof(aints[0]),&same);CHKERRQ(ierr); if (same) { ierr = PetscMemcmp(aaddrs,baddrs,aaddrcount*sizeof(aaddrs[0]),&same);CHKERRQ(ierr); if (same) { /* Check for identity first */ ierr = PetscMemcmp(atypes,btypes,atypecount*sizeof(atypes[0]),&same);CHKERRQ(ierr); if (!same) { /* If the atype or btype were not predefined data types, then the types returned from MPI_Type_get_contents * will merely be equivalent to the types used in the construction, so we must recursively compare. */ for (i=0; i<atypecount; i++) { ierr = MPIPetsc_Type_compare(atypes[i],btypes[i],&same);CHKERRQ(ierr); if (!same) break; } } } } for (i=0; i<atypecount; i++) { ierr = MPIPetsc_Type_free(&(atypes[i]));CHKERRQ(ierr); ierr = MPIPetsc_Type_free(&(btypes[i]));CHKERRQ(ierr); } ierr = PetscFree6(aints,bints,aaddrs,baddrs,atypes,btypes);CHKERRQ(ierr); if (same) *match = PETSC_TRUE; } free_types: if (freeatype) { ierr = MPIPetsc_Type_free(&atype);CHKERRQ(ierr); } if (freebtype) { ierr = MPIPetsc_Type_free(&btype);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@C PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex Not Collective Input Arguments: + dim - The simplex dimension . order - The number of points in one dimension . a - left end of interval (often-1) - b - right end of interval (often +1) Output Argument: . q - A PetscQuadrature object Level: intermediate References: Karniadakis and Sherwin. FIAT .seealso: PetscDTGaussTensorQuadrature(), PetscDTGaussQuadrature() @*/ PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt order, PetscReal a, PetscReal b, PetscQuadrature *q) { PetscInt npoints = dim > 1 ? dim > 2 ? order*PetscSqr(order) : PetscSqr(order) : order; PetscReal *px, *wx, *py, *wy, *pz, *wz, *x, *w; PetscInt i, j, k; PetscErrorCode ierr; PetscFunctionBegin; if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now"); ierr = PetscMalloc1(npoints*dim, &x);CHKERRQ(ierr); ierr = PetscMalloc1(npoints, &w);CHKERRQ(ierr); switch (dim) { case 0: ierr = PetscFree(x);CHKERRQ(ierr); ierr = PetscFree(w);CHKERRQ(ierr); ierr = PetscMalloc1(1, &x);CHKERRQ(ierr); ierr = PetscMalloc1(1, &w);CHKERRQ(ierr); x[0] = 0.0; w[0] = 1.0; break; case 1: ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, x, w);CHKERRQ(ierr); break; case 2: ierr = PetscMalloc4(order,&px,order,&wx,order,&py,order,&wy);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr); for (i = 0; i < order; ++i) { for (j = 0; j < order; ++j) { ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*order+j)*2+0], &x[(i*order+j)*2+1]);CHKERRQ(ierr); w[i*order+j] = 0.5 * wx[i] * wy[j]; } } ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr); break; case 3: ierr = PetscMalloc6(order,&px,order,&wx,order,&py,order,&wy,order,&pz,order,&wz);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 2.0, 0.0, pz, wz);CHKERRQ(ierr); for (i = 0; i < order; ++i) { for (j = 0; j < order; ++j) { for (k = 0; k < order; ++k) { ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*order+j)*order+k)*3+0], &x[((i*order+j)*order+k)*3+1], &x[((i*order+j)*order+k)*3+2]);CHKERRQ(ierr); w[(i*order+j)*order+k] = 0.125 * wx[i] * wy[j] * wz[k]; } } } ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr); break; default: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim); } ierr = PetscQuadratureCreate(PETSC_COMM_SELF, q);CHKERRQ(ierr); ierr = PetscQuadratureSetData(*q, dim, npoints, x, w);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex Not Collective Input Arguments: + dim - The simplex dimension . npoints - number of points . a - left end of interval (often-1) - b - right end of interval (often +1) Output Arguments: + points - quadrature points - weights - quadrature weights Level: intermediate References: Karniadakis and Sherwin. FIAT .seealso: PetscDTGaussQuadrature() @*/ PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt npoints, PetscReal a, PetscReal b, PetscReal *points[], PetscReal *weights[]) { PetscReal *px, *wx, *py, *wy, *pz, *wz, *x, *w; PetscInt i, j, k; PetscErrorCode ierr; PetscFunctionBegin; if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now"); switch (dim) { case 1: ierr = PetscMalloc(npoints * sizeof(PetscReal), &x);CHKERRQ(ierr); ierr = PetscMalloc(npoints * sizeof(PetscReal), &w);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, x, w);CHKERRQ(ierr); break; case 2: ierr = PetscMalloc(npoints*npoints*2 * sizeof(PetscReal), &x);CHKERRQ(ierr); ierr = PetscMalloc(npoints*npoints * sizeof(PetscReal), &w);CHKERRQ(ierr); ierr = PetscMalloc4(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr); for (i = 0; i < npoints; ++i) { for (j = 0; j < npoints; ++j) { ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*npoints+j)*2+0], &x[(i*npoints+j)*2+1]);CHKERRQ(ierr); w[i*npoints+j] = 0.5 * wx[i] * wy[j]; } } ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr); break; case 3: ierr = PetscMalloc(npoints*npoints*3 * sizeof(PetscReal), &x);CHKERRQ(ierr); ierr = PetscMalloc(npoints*npoints * sizeof(PetscReal), &w);CHKERRQ(ierr); ierr = PetscMalloc6(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy,npoints,PetscReal,&pz,npoints,PetscReal,&wz);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr); ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 2.0, 0.0, pz, wz);CHKERRQ(ierr); for (i = 0; i < npoints; ++i) { for (j = 0; j < npoints; ++j) { for (k = 0; k < npoints; ++k) { ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*npoints+j)*npoints+k)*3+0], &x[((i*npoints+j)*npoints+k)*3+1], &x[((i*npoints+j)*npoints+k)*3+2]);CHKERRQ(ierr); w[(i*npoints+j)*npoints+k] = 0.125 * wx[i] * wy[j] * wz[k]; } } } ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr); break; default: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim); } if (points) *points = x; if (weights) *weights = w; PetscFunctionReturn(0); }
PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype a,MPI_Datatype b,PetscBool *match) { PetscErrorCode ierr; MPI_Datatype atype,btype; PetscMPIInt aintcount,aaddrcount,atypecount,acombiner; PetscMPIInt bintcount,baddrcount,btypecount,bcombiner; PetscBool freeatype, freebtype; PetscFunctionBegin; ierr = MPIPetsc_Type_unwrap(a,&atype,&freeatype);CHKERRQ(ierr); ierr = MPIPetsc_Type_unwrap(b,&btype,&freebtype);CHKERRQ(ierr); *match = PETSC_FALSE; if (atype == btype) { *match = PETSC_TRUE; goto free_types; } ierr = MPI_Type_get_envelope(atype,&aintcount,&aaddrcount,&atypecount,&acombiner);CHKERRQ(ierr); ierr = MPI_Type_get_envelope(btype,&bintcount,&baddrcount,&btypecount,&bcombiner);CHKERRQ(ierr); if (acombiner == bcombiner && aintcount == bintcount && aaddrcount == baddrcount && atypecount == btypecount && (aintcount > 0 || aaddrcount > 0 || atypecount > 0)) { PetscMPIInt *aints,*bints; MPI_Aint *aaddrs,*baddrs; MPI_Datatype *atypes,*btypes; PetscInt i; PetscBool same; ierr = PetscMalloc6(aintcount,&aints,bintcount,&bints,aaddrcount,&aaddrs,baddrcount,&baddrs,atypecount,&atypes,btypecount,&btypes);CHKERRQ(ierr); ierr = MPI_Type_get_contents(atype,aintcount,aaddrcount,atypecount,aints,aaddrs,atypes);CHKERRQ(ierr); ierr = MPI_Type_get_contents(btype,bintcount,baddrcount,btypecount,bints,baddrs,btypes);CHKERRQ(ierr); ierr = PetscMemcmp(aints,bints,aintcount*sizeof(aints[0]),&same);CHKERRQ(ierr); if (same) { ierr = PetscMemcmp(aaddrs,baddrs,aaddrcount*sizeof(aaddrs[0]),&same);CHKERRQ(ierr); if (same) { /* This comparison should be recursive */ ierr = PetscMemcmp(atypes,btypes,atypecount*sizeof(atypes[0]),&same);CHKERRQ(ierr); } } for (i=0; i<atypecount; i++) { ierr = MPIPetsc_Type_free(&(atypes[i]));CHKERRQ(ierr); ierr = MPIPetsc_Type_free(&(btypes[i]));CHKERRQ(ierr); } ierr = PetscFree6(aints,bints,aaddrs,baddrs,atypes,btypes);CHKERRQ(ierr); if (same) *match = PETSC_TRUE; } free_types: if (freeatype) { ierr = MPIPetsc_Type_free(&atype);CHKERRQ(ierr); } if (freebtype) { ierr = MPIPetsc_Type_free(&btype);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/* EPSSelectiveLanczos - Selective reorthogonalization. */ static PetscErrorCode EPSSelectiveLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscReal anorm) { PetscErrorCode ierr; EPS_LANCZOS *lanczos = (EPS_LANCZOS*)eps->data; PetscInt i,j,m = *M,n,nritz=0,nritzo; Vec vj,vj1,av; PetscReal *d,*e,*ritz,norm; PetscScalar *Y,*hwork; PetscBool *which; PetscFunctionBegin; ierr = PetscCalloc6(m+1,&d,m,&e,m,&ritz,m*m,&Y,m,&which,m,&hwork);CHKERRQ(ierr); for (i=0;i<k;i++) which[i] = PETSC_TRUE; for (j=k;j<m;j++) { ierr = BVSetActiveColumns(eps->V,0,m);CHKERRQ(ierr); /* Lanczos step */ ierr = BVGetColumn(eps->V,j,&vj);CHKERRQ(ierr); ierr = BVGetColumn(eps->V,j+1,&vj1);CHKERRQ(ierr); ierr = STApply(eps->st,vj,vj1);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j,&vj);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j+1,&vj1);CHKERRQ(ierr); which[j] = PETSC_TRUE; if (j-2>=k) which[j-2] = PETSC_FALSE; ierr = BVOrthogonalizeSomeColumn(eps->V,j+1,which,hwork,&norm,breakdown);CHKERRQ(ierr); alpha[j] = PetscRealPart(hwork[j]); beta[j] = norm; if (*breakdown) { *M = j+1; break; } /* Compute eigenvalues and eigenvectors Y of the tridiagonal block */ n = j-k+1; for (i=0;i<n;i++) { d[i] = alpha[i+k]; e[i] = beta[i+k]; } ierr = DenseTridiagonal(n,d,e,ritz,Y);CHKERRQ(ierr); /* Estimate ||A|| */ for (i=0;i<n;i++) if (PetscAbsReal(ritz[i]) > anorm) anorm = PetscAbsReal(ritz[i]); /* Compute nearly converged Ritz vectors */ nritzo = 0; for (i=0;i<n;i++) { if (norm*PetscAbsScalar(Y[i*n+n-1]) < PETSC_SQRT_MACHINE_EPSILON*anorm) nritzo++; } if (nritzo>nritz) { nritz = 0; for (i=0;i<n;i++) { if (norm*PetscAbsScalar(Y[i*n+n-1]) < PETSC_SQRT_MACHINE_EPSILON*anorm) { ierr = BVSetActiveColumns(eps->V,k,k+n);CHKERRQ(ierr); ierr = BVGetColumn(lanczos->AV,nritz,&av);CHKERRQ(ierr); ierr = BVMultVec(eps->V,1.0,0.0,av,Y+i*n);CHKERRQ(ierr); ierr = BVRestoreColumn(lanczos->AV,nritz,&av);CHKERRQ(ierr); nritz++; } } } if (nritz > 0) { ierr = BVGetColumn(eps->V,j+1,&vj1);CHKERRQ(ierr); ierr = BVSetActiveColumns(lanczos->AV,0,nritz);CHKERRQ(ierr); ierr = BVOrthogonalizeVec(lanczos->AV,vj1,hwork,&norm,breakdown);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j+1,&vj1);CHKERRQ(ierr); if (*breakdown) { *M = j+1; break; } } ierr = BVScaleColumn(eps->V,j+1,1.0/norm);CHKERRQ(ierr); } ierr = PetscFree6(d,e,ritz,Y,which,hwork);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode MatColoringLocalColor(MatColoring mc,PetscSF etoc,PetscSF etor,PetscReal *wts,ISColoringValue *color, ISColoringValue *maxcolor) { PetscInt nrows,ncols,ncolentries,nrowentries,idx,neighoffset; PetscInt i,j,k; PetscInt dist = mc->dist; PetscInt totalcolors; PetscBool *colormask; PetscErrorCode ierr; PetscBool *rowseen,*colseen; const PetscInt *rowdegrees; PetscInt *rowoffsets; const PetscInt *coldegrees; PetscInt *coloffsets; PetscInt offset; PetscInt *ll_ptr; PetscInt *ll_idx; PetscReal *swts; PetscInt *sidx; PetscInt unused; PetscInt rowlist,collist; PetscInt swp; PetscMPIInt rank; const PetscSFNode *colentries,*rowentries; PetscFunctionBegin; ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mc),&rank);CHKERRQ(ierr); ierr = PetscSFGetGraph(etoc,&ncols,&ncolentries,NULL,&colentries);CHKERRQ(ierr); ierr = PetscSFGetGraph(etor,&nrows,&nrowentries,NULL,&rowentries);CHKERRQ(ierr); ierr = PetscMalloc6(nrows,&rowseen,ncols,&colseen,ncols,&coloffsets,nrows,&rowoffsets,2*ncols,&ll_ptr,2*ncols,&ll_idx);CHKERRQ(ierr); ierr = PetscMalloc2(ncols,&sidx,ncols,&swts);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(etoc,&rowdegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(etoc,&rowdegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(etor,&coldegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(etor,&coldegrees);CHKERRQ(ierr); /* sort by weight */ for (i=0;i<ncols;i++) { sidx[i] = i; swts[i] = wts[i]; } ierr = PetscSortRealWithPermutation(ncols,swts,sidx);CHKERRQ(ierr); for (i=0;i<ncols/2;i++) { swp = sidx[i]; sidx[i] = sidx[ncols-1-i]; sidx[ncols-1-i] = swp; } /* set up the "unused" linked list */ unused = 0; ll_ptr[2*ncols-1] = -1; for (i=0;i<2*ncols-1;i++) { ll_ptr[i] = i+1; } /* initialize the offsets */ offset=0; for (i=0;i<ncols;i++) { coloffsets[i] = offset; offset+=coldegrees[i]; colseen[i] = PETSC_FALSE; } offset=0; for (i=0;i<nrows;i++) { rowoffsets[i] = offset; offset+=rowdegrees[i]; rowseen[i] = PETSC_FALSE; } /* discover the maximum current color */ totalcolors = 1; for (i=0;i<ncols;i++) { if (color[i] > totalcolors-1 && color[i] != IS_COLORING_MAX) totalcolors = color[i]+1; } if (totalcolors < 10) totalcolors=10; ierr = PetscMalloc(sizeof(PetscBool)*totalcolors,&colormask);CHKERRQ(ierr); /* alternate between rows and columns to get the distance k minimum coloring */ for (i=0;i<ncols;i++) { collist = -1; rowlist = -1; if (color[sidx[i]] == IS_COLORING_MAX) { for (j=0;j<totalcolors;j++) colormask[j] = PETSC_FALSE; swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = collist; ll_idx[swp] = sidx[i]; collist = swp; colseen[sidx[i]] = PETSC_TRUE; for (k=0;k<=dist;k++) { if (k % 2 == 0) { while (collist >= 0) { if (k != dist) { for (j=0;j<coldegrees[ll_idx[collist]];j++) { neighoffset = coloffsets[ll_idx[collist]]+j; idx = colentries[neighoffset].index; if (colentries[neighoffset].rank == rank && !rowseen[idx]) { swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = rowlist; ll_idx[swp] = idx; rowlist = swp; rowseen[idx] = PETSC_TRUE; } } } if (color[ll_idx[collist]] != IS_COLORING_MAX) colormask[color[ll_idx[collist]]] = PETSC_TRUE; colseen[ll_idx[collist]] = PETSC_FALSE; swp = collist; collist = ll_ptr[collist]; ll_ptr[swp] = unused; unused = swp; } } else { while (rowlist >= 0) { if (k != dist) { for (j=0;j<rowdegrees[ll_idx[rowlist]];j++) { neighoffset = rowoffsets[ll_idx[rowlist]]+j; idx = rowentries[neighoffset].index; if (rowentries[neighoffset].rank == rank && !colseen[idx]) { swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = collist; ll_idx[swp] = idx; collist = swp; colseen[idx] = PETSC_TRUE; } } } if (color[ll_idx[rowlist]] != IS_COLORING_MAX) colormask[color[ll_idx[rowlist]]] = PETSC_TRUE; rowseen[ll_idx[rowlist]] = PETSC_FALSE; swp = rowlist; rowlist = ll_ptr[rowlist]; ll_ptr[swp] = unused; unused = swp; } } } color[sidx[i]] = totalcolors; for (k=0;k<totalcolors;k++) { if (!colormask[k]) {color[sidx[i]] = k; break;} } if (color[sidx[i]] >= mc->maxcolors && mc->maxcolors > 0) color[sidx[i]] = mc->maxcolors; if (color[sidx[i]] > *maxcolor) *maxcolor = color[sidx[i]]; if (color[sidx[i]] > totalcolors-1) { totalcolors *= 2; ierr = PetscFree(colormask);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscBool)*totalcolors,&colormask);CHKERRQ(ierr); } } if (collist != -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in local coloring BFS -- column queue still has %d\n",collist); if (rowlist != -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in local coloring BFS -- row queue still has %d\n",rowlist); } for (i=0;i<ncols;i++) { if (colseen[i]) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in local coloring BFS -- column %d still seen\n",i);} } for (i=0;i<nrows;i++) { if (rowseen[i]) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in local coloring BFS -- row %d still seen\n",i);} } ierr = PetscFree6(rowseen,colseen,coloffsets,rowoffsets,ll_ptr,ll_idx);CHKERRQ(ierr); ierr = PetscFree2(sidx,swts);CHKERRQ(ierr); ierr = PetscFree(colormask);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode MatColoringDiscoverBoundary(MatColoring mc,PetscSF etoc,PetscSF etor,PetscInt *nboundary,PetscInt **boundary) { PetscInt nrows,ncols,ncolentries,nrowentries,idx,bidx,neighoffset; PetscInt i,j,k; PetscInt dist = mc->dist; PetscBool onBoundary; PetscErrorCode ierr; PetscBool *rowseen,*colseen; const PetscInt *rowdegrees; PetscInt *rowoffsets; const PetscInt *coldegrees; PetscInt *coloffsets; PetscInt offset; PetscInt *ll_ptr; PetscInt *ll_idx; PetscInt unused; PetscInt rowlist,collist; PetscInt swp; PetscMPIInt rank; const PetscSFNode *colentries,*rowentries; PetscFunctionBegin; *nboundary = 0; ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mc),&rank);CHKERRQ(ierr); ierr = PetscSFGetGraph(etoc,&ncols,&ncolentries,NULL,&colentries);CHKERRQ(ierr); ierr = PetscSFGetGraph(etor,&nrows,&nrowentries,NULL,&rowentries);CHKERRQ(ierr); ierr = PetscMalloc6(nrows,&rowseen, ncols,&colseen, ncols,&coloffsets, nrows,&rowoffsets, 2*ncols,&ll_ptr, 2*ncols,&ll_idx);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(etoc,&rowdegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(etoc,&rowdegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(etor,&coldegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(etor,&coldegrees);CHKERRQ(ierr); /* set up the "unused" linked list -- double the size of the number of items as in tiny or large distance cases we may have a clique */ unused = 0; ll_ptr[2*ncols-1] = -1; for (i=0;i<2*ncols-1;i++) { ll_ptr[i] = i+1; } /* initialize the offsets */ offset=0; for (i=0;i<ncols;i++) { coloffsets[i] = offset; offset+=coldegrees[i]; colseen[i] = PETSC_FALSE; } offset=0; for (i=0;i<nrows;i++) { rowoffsets[i] = offset; offset+=rowdegrees[i]; rowseen[i] = PETSC_FALSE; } /* count the number of boundary nodes */ for (i=0;i<ncols;i++) { onBoundary = PETSC_FALSE; collist = -1; rowlist = -1; swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = collist; ll_idx[swp] = i; collist = swp; colseen[i] = PETSC_TRUE; for (k=0;k<=dist;k++) { if (k % 2 == 0) { while (collist >= 0) { if (k != dist) { for (j=0;j<coldegrees[ll_idx[collist]];j++) { neighoffset = coloffsets[ll_idx[collist]]+j; idx = colentries[neighoffset].index; if (colentries[neighoffset].rank == rank) { if (!rowseen[idx] && !onBoundary) { swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = rowlist; rowlist = swp; ll_idx[swp] = idx; rowseen[idx] = PETSC_TRUE; } } else { onBoundary = PETSC_TRUE; } } } colseen[ll_idx[collist]] = PETSC_FALSE; swp = collist; collist = ll_ptr[collist]; ll_ptr[swp] = unused; unused = swp; } } else { while (rowlist >= 0) { if (k != dist) { for (j=0;j<rowdegrees[ll_idx[rowlist]];j++) { neighoffset = rowoffsets[ll_idx[rowlist]]+j; if (rowentries[neighoffset].rank == rank) { idx = rowentries[neighoffset].index; if (!colseen[idx] && !onBoundary) { swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = collist; ll_idx[swp] = idx; colseen[idx] = PETSC_TRUE; collist = swp; } } else { onBoundary = PETSC_TRUE; } } } rowseen[ll_idx[rowlist]] = PETSC_FALSE; swp = rowlist; rowlist = ll_ptr[rowlist]; ll_ptr[swp] = unused; unused = swp; } } } if (onBoundary) {(*nboundary)++;} if (collist != -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in boundary count BFS -- column queue still has %d\n",collist); if (rowlist != -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in boundary count BFS -- row queue still has %d\n",collist); } for (i=0;i<ncols;i++) { if (colseen[i]) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in boundary count BFS -- column %d still seen\n",i);} } for (i=0;i<nrows;i++) { if (rowseen[i]) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in boundary count BFS -- row %d still seen\n",i);} } ierr = PetscMalloc(sizeof(PetscInt)*(*nboundary),boundary);CHKERRQ(ierr); /* set the boundary nodes */ bidx=0; for (i=0;i<ncols;i++) { onBoundary = PETSC_FALSE; collist = -1; rowlist = -1; swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = collist; ll_idx[swp] = i; collist = swp; colseen[i] = PETSC_TRUE; for (k=0;k<=dist;k++) { if (k % 2 == 0) { while (collist >= 0) { if (k != dist) { for (j=0;j<coldegrees[ll_idx[collist]];j++) { neighoffset = coloffsets[ll_idx[collist]]+j; idx = colentries[neighoffset].index; if (colentries[neighoffset].rank == rank) { if (!rowseen[idx] && !onBoundary) { swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = rowlist; rowlist = swp; ll_idx[swp] = idx; rowseen[idx] = PETSC_TRUE; } } else { onBoundary = PETSC_TRUE; } } } colseen[ll_idx[collist]] = PETSC_FALSE; swp = collist; collist = ll_ptr[collist]; ll_ptr[swp] = unused; unused = swp; } } else { while (rowlist >= 0) { if (k != dist) { for (j=0;j<rowdegrees[ll_idx[rowlist]];j++) { neighoffset = rowoffsets[ll_idx[rowlist]]+j; if (rowentries[neighoffset].rank == rank) { idx = rowentries[neighoffset].index; if (!colseen[idx] && !onBoundary) { swp = unused; unused = ll_ptr[unused]; ll_ptr[swp] = collist; ll_idx[swp] = idx; colseen[idx] = PETSC_TRUE; collist = swp; } } else { onBoundary = PETSC_TRUE; } } } rowseen[ll_idx[rowlist]] = PETSC_FALSE; swp = rowlist; rowlist = ll_ptr[rowlist]; ll_ptr[swp] = unused; unused = swp; } } } if (onBoundary) {(*boundary)[bidx] = i; bidx++;} } for (i=0;i<ncols;i++) { if (colseen[i]) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in boundary set BFS -- column %d still seen\n",i);} } for (i=0;i<nrows;i++) { if (rowseen[i]) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_NOT_CONVERGED,"Likely error in boundary set BFS -- row %d still seen\n",i);} } if (bidx != *nboundary) {SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_NOT_CONVERGED,"Number of boundary nodes not matched");} ierr = PetscFree6(rowseen,colseen,coloffsets,rowoffsets,ll_ptr,ll_idx);CHKERRQ(ierr); PetscFunctionReturn(0); }