/*
  KSPSetUp_AGMRES - Allocate the work storage used by the AGMRES solver.

  The Krylov basis is allocated by KSPSetUp_DGMRES(); agmres->max_k is temporarily
  replaced by MAXKSPSIZE around that call so that the augmented basis fits, and is
  put back to its original value afterwards.  The Newton-basis arrays are always
  allocated, the deflation arrays only when agmres->neig is at least 1.

  Input Parameter:
. ksp - the solver context; ksp->data is used as a KSP_AGMRES

  Errors:
  PETSC_ERR_SUP is raised when ksp->pc_side is PC_SYMMETRIC.
*/
PetscErrorCode KSPSetUp_AGMRES(KSP ksp)
{
  KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
  PetscInt       neig    = agmres->neig;
  PetscInt       max_k   = agmres->max_k;
  PetscInt       N       = MAXKSPSIZE;
  PetscInt       worklen = PetscMax(8 * N + 16, 4 * neig * (N - neig));
  PetscInt       np1     = N + 1;
  PetscInt       hessSize, nlocal;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"no symmetric preconditioning for KSPAGMRES");

  /* Have KSPSetUp_DGMRES() allocate the whole Krylov basis up front (nothing on the fly) */
  agmres->q_preallocate = PETSC_TRUE;
  /* ... and the space needed later to compute the eigenvalues in GMRES */
  ksp->calc_sings = PETSC_TRUE;

  /* Expose the augmented size only for the duration of the DGMRES set-up */
  agmres->max_k = N;
  ierr = KSPSetUp_DGMRES(ksp);CHKERRQ(ierr);
  agmres->max_k = max_k;

  /* Data for the Newton basis GMRES */
  hessSize = np1 * np1;
  ierr = PetscCalloc4(max_k,&agmres->Rshift,max_k,&agmres->Ishift,hessSize,&agmres->Rloc,(np1*4),&agmres->wbufptr);CHKERRQ(ierr);
  ierr = PetscMalloc7(np1,&agmres->Scale,np1,&agmres->sgn,np1,&agmres->tloc,np1,&agmres->temp,np1,&agmres->tau,worklen,&agmres->work,np1,&agmres->nrs);CHKERRQ(ierr);
  /* Only these four of the seven arrays above are cleared */
  ierr = PetscMemzero(agmres->Scale, np1*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->sgn, np1*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->tloc, np1*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->temp, np1*sizeof(PetscScalar));CHKERRQ(ierr);

  /* Storage for the vectors of the orthogonalized basis: nlocal rows by N+1 columns */
  ierr = VecGetLocalSize(agmres->vecs[0], &nlocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlocal*np1, &agmres->Qloc);CHKERRQ(ierr);

  /* Init the ring of processors for the roddec orthogonalization */
  ierr = KSPAGMRESRoddecInitNeighboor(ksp);CHKERRQ(ierr);

  if (agmres->neig >= 1) {
    /* Allocate space for the deflation */
    ierr = PetscMalloc1(N, &agmres->select);CHKERRQ(ierr);
    ierr = VecDuplicateVecs(VEC_V(0), N, &agmres->TmpU);CHKERRQ(ierr);
    ierr = PetscMalloc2(N*N, &agmres->MatEigL, N*N, &agmres->MatEigR);CHKERRQ(ierr);
    ierr = PetscMalloc3(N*N, &agmres->Q, N*N, &agmres->Z, N, &agmres->beta);CHKERRQ(ierr);
    ierr = PetscMalloc2(np1,&agmres->perm,(2*neig*N),&agmres->iwork);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
  PetscFEGeomCreate - Allocate a PetscFEGeom sized for numCells cells and the points of a quadrature.

  Input Parameters:
+ quad     - quadrature; its dimension, point count and point array are read with PetscQuadratureGetData()
. numCells - number of cells
. dimEmbed - embedding dimension used to size the per-point vectors and matrices
- faceData - when true, the face, n and suppInvJ[0..1] arrays are allocated as well

  Output Parameter:
. geom - the new object

  Notes:
  Every array is allocated zero-filled.  With N = numCells * (number of quadrature points),
  v and n hold N*dimEmbed entries, J, invJ and each suppInvJ hold N*dimEmbed*dimEmbed entries,
  detJ holds N entries and face holds numCells entries.  g->xi points at the quadrature's
  own point array; it is not copied.
*/
PetscErrorCode PetscFEGeomCreate(PetscQuadrature quad, PetscInt numCells, PetscInt dimEmbed, PetscBool faceData, PetscFEGeom **geom)
{
  PetscFEGeom     *fegeom;
  const PetscReal *quadPoints;
  PetscInt        quadDim, numQuadPoints;
  PetscInt        numEntries, vecLen, matLen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscQuadratureGetData(quad,&quadDim,NULL,&numQuadPoints,&quadPoints,NULL);CHKERRQ(ierr);
  ierr = PetscNew(&fegeom);CHKERRQ(ierr);

  fegeom->xi        = quadPoints;
  fegeom->numCells  = numCells;
  fegeom->numPoints = numQuadPoints;
  fegeom->dim       = quadDim;
  fegeom->dimEmbed  = dimEmbed;

  numEntries = numCells * numQuadPoints;        /* one entry per (cell, point) pair */
  vecLen     = numEntries * dimEmbed;            /* one dimEmbed-vector per entry */
  matLen     = numEntries * dimEmbed * dimEmbed; /* one dimEmbed x dimEmbed matrix per entry */

  ierr = PetscCalloc3(vecLen, &fegeom->v, matLen, &fegeom->J, numEntries, &fegeom->detJ);CHKERRQ(ierr);
  if (faceData) {
    ierr = PetscCalloc4(numCells, &fegeom->face, vecLen, &fegeom->n, matLen, &(fegeom->suppInvJ[0]), matLen, &(fegeom->suppInvJ[1]));CHKERRQ(ierr);
  }
  ierr = PetscCalloc1(matLen, &fegeom->invJ);CHKERRQ(ierr);

  *geom = fegeom;
  PetscFunctionReturn(0);
}
/* * The interface should be easy to use for both MatGetSubMatrix (parallel sub-matrix) and MatGetSubMatrices (sequential sub-matrices) * */ static PetscErrorCode MatGetSubMatrix_MPIAdj_data(Mat adj,IS irows, IS icols, PetscInt **sadj_xadj,PetscInt **sadj_adjncy,PetscInt **sadj_values) { PetscInt nlrows_is,icols_n,i,j,nroots,nleaves,owner,rlocalindex,*ncols_send,*ncols_recv; PetscInt nlrows_mat,*adjncy_recv,Ncols_recv,Ncols_send,*xadj_recv,*values_recv; PetscInt *ncols_recv_offsets,loc,rnclos,*sadjncy,*sxadj,*svalues,isvalue; const PetscInt *irows_indices,*icols_indices,*xadj, *adjncy; Mat_MPIAdj *a = (Mat_MPIAdj*)adj->data; PetscLayout rmap; MPI_Comm comm; PetscSF sf; PetscSFNode *iremote; PetscBool done; PetscErrorCode ierr; PetscFunctionBegin; /* communicator */ ierr = PetscObjectGetComm((PetscObject)adj,&comm);CHKERRQ(ierr); /* Layouts */ ierr = MatGetLayouts(adj,&rmap,PETSC_NULL);CHKERRQ(ierr); /* get rows information */ ierr = ISGetLocalSize(irows,&nlrows_is);CHKERRQ(ierr); ierr = ISGetIndices(irows,&irows_indices);CHKERRQ(ierr); ierr = PetscCalloc1(nlrows_is,&iremote);CHKERRQ(ierr); /* construct sf graph*/ nleaves = nlrows_is; for(i=0; i<nlrows_is; i++){ owner = -1; rlocalindex = -1; ierr = PetscLayoutFindOwnerIndex(rmap,irows_indices[i],&owner,&rlocalindex);CHKERRQ(ierr); iremote[i].rank = owner; iremote[i].index = rlocalindex; } ierr = MatGetRowIJ(adj,0,PETSC_FALSE,PETSC_FALSE,&nlrows_mat,&xadj,&adjncy,&done);CHKERRQ(ierr); ierr = PetscCalloc4(nlrows_mat,&ncols_send,nlrows_is,&xadj_recv,nlrows_is+1,&ncols_recv_offsets,nlrows_is,&ncols_recv);CHKERRQ(ierr); nroots = nlrows_mat; for(i=0; i<nlrows_mat; i++){ ncols_send[i] = xadj[i+1]-xadj[i]; } ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,nroots,nleaves,PETSC_NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); ierr = 
PetscSFBcastBegin(sf,MPIU_INT,ncols_send,ncols_recv);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,ncols_send,ncols_recv);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_INT,xadj,xadj_recv);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,xadj,xadj_recv);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); Ncols_recv =0; for(i=0; i<nlrows_is; i++){ Ncols_recv += ncols_recv[i]; ncols_recv_offsets[i+1] = ncols_recv[i]+ncols_recv_offsets[i]; } Ncols_send = 0; for(i=0; i<nlrows_mat; i++){ Ncols_send += ncols_send[i]; } ierr = PetscCalloc1(Ncols_recv,&iremote);CHKERRQ(ierr); ierr = PetscCalloc1(Ncols_recv,&adjncy_recv);CHKERRQ(ierr); nleaves = Ncols_recv; Ncols_recv = 0; for(i=0; i<nlrows_is; i++){ ierr = PetscLayoutFindOwner(rmap,irows_indices[i],&owner);CHKERRQ(ierr); for(j=0; j<ncols_recv[i]; j++){ iremote[Ncols_recv].rank = owner; iremote[Ncols_recv++].index = xadj_recv[i]+j; } } ierr = ISRestoreIndices(irows,&irows_indices);CHKERRQ(ierr); /*if we need to deal with edge weights ???*/ if(a->values){isvalue=1;}else{isvalue=0;} /*involve a global communication */ /*ierr = MPI_Allreduce(&isvalue,&isvalue,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);*/ if(isvalue){ierr = PetscCalloc1(Ncols_recv,&values_recv);CHKERRQ(ierr);} nroots = Ncols_send; ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,nroots,nleaves,PETSC_NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_INT,adjncy,adjncy_recv);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,adjncy,adjncy_recv);CHKERRQ(ierr); if(isvalue){ ierr = PetscSFBcastBegin(sf,MPIU_INT,a->values,values_recv);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,a->values,values_recv);CHKERRQ(ierr); } ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = MatRestoreRowIJ(adj,0,PETSC_FALSE,PETSC_FALSE,&nlrows_mat,&xadj,&adjncy,&done);CHKERRQ(ierr); ierr = 
ISGetLocalSize(icols,&icols_n);CHKERRQ(ierr); ierr = ISGetIndices(icols,&icols_indices);CHKERRQ(ierr); rnclos = 0; for(i=0; i<nlrows_is; i++){ for(j=ncols_recv_offsets[i]; j<ncols_recv_offsets[i+1]; j++){ ierr = PetscFindInt(adjncy_recv[j], icols_n, icols_indices, &loc);CHKERRQ(ierr); if(loc<0){ adjncy_recv[j] = -1; if(isvalue) values_recv[j] = -1; ncols_recv[i]--; }else{ rnclos++; } } } ierr = ISRestoreIndices(icols,&icols_indices);CHKERRQ(ierr); ierr = PetscCalloc1(rnclos,&sadjncy);CHKERRQ(ierr); if(isvalue) {ierr = PetscCalloc1(rnclos,&svalues);CHKERRQ(ierr);} ierr = PetscCalloc1(nlrows_is+1,&sxadj);CHKERRQ(ierr); rnclos = 0; for(i=0; i<nlrows_is; i++){ for(j=ncols_recv_offsets[i]; j<ncols_recv_offsets[i+1]; j++){ if(adjncy_recv[j]<0) continue; sadjncy[rnclos] = adjncy_recv[j]; if(isvalue) svalues[rnclos] = values_recv[j]; rnclos++; } } for(i=0; i<nlrows_is; i++){ sxadj[i+1] = sxadj[i]+ncols_recv[i]; } if(sadj_xadj) { *sadj_xadj = sxadj;}else { ierr = PetscFree(sxadj);CHKERRQ(ierr);} if(sadj_adjncy){ *sadj_adjncy = sadjncy;}else{ ierr = PetscFree(sadjncy);CHKERRQ(ierr);} if(sadj_values){ if(isvalue) *sadj_values = svalues; else *sadj_values=0; }else{ if(isvalue) {ierr = PetscFree(svalues);CHKERRQ(ierr);} } ierr = PetscFree4(ncols_send,xadj_recv,ncols_recv_offsets,ncols_recv);CHKERRQ(ierr); ierr = PetscFree(adjncy_recv);CHKERRQ(ierr); if(isvalue) {ierr = PetscFree(values_recv);CHKERRQ(ierr);} PetscFunctionReturn(0); }
/*@
  DMPlexOrient - Give a consistent orientation to the input mesh

  Input Parameters:
. dm - The DM

  Note: The orientation data for the DM are changed in-place.
$ This routine will fail for non-orientable surfaces, such as the Moebius strip.

  Level: advanced

.seealso: DMCreate(), DMPLEX
@*/
PetscErrorCode DMPlexOrient(DM dm)
{
  MPI_Comm           comm;
  PetscSF            sf;
  const PetscInt    *lpoints;
  const PetscSFNode *rpoints;
  PetscSFNode       *rorntComp = NULL, *lorntComp = NULL;
  PetscInt          *numNeighbors, **neighbors;
  PetscSFNode       *nrankComp;
  PetscBool         *match, *flipped;
  PetscBT            seenCells, flippedCells, seenFaces;
  PetscInt          *faceFIFO, fTop, fBottom, *cellComp, *faceComp;
  PetscInt           numLeaves, numRoots, dim, h, cStart, cEnd, c, cell, fStart, fEnd, face, off, totNeighbors = 0;
  PetscMPIInt        rank, size, numComponents, comp = 0;
  PetscBool          flg, flg2;
  PetscViewer        viewer = NULL, selfviewer = NULL;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view", &flg);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view_synchronized", &flg2);CHKERRQ(ierr);
  ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf, &numRoots, &numLeaves, &lpoints, &rpoints);CHKERRQ(ierr);
  /* Truth Table
     mismatch    flips   do action   mismatch   flipA ^ flipB   action
         F       0 flips     no         F             F           F
         F       1 flip      yes        F             T           T
         F       2 flips     no         T             F           T
         T       0 flips     yes        T             T           F
         T       1 flip      no
         T       2 flips     yes
  */
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMPlexGetVTKCellHeight(dm, &h);CHKERRQ(ierr);
  ierr = DMPlexGetHeightStratum(dm, h,   &cStart, &cEnd);CHKERRQ(ierr);
  ierr = DMPlexGetHeightStratum(dm, h+1, &fStart, &fEnd);CHKERRQ(ierr);
  /* Bit tables and work arrays are indexed by (cell - cStart) or (face - fStart) */
  ierr = PetscBTCreate(cEnd - cStart, &seenCells);CHKERRQ(ierr);
  ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr);
  ierr = PetscBTCreate(cEnd - cStart, &flippedCells);CHKERRQ(ierr);
  ierr = PetscBTMemzero(cEnd - cStart, flippedCells);CHKERRQ(ierr);
  ierr = PetscBTCreate(fEnd - fStart, &seenFaces);CHKERRQ(ierr);
  ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr);
  ierr = PetscCalloc3(fEnd - fStart, &faceFIFO, cEnd-cStart, &cellComp, fEnd-fStart, &faceComp);CHKERRQ(ierr);
  /*
   OLD STYLE
   - Add an integer array over cells and faces (component) for connected component number
   Foreach component
     - Mark the initial cell as seen
     - Process component as usual
     - Set component for all seenCells
     - Wipe seenCells and seenFaces (flippedCells can stay)
   - Generate parallel adjacency for component using SF and seenFaces
   - Collect numComponents adj data from each proc to 0
   - Build same serial graph
   - Use same solver
   - Use Scatterv to send back flipped flags for each component
   - Negate flippedCells by component

   NEW STYLE
   - Create the adj on each process
   - Bootstrap to complete graph on proc 0
  */
  /* Loop over components; a cell with cellComp < 0 has not been assigned to one yet */
  for (cell = cStart; cell < cEnd; ++cell) cellComp[cell-cStart] = -1;
  do {
    /* Look for first unmarked cell */
    for (cell = cStart; cell < cEnd; ++cell) if (cellComp[cell-cStart] < 0) break;
    if (cell >= cEnd) break;
    /* Initialize FIFO with first cell in component */
    {
      const PetscInt *cone;
      PetscInt        coneSize;

      fTop = fBottom = 0;
      ierr = DMPlexGetConeSize(dm, cell, &coneSize);CHKERRQ(ierr);
      ierr = DMPlexGetCone(dm, cell, &cone);CHKERRQ(ierr);
      for (c = 0; c < coneSize; ++c) {
        faceFIFO[fBottom++] = cone[c];
        ierr = PetscBTSet(seenFaces, cone[c]-fStart);CHKERRQ(ierr);
      }
      ierr = PetscBTSet(seenCells, cell-cStart);CHKERRQ(ierr);
    }
    /* Consider each face in FIFO */
    while (fTop < fBottom) {
      ierr = DMPlexCheckFace_Internal(dm, faceFIFO, &fTop, &fBottom, cStart, fStart, fEnd, seenCells, flippedCells, seenFaces);CHKERRQ(ierr);
    }
    /* Set component for cells and faces */
    for (cell = 0; cell < cEnd-cStart; ++cell) {
      if (PetscBTLookup(seenCells, cell)) cellComp[cell] = comp;
    }
    for (face = 0; face < fEnd-fStart; ++face) {
      if (PetscBTLookup(seenFaces, face)) faceComp[face] = comp;
    }
    /* Wipe seenCells and seenFaces for next component */
    ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr);
    ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr);
    ++comp;
  } while (1);
  numComponents = comp;

  if (flg) {
    PetscViewer v;

    ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for serial flipped cells:\n", rank);CHKERRQ(ierr);
    ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr);
    ierr = PetscViewerFlush(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr);
  }
  /* Now all subdomains are oriented, but we need a consistent parallel orientation */
  if (numLeaves >= 0) {
    /* Store orientations of boundary faces: .rank holds +1/-1, .index holds the face's component */
    ierr = PetscCalloc2(numRoots,&rorntComp,numRoots,&lorntComp);CHKERRQ(ierr);
    for (face = fStart; face < fEnd; ++face) {
      const PetscInt *cone, *support, *ornt;
      PetscInt        coneSize, supportSize;

      ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr);
      if (supportSize != 1) continue;
      ierr = DMPlexGetSupport(dm, face, &support);CHKERRQ(ierr);

      ierr = DMPlexGetCone(dm, support[0], &cone);CHKERRQ(ierr);
      ierr = DMPlexGetConeSize(dm, support[0], &coneSize);CHKERRQ(ierr);
      ierr = DMPlexGetConeOrientation(dm, support[0], &ornt);CHKERRQ(ierr);
      for (c = 0; c < coneSize; ++c) if (cone[c] == face) break;
      if (dim == 1) {
        /* Use cone position instead, shifted to -1 or 1 */
        if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = 1-c*2;
        else                                                rorntComp[face].rank = c*2-1;
      } else {
        if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = ornt[c] < 0 ? -1 :  1;
        else                                                rorntComp[face].rank = ornt[c] < 0 ?  1 : -1;
      }
      rorntComp[face].index = faceComp[face-fStart];
    }
    /* Communicate boundary edge orientations */
    ierr = PetscSFBcastBegin(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr);
  }
  /* Get process adjacency */
  ierr = PetscMalloc2(numComponents, &numNeighbors, numComponents, &neighbors);CHKERRQ(ierr);
  viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)dm));
  if (flg2) {ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);}
  ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr);
  for (comp = 0; comp < numComponents; ++comp) {
    PetscInt  l, n;

    numNeighbors[comp] = 0;
    ierr = PetscMalloc1(PetscMax(numLeaves, 0), &neighbors[comp]);CHKERRQ(ierr);
    /* I know this is p^2 time in general, but for bounded degree its alright */
    for (l = 0; l < numLeaves; ++l) {
      const PetscInt face = lpoints[l];

      /* Find a representative face (edge) separating pairs of procs */
      if ((face >= fStart) && (face < fEnd) && (faceComp[face-fStart] == comp)) {
        const PetscInt rrank = rpoints[l].rank;
        const PetscInt rcomp = lorntComp[face].index;

        /* Skip this leaf when a representative for the same (remote rank, remote component) pair is already stored */
        for (n = 0; n < numNeighbors[comp]; ++n) if ((rrank == rpoints[neighbors[comp][n]].rank) && (rcomp == lorntComp[lpoints[neighbors[comp][n]]].index)) break;
        if (n >= numNeighbors[comp]) {
          PetscInt supportSize;

          ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr);
          if (supportSize != 1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Boundary faces should see one cell, not %d", supportSize);
          if (flg) {ierr = PetscViewerASCIIPrintf(selfviewer, "[%d]: component %d, Found representative leaf %d (face %d) connecting to face %d on (%d, %d) with orientation %d\n", rank, comp, l, face, rpoints[l].index, rrank, rcomp, lorntComp[face].rank);CHKERRQ(ierr);}
          neighbors[comp][numNeighbors[comp]++] = l;
        }
      }
    }
    totNeighbors += numNeighbors[comp];
  }
  ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if (flg2) {ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);}
  ierr = PetscMalloc2(totNeighbors, &nrankComp, totNeighbors, &match);CHKERRQ(ierr);
  for (comp = 0, off = 0; comp < numComponents; ++comp) {
    PetscInt n;

    for (n = 0; n < numNeighbors[comp]; ++n, ++off) {
      const PetscInt face = lpoints[neighbors[comp][n]];
      /* Product of the local and the remote +1/-1 orientation; 0 means one of them was never set */
      const PetscInt o    = rorntComp[face].rank*lorntComp[face].rank;

      if      (o < 0) match[off] = PETSC_TRUE;
      else if (o > 0) match[off] = PETSC_FALSE;
      /* Report the two orientation values themselves: a whole PetscSFNode struct must not be passed to a %d conversion */
      else SETERRQ5(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid face %d (%d, %d) neighbor: %d comp: %d", face, rorntComp[face].rank, lorntComp[face].rank, neighbors[comp][n], comp);
      nrankComp[off].rank  = rpoints[neighbors[comp][n]].rank;
      nrankComp[off].index = lorntComp[lpoints[neighbors[comp][n]]].index;
    }
    ierr = PetscFree(neighbors[comp]);CHKERRQ(ierr);
  }
  /* Collect the graph on 0 */
  if (numLeaves >= 0) {
    Mat          G;
    PetscBT      seenProcs, flippedProcs;
    PetscInt    *procFIFO, pTop, pBottom;
    PetscInt    *N   = NULL, *Noff;
    PetscSFNode *adj = NULL;
    PetscBool   *val = NULL;
    PetscMPIInt *recvcounts = NULL, *displs = NULL, *Nc, p, o;
    /* Shadows the outer size on purpose: it stays 0 away from rank 0, so the loops over p below do nothing there */
    PetscMPIInt  size = 0;

    ierr = PetscCalloc1(numComponents, &flipped);CHKERRQ(ierr);
    if (!rank) {ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);}
    ierr = PetscCalloc4(size, &recvcounts, size+1, &displs, size, &Nc, size+1, &Noff);CHKERRQ(ierr);
    /* Nc[p]: number of components on rank p */
    ierr = MPI_Gather(&numComponents, 1, MPI_INT, Nc, 1, MPI_INT, 0, comm);CHKERRQ(ierr);
    for (p = 0; p < size; ++p) {
      displs[p+1] = displs[p] + Nc[p];
    }
    if (!rank) {ierr = PetscMalloc1(displs[size],&N);CHKERRQ(ierr);}
    /* N[]: neighbor count of every (rank, component) pair, ranks stored one after another */
    ierr = MPI_Gatherv(numNeighbors, numComponents, MPIU_INT, N, Nc, displs, MPIU_INT, 0, comm);CHKERRQ(ierr);
    for (p = 0, o = 0; p < size; ++p) {
      recvcounts[p] = 0;
      for (c = 0; c < Nc[p]; ++c, ++o) recvcounts[p] += N[o];
      displs[p+1] = displs[p] + recvcounts[p];
    }
    if (!rank) {ierr = PetscMalloc2(displs[size], &adj, displs[size], &val);CHKERRQ(ierr);}
    ierr = MPI_Gatherv(nrankComp, totNeighbors, MPIU_2INT, adj, recvcounts, displs, MPIU_2INT, 0, comm);CHKERRQ(ierr);
    ierr = MPI_Gatherv(match, totNeighbors, MPIU_BOOL, val, recvcounts, displs, MPIU_BOOL, 0, comm);CHKERRQ(ierr);
    ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr);
    if (!rank) {
      /* Noff[p]: index of the first component of rank p in the global (rank, component) numbering */
      for (p = 1; p <= size; ++p) {Noff[p] = Noff[p-1] + Nc[p-1];}
      if (flg) {
        PetscInt n;

        for (p = 0, off = 0; p < size; ++p) {
          for (c = 0; c < Nc[p]; ++c) {
            ierr = PetscPrintf(PETSC_COMM_SELF, "Proc %d Comp %d:\n", p, c);CHKERRQ(ierr);
            for (n = 0; n < N[Noff[p]+c]; ++n, ++off) {
              ierr = PetscPrintf(PETSC_COMM_SELF, " edge (%d, %d) (%d):\n", adj[off].rank, adj[off].index, val[off]);CHKERRQ(ierr);
            }
          }
        }
      }
      /* Symmetrize the graph: every edge is inserted as (r,q) and (q,r), with value 1.0 for a match and 0.0 otherwise */
      ierr = MatCreate(PETSC_COMM_SELF, &G);CHKERRQ(ierr);
      ierr = MatSetSizes(G, Noff[size], Noff[size], Noff[size], Noff[size]);CHKERRQ(ierr);
      ierr = MatSetUp(G);CHKERRQ(ierr);
      for (p = 0, off = 0; p < size; ++p) {
        for (c = 0; c < Nc[p]; ++c) {
          const PetscInt r = Noff[p]+c;
          PetscInt       n;

          for (n = 0; n < N[r]; ++n, ++off) {
            const PetscInt    q = Noff[adj[off].rank] + adj[off].index;
            const PetscScalar o = val[off] ? 1.0 : 0.0;

            ierr = MatSetValues(G, 1, &r, 1, &q, &o, INSERT_VALUES);CHKERRQ(ierr);
            ierr = MatSetValues(G, 1, &q, 1, &r, &o, INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
      ierr = MatAssemblyBegin(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      ierr = PetscBTCreate(Noff[size], &seenProcs);CHKERRQ(ierr);
      ierr = PetscBTMemzero(Noff[size], seenProcs);CHKERRQ(ierr);
      ierr = PetscBTCreate(Noff[size], &flippedProcs);CHKERRQ(ierr);
      ierr = PetscBTMemzero(Noff[size], flippedProcs);CHKERRQ(ierr);
      ierr = PetscMalloc1(Noff[size], &procFIFO);CHKERRQ(ierr);
      pTop = pBottom = 0;
      for (p = 0; p < Noff[size]; ++p) {
        if (PetscBTLookup(seenProcs, p)) continue;
        /* Initialize FIFO with next proc */
        procFIFO[pBottom++] = p;
        ierr = PetscBTSet(seenProcs, p);CHKERRQ(ierr);
        /* Consider each proc in FIFO */
        while (pTop < pBottom) {
          const PetscScalar *ornt;
          const PetscInt    *neighbors;
          PetscInt           proc, nproc, seen, flippedA, flippedB, mismatch, numNeighbors, n;

          proc     = procFIFO[pTop++];
          flippedA = PetscBTLookup(flippedProcs, proc) ? 1 : 0;
          /* NOTE(review): this MatGetRow() has no matching MatRestoreRow(); confirm that is acceptable for G's matrix type */
          ierr = MatGetRow(G, proc, &numNeighbors, &neighbors, &ornt);CHKERRQ(ierr);
          /* Loop over neighboring procs */
          for (n = 0; n < numNeighbors; ++n) {
            nproc    = neighbors[n];
            mismatch = PetscRealPart(ornt[n]) > 0.5 ? 0 : 1;
            seen     = PetscBTLookup(seenProcs, nproc);
            flippedB = PetscBTLookup(flippedProcs, nproc) ? 1 : 0;

            if (mismatch ^ (flippedA ^ flippedB)) {
              if (seen) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Previously seen procs %d and %d do not match: Fault mesh is non-orientable", proc, nproc);
              if (!flippedB) {
                ierr = PetscBTSet(flippedProcs, nproc);CHKERRQ(ierr);
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Inconsistent mesh orientation: Fault mesh is non-orientable");
            } else if (mismatch && flippedA && flippedB) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempt to flip already flipped cell: Fault mesh is non-orientable");
            if (!seen) {
              procFIFO[pBottom++] = nproc;
              ierr = PetscBTSet(seenProcs, nproc);CHKERRQ(ierr);
            }
          }
        }
      }
      ierr = PetscFree(procFIFO);CHKERRQ(ierr);
      ierr = MatDestroy(&G);CHKERRQ(ierr);
      ierr = PetscFree2(adj, val);CHKERRQ(ierr);
      ierr = PetscBTDestroy(&seenProcs);CHKERRQ(ierr);
    }
    /* Scatter flip flags */
    {
      PetscBool *flips = NULL;

      if (!rank) {
        ierr = PetscMalloc1(Noff[size], &flips);CHKERRQ(ierr);
        for (p = 0; p < Noff[size]; ++p) {
          flips[p] = PetscBTLookup(flippedProcs, p) ? PETSC_TRUE : PETSC_FALSE;
          if (flg && flips[p]) {ierr = PetscPrintf(comm, "Flipping Proc+Comp %d:\n", p);CHKERRQ(ierr);}
        }
        /* displs[] was reused for the edge gathers above; rebuild it as per-rank component offsets */
        for (p = 0; p < size; ++p) {
          displs[p+1] = displs[p] + Nc[p];
        }
      }
      ierr = MPI_Scatterv(flips, Nc, displs, MPIU_BOOL, flipped, numComponents, MPIU_BOOL, 0, comm);CHKERRQ(ierr);
      ierr = PetscFree(flips);CHKERRQ(ierr);
    }
    if (!rank) {ierr = PetscBTDestroy(&flippedProcs);CHKERRQ(ierr);}
    ierr = PetscFree(N);CHKERRQ(ierr);
    ierr = PetscFree4(recvcounts, displs, Nc, Noff);CHKERRQ(ierr);
    ierr = PetscFree2(nrankComp, match);CHKERRQ(ierr);

    /* Decide whether to flip cells in each component */
    for (c = 0; c < cEnd-cStart; ++c) {if (flipped[cellComp[c]]) {ierr = PetscBTNegate(flippedCells, c);CHKERRQ(ierr);}}
    ierr = PetscFree(flipped);CHKERRQ(ierr);
  }
  if (flg) {
    PetscViewer v;

    ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for parallel flipped cells:\n", rank);CHKERRQ(ierr);
    ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr);
    ierr = PetscViewerFlush(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr);
  }
  /* Reverse flipped cells in the mesh */
  for (c = cStart; c < cEnd; ++c) {
    if (PetscBTLookup(flippedCells, c-cStart)) {
      ierr = DMPlexReverseCell(dm, c);CHKERRQ(ierr);
    }
  }
  ierr = PetscBTDestroy(&seenCells);CHKERRQ(ierr);
  ierr = PetscBTDestroy(&flippedCells);CHKERRQ(ierr);
  ierr = PetscBTDestroy(&seenFaces);CHKERRQ(ierr);
  ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr);
  ierr = PetscFree2(rorntComp, lorntComp);CHKERRQ(ierr);
  ierr = PetscFree3(faceFIFO, cellComp, faceComp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  PipesView - Scatter the two interleaved components of X into two vectors stored
  entirely on rank 0, and view them on stdout under the labels "Xq" and "Xh".

  Input Parameters:
+ X         - source vector; for the local edge number j (counted from the first local edge)
              and node k of a pipe with nnodes nodes, entry xstart + j*2*nnodes + 2*k goes
              to Xq and the entry right after it goes to Xh
. networkdm - network DM supplying the edge range and the per-edge Pipe component
- wash      - supplies nnodes_loc, the number of index pairs built on this rank

  Notes:
  The destination index of a node is pipe->id*nnodes + k in both output vectors.
*/
PetscErrorCode PipesView(Vec X,DM networkdm,Wash wash)
{
  PetscErrorCode                    ierr;
  PetscMPIInt                       rank;
  DMNetworkComponentGenericDataType *nwarr;
  Pipe                              pipe;
  PetscInt                          key,pipeOffset,eStart,eEnd,e,edgeCount;
  PetscInt                          nlocal,nglobal,nnodes,xstart,fill,k;
  PetscInt                          *srcQ,*dstQ,*srcH,*dstH;
  Vec                               Xq,Xh,localX;
  IS                                isSrcQ,isDstQ,isSrcH,isDstH;
  VecScatter                        scatQ,scatH;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  /* Local node count, and its sum over all ranks */
  nlocal = wash->nnodes_loc;
  ierr = MPIU_Allreduce(&nlocal,&nglobal,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);

  /* All entries of Xq (and of its duplicate Xh) live on rank 0 */
  ierr = VecCreate(PETSC_COMM_WORLD,&Xq);CHKERRQ(ierr);
  ierr = VecSetSizes(Xq,(rank == 0) ? nglobal : 0,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(Xq);CHKERRQ(ierr);
  ierr = VecSet(Xq,0.0);CHKERRQ(ierr);
  ierr = VecDuplicate(Xq,&Xh);CHKERRQ(ierr);

  ierr = DMGetLocalVector(networkdm,&localX);CHKERRQ(ierr);

  /* Build the source/destination index lists for both components */
  ierr = PetscCalloc4(nlocal,&srcQ,nlocal,&dstQ,nlocal,&srcH,nlocal,&dstH);CHKERRQ(ierr);
  ierr = DMNetworkGetComponentDataArray(networkdm,&nwarr);CHKERRQ(ierr);
  ierr = DMNetworkGetEdgeRange(networkdm,&eStart, &eEnd);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(X,&xstart,NULL);CHKERRQ(ierr);

  fill = 0;
  for (e = eStart, edgeCount = 0; e < eEnd; e++, edgeCount++) {
    PetscInt srcBase,dstBase;

    ierr = DMNetworkGetComponentTypeOffset(networkdm,e,0,&key,&pipeOffset);CHKERRQ(ierr);
    pipe    = (Pipe)(nwarr+pipeOffset);
    nnodes  = pipe->nnodes;
    srcBase = xstart + edgeCount*2*nnodes; /* first entry of this edge in X */
    dstBase = pipe->id*nnodes;             /* first entry of this pipe in Xq and Xh */
    for (k=0; k<nnodes; k++, fill++) {
      srcQ[fill] = srcBase + 2*k;
      dstQ[fill] = dstBase + k;
      srcH[fill] = srcBase + 2*k + 1;
      dstH[fill] = dstBase + k;
    }
  }

  /* First component: X -> Xq */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,srcQ,PETSC_COPY_VALUES,&isSrcQ);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,dstQ,PETSC_COPY_VALUES,&isDstQ);CHKERRQ(ierr);
  ierr = VecScatterCreate(X,isSrcQ,Xq,isDstQ,&scatQ);CHKERRQ(ierr);
  ierr = VecScatterBegin(scatQ,X,Xq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(scatQ,X,Xq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Second component: X -> Xh */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,srcH,PETSC_COPY_VALUES,&isSrcH);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,dstH,PETSC_COPY_VALUES,&isDstH);CHKERRQ(ierr);
  ierr = VecScatterCreate(X,isSrcH,Xh,isDstH,&scatH);CHKERRQ(ierr);
  ierr = VecScatterBegin(scatH,X,Xh,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(scatH,X,Xh,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  if (!rank) printf("Xq: \n");
  ierr = VecView(Xq,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  if (!rank) printf("Xh: \n");
  ierr = VecView(Xh,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* Release everything created above */
  ierr = PetscFree4(srcQ,dstQ,srcH,dstH);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&scatQ);CHKERRQ(ierr);
  ierr = ISDestroy(&isSrcQ);CHKERRQ(ierr);
  ierr = ISDestroy(&isDstQ);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&scatH);CHKERRQ(ierr);
  ierr = ISDestroy(&isSrcH);CHKERRQ(ierr);
  ierr = ISDestroy(&isDstH);CHKERRQ(ierr);

  ierr = VecDestroy(&Xq);CHKERRQ(ierr);
  ierr = VecDestroy(&Xh);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(networkdm,&localX);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}