static PetscErrorCode getGIDsOnSquareGraph(PetscInt nselected_1,const PetscInt clid_lid_1[],const Mat Gmat1,IS *a_selected_2,Mat *a_Gmat_2,PetscInt **a_crsGID) { PetscErrorCode ierr; PetscMPIInt size; PetscInt *crsGID, kk,my0,Iend,nloc; MPI_Comm comm; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)Gmat1,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MatGetOwnershipRange(Gmat1,&my0,&Iend);CHKERRQ(ierr); /* AIJ */ nloc = Iend - my0; /* this does not change */ if (size == 1) { /* not much to do in serial */ ierr = PetscMalloc1(nselected_1, &crsGID);CHKERRQ(ierr); for (kk=0; kk<nselected_1; kk++) crsGID[kk] = kk; *a_Gmat_2 = 0; ierr = ISCreateGeneral(PETSC_COMM_SELF,nselected_1,clid_lid_1,PETSC_COPY_VALUES,a_selected_2);CHKERRQ(ierr); } else { PetscInt idx,num_fine_ghosts,num_crs_ghost,myCrs0; Mat_MPIAIJ *mpimat2; Mat Gmat2; Vec locState; PetscScalar *cpcol_state; /* scan my coarse zero gid, set 'lid_state' with coarse GID */ kk = nselected_1; ierr = MPI_Scan(&kk, &myCrs0, 1, MPIU_INT, MPI_SUM, comm);CHKERRQ(ierr); myCrs0 -= nselected_1; if (a_Gmat_2) { /* output */ /* grow graph to get wider set of selected vertices to cover fine grid, invalidates 'llist' */ ierr = MatTransposeMatMult(Gmat1, Gmat1, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Gmat2);CHKERRQ(ierr); *a_Gmat_2 = Gmat2; /* output */ } else Gmat2 = Gmat1; /* use local to get crsGIDs at least */ /* get coarse grid GIDS for selected (locals and ghosts) */ mpimat2 = (Mat_MPIAIJ*)Gmat2->data; ierr = MatCreateVecs(Gmat2, &locState, 0);CHKERRQ(ierr); ierr = VecSet(locState, (PetscScalar)(PetscReal)(-1));CHKERRQ(ierr); /* set with UNKNOWN state */ for (kk=0; kk<nselected_1; kk++) { PetscInt fgid = clid_lid_1[kk] + my0; PetscScalar v = (PetscScalar)(kk+myCrs0); ierr = VecSetValues(locState, 1, &fgid, &v, INSERT_VALUES);CHKERRQ(ierr); /* set with PID */ } ierr = VecAssemblyBegin(locState);CHKERRQ(ierr); ierr = VecAssemblyEnd(locState);CHKERRQ(ierr); ierr = VecScatterBegin(mpimat2->Mvctx,locState,mpimat2->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(mpimat2->Mvctx,locState,mpimat2->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGetLocalSize(mpimat2->lvec, &num_fine_ghosts);CHKERRQ(ierr); ierr = VecGetArray(mpimat2->lvec, &cpcol_state);CHKERRQ(ierr); for (kk=0,num_crs_ghost=0; kk<num_fine_ghosts; kk++) { if ((PetscInt)PetscRealPart(cpcol_state[kk]) != -1) num_crs_ghost++; } ierr = PetscMalloc1(nselected_1+num_crs_ghost, &crsGID);CHKERRQ(ierr); /* output */ { PetscInt *selected_set; ierr = PetscMalloc1(nselected_1+num_crs_ghost, &selected_set);CHKERRQ(ierr); /* do ghost of 'crsGID' */ for (kk=0,idx=nselected_1; kk<num_fine_ghosts; kk++) { if ((PetscInt)PetscRealPart(cpcol_state[kk]) != -1) { PetscInt cgid = (PetscInt)PetscRealPart(cpcol_state[kk]); selected_set[idx] = nloc + kk; crsGID[idx++] = cgid; } } if (idx != (nselected_1+num_crs_ghost)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"idx %D != (nselected_1 %D + num_crs_ghost %D)",idx,nselected_1,num_crs_ghost); ierr = VecRestoreArray(mpimat2->lvec, &cpcol_state);CHKERRQ(ierr); /* do locals in 'crsGID' */ ierr = VecGetArray(locState, &cpcol_state);CHKERRQ(ierr); for (kk=0,idx=0; kk<nloc; kk++) { if ((PetscInt)PetscRealPart(cpcol_state[kk]) != -1) { PetscInt cgid = (PetscInt)PetscRealPart(cpcol_state[kk]); selected_set[idx] = kk; crsGID[idx++] = cgid; } } if (idx != nselected_1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"idx %D != nselected_1 %D",idx,nselected_1); ierr = VecRestoreArray(locState, &cpcol_state);CHKERRQ(ierr); if (a_selected_2 != 0) { /* output */ ierr = ISCreateGeneral(PETSC_COMM_SELF,(nselected_1+num_crs_ghost),selected_set,PETSC_OWN_POINTER,a_selected_2);CHKERRQ(ierr); } else { ierr = PetscFree(selected_set);CHKERRQ(ierr); } } ierr = VecDestroy(&locState);CHKERRQ(ierr); } *a_crsGID = crsGID; /* output */ PetscFunctionReturn(0); }
/*@ DMPlexOrient - Give a consistent orientation to the input mesh Input Parameters: . dm - The DM Note: The orientation data for the DM are change in-place. $ This routine will fail for non-orientable surfaces, such as the Moebius strip. Level: advanced .seealso: DMCreate(), DMPLEX @*/ PetscErrorCode DMPlexOrient(DM dm) { MPI_Comm comm; PetscSF sf; const PetscInt *lpoints; const PetscSFNode *rpoints; PetscSFNode *rorntComp = NULL, *lorntComp = NULL; PetscInt *numNeighbors, **neighbors; PetscSFNode *nrankComp; PetscBool *match, *flipped; PetscBT seenCells, flippedCells, seenFaces; PetscInt *faceFIFO, fTop, fBottom, *cellComp, *faceComp; PetscInt numLeaves, numRoots, dim, h, cStart, cEnd, c, cell, fStart, fEnd, face, off, totNeighbors = 0; PetscMPIInt rank, size, numComponents, comp = 0; PetscBool flg, flg2; PetscViewer viewer = NULL, selfviewer = NULL; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view", &flg);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view_synchronized", &flg2);CHKERRQ(ierr); ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr); ierr = PetscSFGetGraph(sf, &numRoots, &numLeaves, &lpoints, &rpoints);CHKERRQ(ierr); /* Truth Table mismatch flips do action mismatch flipA ^ flipB action F 0 flips no F F F F 1 flip yes F T T F 2 flips no T F T T 0 flips yes T T F T 1 flip no T 2 flips yes */ ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMPlexGetVTKCellHeight(dm, &h);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, h, &cStart, &cEnd);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm, h+1, &fStart, &fEnd);CHKERRQ(ierr); ierr = PetscBTCreate(cEnd - cStart, &seenCells);CHKERRQ(ierr); ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr); ierr = PetscBTCreate(cEnd - cStart, &flippedCells);CHKERRQ(ierr); ierr = PetscBTMemzero(cEnd - cStart, flippedCells);CHKERRQ(ierr); ierr = PetscBTCreate(fEnd - fStart, &seenFaces);CHKERRQ(ierr); ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr); ierr = PetscCalloc3(fEnd - fStart, &faceFIFO, cEnd-cStart, &cellComp, fEnd-fStart, &faceComp);CHKERRQ(ierr); /* OLD STYLE - Add an integer array over cells and faces (component) for connected component number Foreach component - Mark the initial cell as seen - Process component as usual - Set component for all seenCells - Wipe seenCells and seenFaces (flippedCells can stay) - Generate parallel adjacency for component using SF and seenFaces - Collect numComponents adj data from each proc to 0 - Build same serial graph - Use same solver - Use Scatterv to to send back flipped flags for each component - Negate flippedCells by component NEW STYLE - Create the adj on each process - Bootstrap to complete graph on proc 0 */ /* Loop over components */ for (cell = cStart; cell < cEnd; ++cell) cellComp[cell-cStart] = -1; do { /* Look for first unmarked cell */ for (cell = cStart; cell < cEnd; ++cell) if (cellComp[cell-cStart] < 0) break; if (cell >= cEnd) break; /* Initialize FIFO with first cell in component */ { const PetscInt *cone; PetscInt coneSize; fTop = fBottom = 0; ierr = DMPlexGetConeSize(dm, cell, &coneSize);CHKERRQ(ierr); ierr = DMPlexGetCone(dm, cell, &cone);CHKERRQ(ierr); for (c = 0; c < coneSize; ++c) { faceFIFO[fBottom++] = cone[c]; ierr = PetscBTSet(seenFaces, cone[c]-fStart);CHKERRQ(ierr); } ierr = PetscBTSet(seenCells, cell-cStart);CHKERRQ(ierr); } /* Consider each face in FIFO */ while (fTop < fBottom) { ierr = DMPlexCheckFace_Internal(dm, faceFIFO, &fTop, &fBottom, cStart, fStart, fEnd, seenCells, flippedCells, seenFaces);CHKERRQ(ierr); } /* Set component for cells and faces */ for (cell = 0; cell < cEnd-cStart; ++cell) { if (PetscBTLookup(seenCells, cell)) cellComp[cell] = comp; } for (face = 0; face < fEnd-fStart; ++face) { if (PetscBTLookup(seenFaces, face)) faceComp[face] = comp; } /* Wipe seenCells and seenFaces for next component */ ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr); ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr); ++comp; } while (1); numComponents = comp; if (flg) { PetscViewer v; ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for serial flipped cells:\n", rank);CHKERRQ(ierr); ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr); ierr = PetscViewerFlush(v);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr); } /* Now all subdomains are oriented, but we need a consistent parallel orientation */ if (numLeaves >= 0) { /* Store orientations of boundary faces*/ ierr = PetscCalloc2(numRoots,&rorntComp,numRoots,&lorntComp);CHKERRQ(ierr); for (face = fStart; face < fEnd; ++face) { const PetscInt *cone, *support, *ornt; PetscInt coneSize, supportSize; ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr); if (supportSize != 1) continue; ierr = DMPlexGetSupport(dm, face, &support);CHKERRQ(ierr); ierr = DMPlexGetCone(dm, support[0], &cone);CHKERRQ(ierr); ierr = DMPlexGetConeSize(dm, support[0], &coneSize);CHKERRQ(ierr); ierr = DMPlexGetConeOrientation(dm, support[0], &ornt);CHKERRQ(ierr); for (c = 0; c < coneSize; ++c) if (cone[c] == face) break; if (dim == 1) { /* Use cone position instead, shifted to -1 or 1 */ if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = 1-c*2; else rorntComp[face].rank = c*2-1; } else { if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = ornt[c] < 0 ? -1 : 1; else rorntComp[face].rank = ornt[c] < 0 ? 1 : -1; } rorntComp[face].index = faceComp[face-fStart]; } /* Communicate boundary edge orientations */ ierr = PetscSFBcastBegin(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr); } /* Get process adjacency */ ierr = PetscMalloc2(numComponents, &numNeighbors, numComponents, &neighbors);CHKERRQ(ierr); viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)dm)); if (flg2) {ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);} ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr); for (comp = 0; comp < numComponents; ++comp) { PetscInt l, n; numNeighbors[comp] = 0; ierr = PetscMalloc1(PetscMax(numLeaves, 0), &neighbors[comp]);CHKERRQ(ierr); /* I know this is p^2 time in general, but for bounded degree its alright */ for (l = 0; l < numLeaves; ++l) { const PetscInt face = lpoints[l]; /* Find a representative face (edge) separating pairs of procs */ if ((face >= fStart) && (face < fEnd) && (faceComp[face-fStart] == comp)) { const PetscInt rrank = rpoints[l].rank; const PetscInt rcomp = lorntComp[face].index; for (n = 0; n < numNeighbors[comp]; ++n) if ((rrank == rpoints[neighbors[comp][n]].rank) && (rcomp == lorntComp[lpoints[neighbors[comp][n]]].index)) break; if (n >= numNeighbors[comp]) { PetscInt supportSize; ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr); if (supportSize != 1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Boundary faces should see one cell, not %d", supportSize); if (flg) {ierr = PetscViewerASCIIPrintf(selfviewer, "[%d]: component %d, Found representative leaf %d (face %d) connecting to face %d on (%d, %d) with orientation %d\n", rank, comp, l, face, rpoints[l].index, rrank, rcomp, lorntComp[face].rank);CHKERRQ(ierr);} neighbors[comp][numNeighbors[comp]++] = l; } } } totNeighbors += numNeighbors[comp]; } ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (flg2) {ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);} ierr = PetscMalloc2(totNeighbors, &nrankComp, totNeighbors, &match);CHKERRQ(ierr); for (comp = 0, off = 0; comp < numComponents; ++comp) { PetscInt n; for (n = 0; n < numNeighbors[comp]; ++n, ++off) { const PetscInt face = lpoints[neighbors[comp][n]]; const PetscInt o = rorntComp[face].rank*lorntComp[face].rank; if (o < 0) match[off] = PETSC_TRUE; else if (o > 0) match[off] = PETSC_FALSE; else SETERRQ5(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid face %d (%d, %d) neighbor: %d comp: %d", face, rorntComp[face], lorntComp[face], neighbors[comp][n], comp); nrankComp[off].rank = rpoints[neighbors[comp][n]].rank; nrankComp[off].index = lorntComp[lpoints[neighbors[comp][n]]].index; } ierr = PetscFree(neighbors[comp]);CHKERRQ(ierr); } /* Collect the graph on 0 */ if (numLeaves >= 0) { Mat G; PetscBT seenProcs, flippedProcs; PetscInt *procFIFO, pTop, pBottom; PetscInt *N = NULL, *Noff; PetscSFNode *adj = NULL; PetscBool *val = NULL; PetscMPIInt *recvcounts = NULL, *displs = NULL, *Nc, p, o; PetscMPIInt size = 0; ierr = PetscCalloc1(numComponents, &flipped);CHKERRQ(ierr); if (!rank) {ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);} ierr = PetscCalloc4(size, &recvcounts, size+1, &displs, size, &Nc, size+1, &Noff);CHKERRQ(ierr); ierr = MPI_Gather(&numComponents, 1, MPI_INT, Nc, 1, MPI_INT, 0, comm);CHKERRQ(ierr); for (p = 0; p < size; ++p) { displs[p+1] = displs[p] + Nc[p]; } if (!rank) {ierr = PetscMalloc1(displs[size],&N);CHKERRQ(ierr);} ierr = MPI_Gatherv(numNeighbors, numComponents, MPIU_INT, N, Nc, displs, MPIU_INT, 0, comm);CHKERRQ(ierr); for (p = 0, o = 0; p < size; ++p) { recvcounts[p] = 0; for (c = 0; c < Nc[p]; ++c, ++o) recvcounts[p] += N[o]; displs[p+1] = displs[p] + recvcounts[p]; } if (!rank) {ierr = PetscMalloc2(displs[size], &adj, displs[size], &val);CHKERRQ(ierr);} ierr = MPI_Gatherv(nrankComp, totNeighbors, MPIU_2INT, adj, recvcounts, displs, MPIU_2INT, 0, comm);CHKERRQ(ierr); ierr = MPI_Gatherv(match, totNeighbors, MPIU_BOOL, val, recvcounts, displs, MPIU_BOOL, 0, comm);CHKERRQ(ierr); ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr); if (!rank) { for (p = 1; p <= size; ++p) {Noff[p] = Noff[p-1] + Nc[p-1];} if (flg) { PetscInt n; for (p = 0, off = 0; p < size; ++p) { for (c = 0; c < Nc[p]; ++c) { ierr = PetscPrintf(PETSC_COMM_SELF, "Proc %d Comp %d:\n", p, c);CHKERRQ(ierr); for (n = 0; n < N[Noff[p]+c]; ++n, ++off) { ierr = PetscPrintf(PETSC_COMM_SELF, " edge (%d, %d) (%d):\n", adj[off].rank, adj[off].index, val[off]);CHKERRQ(ierr); } } } } /* Symmetrize the graph */ ierr = MatCreate(PETSC_COMM_SELF, &G);CHKERRQ(ierr); ierr = MatSetSizes(G, Noff[size], Noff[size], Noff[size], Noff[size]);CHKERRQ(ierr); ierr = MatSetUp(G);CHKERRQ(ierr); for (p = 0, off = 0; p < size; ++p) { for (c = 0; c < Nc[p]; ++c) { const PetscInt r = Noff[p]+c; PetscInt n; for (n = 0; n < N[r]; ++n, ++off) { const PetscInt q = Noff[adj[off].rank] + adj[off].index; const PetscScalar o = val[off] ? 1.0 : 0.0; ierr = MatSetValues(G, 1, &r, 1, &q, &o, INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(G, 1, &q, 1, &r, &o, INSERT_VALUES);CHKERRQ(ierr); } } } ierr = MatAssemblyBegin(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscBTCreate(Noff[size], &seenProcs);CHKERRQ(ierr); ierr = PetscBTMemzero(Noff[size], seenProcs);CHKERRQ(ierr); ierr = PetscBTCreate(Noff[size], &flippedProcs);CHKERRQ(ierr); ierr = PetscBTMemzero(Noff[size], flippedProcs);CHKERRQ(ierr); ierr = PetscMalloc1(Noff[size], &procFIFO);CHKERRQ(ierr); pTop = pBottom = 0; for (p = 0; p < Noff[size]; ++p) { if (PetscBTLookup(seenProcs, p)) continue; /* Initialize FIFO with next proc */ procFIFO[pBottom++] = p; ierr = PetscBTSet(seenProcs, p);CHKERRQ(ierr); /* Consider each proc in FIFO */ while (pTop < pBottom) { const PetscScalar *ornt; const PetscInt *neighbors; PetscInt proc, nproc, seen, flippedA, flippedB, mismatch, numNeighbors, n; proc = procFIFO[pTop++]; flippedA = PetscBTLookup(flippedProcs, proc) ? 1 : 0; ierr = MatGetRow(G, proc, &numNeighbors, &neighbors, &ornt);CHKERRQ(ierr); /* Loop over neighboring procs */ for (n = 0; n < numNeighbors; ++n) { nproc = neighbors[n]; mismatch = PetscRealPart(ornt[n]) > 0.5 ? 0 : 1; seen = PetscBTLookup(seenProcs, nproc); flippedB = PetscBTLookup(flippedProcs, nproc) ? 1 : 0; if (mismatch ^ (flippedA ^ flippedB)) { if (seen) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Previously seen procs %d and %d do not match: Fault mesh is non-orientable", proc, nproc); if (!flippedB) { ierr = PetscBTSet(flippedProcs, nproc);CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Inconsistent mesh orientation: Fault mesh is non-orientable"); } else if (mismatch && flippedA && flippedB) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempt to flip already flipped cell: Fault mesh is non-orientable"); if (!seen) { procFIFO[pBottom++] = nproc; ierr = PetscBTSet(seenProcs, nproc);CHKERRQ(ierr); } } } } ierr = PetscFree(procFIFO);CHKERRQ(ierr); ierr = MatDestroy(&G);CHKERRQ(ierr); ierr = PetscFree2(adj, val);CHKERRQ(ierr); ierr = PetscBTDestroy(&seenProcs);CHKERRQ(ierr); } /* Scatter flip flags */ { PetscBool *flips = NULL; if (!rank) { ierr = PetscMalloc1(Noff[size], &flips);CHKERRQ(ierr); for (p = 0; p < Noff[size]; ++p) { flips[p] = PetscBTLookup(flippedProcs, p) ? PETSC_TRUE : PETSC_FALSE; if (flg && flips[p]) {ierr = PetscPrintf(comm, "Flipping Proc+Comp %d:\n", p);CHKERRQ(ierr);} } for (p = 0; p < size; ++p) { displs[p+1] = displs[p] + Nc[p]; } } ierr = MPI_Scatterv(flips, Nc, displs, MPIU_BOOL, flipped, numComponents, MPIU_BOOL, 0, comm);CHKERRQ(ierr); ierr = PetscFree(flips);CHKERRQ(ierr); } if (!rank) {ierr = PetscBTDestroy(&flippedProcs);CHKERRQ(ierr);} ierr = PetscFree(N);CHKERRQ(ierr); ierr = PetscFree4(recvcounts, displs, Nc, Noff);CHKERRQ(ierr); ierr = PetscFree2(nrankComp, match);CHKERRQ(ierr); /* Decide whether to flip cells in each component */ for (c = 0; c < cEnd-cStart; ++c) {if (flipped[cellComp[c]]) {ierr = PetscBTNegate(flippedCells, c);CHKERRQ(ierr);}} ierr = PetscFree(flipped);CHKERRQ(ierr); } if (flg) { PetscViewer v; ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for parallel flipped cells:\n", rank);CHKERRQ(ierr); ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr); ierr = PetscViewerFlush(v);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr); } /* Reverse flipped cells in the mesh */ for (c = cStart; c < cEnd; ++c) { if (PetscBTLookup(flippedCells, c-cStart)) { ierr = DMPlexReverseCell(dm, c);CHKERRQ(ierr); } } ierr = PetscBTDestroy(&seenCells);CHKERRQ(ierr); ierr = PetscBTDestroy(&flippedCells);CHKERRQ(ierr); ierr = PetscBTDestroy(&seenFaces);CHKERRQ(ierr); ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr); ierr = PetscFree2(rorntComp, lorntComp);CHKERRQ(ierr); ierr = PetscFree3(faceFIFO, cellComp, faceComp);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode CheckFunctions(DM dm, PetscInt order, Vec u, AppCtx *user) { void (*exactFuncs[1]) (const PetscReal x[], PetscScalar *u, void *ctx); void (*exactFuncDers[1]) (const PetscReal x[], const PetscReal n[], PetscScalar *u, void *ctx); PetscReal n[3] = {1.0, 1.0, 1.0}; PetscReal constants[3] = {1.0, 2.0, 3.0}; void *exactCtxs[3] = {NULL, NULL, NULL}; MPI_Comm comm; PetscInt dim = user->dim, Nc; PetscQuadrature fq; PetscReal error, errorDer, tol = 1.0e-10; PetscBool isPlex, isDA; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject) dm, DMPLEX, &isPlex);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject) dm, DMDA, &isDA);CHKERRQ(ierr); ierr = PetscFEGetQuadrature(user->fe, &fq);CHKERRQ(ierr); ierr = PetscFEGetNumComponents(user->fe, &Nc);CHKERRQ(ierr); /* Setup functions to approximate */ switch (order) { case 0: exactFuncs[0] = constant; exactFuncDers[0] = constantDer; exactCtxs[0] = &constants[0]; exactCtxs[1] = &constants[1]; exactCtxs[2] = &constants[2]; break; case 1: exactFuncs[0] = linear; exactFuncDers[0] = linearDer; break; case 2: exactFuncs[0] = quadratic; exactFuncDers[0] = quadraticDer; break; default: SETERRQ2(comm, PETSC_ERR_ARG_OUTOFRANGE, "Could not determine functions to test for dimension %d order %d", dim, order); } /* Project function into FE function space */ if (isPlex) { ierr = DMPlexProjectFunction(dm, &user->fe, exactFuncs, exactCtxs, INSERT_ALL_VALUES, u);CHKERRQ(ierr); } else if (isDA) { ierr = DMDAProjectFunction(dm, &user->fe, exactFuncs, exactCtxs, INSERT_ALL_VALUES, u);CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "No FEM projection routine for this type of DM"); /* Compare approximation to exact in L_2 */ if (isPlex) { ierr = DMPlexComputeL2Diff(dm, &user->fe, exactFuncs, exactCtxs, u, &error);CHKERRQ(ierr); ierr = DMPlexComputeL2GradientDiff(dm, &user->fe, exactFuncDers, exactCtxs, u, n, &errorDer);CHKERRQ(ierr); } else if (isDA) { ierr = DMDAComputeL2Diff(dm, &user->fe, exactFuncs, exactCtxs, u, &error);CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "No FEM projection routine for this type of DM"); if (error > tol) { ierr = PetscPrintf(comm, "Tests FAIL for order %d at tolerance %g error %g\n", order, tol, error);CHKERRQ(ierr); } else { ierr = PetscPrintf(comm, "Tests pass for order %d at tolerance %g\n", order, tol);CHKERRQ(ierr); } if (errorDer > tol) { ierr = PetscPrintf(comm, "Tests FAIL for order %d derivatives at tolerance %g error %g\n", order, tol, errorDer);CHKERRQ(ierr); } else { ierr = PetscPrintf(comm, "Tests pass for order %d derivatives at tolerance %g\n", order, tol);CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_LSQR(KSP ksp) { PetscErrorCode ierr; PetscInt i,size1,size2; PetscScalar rho,rhobar,phi,phibar,theta,c,s,tmp,tau; PetscReal beta,alpha,rnorm; Vec X,B,V,V1,U,U1,TMP,W,W2,SE,Z = NULL; Mat Amat,Pmat; KSP_LSQR *lsqr = (KSP_LSQR*)ksp->data; PetscBool diagonalscale,nopreconditioner; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)ksp->pc,PCNONE,&nopreconditioner);CHKERRQ(ierr); /* nopreconditioner =PETSC_FALSE; */ /* Calculate norm of right hand side */ ierr = VecNorm(ksp->vec_rhs,NORM_2,&lsqr->rhs_norm);CHKERRQ(ierr); /* mark norm of matrix with negative number to indicate it has not yet been computed */ lsqr->anorm = -1.0; /* vectors of length m, where system size is mxn */ B = ksp->vec_rhs; U = lsqr->vwork_m[0]; U1 = lsqr->vwork_m[1]; /* vectors of length n */ X = ksp->vec_sol; W = lsqr->vwork_n[0]; V = lsqr->vwork_n[1]; V1 = lsqr->vwork_n[2]; W2 = lsqr->vwork_n[3]; if (!nopreconditioner) Z = lsqr->vwork_n[4]; /* standard error vector */ SE = lsqr->se; if (SE) { ierr = VecGetSize(SE,&size1);CHKERRQ(ierr); ierr = VecGetSize(X,&size2);CHKERRQ(ierr); if (size1 != size2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Standard error vector (size %d) does not match solution vector (size %d)",size1,size2); ierr = VecSet(SE,0.0);CHKERRQ(ierr); } /* Compute initial residual, temporarily use work vector u */ if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,U);CHKERRQ(ierr); /* u <- b - Ax */ ierr = VecAYPX(U,-1.0,B);CHKERRQ(ierr); } else { ierr = VecCopy(B,U);CHKERRQ(ierr); /* u <- b (x is 0) */ } /* Test for nothing to do */ ierr = VecNorm(U,NORM_2,&rnorm);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = rnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); beta = rnorm; ierr = VecScale(U,1.0/beta);CHKERRQ(ierr); ierr = KSP_MatMultTranspose(ksp,Amat,U,V);CHKERRQ(ierr); if (nopreconditioner) { ierr = VecNorm(V,NORM_2,&alpha);CHKERRQ(ierr); } else { ierr = PCApply(ksp->pc,V,Z);CHKERRQ(ierr); ierr = VecDotRealPart(V,Z,&alpha);CHKERRQ(ierr); if (alpha <= 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } alpha = PetscSqrtReal(alpha); ierr = VecScale(Z,1.0/alpha);CHKERRQ(ierr); } ierr = VecScale(V,1.0/alpha);CHKERRQ(ierr); if (nopreconditioner) { ierr = VecCopy(V,W);CHKERRQ(ierr); } else { ierr = VecCopy(Z,W);CHKERRQ(ierr); } lsqr->arnorm = alpha * beta; phibar = beta; rhobar = alpha; i = 0; do { if (nopreconditioner) { ierr = KSP_MatMult(ksp,Amat,V,U1);CHKERRQ(ierr); } else { ierr = KSP_MatMult(ksp,Amat,Z,U1);CHKERRQ(ierr); } ierr = VecAXPY(U1,-alpha,U);CHKERRQ(ierr); ierr = VecNorm(U1,NORM_2,&beta);CHKERRQ(ierr); if (beta > 0.0) { ierr = VecScale(U1,1.0/beta);CHKERRQ(ierr); /* beta*U1 = Amat*V - alpha*U */ } ierr = KSP_MatMultTranspose(ksp,Amat,U1,V1);CHKERRQ(ierr); ierr = VecAXPY(V1,-beta,V);CHKERRQ(ierr); if (nopreconditioner) { ierr = VecNorm(V1,NORM_2,&alpha);CHKERRQ(ierr); } else { ierr = PCApply(ksp->pc,V1,Z);CHKERRQ(ierr); ierr = VecDotRealPart(V1,Z,&alpha);CHKERRQ(ierr); if (alpha <= 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } alpha = PetscSqrtReal(alpha); ierr = VecScale(Z,1.0/alpha);CHKERRQ(ierr); } ierr = VecScale(V1,1.0/alpha);CHKERRQ(ierr); /* alpha*V1 = Amat^T*U1 - beta*V */ rho = PetscSqrtScalar(rhobar*rhobar + beta*beta); c = rhobar / rho; s = beta / rho; theta = s * alpha; rhobar = -c * alpha; phi = c * phibar; phibar = s * phibar; tau = s * phi; ierr = VecAXPY(X,phi/rho,W);CHKERRQ(ierr); /* x <- x + (phi/rho) w */ if (SE) { ierr = VecCopy(W,W2);CHKERRQ(ierr); ierr = VecSquare(W2);CHKERRQ(ierr); ierr = VecScale(W2,1.0/(rho*rho));CHKERRQ(ierr); ierr = VecAXPY(SE, 1.0, W2);CHKERRQ(ierr); /* SE <- SE + (w^2/rho^2) */ } if (nopreconditioner) { ierr = VecAYPX(W,-theta/rho,V1);CHKERRQ(ierr); /* w <- v - (theta/rho) w */ } else { ierr = VecAYPX(W,-theta/rho,Z);CHKERRQ(ierr); /* w <- z - (theta/rho) w */ } lsqr->arnorm = alpha*PetscAbsScalar(tau); rnorm = PetscRealPart(phibar); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = rnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; SWAP(U1,U,TMP); SWAP(V1,V,TMP); i++; } while (i<ksp->max_it); if (i >= ksp->max_it && !ksp->reason) ksp->reason = KSP_DIVERGED_ITS; /* Finish off the standard error estimates */ if (SE) { tmp = 1.0; ierr = MatGetSize(Amat,&size1,&size2);CHKERRQ(ierr); if (size1 > size2) tmp = size1 - size2; tmp = rnorm / PetscSqrtScalar(tmp); ierr = VecSqrtAbs(SE);CHKERRQ(ierr); ierr = VecScale(SE,tmp);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode PCBDDCGraphSetUp(PCBDDCGraph graph, PetscInt custom_minimal_size, IS neumann_is, IS dirichlet_is, PetscInt n_ISForDofs, IS ISForDofs[], IS custom_primal_vertices) { IS subset,subset_n; MPI_Comm comm; const PetscInt *is_indices; PetscInt n_neigh,*neigh,*n_shared,**shared,*queue_global; PetscInt i,j,k,s,total_counts,nodes_touched,is_size; PetscMPIInt commsize; PetscBool same_set,mirrors_found; PetscErrorCode ierr; PetscFunctionBegin; PetscValidLogicalCollectiveInt(graph->l2gmap,custom_minimal_size,2); if (neumann_is) { PetscValidHeaderSpecific(neumann_is,IS_CLASSID,3); PetscCheckSameComm(graph->l2gmap,1,neumann_is,3); } graph->has_dirichlet = PETSC_FALSE; if (dirichlet_is) { PetscValidHeaderSpecific(dirichlet_is,IS_CLASSID,4); PetscCheckSameComm(graph->l2gmap,1,dirichlet_is,4); graph->has_dirichlet = PETSC_TRUE; } PetscValidLogicalCollectiveInt(graph->l2gmap,n_ISForDofs,5); for (i=0;i<n_ISForDofs;i++) { PetscValidHeaderSpecific(ISForDofs[i],IS_CLASSID,6); PetscCheckSameComm(graph->l2gmap,1,ISForDofs[i],6); } if (custom_primal_vertices) { PetscValidHeaderSpecific(custom_primal_vertices,IS_CLASSID,6); PetscCheckSameComm(graph->l2gmap,1,custom_primal_vertices,7); } ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); /* custom_minimal_size */ graph->custom_minimal_size = custom_minimal_size; /* get info l2gmap and allocate work vectors */ ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); /* check if we have any local periodic nodes (periodic BCs) */ mirrors_found = PETSC_FALSE; if (graph->nvtxs && n_neigh) { for (i=0; i<n_shared[0]; i++) graph->count[shared[0][i]] += 1; for (i=0; i<n_shared[0]; i++) { if (graph->count[shared[0][i]] > 1) { mirrors_found = PETSC_TRUE; break; } } } /* compute local mirrors (if any) */ if (mirrors_found) { IS to,from; PetscInt *local_indices,*global_indices; ierr = ISCreateStride(PETSC_COMM_SELF,graph->nvtxs,0,1,&to);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,to,&from);CHKERRQ(ierr); /* get arrays of local and global indices */ ierr = PetscMalloc1(graph->nvtxs,&local_indices);CHKERRQ(ierr); ierr = ISGetIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr); ierr = PetscMemcpy(local_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISRestoreIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr); ierr = PetscMalloc1(graph->nvtxs,&global_indices);CHKERRQ(ierr); ierr = ISGetIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr); ierr = PetscMemcpy(global_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISRestoreIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr); /* allocate space for mirrors */ ierr = PetscMalloc2(graph->nvtxs,&graph->mirrors,graph->nvtxs,&graph->mirrors_set);CHKERRQ(ierr); ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); graph->mirrors_set[0] = 0; k=0; for (i=0;i<n_shared[0];i++) { j=shared[0][i]; if (graph->count[j] > 1) { graph->mirrors[j]++; k++; } } /* allocate space for set of mirrors */ ierr = PetscMalloc1(k,&graph->mirrors_set[0]);CHKERRQ(ierr); for (i=1;i<graph->nvtxs;i++) graph->mirrors_set[i]=graph->mirrors_set[i-1]+graph->mirrors[i-1]; /* fill arrays */ ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); for (j=0;j<n_shared[0];j++) { i=shared[0][j]; if (graph->count[i] > 1) graph->mirrors_set[i][graph->mirrors[i]++]=global_indices[i]; } ierr = PetscSortIntWithArray(graph->nvtxs,global_indices,local_indices);CHKERRQ(ierr); for (i=0;i<graph->nvtxs;i++) { if (graph->mirrors[i] > 0) { ierr = PetscFindInt(graph->mirrors_set[i][0],graph->nvtxs,global_indices,&k);CHKERRQ(ierr); j = global_indices[k]; while ( k > 0 && global_indices[k-1] == j) k--; for (j=0;j<graph->mirrors[i];j++) { graph->mirrors_set[i][j]=local_indices[k+j]; } ierr = PetscSortInt(graph->mirrors[i],graph->mirrors_set[i]);CHKERRQ(ierr); } } ierr = PetscFree(local_indices);CHKERRQ(ierr); ierr = PetscFree(global_indices);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); } ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr); /* Count total number of neigh per node */ k = 0; for (i=1;i<n_neigh;i++) { k += n_shared[i]; for (j=0;j<n_shared[i];j++) { graph->count[shared[i][j]] += 1; } } /* Allocate space for storing the set of neighbours for each node */ if (graph->nvtxs) { ierr = PetscMalloc1(k,&graph->neighbours_set[0]);CHKERRQ(ierr); } for (i=1;i<graph->nvtxs;i++) { /* dont count myself */ graph->neighbours_set[i]=graph->neighbours_set[i-1]+graph->count[i-1]; } /* Get information for sharing subdomains */ ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr); for (i=1;i<n_neigh;i++) { /* dont count myself */ s = n_shared[i]; for (j=0;j<s;j++) { k = shared[i][j]; graph->neighbours_set[k][graph->count[k]] = neigh[i]; graph->count[k] += 1; } } /* sort set of sharing subdomains */ for (i=0;i<graph->nvtxs;i++) { ierr = PetscSortRemoveDupsInt(&graph->count[i],graph->neighbours_set[i]);CHKERRQ(ierr); } /* free memory allocated by ISLocalToGlobalMappingGetInfo */ ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); /* Get info for dofs splitting User can specify just a subset; an additional field is considered as a complementary field */ for (i=0;i<graph->nvtxs;i++) graph->which_dof[i] = n_ISForDofs; /* by default a dof belongs to the complement set */ for (i=0;i<n_ISForDofs;i++) { ierr = ISGetLocalSize(ISForDofs[i],&is_size);CHKERRQ(ierr); ierr = ISGetIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); for (j=0;j<is_size;j++) { if (is_indices[j] > -1 && is_indices[j] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */ graph->which_dof[is_indices[j]] = i; } } ierr = ISRestoreIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); } /* Take into account Neumann nodes */ if (neumann_is) { ierr = ISGetLocalSize(neumann_is,&is_size);CHKERRQ(ierr); ierr = ISGetIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); for (i=0;i<is_size;i++) { if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */ graph->special_dof[is_indices[i]] = PCBDDCGRAPH_NEUMANN_MARK; } } ierr = ISRestoreIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); } /* Take into account Dirichlet nodes (they overwrite any neumann boundary mark previously set) */ if (dirichlet_is) { ierr = ISGetLocalSize(dirichlet_is,&is_size);CHKERRQ(ierr); ierr = ISGetIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); for (i=0;i<is_size;i++){ if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */ if (commsize > graph->commsizelimit) { /* dirichlet nodes treated as internal */ ierr = PetscBTSet(graph->touched,is_indices[i]);CHKERRQ(ierr); graph->subset[is_indices[i]] = 0; } graph->special_dof[is_indices[i]] = PCBDDCGRAPH_DIRICHLET_MARK; } } ierr = ISRestoreIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); } /* mark local periodic nodes (if any) and adapt CSR graph (if any) */ if (graph->mirrors) { for (i=0;i<graph->nvtxs;i++) if (graph->mirrors[i]) graph->special_dof[i] = PCBDDCGRAPH_LOCAL_PERIODIC_MARK; if (graph->xadj) { PetscInt *new_xadj,*new_adjncy; /* sort CSR graph */ for (i=0;i<graph->nvtxs;i++) ierr = PetscSortInt(graph->xadj[i+1]-graph->xadj[i],&graph->adjncy[graph->xadj[i]]);CHKERRQ(ierr); /* adapt local CSR graph in case of local periodicity */ k = 0; for (i=0;i<graph->nvtxs;i++) for (j=graph->xadj[i];j<graph->xadj[i+1];j++) k += graph->mirrors[graph->adjncy[j]]; ierr = PetscMalloc1(graph->nvtxs+1,&new_xadj);CHKERRQ(ierr); ierr = PetscMalloc1(k+graph->xadj[graph->nvtxs],&new_adjncy);CHKERRQ(ierr); new_xadj[0] = 0; for (i=0;i<graph->nvtxs;i++) { k = graph->xadj[i+1]-graph->xadj[i]; ierr = PetscMemcpy(&new_adjncy[new_xadj[i]],&graph->adjncy[graph->xadj[i]],k*sizeof(PetscInt));CHKERRQ(ierr); new_xadj[i+1] = new_xadj[i]+k; for (j=graph->xadj[i];j<graph->xadj[i+1];j++) { k = graph->mirrors[graph->adjncy[j]]; ierr = PetscMemcpy(&new_adjncy[new_xadj[i+1]],graph->mirrors_set[graph->adjncy[j]],k*sizeof(PetscInt));CHKERRQ(ierr); new_xadj[i+1] += k; } k = new_xadj[i+1]-new_xadj[i]; ierr = PetscSortRemoveDupsInt(&k,&new_adjncy[new_xadj[i]]);CHKERRQ(ierr); new_xadj[i+1] = new_xadj[i]+k; } /* set new CSR into graph */ ierr = PetscFree(graph->xadj);CHKERRQ(ierr); ierr = PetscFree(graph->adjncy);CHKERRQ(ierr); graph->xadj = new_xadj; graph->adjncy = new_adjncy; } } /* mark special nodes (if any) -> each will become a single node equivalence class */ if (custom_primal_vertices) { ierr = ISGetLocalSize(custom_primal_vertices,&is_size);CHKERRQ(ierr); ierr = ISGetIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); for (i=0,j=0;i<is_size;i++){ if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs && graph->special_dof[is_indices[i]] != PCBDDCGRAPH_DIRICHLET_MARK) { /* out of bounds indices (if any) are skipped */ graph->special_dof[is_indices[i]] = PCBDDCGRAPH_SPECIAL_MARK-j; j++; } } ierr = ISRestoreIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); } /* mark interior nodes (if commsize > graph->commsizelimit) as touched and belonging to partition number 0 */ if (commsize > graph->commsizelimit) { for (i=0;i<graph->nvtxs;i++) { if (!graph->count[i]) { ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr); graph->subset[i] = 0; } } } /* init graph structure and compute default subsets */ nodes_touched = 0; for (i=0;i<graph->nvtxs;i++) { if (PetscBTLookup(graph->touched,i)) { nodes_touched++; } } i = 0; graph->ncc = 0; total_counts = 0; /* allocated space for queues */ if (commsize == graph->commsizelimit) { ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,graph->nvtxs,&graph->queue);CHKERRQ(ierr); } else { PetscInt nused = graph->nvtxs - nodes_touched; ierr = PetscMalloc2(nused+1,&graph->cptr,nused,&graph->queue);CHKERRQ(ierr); } while (nodes_touched<graph->nvtxs) { /* find first untouched node in local ordering */ while (PetscBTLookup(graph->touched,i)) i++; ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr); graph->subset[i] = graph->ncc+1; graph->cptr[graph->ncc] = total_counts; graph->queue[total_counts] = i; total_counts++; nodes_touched++; /* now find all other nodes having the same set of sharing subdomains */ for (j=i+1;j<graph->nvtxs;j++) { /* check for same number of sharing subdomains, dof number and same special mark */ if (!PetscBTLookup(graph->touched,j) && graph->count[i] == graph->count[j] && graph->which_dof[i] == graph->which_dof[j] && graph->special_dof[i] == graph->special_dof[j]) { /* check for same set of sharing subdomains */ same_set = PETSC_TRUE; for (k=0;k<graph->count[j];k++){ if (graph->neighbours_set[i][k] != graph->neighbours_set[j][k]) { same_set = PETSC_FALSE; } } /* I found a friend of mine */ if (same_set) { ierr = PetscBTSet(graph->touched,j);CHKERRQ(ierr); graph->subset[j] = graph->ncc+1; nodes_touched++; graph->queue[total_counts] = j; total_counts++; } } } graph->ncc++; } /* set default number of subsets (at this point no info on csr and/or local_subs has been taken into account, so n_subsets = ncc */ graph->n_subsets = graph->ncc; ierr = PetscMalloc1(graph->n_subsets,&graph->subset_ncc);CHKERRQ(ierr); for (i=0;i<graph->n_subsets;i++) { graph->subset_ncc[i] = 1; } /* final pointer */ graph->cptr[graph->ncc] = total_counts; /* For consistency reasons (among neighbours), I need to sort (by global ordering) each connected component */ /* Get a reference node (min index in global ordering) for each subset for tagging messages */ ierr = PetscMalloc1(graph->ncc,&graph->subset_ref_node);CHKERRQ(ierr); ierr = PetscMalloc1(graph->cptr[graph->ncc],&queue_global);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(graph->l2gmap,graph->cptr[graph->ncc],graph->queue,queue_global);CHKERRQ(ierr); for (j=0;j<graph->ncc;j++) { ierr = PetscSortIntWithArray(graph->cptr[j+1]-graph->cptr[j],&queue_global[graph->cptr[j]],&graph->queue[graph->cptr[j]]);CHKERRQ(ierr); graph->subset_ref_node[j] = graph->queue[graph->cptr[j]]; } ierr = PetscFree(queue_global);CHKERRQ(ierr); graph->queue_sorted = PETSC_TRUE; /* save information on subsets (needed when analyzing the connected components) */ if (graph->ncc) { ierr = PetscMalloc2(graph->ncc,&graph->subset_size,graph->ncc,&graph->subset_idxs);CHKERRQ(ierr); ierr = PetscMalloc1(graph->cptr[graph->ncc],&graph->subset_idxs[0]);CHKERRQ(ierr); ierr = PetscMemzero(graph->subset_idxs[0],graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr); for (j=1;j<graph->ncc;j++) { graph->subset_size[j-1] = graph->cptr[j] - graph->cptr[j-1]; graph->subset_idxs[j] = graph->subset_idxs[j-1] + graph->subset_size[j-1]; } graph->subset_size[graph->ncc-1] = graph->cptr[graph->ncc] - graph->cptr[graph->ncc-1]; ierr = PetscMemcpy(graph->subset_idxs[0],graph->queue,graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr); } /* renumber reference nodes */ ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(graph->l2gmap)),graph->ncc,graph->subset_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,subset_n,&subset);CHKERRQ(ierr); ierr = ISDestroy(&subset_n);CHKERRQ(ierr); ierr = ISRenumber(subset,NULL,NULL,&subset_n);CHKERRQ(ierr); ierr = ISDestroy(&subset);CHKERRQ(ierr); ierr = ISGetLocalSize(subset_n,&k);CHKERRQ(ierr); if (k != graph->ncc) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",k,graph->ncc); ierr = ISGetIndices(subset_n,&is_indices);CHKERRQ(ierr); ierr = PetscMemcpy(graph->subset_ref_node,is_indices,graph->ncc*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISRestoreIndices(subset_n,&is_indices);CHKERRQ(ierr); ierr = ISDestroy(&subset_n);CHKERRQ(ierr); /* free workspace */ graph->setupcalled = PETSC_TRUE; PetscFunctionReturn(0); }
static PetscErrorCode KSPLGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) { PetscScalar tt; PetscErrorCode ierr; PetscInt ii,k,j; KSP_LGMRES *lgmres = (KSP_LGMRES*)(ksp->data); /*LGMRES_MOD */ PetscInt it_arnoldi, it_aug; PetscInt jj, spot = 0; PetscFunctionBegin; /* Solve for solution vector that minimizes the residual */ /* If it is < 0, no lgmres steps have been performed */ if (it < 0) { ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exists immediately if vguess == vdest */ PetscFunctionReturn(0); } /* so (it+1) lgmres steps HAVE been performed */ /* LGMRES_MOD - determine if we need to use augvecs for the soln - do not assume that this is called after the total its allowed for an approx space */ if (lgmres->approx_constant) { it_arnoldi = lgmres->max_k - lgmres->aug_ct; } else { it_arnoldi = lgmres->max_k - lgmres->aug_dim; } if (it_arnoldi >= it +1) { it_aug = 0; it_arnoldi = it+1; } else { it_aug = (it + 1) - it_arnoldi; } /* now it_arnoldi indicates the number of matvecs that took place */ lgmres->matvecs += it_arnoldi; /* solve the upper triangular system - GRS is the right side and HH is the upper triangular matrix - put soln in nrs */ if (*HH(it,it) == 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it))); if (*HH(it,it) != 0.0) { nrs[it] = *GRS(it) / *HH(it,it); } else { nrs[it] = 0.0; } for (ii=1; ii<=it; ii++) { k = it - ii; tt = *GRS(k); for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j]; nrs[k] = tt / *HH(k,k); } /* Accumulate the correction to the soln of the preconditioned prob. in VEC_TEMP */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP components to 0 */ /*LGMRES_MOD - if augmenting has happened we need to form the solution using the augvecs */ if (!it_aug) { /* all its are from arnoldi */ ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));CHKERRQ(ierr); } else { /*use aug vecs */ /*first do regular krylov directions */ ierr = VecMAXPY(VEC_TEMP,it_arnoldi,nrs,&VEC_VV(0));CHKERRQ(ierr); /*now add augmented portions - add contribution of aug vectors one at a time*/ for (ii=0; ii<it_aug; ii++) { for (jj=0; jj<lgmres->aug_dim; jj++) { if (lgmres->aug_order[jj] == (ii+1)) { spot = jj; break; /* must have this because there will be duplicates before aug_ct = aug_dim */ } } ierr = VecAXPY(VEC_TEMP,nrs[it_arnoldi+ii],AUGVEC(spot));CHKERRQ(ierr); } } /* now VEC_TEMP is what we want to keep for augmenting purposes - grab before the preconditioner is "unwound" from right-precondtioning*/ ierr = VecCopy(VEC_TEMP, AUG_TEMP);CHKERRQ(ierr); ierr = KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);CHKERRQ(ierr); /* add solution to previous solution */ /* put updated solution into vdest.*/ ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C PetscDLOpen - opens dynamic library Not Collective Input Parameters: + name - name of library - flags - options on how to open library Output Parameter: . handle Level: developer @*/ PetscErrorCode PetscDLOpen(const char name[],int flags,PetscDLHandle *handle) { PETSC_UNUSED int dlflags1,dlflags2; /* There are some preprocessor paths where these variables are set, but not used */ dlhandle_t dlhandle; PetscFunctionBegin; PetscValidCharPointer(name,1); PetscValidPointer(handle,3); dlflags1 = 0; dlflags2 = 0; dlhandle = (dlhandle_t) 0; *handle = (PetscDLHandle) 0; /* --- LoadLibrary --- */ #if defined(PETSC_HAVE_WINDOWS_H) && defined(PETSC_HAVE_LOADLIBRARY) dlhandle = LoadLibrary(name); if (!dlhandle) { #if defined(PETSC_HAVE_GETLASTERROR) PetscErrorCode ierr; DWORD erc; char *buff = NULL; erc = GetLastError(); FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_IGNORE_INSERTS, NULL,erc,MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),(LPSTR)&buff,0,NULL); ierr = PetscError(PETSC_COMM_SELF,__LINE__,__FUNCT__,__FILE__,__SDIR__,PETSC_ERR_FILE_OPEN,PETSC_ERROR_REPEAT, "Unable to open dynamic library:\n %s\n Error message from LoadLibrary() %s\n",name,buff); LocalFree(buff); PetscFunctionReturn(ierr); #else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open dynamic library:\n %s\n Error message from LoadLibrary() %s\n",name,"unavailable"); #endif } /* --- dlopen --- */ #elif defined(PETSC_HAVE_DLFCN_H) && defined(PETSC_HAVE_DLOPEN) /* Mode indicates symbols required by symbol loaded with dlsym() are only loaded when required (not all together) also indicates symbols required can be contained in other libraries also opened with dlopen() */ #if defined(PETSC_HAVE_RTLD_LAZY) dlflags1 = RTLD_LAZY; #endif #if defined(PETSC_HAVE_RTLD_NOW) if (flags & PETSC_DL_NOW) dlflags1 = RTLD_NOW; #endif #if defined(PETSC_HAVE_RTLD_GLOBAL) dlflags2 = RTLD_GLOBAL; #endif #if defined(PETSC_HAVE_RTLD_LOCAL) if (flags & PETSC_DL_LOCAL) dlflags2 = RTLD_LOCAL; #endif #if defined(PETSC_HAVE_DLERROR) dlerror(); /* clear any previous error */ #endif dlhandle = dlopen(name,dlflags1|dlflags2); if (!dlhandle) { #if defined(PETSC_HAVE_DLERROR) const char *errmsg = dlerror(); #else const char *errmsg = "unavailable"; #endif SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open dynamic library:\n %s\n Error message from dlopen() %s\n",name,errmsg); } /* --- unimplemented --- */ #else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS, "Cannot use dynamic libraries on this platform"); #endif *handle = (PetscDLHandle) dlhandle; PetscFunctionReturn(0); }
int main(int argc,char **args) { Mat A,RHS,C,F,X; Vec u,x,b; PetscErrorCode ierr; PetscMPIInt rank,nproc; PetscInt i,m,n,nfact,nsolve,nrhs,ipack=0; PetscScalar *array,rval; PetscReal norm,tol=1.e-12; IS perm,iperm; MatFactorInfo info; PetscRandom rand; PetscBool flg,testMatSolve=PETSC_TRUE,testMatMatSolve=PETSC_TRUE; PetscViewer fd; /* viewer */ char file[PETSC_MAX_PATH_LEN]; /* input file name */ PetscInitialize(&argc,&args,(char*)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &nproc);CHKERRQ(ierr); /* Determine file from which we read the matrix A */ ierr = PetscOptionsGetString(NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,1,"Must indicate binary file with the -f option"); /* Load matrix A */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n); /* Create dense matrix C and X; C holds true solution with identical colums */ nrhs = 2; ierr = PetscOptionsGetInt(NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); if (!rank) printf("ex125: nrhs %d\n",nrhs); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);CHKERRQ(ierr); ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatSetRandom(C,rand);CHKERRQ(ierr); ierr = MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);CHKERRQ(ierr); /* Create vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ /* Test LU Factorization */ ierr = MatGetOrdering(A,MATORDERINGND,&perm,&iperm);CHKERRQ(ierr); /*ierr = ISView(perm,PETSC_VIEWER_STDOUT_WORLD);*/ /*ierr = ISView(perm,PETSC_VIEWER_STDOUT_SELF);*/ ierr = PetscOptionsGetInt(NULL,"-mat_solver_package",&ipack,NULL);CHKERRQ(ierr); switch (ipack) { case 0: #if defined(PETSC_HAVE_SUPERLU) if (!rank) printf(" SUPERLU LU:\n"); ierr = MatGetFactor(A,MATSOLVERSUPERLU,MAT_FACTOR_LU,&F);CHKERRQ(ierr); break; #endif case 1: #if defined(PETSC_HAVE_SUPERLU_DIST) if (!rank) printf(" SUPERLU_DIST LU:\n"); ierr = MatGetFactor(A,MATSOLVERSUPERLU_DIST,MAT_FACTOR_LU,&F);CHKERRQ(ierr); break; #endif case 2: #if defined(PETSC_HAVE_MUMPS) if (!rank) printf(" MUMPS LU:\n"); ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr); { /* test mumps options */ PetscInt icntl_7 = 5; ierr = MatMumpsSetIcntl(F,7,icntl_7);CHKERRQ(ierr); } break; #endif default: if (!rank) printf(" PETSC LU:\n"); ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } info.fill = 5.0; ierr = MatLUFactorSymbolic(F,A,perm,iperm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 2; nfact++) { if (!rank) printf(" %d-the LU numfactorization \n",nfact); ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr); /* Test MatMatSolve() */ /* if ((ipack == 0 || ipack == 2) && testMatMatSolve) { printf(" MatMatSolve() is not implemented for this package. Skip the testing.\n"); testMatMatSolve = PETSC_FALSE; } */ if (testMatMatSolve) { if (!nfact) { ierr = MatMatMult(A,C,MAT_INITIAL_MATRIX,2.0,&RHS);CHKERRQ(ierr); } else { ierr = MatMatMult(A,C,MAT_REUSE_MATRIX,2.0,&RHS);CHKERRQ(ierr); } for (nsolve = 0; nsolve < 2; nsolve++) { if (!rank) printf(" %d-the MatMatSolve \n",nsolve); ierr = MatMatSolve(F,RHS,X);CHKERRQ(ierr); /* Check the error */ ierr = MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatNorm(X,NORM_FROBENIUS,&norm);CHKERRQ(ierr); if (norm > tol) { if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"1st MatMatSolve: Norm of error %g, nsolve %d\n",norm,nsolve);CHKERRQ(ierr); } } } } /* Test MatSolve() */ if (testMatSolve) { for (nsolve = 0; nsolve < 2; nsolve++) { ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); ierr = MatMult(A,x,b);CHKERRQ(ierr); if (!rank) printf(" %d-the MatSolve \n",nsolve); ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = MatMult(A,x,u);CHKERRQ(ierr); /* u = A*x */ PetscReal resi; ierr = VecAXPY(u,-1.0,b);CHKERRQ(ierr); /* u <- (-1.0)b + u */ ierr = VecNorm(u,NORM_2,&resi);CHKERRQ(ierr); if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve: Norm of error %g, resi %g, LU numfact %d\n",norm,resi,nfact);CHKERRQ(ierr); } } } } } /* Free data structures */ ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = MatDestroy(&X);CHKERRQ(ierr); if (testMatMatSolve) { ierr = MatDestroy(&RHS);CHKERRQ(ierr); } ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = ISDestroy(&perm);CHKERRQ(ierr); ierr = ISDestroy(&iperm);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode KSPSolve_Chebyshev(KSP ksp) { KSP_Chebyshev *cheb = (KSP_Chebyshev*)ksp->data; PetscErrorCode ierr; PetscInt k,kp1,km1,maxit,ktmp,i; PetscScalar alpha,omegaprod,mu,omega,Gamma,c[3],scale; PetscReal rnorm = 0.0; Vec sol_orig,b,p[3],r; Mat Amat,Pmat; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); if (cheb->kspest) { PetscObjectId amatid, pmatid; PetscObjectState amatstate, pmatstate; ierr = PetscObjectGetId((PetscObject)Amat,&amatid);CHKERRQ(ierr); ierr = PetscObjectGetId((PetscObject)Pmat,&pmatid);CHKERRQ(ierr); ierr = PetscObjectStateGet((PetscObject)Amat,&amatstate);CHKERRQ(ierr); ierr = PetscObjectStateGet((PetscObject)Pmat,&pmatstate);CHKERRQ(ierr); if (amatid != cheb->amatid || pmatid != cheb->pmatid || amatstate != cheb->amatstate || pmatstate != cheb->pmatstate) { PetscReal max=0.0,min=0.0; Vec B; KSPConvergedReason reason; if (cheb->userandom) { B = ksp->work[1]; if (!cheb->random) { ierr = PetscRandomCreate(PetscObjectComm((PetscObject)B),&cheb->random);CHKERRQ(ierr); } ierr = VecSetRandom(B,cheb->random);CHKERRQ(ierr); } else { B = ksp->vec_rhs; } ierr = KSPSolve(cheb->kspest,B,ksp->work[0]);CHKERRQ(ierr); ierr = KSPGetConvergedReason(cheb->kspest,&reason);CHKERRQ(ierr); if (reason < 0) { if (reason == KSP_DIVERGED_ITS) { ierr = PetscInfo(ksp,"Eigen estimator ran for prescribed number of iterations\n");CHKERRQ(ierr); } else { PetscInt its; ierr = KSPGetIterationNumber(cheb->kspest,&its);CHKERRQ(ierr); SETERRQ2(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Eigen estimator failed: %s at iteration %D",KSPConvergedReasons[reason],its); } } else if (reason==KSP_CONVERGED_RTOL ||reason==KSP_CONVERGED_ATOL) { ierr = PetscInfo(ksp,"Eigen estimator converged prematurely. Should not happen except for small or low rank problem\n");CHKERRQ(ierr); } else { ierr = PetscInfo1(ksp,"Eigen estimator did not converge by iteration: %s\n",KSPConvergedReasons[reason]);CHKERRQ(ierr); } ierr = KSPChebyshevComputeExtremeEigenvalues_Private(cheb->kspest,&min,&max);CHKERRQ(ierr); cheb->emin = cheb->tform[0]*min + cheb->tform[1]*max; cheb->emax = cheb->tform[2]*min + cheb->tform[3]*max; cheb->amatid = amatid; cheb->pmatid = pmatid; cheb->amatstate = amatstate; cheb->pmatstate = pmatstate; } } ksp->its = 0; maxit = ksp->max_it; /* These three point to the three active solutions, we rotate these three at each solution update */ km1 = 0; k = 1; kp1 = 2; sol_orig = ksp->vec_sol; /* ksp->vec_sol will be asigned to rotating vector p[k], thus save its address */ b = ksp->vec_rhs; p[km1] = sol_orig; p[k] = ksp->work[0]; p[kp1] = ksp->work[1]; r = ksp->work[2]; /* use scale*B as our preconditioner */ scale = 2.0/(cheb->emax + cheb->emin); /* -alpha <= scale*lambda(B^{-1}A) <= alpha */ alpha = 1.0 - scale*(cheb->emin); Gamma = 1.0; mu = 1.0/alpha; omegaprod = 2.0/alpha; c[km1] = 1.0; c[k] = mu; if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,p[km1],r);CHKERRQ(ierr); /* r = b - A*p[km1] */ ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); } else { ierr = VecCopy(b,r);CHKERRQ(ierr); } ierr = KSP_PCApply(ksp,r,p[k]);CHKERRQ(ierr); /* p[k] = scale B^{-1}r + p[km1] */ ierr = VecAYPX(p[k],scale,p[km1]);CHKERRQ(ierr); for (i=0; i<maxit; i++) { ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); c[kp1] = 2.0*mu*c[k] - c[km1]; omega = omegaprod*c[k]/c[kp1]; ierr = KSP_MatMult(ksp,Amat,p[k],r);CHKERRQ(ierr); /* r = b - Ap[k] */ ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); ierr = KSP_PCApply(ksp,r,p[kp1]);CHKERRQ(ierr); /* p[kp1] = B^{-1}r */ ksp->vec_sol = p[k]; /* calculate residual norm if requested */ if (ksp->normtype != KSP_NORM_NONE || ksp->numbermonitors) { if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); } else { ierr = VecNorm(p[kp1],NORM_2,&rnorm);CHKERRQ(ierr); } ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = rnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; } /* y^{k+1} = omega(y^{k} - y^{k-1} + Gamma*r^{k}) + y^{k-1} */ ierr = VecAXPBYPCZ(p[kp1],1.0-omega,omega,omega*Gamma*scale,p[km1],p[k]);CHKERRQ(ierr); ktmp = km1; km1 = k; k = kp1; kp1 = ktmp; } if (!ksp->reason) { if (ksp->normtype != KSP_NORM_NONE) { ierr = KSP_MatMult(ksp,Amat,p[k],r);CHKERRQ(ierr); /* r = b - Ap[k] */ ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); } else { ierr = KSP_PCApply(ksp,r,p[kp1]);CHKERRQ(ierr); /* p[kp1] = B^{-1}r */ ierr = VecNorm(p[kp1],NORM_2,&rnorm);CHKERRQ(ierr); } ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = rnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->vec_sol = p[k]; ierr = KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i,rnorm);CHKERRQ(ierr); } if (ksp->its >= ksp->max_it) { if (ksp->normtype != KSP_NORM_NONE) { ierr = (*ksp->converged)(ksp,i,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; } else ksp->reason = KSP_CONVERGED_ITS; } } /* make sure solution is in vector x */ ksp->vec_sol = sol_orig; if (k) { ierr = VecCopy(p[k],sol_orig);CHKERRQ(ierr); } PetscFunctionReturn(0); }
int main(int argc,char *argv[]) { char mat_type[256] = "aij"; /* default matrix type */ PetscErrorCode ierr; MPI_Comm comm; PetscMPIInt rank,size; DM slice; PetscInt i,bs=1,N=5,n,m,rstart,ghosts[2],*d_nnz,*o_nnz,dfill[4]={1,0,0,1},ofill[4]={1,1,1,1}; PetscReal alpha =1,K=1,rho0=1,u0=0,sigma=0.2; PetscBool useblock=PETSC_TRUE; PetscScalar *xx; Mat A; Vec x,b,lf; ierr = PetscInitialize(&argc,&argv,0,help);CHKERRQ(ierr); comm = PETSC_COMM_WORLD; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = PetscOptionsBegin(comm,0,"Options for DMSliced test",0);CHKERRQ(ierr); { ierr = PetscOptionsInt("-n","Global number of nodes","",N,&N,NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-bs","Block size (1 or 2)","",bs,&bs,NULL);CHKERRQ(ierr); if (bs != 1) { if (bs != 2) SETERRQ(PETSC_COMM_WORLD,1,"Block size must be 1 or 2"); ierr = PetscOptionsReal("-alpha","Inverse time step for wave operator","",alpha,&alpha,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-K","Bulk modulus of compressibility","",K,&K,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-rho0","Reference density","",rho0,&rho0,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-u0","Reference velocity","",u0,&u0,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-sigma","Width of Gaussian density perturbation","",sigma,&sigma,NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-block","Use block matrix assembly","",useblock,&useblock,NULL);CHKERRQ(ierr); } ierr = PetscOptionsString("-sliced_mat_type","Matrix type to use (aij or baij)","",mat_type,mat_type,sizeof(mat_type),NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); /* Split ownership, set up periodic grid in 1D */ n = PETSC_DECIDE; ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); rstart = 0; ierr = MPI_Scan(&n,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); rstart -= n; ghosts[0] = (N+rstart-1)%N; ghosts[1] = (rstart+n)%N; ierr = PetscMalloc2(n,&d_nnz,n,&o_nnz);CHKERRQ(ierr); for (i=0; i<n; i++) { if (size > 1 && (i==0 || i==n-1)) { d_nnz[i] = 2; o_nnz[i] = 1; } else { d_nnz[i] = 3; o_nnz[i] = 0; } } ierr = DMSlicedCreate(comm,bs,n,2,ghosts,d_nnz,o_nnz,&slice);CHKERRQ(ierr); /* Currently does not copy X_nnz so we can't free them until after DMSlicedGetMatrix */ if (!useblock) {ierr = DMSlicedSetBlockFills(slice,dfill,ofill);CHKERRQ(ierr);} /* Irrelevant for baij formats */ ierr = DMSetMatType(slice,mat_type);CHKERRQ(ierr); ierr = DMCreateMatrix(slice,&A);CHKERRQ(ierr); ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); ierr = DMCreateGlobalVector(slice,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecGhostGetLocalForm(x,&lf);CHKERRQ(ierr); ierr = VecGetSize(lf,&m);CHKERRQ(ierr); if (m != (n+2)*bs) SETERRQ2(PETSC_COMM_SELF,1,"size of local form %D, expected %D",m,(n+2)*bs); ierr = VecGetArray(lf,&xx);CHKERRQ(ierr); for (i=0; i<n; i++) { PetscInt row[2],col[9],im,ip; PetscScalar v[12]; const PetscReal xref = 2.0*(rstart+i)/N - 1; /* [-1,1] */ const PetscReal h = 1.0/N; /* grid spacing */ im = (i==0) ? n : i-1; ip = (i==n-1) ? n+1 : i+1; switch (bs) { case 1: /* Laplacian with periodic boundaries */ col[0] = im; col[1] = i; col[2] = ip; v[0] = -h; v[1] = 2*h; v[2] = -h; ierr = MatSetValuesLocal(A,1,&i,3,col,v,INSERT_VALUES);CHKERRQ(ierr); xx[i] = PetscSinReal(xref*PETSC_PI); break; case 2: /* Linear acoustic wave operator in variables [rho, u], central differences, periodic, timestep 1/alpha */ v[0] = -0.5*u0; v[1] = -0.5*K; v[2] = alpha; v[3] = 0; v[4] = 0.5*u0; v[5] = 0.5*K; v[6] = -0.5/rho0; v[7] = -0.5*u0; v[8] = 0; v[9] = alpha; v[10] = 0.5/rho0; v[11] = 0.5*u0; if (useblock) { row[0] = i; col[0] = im; col[1] = i; col[2] = ip; ierr = MatSetValuesBlockedLocal(A,1,row,3,col,v,INSERT_VALUES);CHKERRQ(ierr); } else { row[0] = 2*i; row[1] = 2*i+1; col[0] = 2*im; col[1] = 2*im+1; col[2] = 2*i; col[3] = 2*ip; col[4] = 2*ip+1; v[3] = v[4]; v[4] = v[5]; /* pack values in first row */ ierr = MatSetValuesLocal(A,1,row,5,col,v,INSERT_VALUES);CHKERRQ(ierr); col[2] = 2*i+1; v[8] = v[9]; v[9] = v[10]; v[10] = v[11]; /* pack values in second row */ ierr = MatSetValuesLocal(A,1,row+1,5,col,v+6,INSERT_VALUES);CHKERRQ(ierr); } /* Set current state (gaussian density perturbation) */ xx[2*i] = 0.2*PetscExpReal(-PetscSqr(xref)/(2*PetscSqr(sigma))); xx[2*i+1] = 0; break; default: SETERRQ1(PETSC_COMM_SELF,1,"not implemented for block size %D",bs); } } ierr = VecRestoreArray(lf,&xx);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(x,&lf);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMult(A,x,b);CHKERRQ(ierr); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecView(b,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Update the ghosted values, view the result on rank 0. */ ierr = VecGhostUpdateBegin(b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGhostUpdateEnd(b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); if (!rank) { ierr = VecGhostGetLocalForm(b,&lf);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_SELF,"Local form of b on rank 0, last two nodes are ghost nodes\n");CHKERRQ(ierr); ierr = VecView(lf,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(b,&lf);CHKERRQ(ierr); } ierr = DMDestroy(&slice);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode PCSetUp_Redistribute(PC pc) { PC_Redistribute *red = (PC_Redistribute*)pc->data; PetscErrorCode ierr; MPI_Comm comm; PetscInt rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows; PetscLayout map,nmap; PetscMPIInt size,imdex,tag,n; PetscInt *source = NULL; PetscMPIInt *sizes = NULL,nrecvs; PetscInt j,nsends; PetscInt *owner = NULL,*starts = NULL,count,slen; PetscInt *rvalues,*svalues,recvtotal; PetscMPIInt *onodes1,*olengths1; MPI_Request *send_waits = NULL,*recv_waits = NULL; MPI_Status recv_status,*send_status; Vec tvec,diag; Mat tmat; const PetscScalar *d; PetscFunctionBegin; if (pc->setupcalled) { ierr = KSPGetOperators(red->ksp,NULL,&tmat);CHKERRQ(ierr); ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);CHKERRQ(ierr); ierr = KSPSetOperators(red->ksp,tmat,tmat);CHKERRQ(ierr); } else { PetscInt NN; ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr); /* count non-diagonal rows on process */ ierr = MatGetOwnershipRange(pc->mat,&rstart,&rend);CHKERRQ(ierr); cnt = 0; for (i=rstart; i<rend; i++) { ierr = MatGetRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr); if (nz > 1) cnt++; ierr = MatRestoreRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr); } ierr = PetscMalloc1(cnt,&rows);CHKERRQ(ierr); ierr = PetscMalloc1(rend - rstart - cnt,&drows);CHKERRQ(ierr); /* list non-diagonal rows on process */ cnt = 0; dcnt = 0; for (i=rstart; i<rend; i++) { ierr = MatGetRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr); if (nz > 1) rows[cnt++] = i; else drows[dcnt++] = i - rstart; ierr = MatRestoreRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr); } /* create PetscLayout for non-diagonal rows on each process */ ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr); ierr = PetscLayoutSetLocalSize(map,cnt);CHKERRQ(ierr); ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); rstart = map->rstart; rend = map->rend; /* create PetscLayout for load-balanced non-diagonal rows on each process */ ierr = PetscLayoutCreate(comm,&nmap);CHKERRQ(ierr); ierr = MPI_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); ierr = PetscLayoutSetSize(nmap,ncnt);CHKERRQ(ierr); ierr = PetscLayoutSetBlockSize(nmap,1);CHKERRQ(ierr); ierr = PetscLayoutSetUp(nmap);CHKERRQ(ierr); ierr = MatGetSize(pc->pmat,&NN,NULL);CHKERRQ(ierr); ierr = PetscInfo2(pc,"Number of diagonal rows eliminated %d, percentage eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)));CHKERRQ(ierr); /* this code is taken from VecScatterCreate_PtoS() Determines what rows need to be moved where to load balance the non-diagonal rows */ /* count number of contributors to each processor */ ierr = PetscMalloc2(size,&sizes,cnt,&owner);CHKERRQ(ierr); ierr = PetscMemzero(sizes,size*sizeof(PetscMPIInt));CHKERRQ(ierr); j = 0; nsends = 0; for (i=rstart; i<rend; i++) { if (i < nmap->range[j]) j = 0; for (; j<size; j++) { if (i < nmap->range[j+1]) { if (!sizes[j]++) nsends++; owner[i-rstart] = j; break; } } } /* inform other processors of number of messages and max length*/ ierr = PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs);CHKERRQ(ierr); ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1);CHKERRQ(ierr); ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr); recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i]; /* post receives: rvalues - rows I will own; count - nu */ ierr = PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);CHKERRQ(ierr); count = 0; for (i=0; i<nrecvs; i++) { ierr = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr); count += olengths1[i]; } /* do sends: 1) starts[i] gives the starting index in svalues for stuff going to the ith processor */ ierr = PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts);CHKERRQ(ierr); starts[0] = 0; for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1]; for (i=0; i<cnt; i++) svalues[starts[owner[i]]++] = rows[i]; for (i=0; i<cnt; i++) rows[i] = rows[i] - rstart; red->drows = drows; red->dcnt = dcnt; ierr = PetscFree(rows);CHKERRQ(ierr); starts[0] = 0; for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1]; count = 0; for (i=0; i<size; i++) { if (sizes[i]) { ierr = MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); } } /* wait on receives */ count = nrecvs; slen = 0; while (count) { ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); /* unpack receives into our local space */ ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); slen += n; count--; } if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal); ierr = ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);CHKERRQ(ierr); /* free up all work space */ ierr = PetscFree(olengths1);CHKERRQ(ierr); ierr = PetscFree(onodes1);CHKERRQ(ierr); ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr); ierr = PetscFree2(sizes,owner);CHKERRQ(ierr); if (nsends) { /* wait on sends */ ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); ierr = PetscFree(send_status);CHKERRQ(ierr); } ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr); ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); ierr = PetscLayoutDestroy(&nmap);CHKERRQ(ierr); ierr = VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);CHKERRQ(ierr); ierr = VecDuplicate(red->b,&red->x);CHKERRQ(ierr); ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); ierr = VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter);CHKERRQ(ierr); ierr = VecDestroy(&tvec);CHKERRQ(ierr); ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); ierr = KSPSetOperators(red->ksp,tmat,tmat);CHKERRQ(ierr); ierr = MatDestroy(&tmat);CHKERRQ(ierr); } /* get diagonal portion of matrix */ ierr = PetscFree(red->diag);CHKERRQ(ierr); ierr = PetscMalloc1(red->dcnt,&red->diag);CHKERRQ(ierr); ierr = MatCreateVecs(pc->pmat,&diag,NULL);CHKERRQ(ierr); ierr = MatGetDiagonal(pc->pmat,diag);CHKERRQ(ierr); ierr = VecGetArrayRead(diag,&d);CHKERRQ(ierr); for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]]; ierr = VecRestoreArrayRead(diag,&d);CHKERRQ(ierr); ierr = VecDestroy(&diag);CHKERRQ(ierr); ierr = KSPSetUp(red->ksp);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode MatPartitioningApply_Hierarchical(MatPartitioning part,IS *partitioning) { MatPartitioning_Hierarchical *hpart = (MatPartitioning_Hierarchical*)part->data; const PetscInt *fineparts_indices, *coarseparts_indices; PetscInt *parts_indices,i,j,mat_localsize; Mat mat = part->adj,adj,sadj; PetscBool flg; PetscInt bs = 1; MatPartitioning finePart, coarsePart; PetscInt *coarse_vertex_weights = 0; PetscMPIInt size,rank; MPI_Comm comm,scomm; IS destination,fineparts_temp; ISLocalToGlobalMapping mapping; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)part,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIADJ,&flg);CHKERRQ(ierr); if (flg) { adj = mat; ierr = PetscObjectReference((PetscObject)adj);CHKERRQ(ierr); }else { /* bs indicates if the converted matrix is "reduced" from the original and hence the resulting partition results need to be stretched to match the original matrix */ ierr = MatConvert(mat,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr); if (adj->rmap->n > 0) bs = mat->rmap->n/adj->rmap->n; } /* local size of mat */ mat_localsize = adj->rmap->n; /* check parameters */ /* how many small subdomains we want from a given 'big' suddomain */ if(!hpart->Nfineparts) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG," must set number of small subdomains for each big subdomain \n"); if(!hpart->Ncoarseparts && !part->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," did not either set number of coarse parts or total number of parts \n"); if(part->n && part->n%hpart->Nfineparts!=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP, " total number of parts %D can not be divided by number of fine parts %D\n",part->n,hpart->Nfineparts); if(part->n){ hpart->Ncoarseparts = part->n/hpart->Nfineparts; }else{ part->n = hpart->Ncoarseparts*hpart->Nfineparts; } /* we do not support this case currently, but this restriction should be * removed in the further * */ if(hpart->Ncoarseparts>size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP," we do not support number of coarse parts %D > size %D \n",hpart->Ncoarseparts,size); ierr = MatPartitioningCreate(comm,&coarsePart);CHKERRQ(ierr); /* if did not set partitioning type yet, use parmetis by default */ if(!hpart->coarseparttype){ ierr = MatPartitioningSetType(coarsePart,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); }else{ ierr = MatPartitioningSetType(coarsePart,hpart->coarseparttype);CHKERRQ(ierr); } ierr = MatPartitioningSetAdjacency(coarsePart,adj);CHKERRQ(ierr); ierr = MatPartitioningSetNParts(coarsePart, hpart->Ncoarseparts);CHKERRQ(ierr); /* copy over vertex weights */ if(part->vertex_weights){ ierr = PetscMalloc(sizeof(PetscInt)*mat_localsize,&coarse_vertex_weights);CHKERRQ(ierr); ierr = PetscMemcpy(coarse_vertex_weights,part->vertex_weights,sizeof(PetscInt)*mat_localsize);CHKERRQ(ierr); ierr = MatPartitioningSetVertexWeights(coarsePart,coarse_vertex_weights);CHKERRQ(ierr); } /* It looks nontrivial to support part weights, * I will return back to implement it when have * an idea. * */ ierr = MatPartitioningApply(coarsePart,&hpart->coarseparts);CHKERRQ(ierr); ierr = MatPartitioningDestroy(&coarsePart);CHKERRQ(ierr); /* In the current implementation, destination should be the same as hpart->coarseparts, * and this interface is preserved to deal with the case hpart->coarseparts>size in the * future. * */ ierr = MatPartitioningHierarchical_DetermineDestination(part,hpart->coarseparts,0,hpart->Ncoarseparts,&destination);CHKERRQ(ierr); /* assemble a submatrix for partitioning subdomains */ ierr = MatPartitioningHierarchical_AssembleSubdomain(adj,destination,&sadj,&mapping);CHKERRQ(ierr); ierr = ISDestroy(&destination);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)sadj,&scomm);CHKERRQ(ierr); /* create a fine partitioner */ ierr = MatPartitioningCreate(scomm,&finePart);CHKERRQ(ierr); /* if do not set partitioning type, use parmetis by default */ if(!hpart->fineparttype){ ierr = MatPartitioningSetType(finePart,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); }else{ ierr = MatPartitioningSetType(finePart,hpart->fineparttype);CHKERRQ(ierr); } ierr = MatPartitioningSetAdjacency(finePart,sadj);CHKERRQ(ierr); ierr = MatPartitioningSetNParts(finePart, hpart->Nfineparts);CHKERRQ(ierr); ierr = MatPartitioningApply(finePart,&fineparts_temp);CHKERRQ(ierr); ierr = MatDestroy(&sadj);CHKERRQ(ierr); ierr = MatPartitioningDestroy(&finePart);CHKERRQ(ierr); ierr = MatPartitioningHierarchical_ReassembleFineparts(adj,fineparts_temp,mapping,&hpart->fineparts);CHKERRQ(ierr); ierr = ISDestroy(&fineparts_temp);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); ierr = ISGetIndices(hpart->fineparts,&fineparts_indices);CHKERRQ(ierr); ierr = ISGetIndices(hpart->coarseparts,&coarseparts_indices);CHKERRQ(ierr); ierr = PetscMalloc1(bs*adj->rmap->n,&parts_indices);CHKERRQ(ierr); for(i=0; i<adj->rmap->n; i++){ for(j=0; j<bs; j++){ parts_indices[bs*i+j] = fineparts_indices[i]+coarseparts_indices[i]*hpart->Nfineparts; } } ierr = ISCreateGeneral(comm,bs*adj->rmap->n,parts_indices,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr); ierr = MatDestroy(&adj);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscInt i,n = 5; PetscInt getpetsc[] = {0,3,4},getapp[] = {2,1,9,7}; PetscInt getpetsc1[] = {0,3,4},getapp1[] = {2,1,9,7}; PetscInt getpetsc2[] = {0,3,4},getapp2[] = {2,1,9,7}; PetscInt getpetsc3[] = {0,3,4},getapp3[] = {2,1,9,7}; PetscInt getpetsc4[] = {0,3,4},getapp4[] = {2,1,9,7}; PetscMPIInt rank,size; IS ispetsc,isapp; AO ao; const PetscInt *app; ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-n",&n,PETSC_NULL);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); /* create the index sets */ ierr = ISCreateStride(PETSC_COMM_WORLD,n,rank,size,&isapp);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_WORLD,n,n*rank,1,&ispetsc);CHKERRQ(ierr); /* natural numbering */ /* create the application ordering */ ierr = AOCreateBasicIS(isapp,ispetsc,&ao);CHKERRQ(ierr); ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = AOPetscToApplication(ao,4,getapp);CHKERRQ(ierr); ierr = AOApplicationToPetsc(ao,3,getpetsc);CHKERRQ(ierr); ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] 2,1,9,7 PetscToApplication %D %D %D %D\n", rank,getapp[0],getapp[1],getapp[2],getapp[3]);CHKERRQ(ierr); ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] 0,3,4 ApplicationToPetsc %D %D %D\n", rank,getpetsc[0],getpetsc[1],getpetsc[2]);CHKERRQ(ierr); ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD);CHKERRQ(ierr); ierr = AODestroy(&ao);CHKERRQ(ierr); /* test MemoryScalable ao */ /*-------------------------*/ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"\nTest AOCreateMemoryScalable: \n"); } ierr = AOCreateMemoryScalableIS(isapp,ispetsc,&ao);CHKERRQ(ierr);CHKERRQ(ierr); ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = AOPetscToApplication(ao,4,getapp1);CHKERRQ(ierr); ierr = AOApplicationToPetsc(ao,3,getpetsc1);CHKERRQ(ierr); /* Check accuracy */; for (i=0; i<4;i++) if (getapp1[i] != getapp[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getapp1 %d != getapp %d",getapp1[i],getapp[i]); for (i=0; i<3;i++) if (getpetsc1[i] != getpetsc[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getpetsc1 %d != getpetsc %d",getpetsc1[i],getpetsc[i]); ierr = AODestroy(&ao);CHKERRQ(ierr); /* test MemoryScalable ao: ispetsc = PETSC_NULL */ /*-----------------------------------------------*/ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"\nTest AOCreateMemoryScalable with ispetsc=PETSC_NULL:\n"); } ierr = AOCreateMemoryScalableIS(isapp,PETSC_NULL,&ao);CHKERRQ(ierr);CHKERRQ(ierr); ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = AOPetscToApplication(ao,4,getapp2);CHKERRQ(ierr); ierr = AOApplicationToPetsc(ao,3,getpetsc2);CHKERRQ(ierr); /* Check accuracy */; for (i=0; i<4;i++) if (getapp2[i] != getapp[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getapp2 %d != getapp %d",getapp2[i],getapp[i]); for (i=0; i<3;i++) if (getpetsc2[i] != getpetsc[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getpetsc2 %d != getpetsc %d",getpetsc2[i],getpetsc[i]); ierr = AODestroy(&ao);CHKERRQ(ierr); /* test AOCreateMemoryScalable() ao: */ ierr = ISGetIndices(isapp,&app);CHKERRQ(ierr); ierr = AOCreateMemoryScalable(PETSC_COMM_WORLD,n,app,PETSC_NULL,&ao);CHKERRQ(ierr); ierr = ISRestoreIndices(isapp,&app);CHKERRQ(ierr); ierr = AOPetscToApplication(ao,4,getapp4);CHKERRQ(ierr); ierr = AOApplicationToPetsc(ao,3,getpetsc4);CHKERRQ(ierr); /* Check accuracy */; for (i=0; i<4;i++) if (getapp4[i] != getapp[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getapp4 %d != getapp %d",getapp4[i],getapp[i]); for (i=0; i<3;i++) if (getpetsc4[i] != getpetsc[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getpetsc4 %d != getpetsc %d",getpetsc4[i],getpetsc[i]); ierr = AODestroy(&ao);CHKERRQ(ierr); /* test general API */ /*------------------*/ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"\nTest general API: \n"); } ierr = AOCreate(PETSC_COMM_WORLD,&ao);CHKERRQ(ierr); ierr = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr); ierr = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr); ierr = AOSetFromOptions(ao);CHKERRQ(ierr); /* ispetsc and isapp are nolonger used. */ ierr = ISDestroy(&ispetsc);CHKERRQ(ierr); ierr = ISDestroy(&isapp);CHKERRQ(ierr); ierr = AOPetscToApplication(ao,4,getapp3);CHKERRQ(ierr); ierr = AOApplicationToPetsc(ao,3,getpetsc3);CHKERRQ(ierr); ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] 2,1,9,7 PetscToApplication %D %D %D %D\n", rank,getapp3[0],getapp3[1],getapp3[2],getapp3[3]);CHKERRQ(ierr); ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] 0,3,4 ApplicationToPetsc %D %D %D\n", rank,getpetsc3[0],getpetsc3[1],getpetsc3[2]);CHKERRQ(ierr); ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD);CHKERRQ(ierr); /* Check accuracy */; for (i=0; i<4;i++) if (getapp3[i] != getapp[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getapp3 %d != getapp %d",getapp3[i],getapp[i]); for (i=0; i<3;i++) if (getpetsc3[i] != getpetsc[i]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"getpetsc3 %d != getpetsc %d",getpetsc3[i],getpetsc[i]); ierr = AODestroy(&ao);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
PetscErrorCode PCGAMGProlongator_GEO(PC pc,Mat Amat,Mat Gmat,PetscCoarsenData *agg_lists,Mat *a_P_out) { PC_MG *mg = (PC_MG*)pc->data; PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; const PetscInt dim = pc_gamg->data_cell_cols, data_cols = pc_gamg->data_cell_cols; PetscErrorCode ierr; PetscInt Istart,Iend,nloc,my0,jj,kk,ncols,nLocalSelected,bs,*clid_flid; Mat Prol; PetscMPIInt rank, size; MPI_Comm comm; IS selected_2,selected_1; const PetscInt *selected_idx; MatType mtype; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); ierr = PetscLogEventBegin(PC_GAMGProlongator_GEO,0,0,0,0);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr); ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr); nloc = (Iend-Istart)/bs; my0 = Istart/bs; if ((Iend-Istart) % bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"(Iend %D - Istart %D) % bs %D",Iend,Istart,bs); /* get 'nLocalSelected' */ ierr = PetscCDGetMIS(agg_lists, &selected_1);CHKERRQ(ierr); ierr = ISGetSize(selected_1, &jj);CHKERRQ(ierr); ierr = PetscMalloc1(jj, &clid_flid);CHKERRQ(ierr); ierr = ISGetIndices(selected_1, &selected_idx);CHKERRQ(ierr); for (kk=0,nLocalSelected=0; kk<jj; kk++) { PetscInt lid = selected_idx[kk]; if (lid<nloc) { ierr = MatGetRow(Gmat,lid+my0,&ncols,0,0);CHKERRQ(ierr); if (ncols>1) clid_flid[nLocalSelected++] = lid; /* fiter out singletons */ ierr = MatRestoreRow(Gmat,lid+my0,&ncols,0,0);CHKERRQ(ierr); } } ierr = ISRestoreIndices(selected_1, &selected_idx);CHKERRQ(ierr); ierr = ISDestroy(&selected_1);CHKERRQ(ierr); /* this is selected_1 in serial */ /* create prolongator matrix */ ierr = MatGetType(Amat,&mtype);CHKERRQ(ierr); ierr = MatCreate(comm, &Prol);CHKERRQ(ierr); ierr = MatSetSizes(Prol,nloc*bs,nLocalSelected*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetBlockSizes(Prol, bs, bs);CHKERRQ(ierr); ierr = MatSetType(Prol, mtype);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(Prol,3*data_cols,NULL);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(Prol,3*data_cols,NULL,3*data_cols,NULL);CHKERRQ(ierr); /* can get all points "removed" - but not on geomg */ ierr = MatGetSize(Prol, &kk, &jj);CHKERRQ(ierr); if (!jj) { ierr = PetscInfo(pc,"ERROE: no selected points on coarse grid\n");CHKERRQ(ierr); ierr = PetscFree(clid_flid);CHKERRQ(ierr); ierr = MatDestroy(&Prol);CHKERRQ(ierr); *a_P_out = NULL; /* out */ PetscFunctionReturn(0); } { PetscReal *coords; PetscInt data_stride; PetscInt *crsGID = NULL; Mat Gmat2; if (dim != data_cols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim %D != data_cols %D",dim,data_cols); /* grow ghost data for better coarse grid cover of fine grid */ #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET5],0,0,0,0);CHKERRQ(ierr); #endif /* messy method, squares graph and gets some data */ ierr = getGIDsOnSquareGraph(nLocalSelected, clid_flid, Gmat, &selected_2, &Gmat2, &crsGID);CHKERRQ(ierr); /* llist is now not valid wrt squared graph, but will work as iterator in 'triangulateAndFormProl' */ #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET5],0,0,0,0);CHKERRQ(ierr); #endif /* create global vector of coorindates in 'coords' */ if (size > 1) { ierr = PCGAMGGetDataWithGhosts(Gmat2, dim, pc_gamg->data, &data_stride, &coords);CHKERRQ(ierr); } else { coords = (PetscReal*)pc_gamg->data; data_stride = pc_gamg->data_sz/pc_gamg->data_cell_cols; } ierr = MatDestroy(&Gmat2);CHKERRQ(ierr); /* triangulate */ if (dim == 2) { PetscReal metric,tm; #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET6],0,0,0,0);CHKERRQ(ierr); #endif ierr = triangulateAndFormProl(selected_2, data_stride, coords,nLocalSelected, clid_flid, agg_lists, crsGID, bs, Prol, &metric);CHKERRQ(ierr); #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET6],0,0,0,0);CHKERRQ(ierr); #endif ierr = PetscFree(crsGID);CHKERRQ(ierr); /* clean up and create coordinates for coarse grid (output) */ if (size > 1) ierr = PetscFree(coords);CHKERRQ(ierr); ierr = MPI_Allreduce(&metric, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr); if (tm > 1.) { /* needs to be globalized - should not happen */ ierr = PetscInfo1(pc," failed metric for coarse grid %e\n",(double)tm);CHKERRQ(ierr); ierr = MatDestroy(&Prol);CHKERRQ(ierr); } else if (metric > .0) { ierr = PetscInfo1(pc,"worst metric for coarse grid = %e\n",(double)metric);CHKERRQ(ierr); } } else SETERRQ(comm,PETSC_ERR_PLIB,"3D not implemented for 'geo' AMG"); { /* create next coords - output */ PetscReal *crs_crds; ierr = PetscMalloc1(dim*nLocalSelected, &crs_crds);CHKERRQ(ierr); for (kk=0; kk<nLocalSelected; kk++) { /* grab local select nodes to promote - output */ PetscInt lid = clid_flid[kk]; for (jj=0; jj<dim; jj++) crs_crds[jj*nLocalSelected + kk] = pc_gamg->data[jj*nloc + lid]; } ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr); pc_gamg->data = crs_crds; /* out */ pc_gamg->data_sz = dim*nLocalSelected; } ierr = ISDestroy(&selected_2);CHKERRQ(ierr); } *a_P_out = Prol; /* out */ ierr = PetscFree(clid_flid);CHKERRQ(ierr); ierr = PetscLogEventEnd(PC_GAMGProlongator_GEO,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ DMPlexCreateGmsh - Create a DMPlex mesh from a Gmsh file viewer Collective on comm Input Parameters: + comm - The MPI communicator . viewer - The Viewer associated with a Gmsh file - interpolate - Create faces and edges in the mesh Output Parameter: . dm - The DM object representing the mesh Note: http://www.geuz.org/gmsh/doc/texinfo/#MSH-ASCII-file-format and http://www.geuz.org/gmsh/doc/texinfo/#MSH-binary-file-format Level: beginner .keywords: mesh,Gmsh .seealso: DMPLEX, DMCreate() @*/ PetscErrorCode DMPlexCreateGmsh(MPI_Comm comm, PetscViewer viewer, PetscBool interpolate, DM *dm) { PetscViewerType vtype; GmshElement *gmsh_elem; PetscSection coordSection; Vec coordinates; PetscScalar *coords, *coordsIn = NULL; PetscInt dim = 0, coordSize, c, v, d, r, cell; int i, numVertices = 0, numCells = 0, trueNumCells = 0, numRegions = 0, snum; PetscMPIInt num_proc, rank; char line[PETSC_MAX_PATH_LEN]; PetscBool match, binary, bswap = PETSC_FALSE; PetscErrorCode ierr; PetscFunctionBegin; ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &num_proc);CHKERRQ(ierr); ierr = DMCreate(comm, dm);CHKERRQ(ierr); ierr = DMSetType(*dm, DMPLEX);CHKERRQ(ierr); ierr = PetscLogEventBegin(DMPLEX_CreateGmsh,*dm,0,0,0);CHKERRQ(ierr); ierr = PetscViewerGetType(viewer, &vtype);CHKERRQ(ierr); ierr = PetscStrcmp(vtype, PETSCVIEWERBINARY, &binary);CHKERRQ(ierr); if (!rank || binary) { PetscBool match; int fileType, dataSize; float version; /* Read header */ ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); ierr = PetscStrncmp(line, "$MeshFormat", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); ierr = PetscViewerRead(viewer, line, 3, NULL, PETSC_STRING);CHKERRQ(ierr); snum = sscanf(line, "%f %d %d", &version, &fileType, &dataSize); if (snum != 3) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unable to parse Gmsh file header: %s", line); if (version < 2.0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Gmsh file must be at least version 2.0"); if (dataSize != sizeof(double)) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Data size %d is not valid for a Gmsh file", dataSize); if (binary) { int checkInt; ierr = PetscViewerRead(viewer, &checkInt, 1, NULL, PETSC_ENUM);CHKERRQ(ierr); if (checkInt != 1) { ierr = PetscByteSwap(&checkInt, PETSC_ENUM, 1);CHKERRQ(ierr); if (checkInt == 1) bswap = PETSC_TRUE; else SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File type %d is not a valid Gmsh binary file", fileType); } } else if (fileType) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File type %d is not a valid Gmsh ASCII file", fileType); ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); ierr = PetscStrncmp(line, "$EndMeshFormat", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); /* OPTIONAL Read physical names */ ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); ierr = PetscStrncmp(line, "$PhysicalNames", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (match) { ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); snum = sscanf(line, "%d", &numRegions); if (snum != 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); for (r = 0; r < numRegions; ++r) { PetscInt rdim, tag; ierr = PetscViewerRead(viewer, &rdim, 1, NULL, PETSC_ENUM);CHKERRQ(ierr); ierr = PetscViewerRead(viewer, &tag, 1, NULL, PETSC_ENUM);CHKERRQ(ierr); ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); } ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); ierr = PetscStrncmp(line, "$EndPhysicalNames", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); /* Initial read for vertex section */ ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); } /* Read vertices */ ierr = PetscStrncmp(line, "$Nodes", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr); snum = sscanf(line, "%d", &numVertices); if (snum != 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); ierr = PetscMalloc1(numVertices*3, &coordsIn);CHKERRQ(ierr); if (binary) { size_t doubleSize, intSize; PetscInt elementSize; char *buffer; PetscScalar *baseptr; ierr = PetscDataTypeGetSize(PETSC_ENUM, &intSize);CHKERRQ(ierr); ierr = PetscDataTypeGetSize(PETSC_DOUBLE, &doubleSize);CHKERRQ(ierr); elementSize = (intSize + 3*doubleSize); ierr = PetscMalloc1(elementSize*numVertices, &buffer);CHKERRQ(ierr); ierr = PetscViewerRead(viewer, buffer, elementSize*numVertices, NULL, PETSC_CHAR);CHKERRQ(ierr); if (bswap) ierr = PetscByteSwap(buffer, PETSC_CHAR, elementSize*numVertices);CHKERRQ(ierr); for (v = 0; v < numVertices; ++v) { baseptr = ((PetscScalar*)(buffer+v*elementSize+intSize)); coordsIn[v*3+0] = baseptr[0]; coordsIn[v*3+1] = baseptr[1]; coordsIn[v*3+2] = baseptr[2]; } ierr = PetscFree(buffer);CHKERRQ(ierr); } else { for (v = 0; v < numVertices; ++v) { ierr = PetscViewerRead(viewer, &i, 1, NULL, PETSC_ENUM);CHKERRQ(ierr); ierr = PetscViewerRead(viewer, &(coordsIn[v*3]), 3, NULL, PETSC_DOUBLE);CHKERRQ(ierr); if (i != (int)v+1) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid node number %d should be %d", i, v+1); } } ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr);; ierr = PetscStrncmp(line, "$EndNodes", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); /* Read cells */ ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr);; ierr = PetscStrncmp(line, "$Elements", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr);; snum = sscanf(line, "%d", &numCells); if (snum != 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); } if (!rank || binary) { /* Gmsh elements can be of any dimension/co-dimension, so we need to traverse the file contents multiple times to figure out the true number of cells and facets in the given mesh. To make this more efficient we read the file contents only once and store them in memory, while determining the true number of cells. */ ierr = DMPlexCreateGmsh_ReadElement(viewer, numCells, binary, bswap, &gmsh_elem);CHKERRQ(ierr); for (trueNumCells=0, c = 0; c < numCells; ++c) { if (gmsh_elem[c].dim > dim) {dim = gmsh_elem[c].dim; trueNumCells = 0;} if (gmsh_elem[c].dim == dim) trueNumCells++; } ierr = PetscViewerRead(viewer, line, 1, NULL, PETSC_STRING);CHKERRQ(ierr);; ierr = PetscStrncmp(line, "$EndElements", PETSC_MAX_PATH_LEN, &match);CHKERRQ(ierr); if (!match) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "File is not a valid Gmsh file"); } /* For binary we read on all ranks, but only build the plex on rank 0 */ if (binary && rank) {trueNumCells = 0; numVertices = 0;}; /* Allocate the cell-vertex mesh */ ierr = DMPlexSetChart(*dm, 0, trueNumCells+numVertices);CHKERRQ(ierr); if (!rank) { for (cell = 0, c = 0; c < numCells; ++c) { if (gmsh_elem[c].dim == dim) { ierr = DMPlexSetConeSize(*dm, cell, gmsh_elem[c].numNodes);CHKERRQ(ierr); cell++; } } } ierr = DMSetUp(*dm);CHKERRQ(ierr); /* Add cell-vertex connections */ if (!rank) { PetscInt pcone[8], corner; for (cell = 0, c = 0; c < numCells; ++c) { if (gmsh_elem[c].dim == dim) { for (corner = 0; corner < gmsh_elem[c].numNodes; ++corner) { pcone[corner] = gmsh_elem[c].nodes[corner] + trueNumCells-1; } if (dim == 3) { /* Tetrahedra are inverted */ if (gmsh_elem[c].numNodes == 4) { PetscInt tmp = pcone[0]; pcone[0] = pcone[1]; pcone[1] = tmp; } /* Hexahedra are inverted */ if (gmsh_elem[c].numNodes == 8) { PetscInt tmp = pcone[1]; pcone[1] = pcone[3]; pcone[3] = tmp; } } ierr = DMPlexSetCone(*dm, cell, pcone);CHKERRQ(ierr); cell++; } } } ierr = MPI_Bcast(&dim, 1, MPIU_INT, 0, comm);CHKERRQ(ierr); ierr = DMSetDimension(*dm, dim);CHKERRQ(ierr); ierr = DMPlexSymmetrize(*dm);CHKERRQ(ierr); ierr = DMPlexStratify(*dm);CHKERRQ(ierr); if (interpolate) { DM idm = NULL; ierr = DMPlexInterpolate(*dm, &idm);CHKERRQ(ierr); ierr = DMDestroy(dm);CHKERRQ(ierr); *dm = idm; } if (!rank) { /* Apply boundary IDs by finding the relevant facets with vertex joins */ PetscInt pcone[8], corner, vStart, vEnd; ierr = DMPlexGetDepthStratum(*dm, 0, &vStart, &vEnd);CHKERRQ(ierr); for (c = 0; c < numCells; ++c) { if (gmsh_elem[c].dim == dim-1) { PetscInt joinSize; const PetscInt *join; for (corner = 0; corner < gmsh_elem[c].numNodes; ++corner) { pcone[corner] = gmsh_elem[c].nodes[corner] + vStart - 1; } ierr = DMPlexGetFullJoin(*dm, gmsh_elem[c].numNodes, (const PetscInt *) pcone, &joinSize, &join);CHKERRQ(ierr); if (joinSize != 1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Could not determine Plex facet for element %d", gmsh_elem[c].id); ierr = DMSetLabelValue(*dm, "Face Sets", join[0], gmsh_elem[c].tags[0]);CHKERRQ(ierr); ierr = DMPlexRestoreJoin(*dm, gmsh_elem[c].numNodes, (const PetscInt *) pcone, &joinSize, &join);CHKERRQ(ierr); } } /* Create cell sets */ for (cell = 0, c = 0; c < numCells; ++c) { if (gmsh_elem[c].dim == dim) { if (gmsh_elem[c].numTags > 0) { ierr = DMSetLabelValue(*dm, "Cell Sets", cell, gmsh_elem[c].tags[0]);CHKERRQ(ierr); cell++; } } } } /* Read coordinates */ ierr = DMGetCoordinateSection(*dm, &coordSection);CHKERRQ(ierr); ierr = PetscSectionSetNumFields(coordSection, 1);CHKERRQ(ierr); ierr = PetscSectionSetFieldComponents(coordSection, 0, dim);CHKERRQ(ierr); ierr = PetscSectionSetChart(coordSection, trueNumCells, trueNumCells + numVertices);CHKERRQ(ierr); for (v = trueNumCells; v < trueNumCells+numVertices; ++v) { ierr = PetscSectionSetDof(coordSection, v, dim);CHKERRQ(ierr); ierr = PetscSectionSetFieldDof(coordSection, v, 0, dim);CHKERRQ(ierr); } ierr = PetscSectionSetUp(coordSection);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(coordSection, &coordSize);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_SELF, &coordinates);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr); ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr); ierr = VecSetBlockSize(coordinates, dim);CHKERRQ(ierr); ierr = VecSetType(coordinates, VECSTANDARD);CHKERRQ(ierr); ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr); if (!rank) { for (v = 0; v < numVertices; ++v) { for (d = 0; d < dim; ++d) { coords[v*dim+d] = coordsIn[v*3+d]; } } } ierr = VecRestoreArray(coordinates, &coords);CHKERRQ(ierr); ierr = PetscFree(coordsIn);CHKERRQ(ierr); ierr = DMSetCoordinatesLocal(*dm, coordinates);CHKERRQ(ierr); ierr = VecDestroy(&coordinates);CHKERRQ(ierr); /* Clean up intermediate storage */ if (!rank || binary) ierr = PetscFree(gmsh_elem);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_CreateGmsh,*dm,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DMCreateAggregates_ADDA(DM dmc,DM dmf,Mat *rest) { PetscErrorCode ierr=0; PetscInt i; PetscInt dim; PetscInt dofc, doff; PetscInt *lcs_c, *lce_c; PetscInt *lcs_f, *lce_f; PetscInt *fgs, *fge; PetscInt fgdofs, fgdofe; ADDAIdx iter_c, iter_f; PetscInt max_agg_size; PetscMPIInt comm_size; ADDAIdx *fine_nodes; PetscInt fn_idx; PetscScalar *one_vec; DM_ADDA *ddc = (DM_ADDA*)dmc->data; DM_ADDA *ddf = (DM_ADDA*)dmf->data; PetscFunctionBegin; PetscValidHeaderSpecific(dmc, DM_CLASSID, 1); PetscValidHeaderSpecific(dmf, DM_CLASSID, 2); PetscValidPointer(rest,3); if (ddc->dim != ddf->dim) SETERRQ2(((PetscObject)dmf)->comm,PETSC_ERR_ARG_INCOMP,"Dimensions of ADDA do not match %D %D", ddc->dim, ddf->dim);CHKERRQ(ierr); /* if (dmc->dof != dmf->dof) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"DOF of ADDA do not match %D %D", dmc->dof, dmf->dof);CHKERRQ(ierr); */ dim = ddc->dim; dofc = ddc->dof; doff = ddf->dof; ierr = DMADDAGetCorners(dmc, &lcs_c, &lce_c);CHKERRQ(ierr); ierr = DMADDAGetCorners(dmf, &lcs_f, &lce_f);CHKERRQ(ierr); /* compute maximum size of aggregate */ max_agg_size = 1; for(i=0; i<dim; i++) { max_agg_size *= ddf->nodes[i] / ddc->nodes[i] + 1; } max_agg_size *= doff / dofc + 1; /* create the matrix that will contain the restriction operator */ ierr = MPI_Comm_size(PETSC_COMM_WORLD,&comm_size);CHKERRQ(ierr); /* construct matrix */ if( comm_size == 1 ) { ierr = DMADDAGetMatrixNS(dmc, dmf, MATSEQAIJ, rest);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(*rest, max_agg_size, PETSC_NULL);CHKERRQ(ierr); } else { ierr = DMADDAGetMatrixNS(dmc, dmf, MATMPIAIJ, rest);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(*rest, max_agg_size, PETSC_NULL, max_agg_size, PETSC_NULL);CHKERRQ(ierr); } /* store nodes in the fine grid here */ ierr = PetscMalloc(sizeof(ADDAIdx)*max_agg_size, &fine_nodes);CHKERRQ(ierr); /* these are the values to set to, a collection of 1's */ ierr = PetscMalloc(sizeof(PetscScalar)*max_agg_size, &one_vec);CHKERRQ(ierr); /* initialize */ for(i=0; i<max_agg_size; i++) { ierr = PetscMalloc(sizeof(PetscInt)*dim, &(fine_nodes[i].x));CHKERRQ(ierr); one_vec[i] = 1.0; } /* get iterators */ ierr = PetscMalloc(sizeof(PetscInt)*dim, &(iter_c.x));CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscInt)*dim, &(iter_f.x));CHKERRQ(ierr); /* the fine grid node corner for each coarse grid node */ ierr = PetscMalloc(sizeof(PetscInt)*dim, &fgs);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscInt)*dim, &fge);CHKERRQ(ierr); /* loop over all coarse nodes */ ierr = PetscMemcpy(iter_c.x, lcs_c, sizeof(PetscInt)*dim);CHKERRQ(ierr); if( ADDAHCiterStartup(dim, lcs_c, lce_c, iter_c.x) ) { do { /* find corresponding fine grid nodes */ for(i=0; i<dim; i++) { fgs[i] = iter_c.x[i]*ddf->nodes[i]/ddc->nodes[i]; fge[i] = PetscMin((iter_c.x[i]+1)*ddf->nodes[i]/ddc->nodes[i], ddf->nodes[i]); } /* treat all dof of the coarse grid */ for(iter_c.d=0; iter_c.d<dofc; iter_c.d++) { /* find corresponding fine grid dof's */ fgdofs = iter_c.d*doff/dofc; fgdofe = PetscMin((iter_c.d+1)*doff/dofc, doff); /* we now know the "box" of all the fine grid nodes that are mapped to one coarse grid node */ fn_idx = 0; /* loop over those corresponding fine grid nodes */ if( ADDAHCiterStartup(dim, fgs, fge, iter_f.x) ) { do { /* loop over all corresponding fine grid dof */ for(iter_f.d=fgdofs; iter_f.d<fgdofe; iter_f.d++) { ierr = PetscMemcpy(fine_nodes[fn_idx].x, iter_f.x, sizeof(PetscInt)*dim);CHKERRQ(ierr); fine_nodes[fn_idx].d = iter_f.d; fn_idx++; } } while( ADDAHCiter(dim, fgs, fge, iter_f.x) ); } /* add all these points to one aggregate */ ierr = DMADDAMatSetValues(*rest, dmc, 1, &iter_c, dmf, fn_idx, fine_nodes, one_vec, INSERT_VALUES);CHKERRQ(ierr); } } while( ADDAHCiter(dim, lcs_c, lce_c, iter_c.x) ); } /* free memory */ ierr = PetscFree(fgs);CHKERRQ(ierr); ierr = PetscFree(fge);CHKERRQ(ierr); ierr = PetscFree(iter_c.x);CHKERRQ(ierr); ierr = PetscFree(iter_f.x);CHKERRQ(ierr); ierr = PetscFree(lcs_c);CHKERRQ(ierr); ierr = PetscFree(lce_c);CHKERRQ(ierr); ierr = PetscFree(lcs_f);CHKERRQ(ierr); ierr = PetscFree(lce_f);CHKERRQ(ierr); ierr = PetscFree(one_vec);CHKERRQ(ierr); for(i=0; i<max_agg_size; i++) { ierr = PetscFree(fine_nodes[i].x);CHKERRQ(ierr); } ierr = PetscFree(fine_nodes);CHKERRQ(ierr); ierr = MatAssemblyBegin(*rest, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(*rest, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* Performs an efficient scatter on the rows of B needed by this process; this is a modification of the VecScatterBegin_() routines. */ PetscErrorCode MatMPIDenseScatter(Mat A,Mat B,Mat C,Mat *outworkB) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; PetscErrorCode ierr; PetscScalar *b,*w,*svalues,*rvalues; VecScatter ctx = aij->Mvctx; VecScatter_MPI_General *from = (VecScatter_MPI_General*) ctx->fromdata; VecScatter_MPI_General *to = ( VecScatter_MPI_General*) ctx->todata; PetscInt i,j,k; PetscInt *sindices,*sstarts,*rindices,*rstarts; PetscMPIInt *sprocs,*rprocs,nrecvs; MPI_Request *swaits,*rwaits; MPI_Comm comm = ((PetscObject)A)->comm; PetscMPIInt tag = ((PetscObject)ctx)->tag,ncols = B->cmap->N, nrows = aij->B->cmap->n,imdex,nrowsB = B->rmap->n; MPI_Status status; MPIAIJ_MPIDense *contents; PetscContainer cont; Mat workB; PetscFunctionBegin; ierr = PetscObjectQuery((PetscObject)C,"workB",(PetscObject*)&cont);CHKERRQ(ierr); ierr = PetscContainerGetPointer(cont,(void**)&contents);CHKERRQ(ierr); workB = *outworkB = contents->workB; if (nrows != workB->rmap->n) SETERRQ2(PETSC_ERR_PLIB,"Number of rows of workB %D not equal to columns of aij->B %D",nrows,workB->cmap->n); sindices = to->indices; sstarts = to->starts; sprocs = to->procs; swaits = contents->swaits; svalues = contents->svalues; rindices = from->indices; rstarts = from->starts; rprocs = from->procs; rwaits = contents->rwaits; rvalues = contents->rvalues; ierr = MatGetArray(B,&b);CHKERRQ(ierr); ierr = MatGetArray(workB,&w);CHKERRQ(ierr); for (i=0; i<from->n; i++) { ierr = MPI_Irecv(rvalues+ncols*rstarts[i],ncols*(rstarts[i+1]-rstarts[i]),MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); } for (i=0; i<to->n; i++) { /* pack a message at a time */ CHKMEMQ; for (j=0; j<sstarts[i+1]-sstarts[i]; j++){ for (k=0; k<ncols; k++) { svalues[ncols*(sstarts[i] + j) + k] = b[sindices[sstarts[i]+j] + nrowsB*k]; } } CHKMEMQ; ierr = MPI_Isend(svalues+ncols*sstarts[i],ncols*(sstarts[i+1]-sstarts[i]),MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); } nrecvs = from->n; while (nrecvs) { ierr = MPI_Waitany(from->n,rwaits,&imdex,&status);CHKERRQ(ierr); nrecvs--; /* unpack a message at a time */ CHKMEMQ; for (j=0; j<rstarts[imdex+1]-rstarts[imdex]; j++){ for (k=0; k<ncols; k++) { w[rindices[rstarts[imdex]+j] + nrows*k] = rvalues[ncols*(rstarts[imdex] + j) + k]; } } CHKMEMQ; } if (to->n) {ierr = MPI_Waitall(to->n,swaits,to->sstatus);CHKERRQ(ierr)}
EXTERN_C_END EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "MatSeqAIJFromMatlab" /*@C MatSeqAIJFromMatlab - Given a MATLAB sparse matrix, fills a SeqAIJ matrix with its transpose. Not Collective Input Parameters: + mmat - a MATLAB sparse matris - mat - a already created MATSEQAIJ Level: intermediate @*/ PetscErrorCode MatSeqAIJFromMatlab(mxArray *mmat,Mat mat) { PetscErrorCode ierr; PetscInt nz,n,m,*i,*j,k; mwIndex nnz,nn,nm,*ii,*jj; Mat_SeqAIJ *aij = (Mat_SeqAIJ*)mat->data; PetscFunctionBegin; nn = mxGetN(mmat); /* rows of transpose of matrix */ nm = mxGetM(mmat); nnz = (mxGetJc(mmat))[nn]; ii = mxGetJc(mmat); jj = mxGetIr(mmat); n = (PetscInt) nn; m = (PetscInt) nm; nz = (PetscInt) nnz; if (mat->rmap->n < 0 && mat->cmap->n < 0) { /* matrix has not yet had its size set */ ierr = MatSetSizes(mat,n,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetUp(mat);CHKERRQ(ierr); } else { if (mat->rmap->n != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot change size of PETSc matrix %D to %D",mat->rmap->n,n); if (mat->cmap->n != m) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot change size of PETSc matrix %D to %D",mat->cmap->n,m); } if (nz != aij->nz) { /* number of nonzeros in matrix has changed, so need new data structure */ ierr = MatSeqXAIJFreeAIJ(mat,&aij->a,&aij->j,&aij->i);CHKERRQ(ierr); aij->nz = nz; ierr = PetscMalloc3(aij->nz,PetscScalar,&aij->a,aij->nz,PetscInt,&aij->j,mat->rmap->n+1,PetscInt,&aij->i);CHKERRQ(ierr); aij->singlemalloc = PETSC_TRUE; } ierr = PetscMemcpy(aij->a,mxGetPr(mmat),aij->nz*sizeof(PetscScalar));CHKERRQ(ierr); /* MATLAB stores by column, not row so we pass in the transpose of the matrix */ i = aij->i; for (k=0; k<n+1; k++) { i[k] = (PetscInt) ii[k]; } j = aij->j; for (k=0; k<nz; k++) { j[k] = (PetscInt) jj[k]; } for (k=0; k<mat->rmap->n; k++) { aij->ilen[k] = aij->imax[k] = aij->i[k+1] - aij->i[k]; } ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode MatColoringCreateBipartiteGraph(MatColoring mc,PetscSF *etoc,PetscSF *etor) { PetscErrorCode ierr; PetscInt nentries,ncolentries,idx; PetscInt i,j,rs,re,cs,ce,cn; PetscInt *rowleaf,*colleaf,*rowdata; PetscInt ncol; const PetscScalar *vcol; const PetscInt *icol; const PetscInt *coldegrees,*rowdegrees; Mat m = mc->mat; PetscFunctionBegin; ierr = MatGetOwnershipRange(m,&rs,&re);CHKERRQ(ierr); ierr = MatGetOwnershipRangeColumn(m,&cs,&ce);CHKERRQ(ierr); cn = ce-cs; nentries=0; for (i=rs;i<re;i++) { ierr = MatGetRow(m,i,&ncol,NULL,&vcol);CHKERRQ(ierr); for (j=0;j<ncol;j++) { nentries++; } ierr = MatRestoreRow(m,i,&ncol,NULL,&vcol);CHKERRQ(ierr); } ierr = PetscMalloc1(nentries,&rowleaf);CHKERRQ(ierr); ierr = PetscMalloc1(nentries,&rowdata);CHKERRQ(ierr); idx=0; for (i=rs;i<re;i++) { ierr = MatGetRow(m,i,&ncol,&icol,&vcol);CHKERRQ(ierr); for (j=0;j<ncol;j++) { rowleaf[idx] = icol[j]; rowdata[idx] = i; idx++; } ierr = MatRestoreRow(m,i,&ncol,&icol,&vcol);CHKERRQ(ierr); } if (idx != nentries) SETERRQ2(PetscObjectComm((PetscObject)m),PETSC_ERR_NOT_CONVERGED,"Bad number of entries %d vs %d",idx,nentries); ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),etoc);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),etor);CHKERRQ(ierr); ierr = PetscSFSetGraphLayout(*etoc,m->cmap,nentries,NULL,PETSC_COPY_VALUES,rowleaf);CHKERRQ(ierr); ierr = PetscSFSetFromOptions(*etoc);CHKERRQ(ierr); /* determine the number of entries in the column matrix */ ierr = PetscLogEventBegin(MATCOLORING_Comm,*etoc,0,0,0);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(*etoc,&coldegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(*etoc,&coldegrees);CHKERRQ(ierr); ierr = PetscLogEventEnd(MATCOLORING_Comm,*etoc,0,0,0);CHKERRQ(ierr); ncolentries=0; for (i=0;i<cn;i++) { ncolentries += coldegrees[i]; } ierr = PetscMalloc1(ncolentries,&colleaf);CHKERRQ(ierr); /* create the one going the other way by building the leaf set */ ierr = PetscLogEventBegin(MATCOLORING_Comm,*etoc,0,0,0);CHKERRQ(ierr); ierr = PetscSFGatherBegin(*etoc,MPIU_INT,rowdata,colleaf);CHKERRQ(ierr); ierr = PetscSFGatherEnd(*etoc,MPIU_INT,rowdata,colleaf);CHKERRQ(ierr); ierr = PetscLogEventEnd(MATCOLORING_Comm,*etoc,0,0,0);CHKERRQ(ierr); /* this one takes mat entries in *columns* to rows -- you never have to actually be able to order the leaf entries. */ ierr = PetscSFSetGraphLayout(*etor,m->rmap,ncolentries,NULL,PETSC_COPY_VALUES,colleaf);CHKERRQ(ierr); ierr = PetscSFSetFromOptions(*etor);CHKERRQ(ierr); ierr = PetscLogEventBegin(MATCOLORING_Comm,*etor,0,0,0);CHKERRQ(ierr); ierr = PetscSFComputeDegreeBegin(*etor,&rowdegrees);CHKERRQ(ierr); ierr = PetscSFComputeDegreeEnd(*etor,&rowdegrees);CHKERRQ(ierr); ierr = PetscLogEventEnd(MATCOLORING_Comm,*etor,0,0,0);CHKERRQ(ierr); ierr = PetscFree(rowdata);CHKERRQ(ierr); ierr = PetscFree(rowleaf);CHKERRQ(ierr); ierr = PetscFree(colleaf);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_STATIC_INLINE PetscErrorCode DMInterpolate_Hex_Private(DMInterpolationInfo ctx, DM dm, Vec xLocal, Vec v) { DM dmCoord; SNES snes; KSP ksp; PC pc; Vec coordsLocal, r, ref, real; Mat J; PetscScalar *a, *coords; PetscInt p; PetscErrorCode ierr; PetscFunctionBegin; ierr = DMGetCoordinatesLocal(dm, &coordsLocal);CHKERRQ(ierr); ierr = DMGetCoordinateDM(dm, &dmCoord);CHKERRQ(ierr); ierr = SNESCreate(PETSC_COMM_SELF, &snes);CHKERRQ(ierr); ierr = SNESSetOptionsPrefix(snes, "hex_interp_");CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_SELF, &r);CHKERRQ(ierr); ierr = VecSetSizes(r, 3, 3);CHKERRQ(ierr); ierr = VecSetType(r,dm->vectype);CHKERRQ(ierr); ierr = VecDuplicate(r, &ref);CHKERRQ(ierr); ierr = VecDuplicate(r, &real);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_SELF, &J);CHKERRQ(ierr); ierr = MatSetSizes(J, 3, 3, 3, 3);CHKERRQ(ierr); ierr = MatSetType(J, MATSEQDENSE);CHKERRQ(ierr); ierr = MatSetUp(J);CHKERRQ(ierr); ierr = SNESSetFunction(snes, r, HexMap_Private, NULL);CHKERRQ(ierr); ierr = SNESSetJacobian(snes, J, J, HexJacobian_Private, NULL);CHKERRQ(ierr); ierr = SNESGetKSP(snes, &ksp);CHKERRQ(ierr); ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr); ierr = PCSetType(pc, PCLU);CHKERRQ(ierr); ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); ierr = VecGetArray(ctx->coords, &coords);CHKERRQ(ierr); ierr = VecGetArray(v, &a);CHKERRQ(ierr); for (p = 0; p < ctx->n; ++p) { PetscScalar *x = NULL, *vertices = NULL; PetscScalar *xi; PetscReal xir[3]; PetscInt c = ctx->cells[p], comp, coordSize, xSize; /* Can make this do all points at once */ ierr = DMPlexVecGetClosure(dmCoord, NULL, coordsLocal, c, &coordSize, &vertices);CHKERRQ(ierr); if (8*3 != coordSize) SETERRQ2(ctx->comm, PETSC_ERR_ARG_SIZ, "Invalid closure size %d should be %d", coordSize, 8*3); ierr = DMPlexVecGetClosure(dm, NULL, xLocal, c, &xSize, &x);CHKERRQ(ierr); if (8*ctx->dof != xSize) SETERRQ2(ctx->comm, PETSC_ERR_ARG_SIZ, "Invalid closure size %d should be %d", xSize, 8*ctx->dof); ierr = SNESSetFunction(snes, NULL, NULL, (void*) vertices);CHKERRQ(ierr); ierr = SNESSetJacobian(snes, NULL, NULL, NULL, (void*) vertices);CHKERRQ(ierr); ierr = VecGetArray(real, &xi);CHKERRQ(ierr); xi[0] = coords[p*ctx->dim+0]; xi[1] = coords[p*ctx->dim+1]; xi[2] = coords[p*ctx->dim+2]; ierr = VecRestoreArray(real, &xi);CHKERRQ(ierr); ierr = SNESSolve(snes, real, ref);CHKERRQ(ierr); ierr = VecGetArray(ref, &xi);CHKERRQ(ierr); xir[0] = PetscRealPart(xi[0]); xir[1] = PetscRealPart(xi[1]); xir[2] = PetscRealPart(xi[2]); for (comp = 0; comp < ctx->dof; ++comp) { a[p*ctx->dof+comp] = x[0*ctx->dof+comp]*(1-xir[0])*(1-xir[1])*(1-xir[2]) + x[3*ctx->dof+comp]* xir[0]*(1-xir[1])*(1-xir[2]) + x[2*ctx->dof+comp]* xir[0]* xir[1]*(1-xir[2]) + x[1*ctx->dof+comp]*(1-xir[0])* xir[1]*(1-xir[2]) + x[4*ctx->dof+comp]*(1-xir[0])*(1-xir[1])* xir[2] + x[5*ctx->dof+comp]* xir[0]*(1-xir[1])* xir[2] + x[6*ctx->dof+comp]* xir[0]* xir[1]* xir[2] + x[7*ctx->dof+comp]*(1-xir[0])* xir[1]* xir[2]; } ierr = VecRestoreArray(ref, &xi);CHKERRQ(ierr); ierr = DMPlexVecRestoreClosure(dmCoord, NULL, coordsLocal, c, &coordSize, &vertices);CHKERRQ(ierr); ierr = DMPlexVecRestoreClosure(dm, NULL, xLocal, c, &xSize, &x);CHKERRQ(ierr); } ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); ierr = VecRestoreArray(ctx->coords, &coords);CHKERRQ(ierr); ierr = SNESDestroy(&snes);CHKERRQ(ierr); ierr = VecDestroy(&r);CHKERRQ(ierr); ierr = VecDestroy(&ref);CHKERRQ(ierr); ierr = VecDestroy(&real);CHKERRQ(ierr); ierr = MatDestroy(&J);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DMSetUp_DA_2D(DM da) { DM_DA *dd = (DM_DA*)da->data; const PetscInt M = dd->M; const PetscInt N = dd->N; PetscInt m = dd->m; PetscInt n = dd->n; const PetscInt dof = dd->w; const PetscInt s = dd->s; DMDABoundaryType bx = dd->bx; DMDABoundaryType by = dd->by; DMDAStencilType stencil_type = dd->stencil_type; PetscInt *lx = dd->lx; PetscInt *ly = dd->ly; MPI_Comm comm; PetscMPIInt rank,size; PetscInt xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,start,end,IXs,IXe,IYs,IYe; PetscInt up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn; PetscInt xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count; PetscInt s_x,s_y; /* s proportionalized to w */ PetscInt sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0; Vec local,global; VecScatter ltog,gtol; IS to,from,ltogis; PetscErrorCode ierr; PetscFunctionBegin; if (stencil_type == DMDA_STENCIL_BOX && (bx == DMDA_BOUNDARY_MIRROR || by == DMDA_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil"); ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (((Petsc64bitInt) M)*((Petsc64bitInt) N)*((Petsc64bitInt) dof) > (Petsc64bitInt) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof); #endif if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof); if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (m != PETSC_DECIDE) { if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m); else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size); } if (n != PETSC_DECIDE) { if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n); else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size); } if (m == PETSC_DECIDE || n == PETSC_DECIDE) { if (n != PETSC_DECIDE) { m = size/n; } else if (m != PETSC_DECIDE) { n = size/m; } else { /* try for squarish distribution */ m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N))); if (!m) m = 1; while (m > 0) { n = size/m; if (m*n == size) break; m--; } if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;} } if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n "); } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition"); if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m); if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n); /* Determine locally owned region xs is the first local node number, x is the number of local nodes */ if (!lx) { ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr); lx = dd->lx; for (i=0; i<m; i++) { lx[i] = M/m + ((M % m) > i); } } x = lx[rank % m]; xs = 0; for (i=0; i<(rank % m); i++) { xs += lx[i]; } #if defined(PETSC_USE_DEBUG) left = xs; for (i=(rank % m); i<m; i++) { left += lx[i]; } if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M); #endif /* Determine locally owned region ys is the first local node number, y is the number of local nodes */ if (!ly) { ierr = PetscMalloc1(n, &dd->ly);CHKERRQ(ierr); ly = dd->ly; for (i=0; i<n; i++) { ly[i] = N/n + ((N % n) > i); } } y = ly[rank/m]; ys = 0; for (i=0; i<(rank/m); i++) { ys += ly[i]; } #if defined(PETSC_USE_DEBUG) left = ys; for (i=(rank/m); i<n; i++) { left += ly[i]; } if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N); #endif /* check if the scatter requires more than one process neighbor or wraps around the domain more than once */ if ((x < s) && ((m > 1) || (bx == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s); if ((y < s) && ((n > 1) || (by == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s); xe = xs + x; ye = ys + y; /* determine ghost region (Xs) and region scattered into (IXs) */ if (xs-s > 0) { Xs = xs - s; IXs = xs - s; } else { if (bx) { Xs = xs - s; } else { Xs = 0; } IXs = 0; } if (xe+s <= M) { Xe = xe + s; IXe = xe + s; } else { if (bx) { Xs = xs - s; Xe = xe + s; } else { Xe = M; } IXe = M; } if (bx == DMDA_BOUNDARY_PERIODIC || bx == DMDA_BOUNDARY_MIRROR) { IXs = xs - s; IXe = xe + s; Xs = xs - s; Xe = xe + s; } if (ys-s > 0) { Ys = ys - s; IYs = ys - s; } else { if (by) { Ys = ys - s; } else { Ys = 0; } IYs = 0; } if (ye+s <= N) { Ye = ye + s; IYe = ye + s; } else { if (by) { Ye = ye + s; } else { Ye = N; } IYe = N; } if (by == DMDA_BOUNDARY_PERIODIC || by == DMDA_BOUNDARY_MIRROR) { IYs = ys - s; IYe = ye + s; Ys = ys - s; Ye = ye + s; } /* stencil length in each direction */ s_x = s; s_y = s; /* determine starting point of each processor */ nn = x*y; ierr = PetscMalloc2(size+1,&bases,size,&ldims);CHKERRQ(ierr); ierr = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm);CHKERRQ(ierr); bases[0] = 0; for (i=1; i<=size; i++) { bases[i] = ldims[i-1]; } for (i=1; i<=size; i++) { bases[i] += bases[i-1]; } base = bases[rank]*dof; /* allocate the base parallel and sequential vectors */ dd->Nlocal = x*y*dof; ierr = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,0,&global);CHKERRQ(ierr); dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof; ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,0,&local);CHKERRQ(ierr); /* generate appropriate vector scatters */ /* local to global inserts non-ghost point region into global */ ierr = VecGetOwnershipRange(global,&start,&end);CHKERRQ(ierr); ierr = ISCreateStride(comm,x*y*dof,start,1,&to);CHKERRQ(ierr); ierr = PetscMalloc1(x*y,&idx);CHKERRQ(ierr); left = xs - Xs; right = left + x; down = ys - Ys; up = down + y; count = 0; for (i=down; i<up; i++) { for (j=left; j<right; j++) { idx[count++] = i*(Xe-Xs) + j; } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&from);CHKERRQ(ierr); ierr = VecScatterCreate(local,from,global,to,<og);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)ltog);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); /* global to local must include ghost points within the domain, but not ghost points outside the domain that aren't periodic */ if (stencil_type == DMDA_STENCIL_BOX) { count = (IXe-IXs)*(IYe-IYs); ierr = PetscMalloc1(count,&idx);CHKERRQ(ierr); left = IXs - Xs; right = left + (IXe-IXs); down = IYs - Ys; up = down + (IYe-IYs); count = 0; for (i=down; i<up; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); } else { /* must drop into cross shape region */ /* ---------| | top | |--- ---| up | middle | | | ---- ---- down | bottom | ----------- Xs xs xe Xe */ count = (ys-IYs)*x + y*(IXe-IXs) + (IYe-ye)*x; ierr = PetscMalloc1(count,&idx);CHKERRQ(ierr); left = xs - Xs; right = left + x; down = ys - Ys; up = down + y; count = 0; /* bottom */ for (i=(IYs-Ys); i<down; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } /* middle */ for (i=down; i<up; i++) { for (j=(IXs-Xs); j<(IXe-Xs); j++) { idx[count++] = j + i*(Xe-Xs); } } /* top */ for (i=up; i<up+IYe-ye; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); } /* determine who lies on each side of us stored in n6 n7 n8 n3 n5 n0 n1 n2 */ /* Assume the Non-Periodic Case */ n1 = rank - m; if (rank % m) { n0 = n1 - 1; } else { n0 = -1; } if ((rank+1) % m) { n2 = n1 + 1; n5 = rank + 1; n8 = rank + m + 1; if (n8 >= m*n) n8 = -1; } else { n2 = -1; n5 = -1; n8 = -1; } if (rank % m) { n3 = rank - 1; n6 = n3 + m; if (n6 >= m*n) n6 = -1; } else { n3 = -1; n6 = -1; } n7 = rank + m; if (n7 >= m*n) n7 = -1; if (bx == DMDA_BOUNDARY_PERIODIC && by == DMDA_BOUNDARY_PERIODIC) { /* Modify for Periodic Cases */ /* Handle all four corners */ if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1; if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0; if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m; if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1; /* Handle Top and Bottom Sides */ if (n1 < 0) n1 = rank + m * (n-1); if (n7 < 0) n7 = rank - m * (n-1); if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1; if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1; if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1; if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1; /* Handle Left and Right Sides */ if (n3 < 0) n3 = rank + (m-1); if (n5 < 0) n5 = rank - (m-1); if ((n1 >= 0) && (n0 < 0)) n0 = rank-1; if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1; if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1; if ((n7 >= 0) && (n8 < 0)) n8 = rank+1; } else if (by == DMDA_BOUNDARY_PERIODIC) { /* Handle Top and Bottom Sides */ if (n1 < 0) n1 = rank + m * (n-1); if (n7 < 0) n7 = rank - m * (n-1); if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1; if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1; if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1; if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1; } else if (bx == DMDA_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */ if (n3 < 0) n3 = rank + (m-1); if (n5 < 0) n5 = rank - (m-1); if ((n1 >= 0) && (n0 < 0)) n0 = rank-1; if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1; if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1; if ((n7 >= 0) && (n8 < 0)) n8 = rank+1; } ierr = PetscMalloc1(9,&dd->neighbors);CHKERRQ(ierr); dd->neighbors[0] = n0; dd->neighbors[1] = n1; dd->neighbors[2] = n2; dd->neighbors[3] = n3; dd->neighbors[4] = rank; dd->neighbors[5] = n5; dd->neighbors[6] = n6; dd->neighbors[7] = n7; dd->neighbors[8] = n8; if (stencil_type == DMDA_STENCIL_STAR) { /* save corner processor numbers */ sn0 = n0; sn2 = n2; sn6 = n6; sn8 = n8; n0 = n2 = n6 = n8 = -1; } ierr = PetscMalloc1((Xe-Xs)*(Ye-Ys),&idx);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)da,(Xe-Xs)*(Ye-Ys)*sizeof(PetscInt));CHKERRQ(ierr); nn = 0; xbase = bases[rank]; for (i=1; i<=s_y; i++) { if (n0 >= 0) { /* left below */ x_t = lx[n0 % m]; y_t = ly[(n0/m)]; s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } if (n1 >= 0) { /* directly below */ x_t = x; y_t = ly[(n1/m)]; s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1) + j; } if (n2 >= 0) { /* right below */ x_t = lx[n2 % m]; y_t = ly[(n2/m)]; s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } } for (i=0; i<y; i++) { if (n3 >= 0) { /* directly left */ x_t = lx[n3 % m]; /* y_t = y; */ s_t = bases[n3] + (i+1)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j; } for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */ if (n5 >= 0) { /* directly right */ x_t = lx[n5 % m]; /* y_t = y; */ s_t = bases[n5] + (i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j; } } for (i=1; i<=s_y; i++) { if (n6 >= 0) { /* left above */ x_t = lx[n6 % m]; /* y_t = ly[(n6/m)]; */ s_t = bases[n6] + (i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } if (n7 >= 0) { /* directly above */ x_t = x; /* y_t = ly[(n7/m)]; */ s_t = bases[n7] + (i-1)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1) + j; } if (n8 >= 0) { /* right above */ x_t = lx[n8 % m]; /* y_t = ly[(n8/m)]; */ s_t = bases[n8] + (i-1)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } } ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = VecScatterCreate(global,from,local,to,>ol);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); if (stencil_type == DMDA_STENCIL_STAR) { n0 = sn0; n2 = sn2; n6 = sn6; n8 = sn8; } if (((stencil_type == DMDA_STENCIL_STAR) || (bx && bx != DMDA_BOUNDARY_PERIODIC) || (by && by != DMDA_BOUNDARY_PERIODIC))) { /* Recompute the local to global mappings, this time keeping the information about the cross corner processor numbers and any ghosted but not periodic indices. */ nn = 0; xbase = bases[rank]; for (i=1; i<=s_y; i++) { if (n0 >= 0) { /* left below */ x_t = lx[n0 % m]; y_t = ly[(n0/m)]; s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0 && ys-Ys > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } if (n1 >= 0) { /* directly below */ x_t = x; y_t = ly[(n1/m)]; s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (ys-Ys > 0) { if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1) + j; } else { for (j=0; j<x; j++) idx[nn++] = -1; } } if (n2 >= 0) { /* right below */ x_t = lx[n2 % m]; y_t = ly[(n2/m)]; s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe> 0 && ys-Ys > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } } for (i=0; i<y; i++) { if (n3 >= 0) { /* directly left */ x_t = lx[n3 % m]; /* y_t = y; */ s_t = bases[n3] + (i+1)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0) { if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j; } else { for (j=0; j<s_x; j++) idx[nn++] = -1; } } for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */ if (n5 >= 0) { /* directly right */ x_t = lx[n5 % m]; /* y_t = y; */ s_t = bases[n5] + (i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe > 0) { if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j; } else { for (j=0; j<s_x; j++) idx[nn++] = -1; } } } for (i=1; i<=s_y; i++) { if (n6 >= 0) { /* left above */ x_t = lx[n6 % m]; /* y_t = ly[(n6/m)]; */ s_t = bases[n6] + (i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0 && Ye-ye > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } if (n7 >= 0) { /* directly above */ x_t = x; /* y_t = ly[(n7/m)]; */ s_t = bases[n7] + (i-1)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (Ye-ye > 0) { if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1) + j; } else { for (j=0; j<x; j++) idx[nn++] = -1; } } if (n8 >= 0) { /* right above */ x_t = lx[n8 % m]; /* y_t = ly[(n8/m)]; */ s_t = bases[n8] + (i-1)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe > 0 && Ye-ye > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } } } /* Set the local to global ordering in the global vector, this allows use of VecSetValuesLocal(). */ ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_OWN_POINTER,<ogis);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingCreateIS(ltogis,&da->ltogmap);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr); ierr = ISDestroy(<ogis);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingBlock(da->ltogmap,dd->w,&da->ltogmapb);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr); ierr = PetscFree2(bases,ldims);CHKERRQ(ierr); dd->m = m; dd->n = n; /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */ dd->xs = xs*dof; dd->xe = xe*dof; dd->ys = ys; dd->ye = ye; dd->zs = 0; dd->ze = 1; dd->Xs = Xs*dof; dd->Xe = Xe*dof; dd->Ys = Ys; dd->Ye = Ye; dd->Zs = 0; dd->Ze = 1; ierr = VecDestroy(&local);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); dd->gtol = gtol; dd->ltog = ltog; dd->base = base; da->ops->view = DMView_DA_2d; dd->ltol = NULL; dd->ao = NULL; PetscFunctionReturn(0); }
/* This interpolates faces for cells at some stratum */ static PetscErrorCode DMPlexInterpolateFaces_Internal(DM dm, PetscInt cellDepth, DM idm) { DMLabel subpointMap; PetscHashIJKL faceTable; PetscInt *pStart, *pEnd; PetscInt cellDim, depth, faceDepth = cellDepth, numPoints = 0, faceSizeAll = 0, face, c, d; PetscErrorCode ierr; PetscFunctionBegin; ierr = DMPlexGetDimension(dm, &cellDim);CHKERRQ(ierr); /* HACK: I need a better way to determine face dimension, or an alternative to GetFaces() */ ierr = DMPlexGetSubpointMap(dm, &subpointMap);CHKERRQ(ierr); if (subpointMap) ++cellDim; ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr); ++depth; ++cellDepth; cellDim -= depth - cellDepth; ierr = PetscMalloc2(depth+1,&pStart,depth+1,&pEnd);CHKERRQ(ierr); for (d = depth-1; d >= faceDepth; --d) { ierr = DMPlexGetDepthStratum(dm, d, &pStart[d+1], &pEnd[d+1]);CHKERRQ(ierr); } ierr = DMPlexGetDepthStratum(dm, -1, NULL, &pStart[faceDepth]);CHKERRQ(ierr); pEnd[faceDepth] = pStart[faceDepth]; for (d = faceDepth-1; d >= 0; --d) { ierr = DMPlexGetDepthStratum(dm, d, &pStart[d], &pEnd[d]);CHKERRQ(ierr); } if (pEnd[cellDepth] > pStart[cellDepth]) {ierr = DMPlexGetFaces_Internal(dm, cellDim, pStart[cellDepth], NULL, &faceSizeAll, NULL);CHKERRQ(ierr);} if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll); ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr); for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) { const PetscInt *cellFaces; PetscInt numCellFaces, faceSize, cf; ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll); for (cf = 0; cf < numCellFaces; ++cf) { const PetscInt *cellFace = &cellFaces[cf*faceSize]; PetscHashIJKLKey key; PetscHashIJKLIter missing, iter; if (faceSize == 2) { key.i = PetscMin(cellFace[0], cellFace[1]); key.j = PetscMax(cellFace[0], cellFace[1]); key.k = 0; key.l = 0; } else { key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0; ierr = PetscSortInt(faceSize, (PetscInt *) &key); } ierr = PetscHashIJKLPut(faceTable, key, &missing, &iter);CHKERRQ(ierr); if (missing) {ierr = PetscHashIJKLSet(faceTable, iter, face++);CHKERRQ(ierr);} } ierr = DMPlexRestoreFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); } pEnd[faceDepth] = face; ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr); /* Count new points */ for (d = 0; d <= depth; ++d) { numPoints += pEnd[d]-pStart[d]; } ierr = DMPlexSetChart(idm, 0, numPoints);CHKERRQ(ierr); /* Set cone sizes */ for (d = 0; d <= depth; ++d) { PetscInt coneSize, p; if (d == faceDepth) { for (p = pStart[d]; p < pEnd[d]; ++p) { /* I see no way to do this if we admit faces of different shapes */ ierr = DMPlexSetConeSize(idm, p, faceSizeAll);CHKERRQ(ierr); } } else if (d == cellDepth) { for (p = pStart[d]; p < pEnd[d]; ++p) { /* Number of cell faces may be different from number of cell vertices*/ ierr = DMPlexGetFaces_Internal(dm, cellDim, p, &coneSize, NULL, NULL);CHKERRQ(ierr); ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr); } } else { for (p = pStart[d]; p < pEnd[d]; ++p) { ierr = DMPlexGetConeSize(dm, p, &coneSize);CHKERRQ(ierr); ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr); } } } ierr = DMSetUp(idm);CHKERRQ(ierr); /* Get face cones from subsets of cell vertices */ if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll); ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr); for (d = depth; d > cellDepth; --d) { const PetscInt *cone; PetscInt p; for (p = pStart[d]; p < pEnd[d]; ++p) { ierr = DMPlexGetCone(dm, p, &cone);CHKERRQ(ierr); ierr = DMPlexSetCone(idm, p, cone);CHKERRQ(ierr); ierr = DMPlexGetConeOrientation(dm, p, &cone);CHKERRQ(ierr); ierr = DMPlexSetConeOrientation(idm, p, cone);CHKERRQ(ierr); } } for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) { const PetscInt *cellFaces; PetscInt numCellFaces, faceSize, cf; ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll); for (cf = 0; cf < numCellFaces; ++cf) { const PetscInt *cellFace = &cellFaces[cf*faceSize]; PetscHashIJKLKey key; PetscHashIJKLIter missing, iter; if (faceSize == 2) { key.i = PetscMin(cellFace[0], cellFace[1]); key.j = PetscMax(cellFace[0], cellFace[1]); key.k = 0; key.l = 0; } else { key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0; ierr = PetscSortInt(faceSize, (PetscInt *) &key); } ierr = PetscHashIJKLPut(faceTable, key, &missing, &iter);CHKERRQ(ierr); if (missing) { ierr = DMPlexSetCone(idm, face, cellFace);CHKERRQ(ierr); ierr = PetscHashIJKLSet(faceTable, iter, face);CHKERRQ(ierr); ierr = DMPlexInsertCone(idm, c, cf, face++);CHKERRQ(ierr); } else { const PetscInt *cone; PetscInt coneSize, ornt, i, j, f; ierr = PetscHashIJKLGet(faceTable, iter, &f);CHKERRQ(ierr); ierr = DMPlexInsertCone(idm, c, cf, f);CHKERRQ(ierr); /* Orient face: Do not allow reverse orientation at the first vertex */ ierr = DMPlexGetConeSize(idm, f, &coneSize);CHKERRQ(ierr); ierr = DMPlexGetCone(idm, f, &cone);CHKERRQ(ierr); if (coneSize != faceSize) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of face vertices %D for face %D should be %D", coneSize, f, faceSize); /* - First find the initial vertex */ for (i = 0; i < faceSize; ++i) if (cellFace[0] == cone[i]) break; /* - Try forward comparison */ for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+j)%faceSize]) break; if (j == faceSize) { if ((faceSize == 2) && (i == 1)) ornt = -2; else ornt = i; } else { /* - Try backward comparison */ for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+faceSize-j)%faceSize]) break; if (j == faceSize) { if (i == 0) ornt = -faceSize; else ornt = -i; } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not determine face orientation"); } ierr = DMPlexInsertConeOrientation(idm, c, cf, ornt);CHKERRQ(ierr); } } ierr = DMPlexRestoreFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr); } if (face != pEnd[faceDepth]) SETERRQ2(PetscObjectComm((PetscObject) dm), PETSC_ERR_PLIB, "Invalid number of faces %D should be %D", face-pStart[faceDepth], pEnd[faceDepth]-pStart[faceDepth]); ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr); ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr); ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr); ierr = DMPlexSymmetrize(idm);CHKERRQ(ierr); ierr = DMPlexStratify(idm);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc, char** argv) { PC pc; PetscErrorCode ierr; PetscInt m, nn, M, j, k, ne = 4; PetscReal* coords; Vec x, rhs; Mat A; KSP ksp; PetscMPIInt npe, rank; PetscInitialize(&argc, &argv, NULL, NULL); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &npe); CHKERRQ(ierr); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Linear elasticity in 3D", ""); { char nestring[256]; ierr = PetscSNPrintf(nestring, sizeof nestring, "number of elements in each direction, ne+1 must be a multiple of %D (sizes^{1/3})", (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5)); ierr = PetscOptionsInt("-ne", nestring, "", ne, &ne, NULL); } ierr = PetscOptionsEnd(); CHKERRQ(ierr); const HpddmOption* const opt = HpddmOptionGet(); { HpddmOptionParse(opt, argc, argv, rank == 0); if (rank) HpddmOptionRemove(opt, "verbosity"); } nn = ne + 1; M = 3 * nn * nn * nn; if (npe == 2) { if (rank == 1) m = 0; else m = nn * nn * nn; npe = 1; } else { m = nn * nn * nn / npe; if (rank == npe - 1) m = nn * nn * nn - (npe - 1) * m; } m *= 3; ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); int i; { PetscInt Istart, Iend, jj, ic; const PetscInt NP = (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5); const PetscInt ipx = rank % NP, ipy = (rank % (NP * NP)) / NP, ipz = rank / (NP * NP); const PetscInt Ni0 = ipx * (nn / NP), Nj0 = ipy * (nn / NP), Nk0 = ipz * (nn / NP); const PetscInt Ni1 = Ni0 + (m > 0 ? (nn / NP) : 0), Nj1 = Nj0 + (nn / NP), Nk1 = Nk0 + (nn / NP); PetscInt *d_nnz, *o_nnz, osz[4] = {0, 9, 15, 19}, nbc; if (npe != NP * NP * NP) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "npe=%d: npe^{1/3} must be integer", npe); if (nn != NP * (nn / NP)) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "-ne %d: (ne+1)%(npe^{1/3}) must equal zero", ne); ierr = PetscMalloc1(m + 1, &d_nnz); CHKERRQ(ierr); ierr = PetscMalloc1(m + 1, &o_nnz); CHKERRQ(ierr); for (i = Ni0, ic = 0; i < Ni1; i++) { for (j = Nj0; j < Nj1; j++) { for (k = Nk0; k < Nk1; k++) { nbc = 0; if (i == Ni0 || i == Ni1 - 1) nbc++; if (j == Nj0 || j == Nj1 - 1) nbc++; if (k == Nk0 || k == Nk1 - 1) nbc++; for (jj = 0; jj < 3; jj++, ic++) { d_nnz[ic] = 3 * (27 - osz[nbc]); o_nnz[ic] = 3 * osz[nbc]; } } } } if (ic != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "ic %D does not equal m %D", ic, m); ierr = MatCreate(PETSC_COMM_WORLD, &A); CHKERRQ(ierr); ierr = MatSetSizes(A, m, m, M, M); CHKERRQ(ierr); ierr = MatSetBlockSize(A, 3); CHKERRQ(ierr); ierr = MatSetType(A, MATAIJ); CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A, 0, d_nnz); CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz); CHKERRQ(ierr); ierr = PetscFree(d_nnz); CHKERRQ(ierr); ierr = PetscFree(o_nnz); CHKERRQ(ierr); ierr = MatGetOwnershipRange(A, &Istart, &Iend); CHKERRQ(ierr); if (m != Iend - Istart) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "m %D does not equal Iend %D - Istart %D", m, Iend, Istart); ierr = VecCreate(PETSC_COMM_WORLD, &x); CHKERRQ(ierr); ierr = VecSetSizes(x, m, M); CHKERRQ(ierr); ierr = VecSetBlockSize(x, 3); CHKERRQ(ierr); ierr = VecSetFromOptions(x); CHKERRQ(ierr); ierr = VecDuplicate(x, &rhs); CHKERRQ(ierr); ierr = PetscMalloc1(m + 1, &coords); CHKERRQ(ierr); coords[m] = -99.0; PetscReal h = 1.0 / ne; for (i = Ni0, ic = 0; i < Ni1; i++) { for (j = Nj0; j < Nj1; j++) { for (k = Nk0; k < Nk1; k++, ic++) { coords[3 * ic] = h * (PetscReal)i; coords[3 * ic + 1] = h * (PetscReal)j; coords[3 * ic + 2] = h * (PetscReal)k; } } } } PetscReal s_r[SIZE_ARRAY_R] = {30, 0.1, 20, 10}; PetscReal x_r[SIZE_ARRAY_R] = {0.5, 0.4, 0.4, 0.4}; PetscReal y_r[SIZE_ARRAY_R] = {0.5, 0.5, 0.4, 0.4}; PetscReal z_r[SIZE_ARRAY_R] = {0.5, 0.45, 0.4, 0.35}; PetscReal r[SIZE_ARRAY_R] = {0.5, 0.5, 0.4, 0.4}; AssembleSystem(A, rhs, s_r[0], x_r[0], y_r[0], z_r[0], r[0], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); MatNullSpace matnull; Vec vec_coords; PetscScalar* c; ierr = VecCreate(MPI_COMM_WORLD, &vec_coords); CHKERRQ(ierr); ierr = VecSetBlockSize(vec_coords, 3); CHKERRQ(ierr); ierr = VecSetSizes(vec_coords, m, PETSC_DECIDE); CHKERRQ(ierr); ierr = VecSetUp(vec_coords); CHKERRQ(ierr); ierr = VecGetArray(vec_coords, &c); CHKERRQ(ierr); for (i = 0; i < m; i++) c[i] = coords[i]; ierr = VecRestoreArray(vec_coords, &c); CHKERRQ(ierr); ierr = MatNullSpaceCreateRigidBody(vec_coords, &matnull); CHKERRQ(ierr); ierr = MatSetNearNullSpace(A, matnull); CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&matnull); CHKERRQ(ierr); ierr = VecDestroy(&vec_coords); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); double time = MPI_Wtime(); ierr = KSPSetUp(ksp); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- PC setup = %f\n", time); CHKERRQ(ierr); float t_time[SIZE_ARRAY_R]; int t_its[SIZE_ARRAY_R]; { { ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); ierr = KSPReset(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; PetscInt its; ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); t_its[i] = its; ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); if (i == (SIZE_ARRAY_R - 1)) AssembleSystem(A, rhs, s_r[0], x_r[0], y_r[0], z_r[0], r[0], ne, npe, rank, nn, m); else AssembleSystem(A, rhs, s_r[i + 1], x_r[i + 1], y_r[i + 1], z_r[i + 1], r[i + 1], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_R > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } { ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); HpddmCustomOperator H; H._A = A; H._M = pc; H._mv = mv; H._precond = precond; H._b = rhs; H._x = x; int n; MatGetLocalSize(A, &n, NULL); { ierr = VecZeroEntries(x); K* pt_rhs; K* pt_x; VecGetArray(rhs, &pt_rhs); VecGetArray(x, &pt_x); int previous = HpddmOptionVal(opt, "verbosity"); if (previous > 0) HpddmOptionRemove(opt, "verbosity"); HpddmCustomOperatorSolve(&H, n, H._mv, H._precond, pt_rhs, pt_x, 1, &PETSC_COMM_WORLD); if (previous > 0) { char buffer[20]; snprintf(buffer, 20, "%d", previous); char* concat = malloc(strlen("-hpddm_verbosity ") + strlen(buffer) + 1); strcpy(concat, "-hpddm_verbosity "); strcat(concat, buffer); HpddmOptionParseString(opt, concat); free(concat); } VecRestoreArray(x, &pt_x); VecRestoreArray(rhs, &pt_rhs); previous = HpddmOptionVal(opt, "krylov_method"); if(previous == 4 || previous == 5) HpddmDestroyRecycling(); ierr = KSPReset(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); K* pt_rhs; K* pt_x; VecGetArray(rhs, &pt_rhs); VecGetArray(x, &pt_x); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); t_its[i] = HpddmCustomOperatorSolve(&H, n, H._mv, H._precond, pt_rhs, pt_x, 1, &PETSC_COMM_WORLD); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; VecRestoreArray(x, &pt_x); VecRestoreArray(rhs, &pt_rhs); ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); if (i != (SIZE_ARRAY_R - 1)) { AssembleSystem(A, rhs, s_r[i + 1], x_r[i + 1], y_r[i + 1], z_r[i + 1], r[i + 1], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_R > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } ierr = KSPDestroy(&ksp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&rhs); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = PetscFree(coords); CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/* DMPlexGetRawFaces_Internal - Gets groups of vertices that correspond to faces for the given cone */ PetscErrorCode DMPlexGetRawFaces_Internal(DM dm, PetscInt dim, PetscInt coneSize, const PetscInt cone[], PetscInt *numFaces, PetscInt *faceSize, const PetscInt *faces[]) { PetscInt *facesTmp; PetscInt maxConeSize, maxSupportSize; PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(dm, DM_CLASSID, 1); ierr = DMPlexGetMaxSizes(dm, &maxConeSize, &maxSupportSize);CHKERRQ(ierr); if (faces) {ierr = DMGetWorkArray(dm, PetscSqr(PetscMax(maxConeSize, maxSupportSize)), PETSC_INT, &facesTmp);CHKERRQ(ierr);} switch (dim) { case 1: switch (coneSize) { case 2: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; *faces = facesTmp; } if (numFaces) *numFaces = 2; if (faceSize) *faceSize = 1; break; default: SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim); } break; case 2: switch (coneSize) { case 3: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[1]; facesTmp[3] = cone[2]; facesTmp[4] = cone[2]; facesTmp[5] = cone[0]; *faces = facesTmp; } if (numFaces) *numFaces = 3; if (faceSize) *faceSize = 2; break; case 4: /* Vertices follow right hand rule */ if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[1]; facesTmp[3] = cone[2]; facesTmp[4] = cone[2]; facesTmp[5] = cone[3]; facesTmp[6] = cone[3]; facesTmp[7] = cone[0]; *faces = facesTmp; } if (numFaces) *numFaces = 4; if (faceSize) *faceSize = 2; break; default: SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim); } break; case 3: switch (coneSize) { case 3: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[1]; facesTmp[3] = cone[2]; facesTmp[4] = cone[2]; facesTmp[5] = cone[0]; *faces = facesTmp; } if (numFaces) *numFaces = 3; if (faceSize) *faceSize = 2; break; case 4: /* Vertices of first face follow right hand rule and normal points away from last vertex */ if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[2]; facesTmp[3] = cone[0]; facesTmp[4] = cone[3]; facesTmp[5] = cone[1]; facesTmp[6] = cone[0]; facesTmp[7] = cone[2]; facesTmp[8] = cone[3]; facesTmp[9] = cone[2]; facesTmp[10] = cone[1]; facesTmp[11] = cone[3]; *faces = facesTmp; } if (numFaces) *numFaces = 4; if (faceSize) *faceSize = 3; break; case 8: if (faces) { facesTmp[0] = cone[0]; facesTmp[1] = cone[1]; facesTmp[2] = cone[2]; facesTmp[3] = cone[3]; /* Bottom */ facesTmp[4] = cone[4]; facesTmp[5] = cone[5]; facesTmp[6] = cone[6]; facesTmp[7] = cone[7]; /* Top */ facesTmp[8] = cone[0]; facesTmp[9] = cone[3]; facesTmp[10] = cone[5]; facesTmp[11] = cone[4]; /* Front */ facesTmp[12] = cone[2]; facesTmp[13] = cone[1]; facesTmp[14] = cone[7]; facesTmp[15] = cone[6]; /* Back */ facesTmp[16] = cone[3]; facesTmp[17] = cone[2]; facesTmp[18] = cone[6]; facesTmp[19] = cone[5]; /* Right */ facesTmp[20] = cone[0]; facesTmp[21] = cone[4]; facesTmp[22] = cone[7]; facesTmp[23] = cone[1]; /* Left */ *faces = facesTmp; } if (numFaces) *numFaces = 6; if (faceSize) *faceSize = 4; break; default: SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim); } break; default: SETERRQ1(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Dimension %D not supported", dim); } PetscFunctionReturn(0); }
PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx fetidpmat_ctx ) { PetscErrorCode ierr; PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; PCBDDCGraph mat_graph=pcbddc->mat_graph; Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; MPI_Comm comm; Mat ScalingMat; Vec lambda_global; IS IS_l2g_lambda; IS subset,subset_mult,subset_n; PetscBool skip_node,fully_redundant; PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; PetscInt cum,n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; PetscMPIInt rank,size,buf_size,neigh; PetscScalar scalar_value; PetscInt *vertex_indices; PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1; const PetscInt *aux_global_numbering; PetscInt *aux_sums,*cols_B_delta,*l2g_indices; PetscScalar *array,*scaling_factors,*vals_B_delta; PetscInt *aux_local_numbering_2; /* For communication of scaling factors */ PetscInt *ptrs_buffer,neigh_position; PetscScalar **all_factors,*send_buffer,*recv_buffer; MPI_Request *send_reqs,*recv_reqs; /* tests */ Vec test_vec; PetscBool test_fetidp; PetscViewer viewer; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)(fetidpmat_ctx->pc),&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* Default type of lagrange multipliers is non-redundant */ fully_redundant = fetidpmat_ctx->fully_redundant; /* Evaluate local and global number of lagrange multipliers */ ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); n_local_lambda = 0; partial_sum = 0; n_boundary_dofs = 0; s = 0; /* Get Vertices used to define the BDDC */ n_vertices = pcbddc->n_vertices; vertex_indices = pcbddc->local_primal_ref_node; dual_size = pcis->n_B-n_vertices; ierr = PetscMalloc1(dual_size,&dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_2);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<pcis->n;i++){ j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ if ( j > 0 ) { n_boundary_dofs++; } skip_node = PETSC_FALSE; if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ skip_node = PETSC_TRUE; s++; } if (j < 1) { skip_node = PETSC_TRUE; } if ( !skip_node ) { if (fully_redundant) { /* fully redundant set of lagrange multipliers */ n_lambda_for_dof = (j*(j+1))/2; } else { n_lambda_for_dof = j; } n_local_lambda += j; /* needed to evaluate global number of lagrange multipliers */ array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ /* store some data needed */ dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; aux_local_numbering_1[partial_sum] = i; aux_local_numbering_2[partial_sum] = n_lambda_for_dof; partial_sum++; } } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); fetidpmat_ctx->n_lambda = (PetscInt)PetscRealPart(scalar_value); /* compute global ordering of lagrange multipliers and associate l2g map */ ierr = ISCreateGeneral(comm,partial_sum,aux_local_numbering_1,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); ierr = ISDestroy(&subset_n);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,partial_sum,aux_local_numbering_2,PETSC_OWN_POINTER,&subset_mult);CHKERRQ(ierr); ierr = ISRenumber(subset,subset_mult,&i,&subset_n);CHKERRQ(ierr); ierr = ISDestroy(&subset);CHKERRQ(ierr); if (i != fetidpmat_ctx->n_lambda) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Global number of multipliers mismatch! (%d!=%d)\n",fetidpmat_ctx->n_lambda,i); /* init data for scaling factors exchange */ partial_sum = 0; ierr = PetscMalloc1(pcis->n_neigh,&ptrs_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&send_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&recv_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&all_factors);CHKERRQ(ierr); ptrs_buffer[0]=0; for (i=1;i<pcis->n_neigh;i++) { partial_sum += pcis->n_shared[i]; ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; } ierr = PetscMalloc1(partial_sum,&send_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&all_factors[0]);CHKERRQ(ierr); for (i=0;i<pcis->n-1;i++) { j = mat_graph->count[i]; all_factors[i+1]=all_factors[i]+j; } /* scatter B scaling to N vec */ ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* communications */ ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&array);CHKERRQ(ierr); for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; } ierr = PetscMPIIntCast(ptrs_buffer[i]-ptrs_buffer[i-1],&buf_size);CHKERRQ(ierr); ierr = PetscMPIIntCast(pcis->neigh[i],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&send_reqs[i-1]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); } ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&array);CHKERRQ(ierr); ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* put values in correct places */ for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { k = pcis->shared[i][j]; neigh_position = 0; while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; } } ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree(send_reqs);CHKERRQ(ierr); ierr = PetscFree(recv_reqs);CHKERRQ(ierr); ierr = PetscFree(send_buffer);CHKERRQ(ierr); ierr = PetscFree(recv_buffer);CHKERRQ(ierr); ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); /* Compute B and B_delta (local actions) */ ierr = PetscMalloc1(pcis->n_neigh,&aux_sums);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&l2g_indices);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&vals_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&cols_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&scaling_factors);CHKERRQ(ierr); ierr = ISGetIndices(subset_n,&aux_global_numbering);CHKERRQ(ierr); partial_sum=0; cum = 0; for (i=0;i<dual_size;i++) { n_global_lambda = aux_global_numbering[cum]; j = mat_graph->count[aux_local_numbering_1[i]]; aux_sums[0]=0; for (s=1;s<j;s++) { aux_sums[s]=aux_sums[s-1]+j-s+1; } array = all_factors[aux_local_numbering_1[i]]; n_neg_values = 0; while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values] < rank) {n_neg_values++;} n_pos_values = j - n_neg_values; if (fully_redundant) { for (s=0;s<n_neg_values;s++) { l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=-1.0; scaling_factors[partial_sum+s]=array[s]; } for (s=0;s<n_pos_values;s++) { l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s+n_neg_values]=1.0; scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; } partial_sum += j; } else { /* l2g_indices and default cols and vals of B_delta */ for (s=0;s<j;s++) { l2g_indices [partial_sum+s]=n_global_lambda+s; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=0.0; } /* B_delta */ if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ vals_B_delta [partial_sum+n_neg_values-1]=-1.0; } if ( n_neg_values < j ) { /* there's a rank next to me to the right */ vals_B_delta [partial_sum+n_neg_values]=1.0; } /* scaling as in Klawonn-Widlund 1999*/ for (s=0;s<n_neg_values;s++) { scalar_value = 0.0; for (k=0;k<s+1;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s] = -scalar_value; } for (s=0;s<n_pos_values;s++) { scalar_value = 0.0; for (k=s+n_neg_values;k<j;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s+n_neg_values] = scalar_value; } partial_sum += j; } cum += aux_local_numbering_2[i]; } ierr = ISRestoreIndices(subset_n,&aux_global_numbering);CHKERRQ(ierr); ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); ierr = ISDestroy(&subset_n);CHKERRQ(ierr); ierr = PetscFree(aux_sums);CHKERRQ(ierr); ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); ierr = PetscFree(all_factors);CHKERRQ(ierr); /* Local to global mapping of fetidpmat */ ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); /* Create local part of B_delta */ ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta);CHKERRQ(ierr); ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (fully_redundant) { ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat);CHKERRQ(ierr); ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); } else { ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = PetscFree(scaling_factors);CHKERRQ(ierr); ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); /* Create some vectors needed by fetidp */ ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); test_fetidp = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); if (test_fetidp && !pcbddc->use_deluxe_scaling) { PetscReal real_value; ierr = PetscViewerASCIIGetStdout(comm,&viewer);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); if (fully_redundant) { ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST A/B: Test numbering of global lambda dofs */ /******************************************************************/ ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecNorm(test_vec,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (fully_redundant) { ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,PetscRealPart(scalar_value)-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } /******************************************************************/ /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ /* This is the meaning of the B matrix */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ /* E_D = R_D^TR */ /* P_D = B_{D,delta}^T B_{delta} */ /* eq.44 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ /* compute a random vector in \widetilde{W} */ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* store w for final comparison */ ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Average operator E_D : results stored in pcis->vec2_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec2_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* test E_D=I-P_D */ scalar_value = 1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ /* eq.48 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* scaling */ ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (!fully_redundant) { /******************************************************************/ /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); } } /* final cleanup */ ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode IntegrateResidualBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt field, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (*f0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]), void (*f1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]), PetscScalar elemVec[], AppCtx *user) { const PetscInt debug = user->debug; const PetscInt dim = SPATIAL_DIM_0; PetscInt cOffset = 0; PetscInt eOffset = 0, e; PetscErrorCode ierr; PetscFunctionBeginUser; ierr = PetscLogEventBegin(user->integrateResCPUEvent,0,0,0,0);CHKERRQ(ierr); for (e = 0; e < Ne; ++e) { const PetscReal detJ = jacobianDeterminants[e]; const PetscReal *invJ = &jacobianInverses[e*dim*dim]; const PetscInt Nq = quad[field].numQuadPoints; PetscScalar f0[NUM_QUADRATURE_POINTS_0*dim]; PetscScalar f1[NUM_QUADRATURE_POINTS_0*dim*dim]; PetscInt q, f; if (Nq > NUM_QUADRATURE_POINTS_0) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of quadrature points %d should be <= %d", Nq, NUM_QUADRATURE_POINTS_0); if (debug > 1) { ierr = PetscPrintf(PETSC_COMM_SELF, " detJ: %g\n", detJ);CHKERRQ(ierr); ierr = DMPrintCellMatrix(e, "invJ", dim, dim, invJ);CHKERRQ(ierr); } for (q = 0; q < Nq; ++q) { if (debug) {ierr = PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);CHKERRQ(ierr);} PetscScalar u[dim+1]; PetscScalar gradU[dim*(dim+1)]; PetscInt fOffset = 0; PetscInt dOffset = cOffset; const PetscInt Ncomp = quad[field].numComponents; const PetscReal *quadWeights = quad[field].quadWeights; PetscInt d, f, i; for (d = 0; d <= dim; ++d) u[d] = 0.0; for (d = 0; d < dim*(dim+1); ++d) gradU[d] = 0.0; for (f = 0; f < numFields; ++f) { const PetscInt Nb = quad[f].numBasisFuncs; const PetscInt Ncomp = quad[f].numComponents; const PetscReal *basis = quad[f].basis; const PetscReal *basisDer = quad[f].basisDer; PetscInt b, comp; for (b = 0; b < Nb; ++b) { for (comp = 0; comp < Ncomp; ++comp) { const PetscInt cidx = b*Ncomp+comp; PetscScalar realSpaceDer[dim]; PetscInt d, g; u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx]; for (d = 0; d < dim; ++d) { realSpaceDer[d] = 0.0; for (g = 0; g < dim; ++g) { realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g]; } gradU[(fOffset+comp)*dim+d] += coefficients[dOffset+cidx]*realSpaceDer[d]; } } } if (debug > 1) { PetscInt d; for (comp = 0; comp < Ncomp; ++comp) { ierr = PetscPrintf(PETSC_COMM_SELF, " u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);CHKERRQ(ierr); for (d = 0; d < dim; ++d) { ierr = PetscPrintf(PETSC_COMM_SELF, " gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, gradU[(fOffset+comp)*dim+d]);CHKERRQ(ierr); } } } fOffset += Ncomp; dOffset += Nb*Ncomp; } f0_func(u, gradU, &f0[q*Ncomp]); for (i = 0; i < Ncomp; ++i) { f0[q*Ncomp+i] *= detJ*quadWeights[q]; } f1_func(u, gradU, &f1[q*Ncomp*dim]); for (i = 0; i < Ncomp*dim; ++i) { f1[q*Ncomp*dim+i] *= detJ*quadWeights[q]; } if (debug > 1) { PetscInt c,d; for (c = 0; c < Ncomp; ++c) { ierr = PetscPrintf(PETSC_COMM_SELF, " f0[%d]: %g\n", c, f0[q*Ncomp+c]);CHKERRQ(ierr); for (d = 0; d < dim; ++d) { ierr = PetscPrintf(PETSC_COMM_SELF, " f1[%d]_%c: %g\n", c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);CHKERRQ(ierr); } } } if (q == Nq-1) cOffset = dOffset; } for (f = 0; f < numFields; ++f) { const PetscInt Nq = quad[f].numQuadPoints; const PetscInt Nb = quad[f].numBasisFuncs; const PetscInt Ncomp = quad[f].numComponents; const PetscReal *basis = quad[f].basis; const PetscReal *basisDer = quad[f].basisDer; PetscInt b, comp; if (f == field) { for (b = 0; b < Nb; ++b) { for (comp = 0; comp < Ncomp; ++comp) { const PetscInt cidx = b*Ncomp+comp; PetscInt q; elemVec[eOffset+cidx] = 0.0; for (q = 0; q < Nq; ++q) { PetscScalar realSpaceDer[dim]; PetscInt d, g; elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp]; for (d = 0; d < dim; ++d) { realSpaceDer[d] = 0.0; for (g = 0; g < dim; ++g) { realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g]; } elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d]; } } } } if (debug > 1) { PetscInt b, comp; for (b = 0; b < Nb; ++b) { for (comp = 0; comp < Ncomp; ++comp) { ierr = PetscPrintf(PETSC_COMM_SELF, " elemVec[%d,%d]: %g\n", b, comp, elemVec[eOffset+b*Ncomp+comp]);CHKERRQ(ierr); } } } } eOffset += Nb*Ncomp; } } /* ierr = PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne);CHKERRQ(ierr); */ ierr = PetscLogEventEnd(user->integrateResCPUEvent,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); };
/* - Checks face match - Flips non-matching - Inserts faces of support cells in FIFO */ static PetscErrorCode DMPlexCheckFace_Internal(DM dm, PetscInt *faceFIFO, PetscInt *fTop, PetscInt *fBottom, PetscInt cStart, PetscInt fStart, PetscInt fEnd, PetscBT seenCells, PetscBT flippedCells, PetscBT seenFaces) { const PetscInt *support, *coneA, *coneB, *coneOA, *coneOB; PetscInt supportSize, coneSizeA, coneSizeB, posA = -1, posB = -1; PetscInt face, dim, seenA, flippedA, seenB, flippedB, mismatch, c; PetscErrorCode ierr; PetscFunctionBegin; face = faceFIFO[(*fTop)++]; ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr); ierr = DMPlexGetSupport(dm, face, &support);CHKERRQ(ierr); if (supportSize < 2) PetscFunctionReturn(0); if (supportSize != 2) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Faces should separate only two cells, not %d", supportSize); seenA = PetscBTLookup(seenCells, support[0]-cStart); flippedA = PetscBTLookup(flippedCells, support[0]-cStart) ? 1 : 0; seenB = PetscBTLookup(seenCells, support[1]-cStart); flippedB = PetscBTLookup(flippedCells, support[1]-cStart) ? 1 : 0; ierr = DMPlexGetConeSize(dm, support[0], &coneSizeA);CHKERRQ(ierr); ierr = DMPlexGetConeSize(dm, support[1], &coneSizeB);CHKERRQ(ierr); ierr = DMPlexGetCone(dm, support[0], &coneA);CHKERRQ(ierr); ierr = DMPlexGetCone(dm, support[1], &coneB);CHKERRQ(ierr); ierr = DMPlexGetConeOrientation(dm, support[0], &coneOA);CHKERRQ(ierr); ierr = DMPlexGetConeOrientation(dm, support[1], &coneOB);CHKERRQ(ierr); for (c = 0; c < coneSizeA; ++c) { if (!PetscBTLookup(seenFaces, coneA[c]-fStart)) { faceFIFO[(*fBottom)++] = coneA[c]; ierr = PetscBTSet(seenFaces, coneA[c]-fStart);CHKERRQ(ierr); } if (coneA[c] == face) posA = c; if (*fBottom > fEnd-fStart) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face %d was pushed exceeding capacity %d > %d", coneA[c], *fBottom, fEnd-fStart); } if (posA < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Face %d could not be located in cell %d", face, support[0]); for (c = 0; c < coneSizeB; ++c) { if (!PetscBTLookup(seenFaces, coneB[c]-fStart)) { faceFIFO[(*fBottom)++] = coneB[c]; ierr = PetscBTSet(seenFaces, coneB[c]-fStart);CHKERRQ(ierr); } if (coneB[c] == face) posB = c; if (*fBottom > fEnd-fStart) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face %d was pushed exceeding capacity %d > %d", coneA[c], *fBottom, fEnd-fStart); } if (posB < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Face %d could not be located in cell %d", face, support[1]); if (dim == 1) { mismatch = posA == posB; } else { mismatch = coneOA[posA] == coneOB[posB]; } if (mismatch ^ (flippedA ^ flippedB)) { if (seenA && seenB) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Previously seen cells %d and %d do not match: Fault mesh is non-orientable", support[0], support[1]); if (!seenA && !flippedA) { ierr = PetscBTSet(flippedCells, support[0]-cStart);CHKERRQ(ierr); } else if (!seenB && !flippedB) { ierr = PetscBTSet(flippedCells, support[1]-cStart);CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Inconsistent mesh orientation: Fault mesh is non-orientable"); } else if (mismatch && flippedA && flippedB) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempt to flip already flipped cell: Fault mesh is non-orientable"); ierr = PetscBTSet(seenCells, support[0]-cStart);CHKERRQ(ierr); ierr = PetscBTSet(seenCells, support[1]-cStart);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* FormFunctionLocal - Form the local residual F from the local input X Input Parameters: + dm - The mesh . X - Local input vector - user - The user context Output Parameter: . F - Local output vector Note: We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator, like a GPU, or vectorize on a multicore machine. .seealso: FormJacobianLocal() */ PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user) { const PetscInt debug = user->debug; const PetscInt dim = user->dim; PetscReal *coords, *v0, *J, *invJ, *detJ; PetscScalar *elemVec, *u; PetscInt cellDof = 0; PetscInt maxQuad = 0; PetscInt jacSize = dim*dim; PetscInt numCells, cStart, cEnd, c, field, d; PetscErrorCode ierr; PetscFunctionBeginUser; ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr); ierr = VecSet(F, 0.0);CHKERRQ(ierr); ierr = DMDAGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr); numCells = cEnd - cStart; for (field = 0; field < numFields; ++field) { cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents; maxQuad = PetscMax(maxQuad, user->q[field].numQuadPoints); } for (d = 0; d < dim; ++d) jacSize *= maxQuad; ierr = PetscMalloc3(dim,&coords,dim,&v0,jacSize,&J);CHKERRQ(ierr); ierr = PetscMalloc4(numCells*cellDof,&u,numCells*jacSize,&invJ,numCells*maxQuad,&detJ,numCells*cellDof,&elemVec);CHKERRQ(ierr); for (c = cStart; c < cEnd; ++c) { PetscScalar *x = NULL; PetscInt i; ierr = DMDAComputeCellGeometry(dm, c, &user->q[0], v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr); if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c); ierr = DMDAVecGetClosure(dm, NULL, X, c, &x);CHKERRQ(ierr); for (i = 0; i < cellDof; ++i) u[c*cellDof+i] = x[i]; } for (field = 0; field < numFields; ++field) { const PetscInt numQuadPoints = user->q[field].numQuadPoints; const PetscInt numBasisFuncs = user->q[field].numBasisFuncs; void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field]; void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field]; /* Conforming batches */ PetscInt blockSize = numBasisFuncs*numQuadPoints; PetscInt numBlocks = 1; PetscInt batchSize = numBlocks * blockSize; PetscInt numBatches = user->numBatches; PetscInt numChunks = numCells / (numBatches*batchSize); ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr); /* Remainder */ PetscInt numRemainder = numCells % (numBatches * batchSize); PetscInt offset = numCells - numRemainder; ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset], user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr); } for (c = cStart; c < cEnd; ++c) { if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);} ierr = DMDAVecSetClosure(dm, NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr); } ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr); ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr); if (user->showResidual) { PetscInt p; ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr); for (p = 0; p < user->numProcs; ++p) { if (p == user->rank) { Vec f; ierr = VecDuplicate(F, &f);CHKERRQ(ierr); ierr = VecCopy(F, f);CHKERRQ(ierr); ierr = VecChop(f, 1.0e-10);CHKERRQ(ierr); ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecDestroy(&f);CHKERRQ(ierr); } ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr); } } ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscMPIInt rank,size,*toranks,*fromranks,nto,nfrom; PetscInt i,n; PetscBool verbose,build_twosided_f; Unit *todata,*fromdata; MPI_Datatype dtype; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); verbose = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-verbose",&verbose,NULL);CHKERRQ(ierr); build_twosided_f = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-build_twosided_f",&build_twosided_f,NULL);CHKERRQ(ierr); for (i=1,nto=0; i<size; i*=2) nto++; ierr = PetscMalloc2(nto,&todata,nto,&toranks);CHKERRQ(ierr); for (n=0,i=1; i<size; n++,i*=2) { toranks[n] = (rank+i) % size; todata[n].rank = (rank+i) % size; todata[n].value = (PetscScalar)rank; todata[n].ok[0] = 'o'; todata[n].ok[1] = 'k'; todata[n].ok[2] = 0; } if (verbose) { for (i=0; i<nto; i++) { ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] TO %d: {%D, %g, \"%s\"}\n",rank,toranks[i],todata[i].rank,(double)PetscRealPart(todata[i].value),todata[i].ok);CHKERRQ(ierr); } ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); } ierr = MakeDatatype(&dtype);CHKERRQ(ierr); if (build_twosided_f) { struct FCtx fctx; PetscMPIInt *todummy,*fromdummy; fctx.rank = rank; fctx.nto = nto; fctx.toranks = toranks; fctx.todata = todata; ierr = PetscSegBufferCreate(sizeof(Unit),1,&fctx.seg);CHKERRQ(ierr); ierr = PetscMalloc1(nto,&todummy);CHKERRQ(ierr); for (i=0; i<nto; i++) todummy[i] = rank; ierr = PetscCommBuildTwoSidedF(PETSC_COMM_WORLD,1,MPI_INT,nto,toranks,todummy,&nfrom,&fromranks,&fromdummy,2,FSend,FRecv,&fctx);CHKERRQ(ierr); ierr = PetscFree(todummy);CHKERRQ(ierr); ierr = PetscFree(fromdummy);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(fctx.seg,&fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&fctx.seg);CHKERRQ(ierr); } else { ierr = PetscCommBuildTwoSided(PETSC_COMM_WORLD,1,dtype,nto,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr); } ierr = MPI_Type_free(&dtype);CHKERRQ(ierr); if (verbose) { PetscInt *iranks,*iperm; ierr = PetscMalloc2(nfrom,&iranks,nfrom,&iperm);CHKERRQ(ierr); for (i=0; i<nfrom; i++) { iranks[i] = fromranks[i]; iperm[i] = i; } /* Receive ordering is non-deterministic in general, so sort to make verbose output deterministic. */ ierr = PetscSortIntWithPermutation(nfrom,iranks,iperm);CHKERRQ(ierr); for (i=0; i<nfrom; i++) { PetscInt ip = iperm[i]; ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] FROM %d: {%D, %g, \"%s\"}\n",rank,fromranks[ip],fromdata[ip].rank,(double)PetscRealPart(fromdata[ip].value),fromdata[ip].ok);CHKERRQ(ierr); } ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr); ierr = PetscFree2(iranks,iperm);CHKERRQ(ierr); } if (nto != nfrom) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] From ranks %d does not match To ranks %d",rank,nto,nfrom); for (i=1; i<size; i*=2) { PetscMPIInt expected_rank = (rank-i+size)%size; PetscBool flg; for (n=0; n<nfrom; n++) { if (expected_rank == fromranks[n]) goto found; } SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"[%d] Could not find expected from rank %d",rank,expected_rank); found: if (PetscRealPart(fromdata[n].value) != expected_rank) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got data %g from rank %d",rank,(double)PetscRealPart(fromdata[n].value),expected_rank); ierr = PetscStrcmp(fromdata[n].ok,"ok",&flg);CHKERRQ(ierr); if (!flg) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"[%d] Got string %s from rank %d",rank,fromdata[n].ok,expected_rank); } ierr = PetscFree2(todata,toranks);CHKERRQ(ierr); ierr = PetscFree(fromdata);CHKERRQ(ierr); ierr = PetscFree(fromranks);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
static PetscErrorCode triangulateAndFormProl(IS selected_2,PetscInt data_stride,PetscReal coords[],PetscInt nselected_1,const PetscInt clid_lid_1[],const PetscCoarsenData *agg_lists_1, const PetscInt crsGID[],PetscInt bs,Mat a_Prol,PetscReal *a_worst_best) { #if defined(PETSC_HAVE_TRIANGLE) PetscErrorCode ierr; PetscInt jj,tid,tt,idx,nselected_2; struct triangulateio in,mid; const PetscInt *selected_idx_2; PetscMPIInt rank; PetscInt Istart,Iend,nFineLoc,myFine0; int kk,nPlotPts,sid; MPI_Comm comm; PetscReal tm; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr); if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */ *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */ } else *a_worst_best = 0.0; ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr); if (tm > 0.0) { *a_worst_best = 100.0; PetscFunctionReturn(0); } ierr = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr); nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs; nPlotPts = nFineLoc; /* locals */ /* traingle */ /* Define input points - in*/ in.numberofpoints = nselected_2; in.numberofpointattributes = 0; /* get nselected points */ ierr = PetscMalloc1(2*nselected_2, &in.pointlist);CHKERRQ(ierr); ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr); for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) { PetscInt lid = selected_idx_2[kk]; in.pointlist[sid] = coords[lid]; in.pointlist[sid+1] = coords[data_stride + lid]; if (lid>=nFineLoc) nPlotPts++; } if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,nselected_2); in.numberofsegments = 0; in.numberofedges = 0; in.numberofholes = 0; in.numberofregions = 0; in.trianglelist = 0; in.segmentmarkerlist = 0; in.pointattributelist = 0; in.pointmarkerlist = 0; in.triangleattributelist = 0; in.trianglearealist = 0; in.segmentlist = 0; in.holelist = 0; in.regionlist = 0; in.edgelist = 0; in.edgemarkerlist = 0; in.normlist = 0; /* triangulate */ mid.pointlist = 0; /* Not needed if -N switch used. */ /* Not needed if -N switch used or number of point attributes is zero: */ mid.pointattributelist = 0; mid.pointmarkerlist = 0; /* Not needed if -N or -B switch used. */ mid.trianglelist = 0; /* Not needed if -E switch used. */ /* Not needed if -E switch used or number of triangle attributes is zero: */ mid.triangleattributelist = 0; mid.neighborlist = 0; /* Needed only if -n switch used. */ /* Needed only if segments are output (-p or -c) and -P not used: */ mid.segmentlist = 0; /* Needed only if segments are output (-p or -c) and -P and -B not used: */ mid.segmentmarkerlist = 0; mid.edgelist = 0; /* Needed only if -e switch used. */ mid.edgemarkerlist = 0; /* Needed if -e used and -B not used. */ mid.numberoftriangles = 0; /* Triangulate the points. Switches are chosen to read and write a */ /* PSLG (p), preserve the convex hull (c), number everything from */ /* zero (z), assign a regional attribute to each element (A), and */ /* produce an edge list (e), a Voronoi diagram (v), and a triangle */ /* neighbor list (n). */ if (nselected_2 != 0) { /* inactive processor */ char args[] = "npczQ"; /* c is needed ? */ triangulate(args, &in, &mid, (struct triangulateio*) NULL); /* output .poly files for 'showme' */ if (!PETSC_TRUE) { static int level = 1; FILE *file; char fname[32]; sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w"); /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/ fprintf(file, "%d %d %d %d\n",in.numberofpoints,2,0,0); /*Following lines: <vertex #> <x> <y> */ for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) { fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]); } /*One line: <# of segments> <# of boundary markers (0 or 1)> */ fprintf(file, "%d %d\n",0,0); /*Following lines: <segment #> <endpoint> <endpoint> [boundary marker] */ /* One line: <# of holes> */ fprintf(file, "%d\n",0); /* Following lines: <hole #> <x> <y> */ /* Optional line: <# of regional attributes and/or area constraints> */ /* Optional following lines: <region #> <x> <y> <attribute> <maximum area> */ fclose(file); /* elems */ sprintf(fname,"C%d_%d.ele",level,rank); file = fopen(fname, "w"); /* First line: <# of triangles> <nodes per triangle> <# of attributes> */ fprintf(file, "%d %d %d\n",mid.numberoftriangles,3,0); /* Remaining lines: <triangle #> <node> <node> <node> ... [attributes] */ for (kk=0,sid=0; kk<mid.numberoftriangles; kk++,sid += 3) { fprintf(file, "%d %d %d %d\n",kk,mid.trianglelist[sid],mid.trianglelist[sid+1],mid.trianglelist[sid+2]); } fclose(file); sprintf(fname,"C%d_%d.node",level,rank); file = fopen(fname, "w"); /* First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)> */ /* fprintf(file, "%d %d %d %d\n",in.numberofpoints,2,0,0); */ fprintf(file, "%d %d %d %d\n",nPlotPts,2,0,0); /*Following lines: <vertex #> <x> <y> */ for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid+=2) { fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]); } sid /= 2; for (jj=0; jj<nFineLoc; jj++) { PetscBool sel = PETSC_TRUE; for (kk=0; kk<nselected_2 && sel; kk++) { PetscInt lid = selected_idx_2[kk]; if (lid == jj) sel = PETSC_FALSE; } if (sel) fprintf(file, "%d %e %e\n",sid++,coords[jj],coords[data_stride + jj]); } fclose(file); if (sid != nPlotPts) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != nPlotPts %D",sid,nPlotPts); level++; } } #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventBegin(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr); #endif { /* form P - setup some maps */ PetscInt clid,mm,*nTri,*node_tri; ierr = PetscMalloc2(nselected_2, &node_tri,nselected_2, &nTri);CHKERRQ(ierr); /* need list of triangles on node */ for (kk=0; kk<nselected_2; kk++) nTri[kk] = 0; for (tid=0,kk=0; tid<mid.numberoftriangles; tid++) { for (jj=0; jj<3; jj++) { PetscInt cid = mid.trianglelist[kk++]; if (nTri[cid] == 0) node_tri[cid] = tid; nTri[cid]++; } } #define EPS 1.e-12 /* find points and set prolongation */ for (mm = clid = 0; mm < nFineLoc; mm++) { PetscBool ise; ierr = PetscCDEmptyAt(agg_lists_1,mm,&ise);CHKERRQ(ierr); if (!ise) { const PetscInt lid = mm; /* for (clid_iterator=0;clid_iterator<nselected_1;clid_iterator++) { */ PetscScalar AA[3][3]; PetscBLASInt N=3,NRHS=1,LDA=3,IPIV[3],LDB=3,INFO; PetscCDPos pos; ierr = PetscCDGetHeadPos(agg_lists_1,lid,&pos);CHKERRQ(ierr); while (pos) { PetscInt flid; ierr = PetscLLNGetID(pos, &flid);CHKERRQ(ierr); ierr = PetscCDGetNextPos(agg_lists_1,lid,&pos);CHKERRQ(ierr); if (flid < nFineLoc) { /* could be a ghost */ PetscInt bestTID = -1; PetscReal best_alpha = 1.e10; const PetscInt fgid = flid + myFine0; /* compute shape function for gid */ const PetscReal fcoord[3] = {coords[flid],coords[data_stride+flid],1.0}; PetscBool haveit =PETSC_FALSE; PetscScalar alpha[3]; PetscInt clids[3]; /* look for it */ for (tid = node_tri[clid], jj=0; jj < 5 && !haveit && tid != -1; jj++) { for (tt=0; tt<3; tt++) { PetscInt cid2 = mid.trianglelist[3*tid + tt]; PetscInt lid2 = selected_idx_2[cid2]; AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0; clids[tt] = cid2; /* store for interp */ } for (tt=0; tt<3; tt++) alpha[tt] = (PetscScalar)fcoord[tt]; /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */ PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO)); { PetscBool have=PETSC_TRUE; PetscReal lowest=1.e10; for (tt = 0, idx = 0; tt < 3; tt++) { if (PetscRealPart(alpha[tt]) > (1.0+EPS) || PetscRealPart(alpha[tt]) < -EPS) have = PETSC_FALSE; if (PetscRealPart(alpha[tt]) < lowest) { lowest = PetscRealPart(alpha[tt]); idx = tt; } } haveit = have; } tid = mid.neighborlist[3*tid + idx]; } if (!haveit) { /* brute force */ for (tid=0; tid<mid.numberoftriangles && !haveit; tid++) { for (tt=0; tt<3; tt++) { PetscInt cid2 = mid.trianglelist[3*tid + tt]; PetscInt lid2 = selected_idx_2[cid2]; AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0; clids[tt] = cid2; /* store for interp */ } for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt]; /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */ PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO)); { PetscBool have=PETSC_TRUE; PetscReal worst=0.0, v; for (tt=0; tt<3 && have; tt++) { if (PetscRealPart(alpha[tt]) > 1.0+EPS || PetscRealPart(alpha[tt]) < -EPS) have=PETSC_FALSE; if ((v=PetscAbs(PetscRealPart(alpha[tt])-0.5)) > worst) worst = v; } if (worst < best_alpha) { best_alpha = worst; bestTID = tid; } haveit = have; } } } if (!haveit) { if (best_alpha > *a_worst_best) *a_worst_best = best_alpha; /* use best one */ for (tt=0; tt<3; tt++) { PetscInt cid2 = mid.trianglelist[3*bestTID + tt]; PetscInt lid2 = selected_idx_2[cid2]; AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0; clids[tt] = cid2; /* store for interp */ } for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt]; /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */ PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO)); } /* put in row of P */ for (idx=0; idx<3; idx++) { PetscScalar shp = alpha[idx]; if (PetscAbs(PetscRealPart(shp)) > 1.e-6) { PetscInt cgid = crsGID[clids[idx]]; PetscInt jj = cgid*bs, ii = fgid*bs; /* need to gloalize */ for (tt=0; tt < bs; tt++, ii++, jj++) { ierr = MatSetValues(a_Prol,1,&ii,1,&jj,&shp,INSERT_VALUES);CHKERRQ(ierr); } } } } } /* aggregates iterations */ clid++; } /* a coarse agg */ } /* for all fine nodes */ ierr = ISRestoreIndices(selected_2, &selected_idx_2);CHKERRQ(ierr); ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree2(node_tri,nTri);CHKERRQ(ierr); } #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventEnd(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr); #endif free(mid.trianglelist); free(mid.neighborlist); ierr = PetscFree(in.pointlist);CHKERRQ(ierr); PetscFunctionReturn(0); #else SETERRQ(PetscObjectComm((PetscObject)a_Prol),PETSC_ERR_PLIB,"configure with TRIANGLE to use geometric MG"); #endif }