Exemplo n.º 1
0
PetscErrorCode ISSortRemoveDups_General(IS is)
{
  IS_General     *sub = (IS_General*)is->data;
  PetscInt       n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (sub->sorted) PetscFunctionReturn(0);
  ierr = PetscLayoutGetLocalSize(is->map, &n);CHKERRQ(ierr);
  ierr = PetscSortRemoveDupsInt(&n,sub->idx);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(is->map, n);CHKERRQ(ierr);
  sub->sorted = PETSC_TRUE;
  PetscFunctionReturn(0);
}
Exemplo n.º 2
0
Arquivo: block.c Projeto: petsc/petsc
static PetscErrorCode ISSortRemoveDups_Block(IS is)
{
    IS_Block       *sub = (IS_Block*)is->data;
    PetscInt       bs, n, nb;
    PetscErrorCode ierr;

    PetscFunctionBegin;
    if (sub->sorted) PetscFunctionReturn(0);
    ierr = PetscLayoutGetBlockSize(is->map, &bs);
    CHKERRQ(ierr);
    ierr = PetscLayoutGetLocalSize(is->map, &n);
    CHKERRQ(ierr);
    nb   = n/bs;
    ierr = PetscSortRemoveDupsInt(&nb,sub->idx);
    CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(is->map, nb*bs);
    CHKERRQ(ierr);
    sub->sorted = PETSC_TRUE;
    PetscFunctionReturn(0);
}
Exemplo n.º 3
0
/* get adjacencies due to point-to-point constraints that can't be found with DMPlexGetAdjacency() */
static PetscErrorCode DMPlexComputeAnchorAdjacencies(DM dm, PetscSection section, PetscSection sectionGlobal, PetscSection *anchorSectionAdj, PetscInt *anchorAdj[])
{
  PetscInt       pStart, pEnd;
  PetscSection   adjSec, aSec;
  IS             aIS;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  ierr = PetscSectionCreate(PetscObjectComm((PetscObject)section),&adjSec);CHKERRQ(ierr);
  ierr = PetscSectionGetChart(section,&pStart,&pEnd);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(adjSec,pStart,pEnd);CHKERRQ(ierr);

  ierr = DMPlexGetAnchors(dm,&aSec,&aIS);CHKERRQ(ierr);
  if (aSec) {
    const PetscInt *anchors;
    PetscInt       p, q, a, aSize, *offsets, aStart, aEnd, *inverse, iSize, *adj, adjSize;
    PetscInt       *tmpAdjP = NULL, *tmpAdjQ = NULL;
    PetscSection   inverseSec;

    /* invert the constraint-to-anchor map */
    ierr = PetscSectionCreate(PetscObjectComm((PetscObject)aSec),&inverseSec);CHKERRQ(ierr);
    ierr = PetscSectionSetChart(inverseSec,pStart,pEnd);CHKERRQ(ierr);
    ierr = ISGetLocalSize(aIS, &aSize);CHKERRQ(ierr);
    ierr = ISGetIndices(aIS, &anchors);CHKERRQ(ierr);

    for (p = 0; p < aSize; p++) {
      PetscInt a = anchors[p];

      ierr = PetscSectionAddDof(inverseSec,a,1);CHKERRQ(ierr);
    }
    ierr = PetscSectionSetUp(inverseSec);CHKERRQ(ierr);
    ierr = PetscSectionGetStorageSize(inverseSec,&iSize);CHKERRQ(ierr);
    ierr = PetscMalloc1(iSize,&inverse);CHKERRQ(ierr);
    ierr = PetscCalloc1(pEnd-pStart,&offsets);CHKERRQ(ierr);
    ierr = PetscSectionGetChart(aSec,&aStart,&aEnd);CHKERRQ(ierr);
    for (p = aStart; p < aEnd; p++) {
      PetscInt dof, off;

      ierr = PetscSectionGetDof(aSec, p, &dof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(aSec, p, &off);CHKERRQ(ierr);

      for (q = 0; q < dof; q++) {
        PetscInt iOff;

        a = anchors[off + q];
        ierr = PetscSectionGetOffset(inverseSec, a, &iOff);CHKERRQ(ierr);
        inverse[iOff + offsets[a-pStart]++] = p;
      }
    }
    ierr = ISRestoreIndices(aIS, &anchors);CHKERRQ(ierr);
    ierr = PetscFree(offsets);CHKERRQ(ierr);

    /* construct anchorAdj and adjSec
     *
     * loop over anchors:
     *   construct anchor adjacency
     *   loop over constrained:
     *     construct constrained adjacency
     *     if not in anchor adjacency, add to dofs
     * setup adjSec, allocate anchorAdj
     * loop over anchors:
     *   construct anchor adjacency
     *   loop over constrained:
     *     construct constrained adjacency
     *     if not in anchor adjacency
     *       if not already in list, put in list
     *   sort, unique, reduce dof count
     * optional: compactify
     */
    for (p = pStart; p < pEnd; p++) {
      PetscInt iDof, iOff, i, r, s, numAdjP = PETSC_DETERMINE;

      ierr = PetscSectionGetDof(inverseSec,p,&iDof);CHKERRQ(ierr);
      if (!iDof) continue;
      ierr = PetscSectionGetOffset(inverseSec,p,&iOff);CHKERRQ(ierr);
      ierr = DMPlexGetAdjacency(dm,p,&numAdjP,&tmpAdjP);CHKERRQ(ierr);
      for (i = 0; i < iDof; i++) {
        PetscInt iNew = 0, qAdj, qAdjDof, qAdjCDof, numAdjQ = PETSC_DETERMINE;

        q = inverse[iOff + i];
        ierr = DMPlexGetAdjacency(dm,q,&numAdjQ,&tmpAdjQ);CHKERRQ(ierr);
        for (r = 0; r < numAdjQ; r++) {
          qAdj = tmpAdjQ[r];
          if ((qAdj < pStart) || (qAdj >= pEnd)) continue;
          for (s = 0; s < numAdjP; s++) {
            if (qAdj == tmpAdjP[s]) break;
          }
          if (s < numAdjP) continue;
          ierr  = PetscSectionGetDof(section,qAdj,&qAdjDof);CHKERRQ(ierr);
          ierr  = PetscSectionGetConstraintDof(section,qAdj,&qAdjCDof);CHKERRQ(ierr);
          iNew += qAdjDof - qAdjCDof;
        }
        ierr = PetscSectionAddDof(adjSec,p,iNew);CHKERRQ(ierr);
      }
    }

    ierr = PetscSectionSetUp(adjSec);CHKERRQ(ierr);
    ierr = PetscSectionGetStorageSize(adjSec,&adjSize);CHKERRQ(ierr);
    ierr = PetscMalloc1(adjSize,&adj);CHKERRQ(ierr);

    for (p = pStart; p < pEnd; p++) {
      PetscInt iDof, iOff, i, r, s, aOff, aOffOrig, aDof, numAdjP = PETSC_DETERMINE;

      ierr = PetscSectionGetDof(inverseSec,p,&iDof);CHKERRQ(ierr);
      if (!iDof) continue;
      ierr = PetscSectionGetOffset(inverseSec,p,&iOff);CHKERRQ(ierr);
      ierr = DMPlexGetAdjacency(dm,p,&numAdjP,&tmpAdjP);CHKERRQ(ierr);
      ierr = PetscSectionGetDof(adjSec,p,&aDof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(adjSec,p,&aOff);CHKERRQ(ierr);
      aOffOrig = aOff;
      for (i = 0; i < iDof; i++) {
        PetscInt qAdj, qAdjDof, qAdjCDof, qAdjOff, nd, numAdjQ = PETSC_DETERMINE;

        q = inverse[iOff + i];
        ierr = DMPlexGetAdjacency(dm,q,&numAdjQ,&tmpAdjQ);CHKERRQ(ierr);
        for (r = 0; r < numAdjQ; r++) {
          qAdj = tmpAdjQ[r];
          if ((qAdj < pStart) || (qAdj >= pEnd)) continue;
          for (s = 0; s < numAdjP; s++) {
            if (qAdj == tmpAdjP[s]) break;
          }
          if (s < numAdjP) continue;
          ierr  = PetscSectionGetDof(section,qAdj,&qAdjDof);CHKERRQ(ierr);
          ierr  = PetscSectionGetConstraintDof(section,qAdj,&qAdjCDof);CHKERRQ(ierr);
          ierr  = PetscSectionGetOffset(sectionGlobal,qAdj,&qAdjOff);CHKERRQ(ierr);
          for (nd = 0; nd < qAdjDof-qAdjCDof; ++nd) {
            adj[aOff++] = (qAdjOff < 0 ? -(qAdjOff+1) : qAdjOff) + nd;
          }
        }
      }
      ierr = PetscSortRemoveDupsInt(&aDof,&adj[aOffOrig]);CHKERRQ(ierr);
      ierr = PetscSectionSetDof(adjSec,p,aDof);CHKERRQ(ierr);
    }
    *anchorAdj = adj;

    /* clean up */
    ierr = PetscSectionDestroy(&inverseSec);CHKERRQ(ierr);
    ierr = PetscFree(inverse);CHKERRQ(ierr);
    ierr = PetscFree(tmpAdjP);CHKERRQ(ierr);
    ierr = PetscFree(tmpAdjQ);CHKERRQ(ierr);
  }
  else {
    *anchorAdj = NULL;
    ierr = PetscSectionSetUp(adjSec);CHKERRQ(ierr);
  }
  *anchorSectionAdj = adjSec;
  PetscFunctionReturn(0);
}
Exemplo n.º 4
0
PetscErrorCode DMPlexPreallocateOperator(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix)
{
  MPI_Comm           comm;
  MatType            mtype;
  PetscSF            sf, sfDof, sfAdj;
  PetscSection       leafSectionAdj, rootSectionAdj, sectionAdj, anchorSectionAdj;
  PetscInt           nroots, nleaves, l, p;
  const PetscInt    *leaves;
  const PetscSFNode *remotes;
  PetscInt           dim, pStart, pEnd, numDof, globalOffStart, globalOffEnd, numCols;
  PetscInt          *tmpAdj = NULL, *adj, *rootAdj, *anchorAdj = NULL, *cols, *remoteOffsets;
  PetscInt           adjSize;
  PetscLayout        rLayout;
  PetscInt           locRows, rStart, rEnd, r;
  PetscMPIInt        size;
  PetscBool          doCommLocal, doComm, debug = PETSC_FALSE, isSymBlock, isSymSeqBlock, isSymMPIBlock;
  PetscBool          useAnchors;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  PetscValidHeaderSpecific(section, PETSC_SECTION_CLASSID, 3);
  PetscValidHeaderSpecific(sectionGlobal, PETSC_SECTION_CLASSID, 4);
  PetscValidHeaderSpecific(A, MAT_CLASSID, 9);
  if (dnz)  PetscValidPointer(dnz,5);
  if (onz)  PetscValidPointer(onz,6);
  if (dnzu) PetscValidPointer(dnzu,7);
  if (onzu) PetscValidPointer(onzu,8);
  ierr = PetscLogEventBegin(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL, "-dm_view_preallocation", &debug, NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  doCommLocal = (size > 1) && (nroots >= 0) ? PETSC_TRUE : PETSC_FALSE;
  ierr = MPI_Allreduce(&doCommLocal, &doComm, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr);
  /* Create dof SF based on point SF */
  if (debug) {
    ierr = PetscPrintf(comm, "Input Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(section, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Input Global Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(sectionGlobal, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Input SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sf, NULL);CHKERRQ(ierr);
  }
  ierr = PetscSFCreateRemoteOffsets(sf, section, section, &remoteOffsets);CHKERRQ(ierr);
  ierr = PetscSFCreateSectionSF(sf, section, remoteOffsets, section, &sfDof);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Dof SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sfDof, NULL);CHKERRQ(ierr);
  }
  /* Create section for dof adjacency (dof ==> # adj dof) */
  ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
  ierr = PetscSectionGetStorageSize(section, &numDof);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &leafSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(leafSectionAdj, 0, numDof);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &rootSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(rootSectionAdj, 0, numDof);CHKERRQ(ierr);
  /*   Fill in the ghost dofs on the interface */
  ierr = PetscSFGetGraph(sf, NULL, &nleaves, &leaves, &remotes);CHKERRQ(ierr);
  /* use constraints in finding adjacency in this routine */
  ierr = DMPlexGetAdjacencyUseAnchors(dm,&useAnchors);CHKERRQ(ierr);
  ierr = DMPlexSetAdjacencyUseAnchors(dm,PETSC_TRUE);CHKERRQ(ierr);

  /*
   section        - maps points to (# dofs, local dofs)
   sectionGlobal  - maps points to (# dofs, global dofs)
   leafSectionAdj - maps unowned local dofs to # adj dofs
   rootSectionAdj - maps   owned local dofs to # adj dofs
   adj            - adj global dofs indexed by leafSectionAdj
   rootAdj        - adj global dofs indexed by rootSectionAdj
   sf    - describes shared points across procs
   sfDof - describes shared dofs across procs
   sfAdj - describes shared adjacent dofs across procs
   ** The bootstrapping process involves six rounds with similar structure of visiting neighbors of each point.
  (0). If there are point-to-point constraints, add the adjacencies of constrained points to anchors in anchorAdj
       (This is done in DMPlexComputeAnchorAdjacencies())
    1. Visit unowned points on interface, count adjacencies placing in leafSectionAdj
       Reduce those counts to rootSectionAdj (now redundantly counting some interface points)
    2. Visit owned points on interface, count adjacencies placing in rootSectionAdj
       Create sfAdj connecting rootSectionAdj and leafSectionAdj
    3. Visit unowned points on interface, write adjacencies to adj
       Gather adj to rootAdj (note that there is redundancy in rootAdj when multiple procs find the same adjacencies)
    4. Visit owned points on interface, write adjacencies to rootAdj
       Remove redundancy in rootAdj
   ** The last two traversals use transitive closure
    5. Visit all owned points in the subdomain, count dofs for each point (sectionAdj)
       Allocate memory addressed by sectionAdj (cols)
    6. Visit all owned points in the subdomain, insert dof adjacencies into cols
   ** Knowing all the column adjacencies, check ownership and sum into dnz and onz
  */

  ierr = DMPlexComputeAnchorAdjacencies(dm,section,sectionGlobal,&anchorSectionAdj,&anchorAdj);CHKERRQ(ierr);

  for (l = 0; l < nleaves; ++l) {
    PetscInt dof, off, d, q, anDof;
    PetscInt p = leaves[l], numAdj = PETSC_DETERMINE;

    if ((p < pStart) || (p >= pEnd)) continue;
    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr);
    for (q = 0; q < numAdj; ++q) {
      const PetscInt padj = tmpAdj[q];
      PetscInt ndof, ncdof;

      if ((padj < pStart) || (padj >= pEnd)) continue;
      ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr);
      ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr);
      for (d = off; d < off+dof; ++d) {
        ierr = PetscSectionAddDof(leafSectionAdj, d, ndof-ncdof);CHKERRQ(ierr);
      }
    }
    ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr);
    if (anDof) {
      for (d = off; d < off+dof; ++d) {
        ierr = PetscSectionAddDof(leafSectionAdj, d, anDof);CHKERRQ(ierr);
      }
    }
  }
  ierr = PetscSectionSetUp(leafSectionAdj);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Adjacency Section for Preallocation on Leaves:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(leafSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }
  /* Get maximum remote adjacency sizes for owned dofs on interface (roots) */
  if (doComm) {
    ierr = PetscSFReduceBegin(sfDof, MPIU_INT, leafSectionAdj->atlasDof, rootSectionAdj->atlasDof, MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sfDof, MPIU_INT, leafSectionAdj->atlasDof, rootSectionAdj->atlasDof, MPI_SUM);CHKERRQ(ierr);
  }
  if (debug) {
    ierr = PetscPrintf(comm, "Adjancency Section for Preallocation on Roots:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }
  /* Add in local adjacency sizes for owned dofs on interface (roots) */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt numAdj = PETSC_DETERMINE, adof, dof, off, d, q, anDof;

    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    if (!dof) continue;
    ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr);
    if (adof <= 0) continue;
    ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr);
    for (q = 0; q < numAdj; ++q) {
      const PetscInt padj = tmpAdj[q];
      PetscInt ndof, ncdof;

      if ((padj < pStart) || (padj >= pEnd)) continue;
      ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr);
      ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr);
      for (d = off; d < off+dof; ++d) {
        ierr = PetscSectionAddDof(rootSectionAdj, d, ndof-ncdof);CHKERRQ(ierr);
      }
    }
    ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr);
    if (anDof) {
      for (d = off; d < off+dof; ++d) {
        ierr = PetscSectionAddDof(rootSectionAdj, d, anDof);CHKERRQ(ierr);
      }
    }
  }
  ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Adjancency Section for Preallocation on Roots after local additions:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }
  /* Create adj SF based on dof SF */
  ierr = PetscSFCreateRemoteOffsets(sfDof, rootSectionAdj, leafSectionAdj, &remoteOffsets);CHKERRQ(ierr);
  ierr = PetscSFCreateSectionSF(sfDof, rootSectionAdj, remoteOffsets, leafSectionAdj, &sfAdj);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Adjacency SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sfAdj, NULL);CHKERRQ(ierr);
  }
  ierr = PetscSFDestroy(&sfDof);CHKERRQ(ierr);
  /* Create leaf adjacency */
  ierr = PetscSectionSetUp(leafSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionGetStorageSize(leafSectionAdj, &adjSize);CHKERRQ(ierr);
  ierr = PetscCalloc1(adjSize, &adj);CHKERRQ(ierr);
  for (l = 0; l < nleaves; ++l) {
    PetscInt dof, off, d, q, anDof, anOff;
    PetscInt p = leaves[l], numAdj = PETSC_DETERMINE;

    if ((p < pStart) || (p >= pEnd)) continue;
    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr);
    ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr);
    for (d = off; d < off+dof; ++d) {
      PetscInt aoff, i = 0;

      ierr = PetscSectionGetOffset(leafSectionAdj, d, &aoff);CHKERRQ(ierr);
      for (q = 0; q < numAdj; ++q) {
        const PetscInt padj = tmpAdj[q];
        PetscInt ndof, ncdof, ngoff, nd;

        if ((padj < pStart) || (padj >= pEnd)) continue;
        ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr);
        ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr);
        ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr);
        for (nd = 0; nd < ndof-ncdof; ++nd) {
          adj[aoff+i] = (ngoff < 0 ? -(ngoff+1) : ngoff) + nd;
          ++i;
        }
      }
      for (q = 0; q < anDof; q++) {
        adj[aoff+i] = anchorAdj[anOff+q];
        ++i;
      }
    }
  }
  /* Debugging */
  if (debug) {
    IS tmp;
    ierr = PetscPrintf(comm, "Leaf adjacency indices\n");CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm, adjSize, adj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
    ierr = ISView(tmp, NULL);CHKERRQ(ierr);
    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
  }
  /* Gather adjacenct indices to root */
  ierr = PetscSectionGetStorageSize(rootSectionAdj, &adjSize);CHKERRQ(ierr);
  ierr = PetscMalloc1(adjSize, &rootAdj);CHKERRQ(ierr);
  for (r = 0; r < adjSize; ++r) rootAdj[r] = -1;
  if (doComm) {
    ierr = PetscSFGatherBegin(sfAdj, MPIU_INT, adj, rootAdj);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(sfAdj, MPIU_INT, adj, rootAdj);CHKERRQ(ierr);
  }
  ierr = PetscSFDestroy(&sfAdj);CHKERRQ(ierr);
  ierr = PetscFree(adj);CHKERRQ(ierr);
  /* Debugging */
  if (debug) {
    IS tmp;
    ierr = PetscPrintf(comm, "Root adjacency indices after gather\n");CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
    ierr = ISView(tmp, NULL);CHKERRQ(ierr);
    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
  }
  /* Add in local adjacency indices for owned dofs on interface (roots) */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt numAdj = PETSC_DETERMINE, adof, dof, off, d, q, anDof, anOff;

    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    if (!dof) continue;
    ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr);
    if (adof <= 0) continue;
    ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr);
    ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr);
    for (d = off; d < off+dof; ++d) {
      PetscInt adof, aoff, i;

      ierr = PetscSectionGetDof(rootSectionAdj, d, &adof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(rootSectionAdj, d, &aoff);CHKERRQ(ierr);
      i    = adof-1;
      for (q = 0; q < anDof; q++) {
        rootAdj[aoff+i] = anchorAdj[anOff+q];
        --i;
      }
      for (q = 0; q < numAdj; ++q) {
        const PetscInt padj = tmpAdj[q];
        PetscInt ndof, ncdof, ngoff, nd;

        if ((padj < pStart) || (padj >= pEnd)) continue;
        ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr);
        ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr);
        ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr);
        for (nd = 0; nd < ndof-ncdof; ++nd) {
          rootAdj[aoff+i] = ngoff < 0 ? -(ngoff+1)+nd : ngoff+nd;
          --i;
        }
      }
    }
  }
  /* Debugging */
  if (debug) {
    IS tmp;
    ierr = PetscPrintf(comm, "Root adjacency indices\n");CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
    ierr = ISView(tmp, NULL);CHKERRQ(ierr);
    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
  }
  /* Compress indices */
  ierr = PetscSectionSetUp(rootSectionAdj);CHKERRQ(ierr);
  for (p = pStart; p < pEnd; ++p) {
    PetscInt dof, cdof, off, d;
    PetscInt adof, aoff;

    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    if (!dof) continue;
    ierr = PetscSectionGetDof(rootSectionAdj, off, &adof);CHKERRQ(ierr);
    if (adof <= 0) continue;
    for (d = off; d < off+dof-cdof; ++d) {
      ierr = PetscSectionGetDof(rootSectionAdj, d, &adof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(rootSectionAdj, d, &aoff);CHKERRQ(ierr);
      ierr = PetscSortRemoveDupsInt(&adof, &rootAdj[aoff]);CHKERRQ(ierr);
      ierr = PetscSectionSetDof(rootSectionAdj, d, adof);CHKERRQ(ierr);
    }
  }
  /* Debugging */
  if (debug) {
    IS tmp;
    ierr = PetscPrintf(comm, "Adjancency Section for Preallocation on Roots after compression:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(rootSectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Root adjacency indices after compression\n");CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm, adjSize, rootAdj, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
    ierr = ISView(tmp, NULL);CHKERRQ(ierr);
    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
  }
  /* Build adjacency section: Maps global indices to sets of adjacent global indices */
  ierr = PetscSectionGetOffsetRange(sectionGlobal, &globalOffStart, &globalOffEnd);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &sectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(sectionAdj, globalOffStart, globalOffEnd);CHKERRQ(ierr);
  for (p = pStart; p < pEnd; ++p) {
    PetscInt  numAdj = PETSC_DETERMINE, dof, cdof, off, goff, d, q, anDof;
    PetscBool found  = PETSC_TRUE;

    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr);
    for (d = 0; d < dof-cdof; ++d) {
      PetscInt ldof, rdof;

      ierr = PetscSectionGetDof(leafSectionAdj, off+d, &ldof);CHKERRQ(ierr);
      ierr = PetscSectionGetDof(rootSectionAdj, off+d, &rdof);CHKERRQ(ierr);
      if (ldof > 0) {
        /* We do not own this point */
      } else if (rdof > 0) {
        ierr = PetscSectionSetDof(sectionAdj, goff+d, rdof);CHKERRQ(ierr);
      } else {
        found = PETSC_FALSE;
      }
    }
    if (found) continue;
    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr);
    ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr);
    for (q = 0; q < numAdj; ++q) {
      const PetscInt padj = tmpAdj[q];
      PetscInt ndof, ncdof, noff;

      if ((padj < pStart) || (padj >= pEnd)) continue;
      ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr);
      ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(section, padj, &noff);CHKERRQ(ierr);
      for (d = goff; d < goff+dof-cdof; ++d) {
        ierr = PetscSectionAddDof(sectionAdj, d, ndof-ncdof);CHKERRQ(ierr);
      }
    }
    ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr);
    if (anDof) {
      for (d = goff; d < goff+dof-cdof; ++d) {
        ierr = PetscSectionAddDof(sectionAdj, d, anDof);CHKERRQ(ierr);
      }
    }
  }
  ierr = PetscSectionSetUp(sectionAdj);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Adjacency Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(sectionAdj, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }
  /* Get adjacent indices */
  ierr = PetscSectionGetStorageSize(sectionAdj, &numCols);CHKERRQ(ierr);
  ierr = PetscMalloc1(numCols, &cols);CHKERRQ(ierr);
  for (p = pStart; p < pEnd; ++p) {
    PetscInt  numAdj = PETSC_DETERMINE, dof, cdof, off, goff, d, q, anDof, anOff;
    PetscBool found  = PETSC_TRUE;

    ierr = PetscSectionGetDof(section, p, &dof);CHKERRQ(ierr);
    ierr = PetscSectionGetConstraintDof(section, p, &cdof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(section, p, &off);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(sectionGlobal, p, &goff);CHKERRQ(ierr);
    for (d = 0; d < dof-cdof; ++d) {
      PetscInt ldof, rdof;

      ierr = PetscSectionGetDof(leafSectionAdj, off+d, &ldof);CHKERRQ(ierr);
      ierr = PetscSectionGetDof(rootSectionAdj, off+d, &rdof);CHKERRQ(ierr);
      if (ldof > 0) {
        /* We do not own this point */
      } else if (rdof > 0) {
        PetscInt aoff, roff;

        ierr = PetscSectionGetOffset(sectionAdj, goff+d, &aoff);CHKERRQ(ierr);
        ierr = PetscSectionGetOffset(rootSectionAdj, off+d, &roff);CHKERRQ(ierr);
        ierr = PetscMemcpy(&cols[aoff], &rootAdj[roff], rdof * sizeof(PetscInt));CHKERRQ(ierr);
      } else {
        found = PETSC_FALSE;
      }
    }
    if (found) continue;
    ierr = DMPlexGetAdjacency(dm, p, &numAdj, &tmpAdj);CHKERRQ(ierr);
    ierr = PetscSectionGetDof(anchorSectionAdj, p, &anDof);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(anchorSectionAdj, p, &anOff);CHKERRQ(ierr);
    for (d = goff; d < goff+dof-cdof; ++d) {
      PetscInt adof, aoff, i = 0;

      ierr = PetscSectionGetDof(sectionAdj, d, &adof);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(sectionAdj, d, &aoff);CHKERRQ(ierr);
      for (q = 0; q < numAdj; ++q) {
        const PetscInt  padj = tmpAdj[q];
        PetscInt        ndof, ncdof, ngoff, nd;
        const PetscInt *ncind;

        /* Adjacent points may not be in the section chart */
        if ((padj < pStart) || (padj >= pEnd)) continue;
        ierr = PetscSectionGetDof(section, padj, &ndof);CHKERRQ(ierr);
        ierr = PetscSectionGetConstraintDof(section, padj, &ncdof);CHKERRQ(ierr);
        ierr = PetscSectionGetConstraintIndices(section, padj, &ncind);CHKERRQ(ierr);
        ierr = PetscSectionGetOffset(sectionGlobal, padj, &ngoff);CHKERRQ(ierr);
        for (nd = 0; nd < ndof-ncdof; ++nd, ++i) {
          cols[aoff+i] = ngoff < 0 ? -(ngoff+1)+nd : ngoff+nd;
        }
      }
      for (q = 0; q < anDof; q++, i++) {
        cols[aoff+i] = anchorAdj[anOff + q];
      }
      if (i != adof) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of entries %D != %D for dof %D (point %D)", i, adof, d, p);
    }
  }
  ierr = PetscSectionDestroy(&anchorSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionDestroy(&leafSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionDestroy(&rootSectionAdj);CHKERRQ(ierr);
  ierr = PetscFree(anchorAdj);CHKERRQ(ierr);
  ierr = PetscFree(rootAdj);CHKERRQ(ierr);
  ierr = PetscFree(tmpAdj);CHKERRQ(ierr);
  /* Debugging */
  if (debug) {
    IS tmp;
    ierr = PetscPrintf(comm, "Column indices\n");CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm, numCols, cols, PETSC_USE_POINTER, &tmp);CHKERRQ(ierr);
    ierr = ISView(tmp, NULL);CHKERRQ(ierr);
    ierr = ISDestroy(&tmp);CHKERRQ(ierr);
  }
  /* Create allocation vectors from adjacency graph */
  ierr = MatGetLocalSize(A, &locRows, NULL);CHKERRQ(ierr);
  ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)A), &rLayout);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(rLayout, locRows);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(rLayout, 1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(rLayout);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(rLayout, &rStart, &rEnd);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&rLayout);CHKERRQ(ierr);
  /* Only loop over blocks of rows */
  if (rStart%bs || rEnd%bs) SETERRQ3(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid layout [%d, %d) for matrix, must be divisible by block size %d", rStart, rEnd, bs);
  for (r = rStart/bs; r < rEnd/bs; ++r) {
    const PetscInt row = r*bs;
    PetscInt       numCols, cStart, c;

    ierr = PetscSectionGetDof(sectionAdj, row, &numCols);CHKERRQ(ierr);
    ierr = PetscSectionGetOffset(sectionAdj, row, &cStart);CHKERRQ(ierr);
    for (c = cStart; c < cStart+numCols; ++c) {
      if ((cols[c] >= rStart*bs) && (cols[c] < rEnd*bs)) {
        ++dnz[r-rStart];
        if (cols[c] >= row) ++dnzu[r-rStart];
      } else {
        ++onz[r-rStart];
        if (cols[c] >= row) ++onzu[r-rStart];
      }
    }
  }
  if (bs > 1) {
    for (r = 0; r < locRows/bs; ++r) {
      dnz[r]  /= bs;
      onz[r]  /= bs;
      dnzu[r] /= bs;
      onzu[r] /= bs;
    }
  }
  /* Set matrix pattern */
  ierr = MatXAIJSetPreallocation(A, bs, dnz, onz, dnzu, onzu);CHKERRQ(ierr);
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  /* Check for symmetric storage */
  ierr = MatGetType(A, &mtype);CHKERRQ(ierr);
  ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
  ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
  ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
  if (isSymBlock || isSymSeqBlock || isSymMPIBlock) {ierr = MatSetOption(A, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);}
  /* Fill matrix with zeros */
  if (fillMatrix) {
    PetscScalar *values;
    PetscInt     maxRowLen = 0;

    for (r = rStart; r < rEnd; ++r) {
      PetscInt len;

      ierr      = PetscSectionGetDof(sectionAdj, r, &len);CHKERRQ(ierr);
      maxRowLen = PetscMax(maxRowLen, len);
    }
    ierr = PetscCalloc1(maxRowLen, &values);CHKERRQ(ierr);
    for (r = rStart; r < rEnd; ++r) {
      PetscInt numCols, cStart;

      ierr = PetscSectionGetDof(sectionAdj, r, &numCols);CHKERRQ(ierr);
      ierr = PetscSectionGetOffset(sectionAdj, r, &cStart);CHKERRQ(ierr);
      ierr = MatSetValues(A, 1, &r, numCols, &cols[cStart], values, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  /* restore original useAnchors */
  ierr = DMPlexSetAdjacencyUseAnchors(dm,useAnchors);CHKERRQ(ierr);
  ierr = PetscSectionDestroy(&sectionAdj);CHKERRQ(ierr);
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 5
0
Arquivo: ex44.c Projeto: 00liujj/petsc
  -m # : the size of the vectors\n					\
  -n # : the numer of indices (with n<=m)\n				\
  -toFirst # : the starting index of the output vector for strided scatters\n \
  -toStep # : the step size into the output vector for strided scatters\n \
  -fromFirst # : the starting index of the input vector for strided scatters\n\
  -fromStep # : the step size into the input vector for strided scatters\n\n";

int main(int argc, char * argv[]) {

  Vec            X,Y;
  PetscInt       m,n,i,n1,n2;
  PetscInt       toFirst,toStep,fromFirst,fromStep;
  PetscInt       *idx,*idy;
  PetscBool      flg;
  IS             toISStrided,fromISStrided,toISGeneral,fromISGeneral;
  VecScatter     vscatSStoSS,vscatSStoSG,vscatSGtoSS,vscatSGtoSG;
  ScatterMode    mode;
  InsertMode     addv;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,0,help);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,"-m",&m,&flg);CHKERRQ(ierr);
  if (!flg) m = 100;

  ierr = PetscOptionsGetInt(NULL,"-n",&n,&flg);CHKERRQ(ierr);
  if (!flg) n = 30;

  ierr = PetscOptionsGetInt(NULL,"-toFirst",&toFirst,&flg);CHKERRQ(ierr);
  if (!flg) toFirst = 3;

  ierr = PetscOptionsGetInt(NULL,"-toStep",&toStep,&flg);CHKERRQ(ierr);
  if (!flg) toStep = 3;

  ierr = PetscOptionsGetInt(NULL,"-fromFirst",&fromFirst,&flg);CHKERRQ(ierr);
  if (!flg) fromFirst = 2;

  ierr = PetscOptionsGetInt(NULL,"-fromStep",&fromStep,&flg);CHKERRQ(ierr);
  if (!flg) fromStep = 2;

  if (n>m) {
    PetscPrintf(PETSC_COMM_WORLD,"The vector sizes are %d. The number of elements being scattered is %d\n",m,n);CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"Adjust the parameters such that m>=n\n");CHKERRQ(ierr);
  } else if (toFirst+(n-1)*toStep >=m) {
    PetscPrintf(PETSC_COMM_WORLD,"The vector sizes are %d. The number of elements being scattered is %d\n",m,n);CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"For the Strided Scatter, toFirst=%d and toStep=%d.\n",toFirst,toStep);CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"This produces an index (toFirst+(n-1)*toStep)>=m\n");CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"Adjust the parameterrs accordingly with -m, -n, -toFirst, or -toStep\n");CHKERRQ(ierr);
  } else if (fromFirst+(n-1)*fromStep>=m) {
    PetscPrintf(PETSC_COMM_WORLD,"The vector sizes are %d. The number of elements being scattered is %d\n",m,n);CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"For the Strided Scatter, fromFirst=%d and fromStep=%d.\n",fromFirst,toStep);CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"This produces an index (fromFirst+(n-1)*fromStep)>=m\n");CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"Adjust the parameterrs accordingly with -m, -n, -fromFirst, or -fromStep\n");CHKERRQ(ierr);
  } else {
    PetscPrintf(PETSC_COMM_WORLD,"m=%d\tn=%d\tfromFirst=%d\tfromStep=%d\ttoFirst=%d\ttoStep=%d\n",m,n,fromFirst,fromStep,toFirst,toStep);CHKERRQ(ierr);
    PetscPrintf(PETSC_COMM_WORLD,"fromFirst+(n-1)*fromStep=%d\ttoFirst+(n-1)*toStep=%d\n",fromFirst+(n-1)*fromStep,toFirst+(n-1)*toStep);CHKERRQ(ierr);

    /* Build the vectors */
    ierr = VecCreate(PETSC_COMM_WORLD,&Y);CHKERRQ(ierr);
    ierr = VecSetSizes(Y,m,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = VecCreate(PETSC_COMM_WORLD,&X);CHKERRQ(ierr);
    ierr = VecSetSizes(X,m,PETSC_DECIDE);CHKERRQ(ierr);

    ierr = VecSetFromOptions(Y);CHKERRQ(ierr);
    ierr = VecSetFromOptions(X);CHKERRQ(ierr);
    ierr = VecSet(X,2.0);CHKERRQ(ierr);
    ierr = VecSet(Y,1.0);CHKERRQ(ierr);
    
    /* Build the strided index sets */
    ierr = ISCreate(PETSC_COMM_WORLD,&toISStrided);CHKERRQ(ierr);
    ierr = ISCreate(PETSC_COMM_WORLD,&fromISStrided);CHKERRQ(ierr);
    ierr = ISSetType(toISStrided, ISSTRIDE);CHKERRQ(ierr);
    ierr = ISSetType(fromISStrided, ISSTRIDE);CHKERRQ(ierr);
    ierr = ISStrideSetStride(fromISStrided,n,fromFirst,fromStep);CHKERRQ(ierr);
    ierr = ISStrideSetStride(toISStrided,n,toFirst,toStep);CHKERRQ(ierr);
    
    /* Build the general index sets */
    ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr);
    ierr = PetscMalloc1(n,&idy);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      idx[i] = i % m;
      idy[i] = (i+m) % m;
    }
    n1 = n;
    n2 = n;
    ierr = PetscSortRemoveDupsInt(&n1,idx);CHKERRQ(ierr);
    ierr = PetscSortRemoveDupsInt(&n2,idy);CHKERRQ(ierr);

    ierr = ISCreateGeneral(PETSC_COMM_WORLD,n1,idx,PETSC_COPY_VALUES,&toISGeneral);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_WORLD,n2,idy,PETSC_COPY_VALUES,&fromISGeneral);CHKERRQ(ierr);
    
    /* set the mode and the insert/add parameter */
    mode = SCATTER_FORWARD;
    addv = ADD_VALUES;
    
    /* VecScatter : Seq Strided to Seq Strided */
    ierr = VecScatterCreate(X,fromISStrided,Y,toISStrided,&vscatSStoSS);CHKERRQ(ierr);
    ierr = VecScatterBegin(vscatSStoSS,X,Y,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterEnd(vscatSStoSS,X,Y,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vscatSStoSS);CHKERRQ(ierr);
    
    /* VecScatter : Seq General to Seq Strided */
    ierr = VecScatterCreate(Y,fromISGeneral,X,toISStrided,&vscatSGtoSS);CHKERRQ(ierr);
    ierr = VecScatterBegin(vscatSGtoSS,Y,X,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterEnd(vscatSGtoSS,Y,X,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vscatSGtoSS);CHKERRQ(ierr);
    
    /* VecScatter : Seq General to Seq General */
    ierr = VecScatterCreate(X,fromISGeneral,Y,toISGeneral,&vscatSGtoSG);CHKERRQ(ierr);
    ierr = VecScatterBegin(vscatSGtoSG,X,Y,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterEnd(vscatSGtoSG,X,Y,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vscatSGtoSG);CHKERRQ(ierr);
    
    /* VecScatter : Seq Strided to Seq General */
    ierr = VecScatterCreate(Y,fromISStrided,X,toISGeneral,&vscatSStoSG);CHKERRQ(ierr);
    ierr = VecScatterBegin(vscatSStoSG,Y,X,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterEnd(vscatSStoSG,Y,X,addv,mode);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vscatSStoSG);CHKERRQ(ierr);
    
    /* view the results */
    ierr = VecView(Y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    
    /* Cleanup */
    ierr = VecDestroy(&X);CHKERRQ(ierr);
    ierr = VecDestroy(&Y);CHKERRQ(ierr);
    ierr = ISDestroy(&toISStrided);CHKERRQ(ierr);
    ierr = ISDestroy(&fromISStrided);CHKERRQ(ierr);
    ierr = ISDestroy(&toISGeneral);CHKERRQ(ierr);
    ierr = ISDestroy(&fromISGeneral);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    ierr = PetscFree(idy);CHKERRQ(ierr);
  }
  PetscFinalize();

  return 0;
}
Exemplo n.º 6
0
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process
-  remotemode - copy mode for iremote

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size;
  PetscInt           i,*rcount,*ranks;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  if (nleaves && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves) PetscValidPointer(iremote,6);
  if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
  ierr        = PetscSFReset(sf);CHKERRQ(ierr);
  sf->nroots  = nroots;
  sf->nleaves = nleaves;
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr        = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      sf->mine    = sf->mine_alloc;
      ierr        = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr);
      sf->minleaf = PETSC_MAX_INT;
      sf->maxleaf = PETSC_MIN_INT;
      for (i=0; i<nleaves; i++) {
        sf->minleaf = PetscMin(sf->minleaf,ilocal[i]);
        sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]);
      }
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }
  if (!ilocal || nleaves > 0) {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr       = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    ierr       = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr);
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0; i<nleaves; i++) {
    PetscInt lo,hi,irank;
    /* Search for index of iremote[i].rank in sf->ranks */
    lo = 0; hi = sf->nranks;
    while (hi - lo > 1) {
      PetscInt mid = lo + (hi - lo)/2;
      if (iremote[i].rank < sf->ranks[mid]) hi = mid;
      else                                  lo = mid;
    }
    if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo;
    else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = ilocal ? ilocal[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
#if !defined(PETSC_USE_64BIT_INDICES)
  if (nroots == PETSC_DETERMINE) {
    /* Jed, if you have a better way to do this, put it in */
    PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0;

    /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */
    ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr);
    ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr);
    for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank];
    ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
    /* Could set nroots to this maximum */
    for (i = 0; i < size; ++i) maxRoots += numRankRoots[i];

    /* Gather all indices */
    ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr);
    leafOff[0] = 0;
    for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i];
    for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index;
    leafOff[0] = 0;
    for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i];
    rootOff[0] = 0;
    for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i];
    ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
    /* Sort and reduce */
    ierr       = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr);
    ierr       = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr);
    ierr       = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr);
    sf->nroots = maxRoots;
  }
#endif

  sf->graphset = PETSC_TRUE;
  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 7
0
static PetscErrorCode MatGetSubMatrices_MPIAdj_Private(Mat mat,PetscInt n,const IS irow[],const IS icol[],PetscBool subcomm,MatReuse scall,Mat *submat[])
{
  PetscInt           i,irow_n,icol_n,*sxadj,*sadjncy,*svalues;
  PetscInt          *indices,nindx,j,k,loc;
  PetscMPIInt        issame;
  const PetscInt    *irow_indices,*icol_indices;
  MPI_Comm           scomm_row,scomm_col,scomm_mat;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  nindx = 0;
  /*
   * Estimate a maximum number for allocating memory
   */
  for(i=0; i<n; i++){
    ierr = ISGetLocalSize(irow[i],&irow_n);CHKERRQ(ierr);
    ierr = ISGetLocalSize(icol[i],&icol_n);CHKERRQ(ierr);
    nindx = nindx>(irow_n+icol_n)? nindx:(irow_n+icol_n);
  }
  ierr = PetscCalloc1(nindx,&indices);CHKERRQ(ierr);
  /* construct a submat */
  for(i=0; i<n; i++){
	/*comms */
    if(subcomm){
	  ierr = PetscObjectGetComm((PetscObject)irow[i],&scomm_row);CHKERRQ(ierr);
	  ierr = PetscObjectGetComm((PetscObject)icol[i],&scomm_col);CHKERRQ(ierr);
	  ierr = MPI_Comm_compare(scomm_row,scomm_col,&issame);CHKERRQ(ierr);
	  if(issame != MPI_IDENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"row index set must have the same comm as the col index set\n");
	  ierr = MPI_Comm_compare(scomm_row,PETSC_COMM_SELF,&issame);CHKERRQ(ierr);
	  if(issame == MPI_IDENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP," can not use PETSC_COMM_SELF as comm when extracting a parallel submatrix\n");
	}else{
	  scomm_row = PETSC_COMM_SELF;
	}
	/*get sub-matrix data*/
	sxadj=0; sadjncy=0; svalues=0;
    ierr = MatGetSubMatrix_MPIAdj_data(mat,irow[i],icol[i],&sxadj,&sadjncy,&svalues);CHKERRQ(ierr);
    ierr = ISGetLocalSize(irow[i],&irow_n);CHKERRQ(ierr);
    ierr = ISGetLocalSize(icol[i],&icol_n);CHKERRQ(ierr);
    ierr = ISGetIndices(irow[i],&irow_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(indices,irow_indices,sizeof(PetscInt)*irow_n);CHKERRQ(ierr);
    ierr = ISRestoreIndices(irow[i],&irow_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(icol[i],&icol_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(indices+irow_n,icol_indices,sizeof(PetscInt)*icol_n);CHKERRQ(ierr);
    ierr = ISRestoreIndices(icol[i],&icol_indices);CHKERRQ(ierr);
    nindx = irow_n+icol_n;
    ierr = PetscSortRemoveDupsInt(&nindx,indices);CHKERRQ(ierr);
    /* renumber columns */
    for(j=0; j<irow_n; j++){
      for(k=sxadj[j]; k<sxadj[j+1]; k++){
    	ierr = PetscFindInt(sadjncy[k],nindx,indices,&loc);CHKERRQ(ierr);
#if PETSC_USE_DEBUG
    	if(loc<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"can not find col %d \n",sadjncy[k]);
#endif
        sadjncy[k] = loc;
      }
    }
    if(scall==MAT_INITIAL_MATRIX){
      ierr = MatCreateMPIAdj(scomm_row,irow_n,icol_n,sxadj,sadjncy,svalues,submat[i]);CHKERRQ(ierr);
    }else{
       Mat                sadj = *(submat[i]);
       Mat_MPIAdj         *sa  = (Mat_MPIAdj*)((sadj)->data);
       ierr = PetscObjectGetComm((PetscObject)sadj,&scomm_mat);CHKERRQ(ierr);
       ierr = MPI_Comm_compare(scomm_row,scomm_mat,&issame);CHKERRQ(ierr);
       if(issame != MPI_IDENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"submatrix  must have the same comm as the col index set\n");
       ierr = PetscMemcpy(sa->i,sxadj,sizeof(PetscInt)*(irow_n+1));CHKERRQ(ierr);
       ierr = PetscMemcpy(sa->j,sadjncy,sizeof(PetscInt)*sxadj[irow_n]);CHKERRQ(ierr);
       if(svalues){ierr = PetscMemcpy(sa->values,svalues,sizeof(PetscInt)*sxadj[irow_n]);CHKERRQ(ierr);}
       ierr = PetscFree(sxadj);CHKERRQ(ierr);
       ierr = PetscFree(sadjncy);CHKERRQ(ierr);
       if(svalues) {ierr = PetscFree(svalues);CHKERRQ(ierr);}
    }
  }
  ierr = PetscFree(indices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 8
0
PetscErrorCode PCBDDCGraphSetUp(PCBDDCGraph graph, PetscInt custom_minimal_size, IS neumann_is, IS dirichlet_is, PetscInt n_ISForDofs, IS ISForDofs[], IS custom_primal_vertices)
{
  IS             subset,subset_n;
  MPI_Comm       comm;
  const PetscInt *is_indices;
  PetscInt       n_neigh,*neigh,*n_shared,**shared,*queue_global;
  PetscInt       i,j,k,s,total_counts,nodes_touched,is_size;
  PetscMPIInt    commsize;
  PetscBool      same_set,mirrors_found;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(graph->l2gmap,custom_minimal_size,2);
  if (neumann_is) {
    PetscValidHeaderSpecific(neumann_is,IS_CLASSID,3);
    PetscCheckSameComm(graph->l2gmap,1,neumann_is,3);
  }
  graph->has_dirichlet = PETSC_FALSE;
  if (dirichlet_is) {
    PetscValidHeaderSpecific(dirichlet_is,IS_CLASSID,4);
    PetscCheckSameComm(graph->l2gmap,1,dirichlet_is,4);
    graph->has_dirichlet = PETSC_TRUE;
  }
  PetscValidLogicalCollectiveInt(graph->l2gmap,n_ISForDofs,5);
  for (i=0;i<n_ISForDofs;i++) {
    PetscValidHeaderSpecific(ISForDofs[i],IS_CLASSID,6);
    PetscCheckSameComm(graph->l2gmap,1,ISForDofs[i],6);
  }
  if (custom_primal_vertices) {
    PetscValidHeaderSpecific(custom_primal_vertices,IS_CLASSID,6);
    PetscCheckSameComm(graph->l2gmap,1,custom_primal_vertices,7);
  }
  ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);

  /* custom_minimal_size */
  graph->custom_minimal_size = custom_minimal_size;
  /* get info l2gmap and allocate work vectors  */
  ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr);
  /* check if we have any local periodic nodes (periodic BCs) */
  mirrors_found = PETSC_FALSE;
  if (graph->nvtxs && n_neigh) {
    for (i=0; i<n_shared[0]; i++) graph->count[shared[0][i]] += 1;
    for (i=0; i<n_shared[0]; i++) {
      if (graph->count[shared[0][i]] > 1) {
        mirrors_found = PETSC_TRUE;
        break;
      }
    }
  }
  /* compute local mirrors (if any) */
  if (mirrors_found) {
    IS       to,from;
    PetscInt *local_indices,*global_indices;

    ierr = ISCreateStride(PETSC_COMM_SELF,graph->nvtxs,0,1,&to);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,to,&from);CHKERRQ(ierr);
    /* get arrays of local and global indices */
    ierr = PetscMalloc1(graph->nvtxs,&local_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(local_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = ISRestoreIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMalloc1(graph->nvtxs,&global_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(global_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = ISRestoreIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* allocate space for mirrors */
    ierr = PetscMalloc2(graph->nvtxs,&graph->mirrors,graph->nvtxs,&graph->mirrors_set);CHKERRQ(ierr);
    ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    graph->mirrors_set[0] = 0;

    k=0;
    for (i=0;i<n_shared[0];i++) {
      j=shared[0][i];
      if (graph->count[j] > 1) {
        graph->mirrors[j]++;
        k++;
      }
    }
    /* allocate space for set of mirrors */
    ierr = PetscMalloc1(k,&graph->mirrors_set[0]);CHKERRQ(ierr);
    for (i=1;i<graph->nvtxs;i++)
      graph->mirrors_set[i]=graph->mirrors_set[i-1]+graph->mirrors[i-1];

    /* fill arrays */
    ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0;j<n_shared[0];j++) {
      i=shared[0][j];
      if (graph->count[i] > 1)
        graph->mirrors_set[i][graph->mirrors[i]++]=global_indices[i];
    }
    ierr = PetscSortIntWithArray(graph->nvtxs,global_indices,local_indices);CHKERRQ(ierr);
    for (i=0;i<graph->nvtxs;i++) {
      if (graph->mirrors[i] > 0) {
        ierr = PetscFindInt(graph->mirrors_set[i][0],graph->nvtxs,global_indices,&k);CHKERRQ(ierr);
        j = global_indices[k];
        while ( k > 0 && global_indices[k-1] == j) k--;
        for (j=0;j<graph->mirrors[i];j++) {
          graph->mirrors_set[i][j]=local_indices[k+j];
        }
        ierr = PetscSortInt(graph->mirrors[i],graph->mirrors_set[i]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(local_indices);CHKERRQ(ierr);
    ierr = PetscFree(global_indices);CHKERRQ(ierr);
    ierr = ISDestroy(&to);CHKERRQ(ierr);
    ierr = ISDestroy(&from);CHKERRQ(ierr);
  }
  ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr);

  /* Count total number of neigh per node */
  k = 0;
  for (i=1;i<n_neigh;i++) {
    k += n_shared[i];
    for (j=0;j<n_shared[i];j++) {
      graph->count[shared[i][j]] += 1;
    }
  }
  /* Allocate space for storing the set of neighbours for each node */
  if (graph->nvtxs) {
    ierr = PetscMalloc1(k,&graph->neighbours_set[0]);CHKERRQ(ierr);
  }
  for (i=1;i<graph->nvtxs;i++) { /* dont count myself */
    graph->neighbours_set[i]=graph->neighbours_set[i-1]+graph->count[i-1];
  }
  /* Get information for sharing subdomains */
  ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr);
  for (i=1;i<n_neigh;i++) { /* dont count myself */
    s = n_shared[i];
    for (j=0;j<s;j++) {
      k = shared[i][j];
      graph->neighbours_set[k][graph->count[k]] = neigh[i];
      graph->count[k] += 1;
    }
  }
  /* sort set of sharing subdomains */
  for (i=0;i<graph->nvtxs;i++) {
    ierr = PetscSortRemoveDupsInt(&graph->count[i],graph->neighbours_set[i]);CHKERRQ(ierr);
  }
  /* free memory allocated by ISLocalToGlobalMappingGetInfo */
  ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr);

  /*
     Get info for dofs splitting
     User can specify just a subset; an additional field is considered as a complementary field
  */
  for (i=0;i<graph->nvtxs;i++) graph->which_dof[i] = n_ISForDofs; /* by default a dof belongs to the complement set */
  for (i=0;i<n_ISForDofs;i++) {
    ierr = ISGetLocalSize(ISForDofs[i],&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (j=0;j<is_size;j++) {
      if (is_indices[j] > -1 && is_indices[j] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        graph->which_dof[is_indices[j]] = i;
      }
    }
    ierr = ISRestoreIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }

  /* Take into account Neumann nodes */
  if (neumann_is) {
    ierr = ISGetLocalSize(neumann_is,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0;i<is_size;i++) {
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_NEUMANN_MARK;
      }
    }
    ierr = ISRestoreIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* Take into account Dirichlet nodes (they overwrite any neumann boundary mark previously set) */
  if (dirichlet_is) {
    ierr = ISGetLocalSize(dirichlet_is,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0;i<is_size;i++){
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        if (commsize > graph->commsizelimit) { /* dirichlet nodes treated as internal */
          ierr = PetscBTSet(graph->touched,is_indices[i]);CHKERRQ(ierr);
          graph->subset[is_indices[i]] = 0;
        }
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_DIRICHLET_MARK;
      }
    }
    ierr = ISRestoreIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* mark local periodic nodes (if any) and adapt CSR graph (if any) */
  if (graph->mirrors) {
    for (i=0;i<graph->nvtxs;i++)
      if (graph->mirrors[i])
        graph->special_dof[i] = PCBDDCGRAPH_LOCAL_PERIODIC_MARK;

    if (graph->xadj) {
      PetscInt *new_xadj,*new_adjncy;
      /* sort CSR graph */
      for (i=0;i<graph->nvtxs;i++)
        ierr = PetscSortInt(graph->xadj[i+1]-graph->xadj[i],&graph->adjncy[graph->xadj[i]]);CHKERRQ(ierr);

      /* adapt local CSR graph in case of local periodicity */
      k = 0;
      for (i=0;i<graph->nvtxs;i++)
        for (j=graph->xadj[i];j<graph->xadj[i+1];j++)
          k += graph->mirrors[graph->adjncy[j]];

      ierr = PetscMalloc1(graph->nvtxs+1,&new_xadj);CHKERRQ(ierr);
      ierr = PetscMalloc1(k+graph->xadj[graph->nvtxs],&new_adjncy);CHKERRQ(ierr);
      new_xadj[0] = 0;
      for (i=0;i<graph->nvtxs;i++) {
        k = graph->xadj[i+1]-graph->xadj[i];
        ierr = PetscMemcpy(&new_adjncy[new_xadj[i]],&graph->adjncy[graph->xadj[i]],k*sizeof(PetscInt));CHKERRQ(ierr);
        new_xadj[i+1] = new_xadj[i]+k;
        for (j=graph->xadj[i];j<graph->xadj[i+1];j++) {
          k = graph->mirrors[graph->adjncy[j]];
          ierr = PetscMemcpy(&new_adjncy[new_xadj[i+1]],graph->mirrors_set[graph->adjncy[j]],k*sizeof(PetscInt));CHKERRQ(ierr);
          new_xadj[i+1] += k;
        }
        k = new_xadj[i+1]-new_xadj[i];
        ierr = PetscSortRemoveDupsInt(&k,&new_adjncy[new_xadj[i]]);CHKERRQ(ierr);
        new_xadj[i+1] = new_xadj[i]+k;
      }
      /* set new CSR into graph */
      ierr = PetscFree(graph->xadj);CHKERRQ(ierr);
      ierr = PetscFree(graph->adjncy);CHKERRQ(ierr);
      graph->xadj = new_xadj;
      graph->adjncy = new_adjncy;
    }
  }

  /* mark special nodes (if any) -> each will become a single node equivalence class */
  if (custom_primal_vertices) {
    ierr = ISGetLocalSize(custom_primal_vertices,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0,j=0;i<is_size;i++){
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs  && graph->special_dof[is_indices[i]] != PCBDDCGRAPH_DIRICHLET_MARK) { /* out of bounds indices (if any) are skipped */
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_SPECIAL_MARK-j;
        j++;
      }
    }
    ierr = ISRestoreIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }

  /* mark interior nodes (if commsize > graph->commsizelimit) as touched and belonging to partition number 0 */
  if (commsize > graph->commsizelimit) {
    for (i=0;i<graph->nvtxs;i++) {
      if (!graph->count[i]) {
        ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr);
        graph->subset[i] = 0;
      }
    }
  }

  /* init graph structure and compute default subsets */
  nodes_touched = 0;
  for (i=0;i<graph->nvtxs;i++) {
    if (PetscBTLookup(graph->touched,i)) {
      nodes_touched++;
    }
  }
  i = 0;
  graph->ncc = 0;
  total_counts = 0;

  /* allocated space for queues */
  if (commsize == graph->commsizelimit) {
    ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,graph->nvtxs,&graph->queue);CHKERRQ(ierr);
  } else {
    PetscInt nused = graph->nvtxs - nodes_touched;
    ierr = PetscMalloc2(nused+1,&graph->cptr,nused,&graph->queue);CHKERRQ(ierr);
  }

  while (nodes_touched<graph->nvtxs) {
    /*  find first untouched node in local ordering */
    while (PetscBTLookup(graph->touched,i)) i++;
    ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr);
    graph->subset[i] = graph->ncc+1;
    graph->cptr[graph->ncc] = total_counts;
    graph->queue[total_counts] = i;
    total_counts++;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for (j=i+1;j<graph->nvtxs;j++) {
      /* check for same number of sharing subdomains, dof number and same special mark */
      if (!PetscBTLookup(graph->touched,j) && graph->count[i] == graph->count[j] && graph->which_dof[i] == graph->which_dof[j] && graph->special_dof[i] == graph->special_dof[j]) {
        /* check for same set of sharing subdomains */
        same_set = PETSC_TRUE;
        for (k=0;k<graph->count[j];k++){
          if (graph->neighbours_set[i][k] != graph->neighbours_set[j][k]) {
            same_set = PETSC_FALSE;
          }
        }
        /* I found a friend of mine */
        if (same_set) {
          ierr = PetscBTSet(graph->touched,j);CHKERRQ(ierr);
          graph->subset[j] = graph->ncc+1;
          nodes_touched++;
          graph->queue[total_counts] = j;
          total_counts++;
        }
      }
    }
    graph->ncc++;
  }
  /* set default number of subsets (at this point no info on csr and/or local_subs has been taken into account, so n_subsets = ncc */
  graph->n_subsets = graph->ncc;
  ierr = PetscMalloc1(graph->n_subsets,&graph->subset_ncc);CHKERRQ(ierr);
  for (i=0;i<graph->n_subsets;i++) {
    graph->subset_ncc[i] = 1;
  }
  /* final pointer */
  graph->cptr[graph->ncc] = total_counts;

  /* For consistency reasons (among neighbours), I need to sort (by global ordering) each connected component */
  /* Get a reference node (min index in global ordering) for each subset for tagging messages */
  ierr = PetscMalloc1(graph->ncc,&graph->subset_ref_node);CHKERRQ(ierr);
  ierr = PetscMalloc1(graph->cptr[graph->ncc],&queue_global);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(graph->l2gmap,graph->cptr[graph->ncc],graph->queue,queue_global);CHKERRQ(ierr);
  for (j=0;j<graph->ncc;j++) {
    ierr = PetscSortIntWithArray(graph->cptr[j+1]-graph->cptr[j],&queue_global[graph->cptr[j]],&graph->queue[graph->cptr[j]]);CHKERRQ(ierr);
    graph->subset_ref_node[j] = graph->queue[graph->cptr[j]];
  }
  ierr = PetscFree(queue_global);CHKERRQ(ierr);
  graph->queue_sorted = PETSC_TRUE;

  /* save information on subsets (needed when analyzing the connected components) */
  if (graph->ncc) {
    ierr = PetscMalloc2(graph->ncc,&graph->subset_size,graph->ncc,&graph->subset_idxs);CHKERRQ(ierr);
    ierr = PetscMalloc1(graph->cptr[graph->ncc],&graph->subset_idxs[0]);CHKERRQ(ierr);
    ierr = PetscMemzero(graph->subset_idxs[0],graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=1;j<graph->ncc;j++) {
      graph->subset_size[j-1] = graph->cptr[j] - graph->cptr[j-1];
      graph->subset_idxs[j] = graph->subset_idxs[j-1] + graph->subset_size[j-1];
    }
    graph->subset_size[graph->ncc-1] = graph->cptr[graph->ncc] - graph->cptr[graph->ncc-1];
    ierr = PetscMemcpy(graph->subset_idxs[0],graph->queue,graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr);
  }

  /* renumber reference nodes */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(graph->l2gmap)),graph->ncc,graph->subset_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,subset_n,&subset);CHKERRQ(ierr);
  ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
  ierr = ISRenumber(subset,NULL,NULL,&subset_n);CHKERRQ(ierr);
  ierr = ISDestroy(&subset);CHKERRQ(ierr);
  ierr = ISGetLocalSize(subset_n,&k);CHKERRQ(ierr);
  if (k != graph->ncc) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",k,graph->ncc);
  ierr = ISGetIndices(subset_n,&is_indices);CHKERRQ(ierr);
  ierr = PetscMemcpy(graph->subset_ref_node,is_indices,graph->ncc*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISRestoreIndices(subset_n,&is_indices);CHKERRQ(ierr);
  ierr = ISDestroy(&subset_n);CHKERRQ(ierr);

  /* free workspace */
  graph->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
Exemplo n.º 9
0
/*@
   VecScatterInitializeForGPU - Initializes a generalized scatter from one vector to
 another for GPU based computation.  Effectively, this function creates all the
 necessary indexing buffers and work vectors needed to move data only those data points
 in a vector which need to be communicated across ranks. This is done at the first time
 this function is called. Thereafter, this function launches a kernel,
 VecCUSPCopySomeToContiguousBufferGPU_Public, which moves the scattered data into a
 contiguous buffer on the GPU. Currently, this only used in the context of the parallel
 SpMV call in MatMult_MPIAIJCUSP (in mpi/mpicusp/mpiaijcusp.cu) or MatMult_MPIAIJCUSPARSE
 (in mpi/mpicusparse/mpiaijcusparse.cu). This function is executed before the call to
 MatMult. This enables the memory transfers to be overlapped with the MatMult SpMV kernel
 call.

   Input Parameters:
+  inctx - scatter context generated by VecScatterCreate()
.  x - the vector from which we scatter
-  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
    SCATTER_FORWARD or SCATTER_REVERSE

  Level: intermediate

.seealso: VecScatterCreate(), VecScatterEnd()
@*/
PetscErrorCode  VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;

  PetscFunctionBegin;
  if (mode & SCATTER_REVERSE) {
    to   = (VecScatter_MPI_General*)inctx->fromdata;
    from = (VecScatter_MPI_General*)inctx->todata;
    rwaits   = from->rev_requests;
    swaits   = to->rev_requests;
  } else {
    to   = (VecScatter_MPI_General*)inctx->todata;
    from = (VecScatter_MPI_General*)inctx->fromdata;
    rwaits   = from->requests;
    swaits   = to->requests;
  }
  bs       = to->bs;
  svalues  = to->values;
  nrecvs   = from->n;
  nsends   = to->n;
  indices  = to->indices;
  sstartsSends  = to->starts;
  sstartsRecvs  = from->starts;
  if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0))
  {
    if (!inctx->spptr) {
      PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
      PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
      /* Here we create indices for both the senders and receivers. */
      ierr = PetscMalloc(ns*sizeof(PetscInt),&tindicesSends);CHKERRQ(ierr);
      ierr = PetscMalloc(nr*sizeof(PetscInt),&tindicesRecvs);CHKERRQ(ierr);

      ierr = PetscMemcpy(tindicesSends,to->indices,ns*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));CHKERRQ(ierr);

      ierr = PetscSortRemoveDupsInt(&ns,tindicesSends);CHKERRQ(ierr);
      ierr = PetscSortRemoveDupsInt(&nr,tindicesRecvs);CHKERRQ(ierr);

      ierr = PetscMalloc(to->bs*ns*sizeof(PetscInt),&sindicesSends);CHKERRQ(ierr);
      ierr = PetscMalloc(from->bs*nr*sizeof(PetscInt),&sindicesRecvs);CHKERRQ(ierr);

      /* sender indices */
      for (i=0; i<ns; i++) {
        for (k=0; k<to->bs; k++) {
          sindicesSends[i*to->bs+k] = tindicesSends[i]+k;
        }
      }
      ierr = PetscFree(tindicesSends);CHKERRQ(ierr);

      /* receiver indices */
      for (i=0; i<nr; i++) {
        for (k=0; k<from->bs; k++) {
          sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
        }
      }
      ierr = PetscFree(tindicesRecvs);CHKERRQ(ierr);

      /* create GPU indices, work vectors, ... */
      ierr = PetscCUSPIndicesCreate(ns*to->bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);CHKERRQ(ierr);
      ierr = PetscFree(sindicesSends);CHKERRQ(ierr);
      ierr = PetscFree(sindicesRecvs);CHKERRQ(ierr);
    }
    /*
     This should be called here.
     ... basically, we launch the copy kernel that takes the scattered data and puts it in a
         a contiguous buffer. Then, this buffer is messaged after the MatMult is called.
     */
#if 0 /* Paul, why did you leave this line commented after writing the note above explaining why it should be called? */
    /* I couldn't make this version run more efficiently. In theory, I would like to do it this way
       since the amount of data transfer between GPU and CPU is reduced. However, gather kernels
       really don't perform very well on the device. Thus, what I do is message (from GPU to CPU) the
       smallest contiguous chunk of the vector containing all those elements needing to be MPI-messaged.
       I would like to leave this code in here for now ... maybe I'll figure out how to do a better
       gather kernel on GPU. */
    ierr = VecCUSPCopySomeToContiguousBufferGPU_Public(x,(PetscCUSPIndices)inctx->spptr);CHKERRQ(ierr);
#endif
    } else {
    ierr = VecGetArrayRead(x,(const PetscScalar**)&xv);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 10
0
/*@
   VecScatterInitializeForGPU - Initializes a generalized scatter from one vector to
 another for GPU based computation.  Effectively, this function creates all the
 necessary indexing buffers and work vectors needed to move data only those data points
 in a vector which need to be communicated across ranks. This is done at the first time
 this function is called. Currently, this only used in the context of the parallel
 SpMV call in MatMult_MPIAIJCUSP (in mpi/mpicusp/mpiaijcusp.cu) or MatMult_MPIAIJCUSPARSE
 (in mpi/mpicusparse/mpiaijcusparse.cu). This function is executed before the call to
 MatMult. This enables the memory transfers to be overlapped with the MatMult SpMV kernel
 call.

   Input Parameters:
+  inctx - scatter context generated by VecScatterCreate()
.  x - the vector from which we scatter
-  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
    SCATTER_FORWARD or SCATTER_REVERSE

  Level: intermediate

.seealso: VecScatterCreate(), VecScatterEnd()
@*/
PetscErrorCode  VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
  PetscBool              isSeq1,isSeq2;

  PetscFunctionBegin;
  ierr = VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);CHKERRQ(ierr);
  ierr = VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);CHKERRQ(ierr);
  if (isSeq1 || isSeq2) {
    PetscFunctionReturn(0);
  }
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)inctx->fromdata;
    from   = (VecScatter_MPI_General*)inctx->todata;
  } else {
    to     = (VecScatter_MPI_General*)inctx->todata;
    from   = (VecScatter_MPI_General*)inctx->fromdata;
  }
  bs           = to->bs;
  nrecvs       = from->n;
  nsends       = to->n;
  indices      = to->indices;
  sstartsSends = to->starts;
  sstartsRecvs = from->starts;
  if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
    if (!inctx->spptr) {
      PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
      PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
      /* Here we create indices for both the senders and receivers. */
      ierr = PetscMalloc1(ns,&tindicesSends);CHKERRQ(ierr);
      ierr = PetscMalloc1(nr,&tindicesRecvs);CHKERRQ(ierr);

      ierr = PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));CHKERRQ(ierr);

      ierr = PetscSortRemoveDupsInt(&ns,tindicesSends);CHKERRQ(ierr);
      ierr = PetscSortRemoveDupsInt(&nr,tindicesRecvs);CHKERRQ(ierr);

      ierr = PetscMalloc1(bs*ns,&sindicesSends);CHKERRQ(ierr);
      ierr = PetscMalloc1(from->bs*nr,&sindicesRecvs);CHKERRQ(ierr);

      /* sender indices */
      for (i=0; i<ns; i++) {
        for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
      }
      ierr = PetscFree(tindicesSends);CHKERRQ(ierr);

      /* receiver indices */
      for (i=0; i<nr; i++) {
        for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
      }
      ierr = PetscFree(tindicesRecvs);CHKERRQ(ierr);

      /* create GPU indices, work vectors, ... */
      ierr = VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);CHKERRQ(ierr);
      ierr = PetscFree(sindicesSends);CHKERRQ(ierr);
      ierr = PetscFree(sindicesRecvs);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}