Example #1
static PetscErrorCode ISInvertPermutation_Block(IS is,PetscInt nlocal,IS *isout)
{
    IS_Block       *sub = (IS_Block*)is->data;
    PetscInt       i,*ii,bs,n,*idx = sub->idx;
    PetscMPIInt    size;
    PetscErrorCode ierr;

    PetscFunctionBegin;
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)is),&size);
    CHKERRQ(ierr);
    ierr = PetscLayoutGetBlockSize(is->map, &bs);
    CHKERRQ(ierr);
    ierr = PetscLayoutGetLocalSize(is->map, &n);
    CHKERRQ(ierr);
    n   /= bs;
    if (size == 1) {
        ierr = PetscMalloc1(n,&ii);
        CHKERRQ(ierr);
        for (i=0; i<n; i++) ii[idx[i]] = i;
        ierr = ISCreateBlock(PETSC_COMM_SELF,bs,n,ii,PETSC_OWN_POINTER,isout);
        CHKERRQ(ierr);
        ierr = ISSetPermutation(*isout);
        CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No inversion written yet for block IS");
    PetscFunctionReturn(0);
}
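
The loop ii[idx[i]] = i computes the inverse block permutation, e.g. idx = {2,0,1} inverts to {1,2,0}. A minimal serial driver (hypothetical, not part of the PETSc sources; the block size 4 is arbitrary) that reaches this code path through the public ISInvertPermutation() interface could look like:

#include <petscis.h>

int main(int argc,char **argv)
{
  IS             is,isinv;
  PetscInt       idx[3] = {2,0,1};  /* a permutation of 3 blocks of size 4 */
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = ISCreateBlock(PETSC_COMM_SELF,4,3,idx,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);
  ierr = ISSetPermutation(is);CHKERRQ(ierr);
  ierr = ISInvertPermutation(is,PETSC_DECIDE,&isinv);CHKERRQ(ierr); /* dispatches to ISInvertPermutation_Block */
  ierr = ISView(isinv,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);      /* expect blocks {1,2,0} */
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = ISDestroy(&isinv);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}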
Example #2
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       i,blocks[2],nlocal;
  PetscMPIInt    size,rank;
  PetscScalar    value;
  Vec            x,y;
  IS             is1,is2;
  VecScatter     ctx = 0;
  PetscViewer    subviewer;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  if (size != 2) SETERRQ(PETSC_COMM_SELF,1,"Must run with 2 processors");

  /* create two vectors */
  if (!rank) nlocal = 8;
  else nlocal = 4;
  ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,nlocal,12);CHKERRQ(ierr);
  ierr = VecSetFromOptions(x);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_SELF,&y);CHKERRQ(ierr);
  ierr = VecSetSizes(y,8,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(y);CHKERRQ(ierr);

  /* create two index sets */
  if (!rank) {
    blocks[0] = 0; blocks[1] = 2;
  } else {
    blocks[0] = 1; blocks[1] = 2;
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,4,2,blocks,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,8,0,1,&is2);CHKERRQ(ierr);

  for (i=0; i<12; i++) {
    value = i;
    ierr  = VecSetValues(x,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(x);CHKERRQ(ierr);

  ierr = VecScatterCreateWithData(x,is1,y,is2,&ctx);CHKERRQ(ierr);
  ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);

  ierr = PetscViewerGetSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&subviewer);CHKERRQ(ierr);
  ierr = VecView(y,subviewer);CHKERRQ(ierr);
  ierr = PetscViewerRestoreSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&subviewer);CHKERRQ(ierr);

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = ISDestroy(&is1);CHKERRQ(ierr);
  ierr = ISDestroy(&is2);CHKERRQ(ierr);

  ierr = PetscFinalize();
  return ierr;
}
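
Note on the block indexing above: with bs = 4, block index b selects entries 4*b through 4*b+3, so rank 0's blocks {0,2} pull entries 0-3 and 8-11 of x while rank 1's blocks {1,2} pull entries 4-7 and 8-11; entries 8-11 are therefore delivered to the local vector y on both ranks.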
Example #3
PetscErrorCode ISDuplicate_Block(IS is,IS *newIS)
{
  PetscErrorCode ierr;
  IS_Block       *sub = (IS_Block*)is->data;

  PetscFunctionBegin;
  ierr = ISCreateBlock(PetscObjectComm((PetscObject)is),is->bs,sub->n,sub->idx,PETSC_COPY_VALUES,newIS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #4
static PetscErrorCode ISOnComm_Block(IS is,MPI_Comm comm,PetscCopyMode mode,IS *newis)
{
  PetscErrorCode ierr;
  IS_Block       *sub = (IS_Block*)is->data;

  PetscFunctionBegin;
  if (mode == PETSC_OWN_POINTER) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_OWN_POINTER");
  ierr = ISCreateBlock(comm,is->bs,sub->n,sub->idx,mode,newis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #5
/*@C
   VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
   the caller allocates the array space. Indices in the ghost region are based on blocks.

   Collective on MPI_Comm

   Input Parameters:
+  comm - the MPI communicator to use
.  bs - block size
.  n - local vector length
.  N - global vector length (or PETSC_DECIDE to have it calculated if n is given)
.  nghost - number of local ghost blocks
.  ghosts - global indices of ghost blocks (or PETSC_NULL if not needed); these are block indices, not entry indices, and need not be in increasing (sorted) order
-  array - the space to store the vector values (must have length at least n + nghost*bs)

   Output Parameter:
.  vv - the global vector representation (without ghost points as part of vector)

   Notes:
   Use VecGhostGetLocalForm() to access the local, ghosted representation
   of the vector.

   n is the local vector size (the total local size, not the number of blocks), while
   nghost is the number of blocks in the ghost portion, i.e. the number of elements in
   the ghost portion is bs*nghost.

   Level: advanced

   Concepts: vectors^creating ghosted
   Concepts: vectors^creating with array

.seealso: VecCreate(), VecGhostGetLocalForm(), VecGhostRestoreLocalForm(),
          VecCreateGhost(), VecCreateSeqWithArray(), VecCreateMPIWithArray(),
          VecCreateGhostWithArray(), VecCreateGhostBlock()

@*/
PetscErrorCode  VecCreateGhostBlockWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,PetscInt nghost,const PetscInt ghosts[],const PetscScalar array[],Vec *vv)
{
  PetscErrorCode         ierr;
  Vec_MPI                *w;
  PetscScalar            *larray;
  IS                     from,to;
  ISLocalToGlobalMapping ltog;
  PetscInt               rstart,i,nb,*indices;

  PetscFunctionBegin;
  *vv = 0;

  if (n == PETSC_DECIDE)      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local size");
  if (nghost == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local ghost size");
  if (nghost < 0)             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ghost length must be >= 0");
  if (n % bs)                 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local size must be a multiple of block size");
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  /* Create global representation */
  ierr = VecCreate(comm,vv);CHKERRQ(ierr);
  ierr = VecSetSizes(*vv,n,N);CHKERRQ(ierr);
  ierr = VecSetBlockSize(*vv,bs);CHKERRQ(ierr);
  ierr = VecCreate_MPI_Private(*vv,PETSC_TRUE,nghost*bs,array);CHKERRQ(ierr);
  w    = (Vec_MPI *)(*vv)->data;
  /* Create local representation */
  ierr = VecGetArray(*vv,&larray);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs,n+bs*nghost,larray,&w->localrep);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(*vv,w->localrep);CHKERRQ(ierr);
  ierr = VecRestoreArray(*vv,&larray);CHKERRQ(ierr);

  /*
       Create scatter context for scattering (updating) ghost values
  */
  ierr = ISCreateBlock(comm,bs,nghost,ghosts,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,bs*nghost,n,1,&to);CHKERRQ(ierr);
  ierr = VecScatterCreate(*vv,from,w->localrep,to,&w->localupdate);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(*vv,w->localupdate);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);

  /* set local to global mapping for ghosted vector */
  nb = n/bs;
  ierr = PetscMalloc((nb+nghost)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(*vv,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nb; i++) {
    indices[i] = rstart + i*bs;
  }
  for (i=0; i<nghost; i++) {
    indices[nb+i] = ghosts[i];
  }
  ierr = ISLocalToGlobalMappingCreate(comm,nb+nghost,indices,PETSC_OWN_POINTER,&ltog);CHKERRQ(ierr);
  ierr = VecSetLocalToGlobalMappingBlock(*vv,ltog);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
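
A hedged usage sketch (hypothetical sizes, assumes exactly two MPI ranks) illustrating the notes above: the caller's array must hold n + nghost*bs entries, and ghosts[] is numbered by block, not by entry:

#include <petscvec.h>

int main(int argc,char **argv)
{
  Vec            vv;
  PetscMPIInt    rank,size;
  PetscInt       bs = 2,n = 6,nghost = 2,ghosts[2];
  PetscScalar    *array;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  if (size != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Sketch assumes exactly 2 ranks");
  /* each rank owns 3 blocks of size 2 (global blocks 0-5); ghost the other rank's first two blocks */
  ghosts[0] = rank ? 0 : 3;
  ghosts[1] = rank ? 1 : 4;
  ierr = PetscMalloc1(n+nghost*bs,&array);CHKERRQ(ierr); /* n + nghost*bs = 10 entries */
  ierr = VecCreateGhostBlockWithArray(PETSC_COMM_WORLD,bs,n,PETSC_DECIDE,nghost,ghosts,array,&vv);CHKERRQ(ierr);
  ierr = VecSet(vv,(PetscScalar)rank);CHKERRQ(ierr);
  ierr = VecGhostUpdateBegin(vv,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGhostUpdateEnd(vv,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&vv);CHKERRQ(ierr);
  ierr = PetscFree(array);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}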
Example #6
static PetscErrorCode ISOnComm_Block(IS is,MPI_Comm comm,PetscCopyMode mode,IS *newis)
{
  PetscErrorCode ierr;
  IS_Block       *sub = (IS_Block*)is->data;
  PetscInt       bs, n;

  PetscFunctionBegin;
  if (mode == PETSC_OWN_POINTER) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_OWN_POINTER");
  ierr = PetscLayoutGetBlockSize(is->map, &bs);CHKERRQ(ierr);
  ierr = PetscLayoutGetLocalSize(is->map, &n);CHKERRQ(ierr);
  ierr = ISCreateBlock(comm,bs,n/bs,sub->idx,mode,newis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #7
PetscErrorCode ISDuplicate_Block(IS is,IS *newIS)
{
  PetscErrorCode ierr;
  IS_Block       *sub = (IS_Block*)is->data;
  PetscInt        bs, n;

  PetscFunctionBegin;
  ierr = PetscLayoutGetBlockSize(is->map, &bs);CHKERRQ(ierr);
  ierr = PetscLayoutGetLocalSize(is->map, &n);CHKERRQ(ierr);
  n   /= bs;
  ierr = ISCreateBlock(PetscObjectComm((PetscObject)is),bs,n,sub->idx,PETSC_COPY_VALUES,newIS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #8
File: ex1.c, Project: petsc/petsc
static PetscErrorCode ISGetBlockGlobalIS(IS is, Vec vec, PetscInt bs, IS *isBlockGlobal)
{
  const PetscInt *idxin;
  PetscInt       *idxout, i, n, rstart;
  PetscLayout    map;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  ierr = VecGetLayout(vec,&map);CHKERRQ(ierr);
  rstart = map->rstart / bs;
  ierr = ISGetLocalSize(is, &n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n, &idxout);CHKERRQ(ierr);
  ierr = ISGetIndices(is, &idxin);CHKERRQ(ierr);
  for (i = 0; i < n; i++) idxout[i] = rstart + idxin[i];
  ierr = ISRestoreIndices(is, &idxin);CHKERRQ(ierr);
  ierr = ISCreateBlock(PetscObjectComm((PetscObject)vec),bs,n,idxout,PETSC_OWN_POINTER,isBlockGlobal);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
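
Here map->rstart is the global index of the first vector entry owned by this process, so rstart = map->rstart/bs is its first owned block; adding it to each local block index converts is to the global block numbering that ISCreateBlock() expects.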
Example #9
static PetscErrorCode MatPartitioningApply_PTScotch_Private(MatPartitioning part, PetscBool useND, IS *partitioning)
{
  MPI_Comm                 pcomm,comm;
  MatPartitioning_PTScotch *scotch = (MatPartitioning_PTScotch*)part->data;
  PetscErrorCode           ierr;
  PetscMPIInt              rank;
  Mat                      mat  = part->adj;
  Mat_MPIAdj               *adj = (Mat_MPIAdj*)mat->data;
  PetscBool                flg,distributed;
  PetscBool                proc_weight_flg;
  PetscInt                 i,j,p,bs=1,nold;
  PetscInt                 *NDorder = NULL;
  PetscReal                *vwgttab,deltval;
  SCOTCH_Num               *locals,*velotab,*veloloctab,*edloloctab,vertlocnbr,edgelocnbr,nparts=part->n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)part,&pcomm);CHKERRQ(ierr);
  /* Duplicate the communicator to be sure that PTSCOTCH attribute caching does not interfere with PETSc. */
  ierr = MPI_Comm_dup(pcomm,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIADJ,&flg);CHKERRQ(ierr);
  if (!flg) {
    /* bs indicates if the converted matrix is "reduced" from the original and hence the
       resulting partition results need to be stretched to match the original matrix */
    nold = mat->rmap->n;
    ierr = MatConvert(mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr);
    if (mat->rmap->n > 0) bs = nold/mat->rmap->n;
    adj  = (Mat_MPIAdj*)mat->data;
  }

  proc_weight_flg = PETSC_TRUE;
  ierr = PetscOptionsGetBool(NULL, NULL, "-mat_partitioning_ptscotch_proc_weight", &proc_weight_flg, NULL);CHKERRQ(ierr);

  ierr = PetscMalloc1(mat->rmap->n+1,&locals);CHKERRQ(ierr);

  if (useND) {
#if defined(PETSC_HAVE_SCOTCH_PARMETIS_V3_NODEND)
    PetscInt    *sizes, *seps, log2size, subd, *level, base = 0;
    PetscMPIInt size;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    log2size = PetscLog2Real(size);
    subd = PetscPowInt(2,log2size);
    if (subd != size) SETERRQ(comm,PETSC_ERR_SUP,"Only power of 2 communicator sizes");
    ierr = PetscMalloc1(mat->rmap->n,&NDorder);CHKERRQ(ierr);
    ierr = PetscMalloc3(2*size,&sizes,4*size,&seps,size,&level);CHKERRQ(ierr);
    SCOTCH_ParMETIS_V3_NodeND(mat->rmap->range,adj->i,adj->j,&base,NULL,NDorder,sizes,&comm);
    ierr = MatPartitioningSizesToSep_Private(subd,sizes,seps,level);CHKERRQ(ierr);
    for (i=0;i<mat->rmap->n;i++) {
      PetscInt loc;

      ierr = PetscFindInt(NDorder[i],2*subd,seps,&loc);CHKERRQ(ierr);
      if (loc < 0) {
        loc = -(loc+1);
        if (loc%2) { /* part of subdomain */
          locals[i] = loc/2;
        } else {
          ierr = PetscFindInt(NDorder[i],2*(subd-1),seps+2*subd,&loc);CHKERRQ(ierr);
          loc = loc < 0 ? -(loc+1)/2 : loc/2;
          locals[i] = level[loc];
        }
      } else locals[i] = loc/2;
    }
    ierr = PetscFree3(sizes,seps,level);CHKERRQ(ierr);
#else
    SETERRQ(pcomm,PETSC_ERR_SUP,"Need libptscotchparmetis.a compiled with -DSCOTCH_METIS_PREFIX");
#endif
  } else {
    velotab = NULL;
    if (proc_weight_flg) {
      ierr = PetscMalloc1(nparts,&vwgttab);CHKERRQ(ierr);
      ierr = PetscMalloc1(nparts,&velotab);CHKERRQ(ierr);
      for (j=0; j<nparts; j++) {
        if (part->part_weights) vwgttab[j] = part->part_weights[j]*nparts;
        else vwgttab[j] = 1.0;
      }
      for (i=0; i<nparts; i++) {
        deltval = PetscAbsReal(vwgttab[i]-PetscFloorReal(vwgttab[i]+0.5));
        if (deltval>0.01) {
          for (j=0; j<nparts; j++) vwgttab[j] /= deltval;
        }
      }
      for (i=0; i<nparts; i++) velotab[i] = (SCOTCH_Num)(vwgttab[i] + 0.5);
      ierr = PetscFree(vwgttab);CHKERRQ(ierr);
    }

    vertlocnbr = mat->rmap->range[rank+1] - mat->rmap->range[rank];
    edgelocnbr = adj->i[vertlocnbr];
    veloloctab = part->vertex_weights;
    edloloctab = adj->values;

    /* detect whether all vertices are located at the same process in original graph */
    for (p = 0; !mat->rmap->range[p+1] && p < nparts; ++p);
    distributed = (mat->rmap->range[p+1] == mat->rmap->N) ? PETSC_FALSE : PETSC_TRUE;

    if (distributed) {
      SCOTCH_Arch              archdat;
      SCOTCH_Dgraph            grafdat;
      SCOTCH_Dmapping          mappdat;
      SCOTCH_Strat             stradat;

      ierr = SCOTCH_dgraphInit(&grafdat,comm);CHKERRQ(ierr);
      ierr = SCOTCH_dgraphBuild(&grafdat,0,vertlocnbr,vertlocnbr,adj->i,adj->i+1,veloloctab,
                                NULL,edgelocnbr,edgelocnbr,adj->j,NULL,edloloctab);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
      ierr = SCOTCH_dgraphCheck(&grafdat);CHKERRQ(ierr);
#endif

      ierr = SCOTCH_archInit(&archdat);CHKERRQ(ierr);
      ierr = SCOTCH_stratInit(&stradat);CHKERRQ(ierr);
      ierr = SCOTCH_stratDgraphMapBuild(&stradat,scotch->strategy,nparts,nparts,scotch->imbalance);CHKERRQ(ierr);

      if (velotab) {
        ierr = SCOTCH_archCmpltw(&archdat,nparts,velotab);CHKERRQ(ierr);
      } else {
        ierr = SCOTCH_archCmplt( &archdat,nparts);CHKERRQ(ierr);
      }
      ierr = SCOTCH_dgraphMapInit(&grafdat,&mappdat,&archdat,locals);CHKERRQ(ierr);
      ierr = SCOTCH_dgraphMapCompute(&grafdat,&mappdat,&stradat);CHKERRQ(ierr);

      SCOTCH_dgraphMapExit(&grafdat,&mappdat);
      SCOTCH_archExit(&archdat);
      SCOTCH_stratExit(&stradat);
      SCOTCH_dgraphExit(&grafdat);

    } else if (rank == p) {
      SCOTCH_Graph   grafdat;
      SCOTCH_Strat   stradat;

      ierr = SCOTCH_graphInit(&grafdat);CHKERRQ(ierr);
      ierr = SCOTCH_graphBuild(&grafdat,0,vertlocnbr,adj->i,adj->i+1,veloloctab,NULL,edgelocnbr,adj->j,edloloctab);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
      ierr = SCOTCH_graphCheck(&grafdat);CHKERRQ(ierr);
#endif

      ierr = SCOTCH_stratInit(&stradat);CHKERRQ(ierr);
      ierr = SCOTCH_stratGraphMapBuild(&stradat,scotch->strategy,nparts,scotch->imbalance);CHKERRQ(ierr);

      ierr = SCOTCH_graphPart(&grafdat,nparts,&stradat,locals);CHKERRQ(ierr);

      SCOTCH_stratExit(&stradat);
      SCOTCH_graphExit(&grafdat);
    }

    ierr = PetscFree(velotab);CHKERRQ(ierr);
  }
  ierr = MPI_Comm_free(&comm);CHKERRQ(ierr);

  if (bs > 1) {
    PetscInt *newlocals;
    ierr = PetscMalloc1(bs*mat->rmap->n,&newlocals);CHKERRQ(ierr);
    for (i=0;i<mat->rmap->n;i++) {
      for (j=0;j<bs;j++) {
        newlocals[bs*i+j] = locals[i];
      }
    }
    ierr = PetscFree(locals);CHKERRQ(ierr);
    ierr = ISCreateGeneral(pcomm,bs*mat->rmap->n,newlocals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(pcomm,mat->rmap->n,locals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr);
  }
  if (useND) {
    IS ndis;

    if (bs > 1) {
      ierr = ISCreateBlock(pcomm,bs,mat->rmap->n,NDorder,PETSC_OWN_POINTER,&ndis);CHKERRQ(ierr);
    } else {
      ierr = ISCreateGeneral(pcomm,mat->rmap->n,NDorder,PETSC_OWN_POINTER,&ndis);CHKERRQ(ierr);
    }
    ierr = ISSetPermutation(ndis);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)(*partitioning),"_petsc_matpartitioning_ndorder",(PetscObject)ndis);CHKERRQ(ierr);
    ierr = ISDestroy(&ndis);CHKERRQ(ierr);
  }

  if (!flg) {
    ierr = MatDestroy(&mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
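
For orientation, a hedged sketch (hypothetical helper name, assumes PETSc was configured with PTScotch) of how this private routine is normally reached through the public MatPartitioning interface:

#include <petscmat.h>

/* Partition the rows of an assembled parallel matrix A across the ranks of its
   communicator with PTScotch; *is receives the target rank of each local row. */
static PetscErrorCode PartitionRowsWithPTScotch(Mat A,IS *is)
{
  MatPartitioning part;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MatPartitioningCreate(PetscObjectComm((PetscObject)A),&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,A);CHKERRQ(ierr);
  ierr = MatPartitioningSetType(part,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr);
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr); /* e.g. -mat_partitioning_ptscotch_proc_weight */
  ierr = MatPartitioningApply(part,is);CHKERRQ(ierr);       /* ends up in the routine above */
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}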
Example #10
PetscErrorCode  DMSetUp_DA_2D(DM da)
{
    DM_DA            *dd = (DM_DA*)da->data;
    const PetscInt   M            = dd->M;
    const PetscInt   N            = dd->N;
    PetscInt         m            = dd->m;
    PetscInt         n            = dd->n;
    const PetscInt   dof          = dd->w;
    const PetscInt   s            = dd->s;
    DMDABoundaryType bx           = dd->bx;
    DMDABoundaryType by           = dd->by;
    DMDAStencilType  stencil_type = dd->stencil_type;
    PetscInt         *lx          = dd->lx;
    PetscInt         *ly          = dd->ly;
    MPI_Comm         comm;
    PetscMPIInt      rank,size;
    PetscInt         xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,start,end,IXs,IXe,IYs,IYe;
    PetscInt         up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn,*idx_cpy;
    const PetscInt   *idx_full;
    PetscInt         xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count;
    PetscInt         s_x,s_y; /* s proportionalized to w */
    PetscInt         sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0;
    Vec              local,global;
    VecScatter       ltog,gtol;
    IS               to,from,ltogis;
    PetscErrorCode   ierr;

    PetscFunctionBegin;
    if (stencil_type == DMDA_STENCIL_BOX && (bx == DMDA_BOUNDARY_MIRROR || by == DMDA_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil");
    ierr = PetscObjectGetComm((PetscObject)da,&comm);
    CHKERRQ(ierr);
#if !defined(PETSC_USE_64BIT_INDICES)
    if (((Petsc64bitInt) M)*((Petsc64bitInt) N)*((Petsc64bitInt) dof) > (Petsc64bitInt) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof);
#endif

    if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof);
    if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s);

    ierr = MPI_Comm_size(comm,&size);
    CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);
    CHKERRQ(ierr);

    if (m != PETSC_DECIDE) {
        if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m);
        else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size);
    }
    if (n != PETSC_DECIDE) {
        if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n);
        else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size);
    }

    if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
        if (n != PETSC_DECIDE) {
            m = size/n;
        } else if (m != PETSC_DECIDE) {
            n = size/m;
        } else {
            /* try for squarish distribution */
            m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N)));
            if (!m) m = 1;
            while (m > 0) {
                n = size/m;
                if (m*n == size) break;
                m--;
            }
            if (M > N && m < n) {
                PetscInt _m = m;
                m = n;
                n = _m;
            }
        }
        if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n");
    } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given bad partition");

    if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
    if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);

    /*
       Determine locally owned region
       xs is the first local node number, x is the number of local nodes
    */
    if (!lx) {
        ierr = PetscMalloc(m*sizeof(PetscInt), &dd->lx);
        CHKERRQ(ierr);
        lx   = dd->lx;
        for (i=0; i<m; i++) {
            lx[i] = M/m + ((M % m) > i);
        }
    }
    x  = lx[rank % m];
    xs = 0;
    for (i=0; i<(rank % m); i++) {
        xs += lx[i];
    }
#if defined(PETSC_USE_DEBUG)
    left = xs;
    for (i=(rank % m); i<m; i++) {
        left += lx[i];
    }
    if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M);
#endif

    /*
       Determine locally owned region
       ys is the first local node number, y is the number of local nodes
    */
    if (!ly) {
        ierr = PetscMalloc(n*sizeof(PetscInt), &dd->ly);
        CHKERRQ(ierr);
        ly   = dd->ly;
        for (i=0; i<n; i++) {
            ly[i] = N/n + ((N % n) > i);
        }
    }
    y  = ly[rank/m];
    ys = 0;
    for (i=0; i<(rank/m); i++) {
        ys += ly[i];
    }
#if defined(PETSC_USE_DEBUG)
    left = ys;
    for (i=(rank/m); i<n; i++) {
        left += ly[i];
    }
    if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N);
#endif

    /*
     check if the scatter requires more than one process neighbor or wraps around
     the domain more than once
    */
    if ((x < s) && ((m > 1) || (bx == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);
    if ((y < s) && ((n > 1) || (by == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s);
    xe = xs + x;
    ye = ys + y;

    /* determine ghost region (Xs) and region scattered into (IXs)  */
    if (xs-s > 0) {
        Xs = xs - s;
        IXs = xs - s;
    } else {
        if (bx) {
            Xs = xs - s;
        } else {
            Xs = 0;
        }
        IXs = 0;
    }
    if (xe+s <= M) {
        Xe = xe + s;
        IXe = xe + s;
    } else {
        if (bx) {
            Xs = xs - s;
            Xe = xe + s;
        } else {
            Xe = M;
        }
        IXe = M;
    }

    if (bx == DMDA_BOUNDARY_PERIODIC || bx == DMDA_BOUNDARY_MIRROR) {
        IXs = xs - s;
        IXe = xe + s;
        Xs  = xs - s;
        Xe  = xe + s;
    }

    if (ys-s > 0) {
        Ys = ys - s;
        IYs = ys - s;
    } else {
        if (by) {
            Ys = ys - s;
        } else {
            Ys = 0;
        }
        IYs = 0;
    }
    if (ye+s <= N) {
        Ye = ye + s;
        IYe = ye + s;
    } else {
        if (by) {
            Ye = ye + s;
        } else {
            Ye = N;
        }
        IYe = N;
    }

    if (by == DMDA_BOUNDARY_PERIODIC || by == DMDA_BOUNDARY_MIRROR) {
        IYs = ys - s;
        IYe = ye + s;
        Ys  = ys - s;
        Ye  = ye + s;
    }

    /* stencil length in each direction */
    s_x = s;
    s_y = s;

    /* determine starting point of each processor */
    nn       = x*y;
    ierr     = PetscMalloc2(size+1,PetscInt,&bases,size,PetscInt,&ldims);
    CHKERRQ(ierr);
    ierr     = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm);
    CHKERRQ(ierr);
    bases[0] = 0;
    for (i=1; i<=size; i++) {
        bases[i] = ldims[i-1];
    }
    for (i=1; i<=size; i++) {
        bases[i] += bases[i-1];
    }
    base = bases[rank]*dof;

    /* allocate the base parallel and sequential vectors */
    dd->Nlocal = x*y*dof;
    ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,0,&global);
    CHKERRQ(ierr);
    dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof;
    ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,0,&local);
    CHKERRQ(ierr);

    /* generate appropriate vector scatters */
    /* local to global inserts non-ghost point region into global */
    ierr = VecGetOwnershipRange(global,&start,&end);
    CHKERRQ(ierr);
    ierr = ISCreateStride(comm,x*y*dof,start,1,&to);
    CHKERRQ(ierr);

    ierr  = PetscMalloc(x*y*sizeof(PetscInt),&idx);
    CHKERRQ(ierr);
    left  = xs - Xs;
    right = left + x;
    down  = ys - Ys;
    up = down + y;
    count = 0;
    for (i=down; i<up; i++) {
        for (j=left; j<right; j++) {
            idx[count++] = i*(Xe-Xs) + j;
        }
    }

    ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&from);
    CHKERRQ(ierr);
    ierr = VecScatterCreate(local,from,global,to,&ltog);
    CHKERRQ(ierr);
    ierr = PetscLogObjectParent(dd,ltog);
    CHKERRQ(ierr);
    ierr = ISDestroy(&from);
    CHKERRQ(ierr);
    ierr = ISDestroy(&to);
    CHKERRQ(ierr);

    /* global to local must include ghost points within the domain,
       but not ghost points outside the domain that aren't periodic */
    if (stencil_type == DMDA_STENCIL_BOX) {
        count = (IXe-IXs)*(IYe-IYs);
        ierr  = PetscMalloc(count*sizeof(PetscInt),&idx);
        CHKERRQ(ierr);

        left  = IXs - Xs;
        right = left + (IXe-IXs);
        down  = IYs - Ys;
        up = down + (IYe-IYs);
        count = 0;
        for (i=down; i<up; i++) {
            for (j=left; j<right; j++) {
                idx[count++] = j + i*(Xe-Xs);
            }
        }
        ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);
        CHKERRQ(ierr);

    } else {
        /* must drop into cross shape region */
        /*       ---------|
                |  top    |
             |---         ---| up
             |   middle      |
             |               |
             ----         ---- down
                | bottom  |
                -----------
             Xs xs        xe Xe */
        count = (ys-IYs)*x + y*(IXe-IXs) + (IYe-ye)*x;
        ierr  = PetscMalloc(count*sizeof(PetscInt),&idx);
        CHKERRQ(ierr);

        left  = xs - Xs;
        right = left + x;
        down  = ys - Ys;
        up = down + y;
        count = 0;
        /* bottom */
        for (i=(IYs-Ys); i<down; i++) {
            for (j=left; j<right; j++) {
                idx[count++] = j + i*(Xe-Xs);
            }
        }
        /* middle */
        for (i=down; i<up; i++) {
            for (j=(IXs-Xs); j<(IXe-Xs); j++) {
                idx[count++] = j + i*(Xe-Xs);
            }
        }
        /* top */
        for (i=up; i<up+IYe-ye; i++) {
            for (j=left; j<right; j++) {
                idx[count++] = j + i*(Xe-Xs);
            }
        }
        ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);
        CHKERRQ(ierr);
    }


    /* determine who lies on each side of us stored in    n6 n7 n8
                                                          n3    n5
                                                          n0 n1 n2
    */

    /* Assume the Non-Periodic Case */
    n1 = rank - m;
    if (rank % m) {
        n0 = n1 - 1;
    } else {
        n0 = -1;
    }
    if ((rank+1) % m) {
        n2 = n1 + 1;
        n5 = rank + 1;
        n8 = rank + m + 1;
        if (n8 >= m*n) n8 = -1;
    } else {
        n2 = -1;
        n5 = -1;
        n8 = -1;
    }
    if (rank % m) {
        n3 = rank - 1;
        n6 = n3 + m;
        if (n6 >= m*n) n6 = -1;
    } else {
        n3 = -1;
        n6 = -1;
    }
    n7 = rank + m;
    if (n7 >= m*n) n7 = -1;

    if (bx == DMDA_BOUNDARY_PERIODIC && by == DMDA_BOUNDARY_PERIODIC) {
        /* Modify for Periodic Cases */
        /* Handle all four corners */
        if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1;
        if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0;
        if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m;
        if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1;

        /* Handle Top and Bottom Sides */
        if (n1 < 0) n1 = rank + m * (n-1);
        if (n7 < 0) n7 = rank - m * (n-1);
        if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
        if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
        if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
        if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;

        /* Handle Left and Right Sides */
        if (n3 < 0) n3 = rank + (m-1);
        if (n5 < 0) n5 = rank - (m-1);
        if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
        if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
        if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
        if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
    } else if (by == DMDA_BOUNDARY_PERIODIC) {  /* Handle Top and Bottom Sides */
        if (n1 < 0) n1 = rank + m * (n-1);
        if (n7 < 0) n7 = rank - m * (n-1);
        if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
        if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
        if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
        if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
    } else if (bx == DMDA_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */
        if (n3 < 0) n3 = rank + (m-1);
        if (n5 < 0) n5 = rank - (m-1);
        if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
        if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
        if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
        if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
    }

    ierr = PetscMalloc(9*sizeof(PetscInt),&dd->neighbors);
    CHKERRQ(ierr);

    dd->neighbors[0] = n0;
    dd->neighbors[1] = n1;
    dd->neighbors[2] = n2;
    dd->neighbors[3] = n3;
    dd->neighbors[4] = rank;
    dd->neighbors[5] = n5;
    dd->neighbors[6] = n6;
    dd->neighbors[7] = n7;
    dd->neighbors[8] = n8;

    if (stencil_type == DMDA_STENCIL_STAR) {
        /* save corner processor numbers */
        sn0 = n0;
        sn2 = n2;
        sn6 = n6;
        sn8 = n8;
        n0  = n2 = n6 = n8 = -1;
    }

    ierr = PetscMalloc((Xe-Xs)*(Ye-Ys)*sizeof(PetscInt),&idx);
    CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(da,(Xe-Xs)*(Ye-Ys)*sizeof(PetscInt));
    CHKERRQ(ierr);

    nn = 0;
    xbase = bases[rank];
    for (i=1; i<=s_y; i++) {
        if (n0 >= 0) { /* left below */
            x_t = lx[n0 % m];
            y_t = ly[(n0/m)];
            s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        }

        if (n1 >= 0) { /* directly below */
            x_t = x;
            y_t = ly[(n1/m)];
            s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
            for (j=0; j<x_t; j++) idx[nn++] = s_t++;
        } else if (by == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
        }

        if (n2 >= 0) { /* right below */
            x_t = lx[n2 % m];
            y_t = ly[(n2/m)];
            s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        }
    }

    for (i=0; i<y; i++) {
        if (n3 >= 0) { /* directly left */
            x_t = lx[n3 % m];
            /* y_t = y; */
            s_t = bases[n3] + (i+1)*x_t - s_x;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        } else if (bx == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
        }

        for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */

        if (n5 >= 0) { /* directly right */
            x_t = lx[n5 % m];
            /* y_t = y; */
            s_t = bases[n5] + (i)*x_t;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        } else if (bx == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
        }
    }

    for (i=1; i<=s_y; i++) {
        if (n6 >= 0) { /* left above */
            x_t = lx[n6 % m];
            /* y_t = ly[(n6/m)]; */
            s_t = bases[n6] + (i)*x_t - s_x;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        }

        if (n7 >= 0) { /* directly above */
            x_t = x;
            /* y_t = ly[(n7/m)]; */
            s_t = bases[n7] + (i-1)*x_t;
            for (j=0; j<x_t; j++) idx[nn++] = s_t++;
        } else if (by == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
        }

        if (n8 >= 0) { /* right above */
            x_t = lx[n8 % m];
            /* y_t = ly[(n8/m)]; */
            s_t = bases[n8] + (i-1)*x_t;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        }
    }

    ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_COPY_VALUES,&from);
    CHKERRQ(ierr);
    ierr = VecScatterCreate(global,from,local,to,&gtol);
    CHKERRQ(ierr);
    ierr = PetscLogObjectParent(da,gtol);
    CHKERRQ(ierr);
    ierr = ISDestroy(&to);
    CHKERRQ(ierr);
    ierr = ISDestroy(&from);
    CHKERRQ(ierr);

    if (stencil_type == DMDA_STENCIL_STAR) {
        n0 = sn0;
        n2 = sn2;
        n6 = sn6;
        n8 = sn8;
    }

    if (((stencil_type == DMDA_STENCIL_STAR)  ||
            (bx && bx != DMDA_BOUNDARY_PERIODIC) ||
            (by && by != DMDA_BOUNDARY_PERIODIC))) {
        /*
            Recompute the local to global mappings, this time keeping the
          information about the cross corner processor numbers and any ghosted
          but not periodic indices.
        */
        nn    = 0;
        xbase = bases[rank];
        for (i=1; i<=s_y; i++) {
            if (n0 >= 0) { /* left below */
                x_t = lx[n0 % m];
                y_t = ly[(n0/m)];
                s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (xs-Xs > 0 && ys-Ys > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
            }
            if (n1 >= 0) { /* directly below */
                x_t = x;
                y_t = ly[(n1/m)];
                s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
                for (j=0; j<x_t; j++) idx[nn++] = s_t++;
            } else if (ys-Ys > 0) {
                if (by == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
                } else {
                    for (j=0; j<x; j++) idx[nn++] = -1;
                }
            }
            if (n2 >= 0) { /* right below */
                x_t = lx[n2 % m];
                y_t = ly[(n2/m)];
                s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (Xe-xe> 0 && ys-Ys > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
            }
        }

        for (i=0; i<y; i++) {
            if (n3 >= 0) { /* directly left */
                x_t = lx[n3 % m];
                /* y_t = y; */
                s_t = bases[n3] + (i+1)*x_t - s_x;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (xs-Xs > 0) {
                if (bx == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
                } else {
                    for (j=0; j<s_x; j++) idx[nn++] = -1;
                }
            }

            for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */

            if (n5 >= 0) { /* directly right */
                x_t = lx[n5 % m];
                /* y_t = y; */
                s_t = bases[n5] + (i)*x_t;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (Xe-xe > 0) {
                if (bx == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
                } else {
                    for (j=0; j<s_x; j++) idx[nn++] = -1;
                }
            }
        }

        for (i=1; i<=s_y; i++) {
            if (n6 >= 0) { /* left above */
                x_t = lx[n6 % m];
                /* y_t = ly[(n6/m)]; */
                s_t = bases[n6] + (i)*x_t - s_x;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (xs-Xs > 0 && Ye-ye > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
            }
            if (n7 >= 0) { /* directly above */
                x_t = x;
                /* y_t = ly[(n7/m)]; */
                s_t = bases[n7] + (i-1)*x_t;
                for (j=0; j<x_t; j++) idx[nn++] = s_t++;
            } else if (Ye-ye > 0) {
                if (by == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
                } else {
                    for (j=0; j<x; j++) idx[nn++] = -1;
                }
            }
            if (n8 >= 0) { /* right above */
                x_t = lx[n8 % m];
                /* y_t = ly[(n8/m)]; */
                s_t = bases[n8] + (i-1)*x_t;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (Xe-xe > 0 && Ye-ye > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
            }
        }
    }
    /*
       Set the local to global ordering in the global vector, this allows use
       of VecSetValuesLocal().
    */
    ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_OWN_POINTER,&ltogis);
    CHKERRQ(ierr);
    ierr = PetscMalloc(nn*dof*sizeof(PetscInt),&idx_cpy);
    CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(da,nn*dof*sizeof(PetscInt));
    CHKERRQ(ierr);
    ierr = ISGetIndices(ltogis, &idx_full);
    CHKERRQ(ierr);
    ierr = PetscMemcpy(idx_cpy,idx_full,nn*dof*sizeof(PetscInt));
    CHKERRQ(ierr);
    ierr = ISRestoreIndices(ltogis, &idx_full);
    CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingCreateIS(ltogis,&da->ltogmap);
    CHKERRQ(ierr);
    ierr = PetscLogObjectParent(da,da->ltogmap);
    CHKERRQ(ierr);
    ierr = ISDestroy(&ltogis);
    CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingBlock(da->ltogmap,dd->w,&da->ltogmapb);
    CHKERRQ(ierr);
    ierr = PetscLogObjectParent(da,da->ltogmap);
    CHKERRQ(ierr);

    ierr  = PetscFree2(bases,ldims);
    CHKERRQ(ierr);
    dd->m = m;
    dd->n  = n;
    /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */
    dd->xs = xs*dof;
    dd->xe = xe*dof;
    dd->ys = ys;
    dd->ye = ye;
    dd->zs = 0;
    dd->ze = 1;
    dd->Xs = Xs*dof;
    dd->Xe = Xe*dof;
    dd->Ys = Ys;
    dd->Ye = Ye;
    dd->Zs = 0;
    dd->Ze = 1;

    ierr = VecDestroy(&local);
    CHKERRQ(ierr);
    ierr = VecDestroy(&global);
    CHKERRQ(ierr);

    dd->gtol      = gtol;
    dd->ltog      = ltog;
    dd->idx       = idx_cpy;
    dd->Nl        = nn*dof;
    dd->base      = base;
    da->ops->view = DMView_DA_2d;
    dd->ltol      = NULL;
    dd->ao        = NULL;
    PetscFunctionReturn(0);
}
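
For reference, a hedged sketch of the call that triggers this setup routine, written against the same DMDA_BOUNDARY_* era API used in Example #13 (the grid sizes are hypothetical):

#include <petscdmda.h>

/* Create a 64 x 64 2d DMDA with 1 dof per node and a star stencil of width 1;
   DMSetUp_DA_2D() above performs the decomposition and builds the scatters. */
static PetscErrorCode CreateExampleDA2d(DM *da)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,
                      DMDA_STENCIL_STAR,64,64,PETSC_DECIDE,PETSC_DECIDE,
                      1,1,NULL,NULL,da);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}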
Example #11
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  PetscInt       n = 5,i,*blks,bs = 1,m = 2;
  PetscScalar    value;
  Vec            x,y;
  IS             is1,is2;
  VecScatter     ctx = 0;
  PetscViewer    sviewer;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;

  ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  /* create two vectors */
  ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,PETSC_DECIDE,size*bs*n);CHKERRQ(ierr);
  ierr = VecSetFromOptions(x);CHKERRQ(ierr);

  /* create two index sets */
  if (rank < size-1) m = n + 2;
  else m = n;

  ierr = PetscMalloc1(m,&blks);CHKERRQ(ierr);
  blks[0] = n*rank;
  for (i=1; i<m; i++) blks[i] = blks[i-1] + 1;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,m,blks,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
  ierr = PetscFree(blks);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,bs*m,&y);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,bs*m,0,1,&is2);CHKERRQ(ierr);

  /* each processor inserts the entire vector */
  /* this is redundant but tests assembly */
  for (i=0; i<bs*n*size; i++) {
    value = (PetscScalar) i;
    ierr  = VecSetValues(x,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(x);CHKERRQ(ierr);
  ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = VecScatterCreate(x,is1,y,is2,&ctx);CHKERRQ(ierr);
  ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = PetscViewerASCIIPushSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedPrintf(PETSC_VIEWER_STDOUT_WORLD,"----\n");CHKERRQ(ierr);
  ierr = PetscViewerGetSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
  ierr = VecView(y,sviewer);CHKERRQ(ierr); fflush(stdout);
  ierr = PetscViewerRestoreSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
  ierr = PetscViewerFlush(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = ISDestroy(&is1);CHKERRQ(ierr);
  ierr = ISDestroy(&is2);CHKERRQ(ierr);

  ierr = PetscFinalize();
  return ierr;
}
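
Since blks[] holds m consecutive block indices starting at n*rank, each process scatters its own n blocks plus, on every rank except the last, the first two blocks owned by the next rank; those two blocks thus appear in the local vector y of two neighboring processes.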
Example #12
static char help[] = "... uses block index sets\n\n";

/* 'mpiexec -n 3 ./ex2 -vecscatter_type mpi3node' might give an incorrect result because multiple cores write to the same variable */

#include <petscvec.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       bs=1,n=5,i,low;
  PetscInt       ix0[3] = {5,7,9},iy0[3] = {1,2,4},ix1[3] = {2,3,4},iy1[3] = {0,1,3};
  PetscMPIInt    size,rank;
  PetscScalar    *array;
  Vec            x,y;
  IS             isx,isy;
  VecScatter     ctx;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  if (size < 2) SETERRQ(PETSC_COMM_SELF,1,"Must run with more than one processor");

  ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr);
  n    = bs*n;

  /* Create vector x over shared memory */
  ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = VecSetType(x,VECNODE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(x);CHKERRQ(ierr);

  ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    array[i] = (PetscScalar)(i + low);
  }
  ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);

  /* Create a sequential vector y */
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&y);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);

  /* Create two index sets */
  if (!rank) {
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,ix0,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,iy0,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr);
  } else {
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,ix1,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,iy1,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr);
  }

  if (rank == 10) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"\n[%d] isx:\n",rank);CHKERRQ(ierr);
    ierr = ISView(isx,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  }

  ierr = VecScatterCreateWithData(x,isx,y,isy,&ctx);CHKERRQ(ierr);
  ierr = VecScatterSetFromOptions(ctx);CHKERRQ(ierr);

  /* Test forward vecscatter */
  ierr = VecScatterBegin(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if (rank == 0) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] y:\n",rank);CHKERRQ(ierr);
    ierr = VecView(y,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  }

  /* Test reverse vecscatter */
  ierr = VecScale(y,-1.0);CHKERRQ(ierr);
  if (rank) {
    ierr = VecScale(y,1.0/(size - 1));CHKERRQ(ierr);
  }

  ierr = VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* Free spaces */
  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
  ierr = ISDestroy(&isx);CHKERRQ(ierr);
  ierr = ISDestroy(&isy);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
Example #13
PetscErrorCode FACreate(FA *infa)
{
  FA             fa;
  PetscMPIInt    rank;
  PetscInt       tonglobal,globalrstart,x,nx,y,ny,*tonatural,i,j,*to,*from,offt[3];
  PetscInt       *fromnatural,fromnglobal,nscat,nlocal,cntl1,cntl2,cntl3,*indices;
  PetscErrorCode ierr;

  /* Each DMDA manages the local vector for the portion of region 1, 2, and 3 for that processor
     Each DMDA can belong on any subset (overlapping between DMDA's or not) of processors
     For processes that a particular DMDA does not exist on, the corresponding comm should be set to zero
  */
  DM da1 = 0,da2 = 0,da3 = 0;
  /*
      v1, v2, v3 represent the local vector for a single DMDA
  */
  Vec vl1 = 0,vl2 = 0,vl3 = 0, vg1 = 0, vg2 = 0,vg3 = 0;

  /*
     globalvec and friends represent the global vectors that are used for the PETSc solvers
     localvec represents the concatenation of the (up to) 3 local vectors vl1, vl2, vl3

     tovec and friends represent intermediate vectors that are ONLY used for setting up the
     final communication patterns. Once this setup routine is complete they are destroyed.
     The tovec is like the globalvec EXCEPT it has redundant locations for the ghost points
     between regions 2+3 and 1.
  */
  AO          toao,globalao;
  IS          tois,globalis,is;
  Vec         tovec,globalvec,localvec;
  VecScatter  vscat;
  PetscScalar *globalarray,*localarray,*toarray;

  ierr = PetscNew(struct _p_FA,&fa);CHKERRQ(ierr);
  /*
      fa->sw is the stencil width

      fa->p1 is the width of region 1, fa->p2 the width of region 2 (must be the same)
      fa->r1 height of region 1
      fa->r2 height of region 2

      fa->r2 is also the height of region 3-4
      (fa->p1 - fa->p2)/2 is the width of both region 3 and region 4
  */
  fa->p1  = 24;
  fa->p2  = 15;
  fa->r1  = 6;
  fa->r2  = 6;
  fa->sw  = 1;
  fa->r1g = fa->r1 + fa->sw;
  fa->r2g = fa->r2 + fa->sw;

  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  fa->comm[0] = PETSC_COMM_WORLD;
  fa->comm[1] = PETSC_COMM_WORLD;
  fa->comm[2] = PETSC_COMM_WORLD;
  /* Test case with different communicators */
  /* Normally one would use MPI_Comm routines to build MPI communicators on which you wish to partition the DMDAs*/
  /*
  if (!rank) {
    fa->comm[0] = PETSC_COMM_SELF;
    fa->comm[1] = 0;
    fa->comm[2] = 0;
  } else if (rank == 1) {
    fa->comm[0] = 0;
    fa->comm[1] = PETSC_COMM_SELF;
    fa->comm[2] = 0;
  } else {
    fa->comm[0] = 0;
    fa->comm[1] = 0;
    fa->comm[2] = PETSC_COMM_SELF;
  } */

  if (fa->p2 > fa->p1 - 3) SETERRQ(PETSC_COMM_SELF,1,"Width of region fa->p2 must be at least 3 less than width of region 1");
  if (!((fa->p2 - fa->p1) % 2)) SETERRQ(PETSC_COMM_SELF,1,"width of region 3 must NOT be divisible by 2");

  if (fa->comm[1]) {
    ierr = DMDACreate2d(fa->comm[1],DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,fa->p2,fa->r2g,PETSC_DECIDE,PETSC_DECIDE,1,fa->sw,NULL,NULL,&da2);CHKERRQ(ierr);
    ierr = DMGetLocalVector(da2,&vl2);CHKERRQ(ierr);
    ierr = DMGetGlobalVector(da2,&vg2);CHKERRQ(ierr);
  }
  if (fa->comm[2]) {
    ierr = DMDACreate2d(fa->comm[2],DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,fa->p1-fa->p2,fa->r2g,PETSC_DECIDE,PETSC_DECIDE,1,fa->sw,NULL,NULL,&da3);CHKERRQ(ierr);
    ierr = DMGetLocalVector(da3,&vl3);CHKERRQ(ierr);
    ierr = DMGetGlobalVector(da3,&vg3);CHKERRQ(ierr);
  }
  if (fa->comm[0]) {
    ierr = DMDACreate2d(fa->comm[0],DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,fa->p1,fa->r1g,PETSC_DECIDE,PETSC_DECIDE,1,fa->sw,NULL,NULL,&da1);CHKERRQ(ierr);
    ierr = DMGetLocalVector(da1,&vl1);CHKERRQ(ierr);
    ierr = DMGetGlobalVector(da1,&vg1);CHKERRQ(ierr);
  }

  /* count the number of unknowns owned on each processor and determine the starting point of each processors ownership
     for global vector with redundancy */
  tonglobal = 0;
  if (fa->comm[1]) {
    ierr       = DMDAGetCorners(da2,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    tonglobal += nx*ny;
  }
  if (fa->comm[2]) {
    ierr       = DMDAGetCorners(da3,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    tonglobal += nx*ny;
  }
  if (fa->comm[0]) {
    ierr       = DMDAGetCorners(da1,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    tonglobal += nx*ny;
  }
  ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] Number of unknowns owned %d\n",rank,tonglobal);CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD);CHKERRQ(ierr);

  /* Get tonatural number for each node */
  ierr      = PetscMalloc((tonglobal+1)*sizeof(PetscInt),&tonatural);CHKERRQ(ierr);
  tonglobal = 0;
  if (fa->comm[1]) {
    ierr = DMDAGetCorners(da2,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        tonatural[tonglobal++] = (fa->p1 - fa->p2)/2 + x + i + fa->p1*(y + j);
      }
    }
  }
  if (fa->comm[2]) {
    ierr = DMDAGetCorners(da3,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        if (x + i < (fa->p1 - fa->p2)/2) tonatural[tonglobal++] = x + i + fa->p1*(y + j);
        else tonatural[tonglobal++] = fa->p2 + x + i + fa->p1*(y + j);
      }
    }
  }
  if (fa->comm[0]) {
    ierr = DMDAGetCorners(da1,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        tonatural[tonglobal++] = fa->p1*fa->r2g + x + i + fa->p1*(y + j);
      }
    }
  }
  /*  ierr = PetscIntView(tonglobal,tonatural,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
  ierr = AOCreateBasic(PETSC_COMM_WORLD,tonglobal,tonatural,0,&toao);CHKERRQ(ierr);
  ierr = PetscFree(tonatural);CHKERRQ(ierr);

  /* count the number of unknowns owned on each processor and determine the starting point of each processors ownership
     for global vector without redundancy */
  fromnglobal = 0;
  fa->offg[1] = 0;
  offt[1]     = 0;
  if (fa->comm[1]) {
    ierr    = DMDAGetCorners(da2,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    offt[2] = nx*ny;
    if (y+ny == fa->r2g) ny--;    /* includes the ghost points on the upper side */
    fromnglobal += nx*ny;
    fa->offg[2]  = fromnglobal;
  } else {
    offt[2]     = 0;
    fa->offg[2] = 0;
  }
  if (fa->comm[2]) {
    ierr    = DMDAGetCorners(da3,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    offt[0] = offt[2] + nx*ny;
    if (y+ny == fa->r2g) ny--;    /* includes the ghost points on the upper side */
    fromnglobal += nx*ny;
    fa->offg[0]  = fromnglobal;
  } else {
    offt[0]     = offt[2];
    fa->offg[0] = fromnglobal;
  }
  if (fa->comm[0]) {
    ierr = DMDAGetCorners(da1,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    if (y == 0) ny--;    /* includes the ghost points on the lower side */
    fromnglobal += nx*ny;
  }
  ierr          = MPI_Scan(&fromnglobal,&globalrstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);
  globalrstart -= fromnglobal;
  ierr          = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] Number of unknowns owned %d\n",rank,fromnglobal);CHKERRQ(ierr);
  ierr          = PetscSynchronizedFlush(PETSC_COMM_WORLD);CHKERRQ(ierr);

  /* Get fromnatural number for each node */
  ierr        = PetscMalloc((fromnglobal+1)*sizeof(PetscInt),&fromnatural);CHKERRQ(ierr);
  fromnglobal = 0;
  if (fa->comm[1]) {
    ierr = DMDAGetCorners(da2,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    if (y+ny == fa->r2g) ny--;    /* includes the ghost points on the upper side */
    fa->xg[1] = x; fa->yg[1] = y; fa->mg[1] = nx; fa->ng[1] = ny;
    ierr      = DMDAGetGhostCorners(da2,&fa->xl[1],&fa->yl[1],0,&fa->ml[1],&fa->nl[1],0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        fromnatural[fromnglobal++] = (fa->p1 - fa->p2)/2 + x + i + fa->p1*(y + j);
      }
    }
  }
  if (fa->comm[2]) {
    ierr = DMDAGetCorners(da3,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    if (y+ny == fa->r2g) ny--;    /* includes the ghost points on the upper side */
    fa->xg[2] = x; fa->yg[2] = y; fa->mg[2] = nx; fa->ng[2] = ny;
    ierr      = DMDAGetGhostCorners(da3,&fa->xl[2],&fa->yl[2],0,&fa->ml[2],&fa->nl[2],0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        if (x + i < (fa->p1 - fa->p2)/2) fromnatural[fromnglobal++] = x + i + fa->p1*(y + j);
        else fromnatural[fromnglobal++] = fa->p2 + x + i + fa->p1*(y + j);
      }
    }
  }
  if (fa->comm[0]) {
    ierr = DMDAGetCorners(da1,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    if (y == 0) ny--;    /* includes the ghost points on the lower side */
    else y--;
    fa->xg[0] = x; fa->yg[0] = y; fa->mg[0] = nx; fa->ng[0] = ny;
    ierr      = DMDAGetGhostCorners(da1,&fa->xl[0],&fa->yl[0],0,&fa->ml[0],&fa->nl[0],0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        fromnatural[fromnglobal++] = fa->p1*fa->r2 + x + i + fa->p1*(y + j);
      }
    }
  }
  /*ierr = PetscIntView(fromnglobal,fromnatural,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);*/
  ierr = AOCreateBasic(PETSC_COMM_WORLD,fromnglobal,fromnatural,0,&globalao);CHKERRQ(ierr);
  ierr = PetscFree(fromnatural);CHKERRQ(ierr);

  /* ---------------------------------------------------*/
  /* Create the scatter that updates 1 from 2 and 3 and 3 and 2 from 1 */
  /* currently handles stencil width of 1 ONLY */
  ierr  = PetscMalloc(tonglobal*sizeof(PetscInt),&to);CHKERRQ(ierr);
  ierr  = PetscMalloc(tonglobal*sizeof(PetscInt),&from);CHKERRQ(ierr);
  nscat = 0;
  if (fa->comm[1]) {
    ierr = DMDAGetCorners(da2,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        to[nscat] = from[nscat] = (fa->p1 - fa->p2)/2 + x + i + fa->p1*(y + j);nscat++;
      }
    }
  }
  if (fa->comm[2]) {
    ierr = DMDAGetCorners(da3,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        if (x + i < (fa->p1 - fa->p2)/2) {
          to[nscat] = from[nscat] = x + i + fa->p1*(y + j);nscat++;
        } else {
          to[nscat] = from[nscat] = fa->p2 + x + i + fa->p1*(y + j);nscat++;
        }
      }
    }
  }
  if (fa->comm[0]) {
    ierr = DMDAGetCorners(da1,&x,&y,0,&nx,&ny,0);CHKERRQ(ierr);
    for (j=0; j<ny; j++) {
      for (i=0; i<nx; i++) {
        to[nscat]     = fa->p1*fa->r2g + x + i + fa->p1*(y + j);
        from[nscat++] = fa->p1*(fa->r2 - 1) + x + i + fa->p1*(y + j);
      }
    }
  }
  ierr = AOApplicationToPetsc(toao,nscat,to);CHKERRQ(ierr);
  ierr = AOApplicationToPetsc(globalao,nscat,from);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,nscat,to,PETSC_COPY_VALUES,&tois);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,nscat,from,PETSC_COPY_VALUES,&globalis);CHKERRQ(ierr);
  ierr = PetscFree(to);CHKERRQ(ierr);
  ierr = PetscFree(from);CHKERRQ(ierr);
  ierr = VecCreateMPI(PETSC_COMM_WORLD,tonglobal,PETSC_DETERMINE,&tovec);CHKERRQ(ierr);
  ierr = VecCreateMPI(PETSC_COMM_WORLD,fromnglobal,PETSC_DETERMINE,&globalvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(globalvec,globalis,tovec,tois,&vscat);CHKERRQ(ierr);
  ierr = ISDestroy(&tois);CHKERRQ(ierr);
  ierr = ISDestroy(&globalis);CHKERRQ(ierr);
  ierr = AODestroy(&globalao);CHKERRQ(ierr);
  ierr = AODestroy(&toao);CHKERRQ(ierr);

  /* fill up global vector without redundant values with PETSc global numbering */
  ierr = VecGetArray(globalvec,&globalarray);CHKERRQ(ierr);
  for (i=0; i<fromnglobal; i++) {
    globalarray[i] = globalrstart + i;
  }
  ierr = VecRestoreArray(globalvec,&globalarray);CHKERRQ(ierr);

  /* scatter PETSc global indices to the redundantly valued array */
  ierr = VecScatterBegin(vscat,globalvec,tovec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(vscat,globalvec,tovec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Create local vector that is the concatenation of the local vectors */
  nlocal = 0;
  cntl1  = cntl2 = cntl3 = 0;
  if (fa->comm[1]) {
    ierr    = VecGetSize(vl2,&cntl2);CHKERRQ(ierr);
    nlocal += cntl2;
  }
  if (fa->comm[2]) {
    ierr    = VecGetSize(vl3,&cntl3);CHKERRQ(ierr);
    nlocal += cntl3;
  }
  if (fa->comm[0]) {
    ierr    = VecGetSize(vl1,&cntl1);CHKERRQ(ierr);
    nlocal += cntl1;
  }
  fa->offl[0] = cntl2 + cntl3;
  fa->offl[1] = 0;
  fa->offl[2] = cntl2;
  ierr        = VecCreateSeq(PETSC_COMM_SELF,nlocal,&localvec);CHKERRQ(ierr);

  /* cheat so that vl1, vl2, vl3 share array memory with localvec */
  ierr = VecGetArray(localvec,&localarray);CHKERRQ(ierr);
  ierr = VecGetArray(tovec,&toarray);CHKERRQ(ierr);
  if (fa->comm[1]) {
    ierr = VecPlaceArray(vl2,localarray+fa->offl[1]);CHKERRQ(ierr);
    ierr = VecPlaceArray(vg2,toarray+offt[1]);CHKERRQ(ierr);
    ierr = DMGlobalToLocalBegin(da2,vg2,INSERT_VALUES,vl2);CHKERRQ(ierr);
    ierr = DMGlobalToLocalEnd(da2,vg2,INSERT_VALUES,vl2);CHKERRQ(ierr);
    ierr = DMRestoreGlobalVector(da2,&vg2);CHKERRQ(ierr);
  }
  if (fa->comm[2]) {
    ierr = VecPlaceArray(vl3,localarray+fa->offl[2]);CHKERRQ(ierr);
    ierr = VecPlaceArray(vg3,toarray+offt[2]);CHKERRQ(ierr);
    ierr = DMGlobalToLocalBegin(da3,vg3,INSERT_VALUES,vl3);CHKERRQ(ierr);
    ierr = DMGlobalToLocalEnd(da3,vg3,INSERT_VALUES,vl3);CHKERRQ(ierr);
    ierr = DMRestoreGlobalVector(da3,&vg3);CHKERRQ(ierr);
  }
  if (fa->comm[0]) {
    ierr = VecPlaceArray(vl1,localarray+fa->offl[0]);CHKERRQ(ierr);
    ierr = VecPlaceArray(vg1,toarray+offt[0]);CHKERRQ(ierr);
    ierr = DMGlobalToLocalBegin(da1,vg1,INSERT_VALUES,vl1);CHKERRQ(ierr);
    ierr = DMGlobalToLocalEnd(da1,vg1,INSERT_VALUES,vl1);CHKERRQ(ierr);
    ierr = DMRestoreGlobalVector(da1,&vg1);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(localvec,&localarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(tovec,&toarray);CHKERRQ(ierr);

  /* no longer need the redundant vector and VecScatter to it */
  ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr);
  ierr = VecDestroy(&tovec);CHKERRQ(ierr);

  /* Create final scatter that goes directly from globalvec to localvec */
  /* this is the one to be used in the application code */
  ierr = PetscMalloc(nlocal*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = VecGetArray(localvec,&localarray);CHKERRQ(ierr);
  for (i=0; i<nlocal; i++) {
    indices[i] = (PetscInt)PetscRealPart(localarray[i]); /* the indices were stored as scalars by the scatter above; PetscRealPart() keeps this valid for complex builds */
  }
  ierr = VecRestoreArray(localvec,&localarray);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_WORLD,2,nlocal,indices,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,2*nlocal,&fa->l);CHKERRQ(ierr);
  ierr = VecCreateMPI(PETSC_COMM_WORLD,2*fromnglobal,PETSC_DETERMINE,&fa->g);CHKERRQ(ierr);

  ierr = VecScatterCreate(fa->g,is,fa->l,NULL,&fa->vscat);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);

  ierr = VecDestroy(&globalvec);CHKERRQ(ierr);
  ierr = VecDestroy(&localvec);CHKERRQ(ierr);
  if (fa->comm[0]) {
    ierr = DMRestoreLocalVector(da1,&vl1);CHKERRQ(ierr);
    ierr = DMDestroy(&da1);CHKERRQ(ierr);
  }
  if (fa->comm[1]) {
    ierr = DMRestoreLocalVector(da2,&vl2);CHKERRQ(ierr);
    ierr = DMDestroy(&da2);CHKERRQ(ierr);
  }
  if (fa->comm[2]) {
    ierr = DMRestoreLocalVector(da3,&vl3);CHKERRQ(ierr);
    ierr = DMDestroy(&da3);CHKERRQ(ierr);
  }
  *infa = fa;
  PetscFunctionReturn(0);
}
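The scatter assembled above (fa->vscat, together with fa->g and fa->l) is the one intended for application code. A minimal sketch of how it would typically be driven, assuming the FA struct fields built in this routine; the helper name FAGlobalToLocal is hypothetical:

/* Hypothetical helper: drive the final global-to-local scatter built above. */
PetscErrorCode FAGlobalToLocal(FA fa,Vec global,Vec local)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(fa->vscat,global,local,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(fa->vscat,global,local,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}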
Example #14
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
  const PetscInt *sowners;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);

  ec = 0;
  for (j=0; j<size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;

  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreateWithData(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  sbaij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec)*bs;
  ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i<2*ec; i++,j++) sgarray[i] = k + j;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  ierr = VecScatterCreateWithData(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
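The slvec1a/slvec1b construction above depends on VecCreateSeqWithArray() aliasing an existing buffer rather than copying it. A self-contained sketch of that idiom; the function name and its arguments are illustrative, not part of the routine above:

/* Split a sequential vector's storage into two aliased views: the first
   na entries and the rest. No data is copied, so the views remain valid
   only while the parent vector's array is alive. */
static PetscErrorCode SplitVecInTwo(Vec parent,PetscInt na,Vec *a,Vec *b)
{
  PetscErrorCode ierr;
  PetscScalar    *arr;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(parent,&n);CHKERRQ(ierr);
  ierr = VecGetArray(parent,&arr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,na,arr,a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,n-na,arr+na,b);CHKERRQ(ierr);
  ierr = VecRestoreArray(parent,&arr);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}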
Example #15
File: ex1.c Project: petsc/petsc
static char help[] = "uses block index sets\n\n";

#include <petscvec.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       bs=1,n=5,N,i,low;
  PetscInt       ix0[3] = {5,7,9},iy0[3] = {1,2,4},ix1[3] = {2,3,1},iy1[3] = {0,3,9};
  PetscMPIInt    size,rank;
  PetscScalar    *array;
  Vec            x,x1,y;
  IS             isx,isy;
  VecScatter     ctx;
  VecScatterType type;
  PetscBool      flg;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  if (size < 2) SETERRQ(PETSC_COMM_SELF,1,"Must be run with more than one processor");

  ierr = PetscOptionsGetInt(NULL,NULL,"-bs",&bs,NULL);CHKERRQ(ierr);
  n    = bs*n;

  /* Create vector x over shared memory */
  ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,n,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = VecSetType(x,VECNODE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(x);CHKERRQ(ierr);

  ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    array[i] = (PetscScalar)(i + low);
  }
  ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  /* ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */

  /* Test some vector functions */
  ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(x);CHKERRQ(ierr);

  ierr = VecGetSize(x,&N);CHKERRQ(ierr);
  ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);

  ierr = VecDuplicate(x,&x1);CHKERRQ(ierr);
  ierr = VecCopy(x,x1);CHKERRQ(ierr);
  ierr = VecEqual(x,x1,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)x),PETSC_ERR_ARG_WRONG,"x1 != x");

  ierr = VecScale(x1,2.0);CHKERRQ(ierr);
  ierr = VecSet(x1,10.0);CHKERRQ(ierr);
  /* ierr = VecView(x1,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */

  /* Create vector y over shared memory */
  ierr = VecCreate(PETSC_COMM_WORLD,&y);CHKERRQ(ierr);
  ierr = VecSetSizes(y,n,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = VecSetType(y,VECNODE);CHKERRQ(ierr);
  ierr = VecSetFromOptions(y);CHKERRQ(ierr);
  ierr = VecGetArray(y,&array);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    array[i] = -(PetscScalar) (i + 100*rank);
  }
  ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(y);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(y);CHKERRQ(ierr);
  /* ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */

  /* Create two index sets */
  if (!rank) {
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,ix0,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,iy0,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr);
  } else {
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,ix1,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,iy1,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr);
  }

  if (rank == 10) { /* effectively disabled debug output; change the rank to enable */
    ierr = PetscPrintf(PETSC_COMM_SELF,"\n[%d] isx:\n",rank);CHKERRQ(ierr);
    ierr = ISView(isx,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"\n[%d] isy:\n",rank);CHKERRQ(ierr);
    ierr = ISView(isy,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  }

  /* Create Vector scatter */
  ierr = VecScatterCreate(x,isx,y,isy,&ctx);CHKERRQ(ierr);
  ierr = VecScatterSetFromOptions(ctx);CHKERRQ(ierr);
  ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"scatter type %s\n",type);CHKERRQ(ierr);

  /* Test forward vecscatter */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"\nSCATTER_FORWARD y:\n");CHKERRQ(ierr);
  ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* Test reverse vecscatter */
  ierr = VecSet(x,0.0);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(y,&low,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(y,&array);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    array[i] = (PetscScalar)(i + low);
  }
  ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
  ierr = VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"\nSCATTER_REVERSE x:\n");CHKERRQ(ierr);
  ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* Free objects */
  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
  ierr = ISDestroy(&isx);CHKERRQ(ierr);
  ierr = ISDestroy(&isy);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&x1);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
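For reference, a block IS stores block indices internally but expands to bs consecutive entries per block. A minimal standalone sketch demonstrating this with ISGetIndices(); the sample indices are illustrative:

#include <petscis.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       blocks[2] = {1,3},i,n;
  const PetscInt *idx;
  IS             is;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  /* bs=2 with block indices {1,3} addresses the expanded entries {2,3,6,7} */
  ierr = ISCreateBlock(PETSC_COMM_SELF,2,2,blocks,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);
  ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr);  /* n == 4, the expanded length */
  ierr = ISGetIndices(is,&idx);CHKERRQ(ierr);  /* returns the expanded indices */
  for (i=0; i<n; i++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"%D\n",idx[i]);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(is,&idx);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}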
Example #16
static PetscErrorCode MatPartitioningApply_Parmetis_Private(MatPartitioning part, PetscBool useND, IS *partitioning)
{
  MatPartitioning_Parmetis *pmetis = (MatPartitioning_Parmetis*)part->data;
  PetscErrorCode           ierr;
  PetscInt                 *locals = NULL;
  Mat                      mat     = part->adj,amat,pmat;
  PetscBool                flg;
  PetscInt                 bs = 1;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIADJ,&flg);CHKERRQ(ierr);
  if (flg) {
    amat = mat;
    ierr = PetscObjectReference((PetscObject)amat);CHKERRQ(ierr);
  } else {
    /* bs indicates if the converted matrix is "reduced" from the original and hence the
       resulting partition results need to be stretched to match the original matrix */
    ierr = MatConvert(mat,MATMPIADJ,MAT_INITIAL_MATRIX,&amat);CHKERRQ(ierr);
    if (amat->rmap->n > 0) bs = mat->rmap->n/amat->rmap->n;
  }
  ierr = MatMPIAdjCreateNonemptySubcommMat(amat,&pmat);CHKERRQ(ierr);
  ierr = MPI_Barrier(PetscObjectComm((PetscObject)part));CHKERRQ(ierr);

  if (pmat) {
    MPI_Comm   pcomm,comm;
    Mat_MPIAdj *adj     = (Mat_MPIAdj*)pmat->data;
    PetscInt   *vtxdist = pmat->rmap->range;
    PetscInt   *xadj    = adj->i;
    PetscInt   *adjncy  = adj->j;
    PetscInt   *NDorder = NULL;
    PetscInt   itmp     = 0,wgtflag=0, numflag=0, ncon=1, nparts=part->n, options[24], i, j;
    real_t     *tpwgts,*ubvec,itr=0.1;
    int        status;

    ierr = PetscObjectGetComm((PetscObject)pmat,&pcomm);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    /* check that matrix has no diagonal entries */
    {
      PetscInt rstart;
      ierr = MatGetOwnershipRange(pmat,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<pmat->rmap->n; i++) {
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          if (adjncy[j] == i+rstart) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row %d has diagonal entry; Parmetis forbids diagonal entry",i+rstart);
        }
      }
    }
#endif

    ierr = PetscMalloc1(pmat->rmap->n,&locals);CHKERRQ(ierr);

    if (adj->values && !part->vertex_weights)
      wgtflag = 1;
    if (part->vertex_weights && !adj->values)
      wgtflag = 2;
    if (part->vertex_weights && adj->values)
      wgtflag = 3;

    if (PetscLogPrintInfo) {itmp = pmetis->printout; pmetis->printout = 127;}
    ierr = PetscMalloc1(ncon*nparts,&tpwgts);CHKERRQ(ierr);
    for (i=0; i<ncon; i++) {
      for (j=0; j<nparts; j++) {
        if (part->part_weights) {
          tpwgts[i*nparts+j] = part->part_weights[i*nparts+j];
        } else {
          tpwgts[i*nparts+j] = 1./nparts;
        }
      }
    }
    ierr = PetscMalloc1(ncon,&ubvec);CHKERRQ(ierr);
    for (i=0; i<ncon; i++) {
      ubvec[i] = 1.05;
    }
    /* This sets the defaults */
    options[0] = 0;
    for (i=1; i<24; i++) {
      options[i] = -1;
    }
    /* Duplicate the communicator to be sure that ParMETIS attribute caching does not interfere with PETSc. */
    ierr = MPI_Comm_dup(pcomm,&comm);CHKERRQ(ierr);
    if (useND) {
      PetscInt    *sizes, *seps, log2size, subd, *level;
      PetscMPIInt size;
      idx_t       mtype = PARMETIS_MTYPE_GLOBAL, rtype = PARMETIS_SRTYPE_2PHASE, p_nseps = 1, s_nseps = 1;
      real_t      ubfrac = 1.05;

      ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
      ierr = PetscMalloc1(pmat->rmap->n,&NDorder);CHKERRQ(ierr);
      ierr = PetscMalloc3(2*size,&sizes,4*size,&seps,size,&level);CHKERRQ(ierr);
      PetscStackCallParmetis(ParMETIS_V32_NodeND,((idx_t*)vtxdist,(idx_t*)xadj,(idx_t*)adjncy,(idx_t*)part->vertex_weights,(idx_t*)&numflag,&mtype,&rtype,&p_nseps,&s_nseps,&ubfrac,NULL/* seed */,NULL/* dbglvl */,(idx_t*)NDorder,(idx_t*)(sizes),&comm));
      log2size = PetscLog2Real(size);
      subd = PetscPowInt(2,log2size);
      ierr = MatPartitioningSizesToSep_Private(subd,sizes,seps,level);CHKERRQ(ierr);
      for (i=0;i<pmat->rmap->n;i++) {
        PetscInt loc;

        ierr = PetscFindInt(NDorder[i],2*subd,seps,&loc);CHKERRQ(ierr);
        if (loc < 0) {
          loc = -(loc+1);
          if (loc%2) { /* part of subdomain */
            locals[i] = loc/2;
          } else {
            ierr = PetscFindInt(NDorder[i],2*(subd-1),seps+2*subd,&loc);CHKERRQ(ierr);
            loc = loc < 0 ? -(loc+1)/2 : loc/2;
            locals[i] = level[loc];
          }
        } else locals[i] = loc/2;
      }
      ierr = PetscFree3(sizes,seps,level);CHKERRQ(ierr);
    } else {
      if (pmetis->repartition) {
        PetscStackCallParmetis(ParMETIS_V3_AdaptiveRepart,((idx_t*)vtxdist,(idx_t*)xadj,(idx_t*)adjncy,(idx_t*)part->vertex_weights,(idx_t*)part->vertex_weights,(idx_t*)adj->values,(idx_t*)&wgtflag,(idx_t*)&numflag,(idx_t*)&ncon,(idx_t*)&nparts,tpwgts,ubvec,&itr,(idx_t*)options,(idx_t*)&pmetis->cuts,(idx_t*)locals,&comm));
      } else {
        PetscStackCallParmetis(ParMETIS_V3_PartKway,((idx_t*)vtxdist,(idx_t*)xadj,(idx_t*)adjncy,(idx_t*)part->vertex_weights,(idx_t*)adj->values,(idx_t*)&wgtflag,(idx_t*)&numflag,(idx_t*)&ncon,(idx_t*)&nparts,tpwgts,ubvec,(idx_t*)options,(idx_t*)&pmetis->cuts,(idx_t*)locals,&comm));
      }
    }
    ierr = MPI_Comm_free(&comm);CHKERRQ(ierr);

    ierr = PetscFree(tpwgts);CHKERRQ(ierr);
    ierr = PetscFree(ubvec);CHKERRQ(ierr);
    if (PetscLogPrintInfo) pmetis->printout = itmp;

    if (bs > 1) {
      PetscInt i,j,*newlocals;
      ierr = PetscMalloc1(bs*pmat->rmap->n,&newlocals);CHKERRQ(ierr);
      for (i=0; i<pmat->rmap->n; i++) {
        for (j=0; j<bs; j++) {
          newlocals[bs*i + j] = locals[i];
        }
      }
      ierr = PetscFree(locals);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)part),bs*pmat->rmap->n,newlocals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr);
    } else {
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)part),pmat->rmap->n,locals,PETSC_OWN_POINTER,partitioning);CHKERRQ(ierr);
    }
    if (useND) {
      IS ndis;

      if (bs > 1) {
        ierr = ISCreateBlock(PetscObjectComm((PetscObject)part),bs,pmat->rmap->n,NDorder,PETSC_OWN_POINTER,&ndis);CHKERRQ(ierr);
      } else {
        ierr = ISCreateGeneral(PetscObjectComm((PetscObject)part),pmat->rmap->n,NDorder,PETSC_OWN_POINTER,&ndis);CHKERRQ(ierr);
      }
      ierr = ISSetPermutation(ndis);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)(*partitioning),"_petsc_matpartitioning_ndorder",(PetscObject)ndis);CHKERRQ(ierr);
      ierr = ISDestroy(&ndis);CHKERRQ(ierr);
    }
  } else {
    ierr = ISCreateGeneral(PetscObjectComm((PetscObject)part),0,NULL,PETSC_COPY_VALUES,partitioning);CHKERRQ(ierr);
    if (useND) {
      IS ndis;

      if (bs > 1) {
        ierr = ISCreateBlock(PetscObjectComm((PetscObject)part),bs,0,NULL,PETSC_COPY_VALUES,&ndis);CHKERRQ(ierr);
      } else {
        ierr = ISCreateGeneral(PetscObjectComm((PetscObject)part),0,NULL,PETSC_COPY_VALUES,&ndis);CHKERRQ(ierr);
      }
      ierr = ISSetPermutation(ndis);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)(*partitioning),"_petsc_matpartitioning_ndorder",(PetscObject)ndis);CHKERRQ(ierr);
      ierr = ISDestroy(&ndis);CHKERRQ(ierr);
    }
  }
  ierr = MatDestroy(&pmat);CHKERRQ(ierr);
  ierr = MatDestroy(&amat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
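For orientation, this private routine sits behind MatPartitioningApply(). A minimal driver sketch, assuming an assembled parallel Mat named mat and a PETSc build configured with ParMETIS:

/* Partition the rows of an assembled MPI matrix with ParMETIS. */
MatPartitioning part;
IS              partis;
PetscErrorCode  ierr;

ierr = MatPartitioningCreate(PetscObjectComm((PetscObject)mat),&part);CHKERRQ(ierr);
ierr = MatPartitioningSetAdjacency(part,mat);CHKERRQ(ierr);
ierr = MatPartitioningSetType(part,MATPARTITIONINGPARMETIS);CHKERRQ(ierr);
ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
ierr = MatPartitioningApply(part,&partis);CHKERRQ(ierr); /* reaches the routine above */
ierr = ISView(partis,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
ierr = ISDestroy(&partis);CHKERRQ(ierr);
ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);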
Example #17
static char help[] = "uses block index sets\n\n";

#include <petscvec.h>

#undef __FUNCT__
#define __FUNCT__ "main"
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       bs = 1,n = 5,ix0[3] = {5,7,9},ix1[3] = {2,3,4},i,iy0[3] = {1,2,4},iy1[3] = {0,1,3};
  PetscMPIInt    size,rank;
  PetscScalar    value;
  Vec            x,y;
  IS             isx,isy;
  VecScatter     ctx = 0,newctx;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr);  
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  if (size != 2) SETERRQ(PETSC_COMM_SELF,1,"Must run with 2 processors");

  ierr = PetscOptionsGetInt(PETSC_NULL,"-bs",&bs,PETSC_NULL);CHKERRQ(ierr);
  n = bs*n;

  /* create two vectors */
  ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
  ierr = VecSetSizes(x,PETSC_DECIDE,size*n);CHKERRQ(ierr);
  ierr = VecSetFromOptions(x);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&y);CHKERRQ(ierr);

  /* create two index sets */
  if (!rank) {
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,ix0,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,iy0,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr);
  } else {
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,ix1,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
    ierr = ISCreateBlock(PETSC_COMM_SELF,bs,3,iy1,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr);
  }

  /* fill local part of parallel vector */
  for (i=n*rank; i<n*(rank+1); i++) {
    value = (PetscScalar) i;
    ierr = VecSetValues(x,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(x);CHKERRQ(ierr);

  ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* fill the local sequential vector */
  for (i=0; i<n; i++) {
    value = -(PetscScalar) (i + 100*rank);
    ierr = VecSetValues(y,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(y);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(y);CHKERRQ(ierr);


  ierr = VecScatterCreate(x,isx,y,isy,&ctx);CHKERRQ(ierr);
  ierr = VecScatterCopy(ctx,&newctx);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);

  ierr = VecScatterBegin(newctx,y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(newctx,y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&newctx);CHKERRQ(ierr);

  ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = ISDestroy(&isx);CHKERRQ(ierr);
  ierr = ISDestroy(&isy);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);

  ierr = PetscFinalize();
  return 0;
}
Example #18
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N  = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  baij->garray = garray;

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
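The table-based path above uses the one-based PetscTable idiom: keys and values are stored shifted by +1 so that a lookup result of 0 can mean "absent". A small standalone sketch of that idiom (the sample gids array is illustrative):

/* Map sparse global ids to dense, one-based local ids with PetscTable. */
PetscErrorCode ierr;
PetscTable     table;
PetscInt       gids[4] = {7,2,7,11},data,i,ec = 0;

ierr = PetscTableCreate(4,12,&table);CHKERRQ(ierr);  /* expected count, max key */
for (i=0; i<4; i++) {
  ierr = PetscTableFind(table,gids[i]+1,&data);CHKERRQ(ierr);
  if (!data) {ierr = PetscTableAdd(table,gids[i]+1,++ec,INSERT_VALUES);CHKERRQ(ierr);}
}
/* ec == 3: the distinct gids 7, 2, 11 received local ids 1, 2, 3 */
ierr = PetscTableDestroy(&table);CHKERRQ(ierr);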
Example #19
PetscErrorCode  DMSetUp_DA_1D(DM da)
{
  DM_DA            *dd   = (DM_DA*)da->data;
  const PetscInt   M     = dd->M;
  const PetscInt   dof   = dd->w;
  const PetscInt   s     = dd->s;
  const PetscInt   sDist = s;  /* stencil distance in points */
  const PetscInt   *lx   = dd->lx;
  DMBoundaryType   bx    = dd->bx;
  MPI_Comm         comm;
  Vec              local, global;
  VecScatter       gtol;
  IS               to, from;
  PetscBool        flg1 = PETSC_FALSE, flg2 = PETSC_FALSE;
  PetscMPIInt      rank, size;
  PetscInt         i,*idx,nn,left,xs,xe,x,Xs,Xe,start,m,IXs,IXe;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject) da, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  dd->p = 1;
  dd->n = 1;
  dd->m = size;
  m     = dd->m;

  if (s > 0) {
    /* if not communicating data then should be ok to have nothing on some processes */
    if (M < m) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"More processes than data points! %D %D",m,M);
    if ((M-1) < s && size > 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Array is too small for stencil! %D %D",M-1,s);
  }

  /*
     Determine locally owned region
     xs is the first local node number, x is the number of local nodes
  */
  if (!lx) {
    ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,"-da_partition_blockcomm",&flg1,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,"-da_partition_nodes_at_end",&flg2,NULL);CHKERRQ(ierr);
    if (flg1) {      /* Block Comm type Distribution */
      xs = rank*M/m;
      x  = (rank + 1)*M/m - xs;
    } else if (flg2) { /* The odd nodes are evenly distributed across last nodes */
      x = (M + rank)/m;
      if (M/m == x) xs = rank*x;
      else          xs = rank*(x-1) + (M+rank)%(x*m);
    } else { /* The odd nodes are evenly distributed across the first k nodes */
      /* Regular PETSc Distribution */
      x = M/m + ((M % m) > rank);
      if (rank >= (M % m)) xs = (rank * (PetscInt)(M/m) + M % m);
      else                 xs = rank * (PetscInt)(M/m) + rank;
    }
    ierr = MPI_Allgather(&xs,1,MPIU_INT,dd->lx,1,MPIU_INT,comm);CHKERRQ(ierr);
    for (i=0; i<m-1; i++) dd->lx[i] = dd->lx[i+1] - dd->lx[i];
    dd->lx[m-1] = M - dd->lx[m-1];
  } else {
    x  = lx[rank];
    xs = 0;
    for (i=0; i<rank; i++) xs += lx[i];
    /* verify that data user provided is consistent */
    left = xs;
    for (i=rank; i<size; i++) left += lx[i];
    if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M %D %D",left,M);
  }

  /*
   check if the scatter requires more than one process neighbor or wraps around
   the domain more than once
  */
  if ((x < s) && ((M > 1) || (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);

  xe  = xs + x;

  /* determine ghost region (Xs) and region scattered into (IXs)  */
  if (xs-sDist > 0) {
    Xs  = xs - sDist;
    IXs = xs - sDist;
  } else {
    if (bx) Xs = xs - sDist;
    else Xs = 0;
    IXs = 0;
  }
  if (xe+sDist <= M) {
    Xe  = xe + sDist;
    IXe = xe + sDist;
  } else {
    if (bx) Xe = xe + sDist;
    else Xe = M;
    IXe = M;
  }

  if (bx == DM_BOUNDARY_PERIODIC || bx == DM_BOUNDARY_MIRROR) {
    Xs  = xs - sDist;
    Xe  = xe + sDist;
    IXs = xs - sDist;
    IXe = xe + sDist;
  }

  /* allocate the base parallel and sequential vectors */
  dd->Nlocal = dof*x;
  ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
  dd->nlocal = dof*(Xe-Xs);
  ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr);

  ierr = VecGetOwnershipRange(global,&start,NULL);CHKERRQ(ierr);

  /* Create Global to Local Vector Scatter Context */
  /* global to local must retrieve ghost points */
  ierr = ISCreateStride(comm,dof*(IXe-IXs),dof*(IXs-Xs),1,&to);CHKERRQ(ierr);

  ierr = PetscMalloc1(x+2*sDist,&idx);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)da,(x+2*(sDist))*sizeof(PetscInt));CHKERRQ(ierr);

  for (i=0; i<IXs-Xs; i++) idx[i] = -1; /* prepend with -1s if needed for ghosted case */

  nn = IXs-Xs;
  if (bx == DM_BOUNDARY_PERIODIC) { /* Handle all cases with periodic first */
    for (i=0; i<sDist; i++) {  /* Left ghost points */
      if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i;
      else                 idx[nn++] = M+(xs-sDist+i);
    }

    for (i=0; i<x; i++) idx[nn++] = xs + i;  /* Non-ghost points */

    for (i=0; i<sDist; i++) { /* Right ghost points */
      if ((xe+i)<M) idx[nn++] = xe+i;
      else          idx[nn++] = (xe+i) - M;
    }
  } else if (bx == DM_BOUNDARY_MIRROR) { /* Handle the mirror boundary */
    for (i=0; i<sDist; i++) {  /* Left ghost points */
      if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i;
      else                 idx[nn++] = sDist - i;
    }

    for (i=0; i<x; i++) idx[nn++] = xs + i;  /* Non-ghost points */

    for (i=0; i<sDist; i++) { /* Right ghost points */
      if ((xe+i)<M) idx[nn++] = xe+i;
      else          idx[nn++] = M - (i + 1);
    }
  } else {      /* Now do all cases with no periodicity */
    if (0 <= xs-sDist) {
      for (i=0; i<sDist; i++) idx[nn++] = xs - sDist + i;
    } else {
      for (i=0; i<xs; i++) idx[nn++] = i;
    }

    for (i=0; i<x; i++) idx[nn++] = xs + i;

    if ((xe+sDist)<=M) {
      for (i=0; i<sDist; i++) idx[nn++] = xe+i;
    } else {
      for (i=xe; i<M; i++) idx[nn++] = i;
    }
  }

  ierr = ISCreateBlock(comm,dof,nn-IXs+Xs,&idx[IXs-Xs],PETSC_USE_POINTER,&from);CHKERRQ(ierr);
  ierr = VecScatterCreate(global,from,local,to,&gtol);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = VecDestroy(&local);CHKERRQ(ierr);
  ierr = VecDestroy(&global);CHKERRQ(ierr);

  dd->xs = dof*xs; dd->xe = dof*xe; dd->ys = 0; dd->ye = 1; dd->zs = 0; dd->ze = 1;
  dd->Xs = dof*Xs; dd->Xe = dof*Xe; dd->Ys = 0; dd->Ye = 1; dd->Zs = 0; dd->Ze = 1;

  dd->gtol      = gtol;
  dd->base      = dof*xs;
  da->ops->view = DMView_DA_1d;

  /*
     Set the local to global ordering in the global vector, this allows use
     of VecSetValuesLocal().
  */
  for (i=0; i<Xe-IXe; i++) idx[nn++] = -1; /* pad with -1s if needed for ghosted case */

  ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
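For context, the routine above is what runs behind a user-level 1D DMDA. A minimal sketch of the corresponding application-side calls (grid size, dof, and stencil width are illustrative):

/* Minimal 1D DMDA usage sketch: M=32 grid points, dof=1, stencil width s=1. */
DM             da;
Vec            g,l;
PetscErrorCode ierr;

ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_PERIODIC,32,1,1,NULL,&da);CHKERRQ(ierr);
ierr = DMSetFromOptions(da);CHKERRQ(ierr);
ierr = DMSetUp(da);CHKERRQ(ierr);          /* invokes DMSetUp_DA_1D above */
ierr = DMCreateGlobalVector(da,&g);CHKERRQ(ierr);
ierr = DMCreateLocalVector(da,&l);CHKERRQ(ierr);
ierr = VecSet(g,1.0);CHKERRQ(ierr);
ierr = DMGlobalToLocalBegin(da,g,INSERT_VALUES,l);CHKERRQ(ierr); /* drives dd->gtol */
ierr = DMGlobalToLocalEnd(da,g,INSERT_VALUES,l);CHKERRQ(ierr);
ierr = VecDestroy(&g);CHKERRQ(ierr);
ierr = VecDestroy(&l);CHKERRQ(ierr);
ierr = DMDestroy(&da);CHKERRQ(ierr);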
Example #20
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B = (Mat_SeqAIJ*)(aij->B->data);  
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid; 
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */ 
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 
  while (tpos) {  
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 
    gid--;
    lid--;
    garray[lid] = gid; 
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j] ]) ec++; 
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif  
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building the scatter-gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  i = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of block index sets is not consistent across processes (this process is using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec % bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
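The useblockis detection loop above amounts to a simple predicate on garray. Restated standalone for clarity (illustrative only, not part of the PETSc source):

/* Returns PETSC_TRUE if garray of length ec is a concatenation of
   aligned, contiguous blocks of size bs (the condition checked above). */
static PetscBool GarrayIsBlocked(const PetscInt *garray,PetscInt ec,PetscInt bs)
{
  PetscInt i,j;

  if (ec % bs) return PETSC_FALSE;
  for (i=0; i<ec/bs; i++) {
    PetscInt ga = garray[i*bs];
    if (ga % bs) return PETSC_FALSE;                    /* block must be aligned */
    for (j=1; j<bs; j++) {
      if (garray[i*bs+j] != ga+j) return PETSC_FALSE;   /* and contiguous */
    }
  }
  return PETSC_TRUE;
}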