Beispiel #1
/*
   MatBlockMatSetPreallocation_BlockMat - Preallocates block storage for a BlockMat matrix.

   Input:
     A   - the matrix; its data pointer is treated as a Mat_BlockMat
     bs  - block size, applied to both the row and the column layout
     nz  - block count per block row; PETSC_DEFAULT/PETSC_DECIDE is mapped to 5
     nnz - per-block-row block counts (one entry per block row)

   The routine validates nz/nnz, creates the work vectors right/middle/left,
   allocates imax/ilen on first use and (re)allocates the a/j/i arrays sized
   from the sum of nnz.

   NOTE(review): a NULL nnz raises PETSC_ERR_SUP further down, so the default
   nz of 5 is never used for the actual allocation.
   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;

  /* install the block size on both layouts, then read back the value actually stored */
  ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(A->rmap,&bs);CHKERRQ(ierr);

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
  /* each per-row count must lie in [0, number of block columns] */
  if (nnz) {
    for (i=0; i<A->rmap->n/bs; i++) {
      if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
      if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs);
  /* mbs = number of block rows */
  bmat->mbs = A->rmap->n/bs;

  /* right and middle are created without storage (NULL array); left owns storage of length bs */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);CHKERRQ(ierr);

  /* imax/ilen are allocated with A->rmap->n entries; only the first mbs entries are indexed below */
  if (!bmat->imax) {
    ierr = PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));CHKERRQ(ierr);
  /* from here on nz is the total number of blocks over all block rows */
  if (nnz) {
    nz = 0;
    for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz           += nnz[i];
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  for (i=0; i<bmat->mbs; i++) bmat->ilen[i] = 0;

  /* allocate the matrix space */
  ierr       = MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr);
  ierr       = PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);CHKERRQ(ierr);
  ierr       = PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
  /* row offsets: running sum of the per-row capacities in imax */
  bmat->i[0] = 0;
  for (i=1; i<bmat->mbs+1; i++) {
    bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  /* nothing inserted yet: all nz preallocated slots are reported as unneeded */
  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  ierr                = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
Beispiel #2
/*
   BVCreate_Mat - Sets up a BV whose storage is a dense Mat.

   Allocates the BV_MAT context, checks that the template vector bv->t is of
   type VECMPI or VECSEQ, creates a dense matrix with nloc local rows and
   bv->m global columns, creates the two array-less work vectors bv->cv[0..1],
   and installs the BV operation table.

   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PETSC_EXTERN PetscErrorCode BVCreate_Mat(BV bv)
  PetscErrorCode ierr;
  BV_MAT         *ctx;
  PetscInt       nloc,bs;
  PetscBool      seq;
  char           str[50];

  ierr = PetscNewLog(bv,&ctx);CHKERRQ(ierr);
  bv->data = (void*)ctx;

  /* only VECMPI and VECSEQ template vectors are accepted */
  ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECMPI,&ctx->mpi);CHKERRQ(ierr);
  if (!ctx->mpi) {
    ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECSEQ,&seq);CHKERRQ(ierr);
    if (!seq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot create a BVMAT from a non-standard template vector");

  ierr = VecGetLocalSize(bv->t,&nloc);CHKERRQ(ierr);
  ierr = VecGetBlockSize(bv->t,&bs);CHKERRQ(ierr);

  /* dense storage on the template vector's communicator; assembled immediately */
  ierr = MatCreateDense(PetscObjectComm((PetscObject)bv->t),nloc,PETSC_DECIDE,PETSC_DECIDE,bv->m,NULL,&ctx->A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(ctx->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(ctx->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)bv,(PetscObject)ctx->A);CHKERRQ(ierr);
  /* a named BV passes its name on to the matrix as "<name>_0" */
  if (((PetscObject)bv)->name) {
    ierr = PetscSNPrintf(str,50,"%s_0",((PetscObject)bv)->name);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)ctx->A,str);CHKERRQ(ierr);

  /* work vectors are created with a NULL array; storage is not attached here */
  if (ctx->mpi) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,PETSC_DECIDE,NULL,&bv->cv[0]);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,PETSC_DECIDE,NULL,&bv->cv[1]);CHKERRQ(ierr);
  } else {
    ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,NULL,&bv->cv[0]);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,NULL,&bv->cv[1]);CHKERRQ(ierr);

  bv->ops->mult             = BVMult_Mat;
  bv->ops->multvec          = BVMultVec_Mat;
  bv->ops->multinplace      = BVMultInPlace_Mat;
  bv->ops->multinplacetrans = BVMultInPlaceTranspose_Mat;
  bv->ops->axpy             = BVAXPY_Mat;
  bv->ops->dot              = BVDot_Mat;
  bv->ops->dotvec           = BVDotVec_Mat;
  bv->ops->scale            = BVScale_Mat;
  bv->ops->norm             = BVNorm_Mat;
  /*bv->ops->orthogonalize    = BVOrthogonalize_Mat;*/
  bv->ops->matmult          = BVMatMult_Mat;
  bv->ops->copy             = BVCopy_Mat;
  bv->ops->resize           = BVResize_Mat;
  bv->ops->getcolumn        = BVGetColumn_Mat;
  bv->ops->restorecolumn    = BVRestoreColumn_Mat;
  bv->ops->getarray         = BVGetArray_Mat;
  bv->ops->restorearray     = BVRestoreArray_Mat;
  bv->ops->destroy          = BVDestroy_Mat;
  /* the view operation is installed for the sequential case only */
  if (!ctx->mpi) bv->ops->view = BVView_Mat;
Beispiel #3
/*
   PCSetUp_TFS - Builds the TFS factorization for the preconditioner matrix pc->pmat.

   Input:
     pc - the preconditioner context; pc->data is treated as a PC_TFS and
          pc->pmat must be a square MATMPIAIJ matrix

   The routine builds a 1-based local-to-global column numbering (diagonal
   block columns first, then the off-diagonal columns listed in a->garray),
   creates the array-less work vectors b/xd/xo used by the local multiply,
   and factors with XXT when A->symmetric is set, otherwise with XYT.

   Errors:
     PETSC_ERR_ARG_SIZ - the matrix is not square
     PETSC_ERR_SUP     - the matrix is not of type MATMPIAIJ
*/
static PetscErrorCode PCSetUp_TFS(PC pc)
{
  PC_TFS         *tfs = (PC_TFS*)pc->data;
  Mat            A    = pc->pmat;
  Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data; /* only dereferenced after the type check below */
  PetscErrorCode ierr;
  PetscInt       *localtoglobal,ncol,i;
  PetscBool      ismpiaij;

  PetscFunctionBegin;
  if (A->cmap->N != A->rmap->N) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_SIZ,"matrix must be square");
  ierr = PetscObjectTypeCompare((PetscObject)pc->pmat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
  if (!ismpiaij) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Currently only supports MPIAIJ matrices");

  /* generate the local to global mapping (indices are shifted to be 1-based) */
  ncol = a->A->cmap->n + a->B->cmap->n;
  ierr = PetscMalloc((ncol)*sizeof(PetscInt),&localtoglobal);CHKERRQ(ierr);
  for (i=0; i<a->A->cmap->n; i++) {
    localtoglobal[i] = A->cmap->rstart + i + 1;
  }
  for (i=0; i<a->B->cmap->n; i++) {
    localtoglobal[i+a->A->cmap->n] = a->garray[i] + 1;
  }

  /* generate the vectors needed for the local solves; no storage is attached here */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->A->rmap->n,PETSC_NULL,&tfs->b);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->A->cmap->n,PETSC_NULL,&tfs->xd);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->B->cmap->n,PETSC_NULL,&tfs->xo);CHKERRQ(ierr);
  tfs->nd = a->A->cmap->n;

  /* The symmetry flag stored on A selects the solver; MatIsSymmetric() is not called here. */
  ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
  if (A->symmetric) {
    tfs->xxt       = XXT_new();
    ierr           = XXT_factor(tfs->xxt,localtoglobal,A->rmap->n,ncol,(void*)PCTFSLocalMult_TFS,pc);CHKERRQ(ierr);
    pc->ops->apply = PCApply_TFS_XXT;
  } else {
    tfs->xyt       = XYT_new();
    ierr           = XYT_factor(tfs->xyt,localtoglobal,A->rmap->n,ncol,(void*)PCTFSLocalMult_TFS,pc);CHKERRQ(ierr);
    pc->ops->apply = PCApply_TFS_XYT;
  }

  ierr = PetscFree(localtoglobal);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
// Applies the wrapped PETSc matrix Amat_ to each column of X via MatMult and
// copies the result into the matching column of Y.
//
// NOTE(review): TransA is not referenced anywhere in the visible code.
// NOTE(review): xptrs/yptrs are never assigned in the visible code before
// being indexed in the loop; the lines that set them appear to be missing
// from this listing (as are brace-only lines and the return statement).
int Epetra_PETScAIJMatrix::Multiply(bool TransA,
                               const Epetra_MultiVector& X,
                               Epetra_MultiVector& Y) const
  int NumVectors = X.NumVectors();
  if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1);  // X and Y must have same number of vectors

  double ** xptrs;
  double ** yptrs;
  // With an importer present, (re)create ImportVector_ when its column count
  // does not match and import X into it.
  if (RowMatrixImporter()!=0) {
    if (ImportVector_!=0) {
      if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
    if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors);
    ImportVector_->Import(X, *RowMatrixImporter(), Insert);

  double *vals=0;
  int length;
  Vec petscX, petscY;
  int ierr;
  // One PETSc vector pair per column, built on the column's existing storage.
  for (int i=0; i<NumVectors; i++) {
#   ifdef HAVE_MPI
    ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr);
    ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr);
#   else //FIXME  untested
    ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr);
    ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr);
#   endif

    ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr);

    // Copy the product back into column i of Y.
    ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr);
    ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr);
    for (int j=0; j<length; j++) yptrs[i][j] = vals[j];
    ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr);

  // NOTE(review): judging by indentation this sits after the loop, so only the
  // last vector pair would be destroyed when NumVectors > 1 -- confirm.
  VecDestroy(petscX); VecDestroy(petscY);
  // flops = 2 * NumGlobalNonzeros() * NumVectors; the visible code does not use it afterwards.
  double flops = NumGlobalNonzeros();
  flops *= 2.0;
  flops *= (double) NumVectors;
} //Multiply()
Beispiel #5
   DMDAGlobalToNaturalAllCreate - Creates a scatter context that maps from the
     global vector to the entire vector on each processor, in natural numbering

   Collective on DMDA

   Input Parameter:
.  da - the distributed array context

   Output Parameter:
.  scatter - the scatter context

   Level: advanced

.keywords: distributed array, global to local, begin, coarse problem

.seealso: DMDAGlobalToNaturalEnd(), DMLocalToGlobalBegin(), DMDACreate2d(),
          DMGlobalToLocalBegin(), DMGlobalToLocalEnd(), DMDACreateNaturalVector()
/*
   Builds *scatter from a parallel vector (dd->Nlocal local entries, block
   size dd->w) to a sequential vector holding all N global entries.  The
   destination index set is the stride 0..N-1 passed through
   AOPetscToApplicationIS(); the source index set is the plain stride 0..N-1.
   Both vectors are created with a null array and destroyed once the scatter
   exists.

   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PetscErrorCode  DMDAGlobalToNaturalAllCreate(DM da,VecScatter *scatter)
  PetscErrorCode ierr;
  PetscInt       N;
  IS             from,to;
  Vec            tmplocal,global;
  AO             ao;
  DM_DA          *dd = (DM_DA*)da->data;

  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);

  /* create the scatter context */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)da),dd->w,dd->Nlocal,PETSC_DETERMINE,0,&global);CHKERRQ(ierr);
  ierr = VecGetSize(global,&N);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)da),N,0,1,&to);CHKERRQ(ierr);
  ierr = AOPetscToApplicationIS(ao,to);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)da),N,0,1,&from);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dd->w,N,0,&tmplocal);CHKERRQ(ierr);
  ierr = VecScatterCreate(global,from,tmplocal,to,scatter);CHKERRQ(ierr);
  /* the temporaries are released; only the scatter is handed back to the caller */
  ierr = VecDestroy(&tmplocal);CHKERRQ(ierr);
  ierr = VecDestroy(&global);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
Beispiel #6
void PetscVector::copyFromArray( double v[] )
    int ierr;

    Vec sv;
    IS is;
    VecScatter ctx;

    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF, n, v, &sv);
    assert(ierr == 0);
    ierr = ISCreateStride(PETSC_COMM_WORLD, n, 0, 1, &is);
    assert( ierr  == 0);
    ierr = VecScatterCreate( sv, is, pv, is, &ctx);
    assert( ierr  == 0);

    ierr = VecScatterBegin( sv, pv,INSERT_VALUES,SCATTER_FORWARD,
    assert( ierr  == 0);
    ierr = VecScatterEnd( sv, pv,INSERT_VALUES,SCATTER_FORWARD,
    assert( ierr  == 0);

    ierr = VecScatterDestroy(ctx);
    assert( ierr  == 0);
    ierr = ISDestroy( is );
    assert(ierr == 0);
    ierr = VecDestroy( sv );
    assert(ierr == 0);
// Computes answer = (this matrix) * x for the sequential case.
// Raises an OOFEM error when the column count differs from x's size or when
// the model is parallel.
//
// NOTE(review): the return type, brace-only lines and any cleanup appear to
// be missing from this listing; no VecDestroy for globX/globY is visible and
// answer is not resized in the visible code -- confirm against the full file.
PetscSparseMtrx :: times(const FloatArray &x, FloatArray &answer) const
    if ( this->giveNumberOfColumns() != x.giveSize() ) {
        OOFEM_ERROR("Dimension mismatch");

    if ( emodel->isParallel() ) {
        OOFEM_ERROR("PetscSparseMtrx :: times - Not implemented");
    Vec globX, globY;
    // globX is built on x's storage; globY is a fresh sequential vector of global size nRows.
    VecCreateSeqWithArray(PETSC_COMM_SELF, 1, x.giveSize(), x.givePointer(), & globX);
    VecCreate(PETSC_COMM_SELF, & globY);
    VecSetType(globY, VECSEQ);
    VecSetSizes(globY, PETSC_DECIDE, this->nRows);

    MatMult(this->mtrx, globX, globY);
    // Copy the product entry by entry into answer.
    double *ptr;
    VecGetArray(globY, & ptr);
    for ( int i = 0; i < this->nRows; i++ ) {
        answer(i) = ptr [ i ];

    VecRestoreArray(globY, & ptr);
Beispiel #8
   DMDANaturalAllToGlobalCreate - Creates a scatter context that maps from a copy
     of the entire vector on each processor to its local part in the global vector.

   Collective on DMDA

   Input Parameter:
.  da - the distributed array context

   Output Parameter:
.  scatter - the scatter context

   Level: advanced

.keywords: distributed array, global to local, begin, coarse problem

.seealso: DMDAGlobalToNaturalEnd(), DMLocalToGlobalBegin(), DMDACreate2d(),
          DMGlobalToLocalBegin(), DMGlobalToLocalEnd(), DMDACreateNaturalVector()
/*
   Builds *scatter from a sequential vector of length M (the sum of dd->Nlocal
   over the DM's communicator) into the parallel vector's locally owned range.
   The source index set is the stride start..start+m-1 passed through
   AOPetscToApplicationIS(); the destination index set is the same stride
   unchanged.  Both vectors are created with a null array and destroyed once
   the scatter exists.

   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PetscErrorCode  DMDANaturalAllToGlobalCreate(DM da,VecScatter *scatter)
  PetscErrorCode ierr;
  DM_DA          *dd = (DM_DA*)da->data;
  PetscInt       M,m = dd->Nlocal,start;
  IS             from,to;
  Vec            tmplocal,global;
  AO             ao;

  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);

  /* create the scatter context */
  /* M = total of all local sizes m across the communicator */
  ierr = MPI_Allreduce(&m,&M,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)da),dd->w,m,PETSC_DETERMINE,0,&global);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(global,&start,NULL);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)da),m,start,1,&from);CHKERRQ(ierr);
  ierr = AOPetscToApplicationIS(ao,from);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)da),m,start,1,&to);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dd->w,M,0,&tmplocal);CHKERRQ(ierr);
  ierr = VecScatterCreate(tmplocal,from,global,to,scatter);CHKERRQ(ierr);
  /* the temporaries are released; only the scatter is handed back to the caller */
  ierr = VecDestroy(&tmplocal);CHKERRQ(ierr);
  ierr = VecDestroy(&global);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
Beispiel #9
// Returns a scaled L2 norm of x restricted to the DOFs of type dofType.
// In a parallel run (HAVE_MPI and adm.isParallel()) x is scattered into a
// distributed vector gx and the result is ||gx||_2 / sqrt(n), where n is the
// global size of gx; otherwise the call is forwarded to SAM::normL2.
//
// NOTE(review): several lines appear to be missing from this listing: the
// statement guarded by the dofIS.find() test, the matching #endif, and all
// brace-only lines.
Real SAMpatchPETSc::normL2(const Vector& x, char dofType) const
#ifdef HAVE_MPI
  if (adm.isParallel()) {
    if (dofIS.find(dofType) == dofIS.end())

    Vec lx;
    // NOTE(review): the array argument between the two commas is empty in this
    // listing, so the call does not compile as shown -- restore it from the
    // original source.
    VecCreateSeqWithArray(PETSC_COMM_SELF, 1, x.size(),, &lx);
    Vec gx;
    VecCreate(*adm.getCommunicator(), &gx);
    VecSetSizes(gx, dofIS[dofType].nDofs, PETSC_DETERMINE);
    PetscInt n;
    VecGetSize(gx, &n);

    // The scatter for this DOF type is created on first use and then reused.
    if (!dofIS[dofType].scatterCreated) {
      VecScatterCreate(lx, dofIS[dofType].local, gx, dofIS[dofType].global, &dofIS[dofType].ctx);
      dofIS[dofType].scatterCreated = true;

    VecScatterBegin(dofIS[dofType].ctx, lx, gx, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(dofIS[dofType].ctx, lx, gx, INSERT_VALUES, SCATTER_FORWARD);
    PetscReal d;
    VecNorm(gx, NORM_2, &d);

    // NOTE(review): no VecDestroy for lx/gx is visible before this return -- confirm.
    return d / sqrt(double(n));

  return this->SAM::normL2(x, dofType);
Beispiel #10
// Write the sub mesh into a HDF5 file.
//
// On the process with commRank == 0, opens an HDF5 viewer on PETSC_COMM_SELF
// for filePath in write mode, pushes the group "mesh", and writes one vector
// per entry of coord, named "x", "y", "z" in that order.
//
// NOTE(review): brace-only lines and the final return appear to be missing
// from this listing.
PetscErrorCode ProbeVolume::writeSubMeshHDF5(const std::string &filePath)
    PetscErrorCode ierr;


    // only the first process in the communicator write the sub-mesh into a file
    if (commRank == 0)
        // because only one process is involved in writing the sub-mesh,
        // we need to create a temporary viewer
        PetscViewer viewer2;
        ierr = PetscViewerCreate(PETSC_COMM_SELF, &viewer2); CHKERRQ(ierr);
        ierr = PetscViewerSetType(viewer2, PETSCVIEWERHDF5); CHKERRQ(ierr);
        ierr = PetscViewerFileSetMode(viewer2, FILE_MODE_WRITE); CHKERRQ(ierr);
        ierr = PetscViewerFileSetName(
            viewer2, filePath.c_str()); CHKERRQ(ierr);
        ierr = PetscViewerHDF5PushGroup(viewer2, "mesh"); CHKERRQ(ierr);
        // dataset names, indexed by direction; dirs has three entries, so
        // coord is presumably expected to hold at most three directions
        std::vector<std::string> dirs{"x", "y", "z"};
        for (unsigned int d = 0; d < coord.size(); ++d)
            // temporary vector built on the first nPtsDir[d] entries of coord[d]
            Vec tmp;
            ierr = VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nPtsDir[d],
                                         &coord[d][0], &tmp); CHKERRQ(ierr);
            ierr = PetscObjectSetName((PetscObject) tmp,
                                      dirs[d].c_str()); CHKERRQ(ierr);
            ierr = VecView(tmp, viewer2); CHKERRQ(ierr);
            ierr = VecDestroy(&tmp); CHKERRQ(ierr);
        ierr = PetscViewerDestroy(&viewer2); CHKERRQ(ierr);

}  // ProbeVolume::writeSubMeshHDF5
Beispiel #11
/*
   BVCreate_Contiguous - Sets up a BV whose columns share one contiguous array.

   Allocates and zeroes an array of bv->m*nloc scalars, creates bv->m vectors
   (MPI or sequential, matching the template vector bv->t) where vector j is
   built on the slice starting at offset j*nloc, names them "<name>_<j>" when
   the BV has a name, and installs the BV operation table.

   NOTE(review): ctx is used but not declared in the visible code; its
   declaration, brace-only lines and the final return appear to be missing
   from this listing.
*/
PETSC_EXTERN PetscErrorCode BVCreate_Contiguous(BV bv)
  PetscErrorCode ierr;
  PetscInt       j,nloc,bs;
  PetscBool      seq;
  char           str[50];

  ierr = PetscNewLog(bv,&ctx);CHKERRQ(ierr);
  bv->data = (void*)ctx;

  /* only VECMPI and VECSEQ template vectors are accepted */
  ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECMPI,&ctx->mpi);CHKERRQ(ierr);
  if (!ctx->mpi) {
    ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECSEQ,&seq);CHKERRQ(ierr);
    if (!seq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot create a contiguous BV from a non-standard template vector");

  ierr = VecGetLocalSize(bv->t,&nloc);CHKERRQ(ierr);
  ierr = VecGetBlockSize(bv->t,&bs);CHKERRQ(ierr);
  /* one zero-initialized slab for all columns, plus the array of Vec handles */
  ierr = PetscMalloc1(bv->m*nloc,&ctx->array);CHKERRQ(ierr);
  ierr = PetscMemzero(ctx->array,bv->m*nloc*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMalloc1(bv->m,&ctx->V);CHKERRQ(ierr);
  /* column j is a vector built on ctx->array + j*nloc */
  for (j=0;j<bv->m;j++) {
    if (ctx->mpi) {
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,PETSC_DECIDE,ctx->array+j*nloc,ctx->V+j);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,ctx->array+j*nloc,ctx->V+j);CHKERRQ(ierr);
  ierr = PetscLogObjectParents(bv,bv->m,ctx->V);CHKERRQ(ierr);
  if (((PetscObject)bv)->name) {
    for (j=0;j<bv->m;j++) {
      ierr = PetscSNPrintf(str,50,"%s_%D",((PetscObject)bv)->name,j);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)ctx->V[j],str);CHKERRQ(ierr);

  bv->ops->mult             = BVMult_Contiguous;
  bv->ops->multvec          = BVMultVec_Contiguous;
  bv->ops->multinplace      = BVMultInPlace_Contiguous;
  bv->ops->multinplacetrans = BVMultInPlaceTranspose_Contiguous;
  bv->ops->axpy             = BVAXPY_Contiguous;
  bv->ops->dot              = BVDot_Contiguous;
  bv->ops->dotvec           = BVDotVec_Contiguous;
  bv->ops->scale            = BVScale_Contiguous;
  bv->ops->norm             = BVNorm_Contiguous;
  /*bv->ops->orthogonalize    = BVOrthogonalize_Contiguous;*/
  bv->ops->matmult          = BVMatMult_Contiguous;
  bv->ops->copy             = BVCopy_Contiguous;
  bv->ops->resize           = BVResize_Contiguous;
  bv->ops->getcolumn        = BVGetColumn_Contiguous;
  bv->ops->getarray         = BVGetArray_Contiguous;
  bv->ops->destroy          = BVDestroy_Contiguous;
// Computes answer = (this matrix) * B for the sequential case.
// Raises an OOFEM error when the column count differs from B's row count or
// when the model is parallel.  The active path wraps B as a dense PETSc
// matrix, calls MatMatMult, and copies the product row by row into answer's
// storage with stride nr (index i2 = r + i*nr).
//
// NOTE(review): the return type, the #endif closing the "#if 0" section,
// brace-only lines and any cleanup appear to be missing from this listing;
// no MatDestroy for globB/globC is visible -- confirm against the full file.
PetscSparseMtrx :: times(const FloatMatrix &B, FloatMatrix &answer) const
    if ( this->giveNumberOfColumns() != B.giveNumberOfRows() ) {
        OOFEM_ERROR("Dimension mismatch");

    if ( emodel->isParallel() ) {
        OOFEM_ERROR("PetscSparseMtrx :: times - Not implemented");
    // I'm opting to work with a set of vectors, as i think it might be faster and more robust. / Mikael

    int nr = this->giveNumberOfRows();
    int nc = B.giveNumberOfColumns();
    answer.resize(nr, nc);
    double *aptr = answer.givePointer();

#if 0
     // Approach using several vectors. Not sure if it is optimal, but it includes petsc calls which i suspect are inefficient. / Mikael
     // UNTESTED!
     Vec globX, globY;
     VecCreate(PETSC_COMM_SELF, &globY);
     VecSetType(globY, VECSEQ);
     VecSetSizes(globY, PETSC_DECIDE, nr);
     int nrB = B.giveNumberOfRows();
     for (int k = 0; k < nc; k++) {
         double colVals[nrB];
         for (int i = 0; i < nrB; i++) colVals[i] = B(i,k); // B.copyColumn(Bk,k);
         VecCreateSeqWithArray(PETSC_COMM_SELF, nrB, colVals, &globX);
         MatMult(this->mtrx, globX, globY );
                 double *ptr;
                 VecGetArray(globY, &ptr);
                 for (int i = 0; i < nr; i++) *aptr++ = ptr[i]; // answer.setColumn(Ak,k);
                 VecRestoreArray(globY, &ptr);

    // Dense-matrix path: globB is built on B's storage, globC receives the product.
    Mat globB, globC;
    MatCreateSeqDense(PETSC_COMM_SELF, B.giveNumberOfRows(), B.giveNumberOfColumns(), B.givePointer(), & globB);
    MatMatMult(this->mtrx, globB, MAT_INITIAL_MATRIX, PETSC_DEFAULT, & globC);
    const double *vals;
    for ( int r = 0; r < nr; r++ ) {
        MatGetRow(globC, r, NULL, NULL, & vals);
        // entry (r,i) of the product is written at offset r + i*nr of answer's storage
        for ( int i = 0, i2 = r; i < nc; i++, i2 += nr ) {
            aptr [ i2 ] = vals [ i ];
        MatRestoreRow(globC, r, NULL, NULL, & vals);


Beispiel #13
   VecMPISetGhost - Sets the ghost points for an MPI ghost vector

   Collective on Vec

   Input Parameters:
+  vv - the MPI vector
.  nghost - number of local ghost points
-  ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)

   Use VecGhostGetLocalForm() to access the local, ghosted representation
   of the vector.

   This also automatically sets the ISLocalToGlobalMapping() for this vector.

   You must call this AFTER you have set the type of the vector (with VecSetType()) and the size (with VecSetSizes()).

   Level: advanced

   Concepts: vectors^ghosted

.seealso: VecCreateSeq(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateMPI(),
          VecGhostGetLocalForm(), VecGhostRestoreLocalForm(), VecGhostUpdateBegin(),
          VecCreateGhostWithArray(), VecCreateMPIWithArray(), VecGhostUpdateEnd(),
          VecCreateGhostBlock(), VecCreateGhostBlockWithArray()

/*
   Adds ghost padding to an existing VECMPI vector.

   For a VECMPI vector the current implementation data is destroyed and
   rebuilt with nghost extra entries, a sequential local representation of
   length n+nghost is built on the new array, a scatter that fills the ghost
   slots from the global indices in ghosts[] is created, and a
   local-to-global mapping (owned indices first, then ghosts) is attached.
   For any other vector an error is raised when the create operation is
   VecCreate_MPI or when no type name has been set.

   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PetscErrorCode  VecMPISetGhost(Vec vv,PetscInt nghost,const PetscInt ghosts[])
  PetscErrorCode ierr;
  PetscBool      flg;

  ierr = PetscObjectTypeCompare((PetscObject)vv,VECMPI,&flg);CHKERRQ(ierr);
  /* if already fully existent VECMPI then basically destroy it and rebuild with ghosting */
  if (flg) {
    PetscInt               n,N;
    Vec_MPI                *w;
    PetscScalar            *larray;
    IS                     from,to;
    ISLocalToGlobalMapping ltog;
    PetscInt               rstart,i,*indices;
    MPI_Comm               comm = ((PetscObject)vv)->comm;

    /* remember the sizes before the implementation data is torn down */
    n = vv->map->n;
    N = vv->map->N;
    ierr = (*vv->ops->destroy)(vv);CHKERRQ(ierr);
    ierr = VecSetSizes(vv,n,N);CHKERRQ(ierr);
    ierr = VecCreate_MPI_Private(vv,PETSC_TRUE,nghost,PETSC_NULL);CHKERRQ(ierr);
    w    = (Vec_MPI *)(vv)->data;
    /* Create local representation */
    ierr = VecGetArray(vv,&larray);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,n+nghost,larray,&w->localrep);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(vv,w->localrep);CHKERRQ(ierr);
    ierr = VecRestoreArray(vv,&larray);CHKERRQ(ierr);

    /* NOTE(review): the next line reads like comment text whose delimiters were lost in this listing */
     Create scatter context for scattering (updating) ghost values
    ierr = ISCreateGeneral(comm,nghost,ghosts,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
    /* ghost values land in local slots n .. n+nghost-1 */
    ierr = ISCreateStride(PETSC_COMM_SELF,nghost,n,1,&to);CHKERRQ(ierr);
    ierr = VecScatterCreate(vv,from,w->localrep,to,&w->localupdate);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(vv,w->localupdate);CHKERRQ(ierr);
    ierr = ISDestroy(&to);CHKERRQ(ierr);
    ierr = ISDestroy(&from);CHKERRQ(ierr);

    /* set local to global mapping for ghosted vector */
    ierr = PetscMalloc((n+nghost)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vv,&rstart,PETSC_NULL);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      indices[i] = rstart + i;
    for (i=0; i<nghost; i++) {
      indices[n+i] = ghosts[i];
    /* PETSC_OWN_POINTER: indices is handed to the mapping and not freed in this routine */
    ierr = ISLocalToGlobalMappingCreate(comm,n+nghost,indices,PETSC_OWN_POINTER,&ltog);CHKERRQ(ierr);
    ierr = VecSetLocalToGlobalMapping(vv,ltog);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
  } else if (vv->ops->create == VecCreate_MPI) SETERRQ(((PetscObject)vv)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must set local or global size before setting ghosting");
  else if (!((PetscObject)vv)->type_name) SETERRQ(((PetscObject)vv)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must set type to VECMPI before ghosting");
Beispiel #14
/*
 * Wraps the storage of an op_dat in a sequential PETSc vector and returns it.
 * The vector length is vec->dim * vec->set->size and vec->data is passed as
 * the array, cast to PetscScalar*.
 *
 * NOTE(review): the return code of VecCreateSeqWithArray is not checked, and
 * the closing brace appears to be missing from this listing.
 */
static Vec op_create_vec ( const op_dat vec ) {
  assert( vec );

  Vec p_vec;
  // Create a PETSc vector and pass it the user-allocated storage
  VecCreateSeqWithArray(MPI_COMM_SELF,vec->dim * vec->set->size,(PetscScalar*)vec->data,&p_vec);

  return p_vec;
Beispiel #15
   VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
   the caller allocates the array space. Indices in the ghost region are based on blocks.

   Collective on MPI_Comm

   Input Parameters:
+  comm - the MPI communicator to use
.  bs - block size
.  n - local vector length
.  N - global vector length (or PETSC_DECIDE to have it calculated if n is given)
.  nghost - number of local ghost blocks
.  ghosts - global indices of ghost blocks (or PETSC_NULL if not needed), counts are by block not by index, these do not need to be in increasing order (sorted)
-  array - the space to store the vector values (as long as n + nghost*bs)

   Output Parameter:
.  vv - the global vector representation (without ghost points as part of vector)

   Use VecGhostGetLocalForm() to access the local, ghosted representation
   of the vector.

   n is the local vector size (total local size not the number of blocks) while nghost
   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
   portion is bs*nghost

   Level: advanced

   Concepts: vectors^creating ghosted
   Concepts: vectors^creating with array

.seealso: VecCreate(), VecGhostGetLocalForm(), VecGhostRestoreLocalForm(),
          VecCreateGhost(), VecCreateSeqWithArray(), VecCreateMPIWithArray(),
          VecCreateGhostWithArray(), VecCreateGhostBlock()

/*
   Creates a blocked, ghosted parallel vector on caller-provided storage.

   After validating nghost and that n is a multiple of bs, the routine creates
   the global vector with bs*nghost extra entries, builds the sequential local
   representation of length n+bs*nghost on the same array, creates the scatter
   that fills the ghost entries from the block indices in ghosts[], and
   attaches a block local-to-global mapping.

   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PetscErrorCode  VecCreateGhostBlockWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,PetscInt nghost,const PetscInt ghosts[],const PetscScalar array[],Vec *vv)
  PetscErrorCode         ierr;
  Vec_MPI                *w;
  PetscScalar            *larray;
  IS                     from,to;
  ISLocalToGlobalMapping ltog;
  PetscInt               rstart,i,nb,*indices;

  /* the output is cleared first, so it is null if any check below fails */
  *vv = 0;

  if (nghost == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local ghost size");
  if (nghost < 0)             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ghost length must be >= 0");
  if (n % bs)                 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local size must be a multiple of block size");
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  /* Create global representation */
  ierr = VecCreate(comm,vv);CHKERRQ(ierr);
  ierr = VecSetSizes(*vv,n,N);CHKERRQ(ierr);
  ierr = VecSetBlockSize(*vv,bs);CHKERRQ(ierr);
  ierr = VecCreate_MPI_Private(*vv,PETSC_TRUE,nghost*bs,array);CHKERRQ(ierr);
  w    = (Vec_MPI *)(*vv)->data;
  /* Create local representation */
  ierr = VecGetArray(*vv,&larray);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs,n+bs*nghost,larray,&w->localrep);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(*vv,w->localrep);CHKERRQ(ierr);
  ierr = VecRestoreArray(*vv,&larray);CHKERRQ(ierr);

  /* NOTE(review): the next line reads like comment text whose delimiters were lost in this listing */
       Create scatter context for scattering (updating) ghost values
  ierr = ISCreateBlock(comm,bs,nghost,ghosts,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  /* ghost entries land in local slots n .. n+bs*nghost-1 */
  ierr = ISCreateStride(PETSC_COMM_SELF,bs*nghost,n,1,&to);CHKERRQ(ierr);
  ierr = VecScatterCreate(*vv,from,w->localrep,to,&w->localupdate);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(*vv,w->localupdate);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);

  /* set local to global mapping for ghosted vector */
  nb = n/bs;
  ierr = PetscMalloc((nb+nghost)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(*vv,&rstart,PETSC_NULL);CHKERRQ(ierr);
  /* NOTE(review): owned entries are stored as rstart + i*bs while ghost entries are copied
     from ghosts[], which the header documents as block indices -- confirm both use the same convention */
  for (i=0; i<nb; i++) {
    indices[i] = rstart + i*bs;
  for (i=0; i<nghost; i++) {
    indices[nb+i] = ghosts[i];
  /* PETSC_OWN_POINTER: indices is handed to the mapping and not freed in this routine */
  ierr = ISLocalToGlobalMappingCreate(comm,nb+nghost,indices,PETSC_OWN_POINTER,&ltog);CHKERRQ(ierr);
  ierr = VecSetLocalToGlobalMappingBlock(*vv,ltog);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
Beispiel #16
/*
   Builds the AIJCRL companion storage of an MPIAIJ matrix.

   The values and column indices of the diagonal (a->A) and off-diagonal
   (a->B) parts are copied into acols/icols, laid out so that entry j of row i
   sits at index j*m+i, with every row padded to rmax entries.  Off-diagonal
   column indices are shifted by nd.  A work array of length B->cmap->n + nd
   is allocated and shared by the vectors xwork (first nd entries) and fwork
   (the remaining entries).

   NOTE(review): brace-only lines and the final return appear to be missing
   from this listing.
*/
PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A)
  Mat_MPIAIJ     *a      = (Mat_MPIAIJ*)(A)->data;
  Mat_SeqAIJ     *Aij    = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data);
  Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
  PetscInt       m       = A->rmap->n; /* Number of rows in the matrix. */
  PetscInt       nd      = a->A->cmap->n; /* number of columns in diagonal portion */
  PetscInt       *aj     = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning  of each row. */
  PetscInt       i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen;
  PetscScalar    *aa = Aij->a,*ba = Bij->a,*acols,*array;
  PetscErrorCode ierr;

  /* determine the row with the most columns */
  for (i=0; i<m; i++) {
    rmax = PetscMax(rmax,ailen[i]+bilen[i]);
  aijcrl->nz   = Aij->nz+Bij->nz;
  aijcrl->m    = A->rmap->n;
  aijcrl->rmax = rmax;

  /* release any previous storage, then allocate rmax*m slots for values and indices */
  ierr  = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
  ierr  = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
  acols = aijcrl->acols;
  icols = aijcrl->icols;
  /* per row: diagonal-part entries, then off-diagonal entries (index + nd), then padding;
     j carries over from one inner loop to the next */
  for (i=0; i<m; i++) {
    for (j=0; j<ailen[i]; j++) {
      acols[j*m+i] = *aa++;
      icols[j*m+i] = *aj++;
    for (; j<ailen[i]+bilen[i]; j++) {
      acols[j*m+i] = *ba++;
      icols[j*m+i] = nd + *bj++;
    for (; j<rmax; j++) { /* empty column entries */
      acols[j*m+i] = 0.0;
      icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
  /* NOTE(review): rmax*m is zero for an empty local matrix, making the ratio below a division by zero -- confirm */
  ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m)));CHKERRQ(ierr);

  ierr = PetscFree(aijcrl->array);CHKERRQ(ierr);
  ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr);
  /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */
  ierr = VecDestroy(&aijcrl->xwork);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),1,nd,PETSC_DECIDE,array,&aijcrl->xwork);CHKERRQ(ierr);
  ierr = VecDestroy(&aijcrl->fwork);CHKERRQ(ierr);
  /* fwork is built on the tail of the same array, starting at offset nd */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->B->cmap->n,array+nd,&aijcrl->fwork);CHKERRQ(ierr);

  aijcrl->array = array;
  /* the scatter is taken from the MPIAIJ matrix (a->Mvctx), not created here */
  aijcrl->xscat = a->Mvctx;
Beispiel #17
    // Test fixture set-up: creates a Mesh, opens the mesh file
    // "fields/one_element_2d.msh" as an input stream, creates a
    // DOFHandlerMultiDim for the mesh, and builds the 3-entry sequential
    // PETSc vector v on dof_values.
    //
    // NOTE(review): the stream `in` is opened but never read in the visible
    // code, and the closing brace is missing; lines appear to have been
    // dropped from this listing.
    virtual void SetUp() {
        // setup FilePath directories

        FilePath mesh_file( "fields/one_element_2d.msh", FilePath::input_file);
        mesh= new Mesh;
        ifstream in(string( mesh_file ).c_str());
        dh = new DOFHandlerMultiDim(*mesh);
        VecCreateSeqWithArray(PETSC_COMM_SELF, 1, 3, dof_values, &v);
// Scales Amat_ by calling MatDiagonalScale with a null left vector and a
// PETSc vector built on X's data as the right vector.
//
// NOTE(review): xptr is never assigned in the visible code before being
// passed to VecCreate*WithArray; the line that sets it appears to be missing
// from this listing, as does the return statement.
// NOTE(review): the existing comment below says "row", while the vector is
// passed as the right-hand argument of MatDiagonalScale -- confirm whether
// "column" is meant.
int Epetra_PETScAIJMatrix::RightScale(const Epetra_Vector& X) {
// This function scales the jth row of A by x[j].
  double *xptr;
  Vec petscX;
# ifdef HAVE_MPI
  int ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptr,&petscX); CHKERRQ(ierr);
# else //FIXME  untested
  int ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptr,&petscX); CHKERRQ(ierr);
# endif

  // The return code of MatDiagonalScale is not checked here.
  MatDiagonalScale(Amat_, PETSC_NULL, petscX);

  ierr=VecDestroy(petscX); CHKERRQ(ierr);
} //RightScale()
/*
 * Fills cacheInt with 4*localsizex indices, in four consecutive runs of
 * localsizex entries each.  Every run starts from n2*(n2*2-Istart) plus a
 * fixed offset (-1, -2, -n2+1, -n2) and decreases by n2 per step.
 * Returns 0.
 *
 * NOTE(review): x, cacheScalar, ctx, isbc and bcvec are not used in the
 * visible code, and the closing brace is missing; the part that builds the
 * scatter appears to have been dropped from this listing.
 */
int SkewSymmetricScatter(Vec *x,PetscScalar *cacheScalar ,PetscInt *cacheInt, PetscInt n2,PetscInt Istart,PetscInt localsizex , VecScatter *ctx){

PetscInt i,k; 
IS       isbc;
Vec      bcvec;


/* k is the running write position in cacheInt across all four runs */
k = 0; 
for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-1-i*n2;k++;} 
for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-2-i*n2;k++;}
for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-n2+1-i*n2;k++;}
for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-n2-i*n2;k++;}


return 0;
Beispiel #20
///@todo Parallel mode of this.
// Solves A * X = B one column at a time.
// A must be non-null and of type SMT_PetscMtrx.  For each column i of B a
// right-hand-side vector is built on B's storage at offset rows*i, a solution
// vector is duplicated from it, petsc_solve is called, and the solution is
// copied into column i of X.  newLhs is true only for the first column.
// Returns the status of the last solve, or the first status without
// NM_Success.
//
// NOTE(review): brace-only lines appear to be missing from this listing, and
// no VecDestroy for the per-column vectors is visible -- confirm against the
// full file.  If cols is 0, s is returned without having been assigned.
NM_Status PetscSolver :: solve(SparseMtrx *A, FloatMatrix &B, FloatMatrix &X)
    if ( !A ) {
        _error("solve: Unknown Lhs");
    if ( A->giveType() != SMT_PetscMtrx ) {
        _error("solve: PetscSparseMtrx Expected");

    PetscSparseMtrx *Lhs = ( PetscSparseMtrx * ) A;

    Vec globRhsVec;
    Vec globSolVec;

    bool newLhs = true;
    int rows = B.giveNumberOfRows();
    int cols = B.giveNumberOfColumns();
    NM_Status s;
    X.resize(rows, cols);
    double *Xptr = X.givePointer();

    for (int i = 0; i < cols; ++i) {
        // column i of B starts at offset rows*i of its storage
        VecCreateSeqWithArray(PETSC_COMM_SELF, rows, B.givePointer() + rows*i, & globRhsVec);
        VecDuplicate(globRhsVec, & globSolVec);
        s = this->petsc_solve(Lhs, globRhsVec, globSolVec, newLhs);
        if ( !(s & NM_Success) ) {
            OOFEM_WARNING2("PetscSolver :: solve - No success at solving column %d",i+1);
            return s;
        // after the first column the left-hand side is flagged as unchanged
        newLhs = false;
        double *ptr;
        VecGetArray(globSolVec, & ptr);
        for ( int j = 0; j < rows; ++j ) {
            Xptr[ j + rows*i ] = ptr [ j ];
        VecRestoreArray(globSolVec, & ptr);
    return s;
Beispiel #21
int main(int argc,char **argv)
  PetscErrorCode ierr;
  PetscMPIInt    size;
  PetscInt       n = 10,i;
  PetscScalar    array[10];
  Vec            x;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); 
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!");

  /* create vector */
  for (i=0; i<n; i++) array[i] = i;
  n = n-1;
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,n,array+1,&x);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
Beispiel #22
/*
   VecDuplicate_MPI - Creates in *v a new vector with the same communicator,
   layout and operation table as win. Values are not copied.

   Input Parameter:
.  win - the vector to duplicate; win->data is used as a Vec_MPI

   Output Parameter:
.  v - the newly created vector

   Notes:
   The layout of win is referenced, not copied. If win carries a local
   (ghosted) representation, an equivalent one is created on top of the new
   vector's own array, and the localupdate object is shared by taking an
   extra reference to it. The stash flags, the attached object and function
   lists and the block sizes are inherited from win.
*/
static PetscErrorCode VecDuplicate_MPI(Vec win,Vec *v)
{
  PetscErrorCode ierr;
  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
  PetscScalar    *array;

  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
  ierr = PetscLayoutReference(win->map,&(*v)->map);CHKERRQ(ierr);

  ierr = VecCreate_MPI_Private(*v,PETSC_TRUE,w->nghost,0);CHKERRQ(ierr);
  vw   = (Vec_MPI*)(*v)->data;
  ierr = PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    /* the local representation aliases the new vector's storage: owned entries plus nghost ghost entries */
    ierr = VecGetArray(*v,&array);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,win->map->bs,win->map->n+w->nghost,array,&vw->localrep);CHKERRQ(ierr);
    ierr = PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
    ierr = VecRestoreArray(*v,&array);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(*v,vw->localrep);CHKERRQ(ierr);

    /* the update object is shared with win, so take a reference for the new owner */
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      ierr = PetscObjectReference((PetscObject)vw->localupdate);CHKERRQ(ierr);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);CHKERRQ(ierr);

  (*v)->map->bs   = win->map->bs;
  /* NOTE(review): this statement was garbled in the listing ("(*v)-> = win->;");
     restored as the block-stash size copy used by upstream PETSc - confirm */
  (*v)->bstash.bs = win->bstash.bs;
  return 0;
}
Beispiel #23
/*
   BVResize_Contiguous - Replaces the storage of a contiguous BV by a new,
   zero-initialised array holding m columns of bv->n entries each.

   Input Parameters:
+  bv   - the basis vectors object; bv->data is used as a BV_CONTIGUOUS
.  m    - the new number of columns
-  copy - if true, the first min(m,bv->m) old columns are copied into the
          new array

   Notes:
   One Vec per column is created on top of the new array (column j starts at
   offset j*bv->n); MPI or sequential vectors are chosen according to
   ctx->mpi, with the block size taken from bv->t. If bv has a name, column j
   is named "<name>_<j>". The old vectors and the old array are released only
   after the optional copy, and ctx->V / ctx->array then point to the new
   storage.
*/
PetscErrorCode BVResize_Contiguous(BV bv,PetscInt m,PetscBool copy)
{
  PetscErrorCode ierr;
  BV_CONTIGUOUS  *ctx = (BV_CONTIGUOUS*)bv->data;
  PetscInt       j,bs;
  PetscScalar    *newarray;
  Vec            *newV;
  char           str[50];

  ierr = VecGetBlockSize(bv->t,&bs);CHKERRQ(ierr);
  ierr = PetscMalloc1(m*bv->n,&newarray);CHKERRQ(ierr);
  ierr = PetscMemzero(newarray,m*bv->n*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&newV);CHKERRQ(ierr);
  for (j=0;j<m;j++) {
    if (ctx->mpi) {
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,bv->n,PETSC_DECIDE,newarray+j*bv->n,newV+j);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,bv->n,newarray+j*bv->n,newV+j);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogObjectParents(bv,m,newV);CHKERRQ(ierr);
  if (((PetscObject)bv)->name) {
    for (j=0;j<m;j++) {
      /* size taken from the buffer itself so the two cannot drift apart */
      ierr = PetscSNPrintf(str,sizeof(str),"%s_%D",((PetscObject)bv)->name,j);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)newV[j],str);CHKERRQ(ierr);
    }
  }
  if (copy) {
    ierr = PetscMemcpy(newarray,ctx->array,PetscMin(m,bv->m)*bv->n*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* the old storage may only go away after the copy above */
  ierr = VecDestroyVecs(bv->m,&ctx->V);CHKERRQ(ierr);
  ctx->V = newV;
  ierr = PetscFree(ctx->array);CHKERRQ(ierr);
  ctx->array = newarray;
  return 0;
}
Beispiel #24
/*
   DMSetUp_DA_1D - Sets up the parallel layout of a one-dimensional DMDA.

   Input Parameter:
.  da - the DM; its DM_DA data (da->data) supplies the global size M, the
        number of dof per node w, the stencil width s, the optional
        per-process widths lx and the boundary type bx

   What the visible code does:
   - places every process of the DM's communicator along x
     (dd->m = size, dd->n = dd->p = 1);
   - determines the owned range [xs,xe), either from the caller-supplied lx
     (checked to sum to M) or from one of three built-in distributions chosen
     by the options -da_partition_blockcomm / -da_partition_nodes_at_end;
   - derives the ghosted range [Xs,Xe) and the scattered-into range [IXs,IXe);
   - creates array-less global and local work vectors, builds the
     global-to-local VecScatter (stored in dd->gtol) and destroys the work
     vectors and index sets again;
   - stores the dof-scaled ranges in dd and creates the local-to-global
     mapping da->ltogmap, which takes ownership of idx (PETSC_OWN_POINTER).

   NOTE(review): this listing has lost lines during extraction (closing
   braces, comment delimiters and apparently at least one condition); the
   text below is kept exactly as found - compare with the upstream source
   before reusing it.
*/
PetscErrorCode  DMSetUp_DA_1D(DM da)
  DM_DA            *dd   = (DM_DA*)da->data;
  const PetscInt   M     = dd->M;
  const PetscInt   dof   = dd->w;
  const PetscInt   s     = dd->s;
  const PetscInt   sDist = s;  /* stencil distance in points */
  const PetscInt   *lx   = dd->lx;
  DMBoundaryType   bx    = dd->bx;
  MPI_Comm         comm;
  Vec              local, global;
  VecScatter       gtol;
  IS               to, from;
  PetscBool        flg1 = PETSC_FALSE, flg2 = PETSC_FALSE;
  PetscMPIInt      rank, size;
  PetscInt         i,*idx,nn,left,xs,xe,x,Xs,Xe,start,m,IXs,IXe;
  PetscErrorCode   ierr;

  ierr = PetscObjectGetComm((PetscObject) da, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* in 1D all processes are laid out along x */
  dd->p = 1;
  dd->n = 1;
  dd->m = size;
  m     = dd->m;

  if (s > 0) {
    /* if not communicating data then should be ok to have nothing on some processes */
    if (M < m) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"More processes than data points! %D %D",m,M);
    if ((M-1) < s && size > 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Array is too small for stencil! %D %D",M-1,s);

     Determine locally owned region
     xs is the first local node number, x is the number of local nodes
  if (!lx) {
    ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,"-da_partition_blockcomm",&flg1,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,"-da_partition_nodes_at_end",&flg2,NULL);CHKERRQ(ierr);
    if (flg1) {      /* Block Comm type Distribution */
      xs = rank*M/m;
      x  = (rank + 1)*M/m - xs;
    } else if (flg2) { /* The odd nodes are evenly distributed across last nodes */
      x = (M + rank)/m;
      if (M/m == x) xs = rank*x;
      else          xs = rank*(x-1) + (M+rank)%(x*m);
    } else { /* The odd nodes are evenly distributed across the first k nodes */
      /* Regular PETSc Distribution */
      x = M/m + ((M % m) > rank);
      if (rank >= (M % m)) xs = (rank * (PetscInt)(M/m) + M % m);
      else                 xs = rank * (PetscInt)(M/m) + rank;
    /* gather every process's start index, then turn consecutive starts into widths */
    ierr = MPI_Allgather(&xs,1,MPIU_INT,dd->lx,1,MPIU_INT,comm);CHKERRQ(ierr);
    for (i=0; i<m-1; i++) dd->lx[i] = dd->lx[i+1] - dd->lx[i];
    dd->lx[m-1] = M - dd->lx[m-1];
  } else {
    x  = lx[rank];
    xs = 0;
    for (i=0; i<rank; i++) xs += lx[i];
    /* verify that data user provided is consistent */
    left = xs;
    for (i=rank; i<size; i++) left += lx[i];
    if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M %D %D",left,M);

   check if the scatter requires more than one process neighbor or wraps around
   the domain more than once
  /* NOTE(review): the condition combines comparison results with bitwise & and |, and it tests the
     global size M > 1, whereas DMSetUp_DA_2D tests the process count (m > 1) - confirm intent */
  if ((x < s) & ((M > 1) | (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);

  xe  = xs + x;

  /* determine ghost region (Xs) and region scattered into (IXs)  */
  if (xs-sDist > 0) {
    Xs  = xs - sDist;
    IXs = xs - sDist;
  } else {
    if (bx) Xs = xs - sDist;
    else Xs = 0;
    IXs = 0;
  if (xe+sDist <= M) {
    Xe  = xe + sDist;
    IXe = xe + sDist;
  } else {
    if (bx) Xe = xe + sDist;
    else Xe = M;
    IXe = M;

  /* NOTE(review): as listed, the four assignments below are unconditional and override the
     values computed above; a guarding condition appears to have been lost - confirm */
    Xs  = xs - sDist;
    Xe  = xe + sDist;
    IXs = xs - sDist;
    IXe = xe + sDist;

  /* allocate the base parallel and sequential vectors */
  dd->Nlocal = dof*x;
  ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
  dd->nlocal = dof*(Xe-Xs);
  ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr);

  ierr = VecGetOwnershipRange(global,&start,NULL);CHKERRQ(ierr);

  /* Create Global to Local Vector Scatter Context */
  /* global to local must retrieve ghost points */
  ierr = ISCreateStride(comm,dof*(IXe-IXs),dof*(IXs-Xs),1,&to);CHKERRQ(ierr);

  /* idx holds one entry per point of the ghosted local range: x owned points plus sDist on each side */
  ierr = PetscMalloc1(x+2*sDist,&idx);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)da,(x+2*(sDist))*sizeof(PetscInt));CHKERRQ(ierr);

  for (i=0; i<IXs-Xs; i++) idx[i] = -1; /* prepend with -1s if needed for ghosted case*/

  nn = IXs-Xs;
  if (bx == DM_BOUNDARY_PERIODIC) { /* Handle all cases with periodic first */
    for (i=0; i<sDist; i++) {  /* Left ghost points */
      if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i;
      else                 idx[nn++] = M+(xs-sDist+i);

    for (i=0; i<x; i++) idx [nn++] = xs + i;  /* Non-ghost points */

    for (i=0; i<sDist; i++) { /* Right ghost points */
      if ((xe+i)<M) idx [nn++] =  xe+i;
      else          idx [nn++] = (xe+i) - M;
  } else if (bx == DM_BOUNDARY_MIRROR) { /* Handle all cases with periodic first */
    for (i=0; i<(sDist); i++) {  /* Left ghost points */
      if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i;
      else                 idx[nn++] = sDist - i;

    for (i=0; i<x; i++) idx [nn++] = xs + i;  /* Non-ghost points */

    for (i=0; i<(sDist); i++) { /* Right ghost points */
      if ((xe+i)<M) idx[nn++] =  xe+i;
      else          idx[nn++] = M - (i + 1);
  } else {      /* Now do all cases with no periodicity */
    if (0 <= xs-sDist) {
      for (i=0; i<sDist; i++) idx[nn++] = xs - sDist + i;
    } else {
      for (i=0; i<xs; i++) idx[nn++] = i;

    for (i=0; i<x; i++) idx [nn++] = xs + i;

    if ((xe+sDist)<=M) {
      for (i=0; i<sDist; i++) idx[nn++]=xe+i;
    } else {
      for (i=xe; i<M; i++) idx[nn++]=i;

  /* the "from" index set skips the leading -1 padding and uses idx in place (PETSC_USE_POINTER) */
  ierr = ISCreateBlock(comm,dof,nn-IXs+Xs,&idx[IXs-Xs],PETSC_USE_POINTER,&from);CHKERRQ(ierr);
  ierr = VecScatterCreate(global,from,local,to,&gtol);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = VecDestroy(&local);CHKERRQ(ierr);
  ierr = VecDestroy(&global);CHKERRQ(ierr);

  /* the x ranges are stored multiplied by dof; the y and z ranges are the trivial [0,1) */
  dd->xs = dof*xs; dd->xe = dof*xe; dd->ys = 0; dd->ye = 1; dd->zs = 0; dd->ze = 1;
  dd->Xs = dof*Xs; dd->Xe = dof*Xe; dd->Ys = 0; dd->Ye = 1; dd->Zs = 0; dd->Ze = 1;

  dd->gtol      = gtol;
  dd->base      = dof*xs;
  da->ops->view = DMView_DA_1d;

     Set the local to global ordering in the global vector, this allows use
     of VecSetValuesLocal().
  for (i=0; i<Xe-IXe; i++) idx[nn++] = -1; /* pad with -1s if needed for ghosted case*/

  ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr);

Beispiel #25
/*
   DMSetUp_DA_2D - Sets up the parallel layout of a two-dimensional DMDA.

   Input Parameter:
.  da - the DM; its DM_DA data (da->data) supplies the global sizes M and N,
        the requested process grid m x n (either may be PETSC_DECIDE), the
        number of dof per node w, the stencil width s and type, the boundary
        types bx/by and the optional per-process widths lx/ly

   What the visible code does:
   - validates dof, s and the requested process grid and, where m and/or n is
     PETSC_DECIDE, chooses a grid with m*n equal to the communicator size;
   - determines the owned box [xs,xe) x [ys,ye) from lx/ly (allocating and
     filling dd->lx / dd->ly when they are not supplied);
   - derives the ghosted box [Xs,Xe) x [Ys,Ye) and the scattered-into box
     [IXs,IXe) x [IYs,IYe);
   - gathers every process's owned size to compute its starting offset in the
     global ordering (bases[]);
   - builds the local-to-global scatter (dd->ltog), the global-to-local
     scatter (dd->gtol), the table of the nine neighbouring ranks
     (dd->neighbors) and the local-to-global mappings da->ltogmap /
     da->ltogmapb;
   - stores the resulting ranges and bookkeeping in dd.

   NOTE(review): this listing has lost lines during extraction (closing
   braces, comment delimiters, #endif lines and apparently some statements
   and conditions), and none of the error codes assigned to ierr is checked in
   the visible code. The text below is kept exactly as found - compare with
   the upstream source before reusing it.
*/
PetscErrorCode  DMSetUp_DA_2D(DM da)
    DM_DA            *dd = (DM_DA*)da->data;
    const PetscInt   M            = dd->M;
    const PetscInt   N            = dd->N;
    PetscInt         m            = dd->m;
    PetscInt         n            = dd->n;
    const PetscInt   dof          = dd->w;
    const PetscInt   s            = dd->s;
    DMDABoundaryType bx           = dd->bx;
    DMDABoundaryType by           = dd->by;
    DMDAStencilType  stencil_type = dd->stencil_type;
    PetscInt         *lx          = dd->lx;
    PetscInt         *ly          = dd->ly;
    MPI_Comm         comm;
    PetscMPIInt      rank,size;
    PetscInt         xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,start,end,IXs,IXe,IYs,IYe;
    PetscInt         up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn,*idx_cpy;
    const PetscInt   *idx_full;
    PetscInt         xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count;
    PetscInt         s_x,s_y; /* s proportionalized to w */
    PetscInt         sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0;
    Vec              local,global;
    VecScatter       ltog,gtol;
    IS               to,from,ltogis;
    PetscErrorCode   ierr;

    if (stencil_type == DMDA_STENCIL_BOX && (bx == DMDA_BOUNDARY_MIRROR || by == DMDA_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil");
    ierr = PetscObjectGetComm((PetscObject)da,&comm);
#if !defined(PETSC_USE_64BIT_INDICES)
    if (((Petsc64bitInt) M)*((Petsc64bitInt) N)*((Petsc64bitInt) dof) > (Petsc64bitInt) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof);

    if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof);
    if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s);

    ierr = MPI_Comm_size(comm,&size);
    ierr = MPI_Comm_rank(comm,&rank);

    if (m != PETSC_DECIDE) {
        if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m);
        else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size);
    if (n != PETSC_DECIDE) {
        if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n);
        else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size);

    if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
        if (n != PETSC_DECIDE) {
            m = size/n;
        } else if (m != PETSC_DECIDE) {
            n = size/m;
        } else {
            /* try for squarish distribution */
            m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N)));
            if (!m) m = 1;
            /* NOTE(review): no statement that changes m is visible inside this loop; as listed it
               would not terminate when m*n != size on the first pass - confirm against upstream */
            while (m > 0) {
                n = size/m;
                if (m*n == size) break;
            if (M > N && m < n) {
                PetscInt _m = m;
                m = n;
                n = _m;
        if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n ");
    } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition");

    if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
    if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);

       Determine locally owned region
       xs is the first local node number, x is the number of local nodes
    if (!lx) {
        ierr = PetscMalloc(m*sizeof(PetscInt), &dd->lx);
        lx   = dd->lx;
        for (i=0; i<m; i++) {
            lx[i] = M/m + ((M % m) > i);
    /* this process sits in column rank % m and row rank / m of the process grid */
    x  = lx[rank % m];
    xs = 0;
    for (i=0; i<(rank % m); i++) {
        xs += lx[i];
#if defined(PETSC_USE_DEBUG)
    left = xs;
    for (i=(rank % m); i<m; i++) {
        left += lx[i];
    if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M);

       Determine locally owned region
       ys is the first local node number, y is the number of local nodes
    if (!ly) {
        ierr = PetscMalloc(n*sizeof(PetscInt), &dd->ly);
        ly   = dd->ly;
        for (i=0; i<n; i++) {
            ly[i] = N/n + ((N % n) > i);
    y  = ly[rank/m];
    ys = 0;
    for (i=0; i<(rank/m); i++) {
        ys += ly[i];
#if defined(PETSC_USE_DEBUG)
    left = ys;
    for (i=(rank/m); i<n; i++) {
        left += ly[i];
    if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N);

     check if the scatter requires more than one process neighbor or wraps around
     the domain more than once
    if ((x < s) && ((m > 1) || (bx == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);
    if ((y < s) && ((n > 1) || (by == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s);
    xe = xs + x;
    ye = ys + y;

    /* determine ghost region (Xs) and region scattered into (IXs)  */
    if (xs-s > 0) {
        Xs = xs - s;
        IXs = xs - s;
    } else {
        if (bx) {
            Xs = xs - s;
        } else {
            Xs = 0;
        IXs = 0;
    if (xe+s <= M) {
        Xe = xe + s;
        IXe = xe + s;
    } else {
        if (bx) {
            Xs = xs - s;
            Xe = xe + s;
        } else {
            Xe = M;
        IXe = M;

    /* NOTE(review): as listed, the four assignments below are unconditional and override the
       values computed above; a guarding condition appears to have been lost - confirm */
        IXs = xs - s;
        IXe = xe + s;
        Xs  = xs - s;
        Xe  = xe + s;

    if (ys-s > 0) {
        Ys = ys - s;
        IYs = ys - s;
    } else {
        if (by) {
            Ys = ys - s;
        } else {
            Ys = 0;
        IYs = 0;
    if (ye+s <= N) {
        Ye = ye + s;
        IYe = ye + s;
    } else {
        if (by) {
            Ye = ye + s;
        } else {
            Ye = N;
        IYe = N;

    /* NOTE(review): same remark as for the x direction - these assignments have no visible guard */
        IYs = ys - s;
        IYe = ye + s;
        Ys  = ys - s;
        Ye  = ye + s;

    /* stencil length in each direction */
    s_x = s;
    s_y = s;

    /* determine starting point of each processor */
    nn       = x*y;
    ierr     = PetscMalloc2(size+1,PetscInt,&bases,size,PetscInt,&ldims);
    ierr     = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm);
    /* bases[] becomes the exclusive prefix sum of the per-process point counts */
    bases[0] = 0;
    for (i=1; i<=size; i++) {
        bases[i] = ldims[i-1];
    for (i=1; i<=size; i++) {
        bases[i] += bases[i-1];
    base = bases[rank]*dof;

    /* allocate the base parallel and sequential vectors */
    dd->Nlocal = x*y*dof;
    ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,0,&global);
    dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof;
    ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,0,&local);

    /* generate appropriate vector scatters */
    /* local to global inserts non-ghost point region into global */
    ierr = VecGetOwnershipRange(global,&start,&end);
    ierr = ISCreateStride(comm,x*y*dof,start,1,&to);

    ierr  = PetscMalloc(x*y*sizeof(PetscInt),&idx);
    /* left/right/down/up are positions of the owned box inside the ghosted local box */
    left  = xs - Xs;
    right = left + x;
    down  = ys - Ys;
    up = down + y;
    count = 0;
    for (i=down; i<up; i++) {
        for (j=left; j<right; j++) {
            idx[count++] = i*(Xe-Xs) + j;

    ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&from);
    ierr = VecScatterCreate(local,from,global,to,&ltog);
    ierr = PetscLogObjectParent(dd,ltog);
    ierr = ISDestroy(&from);
    ierr = ISDestroy(&to);

    /* global to local must include ghost points within the domain,
       but not ghost points outside the domain that aren't periodic */
    if (stencil_type == DMDA_STENCIL_BOX) {
        count = (IXe-IXs)*(IYe-IYs);
        ierr  = PetscMalloc(count*sizeof(PetscInt),&idx);

        left  = IXs - Xs;
        right = left + (IXe-IXs);
        down  = IYs - Ys;
        up = down + (IYe-IYs);
        count = 0;
        for (i=down; i<up; i++) {
            for (j=left; j<right; j++) {
                idx[count++] = j + i*(Xe-Xs);
        ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);

    } else {
        /* must drop into cross shape region */
        /*       ---------|
                |  top    |
             |---         ---| up
             |   middle      |
             |               |
             ----         ---- down
                | bottom  |
             Xs xs        xe Xe */
        count = (ys-IYs)*x + y*(IXe-IXs) + (IYe-ye)*x;
        ierr  = PetscMalloc(count*sizeof(PetscInt),&idx);

        left  = xs - Xs;
        right = left + x;
        down  = ys - Ys;
        up = down + y;
        count = 0;
        /* bottom */
        for (i=(IYs-Ys); i<down; i++) {
            for (j=left; j<right; j++) {
                idx[count++] = j + i*(Xe-Xs);
        /* middle */
        for (i=down; i<up; i++) {
            for (j=(IXs-Xs); j<(IXe-Xs); j++) {
                idx[count++] = j + i*(Xe-Xs);
        /* top */
        for (i=up; i<up+IYe-ye; i++) {
            for (j=left; j<right; j++) {
                idx[count++] = j + i*(Xe-Xs);
        ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);

    /* determine who lies on each side of us stored in    n6 n7 n8
                                                          n3    n5
                                                          n0 n1 n2

    /* Assume the Non-Periodic Case */
    n1 = rank - m;
    if (rank % m) {
        n0 = n1 - 1;
    } else {
        n0 = -1;
    if ((rank+1) % m) {
        n2 = n1 + 1;
        n5 = rank + 1;
        n8 = rank + m + 1;
        if (n8 >= m*n) n8 = -1;
    } else {
        n2 = -1;
        n5 = -1;
        n8 = -1;
    if (rank % m) {
        n3 = rank - 1;
        n6 = n3 + m;
        if (n6 >= m*n) n6 = -1;
    } else {
        n3 = -1;
        n6 = -1;
    n7 = rank + m;
    if (n7 >= m*n) n7 = -1;

    /* NOTE(review): the "} else if" chain further down has no visible opening "if"; the condition
       guarding this corner/side block (presumably both directions periodic) appears to have been
       lost - confirm against upstream */
        /* Modify for Periodic Cases */
        /* Handle all four corners */
        if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1;
        if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0;
        if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m;
        if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1;

        /* Handle Top and Bottom Sides */
        if (n1 < 0) n1 = rank + m * (n-1);
        if (n7 < 0) n7 = rank - m * (n-1);
        if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
        if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
        if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
        if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;

        /* Handle Left and Right Sides */
        if (n3 < 0) n3 = rank + (m-1);
        if (n5 < 0) n5 = rank - (m-1);
        if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
        if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
        if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
        if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
    } else if (by == DMDA_BOUNDARY_PERIODIC) {  /* Handle Top and Bottom Sides */
        if (n1 < 0) n1 = rank + m * (n-1);
        if (n7 < 0) n7 = rank - m * (n-1);
        if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
        if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
        if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
        if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
    } else if (bx == DMDA_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */
        if (n3 < 0) n3 = rank + (m-1);
        if (n5 < 0) n5 = rank - (m-1);
        if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
        if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
        if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
        if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;

    ierr = PetscMalloc(9*sizeof(PetscInt),&dd->neighbors);

    /* neighbor table in the n0..n8 numbering above; slot 4 is this process itself */
    dd->neighbors[0] = n0;
    dd->neighbors[1] = n1;
    dd->neighbors[2] = n2;
    dd->neighbors[3] = n3;
    dd->neighbors[4] = rank;
    dd->neighbors[5] = n5;
    dd->neighbors[6] = n6;
    dd->neighbors[7] = n7;
    dd->neighbors[8] = n8;

    if (stencil_type == DMDA_STENCIL_STAR) {
        /* save corner processor numbers */
        sn0 = n0;
        sn2 = n2;
        sn6 = n6;
        sn8 = n8;
        n0  = n2 = n6 = n8 = -1;

    ierr = PetscMalloc((Xe-Xs)*(Ye-Ys)*sizeof(PetscInt),&idx);
    ierr = PetscLogObjectMemory(da,(Xe-Xs)*(Ye-Ys)*sizeof(PetscInt));

    /* first pass: global indices of the points the global-to-local scatter reads, visited
       as rows below, then the owned rows with their left/right ghosts, then rows above */
    nn = 0;
    xbase = bases[rank];
    for (i=1; i<=s_y; i++) {
        if (n0 >= 0) { /* left below */
            x_t = lx[n0 % m];
            y_t = ly[(n0/m)];
            s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;

        if (n1 >= 0) { /* directly below */
            x_t = x;
            y_t = ly[(n1/m)];
            s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
            for (j=0; j<x_t; j++) idx[nn++] = s_t++;
        } else if (by == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;

        if (n2 >= 0) { /* right below */
            x_t = lx[n2 % m];
            y_t = ly[(n2/m)];
            s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;

    for (i=0; i<y; i++) {
        if (n3 >= 0) { /* directly left */
            x_t = lx[n3 % m];
            /* y_t = y; */
            s_t = bases[n3] + (i+1)*x_t - s_x;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        } else if (bx == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;

        for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */

        if (n5 >= 0) { /* directly right */
            x_t = lx[n5 % m];
            /* y_t = y; */
            s_t = bases[n5] + (i)*x_t;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;
        } else if (bx == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;

    for (i=1; i<=s_y; i++) {
        if (n6 >= 0) { /* left above */
            x_t = lx[n6 % m];
            /* y_t = ly[(n6/m)]; */
            s_t = bases[n6] + (i)*x_t - s_x;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;

        if (n7 >= 0) { /* directly above */
            x_t = x;
            /* y_t = ly[(n7/m)]; */
            s_t = bases[n7] + (i-1)*x_t;
            for (j=0; j<x_t; j++) idx[nn++] = s_t++;
        } else if (by == DMDA_BOUNDARY_MIRROR) {
            for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;

        if (n8 >= 0) { /* right above */
            x_t = lx[n8 % m];
            /* y_t = ly[(n8/m)]; */
            s_t = bases[n8] + (i-1)*x_t;
            for (j=0; j<s_x; j++) idx[nn++] = s_t++;

    /* PETSC_COPY_VALUES: the index set copies idx, so idx can be refilled below */
    ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_COPY_VALUES,&from);
    ierr = VecScatterCreate(global,from,local,to,&gtol);
    ierr = PetscLogObjectParent(da,gtol);
    ierr = ISDestroy(&to);
    ierr = ISDestroy(&from);

    if (stencil_type == DMDA_STENCIL_STAR) {
        n0 = sn0;
        n2 = sn2;
        n6 = sn6;
        n8 = sn8;

    if (((stencil_type == DMDA_STENCIL_STAR)  ||
            (bx && bx != DMDA_BOUNDARY_PERIODIC) ||
            (by && by != DMDA_BOUNDARY_PERIODIC))) {
            Recompute the local to global mappings, this time keeping the
          information about the cross corner processor numbers and any ghosted
          but not periodic indices.
        nn    = 0;
        xbase = bases[rank];
        for (i=1; i<=s_y; i++) {
            if (n0 >= 0) { /* left below */
                x_t = lx[n0 % m];
                y_t = ly[(n0/m)];
                s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (xs-Xs > 0 && ys-Ys > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
            if (n1 >= 0) { /* directly below */
                x_t = x;
                y_t = ly[(n1/m)];
                s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
                for (j=0; j<x_t; j++) idx[nn++] = s_t++;
            } else if (ys-Ys > 0) {
                if (by == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
                } else {
                    for (j=0; j<x; j++) idx[nn++] = -1;
            if (n2 >= 0) { /* right below */
                x_t = lx[n2 % m];
                y_t = ly[(n2/m)];
                s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (Xe-xe> 0 && ys-Ys > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;

        for (i=0; i<y; i++) {
            if (n3 >= 0) { /* directly left */
                x_t = lx[n3 % m];
                /* y_t = y; */
                s_t = bases[n3] + (i+1)*x_t - s_x;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (xs-Xs > 0) {
                if (bx == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
                } else {
                    for (j=0; j<s_x; j++) idx[nn++] = -1;

            for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */

            if (n5 >= 0) { /* directly right */
                x_t = lx[n5 % m];
                /* y_t = y; */
                s_t = bases[n5] + (i)*x_t;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (Xe-xe > 0) {
                if (bx == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
                } else {
                    for (j=0; j<s_x; j++) idx[nn++] = -1;

        for (i=1; i<=s_y; i++) {
            if (n6 >= 0) { /* left above */
                x_t = lx[n6 % m];
                /* y_t = ly[(n6/m)]; */
                s_t = bases[n6] + (i)*x_t - s_x;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (xs-Xs > 0 && Ye-ye > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
            if (n7 >= 0) { /* directly above */
                x_t = x;
                /* y_t = ly[(n7/m)]; */
                s_t = bases[n7] + (i-1)*x_t;
                for (j=0; j<x_t; j++) idx[nn++] = s_t++;
            } else if (Ye-ye > 0) {
                if (by == DMDA_BOUNDARY_MIRROR) {
                    for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
                } else {
                    for (j=0; j<x; j++) idx[nn++] = -1;
            if (n8 >= 0) { /* right above */
                x_t = lx[n8 % m];
                /* y_t = ly[(n8/m)]; */
                s_t = bases[n8] + (i-1)*x_t;
                for (j=0; j<s_x; j++) idx[nn++] = s_t++;
            } else if (Xe-xe > 0 && Ye-ye > 0) {
                for (j=0; j<s_x; j++) idx[nn++] = -1;
       Set the local to global ordering in the global vector, this allows use
       of VecSetValuesLocal().
    /* the block index set takes ownership of idx; its expanded (per-dof) indices are copied
       into idx_cpy, which is what ends up in dd->idx */
    ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_OWN_POINTER,&ltogis);
    ierr = PetscMalloc(nn*dof*sizeof(PetscInt),&idx_cpy);
    ierr = PetscLogObjectMemory(da,nn*dof*sizeof(PetscInt));
    ierr = ISGetIndices(ltogis, &idx_full);
    ierr = PetscMemcpy(idx_cpy,idx_full,nn*dof*sizeof(PetscInt));
    ierr = ISRestoreIndices(ltogis, &idx_full);
    ierr = ISLocalToGlobalMappingCreateIS(ltogis,&da->ltogmap);
    ierr = PetscLogObjectParent(da,da->ltogmap);
    ierr = ISDestroy(&ltogis);
    ierr = ISLocalToGlobalMappingBlock(da->ltogmap,dd->w,&da->ltogmapb);
    /* NOTE(review): this logs da->ltogmap a second time right after da->ltogmapb was created;
       da->ltogmapb was presumably intended - confirm */
    ierr = PetscLogObjectParent(da,da->ltogmap);

    ierr  = PetscFree2(bases,ldims);
    dd->m = m;
    dd->n  = n;
    /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */
    dd->xs = xs*dof;
    dd->xe = xe*dof;
    dd->ys = ys;
    dd->ye = ye;
    dd->zs = 0;
    dd->ze = 1;
    dd->Xs = Xs*dof;
    dd->Xe = Xe*dof;
    dd->Ys = Ys;
    dd->Ye = Ye;
    dd->Zs = 0;
    dd->Ze = 1;

    ierr = VecDestroy(&local);
    ierr = VecDestroy(&global);

    dd->gtol      = gtol;
    dd->ltog      = ltog;
    dd->idx       = idx_cpy;
    dd->Nl        = nn*dof;
    dd->base      = base;
    da->ops->view = DMView_DA_2d;
    dd->ltol      = NULL;
    dd->ao        = NULL;
Beispiel #26
/*
   PCBDDCScalingSetUp_Deluxe_Private - Fills pcbddc->deluxe_ctx with the per-subset
   work vectors, scatters and dense matrices, taking the subset data from
   pcbddc->sub_schurs.

   Input Parameter:
.  pc - the preconditioner; pc->data is cast to PC_BDDC

   Notes:
   Errors with PETSC_ERR_PLIB when sub_schurs is NULL or when a previously stored
   seq_n differs from sub_schurs->n_subs, and returns 0 early when n_subs is zero.
   Work vectors, scatters and the shared workspace are created only on the first
   setup (seq_n == 0 on entry); the dense matrices are destroyed and recreated on
   every call.

   NOTE(review): this listing contains no braces, no PetscFunctionBegin and no final
   return; block delimiters seem to have been lost, so nesting has to be read from the
   indentation. Confirm against the upstream source before relying on it.
*/
static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
  PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
  PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
  PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
  PetscScalar            *matdata,*matdata2;
  PetscInt               i,max_subset_size,cum,cum2;
  const PetscInt         *idxs;
  PetscBool              newsetup = PETSC_FALSE;
  PetscErrorCode         ierr;

  if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
  if (!sub_schurs->n_subs) PetscFunctionReturn(0);

  /* Allocate arrays for subproblems */
  /* First setup only: five arrays of seq_n entries each; newsetup then gates the
     one-time object creation further down */
  if (!deluxe_ctx->seq_n) {
    deluxe_ctx->seq_n = sub_schurs->n_subs;
    ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
    newsetup = PETSC_TRUE;
  } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);

  /* the change of basis is just a reference to sub_schurs->change (if any) */
  deluxe_ctx->change         = sub_schurs->change;
  deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

  /* Create objects for deluxe */
  /* Find the largest subset so that one workspace of 2*max_subset_size scalars can
     back both work vectors of every subset */
  max_subset_size = 0;
  for (i=0;i<sub_schurs->n_subs;i++) {
    PetscInt subset_size;
    ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
    max_subset_size = PetscMax(subset_size,max_subset_size);
  if (newsetup) {
    ierr = PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);CHKERRQ(ierr);
  /* cum is the running offset into idxs, cum2 the running offset into matdata and
     matdata2; they advance by subset_size and subset_size^2 per subset */
  cum = cum2 = 0;
  ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
  for (i=0;i<deluxe_ctx->seq_n;i++) {
    PetscInt     subset_size;

    ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
    if (newsetup) {
      IS  sub;
      /* work vectors */
      /* seq_work1[i] is given workspace as its array, seq_work2[i] is given
         workspace+subset_size; the same workspace is passed for every i */
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);CHKERRQ(ierr);
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);CHKERRQ(ierr);

      /* scatters */
      /* from the entries idxs[cum .. cum+subset_size) of work_scaling into seq_work1[i] */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);CHKERRQ(ierr);
      ierr = VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);CHKERRQ(ierr);
      ierr = ISDestroy(&sub);CHKERRQ(ierr);

    /* S_E_j */
    /* dense subset_size x subset_size matrix created with matdata+cum2 as its data argument */
    ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);

    /* \sum_k S^k_E_j */
    /* same construction from matdata2+cum2, then factored in place:
       Cholesky when is_hermitian is set, LU otherwise */
    ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
    ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr);
    ierr = MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr);
    if (sub_schurs->is_hermitian) {
      ierr = MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);CHKERRQ(ierr);
    } else {
      ierr = MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);CHKERRQ(ierr);
    /* Single-matrix mode: solve with the factored sum against X and keep only the
       result in seq_mat[i]; seq_mat_inv_sum[i] and the original seq_mat[i] are destroyed */
    if (pcbddc->deluxe_singlemat) {
      Mat X,Y;
      /* X is a new transpose of seq_mat[i] in the non-Hermitian case, otherwise an
         extra reference to seq_mat[i] itself */
      if (!sub_schurs->is_hermitian) {
        ierr = MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);CHKERRQ(ierr);
      } else {
        ierr = PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
        X    = deluxe_ctx->seq_mat[i];
      ierr = MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);CHKERRQ(ierr);
      if (!sub_schurs->is_hermitian) {
        ierr = PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);
      } else {
        ierr = MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);CHKERRQ(ierr);

      ierr = MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);CHKERRQ(ierr);
      ierr = MatDestroy(&deluxe_ctx->seq_mat[i]);CHKERRQ(ierr);
      ierr = MatDestroy(&X);CHKERRQ(ierr);
      if (deluxe_ctx->change) {
        Mat C,CY;

        if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
        ierr = KSPGetOperators(deluxe_ctx->change[i],&C,NULL);CHKERRQ(ierr);
        /* NOTE(review): CY is used as the first operand below but nothing visible here
           creates it; a statement producing CY (presumably the product of C and Y)
           looks to be missing - confirm against upstream */
        ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
        ierr = MatDestroy(&CY);CHKERRQ(ierr);
      /* Y is transposed in place and becomes the new seq_mat[i] */
      ierr = MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);CHKERRQ(ierr);
      deluxe_ctx->seq_mat[i] = Y;
    cum += subset_size;
    cum2 += subset_size*subset_size;
  ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);CHKERRQ(ierr);
  /* In single-matrix mode the change-of-basis references are cleared again */
  if (pcbddc->deluxe_singlemat) {
    deluxe_ctx->change         = NULL;
    deluxe_ctx->change_with_qr = PETSC_FALSE;

  /* Non-QR change of basis: on the first setup each KSP gets a PCLU preconditioner and
     reads its options; KSPSetUp is indented at loop level, i.e. outside the newsetup test */
  if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
    for (i=0;i<deluxe_ctx->seq_n;i++) {
      if (newsetup) {
        PC pc;

        ierr = KSPGetPC(deluxe_ctx->change[i],&pc);CHKERRQ(ierr);
        ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
        ierr = KSPSetFromOptions(deluxe_ctx->change[i]);CHKERRQ(ierr);
      ierr = KSPSetUp(deluxe_ctx->change[i]);CHKERRQ(ierr);
/*
  SmoothingRL - Weighted smoothing with coefficients C0 = 1/8, C1 = 1/4 and C2 = 3/16.
  The visible fragments show one stencil along the first index that writes lvecpty from
  lvecptx, followed by one along the second index that writes lvecptx back from lvecpty;
  values outside the local range are read from xbcpt2 / ybcpt2.

  Parameters:
    x            - passed to ISCreateGeneralWithIJ (once as x, once as *x)
    y            - not referenced in the visible code
    cacheScalar  - scratch storage; both xbcpt and ybcpt are set to its start
    cacheInt     - index scratch; bcindI starts at cacheInt, bcindJ at cacheInt + 4*n2
    ctx          - not referenced in the visible code
    n            - size from which n2 = (PetscInt)(n*0.5) is derived
    Istart, Iend - localsizex = Iend - Istart

  Returns 0.

  NOTE(review): loop headers, braces, array acquisition for lvecptx/lvecpty/xbcpt2/ybcpt2
  and several statement terminators are absent from this listing; it does not compile as
  shown and must be checked against the original file.
*/
int SmoothingRL(Vec *x, Vec *y, PetscScalar *cacheScalar, PetscInt *cacheInt , VecScatter *ctx,PetscInt n, PetscInt Istart, PetscInt Iend){

PetscInt       rank,size;
PetscScalar    C0,C1,C2;
PetscErrorCode ierr;
PetscInt       n2,i, j, k, localsizex;
PetscInt       nvec; 

Vec            xbc,ybc,lvecx,lvecy;
PetscScalar    *xbcpt,*ybcpt,**lvecptx,**lvecpty;
PetscScalar    **xbcpt2,**ybcpt2;
PetscInt       *bcISarray;
PetscInt       left,right;
PetscInt       *bcindI,*bcindJ;

/* nvec sizes the variable-length arrays declared right after it */
nvec  = 2;
IS             ISfrom[nvec],ISto[nvec];
VecScatter     ctxt[nvec];
Vec            xvec[nvec];

localsizex = Iend-Istart;

  /* stencil weights: centre, distance-1 neighbours, distance-2 neighbours */
  C0 = 1.0/8;
  C1 = 1.0/4;
  C2 = 3.0/16;

n2     = (PetscInt)(n*0.5);
xbcpt  = cacheScalar;
ybcpt  = cacheScalar;
bcindI = cacheInt;
bcindJ = cacheInt+ 4*n2;


/* Neighbouring indices on either side of [Istart, Iend); the first rank takes n-1 as its
   left index and the last rank takes 0 as its right index.
   NOTE(review): rank and size are read here but never assigned in the visible code */
if(rank==0){left = n-1;}
       else{left = Istart-1;}
if(rank==size-1){right = 0;}
       else{right = Iend;}

/* Four groups of n2 (I,J) pairs: I = left-1, left, right, right+1, each with J = 0..n2-1 */
for(i=0;i<n2;i++){ *(bcindI+i)      = left-1;
                   *(bcindJ+i)      = i;        }
for(i=0;i<n2;i++){ *(bcindI+n2+i)   = left;
                   *(bcindJ+n2+i)   = i;        }
for(i=0;i<n2;i++){ *(bcindI+n2*2+i) = right;
                   *(bcindJ+n2*2+i) = i;        }
for(i=0;i<n2;i++){ *(bcindI+n2*3+i) = right+1;
                   *(bcindJ+n2*3+i) = i;        }

  /* NOTE(review): the second argument here is x, while the later call passes *x - confirm
     which one the helper expects. Return values of both helpers are ignored. */
  ISCreateGeneralWithIJ(MPI_COMM_SELF,x,xvec,nvec,n2,4*n2, bcindI, bcindJ,ISfrom, ISto);
  LargeVecScatterCreate(xvec,ISfrom,xbc,ISto ,ctxt,nvec); 


     /* Stencil along the first index. NOTE(review): the first three expressions below stop
        after their C1 terms with no C2 terms or terminator, unlike the last two */
     lvecpty[i][j] = C0*lvecptx[i][j]
                    +C1*lvecptx[i-1][j] +C1*lvecptx[i+1][j]
      lvecpty[0][j] = C0*lvecptx[0][j]
                    +C1*xbcpt2[1][j] +C1*lvecptx[1][j]
      lvecpty[1][j] = C0*lvecptx[1][j]
                    +C1*lvecptx[0][j] +C1*lvecptx[2][j]

      /* Last two local rows: entries past the local range come from xbcpt2[2] and xbcpt2[3] */
      lvecpty[localsizex-1][j] = C0*lvecptx[localsizex-1][j]
                                +C1*lvecptx[localsizex-2][j] +C1*xbcpt2[2][j]
                                +C2*lvecptx[localsizex-3][j] +C2*xbcpt2[3][j];   
      lvecpty[localsizex-2][j] = C0*lvecptx[localsizex-2][j]
                                +C1*lvecptx[localsizex-3][j] +C1*lvecptx[localsizex-1][j]
                                +C2*lvecptx[localsizex-4][j] +C2*xbcpt2[2][j];  


/* Four groups of localsizex (I,J) pairs: I = n-i-1 for i in [Istart, Iend), with
   J = 1, 0, n2-1, n2-2 in that order.
   NOTE(review): bcindJ still starts at cacheInt + 4*n2, so the 4*localsizex writes through
   bcindI run into it whenever localsizex > n2 */
k = 0;
for(i=Istart;i<Iend;i++){ *(bcindI+k)      = n-i-1;
                          *(bcindJ+k)      = 1;           
                          k++;                      }
for(i=Istart;i<Iend;i++){ *(bcindI+k)      = n-i-1;
                          *(bcindJ+k)      = 0;
                          k++;                      }
for(i=Istart;i<Iend;i++){ *(bcindI+k)      = n-i-1;
                          *(bcindJ+k)      = n2-1;
                          k++;                      }
for(i=Istart;i<Iend;i++){ *(bcindI+k)      = n-i-1;
                          *(bcindJ+k)      = n2-2;
                          k++;                      }   

  ISCreateGeneralWithIJ(MPI_COMM_SELF,*x,xvec,nvec,n2,4*localsizex, bcindI, bcindJ,ISfrom, ISto);
  LargeVecScatterCreate(xvec,ISfrom,ybc,ISto ,ctxt,nvec); 


      /* Stencil along the second index: interior form with all five terms */
      lvecptx[i][j] = C0*lvecpty[i][j]
                     +C1*lvecpty[i][j-1] +C1*lvecpty[i][j+1]
                     +C2*lvecpty[i][j-2] +C2*lvecpty[i][j+2];               


      /* Columns 0 and 1: entries before column 0 come from ybcpt2[1] and ybcpt2[0] */
      lvecptx[i][0] =  C0*lvecpty[i][0]
                       +C1*ybcpt2[1][i] +C1*lvecpty[i][1]
                       +C2*ybcpt2[0][i] +C2*lvecpty[i][2];  
      lvecptx[i][1] =  C0*lvecpty[i][1]
                       +C1*lvecpty[i][0] +C1*lvecpty[i][2]
                       +C2*ybcpt2[1][i] +C2*lvecpty[i][3]; 

      /* Columns n2-2 and n2-1: entries past column n2-1 come from ybcpt2[2] and ybcpt2[3] */
      lvecptx[i][n2-2] = C0*lvecpty[i][n2-2]
                        +C1*lvecpty[i][n2-3] +C1*lvecpty[i][n2-1]
                        +C2*lvecpty[i][n2-4] +C2*ybcpt2[2][i];  

      lvecptx[i][n2-1] = C0*lvecpty[i][n2-1]
                        +C1*lvecpty[i][n2-2] +C1*ybcpt2[2][i]
                        +C2*lvecpty[i][n2-3] +C2*ybcpt2[3][i];  






return 0;
/*
  Smoothing - Weighted smoothing with coefficients C0 = 1/8, C1 = 1/4 and C2 = 3/16,
  shown as an "X direction" stencil writing lvecpty from lvecptx and a "Y direction"
  stencil writing lvecptx from lvecpty. Between the two, four columns of lvecpty
  (0, 1, n2+2, n2+3) are filled from four consecutive localsizex-long segments of cacheScalar.

  Parameters:
    x, y, cacheInt, ctx, myDA - not referenced in the visible code
    cacheScalar               - source of the four boundary segments bcpt1..bcpt4
    n                         - size from which n2 = (PetscInt)(n*0.5) is derived
    Istart, Iend              - localsizex = Iend - Istart

  Returns 0.

  NOTE(review): loop headers, braces and the code that obtains lvecptx/lvecpty are absent
  from this listing, and the X-direction expression has no C2 terms or terminator; verify
  against the original file.
*/
int Smoothing(Vec *x, Vec *y, PetscScalar *cacheScalar, PetscInt *cacheInt , VecScatter *ctx,PetscInt n,DA myDA, PetscInt Istart, PetscInt Iend){

PetscScalar    C0,C1,C2;
PetscErrorCode ierr;
PetscInt       localsizex;
PetscInt       n2,i, j, k; 
Vec            lvecx,lvecy;
PetscScalar    **lvecptx,**lvecpty;
Vec            bcvec;
IS             isbc;
//VecScatter     ctx;
PetscScalar    *bcpt1,*bcpt2,*bcpt3,*bcpt4;

localsizex    = Iend-Istart;

  /* stencil weights: centre, distance-1 neighbours, distance-2 neighbours */
  C0 = 1.0/8;
  C1 = 1*1.0/4;
  C2 = 1*3.0/16;

n2     = (PetscInt)(n*0.5);



// X direction smoothing

   lvecpty[i][j] = C0*lvecptx[i][j]
                  +C1*lvecptx[i-1][j] +C1*lvecptx[i+1][j]


/* cacheScalar is read as four back-to-back segments of localsizex scalars */
bcpt1 = cacheScalar;
bcpt2 = cacheScalar+localsizex;
bcpt3 = cacheScalar+localsizex*2;
bcpt4 = cacheScalar+localsizex*3;

/* Columns 0 and 1 take segments 3 and 4; columns n2+2 and n2+3 take segments 1 and 2.
   NOTE(review): the loop over i and the increment of k are not visible here */
k= 0;
  lvecpty[i][0]= *(bcpt3+k);
  lvecpty[i][1]= *(bcpt4+k);
  lvecpty[i][n2+3]= *(bcpt2+k);
  lvecpty[i][n2+2]= *(bcpt1+k);


// Y direction smoothing
   lvecptx[i][j] = C0*lvecpty[i][j]
                  +C1*lvecpty[i][j-1] +C1*lvecpty[i][j+1]
                  +C2*lvecpty[i][j-2] +C2*lvecpty[i][j+2];               




return 0;
/*
Perform one backward averaging iteration.
 x    : data
 y    : next iteration

 pmax : number of points to be cached
 Istart, Iend: column  

 cacheInt    : (2*npt+1)*pmax*sizeof(PetscInt)
 cacheScalar : (  npt+1)*pmax*sizeof(PetscScalar)

Tzu-Chen Liang  11-25-2006
*/

/*
  BackwardAverage - Gathers values of *x at grid points computed from (i*dx, j*dy) into a
  sequential vector y0 laid over cacheScalar, then writes combined values into the array
  of *y, advancing rowcount by localsizey after each batch.

  Parameters:
    x            - source vector, passed as *x to ISCreateGeneralWithIJ
    y            - destination vector; its array is obtained with VecGetArray
    cacheInt     - index scratch; NzindI and idp start at cacheInt, NzindJ at cacheInt + pmax
    cacheScalar  - scalar scratch used as the storage of y0
    n            - grid size; dx = dy = 1/n and n2 = (PetscInt)(n*0.5)
    npt, c       - not referenced in the visible code
    pmax         - cache capacity; bounds localsizey via pmax/(localsizex+1) - 3
    Istart, Iend - localsizex = Iend - Istart

  Returns 0.

  NOTE(review): the loops over rows/points, their braces and the tail of the averaging
  expression are absent from this listing; verify against the original file.
*/
int BackwardAverage(Vec *x, Vec *y, PetscInt *cacheInt, PetscScalar *cacheScalar, PetscInt n, PetscInt npt, PetscInt pmax, PetscInt Istart, PetscInt Iend,PetscScalar c){

 PetscErrorCode ierr;
 PetscInt       i, j, k=0, pi, pj, n2,m, puse, pgrid;  

 PetscInt       localsizex,localsizey, rowcount=0;
 PetscInt       *idp, *NzindJ, *NzindI;
 PetscScalar    dx,dy,dx2,dy2,CX,CY;
 PetscScalar    *pty,*pty0; 
 Vec            y0;
 PetscInt       nvec = 2;
 Vec            xvec[nvec];
 IS             ISfrom[nvec],ISto[nvec];
 VecScatter     ctxt[nvec];

 n2     = (PetscInt)(n*0.5);
 dx     = 1.0/n;
 dy     = 1.0/n;
 /* half a cell minus one millionth of a cell; not used in the visible code */
 dx2    = dx/2-dx/1e6;
 dy2    = dy/2-dy/1e6;

  /* NOTE(review): NzindI and idp both point at the start of cacheInt */
  NzindI = cacheInt;        //pmax
  NzindJ = cacheInt+pmax;   //pmax
  idp    = cacheInt;        //pmax
  pty0   = cacheScalar;     //pmax

  /* rows handled per batch, derived from the cache capacity and capped at n2 */
  localsizex    = Iend-Istart;
  localsizey    = (PetscInt)(pmax*1.0/(localsizex+1))-3;
  if(localsizey>n2){localsizey =n2;}

  /* NOTE(review): ierr from this call is not checked with CHKERRQ */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,(localsizex+1)*(localsizey+1),pty0,&y0);


     /* shrink the batch to the rows that remain */
     if (n2-rowcount<=localsizey){localsizey =n2-rowcount;}      
     puse = localsizex*localsizey;
     pgrid = (localsizex+1)*(localsizey+1);
     k= 0;
             /* physical coordinates of grid point (i,j), then the cell indices obtained by
                flooring coordinate*n; points with pj >= n2 go through SkewSymmetricPoint */
             CX = (PetscScalar)(i*dx);
             CY = (PetscScalar)(j*dy); 

             pi = (PetscInt)floor(CX*(PetscScalar)n);
             pj = (PetscInt)floor(CY*(PetscScalar)n);   
             if(pj>=n2) {SkewSymmetricPoint(&pi, &pj, n);}
             *(NzindI+k) = pi; 
             *(NzindJ+k) = pj;


    /* y0 is recreated over the same buffer with the current batch size pgrid */
    ierr =  VecDestroy(y0);CHKERRQ(ierr);
    ierr =  VecCreateSeqWithArray(PETSC_COMM_SELF,pgrid,pty0,&y0);CHKERRQ(ierr);
    /* return values of the two helper calls below are ignored */
    ISCreateGeneralWithIJ(MPI_COMM_SELF,*x,xvec,nvec,n2,pgrid, NzindI, NzindJ,ISfrom, ISto);
    LargeVecScatterCreate(xvec,ISfrom,y0,ISto ,ctxt,nvec); 

    ierr =  VecGetArray(y0,&pty0);CHKERRQ(ierr);
    ierr =  VecGetArray(*y,&pty);CHKERRQ(ierr);
    m    = 0;
              /* NOTE(review): the expression below is cut off after its first term */
              *(pty+i*n2+j+rowcount) = (*(pty0+i*(localsizey+1)+j)+

     rowcount = rowcount + localsizey;


 ierr =  VecDestroy(y0);CHKERRQ(ierr);

return 0;
/* A new strategy that can handle large problems (more than 4G variables). */

/*
  BackwardAverageRL - Variant of the backward averaging step that scatters from two source
  vectors x1 and x2 into the sequential vector y0 (laid over cacheScalar) through two
  separate index sets and scatter contexts, then writes combined values into the array
  of *y, advancing rowcount by localsizey after each batch.

  Parameters:
    x            - source vector; its array is obtained with VecGetArray into ptx1
    y            - destination vector; its array is obtained with VecGetArray into pty
    cacheInt     - index scratch; NzindJ and idp start at cacheInt, idy at cacheInt + pmax
    cacheScalar  - scalar scratch used as the storage of y0
    n            - grid size; dx = dy = 1/n, n2 = (PetscInt)(n*0.5), n4 = (PetscInt)(n*0.25)
    npt, c       - not referenced in the visible code
    pmax         - cache capacity; bounds localsizey via pmax/(localsizex+1) - 2
    Istart, Iend - localsizex = Iend - Istart

  Returns 0.

  NOTE(review): the loops, their braces, the creation of x1/x2, the initialisation of
  rank, size, size1, size2, k1 and k2, and the tail of the averaging expression are all
  absent from this listing; verify against the original file.
*/
int BackwardAverageRL(Vec *x, Vec *y, PetscInt *cacheInt, PetscScalar *cacheScalar, PetscInt n, PetscInt npt, PetscInt pmax, PetscInt Istart, PetscInt Iend,PetscScalar c){

 PetscInt       rank,size;
 PetscErrorCode ierr;
 PetscInt       i, j, k=0, pi, pj, n2,n4 ,m, puse, pgrid,lx;  
 PetscInt       localsizex,localsizey, rowcount=0;
 PetscInt       k1,k2,pgrid1,pgrid2;
 PetscInt       *idy,*idp, *NzindJ;
 PetscScalar    dx,dy,dx2,dy2,CX,CY;

 PetscScalar    *pty, *pty0; 
 IS             isx1,isx2,isy1,isy2;
 VecScatter     ctx1,ctx2;
 Vec            y0;
 Vec            x1,x2;
 PetscScalar    *ptx1,*ptx2;
 PetscInt       size1,size2,col1,col2;


 n2     = (PetscInt)(n*0.5);
 n4     = (PetscInt)(n*0.25);
 dx     = 1.0/n;
 dy     = 1.0/n;
 /* half a cell minus one millionth of a cell; not used in the visible code */
 dx2    = dx/2-dx/1e6;
 dy2    = dy/2-dy/1e6;

  /* NOTE(review): NzindJ and idp both point at the start of cacheInt */
  NzindJ = cacheInt;    //pmax
  idp    = cacheInt;    //pmax
  idy    = cacheInt   + pmax; 

  pty0   = cacheScalar   ; //pmax

  /* rows handled per batch, derived from the cache capacity and capped at n2 */
  localsizex    = Iend-Istart;
  localsizey    = (PetscInt)(pmax*1.0/(localsizex+1))-2;
  if(localsizey>n2){localsizey =n2;}
  ierr =  VecGetArray(*x,&ptx1);CHKERRQ(ierr);
  ptx2 = ptx1;

  /* NOTE(review): lx is assigned twice with opposite conditions, so only the second
     assignment takes effect; presumably statements between or around these lines were
     lost - confirm */
  if(rank< size*0.5){lx =  localsizex*n2;}else{lx =0;}
  if(rank< size*0.5){lx =  0;}else{lx =  localsizex*n2; }

  /* number of n2-long columns in each source part */
  col1 = (PetscInt)(size1*1.0/n2);
  col2 = (PetscInt)(size2*1.0/n2);

  ierr =  VecGetArray(*y,&pty);CHKERRQ(ierr);
  /* NOTE(review): ierr from this call is not checked with CHKERRQ */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,(localsizex+1)*(localsizey+1),pty0,&y0);


     /* shrink the batch to the rows that remain */
     if (n2-rowcount<=localsizey){localsizey =n2-rowcount;}      
     puse = localsizex*localsizey;
     pgrid = (localsizex+1)*(localsizey+1);
     k= 0;
             /* physical coordinates of grid point (i,j), then the cell indices obtained by
                flooring coordinate*n; points with pj >= n2 go through SkewSymmetricPoint */
             CX = (PetscScalar)(i*dx);
             CY = (PetscScalar)(j*dy); 
             pi = (PetscInt)floor(CX*n);
             pj = (PetscInt)floor(CY*n);   
             if(pj>=n2) {SkewSymmetricPoint(&pi, &pj, n);}

                  /* Entries for the first source are stored from the front (position k1),
                     entries for the second from the back (position pgrid-k2-1); the second
                     source index is shifted by col1 columns. The test selecting between the
                     two cases is not visible here. */
                  *(NzindJ+k1) =  (PetscInt)(n2*pi +  pj);
                  *(idy+k1)   =  k;
                  *(NzindJ+pgrid-k2-1) =  (PetscInt)(n2*(pi-col1)+pj);
                  *(idy+pgrid-k2-1)   =  k;

      pgrid1 = k1;
      pgrid2 = k2;

    /* index sets for the two sources (isx*) and their destinations in y0 (isy*); the
       second pair starts pgrid1 entries into the same arrays */
    ierr =  ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid1,NzindJ,&isx1);CHKERRQ(ierr);
    ierr =  ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid2,NzindJ+pgrid1,&isx2);CHKERRQ(ierr);  
    ierr =  ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid1,idy,&isy1);CHKERRQ(ierr);
    ierr =  ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid2,idy+pgrid1,&isy2);CHKERRQ(ierr);

    /* y0 is recreated over the same buffer with the current batch size pgrid */
    ierr =  VecDestroy(y0);CHKERRQ(ierr);
    ierr =  VecCreateSeqWithArray(PETSC_COMM_SELF,pgrid,pty0,&y0);CHKERRQ(ierr);

    ierr =  VecScatterCreate(x1,isx1,y0,isy1,&ctx1);CHKERRQ(ierr);
    ierr =  VecScatterCreate(x2,isx2,y0,isy2,&ctx2);CHKERRQ(ierr);

    ierr =  VecScatterBegin(x1,y0,INSERT_VALUES,SCATTER_FORWARD,ctx1);CHKERRQ(ierr);
    ierr =  VecScatterEnd(x1,y0,INSERT_VALUES,SCATTER_FORWARD,ctx1);CHKERRQ(ierr);
    ierr =  VecScatterBegin(x2,y0,INSERT_VALUES,SCATTER_FORWARD,ctx2);CHKERRQ(ierr);
    ierr =  VecScatterEnd(x2,y0,INSERT_VALUES,SCATTER_FORWARD,ctx2);CHKERRQ(ierr);

    /* NOTE(review): the two destroy calls below are not followed by CHKERRQ, and no
       destruction of isx1/isx2/isy1/isy2 is visible in this listing */
    ierr =  VecScatterDestroy(ctx1);
    ierr =  VecScatterDestroy(ctx2);
    ierr =  VecGetArray(y0,&pty0);CHKERRQ(ierr);

      m = 0;
              /* NOTE(review): the expression below is cut off after its first term */
              *(pty+i*n2+j+rowcount) = (*(pty0+i*(localsizey+1)+j)+

     rowcount = rowcount + localsizey;


return 0;