PetscErrorCode MatGetDiagonal_Brussel(Mat A,Vec diag) { Vec d1,d2; PetscInt n; PetscScalar *pd; MPI_Comm comm; CTX_BRUSSEL *ctx; PetscErrorCode ierr; PetscFunctionBeginUser; ierr = MatShellGetContext(A,(void**)&ctx);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); ierr = MatGetLocalSize(ctx->T,&n,NULL);CHKERRQ(ierr); ierr = VecGetArray(diag,&pd);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(comm,1,n,PETSC_DECIDE,pd,&d1);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(comm,1,n,PETSC_DECIDE,pd+n,&d2);CHKERRQ(ierr); ierr = VecSet(d1,-2.0*ctx->tau1 + ctx->beta - 1.0 + ctx->sigma);CHKERRQ(ierr); ierr = VecSet(d2,-2.0*ctx->tau2 - ctx->alpha*ctx->alpha + ctx->sigma);CHKERRQ(ierr); ierr = VecDestroy(&d1);CHKERRQ(ierr); ierr = VecDestroy(&d2);CHKERRQ(ierr); ierr = VecRestoreArray(diag,&pd);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode BVCreate_Mat(BV bv) { PetscErrorCode ierr; BV_MAT *ctx; PetscInt nloc,bs; PetscBool seq; char str[50]; PetscFunctionBegin; ierr = PetscNewLog(bv,&ctx);CHKERRQ(ierr); bv->data = (void*)ctx; ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECMPI,&ctx->mpi);CHKERRQ(ierr); if (!ctx->mpi) { ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECSEQ,&seq);CHKERRQ(ierr); if (!seq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot create a BVMAT from a non-standard template vector"); } ierr = VecGetLocalSize(bv->t,&nloc);CHKERRQ(ierr); ierr = VecGetBlockSize(bv->t,&bs);CHKERRQ(ierr); ierr = MatCreateDense(PetscObjectComm((PetscObject)bv->t),nloc,PETSC_DECIDE,PETSC_DECIDE,bv->m,NULL,&ctx->A);CHKERRQ(ierr); ierr = MatAssemblyBegin(ctx->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(ctx->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)bv,(PetscObject)ctx->A);CHKERRQ(ierr); if (((PetscObject)bv)->name) { ierr = PetscSNPrintf(str,50,"%s_0",((PetscObject)bv)->name);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)ctx->A,str);CHKERRQ(ierr); } if (ctx->mpi) { ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,PETSC_DECIDE,NULL,&bv->cv[0]);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,PETSC_DECIDE,NULL,&bv->cv[1]);CHKERRQ(ierr); } else { ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,NULL,&bv->cv[0]);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,NULL,&bv->cv[1]);CHKERRQ(ierr); } bv->ops->mult = BVMult_Mat; bv->ops->multvec = BVMultVec_Mat; bv->ops->multinplace = BVMultInPlace_Mat; bv->ops->multinplacetrans = BVMultInPlaceTranspose_Mat; bv->ops->axpy = BVAXPY_Mat; bv->ops->dot = BVDot_Mat; bv->ops->dotvec = BVDotVec_Mat; bv->ops->scale = BVScale_Mat; bv->ops->norm = BVNorm_Mat; /*bv->ops->orthogonalize = BVOrthogonalize_Mat;*/ bv->ops->matmult = BVMatMult_Mat; bv->ops->copy = BVCopy_Mat; bv->ops->resize = BVResize_Mat; bv->ops->getcolumn = BVGetColumn_Mat; bv->ops->restorecolumn = BVRestoreColumn_Mat; bv->ops->getarray = BVGetArray_Mat; bv->ops->restorearray = BVRestoreArray_Mat; bv->ops->destroy = BVDestroy_Mat; if (!ctx->mpi) bv->ops->view = BVView_Mat; PetscFunctionReturn(0); }
int Epetra_PETScAIJMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { (void)TransA; int NumVectors = X.NumVectors(); if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1); // X and Y must have same number of vectors double ** xptrs; double ** yptrs; X.ExtractView(&xptrs); Y.ExtractView(&yptrs); if (RowMatrixImporter()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors); ImportVector_->Import(X, *RowMatrixImporter(), Insert); ImportVector_->ExtractView(&xptrs); } double *vals=0; int length; Vec petscX, petscY; int ierr; for (int i=0; i<NumVectors; i++) { # ifdef HAVE_MPI ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # else //FIXME untested ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # endif ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr); ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr); ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr); for (int j=0; j<length; j++) yptrs[i][j] = vals[j]; ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr); } VecDestroy(petscX); VecDestroy(petscY); double flops = NumGlobalNonzeros(); flops *= 2.0; flops *= (double) NumVectors; UpdateFlops(flops); return(0); } //Multiply()
dErr VecCreateDohp(MPI_Comm comm,dInt bs,dInt n,dInt nc,dInt nghosts,const dInt ghosts[],Vec *v) { Vec_MPI *vmpi; Vec vc,vg; dScalar *a; dErr err; dFunctionBegin; dValidPointer(v,7); *v = 0; err = VecCreateGhostBlock(comm,bs,nc*bs,PETSC_DECIDE,nghosts,ghosts,&vc);dCHK(err); err = VecGetArray(vc,&a);dCHK(err); err = VecCreateMPIWithArray(comm,n*bs,PETSC_DECIDE,a,&vg);dCHK(err); err = VecRestoreArray(vc,&a);dCHK(err); err = VecSetBlockSize(vg,bs);dCHK(err); vmpi = vg->data; if (vmpi->localrep) dERROR(PETSC_COMM_SELF,1,"Vector has localrep, expected no localrep"); vmpi->localrep = vc; /* subvert this field to mean closed rep */ /* Since we subvect .localrep, VecDestroy_MPI will automatically destroy the closed form */ vg->ops->duplicate = VecDuplicate_Dohp; //vg->ops->destroy = VecDestroy_Dohp; /* It might be useful to set the (block) LocalToGlobal mapping here, but in the use case I have in mind, the user is * always working with the closed form anyway (in function evaluation). The \e matrix does need a customized * LocalToGlobal mapping. */ err = PetscObjectChangeTypeName((dObject)vg,VECDOHP);dCHK(err); *v = vg; dFunctionReturn(0); }
int LargeVecCreate(Vec *x, PetscInt nvec, Vec xvec[]){ PetscInt size,rank; PetscErrorCode ierr; PetscInt lsize,mylsize; PetscInt i; PetscScalar *ptx; PetscInt pstart[nvec], pend[nvec]; MPI_Comm_size(PETSC_COMM_WORLD,&size); MPI_Comm_rank(PETSC_COMM_WORLD,&rank); LargeVecGetOwnershipRange(x,nvec,pstart,pend); ierr = VecGetArray(*x,&ptx);CHKERRQ(ierr); ierr = VecGetLocalSize(*x,&lsize);CHKERRQ(ierr); for (i=0;i<nvec;i++){ if(rank>=pstart[i]&&rank<pend[i]){mylsize = lsize;}else{mylsize=0;} ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,mylsize,PETSC_DETERMINE,ptx,xvec+i);CHKERRQ(ierr); } return 0; }
PetscErrorCode MatSetUpMultiply_MPIDense(Mat mat) { Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; PetscErrorCode ierr; IS from,to; Vec gvec; PetscFunctionBegin; /* Create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,mat->cmap->N,&mdn->lvec);CHKERRQ(ierr); /* Create temporary index set for building scatter gather */ ierr = ISCreateStride(((PetscObject)mat)->comm,mat->cmap->N,0,1,&from);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,mat->cmap->N,0,1,&to);CHKERRQ(ierr); /* Create temporary global vector to generate scatter context */ /* n = mdn->cowners[mdn->rank+1] - mdn->cowners[mdn->rank]; */ ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mdn->nvec,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); /* Generate the scatter context */ ierr = VecScatterCreate(gvec,from,mdn->lvec,to,&mdn->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,mdn->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,mdn->lvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,gvec);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); PetscFunctionReturn(0); }
EXTERN_C_BEGIN void PETSC_STDCALL veccreatempiwitharray_(MPI_Comm *comm,PetscInt *bs,PetscInt *n,PetscInt *N,PetscScalar *s,Vec *V,PetscErrorCode *ierr) { CHKFORTRANNULLSCALAR(s); *ierr = VecCreateMPIWithArray(MPI_Comm_f2c(*(MPI_Fint *)&*comm),*bs,*n,*N,s,V); }
/*@ DMDANaturalAllToGlobalCreate - Creates a scatter context that maps from a copy of the entire vector on each processor to its local part in the global vector. Collective on DMDA Input Parameter: . da - the distributed array context Output Parameter: . scatter - the scatter context Level: advanced .keywords: distributed array, global to local, begin, coarse problem .seealso: DMDAGlobalToNaturalEnd(), DMLocalToGlobalBegin(), DMDACreate2d(), DMGlobalToLocalBegin(), DMGlobalToLocalEnd(), DMDACreateNaturalVector() @*/ PetscErrorCode DMDANaturalAllToGlobalCreate(DM da,VecScatter *scatter) { PetscErrorCode ierr; DM_DA *dd = (DM_DA*)da->data; PetscInt M,m = dd->Nlocal,start; IS from,to; Vec tmplocal,global; AO ao; PetscFunctionBegin; PetscValidHeaderSpecific(da,DM_CLASSID,1); PetscValidPointer(scatter,2); ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); /* create the scatter context */ ierr = MPI_Allreduce(&m,&M,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)da));CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)da),dd->w,m,PETSC_DETERMINE,0,&global);CHKERRQ(ierr); ierr = VecGetOwnershipRange(global,&start,NULL);CHKERRQ(ierr); ierr = ISCreateStride(PetscObjectComm((PetscObject)da),m,start,1,&from);CHKERRQ(ierr); ierr = AOPetscToApplicationIS(ao,from);CHKERRQ(ierr); ierr = ISCreateStride(PetscObjectComm((PetscObject)da),m,start,1,&to);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dd->w,M,0,&tmplocal);CHKERRQ(ierr); ierr = VecScatterCreate(tmplocal,from,global,to,scatter);CHKERRQ(ierr); ierr = VecDestroy(&tmplocal);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ DMDAGlobalToNaturalAllCreate - Creates a scatter context that maps from the global vector the entire vector to each processor in natural numbering Collective on DMDA Input Parameter: . da - the distributed array context Output Parameter: . scatter - the scatter context Level: advanced .keywords: distributed array, global to local, begin, coarse problem .seealso: DMDAGlobalToNaturalEnd(), DMLocalToGlobalBegin(), DMDACreate2d(), DMGlobalToLocalBegin(), DMGlobalToLocalEnd(), DMDACreateNaturalVector() @*/ PetscErrorCode DMDAGlobalToNaturalAllCreate(DM da,VecScatter *scatter) { PetscErrorCode ierr; PetscInt N; IS from,to; Vec tmplocal,global; AO ao; DM_DA *dd = (DM_DA*)da->data; PetscFunctionBegin; PetscValidHeaderSpecific(da,DM_CLASSID,1); PetscValidPointer(scatter,2); ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); /* create the scatter context */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)da),dd->w,dd->Nlocal,PETSC_DETERMINE,0,&global);CHKERRQ(ierr); ierr = VecGetSize(global,&N);CHKERRQ(ierr); ierr = ISCreateStride(PetscObjectComm((PetscObject)da),N,0,1,&to);CHKERRQ(ierr); ierr = AOPetscToApplicationIS(ao,to);CHKERRQ(ierr); ierr = ISCreateStride(PetscObjectComm((PetscObject)da),N,0,1,&from);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dd->w,N,0,&tmplocal);CHKERRQ(ierr); ierr = VecScatterCreate(global,from,tmplocal,to,scatter);CHKERRQ(ierr); ierr = VecDestroy(&tmplocal);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* DAGlobalToNatural_Create - Create the global to natural scatter object Collective on DA Input Parameter: . da - the distributed array context Level: developer Notes: This is an internal routine called by DAGlobalToNatural() to create the scatter context. .keywords: distributed array, global to local, begin .seealso: DAGlobalToNaturalBegin(), DAGlobalToNaturalEnd(), DALocalToGlobal(), DACreate2d(), DAGlobalToLocalBegin(), DAGlobalToLocalEnd(), DACreateNaturalVector() */ PetscErrorCode DAGlobalToNatural_Create(DA da) { PetscErrorCode ierr; PetscInt m,start,Nlocal; IS from,to; Vec global; PetscFunctionBegin; PetscValidHeaderSpecific(da,DM_COOKIE,1); if (!da->natural) { SETERRQ(PETSC_ERR_ORDER,"Natural layout vector not yet created; cannot scatter into it"); } /* create the scatter context */ ierr = VecGetLocalSize(da->natural,&m);CHKERRQ(ierr); ierr = VecGetOwnershipRange(da->natural,&start,PETSC_NULL);CHKERRQ(ierr); ierr = DAGetNatural_Private(da,&Nlocal,&to);CHKERRQ(ierr); if (Nlocal != m) SETERRQ2(PETSC_ERR_PLIB,"Internal error: Nlocal %D local vector size %D",Nlocal,m); ierr = ISCreateStride(((PetscObject)da)->comm,m,start,1,&from);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(((PetscObject)da)->comm,da->Nlocal,PETSC_DETERMINE,0,&global); ierr = VecSetBlockSize(global,da->w);CHKERRQ(ierr); ierr = VecScatterCreate(global,from,da->natural,to,&da->gton);CHKERRQ(ierr); ierr = VecDestroy(global);CHKERRQ(ierr); ierr = ISDestroy(from);CHKERRQ(ierr); ierr = ISDestroy(to);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* PEPLinearExtract_Norm - Auxiliary routine that copies the solution of the linear eigenproblem to the PEP object. The eigenvector of the generalized problem is supposed to be z = [ x ] [ l*x ] If |l|<1.0, the eigenvector is taken from z(1:n), otherwise from z(n+1:2*n). Finally, x is normalized so that ||x||_2 = 1. */ static PetscErrorCode PEPLinearExtract_Norm(PEP pep,EPS eps) { PetscErrorCode ierr; PetscInt i,offset; PetscScalar *px; Vec xr,xi,w,vi; #if !defined(PETSC_USE_COMPLEX) Vec vi1; #endif Mat A; PetscFunctionBegin; ierr = EPSGetOperators(eps,&A,NULL);CHKERRQ(ierr); ierr = MatGetVecs(A,&xr,NULL);CHKERRQ(ierr); ierr = VecDuplicate(xr,&xi);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)pep),1,pep->nloc,pep->n,NULL,&w);CHKERRQ(ierr); for (i=0;i<pep->nconv;i++) { ierr = EPSGetEigenpair(eps,i,&pep->eigr[i],&pep->eigi[i],xr,xi);CHKERRQ(ierr); pep->eigr[i] *= pep->sfactor; pep->eigi[i] *= pep->sfactor; if (SlepcAbsEigenvalue(pep->eigr[i],pep->eigi[i])>1.0) offset = pep->nloc; else offset = 0; #if !defined(PETSC_USE_COMPLEX) if (pep->eigi[i]>0.0) { /* first eigenvalue of a complex conjugate pair */ ierr = VecGetArray(xr,&px);CHKERRQ(ierr); ierr = VecPlaceArray(w,px+offset);CHKERRQ(ierr); ierr = BVInsertVec(pep->V,i,w);CHKERRQ(ierr); ierr = VecResetArray(w);CHKERRQ(ierr); ierr = VecRestoreArray(xr,&px);CHKERRQ(ierr); ierr = VecGetArray(xi,&px);CHKERRQ(ierr); ierr = VecPlaceArray(w,px+offset);CHKERRQ(ierr); ierr = BVInsertVec(pep->V,i+1,w);CHKERRQ(ierr); ierr = VecResetArray(w);CHKERRQ(ierr); ierr = VecRestoreArray(xi,&px);CHKERRQ(ierr); ierr = BVGetColumn(pep->V,i,&vi);CHKERRQ(ierr); ierr = BVGetColumn(pep->V,i+1,&vi1);CHKERRQ(ierr); ierr = SlepcVecNormalize(vi,vi1,PETSC_TRUE,NULL);CHKERRQ(ierr); ierr = BVRestoreColumn(pep->V,i,&vi);CHKERRQ(ierr); ierr = BVRestoreColumn(pep->V,i+1,&vi1);CHKERRQ(ierr); } else if (pep->eigi[i]==0.0) /* real eigenvalue */ #endif { ierr = VecGetArray(xr,&px);CHKERRQ(ierr); ierr = VecPlaceArray(w,px+offset);CHKERRQ(ierr); ierr = BVInsertVec(pep->V,i,w);CHKERRQ(ierr); ierr = VecResetArray(w);CHKERRQ(ierr); ierr = VecRestoreArray(xr,&px);CHKERRQ(ierr); ierr = BVGetColumn(pep->V,i,&vi);CHKERRQ(ierr); ierr = SlepcVecNormalize(vi,NULL,PETSC_FALSE,NULL);CHKERRQ(ierr); ierr = BVRestoreColumn(pep->V,i,&vi);CHKERRQ(ierr); } } ierr = VecDestroy(&w);CHKERRQ(ierr); ierr = VecDestroy(&xr);CHKERRQ(ierr); ierr = VecDestroy(&xi);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode BVCreate_Contiguous(BV bv) { PetscErrorCode ierr; BV_CONTIGUOUS *ctx; PetscInt j,nloc,bs; PetscBool seq; char str[50]; PetscFunctionBegin; ierr = PetscNewLog(bv,&ctx);CHKERRQ(ierr); bv->data = (void*)ctx; ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECMPI,&ctx->mpi);CHKERRQ(ierr); if (!ctx->mpi) { ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECSEQ,&seq);CHKERRQ(ierr); if (!seq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot create a contiguous BV from a non-standard template vector"); } ierr = VecGetLocalSize(bv->t,&nloc);CHKERRQ(ierr); ierr = VecGetBlockSize(bv->t,&bs);CHKERRQ(ierr); ierr = PetscMalloc1(bv->m*nloc,&ctx->array);CHKERRQ(ierr); ierr = PetscMemzero(ctx->array,bv->m*nloc*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMalloc1(bv->m,&ctx->V);CHKERRQ(ierr); for (j=0;j<bv->m;j++) { if (ctx->mpi) { ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,PETSC_DECIDE,ctx->array+j*nloc,ctx->V+j);CHKERRQ(ierr); } else { ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,nloc,ctx->array+j*nloc,ctx->V+j);CHKERRQ(ierr); } } ierr = PetscLogObjectParents(bv,bv->m,ctx->V);CHKERRQ(ierr); if (((PetscObject)bv)->name) { for (j=0;j<bv->m;j++) { ierr = PetscSNPrintf(str,50,"%s_%D",((PetscObject)bv)->name,j);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)ctx->V[j],str);CHKERRQ(ierr); } } bv->ops->mult = BVMult_Contiguous; bv->ops->multvec = BVMultVec_Contiguous; bv->ops->multinplace = BVMultInPlace_Contiguous; bv->ops->multinplacetrans = BVMultInPlaceTranspose_Contiguous; bv->ops->axpy = BVAXPY_Contiguous; bv->ops->dot = BVDot_Contiguous; bv->ops->dotvec = BVDotVec_Contiguous; bv->ops->scale = BVScale_Contiguous; bv->ops->norm = BVNorm_Contiguous; /*bv->ops->orthogonalize = BVOrthogonalize_Contiguous;*/ bv->ops->matmult = BVMatMult_Contiguous; bv->ops->copy = BVCopy_Contiguous; bv->ops->resize = BVResize_Contiguous; bv->ops->getcolumn = BVGetColumn_Contiguous; bv->ops->getarray = BVGetArray_Contiguous; bv->ops->destroy = BVDestroy_Contiguous; PetscFunctionReturn(0); }
PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A) { Mat_MPIAIJ *a = (Mat_MPIAIJ*)(A)->data; Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data); Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ PetscInt *aj = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ PetscInt i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen; PetscScalar *aa = Aij->a,*ba = Bij->a,*acols,*array; PetscErrorCode ierr; PetscFunctionBegin; /* determine the row with the most columns */ for (i=0; i<m; i++) { rmax = PetscMax(rmax,ailen[i]+bilen[i]); } aijcrl->nz = Aij->nz+Bij->nz; aijcrl->m = A->rmap->n; aijcrl->rmax = rmax; ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr); acols = aijcrl->acols; icols = aijcrl->icols; for (i=0; i<m; i++) { for (j=0; j<ailen[i]; j++) { acols[j*m+i] = *aa++; icols[j*m+i] = *aj++; } for (; j<ailen[i]+bilen[i]; j++) { acols[j*m+i] = *ba++; icols[j*m+i] = nd + *bj++; } for (; j<rmax; j++) { /* empty column entries */ acols[j*m+i] = 0.0; icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ } } ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m)));CHKERRQ(ierr); ierr = PetscFree(aijcrl->array);CHKERRQ(ierr); ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr); /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ ierr = VecDestroy(&aijcrl->xwork);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),1,nd,PETSC_DECIDE,array,&aijcrl->xwork);CHKERRQ(ierr); ierr = VecDestroy(&aijcrl->fwork);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->B->cmap->n,array+nd,&aijcrl->fwork);CHKERRQ(ierr); aijcrl->array = array; aijcrl->xscat = a->Mvctx; PetscFunctionReturn(0); }
void ReplicatableVector::Replicate(unsigned lo, unsigned hi) { // Create a PetSC vector with the array containing the distributed data Vec distributed_vec; #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 3) //PETSc 3.3 or later //Extra argument is block size VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, hi-lo, this->GetSize(), &mpData[lo], &distributed_vec); #else VecCreateMPIWithArray(PETSC_COMM_WORLD, hi-lo, this->GetSize(), &mpData[lo], &distributed_vec); #endif #if (PETSC_VERSION_MAJOR == 3) //PETSc 3.x.x VecSetOption(distributed_vec, VEC_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE); #else VecSetOption(distributed_vec, VEC_IGNORE_OFF_PROC_ENTRIES); #endif // Now do the real replication ReplicatePetscVector(distributed_vec); // Clean up PetscTools::Destroy(distributed_vec); }
//============================================================================= int Epetra_PETScAIJMatrix::RightScale(const Epetra_Vector& X) { // // This function scales the jth row of A by x[j]. // double *xptr; X.ExtractView(&xptr); Vec petscX; # ifdef HAVE_MPI int ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptr,&petscX); CHKERRQ(ierr); # else //FIXME untested int ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptr,&petscX); CHKERRQ(ierr); # endif MatDiagonalScale(Amat_, PETSC_NULL, petscX); ierr=VecDestroy(petscX); CHKERRQ(ierr); return(0); } //RightScale()
PetscErrorCode BVResize_Contiguous(BV bv,PetscInt m,PetscBool copy) { PetscErrorCode ierr; BV_CONTIGUOUS *ctx = (BV_CONTIGUOUS*)bv->data; PetscInt j,bs; PetscScalar *newarray; Vec *newV; char str[50]; PetscFunctionBegin; ierr = VecGetBlockSize(bv->t,&bs);CHKERRQ(ierr); ierr = PetscMalloc1(m*bv->n,&newarray);CHKERRQ(ierr); ierr = PetscMemzero(newarray,m*bv->n*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMalloc1(m,&newV);CHKERRQ(ierr); for (j=0;j<m;j++) { if (ctx->mpi) { ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)bv->t),bs,bv->n,PETSC_DECIDE,newarray+j*bv->n,newV+j);CHKERRQ(ierr); } else { ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)bv->t),bs,bv->n,newarray+j*bv->n,newV+j);CHKERRQ(ierr); } } ierr = PetscLogObjectParents(bv,m,newV);CHKERRQ(ierr); if (((PetscObject)bv)->name) { for (j=0;j<m;j++) { ierr = PetscSNPrintf(str,50,"%s_%D",((PetscObject)bv)->name,j);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)newV[j],str);CHKERRQ(ierr); } } if (copy) { ierr = PetscMemcpy(newarray,ctx->array,PetscMin(m,bv->m)*bv->n*sizeof(PetscScalar));CHKERRQ(ierr); } ierr = VecDestroyVecs(bv->m,&ctx->V);CHKERRQ(ierr); ctx->V = newV; ierr = PetscFree(ctx->array);CHKERRQ(ierr); ctx->array = newarray; PetscFunctionReturn(0); }
PetscErrorCode DMSetUp_DA_1D(DM da) { DM_DA *dd = (DM_DA*)da->data; const PetscInt M = dd->M; const PetscInt dof = dd->w; const PetscInt s = dd->s; const PetscInt sDist = s; /* stencil distance in points */ const PetscInt *lx = dd->lx; DMBoundaryType bx = dd->bx; MPI_Comm comm; Vec local, global; VecScatter gtol; IS to, from; PetscBool flg1 = PETSC_FALSE, flg2 = PETSC_FALSE; PetscMPIInt rank, size; PetscInt i,*idx,nn,left,xs,xe,x,Xs,Xe,start,m,IXs,IXe; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject) da, &comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); dd->p = 1; dd->n = 1; dd->m = size; m = dd->m; if (s > 0) { /* if not communicating data then should be ok to have nothing on some processes */ if (M < m) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"More processes than data points! %D %D",m,M); if ((M-1) < s && size > 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Array is too small for stencil! %D %D",M-1,s); } /* Determine locally owned region xs is the first local node number, x is the number of local nodes */ if (!lx) { ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-da_partition_blockcomm",&flg1,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-da_partition_nodes_at_end",&flg2,NULL);CHKERRQ(ierr); if (flg1) { /* Block Comm type Distribution */ xs = rank*M/m; x = (rank + 1)*M/m - xs; } else if (flg2) { /* The odd nodes are evenly distributed across last nodes */ x = (M + rank)/m; if (M/m == x) xs = rank*x; else xs = rank*(x-1) + (M+rank)%(x*m); } else { /* The odd nodes are evenly distributed across the first k nodes */ /* Regular PETSc Distribution */ x = M/m + ((M % m) > rank); if (rank >= (M % m)) xs = (rank * (PetscInt)(M/m) + M % m); else xs = rank * (PetscInt)(M/m) + rank; } ierr = MPI_Allgather(&xs,1,MPIU_INT,dd->lx,1,MPIU_INT,comm);CHKERRQ(ierr); for (i=0; i<m-1; i++) dd->lx[i] = dd->lx[i+1] - dd->lx[i]; dd->lx[m-1] = M - dd->lx[m-1]; } else { x = lx[rank]; xs = 0; for (i=0; i<rank; i++) xs += lx[i]; /* verify that data user provided is consistent */ left = xs; for (i=rank; i<size; i++) left += lx[i]; if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M %D %D",left,M); } /* check if the scatter requires more than one process neighbor or wraps around the domain more than once */ if ((x < s) & ((M > 1) | (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s); xe = xs + x; /* determine ghost region (Xs) and region scattered into (IXs) */ if (xs-sDist > 0) { Xs = xs - sDist; IXs = xs - sDist; } else { if (bx) Xs = xs - sDist; else Xs = 0; IXs = 0; } if (xe+sDist <= M) { Xe = xe + sDist; IXe = xe + sDist; } else { if (bx) Xe = xe + sDist; else Xe = M; IXe = M; } if (bx == DM_BOUNDARY_PERIODIC || bx == DM_BOUNDARY_MIRROR) { Xs = xs - sDist; Xe = xe + sDist; IXs = xs - sDist; IXe = xe + sDist; } /* allocate the base parallel and sequential vectors */ dd->Nlocal = dof*x; ierr = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr); dd->nlocal = dof*(Xe-Xs); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr); ierr = VecGetOwnershipRange(global,&start,NULL);CHKERRQ(ierr); /* Create Global to Local Vector Scatter Context */ /* global to local must retrieve ghost points */ ierr = ISCreateStride(comm,dof*(IXe-IXs),dof*(IXs-Xs),1,&to);CHKERRQ(ierr); ierr = PetscMalloc1(x+2*sDist,&idx);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)da,(x+2*(sDist))*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<IXs-Xs; i++) idx[i] = -1; /* prepend with -1s if needed for ghosted case*/ nn = IXs-Xs; if (bx == DM_BOUNDARY_PERIODIC) { /* Handle all cases with periodic first */ for (i=0; i<sDist; i++) { /* Left ghost points */ if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i; else idx[nn++] = M+(xs-sDist+i); } for (i=0; i<x; i++) idx [nn++] = xs + i; /* Non-ghost points */ for (i=0; i<sDist; i++) { /* Right ghost points */ if ((xe+i)<M) idx [nn++] = xe+i; else idx [nn++] = (xe+i) - M; } } else if (bx == DM_BOUNDARY_MIRROR) { /* Handle all cases with periodic first */ for (i=0; i<(sDist); i++) { /* Left ghost points */ if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i; else idx[nn++] = sDist - i; } for (i=0; i<x; i++) idx [nn++] = xs + i; /* Non-ghost points */ for (i=0; i<(sDist); i++) { /* Right ghost points */ if ((xe+i)<M) idx[nn++] = xe+i; else idx[nn++] = M - (i + 1); } } else { /* Now do all cases with no periodicity */ if (0 <= xs-sDist) { for (i=0; i<sDist; i++) idx[nn++] = xs - sDist + i; } else { for (i=0; i<xs; i++) idx[nn++] = i; } for (i=0; i<x; i++) idx [nn++] = xs + i; if ((xe+sDist)<=M) { for (i=0; i<sDist; i++) idx[nn++]=xe+i; } else { for (i=xe; i<M; i++) idx[nn++]=i; } } ierr = ISCreateBlock(comm,dof,nn-IXs+Xs,&idx[IXs-Xs],PETSC_USE_POINTER,&from);CHKERRQ(ierr); ierr = VecScatterCreate(global,from,local,to,>ol);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = VecDestroy(&local);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); dd->xs = dof*xs; dd->xe = dof*xe; dd->ys = 0; dd->ye = 1; dd->zs = 0; dd->ze = 1; dd->Xs = dof*Xs; dd->Xe = dof*Xe; dd->Ys = 0; dd->Ye = 1; dd->Zs = 0; dd->Ze = 1; dd->gtol = gtol; dd->base = dof*xs; da->ops->view = DMView_DA_1d; /* Set the local to global ordering in the global vector, this allows use of VecSetValuesLocal(). */ for (i=0; i<Xe-IXe; i++) idx[nn++] = -1; /* pad with -1s if needed for ghosted case*/ ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **args) { PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt N0=50,N1=20,N=N0*N1,DIM; PetscRandom rdm; PetscScalar a; PetscReal enorm; Vec x,y,z; PetscBool view=PETSC_FALSE,use_interface=PETSC_TRUE; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers"); #endif ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex143");CHKERRQ(ierr); ierr = PetscOptionsBool("-vec_view draw", "View the vectors", "ex143", view, &view, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex143",use_interface, &use_interface, NULL);CHKERRQ(ierr); ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-use_FFTW_interface",&use_interface,NULL);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); if (!use_interface) { /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ /*---------------------------------------------------------*/ fftw_plan fplan,bplan; fftw_complex *data_in,*data_out,*data_out2; ptrdiff_t alloc_local,local_n0,local_0_start; DIM = 2; if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"Use FFTW without PETSc-FFTW interface, DIM %D\n",DIM);CHKERRQ(ierr); } fftw_mpi_init(); N = N0*N1; alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start); data_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); data_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE); bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE); ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} fftw_execute(fplan); if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} fftw_execute(bplan); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ a = 1.0/(PetscReal)N; ierr = VecScale(z,a);CHKERRQ(ierr); if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-11 && !rank) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr); } /* Free spaces */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(data_in); ierr = VecDestroy(&x);CHKERRQ(ierr); fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr); fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr); } else { /* Use PETSc-FFTW interface */ /*-------------------------------------------*/ PetscInt i,*dim,k; Mat A; N=1; for (i=1; i<5; i++) { DIM = i; ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr); for (k=0; k<i; k++) { dim[k]=30; } N *= dim[i-1]; /* Create FFTW object */ if (!rank) printf("Use PETSc-FFTW interface...%d-DIM: %d\n",(int)DIM,(int)N); ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr); /* Create vectors that are compatible with parallel layout of A - must call MatCreateVecs()! */ ierr = MatCreateVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); /* Set values of space vector x */ ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} /* Apply FFTW_FORWARD and FFTW_BACKWARD */ ierr = MatMult(A,x,y);CHKERRQ(ierr); if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ a = 1.0/(PetscReal)N; ierr = VecScale(z,a);CHKERRQ(ierr); if (view) {ierr = VecView(z,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-9 && !rank) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr); } ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFree(dim);CHKERRQ(ierr); } } ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
PetscErrorCode TSSetUp_Sundials(TS ts) { TS_Sundials *cvode = (TS_Sundials*)ts->data; PetscErrorCode ierr; PetscInt glosize,locsize,i,flag; PetscScalar *y_data,*parray; void *mem; PC pc; PCType pctype; PetscBool pcnone; PetscFunctionBegin; /* get the vector size */ ierr = VecGetSize(ts->vec_sol,&glosize);CHKERRQ(ierr); ierr = VecGetLocalSize(ts->vec_sol,&locsize);CHKERRQ(ierr); /* allocate the memory for N_Vec y */ cvode->y = N_VNew_Parallel(cvode->comm_sundials,locsize,glosize); if (!cvode->y) SETERRQ(PETSC_COMM_SELF,1,"cvode->y is not allocated"); /* initialize N_Vec y: copy ts->vec_sol to cvode->y */ ierr = VecGetArray(ts->vec_sol,&parray);CHKERRQ(ierr); y_data = (PetscScalar*) N_VGetArrayPointer(cvode->y); for (i = 0; i < locsize; i++) y_data[i] = parray[i]; ierr = VecRestoreArray(ts->vec_sol,NULL);CHKERRQ(ierr); ierr = VecDuplicate(ts->vec_sol,&cvode->update);CHKERRQ(ierr); ierr = VecDuplicate(ts->vec_sol,&cvode->ydot);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->update);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->ydot);CHKERRQ(ierr); /* Create work vectors for the TSPSolve_Sundials() routine. Note these are allocated with zero space arrays because the actual array space is provided by Sundials and set using VecPlaceArray(). */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)ts),1,locsize,PETSC_DECIDE,0,&cvode->w1);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)ts),1,locsize,PETSC_DECIDE,0,&cvode->w2);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->w1);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)ts,(PetscObject)cvode->w2);CHKERRQ(ierr); /* Call CVodeCreate to create the solver memory and the use of a Newton iteration */ mem = CVodeCreate(cvode->cvode_type, CV_NEWTON); if (!mem) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"CVodeCreate() fails"); cvode->mem = mem; /* Set the pointer to user-defined data */ flag = CVodeSetUserData(mem, ts); if (flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVodeSetUserData() fails"); /* Sundials may choose to use a smaller initial step, but will never use a larger step. */ flag = CVodeSetInitStep(mem,(realtype)ts->time_step); if (flag) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetInitStep() failed"); if (cvode->mindt > 0) { flag = CVodeSetMinStep(mem,(realtype)cvode->mindt); if (flag) { if (flag == CV_MEM_NULL) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMinStep() failed, cvode_mem pointer is NULL"); else if (flag == CV_ILL_INPUT) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMinStep() failed, hmin is nonpositive or it exceeds the maximum allowable step size"); else SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMinStep() failed"); } } if (cvode->maxdt > 0) { flag = CVodeSetMaxStep(mem,(realtype)cvode->maxdt); if (flag) SETERRQ(PetscObjectComm((PetscObject)ts),PETSC_ERR_LIB,"CVodeSetMaxStep() failed"); } /* Call CVodeInit to initialize the integrator memory and specify the * user's right hand side function in u'=f(t,u), the inital time T0, and * the initial dependent variable vector cvode->y */ flag = CVodeInit(mem,TSFunction_Sundials,ts->ptime,cvode->y); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVodeInit() fails, flag %d",flag); /* specifies scalar relative and absolute tolerances */ flag = CVodeSStolerances(mem,cvode->reltol,cvode->abstol); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVodeSStolerances() fails, flag %d",flag); /* Specify max num of steps to be taken by cvode in its attempt to reach the next output time */ flag = CVodeSetMaxNumSteps(mem,ts->max_steps); /* call CVSpgmr to use GMRES as the linear solver. */ /* setup the ode integrator with the given preconditioner */ ierr = TSSundialsGetPC(ts,&pc);CHKERRQ(ierr); ierr = PCGetType(pc,&pctype);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)pc,PCNONE,&pcnone);CHKERRQ(ierr); if (pcnone) { flag = CVSpgmr(mem,PREC_NONE,0); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpgmr() fails, flag %d",flag); } else { flag = CVSpgmr(mem,PREC_LEFT,cvode->maxl); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpgmr() fails, flag %d",flag); /* Set preconditioner and solve routines Precond and PSolve, and the pointer to the user-defined block data */ flag = CVSpilsSetPreconditioner(mem,TSPrecond_Sundials,TSPSolve_Sundials); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpilsSetPreconditioner() fails, flag %d", flag); } flag = CVSpilsSetGSType(mem, MODIFIED_GS); if (flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CVSpgmrSetGSType() fails, flag %d",flag); PetscFunctionReturn(0); }
int main(int argc,char **args) { PetscInt rank,size,npt; PetscErrorCode ierr; Vec x,y0,tempvec, *vinda,*vindb,*vindc; PetscInt i,j,k,l,n,p,m,m2,pmax,puse,Istart,Iend,localsize,niter; PetscScalar dx,dy,dx2,dy2; PetscScalar *Mixnorm; PetscInt iter,*iterind,*nind; FILE *fidoutput; char fname[50]; PetscViewer socketviewer; PetscInt withMatlab; PetscTruth Matlabflag; PetscLogDouble v1,v2,elapsed_time; PetscInitialize(&argc,&args,(char *)0,help); MPI_Comm_size(PETSC_COMM_WORLD,&size); MPI_Comm_rank(PETSC_COMM_WORLD,&rank); ierr = PetscPrintf(PETSC_COMM_WORLD,"\nPETSC: Petsc Initializes successfully! \n"); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: comm_size is %d \n", size); ierr = PetscOptionsGetInt(PETSC_NULL,"-withMatlab",&withMatlab,&Matlabflag);CHKERRQ(ierr); if (Matlabflag == PETSC_FALSE){withMatlab = 0;}else{withMatlab = 1;} if(withMatlab==1){ // Rank 0 connects to socket, use default socket PetscViewerSocketOpen(PETSC_COMM_WORLD,0,PETSC_DEFAULT,&socketviewer); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: socket opened! \n");CHKERRQ(ierr); // Receive n from Matlab IntReceive(socketviewer, &nind); n = *nind; // Receive iter from Matlab IntReceive(socketviewer, &iterind); iter = *iterind; }else{ ierr = PetscOptionsGetInt(PETSC_NULL,"-ngrid",&n,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-niter",&iter,PETSC_NULL);CHKERRQ(ierr); } ///////////////////////////////////////////////////////////////////////////////////// ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of grid is %d \n", n); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: number of iteration is %d \n", iter); Mixnorm = malloc(iter*sizeof(PetscScalar)); dx = 1.0/n; dy = 1.0/n; dx2 = dx/2-dx/1e6; dy2 = dy/2-dy/1e6; npt = 5; pmax = 4e6; puse = pmax; ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated buffer size (per processer) %f Mbytes \n", pmax*1.0/1e6*8*16 ); ierr = PetscPrintf(PETSC_COMM_WORLD,"PETSC: estimated variable size %f Mbytes\n", 1.0*n*n/1e6*8*2); ///////////////////////////////////////////////////////////////////////////////////// ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE ,n,&tempvec);CHKERRQ(ierr); ierr = VecGetOwnershipRange(tempvec,&Istart,&Iend);CHKERRQ(ierr); localsize = Iend-Istart; ierr = VecDestroy(tempvec);CHKERRQ(ierr); ///////////////////////////////////////////////////////////////////////////////////// // Create initial vector Vec x0; PetscScalar *x0array; x0array = malloc((localsize)*n*sizeof(PetscScalar)); k = 0; for(i=Istart;i<Iend;i++){ for(j=0;j<n;j++){ *(x0array+k) = cos(2*M_PI*(dx/2+i*dx)); //*(x0array+k) = cos(2*M_PI*(dy/2+j*dy)); k++; } } ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n*localsize,PETSC_DECIDE,x0array,&x0);CHKERRQ(ierr); ierr = VecDuplicate(x0,&x);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,pmax*npt,&y0);CHKERRQ(ierr); ierr = VecNorm(x0,NORM_2,Mixnorm); CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"PETSC: initial norm= %f \n",*(Mixnorm+0)/n ); /////////////////////////////////////////////////////////////////////////// // Map Center Points PetscInt *NzindJ,*idx,*idy,*idp; PetscScalar *CenterX,*CenterY,*VecVal,*pty; PetscScalar *ShiftX,*ShiftY,CX,CY, *yarray; IS isx,isy; VecScatter ctx; CenterX = malloc(npt*sizeof(PetscScalar)); CenterY = malloc(npt*sizeof(PetscScalar)); ShiftX = malloc(npt*sizeof(PetscScalar)); ShiftY = malloc(npt*sizeof(PetscScalar)); VecVal = malloc(npt*sizeof(PetscScalar)); yarray = malloc(pmax*sizeof(PetscScalar)); NzindJ = malloc(pmax*npt*sizeof(PetscInt)); idx = malloc(pmax*npt*sizeof(PetscInt)); idy = malloc(pmax*npt*sizeof(PetscInt)); idp = malloc(pmax*sizeof(PetscInt)); *(ShiftX+0) = 0; *(ShiftY+0) = 0; *(ShiftX+1) = -dx2; *(ShiftY+1) = -dy2; *(ShiftX+2) = dx2; *(ShiftY+2) = -dy2; *(ShiftX+3) = -dx2; *(ShiftY+3) = dy2; *(ShiftX+4) = dy2; *(ShiftY+4) = dx2; //*(ShiftX+5) = 0; //*(ShiftY+5) = -dy2; //*(ShiftX+6) = -dx2; //*(ShiftY+6) = 0; //*(ShiftX+7) = dx2; //*(ShiftY+7) = 0; //*(ShiftX+8) = 0; //*(ShiftY+9) = dy2; for(i=0;i<npt*pmax;i++){ *(idy+i)=i; } ISCreateGeneralWithArray(PETSC_COMM_SELF,npt*pmax,idy,&isy); vinda = &x0; vindb = &x; sprintf(fname, "mixnorm_%d_%d",n,iter); ierr =PetscPrintf(PETSC_COMM_WORLD,"\n iter norm time unit time\n");CHKERRQ(ierr); ierr =PetscFOpen(PETSC_COMM_WORLD,fname,"w",&fidoutput);CHKERRQ(ierr); for(niter=0;niter<iter;niter++){ ierr = PetscGetTime(&v1);CHKERRQ(ierr); l = 0; p = 0; if (n*localsize-l<=pmax){puse = n*localsize-l;}else{puse=pmax;} for(i=Istart;i<Iend;i++){ for(j=0;j<n;j++){ CX = dx2+i*dx; CY = dy2+j*dy; for(k=0;k<npt;k++){ *(CenterX+k) = CX + *(ShiftX+k); *(CenterY+k) = CY + *(ShiftY+k); InverseStandardMap((CenterX+k),(CenterY+k)); *(NzindJ+p*npt +k) = floor(*(CenterX+k)*n)*n + floor(*(CenterY+k)*n); } *(idp+p) = Istart*n+ l; if(p>=puse-1){ ierr = ISCreateGeneralWithArray(PETSC_COMM_WORLD,npt*puse,NzindJ,&isx);CHKERRQ(ierr); for(m=0;m<npt*puse;m++){ *(idy+m)=m; } ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,npt*puse,idy,&isy);CHKERRQ(ierr); ierr = VecScatterCreate(*vinda,isx,y0,isy,&ctx);CHKERRQ(ierr); ierr = VecScatterBegin(*vinda,y0,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr); ierr = VecScatterEnd(*vinda,y0,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr); ierr = VecScatterDestroy(ctx); ierr = VecGetArray(y0,&pty);CHKERRQ(ierr); for(m=0;m<puse;m++){ for(m2=0;m2<npt;m2++){ *(yarray+m) = *(yarray+m)+*(pty+m*npt+m2); } *(yarray+m) = *(yarray+m)/npt; } VecRestoreArray(y0,&pty); VecSetValues(*vindb,puse,idp,yarray,INSERT_VALUES); for(m=0;m<pmax;m++){*(yarray+m) = 0; } p = 0; if (n*localsize-l<=pmax){puse = n*localsize-l-1;}else{puse=pmax;} }else{p++;} l++; } } VecAssemblyBegin(*vindb); VecAssemblyEnd(*vindb); vindc = vindb; vindb = vinda; vinda = vindc; //ierr = VecCopy(x,x0);CHKERRQ(ierr); ierr = VecNorm(*vinda,NORM_2,Mixnorm+niter); CHKERRQ(ierr); *(Mixnorm+niter) = *(Mixnorm+niter)/n; ierr = PetscGetTime(&v2);CHKERRQ(ierr); elapsed_time = v2 - v1; PetscPrintf(PETSC_COMM_WORLD," %d %f %f %f \n",niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 ); PetscFPrintf(PETSC_COMM_WORLD,fidoutput," %d %f %f %f\n" ,niter,*(Mixnorm+niter),elapsed_time,elapsed_time/n/n*1e6 ); } PetscFClose(PETSC_COMM_WORLD,fidoutput); /////////////////////////////////////////////////////////////////////////// if(withMatlab==1){ VecView(x0,socketviewer); PetscScalarView(iter,Mixnorm,socketviewer); } free(CenterX); free(CenterY); free(ShiftX); free(ShiftY); free(x0array); free(idx); free(idy); free(idp); free(yarray); free(NzindJ); free(Mixnorm); ierr = VecDestroy(x0);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = VecDestroy(y0);CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"Done!"); ////////////////////////////////////////////////////////////////////////////////////// ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
PetscInt main(PetscInt argc,char **args) { ptrdiff_t N0=256,N1=256,N2=256,N3=2,dim[4]; fftw_plan bplan,fplan; fftw_complex *out; double *in1,*in2; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; PetscInt i,j,indx,n1; PetscInt size,rank,n,N,*in,N_factor,NM; PetscScalar *data_fin,value1,one=1.57,zero=0.0; PetscScalar a,*x_arr,*y_arr,*z_arr,enorm; Vec fin,fout,fout1,ini,final; PetscRandom rnd; PetscErrorCode ierr; VecScatter vecscat,vecscat1; IS indx1,indx2; PetscInt *indx3,k,l,*indx4; PetscInt low,tempindx,tempindx1; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); PetscRandomCreate(PETSC_COMM_WORLD,&rnd); alloc_local = fftw_mpi_local_size_3d_transposed(N0,N1,N2/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); /* printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); */ printf("The value local_n0 is %ld from process %d\n",local_n0,rank); /* printf("The value local_0_start is %ld from process %d\n",local_0_start,rank);*/ /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank);*/ /* Allocate space for input and output arrays */ in1=(double*)fftw_malloc(sizeof(double)*alloc_local*2); in2=(double*)fftw_malloc(sizeof(double)*alloc_local*2); out=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); N=2*N0*N1*(N2/2+1);N_factor=N0*N1*N2; n=2*local_n0*N1*(N2/2+1);n1=local_n1*N0*2*N1; /* printf("The value N is %d from process %d\n",N,rank); */ /* printf("The value n is %d from process %d\n",n,rank); */ /* printf("The value n1 is %d from process %d\n",n1,rank); */ /* Creating data vector and accompanying array with VeccreateMPIWithArray */ ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr); /* VecGetSize(fin,&size); */ /* printf("The size is %d\n",size); */ VecSet(fin,one); VecSet(fout,zero); VecSet(fout1,zero); VecAssemblyBegin(fin); VecAssemblyEnd(fin); /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ VecGetArray(fin,&x_arr); VecGetArray(fout1,&z_arr); VecGetArray(fout,&y_arr); fplan=fftw_mpi_plan_dft_r2c_3d(N0,N1,N2,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); bplan=fftw_mpi_plan_dft_c2r_3d(N0,N1,N2,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); fftw_execute(fplan); fftw_execute(bplan); VecRestoreArray(fin,&x_arr); VecRestoreArray(fout1,&z_arr); VecRestoreArray(fout,&y_arr); /* a = 1.0/(PetscReal)N_factor; */ /* ierr = VecScale(fout1,a);CHKERRQ(ierr); */ VecCreate(PETSC_COMM_WORLD,&ini); VecCreate(PETSC_COMM_WORLD,&final); VecSetSizes(ini,local_n0*N1*N2,N_factor); VecSetSizes(final,local_n0*N1*N2,N_factor); /* VecSetSizes(ini,PETSC_DECIDE,N_factor); */ /* VecSetSizes(final,PETSC_DECIDE,N_factor); */ VecSetFromOptions(ini); VecSetFromOptions(final); if (N2%2==0) NM=N2+2; else NM=N2+1; ierr = VecGetOwnershipRange(fin,&low,NULL); printf("The local index is %d from %d\n",low,rank); ierr = PetscMalloc1(local_n0*N1*N2,&indx3); ierr = PetscMalloc1(local_n0*N1*N2,&indx4); for (i=0; i<local_n0; i++) { for (j=0;j<N1;j++) { for (k=0;k<N2;k++) { tempindx = i*N1*N2 + j*N2 + k; tempindx1 = i*N1*NM + j*NM + k; indx3[tempindx]=local_0_start*N1*N2+tempindx; indx4[tempindx]=low+tempindx1; } /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ } } VecGetValues(fin,local_n0*N1*N2,indx4,x_arr); VecSetValues(ini,local_n0*N1*N2,indx3,x_arr,INSERT_VALUES); VecAssemblyBegin(ini); VecAssemblyEnd(ini); VecGetValues(fout1,local_n0*N1*N2,indx4,y_arr); VecSetValues(final,local_n0*N1*N2,indx3,y_arr,INSERT_VALUES); VecAssemblyBegin(final); VecAssemblyEnd(final); printf("The local index value is %ld from %d",local_n0*N1*N2,rank); /* for (i=0;i<N0;i++) { for (j=0;j<N1;j++) { indx=i*N1*NM+j*NM; ISCreateStride(PETSC_COMM_WORLD,N2,indx,1,&indx1); indx=i*N1*N2+j*N2; ISCreateStride(PETSC_COMM_WORLD,N2,indx,1,&indx2); VecScatterCreate(fin,indx1,ini,indx2,&vecscat); VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterCreate(fout1,indx1,final,indx2,&vecscat1); VecScatterBegin(vecscat1,fout1,final,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat1,fout1,final,INSERT_VALUES,SCATTER_FORWARD); } } */ a = 1.0/(PetscReal)N_factor; ierr = VecScale(fout1,a);CHKERRQ(ierr); ierr = VecScale(final,a);CHKERRQ(ierr); VecAssemblyBegin(ini); VecAssemblyEnd(ini); VecAssemblyBegin(final); VecAssemblyEnd(final); /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr); ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",enorm);CHKERRQ(ierr); fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr); fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr); fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); PetscErrorCode ierr; PetscInt i,j,*aj = B->j,ec = 0,*garray; IS from,to; Vec gvec; #if defined(PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscTablePosition tpos; PetscInt gid,lid; #else PetscInt N = mat->cmap->N,*indices; #endif PetscFunctionBegin; if (!aij->garray) { #if defined(PETSC_USE_CTABLE) /* use a table */ ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt data,gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); if (!data) { /* one based table */ ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); } } } /* form array of columns we need */ ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); gid--; lid--; garray[lid] = gid; } ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); for (i=0; i<ec; i++) { ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); } /* compact out the extra columns in B */ for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); lid--; aj[B->i[i] + j] = lid; } } aij->B->cmap->n = aij->B->cmap->N = ec; aij->B->cmap->bs = 1; ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else /* Make an array as long as the number of columns */ /* mark those columns that are in aij->B */ ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j]]) ec++; indices[aj[B->i[i] + j]] = 1; } } /* form array of columns we need */ ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); ec = 0; for (i=0; i<N; i++) { if (indices[i]) garray[ec++] = i; } /* make indices now point into garray */ for (i=0; i<ec; i++) { indices[garray[i]] = i; } /* compact out the extra columns in B */ for (i=0; i<aij->B->rmap->n; i++) { for (j=0; j<B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } } aij->B->cmap->n = aij->B->cmap->N = ec; aij->B->cmap->bs = 1; ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); ierr = PetscFree(indices);CHKERRQ(ierr); #endif } else { garray = aij->garray; } if (!aij->lvec) { /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); } else { ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); } /* create two temporary Index sets for build scatter gather */ ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); /* create temporary global vector to generate scatter context */ /* This does not allocate the array's memory so is efficient */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); /* generate the scatter context */ if (aij->Mvctx_mpi1_flg) { ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); } else { ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); } aij->garray = garray; ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DMSetUp_DA_2D(DM da) { DM_DA *dd = (DM_DA*)da->data; const PetscInt M = dd->M; const PetscInt N = dd->N; PetscInt m = dd->m; PetscInt n = dd->n; const PetscInt dof = dd->w; const PetscInt s = dd->s; DMDABoundaryType bx = dd->bx; DMDABoundaryType by = dd->by; DMDAStencilType stencil_type = dd->stencil_type; PetscInt *lx = dd->lx; PetscInt *ly = dd->ly; MPI_Comm comm; PetscMPIInt rank,size; PetscInt xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,start,end,IXs,IXe,IYs,IYe; PetscInt up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn,*idx_cpy; const PetscInt *idx_full; PetscInt xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count; PetscInt s_x,s_y; /* s proportionalized to w */ PetscInt sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0; Vec local,global; VecScatter ltog,gtol; IS to,from,ltogis; PetscErrorCode ierr; PetscFunctionBegin; if (stencil_type == DMDA_STENCIL_BOX && (bx == DMDA_BOUNDARY_MIRROR || by == DMDA_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil"); ierr = PetscObjectGetComm((PetscObject)da,&comm); CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (((Petsc64bitInt) M)*((Petsc64bitInt) N)*((Petsc64bitInt) dof) > (Petsc64bitInt) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof); #endif if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof); if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s); ierr = MPI_Comm_size(comm,&size); CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank); CHKERRQ(ierr); if (m != PETSC_DECIDE) { if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m); else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size); } if (n != PETSC_DECIDE) { if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n); else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size); } if (m == PETSC_DECIDE || n == PETSC_DECIDE) { if (n != PETSC_DECIDE) { m = size/n; } else if (m != PETSC_DECIDE) { n = size/m; } else { /* try for squarish distribution */ m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N))); if (!m) m = 1; while (m > 0) { n = size/m; if (m*n == size) break; m--; } if (M > N && m < n) { PetscInt _m = m; m = n; n = _m; } } if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n "); } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition"); if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m); if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n); /* Determine locally owned region xs is the first local node number, x is the number of local nodes */ if (!lx) { ierr = PetscMalloc(m*sizeof(PetscInt), &dd->lx); CHKERRQ(ierr); lx = dd->lx; for (i=0; i<m; i++) { lx[i] = M/m + ((M % m) > i); } } x = lx[rank % m]; xs = 0; for (i=0; i<(rank % m); i++) { xs += lx[i]; } #if defined(PETSC_USE_DEBUG) left = xs; for (i=(rank % m); i<m; i++) { left += lx[i]; } if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M); #endif /* Determine locally owned region ys is the first local node number, y is the number of local nodes */ if (!ly) { ierr = PetscMalloc(n*sizeof(PetscInt), &dd->ly); CHKERRQ(ierr); ly = dd->ly; for (i=0; i<n; i++) { ly[i] = N/n + ((N % n) > i); } } y = ly[rank/m]; ys = 0; for (i=0; i<(rank/m); i++) { ys += ly[i]; } #if defined(PETSC_USE_DEBUG) left = ys; for (i=(rank/m); i<n; i++) { left += ly[i]; } if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N); #endif /* check if the scatter requires more than one process neighbor or wraps around the domain more than once */ if ((x < s) && ((m > 1) || (bx == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s); if ((y < s) && ((n > 1) || (by == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s); xe = xs + x; ye = ys + y; /* determine ghost region (Xs) and region scattered into (IXs) */ if (xs-s > 0) { Xs = xs - s; IXs = xs - s; } else { if (bx) { Xs = xs - s; } else { Xs = 0; } IXs = 0; } if (xe+s <= M) { Xe = xe + s; IXe = xe + s; } else { if (bx) { Xs = xs - s; Xe = xe + s; } else { Xe = M; } IXe = M; } if (bx == DMDA_BOUNDARY_PERIODIC || bx == DMDA_BOUNDARY_MIRROR) { IXs = xs - s; IXe = xe + s; Xs = xs - s; Xe = xe + s; } if (ys-s > 0) { Ys = ys - s; IYs = ys - s; } else { if (by) { Ys = ys - s; } else { Ys = 0; } IYs = 0; } if (ye+s <= N) { Ye = ye + s; IYe = ye + s; } else { if (by) { Ye = ye + s; } else { Ye = N; } IYe = N; } if (by == DMDA_BOUNDARY_PERIODIC || by == DMDA_BOUNDARY_MIRROR) { IYs = ys - s; IYe = ye + s; Ys = ys - s; Ye = ye + s; } /* stencil length in each direction */ s_x = s; s_y = s; /* determine starting point of each processor */ nn = x*y; ierr = PetscMalloc2(size+1,PetscInt,&bases,size,PetscInt,&ldims); CHKERRQ(ierr); ierr = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm); CHKERRQ(ierr); bases[0] = 0; for (i=1; i<=size; i++) { bases[i] = ldims[i-1]; } for (i=1; i<=size; i++) { bases[i] += bases[i-1]; } base = bases[rank]*dof; /* allocate the base parallel and sequential vectors */ dd->Nlocal = x*y*dof; ierr = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,0,&global); CHKERRQ(ierr); dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof; ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,0,&local); CHKERRQ(ierr); /* generate appropriate vector scatters */ /* local to global inserts non-ghost point region into global */ ierr = VecGetOwnershipRange(global,&start,&end); CHKERRQ(ierr); ierr = ISCreateStride(comm,x*y*dof,start,1,&to); CHKERRQ(ierr); ierr = PetscMalloc(x*y*sizeof(PetscInt),&idx); CHKERRQ(ierr); left = xs - Xs; right = left + x; down = ys - Ys; up = down + y; count = 0; for (i=down; i<up; i++) { for (j=left; j<right; j++) { idx[count++] = i*(Xe-Xs) + j; } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&from); CHKERRQ(ierr); ierr = VecScatterCreate(local,from,global,to,<og); CHKERRQ(ierr); ierr = PetscLogObjectParent(dd,ltog); CHKERRQ(ierr); ierr = ISDestroy(&from); CHKERRQ(ierr); ierr = ISDestroy(&to); CHKERRQ(ierr); /* global to local must include ghost points within the domain, but not ghost points outside the domain that aren't periodic */ if (stencil_type == DMDA_STENCIL_BOX) { count = (IXe-IXs)*(IYe-IYs); ierr = PetscMalloc(count*sizeof(PetscInt),&idx); CHKERRQ(ierr); left = IXs - Xs; right = left + (IXe-IXs); down = IYs - Ys; up = down + (IYe-IYs); count = 0; for (i=down; i<up; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to); CHKERRQ(ierr); } else { /* must drop into cross shape region */ /* ---------| | top | |--- ---| up | middle | | | ---- ---- down | bottom | ----------- Xs xs xe Xe */ count = (ys-IYs)*x + y*(IXe-IXs) + (IYe-ye)*x; ierr = PetscMalloc(count*sizeof(PetscInt),&idx); CHKERRQ(ierr); left = xs - Xs; right = left + x; down = ys - Ys; up = down + y; count = 0; /* bottom */ for (i=(IYs-Ys); i<down; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } /* middle */ for (i=down; i<up; i++) { for (j=(IXs-Xs); j<(IXe-Xs); j++) { idx[count++] = j + i*(Xe-Xs); } } /* top */ for (i=up; i<up+IYe-ye; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to); CHKERRQ(ierr); } /* determine who lies on each side of us stored in n6 n7 n8 n3 n5 n0 n1 n2 */ /* Assume the Non-Periodic Case */ n1 = rank - m; if (rank % m) { n0 = n1 - 1; } else { n0 = -1; } if ((rank+1) % m) { n2 = n1 + 1; n5 = rank + 1; n8 = rank + m + 1; if (n8 >= m*n) n8 = -1; } else { n2 = -1; n5 = -1; n8 = -1; } if (rank % m) { n3 = rank - 1; n6 = n3 + m; if (n6 >= m*n) n6 = -1; } else { n3 = -1; n6 = -1; } n7 = rank + m; if (n7 >= m*n) n7 = -1; if (bx == DMDA_BOUNDARY_PERIODIC && by == DMDA_BOUNDARY_PERIODIC) { /* Modify for Periodic Cases */ /* Handle all four corners */ if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1; if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0; if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m; if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1; /* Handle Top and Bottom Sides */ if (n1 < 0) n1 = rank + m * (n-1); if (n7 < 0) n7 = rank - m * (n-1); if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1; if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1; if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1; if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1; /* Handle Left and Right Sides */ if (n3 < 0) n3 = rank + (m-1); if (n5 < 0) n5 = rank - (m-1); if ((n1 >= 0) && (n0 < 0)) n0 = rank-1; if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1; if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1; if ((n7 >= 0) && (n8 < 0)) n8 = rank+1; } else if (by == DMDA_BOUNDARY_PERIODIC) { /* Handle Top and Bottom Sides */ if (n1 < 0) n1 = rank + m * (n-1); if (n7 < 0) n7 = rank - m * (n-1); if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1; if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1; if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1; if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1; } else if (bx == DMDA_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */ if (n3 < 0) n3 = rank + (m-1); if (n5 < 0) n5 = rank - (m-1); if ((n1 >= 0) && (n0 < 0)) n0 = rank-1; if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1; if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1; if ((n7 >= 0) && (n8 < 0)) n8 = rank+1; } ierr = PetscMalloc(9*sizeof(PetscInt),&dd->neighbors); CHKERRQ(ierr); dd->neighbors[0] = n0; dd->neighbors[1] = n1; dd->neighbors[2] = n2; dd->neighbors[3] = n3; dd->neighbors[4] = rank; dd->neighbors[5] = n5; dd->neighbors[6] = n6; dd->neighbors[7] = n7; dd->neighbors[8] = n8; if (stencil_type == DMDA_STENCIL_STAR) { /* save corner processor numbers */ sn0 = n0; sn2 = n2; sn6 = n6; sn8 = n8; n0 = n2 = n6 = n8 = -1; } ierr = PetscMalloc((Xe-Xs)*(Ye-Ys)*sizeof(PetscInt),&idx); CHKERRQ(ierr); ierr = PetscLogObjectMemory(da,(Xe-Xs)*(Ye-Ys)*sizeof(PetscInt)); CHKERRQ(ierr); nn = 0; xbase = bases[rank]; for (i=1; i<=s_y; i++) { if (n0 >= 0) { /* left below */ x_t = lx[n0 % m]; y_t = ly[(n0/m)]; s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } if (n1 >= 0) { /* directly below */ x_t = x; y_t = ly[(n1/m)]; s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1) + j; } if (n2 >= 0) { /* right below */ x_t = lx[n2 % m]; y_t = ly[(n2/m)]; s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } } for (i=0; i<y; i++) { if (n3 >= 0) { /* directly left */ x_t = lx[n3 % m]; /* y_t = y; */ s_t = bases[n3] + (i+1)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j; } for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */ if (n5 >= 0) { /* directly right */ x_t = lx[n5 % m]; /* y_t = y; */ s_t = bases[n5] + (i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j; } } for (i=1; i<=s_y; i++) { if (n6 >= 0) { /* left above */ x_t = lx[n6 % m]; /* y_t = ly[(n6/m)]; */ s_t = bases[n6] + (i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } if (n7 >= 0) { /* directly above */ x_t = x; /* y_t = ly[(n7/m)]; */ s_t = bases[n7] + (i-1)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1) + j; } if (n8 >= 0) { /* right above */ x_t = lx[n8 % m]; /* y_t = ly[(n8/m)]; */ s_t = bases[n8] + (i-1)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } } ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_COPY_VALUES,&from); CHKERRQ(ierr); ierr = VecScatterCreate(global,from,local,to,>ol); CHKERRQ(ierr); ierr = PetscLogObjectParent(da,gtol); CHKERRQ(ierr); ierr = ISDestroy(&to); CHKERRQ(ierr); ierr = ISDestroy(&from); CHKERRQ(ierr); if (stencil_type == DMDA_STENCIL_STAR) { n0 = sn0; n2 = sn2; n6 = sn6; n8 = sn8; } if (((stencil_type == DMDA_STENCIL_STAR) || (bx && bx != DMDA_BOUNDARY_PERIODIC) || (by && by != DMDA_BOUNDARY_PERIODIC))) { /* Recompute the local to global mappings, this time keeping the information about the cross corner processor numbers and any ghosted but not periodic indices. */ nn = 0; xbase = bases[rank]; for (i=1; i<=s_y; i++) { if (n0 >= 0) { /* left below */ x_t = lx[n0 % m]; y_t = ly[(n0/m)]; s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0 && ys-Ys > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } if (n1 >= 0) { /* directly below */ x_t = x; y_t = ly[(n1/m)]; s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (ys-Ys > 0) { if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1) + j; } else { for (j=0; j<x; j++) idx[nn++] = -1; } } if (n2 >= 0) { /* right below */ x_t = lx[n2 % m]; y_t = ly[(n2/m)]; s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe> 0 && ys-Ys > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } } for (i=0; i<y; i++) { if (n3 >= 0) { /* directly left */ x_t = lx[n3 % m]; /* y_t = y; */ s_t = bases[n3] + (i+1)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0) { if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j; } else { for (j=0; j<s_x; j++) idx[nn++] = -1; } } for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */ if (n5 >= 0) { /* directly right */ x_t = lx[n5 % m]; /* y_t = y; */ s_t = bases[n5] + (i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe > 0) { if (bx == DMDA_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j; } else { for (j=0; j<s_x; j++) idx[nn++] = -1; } } } for (i=1; i<=s_y; i++) { if (n6 >= 0) { /* left above */ x_t = lx[n6 % m]; /* y_t = ly[(n6/m)]; */ s_t = bases[n6] + (i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0 && Ye-ye > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } if (n7 >= 0) { /* directly above */ x_t = x; /* y_t = ly[(n7/m)]; */ s_t = bases[n7] + (i-1)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (Ye-ye > 0) { if (by == DMDA_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1) + j; } else { for (j=0; j<x; j++) idx[nn++] = -1; } } if (n8 >= 0) { /* right above */ x_t = lx[n8 % m]; /* y_t = ly[(n8/m)]; */ s_t = bases[n8] + (i-1)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe > 0 && Ye-ye > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } } } /* Set the local to global ordering in the global vector, this allows use of VecSetValuesLocal(). */ ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_OWN_POINTER,<ogis); CHKERRQ(ierr); ierr = PetscMalloc(nn*dof*sizeof(PetscInt),&idx_cpy); CHKERRQ(ierr); ierr = PetscLogObjectMemory(da,nn*dof*sizeof(PetscInt)); CHKERRQ(ierr); ierr = ISGetIndices(ltogis, &idx_full); CHKERRQ(ierr); ierr = PetscMemcpy(idx_cpy,idx_full,nn*dof*sizeof(PetscInt)); CHKERRQ(ierr); ierr = ISRestoreIndices(ltogis, &idx_full); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingCreateIS(ltogis,&da->ltogmap); CHKERRQ(ierr); ierr = PetscLogObjectParent(da,da->ltogmap); CHKERRQ(ierr); ierr = ISDestroy(<ogis); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingBlock(da->ltogmap,dd->w,&da->ltogmapb); CHKERRQ(ierr); ierr = PetscLogObjectParent(da,da->ltogmap); CHKERRQ(ierr); ierr = PetscFree2(bases,ldims); CHKERRQ(ierr); dd->m = m; dd->n = n; /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */ dd->xs = xs*dof; dd->xe = xe*dof; dd->ys = ys; dd->ye = ye; dd->zs = 0; dd->ze = 1; dd->Xs = Xs*dof; dd->Xe = Xe*dof; dd->Ys = Ys; dd->Ye = Ye; dd->Zs = 0; dd->Ze = 1; ierr = VecDestroy(&local); CHKERRQ(ierr); ierr = VecDestroy(&global); CHKERRQ(ierr); dd->gtol = gtol; dd->ltog = ltog; dd->idx = idx_cpy; dd->Nl = nn*dof; dd->base = base; da->ops->view = DMView_DA_2d; dd->ltol = NULL; dd->ao = NULL; PetscFunctionReturn(0); }
PetscErrorCode vizGA2DA() { PetscErrorCode ierr; int rank; MPI_Comm_rank(PETSC_COMM_WORLD,&rank); int d1 = 40, d2 = 50; DA da; Vec vec; const PetscInt *lx, *ly, *lz; PetscInt m,n,p; DALocalInfo info; ierr = DACreate2d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR, d1,d2,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0, &da); CHKERRQ(ierr); ierr = DACreateGlobalVector(da, &vec); CHKERRQ(ierr); ierr = DAGetOwnershipRanges(da, &lx, &ly, &lz); CHKERRQ(ierr); ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr); ierr = DAGetInfo(da,0,0,0,0,&m,&n,&p,0,0,0,0); CHKERRQ(ierr); /**/ ierr = DAView(da, PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); for (int i = 0; i < m; ++i) { PetscPrintf(PETSC_COMM_WORLD,"%d\tlx: %d\n",i,lx[i]); } for (int i = 0; i < n; ++i) { PetscPrintf(PETSC_COMM_WORLD,"%d\tly: %d\n",i,ly[i]); } /**/ int ga = GA_Create_handle(); int ndim = 2; int dims[2] = {d2,d1}; GA_Set_data(ga,2,dims,MT_DBL); int *map; PetscMalloc( sizeof(int)*(m+n), &map); map[0] = 0; for( int i = 1; i < n; i++ ) { map[i] = ly[i-1] + map[i-1]; } map[n] = 0; for( int i = n+1; i < m+n; i++ ) { map[i] = lx[i-n-1] + map[i-1]; } /* correct ordering, but nodeid's dont line up with mpi rank for petsc's da * DA: +---+---+ GA: +---+---+ * +-2-+-3-+ +-1-+-3-+ * +---+---+ +---+---+ * +-0-+-1-+ +-0-+-2-+ * +---+---+ +---+---+ int *map; PetscMalloc( sizeof(int)*(m+n), &map); map[0] = 0; for( int i = 1; i < m; i++ ) { map[i] = lx[i] + map[i-1]; } map[m] = 0; for( int i = m+1; i < m+n; i++ ) { map[i] = ly[i-m] + map[i-1]; } */ int block[2] = {n,m}; GA_Set_irreg_distr(ga,map,block); ierr = GA_Allocate( ga ); if( !ierr ) GA_Error("\n\n\nga allocaltion failed\n\n",ierr); if( !ga ) GA_Error("\n\n\n ga null \n\n",ierr); if( rank != GA_Nodeid() ) GA_Error("MPI rank does not match GA_Nodeid()",1); GA_Print_distribution(ga); int lo[2], hi[2]; NGA_Distribution(ga,rank,lo,hi); if( lo[1] != info.xs || hi[1] != info.xs+info.xm-1 || lo[0] != info.ys || hi[0] != info.ys+info.ym-1 ) { PetscSynchronizedPrintf(PETSC_COMM_SELF,"[%d] lo:(%2d,%2d) hi:(%2d,%2d) \t DA: (%2d,%2d), (%2d, %2d)\n", rank, lo[1], lo[0], hi[1], hi[0], info.xs, info.ys, info.xs+info.xm-1, info.ys+info.ym-1); } PetscBarrier(0); PetscSynchronizedFlush(PETSC_COMM_WORLD); AO ao; DAGetAO(da,&ao); if( rank == 0 ) { int *idx, len = d1*d2; PetscReal *val; PetscMalloc(sizeof(PetscReal)*len, &val); PetscMalloc(sizeof(int)*len, &idx); for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { idx[i + d1*j] = i + d1*j; val[i + d1*j] = i + d1*j; } } AOApplicationToPetsc(ao,len,idx); VecSetValues(vec,len,idx,val,INSERT_VALUES); int a[2], b[2],ld[1]={0}; double c = 0; for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { a[0] = j; a[1] = i; // printf("%5.0f ",c); NGA_Put(ga,a,a,&c,ld); c++; } } } // GA_Print(ga); VecAssemblyBegin(vec); VecAssemblyEnd(vec); int ld; double *ptr; NGA_Access(ga,lo,hi,&ptr,&ld); PetscReal **d; int c=0; ierr = DAVecGetArray(da,vec,&d); CHKERRQ(ierr); for (int j = info.ys; j < info.ys+info.ym; ++j) { for (int i = info.xs; i < info.xs+info.xm; ++i) { if( d[j][i] != ptr[(i-info.xs)+ld*(j-info.ys)] ) GA_Error("DA array is not equal to GA array",1); // printf("%d (%d,%d):\t%3.0f\t%3.0f\n", c, i, j, d[j][i], ptr[(i-info.xs)+ld*(j-info.ys)]); c++; } } ierr = DAVecRestoreArray(da,vec,&d); CHKERRQ(ierr); c=0; PetscReal *v; int start, end; VecGetOwnershipRange(vec, &start, &end); VecGetArray( vec, &v ); for( int i = start; i < end; i++) { // printf("%d:\t%3.0f\t%3.0f\t%s\n", start, v[i-start], ptr[i-start], (v[i-start]-ptr[i-start]==0?"":"NO") ); } VecRestoreArray( vec, &v ); NGA_Release_update(ga,lo,hi); Vec gada; VecCreateMPIWithArray(((PetscObject)da)->comm,da->Nlocal,PETSC_DETERMINE,ptr,&gada); VecView(gada,PETSC_VIEWER_STDOUT_SELF); GA_Destroy(ga); ierr = VecDestroy(vec); CHKERRQ(ierr); ierr = DADestroy(da); CHKERRQ(ierr); PetscFunctionReturn(0); }
/* A new Strategy can handle large size problem (more than 4G variables) */ int BackwardAverageRL(Vec *x, Vec *y, PetscInt *cacheInt, PetscScalar *cacheScalar, PetscInt n, PetscInt npt, PetscInt pmax, PetscInt Istart, PetscInt Iend,PetscScalar c){ PetscInt rank,size; PetscErrorCode ierr; PetscInt i, j, k=0, pi, pj, n2,n4 ,m, puse, pgrid,lx; PetscInt localsizex,localsizey, rowcount=0; PetscInt k1,k2,pgrid1,pgrid2; PetscInt *idy,*idp, *NzindJ; PetscScalar dx,dy,dx2,dy2,CX,CY; PetscScalar *pty, *pty0; IS isx1,isx2,isy1,isy2; VecScatter ctx1,ctx2; Vec y0; Vec x1,x2; PetscScalar *ptx1,*ptx2; PetscInt size1,size2,col1,col2; MPI_Comm_size(PETSC_COMM_WORLD,&size); MPI_Comm_rank(PETSC_COMM_WORLD,&rank); n2 = (PetscInt)(n*0.5); n4 = (PetscInt)(n*0.25); dx = 1.0/n; dy = 1.0/n; dx2 = dx/2-dx/1e6; dy2 = dy/2-dy/1e6; NzindJ = cacheInt; //pmax idp = cacheInt; //pmax idy = cacheInt + pmax; pty0 = cacheScalar ; //pmax localsizex = Iend-Istart; localsizey = (PetscInt)(pmax*1.0/(localsizex+1))-2; if(localsizey>n2){localsizey =n2;} ierr = VecGetArray(*x,&ptx1);CHKERRQ(ierr); ptx2 = ptx1; if(rank< size*0.5){lx = localsizex*n2;}else{lx =0;} VecCreateMPIWithArray(PETSC_COMM_WORLD,lx,PETSC_DETERMINE,ptx1,&x1); if(rank< size*0.5){lx = 0;}else{lx = localsizex*n2; } VecCreateMPIWithArray(PETSC_COMM_WORLD,lx,PETSC_DETERMINE,ptx2,&x2); VecGetSize(x1,&size1); VecGetSize(x2,&size2); col1 = (PetscInt)(size1*1.0/n2); col2 = (PetscInt)(size2*1.0/n2); ierr = VecGetArray(*y,&pty);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,(localsizex+1)*(localsizey+1),pty0,&y0); while(rowcount<n2){ if (n2-rowcount<=localsizey){localsizey =n2-rowcount;} puse = localsizex*localsizey; pgrid = (localsizex+1)*(localsizey+1); k= 0; k1=0; k2=0; for(i=Istart;i<Iend+1;i++){ for(j=rowcount;j<rowcount+localsizey+1;j++){ CX = (PetscScalar)(i*dx); CY = (PetscScalar)(j*dy); InverseStandardMap(&CX,&CY,c); pi = (PetscInt)floor(CX*n); pj = (PetscInt)floor(CY*n); if(pj>=n2) {SkewSymmetricPoint(&pi, &pj, n);} if(pi<col1){ *(NzindJ+k1) = (PetscInt)(n2*pi + pj); *(idy+k1) = k; k1++; }else{ *(NzindJ+pgrid-k2-1) = (PetscInt)(n2*(pi-col1)+pj); *(idy+pgrid-k2-1) = k; k2++; } k++; } } pgrid1 = k1; pgrid2 = k2; ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid1,NzindJ,&isx1);CHKERRQ(ierr); ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid2,NzindJ+pgrid1,&isx2);CHKERRQ(ierr); ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid1,idy,&isy1);CHKERRQ(ierr); ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid2,idy+pgrid1,&isy2);CHKERRQ(ierr); ierr = VecDestroy(y0);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,pgrid,pty0,&y0);CHKERRQ(ierr); ierr = VecScatterCreate(x1,isx1,y0,isy1,&ctx1);CHKERRQ(ierr); ierr = VecScatterCreate(x2,isx2,y0,isy2,&ctx2);CHKERRQ(ierr); ierr = VecScatterBegin(x1,y0,INSERT_VALUES,SCATTER_FORWARD,ctx1);CHKERRQ(ierr); ierr = VecScatterEnd(x1,y0,INSERT_VALUES,SCATTER_FORWARD,ctx1);CHKERRQ(ierr); ierr = VecScatterBegin(x2,y0,INSERT_VALUES,SCATTER_FORWARD,ctx2);CHKERRQ(ierr); ierr = VecScatterEnd(x2,y0,INSERT_VALUES,SCATTER_FORWARD,ctx2);CHKERRQ(ierr); ierr = VecScatterDestroy(ctx1); ierr = VecScatterDestroy(ctx2); ierr = VecGetArray(y0,&pty0);CHKERRQ(ierr); m = 0; for(i=0;i<localsizex;i++){ for(j=0;j<localsizey;j++){ *(pty+i*n2+j+rowcount) = (*(pty0+i*(localsizey+1)+j)+ *(pty0+i*(localsizey+1)+j+1)+ *(pty0+(i+1)*(localsizey+1)+j)+ *(pty0+(i+1)*(localsizey+1)+j+1))/4; m++; } } VecRestoreArray(y0,&pty0); VecRestoreArray(*y,&pty); rowcount = rowcount + localsizey; } VecDestroy(x1); VecDestroy(x2); return 0; }
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) { Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); PetscErrorCode ierr; PetscInt i,j,*aj = B->j,ec = 0,*garray; PetscInt bs = mat->rmap->bs,*stmp; IS from,to; Vec gvec; #if defined(PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscTablePosition tpos; PetscInt gid,lid; #else PetscInt Nbs = baij->Nbs,*indices; #endif PetscFunctionBegin; #if defined(PETSC_USE_CTABLE) /* use a table - Mark Adams */ ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt data,gid1 = aj[B->i[i]+j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); if (!data) { /* one based table */ ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); } } } /* form array of columns we need */ ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); gid--; lid--; garray[lid] = gid; } ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); for (i=0; i<ec; i++) { ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); } /* compact out the extra columns in B */ for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); lid--; aj[B->i[i]+j] = lid; } } B->nbs = ec; baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else /* Make an array as long as the number of columns */ /* mark those columns that are in baij->B */ ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j]]) ec++; indices[aj[B->i[i] + j]] = 1; } } /* form array of columns we need */ ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); ec = 0; for (i=0; i<Nbs; i++) { if (indices[i]) { garray[ec++] = i; } } /* make indices now point into garray */ for (i=0; i<ec; i++) { indices[garray[i]] = i; } /* compact out the extra columns in B */ for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } } B->nbs = ec; baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); ierr = PetscFree(indices);CHKERRQ(ierr); #endif /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); /* create two temporary index sets for building scatter-gather */ ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); for (i=0; i<ec; i++) stmp[i] = i; ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); /* create temporary global vector to generate scatter context */ ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); baij->garray = garray; ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode PCSetUp_Redundant(PC pc) { PC_Redundant *red = (PC_Redundant*)pc->data; PetscErrorCode ierr; PetscInt mstart,mend,mlocal,M; PetscMPIInt size; MPI_Comm comm,subcomm; Vec x; const char *prefix; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size == 1) red->useparallelmat = PETSC_FALSE; if (!pc->setupcalled) { PetscInt mloc_sub; if (!red->psubcomm) { ierr = PetscSubcommCreate(comm,&red->psubcomm);CHKERRQ(ierr); ierr = PetscSubcommSetNumber(red->psubcomm,red->nsubcomm);CHKERRQ(ierr); ierr = PetscSubcommSetType(red->psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); /* enable runtime switch of psubcomm type, e.g., '-psubcomm_type interlaced */ ierr = PetscSubcommSetFromOptions(red->psubcomm);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm));CHKERRQ(ierr); /* create a new PC that processors in each subcomm have copy of */ subcomm = PetscSubcommChild(red->psubcomm); ierr = KSPCreate(subcomm,&red->ksp);CHKERRQ(ierr); ierr = KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure);CHKERRQ(ierr); ierr = PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp);CHKERRQ(ierr); ierr = KSPSetType(red->ksp,KSPPREONLY);CHKERRQ(ierr); ierr = KSPGetPC(red->ksp,&red->pc);CHKERRQ(ierr); ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); ierr = KSPSetOptionsPrefix(red->ksp,prefix);CHKERRQ(ierr); ierr = KSPAppendOptionsPrefix(red->ksp,"redundant_");CHKERRQ(ierr); } else { subcomm = PetscSubcommChild(red->psubcomm); } if (red->useparallelmat) { /* grab the parallel matrix and put it into processors of a subcomminicator */ ierr = MatCreateRedundantMatrix(pc->pmat,red->psubcomm->n,subcomm,MAT_INITIAL_MATRIX,&red->pmats);CHKERRQ(ierr); ierr = KSPSetOperators(red->ksp,red->pmats,red->pmats);CHKERRQ(ierr); /* get working vectors xsub and ysub */ ierr = MatCreateVecs(red->pmats,&red->xsub,&red->ysub);CHKERRQ(ierr); /* create working vectors xdup and ydup. xdup concatenates all xsub's contigously to form a mpi vector over dupcomm (see PetscSubcommCreate_interlaced()) ydup concatenates all ysub and has empty local arrays because ysub's arrays will be place into it. Note: we use communicator dupcomm, not PetscObjectComm((PetscObject)pc)! */ ierr = MatGetLocalSize(red->pmats,&mloc_sub,NULL);CHKERRQ(ierr); ierr = VecCreateMPI(PetscSubcommContiguousParent(red->psubcomm),mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscSubcommContiguousParent(red->psubcomm),1,mloc_sub,PETSC_DECIDE,NULL,&red->ydup);CHKERRQ(ierr); /* create vecscatters */ if (!red->scatterin) { /* efficiency of scatterin is independent from psubcomm_type! */ IS is1,is2; PetscInt *idx1,*idx2,i,j,k; ierr = MatCreateVecs(pc->pmat,&x,0);CHKERRQ(ierr); ierr = VecGetSize(x,&M);CHKERRQ(ierr); ierr = VecGetOwnershipRange(x,&mstart,&mend);CHKERRQ(ierr); mlocal = mend - mstart; ierr = PetscMalloc2(red->psubcomm->n*mlocal,&idx1,red->psubcomm->n*mlocal,&idx2);CHKERRQ(ierr); j = 0; for (k=0; k<red->psubcomm->n; k++) { for (i=mstart; i<mend; i++) { idx1[j] = i; idx2[j++] = i + M*k; } } ierr = ISCreateGeneral(comm,red->psubcomm->n*mlocal,idx1,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,red->psubcomm->n*mlocal,idx2,PETSC_COPY_VALUES,&is2);CHKERRQ(ierr); ierr = VecScatterCreate(x,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); /* Impl below is good for PETSC_SUBCOMM_INTERLACED (no inter-process communication) and PETSC_SUBCOMM_CONTIGUOUS (communication within subcomm) */ ierr = ISCreateStride(comm,mlocal,mstart+ red->psubcomm->color*M,1,&is1);CHKERRQ(ierr); ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); ierr = VecScatterCreate(red->xdup,is1,x,is2,&red->scatterout);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); ierr = PetscFree2(idx1,idx2);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); } } else { /* !red->useparallelmat */ ierr = KSPSetOperators(red->ksp,pc->mat,pc->pmat);CHKERRQ(ierr); } } else { /* pc->setupcalled */ if (red->useparallelmat) { MatReuse reuse; /* grab the parallel matrix and put it into processors of a subcomminicator */ /*--------------------------------------------------------------------------*/ if (pc->flag == DIFFERENT_NONZERO_PATTERN) { /* destroy old matrices */ ierr = MatDestroy(&red->pmats);CHKERRQ(ierr); reuse = MAT_INITIAL_MATRIX; } else { reuse = MAT_REUSE_MATRIX; } ierr = MatCreateRedundantMatrix(pc->pmat,red->psubcomm->n,PetscSubcommChild(red->psubcomm),reuse,&red->pmats);CHKERRQ(ierr); ierr = KSPSetOperators(red->ksp,red->pmats,red->pmats);CHKERRQ(ierr); } else { /* !red->useparallelmat */ ierr = KSPSetOperators(red->ksp,pc->mat,pc->pmat);CHKERRQ(ierr); } } if (pc->setfromoptionscalled) { ierr = KSPSetFromOptions(red->ksp);CHKERRQ(ierr); } ierr = KSPSetUp(red->ksp);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **args) { KSP subksp; Mat A,subA; Vec x,b,u,subb,subx,subu; PetscViewer fd; char file[PETSC_MAX_PATH_LEN]; PetscBool flg; PetscErrorCode ierr; PetscInt i,m,n,its; PetscReal norm; PetscMPIInt rank,size; MPI_Comm comm,subcomm; PetscSubcomm psubcomm; PetscInt nsubcomm=1,id; PetscScalar *barray,*xarray,*uarray,*array,one=1.0; PetscInt type=1; PetscInitialize(&argc,&args,(char*)0,help); /* Load the matrix */ ierr = PetscOptionsGetString(NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD,1,"Must indicate binary file with the -f option"); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr); /* Load the matrix; then destroy the viewer.*/ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatLoad(A,fd);CHKERRQ(ierr); ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* Create rhs vector b */ ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr); ierr = VecSetSizes(b,m,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(b);CHKERRQ(ierr); ierr = VecSet(b,one);CHKERRQ(ierr); ierr = VecDuplicate(b,&x);CHKERRQ(ierr); ierr = VecDuplicate(b,&u);CHKERRQ(ierr); ierr = VecSet(x,0.0);CHKERRQ(ierr); /* Test MatGetMultiProcBlock() */ ierr = PetscOptionsGetInt(NULL,"-nsubcomm",&nsubcomm,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-subcomm_type",&type,NULL);CHKERRQ(ierr); ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); if (type == PETSC_SUBCOMM_GENERAL) { /* user provides color, subrank and duprank */ PetscMPIInt color,subrank,duprank,subsize; duprank = size-1 - rank; subsize = size/nsubcomm; if (subsize*nsubcomm != size) SETERRQ2(comm,PETSC_ERR_SUP,"This example requires nsubcomm %D divides nproc %D",nsubcomm,size); color = duprank/subsize; subrank = duprank - color*subsize; ierr = PetscSubcommSetTypeGeneral(psubcomm,color,subrank,duprank);CHKERRQ(ierr); } else if (type == PETSC_SUBCOMM_CONTIGUOUS) { ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); } else if (type == PETSC_SUBCOMM_INTERLACED) { ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr); } else SETERRQ1(psubcomm->parent,PETSC_ERR_SUP,"PetscSubcommType %D is not supported yet",type); subcomm = psubcomm->comm; ierr = PetscOptionsHasName(NULL, "-subcomm_view", &flg);CHKERRQ(ierr); if (flg) { PetscMPIInt subsize,subrank,duprank; ierr = MPI_Comm_size((MPI_Comm)subcomm,&subsize);CHKERRQ(ierr); ierr = MPI_Comm_rank((MPI_Comm)subcomm,&subrank);CHKERRQ(ierr); ierr = MPI_Comm_rank((MPI_Comm)psubcomm->dupparent,&duprank);CHKERRQ(ierr); ierr = PetscSynchronizedPrintf(comm,"[%D], color %D, sub-size %D, sub-rank %D, duprank %D\n",rank,psubcomm->color,subsize,subrank,duprank); ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); } /* Create subA */ ierr = MatGetMultiProcBlock(A,subcomm,MAT_INITIAL_MATRIX,&subA);CHKERRQ(ierr); /* Create sub vectors without arrays. Place b's and x's local arrays into subb and subx */ ierr = MatGetLocalSize(subA,&m,&n);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&subb);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&subx);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&subu);CHKERRQ(ierr); ierr = VecGetArray(b,&barray);CHKERRQ(ierr); ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); ierr = VecGetArray(u,&uarray);CHKERRQ(ierr); ierr = VecPlaceArray(subb,barray);CHKERRQ(ierr); ierr = VecPlaceArray(subx,xarray);CHKERRQ(ierr); ierr = VecPlaceArray(subu,uarray);CHKERRQ(ierr); /* Create linear solvers associated with subA */ ierr = KSPCreate(subcomm,&subksp);CHKERRQ(ierr); ierr = KSPSetOperators(subksp,subA,subA,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPSetFromOptions(subksp);CHKERRQ(ierr); /* Solve sub systems */ ierr = KSPSolve(subksp,subb,subx);CHKERRQ(ierr); ierr = KSPGetIterationNumber(subksp,&its);CHKERRQ(ierr); /* check residual */ ierr = MatMult(subA,subx,subu);CHKERRQ(ierr); ierr = VecAXPY(subu,-1.0,subb);CHKERRQ(ierr); ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > 1.e-4 && !rank) { ierr = PetscPrintf(PETSC_COMM_WORLD,"[%D] Number of iterations = %3D\n",rank,its);CHKERRQ(ierr); printf("Error: Residual norm of each block |subb - subA*subx |= %G\n",norm); } ierr = VecResetArray(subb);CHKERRQ(ierr); ierr = VecResetArray(subx);CHKERRQ(ierr); ierr = VecResetArray(subu);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-subvec_view",&id,&flg);CHKERRQ(ierr); if (flg && rank == id) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%D] subb:\n", rank); ierr = VecGetArray(subb,&array);CHKERRQ(ierr); for (i=0; i<m; i++) printf("%G\n",PetscRealPart(array[i])); ierr = VecRestoreArray(subb,&array);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"[%D] subx:\n", rank); ierr = VecGetArray(subx,&array);CHKERRQ(ierr); for (i=0; i<m; i++) printf("%G\n",PetscRealPart(array[i])); ierr = VecRestoreArray(subx,&array);CHKERRQ(ierr); } ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr); ierr = VecRestoreArray(u,&uarray);CHKERRQ(ierr); ierr = MatDestroy(&subA);CHKERRQ(ierr); ierr = VecDestroy(&subb);CHKERRQ(ierr); ierr = VecDestroy(&subx);CHKERRQ(ierr); ierr = VecDestroy(&subu);CHKERRQ(ierr); ierr = KSPDestroy(&subksp);CHKERRQ(ierr); ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc,char **args) { const ptrdiff_t N0=2056,N1=2056; fftw_plan bplan,fplan; fftw_complex *out; double *in1,*in2; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; PetscInt i,j; PetscMPIInt size,rank; int n,N,N_factor,NM; PetscScalar one=2.0,zero=0.5; PetscScalar two=4.0,three=8.0,four=16.0; PetscScalar a,*x_arr,*y_arr,*z_arr; PetscReal enorm; Vec fin,fout,fout1; Vec ini,final; PetscRandom rnd; PetscErrorCode ierr; PetscInt *indx3,tempindx,low,*indx4,tempindx1; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rnd);CHKERRQ(ierr); alloc_local = fftw_mpi_local_size_2d_transposed(N0,N1/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); #if defined(DEBUGGING) printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); printf("The value local_n0 is %ld from process %d\n",local_n0,rank); printf("The value local_0_start is %ld from process %d\n",local_0_start,rank); /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */ /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */ #endif /* Allocate space for input and output arrays */ in1=(double*)fftw_malloc(sizeof(double)*alloc_local*2); in2=(double*)fftw_malloc(sizeof(double)*alloc_local*2); out=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); N = 2*N0*(N1/2+1); N_factor = N0*N1; n = 2*local_n0*(N1/2+1); /* printf("The value N is %d from process %d\n",N,rank); */ /* printf("The value n is %d from process %d\n",n,rank); */ /* printf("The value n1 is %d from process %d\n",n1,rank);*/ /* Creating data vector and accompanying array with VeccreateMPIWithArray */ ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr); /* Set the vector with random data */ ierr = VecSet(fin,zero);CHKERRQ(ierr); /* for (i=0;i<N0*N1;i++) */ /* { */ /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */ /* } */ /* VecSet(fin,one); */ i =0; ierr = VecSetValues(fin,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); i =1; ierr = VecSetValues(fin,1,&i,&two,INSERT_VALUES);CHKERRQ(ierr); i =4; ierr = VecSetValues(fin,1,&i,&three,INSERT_VALUES);CHKERRQ(ierr); i =5; ierr = VecSetValues(fin,1,&i,&four,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(fin);CHKERRQ(ierr); ierr = VecAssemblyEnd(fin);CHKERRQ(ierr); ierr = VecSet(fout,zero);CHKERRQ(ierr); ierr = VecSet(fout1,zero);CHKERRQ(ierr); /* Get the meaningful portion of array */ ierr = VecGetArray(fin,&x_arr);CHKERRQ(ierr); ierr = VecGetArray(fout1,&z_arr);CHKERRQ(ierr); ierr = VecGetArray(fout,&y_arr);CHKERRQ(ierr); fplan=fftw_mpi_plan_dft_r2c_2d(N0,N1,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); bplan=fftw_mpi_plan_dft_c2r_2d(N0,N1,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); fftw_execute(fplan); fftw_execute(bplan); ierr = VecRestoreArray(fin,&x_arr); ierr = VecRestoreArray(fout1,&z_arr); ierr = VecRestoreArray(fout,&y_arr); /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecCreate(PETSC_COMM_WORLD,&ini);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&final);CHKERRQ(ierr); ierr = VecSetSizes(ini,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetSizes(final,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetFromOptions(ini);CHKERRQ(ierr); ierr = VecSetFromOptions(final);CHKERRQ(ierr); if (N1%2==0) { NM = N1+2; } else { NM = N1+1; } /*printf("The Value of NM is %d",NM); */ ierr = VecGetOwnershipRange(fin,&low,NULL); /*printf("The local index is %d from %d\n",low,rank); */ ierr = PetscMalloc1(local_n0*N1,&indx3); ierr = PetscMalloc1(local_n0*N1,&indx4); for (i=0;i<local_n0;i++) { for (j=0;j<N1;j++) { tempindx = i*N1 + j; tempindx1 = i*NM + j; indx3[tempindx]=local_0_start*N1+tempindx; indx4[tempindx]=low+tempindx1; /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ } } ierr = PetscMalloc2(local_n0*N1,&x_arr,local_n0*N1,&y_arr);CHKERRQ(ierr); /* arr must be allocated for VecGetValues() */ ierr = VecGetValues(fin,local_n0*N1,indx4,(PetscScalar*)x_arr);CHKERRQ(ierr); ierr = VecSetValues(ini,local_n0*N1,indx3,x_arr,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(ini);CHKERRQ(ierr); ierr = VecAssemblyEnd(ini);CHKERRQ(ierr); ierr = VecGetValues(fout1,local_n0*N1,indx4,y_arr); ierr = VecSetValues(final,local_n0*N1,indx3,y_arr,INSERT_VALUES); ierr = VecAssemblyBegin(final); ierr = VecAssemblyEnd(final); ierr = PetscFree2(x_arr,y_arr);CHKERRQ(ierr); /* VecScatter vecscat; IS indx1,indx2; for (i=0;i<N0;i++) { indx = i*NM; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1); indx = i*N1; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2); VecScatterCreate(fin,indx1,ini,indx2,&vecscat); VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); } */ a = 1.0/(PetscReal)N_factor; ierr = VecScale(fout1,a);CHKERRQ(ierr); ierr = VecScale(final,a);CHKERRQ(ierr); /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */ /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr); ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-10) { ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",enorm);CHKERRQ(ierr); } /* Execute fftw with function fftw_execute and destory it after execution */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr); fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr); fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr); ierr = VecDestroy(&ini);CHKERRQ(ierr); ierr = VecDestroy(&final);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rnd);CHKERRQ(ierr); ierr = PetscFree(indx3);CHKERRQ(ierr); ierr = PetscFree(indx4);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
PETSC_INTERN PetscErrorCode DMSetUp_Stag_1d(DM dm) { PetscErrorCode ierr; DM_Stag * const stag = (DM_Stag*)dm->data; PetscMPIInt size,rank; MPI_Comm comm; PetscInt j; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* Check Global size */ if (stag->N[0] < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Global grid size of %D < 1 specified",stag->N[0]); /* Local sizes */ if (stag->N[0] < size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"More ranks (%d) than elements (%D) specified",size,stag->N[0]); if (!stag->l[0]) { /* Divide equally, giving an extra elements to higher ranks */ ierr = PetscMalloc1(stag->nRanks[0],&stag->l[0]);CHKERRQ(ierr); for (j=0; j<stag->nRanks[0]; ++j) stag->l[0][j] = stag->N[0]/stag->nRanks[0] + (stag->N[0] % stag->nRanks[0] > j ? 1 : 0); } { PetscInt Nchk = 0; for (j=0; j<size; ++j) Nchk += stag->l[0][j]; if (Nchk != stag->N[0]) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Sum of specified local sizes (%D) is not equal to global size (%D)",Nchk,stag->N[0]); } stag->n[0] = stag->l[0][rank]; /* Rank (trivial in 1d) */ stag->rank[0] = rank; stag->firstRank[0] = (PetscBool)(rank == 0); stag->lastRank[0] = (PetscBool)(rank == size-1); /* Local (unghosted) numbers of entries */ stag->entriesPerElement = stag->dof[0] + stag->dof[1]; switch (stag->boundaryType[0]) { case DM_BOUNDARY_NONE: case DM_BOUNDARY_GHOSTED: stag->entries = stag->n[0] * stag->entriesPerElement + (stag->lastRank[0] ? stag->dof[0] : 0); break; case DM_BOUNDARY_PERIODIC: stag->entries = stag->n[0] * stag->entriesPerElement; break; default: SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported x boundary type %s",DMBoundaryTypes[stag->boundaryType[0]]); } /* Starting element */ stag->start[0] = 0; for(j=0; j<stag->rank[0]; ++j) stag->start[0] += stag->l[0][j]; /* Local/ghosted size and starting element */ switch (stag->boundaryType[0]) { case DM_BOUNDARY_NONE : switch (stag->stencilType) { case DMSTAG_STENCIL_NONE : /* Only dummy cells on the right */ stag->startGhost[0] = stag->start[0]; stag->nGhost[0] = stag->n[0] + (stag->lastRank[0] ? 1 : 0); break; case DMSTAG_STENCIL_STAR : case DMSTAG_STENCIL_BOX : stag->startGhost[0] = stag->firstRank[0] ? stag->start[0]: stag->start[0] - stag->stencilWidth; stag->nGhost[0] = stag->n[0]; stag->nGhost[0] += stag->firstRank[0] ? 0 : stag->stencilWidth; stag->nGhost[0] += stag->lastRank[0] ? 1 : stag->stencilWidth; break; default : SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType); } break; case DM_BOUNDARY_GHOSTED: case DM_BOUNDARY_PERIODIC: switch (stag->stencilType) { case DMSTAG_STENCIL_NONE : stag->startGhost[0] = stag->start[0]; stag->nGhost[0] = stag->n[0]; break; case DMSTAG_STENCIL_STAR : case DMSTAG_STENCIL_BOX : stag->startGhost[0] = stag->start[0] - stag->stencilWidth; /* Note that this value may be negative */ stag->nGhost[0] = stag->n[0] + 2*stag->stencilWidth; break; default : SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType); } break; default : SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported x boundary type %s",DMBoundaryTypes[stag->boundaryType[0]]); } /* Total size of ghosted/local represention */ stag->entriesGhost = stag->nGhost[0]*stag->entriesPerElement; /* Define neighbors */ ierr = PetscMalloc1(3,&stag->neighbors);CHKERRQ(ierr); if (stag->firstRank[0]) { switch (stag->boundaryType[0]) { case DM_BOUNDARY_GHOSTED: case DM_BOUNDARY_NONE: stag->neighbors[0] = -1; break; case DM_BOUNDARY_PERIODIC: stag->neighbors[0] = stag->nRanks[0]-1; break; default : SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported x boundary type %s",DMBoundaryTypes[stag->boundaryType[0]]); } } else { stag->neighbors[0] = stag->rank[0]-1; } stag->neighbors[1] = stag->rank[0]; if (stag->lastRank[0]) { switch (stag->boundaryType[0]) { case DM_BOUNDARY_GHOSTED: case DM_BOUNDARY_NONE: stag->neighbors[2] = -1; break; case DM_BOUNDARY_PERIODIC: stag->neighbors[2] = 0; break; default : SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported x boundary type %s",DMBoundaryTypes[stag->boundaryType[0]]); } } else { stag->neighbors[2] = stag->rank[0]+1; } if (stag->n[0] < stag->stencilWidth) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"DMStag 1d setup does not support local sizes (%d) smaller than the elementwise stencil width (%d)",stag->n[0],stag->stencilWidth); } /* Create global->local VecScatter and ISLocalToGlobalMapping */ { PetscInt *idxLocal,*idxGlobal,*idxGlobalAll; PetscInt i,iLocal,d,entriesToTransferTotal,ghostOffsetStart,ghostOffsetEnd,nNonDummyGhost; IS isLocal,isGlobal; /* The offset on the right (may not be equal to the stencil width, as we always have at least one ghost element, to account for the boundary point, and may with ghosted boundaries), and the number of non-dummy ghost elements */ ghostOffsetStart = stag->start[0] - stag->startGhost[0]; ghostOffsetEnd = stag->startGhost[0]+stag->nGhost[0] - (stag->start[0]+stag->n[0]); nNonDummyGhost = stag->nGhost[0] - (stag->lastRank[0] ? ghostOffsetEnd : 0) - (stag->firstRank[0] ? ghostOffsetStart : 0); /* Compute the number of non-dummy entries in the local representation This is equal to the number of non-dummy elements in the local (ghosted) representation, plus some extra entries on the right boundary on the last rank*/ switch (stag->boundaryType[0]) { case DM_BOUNDARY_GHOSTED: case DM_BOUNDARY_NONE: entriesToTransferTotal = nNonDummyGhost * stag->entriesPerElement + (stag->lastRank[0] ? stag->dof[0] : 0); break; case DM_BOUNDARY_PERIODIC: entriesToTransferTotal = stag->entriesGhost; /* No dummy points */ break; default : SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported x boundary type %s",DMBoundaryTypes[stag->boundaryType[0]]); } ierr = PetscMalloc1(entriesToTransferTotal,&idxLocal);CHKERRQ(ierr); ierr = PetscMalloc1(entriesToTransferTotal,&idxGlobal);CHKERRQ(ierr); ierr = PetscMalloc1(stag->entriesGhost,&idxGlobalAll);CHKERRQ(ierr); if (stag->boundaryType[0] == DM_BOUNDARY_NONE) { PetscInt count = 0,countAll = 0; /* Left ghost points and native points */ for (i=stag->startGhost[0], iLocal=0; iLocal<nNonDummyGhost; ++i,++iLocal) { for (d=0; d<stag->entriesPerElement; ++d,++count,++countAll) { idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobal[count] = i * stag->entriesPerElement + d; idxGlobalAll[countAll] = i * stag->entriesPerElement + d; } } /* Ghost points on the right Special case for last (partial dummy) element on the last rank */ if (stag->lastRank[0] ) { i = stag->N[0]; iLocal = (stag->nGhost[0]-ghostOffsetEnd); /* Only vertex (0-cell) dofs in global representation */ for (d=0; d<stag->dof[0]; ++d,++count,++countAll) { idxGlobal[count] = i * stag->entriesPerElement + d; idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobalAll[countAll] = i * stag->entriesPerElement + d; } for (d=stag->dof[0]; d<stag->entriesPerElement; ++d,++countAll) { /* Additional dummy entries */ idxGlobalAll[countAll] = -1; } } } else if (stag->boundaryType[0] == DM_BOUNDARY_PERIODIC) { PetscInt count = 0,iLocal = 0; /* No dummy points, so idxGlobal and idxGlobalAll are identical */ const PetscInt iMin = stag->firstRank[0] ? stag->start[0] : stag->startGhost[0]; const PetscInt iMax = stag->lastRank[0] ? stag->startGhost[0] + stag->nGhost[0] - stag->stencilWidth : stag->startGhost[0] + stag->nGhost[0]; /* Ghost points on the left */ if (stag->firstRank[0]) { for (i=stag->N[0]-stag->stencilWidth; iLocal<stag->stencilWidth; ++i,++iLocal) { for (d=0; d<stag->entriesPerElement; ++d,++count) { idxGlobal[count] = i * stag->entriesPerElement + d; idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobalAll[count] = idxGlobal[count]; } } } /* Native points */ for (i=iMin; i<iMax; ++i,++iLocal) { for (d=0; d<stag->entriesPerElement; ++d,++count) { idxGlobal[count] = i * stag->entriesPerElement + d; idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobalAll[count] = idxGlobal[count]; } } /* Ghost points on the right */ if (stag->lastRank[0]) { for (i=0; iLocal<stag->nGhost[0]; ++i,++iLocal) { for (d=0; d<stag->entriesPerElement; ++d,++count) { idxGlobal[count] = i * stag->entriesPerElement + d; idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobalAll[count] = idxGlobal[count]; } } } } else if (stag->boundaryType[0] == DM_BOUNDARY_GHOSTED) { PetscInt count = 0,countAll = 0; /* Dummy elements on the left, on the first rank */ if (stag->firstRank[0]) { for(iLocal=0; iLocal<ghostOffsetStart; ++iLocal) { /* Complete elements full of dummy entries */ for (d=0; d<stag->entriesPerElement; ++d,++countAll) { idxGlobalAll[countAll] = -1; } } i = 0; /* nonDummy entries start with global entry 0 */ } else { /* nonDummy entries start as usual */ i = stag->startGhost[0]; iLocal = 0; } /* non-Dummy entries */ { PetscInt iLocalNonDummyMax = stag->firstRank[0] ? nNonDummyGhost + ghostOffsetStart : nNonDummyGhost; for (; iLocal<iLocalNonDummyMax; ++i,++iLocal) { for (d=0; d<stag->entriesPerElement; ++d,++count,++countAll) { idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobal[count] = i * stag->entriesPerElement + d; idxGlobalAll[countAll] = i * stag->entriesPerElement + d; } } } /* (partial) dummy elements on the right, on the last rank */ if (stag->lastRank[0]) { /* First one is partial dummy */ i = stag->N[0]; iLocal = (stag->nGhost[0]-ghostOffsetEnd); for (d=0; d<stag->dof[0]; ++d,++count,++countAll) { /* Only vertex (0-cell) dofs in global representation */ idxLocal [count] = iLocal * stag->entriesPerElement + d; idxGlobal[count] = i * stag->entriesPerElement + d; idxGlobalAll[countAll] = i * stag->entriesPerElement + d; } for (d=stag->dof[0]; d<stag->entriesPerElement; ++d,++countAll) { /* Additional dummy entries */ idxGlobalAll[countAll] = -1; } for (iLocal = stag->nGhost[0] - ghostOffsetEnd + 1; iLocal < stag->nGhost[0]; ++iLocal) { /* Additional dummy elements */ for (d=0; d<stag->entriesPerElement; ++d,++countAll) { idxGlobalAll[countAll] = -1; } } } } else SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported x boundary type %s",DMBoundaryTypes[stag->boundaryType[0]]); /* Create Local IS (transferring pointer ownership) */ ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),entriesToTransferTotal,idxLocal,PETSC_OWN_POINTER,&isLocal);CHKERRQ(ierr); /* Create Global IS (transferring pointer ownership) */ ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),entriesToTransferTotal,idxGlobal,PETSC_OWN_POINTER,&isGlobal);CHKERRQ(ierr); /* Create stag->gtol, which doesn't include dummy entries */ { Vec local,global; ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm),1,stag->entries,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,stag->entriesPerElement,stag->entriesGhost,NULL,&local);CHKERRQ(ierr); ierr = VecScatterCreateWithData(global,isGlobal,local,isLocal,&stag->gtol);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); ierr = VecDestroy(&local);CHKERRQ(ierr); } /* Destroy ISs */ ierr = ISDestroy(&isLocal);CHKERRQ(ierr); ierr = ISDestroy(&isGlobal);CHKERRQ(ierr); /* Create local-to-global map (transferring pointer ownership) */ ierr = ISLocalToGlobalMappingCreate(comm,1,stag->entriesGhost,idxGlobalAll,PETSC_OWN_POINTER,&dm->ltogmap);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)dm,(PetscObject)dm->ltogmap);CHKERRQ(ierr); } /* Precompute location offsets */ ierr = DMStagComputeLocationOffsets_1d(dm);CHKERRQ(ierr); /* View from Options */ ierr = DMViewFromOptions(dm,NULL,"-dm_view");CHKERRQ(ierr); PetscFunctionReturn(0); }