/*
   MatBlockMatSetPreallocation_BlockMat - Preallocates the block-row storage of a BlockMat.

   Input Parameters:
+  A   - the matrix whose data is a Mat_BlockMat
.  bs  - block size applied to both the row and column layouts
.  nz  - block nonzeros per block row; PETSC_DEFAULT/PETSC_DECIDE is replaced by 5.
         It is only validated here: the actual allocation size is the sum of nnz[]
-  nnz - number of block nonzeros in each block row (required, one entry per block row)

   Notes:
   Only row-by-row preallocation is implemented, so a NULL nnz is rejected with
   PETSC_ERR_SUP. That check is done before anything is allocated so that a failed
   call does not leave freshly created work vectors or index arrays behind.

   On success the matrix is switched to MAT_NEW_NONZERO_ALLOCATION_ERR.
*/
PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  /* re-read the block size actually stored in the layout */
  ierr = PetscLayoutGetBlockSize(A->rmap,&bs);CHKERRQ(ierr);

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  /* PetscInt values are printed with %D so the message is correct for 64-bit indices */
  if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
  if (!nnz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

  for (i=0; i<A->rmap->n/bs; i++) {
    if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
    if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %D value %D rowlength %D",i,nnz[i],A->cmap->n/bs);
  }
  bmat->mbs = A->rmap->n/bs;

  /* work vectors of length bs; right and middle are created without storage (NULL array) */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);CHKERRQ(ierr);

  if (!bmat->imax) {
    ierr = PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));CHKERRQ(ierr);
  }

  /* the total number of blocks to allocate is the sum of the per-row requests */
  nz = 0;
  for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
    bmat->imax[i] = nnz[i];
    nz           += nnz[i];
  }

  /* bmat->ilen will count nonzeros in each row so far. */
  for (i=0; i<bmat->mbs; i++) bmat->ilen[i] = 0;

  /* allocate the matrix space */
  ierr = MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr);
  ierr = PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);

  /* row offsets are the running sum of the preallocated row lengths */
  bmat->i[0] = 0;
  for (i=1; i<bmat->mbs+1; i++) {
    bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
  }
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   BVCreate_Mat - Constructor of the BV implementation that keeps its data in a
   dense matrix.

   The template vector bv->t must be of type VECMPI or VECSEQ. A dense matrix with
   as many local rows as bv->t and bv->m global columns is created and assembled,
   two array-less vectors with the layout of bv->t are stored in bv->cv[0..1], and
   the operation table is filled in. The viewer is only installed for the
   sequential case.
*/
PETSC_EXTERN PetscErrorCode BVCreate_Mat(BV bv)
{
  PetscErrorCode ierr;
  BV_MAT         *mat;
  MPI_Comm       comm;
  PetscInt       nlocal,blocksize,k;
  PetscBool      isseq;
  char           name[50];

  PetscFunctionBegin;
  ierr = PetscNewLog(bv,&mat);CHKERRQ(ierr);
  bv->data = (void*)mat;

  /* only the two standard vector types are accepted as template */
  ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECMPI,&mat->mpi);CHKERRQ(ierr);
  if (!mat->mpi) {
    ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECSEQ,&isseq);CHKERRQ(ierr);
    if (!isseq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot create a BVMAT from a non-standard template vector");
  }
  comm = PetscObjectComm((PetscObject)bv->t);

  ierr = VecGetLocalSize(bv->t,&nlocal);CHKERRQ(ierr);
  ierr = VecGetBlockSize(bv->t,&blocksize);CHKERRQ(ierr);

  /* dense storage: nlocal local rows, bv->m global columns */
  ierr = MatCreateDense(comm,nlocal,PETSC_DECIDE,PETSC_DECIDE,bv->m,NULL,&mat->A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)bv,(PetscObject)mat->A);CHKERRQ(ierr);
  if (((PetscObject)bv)->name) {
    ierr = PetscSNPrintf(name,sizeof(name),"%s_0",((PetscObject)bv)->name);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)mat->A,name);CHKERRQ(ierr);
  }

  /* the two cv vectors are created without storage (NULL array) */
  for (k=0;k<2;k++) {
    if (mat->mpi) {
      ierr = VecCreateMPIWithArray(comm,blocksize,nlocal,PETSC_DECIDE,NULL,&bv->cv[k]);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeqWithArray(comm,blocksize,nlocal,NULL,&bv->cv[k]);CHKERRQ(ierr);
    }
  }

  /* operation table; orthogonalize is left unset (its assignment is disabled in this version) */
  bv->ops->mult             = BVMult_Mat;
  bv->ops->multvec          = BVMultVec_Mat;
  bv->ops->multinplace      = BVMultInPlace_Mat;
  bv->ops->multinplacetrans = BVMultInPlaceTranspose_Mat;
  bv->ops->axpy             = BVAXPY_Mat;
  bv->ops->dot              = BVDot_Mat;
  bv->ops->dotvec           = BVDotVec_Mat;
  bv->ops->scale            = BVScale_Mat;
  bv->ops->norm             = BVNorm_Mat;
  bv->ops->matmult          = BVMatMult_Mat;
  bv->ops->copy             = BVCopy_Mat;
  bv->ops->resize           = BVResize_Mat;
  bv->ops->getcolumn        = BVGetColumn_Mat;
  bv->ops->restorecolumn    = BVRestoreColumn_Mat;
  bv->ops->getarray         = BVGetArray_Mat;
  bv->ops->restorearray     = BVRestoreArray_Mat;
  bv->ops->destroy          = BVDestroy_Mat;
  if (!mat->mpi) bv->ops->view = BVView_Mat;
  PetscFunctionReturn(0);
}
/*
   PCSetUp_TFS - Sets up the TFS preconditioner for a square MPIAIJ matrix.

   Builds the 1-based local-to-global column numbering (diagonal-block columns
   first, then the off-diagonal columns taken from garray), creates the
   array-less work vectors b, xd and xo, and factors with XXT when A->symmetric
   is set or with XYT otherwise, installing the matching apply routine.
*/
static PetscErrorCode PCSetUp_TFS(PC pc)
{
  PC_TFS         *tfs = (PC_TFS*)pc->data;
  Mat            A    = pc->pmat;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *l2g,ntotal,ndiag,noff,k;
  PetscBool      ismpiaij;

  PetscFunctionBegin;
  if (A->cmap->N != A->rmap->N) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_SIZ,"matrix must be square");
  ierr = PetscObjectTypeCompare((PetscObject)pc->pmat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
  if (!ismpiaij) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Currently only supports MPIAIJ matrices");

  /* aij is only dereferenced from here on, after the type has been verified */
  ndiag  = aij->A->cmap->n;
  noff   = aij->B->cmap->n;
  ntotal = ndiag + noff;

  /* generate the local to global mapping (indices are shifted by one) */
  ierr = PetscMalloc((ntotal)*sizeof(PetscInt),&l2g);CHKERRQ(ierr);
  for (k=0; k<ndiag; k++) l2g[k]       = A->cmap->rstart + k + 1;
  for (k=0; k<noff; k++)  l2g[ndiag+k] = aij->garray[k] + 1;

  /* generate the vectors needed for the local solves */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,aij->A->rmap->n,PETSC_NULL,&tfs->b);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,ndiag,PETSC_NULL,&tfs->xd);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,noff,PETSC_NULL,&tfs->xo);CHKERRQ(ierr);
  tfs->nd = ndiag;

  ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);

  /* the choice of factorization relies on the symmetric flag already stored in A */
  if (A->symmetric) {
    tfs->xxt = XXT_new();
    ierr = XXT_factor(tfs->xxt,l2g,A->rmap->n,ntotal,(void*)PCTFSLocalMult_TFS,pc);CHKERRQ(ierr);
    pc->ops->apply = PCApply_TFS_XXT;
  } else {
    tfs->xyt = XYT_new();
    ierr = XYT_factor(tfs->xyt,l2g,A->rmap->n,ntotal,(void*)PCTFSLocalMult_TFS,pc);CHKERRQ(ierr);
    pc->ops->apply = PCApply_TFS_XYT;
  }

  ierr = PetscFree(l2g);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int Epetra_PETScAIJMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { (void)TransA; int NumVectors = X.NumVectors(); if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1); // X and Y must have same number of vectors double ** xptrs; double ** yptrs; X.ExtractView(&xptrs); Y.ExtractView(&yptrs); if (RowMatrixImporter()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors); ImportVector_->Import(X, *RowMatrixImporter(), Insert); ImportVector_->ExtractView(&xptrs); } double *vals=0; int length; Vec petscX, petscY; int ierr; for (int i=0; i<NumVectors; i++) { # ifdef HAVE_MPI ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # else //FIXME untested ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # endif ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr); ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr); ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr); for (int j=0; j<length; j++) yptrs[i][j] = vals[j]; ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr); } VecDestroy(petscX); VecDestroy(petscY); double flops = NumGlobalNonzeros(); flops *= 2.0; flops *= (double) NumVectors; UpdateFlops(flops); return(0); } //Multiply()
/*@
   DMDAGlobalToNaturalAllCreate - Creates a scatter context that maps from the
     global vector to a copy of the entire vector, in natural numbering, on each processor

   Collective on DMDA

   Input Parameter:
.  da - the distributed array context

   Output Parameter:
.  scatter - the scatter context

   Level: advanced

.keywords: distributed array, global to local, begin, coarse problem

.seealso: DMDAGlobalToNaturalEnd(), DMLocalToGlobalBegin(), DMDACreate2d(),
          DMGlobalToLocalBegin(), DMGlobalToLocalEnd(), DMDACreateNaturalVector()
@*/
PetscErrorCode DMDAGlobalToNaturalAllCreate(DM da,VecScatter *scatter)
{
  PetscErrorCode ierr;
  DM_DA          *dd = (DM_DA*)da->data;
  MPI_Comm       comm;
  PetscInt       nglobal;
  IS             src,dst;
  Vec            parvec,seqvec;
  AO             ao;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(da,DM_CLASSID,1);
  PetscValidPointer(scatter,2);
  comm = PetscObjectComm((PetscObject)da);
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);

  /* array-less parallel vector, used to obtain the global size and as scatter source */
  ierr = VecCreateMPIWithArray(comm,dd->w,dd->Nlocal,PETSC_DETERMINE,NULL,&parvec);CHKERRQ(ierr);
  ierr = VecGetSize(parvec,&nglobal);CHKERRQ(ierr);

  /* destination indices 0..N-1 are translated from PETSc to application ordering */
  ierr = ISCreateStride(comm,nglobal,0,1,&dst);CHKERRQ(ierr);
  ierr = AOPetscToApplicationIS(ao,dst);CHKERRQ(ierr);
  ierr = ISCreateStride(comm,nglobal,0,1,&src);CHKERRQ(ierr);

  /* array-less sequential vector holding all N entries on this process */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dd->w,nglobal,NULL,&seqvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(parvec,src,seqvec,dst,scatter);CHKERRQ(ierr);

  /* the template vectors and index sets are released once the scatter exists */
  ierr = VecDestroy(&seqvec);CHKERRQ(ierr);
  ierr = VecDestroy(&parvec);CHKERRQ(ierr);
  ierr = ISDestroy(&src);CHKERRQ(ierr);
  ierr = ISDestroy(&dst);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
void PetscVector::copyFromArray( double v[] ) { int ierr; Vec sv; IS is; VecScatter ctx; ierr = VecCreateSeqWithArray(PETSC_COMM_SELF, n, v, &sv); assert(ierr == 0); ierr = ISCreateStride(PETSC_COMM_WORLD, n, 0, 1, &is); assert( ierr == 0); ierr = VecScatterCreate( sv, is, pv, is, &ctx); assert( ierr == 0); ierr = VecScatterBegin( sv, pv,INSERT_VALUES,SCATTER_FORWARD, ctx); assert( ierr == 0); ierr = VecScatterEnd( sv, pv,INSERT_VALUES,SCATTER_FORWARD, ctx); assert( ierr == 0); ierr = VecScatterDestroy(ctx); assert( ierr == 0); ierr = ISDestroy( is ); assert(ierr == 0); ierr = VecDestroy( sv ); assert(ierr == 0); }
void PetscSparseMtrx :: times(const FloatArray &x, FloatArray &answer) const { if ( this->giveNumberOfColumns() != x.giveSize() ) { OOFEM_ERROR("Dimension mismatch"); } #ifdef __PARALLEL_MODE if ( emodel->isParallel() ) { OOFEM_ERROR("PetscSparseMtrx :: times - Not implemented"); } #endif Vec globX, globY; VecCreateSeqWithArray(PETSC_COMM_SELF, 1, x.giveSize(), x.givePointer(), & globX); VecCreate(PETSC_COMM_SELF, & globY); VecSetType(globY, VECSEQ); VecSetSizes(globY, PETSC_DECIDE, this->nRows); MatMult(this->mtrx, globX, globY); double *ptr; VecGetArray(globY, & ptr); answer.resize(this->nRows); for ( int i = 0; i < this->nRows; i++ ) { answer(i) = ptr [ i ]; } VecRestoreArray(globY, & ptr); VecDestroy(&globX); VecDestroy(&globY); }
/*@
   DMDANaturalAllToGlobalCreate - Creates a scatter context that maps from a copy
     of the entire vector on each processor to its local part in the global vector.

   Collective on DMDA

   Input Parameter:
.  da - the distributed array context

   Output Parameter:
.  scatter - the scatter context

   Level: advanced

.keywords: distributed array, global to local, begin, coarse problem

.seealso: DMDAGlobalToNaturalEnd(), DMLocalToGlobalBegin(), DMDACreate2d(),
          DMGlobalToLocalBegin(), DMGlobalToLocalEnd(), DMDACreateNaturalVector()
@*/
PetscErrorCode DMDANaturalAllToGlobalCreate(DM da,VecScatter *scatter)
{
  PetscErrorCode ierr;
  DM_DA          *dd = (DM_DA*)da->data;
  PetscInt       nlocal = dd->Nlocal,ntotal,first;
  MPI_Comm       comm;
  IS             src,dst;
  Vec            seqvec,parvec;
  AO             ao;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(da,DM_CLASSID,1);
  PetscValidPointer(scatter,2);
  comm = PetscObjectComm((PetscObject)da);
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);

  /* total length is the sum of the local lengths over the communicator */
  ierr = MPI_Allreduce(&nlocal,&ntotal,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);

  /* array-less parallel vector, used for its ownership range and as scatter target */
  ierr = VecCreateMPIWithArray(comm,dd->w,nlocal,PETSC_DETERMINE,NULL,&parvec);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(parvec,&first,NULL);CHKERRQ(ierr);

  /* source indices: the locally owned range, translated to application ordering */
  ierr = ISCreateStride(comm,nlocal,first,1,&src);CHKERRQ(ierr);
  ierr = AOPetscToApplicationIS(ao,src);CHKERRQ(ierr);
  ierr = ISCreateStride(comm,nlocal,first,1,&dst);CHKERRQ(ierr);

  /* array-less sequential vector standing in for the full copy on this process */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dd->w,ntotal,NULL,&seqvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(seqvec,src,parvec,dst,scatter);CHKERRQ(ierr);

  ierr = VecDestroy(&seqvec);CHKERRQ(ierr);
  ierr = VecDestroy(&parvec);CHKERRQ(ierr);
  ierr = ISDestroy(&src);CHKERRQ(ierr);
  ierr = ISDestroy(&dst);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
// Returns the scaled L2 norm of x for the given DOF type.
//
// Without MPI support, or when the process administrator is not parallel, the
// call is forwarded to SAM::normL2. Otherwise x is scattered into a
// distributed vector using the index sets cached in dofIS[dofType] (set up
// and given a scatter context on first use), and the 2-norm of that vector
// divided by sqrt(global size) is returned.
Real SAMpatchPETSc::normL2(const Vector& x, char dofType) const
{
#ifdef HAVE_MPI
  if (!adm.isParallel())
    return this->SAM::normL2(x, dofType);

  if (dofIS.find(dofType) == dofIS.end())
    setupIS(dofType);

  // wrap the local values without copying
  Vec localVec;
  VecCreateSeqWithArray(PETSC_COMM_SELF, 1, x.size(), x.data(), &localVec);

  Vec globalVec;
  VecCreate(*adm.getCommunicator(), &globalVec);
  VecSetSizes(globalVec, dofIS[dofType].nDofs, PETSC_DETERMINE);
  VecSetFromOptions(globalVec);

  PetscInt globalSize;
  VecGetSize(globalVec, &globalSize);

  // the scatter context is built once per DOF type and then reused
  if (!dofIS[dofType].scatterCreated) {
    VecScatterCreate(localVec, dofIS[dofType].local,
                     globalVec, dofIS[dofType].global,
                     &dofIS[dofType].ctx);
    dofIS[dofType].scatterCreated = true;
  }

  VecScatterBegin(dofIS[dofType].ctx, localVec, globalVec, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(dofIS[dofType].ctx, localVec, globalVec, INSERT_VALUES, SCATTER_FORWARD);

  PetscReal norm2;
  VecNorm(globalVec, NORM_2, &norm2);

  VecDestroy(&localVec);
  VecDestroy(&globalVec);

  return norm2 / sqrt(double(globalSize));
#else
  return this->SAM::normL2(x, dofType);
#endif
}
// Write the sub mesh into a HDF5 file. PetscErrorCode ProbeVolume::writeSubMeshHDF5(const std::string &filePath) { PetscErrorCode ierr; PetscFunctionBeginUser; // only the first process in the communicator write the sub-mesh into a file if (commRank == 0) { // because only one process is involved in writing the sub-mesh, // we need to create a temporary viewer PetscViewer viewer2; ierr = PetscViewerCreate(PETSC_COMM_SELF, &viewer2); CHKERRQ(ierr); ierr = PetscViewerSetType(viewer2, PETSCVIEWERHDF5); CHKERRQ(ierr); ierr = PetscViewerFileSetMode(viewer2, FILE_MODE_WRITE); CHKERRQ(ierr); ierr = PetscViewerFileSetName( viewer2, filePath.c_str()); CHKERRQ(ierr); ierr = PetscViewerHDF5PushGroup(viewer2, "mesh"); CHKERRQ(ierr); std::vector<std::string> dirs{"x", "y", "z"}; for (unsigned int d = 0; d < coord.size(); ++d) { Vec tmp; ierr = VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nPtsDir[d], &coord[d][0], &tmp); CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) tmp, dirs[d].c_str()); CHKERRQ(ierr); ierr = VecView(tmp, viewer2); CHKERRQ(ierr); ierr = VecDestroy(&tmp); CHKERRQ(ierr); } ierr = PetscViewerDestroy(&viewer2); CHKERRQ(ierr); } PetscFunctionReturn(0); } // ProbeVolume::writeSubMeshHDF5
/*
   BVCreate_Contiguous - Constructor of the BV implementation that stores all
   columns in one contiguous, zero-initialized array.

   The template vector bv->t must be of type VECMPI or VECSEQ. One array of
   bv->m*nloc scalars is allocated, and bv->m vectors are created on top of it,
   the j-th one using the storage starting at offset j*nloc. If the BV has a
   name, the vectors are named "<name>_<j>".
*/
PETSC_EXTERN PetscErrorCode BVCreate_Contiguous(BV bv)
{
  PetscErrorCode ierr;
  BV_CONTIGUOUS  *cont;
  MPI_Comm       comm;
  PetscInt       col,nlocal,blocksize;
  PetscBool      isseq;
  char           label[50];

  PetscFunctionBegin;
  ierr = PetscNewLog(bv,&cont);CHKERRQ(ierr);
  bv->data = (void*)cont;

  /* only the two standard vector types are accepted as template */
  ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECMPI,&cont->mpi);CHKERRQ(ierr);
  if (!cont->mpi) {
    ierr = PetscObjectTypeCompare((PetscObject)bv->t,VECSEQ,&isseq);CHKERRQ(ierr);
    if (!isseq) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot create a contiguous BV from a non-standard template vector");
  }
  comm = PetscObjectComm((PetscObject)bv->t);

  ierr = VecGetLocalSize(bv->t,&nlocal);CHKERRQ(ierr);
  ierr = VecGetBlockSize(bv->t,&blocksize);CHKERRQ(ierr);

  /* one zeroed slab of storage shared by all columns */
  ierr = PetscMalloc1(bv->m*nlocal,&cont->array);CHKERRQ(ierr);
  ierr = PetscMemzero(cont->array,bv->m*nlocal*sizeof(PetscScalar));CHKERRQ(ierr);

  /* each vector is a view of its own nlocal-sized slice of the slab */
  ierr = PetscMalloc1(bv->m,&cont->V);CHKERRQ(ierr);
  for (col=0;col<bv->m;col++) {
    PetscScalar *slice = cont->array+col*nlocal;
    if (cont->mpi) {
      ierr = VecCreateMPIWithArray(comm,blocksize,nlocal,PETSC_DECIDE,slice,cont->V+col);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeqWithArray(comm,blocksize,nlocal,slice,cont->V+col);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogObjectParents(bv,bv->m,cont->V);CHKERRQ(ierr);

  if (((PetscObject)bv)->name) {
    for (col=0;col<bv->m;col++) {
      ierr = PetscSNPrintf(label,sizeof(label),"%s_%D",((PetscObject)bv)->name,col);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)cont->V[col],label);CHKERRQ(ierr);
    }
  }

  /* operation table; orthogonalize, restorecolumn and restorearray are not set here */
  bv->ops->mult             = BVMult_Contiguous;
  bv->ops->multvec          = BVMultVec_Contiguous;
  bv->ops->multinplace      = BVMultInPlace_Contiguous;
  bv->ops->multinplacetrans = BVMultInPlaceTranspose_Contiguous;
  bv->ops->axpy             = BVAXPY_Contiguous;
  bv->ops->dot              = BVDot_Contiguous;
  bv->ops->dotvec           = BVDotVec_Contiguous;
  bv->ops->scale            = BVScale_Contiguous;
  bv->ops->norm             = BVNorm_Contiguous;
  bv->ops->matmult          = BVMatMult_Contiguous;
  bv->ops->copy             = BVCopy_Contiguous;
  bv->ops->resize           = BVResize_Contiguous;
  bv->ops->getcolumn        = BVGetColumn_Contiguous;
  bv->ops->getarray         = BVGetArray_Contiguous;
  bv->ops->destroy          = BVDestroy_Contiguous;
  PetscFunctionReturn(0);
}
void PetscSparseMtrx :: times(const FloatMatrix &B, FloatMatrix &answer) const { if ( this->giveNumberOfColumns() != B.giveNumberOfRows() ) { OOFEM_ERROR("Dimension mismatch"); } #ifdef __PARALLEL_MODE if ( emodel->isParallel() ) { OOFEM_ERROR("PetscSparseMtrx :: times - Not implemented"); } #endif // I'm opting to work with a set of vectors, as i think it might be faster and more robust. / Mikael int nr = this->giveNumberOfRows(); int nc = B.giveNumberOfColumns(); answer.resize(nr, nc); double *aptr = answer.givePointer(); #if 0 // Approach using several vectors. Not sure if it is optimal, but it includes petsc calls which i suspect are inefficient. / Mikael // UNTESTED! Vec globX, globY; VecCreate(PETSC_COMM_SELF, &globY); VecSetType(globY, VECSEQ); VecSetSizes(globY, PETSC_DECIDE, nr); int nrB = B.giveNumberOfRows(); for (int k = 0; k < nc; k++) { double colVals[nrB]; for (int i = 0; i < nrB; i++) colVals[i] = B(i,k); // B.copyColumn(Bk,k); VecCreateSeqWithArray(PETSC_COMM_SELF, nrB, colVals, &globX); MatMult(this->mtrx, globX, globY ); double *ptr; VecGetArray(globY, &ptr); for (int i = 0; i < nr; i++) *aptr++ = ptr[i]; // answer.setColumn(Ak,k); VecRestoreArray(globY, &ptr); VecDestroy(globX); } VecDestroy(globY); #endif Mat globB, globC; MatCreateSeqDense(PETSC_COMM_SELF, B.giveNumberOfRows(), B.giveNumberOfColumns(), B.givePointer(), & globB); MatMatMult(this->mtrx, globB, MAT_INITIAL_MATRIX, PETSC_DEFAULT, & globC); const double *vals; for ( int r = 0; r < nr; r++ ) { MatGetRow(globC, r, NULL, NULL, & vals); for ( int i = 0, i2 = r; i < nc; i++, i2 += nr ) { aptr [ i2 ] = vals [ i ]; } MatRestoreRow(globC, r, NULL, NULL, & vals); } MatDestroy(&globB); MatDestroy(&globC); }
/*@
   VecMPISetGhost - Sets the ghost points for an MPI ghost vector

   Collective on Vec

   Input Parameters:
+  vv - the MPI vector
.  nghost - number of local ghost points
-  ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)

   Notes:
   Use VecGhostGetLocalForm() to access the local, ghosted representation
   of the vector.

   This also automatically sets the ISLocalToGlobalMapping() for this vector.

   You must call this AFTER you have set the type of the vector (with VecSetType()) and the size (with VecSetSizes()).

   Level: advanced

   Concepts: vectors^ghosted

.seealso: VecCreateSeq(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateMPI(),
          VecGhostGetLocalForm(), VecGhostRestoreLocalForm(), VecGhostUpdateBegin(),
          VecCreateGhostWithArray(), VecCreateMPIWithArray(), VecGhostUpdateEnd(),
          VecCreateGhostBlock(), VecCreateGhostBlockWithArray()

@*/
PetscErrorCode  VecMPISetGhost(Vec vv,PetscInt nghost,const PetscInt ghosts[])
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)vv,VECMPI,&flg);CHKERRQ(ierr);
  /* if already a fully existent VECMPI then basically destroy it and rebuild with ghosting */
  if (flg) {
    PetscInt               n,N;
    Vec_MPI                *w;
    PetscScalar            *larray;
    IS                     from,to;
    ISLocalToGlobalMapping ltog;
    PetscInt               rstart,i,*indices;
    MPI_Comm               comm = ((PetscObject)vv)->comm;

    /* remember the sizes, tear down the implementation data and rebuild it with room for ghosts */
    n = vv->map->n;
    N = vv->map->N;
    ierr = (*vv->ops->destroy)(vv);CHKERRQ(ierr);
    ierr = VecSetSizes(vv,n,N);CHKERRQ(ierr);
    ierr = VecCreate_MPI_Private(vv,PETSC_TRUE,nghost,PETSC_NULL);CHKERRQ(ierr);
    w    = (Vec_MPI *)(vv)->data;

    /* Create local representation: a sequential vector of length n+nghost on the same array */
    ierr = VecGetArray(vv,&larray);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,n+nghost,larray,&w->localrep);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(vv,w->localrep);CHKERRQ(ierr);
    ierr = VecRestoreArray(vv,&larray);CHKERRQ(ierr);

    /*
       Create scatter context for scattering (updating) ghost values:
       global entries listed in ghosts[] go to local positions n..n+nghost-1
    */
    ierr = ISCreateGeneral(comm,nghost,ghosts,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,nghost,n,1,&to);CHKERRQ(ierr);
    ierr = VecScatterCreate(vv,from,w->localrep,to,&w->localupdate);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(vv,w->localupdate);CHKERRQ(ierr);
    ierr = ISDestroy(&to);CHKERRQ(ierr);
    ierr = ISDestroy(&from);CHKERRQ(ierr);

    /* set local to global mapping for ghosted vector: owned entries first, then the ghosts */
    ierr = PetscMalloc((n+nghost)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vv,&rstart,PETSC_NULL);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      indices[i] = rstart + i;
    }
    for (i=0; i<nghost; i++) {
      indices[n+i] = ghosts[i];
    }
    /* indices is handed over to the mapping (PETSC_OWN_POINTER), so it is not freed here */
    ierr = ISLocalToGlobalMappingCreate(comm,n+nghost,indices,PETSC_OWN_POINTER,&ltog);CHKERRQ(ierr);
    ierr = VecSetLocalToGlobalMapping(vv,ltog);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
  } else if (vv->ops->create == VecCreate_MPI) SETERRQ(((PetscObject)vv)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must set local or global size before setting ghosting");
  else if (!((PetscObject)vv)->type_name) SETERRQ(((PetscObject)vv)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must set type to VECMPI before ghosting");
  PetscFunctionReturn(0);
}
static Vec op_create_vec ( const op_dat vec ) { assert( vec ); Vec p_vec; // Create a PETSc vector and pass it the user-allocated storage VecCreateSeqWithArray(MPI_COMM_SELF,vec->dim * vec->set->size,(PetscScalar*)vec->data,&p_vec); VecAssemblyBegin(p_vec); VecAssemblyEnd(p_vec); return p_vec; }
/*@C VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor; the caller allocates the array space. Indices in the ghost region are based on blocks. Collective on MPI_Comm Input Parameters: + comm - the MPI communicator to use . bs - block size . n - local vector length . N - global vector length (or PETSC_DECIDE to have calculated if n is given) . nghost - number of local ghost blocks . ghosts - global indices of ghost blocks (or PETSC_NULL if not needed), counts are by block not by index, these do not need to be in increasing order (sorted) - array - the space to store the vector values (as long as n + nghost*bs) Output Parameter: . vv - the global vector representation (without ghost points as part of vector) Notes: Use VecGhostGetLocalForm() to access the local, ghosted representation of the vector. n is the local vector size (total local size not the number of blocks) while nghost is the number of blocks in the ghost portion, i.e. the number of elements in the ghost portion is bs*nghost Level: advanced Concepts: vectors^creating ghosted Concepts: vectors^creating with array .seealso: VecCreate(), VecGhostGetLocalForm(), VecGhostRestoreLocalForm(), VecCreateGhost(), VecCreateSeqWithArray(), VecCreateMPIWithArray(), VecCreateGhostWithArray(), VecCreateGhostBlock() @*/ PetscErrorCode VecCreateGhostBlockWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,PetscInt nghost,const PetscInt ghosts[],const PetscScalar array[],Vec *vv) { PetscErrorCode ierr; Vec_MPI *w; PetscScalar *larray; IS from,to; ISLocalToGlobalMapping ltog; PetscInt rstart,i,nb,*indices; PetscFunctionBegin; *vv = 0; if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local size"); if (nghost == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local ghost size"); if (nghost < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ghost length must be >= 0"); if (n % bs) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local size must be a multiple of block size"); ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); /* Create global representation */ ierr = VecCreate(comm,vv);CHKERRQ(ierr); ierr = VecSetSizes(*vv,n,N);CHKERRQ(ierr); ierr = VecSetBlockSize(*vv,bs);CHKERRQ(ierr); ierr = VecCreate_MPI_Private(*vv,PETSC_TRUE,nghost*bs,array);CHKERRQ(ierr); w = (Vec_MPI *)(*vv)->data; /* Create local representation */ ierr = VecGetArray(*vv,&larray);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs,n+bs*nghost,larray,&w->localrep);CHKERRQ(ierr); ierr = PetscLogObjectParent(*vv,w->localrep);CHKERRQ(ierr); ierr = VecRestoreArray(*vv,&larray);CHKERRQ(ierr); /* Create scatter context for scattering (updating) ghost values */ ierr = ISCreateBlock(comm,bs,nghost,ghosts,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF,bs*nghost,n,1,&to);CHKERRQ(ierr); ierr = VecScatterCreate(*vv,from,w->localrep,to,&w->localupdate);CHKERRQ(ierr); ierr = PetscLogObjectParent(*vv,w->localupdate);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); /* set local to global mapping for ghosted vector */ nb = n/bs; ierr = PetscMalloc((nb+nghost)*sizeof(PetscInt),&indices);CHKERRQ(ierr); ierr = VecGetOwnershipRange(*vv,&rstart,PETSC_NULL);CHKERRQ(ierr); for (i=0; i<nb; i++) { indices[i] = rstart + i*bs; } for (i=0; i<nghost; i++) { indices[nb+i] = ghosts[i]; } ierr = ISLocalToGlobalMappingCreate(comm,nb+nghost,indices,PETSC_OWN_POINTER,<og);CHKERRQ(ierr); ierr = VecSetLocalToGlobalMappingBlock(*vv,ltog);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*
   MatMPIAIJCRL_create_aijcrl - (Re)builds the padded, column-major copy of an
   MPIAIJ matrix kept in its Mat_AIJCRL data.

   Every local row is padded to the length of the longest row (diagonal plus
   off-diagonal entries). Entry j of row i is stored at position j*m+i of
   acols/icols; off-diagonal column indices are shifted by the number of
   diagonal-block columns. Padding entries get the value 0.0 and repeat the
   previous column index of the row (or 0 for an empty row).

   A work array of length nd + (number of off-diagonal columns) is allocated
   and shared by two vectors: xwork on its first nd entries and fwork on the rest.
*/
PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)(A)->data;
  Mat_SeqAIJ     *diag   = (Mat_SeqAIJ*)(mpiaij->A->data);
  Mat_SeqAIJ     *offd   = (Mat_SeqAIJ*)(mpiaij->B->data);
  Mat_AIJCRL     *crl    = (Mat_AIJCRL*) A->spptr;
  PetscInt       nrows   = A->rmap->n;          /* number of local rows */
  PetscInt       ndiag   = mpiaij->A->cmap->n;  /* number of columns in diagonal portion */
  PetscInt       *dcol   = diag->j,*ocol = offd->j;       /* running cursors over the column indices */
  PetscInt       *dlen   = diag->ilen,*olen = offd->ilen; /* per-row entry counts */
  PetscScalar    *dval   = diag->a,*oval = offd->a;       /* running cursors over the values */
  PetscScalar    *vals,*work;
  PetscInt       *cols;
  PetscInt       row,k,rowlen,widest = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* determine the row with the most columns */
  for (row=0; row<nrows; row++) {
    widest = PetscMax(widest,dlen[row]+olen[row]);
  }
  crl->nz   = diag->nz+offd->nz;
  crl->m    = A->rmap->n;
  crl->rmax = widest;

  ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr);
  ierr = PetscMalloc2(widest*nrows,PetscScalar,&crl->acols,widest*nrows,PetscInt,&crl->icols);CHKERRQ(ierr);
  vals = crl->acols;
  cols = crl->icols;

  for (row=0; row<nrows; row++) {
    rowlen = dlen[row]+olen[row];
    /* diagonal-block entries come first */
    for (k=0; k<dlen[row]; k++) {
      vals[k*nrows+row] = *dval++;
      cols[k*nrows+row] = *dcol++;
    }
    /* then the off-diagonal entries, with shifted column indices */
    for (; k<rowlen; k++) {
      vals[k*nrows+row] = *oval++;
      cols[k*nrows+row] = ndiag + *ocol++;
    }
    /* finally the padding; k == 0 can only happen when the row is empty */
    for (; k<widest; k++) {
      vals[k*nrows+row] = 0.0;
      cols[k*nrows+row] = (k) ? cols[(k-1)*nrows+row] : 0;
    }
  }
  ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(crl->nz))/((double)(widest*nrows)));CHKERRQ(ierr);

  ierr = PetscFree(crl->array);CHKERRQ(ierr);
  ierr = PetscMalloc((mpiaij->B->cmap->n+ndiag)*sizeof(PetscScalar),&work);CHKERRQ(ierr);

  /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */
  ierr = VecDestroy(&crl->xwork);CHKERRQ(ierr);
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),1,ndiag,PETSC_DECIDE,work,&crl->xwork);CHKERRQ(ierr);
  ierr = VecDestroy(&crl->fwork);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,mpiaij->B->cmap->n,work+ndiag,&crl->fwork);CHKERRQ(ierr);

  crl->array = work;
  crl->xscat = mpiaij->Mvctx;
  PetscFunctionReturn(0);
}
virtual void SetUp() { // setup FilePath directories FilePath::set_io_dirs(".",UNIT_TESTS_SRC_DIR,"","."); Profiler::initialize(); PetscInitialize(0,PETSC_NULL,PETSC_NULL,PETSC_NULL); FilePath mesh_file( "fields/one_element_2d.msh", FilePath::input_file); mesh= new Mesh; ifstream in(string( mesh_file ).c_str()); mesh->read_gmsh_from_stream(in); dh = new DOFHandlerMultiDim(*mesh); VecCreateSeqWithArray(PETSC_COMM_SELF, 1, 3, dof_values, &v); }
//============================================================================= int Epetra_PETScAIJMatrix::RightScale(const Epetra_Vector& X) { // // This function scales the jth row of A by x[j]. // double *xptr; X.ExtractView(&xptr); Vec petscX; # ifdef HAVE_MPI int ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptr,&petscX); CHKERRQ(ierr); # else //FIXME untested int ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptr,&petscX); CHKERRQ(ierr); # endif MatDiagonalScale(Amat_, PETSC_NULL, petscX); ierr=VecDestroy(petscX); CHKERRQ(ierr); return(0); } //RightScale()
/*
 * Builds the scatter context that gathers 4*localsizex boundary entries of *x.
 *
 *   x           - vector the entries are taken from
 *   cacheScalar - caller storage with room for 4*localsizex scalars; only used
 *                 here as the array of a temporary template vector
 *   cacheInt    - caller storage with room for 4*localsizex indices; filled
 *                 with the source indices
 *   n2, Istart  - sizes/offsets entering the index formula below
 *   localsizex  - number of indices in each of the four groups
 *   ctx         - output: the created scatter context
 *
 * Returns 0 on success, otherwise the first PETSc error code encountered.
 *
 * The temporary vector and index set are only needed while the scatter is
 * being created and are destroyed before returning (the vector used to leak).
 */
int SkewSymmetricScatter(Vec *x,PetscScalar *cacheScalar ,PetscInt *cacheInt, PetscInt n2,PetscInt Istart,PetscInt localsizex , VecScatter *ctx){
  PetscInt i,k;
  IS       isbc;
  Vec      bcvec;
  int      ierr,cleanup;

  ierr = VecCreateSeqWithArray(MPI_COMM_SELF,localsizex*4,cacheScalar,&bcvec);
  if (ierr) return ierr;

  /* four groups of localsizex indices, each stepping back by n2 per entry */
  k = 0;
  for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-1-i*n2;k++;}
  for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-2-i*n2;k++;}
  for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-n2+1-i*n2;k++;}
  for(i=0;i<localsizex;i++){*(cacheInt+k)= n2*(n2*2-Istart)-n2-i*n2;k++;}

  ierr = ISCreateGeneralWithArray(MPI_COMM_WORLD,4*localsizex,cacheInt,&isbc);
  if (ierr) {
    VecDestroy(bcvec);
    return ierr;
  }

  ierr = VecScatterCreate(*x,isbc,bcvec,PETSC_NULL,ctx);

  /* always release the temporaries; report the first failure */
  cleanup = ISDestroy(isbc);
  if (!ierr) ierr = cleanup;
  cleanup = VecDestroy(bcvec);
  if (!ierr) ierr = cleanup;

  return ierr;
}
///@todo Parallel mode of this. NM_Status PetscSolver :: solve(SparseMtrx *A, FloatMatrix &B, FloatMatrix &X) { if ( !A ) { _error("solve: Unknown Lhs"); } if ( A->giveType() != SMT_PetscMtrx ) { _error("solve: PetscSparseMtrx Expected"); } PetscSparseMtrx *Lhs = ( PetscSparseMtrx * ) A; Vec globRhsVec; Vec globSolVec; bool newLhs = true; int rows = B.giveNumberOfRows(); int cols = B.giveNumberOfColumns(); NM_Status s; X.resize(rows, cols); double *Xptr = X.givePointer(); for (int i = 0; i < cols; ++i) { VecCreateSeqWithArray(PETSC_COMM_SELF, rows, B.givePointer() + rows*i, & globRhsVec); VecDuplicate(globRhsVec, & globSolVec); s = this->petsc_solve(Lhs, globRhsVec, globSolVec, newLhs); if ( !(s & NM_Success) ) { OOFEM_WARNING2("PetscSolver :: solve - No success at solving column %d",i+1); return s; } newLhs = false; double *ptr; VecGetArray(globSolVec, & ptr); for ( int j = 0; j < rows; ++j ) { Xptr[ j + rows*i ] = ptr [ j ]; } VecRestoreArray(globSolVec, & ptr); } VecDestroy(globSolVec); VecDestroy(globRhsVec); return s; }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscMPIInt size; PetscInt n = 10,i; PetscScalar array[10]; Vec x; ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!"); /* create vector */ for (i=0; i<n; i++) array[i] = i; n = n-1; ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,n,array+1,&x);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*
 * VecDuplicate_MPI - Creates a new vector *v with the same layout, operation
 * table, stash settings and attached object/function lists as win.
 *
 * If win carries a local (ghosted) representation, the new vector gets its own
 * local representation wrapping its own storage, and shares (by reference)
 * win's local update scatter.
 */
static PetscErrorCode VecDuplicate_MPI(Vec win,Vec *v)
{
  Vec_MPI        *parent = (Vec_MPI*)win->data;
  Vec_MPI        *child;
  Vec            dup;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
  dup  = *v;

  /* share the parent's layout, then build the MPI data with the same ghost count */
  ierr  = PetscLayoutReference(win->map,&dup->map);CHKERRQ(ierr);
  ierr  = VecCreate_MPI_Private(dup,PETSC_TRUE,parent->nghost,0);CHKERRQ(ierr);
  child = (Vec_MPI*)dup->data;

  /* the duplicate uses exactly the parent's operation table */
  ierr = PetscMemcpy(dup->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);

  /* carry over the local representation of the parallel vector (and its scatter), if any */
  if (parent->localrep) {
    ierr = VecGetArray(dup,&values);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,win->map->bs,win->map->n+parent->nghost,values,&child->localrep);CHKERRQ(ierr);
    ierr = PetscMemcpy(child->localrep->ops,parent->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
    ierr = VecRestoreArray(dup,&values);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(dup,child->localrep);CHKERRQ(ierr);

    child->localupdate = parent->localupdate;
    if (child->localupdate) {
      /* the scatter is shared, so take an extra reference */
      ierr = PetscObjectReference((PetscObject)child->localupdate);CHKERRQ(ierr);
    }
  }

  /* the stashing behaviour is inherited from the parent */
  dup->stash.donotstash   = win->stash.donotstash;
  dup->stash.ignorenegidx = win->stash.ignorenegidx;

  /* duplicate the composed objects and composed functions */
  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)dup)->olist);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)dup)->qlist);CHKERRQ(ierr);

  dup->map->bs   = win->map->bs;
  dup->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(0);
}
/*
 * BVResize_Contiguous - Replaces the storage of a contiguous BV with a new
 * zero-initialised array of m columns, each of local length bv->n, and a new
 * set of m vectors wrapping those columns.
 *
 * If copy is true, the leading min(m, bv->m) columns of the old array are
 * copied into the new one. The old vectors and the old array are released.
 */
PetscErrorCode BVResize_Contiguous(BV bv,PetscInt m,PetscBool copy)
{
  BV_CONTIGUOUS  *ctx = (BV_CONTIGUOUS*)bv->data;
  const PetscInt nloc = bv->n;                               /* local length of one column */
  MPI_Comm       comm = PetscObjectComm((PetscObject)bv->t);
  PetscScalar    *storage;
  Vec            *cols;
  PetscInt       col,blocksize;
  char           colname[50];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecGetBlockSize(bv->t,&blocksize);CHKERRQ(ierr);

  /* new zeroed storage and the array of column vectors */
  ierr = PetscMalloc1(m*nloc,&storage);CHKERRQ(ierr);
  ierr = PetscMemzero(storage,m*nloc*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&cols);CHKERRQ(ierr);

  /* each vector wraps one column of the new storage */
  for (col=0;col<m;col++) {
    PetscScalar *column = &storage[col*nloc];

    if (ctx->mpi) {
      ierr = VecCreateMPIWithArray(comm,blocksize,nloc,PETSC_DECIDE,column,&cols[col]);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeqWithArray(comm,blocksize,nloc,column,&cols[col]);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogObjectParents(bv,m,cols);CHKERRQ(ierr);

  /* when the BV is named, name each column "<bvname>_<index>" */
  if (((PetscObject)bv)->name) {
    for (col=0;col<m;col++) {
      ierr = PetscSNPrintf(colname,sizeof(colname),"%s_%D",((PetscObject)bv)->name,col);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)cols[col],colname);CHKERRQ(ierr);
    }
  }

  /* keep as many of the old columns as fit */
  if (copy) {
    ierr = PetscMemcpy(storage,ctx->array,PetscMin(m,bv->m)*nloc*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* swap in the new vectors and storage */
  ierr       = VecDestroyVecs(bv->m,&ctx->V);CHKERRQ(ierr);
  ctx->V     = cols;
  ierr       = PetscFree(ctx->array);CHKERRQ(ierr);
  ctx->array = storage;
  PetscFunctionReturn(0);
}
/*
 * DMSetUp_DA_1D - Sets up a one-dimensional distributed array.
 *
 * Using the global size, dof count, stencil width and boundary type stored in
 * the DM_DA data, this routine
 *   - determines the locally owned range [xs,xe) (computing dd->lx when the
 *     user did not supply it, otherwise validating the supplied values),
 *   - determines the ghosted range [Xs,Xe) and the range actually scattered
 *     into [IXs,IXe),
 *   - creates the global-to-local scatter (dd->gtol), and
 *   - creates the local-to-global mapping (da->ltogmap).
 *
 * Ranges stored in dd (xs, xe, Xs, Xe) are multiplied by the dof count.
 */
PetscErrorCode DMSetUp_DA_1D(DM da)
{
  DM_DA            *dd   = (DM_DA*)da->data;
  const PetscInt   M     = dd->M;
  const PetscInt   dof   = dd->w;
  const PetscInt   s     = dd->s;
  const PetscInt   sDist = s;  /* stencil distance in points */
  const PetscInt   *lx   = dd->lx;
  DMBoundaryType   bx    = dd->bx;
  MPI_Comm         comm;
  Vec              local, global;
  VecScatter       gtol;
  IS               to, from;
  PetscBool        flg1 = PETSC_FALSE, flg2 = PETSC_FALSE;
  PetscMPIInt      rank, size;
  PetscInt         i,*idx,nn,left,xs,xe,x,Xs,Xe,start,m,IXs,IXe;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject) da, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* in 1D every process lies along the x direction */
  dd->p = 1;
  dd->n = 1;
  dd->m = size;
  m     = dd->m;

  if (s > 0) {
    /* if not communicating data then should be ok to have nothing on some processes */
    if (M < m) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"More processes than data points! %D %D",m,M);
    if ((M-1) < s && size > 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Array is too small for stencil! %D %D",M-1,s);
  }

  /*
     Determine locally owned region
     xs is the first local node number, x is the number of local nodes
  */
  if (!lx) {
    ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,"-da_partition_blockcomm",&flg1,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,"-da_partition_nodes_at_end",&flg2,NULL);CHKERRQ(ierr);
    if (flg1) {      /* Block Comm type Distribution */
      xs = rank*M/m;
      x  = (rank + 1)*M/m - xs;
    } else if (flg2) { /* The odd nodes are evenly distributed across last nodes */
      x = (M + rank)/m;
      if (M/m == x) xs = rank*x;
      else          xs = rank*(x-1) + (M+rank)%(x*m);
    } else { /* The odd nodes are evenly distributed across the first k nodes */
      /* Regular PETSc Distribution */
      x = M/m + ((M % m) > rank);
      if (rank >= (M % m)) xs = (rank * (PetscInt)(M/m) + M % m);
      else                 xs = rank * (PetscInt)(M/m) + rank;
    }
    /* gather every process's start, then turn starts into widths */
    ierr = MPI_Allgather(&xs,1,MPIU_INT,dd->lx,1,MPIU_INT,comm);CHKERRQ(ierr);
    for (i=0; i<m-1; i++) dd->lx[i] = dd->lx[i+1] - dd->lx[i];
    dd->lx[m-1] = M - dd->lx[m-1];
  } else {
    x  = lx[rank];
    xs = 0;
    for (i=0; i<rank; i++) xs += lx[i];
    /* verify that data user provided is consistent */
    left = xs;
    for (i=rank; i<size; i++) left += lx[i];
    if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M %D %D",left,M);
  }

  /*
     check if the scatter requires more than one process neighbor or wraps
     around the domain more than once
  */
  if ((x < s) && ((M > 1) || (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);

  xe = xs + x;

  /* determine ghost region (Xs) and region scattered into (IXs) */
  if (xs-sDist > 0) {
    Xs  = xs - sDist;
    IXs = xs - sDist;
  } else {
    if (bx) Xs = xs - sDist;
    else Xs = 0;
    IXs = 0;
  }
  if (xe+sDist <= M) {
    Xe  = xe + sDist;
    IXe = xe + sDist;
  } else {
    if (bx) Xe = xe + sDist;
    else Xe = M;
    IXe = M;
  }

  if (bx == DM_BOUNDARY_PERIODIC || bx == DM_BOUNDARY_MIRROR) {
    Xs  = xs - sDist;
    Xe  = xe + sDist;
    IXs = xs - sDist;
    IXe = xe + sDist;
  }

  /* allocate the base parallel and sequential vectors */
  dd->Nlocal = dof*x;
  ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
  dd->nlocal = dof*(Xe-Xs);
  ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr);

  /* start is obtained here but not otherwise used in this routine */
  ierr = VecGetOwnershipRange(global,&start,NULL);CHKERRQ(ierr);

  /* Create Global to Local Vector Scatter Context */
  /* global to local must retrieve ghost points */
  ierr = ISCreateStride(comm,dof*(IXe-IXs),dof*(IXs-Xs),1,&to);CHKERRQ(ierr);

  ierr = PetscMalloc1(x+2*sDist,&idx);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)da,(x+2*(sDist))*sizeof(PetscInt));CHKERRQ(ierr);

  for (i=0; i<IXs-Xs; i++) idx[i] = -1; /* prepend with -1s if needed for ghosted case*/

  nn = IXs-Xs;
  if (bx == DM_BOUNDARY_PERIODIC) { /* Handle all cases with periodic first */
    for (i=0; i<sDist; i++) {  /* Left ghost points */
      if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i;
      else                 idx[nn++] = M+(xs-sDist+i);
    }

    for (i=0; i<x; i++) idx [nn++] = xs + i;  /* Non-ghost points */

    for (i=0; i<sDist; i++) { /* Right ghost points */
      if ((xe+i)<M) idx [nn++] =  xe+i;
      else          idx [nn++] = (xe+i) - M;
    }
  } else if (bx == DM_BOUNDARY_MIRROR) { /* Mirror boundary: ghost points outside the domain map back inside it */
    for (i=0; i<(sDist); i++) {  /* Left ghost points */
      if ((xs-sDist+i)>=0) idx[nn++] = xs-sDist+i;
      else                 idx[nn++] = sDist - i;
    }

    for (i=0; i<x; i++) idx [nn++] = xs + i;  /* Non-ghost points */

    for (i=0; i<(sDist); i++) { /* Right ghost points */
      if ((xe+i)<M) idx[nn++] =  xe+i;
      else          idx[nn++] = M - (i + 1);
    }
  } else {      /* Now do all cases with no periodicity */
    if (0 <= xs-sDist) {
      for (i=0; i<sDist; i++) idx[nn++] = xs - sDist + i;
    } else {
      for (i=0; i<xs; i++) idx[nn++] = i;
    }

    for (i=0; i<x; i++) idx [nn++] = xs + i;

    if ((xe+sDist)<=M) {
      for (i=0; i<sDist; i++) idx[nn++]=xe+i;
    } else {
      for (i=xe; i<M; i++) idx[nn++]=i;
    }
  }

  /* the "from" index set skips the leading -1 padding and only borrows idx */
  ierr = ISCreateBlock(comm,dof,nn-IXs+Xs,&idx[IXs-Xs],PETSC_USE_POINTER,&from);CHKERRQ(ierr);
  ierr = VecScatterCreate(global,from,local,to,&gtol);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = VecDestroy(&local);CHKERRQ(ierr);
  ierr = VecDestroy(&global);CHKERRQ(ierr);

  dd->xs = dof*xs; dd->xe = dof*xe; dd->ys = 0; dd->ye = 1; dd->zs = 0; dd->ze = 1;
  dd->Xs = dof*Xs; dd->Xe = dof*Xe; dd->Ys = 0; dd->Ye = 1; dd->Zs = 0; dd->Ze = 1;

  dd->gtol      = gtol;
  dd->base      = dof*xs;
  da->ops->view = DMView_DA_1d;

  /*
     Set the local to global ordering in the global vector, this allows use
     of VecSetValuesLocal().
  */
  for (i=0; i<Xe-IXe; i++) idx[nn++] = -1; /* pad with -1s if needed for ghosted case*/

  /* ownership of idx passes to the mapping */
  ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
/*
 * DMSetUp_DA_2D - Sets up a two-dimensional distributed array.
 *
 * Using the global sizes, dof count, stencil width/type and boundary types
 * stored in the DM_DA data, this routine
 *   - validates or chooses the m x n process grid,
 *   - determines the locally owned box [xs,xe) x [ys,ye), the ghosted box
 *     [Xs,Xe) x [Ys,Ye) and the box actually scattered into
 *     [IXs,IXe) x [IYs,IYe),
 *   - creates the local-to-global (dd->ltog) and global-to-local (dd->gtol)
 *     scatters,
 *   - records the nine neighbouring ranks in dd->neighbors, and
 *   - creates the local-to-global mappings (da->ltogmap, da->ltogmapb).
 *
 * The x ranges stored in dd (xs, xe, Xs, Xe) are multiplied by the dof count.
 */
PetscErrorCode  DMSetUp_DA_2D(DM da)
{
  DM_DA            *dd = (DM_DA*)da->data;
  const PetscInt   M            = dd->M;
  const PetscInt   N            = dd->N;
  PetscInt         m            = dd->m;
  PetscInt         n            = dd->n;
  const PetscInt   dof          = dd->w;
  const PetscInt   s            = dd->s;
  DMDABoundaryType bx           = dd->bx;
  DMDABoundaryType by           = dd->by;
  DMDAStencilType  stencil_type = dd->stencil_type;
  PetscInt         *lx          = dd->lx;
  PetscInt         *ly          = dd->ly;
  MPI_Comm         comm;
  PetscMPIInt      rank,size;
  PetscInt         xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,start,end,IXs,IXe,IYs,IYe;
  PetscInt         up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn,*idx_cpy;
  const PetscInt   *idx_full;
  PetscInt         xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count;
  PetscInt         s_x,s_y; /* s proportionalized to w */
  PetscInt         sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0;
  Vec              local,global;
  VecScatter       ltog,gtol;
  IS               to,from,ltogis;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  if (stencil_type == DMDA_STENCIL_BOX && (bx == DMDA_BOUNDARY_MIRROR || by == DMDA_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil");
  ierr = PetscObjectGetComm((PetscObject)da,&comm); CHKERRQ(ierr);
#if !defined(PETSC_USE_64BIT_INDICES)
  if (((Petsc64bitInt) M)*((Petsc64bitInt) N)*((Petsc64bitInt) dof) > (Petsc64bitInt) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof);
#endif

  if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof);
  if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s);

  ierr = MPI_Comm_size(comm,&size); CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank); CHKERRQ(ierr);

  /* validate any user-provided process counts */
  if (m != PETSC_DECIDE) {
    if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m);
    else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size);
  }
  if (n != PETSC_DECIDE) {
    if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n);
    else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size);
  }

  /* choose whichever of m, n was left to PETSc */
  if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
    if (n != PETSC_DECIDE) {
      m = size/n;
    } else if (m != PETSC_DECIDE) {
      n = size/m;
    } else {
      /* try for squarish distribution */
      m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N)));
      if (!m) m = 1;
      while (m > 0) {
        n = size/m;
        if (m*n == size) break;
        m--;
      }
      if (M > N && m < n) {
        PetscInt _m = m; m = n; n = _m;
      }
    }
    if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n ");
  } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition");

  if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
  if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);

  /*
     Determine locally owned region
     xs is the first local node number, x is the number of local nodes
  */
  if (!lx) {
    ierr = PetscMalloc(m*sizeof(PetscInt), &dd->lx); CHKERRQ(ierr);
    lx   = dd->lx;
    for (i=0; i<m; i++) {
      lx[i] = M/m + ((M % m) > i);
    }
  }
  x  = lx[rank % m];
  xs = 0;
  for (i=0; i<(rank % m); i++) {
    xs += lx[i];
  }
#if defined(PETSC_USE_DEBUG)
  left = xs;
  for (i=(rank % m); i<m; i++) {
    left += lx[i];
  }
  if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M);
#endif

  /*
     Determine locally owned region
     ys is the first local node number, y is the number of local nodes
  */
  if (!ly) {
    ierr = PetscMalloc(n*sizeof(PetscInt), &dd->ly); CHKERRQ(ierr);
    ly   = dd->ly;
    for (i=0; i<n; i++) {
      ly[i] = N/n + ((N % n) > i);
    }
  }
  y  = ly[rank/m];
  ys = 0;
  for (i=0; i<(rank/m); i++) {
    ys += ly[i];
  }
#if defined(PETSC_USE_DEBUG)
  left = ys;
  for (i=(rank/m); i<n; i++) {
    left += ly[i];
  }
  if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N);
#endif

  /*
     check if the scatter requires more than one process neighbor or wraps
     around the domain more than once
  */
  if ((x < s) && ((m > 1) || (bx == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);
  if ((y < s) && ((n > 1) || (by == DMDA_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s);
  xe = xs + x;
  ye = ys + y;

  /* determine ghost region (Xs) and region scattered into (IXs) */
  if (xs-s > 0) {
    Xs = xs - s; IXs = xs - s;
  } else {
    if (bx) {
      Xs = xs - s;
    } else {
      Xs = 0;
    }
    IXs = 0;
  }
  if (xe+s <= M) {
    Xe = xe + s; IXe = xe + s;
  } else {
    if (bx) {
      Xs = xs - s; Xe = xe + s;
    } else {
      Xe = M;
    }
    IXe = M;
  }

  if (bx == DMDA_BOUNDARY_PERIODIC || bx == DMDA_BOUNDARY_MIRROR) {
    IXs = xs - s;
    IXe = xe + s;
    Xs  = xs - s;
    Xe  = xe + s;
  }

  if (ys-s > 0) {
    Ys = ys - s; IYs = ys - s;
  } else {
    if (by) {
      Ys = ys - s;
    } else {
      Ys = 0;
    }
    IYs = 0;
  }
  if (ye+s <= N) {
    Ye = ye + s; IYe = ye + s;
  } else {
    if (by) {
      Ye = ye + s;
    } else {
      Ye = N;
    }
    IYe = N;
  }

  if (by == DMDA_BOUNDARY_PERIODIC || by == DMDA_BOUNDARY_MIRROR) {
    IYs = ys - s;
    IYe = ye + s;
    Ys  = ys - s;
    Ye  = ye + s;
  }

  /* stencil length in each direction */
  s_x = s;
  s_y = s;

  /* determine starting point of each processor */
  nn   = x*y;
  ierr = PetscMalloc2(size+1,PetscInt,&bases,size,PetscInt,&ldims); CHKERRQ(ierr);
  ierr = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm); CHKERRQ(ierr);
  bases[0] = 0;
  for (i=1; i<=size; i++) {
    bases[i] = ldims[i-1];
  }
  /* prefix sum: bases[r] becomes the first global node owned by rank r */
  for (i=1; i<=size; i++) {
    bases[i] += bases[i-1];
  }
  base = bases[rank]*dof;

  /* allocate the base parallel and sequential vectors */
  dd->Nlocal = x*y*dof;
  ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,0,&global); CHKERRQ(ierr);
  dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof;
  ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,0,&local); CHKERRQ(ierr);

  /* generate appropriate vector scatters */
  /* local to global inserts non-ghost point region into global */
  ierr = VecGetOwnershipRange(global,&start,&end); CHKERRQ(ierr);
  ierr = ISCreateStride(comm,x*y*dof,start,1,&to); CHKERRQ(ierr);

  ierr  = PetscMalloc(x*y*sizeof(PetscInt),&idx); CHKERRQ(ierr);
  left  = xs - Xs; right = left + x;
  down  = ys - Ys; up = down + y;
  count = 0;
  for (i=down; i<up; i++) {
    for (j=left; j<right; j++) {
      idx[count++] = i*(Xe-Xs) + j;
    }
  }

  /* ownership of idx passes to the index set */
  ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&from); CHKERRQ(ierr);
  ierr = VecScatterCreate(local,from,global,to,&ltog); CHKERRQ(ierr);
  ierr = PetscLogObjectParent(dd,ltog); CHKERRQ(ierr);
  ierr = ISDestroy(&from); CHKERRQ(ierr);
  ierr = ISDestroy(&to); CHKERRQ(ierr);

  /* global to local must include ghost points within the domain,
     but not ghost points outside the domain that aren't periodic */
  if (stencil_type == DMDA_STENCIL_BOX) {
    count = (IXe-IXs)*(IYe-IYs);
    ierr  = PetscMalloc(count*sizeof(PetscInt),&idx); CHKERRQ(ierr);

    left  = IXs - Xs; right = left + (IXe-IXs);
    down  = IYs - Ys; up = down + (IYe-IYs);
    count = 0;
    for (i=down; i<up; i++) {
      for (j=left; j<right; j++) {
        idx[count++] = j + i*(Xe-Xs);
      }
    }
    ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to); CHKERRQ(ierr);

  } else {
    /* must drop into cross shape region */
    /*       ---------|
            |  top    |
         |---         ---| up
         |   middle      |
         |               |
         ----         ---- down
            | bottom  |
            -----------
         Xs xs        xe Xe */
    count = (ys-IYs)*x + y*(IXe-IXs) + (IYe-ye)*x;
    ierr  = PetscMalloc(count*sizeof(PetscInt),&idx); CHKERRQ(ierr);

    left  = xs - Xs; right = left + x;
    down  = ys - Ys; up = down + y;
    count = 0;
    /* bottom */
    for (i=(IYs-Ys); i<down; i++) {
      for (j=left; j<right; j++) {
        idx[count++] = j + i*(Xe-Xs);
      }
    }
    /* middle */
    for (i=down; i<up; i++) {
      for (j=(IXs-Xs); j<(IXe-Xs); j++) {
        idx[count++] = j + i*(Xe-Xs);
      }
    }
    /* top */
    for (i=up; i<up+IYe-ye; i++) {
      for (j=left; j<right; j++) {
        idx[count++] = j + i*(Xe-Xs);
      }
    }
    ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to); CHKERRQ(ierr);
  }

  /* determine who lies on each side of us stored in    n6 n7 n8
                                                        n3    n5
                                                        n0 n1 n2
  */

  /* Assume the Non-Periodic Case */
  n1 = rank - m;
  if (rank % m) {
    n0 = n1 - 1;
  } else {
    n0 = -1;
  }
  if ((rank+1) % m) {
    n2 = n1 + 1;
    n5 = rank + 1;
    n8 = rank + m + 1; if (n8 >= m*n) n8 = -1;
  } else {
    n2 = -1; n5 = -1; n8 = -1;
  }
  if (rank % m) {
    n3 = rank - 1;
    n6 = n3 + m; if (n6 >= m*n) n6 = -1;
  } else {
    n3 = -1; n6 = -1;
  }
  n7 = rank + m; if (n7 >= m*n) n7 = -1;

  if (bx == DMDA_BOUNDARY_PERIODIC && by == DMDA_BOUNDARY_PERIODIC) {
    /* Modify for Periodic Cases */
    /* Handle all four corners */
    if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1;
    if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0;
    if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m;
    if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1;

    /* Handle Top and Bottom Sides */
    if (n1 < 0) n1 = rank + m * (n-1);
    if (n7 < 0) n7 = rank - m * (n-1);
    if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
    if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
    if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
    if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;

    /* Handle Left and Right Sides */
    if (n3 < 0) n3 = rank + (m-1);
    if (n5 < 0) n5 = rank - (m-1);
    if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
    if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
    if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
    if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
  } else if (by == DMDA_BOUNDARY_PERIODIC) {  /* Handle Top and Bottom Sides */
    if (n1 < 0) n1 = rank + m * (n-1);
    if (n7 < 0) n7 = rank - m * (n-1);
    if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
    if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
    if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
    if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
  } else if (bx == DMDA_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */
    if (n3 < 0) n3 = rank + (m-1);
    if (n5 < 0) n5 = rank - (m-1);
    if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
    if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
    if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
    if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
  }

  ierr = PetscMalloc(9*sizeof(PetscInt),&dd->neighbors); CHKERRQ(ierr);

  dd->neighbors[0] = n0;
  dd->neighbors[1] = n1;
  dd->neighbors[2] = n2;
  dd->neighbors[3] = n3;
  dd->neighbors[4] = rank;
  dd->neighbors[5] = n5;
  dd->neighbors[6] = n6;
  dd->neighbors[7] = n7;
  dd->neighbors[8] = n8;

  if (stencil_type == DMDA_STENCIL_STAR) {
    /* save corner processor numbers */
    sn0 = n0; sn2 = n2; sn6 = n6; sn8 = n8;
    n0  = n2 = n6 = n8 = -1;
  }

  ierr = PetscMalloc((Xe-Xs)*(Ye-Ys)*sizeof(PetscInt),&idx); CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(da,(Xe-Xs)*(Ye-Ys)*sizeof(PetscInt)); CHKERRQ(ierr);

  /* first pass: global indices of the points the global-to-local scatter reads */
  nn    = 0;
  xbase = bases[rank];
  for (i=1; i<=s_y; i++) {
    if (n0 >= 0) { /* left below */
      x_t = lx[n0 % m];
      y_t = ly[(n0/m)];
      s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
      for (j=0; j<s_x; j++) idx[nn++] = s_t++;
    }

    if (n1 >= 0) { /* directly below */
      x_t = x;
      y_t = ly[(n1/m)];
      s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
      for (j=0; j<x_t; j++) idx[nn++] = s_t++;
    } else if (by == DMDA_BOUNDARY_MIRROR) {
      for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
    }

    if (n2 >= 0) { /* right below */
      x_t = lx[n2 % m];
      y_t = ly[(n2/m)];
      s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
      for (j=0; j<s_x; j++) idx[nn++] = s_t++;
    }
  }

  for (i=0; i<y; i++) {
    if (n3 >= 0) { /* directly left */
      x_t = lx[n3 % m];
      /* y_t = y; */
      s_t = bases[n3] + (i+1)*x_t - s_x;
      for (j=0; j<s_x; j++) idx[nn++] = s_t++;
    } else if (bx == DMDA_BOUNDARY_MIRROR) {
      for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
    }

    for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */

    if (n5 >= 0) { /* directly right */
      x_t = lx[n5 % m];
      /* y_t = y; */
      s_t = bases[n5] + (i)*x_t;
      for (j=0; j<s_x; j++) idx[nn++] = s_t++;
    } else if (bx == DMDA_BOUNDARY_MIRROR) {
      for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
    }
  }

  for (i=1; i<=s_y; i++) {
    if (n6 >= 0) { /* left above */
      x_t = lx[n6 % m];
      /* y_t = ly[(n6/m)]; */
      s_t = bases[n6] + (i)*x_t - s_x;
      for (j=0; j<s_x; j++) idx[nn++] = s_t++;
    }

    if (n7 >= 0) { /* directly above */
      x_t = x;
      /* y_t = ly[(n7/m)]; */
      s_t = bases[n7] + (i-1)*x_t;
      for (j=0; j<x_t; j++) idx[nn++] = s_t++;
    } else if (by == DMDA_BOUNDARY_MIRROR) {
      for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
    }

    if (n8 >= 0) { /* right above */
      x_t = lx[n8 % m];
      /* y_t = ly[(n8/m)]; */
      s_t = bases[n8] + (i-1)*x_t;
      for (j=0; j<s_x; j++) idx[nn++] = s_t++;
    }
  }

  /* idx is reused below, so the index set takes a copy */
  ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_COPY_VALUES,&from); CHKERRQ(ierr);
  ierr = VecScatterCreate(global,from,local,to,&gtol); CHKERRQ(ierr);
  ierr = PetscLogObjectParent(da,gtol); CHKERRQ(ierr);
  ierr = ISDestroy(&to); CHKERRQ(ierr);
  ierr = ISDestroy(&from); CHKERRQ(ierr);

  if (stencil_type == DMDA_STENCIL_STAR) {
    n0 = sn0; n2 = sn2; n6 = sn6; n8 = sn8;
  }

  if (((stencil_type == DMDA_STENCIL_STAR) || (bx && bx != DMDA_BOUNDARY_PERIODIC) || (by && by != DMDA_BOUNDARY_PERIODIC))) {
    /*
        Recompute the local to global mappings, this time keeping the
      information about the cross corner processor numbers and any ghosted
      but not periodic indices.
    */
    nn    = 0;
    xbase = bases[rank];
    for (i=1; i<=s_y; i++) {
      if (n0 >= 0) { /* left below */
        x_t = lx[n0 % m];
        y_t = ly[(n0/m)];
        s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
        for (j=0; j<s_x; j++) idx[nn++] = s_t++;
      } else if (xs-Xs > 0 && ys-Ys > 0) {
        for (j=0; j<s_x; j++) idx[nn++] = -1;
      }
      if (n1 >= 0) { /* directly below */
        x_t = x;
        y_t = ly[(n1/m)];
        s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
        for (j=0; j<x_t; j++) idx[nn++] = s_t++;
      } else if (ys-Ys > 0) {
        if (by == DMDA_BOUNDARY_MIRROR) {
          for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
        } else {
          for (j=0; j<x; j++) idx[nn++] = -1;
        }
      }
      if (n2 >= 0) { /* right below */
        x_t = lx[n2 % m];
        y_t = ly[(n2/m)];
        s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
        for (j=0; j<s_x; j++) idx[nn++] = s_t++;
      } else if (Xe-xe> 0 && ys-Ys > 0) {
        for (j=0; j<s_x; j++) idx[nn++] = -1;
      }
    }

    for (i=0; i<y; i++) {
      if (n3 >= 0) { /* directly left */
        x_t = lx[n3 % m];
        /* y_t = y; */
        s_t = bases[n3] + (i+1)*x_t - s_x;
        for (j=0; j<s_x; j++) idx[nn++] = s_t++;
      } else if (xs-Xs > 0) {
        if (bx == DMDA_BOUNDARY_MIRROR) {
          for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
        } else {
          for (j=0; j<s_x; j++) idx[nn++] = -1;
        }
      }

      for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */

      if (n5 >= 0) { /* directly right */
        x_t = lx[n5 % m];
        /* y_t = y; */
        s_t = bases[n5] + (i)*x_t;
        for (j=0; j<s_x; j++) idx[nn++] = s_t++;
      } else if (Xe-xe > 0) {
        if (bx == DMDA_BOUNDARY_MIRROR) {
          for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
        } else {
          for (j=0; j<s_x; j++) idx[nn++] = -1;
        }
      }
    }

    for (i=1; i<=s_y; i++) {
      if (n6 >= 0) { /* left above */
        x_t = lx[n6 % m];
        /* y_t = ly[(n6/m)]; */
        s_t = bases[n6] + (i)*x_t - s_x;
        for (j=0; j<s_x; j++) idx[nn++] = s_t++;
      } else if (xs-Xs > 0 && Ye-ye > 0) {
        for (j=0; j<s_x; j++) idx[nn++] = -1;
      }
      if (n7 >= 0) { /* directly above */
        x_t = x;
        /* y_t = ly[(n7/m)]; */
        s_t = bases[n7] + (i-1)*x_t;
        for (j=0; j<x_t; j++) idx[nn++] = s_t++;
      } else if (Ye-ye > 0) {
        if (by == DMDA_BOUNDARY_MIRROR) {
          for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
        } else {
          for (j=0; j<x; j++) idx[nn++] = -1;
        }
      }
      if (n8 >= 0) { /* right above */
        x_t = lx[n8 % m];
        /* y_t = ly[(n8/m)]; */
        s_t = bases[n8] + (i-1)*x_t;
        for (j=0; j<s_x; j++) idx[nn++] = s_t++;
      } else if (Xe-xe > 0 && Ye-ye > 0) {
        for (j=0; j<s_x; j++) idx[nn++] = -1;
      }
    }
  }

  /*
     Set the local to global ordering in the global vector, this allows use
     of VecSetValuesLocal().
  */
  ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_OWN_POINTER,&ltogis); CHKERRQ(ierr);
  /* keep a private copy of the expanded (per-dof) indices in dd->idx */
  ierr = PetscMalloc(nn*dof*sizeof(PetscInt),&idx_cpy); CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(da,nn*dof*sizeof(PetscInt)); CHKERRQ(ierr);
  ierr = ISGetIndices(ltogis, &idx_full); CHKERRQ(ierr);
  ierr = PetscMemcpy(idx_cpy,idx_full,nn*dof*sizeof(PetscInt)); CHKERRQ(ierr);
  ierr = ISRestoreIndices(ltogis, &idx_full); CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingCreateIS(ltogis,&da->ltogmap); CHKERRQ(ierr);
  ierr = PetscLogObjectParent(da,da->ltogmap); CHKERRQ(ierr);
  ierr = ISDestroy(&ltogis); CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingBlock(da->ltogmap,dd->w,&da->ltogmapb); CHKERRQ(ierr);
  /* NOTE(review): this logs da->ltogmap a second time; da->ltogmapb may have been intended - confirm */
  ierr = PetscLogObjectParent(da,da->ltogmap); CHKERRQ(ierr);

  ierr  = PetscFree2(bases,ldims); CHKERRQ(ierr);
  dd->m = m;  dd->n  = n;
  /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */
  dd->xs = xs*dof; dd->xe = xe*dof; dd->ys = ys; dd->ye = ye; dd->zs = 0; dd->ze = 1;
  dd->Xs = Xs*dof; dd->Xe = Xe*dof; dd->Ys = Ys; dd->Ye = Ye; dd->Zs = 0; dd->Ze = 1;

  ierr = VecDestroy(&local); CHKERRQ(ierr);
  ierr = VecDestroy(&global); CHKERRQ(ierr);

  dd->gtol      = gtol;
  dd->ltog      = ltog;
  dd->idx       = idx_cpy;
  dd->Nl        = nn*dof;
  dd->base      = base;
  da->ops->view = DMView_DA_2d;
  dd->ltol      = NULL;
  dd->ao        = NULL;
  PetscFunctionReturn(0);
}
/*
 * PCBDDCScalingSetUp_Deluxe_Private - Builds (or refreshes) the per-subset
 * objects used by deluxe scaling.
 *
 * On the first call (deluxe_ctx->seq_n == 0) the per-subset arrays, the shared
 * workspace, the work vectors and the scatters are created. On every call the
 * dense matrices seq_mat[i] and seq_mat_inv_sum[i] are recreated on top of the
 * arrays of sub_schurs->S_Ej_all and sub_schurs->sum_S_Ej_all, and
 * seq_mat_inv_sum[i] is factored (Cholesky if sub_schurs->is_hermitian,
 * LU otherwise).
 *
 * With pcbddc->deluxe_singlemat, both matrices are collapsed into a single
 * dense matrix stored in seq_mat[i], and the change of basis is cleared from
 * the deluxe context afterwards.
 */
static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
{
  PC_BDDC             *pcbddc    = (PC_BDDC*)pc->data;
  PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
  PCBDDCSubSchurs     sub_schurs = pcbddc->sub_schurs;
  PetscScalar         *schur_vals,*sum_vals;
  const PetscInt      *all_idxs;
  PetscInt            k,largest = 0;
  PetscInt            idx_off = 0,val_off = 0; /* running offsets into all_idxs and the matrix arrays */
  PetscBool           newsetup = PETSC_FALSE;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
  if (!sub_schurs->n_subs) PetscFunctionReturn(0);

  /* per-subset arrays are allocated once; later calls must see the same count */
  if (!deluxe_ctx->seq_n) {
    deluxe_ctx->seq_n = sub_schurs->n_subs;
    ierr = PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);CHKERRQ(ierr);
    newsetup = PETSC_TRUE;
  } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);

  /* the change of basis (if any) is only borrowed from sub_schurs */
  deluxe_ctx->change         = sub_schurs->change;
  deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

  /* size the shared workspace for the largest subset: two work vectors */
  for (k=0;k<sub_schurs->n_subs;k++) {
    PetscInt nsub;

    ierr = ISGetLocalSize(sub_schurs->is_subs[k],&nsub);CHKERRQ(ierr);
    if (nsub > largest) largest = nsub;
  }
  if (newsetup) {
    ierr = PetscMalloc1(2*largest,&deluxe_ctx->workspace);CHKERRQ(ierr);
  }

  ierr = ISGetIndices(sub_schurs->is_Ej_all,&all_idxs);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(sub_schurs->S_Ej_all,&schur_vals);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&sum_vals);CHKERRQ(ierr);

  for (k=0;k<deluxe_ctx->seq_n;k++) {
    Mat      *S    = &deluxe_ctx->seq_mat[k];         /* S_E_j */
    Mat      *sumS = &deluxe_ctx->seq_mat_inv_sum[k]; /* \sum_k S^k_E_j */
    PetscInt nsub;

    ierr = ISGetLocalSize(sub_schurs->is_subs[k],&nsub);CHKERRQ(ierr);

    if (newsetup) {
      IS subset_is;

      /* both work vectors of every subset live in the shared workspace */
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nsub,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[k]);CHKERRQ(ierr);
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nsub,deluxe_ctx->workspace+nsub,&deluxe_ctx->seq_work2[k]);CHKERRQ(ierr);

      /* scatter from the scaling work vector onto this subset */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nsub,all_idxs+idx_off,PETSC_COPY_VALUES,&subset_is);CHKERRQ(ierr);
      ierr = VecScatterCreate(pcbddc->work_scaling,subset_is,deluxe_ctx->seq_work1[k],NULL,&deluxe_ctx->seq_scctx[k]);CHKERRQ(ierr);
      ierr = ISDestroy(&subset_is);CHKERRQ(ierr);
    }

    /* dense matrix on top of this subset's slice of S_Ej_all */
    ierr = MatDestroy(S);CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,nsub,nsub,schur_vals+val_off,S);CHKERRQ(ierr);

    /* dense matrix on top of this subset's slice of sum_S_Ej_all, then factored */
    ierr = MatDestroy(sumS);CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,nsub,nsub,sum_vals+val_off,sumS);CHKERRQ(ierr);
    ierr = MatSetOption(*sumS,MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr);
    ierr = MatSetOption(*sumS,MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr);
    if (sub_schurs->is_hermitian) {
      ierr = MatCholeskyFactor(*sumS,NULL,NULL);CHKERRQ(ierr);
    } else {
      ierr = MatLUFactor(*sumS,NULL,NULL,NULL);CHKERRQ(ierr);
    }

    if (pcbddc->deluxe_singlemat) {
      Mat rhs,sol;

      /* right-hand side: S itself (extra reference) when hermitian, its transpose otherwise */
      if (sub_schurs->is_hermitian) {
        ierr = PetscObjectReference((PetscObject)*S);CHKERRQ(ierr);
        rhs  = *S;
      } else {
        ierr = MatTranspose(*S,MAT_INITIAL_MATRIX,&rhs);CHKERRQ(ierr);
      }
      ierr = MatDuplicate(rhs,MAT_DO_NOT_COPY_VALUES,&sol);CHKERRQ(ierr);

      /* solve with the factored sum matrix */
      if (sub_schurs->is_hermitian) {
        ierr = MatMatSolve(*sumS,rhs,sol);CHKERRQ(ierr);
      } else {
        ierr = PCBDDCMatTransposeMatSolve_SeqDense(*sumS,rhs,sol);CHKERRQ(ierr);
      }

      /* only the solution is kept from here on */
      ierr = MatDestroy(sumS);CHKERRQ(ierr);
      ierr = MatDestroy(S);CHKERRQ(ierr);
      ierr = MatDestroy(&rhs);CHKERRQ(ierr);

      if (deluxe_ctx->change) {
        Mat C,CY;

        if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
        /* sol <- C * sol * C^T, with C the operator of the change-of-basis KSP */
        ierr = KSPGetOperators(deluxe_ctx->change[k],&C,NULL);CHKERRQ(ierr);
        ierr = MatMatMult(C,sol,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);CHKERRQ(ierr);
        ierr = MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&sol);CHKERRQ(ierr);
        ierr = MatDestroy(&CY);CHKERRQ(ierr);
      }
      ierr = MatTranspose(sol,MAT_INPLACE_MATRIX,&sol);CHKERRQ(ierr);
      *S   = sol;
    }

    idx_off += nsub;
    val_off += nsub*nsub;
  }

  ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&all_idxs);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&schur_vals);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&sum_vals);CHKERRQ(ierr);

  /* in single-matrix mode the change of basis was already applied above */
  if (pcbddc->deluxe_singlemat) {
    deluxe_ctx->change         = NULL;
    deluxe_ctx->change_with_qr = PETSC_FALSE;
  }

  /* change of basis not based on QR: set up its solvers, using LU on first setup */
  if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
    for (k=0;k<deluxe_ctx->seq_n;k++) {
      if (newsetup) {
        PC change_pc;

        ierr = KSPGetPC(deluxe_ctx->change[k],&change_pc);CHKERRQ(ierr);
        ierr = PCSetType(change_pc,PCLU);CHKERRQ(ierr);
        ierr = KSPSetFromOptions(deluxe_ctx->change[k]);CHKERRQ(ierr);
      }
      ierr = KSPSetUp(deluxe_ctx->change[k]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
int SmoothingRL(Vec *x, Vec *y, PetscScalar *cacheScalar, PetscInt *cacheInt , VecScatter *ctx,PetscInt n, PetscInt Istart, PetscInt Iend){ PetscInt rank,size; PetscScalar C0,C1,C2; PetscErrorCode ierr; PetscInt n2,i, j, k, localsizex; PetscInt nvec; Vec xbc,ybc,lvecx,lvecy; PetscScalar *xbcpt,*ybcpt,**lvecptx,**lvecpty; PetscScalar **xbcpt2,**ybcpt2; PetscInt *bcISarray; PetscInt left,right; PetscInt *bcindI,*bcindJ; nvec = 2; IS ISfrom[nvec],ISto[nvec]; VecScatter ctxt[nvec]; Vec xvec[nvec]; MPI_Comm_size(PETSC_COMM_WORLD,&size); MPI_Comm_rank(PETSC_COMM_WORLD,&rank); localsizex = Iend-Istart; C0 = 1.0/8; C1 = 1.0/4; C2 = 3.0/16; n2 = (PetscInt)(n*0.5); xbcpt = cacheScalar; ybcpt = cacheScalar; bcindI = cacheInt; bcindJ = cacheInt+ 4*n2; LargeVecCreate(x,nvec,xvec); if(rank==0){left = n-1;} else{left = Istart-1;} if(rank==size-1){right = 0;} else{right = Iend;} for(i=0;i<n2;i++){ *(bcindI+i) = left-1; *(bcindJ+i) = i; } for(i=0;i<n2;i++){ *(bcindI+n2+i) = left; *(bcindJ+n2+i) = i; } for(i=0;i<n2;i++){ *(bcindI+n2*2+i) = right; *(bcindJ+n2*2+i) = i; } for(i=0;i<n2;i++){ *(bcindI+n2*3+i) = right+1; *(bcindJ+n2*3+i) = i; } VecCreateSeqWithArray(MPI_COMM_SELF,4*n2,cacheScalar,&xbc); ISCreateGeneralWithIJ(MPI_COMM_SELF,x,xvec,nvec,n2,4*n2, bcindI, bcindJ,ISfrom, ISto); LargeVecScatterCreate(xvec,ISfrom,xbc,ISto ,ctxt,nvec); ISArrayDestroy(ISfrom,nvec); ISArrayDestroy(ISto,nvec); LargeVecScatterBeginEnd(xvec,xbc,INSERT_VALUES,SCATTER_FORWARD,ctxt,nvec); VecScatterArrayDestroy(ctxt,nvec); VecGetArray2d(*x,localsizex,n2,0,0,&lvecptx); VecGetArray2d(*y,localsizex,n2,0,0,&lvecpty); VecGetArray2d(xbc,4,n2,0,0,&xbcpt2); for(j=0;j<n2;j++){ for(i=2;i<localsizex-2;i++){ lvecpty[i][j] = C0*lvecptx[i][j] +C1*lvecptx[i-1][j] +C1*lvecptx[i+1][j] +C2*lvecptx[i-2][j]+C2*lvecptx[i+2][j]; } } for(j=0;j<n2;j++){ lvecpty[0][j] = C0*lvecptx[0][j] +C1*xbcpt2[1][j] +C1*lvecptx[1][j] +C2*xbcpt2[0][j]+C2*lvecptx[2][j]; lvecpty[1][j] = C0*lvecptx[1][j] +C1*lvecptx[0][j] +C1*lvecptx[2][j] 
+C2*xbcpt2[1][j]+C2*lvecptx[3][j]; lvecpty[localsizex-1][j] = C0*lvecptx[localsizex-1][j] +C1*lvecptx[localsizex-2][j] +C1*xbcpt2[2][j] +C2*lvecptx[localsizex-3][j] +C2*xbcpt2[3][j]; lvecpty[localsizex-2][j] = C0*lvecptx[localsizex-2][j] +C1*lvecptx[localsizex-3][j] +C1*lvecptx[localsizex-1][j] +C2*lvecptx[localsizex-4][j] +C2*xbcpt2[2][j]; } VecRestoreArray2d(xbc,4,n2,0,0,&xbcpt2); VecDestroy(xbc); ////////////////////////////////////////////////////////////////////////////////// k = 0; for(i=Istart;i<Iend;i++){ *(bcindI+k) = n-i-1; *(bcindJ+k) = 1; k++; } for(i=Istart;i<Iend;i++){ *(bcindI+k) = n-i-1; *(bcindJ+k) = 0; k++; } for(i=Istart;i<Iend;i++){ *(bcindI+k) = n-i-1; *(bcindJ+k) = n2-1; k++; } for(i=Istart;i<Iend;i++){ *(bcindI+k) = n-i-1; *(bcindJ+k) = n2-2; k++; } VecCreateSeqWithArray(MPI_COMM_SELF,4*localsizex,cacheScalar,&ybc); ISCreateGeneralWithIJ(MPI_COMM_SELF,*x,xvec,nvec,n2,4*localsizex, bcindI, bcindJ,ISfrom, ISto); LargeVecScatterCreate(xvec,ISfrom,ybc,ISto ,ctxt,nvec); ISArrayDestroy(ISfrom,nvec); ISArrayDestroy(ISto,nvec); LargeVecScatterBeginEnd(xvec,ybc,INSERT_VALUES,SCATTER_FORWARD,ctxt,nvec); VecScatterArrayDestroy(ctxt,nvec); VecGetArray2d(ybc,4,localsizex,0,0,&ybcpt2); for(j=2;j<n2-2;j++){ for(i=0;i<localsizex;i++){ lvecptx[i][j] = C0*lvecpty[i][j] +C1*lvecpty[i][j-1] +C1*lvecpty[i][j+1] +C2*lvecpty[i][j-2] +C2*lvecpty[i][j+2]; } } for(i=0;i<localsizex;i++){ lvecptx[i][0] = C0*lvecpty[i][0] +C1*ybcpt2[1][i] +C1*lvecpty[i][1] +C2*ybcpt2[0][i] +C2*lvecpty[i][2]; lvecptx[i][1] = C0*lvecpty[i][1] +C1*lvecpty[i][0] +C1*lvecpty[i][2] +C2*ybcpt2[1][i] +C2*lvecpty[i][3]; lvecptx[i][n2-2] = C0*lvecpty[i][n2-2] +C1*lvecpty[i][n2-3] +C1*lvecpty[i][n2-1] +C2*lvecpty[i][n2-4] +C2*ybcpt2[2][i]; lvecptx[i][n2-1] = C0*lvecpty[i][n2-1] +C1*lvecpty[i][n2-2] +C1*ybcpt2[2][i] +C2*lvecpty[i][n2-3] +C2*ybcpt2[3][i]; } VecRestoreArray2d(ybc,4,localsizex,0,0,&ybcpt2); VecDestroy(ybc); 
///////////////////////////////////////////////////////////////////////////////// VecRestoreArray2d(*x,localsizex,n2,0,0,&lvecptx); VecRestoreArray2d(*y,localsizex,n2,0,0,&lvecpty); VecArrayDestroy(xvec,nvec); return 0; }
int Smoothing(Vec *x, Vec *y, PetscScalar *cacheScalar, PetscInt *cacheInt , VecScatter *ctx,PetscInt n,DA myDA, PetscInt Istart, PetscInt Iend){ PetscScalar C0,C1,C2; PetscErrorCode ierr; PetscInt localsizex; PetscInt n2,i, j, k; Vec lvecx,lvecy; PetscScalar **lvecptx,**lvecpty; Vec bcvec; IS isbc; //VecScatter ctx; PetscScalar *bcpt1,*bcpt2,*bcpt3,*bcpt4; localsizex = Iend-Istart; C0 = 1.0/8; C1 = 1*1.0/4; C2 = 1*3.0/16; n2 = (PetscInt)(n*0.5); DACreateLocalVector(myDA,&lvecx); DACreateLocalVector(myDA,&lvecy); DAGlobalToLocalBegin(myDA,*x,INSERT_VALUES,lvecx); DAGlobalToLocalEnd(myDA,*x,INSERT_VALUES,lvecx); VecGetArray2d(lvecx,localsizex+4,n2+4,0,0,&lvecptx); VecGetArray2d(lvecy,localsizex+4,n2+4,0,0,&lvecpty); // X direction smoothing for(j=0;j<n2+4;j++){ for(i=2;i<localsizex+2;i++){ lvecpty[i][j] = C0*lvecptx[i][j] +C1*lvecptx[i-1][j] +C1*lvecptx[i+1][j] +C2*lvecptx[i-2][j]+C2*lvecptx[i+2][j]; } } VecCreateSeqWithArray(MPI_COMM_SELF,localsizex*4,cacheScalar,&bcvec); VecScatterBegin(*x,bcvec,INSERT_VALUES,SCATTER_FORWARD,*ctx); VecScatterEnd(*x,bcvec,INSERT_VALUES,SCATTER_FORWARD,*ctx); bcpt1 = cacheScalar; bcpt2 = cacheScalar+localsizex; bcpt3 = cacheScalar+localsizex*2; bcpt4 = cacheScalar+localsizex*3; k= 0; for(i=2;i<localsizex+2;i++){ lvecpty[i][0]= *(bcpt3+k); lvecpty[i][1]= *(bcpt4+k); lvecpty[i][n2+3]= *(bcpt2+k); lvecpty[i][n2+2]= *(bcpt1+k); k++; } // Y direction smoothing for(j=2;j<n2+2;j++){ for(i=2;i<localsizex+2;i++){ lvecptx[i][j] = C0*lvecpty[i][j] +C1*lvecpty[i][j-1] +C1*lvecpty[i][j+1] +C2*lvecpty[i][j-2] +C2*lvecpty[i][j+2]; } } VecRestoreArray2d(lvecy,localsizex+4,n2+4,0,0,&lvecpty); VecRestoreArray2d(lvecx,localsizex+4,n2+4,0,0,&lvecptx); DALocalToGlobal(myDA,lvecx,INSERT_VALUES,*x); VecDestroy(bcvec); VecDestroy(lvecx); VecDestroy(lvecy); return 0; }
/*
   BackwardAverage - Performs one backward averaging iteration.

   For every corner of the locally owned cells the pre-image under
   InverseStandardMap() is computed and the value of *x at that grid point is
   gathered; each cell of *y is then set to the mean of its four corner values.
   The n/2 columns are processed in strips of at most localsizey columns so
   that the gathered values fit into the caches.

   Parameters:
     x           - data
     y           - next iteration (written)
     cacheInt    - integer scratch space; the first pmax entries hold the row
                   indices and the next pmax entries the column indices
                   (original note: (2*npt+1)*pmax*sizeof(PetscInt))
     cacheScalar - scalar scratch space for the gathered corner values
                   (original note: (npt+1)*pmax*sizeof(PetscScalar))
     n           - grid size; n/2 columns are stored
     npt         - not used by this routine
     pmax        - number of points to be cached
     Istart,Iend - half-open range of locally owned rows
     c           - parameter passed through to InverseStandardMap()

   pmax must be large enough that pmax/(Iend-Istart+1) - 3 >= 1; otherwise the
   strip width is not positive and the strip loop does not advance.

   Returns 0 on success or the PETSc error code of the first failing call.

   Tzu-Chen Liang 11-25-2006
*/
int BackwardAverage(Vec *x, Vec *y, PetscInt *cacheInt, PetscScalar *cacheScalar, PetscInt n, PetscInt npt, PetscInt pmax, PetscInt Istart, PetscInt Iend,PetscScalar c){
  PetscErrorCode ierr;
  PetscInt       i,j,k,pi,pj,n2,pgrid,stride;
  PetscInt       localsizex,localsizey,rowcount=0;
  PetscInt       *NzindI,*NzindJ;
  PetscScalar    dx,dy,CX,CY;
  PetscScalar    *pty,*corner;
  Vec            y0;
  PetscInt       nvec = 2;
  Vec            xvec[nvec];
  IS             ISfrom[nvec],ISto[nvec];
  VecScatter     ctxt[nvec];

  n2 = (PetscInt)(n*0.5);
  dx = 1.0/n;
  dy = 1.0/n;

  LargeVecCreate(x,nvec,xvec);

  NzindI = cacheInt;        /* pmax row indices    */
  NzindJ = cacheInt+pmax;   /* pmax column indices */

  localsizex = Iend-Istart;
  localsizey = (PetscInt)(pmax*1.0/(localsizex+1))-3;
  if (localsizey>n2) localsizey = n2;

  while (rowcount<n2) {
    /* the last strip may be narrower */
    if (n2-rowcount<=localsizey) localsizey = n2-rowcount;
    stride = localsizey+1;
    pgrid  = (localsizex+1)*stride;

    /* grid point whose value is needed for each cell corner of this strip */
    k = 0;
    for (i=Istart; i<Iend+1; i++) {
      for (j=rowcount; j<rowcount+localsizey+1; j++) {
        CX = (PetscScalar)(i*dx);
        CY = (PetscScalar)(j*dy);
        InverseStandardMap(&CX,&CY,c);
        pi = (PetscInt)floor(CX*(PetscScalar)n);
        pj = (PetscInt)floor(CY*(PetscScalar)n);
        /* only columns below n2 are stored; others are remapped by the helper */
        if (pj>=n2) {SkewSymmetricPoint(&pi,&pj,n);}
        NzindI[k] = pi;
        NzindJ[k] = pj;
        k++;
      }
    }

    /* gather the corner values into y0, which is backed by cacheScalar */
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,pgrid,cacheScalar,&y0);CHKERRQ(ierr);
    ISCreateGeneralWithIJ(MPI_COMM_SELF,*x,xvec,nvec,n2,pgrid,NzindI,NzindJ,ISfrom,ISto);
    LargeVecScatterCreate(xvec,ISfrom,y0,ISto,ctxt,nvec);
    ISArrayDestroy(ISfrom,nvec);
    ISArrayDestroy(ISto,nvec);
    LargeVecScatterBeginEnd(xvec,y0,INSERT_VALUES,SCATTER_FORWARD,ctxt,nvec);
    VecScatterArrayDestroy(ctxt,nvec);

    /* a separate pointer is used so the cache base is never touched by Get/Restore */
    ierr = VecGetArray(y0,&corner);CHKERRQ(ierr);
    ierr = VecGetArray(*y,&pty);CHKERRQ(ierr);
    for (i=0; i<localsizex; i++) {
      for (j=0; j<localsizey; j++) {
        pty[i*n2+j+rowcount] = (corner[i*stride+j]+
                                corner[i*stride+j+1]+
                                corner[(i+1)*stride+j]+
                                corner[(i+1)*stride+j+1])/4;
      }
    }
    ierr = VecRestoreArray(y0,&corner);CHKERRQ(ierr);
    ierr = VecRestoreArray(*y,&pty);CHKERRQ(ierr);
    ierr = VecDestroy(y0);CHKERRQ(ierr);

    rowcount = rowcount + localsizey;
  }

  VecArrayDestroy(xvec,nvec);
  return 0;
}
/*
   BackwardAverageRL - Backward averaging iteration using a strategy that can
   handle large problems (more than 4G variables).

   The local array of *x is wrapped by two parallel vectors: x1 has non-zero
   local length only on ranks below size/2 and x2 only on the remaining ranks.
   Every corner pre-image computed with InverseStandardMap() is looked up in x1
   when its row is below col1 (the number of rows held by x1) and in x2
   otherwise, using an index relative to the respective vector.  Each cell of
   *y is then set to the mean of its four gathered corner values.  The n/2
   columns are processed in strips of at most localsizey columns.

   Parameters:
     x           - data
     y           - next iteration (written)
     cacheInt    - integer scratch space; the first pmax entries hold the source
                   indices and the next pmax entries the destination indices
     cacheScalar - scalar scratch space for the gathered corner values
     n           - grid size; n/2 columns are stored
     npt         - not used by this routine
     pmax        - number of points to be cached
     Istart,Iend - half-open range of locally owned rows
     c           - parameter passed through to InverseStandardMap()

   pmax must be large enough that pmax/(Iend-Istart+1) - 2 >= 1; otherwise the
   strip width is not positive and the strip loop does not advance.

   Returns 0 on success or the PETSc error code of the first failing call.
*/
int BackwardAverageRL(Vec *x, Vec *y, PetscInt *cacheInt, PetscScalar *cacheScalar, PetscInt n, PetscInt npt, PetscInt pmax, PetscInt Istart, PetscInt Iend,PetscScalar c){
  int            rank,size;   /* MPI_Comm_rank/size write an int, which PetscInt need not be */
  PetscErrorCode ierr;
  PetscInt       i,j,k,pi,pj,n2,pgrid,stride,lx;
  PetscInt       localsizex,localsizey,rowcount=0;
  PetscInt       k1,k2,pgrid1,pgrid2;
  PetscInt       *idy,*NzindJ;
  PetscScalar    dx,dy,CX,CY;
  PetscScalar    *pty,*corner,*ptx;
  IS             isx1,isx2,isy1,isy2;
  VecScatter     ctx1,ctx2;
  Vec            y0;
  Vec            x1,x2;
  PetscInt       size1,col1;

  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

  n2 = (PetscInt)(n*0.5);
  dx = 1.0/n;
  dy = 1.0/n;

  NzindJ = cacheInt;        /* pmax source indices      */
  idy    = cacheInt+pmax;   /* pmax destination indices */

  localsizex = Iend-Istart;
  localsizey = (PetscInt)(pmax*1.0/(localsizex+1))-2;
  if (localsizey>n2) localsizey = n2;

  /* wrap the local data of *x in the two half vectors */
  ierr = VecGetArray(*x,&ptx);CHKERRQ(ierr);
  if (rank<size*0.5) lx = localsizex*n2; else lx = 0;
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,lx,PETSC_DETERMINE,ptx,&x1);CHKERRQ(ierr);
  if (rank<size*0.5) lx = 0; else lx = localsizex*n2;
  ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,lx,PETSC_DETERMINE,ptx,&x2);CHKERRQ(ierr);

  ierr = VecGetSize(x1,&size1);CHKERRQ(ierr);
  col1 = (PetscInt)(size1*1.0/n2);   /* number of grid rows stored in x1 */

  /* the array of *y stays checked out for the whole strip loop */
  ierr = VecGetArray(*y,&pty);CHKERRQ(ierr);

  while (rowcount<n2) {
    /* the last strip may be narrower */
    if (n2-rowcount<=localsizey) localsizey = n2-rowcount;
    stride = localsizey+1;
    pgrid  = (localsizex+1)*stride;

    /* x1 entries are collected from the front of the index arrays and x2
       entries from the back, so both lists share the same storage */
    k  = 0;
    k1 = 0;
    k2 = 0;
    for (i=Istart; i<Iend+1; i++) {
      for (j=rowcount; j<rowcount+localsizey+1; j++) {
        CX = (PetscScalar)(i*dx);
        CY = (PetscScalar)(j*dy);
        InverseStandardMap(&CX,&CY,c);
        pi = (PetscInt)floor(CX*n);
        pj = (PetscInt)floor(CY*n);
        /* only columns below n2 are stored; others are remapped by the helper */
        if (pj>=n2) {SkewSymmetricPoint(&pi,&pj,n);}
        if (pi<col1) {
          NzindJ[k1] = (PetscInt)(n2*pi+pj);
          idy[k1]    = k;
          k1++;
        } else {
          NzindJ[pgrid-k2-1] = (PetscInt)(n2*(pi-col1)+pj);
          idy[pgrid-k2-1]    = k;
          k2++;
        }
        k++;
      }
    }
    pgrid1 = k1;
    pgrid2 = k2;

    /* NOTE(review): these four index sets are never destroyed, so each strip
       leaks them; release them once the scatters below have been created. */
    ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid1,NzindJ,&isx1);CHKERRQ(ierr);
    ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid2,NzindJ+pgrid1,&isx2);CHKERRQ(ierr);
    ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid1,idy,&isy1);CHKERRQ(ierr);
    ierr = ISCreateGeneralWithArray(PETSC_COMM_SELF,pgrid2,idy+pgrid1,&isy2);CHKERRQ(ierr);

    /* gather the corner values into y0, which is backed by cacheScalar */
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,pgrid,cacheScalar,&y0);CHKERRQ(ierr);
    ierr = VecScatterCreate(x1,isx1,y0,isy1,&ctx1);CHKERRQ(ierr);
    ierr = VecScatterCreate(x2,isx2,y0,isy2,&ctx2);CHKERRQ(ierr);
    ierr = VecScatterBegin(x1,y0,INSERT_VALUES,SCATTER_FORWARD,ctx1);CHKERRQ(ierr);
    ierr = VecScatterEnd(x1,y0,INSERT_VALUES,SCATTER_FORWARD,ctx1);CHKERRQ(ierr);
    ierr = VecScatterBegin(x2,y0,INSERT_VALUES,SCATTER_FORWARD,ctx2);CHKERRQ(ierr);
    ierr = VecScatterEnd(x2,y0,INSERT_VALUES,SCATTER_FORWARD,ctx2);CHKERRQ(ierr);
    ierr = VecScatterDestroy(ctx1);CHKERRQ(ierr);
    ierr = VecScatterDestroy(ctx2);CHKERRQ(ierr);

    /* a separate pointer is used so the cache base is never touched by Get/Restore */
    ierr = VecGetArray(y0,&corner);CHKERRQ(ierr);
    for (i=0; i<localsizex; i++) {
      for (j=0; j<localsizey; j++) {
        pty[i*n2+j+rowcount] = (corner[i*stride+j]+
                                corner[i*stride+j+1]+
                                corner[(i+1)*stride+j]+
                                corner[(i+1)*stride+j+1])/4;
      }
    }
    ierr = VecRestoreArray(y0,&corner);CHKERRQ(ierr);
    ierr = VecDestroy(y0);CHKERRQ(ierr);

    rowcount = rowcount + localsizey;
  }

  /* matches the single VecGetArray(*y) issued before the loop */
  ierr = VecRestoreArray(*y,&pty);CHKERRQ(ierr);

  ierr = VecDestroy(x1);CHKERRQ(ierr);
  ierr = VecDestroy(x2);CHKERRQ(ierr);
  /* x1 and x2 borrowed this array, so it is handed back only after they are gone */
  ierr = VecRestoreArray(*x,&ptx);CHKERRQ(ierr);
  return 0;
}