/*@C
   DMCompositeGetISLocalToGlobalMappings - gets an ISLocalToGlobalMapping for each DM in the DMComposite, maps to the composite global space

   Collective on DM

   Input Parameter:
.  dm - the packer object

   Output Parameter:
.  ltogs - the individual mappings for each packed vector. Note that this includes
           all the ghost points that individual ghosted DMDA's may have.

   Level: advanced

   Notes:
   Each entry of ltogs should be destroyed with ISLocalToGlobalMappingDestroy(), the
   ltogs array should be freed with PetscFree().

.seealso DMDestroy(), DMCompositeAddDM(), DMCreateGlobalVector(),
         DMCompositeGather(), DMCompositeCreate(), DMCompositeGetAccess(), DMCompositeScatter(),
         DMCompositeGetLocalVectors(), DMCompositeRestoreLocalVectors(),DMCompositeGetEntries()

@*/
PetscErrorCode DMCompositeGetISLocalToGlobalMappings(DM dm,ISLocalToGlobalMapping **ltogs)
{
  PetscErrorCode         ierr;
  PetscInt               i,*idx,n,cnt;
  struct DMCompositeLink *next;
  PetscMPIInt            rank;
  DM_Composite           *com = (DM_Composite*)dm->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm,DM_CLASSID,1);
  ierr = DMSetUp(dm);CHKERRQ(ierr);
  /* one output mapping per packed DM; freed by the caller with PetscFree() */
  ierr = PetscMalloc1(com->nDM,ltogs);CHKERRQ(ierr);
  next = com->next;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)dm),&rank);CHKERRQ(ierr);

  /* loop over packed objects, handling one at a time */
  cnt = 0;
  while (next) {
    ISLocalToGlobalMapping ltog;
    PetscMPIInt            size;
    const PetscInt         *suboff,*indices;
    Vec                    global;

    /* Get sub-DM global indices for each local dof (includes ghost points) */
    ierr = DMGetLocalToGlobalMapping(next->dm,&ltog);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingGetSize(ltog,&n);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingGetIndices(ltog,&indices);CHKERRQ(ierr);
    ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr);

    /* Get the offsets for the sub-DM global vector */
    ierr = DMGetGlobalVector(next->dm,&global);CHKERRQ(ierr);
    ierr = VecGetOwnershipRanges(global,&suboff);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)global),&size);CHKERRQ(ierr);

    /* Shift the sub-DM definition of the global space to the composite global space */
    for (i=0; i<n; i++) {
      PetscInt subi = indices[i],lo = 0,hi = size,t;
      /* Binary search to find which rank owns subi */
      while (hi-lo > 1) {
        t = lo + (hi-lo)/2;
        if (suboff[t] > subi) hi = t;
        else                  lo = t;
      }
      /* local offset within the owning rank, shifted by that rank's start in the composite space */
      idx[i] = subi - suboff[lo] + next->grstarts[lo];
    }
    ierr = ISLocalToGlobalMappingRestoreIndices(ltog,&indices);CHKERRQ(ierr);
    /* idx ownership is transferred to the new mapping (PETSC_OWN_POINTER) */
    ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)dm),n,idx,PETSC_OWN_POINTER,&(*ltogs)[cnt]);CHKERRQ(ierr);
    ierr = DMRestoreGlobalVector(next->dm,&global);CHKERRQ(ierr);
    next = next->next;
    cnt++;
  }
  PetscFunctionReturn(0);
}
/*@C
   SNESComputeJacobianDefault - Computes the Jacobian using finite differences.

   Collective on SNES

   Input Parameters:
+  x1 - compute Jacobian at this point
-  ctx - application's function context, as set with SNESSetFunction()

   Output Parameters:
+  J - Jacobian matrix (not altered in this routine)
-  B - newly computed Jacobian matrix to use with preconditioner (generally the same as J)

   Options Database Key:
+  -snes_fd - Activates SNESComputeJacobianDefault()
.  -snes_test_err - Square root of function error tolerance, default square root of machine
                    epsilon (1.e-8 in double, 3.e-4 in single)
-  -mat_fd_type - Either wp or ds (see MATMFFD_WP or MATMFFD_DS)

   Notes:
   This routine is slow and expensive, and is not currently optimized
   to take advantage of sparsity in the problem.  Although
   SNESComputeJacobianDefault() is not recommended for general use
   in large-scale applications, It can be useful in checking the
   correctness of a user-provided Jacobian.

   An alternative routine that uses coloring to exploit matrix sparsity is
   SNESComputeJacobianDefaultColor().

   Level: intermediate

.keywords: SNES, finite differences, Jacobian

.seealso: SNESSetJacobian(), SNESComputeJacobianDefaultColor(), MatCreateSNESMF()
@*/
PetscErrorCode SNESComputeJacobianDefault(SNES snes,Vec x1,Mat J,Mat B,void *ctx)
{
  Vec            j1a,j2a,x2;
  PetscErrorCode ierr;
  PetscInt       i,N,start,end,j,value,root;
  PetscScalar    dx,*y,*xx,wscale;
  PetscReal      amax,epsilon = PETSC_SQRT_MACHINE_EPSILON;
  PetscReal      dx_min = 1.e-16,dx_par = 1.e-1,unorm;
  MPI_Comm       comm;
  PetscErrorCode (*eval_fct)(SNES,Vec,Vec)=0;
  PetscBool      assembled,use_wp = PETSC_TRUE,flg;
  const char     *list[2] = {"ds","wp"};
  PetscMPIInt    size;
  const PetscInt *ranges;

  PetscFunctionBegin;
  /* allow the user to override the differencing tolerance */
  ierr     = PetscOptionsGetReal(((PetscObject)snes)->prefix,"-snes_test_err",&epsilon,0);CHKERRQ(ierr);
  eval_fct = SNESComputeFunction;

  ierr = PetscObjectGetComm((PetscObject)x1,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatAssembled(B,&assembled);CHKERRQ(ierr);
  if (assembled) {
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }
  /* lazily create the three work vectors (x2, F(x1), F(x2)) and cache them on the SNES */
  if (!snes->nvwork) {
    snes->nvwork = 3;

    ierr = VecDuplicateVecs(x1,snes->nvwork,&snes->vwork);CHKERRQ(ierr);
    ierr = PetscLogObjectParents(snes,snes->nvwork,snes->vwork);CHKERRQ(ierr);
  }
  j1a = snes->vwork[0]; j2a = snes->vwork[1]; x2 = snes->vwork[2];

  ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(x1,&start,&end);CHKERRQ(ierr);
  /* baseline function value F(x1), differenced against below */
  ierr = (*eval_fct)(snes,x1,j1a);CHKERRQ(ierr);

  /* choose the differencing-parameter algorithm: "wp" (default) or "ds" */
  ierr = PetscOptionsEList("-mat_fd_type","Algorithm to compute difference parameter","SNESComputeJacobianDefault",list,2,"wp",&value,&flg);CHKERRQ(ierr);
  if (flg && !value) use_wp = PETSC_FALSE;

  if (use_wp) {
    /* wp needs ||x1||_2 to size the perturbation */
    ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
  }
  /* Compute Jacobian approximation, 1 column at a time.
      x1 = current iterate, j1a = F(x1)
      x2 = perturbed iterate, j2a = F(x2)
   */
  for (i=0; i<N; i++) {
    ierr = VecCopy(x1,x2);CHKERRQ(ierr);
    if (i>= start && i<end) {
      /* only the owner of entry i computes dx and perturbs x2 */
      ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
      if (use_wp) dx = 1.0 + unorm;
      else        dx = xx[i-start];
      ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
      /* guard against a (near-)zero entry producing a degenerate step */
      if (PetscAbsScalar(dx) < dx_min) dx = (PetscRealPart(dx) < 0. ? -1. : 1.) * dx_par;
      dx    *= epsilon;
      wscale = 1.0/dx;
      ierr   = VecSetValues(x2,1,&i,&dx,ADD_VALUES);CHKERRQ(ierr);
    } else {
      /* non-owners contribute no scale; the true 1/dx is broadcast below */
      wscale = 0.0;
    }
    ierr = VecAssemblyBegin(x2);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(x2);CHKERRQ(ierr);
    ierr = (*eval_fct)(snes,x2,j2a);CHKERRQ(ierr);
    /* j2a <- F(x2) - F(x1) */
    ierr = VecAXPY(j2a,-1.0,j1a);CHKERRQ(ierr);
    /* Communicate scale=1/dx_i to all processors */
    ierr = VecGetOwnershipRanges(x1,&ranges);CHKERRQ(ierr);
    root = size;
    for (j=size-1; j>-1; j--) {
      root--;
      if (i>=ranges[j]) break;
    }
    ierr = MPI_Bcast(&wscale,1,MPIU_SCALAR,root,comm);CHKERRQ(ierr);

    /* column i of the Jacobian is (F(x2)-F(x1))/dx */
    ierr = VecScale(j2a,wscale);CHKERRQ(ierr);
    ierr = VecNorm(j2a,NORM_INFINITY,&amax);CHKERRQ(ierr); amax *= 1.e-14;
    ierr = VecGetArray(j2a,&y);CHKERRQ(ierr);
    for (j=start; j<end; j++) {
      /* drop entries below the relative tolerance, but always keep the diagonal */
      if (PetscAbsScalar(y[j-start]) > amax || j == i) {
        ierr = MatSetValues(B,1,&j,1,&i,y+j-start,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = VecRestoreArray(j2a,&y);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (B != J) {
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { PetscErrorCode ierr; PetscInt i,n,*ix,*iy,*tomap,start; Vec x,y; PetscMPIInt nproc,rank; IS isx,isy; const PetscInt *ranges; VecScatter vscat; PetscFunctionBegin; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&nproc);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); if (nproc != 2) SETERRQ(PETSC_COMM_SELF,1,"This test must run with exactly two MPI ranks\n"); /* ==================================================================== (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter ==================================================================== */ n = 64; /* long enough to trigger memcpy optimizations both in local scatter and remote scatter */ /* create two MPI vectors x, y of length n=64, N=128 */ ierr = VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); /* Initialize x as {0~127} */ ierr = VecGetOwnershipRanges(x,&ranges);CHKERRQ(ierr); for (i=ranges[rank]; i<ranges[rank+1]; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. isx is sequential, but we use it as general and let PETSc detect the pattern and optimize it. 
indices in isy are set to make the vecscatter have both local scatter and remote scatter (i.e., MPI communication) */ ierr = PetscMalloc2(n,&ix,n,&iy);CHKERRQ(ierr); start = ranges[rank]; for (i=ranges[rank]; i<ranges[rank+1]; i++) ix[i-start] = i; ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); if (!rank) { for (i=0; i<n; i++) iy[i] = i+32; } else for (i=0; i<n/2; i++) { iy[i] = i+96; iy[i+n/2] = i; } ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,iy,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr); /* create a vecscatter that shifts x to the tail by quater periodically and puts the results in y */ ierr = VecScatterCreateWithData(x,isx,y,isy,&vscat);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"Before VecScatterRemap on PtoP, MPI vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. It changes where we read vector x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and msg lengths). We create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of the local x to send out. The remap does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x. isy is unchanged. 
So, we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127} */ ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on PtoP, MPI vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* destroy everything before we recreate them in different types */ ierr = PetscFree2(ix,iy);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&isx);CHKERRQ(ierr); ierr = ISDestroy(&isy);CHKERRQ(ierr); ierr = PetscFree(tomap);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); /* ========================================================================================== (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter ========================================================================================== */ n = 64; /* long enough to trigger memcpy optimizations in local scatter */ /* create two seq vectors x, y of length n */ ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); /* Initialize x as {0~63} */ for (i=0; i<n; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); /* create two general index sets isx = isy = {0~63}, which are sequential, but we use them as general and let PETSc detect the pattern and optimize it */ ierr = PetscMalloc2(n,&ix,n,&iy);CHKERRQ(ierr); for (i=0; i<n; i++) ix[i] = i; ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); ierr = 
ISDuplicate(isx,&isy);CHKERRQ(ierr); /* create a vecscatter that just copies x to y */ ierr = VecScatterCreateWithData(x,isx,y,isy,&vscat);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {0~63} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. Create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of seq x to write to y. The remap does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32~63,0~31} of seq x. */ ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. 
y should be {32~63,0~31} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSG, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* destroy everything before we recreate them in different types */ ierr = PetscFree2(ix,iy);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&isx);CHKERRQ(ierr); ierr = ISDestroy(&isy);CHKERRQ(ierr); ierr = PetscFree(tomap);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); /* =================================================================================================== (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter =================================================================================================== */ n = 64; /* long enough to trigger memcpy optimizations in local scatter */ /* create two seq vectors x of length n, and y of length n/2 */ ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n/2,&y);CHKERRQ(ierr); /* Initialize x as {0~63} */ for (i=0; i<n; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2, but we use it as general and let PETSc detect the pattern and optimize it. */ ierr = PetscMalloc2(n/2,&ix,n/2,&iy);CHKERRQ(ierr); for (i=0; i<n/2; i++) ix[i] = i*2; ierr = ISCreateGeneral(PETSC_COMM_SELF,n/2,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); /* create a stride1 index set isy = {0~31}. 
We intentionally set the step to 1 to trigger optimizations */ ierr = ISCreateStride(PETSC_COMM_SELF,32,0,1,&isy);CHKERRQ(ierr); /* create a vecscatter that just copies even entries of x to y */ ierr = VecScatterCreateWithData(x,isx,y,isy,&vscat);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {0:63:2} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. Create tomap as {32~63,0~31}. Originaly, we read from indices{0:63:2} of seq x to write to y. The remap does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32:63:2,0:31:2} of seq x. */ ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {32:63:2,0:31:2} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* destroy everything before PetscFinalize */ ierr = PetscFree2(ix,iy);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&isx);CHKERRQ(ierr); ierr = ISDestroy(&isy);CHKERRQ(ierr); ierr = PetscFree(tomap);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
/*
  DMPatchZoom - Create a version of the coarse patch (identified by rank) with halo on communicator commz

  Collective on DM

  Input Parameters:
+ dm - the DM
. rank - the rank which holds the given patch
- commz - the new communicator for the patch

  Output Parameters:
+ dmz  - the patch DM
. sfz  - the PetscSF mapping the patch+halo to the zoomed version
. sfzr - the PetscSF mapping the patch to the restricted zoomed version

  Level: intermediate

  Note: All processes in commz should have the same rank (could autosplit comm)

.seealso: DMPatchSolve()
*/
PetscErrorCode DMPatchZoom(DM dm, Vec X, MatStencil lower, MatStencil upper, MPI_Comm commz, DM *dmz, PetscSF *sfz, PetscSF *sfzr)
{
  DMDAStencilType st;
  MatStencil      blower, bupper, loclower, locupper;
  IS              is;
  const PetscInt  *ranges, *indices;
  PetscInt        *localPoints  = NULL;
  PetscSFNode     *remotePoints = NULL;
  PetscInt        dim, dof;
  PetscInt        M, N, P, rM, rN, rP, halo = 1, sxb, syb, szb, sxr, syr, szr, exr, eyr, ezr, mxb, myb, mzb, i, j, k, q;
  PetscMPIInt     size;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size);CHKERRQ(ierr);
  /* Create patch DM */
  ierr = DMDAGetInfo(dm, &dim, &M, &N, &P, 0,0,0, &dof, 0,0,0,0, &st);CHKERRQ(ierr);

  /* Get piece for rank r, expanded by halo, clipped to the global domain [0,M)x[0,N)x[0,P) */
  bupper.i = PetscMin(M, upper.i + halo); blower.i = PetscMax(lower.i - halo, 0);
  bupper.j = PetscMin(N, upper.j + halo); blower.j = PetscMax(lower.j - halo, 0);
  bupper.k = PetscMin(P, upper.k + halo); blower.k = PetscMax(lower.k - halo, 0);
  /* rM x rN x rP: extent of the buffered (halo-expanded) patch */
  rM = bupper.i - blower.i;
  rN = bupper.j - blower.j;
  rP = bupper.k - blower.k;

  if (commz != MPI_COMM_NULL) {
    /* build the zoomed DMDA over the buffered patch on the sub-communicator */
    ierr = DMDACreate(commz, dmz);CHKERRQ(ierr);
    ierr = DMSetDimension(*dmz, dim);CHKERRQ(ierr);
    ierr = DMDASetSizes(*dmz, rM, rN, rP);CHKERRQ(ierr);
    ierr = DMDASetNumProcs(*dmz, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr);
    ierr = DMDASetBoundaryType(*dmz, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE);CHKERRQ(ierr);
    ierr = DMDASetDof(*dmz, dof);CHKERRQ(ierr);
    ierr = DMDASetStencilType(*dmz, st);CHKERRQ(ierr);
    ierr = DMDASetStencilWidth(*dmz, 0);CHKERRQ(ierr);
    ierr = DMDASetOwnershipRanges(*dmz, NULL, NULL, NULL);CHKERRQ(ierr);
    ierr = DMSetFromOptions(*dmz);CHKERRQ(ierr);
    ierr = DMSetUp(*dmz);CHKERRQ(ierr);
    ierr = DMDAGetCorners(*dmz, &sxb, &syb, &szb, &mxb, &myb, &mzb);CHKERRQ(ierr);
    /* restricted (non-halo) patch bounds expressed in buffered-patch local coordinates,
       intersected with this rank's corner region of the zoomed DMDA */
    sxr  = PetscMax(sxb,     lower.i - blower.i);
    syr  = PetscMax(syb,     lower.j - blower.j);
    szr  = PetscMax(szb,     lower.k - blower.k);
    exr  = PetscMin(sxb+mxb, upper.i - blower.i);
    eyr  = PetscMin(syb+myb, upper.j - blower.j);
    ezr  = PetscMin(szb+mzb, upper.k - blower.k);
    ierr = PetscMalloc2(rM*rN*rP,&localPoints,rM*rN*rP,&remotePoints);CHKERRQ(ierr);
  } else {
    /* ranks outside commz contribute empty ranges; the loops below are then empty */
    sxr = syr = szr = exr = eyr = ezr = sxb = syb = szb = mxb = myb = mzb = 0;
  }

  /* Create SF for restricted map */
  ierr = VecGetOwnershipRanges(X,&ranges);CHKERRQ(ierr);

  loclower.i = blower.i + sxr; locupper.i = blower.i + exr;
  loclower.j = blower.j + syr; locupper.j = blower.j + eyr;
  loclower.k = blower.k + szr; locupper.k = blower.k + ezr;

  ierr = DMDACreatePatchIS(dm, &loclower, &locupper, &is);CHKERRQ(ierr);
  ierr = ISGetIndices(is, &indices);CHKERRQ(ierr);

  q = 0;
  for (k = szb; k < szb+mzb; ++k) {
    if ((k < szr) || (k >= ezr)) continue;
    for (j = syb; j < syb+myb; ++j) {
      if ((j < syr) || (j >= eyr)) continue;
      for (i = sxb; i < sxb+mxb; ++i) {
        /* lp: lexicographic local point index within the buffered patch */
        const PetscInt lp = ((k-szb)*rN + (j-syb))*rM + i-sxb;
        PetscInt       r;

        if ((i < sxr) || (i >= exr)) continue;
        localPoints[q]        = lp;
        /* binary-search the ownership ranges of X to find the owning rank; a negative
           PetscFindInt result encodes the insertion point, hence the -(r+1)-1 decode */
        ierr                  = PetscFindInt(indices[q], size+1, ranges, &r);CHKERRQ(ierr);
        remotePoints[q].rank  = r < 0 ? -(r+1) - 1 : r;
        remotePoints[q].index = indices[q] - ranges[remotePoints[q].rank];
        ++q;
      }
    }
  }
  ierr = ISRestoreIndices(is, &indices);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)dm), sfzr);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject) *sfzr, "Restricted Map");CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfzr, M*N*P, q, localPoints, PETSC_COPY_VALUES, remotePoints, PETSC_COPY_VALUES);CHKERRQ(ierr);

  /* Create SF for buffered map (same construction, but over the full halo-expanded patch) */
  loclower.i = blower.i + sxb; locupper.i = blower.i + sxb+mxb;
  loclower.j = blower.j + syb; locupper.j = blower.j + syb+myb;
  loclower.k = blower.k + szb; locupper.k = blower.k + szb+mzb;

  ierr = DMDACreatePatchIS(dm, &loclower, &locupper, &is);CHKERRQ(ierr);
  ierr = ISGetIndices(is, &indices);CHKERRQ(ierr);

  q = 0;
  for (k = szb; k < szb+mzb; ++k) {
    for (j = syb; j < syb+myb; ++j) {
      for (i = sxb; i < sxb+mxb; ++i, ++q) {
        PetscInt r;

        localPoints[q]        = q;
        ierr                  = PetscFindInt(indices[q], size+1, ranges, &r);CHKERRQ(ierr);
        remotePoints[q].rank  = r < 0 ? -(r+1) - 1 : r;
        remotePoints[q].index = indices[q] - ranges[remotePoints[q].rank];
      }
    }
  }
  ierr = ISRestoreIndices(is, &indices);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)dm), sfz);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject) *sfz, "Buffered Map");CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfz, M*N*P, q, localPoints, PETSC_COPY_VALUES, remotePoints, PETSC_COPY_VALUES);CHKERRQ(ierr);

  ierr = PetscFree2(localPoints, remotePoints);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Builds the scatter contexts and work vectors needed by the parallel SBAIJ
   matrix-vector products: compacts the off-diagonal block B to only the needed
   block columns (recorded in garray) and creates both the conventional Mvctx/lvec
   scatter and the symmetric sMvctx/slvec0/slvec1 scatter used by MatMult_MPISBAIJ(). */
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
  const PetscInt *sowners;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B; ec counts the distinct ones */
  ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need: garray lists the needed global block columns
     in increasing order, ec_owner records which rank owns each of them */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);

  ec = 0;
  for (j=0; j<size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: rewrite B's column indices to the compacted numbering */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;

  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreateWithData(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  sbaij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     each rank's local part holds mbs diagonal blocks followed by ec off-process blocks */
  lsize = (mbs + ec)*bs;
  ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift each needed column into the owner's slvec0 segment */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this rank's off-process slot range in slvec0 */
  k = sowners[rank]/bs + mbs; /* the first nonzero index in the remote part of sowners */
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i); /* global index */

  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  ierr = VecScatterCreateWithData(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the same storage as slvec1 (slvec0):
     the leading bs*mbs entries are the "a" (diagonal) part, the rest the "b" part */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}