/*
   FormFunctionFortran - Evaluates nonlinear function, F(x) in Fortran.

   Input Parameters:
.  snes - the SNES context (used here only as the object the barriers synchronize on)
.  X    - input vector
.  ptr  - user-defined context, cast to AppCtx

   Output Parameter:
.  F - function vector

   Only the process with user->rank == 0 calls the Fortran kernel; every
   process takes part in the two barriers surrounding that call.
*/
int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)
{
  AppCtx            *user = (AppCtx*)ptr;
  int               ierr;
  int               fierr = 0; /* status reported by the Fortran kernel */
  PetscScalar       *f;
  const PetscScalar *x;

  /*
      Process 0 has to wait for all other processes to get here
   before proceeding to write in the shared vector
  */
  ierr = PetscBarrier((PetscObject)snes);CHKERRQ(ierr);
  if (!user->rank) {
    ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
    ierr = VecGetArray(F,&f);CHKERRQ(ierr);
    applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&fierr);
    ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
    ierr = VecRestoreArray(F,&f);CHKERRQ(ierr);
    /* Report a kernel failure only after the arrays have been handed back;
       previously the kernel status was overwritten without being examined. */
    CHKERRQ(fierr);
    ierr = PetscLogFlops(11.0*(user->mx-2)*(user->my-2));CHKERRQ(ierr);
  }
  /*
      All the non-busy processors have to wait here for process 0 to finish
      evaluating the function; otherwise they will start using the vector values
      before they have been computed
  */
  ierr = PetscBarrier((PetscObject)snes);CHKERRQ(ierr);
  return 0;
}
/*
   TSAdjointEventHandler - Replays a recorded event during an adjoint solve.

   If the TS has an event object and the current time matches (to within
   PETSC_SMALL) the most recent entry of the event recorder, the user's
   postevent callback is invoked with forwardsolve = PETSC_FALSE and the
   recorder counter is decremented. The counter is only decremented when a
   postevent callback is set.

   Input Parameter:
.  ts - the TS context

   Returns 0 on success or a PETSc error code.
*/
PetscErrorCode TSAdjointEventHandler(TS ts)
{
  PetscErrorCode ierr;
  TSEvent        event;
  PetscReal      t;
  Vec            U;
  PetscInt       ctr;
  PetscBool      forwardsolve = PETSC_FALSE; /* Flag indicating that TS is doing an adjoint solve */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(ts,TS_CLASSID,1);
  if (!ts->event) PetscFunctionReturn(0);
  event = ts->event;

  ierr = TSGetTime(ts,&t);CHKERRQ(ierr);
  ierr = TSGetSolution(ts,&U);CHKERRQ(ierr);

  /* Index of the most recently recorded event (negative when none is left) */
  ctr = event->recorder.ctr-1;
  if (ctr >= 0 && PetscAbsReal(t - event->recorder.time[ctr]) < PETSC_SMALL) {
    /* Call the user postevent function */
    if (event->postevent) {
      ierr = (*event->postevent)(ts,event->recorder.nevents[ctr],event->recorder.eventidx[ctr],t,U,forwardsolve,event->ctx);CHKERRQ(ierr);
      event->recorder.ctr--;
    }
  }

  /* The barrier can fail like any other PETSc call; propagate its status */
  ierr = PetscBarrier((PetscObject)ts);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode testCreate2D() { int ga; DA da; DALocalInfo info; Vec vec; PetscErrorCode ierr; PetscFunctionBegin; int d1 = 1453, d2 = 1451; ierr = DACreate2d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR, d1,d2,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0, &da); CHKERRQ(ierr); ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr); ierr = DACreateGlobalArray( da, &ga, &vec); CHKERRQ(ierr); PetscReal **v; ierr = DAVecGetArray(da,vec,&v); CHKERRQ(ierr); int xe = info.xs+info.xm, ye = info.ys+info.ym; for (int j = info.ys; j < ye; ++j) { for (int i = info.xs; i < xe; ++i) { v[j][i] = 1.*i + d1 * j; } } ierr = DAVecRestoreArray(da,vec,&v); CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"Updated local portion with DAVec\n"); PetscBarrier(0); { double *da_ptr; VecGetArray(vec, &da_ptr); double *ptr; int low[2],hi[2],ld; NGA_Distribution(ga,GA_Nodeid(),low,hi); NGA_Access(ga,low,hi,&ptr,&ld); printf("[%d] ga:%p\tda:%p\tdiff:%p\n", GA_Nodeid(), ptr, da_ptr, (ptr-da_ptr) ); NGA_Release_update(ga,low,hi); } int lo[2],ld; double val; for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { lo[0] = j; lo[1] = i; NGA_Get(ga,lo,lo,&val,&ld); if( PetscAbs( i + d1*j - val) > .1 ) printf("."); // printf("[%d] (%3.0f,%3.0f)\n", GA_Nodeid(), 1.*i + d1*j, val); } } GA_Print_stats(); ierr = VecDestroy(vec); CHKERRQ(ierr); GA_Destroy(ga); PetscFunctionReturn(0); }
/*
   PCSetUp_TFS - Prepares the TFS preconditioner for the preconditioning
   matrix pc->pmat.

   The matrix must be square and of type MATMPIAIJ. The routine builds a
   1-based local-to-global column numbering (diagonal-block columns first,
   then off-diagonal columns taken from garray), creates the three work
   vectors b, xd and xo without backing storage, and then factors with XXT
   when the matrix is flagged symmetric and with XYT otherwise, installing the
   matching apply routine.
*/
static PetscErrorCode PCSetUp_TFS(PC pc)
{
  PC_TFS         *ctx  = (PC_TFS*)pc->data;
  Mat            pmat  = pc->pmat;
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)pmat->data;
  PetscErrorCode ierr;
  PetscInt       *l2g,ntotal,ndiag,noff,k;
  PetscBool      ismpiaij;

  PetscFunctionBegin;
  if (pmat->cmap->N != pmat->rmap->N) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_SIZ,"matrix must be square");
  ierr = PetscObjectTypeCompare((PetscObject)pc->pmat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
  if (!ismpiaij) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Currently only supports MPIAIJ matrices");

  /* Column counts of the diagonal and off-diagonal blocks; only read once
     the type check above has passed */
  ndiag  = aij->A->cmap->n;
  noff   = aij->B->cmap->n;
  ntotal = ndiag + noff;

  /* generate the local to global mapping (1-based) */
  ierr = PetscMalloc((ntotal)*sizeof(PetscInt),&l2g);CHKERRQ(ierr);
  for (k=0; k<ndiag; k++) {
    l2g[k] = pmat->cmap->rstart + k + 1;
  }
  for (k=0; k<noff; k++) {
    l2g[ndiag+k] = aij->garray[k] + 1;
  }

  /* generate the vectors needed for the local solves */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,aij->A->rmap->n,PETSC_NULL,&ctx->b);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,ndiag,PETSC_NULL,&ctx->xd);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,noff,PETSC_NULL,&ctx->xo);CHKERRQ(ierr);
  ctx->nd = ndiag;

  ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
  if (!pmat->symmetric) {
    ctx->xyt       = XYT_new();
    ierr           = XYT_factor(ctx->xyt,l2g,pmat->rmap->n,ntotal,(void*)PCTFSLocalMult_TFS,pc);CHKERRQ(ierr);
    pc->ops->apply = PCApply_TFS_XYT;
  } else {
    ctx->xxt       = XXT_new();
    ierr           = XXT_factor(ctx->xxt,l2g,pmat->rmap->n,ntotal,(void*)PCTFSLocalMult_TFS,pc);CHKERRQ(ierr);
    pc->ops->apply = PCApply_TFS_XXT;
  }

  ierr = PetscFree(l2g);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   main - Initializes PETSc, prints "<rank>: <hostname>" from every process,
   synchronizes on a barrier and finalizes PETSc.

   Returns 0 on success or a PETSc error code.
*/
int main(int argc, char **args)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  int            rank;
  char           hostname[256];

  ierr = PetscInitialize(&argc, &args, (char *) 0, ""); CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Start %s\n", __FILE__); CHKERRQ(ierr);

  comm = PETSC_COMM_WORLD;
  ierr = MPI_Comm_rank(comm,&rank); CHKERRQ(ierr);

  /* gethostname() may fail, and on truncation it need not NUL-terminate the
     buffer, so make sure printf never reads an unterminated array. */
  if (gethostname(hostname,sizeof(hostname)-1) != 0) hostname[0] = '\0';
  hostname[sizeof(hostname)-1] = '\0';
  printf("%d: %s\n",rank,hostname);

  ierr = PetscBarrier(0); CHKERRQ(ierr);

  ierr = PetscPrintf(PETSC_COMM_WORLD, "End %s\n", __FILE__); CHKERRQ(ierr);
  ierr = PetscFinalize(); CHKERRQ(ierr);
  return 0;
}
/*
   DMVecViewLocal - Prints the local (ghosted) form of a global vector, one
   process after another.

   Input Parameters:
+  dm     - the DM that provides the local vector and the communicator
.  v      - the global vector to scatter into local form
-  viewer - not used by this routine; output always goes to PETSC_VIEWER_STDOUT_SELF

   Each process views its local vector in rank order, with a barrier on the
   DM after every turn.
*/
PetscErrorCode DMVecViewLocal(DM dm, Vec v, PetscViewer viewer)
{
  MPI_Comm       comm;
  Vec            localVec;
  PetscInt       turn;
  PetscMPIInt    me, nranks;
  PetscErrorCode ierr;

  PetscFunctionBeginUser;
  comm = PetscObjectComm((PetscObject)dm);
  ierr = MPI_Comm_rank(comm, &me);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &nranks);CHKERRQ(ierr);

  /* Scatter the global vector into a borrowed local work vector */
  ierr = DMGetLocalVector(dm, &localVec);CHKERRQ(ierr);
  ierr = DMGlobalToLocalBegin(dm, v, INSERT_VALUES, localVec);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(dm, v, INSERT_VALUES, localVec);CHKERRQ(ierr);

  ierr = PetscPrintf(PETSC_COMM_WORLD, "Local function\n");CHKERRQ(ierr);
  for (turn = 0; turn < nranks; ++turn) {
    /* Only the process whose turn it is prints; everybody synchronizes */
    if (me == turn) {
      ierr = VecView(localVec, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
    }
    ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr);
  }

  ierr = DMRestoreLocalVector(dm, &localVec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   FormFunction - Evaluates nonlinear function, F(x).

   Input Parameters:
.  snes - the SNES context
.  X - input vector
.  ptr - optional user-defined context, as set by SNESSetFunction()

   Output Parameter:
.  F - function vector

   Only the process with user->rank == 0 evaluates the function, over the
   entire mx-by-my grid. All other processes just take part in the two
   barriers and return.
*/
int FormFunction(SNES snes,Vec X,Vec F,void *ptr)
{
  AppCtx            *user = (AppCtx*)ptr;
  int               ierr,i,j,row,mx,my;
  PetscReal         two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
  PetscScalar       u,uxx,uyy,*f;
  const PetscScalar *x;

  /*
      Process 0 has to wait for all other processes to get here
   before proceeding to write in the shared vector
  */
  ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr);

  if (user->rank) {
    /*
       All the non-busy processors have to wait here for process 0 to finish
       evaluating the function; otherwise they will start using the vector values
       before they have been computed
    */
    ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr);
    return 0;
  }

  mx     = user->mx;
  my     = user->my;
  lambda = user->param;
  hx     = one/(PetscReal)(mx-1);
  hy     = one/(PetscReal)(my-1);
  sc     = hx*hy*lambda;
  hxdhy  = hx/hy;
  hydhx  = hy/hx;

  /*
     Get pointers to vector data
  */
  ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
  ierr = VecGetArray(F,&f);CHKERRQ(ierr);

  /*
      The next line tells the SGI compiler that x and f contain no overlapping
    regions and thus it can use addition optimizations.
  */
#pragma arl(4)
#pragma distinct (*x,*f)
#pragma no side effects (exp)

  /*
     Compute function over the entire grid
  */
  for (j=0; j<my; j++) {
    for (i=0; i<mx; i++) {
      row = i + j*mx;
      /* Boundary nodes: the residual is just the solution value */
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        f[row] = x[row];
        continue;
      }
      u      = x[row];
      uxx    = (two*u - x[row-1] - x[row+1])*hydhx;
      uyy    = (two*u - x[row-mx] - x[row+mx])*hxdhy;
      f[row] = uxx + uyy - sc*PetscExpScalar(u);
    }
  }

  /*
     Restore vectors
  */
  ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(F,&f);CHKERRQ(ierr);

  /* The statement separator before CHKERRQ was missing here */
  ierr = PetscLogFlops(11.0*(mx-2)*(my-2));CHKERRQ(ierr);
  ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr);
  return 0;
}
/*
   FormInitialGuess - Forms initial approximation.

   Input Parameters:
   user - user-defined application context
   X - vector

   Output Parameter:
   X - vector

   Only the process with user->rank == 0 fills the vector, over the entire
   mx-by-my grid. All other processes just take part in the two barriers and
   return.
*/
int FormInitialGuess(AppCtx *user,Vec X)
{
  int         i,j,row,mx,my,ierr;
  PetscReal   one = 1.0,lambda,temp1,temp,hx,hy;
  PetscScalar *x;

  /*
      Process 0 has to wait for all other processes to get here
   before proceeding to write in the shared vector
  */
  ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr);
  if (user->rank) {
    /*
       All the non-busy processors have to wait here for process 0 to finish
       filling the vector; otherwise they will start using the vector values
       before they have been computed
    */
    ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr);
    return 0;
  }

  mx     = user->mx;
  my     = user->my;
  lambda = user->param;
  hx     = one/(PetscReal)(mx-1);
  hy     = one/(PetscReal)(my-1);
  temp1  = lambda/(lambda + one);

  /*
     Get a pointer to vector data.
       - For default PETSc vectors, VecGetArray() returns a pointer to
         the data array.  Otherwise, the routine is implementation dependent.
       - You MUST call VecRestoreArray() when you no longer need access to
         the array.
  */
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /*
     Compute initial guess over the entire grid. The former
     "#pragma distinct (*x,*f)" was dropped: this routine has no f.
  */
#pragma arl(4)
#pragma no side effects (sqrt)
  for (j=0; j<my; j++) {
    temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
    for (i=0; i<mx; i++) {
      row = i + j*mx;
      /* Boundary nodes start at zero */
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        x[row] = 0.0;
        continue;
      }
      x[row] = temp1*PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
    }
  }

  /*
     Restore vector
  */
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);

  ierr = PetscBarrier((PetscObject)X);CHKERRQ(ierr);
  return 0;
}
/* FormFunctionLocal - Form the local residual F from the local input X Input Parameters: + dm - The mesh . X - Local input vector - user - The user context Output Parameter: . F - Local output vector Note: We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator, like a GPU, or vectorize on a multicore machine. .seealso: FormJacobianLocal() */ PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user) { const PetscInt debug = user->debug; const PetscInt dim = user->dim; PetscReal *coords, *v0, *J, *invJ, *detJ; PetscScalar *elemVec, *u; const PetscInt numCells = cEnd - cStart; PetscInt cellDof = 0; PetscInt maxQuad = 0; PetscInt jacSize = 1; PetscInt cStart, cEnd, c, field; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr); ierr = VecSet(F, 0.0);CHKERRQ(ierr); ierr = DMDAGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr); for(field = 0; field < numFields; ++field) { PetscInt dof = 1; for(d = 0; d < dim; ++d) {dof *= user->q[field].numBasisFuncs*user->q[field].numComponents;} cellDof += dof; maxQuad = PetscMax(maxQuad, user->q[field].numQuadPoints); } for(d = 0; d < dim; ++d) {jacSize *= maxQuad;} ierr = PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,jacSize,PetscReal,&J);CHKERRQ(ierr); ierr = PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*jacSize,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);CHKERRQ(ierr); for(c = cStart; c < cEnd; ++c) { const PetscScalar *x; PetscInt i; ierr = DMDAComputeCellGeometry(dm, c, v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr); if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c); ierr = DMDAVecGetClosure(dm, PETSC_NULL, X, c, &x);CHKERRQ(ierr); for(i = 0; i < cellDof; ++i) { u[c*cellDof+i] = x[i]; } } for(field = 0; field < numFields; ++field) { const PetscInt numQuadPoints = 
user->q[field].numQuadPoints; const PetscInt numBasisFuncs = user->q[field].numBasisFuncs; void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field]; void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field]; /* Conforming batches */ PetscInt blockSize = numBasisFuncs*numQuadPoints; PetscInt numBlocks = 1; PetscInt batchSize = numBlocks * blockSize; PetscInt numBatches = user->numBatches; PetscInt numChunks = numCells / (numBatches*batchSize); ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr); /* Remainder */ PetscInt numRemainder = numCells % (numBatches * batchSize); PetscInt offset = numCells - numRemainder; ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset], user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr); } for(c = cStart; c < cEnd; ++c) { if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);} ierr = DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr); } ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr); ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr); if (user->showResidual) { PetscInt p; ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr); for(p = 0; p < user->numProcs; ++p) { if (p == user->rank) { Vec f; ierr = VecDuplicate(F, &f);CHKERRQ(ierr); ierr = VecCopy(F, f);CHKERRQ(ierr); ierr = VecChop(f, 1.0e-10);CHKERRQ(ierr); ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecDestroy(&f);CHKERRQ(ierr); } ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr); } } ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode testCreate3D( ) { int ga; DA da; DALocalInfo info; Vec vec; PetscErrorCode ierr; PetscFunctionBegin; int d1 = 229, d2 = 229, d3 = 229; int rank; MPI_Comm_rank(PETSC_COMM_WORLD,&rank); ierr = DACreate3d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR, d1,d2,d3, PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE, 1,1, 0,0,0, &da); CHKERRQ(ierr); ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr); ierr = DACreateGlobalArray( da, &ga, &vec); CHKERRQ(ierr); PetscReal ***v; ierr = DAVecGetArray(da,vec,&v); CHKERRQ(ierr); int xe = info.xs+info.xm, ye = info.ys+info.ym, ze = info.zs+info.zm; for (int k = info.zs; k < ze; ++k) { for (int j = info.ys; j < ye; ++j) { for (int i = info.xs; i < xe; ++i) { v[k][j][i] = 1.*i + d1*j + d1*d2*k; } } } ierr = DAVecRestoreArray(da,vec,&v); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Sequential values filled in petsc vec.\n"); CHKERRQ(ierr); ierr = PetscBarrier(0); CHKERRQ(ierr); int lo[3],ld, p = 10; int patch[10][10][10]; double val; for (int k = 0; k < d3; k+=p) { for (int j = 0; j < d2; j+=p) { for (int i = 0; i < d1; i+=p) { lo[0] = k; lo[1] = j; lo[2] = i; NGA_Get(ga,lo,lo,&val,&ld); if( PetscAbs( i + d1*j + d1*d2*k - val) > .1 ) // printf("."); printf("(%3.0f,%3.0f) ", 1.*i + d1*j + d1*d2*k, val); } } } ierr = PetscPrintf(PETSC_COMM_WORLD, "Ended NGA_Get() test.\n"); CHKERRQ(ierr); ierr = PetscBarrier(0); CHKERRQ(ierr); if( rank == 0 ) { for (int k = 0; k < d3; ++k) { printf(">%d\n",k); for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { lo[0] = k; lo[1] = j; lo[2] = i; val = 1.*i + d1*j + d1*d2*k; val *= -1; NGA_Put(ga,lo,lo,&val,&ld); } } } } ierr = PetscPrintf(PETSC_COMM_WORLD, "Ended NGA_Put() negative seq values.\n"); CHKERRQ(ierr); ierr = PetscBarrier(0); CHKERRQ(ierr); ierr = DAVecGetArray(da,vec,&v); CHKERRQ(ierr); for (int k = info.zs; k < ze; ++k) { for (int j = info.ys; j < ye; ++j) { for (int i = info.xs; i < xe; ++i) { val = -1 * (1.*i + d1*j + d1*d2*k); if( PetscAbs( val - v[k][j][i] ) > .1 ) 
printf("."); } } } ierr = DAVecRestoreArray(da,vec,&v); CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Ended petsc vec update test.\n"); CHKERRQ(ierr); if( rank == 0 ) GA_Print_stats(); ierr = VecDestroy(vec); CHKERRQ(ierr); GA_Destroy(ga); PetscFunctionReturn(0); }
PetscErrorCode vizGA2DA() { PetscErrorCode ierr; int rank; MPI_Comm_rank(PETSC_COMM_WORLD,&rank); int d1 = 40, d2 = 50; DA da; Vec vec; const PetscInt *lx, *ly, *lz; PetscInt m,n,p; DALocalInfo info; ierr = DACreate2d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR, d1,d2,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0, &da); CHKERRQ(ierr); ierr = DACreateGlobalVector(da, &vec); CHKERRQ(ierr); ierr = DAGetOwnershipRanges(da, &lx, &ly, &lz); CHKERRQ(ierr); ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr); ierr = DAGetInfo(da,0,0,0,0,&m,&n,&p,0,0,0,0); CHKERRQ(ierr); /**/ ierr = DAView(da, PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); for (int i = 0; i < m; ++i) { PetscPrintf(PETSC_COMM_WORLD,"%d\tlx: %d\n",i,lx[i]); } for (int i = 0; i < n; ++i) { PetscPrintf(PETSC_COMM_WORLD,"%d\tly: %d\n",i,ly[i]); } /**/ int ga = GA_Create_handle(); int ndim = 2; int dims[2] = {d2,d1}; GA_Set_data(ga,2,dims,MT_DBL); int *map; PetscMalloc( sizeof(int)*(m+n), &map); map[0] = 0; for( int i = 1; i < n; i++ ) { map[i] = ly[i-1] + map[i-1]; } map[n] = 0; for( int i = n+1; i < m+n; i++ ) { map[i] = lx[i-n-1] + map[i-1]; } /* correct ordering, but nodeid's dont line up with mpi rank for petsc's da * DA: +---+---+ GA: +---+---+ * +-2-+-3-+ +-1-+-3-+ * +---+---+ +---+---+ * +-0-+-1-+ +-0-+-2-+ * +---+---+ +---+---+ int *map; PetscMalloc( sizeof(int)*(m+n), &map); map[0] = 0; for( int i = 1; i < m; i++ ) { map[i] = lx[i] + map[i-1]; } map[m] = 0; for( int i = m+1; i < m+n; i++ ) { map[i] = ly[i-m] + map[i-1]; } */ int block[2] = {n,m}; GA_Set_irreg_distr(ga,map,block); ierr = GA_Allocate( ga ); if( !ierr ) GA_Error("\n\n\nga allocaltion failed\n\n",ierr); if( !ga ) GA_Error("\n\n\n ga null \n\n",ierr); if( rank != GA_Nodeid() ) GA_Error("MPI rank does not match GA_Nodeid()",1); GA_Print_distribution(ga); int lo[2], hi[2]; NGA_Distribution(ga,rank,lo,hi); if( lo[1] != info.xs || hi[1] != info.xs+info.xm-1 || lo[0] != info.ys || hi[0] != info.ys+info.ym-1 ) { PetscSynchronizedPrintf(PETSC_COMM_SELF,"[%d] 
lo:(%2d,%2d) hi:(%2d,%2d) \t DA: (%2d,%2d), (%2d, %2d)\n", rank, lo[1], lo[0], hi[1], hi[0], info.xs, info.ys, info.xs+info.xm-1, info.ys+info.ym-1); } PetscBarrier(0); PetscSynchronizedFlush(PETSC_COMM_WORLD); AO ao; DAGetAO(da,&ao); if( rank == 0 ) { int *idx, len = d1*d2; PetscReal *val; PetscMalloc(sizeof(PetscReal)*len, &val); PetscMalloc(sizeof(int)*len, &idx); for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { idx[i + d1*j] = i + d1*j; val[i + d1*j] = i + d1*j; } } AOApplicationToPetsc(ao,len,idx); VecSetValues(vec,len,idx,val,INSERT_VALUES); int a[2], b[2],ld[1]={0}; double c = 0; for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { a[0] = j; a[1] = i; // printf("%5.0f ",c); NGA_Put(ga,a,a,&c,ld); c++; } } } // GA_Print(ga); VecAssemblyBegin(vec); VecAssemblyEnd(vec); int ld; double *ptr; NGA_Access(ga,lo,hi,&ptr,&ld); PetscReal **d; int c=0; ierr = DAVecGetArray(da,vec,&d); CHKERRQ(ierr); for (int j = info.ys; j < info.ys+info.ym; ++j) { for (int i = info.xs; i < info.xs+info.xm; ++i) { if( d[j][i] != ptr[(i-info.xs)+ld*(j-info.ys)] ) GA_Error("DA array is not equal to GA array",1); // printf("%d (%d,%d):\t%3.0f\t%3.0f\n", c, i, j, d[j][i], ptr[(i-info.xs)+ld*(j-info.ys)]); c++; } } ierr = DAVecRestoreArray(da,vec,&d); CHKERRQ(ierr); c=0; PetscReal *v; int start, end; VecGetOwnershipRange(vec, &start, &end); VecGetArray( vec, &v ); for( int i = start; i < end; i++) { // printf("%d:\t%3.0f\t%3.0f\t%s\n", start, v[i-start], ptr[i-start], (v[i-start]-ptr[i-start]==0?"":"NO") ); } VecRestoreArray( vec, &v ); NGA_Release_update(ga,lo,hi); Vec gada; VecCreateMPIWithArray(((PetscObject)da)->comm,da->Nlocal,PETSC_DETERMINE,ptr,&gada); VecView(gada,PETSC_VIEWER_STDOUT_SELF); GA_Destroy(ga); ierr = VecDestroy(vec); CHKERRQ(ierr); ierr = DADestroy(da); CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode Metos3DTimeStepPhi(Metos3D *metos3d, Vec *yin, Vec *yout, PetscInt nparam, PetscReal *u0) { // bgc PetscInt npref = metos3d->fileFormatPrefixCount; PetscInt ntracer = metos3d->tracerCount; // time step PetscScalar t0 = metos3d->timeStepStart; PetscScalar dt = metos3d->timeStep; PetscInt nstep = metos3d->timeStepCount; // work vars PetscScalar tj; PetscInt itracer, istep; Vec *ywork; PetscFunctionBegin; // wait for all processors PetscBarrier(PETSC_NULL); // start log event PetscLogEventBegin(metos3d->eventTimeStepPhi, 0, 0, 0, 0); // prepare work vector VecDuplicateVecs(*yin, ntracer, &ywork); // initial point in time, project time to [0,1[ tj = fmod(t0, 1.0); // init bgc, yout not set (yet) Metos3DBGCStepInit(metos3d, tj, dt, yin, yout, nparam, u0); // time step loop for (istep = 0; istep < nstep; istep++) { // point in time tj = fmod(t0 + istep*dt, 1.0); // work vars char filePrefixFormat[PETSC_MAX_PATH_LEN]; char filePrefix [PETSC_MAX_PATH_LEN]; // file prefix if (npref > 1) { if ((metos3d->spinupStep + 1)%metos3d->moduloStep[0] == 0) { PetscInt modstep = metos3d->moduloStepCount; if (modstep > 0) { PetscInt imodstep = metos3d->moduloStep[1]; if ((istep+1)%imodstep == 0) { sprintf(filePrefixFormat, "%s%s", metos3d->filePrefix, metos3d->fileFormatPrefix[1]); sprintf(filePrefix, filePrefixFormat, istep); // output Metos3DBGCOutputPrefix(metos3d, filePrefix, ntracer, yin); } } else { sprintf(filePrefixFormat, "%s%s", metos3d->filePrefix, metos3d->fileFormatPrefix[1]); sprintf(filePrefix, filePrefixFormat, istep); // output Metos3DBGCOutputPrefix(metos3d, filePrefix, ntracer, yin); } } } // yout = Phi(yi) // yin = yout Metos3DTimeStepPhiStep(metos3d, tj, dt, istep, yin, yout, ywork, nparam, u0); for(itracer = 0; itracer < ntracer; itracer++) VecCopy(yout[itracer], yin[itracer]); } // final bgc, yout not set (yet) Metos3DBGCStepFinal(metos3d, tj, dt, yin, yout, nparam, u0); // free work vector VecDestroyVecs(ntracer, &ywork); // wait for all processors 
PetscBarrier(PETSC_NULL); // stop log event PetscLogEventEnd(metos3d->eventTimeStepPhi, 0, 0, 0, 0); // debug Metos3DDebug(metos3d, kDebugLevel, "Metos3DTimeStepPhi\n"); PetscFunctionReturn(0); }
/*
   FiberField_Nei_Alltoall - Exchanges vertex buffers with the DMDA neighbours.

   Every entry of f->sendbufs is sent with MPI_Isend to the matching rank
   returned by DMDAGetNeighbors(). Then f->NUMRECV messages are received:
   each one is probed to learn its element count, the source rank is mapped
   back to a neighbour index, the matching receive buffer is resized and an
   MPI_Irecv is posted. The routine returns after all receives and sends have
   completed.

   Input Parameter:
.  f - the fiber field (buffers, communicator, MPI datatype, DMDA)

   Returns 0 on success or a PETSc/MPI error code.

   NOTE(review): when the same rank appears more than once in the neighbour
   list, the lookup always selects the first matching buffer.
*/
PetscErrorCode FiberField_Nei_Alltoall( FiberField f )
{
  int i;
  int neiIdx; // index where nei[] == src
  int count;  // probing count num elements received
  const int tag = 128456826; // TODO: should tag # be something unique for each call to this routine?
  const int NUMRECV = f->NUMRECV;
  Array *sendbufs = f->sendbufs;
  Array *recvbufs = f->recvbufs;
  MPI_Request reqSend[NUMNEI];
  MPI_Request reqRecv[NUMNEI];
  MPI_Status status;
  MPI_Comm comm = f->comm;
  const PetscMPIInt *neiRanks;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  // reqRecv only has room for NUMNEI requests
  if( NUMRECV > NUMNEI ) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "NUMRECV %d exceeds number of neighbors %d", NUMRECV, NUMNEI);

  //TODO: why is this barrier necessary if all Isend/Irecv matched with WaitAll?
  //BUG: without barrier, sources from other iterations caught in probe
  ierr = PetscBarrier(0); CHKERRQ(ierr);

  ierr = DMDAGetNeighbors(f->da, &neiRanks); CHKERRQ(ierr);

  // send verts to neighbors
  for (i = 0; i < NUMNEI; i++) {
    count = ArrayLength(sendbufs[i]);
    ierr = MPI_Isend(ArrayGetData(sendbufs[i]), count, f->vertmpitype, neiRanks[i], tag, comm, &reqSend[i] ); CHKERRQ(ierr);
#ifdef DEBUG_ALLTOALL
    ierr = PetscInfo1(0, "i = %d\n", i ); CHKERRQ(ierr);
    ierr = PetscInfo1(0, "dst = %d\n", neiRanks[i] ); CHKERRQ(ierr);
    ierr = PetscInfo1(0, "count = %d\n", ArrayLength(sendbufs[i]) ); CHKERRQ(ierr);
#endif
  }

  // receive verts from neighbors
  for (i = 0; i < NUMRECV; i++) {
    // probe for count verts sent
    ierr = MPI_Probe(MPI_ANY_SOURCE, tag, comm, &status); CHKERRQ(ierr);
    ierr = MPI_Get_count( &status, f->vertmpitype, &count); CHKERRQ(ierr);

    // convert source rank into nei index (for recvbufs array)
    for (neiIdx = 0; neiIdx < NUMNEI; neiIdx++) {
      if( neiRanks[neiIdx] == status.MPI_SOURCE )
        break;
    }
    // a message from a rank that is not a neighbor would otherwise index
    // one past the end of recvbufs
    if( neiIdx == NUMNEI ) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Received message from non-neighbor rank %d", status.MPI_SOURCE);

#ifdef DEBUG_ALLTOALL
    ierr = PetscInfo1(0, "i = %d\n", i ); CHKERRQ(ierr);
    ierr = PetscInfo1(0, "src = %d\n", status.MPI_SOURCE ); CHKERRQ(ierr);
    ierr = PetscInfo1(0, "count = %d\n", count ); CHKERRQ(ierr);
#endif

    ierr = ArraySetSize( recvbufs[neiIdx], count); CHKERRQ(ierr);
    ierr = MPI_Irecv( ArrayGetData(recvbufs[neiIdx]), count, f->vertmpitype, status.MPI_SOURCE, tag, comm, &reqRecv[i]); CHKERRQ(ierr);
  }

  ierr = MPI_Waitall( NUMRECV, reqRecv, MPI_STATUSES_IGNORE ); CHKERRQ(ierr);
  ierr = MPI_Waitall( NUMNEI,  reqSend, MPI_STATUSES_IGNORE ); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
int main(int argc, char **argv) { AppCtx ctx; PetscErrorCode (**funcs)(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx); DM dm; PetscFE fe; DMInterpolationInfo interpolator; Vec lu, fieldVals; PetscScalar *vals; const PetscScalar *ivals, *vcoords; PetscReal *pcoords; PetscBool pointsAllProcs=PETSC_TRUE; PetscInt spaceDim, c, Np, p; PetscMPIInt rank, size; PetscViewer selfviewer; PetscErrorCode ierr; ierr = PetscInitialize(&argc, &argv, NULL,help);if (ierr) return ierr; ierr = ProcessOptions(PETSC_COMM_WORLD, &ctx);CHKERRQ(ierr); ierr = CreateMesh(PETSC_COMM_WORLD, &ctx, &dm);CHKERRQ(ierr); ierr = DMGetCoordinateDim(dm, &spaceDim);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); /* Create points */ ierr = CreatePoints(dm, &Np, &pcoords, &pointsAllProcs, &ctx);CHKERRQ(ierr); /* Create interpolator */ ierr = DMInterpolationCreate(PETSC_COMM_WORLD, &interpolator);CHKERRQ(ierr); ierr = DMInterpolationSetDim(interpolator, spaceDim);CHKERRQ(ierr); ierr = DMInterpolationAddPoints(interpolator, Np, pcoords);CHKERRQ(ierr); ierr = DMInterpolationSetUp(interpolator, dm, pointsAllProcs);CHKERRQ(ierr); /* Check locations */ for (c = 0; c < interpolator->n; ++c) { ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "[%d]Point %D is in Cell %D\n", rank, c, interpolator->cells[c]);CHKERRQ(ierr); } ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);CHKERRQ(ierr); ierr = VecView(interpolator->coords, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Setup Discretization */ ierr = PetscFECreateDefault(PetscObjectComm((PetscObject) dm), ctx.dim, Nc, ctx.cellSimplex, NULL, -1, &fe);CHKERRQ(ierr); ierr = DMSetField(dm, 0, NULL, (PetscObject) fe);CHKERRQ(ierr); ierr = DMCreateDS(dm);CHKERRQ(ierr); ierr = PetscFEDestroy(&fe);CHKERRQ(ierr); /* Create function */ ierr = PetscCalloc2(Nc, &funcs, Nc, &vals);CHKERRQ(ierr); for (c = 0; c < Nc; ++c) funcs[c] = linear; ierr 
= DMGetLocalVector(dm, &lu);CHKERRQ(ierr); ierr = DMProjectFunctionLocal(dm, 0.0, funcs, NULL, INSERT_ALL_VALUES, lu);CHKERRQ(ierr); ierr = PetscViewerASCIIPushSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerGetSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(selfviewer, "[%d]solution\n", rank);CHKERRQ(ierr); ierr = VecView(lu,selfviewer);CHKERRQ(ierr); ierr = PetscViewerRestoreSubViewer(PETSC_VIEWER_STDOUT_WORLD,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr); ierr = PetscViewerFlush(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscViewerASCIIPopSynchronized(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Check interpolant */ ierr = VecCreateSeq(PETSC_COMM_SELF, interpolator->n * Nc, &fieldVals);CHKERRQ(ierr); ierr = DMInterpolationSetDof(interpolator, Nc);CHKERRQ(ierr); ierr = DMInterpolationEvaluate(interpolator, dm, lu, fieldVals);CHKERRQ(ierr); for (p = 0; p < size; ++p) { if (p == rank) { ierr = PetscPrintf(PETSC_COMM_SELF, "[%d]Field values\n", rank);CHKERRQ(ierr); ierr = VecView(fieldVals, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr); } ierr = VecGetArrayRead(interpolator->coords, &vcoords);CHKERRQ(ierr); ierr = VecGetArrayRead(fieldVals, &ivals);CHKERRQ(ierr); for (p = 0; p < interpolator->n; ++p) { for (c = 0; c < Nc; ++c) { #if defined(PETSC_USE_COMPLEX) PetscReal vcoordsReal[3]; PetscInt i; for (i = 0; i < spaceDim; i++) vcoordsReal[i] = PetscRealPart(vcoords[p * spaceDim + i]); #else const PetscReal *vcoordsReal = &vcoords[p*spaceDim]; #endif (*funcs[c])(ctx.dim, 0.0, vcoordsReal, 1, vals, NULL); if (PetscAbsScalar(ivals[p*Nc+c] - vals[c]) > PETSC_SQRT_MACHINE_EPSILON) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid interpolated value %g != %g (%D, %D)", (double) PetscRealPart(ivals[p*Nc+c]), (double) PetscRealPart(vals[c]), p, c); } } ierr = VecRestoreArrayRead(interpolator->coords, &vcoords);CHKERRQ(ierr); ierr = 
VecRestoreArrayRead(fieldVals, &ivals);CHKERRQ(ierr); /* Cleanup */ ierr = PetscFree(pcoords);CHKERRQ(ierr); ierr = PetscFree2(funcs, vals);CHKERRQ(ierr); ierr = VecDestroy(&fieldVals);CHKERRQ(ierr); ierr = DMRestoreLocalVector(dm, &lu);CHKERRQ(ierr); ierr = DMInterpolationDestroy(&interpolator);CHKERRQ(ierr); ierr = DMDestroy(&dm);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
//int BenchmarkBoundaryChecks( int n, PetscLogDouble t_bulk, PetscLogDouble ) int PetscMain() { int i,j,k, n=128; PetscLogDouble t1,t2,s1,s2; Vec U,V,W,DIV1,DIV2; PetscReal ***u,***v,***w,***div1, ***div2; DA da; DALocalInfo info; PetscErrorCode ierr; ierr = DACreate3d(PETSC_COMM_SELF,//MPI Communicator DA_NONPERIODIC, //DA_NONPERIODIC, DA_XPERIODIC, DA_YPERIODIC, DA_XYPERIODIC DA_STENCIL_STAR, //DA_STENCIL_BOX or DA_STENCIL_STAR n,n,n, //Global array dimension 1,1,1,//Number procs per dim 1, //Number of chemical species 1, //stencil width 0,0,0, //specific array of nodes &da); CHKERRQ(ierr); DACreateGlobalVector(da,&U); DACreateGlobalVector(da,&V); DACreateGlobalVector(da,&W); DACreateGlobalVector(da,&DIV1); DACreateGlobalVector(da,&DIV2); VecSet(DIV1,0); VecSet(DIV2,0); DAVecGetArray(da,U,&u); DAVecGetArray(da,V,&v); DAVecGetArray(da,W,&w); DAVecGetArray(da,DIV1,&div1); DAVecGetArray(da,DIV2,&div2); DAGetLocalInfo(da,&info); PetscBarrier(0); for( k = 0; k < n; ++k) { for( j = 0; j < n; ++j) { for( i = 0; i < n; ++i) { u[k][j][i] = i * (i-n) * j * (j-n) * k * (k-n); v[k][j][i] = 1 - u[k][j][i]; w[k][j][i] = u[k][j][i] * v[k][j][i]; } } } PetscBarrier(0); PetscGetTime(&t1); for( k = 1; k < n-1; ++k) { for( j = 1; j < n-1; ++j) { for( i = 1; i < n-1; ++i) { div1[k][j][i] = u[k][j][i+1] - u[k][j][i-1] + v[k][j+1][i] - v[k][j-1][i] + w[k+1][j][i] - w[k-1][j][i]; div1[k][j][i] /= 2; } } } PetscGetTime(&t2); PetscReal uE,uW,vN,vS,wF,wB; PetscReal hx,hy,hz; PetscBarrier(0); PetscGetTime(&s1); for( k = 1; k < n-1; ++k) { for( j = 1; j < n-1; ++j) { for( i = 1; i < n-1; ++i) { /* uE = i == info.mx-1 ? u[k][j][i] : u[k][j][i+1]; uW = i == 0 ? u[k][j][i] : u[k][j][i-1]; vN = j == info.my-1 ? v[k][j][i] : v[k][j+1][i]; vS = j == 0 ? v[k][j][i] : v[k][j-1][i]; wB = k == info.mz-1 ? w[k][j][i] : w[k+1][j][i]; wF = k == 0 ? 
w[k][j][i] : w[k-1][j][i]; */ if( i == info.mx-1 ) { uE = u[k][j][i]; hx= 1; }else{ uE = u[k][j][i+1];hx= 2;} if( i == info.mx-1 ) { uE = u[k][j][i]; hx= 1; }else{ uE = u[k][j][i+1];hx= 2;} if( j == info.mx-1 ) { uE = u[k][j][i]; hy= 1; }else{ uE = u[k][j][i+1];hy= 2;} if( j == info.mx-1 ) { uE = u[k][j][i]; hy= 1; }else{ uE = u[k][j][i+1];hy= 2;} if( k == info.mx-1 ) { uE = u[k][j][i]; hz= 1; }else{ uE = u[k][j][i+1];hz= 2;} if( k == info.mx-1 ) { uE = u[k][j][i]; hz= 1; }else{ uE = u[k][j][i+1];hz= 2;} div2[k][j][i] = uE - uW + vN - vS + wB - wF; div2[k][j][i] /= 2; // printf("%f\t%f\t%f\n",div1[k][j][i], div2[k][j][i], div2[k][j][i] - div1[k][j][i]); } } } PetscGetTime(&s2); DAVecRestoreArray(da,DIV1,&div1); DAVecRestoreArray(da,DIV2,&div2); VecAXPY(DIV1,-1,DIV2); PetscReal norm; VecNorm(DIV1,NORM_2,&norm); printf("BULK: %f\nIF's: %f\nDIFF:\t%f\nRATIO:\t%f\nnorm: %f\n", (t2-t1), (s2-s1), (s2-s1)-(t2-t1),(s2-s1)/(t2-t1),norm); }