void FATR ga_antisymmetrize_(Integer *g_a) { DoublePrecision alpha = 0.5; int i, me = GA_Nodeid(); extern void * FATR ga_malloc(Integer nelem, int type, char *name); extern void FATR ga_free(void *ptr); void FATR gai_subtr(int *lo, int *hi, void *a, void *b, DoublePrecision alpha, int type, Integer nelem, int ndim); int alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM]; int blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM]; int ndim, dims[GA_MAX_DIM], type; Integer nelem=1; Logical have_data; void *a_ptr, *b_ptr; GA_Sync(); NGA_Inquire((int)(*g_a), &type, &ndim, dims); if (dims[0] != dims[1]) GA_Error("ga_sym: can only sym square matrix", 0L); /* Find the local distribution */ NGA_Distribution((int)(*g_a), me, alo, ahi); have_data = ahi[0]>=0; for(i=1; i<ndim; i++) have_data = have_data && ahi[i]>=0; if(have_data) { NGA_Access((int)(*g_a), alo, ahi, &a_ptr, lda); for(i=0; i<ndim; i++) nelem *= ahi[i]-alo[i] +1; b_ptr = (void *) ga_malloc(nelem, MT_C_DBL, "v"); for(i=2; i<ndim; i++) {bhi[i]=ahi[i]; blo[i]=alo[i]; } /* switch rows and cols */ blo[1]=alo[0]; bhi[1]=ahi[0]; blo[0]=alo[1]; bhi[0]=ahi[1]; for (i=0; i < ndim-1; i++) ldb[i] = bhi[i+1] - blo[i+1] + 1; NGA_Get((int)(*g_a), blo, bhi, b_ptr, ldb); } GA_Sync(); if(have_data) { gai_subtr(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim); NGA_Release_update((int)(*g_a), alo, ahi); ga_free(b_ptr); } GA_Sync(); }
irregular_array1(int rank) { int g_A, g_B; int dims[DIM]={5,10}, dims2[DIM], ndim, type, value=5, block[DIM]={2,3}, map[5]={0,2,0,4,6}, val=7; int n_block[DIM], block_dims[DIM], i; g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map); GA_Fill(g_A, &value); GA_Print(g_A); GA_Fill(g_B, &val); GA_Print(g_B); GA_Sync(); NGA_Inquire(g_A, &type, &ndim, dims2); //printf(" %d -- %d,,\n", type, ndim); /* GA_Get_block_info(g_B, n_block, block_dims); for(i=0; i<DIM; i++) printf(" %d: %d ___ %d --- \n", rank, n_block[i], block_dims[i]); */ GA_Destroy(g_A); GA_Destroy(g_B); }
/* * Check to see if inversion is correct. Start by copying g_a into local * buffer a, and g_b into local buffer b. */ void verify(int g_a, int g_b) { int i, type, ndim, dims[MAXDIM], lo[MAXDIM], hi[MAXDIM], ld[MAXDIM]; int a[MAXPROC*TOTALELEMS],b[MAXPROC*TOTALELEMS]; /* Get dimensions of GA */ NGA_Inquire(g_a, &type, &ndim, dims); lo[0] = 0; hi[0] = dims[0]-1; /* ### copy the block of data described by the arrays "lo" and "hi" from * ### the global array "g_a" into the local array "a". Copy the same block * ### of data from "g_b" into the local array "b". Use the array of strides * ### "ld" to describe the physical layout of "a" and "b". */ NGA_Get(g_a,lo,hi,a,ld); NGA_Get(g_b,lo,hi,b,ld); for(i=0; i<dims[0]; i++) if (a[i] != b[dims[0]-i-1]) { printf("Mismatch: a[%d]=%d is not equal to b[%d]=%d\n", i, a[i], dims[0]-i-1, b[dims[0]-i-1]); GA_Error("verify failed",0); } printf(" Transpose OK\n"); }
// ------------------------------------------------------------- // MatMult_DenseGA // ------------------------------------------------------------- static PetscErrorCode MatMult_DenseGA(Mat mat, Vec x, Vec y) { // FIXME: I'm assuming the Mat and Vec's are compatible and that's // been checked somewhere else. Probably a mistake. PetscErrorCode ierr = 0; struct MatGACtx *ctx; ierr = MatShellGetContext(mat, &ctx); CHKERRQ(ierr); PetscInt Arows, Acols; ierr = MatGetSize(mat, &Arows, &Acols); CHKERRQ(ierr); int g_x, g_y; ierr = Vec2GA(x, ctx->gaGroup, &g_x, false); CHKERRQ(ierr); ierr = Vec2GA(y, ctx->gaGroup, &g_y, false); CHKERRQ(ierr); PetscScalarGA alpha(one), beta(zero); int ndim, itype, lo[2] = {0,0}, ahi[2], xhi[2], yhi[2]; NGA_Inquire(ctx->ga, &itype, &ndim, ahi); ahi[0] -= 1; ahi[1] -= 1; NGA_Inquire(g_x, &itype, &ndim, xhi); xhi[0] -= 1; xhi[1] -= 1; NGA_Inquire(g_y, &itype, &ndim, yhi); yhi[0] -= 1; yhi[1] -= 1; // GA_Print(ctx->ga); // GA_Print(g_x); NGA_Matmul_patch('N', 'N', &alpha, &beta, ctx->ga, lo, ahi, g_x, lo, xhi, g_y, lo, yhi); GA_Pgroup_sync(ctx->gaGroup); // GA_Print(g_y); ierr = GA2Vec(g_y, y); CHKERRQ(ierr); GA_Destroy(g_y); GA_Destroy(g_x); MPI_Comm comm; ierr = PetscObjectGetComm((PetscObject)mat,&comm); CHKERRQ(ierr); ierr = MPI_Barrier(comm); return ierr; }
// ------------------------------------------------------------- // MatMatMult_DenseGA // ------------------------------------------------------------- static PetscErrorCode MatMatMult_DenseGA(Mat A, Mat B, MatReuse scall, PetscReal fill, Mat *C) { // matrix sizes appear to be checked before here, so we won't do it again PetscErrorCode ierr = 0; MPI_Comm comm; ierr = PetscObjectGetComm((PetscObject)A, &comm); CHKERRQ(ierr); MatType atype, btype; ierr = MatGetType(A, &atype); ierr = MatGetType(B, &btype); PetscBool issame; PetscStrcmp(atype, btype, &issame); Mat Bga, Cga; struct MatGACtx *Actx, *Bctx, *Cctx; ierr = MatShellGetContext(A, &Actx); CHKERRQ(ierr); if (issame) { Bga = B; } else { ierr = MatConvertToDenseGA(B, &Bga); CHKERRQ(ierr); } ierr = MatShellGetContext(Bga, &Bctx); CHKERRQ(ierr); PetscInt lrows, lcols, grows, gcols, junk; ierr = MatGetSize(A, &grows, &junk); CHKERRQ(ierr); ierr = MatGetSize(B, &junk, &gcols); CHKERRQ(ierr); ierr = MatGetLocalSize(A, &lrows, &junk); CHKERRQ(ierr); ierr = MatGetLocalSize(B, &junk, &lcols); CHKERRQ(ierr); ierr = MatCreateDenseGA(comm, lrows, lcols, grows, gcols, &Cga); CHKERRQ(ierr); ierr = MatShellGetContext(Cga, &Cctx); CHKERRQ(ierr); PetscScalarGA alpha(one), beta(zero); int ndim, itype, lo[2] = {0,0}, ahi[2], bhi[2], chi[2]; NGA_Inquire(Actx->ga, &itype, &ndim, ahi); ahi[0] -= 1; ahi[1] -= 1; NGA_Inquire(Bctx->ga, &itype, &ndim, bhi); bhi[0] -= 1; bhi[1] -= 1; NGA_Inquire(Cctx->ga, &itype, &ndim, chi); chi[0] -= 1; chi[1] -= 1; // GA_Print(Actx->ga); // GA_Print(Bctx->ga); NGA_Matmul_patch('N', 'N', &alpha, &beta, Actx->ga, lo, ahi, Bctx->ga, lo, bhi, Cctx->ga, lo, chi); // GA_Print(Cctx->ga); switch (scall) { case MAT_REUSE_MATRIX: ierr = MatCopy(Cga, *C, SAME_NONZERO_PATTERN); CHKERRQ(ierr); case MAT_INITIAL_MATRIX: default: ierr = MatDuplicate(Cga, MAT_COPY_VALUES, C); CHKERRQ(ierr); break; } if (!issame) ierr = MatDestroy(&Bga); CHKERRQ(ierr); ierr = MatDestroy(&Cga); CHKERRQ(ierr); return ierr; }
/* * test ga_dgemm * Note: - change nummax for large arrays * - turn off "dgemm_verify" for large arrays due to memory * limitations, as dgemm_verify=1 for large arrays produces * segfault, dumps core,or any crap. */ int main(int argc, char **argv) { int num_m; int num_n; int num_k; int i; int ii; double *h0; int g_c; int g_b; int g_a; double a; double t1; double mf; double avg_t[ntrans]; double avg_mf[ntrans]; int itime; int ntimes; int nums_m[/*howmany*/] = {512,1024}; int nums_n[/*howmany*/] = {512,1024}; int nums_k[/*howmany*/] = {512,1024}; char transa[/*ntrans*/] = "ntnt"; char transb[/*ntrans*/] = "nntt"; char ta; char tb; double *tmpa; double *tmpb; double *tmpc; int ndim; int dims[2]; #ifdef BLOCK_CYCLIC int block_size[2]; #endif #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &me ); ElMPICommSize( MPI_COMM_WORLD, &nproc ); // instantiate el::global array ElGlobalArraysConstruct_d( &eldga ); // initialize global arrays ElGlobalArraysInitialize_d( eldga ); #else MP_INIT(argc,argv); if (!MA_init(MT_DBL,1,20000000)) { GA_Error("failed: ma_init(MT_DBL,1,20000000)",10); } GA_INIT(argc,argv); me = GA_Nodeid(); #endif h0 = (double*)malloc(sizeof(double) * nummax*nummax); tmpa = (double*)malloc(sizeof(double) * nummax*nummax); tmpb = (double*)malloc(sizeof(double) * nummax*nummax); tmpc = (double*)malloc(sizeof(double) * nummax*nummax); ii = 0; for (i=0; i<nummax*nummax; i++) { ii = ii + 1; if (ii > nummax) { ii = 0; } h0[i] = ii; } /* Compute times assuming 500 mflops and 5 second target time */ /* ntimes = max(3.0d0,5.0d0/(4.0d-9*num**3)); */ ntimes = 5; for (ii=0; ii<howmany; ii++) { num_m = nums_m[ii]; num_n = nums_n[ii]; num_k = nums_k[ii]; a = 0.5/(num_m*num_n); if (num_m > nummax || num_n > nummax || num_k > nummax) { GA_Error("Insufficient memory: check nummax", 1); } #ifndef BLOCK_CYCLIC ndim = 2; /* dims[0] = num_m; dims[1] = num_n; */ dims[1] = num_m; dims[0] = num_n; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_d( eldga, ndim, dims, "g_c", NULL, &g_c ); #else if (!((g_c = NGA_Create(MT_DBL,ndim,dims,"g_c",NULL)))) { GA_Error("failed: create g_c",20); } #endif /* dims[0] = num_k; dims[1] = num_n; */ dims[1] = num_k; dims[0] = num_n; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_d( eldga, ndim, dims, "g_b", NULL, &g_b ); #else if (!((g_b = NGA_Create(MT_DBL,ndim,dims,"g_b",NULL)))) { GA_Error("failed: create g_b",30); } #endif /* dims[0] = num_m; dims[1] = num_k; */ dims[1] = num_m; dims[0] = num_k; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_d( eldga, ndim, dims, "g_a", NULL, &g_a ); #else if (!((g_a = NGA_Create(MT_DBL,ndim,dims,"g_a",NULL)))) { GA_Error("failed: create g_a",40); } #endif #else ndim = 2; block_size[0] = 128; block_size[1] = 128; dims[0] = num_m; dims[1] = num_n; g_c = GA_Create_handle(); GA_Set_data(g_c,ndim,dims,MT_DBL); GA_Set_array_name(g_c,"g_c"); GA_Set_block_cyclic(g_c,block_size); if (!GA_Allocate(g_c)) { GA_Error("failed: create g_c",40); } dims[0] = num_k; dims[1] = num_n; g_b = GA_Create_handle(); GA_Set_data(g_b,ndim,dims,MT_DBL); GA_Set_array_name(g_b,"g_b"); GA_Set_block_cyclic(g_b,block_size); if (!ga_allocate(g_b)) { GA_Error("failed: create g_b",40); } dims[0] = num_m; dims[1] = num_k; g_a = GA_Create_handle(); GA_Set_data(g_a,ndim,dims,MT_DBL); GA_Set_array_name(g_a,"g_a"); GA_Set_block_cyclic(g_a,block_size); if (!ga_allocate(g_a)) { GA_Error('failed: create g_a',40); } #endif /* Initialize matrices A and B */ if (me == 0) { load_ga(g_a, h0, num_m, num_k); load_ga(g_b, h0, num_k, num_n); } #if defined(USE_ELEMENTAL) double zero = 0.0; ElGlobalArraysFill_d( eldga, g_c, &zero ); ElGlobalArraysSync_d( eldga ); #else GA_Zero(g_c); GA_Sync(); #endif #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif printf("\nMatrix Multiplication on C = A[%ld,%ld]xB[%ld,%ld]\n", (long)num_m, (long)num_k, (long)num_k, (long)num_n); fflush(stdout); } for (i=0; i<ntrans; i++) { avg_t[i] = 0.0; avg_mf[i] = 0.0; } for (itime=0; itime<ntimes; itime++) { for (i=0; i<ntrans; i++) { #if defined(USE_ELEMENTAL) ElGlobalArraysSync_d( eldga ); #else GA_Sync(); #endif ta = transa[i]; tb = transb[i]; t1 = MP_TIMER(); #if defined(USE_ELEMENTAL) ElGlobalArraysDgemm_d( eldga, ta, tb, num_m, num_n, num_k, 1.0, g_a, g_b, 0.0, g_c ); #else GA_Dgemm(ta,tb,num_m,num_n,num_k,1.0, g_a, g_b, 0.0, g_c); #endif t1 = MP_TIMER() - t1; #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif #if defined(USE_ELEMENTAL) mf = 2e0*num_m*num_n*num_k/t1*1e-6/nproc; #else mf = 2e0*num_m*num_n*num_k/t1*1e-6/GA_Nnodes(); #endif avg_t[i] = avg_t[i]+t1; avg_mf[i] = avg_mf[i] + mf; printf("%15s%2d: %12.4f seconds %12.1f mflops/proc %c %c\n", "Run#", itime, t1, mf, ta, tb); fflush(stdout); if (dgemm_verify && itime == 0) { /* recall the C API swaps the matrix order */ /* we swap it here for the Fortran-based verify */ verify_ga_dgemm(tb, ta, num_n, num_m, num_k, 1.0, g_b, g_a, 0.0, g_c, tmpb, tmpa, tmpc); } } } } #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif printf("\n"); for (i=0; i<ntrans; i++) { printf("%17s: %12.4f seconds %12.1f mflops/proc %c %c\n", "Average", avg_t[i]/ntimes, avg_mf[i]/ntimes, transa[i], transb[i]); } if(dgemm_verify) { printf("All GA_Dgemms are verified...O.K.\n"); } fflush(stdout); } /* GA_Print(g_a); GA_Print(g_b); GA_Print(g_c); */ #if defined(USE_ELEMENTAL) ElGlobalArraysDestroy_d( eldga, g_a ); ElGlobalArraysDestroy_d( eldga, g_b ); ElGlobalArraysDestroy_d( eldga, g_c ); #else GA_Destroy(g_c); GA_Destroy(g_b); GA_Destroy(g_a); #endif } /* ??? format(a15, i2, ': ', e12.4, ' seconds ',f12.1, . ' mflops/proc ', 3a2) */ #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif printf("All tests successful\n"); } free(h0); free(tmpa); free(tmpb); free(tmpc); #if defined(USE_ELEMENTAL) // call el::global arrays destructor ElGlobalArraysTerminate_d( eldga ); ElGlobalArraysDestruct_d( eldga ); ElFinalize(); #else GA_Terminate(); MP_FINALIZE(); #endif return 0; } /* * Verify for correctness. Process 0 computes BLAS dgemm * locally. For larger arrays, disbale this test as memory * might not be sufficient */ void verify_ga_dgemm(char xt1, char xt2, int num_m, int num_n, int num_k, double alpha, int g_a, int g_b, double beta, int g_c, double *tmpa, double *tmpb, double *tmpc) { int i,j,type,ndim,dims[2],lo[2],hi[2]; double abs_value; for (i=0; i<num_n; i++) { for (j=0; j<num_m; j++) { tmpc[j+i*num_m] = -1.0; tmpa[j+i*num_m] = -2.0; } } #if defined(USE_ELEMENTAL) ElGlobalArraysInquire_d( eldga, g_a, &ndim, dims ); #else NGA_Inquire(g_a, &type, &ndim, dims); #endif lo[0] = 0; lo[1] = 0; hi[0] = dims[0]-1; hi[1] = dims[1]-1; #if defined(USE_ELEMENTAL) ElGlobalArraysGet_d( eldga, g_a, lo, hi, tmpa, &dims[1] ); #else NGA_Get(g_a, lo, hi, tmpa, &dims[1]); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysInquire_d( eldga, g_a, &ndim, dims ); #else NGA_Inquire(g_a, &type, &ndim, dims); #endif lo[0] = 0; lo[1] = 0; hi[0] = dims[0]-1; hi[1] = dims[1]-1; #if defined(USE_ELEMENTAL) ElGlobalArraysGet_d( eldga, g_b, lo, hi, tmpb, &dims[1] ); #else NGA_Get(g_b, lo, hi, tmpb, &dims[1]); #endif /* compute dgemm sequentially */ #if defined(USE_ELEMENTAL) cblas_dgemm ( CblasRowMajor, ( xt1 == 'n'? CblasNoTrans: CblasTrans ), ( xt2 == 'n'? CblasNoTrans: CblasTrans ), num_m /* M */, num_n /* N */, num_k /* K */, alpha, tmpa, num_m, /* lda */ tmpb, num_k, /* ldb */ beta, tmpc, num_m /* ldc */); #else xb_dgemm(&xt1, &xt2, &num_m, &num_n, &num_k, &alpha, tmpa, &num_m, tmpb, &num_k, &beta, tmpc, &num_m); #endif /* after computing c locally, verify it with the values in g_c */ #if defined(USE_ELEMENTAL) ElGlobalArraysInquire_d( eldga, g_a, &ndim, dims ); #else NGA_Inquire(g_a, &type, &ndim, dims); #endif lo[0] = 0; lo[1] = 0; hi[0] = dims[0]-1; hi[1] = dims[1]-1; #if defined(USE_ELEMENTAL) ElGlobalArraysGet_d( eldga, g_c, lo, hi, tmpa, &dims[1] ); #else NGA_Get(g_c, lo, hi, tmpa, &dims[1]); #endif for (i=0; i<num_n; i++) { for (j=0; j<num_m; j++) { abs_value = fabs(tmpc[j+i*num_m]-tmpa[j+i*num_m]); if(abs_value > 1.0 || abs_value < -1.0) { printf("Values are = %f %f\n", tmpc[j+i*num_m], tmpa[j+i*num_m]); printf("Values are = %f %f\n", fabs(tmpc[j+i*num_m]-tmpa[j*i*num_m]), abs_value); fflush(stdout); GA_Error("verify ga_dgemm failed", 1); } } } } /** * called by process '0' (or your master process ) */ void load_ga(int handle, double *f, int dim1, int dim2) { int lo[2], hi[2]; if (dim1 < 0 || dim2 < 0) { return; } lo[0] = 0; lo[1] = 0; hi[0] = dim1-1; hi[1] = dim2-1; #if defined(USE_ELEMENTAL) ElGlobalArraysPut_d( eldga, handle, lo, hi, f, &dim1 ); #else NGA_Put(handle, lo, hi, f, &dim1); #endif }
void do_work() { int g_a, g_b; int me=GA_Nodeid(), nproc=GA_Nnodes(), proc, loop; int dims[NDIM], lo[NDIM], hi[NDIM], block[NDIM], ld[NDIM-1]; int i,d,*proclist, offset; int adims[NDIM], ndim,type; typedef struct { int lo[NDIM]; int hi[NDIM]; } patch_t; patch_t *regions; int *map; double *buf; /***** create array A with default distribution *****/ if(me==0){printf("Creating array A\n"); fflush(stdout);} for(i = 0; i<NDIM; i++)dims[i] = N*(i+1); #ifdef NEW_API g_a = GA_Create_handle(); GA_Set_data(g_a,NDIM,dims,MT_F_DBL); GA_Set_array_name(g_a,"array A"); (void)GA_Allocate(g_a); #else g_a = NGA_Create(MT_F_DBL, NDIM, dims, "array A", NULL); #endif if(!g_a) GA_Error("create failed: A",0); if(me==0)printf("OK\n\n"); /* print info about array we got */ NGA_Inquire(g_a, &type, &ndim, adims); GA_Print_distribution(g_a); GA_Sync(); /* duplicate array A with ga_create irreg rather than ga_duplicate * -- want to show distribution control * -- with ga_duplicate it would be g_b=GA_Duplicate(g_a,name) */ if(me==0)printf("\nReconstructing distribution description for A\n"); /* get memory for arrays describing distribution */ proclist = (int*)malloc(nproc*sizeof(int)); if(!proclist)GA_Error("malloc failed for proclist",0); regions = (patch_t*)malloc(nproc*sizeof(patch_t)); if(!regions)GA_Error("malloc failed for regions",0); map = (int*)malloc((nproc+ndim)*sizeof(int)); /* ubound= nproc+mdim */ if(!map)GA_Error("malloc failed for map",0); /* first find out how array g_a is distributed */ for(i=0;i<ndim;i++)lo[i]=BASE; for(i=0;i<ndim;i++)hi[i]=adims[i] -1 + BASE; proc = NGA_Locate_region(g_a, lo, hi, (int*)regions, proclist); if(proc<1) GA_Error("error in NGA_Locate_region",proc); /* determine blocking for each dimension */ for(i=0;i<ndim;i++)block[i]=0; for(i=0;i<ndim;i++)adims[i]=0; offset =0; for(d=0; d<ndim; d++) for(i=0;i<proc;i++) if( regions[i].hi[d]>adims[d] ){ map[offset] = regions[i].lo[d]; offset++; block[d]++; adims[d]= regions[i].hi[d]; } if(me==0){ printf("Distribution map contains %d elements\n",offset); print_subscript("number of blocks for each dimension",ndim,block,"\n"); print_subscript("distribution map",offset,map,"\n\n"); fflush(stdout); } if(me==0)printf("Creating array B applying distribution of A\n"); # ifdef USE_DUPLICATE g_b = GA_Duplicate(g_a,"array B"); # else g_b = NGA_Create_irreg(MT_F_DBL, NDIM, dims, "array B", block,map); # endif if(!g_b) GA_Error("create failed: B",0); if(me==0)printf("OK\n\n"); free(proclist); free(regions); free(map); GA_Print_distribution(g_b); GA_Sync(); if(me==0){ printf("\nCompare distributions of A and B\n"); if(GA_Compare_distr(g_a,g_b)) printf("Failure: distributions NOT identical\n"); else printf("Success: distributions identical\n"); fflush(stdout); } if(me==0){ printf("\nAccessing local elements of A: set them to the owner process id\n"); fflush(stdout); } GA_Sync(); NGA_Distribution(g_a,me,lo,hi); if(hi[0]>=0){/* -1 means no elements stored on this processor */ double *ptr; int locdim[NDIM]; NGA_Access(g_a, lo,hi, &ptr, ld); for(i=0;i<ndim;i++)locdim[i]=hi[i]-lo[i]+1; fill_patch(ptr, locdim, ld, ndim,(double)me); } for(i=0;i<nproc; i++){ if(me==i && hi[0]>=0){ char msg[100]; sprintf(msg,"%d: leading dimensions",me); print_subscript(msg,ndim-1,ld,"\n"); fflush(stdout); } GA_Sync(); } GA_Sync(); if(me==0)printf("\nRandomly checking the update using ga_get on array sections\n"); GA_Sync(); /* show ga_get working and verify array updates * every process does N random gets * for simplicity get only a single row at a time */ srand(me); /* different seed for every process */ hi[ndim-1]=adims[ndim-1] -1 + BASE; for(i=1;i<ndim-1; i++)ld[i]=1; ld[ndim-2]=adims[ndim-1] -1 + BASE; /* get buffer memory */ buf = (double*)malloc(adims[ndim-1]*sizeof(double)); if(!buf)GA_Error("malloc failed for buf",0); /* half of the processes check the result */ if(me<=nproc/2) for(loop = 0; loop< N; loop++){ /* task parallel loop */ lo[ndim-1]=BASE; for (i= 0; i < ndim -1; i ++){ lo[i] = hi[i] = rand()%adims[i]+BASE; } /* print_subscript("getting",ndim,lo,"\n");*/ NGA_Get(g_a,lo,hi,buf,ld); /* check values */ for(i=0;i<adims[ndim-1]; i++){ int p = NGA_Locate(g_a, lo); if((double)p != buf[i]) { char msg[100]; sprintf(msg,"%d: wrong value: %d != %lf a",me, p, buf[i]); print_subscript(msg,ndim,lo,"\n"); GA_Error("Error - bye",i); } lo[ndim-1]++; } } free(buf); GA_Sync(); if(me==0)printf("OK\n"); GA_Destroy(g_a); GA_Destroy(g_b); }