/**
 * Create the 4-D integer global array "Coefs" with ghost cells and print its
 * distribution.
 *
 * The array has extents Nx x Ny x Nz x nsplines.  Ghost cells of width 3 are
 * requested on the three spatial dimensions and none on the spline dimension.
 * The chunk hint is -1 for the spatial dimensions and nsplines for the last
 * one (NOTE(review): per the GA documentation this should keep the spline
 * dimension undivided -- confirm against the GA version in use).
 *
 * @param Nx, Ny, Nz  spatial extents of the array
 * @param nsplines    extent of the fourth (spline) dimension
 * @return the handle returned by NGA_Create_ghosts (not validated here)
 */
int coefs_ga_create_ghost(int Nx, int Ny, int Nz, int nsplines)
{
    /* The original also declared an unused int[Nx*Ny*Nz] scratch array here;
     * it only consumed stack space and has been dropped. */
    int dims[4]  = {Nx, Ny, Nz, nsplines};
    int chunk[4] = {-1, -1, -1, nsplines};
    int width[4] = {3, 3, 3, 0};
    int g_a;

    g_a = NGA_Create_ghosts(C_INT, 4, dims, width, "Coefs", chunk);
    GA_Print_distribution(g_a);

    return g_a;
}
PetscErrorCode vizGA2DA() { PetscErrorCode ierr; int rank; MPI_Comm_rank(PETSC_COMM_WORLD,&rank); int d1 = 40, d2 = 50; DA da; Vec vec; const PetscInt *lx, *ly, *lz; PetscInt m,n,p; DALocalInfo info; ierr = DACreate2d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR, d1,d2,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0, &da); CHKERRQ(ierr); ierr = DACreateGlobalVector(da, &vec); CHKERRQ(ierr); ierr = DAGetOwnershipRanges(da, &lx, &ly, &lz); CHKERRQ(ierr); ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr); ierr = DAGetInfo(da,0,0,0,0,&m,&n,&p,0,0,0,0); CHKERRQ(ierr); /**/ ierr = DAView(da, PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr); for (int i = 0; i < m; ++i) { PetscPrintf(PETSC_COMM_WORLD,"%d\tlx: %d\n",i,lx[i]); } for (int i = 0; i < n; ++i) { PetscPrintf(PETSC_COMM_WORLD,"%d\tly: %d\n",i,ly[i]); } /**/ int ga = GA_Create_handle(); int ndim = 2; int dims[2] = {d2,d1}; GA_Set_data(ga,2,dims,MT_DBL); int *map; PetscMalloc( sizeof(int)*(m+n), &map); map[0] = 0; for( int i = 1; i < n; i++ ) { map[i] = ly[i-1] + map[i-1]; } map[n] = 0; for( int i = n+1; i < m+n; i++ ) { map[i] = lx[i-n-1] + map[i-1]; } /* correct ordering, but nodeid's dont line up with mpi rank for petsc's da * DA: +---+---+ GA: +---+---+ * +-2-+-3-+ +-1-+-3-+ * +---+---+ +---+---+ * +-0-+-1-+ +-0-+-2-+ * +---+---+ +---+---+ int *map; PetscMalloc( sizeof(int)*(m+n), &map); map[0] = 0; for( int i = 1; i < m; i++ ) { map[i] = lx[i] + map[i-1]; } map[m] = 0; for( int i = m+1; i < m+n; i++ ) { map[i] = ly[i-m] + map[i-1]; } */ int block[2] = {n,m}; GA_Set_irreg_distr(ga,map,block); ierr = GA_Allocate( ga ); if( !ierr ) GA_Error("\n\n\nga allocaltion failed\n\n",ierr); if( !ga ) GA_Error("\n\n\n ga null \n\n",ierr); if( rank != GA_Nodeid() ) GA_Error("MPI rank does not match GA_Nodeid()",1); GA_Print_distribution(ga); int lo[2], hi[2]; NGA_Distribution(ga,rank,lo,hi); if( lo[1] != info.xs || hi[1] != info.xs+info.xm-1 || lo[0] != info.ys || hi[0] != info.ys+info.ym-1 ) { PetscSynchronizedPrintf(PETSC_COMM_SELF,"[%d] 
lo:(%2d,%2d) hi:(%2d,%2d) \t DA: (%2d,%2d), (%2d, %2d)\n", rank, lo[1], lo[0], hi[1], hi[0], info.xs, info.ys, info.xs+info.xm-1, info.ys+info.ym-1); } PetscBarrier(0); PetscSynchronizedFlush(PETSC_COMM_WORLD); AO ao; DAGetAO(da,&ao); if( rank == 0 ) { int *idx, len = d1*d2; PetscReal *val; PetscMalloc(sizeof(PetscReal)*len, &val); PetscMalloc(sizeof(int)*len, &idx); for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { idx[i + d1*j] = i + d1*j; val[i + d1*j] = i + d1*j; } } AOApplicationToPetsc(ao,len,idx); VecSetValues(vec,len,idx,val,INSERT_VALUES); int a[2], b[2],ld[1]={0}; double c = 0; for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { a[0] = j; a[1] = i; // printf("%5.0f ",c); NGA_Put(ga,a,a,&c,ld); c++; } } } // GA_Print(ga); VecAssemblyBegin(vec); VecAssemblyEnd(vec); int ld; double *ptr; NGA_Access(ga,lo,hi,&ptr,&ld); PetscReal **d; int c=0; ierr = DAVecGetArray(da,vec,&d); CHKERRQ(ierr); for (int j = info.ys; j < info.ys+info.ym; ++j) { for (int i = info.xs; i < info.xs+info.xm; ++i) { if( d[j][i] != ptr[(i-info.xs)+ld*(j-info.ys)] ) GA_Error("DA array is not equal to GA array",1); // printf("%d (%d,%d):\t%3.0f\t%3.0f\n", c, i, j, d[j][i], ptr[(i-info.xs)+ld*(j-info.ys)]); c++; } } ierr = DAVecRestoreArray(da,vec,&d); CHKERRQ(ierr); c=0; PetscReal *v; int start, end; VecGetOwnershipRange(vec, &start, &end); VecGetArray( vec, &v ); for( int i = start; i < end; i++) { // printf("%d:\t%3.0f\t%3.0f\t%s\n", start, v[i-start], ptr[i-start], (v[i-start]-ptr[i-start]==0?"":"NO") ); } VecRestoreArray( vec, &v ); NGA_Release_update(ga,lo,hi); Vec gada; VecCreateMPIWithArray(((PetscObject)da)->comm,da->Nlocal,PETSC_DETERMINE,ptr,&gada); VecView(gada,PETSC_VIEWER_STDOUT_SELF); GA_Destroy(ga); ierr = VecDestroy(vec); CHKERRQ(ierr); ierr = DADestroy(da); CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc, char**argv) { int nprocs, me; int i,j; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&me); GA_Initialize(); const int heap=3000000, stack=300000; if(! MA_init(C_INT,stack,heap) ) GA_Error((char *) "MA_init failed",stack+heap /*error code*/); int Nx=97; int Ny=97; int Nz = 97; Nx+=3; Ny+=3; Nz+=3; int data[Nx*Ny*Nz]; int num_splines = 32; int g_a,dims[4]={Nx,Ny,Nz,num_splines},chunk[4]={-1,-1,-1,num_splines}; int width[4] = {3, 3, 3, 0}; int type=C_INT; //g_a=NGA_Create(type,4,dims,"Coefs",chunk); g_a=NGA_Create_ghosts(type, 4, dims, width, "Coefs", chunk); int lo[4],hi[4],ld[3]; //double value=9.0; GA_Fill(g_a,&value); GA_Print_distribution(g_a); fflush(stdout); if(me==0) { for (i=0; i<num_splines; i++) { int x, y, z; for (x=0; x<Nx; x++) for (y=0; y<Ny; y++) for (z=0; z<Nz; z++) { j=x*(Ny*Nz)+y*Nz+z; data[j] = (x*100*100+y*100+z)*100+i;} lo[0]=lo[1]=lo[2]=0; hi[0]=Nx-1;hi[1]=Ny-1;hi[2]=Nz-1; lo[3]=hi[3]=i%num_splines; ld[0]=Ny;ld[1]=Nz;ld[2]=1; NGA_Put(g_a,lo,hi,data,ld); } } GA_Update_ghosts(g_a); GA_Sync(); printf("done\n"),fflush(stdout); ga_coefs_t *ga_coefs = malloc(sizeof(ga_coefs_t)); ga_coefs->Mx = Nx; ga_coefs->My = Ny; ga_coefs->Mz = Nz; ga_coefs->nsplines = num_splines; ga_coefs->g_a=g_a; int *coefs1 = (int*)malloc((size_t)1*sizeof(int)*4*4*4*num_splines); int ix,iy,iz; Nx-=3; Ny-=3; Nz-=3; ga_coefs->sumt=ga_coefs->amount=0; NGA_Distribution(g_a,me,lo,hi); GA_Print_distribution(g_a); int low[16][4],high[16][4]; for(i=0;i<nprocs;i++) NGA_Distribution(g_a,i,low[i],high[i]); srand ( time(NULL) ); int k=GA_Nodeid(); printf("%d: low[k]=%d high[k]=%d\n", GA_Nodeid(), low[k][2], high[k][2]); int unequal=0; for(i=0;i<1000;i++) { ix=rand_index(low[k][0],high[k][0]); if(ix+3>=dims[0]) ix=low[k][0]; iy=rand_index(low[k][1],high[k][1]); if(iy+3>=dims[1]) iy=low[k][1]; iz=rand_index(low[k][2],high[k][2]); if(iz+3>=dims[2]) iz=low[k][2]; coefs_ga_get_3d(ga_coefs,coefs1,ix,iy,iz); long 
get_sum=mini_cube_sum(coefs1, ga_coefs->nsplines); long ghost_sum=coefs_ghost_access_3d(ga_coefs->g_a, ix, iy, iz, ga_coefs->nsplines); if(get_sum!=ghost_sum) { printf("ixyz=\t%d\t%d\t%d\t", ix, iy, iz); printf("get_sum=%ld ghost_sum=%ld\n", get_sum, ghost_sum); unequal++; } } printf("unequal count=%d\n", unequal); free(coefs1); GA_Terminate(); MPI_Finalize(); return 0; }
static int test(int shape_idx, int type_idx, int dist_idx) { int type = TYPES[type_idx]; int *dims = SHAPES[shape_idx]; int ndim = SHAPES_NDIM[shape_idx]; mock_ga_t *mock_a, *result_a; int g_a; int buffer[100]; int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM]; int result=0, error_index=-1, error_proc=-1; mock_a = Mock_Create(type, ndim, dims, "mock", NULL); result_a = Mock_Create(type, ndim, dims, "mock", NULL); g_a = create_function[dist_idx](type, ndim, dims); mock_data(mock_a, g_a); mock_to_global(mock_a, g_a); Mock_Zero(mock_a); GA_Zero(g_a); global_to_mock(g_a, result_a); result = neq_mock(mock_a, result_a, &error_index); if (0 != result) { error_proc = GA_Nodeid(); } GA_Igop(&result, 1, "+"); GA_Igop(&error_proc, 1, "max"); if (error_proc != GA_Nodeid()) { error_index = 0; } GA_Igop(&error_index, 1, "+"); if (0 != result) { if (error_proc == GA_Nodeid()) { printf("ERROR: local result failed to compare to global result\n"); printf("\terror_proc=%d\n", error_proc); printf("\terror_index=%d\n", error_index); printf("***LOCAL RESULT***\n"); Mock_Print(mock_a); printf("***GLOBAL RESULT***\n"); Mock_Print(result_a); printf("\tprinting array distribution\n"); } GA_Sync(); GA_Print(g_a); GA_Print_distribution(g_a); return 1; } Mock_Destroy(mock_a); Mock_Destroy(result_a); GA_Destroy(g_a); return 0; }
static int test(int shape_idx, int type_idx, int dist_idx) { int type = TYPES[type_idx]; int *dims = SHAPES[shape_idx]; int ndim = SHAPES_NDIM[shape_idx]; mock_ga_t *mock_a, *result_a; int g_a; void *alpha = NULL; int buffer[100]; int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM]; int result=0, error_index=-1, error_proc=-1; int ival = 6; long lval = 7; long long llval = 8; float fval = 9; double dval = 10; SingleComplex cval = {11,12}; DoubleComplex zval = {13,14}; /* create the local array and result array */ mock_a = Mock_Create(type, ndim, dims, "mock", NULL); result_a = Mock_Create(type, ndim, dims, "mock", NULL); /* create the global array */ g_a = create_function[dist_idx](type, ndim, dims); /* create meaningful data for local array */ mock_data(mock_a, g_a); /* init global array with same data as local array */ mock_to_global(mock_a, g_a); switch (type) { case C_INT: alpha = &ival; break; case C_LONG: alpha = &lval; break; case C_LONGLONG: alpha = &llval; break; case C_FLOAT: alpha = &fval; break; case C_DBL: alpha = &dval; break; case C_SCPL: alpha = &cval; break; case C_DCPL: alpha = &zval; break; } /* call the local routine */ Mock_Add_constant(mock_a, alpha); /* call the global routine */ GA_Add_constant(g_a, alpha); /* get the results from the global array */ global_to_mock(g_a, result_a); /* compare the results */ result = neq_mock(mock_a, result_a, &error_index); if (0 != result) { error_proc = GA_Nodeid(); } /* make sure all procs get same result so they can die gracefully */ GA_Igop(&result, 1, "+"); /* if error occured, find the highest failing node ID */ GA_Igop(&error_proc, 1, "max"); /* clear the error index for all but the highest failing node ID */ if (error_proc != GA_Nodeid()) { error_index = 0; } /* make sure all procs get the error index on the highest failing node ID */ GA_Igop(&error_index, 1, "+"); if (0 != result) { if (error_proc == GA_Nodeid()) { printf("ERROR: local result failed to compare to global result\n"); 
printf("\terror_proc=%d\n", error_proc); printf("\terror_index=%d\n", error_index); printf("***LOCAL RESULT***\n"); Mock_Print(mock_a); printf("***GLOBAL RESULT***\n"); Mock_Print(result_a); printf("\tprinting array distribution\n"); } GA_Sync(); GA_Print(g_a); GA_Print_distribution(g_a); return 1; } /* clean up */ Mock_Destroy(mock_a); Mock_Destroy(result_a); GA_Destroy(g_a); return 0; }
int main(int argc, char **argv) { int me; int g_a; int status; int i,j; int dims[] = {n,n}; int proc_group[PROC_LIST_SIZE],proclist[PROC_LIST_SIZE],inode; int sbuf[1],rbuf[1]; MPI_Comm comm; MP_INIT(argc,argv); GA_Initialize(); me = GA_Nodeid(); status = MA_init(MT_DBL, 100000, 100000); if (!status) GA_Error("ma_init failed",-1); status = MA_set_auto_verify(1); status = MA_set_hard_fail(1); status = MA_set_error_print(1); inode = GA_Cluster_nodeid(); if (me == 0) { printf("there are %d nodes, node 0 has %d procs\n", GA_Cluster_nnodes(), GA_Cluster_nprocs(0)); fflush(stdout); } GA_Sync(); for (i=0; i<GA_Cluster_nnodes(); ++i) { for (j=0; j<GA_Cluster_nprocs(i); ++j) { proclist[j]=GA_Cluster_procid(i,j); } proc_group[i]=GA_Pgroup_create(proclist,GA_Cluster_nprocs(i)); } GA_Sync(); for (i=0; i<GA_Cluster_nnodes(); ++i) { if (i == inode) { printf("%d joining group %d\n", me, proc_group[inode]); GA_Pgroup_set_default(proc_group[inode]); g_a = NGA_Create(C_DBL, 2, dims, "a", NULL); if (!g_a) GA_Error("NGA_Create failed",-1); printf("%d Created array of group %d as proc no. %d\n", me, proc_group[inode], GA_Nodeid()); GA_Print_distribution(g_a); comm = GA_MPI_Comm_pgroup_default(); if (comm != MPI_COMM_NULL) { sbuf[0] = GA_Nodeid(); status = MPI_Allreduce(sbuf, rbuf, 1, MPI_INT, MPI_MAX, comm); printf("%d max nodeid is %d\n", me, rbuf[0]); if ((rbuf[0]+1) != GA_Cluster_nprocs(i)) { GA_Error("MPI_Allreduce failed",1); } } else { printf("MPI_Comm was null!\n"); } GA_Pgroup_set_default(GA_Pgroup_get_world()); } GA_Sync(); } GA_Terminate(); MP_FINALIZE(); return 0; }
// note: Sayan: brings down memory requirement to about 268 MB int main(int argc, char **argv) { int me, nproc, g_a = -1, i, j; #if defined(USE_ELEMENTAL) int ndim=2, dims[2]= {N1,N2}; #else int ndim=2, type=MT_F_DBL, dims[2]= {N1,N2}; #endif double *buf; int lo[2], hi[2], ld[1]; double alpha = 1.0; #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &me ); ElMPICommSize( MPI_COMM_WORLD, &nproc ); ElGlobalArrays_d eldga; // instantiate el::global array ElGlobalArraysConstruct_d( &eldga ); // initialize global arrays ElGlobalArraysInitialize_d( eldga ); printf ("INITIALIZED elemental global array...\n"); #else MP_INIT(argc,argv); GA_Initialize_ltd(-1); me=GA_Nodeid(); nproc=GA_Nnodes(); #endif if(me==0) printf("Using %ld processes\n",(long)nproc); if(me==0) printf("memory = %ld bytes\n",((long)N1)*((long)N2)*8); #if defined(USE_ELEMENTAL) // create and allocate a global array printf ("ndim = %d\n", ndim); printf ("dim[0] = %d and dim[1] = %d\n", dims[0], dims[1]); ElGlobalArraysCreate_d( eldga, ndim, dims, "A", &g_a); printf ("CREATED elemental global array...\n"); // print distribution ElGlobalArraysPrint_d( eldga, g_a ); #else g_a = NGA_Create(type, ndim, dims, "A", NULL); GA_Zero(g_a); /* zero the matrix */ GA_Print_distribution(g_a); #endif if(me == 0) { // buf = (double*)(malloc(N1*1024*sizeof(double))); buf = (double*)(malloc(N1*128*sizeof(double))); // for(j = 0; j < N1*1024; ++j) buf[j] = 1.0; // for(i = 0; i < N2/1024; ++i) { for(j = 0; j < N1*128; ++j) buf[j] = 1.0; for(i = 0; i < N2/128; ++i) { lo[0] = 0; hi[0] = lo[0] + N1 -1; /* lo[1] = i*1024; hi[1] = lo[1] + 1024 -1; ld[0] = 1024; */ lo[1] = i*128; hi[1] = lo[1] + 128 -1; ld[0] = 128; printf("NGA_Acc.%d: %d:%d %d:%d\n",i,lo[0],hi[0],lo[1],hi[1]); #if defined(USE_ELEMENTAL) ElGlobalArraysAccumulate_d( eldga, g_a, lo, hi, buf, ld, &alpha ); // there is an explicit flush in NGA_Acc/Put, so when it returns, the buffer 
// can be reused and data has reached the destination #else NGA_Init_fence(); NGA_Acc(g_a, lo, hi, buf, ld, &alpha); NGA_Fence(); #endif } } #if defined(USE_ELEMENTAL) ElGlobalArraysSync_d( eldga ); ElGlobalArraysDestroy_d( eldga, g_a ); ElGlobalArraysTerminate_d( eldga ); // call el::global arrays destructor ElGlobalArraysDestruct_d( eldga ); ElFinalize(); #else GA_Sync(); GA_Destroy(g_a); GA_Terminate(); MP_FINALIZE(); #endif return 0; }
int main(int argc, char**argv) { int nprocs, me; int i,j; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&me); GA_Initialize(); int flag=1; int Nx=108; int Ny=108; int Nz = 108; Nx+=3; Ny+=3; Nz+=3; int data[Nx*Ny*Nz]; int num_splines = 32; int g_a,dims[4]={Nx,Ny,Nz,num_splines},chunk[4]={-1,-1,-1,num_splines}; int type=C_INT; g_a=NGA_Create(type,4,dims,"Coefs",chunk); int lo[4],hi[4],ld[3]; double value=9.0; GA_Fill(g_a,&value); GA_Print_distribution(g_a); fflush(stdout); if(me==0) { for (i=0; i<num_splines; i++) { for (j=0; j<Nx*Ny*Nz; j++) data[j] = rand()%1000; lo[0]=lo[1]=lo[2]=0; hi[0]=Nx-1;hi[1]=Ny-1;hi[2]=Nz-1; lo[3]=hi[3]=i%num_splines; ld[0]=Ny;ld[1]=Nz;ld[2]=1; NGA_Put(g_a,lo,hi,data,ld); } } printf("done\n"),fflush(stdout); GA_Sync(); ga_coefs_t *ga_coefs = malloc(sizeof(ga_coefs_t)); ga_coefs->Mx = Nx; ga_coefs->My = Ny; ga_coefs->Mz = Nz; ga_coefs->nsplines = num_splines; ga_coefs->g_a=g_a; int *coefs1 = (int*)malloc((size_t)1*sizeof(int)*4*4*4*num_splines); int ix,iy,iz; Nx-=3; Ny-=3; Nz-=3; ga_coefs->sumt=ga_coefs->amount=0; NGA_Distribution(g_a,me,lo,hi); int low[16][4],high[16][4]; for(i=0;i<nprocs;i++) NGA_Distribution(g_a,i,low[i],high[i]); srand ( time(NULL) ); int k; for(k=0;k<nprocs;k++) { ga_coefs->sumt=ga_coefs->amount=0; { for(i=0;i<1000;i++) { ix=rand_index(low[k][0],high[k][0]); iy=rand_index(low[k][1],high[k][1]); iz=rand_index(low[k][2],high[k][2]); coefs_ga_get_3d(ga_coefs,coefs1,ix,iy,iz); mini_cube_sum(coefs1, ga_coefs->nsplines); } } printf("<%d,%d>\t %lf \t %d \t %lf\n", me,k, ga_coefs->sumt, ga_coefs->amount, ga_coefs->sumt/ga_coefs->amount),fflush(stdout); } free(coefs1); GA_Terminate(); MPI_Finalize(); return 0; }
void do_work() { int g_a, g_b; int me=GA_Nodeid(), nproc=GA_Nnodes(), proc, loop; int dims[NDIM], lo[NDIM], hi[NDIM], block[NDIM], ld[NDIM-1]; int i,d,*proclist, offset; int adims[NDIM], ndim,type; typedef struct { int lo[NDIM]; int hi[NDIM]; } patch_t; patch_t *regions; int *map; double *buf; /***** create array A with default distribution *****/ if(me==0){printf("Creating array A\n"); fflush(stdout);} for(i = 0; i<NDIM; i++)dims[i] = N*(i+1); #ifdef NEW_API g_a = GA_Create_handle(); GA_Set_data(g_a,NDIM,dims,MT_F_DBL); GA_Set_array_name(g_a,"array A"); (void)GA_Allocate(g_a); #else g_a = NGA_Create(MT_F_DBL, NDIM, dims, "array A", NULL); #endif if(!g_a) GA_Error("create failed: A",0); if(me==0)printf("OK\n\n"); /* print info about array we got */ NGA_Inquire(g_a, &type, &ndim, adims); GA_Print_distribution(g_a); GA_Sync(); /* duplicate array A with ga_create irreg rather than ga_duplicate * -- want to show distribution control * -- with ga_duplicate it would be g_b=GA_Duplicate(g_a,name) */ if(me==0)printf("\nReconstructing distribution description for A\n"); /* get memory for arrays describing distribution */ proclist = (int*)malloc(nproc*sizeof(int)); if(!proclist)GA_Error("malloc failed for proclist",0); regions = (patch_t*)malloc(nproc*sizeof(patch_t)); if(!regions)GA_Error("malloc failed for regions",0); map = (int*)malloc((nproc+ndim)*sizeof(int)); /* ubound= nproc+mdim */ if(!map)GA_Error("malloc failed for map",0); /* first find out how array g_a is distributed */ for(i=0;i<ndim;i++)lo[i]=BASE; for(i=0;i<ndim;i++)hi[i]=adims[i] -1 + BASE; proc = NGA_Locate_region(g_a, lo, hi, (int*)regions, proclist); if(proc<1) GA_Error("error in NGA_Locate_region",proc); /* determine blocking for each dimension */ for(i=0;i<ndim;i++)block[i]=0; for(i=0;i<ndim;i++)adims[i]=0; offset =0; for(d=0; d<ndim; d++) for(i=0;i<proc;i++) if( regions[i].hi[d]>adims[d] ){ map[offset] = regions[i].lo[d]; offset++; block[d]++; adims[d]= regions[i].hi[d]; } if(me==0){ 
printf("Distribution map contains %d elements\n",offset); print_subscript("number of blocks for each dimension",ndim,block,"\n"); print_subscript("distribution map",offset,map,"\n\n"); fflush(stdout); } if(me==0)printf("Creating array B applying distribution of A\n"); # ifdef USE_DUPLICATE g_b = GA_Duplicate(g_a,"array B"); # else g_b = NGA_Create_irreg(MT_F_DBL, NDIM, dims, "array B", block,map); # endif if(!g_b) GA_Error("create failed: B",0); if(me==0)printf("OK\n\n"); free(proclist); free(regions); free(map); GA_Print_distribution(g_b); GA_Sync(); if(me==0){ printf("\nCompare distributions of A and B\n"); if(GA_Compare_distr(g_a,g_b)) printf("Failure: distributions NOT identical\n"); else printf("Success: distributions identical\n"); fflush(stdout); } if(me==0){ printf("\nAccessing local elements of A: set them to the owner process id\n"); fflush(stdout); } GA_Sync(); NGA_Distribution(g_a,me,lo,hi); if(hi[0]>=0){/* -1 means no elements stored on this processor */ double *ptr; int locdim[NDIM]; NGA_Access(g_a, lo,hi, &ptr, ld); for(i=0;i<ndim;i++)locdim[i]=hi[i]-lo[i]+1; fill_patch(ptr, locdim, ld, ndim,(double)me); } for(i=0;i<nproc; i++){ if(me==i && hi[0]>=0){ char msg[100]; sprintf(msg,"%d: leading dimensions",me); print_subscript(msg,ndim-1,ld,"\n"); fflush(stdout); } GA_Sync(); } GA_Sync(); if(me==0)printf("\nRandomly checking the update using ga_get on array sections\n"); GA_Sync(); /* show ga_get working and verify array updates * every process does N random gets * for simplicity get only a single row at a time */ srand(me); /* different seed for every process */ hi[ndim-1]=adims[ndim-1] -1 + BASE; for(i=1;i<ndim-1; i++)ld[i]=1; ld[ndim-2]=adims[ndim-1] -1 + BASE; /* get buffer memory */ buf = (double*)malloc(adims[ndim-1]*sizeof(double)); if(!buf)GA_Error("malloc failed for buf",0); /* half of the processes check the result */ if(me<=nproc/2) for(loop = 0; loop< N; loop++){ /* task parallel loop */ lo[ndim-1]=BASE; for (i= 0; i < ndim -1; i ++){ lo[i] = 
hi[i] = rand()%adims[i]+BASE; } /* print_subscript("getting",ndim,lo,"\n");*/ NGA_Get(g_a,lo,hi,buf,ld); /* check values */ for(i=0;i<adims[ndim-1]; i++){ int p = NGA_Locate(g_a, lo); if((double)p != buf[i]) { char msg[100]; sprintf(msg,"%d: wrong value: %d != %lf a",me, p, buf[i]); print_subscript(msg,ndim,lo,"\n"); GA_Error("Error - bye",i); } lo[ndim-1]++; } } free(buf); GA_Sync(); if(me==0)printf("OK\n"); GA_Destroy(g_a); GA_Destroy(g_b); }
static int test(int shape_idx, int type_idx, int dist_idx) { int type = TYPES[type_idx]; int *dims = SHAPES[shape_idx]; int ndim = SHAPES_NDIM[shape_idx]; mock_ga_t *mock_a, *mock_b, *mock_c, *mock_r; int g_a, g_b, g_c; int buffer[100]; int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM]; int result=0, error_index=-1, error_proc=-1; mock_a = Mock_Create(type, ndim, dims, "mock", NULL); mock_b = Mock_Create(type, ndim, dims, "mock", NULL); mock_c = Mock_Create(type, ndim, dims, "mock", NULL); mock_r = Mock_Create(type, ndim, dims, "mock", NULL); g_a = create_function[dist_idx](type, ndim, dims); g_b = create_function[dist_idx](type, ndim, dims); g_c = create_function[dist_idx](type, ndim, dims); /* create meaningful data for local array */ mock_data(mock_a, g_a); mock_data(mock_b, g_b); /* init global array with same data as local array */ mock_to_global(mock_a, g_a); mock_to_global(mock_b, g_b); /* call the local routine */ Mock_Elem_multiply(mock_a, mock_b, mock_c); /* call the global routine */ GA_Elem_multiply(g_a, g_b, g_c); /* get the results from the global array */ global_to_mock(g_c, mock_r); /* compare the results */ result = neq_mock(mock_c, mock_r, &error_index); if (0 != result) { error_proc = GA_Nodeid(); } /* make sure all procs get same result so they can die gracefully */ GA_Igop(&result, 1, "+"); /* if error occured, find the highest failing node ID */ GA_Igop(&error_proc, 1, "max"); /* clear the error index for all but the highest failing node ID */ if (error_proc != GA_Nodeid()) { error_index = 0; } /* make sure all procs get the error index on the highest failing node ID */ GA_Igop(&error_index, 1, "+"); if (0 != result) { if (error_proc == GA_Nodeid()) { printf("ERROR: local result failed to compare to global result\n"); printf("\terror_proc=%d\n", error_proc); printf("\terror_index=%d\n", error_index); printf("***LOCAL RESULT***\n"); Mock_Print(mock_a); printf("***GLOBAL RESULT***\n"); Mock_Print(mock_r); printf("\tprinting 
array distribution\n"); } GA_Sync(); GA_Print(g_a); GA_Print_distribution(g_a); return 1; } /* clean up */ Mock_Destroy(mock_a); Mock_Destroy(mock_r); GA_Destroy(g_a); return 0; }