/*
 * Cross-check a distributed matrix product.
 *
 * Recomputes g_a * g_b into a scratch array duplicated from g_a, forms
 * (g_c - scratch) in place, and takes the dot product of that difference
 * with itself.  Process 0 prints the value and raises a GA error when it
 * falls outside [-TOLERANCE, TOLERANCE].
 *
 * g_a, g_b  operand array handles
 * g_c       handle of the product being verified
 * lo/hi/ld  accepted for interface compatibility; not used here
 * N         matrix order passed to GA_Dgemm
 */
void verify(int g_a, int g_b, int g_c, int *lo, int *hi, int *ld, int N)
{
    const int self = GA_Nodeid();
    double plus_one = 1.0;
    double minus_one = -1.0;
    double diff_norm;
    int g_ref;

    (void)lo;
    (void)hi;
    (void)ld;

    g_ref = GA_Duplicate(g_a, "array Check");
    if (g_ref == 0) {
        GA_Error("duplicate failed", NDIMS);
    }
    GA_Sync();

    /* Reference product: g_ref = 1.0 * A * B + 0.0 * g_ref */
    GA_Dgemm('n', 'n', N, N, N, 1.0, g_a, g_b, 0.0, g_ref);
    GA_Sync();

    /* g_ref = C - g_ref, then dot the difference with itself */
    GA_Add(&plus_one, g_c, &minus_one, g_ref, g_ref);
    diff_norm = GA_Ddot(g_ref, g_ref);

    if (self == 0) {
        printf("Normed difference in matrices: %12.4e\n", diff_norm);
        if (diff_norm > TOLERANCE || diff_norm < -TOLERANCE) {
            GA_Error("Matrix multiply verify failed", 0);
        } else {
            printf("Matrix Mutiply OK\n");
        }
    }

    GA_Destroy(g_ref);
}
/*
 * Combine the locally held patch of *g_a with the matching transposed patch
 * (first two dimensions swapped) through gai_subtr(), using alpha = 0.5.
 *
 * g_a  pointer to the global array handle; the array is updated in place.
 *
 * Raises a GA error when the first two dimensions differ.  The routine
 * synchronizes on entry, again between fetching the transposed data and
 * modifying the local patch, and once more before returning.
 *
 * NOTE(review): gai_subtr is defined elsewhere; from its name and alpha it
 * presumably computes a = alpha * (a - b) -- confirm against its definition.
 */
void FATR ga_antisymmetrize_(Integer *g_a) {

  DoublePrecision alpha = 0.5;
  int i, me = GA_Nodeid();
  /* Helpers implemented elsewhere in the library */
  extern void * FATR ga_malloc(Integer nelem, int type, char *name);
  extern void FATR ga_free(void *ptr);
  void FATR gai_subtr(int *lo, int *hi, void *a, void *b, DoublePrecision alpha, int type, Integer nelem, int ndim);

  int alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM];   /* local patch */
  int blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM];   /* transposed patch */
  int ndim, dims[GA_MAX_DIM], type;
  Integer nelem=1;          /* number of elements in the local patch */
  Logical have_data;        /* true when this process owns part of the array */
  void *a_ptr, *b_ptr;

  GA_Sync();

  NGA_Inquire((int)(*g_a), &type, &ndim, dims);

  if (dims[0] != dims[1])
    GA_Error("ga_sym: can only sym square matrix", 0L);

  /* Find the local distribution */
  NGA_Distribution((int)(*g_a), me, alo, ahi);

  /* A negative upper bound in any dimension is treated as "no local data" */
  have_data = ahi[0]>=0;
  for(i=1; i<ndim; i++) have_data = have_data && ahi[i]>=0;

  if(have_data) {
    NGA_Access((int)(*g_a), alo, ahi, &a_ptr, lda);

    for(i=0; i<ndim; i++) nelem *= ahi[i]-alo[i] +1;
    /* NOTE(review): the scratch buffer is always requested as MT_C_DBL,
       independent of the array's actual `type` -- confirm this is intended */
    b_ptr = (void *) ga_malloc(nelem, MT_C_DBL, "v");

    /* Dimensions beyond the first two are copied unchanged */
    for(i=2; i<ndim; i++) {bhi[i]=ahi[i]; blo[i]=alo[i]; }

    /* switch rows and cols */
    blo[1]=alo[0];
    bhi[1]=ahi[0];
    blo[0]=alo[1];
    bhi[0]=ahi[1];

    /* Leading dimensions of the scratch buffer follow the swapped extents */
    for (i=0; i < ndim-1; i++)
      ldb[i] = bhi[i+1] - blo[i+1] + 1;
    NGA_Get((int)(*g_a), blo, bhi, b_ptr, ldb);
  }
  /* All transposed patches are fetched before any process modifies its data */
  GA_Sync();

  if(have_data) {
    gai_subtr(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim);
    NGA_Release_update((int)(*g_a), alo, ahi);
    ga_free(b_ptr);
  }
  GA_Sync();
}
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B, g_C, local_C[DIM][DIM], dims[DIM]={5,5}; int val_A=5, val_B=3, ld=DIM, max; int lo[DIM]={2,2}, hi[DIM]={4,4}, blo[DIM]={0,0}, bhi[DIM]={2,2}, clo[DIM]={1,1}, chi[DIM]={3,3}; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL); g_C = NGA_Create(C_INT, DIM, dims, "array_C", NULL); GA_Fill(g_A, &val_A); GA_Fill(g_B, &val_B); GA_Zero(g_C); GA_Elem_maximum_patch(g_A, lo, hi, g_B, blo, bhi, g_C, clo, chi); GA_Print(g_C); GA_Sync(); NGA_Get(g_C, clo, chi, local_C, &ld); if(rank==1) { for(i=0; i<DIM; i++) { for(j=0; j<DIM; j++)printf("%d ", local_C[i][j]); printf("\n"); } if(val_A>val_B) max=val_A; else max=val_B; for(i=0; i<DIM; i++) { for(j=0; j<DIM; j++) if(local_C[i][j]!=max) printf("GA Error : \n"); } } GA_Sync(); if(rank == 0) printf("Test Completed \n"); GA_Terminate(); MPI_Finalize(); }
main(int argc, char **argv) { int rank, nprocs; int g_A, dims[D]={SIZE,SIZE}, *local_A=NULL, *local_G=NULL, **sub_array=NULL, **s_array=NULL; int i, j, value=5; MPI_Init(&argc, &argv); GA_Initialize(); MA_init(C_INT, 1000, 1000); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); s_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { s_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) s_array[i][j]=rand()%10; } sub_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { sub_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) sub_array[i][j]=rand()%10; } for(i=0; i<N; i++) local_A=(int*)malloc(N*sizeof(int)); for(i=0; i<N; i++) local_G=(int*)malloc(N*sizeof(int)); g_A=NGA_Create(C_INT, D, dims, "array_A", NULL); GA_Fill(g_A, &value); GA_Sync(); NGA_Scatter(g_A, local_A, s_array, N); NGA_Gather(g_A, local_G, s_array, N); GA_Sync(); GA_Print(g_A); if(rank==0) { for(i=0; i<N; i++) if(local_G[i]!=local_A[i]) printf("GA Error: \n"); } GA_Sync(); if(rank==0) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { TEST_SETUP; int shape_idx=0, type_idx=0, dist_idx=0; int return_code=0; for (shape_idx=0; shape_idx < NUM_SHAPES; ++shape_idx) { for (type_idx=0; type_idx < NUM_TYPES; ++type_idx) { for (dist_idx=0; dist_idx < NUM_DISTS; ++dist_idx) { if (0 == GA_Nodeid()) { printf("%s\t%s\t%s\n", SHAPE_NAMES[shape_idx], TYPE_NAMES[type_idx], DIST_NAMES[dist_idx] ); } GA_Sync(); return_code = test(shape_idx, type_idx, dist_idx); if (0 != return_code) { break; } } if (0 != return_code) { break; } } if (0 != return_code) { break; } } TEST_TEARDOWN; return return_code; }
/*
 * Creates a regular GSIZE x GSIZE integer array and an irregularly
 * distributed one of the same shape, fills and prints both, then destroys
 * them.  The irregular distribution uses 3 x 2 blocks, with block start
 * indices taken from `map` (three entries for the first dimension followed
 * by two for the second).
 *
 * rank  calling process rank; currently unused.
 */
irregular_array2(int rank)
{
    int shape[DIM] = {GSIZE, GSIZE};
    int blocks_per_dim[DIM] = {3, 2};
    int block_starts[5] = {0, 2, 6, 0, 4};
    int fill_regular = 4;
    int fill_irregular = 7;
    int g_regular;
    int g_irregular;

    g_regular = NGA_Create(C_INT, DIM, shape, "array_A", NULL);
    g_irregular = NGA_Create_irreg(C_INT, DIM, shape, "array_B",
                                   blocks_per_dim, block_starts);

    GA_Fill(g_regular, &fill_regular);
    GA_Print(g_regular);

    GA_Fill(g_irregular, &fill_irregular);
    GA_Print(g_irregular);
    GA_Sync();

    /* A GA_Get_block_info() dump of the irregular array was disabled here. */

    GA_Destroy(g_regular);
    GA_Destroy(g_irregular);
}
/*
 * Creates an N x N double array distributed in (N/p) x (N/p) chunks, where
 * p = sqrt(number of processes), then prints each process's patch bounds and
 * the first element of its local patch.
 *
 * Fixes relative to the original:
 *  - the second printf had two conversions but only one argument (undefined
 *    behavior); the process id is now passed as well;
 *  - the patch obtained with NGA_Access is released before the array is
 *    destroyed;
 *  - unused loop counters removed.
 */
int main(int argc, char** argv)
{
    int nprocs, myid, nprocssq;
    int dims[2], chunk[2];
    int stack = 100000, heap = 100000;

    MPI_Init(&argc, &argv);

    GA_Initialize();
    MA_init(C_DBL, stack, heap);

    nprocssq = GA_Nnodes();
    /* NOTE(review): assumes the process count is a perfect square */
    nprocs = (int)sqrt(nprocssq);
    myid = GA_Nodeid();

    dims[0] = N;
    dims[1] = N;
    chunk[0] = N / nprocs;
    chunk[1] = N / nprocs;

    int g_a = NGA_Create(C_DBL, 2, dims, "Array A", chunk);

    int lo[2], hi[2];
    NGA_Distribution(g_a, myid, lo, hi);

    int ld[1] = {N / nprocs};
    void *ptr;
    double *local;

    printf("Myid = %d, lo = [%d,%d] , hi = [%d,%d] , ld = %d \n",
           myid, lo[0], lo[1], hi[0], hi[1], ld[0]);

    /* NGA_Access overwrites ld with the real leading dimension */
    NGA_Access(g_a, lo, hi, &ptr, ld);
    local = (double*) ptr;
    printf("Myid = %d , local[0][0] = %f\n", myid, *local);
    NGA_Release(g_a, lo, hi);

    GA_Sync();
    GA_Destroy(g_a);
    GA_Terminate();
    MPI_Finalize();
    return 0;
}
/*
 * Driver: initializes MPI, MA and GA, calls verify_ga_dim() once for each
 * dimensionality from 1 through NDIM, then shuts everything down.
 */
int main(int argc, char **argv)
{
    int rank;
    int nprocs;
    int ndim;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    MA_init(C_INT, 1000, 1000);
    GA_Initialize();

    ndim = 1;
    while (ndim <= NDIM) {
        verify_ga_dim(ndim);
        ++ndim;
    }

    GA_Sync();
    if (!rank) {
        printf("Test Completed \n");
    }

    GA_Terminate();
    MPI_Finalize();
    return 0;
}
/*
 * Creates a regular 5 x 10 integer array and an irregularly distributed one
 * of the same shape (2 x 3 blocks, block start indices from `map`), fills
 * and prints both, queries the regular array's type and shape, and destroys
 * both arrays.
 *
 * rank  calling process rank; currently unused.
 */
irregular_array1(int rank)
{
    int shape[DIM] = {5, 10};
    int queried_shape[DIM];
    int queried_ndim;
    int queried_type;
    int blocks_per_dim[DIM] = {2, 3};
    int block_starts[5] = {0, 2, 0, 4, 6};
    int fill_regular = 5;
    int fill_irregular = 7;
    int g_regular;
    int g_irregular;

    g_regular = NGA_Create(C_INT, DIM, shape, "array_A", NULL);
    g_irregular = NGA_Create_irreg(C_INT, DIM, shape, "array_B",
                                   blocks_per_dim, block_starts);

    GA_Fill(g_regular, &fill_regular);
    GA_Print(g_regular);

    GA_Fill(g_irregular, &fill_irregular);
    GA_Print(g_irregular);
    GA_Sync();

    /* Result is not inspected; a debug print of it was disabled. */
    NGA_Inquire(g_regular, &queried_type, &queried_ndim, queried_shape);

    /* A GA_Get_block_info() dump of the irregular array was disabled here. */

    GA_Destroy(g_regular);
    GA_Destroy(g_irregular);
}
/*
 * Shared task counter built on a one-element global array.
 *
 *   *val == 0  create the counter array; process 0 stores `initval` into it
 *              and then resets initval to 0; returns 0
 *   *val  > 0  atomically read-and-increment the counter and return the
 *              value read (GA error if the counter was never created)
 *   *val  < 0  destroy the counter array and reset the state; returns 0
 */
Integer util_gnxtval_(Integer *val)
{
    const Integer request = *val;

    if (request < 0) {
        /* tear down */
        GA_Destroy(g_T);
        initialized = 0;
        initval = 0;
        return 0;
    }

    if (request == 0) {
        int length = 1;

        initialized = 1;

        /* create task array */
        g_T = NGA_Create(C_LONG, 1, &length, "Atomic Task", NULL);

        /* Initialize the task array */
        if (GA_Nodeid() == 0) {
            int lo = 0, hi = 0;
            NGA_Put(g_T, &lo, &hi, &initval, &hi);
            initval = 0;
        }

        GA_Sync();
        return 0;
    }

    /* request > 0: hand out the next task number */
    if (!initialized) {
        ga_error("nxtval: not yet initialized", 0L);
    }
    return (Integer) NGA_Read_inc(g_T, &subscript, 1);
}
/*
 * Process-group test: splits the ranks into an "even" and an "odd" group,
 * creates a GA process group for each, and has every process print its id
 * and the size of the group matching its own parity.
 *
 * Fixes relative to the original:
 *  - a single counter indexed both rank lists and advanced on every rank,
 *    so each list had holes and was written past its end; each list now has
 *    its own fill counter;
 *  - list_even was allocated for nprocs/2 entries although it holds
 *    nprocs/2 + nprocs%2 ranks (and that count was passed to
 *    GA_Pgroup_create);
 *  - both lists are freed.
 */
int main(int argc, char **argv)
{
    int rank, nprocs, i;
    int p_Geven, p_Godd;
    int n_even, n_odd;
    int even_fill = 0, odd_fill = 0;
    int *list_even = NULL, *list_odd = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    MA_init(C_INT, 1000, 1000);
    GA_Initialize();

    n_odd = nprocs / 2;
    n_even = n_odd + nprocs % 2;

    /* allocate at least one element so a NULL return always means failure */
    list_even = malloc((size_t)(n_even > 0 ? n_even : 1) * sizeof *list_even);
    list_odd = malloc((size_t)(n_odd > 0 ? n_odd : 1) * sizeof *list_odd);
    if (list_even == NULL || list_odd == NULL) {
        GA_Error("malloc failed", nprocs);
    }

    for (i = 0; i < nprocs; i++) {
        if (i % 2 == 0) {
            list_even[even_fill++] = i;
        } else {
            list_odd[odd_fill++] = i;
        }
    }

    p_Geven = GA_Pgroup_create(list_even, n_even);
    p_Godd = GA_Pgroup_create(list_odd, n_odd);

    if (rank % 2 == 0) {
        printf("%d: My ID is %d :: %d -- even \n", rank,
               GA_Pgroup_nodeid(p_Geven), GA_Pgroup_nnodes(p_Geven));
    } else {
        printf("%d: My ID is %d :: %d --- odd\n", rank,
               GA_Pgroup_nodeid(p_Godd), GA_Pgroup_nnodes(p_Godd));
    }

    GA_Sync();
    if (rank == 0) {
        GA_PRINT_MSG();
    }

    free(list_even);
    free(list_odd);

    GA_Terminate();
    MPI_Finalize();
    return 0;
}
/**
 * Block topology of the force matrix.
 *
 * The natoms x natoms force matrix is cut into square blocks of
 * BlockSize x BlockSize.  Example: 100 atoms with a block size of 50 give
 * four 50x50 blocks, numbered row by row:
 *
 *        -----------
 *        | 0,0 | 0,1 |
 *        -----------
 *        | 1,0 | 1,1 |
 *        -----------
 *
 * SetupBlocks fills user->btopo with the (x, y) coordinate of every block,
 * creates the one-element "Atomic Task" array (process 0 stores nproc in
 * it), and pushes one MA stack buffer from which x1, x2 and grad are carved.
 *
 * Raises a GA error when natoms is not a multiple of BlockSize, when the
 * block count exceeds MAX_BLOCKS or is smaller than the process count, or
 * when an allocation fails.  Returns 0.
 */
int SetupBlocks(AppCtx *user)
{
    int blocks_per_side;
    int idx;
    int one_elem = 1;
    int zero = 0;
    int x_space, g_space;

    if (user->natoms % user->BlockSize != 0) {
        GA_Error("Number of atoms should be a multiple of block size. Choose a different block size.", 0L);
    }

    blocks_per_side = user->natoms / user->BlockSize;
    user->nBlocks = blocks_per_side * blocks_per_side;

    if (user->nBlocks > MAX_BLOCKS) {
        GA_Error("Number of blocks is greater that MAX_BLOCKS: Solution is either to increase the defined MAX_BLOCKS or increase your block size", 0L);
    }
    if (user->nBlocks < user->nproc) {
        GA_Error("Number of blocks should be greater than or equal to the number of processors", 0L);
    }

    /* Row-major numbering: block idx sits at (idx / side, idx % side) */
    for (idx = 0; idx < blocks_per_side * blocks_per_side; idx++) {
        user->btopo[idx].x = idx / blocks_per_side;
        user->btopo[idx].y = idx % blocks_per_side;
    }

    /* Create task array */
    user->atomicTask = NGA_Create(C_INT, 1, &one_elem, "Atomic Task", NULL);
    if (!user->atomicTask) {
        GA_Error("NGA_Create failed for Atomic Task", 0);
    }
    if (user->me == 0) {
        NGA_Put(user->atomicTask, &zero, &zero, &user->nproc, &zero);
    }

    /* space for x values from two processors */
    x_space = 2 * user->BlockSize * user->ndim;
    /* space for ALL gradient value */
    g_space = user->natoms * user->ndim;

    if (!MA_push_stack(C_DBL, x_space + g_space + 3, "GA LJ bufs", &user->memHandle)) {
        GA_Error("ma_alloc_get failed", x_space + g_space);
    } else {
        MA_get_pointer(user->memHandle, &user->x1);
    }

    /* Carve the buffer: x1 | pad | x2 | pad | grad */
    user->x2 = user->x1 + x_space / 2 + 1;
    user->grad = user->x2 + x_space / 2 + 1;

    GA_Sync();
    return 0;
}
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, **local_A=NULL, **local_B=NULL; int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=5, value=5; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); local_A=(int**)malloc(SIZE*sizeof(int*)); for(i=0; i<SIZE; i++) { local_A[i]=(int*)malloc(SIZE*sizeof(int)); for(j=0; j<SIZE; j++) local_A[i][j]=rand()%10; } local_B=(int**)malloc(SIZE*sizeof(int*)); for(i=0; i<SIZE; i++) { local_B[i]=(int*)malloc(SIZE*sizeof(int)); for(j=0; j<SIZE; j++) local_B[i][j]=rand()%10; } g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); GA_Zero(g_A); if(rank==0) { NGA_Put(g_A, lo, hi, local_A, &ld); NGA_Get(g_A, lo, hi, local_B, &ld); for(i=0; i<SIZE; i++) { for(j=0; j<SIZE; j++) if(local_A[i][j]!=local_B[i][j]) GA_ERROR_MSG(); } } GA_Sync(); GA_Destroy(g_A); if(rank == 0) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); }
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B; int dims[MAX_DIM], val=4, ndim, re; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); for(i=1; i<=MAX_DIM; i++) { ndim=i; dims[i]=SIZE; // for(j=0; j<ndim; j++) g_A = NGA_Create(C_INT, ndim, dims, "array_A", NULL); g_B = NGA_Create(C_INT, ndim, dims, "array_B", NULL); if(!g_A) GA_Error("GA Error: no global array exists \n", ndim); if(!g_B) GA_Error("GA Error: no global array exists \n", ndim); } GA_Sync(); GA_Fill(g_A, &val); re=GA_Solve(g_A, g_B); if(re==0) printf("Cholesky Fact is Successful \n"); else if (re >0) printf("Cholesky Fact couldn't be completed \n"); else printf("An Error occured\n"); if(rank == 0) GA_PRINT_MSG(); GA_Destroy(g_A); GA_Destroy(g_B); GA_Terminate(); MPI_Finalize(); }
/*
 * Builds an irregular block distribution from the process count, creates a
 * GSIZE x GSIZE integer array with it, fills and prints the array, and has
 * rank 1 print the block information reported by GA_Get_block_info.
 *
 * rank    calling process rank
 * nprocs  number of processes
 * Returns 0.
 *
 * Fixes relative to the original:
 *  - `map` was allocated for nprocs entries although b1 + b2 entries are
 *    written and read (e.g. nprocs = 2 gives b1 + b2 = 3);
 *  - the block-info printf printed `b_temp`, which was never assigned; that
 *    field was removed from the output;
 *  - `map` is freed.
 *
 * NOTE(review): when nprocs < DIM the computed b1 is 0 -- confirm whether
 * callers guarantee at least DIM processes.
 */
int auto_number2(int rank, int nprocs)
{
    int g_B;
    int dims[DIM] = {GSIZE, GSIZE}, block[DIM], *map = NULL, val = 7;
    int n_block[DIM], block_dims[DIM], i;
    int b1, b2, inc = 0;

    /* widen the second dimension's block count until the first fits */
    do {
        b2 = DIM + inc;
        b1 = nprocs / b2;
        inc++;
    } while (nprocs / b2 >= GSIZE);

    block[0] = b1;
    block[1] = b2;

    map = malloc((size_t)(b1 + b2) * sizeof *map);
    if (map == NULL) {
        GA_Error("malloc failed", b1 + b2);
    }

    /* block start indices: 0..b1-1 for dim 0, then 0..b2-1 for dim 1 */
    for (i = 0; i < b1; i++) {
        map[i] = i;
    }
    for (i = b1; i < (b2 + b1); i++) {
        map[i] = i - b1;
    }

    if (rank == 0) {
        for (i = 0; i < (b1 + b2); i++) {
            printf("map[%d] - %d\n", i, map[i]);
        }
        for (i = 0; i < DIM; i++) {
            printf("BLOCK[%d] - %d\n", i, block[i]);
        }
    }

    g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map);

    GA_Fill(g_B, &val);
    GA_Print(g_B);
    GA_Sync();

    if (rank == 1) {
        GA_Get_block_info(g_B, n_block, block_dims);
        for (i = 0; i < DIM; i++) {
            printf(" %d: %d --- %d\n", rank, n_block[i], block_dims[i]);
        }
    }

    GA_Destroy(g_B);
    free(map);
    return 0;
}
/*
 * Print test over three integer element types (C_INT, C_LONG, C_LONGLONG):
 * creates a 15-element 1-D array filled with -1, prints it and the patch
 * 5..10 in both print modes, then prints the first locally owned element.
 *
 * Fixes relative to the original:
 *  - the locally accessed data was always read through an int*, even for
 *    the C_LONG and C_LONGLONG arrays; it is now read with the matching
 *    element type;
 *  - each iteration overwrote g_a without destroying the previous array;
 *  - local access is skipped when NGA_Distribution reports an empty range
 *    (a process that owns no elements).
 */
int main(int argc, char **argv)
{
    int size_dst = 15;
    int g_a = 0;
    int I_NEG_ONE = -1;
    long L_NEG_ONE = -1;
    long long LL_NEG_ONE = -1;
    int FIVE = 5;
    int TEN = 10;
    int lo;
    int hi;
    void *ptr;
    int i;

    MP_INIT(argc,argv);
    GA_INIT(argc,argv);

    for (i = 0; i < 3; ++i) {
        if (0 == i) {
            g_a = NGA_Create(C_INT, 1, &size_dst, "dst", NULL);
            GA_Fill(g_a, &I_NEG_ONE);
        } else if (1 == i) {
            g_a = NGA_Create(C_LONG, 1, &size_dst, "dst", NULL);
            GA_Fill(g_a, &L_NEG_ONE);
        } else if (2 == i) {
            g_a = NGA_Create(C_LONGLONG, 1, &size_dst, "dst", NULL);
            GA_Fill(g_a, &LL_NEG_ONE);
        }
        GA_Sync();

        GA_Print(g_a);
        NGA_Print_patch(g_a, &FIVE, &TEN, 0);
        NGA_Print_patch(g_a, &FIVE, &TEN, 1);

        NGA_Distribution(g_a, GA_Nodeid(), &lo, &hi);
        if (lo >= 0 && hi >= lo) {
            long long first;

            ptr = NULL;
            NGA_Access(g_a, &lo, &hi, &ptr, NULL);
            /* interpret the element with the type the array was created as */
            if (0 == i) {
                first = *(int *)ptr;
            } else if (1 == i) {
                first = *(long *)ptr;
            } else {
                first = *(long long *)ptr;
            }
            printf("[%d] (%d)=%lld\n", GA_Nodeid(), lo, first);
            NGA_Release(g_a, &lo, &hi);
        }

        GA_Destroy(g_a);
    }

    GA_Terminate();
    MP_FINALIZE();
    exit(EXIT_SUCCESS);
}
/*
 * Evaluates the objective value and gradient over the blocks assigned to
 * this process and combines the results across processes.
 *
 * gaapp  application handle (not used in this routine)
 * ga_X   global vector passed to getBlock() for each block
 * f      out: objective value; zeroed, accumulated by the LJFG_* calls and
 *        summed across processes
 * ga_G   out: global gradient vector; each process stores the slice it owns
 * ptr    user context, cast to AppCtx*
 *
 * Blocks are assigned statically: a process handles block ids me,
 * me + nproc, me + 2*nproc, ... below nBlocks.  Returns 0.
 */
int FormFunctionGradient (TAO_GA_APPLICATION gaapp, GAVec ga_X, double *f, GAVec ga_G, void *ptr)
{
    AppCtx *ctx = (AppCtx *) ptr;
    int first, last;
    int origin = 0;
    int block;
    int k;

    /* reset atomicTask to nproc */
    if (ctx->me == 0) {
        NGA_Put(ctx->atomicTask, &origin, &origin, &ctx->nproc, &origin);
    }

    k = 0;
    while (k < ctx->natoms * ctx->ndim) {
        ctx->grad[k] = 0.0;
        k++;
    }
    *f = 0.0;

    /* Static round-robin over blocks; a dynamic variant based on
       NGA_Read_inc on atomicTask is disabled. */
    for (block = ctx->me; block < ctx->nBlocks; block += ctx->nproc) {
        getBlock(ga_X, block, ctx);
        if (ctx->ndim != 2) {
            LJFG_3D(block, f, ctx);
        } else {
            LJFG_2D(block, f, ctx);
        }
    }

    /* gather function */
    GA_Dgop(f, 1, "+");

    /* gather gradient */
    GA_Dgop(ctx->grad, ctx->natoms * ctx->ndim, "+");

    /* store the locally owned slice of the summed gradient */
    NGA_Distribution(ga_G, ctx->me, &first, &last);
    NGA_Put(ga_G, &first, &last, ctx->grad + first, &last);

    GA_Sync();
    return 0;
}
/*
 * Driver for the fill tests: brings up MPI, MA and GA, runs fillonly() and
 * fillandscale() with this process's rank and the process count, and shuts
 * down again.  Rank 0 prints a completion line.
 */
main(int argc, char **argv)
{
    int rank;
    int nprocs;

    /* start-up */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MA_init(C_INT, 1000, 1000);
    GA_Initialize();

    /* test cases */
    fillonly(rank, nprocs);
    fillandscale(rank, nprocs);

    GA_Sync();
    if (!rank) {
        printf("Test Completed \n");
    }

    /* shut-down */
    GA_Terminate();
    MPI_Finalize();
}
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B, **local_value=NULL; int dims[DIM]={SIZE,SIZE}, lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; local_value=(int**)malloc(SIZE*sizeof(int*)); for(i=0; i<SIZE; i++) local_value[i]=(int*)malloc(SIZE*sizeof(int)); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL); for(i=0; i<SIZE; i++) for(j=0; j<SIZE; j++) local_value[i][j]=rand()%10; if(rank==0) NGA_Put(g_A, lo, hi, local_value, &ld); GA_Transpose(g_A, g_B); if(rank==0) validate_transpose(g_A, g_B, lo, hi, ld); GA_Sync(); if(rank == 1) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); }
/*
 * Mutex / fence demonstration on a 1-D double array of 30 elements.
 *
 * The array is built step by step through the handle API (create handle,
 * set name, data, chunk and process group, allocate), zeroed, and then:
 *   - process 0, holding mutex 0, puts ten 7.0 values into elements 0..9;
 *   - every process takes part in GA_Print;
 *   - process 1, holding mutex 0, gets elements 0..9 and prints them.
 * Process 0 announces each GA call before it is made.
 */
int main(int argc, char **argv)
{
    int me;
    int nproc;
    int status;
    int g_a;
    int dims[NDIM];
    int chunk[NDIM];
    int pg_world;
    size_t num = 10;        /* number of elements transferred */
    double *p1 = NULL;      /* source buffer for the put (process 0) */
    double *p2 = NULL;      /* destination buffer for the get (process 1) */
    size_t i;
    int num_mutex;
    int lo[1];
    int hi[1];
    int ld[1]={1};
    MPI_Comm comm;

    MP_INIT(argc,argv);
    GA_INIT(argc,argv);

    me = GA_Nodeid();
    nproc = GA_Nnodes();
    comm = GA_MPI_Comm_pgroup_default();

    printf("%d: Hello world!\n",me);

    if (me==0) printf("%d: GA_Initialize\n",me);
    /*if (me==0) printf("%d: ARMCI_Init\n",me);*/
    /*ARMCI_Init();*/
    /*if (me==0) printf("%d: MA_Init\n",me);*/
    /*MA_init(MT_DBL, 8*1024*1024, 2*1024*1024);*/

    /* Build the array piecewise through the handle API */
    if (me==0) printf("%d: GA_Create_handle\n",me);
    g_a = GA_Create_handle();

    if (me==0) printf("%d: GA_Set_array_name\n",me);
    GA_Set_array_name(g_a,"test array A");

    dims[0] = 30;
    if (me==0) printf("%d: GA_Set_data\n",me);
    GA_Set_data(g_a,NDIM,dims,MT_DBL);

    chunk[0] = -1;
    if (me==0) printf("%d: GA_Set_chunk\n",me);
    GA_Set_chunk(g_a,chunk);

    if (me==0) printf("%d: GA_Pgroup_get_world\n",me);
    pg_world = GA_Pgroup_get_world();
    if (me==0) printf("%d: GA_Set_pgroup\n",me);
    GA_Set_pgroup(g_a,pg_world);

    if (me==0) printf("%d: GA_Allocate\n",me);
    status = GA_Allocate(g_a);
    /* a zero status is treated as allocation failure */
    if(0 == status) MPI_Abort(comm,100);

    if (me==0) printf("%d: GA_Zero\n",me);
    GA_Zero(g_a);

    if (me==0) printf("%d: GA_Sync\n",me);
    GA_Sync();

    num = 10;
    p1 = malloc(num*sizeof(double));
    /*double* p1 = ARMCI_Malloc_local(num*sizeof(double));*/
    if (p1==NULL) MPI_Abort(comm,1000);
    p2 = malloc(num*sizeof(double));
    /*double* p2 = ARMCI_Malloc_local(num*sizeof(double));*/
    if (p2==NULL) MPI_Abort(comm,2000);

    /* p2 starts at 3.0 so data fetched from the array is distinguishable */
    for ( i=0 ; i<num ; i++ ) p1[i] = 7.0;
    for ( i=0 ; i<num ; i++ ) p2[i] = 3.0;

    num_mutex = 17;
    status = GA_Create_mutexes(num_mutex);
    if (me==0) printf("%d: GA_Create_mutexes = %d\n",me,status);

    /***************************************************************/
    /* Writer: process 0 puts p1 into elements 0..num-1 under mutex 0 */
    if (me==0) {
        printf("%d: before GA_Lock\n",me);
        GA_Lock(0);
        lo[0] = 0;
        hi[0] = num-1;
        GA_Init_fence();
        NGA_Put(g_a,lo,hi,p1,ld);
        GA_Fence();
        GA_Unlock(0);
        printf("%d: after GA_Unlock\n",me);
    }

    GA_Print(g_a);

    /* Reader: process 1 gets elements 0..num-1 into p2 under mutex 0 */
    if (me==1) {
        printf("%d: before GA_Lock\n",me);
        GA_Lock(0);
        lo[0] = 0;
        hi[0] = num-1;
        GA_Init_fence();
        NGA_Get(g_a,lo,hi,p2,ld);
        GA_Fence();
        GA_Unlock(0);
        printf("%d: after GA_Unlock\n",me);
        for ( i=0 ; i<num ; i++ ) printf("p2[%2lu] = %20.10f\n", (long unsigned)i,p2[i]);
    }
    /***************************************************************/

    status = GA_Destroy_mutexes();
    if (me==0) printf("%d: GA_Destroy_mutexes = %d\n",me,status);

    /*ARMCI_Free(p2);*/
    /*ARMCI_Free(p1);*/
    free(p2);
    free(p1);

    if (me==0) printf("%d: GA_Destroy\n",me);
    GA_Destroy(g_a);

    /*if (me==0) printf("%d: ARMCI_Finalize\n",me);*/
    /*ARMCI_Finalize();*/
    if (me==0) printf("%d: GA_Terminate\n",me);
    GA_Terminate();
    if (me==0) printf("%d: MPI_Finalize\n",me);
    MPI_Finalize();

    return(0);
}
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B, g_C, local_C[DIM][DIM], dims[DIM]={5,5}, val1=5, val2=4, alpha=3, beta=2, ld=5; int alo[DIM]={2,2}, ahi[DIM]={3,3}, blo[DIM]={2,2}, bhi[DIM]={3,3}, clo[DIM]={1,1}, chi[DIM]={2,2}; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); g_B = GA_Duplicate(g_A, "array_B"); g_C = GA_Duplicate(g_A, "array_C"); GA_Fill(g_A, &val1); GA_Fill(g_B, &val2); GA_Zero(g_C); NGA_Add_patch(&alpha, g_A, clo, chi, &beta, g_B, blo, bhi, g_C, clo, chi); GA_Sync(); GA_Print(g_A); GA_Print(g_B); GA_Print(g_C); NGA_Get(g_C, clo, chi, local_C, &ld); //printf("check 1 \n"); for(i=0; i<DIM; i++) { for(j=0; j<DIM; j++)printf("%d ", local_C[i][j]); printf("\n"); } if(rank == 0) { printf("check 2\n"); for(i=0; i<DIM; i++) { for(j=0; j<DIM; j++) if(local_C[i][j]!=(alpha*val1)+(beta*val2)) printf("GA Error : \n"); } } if(rank==0) GA_PRINT_MSG(); GA_Sync(); /* GA_Destroy(g_A); GA_Destroy(g_B); GA_Destroy(g_C); */ //******************************************************************* /* what would be the possible reason for GA_destroy to get failed .., * solve this before consolidate the whole */ GA_Terminate(); MPI_Finalize(); }
static int test(int shape_idx, int type_idx, int dist_idx) { int type = TYPES[type_idx]; int *dims = SHAPES[shape_idx]; int ndim = SHAPES_NDIM[shape_idx]; mock_ga_t *mock_a, *result_a; int g_a; int buffer[100]; int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM]; int result=0, error_index=-1, error_proc=-1; mock_a = Mock_Create(type, ndim, dims, "mock", NULL); result_a = Mock_Create(type, ndim, dims, "mock", NULL); g_a = create_function[dist_idx](type, ndim, dims); mock_data(mock_a, g_a); mock_to_global(mock_a, g_a); Mock_Zero(mock_a); GA_Zero(g_a); global_to_mock(g_a, result_a); result = neq_mock(mock_a, result_a, &error_index); if (0 != result) { error_proc = GA_Nodeid(); } GA_Igop(&result, 1, "+"); GA_Igop(&error_proc, 1, "max"); if (error_proc != GA_Nodeid()) { error_index = 0; } GA_Igop(&error_index, 1, "+"); if (0 != result) { if (error_proc == GA_Nodeid()) { printf("ERROR: local result failed to compare to global result\n"); printf("\terror_proc=%d\n", error_proc); printf("\terror_index=%d\n", error_index); printf("***LOCAL RESULT***\n"); Mock_Print(mock_a); printf("***GLOBAL RESULT***\n"); Mock_Print(result_a); printf("\tprinting array distribution\n"); } GA_Sync(); GA_Print(g_a); GA_Print_distribution(g_a); return 1; } Mock_Destroy(mock_a); Mock_Destroy(result_a); GA_Destroy(g_a); return 0; }
int main(int argc, char**argv) { int nprocs, me; int i,j; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&me); GA_Initialize(); const int heap=3000000, stack=300000; if(! MA_init(C_INT,stack,heap) ) GA_Error((char *) "MA_init failed",stack+heap /*error code*/); int Nx=97; int Ny=97; int Nz = 97; Nx+=3; Ny+=3; Nz+=3; int data[Nx*Ny*Nz]; int num_splines = 32; int g_a,dims[4]={Nx,Ny,Nz,num_splines},chunk[4]={-1,-1,-1,num_splines}; int width[4] = {3, 3, 3, 0}; int type=C_INT; //g_a=NGA_Create(type,4,dims,"Coefs",chunk); g_a=NGA_Create_ghosts(type, 4, dims, width, "Coefs", chunk); int lo[4],hi[4],ld[3]; //double value=9.0; GA_Fill(g_a,&value); GA_Print_distribution(g_a); fflush(stdout); if(me==0) { for (i=0; i<num_splines; i++) { int x, y, z; for (x=0; x<Nx; x++) for (y=0; y<Ny; y++) for (z=0; z<Nz; z++) { j=x*(Ny*Nz)+y*Nz+z; data[j] = (x*100*100+y*100+z)*100+i;} lo[0]=lo[1]=lo[2]=0; hi[0]=Nx-1;hi[1]=Ny-1;hi[2]=Nz-1; lo[3]=hi[3]=i%num_splines; ld[0]=Ny;ld[1]=Nz;ld[2]=1; NGA_Put(g_a,lo,hi,data,ld); } } GA_Update_ghosts(g_a); GA_Sync(); printf("done\n"),fflush(stdout); ga_coefs_t *ga_coefs = malloc(sizeof(ga_coefs_t)); ga_coefs->Mx = Nx; ga_coefs->My = Ny; ga_coefs->Mz = Nz; ga_coefs->nsplines = num_splines; ga_coefs->g_a=g_a; int *coefs1 = (int*)malloc((size_t)1*sizeof(int)*4*4*4*num_splines); int ix,iy,iz; Nx-=3; Ny-=3; Nz-=3; ga_coefs->sumt=ga_coefs->amount=0; NGA_Distribution(g_a,me,lo,hi); GA_Print_distribution(g_a); int low[16][4],high[16][4]; for(i=0;i<nprocs;i++) NGA_Distribution(g_a,i,low[i],high[i]); srand ( time(NULL) ); int k=GA_Nodeid(); printf("%d: low[k]=%d high[k]=%d\n", GA_Nodeid(), low[k][2], high[k][2]); int unequal=0; for(i=0;i<1000;i++) { ix=rand_index(low[k][0],high[k][0]); if(ix+3>=dims[0]) ix=low[k][0]; iy=rand_index(low[k][1],high[k][1]); if(iy+3>=dims[1]) iy=low[k][1]; iz=rand_index(low[k][2],high[k][2]); if(iz+3>=dims[2]) iz=low[k][2]; coefs_ga_get_3d(ga_coefs,coefs1,ix,iy,iz); long 
get_sum=mini_cube_sum(coefs1, ga_coefs->nsplines); long ghost_sum=coefs_ghost_access_3d(ga_coefs->g_a, ix, iy, iz, ga_coefs->nsplines); if(get_sum!=ghost_sum) { printf("ixyz=\t%d\t%d\t%d\t", ix, iy, iz); printf("get_sum=%ld ghost_sum=%ld\n", get_sum, ghost_sum); unequal++; } } printf("unequal count=%d\n", unequal); free(coefs1); GA_Terminate(); MPI_Finalize(); return 0; }
main(int argc, char **argv) { int rank, nprocs; int g_A, dims[D]={5,10}, local_A[N], local_G[N], **sub_array=NULL, **s_array=NULL; int i, j, value=5; MPI_Init(&argc, &argv); GA_Initialize(); MA_init(C_INT, 1000, 1000); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); s_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { s_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) s_array[i][j]=rand()%5; } sub_array=(int**)malloc(N*sizeof(int*)); for(i=0; i<N; i++) { sub_array[i]=(int*)malloc(D*sizeof(int)); for(j=0; j<D; j++) sub_array[i][j]=rand()%5; } for(i=0; i<N; i++) //local_A=(int*)malloc(N*sizeof(int)); /* * depends on the value of array ..we can generate the location values in randon * we can also use the if-condition */ // PRINTing all the genrated array for reference for(i=0; i<N; i++) { for(j=0; j<D; j++)printf("%d ",s_array[i][j]); printf("\n"); } printf("\n"); for(i=0; i<N; i++) { for(j=0; j<D; j++)printf("%d ",sub_array[i][j]); printf("\n"); } printf("\n"); for(i=0; i<N; i++)printf("%d \n",local_A[i]=rand()%5+1); // PRINT done - now creating array g_A=NGA_Create(C_INT, D, dims, "array_A", NULL); GA_Fill(g_A, &value); GA_Sync(); NGA_Scatter(g_A, local_A, s_array, N); NGA_Gather(g_A, local_G, s_array, N); GA_Sync(); GA_Print(g_A); for(i=0; i<N; i++)printf("%d \n",local_G[i]); printf("\n"); if(rank==0) { for(i=0; i<N; i++) if(local_G[i]!=local_A[i]) printf("GA Error: \n"); } GA_Sync(); if(rank==0) GA_PRINT_MSG(); GA_Terminate(); MPI_Finalize(); return 0; }
void do_work() { int ZERO=0; /* useful constants */ int g_a, g_b; int n=N, ndim=2,type=MT_F_DBL,dims[2]={N,N},coord[2]; int me=GA_Nodeid(), nproc=GA_Nnodes(); int row, i, j; int lo[2], hi[2]; /* Note: on all current platforms DoublePrecision = double */ DoublePrecision buf[N], *max_row=NULL; MPI_Comm WORLD_COMM; MPI_Comm ROW_COMM; int ilo,ihi, jlo,jhi, ld, prow, pcol; int root=0, grp_me=-1; WORLD_COMM = GA_MPI_Comm_pgroup_default(); if(me==0)printf("Creating matrix A\n"); dims[0]=n; dims[1]=n; g_a = NGA_Create(type, ndim, dims, "A", NULL); if(!g_a) GA_Error("create failed: A",n); if(me==0)printf("OK\n"); if(me==0)printf("Creating matrix B\n"); dims[0]=n; g_b = NGA_Create(type, 1, dims, "B", NULL); if(!g_b) GA_Error("create failed: B",n); if(me==0)printf("OK\n"); GA_Zero(g_a); /* zero the matrix */ if(me==0)printf("Initializing matrix A\n"); /* fill in matrix A with values: A(i,j) = (i+j) */ for(row=me; row<n; row+= nproc){ /** * simple load balancing: * each process works on a different row in MIMD style */ for(i=0; i<n; i++) buf[i]=(DoublePrecision)(i+row+1); lo[0]=hi[0]=row; lo[1]=ZERO; hi[1]=n-1; NGA_Put(g_a, lo, hi, buf, &n); } /* GA_print(&g_a);*/ NGA_Distribution(g_a, me, lo, hi); ilo=lo[0]; ihi=hi[0]; jlo=lo[1]; jhi=hi[1]; GA_Sync(); if(ihi-ilo+1 >0){ max_row=(DoublePrecision*)malloc(sizeof(DoublePrecision)*(ihi-ilo+1)); if (!max_row) GA_Error("malloc 3 failed",(ihi-ilo+1)); for (i=0; i<(ihi-ilo+1); i++) { max_row[i] = 0.0; } } NGA_Proc_topology(g_a, me, coord); /* block coordinates */ prow = coord[0]; pcol = coord[1]; if(me==0)printf("Splitting comm according to distribution of A\n"); /* GA on SP1 requires synchronization before & after message-passing !!*/ GA_Sync(); if(me==0)printf("Computing max row elements\n"); /* create communicator for processes that 'own' A[:,jlo:jhi] */ MPI_Barrier(WORLD_COMM); if(pcol < 0 || prow <0) MPI_Comm_split(WORLD_COMM,MPI_UNDEFINED,MPI_UNDEFINED, &ROW_COMM); else MPI_Comm_split(WORLD_COMM, (int)pcol, (int)prow, &ROW_COMM); 
if(ROW_COMM != MPI_COMM_NULL){ double *ptr; MPI_Comm_rank(ROW_COMM, &grp_me); /* each process computes max elements in the block it 'owns' */ lo[0]=ilo; hi[0]=ihi; lo[1]=jlo; hi[1]=jhi; NGA_Access(g_a, lo, hi, &ptr, &ld); for(i=0; i<ihi-ilo+1; i++){ for(j=0; j<jhi-jlo+1; j++) if(max_row[i] < ptr[i*ld + j]){ max_row[i] = ptr[i*ld + j]; } } MPI_Reduce(max_row, buf, ihi-ilo+1, MPI_DOUBLE, MPI_MAX, root, ROW_COMM); }else fprintf(stderr,"process %d not participating\n",me); GA_Sync(); /* processes with rank=root in ROW_COMM put results into g_b */ ld = 1; if(grp_me == root) { lo[0]=ilo; hi[0]=ihi; NGA_Put(g_b, lo, hi, buf, &ld); } GA_Sync(); if(me==0)printf("Checking the result\n"); if(me==0){ lo[0]=ZERO; hi[0]=n-1; NGA_Get(g_b, lo, hi, buf, &n); for(i=0; i< n; i++)if(buf[i] != (double)n+i){ fprintf(stderr,"error:%d max=%f should be:%d\n",i,buf[i],n+i); GA_Error("terminating...",1); } } if(me==0)printf("OK\n"); GA_Destroy(g_a); GA_Destroy(g_b); }
int main( int argc, char **argv ) { int g_a, g_b, i, j, size, size_me; int icnt, idx, jdx, ld; int n=N, type=MT_C_INT, one; int *values, *ptr; int **indices; int dims[2]={N,N}; int lo[2], hi[2]; int heap=3000000, stack=2000000; int me, nproc; int datatype, elements; double *prealloc_mem; MP_INIT(argc,argv); #if 1 GA_INIT(argc,argv); /* initialize GA */ me=GA_Nodeid(); nproc=GA_Nnodes(); if(me==0) { if(GA_Uses_fapi())GA_Error("Program runs with C array API only",1); printf("\nUsing %ld processes\n",(long)nproc); fflush(stdout); } heap /= nproc; stack /= nproc; if(! MA_init(MT_F_DBL, stack, heap)) GA_Error("MA_init failed",stack+heap); /* initialize memory allocator*/ /* Create a regular matrix. */ if(me==0)printf("\nCreating matrix A of size %d x %d\n",N,N); g_a = NGA_Create(type, 2, dims, "A", NULL); if(!g_a) GA_Error("create failed: A",n); /* Fill matrix using scatter routines */ size = N*N; if (size%nproc == 0) { size_me = size/nproc; } else { i = size - size%nproc; size_me = i/nproc; if (me < size%nproc) size_me++; } /* Check that sizes are all okay */ i = size_me; GA_Igop(&i,1,"+"); if (i != size) { GA_Error("Sizes don't add up correctly: ",i); } else if (me==0) { printf("\nSizes add up correctly\n"); } /* Allocate index and value arrays */ indices = (int**)malloc(size_me*sizeof(int*)); values = (int*)malloc(size_me*sizeof(int)); icnt = me; for (i=0; i<size_me; i++) { values[i] = icnt; idx = icnt%N; jdx = (icnt-idx)/N; if (idx >= N || idx < 0) { printf("p[%d] Bogus index i: %d\n",me,idx); } if (jdx >= N || jdx < 0) { printf("p[%d] Bogus index j: %d\n",me,jdx); } indices[i] = (int*)malloc(2*sizeof(int)); (indices[i])[0] = idx; (indices[i])[1] = jdx; icnt += nproc; } /* Scatter values into g_a */ NGA_Scatter(g_a, values, indices, size_me); GA_Sync(); /* Check to see if contents of g_a are correct */ NGA_Distribution( g_a, me, lo, hi ); NGA_Access(g_a, lo, hi, &ptr, &ld); for (i=lo[0]; i<hi[0]; i++) { idx = i-lo[0]; for (j=lo[1]; j<hi[1]; j++) { jdx = j-lo[1]; if 
(ptr[idx*ld+jdx] != j*N+i) { printf("p[%d] (Scatter) expected: %d actual: %d\n",me,j*N+i,ptr[idx*ld+jdx]); } } } if (me==0) printf("\nCompleted test of NGA_Scatter\n"); for (i=0; i<size_me; i++) { values[i] = 0; } GA_Sync(); NGA_Gather(g_a, values, indices, size_me); icnt = me; for (i=0; i<size_me; i++) { if (icnt != values[i]) { printf("p[%d] (Gather) expected: %d actual: %d\n",me,icnt,values[i]); } icnt += nproc; } if (me==0) printf("\nCompleted test of NGA_Gather\n"); GA_Sync(); /* Scatter-accumulate values back into GA*/ one = 1; NGA_Scatter_acc(g_a, values, indices, size_me, &one); GA_Sync(); /* Check to see if contents of g_a are correct */ for (i=lo[0]; i<hi[0]; i++) { idx = i-lo[0]; for (j=lo[1]; j<hi[1]; j++) { jdx = j-lo[1]; if (ptr[idx*ld+jdx] != 2*(j*N+i)) { printf("p[%d] (Scatter_acc) expected: %d actual: %d\n",me,2*(j*N+i),ptr[idx*ld+jdx]); } } } if (me==0) printf("\nCompleted test of NGA_Scatter_acc\n"); NGA_Release(g_a, lo, hi); /* Test fixed buffer size */ NGA_Alloc_gatscat_buf(size_me); /* Scatter-accumulate values back into GA*/ GA_Sync(); NGA_Scatter_acc(g_a, values, indices, size_me, &one); GA_Sync(); /* Check to see if contents of g_a are correct */ for (i=lo[0]; i<hi[0]; i++) { idx = i-lo[0]; for (j=lo[1]; j<hi[1]; j++) { jdx = j-lo[1]; if (ptr[idx*ld+jdx] != 3*(j*N+i)) { printf("p[%d] (Scatter_acc) expected: %d actual: %d\n",me,3*(j*N+i),ptr[idx*ld+jdx]); } } } if (me==0) printf("\nCompleted test of NGA_Scatter_acc using fixed buffers\n"); NGA_Release(g_a, lo, hi); NGA_Free_gatscat_buf(); GA_Destroy(g_a); if(me==0)printf("\nSuccess\n"); GA_Terminate(); #endif MP_FINALIZE(); return 0; }
int main(int argc, char **argv) { int rank, nprocs; int g_A; int *local_A=NULL, *local_B=NULL, *output_A=NULL; int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; int value=SIZE; #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &rank ); ElMPICommSize( MPI_COMM_WORLD, &nprocs ); // instantiate el::global array ElGlobalArraysConstruct_i( &eliga ); // initialize global arrays ElGlobalArraysInitialize_i( eliga ); #else MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); #endif local_A=(int*)malloc(SIZE*SIZE*sizeof(int)); output_A=(int*)malloc(SIZE*SIZE*sizeof(int)); memset (output_A, 0, SIZE*SIZE*sizeof(int)); for(int j=0; j<SIZE; j++) for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(i + j); //for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(rand()%10); local_B=(int*)malloc(SIZE*SIZE*sizeof(int)); memset (local_B, 0, SIZE*SIZE*sizeof(int)); // nb handle #if defined(USE_ELEMENTAL) typedef ElInt ga_nbhdl_t; #endif ga_nbhdl_t nbnb; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_i( eliga, DIM, dims, "array_A", NULL, &g_A ); ElGlobalArraysFill_i( eliga, g_A, &value ); #else g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); GA_Fill(g_A, &value); #endif if (rank == 0) printf ("Initial global array:\n"); #if defined(USE_ELEMENTAL) ElGlobalArraysPrint_i( eliga, g_A ); #else GA_Print(g_A); #endif for (int i = 0; i < NITERS; i++) { // acc data #if defined(USE_ELEMENTAL) ElGlobalArraysNBAccumulate_i( eliga, g_A, lo, hi, local_A, &ld, &value, &nbnb ); #else NGA_NbAcc(g_A, lo, hi, local_A, &ld, &value, &nbnb); #endif // updated output MPI_Reduce (local_A, output_A, SIZE*SIZE, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); #if defined(USE_ELEMENTAL) ElGlobalArraysNBWait_i( eliga, &nbnb ); #else NGA_NbWait (&nbnb); #endif // get if (rank == 0) printf ("Get in iter 
#%d\n", i); #if defined(USE_ELEMENTAL) ElGlobalArraysSync_i( eliga ); ElGlobalArraysGet_i( eliga, g_A, lo, hi, local_B, &ld ); ElGlobalArraysPrint_i( eliga, g_A ); #else GA_Sync(); NGA_Get(g_A, lo, hi, local_B, &ld); GA_Print(g_A); #endif } // end of iters if(rank==0) { printf(" Alpha (multiplier): %d\n", value); printf(" Original local buffer (before accumulation): \n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) printf("%d ", local_A[i*ld+j]); printf("\n"); } printf("\n"); printf(" Get returns: \n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) printf("%d ", local_B[i*ld + j]); printf("\n"); } printf("\n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) { if(local_B[i*ld+j]!=(value + (NITERS * value * (output_A[i*ld+j])))) GA_Error("ERROR", -99); } } } #if defined(USE_ELEMENTAL) ElGlobalArraysDestroy_i( eliga, g_A ); #else GA_Destroy(g_A); #endif if(rank == 0) printf ("OK. Test passed\n"); free (local_A); free (local_B); free (output_A); #if defined(USE_ELEMENTAL) ElGlobalArraysTerminate_i( eliga ); // call el::global arrays destructor ElGlobalArraysDestruct_i( eliga ); ElFinalize(); #else GA_Terminate(); MPI_Finalize(); #endif }
main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B, g_C, **local_C=NULL, dims[DIM]={SIZE,SIZE}, val1=5, val2=4, alpha=3, beta=2; int clo[DIM]={SIZE-SIZE,SIZE-SIZE}, chi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; int **local_tm=NULL; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); local_C=(int**)malloc(SIZE*sizeof(int*)); for(i=0; i<SIZE; i++) local_C[i]=(int*)malloc(SIZE*sizeof(int)); local_tm=(int**)malloc(SIZE*sizeof(int*)); for(i=0; i<SIZE; i++) local_tm[i]=(int*)malloc(SIZE*sizeof(int)); g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); g_B = GA_Duplicate(g_A, "array_B"); g_C = GA_Duplicate(g_A, "array_C"); GA_Fill(g_A, &val1); GA_Fill(g_B, &val2); GA_Add(&alpha, g_A, &beta, g_B, g_C); GA_Sync(); GA_Print(g_A); GA_Print(g_B); GA_Print(g_C); //printf("check 1\n"); NGA_Get(g_C, clo, chi, local_tm, &ld); //printf("check 2\n"); // GA_Sync(); if(rank==0) { for(i=0; i<SIZE; i++) { for(j=0; j<SIZE; j++)printf("%d ", local_tm[i][j]); printf("\n"); } } /* if(rank==0) { NGA_Get(g_C, clo, chi, local_C, &ld); printf("check 1 \n"); for(i=0; i<SIZE; i++) { for(j=0; j<SIZE; j++)printf("%d ", local_C[i][j]); printf("\n"); } printf("check 2\n"); for(i=0; i<SIZE; i++) { for(j=0; j<SIZE; j++) if(local_C[i][j]!=(alpha*val1)+(beta*val2)) printf("GA Error : \n"); } } */ //GA_Sync(); if(rank==0) printf("Test Completed \n"); // GA_Sync(); /* GA_Destroy(g_A); GA_Destroy(g_B); GA_Destroy(g_C); */ //******************************************************************* /* what would be the possible reason for GA_destroy to get failed .., * solve this before consolidate the whole */ GA_Terminate(); MPI_Finalize(); }
void test(int data_type) { int me=GA_Nodeid(); int nproc = GA_Nnodes(); int g_a, g_b, g_c; int ndim = 2; int dims[2]={N,N}; int lo[2]={0,0}; int hi[2]={N-1,N-1}; int block_size[2]={NB,NB-1}; int proc_grid[2]; int i,j,l,k,m,n, ld; double alpha_dbl = 1.0, beta_dbl = 0.0; double dzero = 0.0; double ddiff; float alpha_flt = 1.0, beta_flt = 0.0; float fzero = 0.0; float fdiff; float ftmp; double dtmp; SingleComplex ctmp; DoubleComplex ztmp; DoubleComplex alpha_dcpl = {1.0, 0.0} , beta_dcpl = {0.0, 0.0}; DoubleComplex zzero = {0.0,0.0}; DoubleComplex zdiff; SingleComplex alpha_scpl = {1.0, 0.0} , beta_scpl = {0.0, 0.0}; SingleComplex czero = {0.0,0.0}; SingleComplex cdiff; void *alpha=NULL, *beta=NULL; void *abuf=NULL, *bbuf=NULL, *cbuf=NULL, *c_ptr=NULL; switch (data_type) { case C_FLOAT: alpha = (void *)&alpha_flt; beta = (void *)&beta_flt; abuf = (void*)malloc(N*N*sizeof(float)); bbuf = (void*)malloc(N*N*sizeof(float)); cbuf = (void*)malloc(N*N*sizeof(float)); if(me==0) printf("Single Precision: Testing GA_Sgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; case C_DBL: alpha = (void *)&alpha_dbl; beta = (void *)&beta_dbl; abuf = (void*)malloc(N*N*sizeof(double)); bbuf = (void*)malloc(N*N*sizeof(double)); cbuf = (void*)malloc(N*N*sizeof(double)); if(me==0) printf("Double Precision: Testing GA_Dgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; case C_DCPL: alpha = (void *)&alpha_dcpl; beta = (void *)&beta_dcpl; abuf = (void*)malloc(N*N*sizeof(DoubleComplex)); bbuf = (void*)malloc(N*N*sizeof(DoubleComplex)); cbuf = (void*)malloc(N*N*sizeof(DoubleComplex)); if(me==0) printf("Double Complex: Testing GA_Zgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; case C_SCPL: alpha = (void *)&alpha_scpl; beta = (void *)&beta_scpl; abuf = (void*)malloc(N*N*sizeof(SingleComplex)); bbuf = (void*)malloc(N*N*sizeof(SingleComplex)); cbuf = (void*)malloc(N*N*sizeof(SingleComplex)); if(me==0) printf("Single Complex: Testing GA_Cgemm,NGA_Matmul_patch for %d-Dimension", ndim); 
break; default: GA_Error("wrong data type", data_type); } if (me==0) printf("\nCreate A, B, C\n"); #ifdef USE_REGULAR g_a = NGA_Create(data_type, ndim, dims, "array A", NULL); #endif #ifdef USE_SIMPLE_CYCLIC g_a = NGA_Create_handle(); NGA_Set_data(g_a,ndim,dims,data_type); NGA_Set_array_name(g_a,"array A"); NGA_Set_block_cyclic(g_a,block_size); if (!GA_Allocate(g_a)) { GA_Error("Failed: create: g_a",40); } #endif #ifdef USE_SCALAPACK g_a = NGA_Create_handle(); NGA_Set_data(g_a,ndim,dims,data_type); NGA_Set_array_name(g_a,"array A"); grid_factor(nproc,&i,&j); proc_grid[0] = i; proc_grid[1] = j; NGA_Set_block_cyclic_proc_grid(g_a,block_size,proc_grid); if (!GA_Allocate(g_a)) { GA_Error("Failed: create: g_a",40); } #endif #ifdef USE_TILED g_a = NGA_Create_handle(); NGA_Set_data(g_a,ndim,dims,data_type); NGA_Set_array_name(g_a,"array A"); grid_factor(nproc,&i,&j); proc_grid[0] = i; proc_grid[1] = j; NGA_Set_tiled_proc_grid(g_a,block_size,proc_grid); if (!GA_Allocate(g_a)) { GA_Error("Failed: create: g_a",40); } #endif g_b = GA_Duplicate(g_a, "array B"); g_c = GA_Duplicate(g_a, "array C"); if(!g_a || !g_b || !g_c) GA_Error("Create failed: a, b or c",1); ld = N; if (me==0) printf("\nInitialize A\n"); /* Set up matrix A */ if (me == 0) { for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: ((float*)abuf)[i*N+j] = (float)(i*N+j); break; case C_DBL: ((double*)abuf)[i*N+j] = (double)(i*N+j); break; case C_DCPL: ((DoubleComplex*)abuf)[i*N+j].real = (double)(i*N+j); ((DoubleComplex*)abuf)[i*N+j].imag = 1.0; break; case C_SCPL: ((SingleComplex*)abuf)[i*N+j].real = (float)(i*N+j); ((SingleComplex*)abuf)[i*N+j].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_a,lo,hi,abuf,&ld); } GA_Sync(); if (me==0) printf("\nInitialize B\n"); /* Set up matrix B */ if (me == 0) { for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: ((float*)bbuf)[i*N+j] = (float)(j*N+i); break; case C_DBL: 
((double*)bbuf)[i*N+j] = (double)(j*N+i); break; case C_DCPL: ((DoubleComplex*)bbuf)[i*N+j].real = (double)(j*N+i); ((DoubleComplex*)bbuf)[i*N+j].imag = 1.0; break; case C_SCPL: ((SingleComplex*)bbuf)[i*N+j].real = (float)(j*N+i); ((SingleComplex*)bbuf)[i*N+j].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_b,lo,hi,bbuf,&ld); } GA_Sync(); if (me==0) printf("\nPerform matrix multiply\n"); switch (data_type) { case C_FLOAT: NGA_Matmul_patch('N','N',&alpha_flt,&beta_flt,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DBL: NGA_Matmul_patch('N','N',&alpha_dbl,&beta_dbl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_SCPL: NGA_Matmul_patch('N','N',&alpha_scpl,&beta_scpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DCPL: NGA_Matmul_patch('N','N',&alpha_dcpl,&beta_dcpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; default: GA_Error("wrong data type", data_type); } GA_Sync(); #if 0 if (me==0) printf("\nCheck answer\n"); /* GA_Print(g_a); if (me == 0) printf("\n\n\n\n"); GA_Print(g_b); if (me == 0) printf("\n\n\n\n"); GA_Print(g_c); */ /* Check answer */ NGA_Get(g_a,lo,hi,abuf,&ld); NGA_Get(g_b,lo,hi,bbuf,&ld); for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N+j] = fzero; break; case C_DBL: ((double*)cbuf)[i*N+j] = dzero; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N+j] = zzero; break; case C_SCPL: ((SingleComplex*)cbuf)[i*N+j] = czero; break; default: GA_Error("wrong data type", data_type); } for (k=0; k<N; k++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N+j] += ((float*)abuf)[i*N+k] *((float*)bbuf)[k*N+j]; break; case C_DBL: ((double*)cbuf)[i*N+j] += ((double*)abuf)[i*N+k] *((double*)bbuf)[k*N+j]; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N+j].real += (((DoubleComplex*)abuf)[i*N+k].real *((DoubleComplex*)bbuf)[k*N+j].real -(((DoubleComplex*)abuf)[i*N+k].imag *((DoubleComplex*)bbuf)[k*N+j].imag)); ((DoubleComplex*)cbuf)[i*N+j].imag += (((DoubleComplex*)abuf)[i*N+k].real 
*((DoubleComplex*)bbuf)[k*N+j].imag +(((DoubleComplex*)abuf)[i*N+k].imag *((DoubleComplex*)bbuf)[k*N+j].real)); break; case C_SCPL: ((SingleComplex*)cbuf)[i*N+j].real += (((SingleComplex*)abuf)[i*N+k].real *((SingleComplex*)bbuf)[k*N+j].real -(((SingleComplex*)abuf)[i*N+k].imag *((SingleComplex*)bbuf)[k*N+j].imag)); ((SingleComplex*)cbuf)[i*N+j].imag += (((SingleComplex*)abuf)[i*N+k].real *((SingleComplex*)bbuf)[k*N+j].imag +(((SingleComplex*)abuf)[i*N+k].imag *((SingleComplex*)bbuf)[k*N+j].real)); break; default: GA_Error("wrong data type", data_type); } } } } GA_Sync(); if (me == 0) { NGA_Get(g_c,lo,hi,abuf,&ld); for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: fdiff = ((float*)abuf)[i*N+j]-((float*)cbuf)[i*N+j]; if (((float*)abuf)[i*N+j] != 0.0) { fdiff /= ((float*)abuf)[i*N+j]; } if (fabs(fdiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: %f Expected: %f\n",me,i,j, ((float*)abuf)[i*N+j],((float*)cbuf)[i*N+j]); } break; case C_DBL: ddiff = ((double*)abuf)[i*N+j]-((double*)cbuf)[i*N+j]; if (((double*)abuf)[i*N+j] != 0.0) { ddiff /= ((double*)abuf)[i*N+j]; } if (fabs(ddiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: %f Expected: %f\n",me,i,j, ((double*)abuf)[i*N+j],((double*)cbuf)[i*N+j]); } break; case C_DCPL: zdiff.real = ((DoubleComplex*)abuf)[i*N+j].real -((DoubleComplex*)cbuf)[i*N+j].real; zdiff.imag = ((DoubleComplex*)abuf)[i*N+j].imag -((DoubleComplex*)cbuf)[i*N+j].imag; if (((DoubleComplex*)abuf)[i*N+j].real != 0.0 || ((DoubleComplex*)abuf)[i*N+j].imag != 0.0) { ztmp = ((DoubleComplex*)abuf)[i*N+j]; ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag) /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag)); } else { ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag); } if (fabs(ddiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: (%f,%f) Expected: (%f,%f)\n",me,i,j, ((DoubleComplex*)abuf)[i*N+j].real, ((DoubleComplex*)abuf)[i*N+j].imag, ((DoubleComplex*)cbuf)[i*N+j].real, ((DoubleComplex*)cbuf)[i*N+j].imag); } break; case 
C_SCPL: cdiff.real = ((SingleComplex*)abuf)[i*N+j].real -((SingleComplex*)cbuf)[i*N+j].real; cdiff.imag = ((SingleComplex*)abuf)[i*N+j].imag -((SingleComplex*)cbuf)[i*N+j].imag; if (((SingleComplex*)abuf)[i*N+j].real != 0.0 || ((SingleComplex*)abuf)[i*N+j].imag != 0.0) { ctmp = ((SingleComplex*)abuf)[i*N+j]; fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag) /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag)); } else { fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag); } if (fabs(fdiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: (%f,%f) Expected: (%f,%f)\n",me,i,j, ((SingleComplex*)abuf)[i*N+j].real, ((SingleComplex*)abuf)[i*N+j].imag, ((SingleComplex*)cbuf)[i*N+j].real, ((SingleComplex*)cbuf)[i*N+j].imag); } break; default: GA_Error("wrong data type", data_type); } } } } GA_Sync(); /* copy cbuf back to g_a */ if (me == 0) { NGA_Put(g_a,lo,hi,cbuf,&ld); } GA_Sync(); /* Get norm of g_a */ switch (data_type) { case C_FLOAT: ftmp = GA_Fdot(g_a,g_a); break; case C_DBL: dtmp = GA_Ddot(g_a,g_a); break; case C_DCPL: ztmp = GA_Zdot(g_a,g_a); break; case C_SCPL: ctmp = GA_Cdot(g_a,g_a); break; default: GA_Error("wrong data type", data_type); } /* subtract C from A and put the results in B */ beta_flt = -1.0; beta_dbl = -1.0; beta_scpl.real = -1.0; beta_dcpl.real = -1.0; GA_Zero(g_b); GA_Add(alpha,g_a,beta,g_c,g_b); /* evaluate the norm of the difference between the two matrices */ switch (data_type) { case C_FLOAT: fdiff = GA_Fdot(g_b, g_b); if (ftmp != 0.0) { fdiff /= ftmp; } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(fdiff), TOLERANCE); GA_Error("GA_Sgemm Failed", 1); } else if (me == 0) { printf("\nGA_Sgemm OK\n\n"); } break; case C_DBL: ddiff = GA_Ddot(g_b, g_b); if (dtmp != 0.0) { ddiff /= dtmp; } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(ddiff), TOLERANCE); GA_Error("GA_Dgemm Failed", 1); } else if (me == 0) { printf("\nGA_Dgemm OK\n\n"); } break; case C_DCPL: zdiff = GA_Zdot(g_b, g_b); if (ztmp.real 
!= 0.0 || ztmp.imag != 0.0) { ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag) /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag)); } else { ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag); } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(zdiff.real), TOLERANCE); GA_Error("GA_Zgemm Failed", 1); } else if (me == 0) { printf("\nGA_Zgemm OK\n\n"); } break; case C_SCPL: cdiff = GA_Cdot(g_b, g_b); if (ctmp.real != 0.0 || ctmp.imag != 0.0) { fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag) /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag)); } else { fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag); } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(cdiff.real), TOLERANCE); GA_Error("GA_Cgemm Failed", 1); } else if (me == 0) { printf("\nGA_Cgemm OK\n\n"); } break; default: GA_Error("wrong data type", data_type); } #endif free(abuf); free(bbuf); free(cbuf); switch (data_type) { case C_FLOAT: abuf = (void*)malloc(N*N*sizeof(float)/4); bbuf = (void*)malloc(N*N*sizeof(float)/4); cbuf = (void*)malloc(N*N*sizeof(float)/4); break; case C_DBL: abuf = (void*)malloc(N*N*sizeof(double)/4); bbuf = (void*)malloc(N*N*sizeof(double)/4); cbuf = (void*)malloc(N*N*sizeof(double)/4); break; case C_DCPL: abuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4); bbuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4); cbuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4); break; case C_SCPL: abuf = (void*)malloc(N*N*sizeof(SingleComplex)/4); bbuf = (void*)malloc(N*N*sizeof(SingleComplex)/4); cbuf = (void*)malloc(N*N*sizeof(SingleComplex)/4); break; default: GA_Error("wrong data type", data_type); } /* Test multiply on a fraction of matrix. 
Start by reinitializing * A and B */ GA_Zero(g_a); GA_Zero(g_b); GA_Zero(g_c); if (me==0) printf("\nTest patch multiply\n"); lo[0] = N/4; lo[1] = N/4; hi[0] = 3*N/4-1; hi[1] = 3*N/4-1; ld = N/2; /* Set up matrix A */ if (me==0) printf("\nInitialize A\n"); if (me == 0) { for (i=N/4; i<3*N/4; i++) { for (j=N/4; j<3*N/4; j++) { switch (data_type) { case C_FLOAT: ((float*)abuf)[(i-N/4)*N/2+(j-N/4)] = (float)(i*N+j); break; case C_DBL: ((double*)abuf)[(i-N/4)*N/2+(j-N/4)] = (double)(i*N+j); break; case C_DCPL: ((DoubleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].real = (double)(i*N+j); ((DoubleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; case C_SCPL: ((SingleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].real = (float)(i*N+j); ((SingleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_a,lo,hi,abuf,&ld); } GA_Sync(); if (me==0) printf("\nInitialize B\n"); /* Set up matrix B */ if (me == 0) { for (i=N/4; i<3*N/4; i++) { for (j=N/4; j<3*N/4; j++) { switch (data_type) { case C_FLOAT: ((float*)bbuf)[(i-N/4)*N/2+(j-N/4)] = (float)(j*N+i); break; case C_DBL: ((double*)bbuf)[(i-N/4)*N/2+(j-N/4)] = (double)(j*N+i); break; case C_DCPL: ((DoubleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].real = (double)(j*N+i); ((DoubleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; case C_SCPL: ((SingleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].real = (float)(j*N+i); ((SingleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_b,lo,hi,bbuf,&ld); } GA_Sync(); beta_flt = 0.0; beta_dbl = 0.0; beta_scpl.real = 0.0; beta_dcpl.real = 0.0; if (me==0) printf("\nPerform matrix multiply on sub-blocks\n"); switch (data_type) { case C_FLOAT: NGA_Matmul_patch('N','N',&alpha_flt,&beta_flt,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DBL: NGA_Matmul_patch('N','N',&alpha_dbl,&beta_dbl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_SCPL: NGA_Matmul_patch('N','N',&alpha_scpl,&beta_scpl,g_a,lo,hi, 
g_b,lo,hi,g_c,lo,hi); break; case C_DCPL: NGA_Matmul_patch('N','N',&alpha_dcpl,&beta_dcpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; default: GA_Error("wrong data type", data_type); } GA_Sync(); #if 0 if (0) { /* if (data_type != C_SCPL && data_type != C_DCPL) { */ if (me==0) printf("\nCheck answer\n"); /* Multiply buffers by hand */ if (me == 0) { for (i=0; i<N/2; i++) { for (j=0; j<N/2; j++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N/2+j] = fzero; break; case C_DBL: ((double*)cbuf)[i*N/2+j] = dzero; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N/2+j] = zzero; break; case C_SCPL: ((SingleComplex*)cbuf)[i*N/2+j] = czero; break; default: GA_Error("wrong data type", data_type); } for (k=0; k<N/2; k++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N/2+j] += ((float*)abuf)[i*N/2+k] *((float*)bbuf)[k*N/2+j]; break; case C_DBL: ((double*)cbuf)[i*N/2+j] += ((double*)abuf)[i*N/2+k] *((double*)bbuf)[k*N/2+j]; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N/2+j].real += (((DoubleComplex*)abuf)[i*N/2+k].real *((DoubleComplex*)bbuf)[k*N/2+j].real -(((DoubleComplex*)abuf)[i*N/2+k].imag *((DoubleComplex*)bbuf)[k*N/2+j].imag)); ((DoubleComplex*)cbuf)[i*N/2+j].imag += (((DoubleComplex*)abuf)[i*N/2+k].real *((DoubleComplex*)bbuf)[k*N/2+j].imag +(((DoubleComplex*)abuf)[i*N/2+k].imag *((DoubleComplex*)bbuf)[k*N/2+j].real)); break; case C_SCPL: ((SingleComplex*)cbuf)[i*N/2+j].real += (((SingleComplex*)abuf)[i*N/2+k].real *((SingleComplex*)bbuf)[k*N/2+j].real -(((SingleComplex*)abuf)[i*N/2+k].imag *((SingleComplex*)bbuf)[k*N/2+j].imag)); ((SingleComplex*)cbuf)[i*N/2+j].imag += (((SingleComplex*)abuf)[i*N/2+k].real *((SingleComplex*)bbuf)[k*N/2+j].imag +(((SingleComplex*)abuf)[i*N/2+k].imag *((SingleComplex*)bbuf)[k*N/2+j].real)); break; default: GA_Error("wrong data type", data_type); } } } } NGA_Put(g_a,lo,hi,cbuf,&ld); } if (me == 0) printf("\n\n\n\n"); /* Get norm of g_a */ switch (data_type) { case C_FLOAT: ftmp = NGA_Fdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; 
case C_DBL: dtmp = NGA_Ddot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; case C_DCPL: ztmp = NGA_Zdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; case C_SCPL: ctmp = NGA_Cdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; default: GA_Error("wrong data type", data_type); } /* subtract C from A and put the results in B */ beta_flt = -1.0; beta_dbl = -1.0; beta_scpl.real = -1.0; beta_dcpl.real = -1.0; NGA_Zero_patch(g_b,lo,hi); NGA_Add_patch(alpha,g_a,lo,hi,beta,g_c,lo,hi,g_b,lo,hi); /* evaluate the norm of the difference between the two matrices */ switch (data_type) { case C_FLOAT: fdiff = NGA_Fdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (ftmp != 0.0) { fdiff /= ftmp; } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(fdiff), TOLERANCE); GA_Error("GA_Sgemm Failed", 1); } else if (me == 0) { printf("\nGA_Sgemm OK\n\n"); } break; case C_DBL: ddiff = NGA_Ddot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (dtmp != 0.0) { ddiff /= dtmp; } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(ddiff), TOLERANCE); GA_Error("GA_Dgemm Failed", 1); } else if (me == 0) { printf("\nGA_Dgemm OK\n\n"); } break; case C_DCPL: zdiff = NGA_Zdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (ztmp.real != 0.0 || ztmp.imag != 0.0) { ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag) /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag)); } else { ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag); } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(zdiff.real), TOLERANCE); GA_Error("GA_Zgemm Failed", 1); } else if (me == 0) { printf("\nGA_Zgemm OK\n\n"); } break; case C_SCPL: cdiff = NGA_Cdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (ctmp.real != 0.0 || ctmp.imag != 0.0) { fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag) /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag)); } else { fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag); } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(cdiff.real), TOLERANCE); 
GA_Error("GA_Cgemm Failed", 1); } else if (me == 0) { printf("\nGA_Cgemm OK\n\n"); } break; default: GA_Error("wrong data type", data_type); } } #endif free(abuf); free(bbuf); free(cbuf); GA_Destroy(g_a); GA_Destroy(g_b); GA_Destroy(g_c); }
/*
 * Small end-to-end exercise of the GA C interface on an N x N array of
 * doubles:
 *   1. create A and a duplicate B with the same dims and distribution,
 *   2. fill A row by row (rows dealt out to the processes round-robin),
 *   3. symmetrize A and print the squared norm of A - A',
 *   4. have every process accumulate the same vector into one row of A and
 *      let process 0 check that the row holds nproc times that vector.
 */
void do_work()
{
    int g_a, g_b;
    int n = N;
    int me = GA_Nodeid();
    int nproc = GA_Nnodes();
    int is_root = (me == 0);      /* only process 0 prints progress */
    int col, row;
    int dims[2] = {N, N};
    int lo[2], hi[2], ld;
    /* Note: on all current platforms DoublePrecision == double */
    double buf[N], err, alpha, beta;

    if (is_root) {
        printf("Creating matrix A\n");
    }
    g_a = NGA_Create(MT_F_DBL, 2, dims, "A", NULL);
    if (!g_a) {
        GA_Error("create failed: A", n);
    }
    if (is_root) {
        printf("OK\n");
        printf("Creating matrix B\n");
    }

    /* B takes its dims and distribution from A */
    g_b = GA_Duplicate(g_a, "B");
    if (!g_b) {
        GA_Error("duplicate failed", n);
    }
    if (is_root) {
        printf("OK\n");
    }

    GA_Zero(g_a);

    if (is_root) {
        printf("Initializing matrix A\n");
    }
    /* Every row spans all columns; each process handles rows
     * me, me+nproc, me+2*nproc, ... */
    lo[1] = 0;
    hi[1] = n - 1;
    row = me;
    while (row < n) {
        lo[0] = row;
        hi[0] = row;
        for (col = 0; col < n; col++) {
            buf[col] = sin((double)col + 0.1*(row+1));
        }
        NGA_Put(g_a, lo, hi, buf, &n);
        row += nproc;
    }

    if (is_root) {
        printf("Symmetrizing matrix A\n");
    }
    GA_Symmetrize(g_a);           /* A = 0.5*(A+A') */

    /* A is symmetric exactly when A - A' vanishes */
    if (is_root) {
        printf("Checking if matrix A is symmetric\n");
    }
    GA_Transpose(g_a, g_b);       /* B = A' */
    alpha = 1.;
    beta = -1.;
    GA_Add(&alpha, g_a, &beta, g_b, g_b);   /* B = A - B */
    err = GA_Ddot(g_b, g_b);
    if (is_root) {
        printf("Error=%f\n", (double)err);
        printf("\nChecking atomic accumulate \n");
    }

    GA_Zero(g_a);
    for (col = 0; col < n; col++) {
        buf[col] = (double)col;
    }

    /* all processes accumulate into the same row */
    alpha = 1.0;
    row = n/2;
    lo[0] = row;
    hi[0] = row;
    lo[1] = 0;
    hi[1] = n - 1;
    ld = hi[1] - lo[1] + 1;
    NGA_Acc(g_a, lo, hi, buf, &ld, &alpha);
    GA_Sync();

    if (is_root) {
        /* process 0 reads the row back and verifies the sum */
        NGA_Get(g_a, lo, hi, buf, &ld);
        for (col = 0; col < n; col++) {
            if (buf[col] != (double)nproc*col) {
                GA_Error("failed: column=", col);
            }
        }
        printf("OK\n\n");
    }

    GA_Destroy(g_a);
    GA_Destroy(g_b);
}