// note: Sayan: brings down memory requirement to about 268 MB int main(int argc, char **argv) { int me, nproc, g_a = -1, i, j; #if defined(USE_ELEMENTAL) int ndim=2, dims[2]= {N1,N2}; #else int ndim=2, type=MT_F_DBL, dims[2]= {N1,N2}; #endif double *buf; int lo[2], hi[2], ld[1]; double alpha = 1.0; #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &me ); ElMPICommSize( MPI_COMM_WORLD, &nproc ); ElGlobalArrays_d eldga; // instantiate el::global array ElGlobalArraysConstruct_d( &eldga ); // initialize global arrays ElGlobalArraysInitialize_d( eldga ); printf ("INITIALIZED elemental global array...\n"); #else MP_INIT(argc,argv); GA_Initialize_ltd(-1); me=GA_Nodeid(); nproc=GA_Nnodes(); #endif if(me==0) printf("Using %ld processes\n",(long)nproc); if(me==0) printf("memory = %ld bytes\n",((long)N1)*((long)N2)*8); #if defined(USE_ELEMENTAL) // create and allocate a global array printf ("ndim = %d\n", ndim); printf ("dim[0] = %d and dim[1] = %d\n", dims[0], dims[1]); ElGlobalArraysCreate_d( eldga, ndim, dims, "A", &g_a); printf ("CREATED elemental global array...\n"); // print distribution ElGlobalArraysPrint_d( eldga, g_a ); #else g_a = NGA_Create(type, ndim, dims, "A", NULL); GA_Zero(g_a); /* zero the matrix */ GA_Print_distribution(g_a); #endif if(me == 0) { // buf = (double*)(malloc(N1*1024*sizeof(double))); buf = (double*)(malloc(N1*128*sizeof(double))); // for(j = 0; j < N1*1024; ++j) buf[j] = 1.0; // for(i = 0; i < N2/1024; ++i) { for(j = 0; j < N1*128; ++j) buf[j] = 1.0; for(i = 0; i < N2/128; ++i) { lo[0] = 0; hi[0] = lo[0] + N1 -1; /* lo[1] = i*1024; hi[1] = lo[1] + 1024 -1; ld[0] = 1024; */ lo[1] = i*128; hi[1] = lo[1] + 128 -1; ld[0] = 128; printf("NGA_Acc.%d: %d:%d %d:%d\n",i,lo[0],hi[0],lo[1],hi[1]); #if defined(USE_ELEMENTAL) ElGlobalArraysAccumulate_d( eldga, g_a, lo, hi, buf, ld, &alpha ); // there is an explicit flush in NGA_Acc/Put, so when it returns, the buffer // can be reused and data has reached the destination #else NGA_Init_fence(); NGA_Acc(g_a, lo, hi, buf, ld, &alpha); NGA_Fence(); #endif } } #if defined(USE_ELEMENTAL) ElGlobalArraysSync_d( eldga ); ElGlobalArraysDestroy_d( eldga, g_a ); ElGlobalArraysTerminate_d( eldga ); // call el::global arrays destructor ElGlobalArraysDestruct_d( eldga ); ElFinalize(); #else GA_Sync(); GA_Destroy(g_a); GA_Terminate(); MP_FINALIZE(); #endif return 0; }
int main(int argc, char **argv) { int rank, nprocs; int g_A; int *local_A=NULL, *local_B=NULL, *output_A=NULL; int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; int value=SIZE; #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &rank ); ElMPICommSize( MPI_COMM_WORLD, &nprocs ); // instantiate el::global array ElGlobalArraysConstruct_i( &eliga ); // initialize global arrays ElGlobalArraysInitialize_i( eliga ); #else MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); #endif local_A=(int*)malloc(SIZE*SIZE*sizeof(int)); output_A=(int*)malloc(SIZE*SIZE*sizeof(int)); memset (output_A, 0, SIZE*SIZE*sizeof(int)); for(int j=0; j<SIZE; j++) for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(i + j); //for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(rand()%10); local_B=(int*)malloc(SIZE*SIZE*sizeof(int)); memset (local_B, 0, SIZE*SIZE*sizeof(int)); // nb handle #if defined(USE_ELEMENTAL) typedef ElInt ga_nbhdl_t; #endif ga_nbhdl_t nbnb; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_i( eliga, DIM, dims, "array_A", NULL, &g_A ); ElGlobalArraysFill_i( eliga, g_A, &value ); #else g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); GA_Fill(g_A, &value); #endif if (rank == 0) printf ("Initial global array:\n"); #if defined(USE_ELEMENTAL) ElGlobalArraysPrint_i( eliga, g_A ); #else GA_Print(g_A); #endif for (int i = 0; i < NITERS; i++) { // acc data #if defined(USE_ELEMENTAL) ElGlobalArraysNBAccumulate_i( eliga, g_A, lo, hi, local_A, &ld, &value, &nbnb ); #else NGA_NbAcc(g_A, lo, hi, local_A, &ld, &value, &nbnb); #endif // updated output MPI_Reduce (local_A, output_A, SIZE*SIZE, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); #if defined(USE_ELEMENTAL) ElGlobalArraysNBWait_i( eliga, &nbnb ); #else NGA_NbWait (&nbnb); #endif // get if (rank == 0) printf ("Get in iter #%d\n", i); #if defined(USE_ELEMENTAL) ElGlobalArraysSync_i( eliga ); ElGlobalArraysGet_i( eliga, g_A, lo, hi, local_B, &ld ); ElGlobalArraysPrint_i( eliga, g_A ); #else GA_Sync(); NGA_Get(g_A, lo, hi, local_B, &ld); GA_Print(g_A); #endif } // end of iters if(rank==0) { printf(" Alpha (multiplier): %d\n", value); printf(" Original local buffer (before accumulation): \n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) printf("%d ", local_A[i*ld+j]); printf("\n"); } printf("\n"); printf(" Get returns: \n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) printf("%d ", local_B[i*ld + j]); printf("\n"); } printf("\n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) { if(local_B[i*ld+j]!=(value + (NITERS * value * (output_A[i*ld+j])))) GA_Error("ERROR", -99); } } } #if defined(USE_ELEMENTAL) ElGlobalArraysDestroy_i( eliga, g_A ); #else GA_Destroy(g_A); #endif if(rank == 0) printf ("OK. Test passed\n"); free (local_A); free (local_B); free (output_A); #if defined(USE_ELEMENTAL) ElGlobalArraysTerminate_i( eliga ); // call el::global arrays destructor ElGlobalArraysDestruct_i( eliga ); ElFinalize(); #else GA_Terminate(); MPI_Finalize(); #endif }
/* * test ga_dgemm * Note: - change nummax for large arrays * - turn off "dgemm_verify" for large arrays due to memory * limitations, as dgemm_verify=1 for large arrays produces * segfault, dumps core,or any crap. */ int main(int argc, char **argv) { int num_m; int num_n; int num_k; int i; int ii; double *h0; int g_c; int g_b; int g_a; double a; double t1; double mf; double avg_t[ntrans]; double avg_mf[ntrans]; int itime; int ntimes; int nums_m[/*howmany*/] = {512,1024}; int nums_n[/*howmany*/] = {512,1024}; int nums_k[/*howmany*/] = {512,1024}; char transa[/*ntrans*/] = "ntnt"; char transb[/*ntrans*/] = "nntt"; char ta; char tb; double *tmpa; double *tmpb; double *tmpc; int ndim; int dims[2]; #ifdef BLOCK_CYCLIC int block_size[2]; #endif #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &me ); ElMPICommSize( MPI_COMM_WORLD, &nproc ); // instantiate el::global array ElGlobalArraysConstruct_d( &eldga ); // initialize global arrays ElGlobalArraysInitialize_d( eldga ); #else MP_INIT(argc,argv); if (!MA_init(MT_DBL,1,20000000)) { GA_Error("failed: ma_init(MT_DBL,1,20000000)",10); } GA_INIT(argc,argv); me = GA_Nodeid(); #endif h0 = (double*)malloc(sizeof(double) * nummax*nummax); tmpa = (double*)malloc(sizeof(double) * nummax*nummax); tmpb = (double*)malloc(sizeof(double) * nummax*nummax); tmpc = (double*)malloc(sizeof(double) * nummax*nummax); ii = 0; for (i=0; i<nummax*nummax; i++) { ii = ii + 1; if (ii > nummax) { ii = 0; } h0[i] = ii; } /* Compute times assuming 500 mflops and 5 second target time */ /* ntimes = max(3.0d0,5.0d0/(4.0d-9*num**3)); */ ntimes = 5; for (ii=0; ii<howmany; ii++) { num_m = nums_m[ii]; num_n = nums_n[ii]; num_k = nums_k[ii]; a = 0.5/(num_m*num_n); if (num_m > nummax || num_n > nummax || num_k > nummax) { GA_Error("Insufficient memory: check nummax", 1); } #ifndef BLOCK_CYCLIC ndim = 2; /* dims[0] = num_m; dims[1] = num_n; */ dims[1] = num_m; dims[0] = num_n; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_d( eldga, ndim, dims, "g_c", NULL, &g_c ); #else if (!((g_c = NGA_Create(MT_DBL,ndim,dims,"g_c",NULL)))) { GA_Error("failed: create g_c",20); } #endif /* dims[0] = num_k; dims[1] = num_n; */ dims[1] = num_k; dims[0] = num_n; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_d( eldga, ndim, dims, "g_b", NULL, &g_b ); #else if (!((g_b = NGA_Create(MT_DBL,ndim,dims,"g_b",NULL)))) { GA_Error("failed: create g_b",30); } #endif /* dims[0] = num_m; dims[1] = num_k; */ dims[1] = num_m; dims[0] = num_k; #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_d( eldga, ndim, dims, "g_a", NULL, &g_a ); #else if (!((g_a = NGA_Create(MT_DBL,ndim,dims,"g_a",NULL)))) { GA_Error("failed: create g_a",40); } #endif #else ndim = 2; block_size[0] = 128; block_size[1] = 128; dims[0] = num_m; dims[1] = num_n; g_c = GA_Create_handle(); GA_Set_data(g_c,ndim,dims,MT_DBL); GA_Set_array_name(g_c,"g_c"); GA_Set_block_cyclic(g_c,block_size); if (!GA_Allocate(g_c)) { GA_Error("failed: create g_c",40); } dims[0] = num_k; dims[1] = num_n; g_b = GA_Create_handle(); GA_Set_data(g_b,ndim,dims,MT_DBL); GA_Set_array_name(g_b,"g_b"); GA_Set_block_cyclic(g_b,block_size); if (!ga_allocate(g_b)) { GA_Error("failed: create g_b",40); } dims[0] = num_m; dims[1] = num_k; g_a = GA_Create_handle(); GA_Set_data(g_a,ndim,dims,MT_DBL); GA_Set_array_name(g_a,"g_a"); GA_Set_block_cyclic(g_a,block_size); if (!ga_allocate(g_a)) { GA_Error('failed: create g_a',40); } #endif /* Initialize matrices A and B */ if (me == 0) { load_ga(g_a, h0, num_m, num_k); load_ga(g_b, h0, num_k, num_n); } #if defined(USE_ELEMENTAL) double zero = 0.0; ElGlobalArraysFill_d( eldga, g_c, &zero ); ElGlobalArraysSync_d( eldga ); #else GA_Zero(g_c); GA_Sync(); #endif #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif printf("\nMatrix Multiplication on C = A[%ld,%ld]xB[%ld,%ld]\n", (long)num_m, (long)num_k, (long)num_k, (long)num_n); fflush(stdout); } for (i=0; i<ntrans; i++) { avg_t[i] = 0.0; avg_mf[i] = 0.0; } for (itime=0; itime<ntimes; itime++) { for (i=0; i<ntrans; i++) { #if defined(USE_ELEMENTAL) ElGlobalArraysSync_d( eldga ); #else GA_Sync(); #endif ta = transa[i]; tb = transb[i]; t1 = MP_TIMER(); #if defined(USE_ELEMENTAL) ElGlobalArraysDgemm_d( eldga, ta, tb, num_m, num_n, num_k, 1.0, g_a, g_b, 0.0, g_c ); #else GA_Dgemm(ta,tb,num_m,num_n,num_k,1.0, g_a, g_b, 0.0, g_c); #endif t1 = MP_TIMER() - t1; #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif #if defined(USE_ELEMENTAL) mf = 2e0*num_m*num_n*num_k/t1*1e-6/nproc; #else mf = 2e0*num_m*num_n*num_k/t1*1e-6/GA_Nnodes(); #endif avg_t[i] = avg_t[i]+t1; avg_mf[i] = avg_mf[i] + mf; printf("%15s%2d: %12.4f seconds %12.1f mflops/proc %c %c\n", "Run#", itime, t1, mf, ta, tb); fflush(stdout); if (dgemm_verify && itime == 0) { /* recall the C API swaps the matrix order */ /* we swap it here for the Fortran-based verify */ verify_ga_dgemm(tb, ta, num_n, num_m, num_k, 1.0, g_b, g_a, 0.0, g_c, tmpb, tmpa, tmpc); } } } } #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif printf("\n"); for (i=0; i<ntrans; i++) { printf("%17s: %12.4f seconds %12.1f mflops/proc %c %c\n", "Average", avg_t[i]/ntimes, avg_mf[i]/ntimes, transa[i], transb[i]); } if(dgemm_verify) { printf("All GA_Dgemms are verified...O.K.\n"); } fflush(stdout); } /* GA_Print(g_a); GA_Print(g_b); GA_Print(g_c); */ #if defined(USE_ELEMENTAL) ElGlobalArraysDestroy_d( eldga, g_a ); ElGlobalArraysDestroy_d( eldga, g_b ); ElGlobalArraysDestroy_d( eldga, g_c ); #else GA_Destroy(g_c); GA_Destroy(g_b); GA_Destroy(g_a); #endif } /* ??? format(a15, i2, ': ', e12.4, ' seconds ',f12.1, . ' mflops/proc ', 3a2) */ #if defined(USE_ELEMENTAL) if (me == 0) { #else if (GA_Nodeid() == 0) { #endif printf("All tests successful\n"); } free(h0); free(tmpa); free(tmpb); free(tmpc); #if defined(USE_ELEMENTAL) // call el::global arrays destructor ElGlobalArraysTerminate_d( eldga ); ElGlobalArraysDestruct_d( eldga ); ElFinalize(); #else GA_Terminate(); MP_FINALIZE(); #endif return 0; } /* * Verify for correctness. Process 0 computes BLAS dgemm * locally. For larger arrays, disbale this test as memory * might not be sufficient */ void verify_ga_dgemm(char xt1, char xt2, int num_m, int num_n, int num_k, double alpha, int g_a, int g_b, double beta, int g_c, double *tmpa, double *tmpb, double *tmpc) { int i,j,type,ndim,dims[2],lo[2],hi[2]; double abs_value; for (i=0; i<num_n; i++) { for (j=0; j<num_m; j++) { tmpc[j+i*num_m] = -1.0; tmpa[j+i*num_m] = -2.0; } } #if defined(USE_ELEMENTAL) ElGlobalArraysInquire_d( eldga, g_a, &ndim, dims ); #else NGA_Inquire(g_a, &type, &ndim, dims); #endif lo[0] = 0; lo[1] = 0; hi[0] = dims[0]-1; hi[1] = dims[1]-1; #if defined(USE_ELEMENTAL) ElGlobalArraysGet_d( eldga, g_a, lo, hi, tmpa, &dims[1] ); #else NGA_Get(g_a, lo, hi, tmpa, &dims[1]); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysInquire_d( eldga, g_a, &ndim, dims ); #else NGA_Inquire(g_a, &type, &ndim, dims); #endif lo[0] = 0; lo[1] = 0; hi[0] = dims[0]-1; hi[1] = dims[1]-1; #if defined(USE_ELEMENTAL) ElGlobalArraysGet_d( eldga, g_b, lo, hi, tmpb, &dims[1] ); #else NGA_Get(g_b, lo, hi, tmpb, &dims[1]); #endif /* compute dgemm sequentially */ #if defined(USE_ELEMENTAL) cblas_dgemm ( CblasRowMajor, ( xt1 == 'n'? CblasNoTrans: CblasTrans ), ( xt2 == 'n'? CblasNoTrans: CblasTrans ), num_m /* M */, num_n /* N */, num_k /* K */, alpha, tmpa, num_m, /* lda */ tmpb, num_k, /* ldb */ beta, tmpc, num_m /* ldc */); #else xb_dgemm(&xt1, &xt2, &num_m, &num_n, &num_k, &alpha, tmpa, &num_m, tmpb, &num_k, &beta, tmpc, &num_m); #endif /* after computing c locally, verify it with the values in g_c */ #if defined(USE_ELEMENTAL) ElGlobalArraysInquire_d( eldga, g_a, &ndim, dims ); #else NGA_Inquire(g_a, &type, &ndim, dims); #endif lo[0] = 0; lo[1] = 0; hi[0] = dims[0]-1; hi[1] = dims[1]-1; #if defined(USE_ELEMENTAL) ElGlobalArraysGet_d( eldga, g_c, lo, hi, tmpa, &dims[1] ); #else NGA_Get(g_c, lo, hi, tmpa, &dims[1]); #endif for (i=0; i<num_n; i++) { for (j=0; j<num_m; j++) { abs_value = fabs(tmpc[j+i*num_m]-tmpa[j+i*num_m]); if(abs_value > 1.0 || abs_value < -1.0) { printf("Values are = %f %f\n", tmpc[j+i*num_m], tmpa[j+i*num_m]); printf("Values are = %f %f\n", fabs(tmpc[j+i*num_m]-tmpa[j*i*num_m]), abs_value); fflush(stdout); GA_Error("verify ga_dgemm failed", 1); } } } } /** * called by process '0' (or your master process ) */ void load_ga(int handle, double *f, int dim1, int dim2) { int lo[2], hi[2]; if (dim1 < 0 || dim2 < 0) { return; } lo[0] = 0; lo[1] = 0; hi[0] = dim1-1; hi[1] = dim2-1; #if defined(USE_ELEMENTAL) ElGlobalArraysPut_d( eldga, handle, lo, hi, f, &dim1 ); #else NGA_Put(handle, lo, hi, f, &dim1); #endif }
int main(int argc, char **argv) { int rank, nprocs; int g_A; int *local_A=NULL, *local_B=NULL, *output_A=NULL; int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; int value=SIZE; //int value=0; #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &rank ); ElMPICommSize( MPI_COMM_WORLD, &nprocs ); // instantiate el::global array ElGlobalArraysConstruct_i( &eliga ); // initialize global arrays ElGlobalArraysInitialize_i( eliga ); #else MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); #endif local_A=(int*)malloc(SIZE*SIZE*sizeof(int)); output_A=(int*)malloc(SIZE*SIZE*sizeof(int)); memset (output_A, 0, SIZE*SIZE*sizeof(int)); for(int j=0; j<SIZE; j++) for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(i + j); local_B=(int*)malloc(SIZE*SIZE*sizeof(int)); memset (local_B, 0, SIZE*SIZE*sizeof(int)); #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_i( eliga, DIM, dims, "array_A", NULL, NULL, &g_A ); ElGlobalArraysFill_i( eliga, g_A, &value ); ElGlobalArraysPrint_i( eliga, g_A ); // acc data ElGlobalArraysPut_i( eliga, g_A, lo, hi, local_A, &ld ); ElGlobalArraysSync_i( eliga ); // get ElGlobalArraysGet_i( eliga, g_A, lo, hi, local_B, &ld ); ElGlobalArraysSync_i( eliga ); ElGlobalArraysPrint_i( eliga, g_A ); #else g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); GA_Fill(g_A, &value); GA_Print(g_A); NGA_Put(g_A, lo, hi, local_A, &ld); GA_Sync(); NGA_Get(g_A, lo, hi, local_B, &ld); GA_Sync(); GA_Print(g_A); #endif // updated output MPI_Reduce (local_A, output_A, SIZE*SIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if(rank==0) { printf(" Original local buffer to be accumulated: \n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) printf("%d ", local_A[i*ld+j]); printf("\n"); } printf("\n"); printf(" Get returns: \n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) printf("%d ", local_B[i*ld + j]); printf("\n"); } printf("\n"); for(int i=0; i<SIZE; i++) { for(int j=0; j<SIZE; j++) { if(local_B[i*ld+j]!=output_A[i*ld+j]) GA_Error("ERROR", -99); } } } #if defined(USE_ELEMENTAL) ElGlobalArraysDestroy_i( eliga, g_A ); #else GA_Destroy(g_A); #endif if(rank == 0) printf ("OK. Test passed\n"); free (local_A); free (local_B); free (output_A); #if defined(USE_ELEMENTAL) ElGlobalArraysTerminate_i( eliga ); // call el::global arrays destructor ElGlobalArraysDestruct_i( eliga ); ElFinalize(); #else GA_Terminate(); MPI_Finalize(); #endif }
int main( int argc, char* argv[] ) { ElError error = ElInitialize( &argc, &argv ); if( error != EL_SUCCESS ) MPI_Abort( MPI_COMM_WORLD, 1 ); ElInt m, n, mSub, nSub; bool print, display; error = ElInput_I("--m","matrix height",100,&m); EL_ABORT_ON_ERROR( error ); error = ElInput_I("--n","matrix width",100,&n); EL_ABORT_ON_ERROR( error ); error = ElInput_b("--print","print matrix?",true,&print); EL_ABORT_ON_ERROR( error ); error = ElInput_b("--display","display matrix?",false,&display); error = ElProcessInput(); EL_ABORT_ON_ERROR( error ); error = ElPrintInputReport(); EL_ABORT_ON_ERROR( error ); ElGrid grid; error = ElGridCreate( MPI_COMM_WORLD, EL_COLUMN_MAJOR, &grid ); EL_ABORT_ON_ERROR( error );; ElDistMatrix_z A; error = ElDistMatrixCreateSpecific_z( EL_MR, EL_MC, grid, &A ); EL_ABORT_ON_ERROR( error ); error = ElDistMatrixResize_z( A, m, n ); EL_ABORT_ON_ERROR( error ); ElInt i, j; for( j=0; j<n; ++j ) { for( i=0; i<m; ++i ) { error = ElDistMatrixSet_z( A, i, j, i+j ); EL_ABORT_ON_ERROR( error ); } } if( print ) { error = ElPrintDist_z( A, "A" ); EL_ABORT_ON_ERROR( error ); } if( display ) { error = ElDisplayDist_z( A, "A" ); EL_ABORT_ON_ERROR( error ); } ElEntrywiseMapDist_z( A, &MapFunc ); if( print ) { error = ElPrintDist_z( A, "ASquared" ); EL_ABORT_ON_ERROR( error ); } if( display ) { error = ElDisplayDist_z( A, "ASquared" ); EL_ABORT_ON_ERROR( error ); } error = ElFinalize(); if( error != EL_SUCCESS ) MPI_Abort( MPI_COMM_WORLD, 1 ); return 0; }
int main(int argc, char **argv) { int rank, nprocs, i, j; int g_A, g_B, dims[DIM]={SIZE,SIZE}, val1=5, val2=4; int lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE; #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &rank ); ElMPICommSize( MPI_COMM_WORLD, &nprocs ); // instantiate el::global array ElGlobalArraysConstruct_i( &eliga ); // initialize global arrays ElGlobalArraysInitialize_i( eliga ); #else MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MA_init(C_INT, 1000, 1000); GA_Initialize(); #endif // create global arrays #if defined(USE_ELEMENTAL) ElGlobalArraysCreate_i( eliga, DIM, dims, "array_A", NULL, &g_A ); #else g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysDuplicate_i( eliga, g_A, "array_B", &g_B ); #else g_B = GA_Duplicate(g_A, "array_B"); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysFill_i( eliga, g_A, &val1 ); ElGlobalArraysFill_i( eliga, g_B, &val2 ); #else GA_Fill(g_A, &val1); GA_Fill(g_B, &val2); #endif int dot_AB = -99; #if defined(USE_ELEMENTAL) ElGlobalArraysDot_i( eliga, g_A, g_B, &dot_AB ); #else dot_AB = GA_Idot(g_A, g_B); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysSync_i( eliga ); #else GA_Sync(); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysPrint_i( eliga, g_A ); ElGlobalArraysPrint_i( eliga, g_B ); #else GA_Print(g_A); GA_Print(g_B); #endif // Check #if defined(USE_ELEMENTAL) ElGlobalArraysSync_i( eliga ); #else GA_Sync(); #endif if(rank==0) printf ("Integer dot product of g_A and g_B: %d\n", dot_AB); #if defined(USE_ELEMENTAL) ElGlobalArraysSync_i( eliga ); #else GA_Sync(); #endif if(rank==0) printf("Test Completed \n"); #if defined(USE_ELEMENTAL) ElGlobalArraysDestroy_i( eliga, g_A ); ElGlobalArraysDestroy_i( eliga, g_B ); #else GA_Destroy(g_A); GA_Destroy(g_B); #endif #if defined(USE_ELEMENTAL) ElGlobalArraysTerminate_i( eliga ); // call el::global arrays destructor ElGlobalArraysDestruct_i( eliga ); ElFinalize(); #else GA_Terminate(); MPI_Finalize(); #endif return 0; }
int main(int argc, char **argv) { /* initialize GA */ #if defined(USE_ELEMENTAL) // initialize Elemental (which will initialize MPI) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &me ); ElMPICommSize( MPI_COMM_WORLD, &nproc ); // instantiate el::global array ElGlobalArraysConstruct_d( &eldga ); // initialize global arrays ElGlobalArraysInitialize_d( eldga ); #else MP_INIT(argc,argv); GA_Initialize_args(&argc, &argv); me = GA_Nodeid(); nproc = GA_Nnodes(); #endif if (nproc < 2) { if (me == 0) { fprintf(stderr, "USAGE: 2 <= processes - got %d\n", nproc); } #if defined(USE_ELEMENTAL) ElGlobalArraysTerminate_d( eldga ); // call el::global arrays destructor ElGlobalArraysDestruct_d( eldga ); ElFinalize(); #else GA_Terminate(); MP_FINALIZE(); #endif exit(0); } if (!me) { printf("\n Performance of Basic Blocking Communication Operations\n"); } #if defined(USE_ELEMENTAL) ElGlobalArraysSync_d( eldga ); #else GA_Sync(); #endif /* test 1 dimension array */ /* if (!me) { printf("\n\t\t\tContiguous Data Transfer\n"); } test_1D(); */ /* test 2 dimension array */ if (!me) { printf("\n\t\t\tStrided Data Transfer\n"); } test_2D(); #if 0 if (me == 0) { if (warn_accuracy) { printf("\nWARNING: Your timer does not have sufficient accuracy for this test (%d)\n", warn_accuracy); } printf("\n\n------------ Now we test the same data transfer for correctness ----------\n"); fflush(stdout); } #endif #if defined(USE_ELEMENTAL) ElGlobalArraysTerminate_d( eldga ); // call el::global arrays destructor ElGlobalArraysDestruct_d( eldga ); ElFinalize(); #else GA_Terminate(); MP_FINALIZE(); #endif return(0); }