/*
 * Driver for the armci_notify test.
 *
 * Initializes the message layer and ARMCI, runs test_notify() once for each
 * dimensionality from 1 up to MAXDIMS, and then finalizes both layers.
 * Process 0 prints the banner.
 */
int main(int argc, char *argv[])
{
    int dims = 1;

    armci_msg_init(&argc, &argv);
    ARMCI_Init_args(&argc, &argv);
    nproc = armci_msg_nproc();
    me = armci_msg_me();

    ARMCI_Barrier();
    if (0 == me) {
        printf("\nTesting armci_notify\n");
        fflush(stdout);
        sleep(1);
    }
    ARMCI_Barrier();

    while (dims <= MAXDIMS) {
        test_notify(dims);
        dims++;
    }

    ARMCI_Barrier();
    ARMCI_Finalize();
    armci_msg_finalize();
    return 0;
}
int main(int argc, char ** argv) { int rank, nproc, val, i; void **base_ptrs; MPI_Init(&argc, &argv); ARMCI_Init(); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (rank == 0) printf("Starting ARMCI mutex read-modify-write test with %d processes\n", nproc); base_ptrs = malloc(nproc*sizeof(void*)); ARMCI_Create_mutexes(rank == 0 ? 1 : 0); ARMCI_Malloc(base_ptrs, (rank == 0) ? sizeof(int) : 0); // Proc 0 has a shared int if (rank == 0) { val = 0; ARMCI_Put(&val, base_ptrs[0], sizeof(int), 0); } ARMCI_Barrier(); for (i = 0; i < NITER; i++) { ARMCI_Lock(0, 0); ARMCI_Get(base_ptrs[0], &val, sizeof(int), 0); val += ADDIN; ARMCI_Put(&val, base_ptrs[0], sizeof(int), 0); ARMCI_Unlock(0, 0); } printf(" + %3d done\n", rank); fflush(NULL); ARMCI_Barrier(); if (rank == 0) { ARMCI_Get(base_ptrs[0], &val, sizeof(int), 0); if (val == ADDIN*nproc*NITER) printf("Test complete: PASS.\n"); else printf("Test complete: FAIL. Got %d, expected %d.\n", val, ADDIN*nproc*NITER); } ARMCI_Free(base_ptrs[rank]); ARMCI_Destroy_mutexes(); free(base_ptrs); ARMCI_Finalize(); MPI_Finalize(); return 0; }
/*\ Release a handle obtained for a previously registered callback function.
 *  The handle is mapped to a table slot as GPC_OFFSET - handle; the slot is
 *  cleared after a barrier.  A slot outside [0, GPC_SLOTS) is reported through
 *  armci_die().
\*/
void ARMCI_Gpc_release(int handle)
{
    const int slot = -handle + GPC_OFFSET;

    ARMCI_Barrier();

    if (slot >= GPC_SLOTS || slot < 0) {
        armci_die("ARMCI_Gpc_release: bad handle", slot);
    }

    _table[slot] = (void *)0;
}
/*
 * Barrier over a DDI communicator.
 *
 * When `comm` is the communicator returned by Comm_find(DDI_COMM_WORLD) the
 * ARMCI barrier is used directly; for any other communicator an
 * ARMCI_AllFence() is followed by an MPI barrier on comm->compute_comm.
 */
void DDI_ARMCI_Barrier(const DDI_Comm *comm)
{
    const DDI_Comm *world = (const DDI_Comm *)Comm_find(DDI_COMM_WORLD);

    if (comm != world) {
        ARMCI_AllFence();
        MPI_Barrier(comm->compute_comm);
        return;
    }

    ARMCI_Barrier();
}
/*
 * Print the command-line synopsis (on the process whose rank is 0 only),
 * then synchronize, finalize the message layer and exit with status 0.
 * This function does not return.
 */
void usage()
{
    if (rank == 0) {
        fputs("Usage: test_mt, or \n", stdout);
        fputs(" test_mt -tTHREADS_PER_PROC -sARRAY_SIZE -iITERATIONS_COUNT\n", stdout);
    }

    ARMCI_Barrier();
    armci_msg_finalize();
    exit(0);
}
int main(int argc, char * argv[]) { void *baseAddress[MAX_PROCESSORS]; char *local; int thisImage; int iter = 100, size; double startTime, endTime; int i; // initialize ARMCI_Init(); ARMCI_Myid(&thisImage); // allocate data (collective operation) ARMCI_Malloc(baseAddress, MAX_BUF_SIZE*sizeof(char)); local = (char *)ARMCI_Malloc_local(MAX_BUF_SIZE*sizeof(char)); ARMCI_Barrier(); ARMCI_Migrate(); if (thisImage == 0) { for(size = 1; size <= MAX_BUF_SIZE; size = size<<1){ startTime = CkWallTimer(); for(i = 0; i < iter; i++){ ARMCI_Put(local, baseAddress[1], size, 1); } ARMCI_Fence(1); endTime = CkWallTimer(); printf("%d: %f us\n", size, (endTime-startTime)*1000); } ARMCI_Barrier(); } else if (thisImage == 1) { ARMCI_Barrier(); } ARMCI_Free(baseAddress[thisImage]); ARMCI_Free_local(local); // finalize ARMCI_Finalize(); return 0; }
/*
 * Driver for the aggregate put/get test.
 *
 * After ARMCI start-up, process 0 rejects configurations with more than
 * MAXPROC processes, prints the banners, and then all processes run
 * test_aggregate() twice: once as a cold start (argument 1) and once as a
 * warm start (argument 0).
 */
int main(int argc, char *argv[])
{
    ARMCI_Init_args(&argc, &argv);
    nproc = armci_msg_nproc();
    me = armci_msg_me();

    /* printf("nproc = %d, me = %d\n", nproc, me);*/

    if (me == 0 && nproc > MAXPROC) {
        ARMCI_Error("Test works for up to %d processors\n", MAXPROC);
    }

    if (me == 0) {
        printf("ARMCI test program (%d processes)\n", nproc);
        fflush(stdout);
        sleep(1);

        printf("\nAggregate put/get requests\n\n");
        fflush(stdout);
    }

    test_aggregate(1);  /* cold start */
    test_aggregate(0);  /* warm start */

    ARMCI_AllFence();
    ARMCI_Barrier();

    if (me == 0) {
        printf("\nSuccess!!\n");
        fflush(stdout);
    }
    sleep(2);

    ARMCI_Barrier();
    ARMCI_Finalize();
    armci_msg_finalize();
    return 0;
}
/*\ Register a callback function and hand an int handle back to the user.
 *  After a barrier, the first empty entry of _table (scanning from slot 0)
 *  receives `func`; the returned handle is GPC_OFFSET - slot.
 *  If no empty entry is found the slot stays -1, so GPC_OFFSET + 1 is
 *  returned and nothing is stored.
\*/
int ARMCI_Gpc_register( int (*func) ())
{
    int slot = 0;
    int found = -1;

    ARMCI_Barrier();

    do {
        if (!_table[slot]) {
            _table[slot] = func;
            found = slot;
        }
        ++slot;
    } while (found == -1 && slot < GPC_SLOTS);

    return GPC_OFFSET - found;
}
int main(int argc, char **argv) { int i, j, rank, nranks, msgsize, dest; int xdim, ydim; long bufsize; double **buffer; double t_start, t_stop, t_latency; int count[2], src_stride, trg_stride, stride_level; int provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); ARMCI_Init_args(&argc, &argv); ARMCI_Barrier(); int me = armci_msg_me(); int node = armci_domain_my_id(ARMCI_DOMAIN_SMP); printf("MPI_Rank: %d, \ armci_msg_nproc: %d \ armci_msg_me: %d, \ armci_domain_id: %d, \ armci_domain_same_id: %d,\ armci_domain_my_id: %d, \ armci_domain_count: %d, \ armci_domain_nprocs: %d, \ armci_domain_glob_proc_id: %d \n", rank, armci_msg_nproc(), me, armci_domain_id(ARMCI_DOMAIN_SMP, me), armci_domain_same_id(ARMCI_DOMAIN_SMP, me), armci_domain_my_id(ARMCI_DOMAIN_SMP), armci_domain_count(ARMCI_DOMAIN_SMP), armci_domain_nprocs(ARMCI_DOMAIN_SMP, node), armci_domain_glob_proc_id(ARMCI_DOMAIN_SMP, node, 0)); fflush(stdout); ARMCI_Free((void *) buffer[rank]); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char ** argv) { int rank, nproc, test_iter; void ***base_ptrs; MPI_Init(&argc, &argv); ARMCI_Init(); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (rank == 0) printf("Starting ARMCI memory allocation test with %d processes\n", nproc); base_ptrs = malloc(sizeof(void**)*NUM_ITERATIONS); // Perform a pile of allocations for (test_iter = 0; test_iter < NUM_ITERATIONS; test_iter++) { if (rank == 0) printf(" + allocation %d\n", test_iter); base_ptrs[test_iter] = malloc(sizeof(void*)*nproc); ARMCI_Malloc((void**)base_ptrs[test_iter], (test_iter % 4 == 0) ? 0 : DATA_SZ); } ARMCI_Barrier(); // Free all allocations for (test_iter = 0; test_iter < NUM_ITERATIONS; test_iter++) { if (rank == 0) printf(" + free %d\n", test_iter); ARMCI_Free(((void**)base_ptrs[test_iter])[rank]); free(base_ptrs[test_iter]); } free(base_ptrs); if (rank == 0) printf("Test complete: PASS.\n"); ARMCI_Finalize(); MPI_Finalize(); return 0; }
void test_notify(int ndim) { int lo[MAXDIMS], hi[MAXDIMS], count[MAXDIMS]; int stride[MAXDIMS]; int dim, elems; int i, Idx = 1, idx = 0; void *b[MAXPROC], *a[MAXPROC]; int left = (me + nproc - 1) % nproc; int right = (me + 1) % nproc; int loopcnt = 1, less = 2, strl; /* less>1 takes a partial plane */ /* create shared and local arrays */ create_array(b, sizeof(double), ndim, dimsB); create_array(a, sizeof(double), ndim, dimsB); elems = get_elems(ndim, stride, dimsB, sizeof(double)); init((double *)a[me], ndim, elems, dimsB); for (i = 0; i < ndim; i++) { lo[i] = 0; hi[i] = (less > dimsB[i]) ? dimsB[i] - 1 : dimsB[i] - less; count[i] = hi[i] - lo[i] + 1; } count[0] *= sizeof(double); for (i = 0; i < ndim - 1; i++) { Idx *= dimsB[i]; } ARMCI_Barrier(); if (me == 0) { printf("--------array[%d", dimsB[0]); for (dim = 1; dim < ndim; dim++) { printf(",%d", dimsB[dim]); } printf("]--------\n"); fflush(stdout); } ARMCI_Barrier(); loopcnt = (ndim > 1) ? dimsB[ndim-1] : 1; strl = (ndim > 1) ? ndim - 2 : 0; /* strides of the subpatch to transfer */ for (i = 0; i < loopcnt; i++) { int wc; if (me == 0) { ARMCI_PutS((double *)a[me] + idx, stride, (double *)b[left] + idx, stride, count, strl, left); #if DEBUG_ printf("%d-%d: ps=%p pd=%p i=%d idx=%d count=%d\n", me, left, (double *) a[me] + idx, (double *)b[left] + idx, i, idx, count[0]); fflush(stdout); #endif (void)armci_notify(left); (void)armci_notify_wait(right, &wc); } else { (void)armci_notify_wait(right, &wc); ARMCI_PutS((double *)b[me] + idx, stride, (double *)b[left] + idx, stride, count, strl, left); #if DEBUG_ printf("%d: ps=%p pd=%p i=%d idx=%d count=%d\n", me, (double *)b[me] + idx, (double *)b[left] + idx, i, idx, count[0]); fflush(stdout); #endif (void)armci_notify(left); } idx += Idx; /* advance to the next slab */ } ARMCI_Barrier(); if (me == 0) { compare_patches(0., ndim, (double *)a[0], lo, hi, dimsB, (double *)b[0], lo, hi, dimsB); printf("OK\n"); } ARMCI_Barrier(); destroy_array(b); destroy_array(a); }
/*
 * ARMCI group test.
 *
 * Builds two child groups of the world group (odd ranks and even ranks), lets
 * process 0 time one million calls each of
 * ARMCII_Translate_absolute_to_group() and ARMCI_Absolute_id() on the even
 * group, and finally frees the group that matches the caller's rank parity.
 */
int main(int argc, char **argv)
{
    int me, nproc;
    int i, members, r;
    int *procs;
    ARMCI_Group g_world, g_odd, g_even;

    MPI_Init(&argc, &argv);
    ARMCI_Init();

    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    /* Large enough for the even group, which holds ceil(nproc/2) ranks. */
    procs = malloc(sizeof(int) * ( nproc/2 + (nproc % 2 ? 1 : 0 )));

    if (me == 0)
        printf("ARMCI Group test starting on %d procs\n", nproc);

    ARMCI_Group_get_world(&g_world);

    if (me == 0)
        printf(" + Creating odd group\n");

    members = 0;
    for (r = 1; r < nproc; r += 2) {
        procs[members] = r;
        members++;
    }
    ARMCI_Group_create_child(members, procs, &g_odd, &g_world);

    if (me == 0)
        printf(" + Creating even group\n");

    members = 0;
    for (r = 0; r < nproc; r += 2) {
        procs[members] = r;
        members++;
    }
    ARMCI_Group_create_child(members, procs, &g_even, &g_world);

    /***********************************************************************/
    {
        int grp_me, grp_nproc;
        double t_abs_to_grp, t_grp_to_abs;
        const int iter = 1000000;

        if (me == 0) {
            ARMCI_Group_rank(&g_even, &grp_me);
            ARMCI_Group_size(&g_even, &grp_nproc);

            t_abs_to_grp = MPI_Wtime();
            i = 0;
            while (i < iter) {
                ARMCII_Translate_absolute_to_group(&g_even, (grp_me+1) % grp_nproc);
                i++;
            }
            t_abs_to_grp = MPI_Wtime() - t_abs_to_grp;

            t_grp_to_abs = MPI_Wtime();
            i = 0;
            while (i < iter) {
                ARMCI_Absolute_id(&g_even, (grp_me+1) % grp_nproc);
                i++;
            }
            t_grp_to_abs = MPI_Wtime() - t_grp_to_abs;

            printf("t_abs_to_grp = %f us, t_grp_to_abs = %f us\n", t_abs_to_grp/iter * 1.0e6, t_grp_to_abs/iter * 1.0e6);
        }

        ARMCI_Barrier();
    }
    /***********************************************************************/

    if (me == 0)
        printf(" + Freeing groups\n");

    if (me % 2 > 0) {
        ARMCI_Group_free(&g_odd);
    } else {
        ARMCI_Group_free(&g_even);
    }

    free(procs);

    ARMCI_Finalize();
    MPI_Finalize();

    return 0;
}
/*
 * Collectively release an array created for this test: synchronize, then free
 * the calling process's segment (ptr[me]) and check that ARMCI_Free reported
 * success (a zero return).
 *
 * The free is performed outside assert() so that it still happens when the
 * file is compiled with NDEBUG; previously the whole ARMCI_Free call lived
 * inside the assertion and vanished in such builds.
 */
void destroy_array(void *ptr[])
{
    int rc;

    ARMCI_Barrier();

    rc = ARMCI_Free(ptr[me]);
    assert(rc == 0);
    (void)rc;   /* silences the unused-variable warning under NDEBUG */
}
int main(int argc, char *argv[]) { int i, j, rank, nranks; int xdim, ydim; long bufsize; double **buffer; double t_start=0.0, t_stop=0.0; int count[2], src_stride, trg_stride, stride_level, peer; double expected, actual; int provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); if (nranks < 2) { printf("%s: Must be run with at least 2 processes\n", argv[0]); MPI_Abort(MPI_COMM_WORLD, 1); } ARMCI_Init_args(&argc, &argv); bufsize = MAX_XDIM * MAX_YDIM * sizeof(double); buffer = (double **) malloc(sizeof(double *) * nranks); ARMCI_Malloc((void **) buffer, bufsize); for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } if(rank == 0) { printf("ARMCI_PutS Latency - local and remote completions - in usec \n"); printf("%30s %22s %22s\n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion"); fflush(stdout); } src_stride = MAX_YDIM*sizeof(double); trg_stride = MAX_YDIM*sizeof(double); stride_level = 1; ARMCI_Barrier(); for(xdim=1; xdim<=MAX_XDIM; xdim*=2) { count[1] = xdim; for(ydim=1; ydim<=MAX_YDIM; ydim*=2) { count[0] = ydim*sizeof(double); if(rank == 0) { peer = 1; for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = MPI_Wtime(); ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); } t_stop = MPI_Wtime(); ARMCI_Fence(peer); char temp[10]; sprintf(temp,"%dX%d", xdim, ydim); printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); ARMCI_Barrier(); ARMCI_Barrier(); for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = MPI_Wtime(); ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); ARMCI_Fence(peer); } t_stop = MPI_Wtime(); printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); ARMCI_Barrier(); ARMCI_Barrier(); } else { peer = 0; expected = (1.0 + 
(double) peer); ARMCI_Barrier(); if (rank == 1) { for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); ARMCI_Error("Bailing out", 1); } } } } for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } ARMCI_Barrier(); ARMCI_Barrier(); if (rank == 1) { for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); ARMCI_Error("Bailing out", 1); } } } for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } } ARMCI_Barrier(); } } } ARMCI_Barrier(); ARMCI_Free((void *) buffer[rank]); free(buffer); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int i, j, rank, nranks, peer; size_t xdim, ydim; unsigned long bufsize; double **buffer, *src_buf; double t_start=0.0, t_stop; int count[2], src_stride, trg_stride, stride_level; double scaling; int provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); if (nranks < 2) { printf("%s: Must be run with at least 2 processes\n", argv[0]); MPI_Abort(MPI_COMM_WORLD, 1); } ARMCI_Init_args(&argc, &argv); buffer = (double **) malloc(sizeof(double *) * nranks); bufsize = MAX_XDIM * MAX_YDIM * sizeof(double); ARMCI_Malloc((void **) buffer, bufsize); src_buf = ARMCI_Malloc_local(bufsize); if (rank == 0) { printf("ARMCI_AccS Latency - local and remote completions - in usec \n"); printf("%30s %22s %22s\n", "Dimensions(array of double)", "Local Completion", "Remote completion"); fflush(stdout); } ARMCI_Access_begin(buffer[rank]); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; *(src_buf + i) = 1.0 + rank; } ARMCI_Access_end(buffer[rank]); scaling = 2.0; src_stride = MAX_YDIM * sizeof(double); trg_stride = MAX_YDIM * sizeof(double); stride_level = 1; ARMCI_Barrier(); for (xdim = 1; xdim <= MAX_XDIM; xdim *= 2) { count[1] = xdim; for (ydim = 1; ydim <= MAX_YDIM; ydim *= 2) { count[0] = ydim * sizeof(double); if (rank == 0) { peer = 1; for (i = 0; i < ITERATIONS + SKIP; i++) { if (i == SKIP) t_start = MPI_Wtime(); ARMCI_AccS(ARMCI_ACC_DBL, (void *) &scaling, /* (void *) buffer[rank] */ src_buf, &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, 1); } t_stop = MPI_Wtime(); ARMCI_Fence(1); char temp[10]; sprintf(temp, "%dX%d", (int) xdim, (int) ydim); printf("%30s %20.2f ", temp, ((t_stop - t_start) * 1000000) / ITERATIONS); fflush(stdout); ARMCI_Barrier(); ARMCI_Barrier(); for (i = 0; i < ITERATIONS + SKIP; i++) { if (i == SKIP) t_start = MPI_Wtime(); ARMCI_AccS(ARMCI_ACC_DBL, (void *) &scaling, /* (void 
*) buffer[rank] */ src_buf, &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, 1); ARMCI_Fence(1); } t_stop = MPI_Wtime(); printf("%20.2f \n", ((t_stop - t_start) * 1000000) / ITERATIONS); fflush(stdout); ARMCI_Barrier(); ARMCI_Barrier(); } else { peer = 0; ARMCI_Barrier(); if (rank == 1) { ARMCI_Access_begin(buffer[rank]); for (i = 0; i < xdim; i++) { for (j = 0; j < ydim; j++) { if (*(buffer[rank] + i * MAX_XDIM + j) != ((1.0 + rank) + scaling * (1.0 + peer) * (ITERATIONS + SKIP))) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, ((1.0 + rank) + scaling * (1.0 + peer)), *(buffer[rank] + i * MAX_YDIM + j)); fflush(stdout); ARMCI_Error("Bailing out", 1); } } } for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } ARMCI_Access_end(buffer[rank]); } ARMCI_Barrier(); ARMCI_Barrier(); if (rank == 1) { ARMCI_Access_begin(buffer[rank]); for (i = 0; i < xdim; i++) { for (j = 0; j < ydim; j++) { if (*(buffer[rank] + i * MAX_XDIM + j) != ((1.0 + rank) + scaling * (1.0 + peer) * (ITERATIONS + SKIP))) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, ((1.0 + rank) + scaling * (1.0 + peer)), *(buffer[rank] + i * MAX_YDIM + j)); fflush(stdout); ARMCI_Error("Bailing out", 1); } } } for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } ARMCI_Access_end(buffer[rank]); } ARMCI_Barrier(); } } } ARMCI_Barrier(); ARMCI_Free((void *) buffer[rank]); ARMCI_Free_local(src_buf); free(buffer); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char ** argv) { int rank, nproc, i, test_iter; int *my_data, *buf; void **base_ptrs; MPI_Init(&argc, &argv); ARMCI_Init(); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (rank == 0) printf("Starting ARMCI test with %d processes\n", nproc); buf = malloc(DATA_SZ); base_ptrs = malloc(sizeof(void*)*nproc); for (test_iter = 0; test_iter < NUM_ITERATIONS; test_iter++) { if (rank == 0) printf(" + iteration %d\n", test_iter); /*** Allocate the shared array ***/ ARMCI_Malloc(base_ptrs, DATA_SZ); my_data = base_ptrs[rank]; /*** Get from our right neighbor and verify correct data ***/ ARMCI_Access_begin(my_data); for (i = 0; i < DATA_NELTS; i++) my_data[i] = rank*test_iter; ARMCI_Access_end(my_data); ARMCI_Barrier(); // Wait for all updates to data to complete ARMCI_Get(base_ptrs[(rank+1) % nproc], buf, DATA_SZ, (rank+1) % nproc); for (i = 0; i < DATA_NELTS; i++) { if (buf[i] != ((rank+1) % nproc)*test_iter) { printf("%d: GET expected %d, got %d\n", rank, (rank+1) % nproc, buf[i]); MPI_Abort(MPI_COMM_WORLD, 1); } } ARMCI_Barrier(); // Wait for all gets to complete /*** Put to our left neighbor and verify correct data ***/ for (i = 0; i < DATA_NELTS; i++) buf[i] = rank*test_iter; ARMCI_Put(buf, base_ptrs[(rank+nproc-1) % nproc], DATA_SZ, (rank+nproc-1) % nproc); ARMCI_Barrier(); // Wait for all updates to data to complete ARMCI_Access_begin(my_data); for (i = 0; i < DATA_NELTS; i++) { if (my_data[i] != ((rank+1) % nproc)*test_iter) { printf("%d: PUT expected %d, got %d\n", rank, (rank+1) % nproc, my_data[i]); MPI_Abort(MPI_COMM_WORLD, 1); } } ARMCI_Access_end(my_data); ARMCI_Barrier(); // Wait for all gets to complete /*** Accumulate to our left neighbor and verify correct data ***/ for (i = 0; i < DATA_NELTS; i++) buf[i] = rank; ARMCI_Access_begin(my_data); for (i = 0; i < DATA_NELTS; i++) my_data[i] = rank; ARMCI_Access_end(my_data); ARMCI_Barrier(); int scale = test_iter; ARMCI_Acc(ARMCI_ACC_INT, &scale, buf, 
base_ptrs[(rank+nproc-1) % nproc], DATA_SZ, (rank+nproc-1) % nproc); ARMCI_Barrier(); // Wait for all updates to data to complete ARMCI_Access_begin(my_data); for (i = 0; i < DATA_NELTS; i++) { if (my_data[i] != rank + ((rank+1) % nproc)*test_iter) { printf("%d: ACC expected %d, got %d\n", rank, (rank+1) % nproc, my_data[i]); //MPI_Abort(MPI_COMM_WORLD, 1); } } ARMCI_Access_end(my_data); ARMCI_Free(my_data); } free(buf); free(base_ptrs); if (rank == 0) printf("Test complete: PASS.\n"); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { size_t i, rank, nranks, msgsize, dest; size_t iterations, max_msgsize; int bufsize; double **buffer; double t_start, t_stop, t_total, d_total; double expected, bandwidth; int provided; armci_hdl_t handle; max_msgsize = MAX_MSGSIZE; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); ARMCI_Init_args(&argc, &argv); bufsize = max_msgsize * ITERATIONS_LARGE; buffer = (double **) malloc(sizeof(double *) * nranks); ARMCI_Malloc((void **) buffer, bufsize); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } ARMCI_INIT_HANDLE(&handle); ARMCI_SET_AGGREGATE_HANDLE(&handle); ARMCI_Barrier(); if (rank == 0) { printf("ARMCI_Put Bandwidth in MBPS \n"); printf("%20s %22s \n", "Message Size", "Bandwidth"); fflush(stdout); dest = 1; expected = 1 + dest; for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2) { if (msgsize <= 16 * 1024) iterations = ITERATIONS_VERYSMALL; else if (msgsize <= 64 * 1024) iterations = ITERATIONS_SMALL; else if (msgsize <= 512 * 1024) iterations = ITERATIONS_MEDIUM; else iterations = ITERATIONS_LARGE; t_start = MPI_Wtime(); for (i = 0; i < iterations; i++) { ARMCI_NbPut((void *) ((size_t) buffer[dest] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[rank] + (size_t)(i * msgsize)), msgsize, dest, &handle); } ARMCI_Wait(&handle); t_stop = MPI_Wtime(); d_total = (iterations * msgsize) / (1024 * 1024); t_total = t_stop - t_start; bandwidth = d_total / t_total; printf("%20d %20.4lf \n", msgsize, bandwidth); fflush(stdout); ARMCI_Fence(dest); } } ARMCI_Barrier(); ARMCI_UNSET_AGGREGATE_HANDLE(&handle); ARMCI_Free((void *) buffer[rank]); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int ch; extern char *optarg; int i, j, r; thread_t threads[MAX_TPP]; /* init ARMCI */ ARMCI_Init_args(&argc, &argv); size = armci_msg_nproc(); rank = armci_msg_me(); while ((ch = getopt(argc, argv, "t:s:i:d:h")) != -1) { switch (ch) { case 't': /* # of threads */ tpp = atoi(optarg); if (tpp < 1 || tpp > MAX_TPP) { PRINTF0("\"%s\" is improper value for -t, should be a " "number between 1 and %d(MAX_TPP)\n", optarg, MAX_TPP); usage(); } break; case 'i': /* # of iterations */ iters = atoi(optarg); if (iters < 1) { PRINTF0("\"%s\" is improper value for -t, should be a " "number equal or larger than 1\n", optarg); usage(); } break; case 's': /* # of elements in the array */ asize = atoi(optarg); if (iters < 1) { PRINTF0("\"%s\" is improper value for -s, should be a " "number equal or larger than 1\n", optarg); usage(); } break; case 'd': delay = atoi(optarg); break; /* delay before start */ case 'h': usage(); break; /* print usage info */ } } #ifdef NOTHREADS tpp = 1; PRINTF0("Warning: NOTHREADS debug symbol is set -- running w/o threads\n"); #endif th_size = size * tpp; PRINTF0("\nTest of multi-threaded capabilities:\n" "%d threads per process (%d threads total),\n" "%d array elements of size %d,\n" "%d iteration(s)\n\n", tpp, th_size, asize, sizeof(atype_t), iters); if (delay) { printf("%d: %d\n", rank, getpid()); fflush(stdout); sleep(delay); ARMCI_Barrier(); } TH_INIT(size, tpp); for (i = 0; i < tpp; i++) { th_rank[i] = rank * tpp + i; } #if defined(DEBUG) && defined(LOG2FILE) for (i = 0; i < tpp; i++) { fname[10] = '0' + th_rank[i] / 100; fname[11] = '0' + th_rank[i] % 100 / 10; fname[12] = '0' + th_rank[i] % 10; dbg[i] = fopen(fname, "w"); } #endif for (i = 0; i < tpp; i++) { prndbg(i, "proc %d, thread %d(%d):\n", rank, i, th_rank[i]); } /* set global seed (to ensure same random sequence across procs) */ time_seed = (unsigned)time(NULL); armci_msg_brdcst(&time_seed, sizeof(time_seed), 0); srand(time_seed); rand(); prndbg(0, "seed 
= %u\n", time_seed); /* random pairs */ pairs = calloc(th_size, sizeof(int)); for (i = 0; i < th_size; i++) { pairs[i] = -1; } for (i = 0; i < th_size; i++) { if (pairs[i] != -1) { continue; } r = RND(0, th_size); while (i == r || pairs[r] != -1) { r = RND(0, th_size); } pairs[i] = r; pairs[r] = i; } for (i = 0, cbufl = 0; i < th_size; i++) cbufl += sprintf(cbuf + cbufl, " %d->%d|%d->%d", i, pairs[i], pairs[i], pairs[pairs[i]]); prndbg(0, "random pairs:%s\n", cbuf); /* random targets */ rnd_tgts = calloc(th_size, sizeof(int)); for (i = 0, cbufl = 0; i < th_size; i++) { rnd_tgts[i] = RND(0, th_size); if (rnd_tgts[i] == i) { i--; continue; } cbufl += sprintf(cbuf + cbufl, " %d", rnd_tgts[i]); } prndbg(0, "random targets:%s\n", cbuf); /* random one */ rnd_one = RND(0, th_size); prndbg(0, "random one = %d\n", rnd_one); assert(ptrs1 = calloc(th_size, sizeof(void *))); assert(ptrs2 = calloc(th_size, sizeof(void *))); #ifdef NOTHREADS thread_main((void *)(long)0); #else for (i = 0; i < tpp; i++) { THREAD_CREATE(threads + i, thread_main, (void *)(long)i); } for (i = 0; i < tpp; i++) { THREAD_JOIN(threads[i], NULL); } #endif ARMCI_Barrier(); PRINTF0("Tests Completed\n"); /* clean up */ #if defined(DEBUG) && defined(LOG2FILE) for (i = 0; i < tpp; i++) { fclose(dbg[i]); } #endif ARMCI_Finalize(); TH_FINALIZE(); armci_msg_finalize(); return 0; }
int main(int argc, char **argv) { int i, j, rank, nranks, msgsize; int *buffer; int provided; char op = '+'; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); ARMCI_Init_args(&argc, &argv); ARMCI_Barrier(); buffer = (int *) malloc(MAX_MSG_SIZE); for(i=0; i<MAX_MSG_SIZE/sizeof(int); i++) { if(rank == 0) buffer[i] = (2<<20 - 1); else buffer[i] = 0; } if(rank == 0) { printf("Testing functionality of ARMCI_Bcast \n"); fflush(stdout); } for(msgsize=sizeof(int); msgsize<=MAX_MSG_SIZE; msgsize*=2) { armci_msg_bcast(buffer, msgsize, 0); for(i=0; i<msgsize/sizeof(int); i++) { if(buffer[i] != (2<<20 - 1)) { printf("[%d] Validation failed for msg size: %d at index: %d expected: %d actual: %d \n", rank, msgsize, i, (2<<20 - 1), buffer[i]); fflush(stdout); exit(-1); } } for(i=0; i<MAX_MSG_SIZE/sizeof(int); i++) { if(rank == 0) buffer[i] = (2<<20 - 1); else buffer[i] = 0; } ARMCI_Barrier(); if(rank == 0) { printf("Validation successful for msg size: %d\n", msgsize); fflush(stdout); } } free(buffer); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int i, j, rank, nranks, msgsize, dest; int dim, iterations; long bufsize; double **buffer; double t_start, t_stop, t_total, d_total, bw; int count[2], src_stride, trg_stride, stride_level; int provided; armci_hdl_t handle; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); ARMCI_Init_args(&argc, &argv); bufsize = MAX_DIM * MAX_DIM * sizeof(double); buffer = (double **) malloc(sizeof(double *) * nranks); ARMCI_Malloc((void **) buffer, bufsize); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } ARMCI_INIT_HANDLE(&handle); ARMCI_SET_AGGREGATE_HANDLE(&handle); ARMCI_Barrier(); if (rank == 0) { printf("ARMCI_PutS Bandwidth in MBPS \n"); printf("%30s %22s \n", "Dimensions(array of doubles)", "Latency"); fflush(stdout); dest = 1; src_stride = MAX_DIM * sizeof(double); trg_stride = MAX_DIM * sizeof(double); stride_level = 1; for (dim = 1; dim <= MAX_DIM; dim *= 2) { count[0] = dim*sizeof(double); count[1] = dim; iterations = 10*(MAX_DIM * MAX_DIM)/(dim * dim); t_start = MPI_Wtime(); for (i = 0; i < iterations; i++) { ARMCI_NbPutS((void *) buffer[rank], &src_stride, (void *) buffer[dest], &trg_stride, count, stride_level, dest, &handle); } ARMCI_Wait(&handle); t_stop = MPI_Wtime(); ARMCI_Fence(1); char temp[10]; sprintf(temp, "%dX%d", dim, dim); t_total = t_stop - t_start; d_total = (dim*dim*sizeof(double)*iterations)/(1024*1024); bw = d_total/t_total; printf("%30s %20.2f \n", temp, bw); fflush(stdout); } } ARMCI_Barrier(); ARMCI_UNSET_AGGREGATE_HANDLE(&handle); ARMCI_Free((void *) buffer[rank]); ARMCI_Finalize(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int i, j, rank, nranks, peer, bufsize, errors, total_errors; double **buf_bvec, **src_bvec, *src_buf; int count[2], src_stride, trg_stride, stride_level; double scaling, time; MPI_Init(&argc, &argv); ARMCI_Init(); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); buf_bvec = (double **) malloc(sizeof(double *) * nranks); src_bvec = (double **) malloc(sizeof(double *) * nranks); bufsize = XDIM * YDIM * sizeof(double); ARMCI_Malloc((void **) buf_bvec, bufsize); ARMCI_Malloc((void **) src_bvec, bufsize); src_buf = src_bvec[rank]; if (rank == 0) printf("ARMCI Strided DLA Accumulate Test:\n"); ARMCI_Access_begin(buf_bvec[rank]); ARMCI_Access_begin(src_buf); for (i = 0; i < XDIM*YDIM; i++) { *(buf_bvec[rank] + i) = 1.0 + rank; *(src_buf + i) = 1.0 + rank; } ARMCI_Access_end(src_buf); ARMCI_Access_end(buf_bvec[rank]); scaling = 2.0; src_stride = XDIM * sizeof(double); trg_stride = XDIM * sizeof(double); stride_level = 1; count[1] = YDIM; count[0] = XDIM * sizeof(double); ARMCI_Barrier(); time = MPI_Wtime(); peer = (rank+1) % nranks; for (i = 0; i < ITERATIONS; i++) { ARMCI_AccS(ARMCI_ACC_DBL, (void *) &scaling, src_buf, &src_stride, (void *) buf_bvec[peer], &trg_stride, count, stride_level, peer); } ARMCI_Barrier(); time = MPI_Wtime() - time; if (rank == 0) printf("Time: %f sec\n", time); ARMCI_Access_begin(buf_bvec[rank]); for (i = errors = 0; i < XDIM; i++) { for (j = 0; j < YDIM; j++) { const double actual = *(buf_bvec[rank] + i + j*XDIM); const double expected = (1.0 + rank) + scaling * (1.0 + ((rank+nranks-1)%nranks)) * (ITERATIONS); if (actual - expected > 1e-10) { printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n", rank, j, i, expected, actual); errors++; fflush(stdout); } } } ARMCI_Access_end(buf_bvec[rank]); MPI_Allreduce(&errors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); ARMCI_Free((void *) buf_bvec[rank]); ARMCI_Free((void *) src_bvec[rank]); free(buf_bvec); 
free(src_bvec); ARMCI_Finalize(); MPI_Finalize(); if (total_errors == 0) { if (rank == 0) printf("Success.\n"); return 0; } else { if (rank == 0) printf("Fail.\n"); return 1; } }
int main(int argc, char *argv[]) { int rank, nranks; size_t i, msgsize, dest; size_t iterations, max_msgsize; int bufsize; double **buffer; double t_start, t_stop, t_total, d_total; double expected, bandwidth; int provided; armci_hdl_t handle; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); max_msgsize = MAX_MSGSIZE; ARMCI_Init_args(&argc, &argv); bufsize = max_msgsize * ITERATIONS; buffer = (double **) malloc(sizeof(double *) * nranks); ARMCI_Malloc((void **) buffer, bufsize); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } ARMCI_INIT_HANDLE(&handle); ARMCI_SET_AGGREGATE_HANDLE(&handle); ARMCI_Barrier(); if (rank == 0) { printf("ARMCI_Get Bandwidth in MBPS \n"); printf("%20s %22s \n", "Message Size", "Bandwidth"); fflush(stdout); dest = 1; expected = 1 + dest; for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2) { iterations = bufsize/msgsize; t_start = MPI_Wtime(); for (i = 0; i < iterations; i++) { ARMCI_NbGet((void *) ((size_t) buffer[dest] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[rank] + (size_t)(i * msgsize)), msgsize, dest, &handle); } ARMCI_Wait(&handle); t_stop = MPI_Wtime(); d_total = (iterations * msgsize) / (1024 * 1024); t_total = t_stop - t_start; bandwidth = d_total / t_total; printf("%20d %20.4lf \n", msgsize, bandwidth); fflush(stdout); #ifdef DATA_VALIDATION { for(j=0; j<((iterations*msgsize)/sizeof(double)); j++) { if(*(buffer[rank] + j) != expected) { printf("Data validation failed At displacement : %d Expected : %lf Actual : %lf \n", j, expected, *(buffer[rank] + j)); fflush(stdout); return -1; } } for(j=0; j<bufsize/sizeof(double); j++) { *(buffer[rank] + j) = 1.0 + rank; } } #endif } } ARMCI_Barrier(); ARMCI_UNSET_AGGREGATE_HANDLE(&handle); ARMCI_Free((void *) buffer[rank]); ARMCI_Finalize(); MPI_Finalize(); return 0; }
void test_aggregate(int dryrun) { int i, j, rc, bytes, elems[2] = {MAXPROC, MAXELEMS}; double *ddst_put[MAXPROC]; double *ddst_get[MAXPROC]; double *dsrc[MAXPROC]; armci_hdl_t aggr_hdl_put[MAXPROC]; armci_hdl_t aggr_hdl_get[MAXPROC]; armci_hdl_t hdl_put[MAXELEMS]; armci_hdl_t hdl_get[MAXELEMS]; armci_giov_t darr; void *src_ptr[MAX_REQUESTS], *dst_ptr[MAX_REQUESTS]; int start = 0, end = 0; double start_time; create_array(ddst_put, 2, elems); create_array(ddst_get, 2, elems); create_array(dsrc, 1, &elems[1]); for (i = 0; i < elems[1]; i++) { dsrc[me][i] = i * 1.001 * (me + 1); } for (i = 0; i < elems[0]*elems[1]; i++) { ddst_put[me][i] = 0.0; ddst_get[me][i] = 0.0; } ARMCI_Barrier(); /* only proc 0 does the work */ if (me == 0) { if (!dryrun) { printf("Transferring %d doubles (Not an array of %d doubles)\n", MAXELEMS, MAXELEMS); } /* initializing non-blocking handles */ for (i = 0; i < elems[1]; i++) { ARMCI_INIT_HANDLE(&hdl_put[i]); } for (i = 0; i < elems[1]; i++) { ARMCI_INIT_HANDLE(&hdl_get[i]); } /* aggregate handles */ for (i = 0; i < nproc; i++) { ARMCI_INIT_HANDLE(&aggr_hdl_put[i]); } for (i = 0; i < nproc; i++) { ARMCI_INIT_HANDLE(&aggr_hdl_get[i]); } for (i = 0; i < nproc; i++) { ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_put[i]); } for (i = 0; i < nproc; i++) { ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_get[i]); } bytes = sizeof(double); /* **************** PUT **************** */ /* register put */ start_time = armci_timer(); start = 0; end = elems[1]; for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { ARMCI_NbPutValueDouble(dsrc[me][j], &ddst_put[i][me*elems[1] + j], i, &hdl_put[j]); } for (j = start; j < end; j++) { ARMCI_Wait(&hdl_put[j]); } } if (!dryrun) { printf("%d: Value Put time = %.2es\n", me, armci_timer() - start_time); } /* vector put */ start_time = armci_timer(); for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { src_ptr[j] = (void *)&dsrc[me][j]; dst_ptr[j] = (void *)&ddst_put[i][me*elems[1] + j]; } darr.src_ptr_array = src_ptr; 
darr.dst_ptr_array = dst_ptr; darr.bytes = sizeof(double); darr.ptr_array_len = elems[1]; if ((rc = ARMCI_NbPutV(&darr, 1, i, &hdl_put[i]))) { ARMCI_Error("armci_nbputv failed\n", rc); } } for (i = 1; i < nproc; i++) { ARMCI_Wait(&hdl_put[i]); } if (!dryrun) { printf("%d: Vector Put time = %.2es\n", me, armci_timer() - start_time); } /* regular put */ start_time = armci_timer(); for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes, i, &hdl_put[j]))) { ARMCI_Error("armci_nbput failed\n", rc); } } for (j = start; j < end; j++) { ARMCI_Wait(&hdl_put[j]); } } if (!dryrun) { printf("%d: Regular Put time = %.2es\n", me, armci_timer() - start_time); } /* aggregate put */ start_time = armci_timer(); for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes, i, &aggr_hdl_put[i]))) { ARMCI_Error("armci_nbput failed\n", rc); } } } for (i = 1; i < nproc; i++) { ARMCI_Wait(&aggr_hdl_put[i]); } if (!dryrun) { printf("%d: Aggregate Put time = %.2es\n\n", me, armci_timer() - start_time); } /* **************** GET **************** */ /* vector get */ start_time = armci_timer(); for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { src_ptr[j] = (void *)&dsrc[i][j]; dst_ptr[j] = (void *)&ddst_get[me][i*elems[1] + j]; } darr.src_ptr_array = src_ptr; darr.dst_ptr_array = dst_ptr; darr.bytes = sizeof(double); darr.ptr_array_len = elems[1]; if ((rc = ARMCI_NbGetV(&darr, 1, i, &hdl_get[i]))) { ARMCI_Error("armci_nbgetv failed\n", rc); } ARMCI_Wait(&hdl_get[i]); } if (!dryrun) { printf("%d: Vector Get time = %.2es\n", me, armci_timer() - start_time); } /* regular get */ start_time = armci_timer(); for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { if ((rc = ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes, i, &hdl_get[j]))) { ARMCI_Error("armci_nbget failed\n", rc); } } for (j = start; j < end; j++) { 
ARMCI_Wait(&hdl_get[j]); } } if (!dryrun) { printf("%d: Regular Get time = %.2es\n", me, armci_timer() - start_time); } /* aggregate get */ start_time = armci_timer(); for (i = 1; i < nproc; i++) { for (j = start; j < end; j++) { ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes, i, &aggr_hdl_get[i]); } } for (i = 1; i < nproc; i++) { ARMCI_Wait(&aggr_hdl_get[i]); } if (!dryrun) { printf("%d: Aggregate Get time = %.2es\n", me, armci_timer() - start_time); } } ARMCI_Barrier(); ARMCI_AllFence(); ARMCI_Barrier(); /* Verify */ if (!(me == 0)) for (j = 0; j < elems[1]; j++) { if (ARMCI_ABS(ddst_put[me][j] - j * 1.001) > 0.1) { ARMCI_Error("aggregate put failed...1", 0); } } ARMCI_Barrier(); if (!dryrun)if (me == 0) { printf("\n aggregate put ..O.K.\n"); } fflush(stdout); if (me == 0) { for (i = 1; i < nproc; i++) { for (j = 0; j < elems[1]; j++) { if (ARMCI_ABS(ddst_get[me][i*elems[1] + j] - j * 1.001 *(i + 1)) > 0.1) { ARMCI_Error("aggregate get failed...1", 0); } } } } ARMCI_Barrier(); if (!dryrun)if (me == 0) { printf(" aggregate get ..O.K.\n"); } fflush(stdout); ARMCI_AllFence(); ARMCI_Barrier(); if (!dryrun)if (me == 0) { printf("O.K.\n"); fflush(stdout); } destroy_array(ddst_put); destroy_array(ddst_get); destroy_array(dsrc); }
static void contig_test(size_t buffer_size, int op) { void **dst_ptr; void **put_buf; void **get_buf; double *times; dst_ptr = (void*)malloc(nproc * sizeof(void*)); put_buf = (void*)malloc(nproc * sizeof(void*)); get_buf = (void*)malloc(nproc * sizeof(void*)); times = (double*)malloc(nproc * sizeof(double)); ARMCI_Malloc(dst_ptr, buffer_size); ARMCI_Malloc(put_buf, buffer_size); ARMCI_Malloc(get_buf, buffer_size); /* initialize what we're putting */ fill_array((double*)put_buf[me], buffer_size/sizeof(double), me); size_t msg_size; int dst = 1; double scale = 1.0; for (msg_size = 16; msg_size <= buffer_size; msg_size *= 2) { int j; int iter = msg_size > MEDIUM_MESSAGE_SIZE ? ITER_LARGE : ITER_SMALL; double t_start, t_end; if (0 == me) { for (j= 0; j < iter + WARMUP; ++j) { if (WARMUP == j) { t_start = dclock(); } switch (op) { case PUT: ARMCI_Put(put_buf[me], dst_ptr[dst], msg_size, dst); break; case GET: ARMCI_Get(dst_ptr[dst], get_buf[me], msg_size, dst); break; case ACC: ARMCI_Acc(ARMCI_ACC_DBL, &scale, put_buf[me], dst_ptr[dst], msg_size, dst); break; default: ARMCI_Error("oops", 1); } } } /* calculate total time and average time */ t_end = dclock(); ARMCI_Barrier(); if (0 == me) { printf("%8zu\t\t%6.2f\t\t%10.2f\n", msg_size, ((t_end - t_start))/iter, msg_size*iter/((t_end - t_start))); } } ARMCI_Free(dst_ptr[me]); ARMCI_Free(put_buf[me]); ARMCI_Free(get_buf[me]); free(dst_ptr); free(put_buf); free(get_buf); free(times); }
int main(int argc, char **argv) { int i, j, rank, nranks, peer, bufsize, errors; double **buffer, *src_buf; int count[2], src_stride, trg_stride, stride_level; MPI_Init(&argc, &argv); ARMCI_Init(); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); buffer = (double **) malloc(sizeof(double *) * nranks); bufsize = XDIM * YDIM * sizeof(double); ARMCI_Malloc((void **) buffer, bufsize); src_buf = ARMCI_Malloc_local(bufsize); if (rank == 0) printf("ARMCI Strided Put Test:\n"); src_stride = XDIM * sizeof(double); trg_stride = XDIM * sizeof(double); stride_level = 1; count[1] = YDIM; count[0] = XDIM * sizeof(double); ARMCI_Barrier(); peer = (rank+1) % nranks; for (i = 0; i < ITERATIONS; i++) { for (j = 0; j < XDIM*YDIM; j++) { *(src_buf + j) = rank + i; } ARMCI_PutS( src_buf, &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); } ARMCI_Barrier(); ARMCI_Access_begin(buffer[rank]); for (i = errors = 0; i < XDIM; i++) { for (j = 0; j < YDIM; j++) { const double actual = *(buffer[rank] + i + j*XDIM); const double expected = (1.0 + rank) + (1.0 + ((rank+nranks-1)%nranks)) + (ITERATIONS); if (actual - expected > 1e-10) { printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n", rank, j, i, expected, actual); errors++; fflush(stdout); } } } ARMCI_Access_end(buffer[rank]); ARMCI_Free((void *) buffer[rank]); ARMCI_Free_local(src_buf); free(buffer); ARMCI_Finalize(); MPI_Finalize(); if (errors == 0) { printf("%d: Success\n", rank); return 0; } else { printf("%d: Fail\n", rank); return 1; } }
int main(int argc, char *argv[]) { int rc, i, j = 0, rid, ret; armci_ckpt_ds_t ckptds; ARMCI_Group grp; ARMCI_Init_args(&argc, &argv); nproc = armci_msg_nproc(); me = armci_msg_me(); if (me == 0) { if (nproc > MAXPROCS) { ARMCI_Error("nproc > MAXPROCS", nproc); } else { printf("ARMCI test program (%d processes)\n", nproc); fflush(stdout); sleep(1); } } armci_init_checkpoint2(); ARMCI_Group_get_world(&grp); size = SIZE_; rc = ARMCI_Malloc((void **)ptr_arr, size * 8); printf("ARMCI test program (%d processes)\n", nproc); fflush(stdout); for (size = 1; size <= SIZE_; size *= 2) { t1 = MPI_Wtime(); for (i = 0; i < 5; i++) { for (rc = 0; rc < 15; rc++) { do_work(size); } } time_array[j++] = MPI_Wtime() - t1; ARMCI_Barrier(); printf("%d:done for size %ld\n", me, size); fflush(stdout); } (void)ARMCI_Ckpt_create_ds(&ckptds, 1); ckptds.ptr_arr[0] = ptr_arr[me]; ckptds.sz[0] = SIZE_ * 8; rid = ARMCI_Ckpt_init(NULL, &grp, 1, 0, &ckptds); printf("%d: After ARMCI_Ckpt_init(): \n", me); j = 0; for (size = 128; size <= SIZE_; size *= 2) { int rc; int simulate_restart = 1; t1 = MPI_Wtime(); ret = ARMCI_Ckpt(rid); if (ret == ARMCI_CKPT) { printf("%d: Performed CHECKPOINT @ size=%ld\n", me, size); } else if (ret == ARMCI_RESTART) { simulate_restart = 0; printf("%d: Performed RESTART @ size=%ld\n", me, size); } for (i = 0; i < 5; i++) { for (rc = 0; rc < 15; rc++) if (i == 3 && rc == 10) { } do_work(size); } time_array1[j++] = MPI_Wtime() - t1; sleep(1); if (simulate_restart && size == FAILURE_SIZE_) { printf("%d: Simulating FAILURE @ size = %d\n", me, size); ARMCI_Restart_simulate(rid, 1); } printf("%d: DONE for size=%ld regular=%f withckpt=%f\n\n", me, size, time_array[j-1], time_array1[j-1]); fflush(stdout); } ARMCI_Ckpt_finalize(rid); printf("Before Finalize()\n"); ARMCI_Barrier(); ARMCI_Finalize(); armci_msg_finalize(); return(0); }