uint64_t accumulate (size_t sndlen, int mytask, int origin, int target) { TRACE_ERR((stderr, "(%u) Do test ... sndlen = %zu\n", mytask, sndlen)); unsigned i; uint64_t t1 = GetTimeBase(); double scale = 1.0; if (mytask == origin) { for (i = 0; i < ITERATIONS; i++) { //fprintf(stderr, "(%u) Starting Iteration %d of size %zu dstaddr %p\n", mytask, i, sndlen, rcvbuf); A1_Get ( target, srcbuf, dstbuf, sndlen * sizeof(double) ); A1_Flush (target); } } A1_AllFence(); MPI_Barrier(MPI_COMM_WORLD); uint64_t t2 = GetTimeBase(); return ((t2 - t1) / ITERATIONS); }
void test_rmw (int mytask, int origin, int ntasks) { TRACE_ERR((stderr, "(%u) Do test ... \n", mytask)); int i = 0; int *outbuf = (int *) malloc (2*sizeof(int)); outbuf[0] = mytask; outbuf[1] = origin; int target = origin+1; if (target >= ntasks) target = 0; A1_AllFence(); if (mytask == origin) { for (i = 0; i < ntasks-1; i++) { A1_Rmw ( target, &outbuf[0], &outbuf[1], &outbuf[0], sizeof(int), A1_SWAP, A1_INT32 ); A1_Flush (target); target ++; outbuf[0] = outbuf[1]; //printf ("%d: current swap %d\n", i, outbuf[0]); } } A1_AllFence(); MPI_Barrier(MPI_COMM_WORLD); printf ("%d: My new task id %d\n", mytask, outbuf[0]); }
int main() { size_t i, rank, nranks, msgsize, dest; size_t iterations, max_msgsize; int bufsize; double **buffer; double t_start, t_stop, t_total, d_total; double expected, bandwidth; A1_handle_t a1_handle; max_msgsize = MAX_MSGSIZE; A1_Initialize(A1_THREAD_SINGLE); rank = A1_Process_id(A1_GROUP_WORLD); nranks = A1_Process_total(A1_GROUP_WORLD); bufsize = max_msgsize * ITERATIONS; buffer = (double **) malloc(sizeof(double *) * nranks); A1_Alloc_segment((void **) &(buffer[rank]), bufsize); A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } A1_Allocate_handle(&a1_handle); A1_Barrier_group(A1_GROUP_WORLD); if (rank == 0) { printf("A1_Put Bandwidth in MBPS \n"); printf("%20s %22s \n", "Message Size", "Bandwidth"); fflush(stdout); dest = 1; expected = 1 + dest; for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2) { iterations = bufsize/msgsize; t_start = A1_Time_seconds(); for (i = 0; i < iterations; i++) { A1_NbPut(dest, (void *) ((size_t) buffer[dest] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[rank] + (size_t)(i * msgsize)), msgsize, a1_handle); } A1_Wait_handle(a1_handle); t_stop = A1_Time_seconds(); d_total = (iterations * msgsize) / (1024 * 1024); t_total = t_stop - t_start; bandwidth = d_total / t_total; printf("%20d %20.4lf \n", msgsize, bandwidth); fflush(stdout); A1_Flush(dest); } } A1_Barrier_group(A1_GROUP_WORLD); A1_Release_handle(a1_handle); A1_Release_segments(A1_GROUP_WORLD, buffer[rank]); A1_Finalize(); return 0; }
int main() { int i, j, rank, nranks, msgsize; int xdim, ydim; long bufsize; double **buffer; double t_start, t_stop, t_latency; int count[2], src_stride, trg_stride, stride_level, peer; double expected, actual; A1_Initialize(A1_THREAD_SINGLE); rank = A1_Process_id(A1_GROUP_WORLD); nranks = A1_Process_total(A1_GROUP_WORLD); buffer = (double **) malloc (sizeof(double *) * nranks); A1_Barrier_group(A1_GROUP_WORLD); bufsize = MAX_XDIM * MAX_YDIM * sizeof(double); A1_Alloc_segment((void **) &(buffer[rank]), bufsize); A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer); for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } if(rank == 0) { printf("A1_PutS Latency - local and remote completions - in usec \n"); printf("%30s %22s \n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion"); fflush(stdout); } src_stride = MAX_YDIM*sizeof(double); trg_stride = MAX_YDIM*sizeof(double); stride_level = 1; for(xdim=1; xdim<=MAX_XDIM; xdim*=2) { count[1] = xdim; for(ydim=1; ydim<=MAX_YDIM; ydim*=2) { count[0] = ydim*sizeof(double); if(rank == 0) { peer = 1; for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = A1_Time_seconds(); A1_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride); } t_stop = A1_Time_seconds(); A1_Flush(peer); char temp[10]; sprintf(temp,"%dX%d", xdim, ydim); printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); A1_Barrier_group(A1_GROUP_WORLD); for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = A1_Time_seconds(); A1_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride); A1_Flush(peer); } t_stop = A1_Time_seconds(); printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); A1_Barrier_group(A1_GROUP_WORLD); } else if(rank == 1) { peer = 0; expected = (1.0 + (double) peer); A1_Barrier_group(A1_GROUP_WORLD); for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); return -1; } } } for(i=0; i< bufsize/sizeof(double); i++) *(buffer[rank] + i) = 1.0 + rank; A1_Barrier_group(A1_GROUP_WORLD); for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); return -1; } } } for(i=0; i< bufsize/sizeof(double); i++) *(buffer[rank] + i) = 1.0 + rank; A1_Barrier_group(A1_GROUP_WORLD); } } } A1_Barrier_group(A1_GROUP_WORLD); A1_Release_segments(A1_GROUP_WORLD, (void *) buffer[rank]); A1_Free_segment((void *) buffer[rank]); A1_Finalize(); return 0; }