int main (int argc, char ** argv) { int my_task, origin_task, target_task, num_tasks; MPI_Init (&argc, &argv); A1_Initialize(0); origin_task = 0; srcbuf = (double *) memalign (64, BUFSIZE); dstbuf = (double *) memalign (64, BUFSIZE); MPI_Comm_size(MPI_COMM_WORLD, &num_tasks); MPI_Comm_rank(MPI_COMM_WORLD, &my_task); target_task = num_tasks-1; /* Display some test header information */ if (my_task == origin_task) { char str[3][1024]; int index[3]; index[0] = 0; index[1] = 0; index[2] = 0; index[0] += sprintf (&str[0][index[0]], "# get "); index[1] += sprintf (&str[1][index[1]], "# "); index[2] += sprintf (&str[2][index[2]], "# bytes "); index[2] += sprintf (&str[2][index[2]], " usec"); fprintf (stdout, "#\n"); fprintf (stdout, "%s\n", str[0]); fprintf (stdout, "%s\n", str[1]); fprintf (stdout, "%s\n", str[2]); fflush (stdout); } double usec; char str[10240]; size_t sndlen = 1; int index = 0; for (; sndlen <= BUFSIZE/sizeof(double); sndlen = sndlen * 2) { index += sprintf (&str[index], "%10zd ", sndlen*sizeof(double)); #ifdef WARMUP accumulate (sndlen, my_task, origin_task, target_task); #endif usec = ((double)accumulate (sndlen, my_task, origin_task, target_task)) / 1600.0; index += sprintf (&str[index], "%6.3f \n", usec); } if (my_task == origin_task) fprintf (stdout, "%s\n", str); A1_Finalize(); MPI_Finalize(); return 0; }
/*
 * Entry point of the read-modify-write test: starts MPI and A1, looks up
 * this task's rank and the job size, runs test_rmw() and shuts both
 * runtimes down again.
 *
 * my_task, origin_task and num_tasks are declared outside this function.
 */
int main (int argc, char ** argv)
{
  MPI_Init (&argc, &argv);
  A1_Initialize(0);

  /* Task 0 is the origin of the test. */
  origin_task = 0;

  MPI_Comm_rank(MPI_COMM_WORLD, &my_task);
  MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);

  test_rmw (my_task, origin_task, num_tasks);

  /* Tear down in the reverse order of start-up. */
  A1_Finalize();
  MPI_Finalize();

  return 0;
}
int main(int argc, char **argv) { /* initialize A1 */ A1_Initialize(A1_THREAD_SINGLE); me = A1_Process_id(A1_GROUP_WORLD); nproc = A1_Process_total(A1_GROUP_WORLD); if(nproc < 2 || nproc> MAXPROC) { if(me == 0) fprintf(stderr, "USAGE: 2 <= processes <= %d - got %d\n", MAXPROC, nproc); A1_Barrier_group(A1_GROUP_WORLD); exit(0); } if(!me)printf("\n Performance of Basic Blocking Communication Operations\n"); A1_Barrier_group(A1_GROUP_WORLD); CHECK_RESULT=1; test_1D(); CHECK_RESULT=0; /* warmup run */ /* test 1 dimension array */ if(!me)printf("\n\t\t\tContiguous Data Transfer\n"); test_1D(); /* test 1 dimension array */ if(!me)printf("\n\t\t\tContiguous Data Transfer - Remote completion\n"); test_1D_remote(); /* test 2 dimension array */ if(!me)printf("\n\t\t\tStrided Data Transfer\n"); test_2D(); /* test 2 dimension array */ if(!me)printf("\n\t\t\tStrided Data Transfer - Remote completion\n"); test_2D_remote(); A1_Barrier_group(A1_GROUP_WORLD); if(me == 0){ if(warn_accuracy) printf("\nWARNING: Your timer does not have sufficient accuracy for this test (%d)\n",warn_accuracy); printf("\n\n------------ Now we test the same data transfer for correctness ----------\n"); fflush(stdout); } A1_Barrier_group(A1_GROUP_WORLD); CHECK_RESULT=1; if(!me)printf("\n\t\t\tContiguous Data Transfer\n"); test_1D(); if(me == 0) printf("OK\n"); A1_Barrier_group(A1_GROUP_WORLD); if(!me)printf("\n\t\t\tStrided Data Transfer\n"); test_2D(); if(me == 0) printf("OK\n\n\nTests Completed.\n"); A1_Barrier_group(A1_GROUP_WORLD); /* done */ A1_Finalize(); return(0); }
int main() { size_t i, rank, nranks, msgsize, dest; size_t iterations, max_msgsize; int bufsize; double **buffer; double t_start, t_stop, t_total, d_total; double expected, bandwidth; A1_handle_t a1_handle; max_msgsize = MAX_MSGSIZE; A1_Initialize(A1_THREAD_SINGLE); rank = A1_Process_id(A1_GROUP_WORLD); nranks = A1_Process_total(A1_GROUP_WORLD); bufsize = max_msgsize * ITERATIONS; buffer = (double **) malloc(sizeof(double *) * nranks); A1_Alloc_segment((void **) &(buffer[rank]), bufsize); A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer); for (i = 0; i < bufsize / sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } A1_Allocate_handle(&a1_handle); A1_Barrier_group(A1_GROUP_WORLD); if (rank == 0) { printf("A1_Put Bandwidth in MBPS \n"); printf("%20s %22s \n", "Message Size", "Bandwidth"); fflush(stdout); dest = 1; expected = 1 + dest; for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2) { iterations = bufsize/msgsize; t_start = A1_Time_seconds(); for (i = 0; i < iterations; i++) { A1_NbPut(dest, (void *) ((size_t) buffer[dest] + (size_t)(i * msgsize)), (void *) ((size_t) buffer[rank] + (size_t)(i * msgsize)), msgsize, a1_handle); } A1_Wait_handle(a1_handle); t_stop = A1_Time_seconds(); d_total = (iterations * msgsize) / (1024 * 1024); t_total = t_stop - t_start; bandwidth = d_total / t_total; printf("%20d %20.4lf \n", msgsize, bandwidth); fflush(stdout); A1_Flush(dest); } } A1_Barrier_group(A1_GROUP_WORLD); A1_Release_handle(a1_handle); A1_Release_segments(A1_GROUP_WORLD, buffer[rank]); A1_Finalize(); return 0; }
int main(int argc, char* argv[]) { int provided; int i, rank, nranks, msgsize, target; long bufsize; int **counter; int *complete; int increment; int counter_fetch; int counters_received; int t_start, t_stop, t_latency; int expected; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); A1_Initialize(A1_THREAD_SINGLE); rank = A1_Process_id(A1_GROUP_WORLD); nranks = A1_Process_total(A1_GROUP_WORLD); complete = (int *) malloc(sizeof(int) * COUNT); counter = (int **) malloc(sizeof(int *) * nranks); A1_Alloc_segment((void **) &(counter[rank]), sizeof(int)); A1_Exchange_segments(A1_GROUP_WORLD, (void **) counter); if (rank == 0) { printf("A1_RMW Test - in usec \n"); fflush(stdout); } target = 0; for(i=0; i<COUNT; i++) { complete[i] = -1; } if(rank == target) { *(counter[rank]) = 0; } increment = 1; counter_fetch = 0; counters_received = 0; A1_Barrier_group(A1_GROUP_WORLD); while(counter_fetch < COUNT) { A1_Rmw(target, (void *) &increment, (void *) &counter_fetch, (void *) counter[target], sizeof(int), A1_FETCH_AND_ADD, A1_INT32); /* s/1/rank/ means we will know who got the counter */ if (counter_fetch < COUNT) complete[counter_fetch] = rank; counters_received++; } A1_Allreduce_group(A1_GROUP_WORLD, COUNT, A1_SUM, A1_INT32, (void *) complete, (void *) complete); for(i=0; i<COUNT; i++) { if (complete[i] == -1) { printf("[%d] The RMW update failed at index: %d \n", rank, i); fflush(stdout); exit(-1); } } printf("[%d] The RMW update completed successfully \n", rank); fflush(stdout); A1_Barrier_group(A1_GROUP_WORLD); if (0==rank) { printf("Checking for fairness...\n", rank); fflush(stdout); for(i=0; i<COUNT; i++) { printf("counter value %d was received by process %d\n", i, complete[i]); } fflush(stdout); } A1_Barrier_group(A1_GROUP_WORLD); printf("process %d received %d counters\n", rank, counters_received); fflush(stdout); A1_Release_segments(A1_GROUP_WORLD, counter[rank]); A1_Free_segment(counter[rank]); A1_Finalize(); return 0; }
int main() { int i, rank, nranks, msgsize, peer; long bufsize; int *buffer; int t_start, t_stop, t_latency; int expected; A1_Initialize(A1_THREAD_SINGLE); rank = A1_Process_id(A1_GROUP_WORLD); nranks = A1_Process_total(A1_GROUP_WORLD); bufsize = MAX_MSG_SIZE; buffer = (int *) malloc(bufsize); if (rank == 0) { printf("A1_Allreduce Test - in usec \n"); fflush(stdout); } A1_Barrier_group(A1_GROUP_WORLD); for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2) { for (i = 0; i < bufsize/sizeof(int); i++) { buffer[i] = rank; } A1_Allreduce_group(A1_GROUP_WORLD, msgsize/sizeof(int), A1_SUM, A1_INT32, (void *) buffer, (void *) buffer); expected = (nranks-1)*(nranks)/2; for (i = 0; i < msgsize/sizeof(int); i++) { if(buffer[i] - expected != 0) { printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n", rank, expected, buffer[i], i); fflush(stdout); exit(-1); } } printf("[%d] %d message sum reduce successful\n", rank, msgsize); fflush(stdout); for (i = 0; i < bufsize/sizeof(int); i++) { buffer[i] = 1; } A1_Allreduce_group(A1_GROUP_WORLD, msgsize/sizeof(int), A1_PROD, A1_INT32, (void *) buffer, (void *) buffer); expected = 1; for (i = 0; i < msgsize/sizeof(int); i++) { if(buffer[i] - expected != 0) { printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n", rank, expected, buffer[i], i); fflush(stdout); exit(-1); } } printf("[%d] %d message product reduce successful\n", rank, msgsize); fflush(stdout); } free(buffer); A1_Finalize(); return 0; }
int main() { int i, j, rank, nranks, msgsize; int xdim, ydim; long bufsize; double **buffer; double t_start, t_stop, t_latency; int count[2], src_stride, trg_stride, stride_level, peer; double expected, actual; A1_Initialize(A1_THREAD_SINGLE); rank = A1_Process_id(A1_GROUP_WORLD); nranks = A1_Process_total(A1_GROUP_WORLD); buffer = (double **) malloc (sizeof(double *) * nranks); A1_Barrier_group(A1_GROUP_WORLD); bufsize = MAX_XDIM * MAX_YDIM * sizeof(double); A1_Alloc_segment((void **) &(buffer[rank]), bufsize); A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer); for(i=0; i< bufsize/sizeof(double); i++) { *(buffer[rank] + i) = 1.0 + rank; } if(rank == 0) { printf("A1_PutS Latency - local and remote completions - in usec \n"); printf("%30s %22s \n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion"); fflush(stdout); } src_stride = MAX_YDIM*sizeof(double); trg_stride = MAX_YDIM*sizeof(double); stride_level = 1; for(xdim=1; xdim<=MAX_XDIM; xdim*=2) { count[1] = xdim; for(ydim=1; ydim<=MAX_YDIM; ydim*=2) { count[0] = ydim*sizeof(double); if(rank == 0) { peer = 1; for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = A1_Time_seconds(); A1_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride); } t_stop = A1_Time_seconds(); A1_Flush(peer); char temp[10]; sprintf(temp,"%dX%d", xdim, ydim); printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); A1_Barrier_group(A1_GROUP_WORLD); for(i=0; i<ITERATIONS+SKIP; i++) { if(i == SKIP) t_start = A1_Time_seconds(); A1_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride); A1_Flush(peer); } t_stop = A1_Time_seconds(); printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS); fflush(stdout); A1_Barrier_group(A1_GROUP_WORLD); } else if(rank == 1) { peer = 0; expected = (1.0 + (double) peer); A1_Barrier_group(A1_GROUP_WORLD); for(i=0; i<xdim; i++) { for(j=0; j<ydim; 
j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); return -1; } } } for(i=0; i< bufsize/sizeof(double); i++) *(buffer[rank] + i) = 1.0 + rank; A1_Barrier_group(A1_GROUP_WORLD); for(i=0; i<xdim; i++) { for(j=0; j<ydim; j++) { actual = *(buffer[rank] + i*MAX_YDIM + j); if(actual != expected) { printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n", i, j, expected, actual); fflush(stdout); return -1; } } } for(i=0; i< bufsize/sizeof(double); i++) *(buffer[rank] + i) = 1.0 + rank; A1_Barrier_group(A1_GROUP_WORLD); } } } A1_Barrier_group(A1_GROUP_WORLD); A1_Release_segments(A1_GROUP_WORLD, (void *) buffer[rank]); A1_Free_segment((void *) buffer[rank]); A1_Finalize(); return 0; }