int main (int argc, char *argv[]) { int i, numprocs, rank, size; int skip; double latency = 0.0, t_start = 0.0, t_stop = 0.0; double timer=0.0; double avg_time = 0.0, max_time = 0.0, min_time = 0.0; char * sendbuf = NULL, * recvbuf = NULL; int po_ret; size_t bufsize; set_header(HEADER); set_benchmark_name("osu_gather"); enable_accel_support(); po_ret = process_options(argc, argv); if (po_okay == po_ret && none != options.accel) { if (init_accel()) { fprintf(stderr, "Error initializing device\n"); exit(EXIT_FAILURE); } } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); switch (po_ret) { case po_bad_usage: print_bad_usage_message(rank); MPI_Finalize(); exit(EXIT_FAILURE); case po_help_message: print_help_message(rank); MPI_Finalize(); exit(EXIT_SUCCESS); case po_version_message: print_version_message(rank); MPI_Finalize(); exit(EXIT_SUCCESS); case po_okay: break; } if(numprocs < 2) { if (rank == 0) { fprintf(stderr, "This test requires at least two processes\n"); } MPI_Finalize(); exit(EXIT_FAILURE); } if ((options.max_message_size * numprocs) > options.max_mem_limit) { options.max_message_size = options.max_mem_limit / numprocs; } if (0 == rank) { bufsize = options.max_message_size * numprocs; if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } set_buffer(recvbuf, options.accel, 1, bufsize); } if (allocate_buffer((void**)&sendbuf, options.max_message_size, options.accel)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } set_buffer(sendbuf, options.accel, 0, options.max_message_size); print_preamble(rank); for (size=1; size <= options.max_message_size; size *= 2) { if (size > LARGE_MESSAGE_SIZE) { skip = SKIP_LARGE; options.iterations = options.iterations_large; } else { skip = SKIP; } MPI_Barrier(MPI_COMM_WORLD); timer=0.0; for (i=0; i < options.iterations + skip ; i++) { t_start = MPI_Wtime(); MPI_Gather(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 0, MPI_COMM_WORLD); t_stop = MPI_Wtime(); if (i >= skip) { timer+=t_stop-t_start; } MPI_Barrier(MPI_COMM_WORLD); } latency = (double)(timer * 1e6) / options.iterations; MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); avg_time = avg_time/numprocs; print_stats(rank, size, avg_time, min_time, max_time); MPI_Barrier(MPI_COMM_WORLD); } if (0 == rank) { free_buffer(recvbuf, options.accel); } free_buffer(sendbuf, options.accel); MPI_Finalize(); if (none != options.accel) { if (cleanup_accel()) { fprintf(stderr, "Error cleaning up device\n"); exit(EXIT_FAILURE); } } return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int i, j, numprocs, rank, size; double latency = 0.0, t_start = 0.0, t_stop = 0.0; double timer=0.0; double avg_time = 0.0, max_time = 0.0, min_time = 0.0; float *sendbuf, *recvbuf; int po_ret; size_t bufsize; int64_t* problems = all_reduce_kernels_size; int64_t* numRepeats = all_reduce_kernels_repeat; set_header(HEADER); #ifdef ENABLE_MLSL mlsl_comm_req request; set_benchmark_name("mlsl_osu_allreduce"); #else set_benchmark_name("osu_allreduce"); #endif enable_accel_support(); po_ret = process_options(argc, argv); if (po_okay == po_ret && none != options.accel) { if (init_accel()) { fprintf(stderr, "Error initializing device\n"); exit(EXIT_FAILURE); } } #ifdef ENABLE_MLSL MLSL_CALL(mlsl_environment_get_env(&env)); MLSL_CALL(mlsl_environment_init(env, &argc, &argv)); size_t process_idx, process_count; MLSL_CALL(mlsl_environment_get_process_idx(env, &process_idx)); MLSL_CALL(mlsl_environment_get_process_count(env, &process_count)); rank = process_idx; numprocs = process_count; MLSL_CALL(mlsl_environment_create_distribution(env, process_count, 1, &distribution)); #else MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); #endif switch (po_ret) { case po_bad_usage: print_bad_usage_message(rank); FINALIZE(); exit(EXIT_FAILURE); case po_help_message: print_help_message(rank); FINALIZE(); exit(EXIT_SUCCESS); case po_version_message: print_version_message(rank); FINALIZE(); exit(EXIT_SUCCESS); case po_okay: break; } if(numprocs < 2) { if (rank == 0) { fprintf(stderr, "This test requires at least two processes\n"); } FINALIZE(); exit(EXIT_FAILURE); } if (options.max_message_size > options.max_mem_limit) { options.max_message_size = options.max_mem_limit; } bufsize = sizeof(float)*(options.max_message_size/sizeof(float)); if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } set_buffer(sendbuf, options.accel, 1, bufsize); bufsize = sizeof(float)*(options.max_message_size/sizeof(float)); if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } set_buffer(recvbuf, options.accel, 0, bufsize); print_preamble(rank, numprocs); size = options.max_message_size/sizeof(float); for (j = 0; j < _NUMBER_OF_KERNELS_; j++) { size = problems[j]; options.iterations = numRepeats[j]; MPI_Barrier(MPI_COMM_WORLD); timer = 0.0; t_start = MPI_Wtime(); for(i=0; i < options.iterations; i++) { #ifdef ENABLE_MLSL MLSL_CALL(mlsl_distribution_all_reduce(distribution, sendbuf, recvbuf, size, DT_FLOAT, RT_SUM, GT_DATA, &request)); MLSL_CALL(mlsl_environment_wait(env, request)); #else MPI_Allreduce(sendbuf, recvbuf, size, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); #endif } t_stop = MPI_Wtime(); timer = t_stop-t_start; latency = (double)(timer * 1e3) / options.iterations; MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); avg_time = avg_time/numprocs; print_stats(rank, size, avg_time, min_time, max_time); MPI_Barrier(MPI_COMM_WORLD); } free_buffer(sendbuf, options.accel); free_buffer(recvbuf, options.accel); FINALIZE(); if (none != options.accel) { if (cleanup_accel()) { fprintf(stderr, "Error cleaning up device\n"); exit(EXIT_FAILURE); } } return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { setbuf(stdout, NULL); int i = 0, rank, size, disp; int numprocs; double latency = 0.0, t_start = 0.0, t_stop = 0.0; double tcomp = 0.0, tcomp_total=0.0, latency_in_secs=0.0; double test_time = 0.0, test_total = 0.0; double timer=0.0; double wait_time = 0.0, init_time = 0.0; double init_total = 0.0, wait_total = 0.0; char *sendbuf=NULL; char *recvbuf=NULL; int *sdispls=NULL, *sendcounts=NULL; int po_ret; size_t bufsize; set_header(HEADER); set_benchmark_name("osu_iscatterv"); enable_accel_support(); po_ret = process_options(argc, argv); if (po_okay == po_ret && none != options.accel) { if (init_accel()) { fprintf(stderr, "Error initializing device\n"); exit(EXIT_FAILURE); } } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Request request; MPI_Status status; switch (po_ret) { case po_bad_usage: print_bad_usage_message(rank); MPI_Finalize(); exit(EXIT_FAILURE); case po_help_message: print_help_message(rank); MPI_Finalize(); exit(EXIT_SUCCESS); case po_version_message: print_version_message(rank); MPI_Finalize(); exit(EXIT_SUCCESS); case po_okay: break; } if(numprocs < 2) { if (rank == 0) { fprintf(stderr, "This test requires at least two processes\n"); } MPI_Finalize(); exit(EXIT_FAILURE); } if ((options.max_message_size * numprocs) > options.max_mem_limit) { options.max_message_size = options.max_mem_limit / numprocs; } if (0 == rank) { if (allocate_buffer((void**)&sendcounts, numprocs*sizeof(int), none)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } if (allocate_buffer((void**)&sdispls, numprocs*sizeof(int), none)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } bufsize = options.max_message_size * numprocs; if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } set_buffer(sendbuf, options.accel, 1, bufsize); } if (allocate_buffer((void**)&recvbuf, options.max_message_size, options.accel)) { fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } set_buffer(recvbuf, options.accel, 0, options.max_message_size); print_preamble_nbc(rank); for(size=options.min_message_size; size <=options.max_message_size; size *= 2) { if(size > LARGE_MESSAGE_SIZE) { options.skip = SKIP_LARGE; options.iterations = options.iterations_large; } else { options.skip = SKIP; } if (0 == rank) { disp =0; for ( i = 0; i < numprocs; i++) { sendcounts[i] = size; sdispls[i] = disp; disp += size; } } MPI_Barrier(MPI_COMM_WORLD); timer = 0.0; for(i=0; i < options.iterations + options.skip ; i++) { t_start = MPI_Wtime(); MPI_Iscatterv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf, size, MPI_CHAR, 0, MPI_COMM_WORLD, &request); MPI_Wait(&request,&status); t_stop = MPI_Wtime(); if(i>=options.skip){ timer += t_stop-t_start; } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); latency = (timer * 1e6) / options.iterations; latency_in_secs = timer/options.iterations; init_arrays(latency_in_secs); if (0 == rank) { disp =0; for ( i = 0; i < numprocs; i++) { sendcounts[i] = size; sdispls[i] = disp; disp += size; } } MPI_Barrier(MPI_COMM_WORLD); timer = 0.0; tcomp_total = 0; tcomp = 0; init_total = 0.0; wait_total = 0.0; test_time = 0.0, test_total = 0.0; for(i=0; i < options.iterations + options.skip ; i++) { t_start = MPI_Wtime(); init_time = MPI_Wtime(); MPI_Iscatterv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf, size, MPI_CHAR, 0, MPI_COMM_WORLD, &request); init_time = MPI_Wtime() - init_time; tcomp = MPI_Wtime(); test_time = dummy_compute(latency_in_secs, &request); tcomp = MPI_Wtime() - tcomp; wait_time = MPI_Wtime(); MPI_Wait(&request,&status); wait_time = MPI_Wtime() - wait_time; t_stop = MPI_Wtime(); if(i>=options.skip){ timer += t_stop-t_start; tcomp_total += tcomp; test_total += test_time; init_total += init_time; wait_total += wait_time; } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier (MPI_COMM_WORLD); calculate_and_print_stats(rank, size, numprocs, timer, latency, test_total, tcomp_total, wait_total, init_total); } if (0 == rank) { free_buffer(sendcounts, none); free_buffer(sdispls, none); free_buffer(sendbuf, options.accel); } free_buffer(recvbuf, options.accel); MPI_Finalize(); if (none != options.accel) { if (cleanup_accel()) { fprintf(stderr, "Error cleaning up device\n"); exit(EXIT_FAILURE); } } return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int i = 0, rank; int numprocs; double avg_time = 0.0, max_time = 0.0, min_time = 0.0; double latency = 0.0, t_start = 0.0, t_stop = 0.0; double timer=0.0; int po_ret; set_header(HEADER); set_benchmark_name("osu_barrier"); enable_accel_support(); po_ret = process_options(argc, argv); if (po_okay == po_ret && none != options.accel) { if (init_accel()) { fprintf(stderr, "Error initializing device\n"); exit(EXIT_FAILURE); } } options.show_size = 0; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); switch (po_ret) { case po_bad_usage: print_bad_usage_message(rank); MPI_Finalize(); exit(EXIT_FAILURE); case po_help_message: print_help_message(rank); MPI_Finalize(); exit(EXIT_SUCCESS); case po_version_message: print_version_message(rank); MPI_Finalize(); exit(EXIT_SUCCESS); case po_okay: break; } if(numprocs < 2) { if(rank == 0) { fprintf(stderr, "This test requires at least two processes\n"); } MPI_Finalize(); return EXIT_FAILURE; } print_preamble(rank); options.skip = options.skip_large; options.iterations = options.iterations_large; timer = 0.0; for(i=0; i < options.iterations + options.skip ; i++) { t_start = MPI_Wtime(); MPI_Barrier(MPI_COMM_WORLD); t_stop = MPI_Wtime(); if(i>=options.skip){ timer+=t_stop-t_start; } } MPI_Barrier(MPI_COMM_WORLD); latency = (timer * 1e6) / options.iterations; MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); avg_time = avg_time/numprocs; print_stats(rank, 0, avg_time, min_time, max_time); MPI_Finalize(); return EXIT_SUCCESS; }