Beispiel #1
0
/*
 * Benchmark driver registered as "osu_gather".
 *
 * Times MPI_Gather of MPI_CHAR data rooted at rank 0 for message sizes that
 * start at 1 and double until they exceed options.max_message_size. For each
 * size the per-rank mean time is reduced to min / max / average across ranks
 * and handed to print_stats().
 *
 * Returns EXIT_SUCCESS after a completed run. Exits with EXIT_FAILURE on bad
 * usage, when fewer than two processes are present, or when accelerator
 * init/cleanup reports an error; exits with EXIT_SUCCESS after printing the
 * help or version message.
 */
int
main (int argc, char *argv[])
{
    int iter, nprocs, my_rank, msg_size;
    int warmup;
    double local_latency = 0.0, begin = 0.0, end = 0.0;
    double elapsed = 0.0;
    double t_avg = 0.0, t_max = 0.0, t_min = 0.0;
    char *gather_src = NULL;
    char *gather_dst = NULL;
    int opt_status;

    set_header(HEADER);
    set_benchmark_name("osu_gather");
    enable_accel_support();
    opt_status = process_options(argc, argv);

    /* Device setup is attempted only for a clean parse with an accelerator selected. */
    if (po_okay == opt_status && none != options.accel && init_accel()) {
        fprintf(stderr, "Error initializing device\n");
        exit(EXIT_FAILURE);
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* Act on the parse result now that the rank can be passed to the printers. */
    if (po_bad_usage == opt_status) {
        print_bad_usage_message(my_rank);
        MPI_Finalize();
        exit(EXIT_FAILURE);
    } else if (po_help_message == opt_status) {
        print_help_message(my_rank);
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    } else if (po_version_message == opt_status) {
        print_version_message(my_rank);
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    }

    if (nprocs < 2) {
        if (0 == my_rank) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Keep the root's gather buffer (one slot per rank) within the memory limit. */
    if ((options.max_message_size * nprocs) > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit / nprocs;
    }

    /* Only the root needs a receive buffer; other ranks keep the NULL pointer. */
    if (0 == my_rank) {
        size_t gathered_bytes = options.max_message_size * nprocs;

        if (allocate_buffer((void**)&gather_dst, gathered_bytes,
                    options.accel)) {
            fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", my_rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        set_buffer(gather_dst, options.accel, 1, gathered_bytes);
    }

    if (allocate_buffer((void**)&gather_src, options.max_message_size,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", my_rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(gather_src, options.accel, 0, options.max_message_size);

    print_preamble(my_rank);

    msg_size = 1;
    while (msg_size <= options.max_message_size) {
        /*
         * Large messages use their own warm-up and iteration counts. The
         * iteration count is not switched back; sizes only grow, so every
         * later pass takes the same branch.
         */
        if (msg_size <= LARGE_MESSAGE_SIZE) {
            warmup = SKIP;
        } else {
            warmup = SKIP_LARGE;
            options.iterations = options.iterations_large;
        }

        MPI_Barrier(MPI_COMM_WORLD);
        elapsed = 0.0;

        for (iter = 0; iter < options.iterations + warmup; iter++) {
            begin = MPI_Wtime();
            MPI_Gather(gather_src, msg_size, MPI_CHAR,
                       gather_dst, msg_size, MPI_CHAR,
                       0, MPI_COMM_WORLD);
            end = MPI_Wtime();

            /* Warm-up iterations run but do not contribute to the total. */
            if (iter >= warmup) {
                elapsed += end - begin;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }

        /* Mean time per call on this rank, scaled by 1e6. */
        local_latency = (elapsed * 1e6) / options.iterations;

        MPI_Reduce(&local_latency, &t_min, 1, MPI_DOUBLE, MPI_MIN, 0,
                   MPI_COMM_WORLD);
        MPI_Reduce(&local_latency, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0,
                   MPI_COMM_WORLD);
        MPI_Reduce(&local_latency, &t_avg, 1, MPI_DOUBLE, MPI_SUM, 0,
                   MPI_COMM_WORLD);
        t_avg /= nprocs;

        print_stats(my_rank, msg_size, t_avg, t_min, t_max);
        MPI_Barrier(MPI_COMM_WORLD);

        msg_size *= 2;
    }

    if (0 == my_rank) {
        free_buffer(gather_dst, options.accel);
    }
    free_buffer(gather_src, options.accel);

    MPI_Finalize();

    if (none != options.accel && cleanup_accel()) {
        fprintf(stderr, "Error cleaning up device\n");
        exit(EXIT_FAILURE);
    }

    return EXIT_SUCCESS;
}
/*
 * All-reduce benchmark driver ("osu_allreduce", or "mlsl_osu_allreduce" when
 * ENABLE_MLSL is defined).
 *
 * For every entry j of the all_reduce_kernels_size table it performs
 * all_reduce_kernels_repeat[j] back-to-back float SUM all-reduces of that
 * many elements, times the whole batch, and reports the per-call time
 * (min / max / average across ranks) through print_stats().
 *
 * The send and receive buffers are sized from options.max_message_size
 * (capped by options.max_mem_limit). Table entries whose element count does
 * not fit in those buffers, does not fit in an int, or whose repeat count is
 * not positive are skipped with a warning on rank 0 instead of being run.
 *
 * Returns EXIT_SUCCESS after a completed run. Exits with EXIT_FAILURE on bad
 * usage, when fewer than two processes are present, or when accelerator
 * init/cleanup reports an error; exits with EXIT_SUCCESS after printing the
 * help or version message.
 */
int main(int argc, char *argv[])
{
    int i, j, numprocs, rank, size;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    float *sendbuf = NULL, *recvbuf = NULL;
    int po_ret;
    size_t bufsize;
    size_t max_count;   /* number of floats each buffer can hold */

    int64_t* problems = all_reduce_kernels_size;
    int64_t* numRepeats = all_reduce_kernels_repeat;

    set_header(HEADER);
#ifdef ENABLE_MLSL
    mlsl_comm_req request;
    set_benchmark_name("mlsl_osu_allreduce");
#else
    set_benchmark_name("osu_allreduce");
#endif
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

#ifdef ENABLE_MLSL
    MLSL_CALL(mlsl_environment_get_env(&env));
    MLSL_CALL(mlsl_environment_init(env, &argc, &argv));
    size_t process_idx, process_count;
    MLSL_CALL(mlsl_environment_get_process_idx(env, &process_idx));
    MLSL_CALL(mlsl_environment_get_process_count(env, &process_count));
    rank = process_idx;
    numprocs = process_count;
    MLSL_CALL(mlsl_environment_create_distribution(env, process_count, 1, &distribution));
#else
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
#endif

    /* Act on the parse result now that the rank can be passed to the printers. */
    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            FINALIZE();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            FINALIZE();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            FINALIZE();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
        default:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        FINALIZE();
        exit(EXIT_FAILURE);
    }

    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    /* Round the byte budget down to a whole number of floats. */
    bufsize = sizeof(float)*(options.max_message_size/sizeof(float));
    max_count = bufsize / sizeof(float);

    if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 1, bufsize);

    if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 0, bufsize);

    print_preamble(rank, numprocs);

    for (j = 0; j < _NUMBER_OF_KERNELS_; j++)
    {
        /*
         * The element count comes from a table that is independent of the
         * buffer size, so validate it before handing it to the all-reduce:
         * a count larger than max_count would read and write past the end of
         * sendbuf/recvbuf, a count that does not survive the conversion to
         * int would be silently truncated, and a non-positive repeat count
         * would make the per-call division below meaningless.
         *
         * The decision depends only on the table and on the parsed options,
         * so all ranks skip the same entries as long as they were started
         * with the same command line; the collectives below stay matched.
         */
        if (problems[j] < 0
                || (uint64_t)problems[j] > (uint64_t)max_count
                || (int64_t)(int)problems[j] != problems[j]
                || numRepeats[j] <= 0) {
            if (rank == 0) {
                fprintf(stderr,
                        "Skipping problem %d (count %lld, repeats %lld): "
                        "count must be in [0, %zu] and repeats must be "
                        "positive\n",
                        j, (long long)problems[j], (long long)numRepeats[j],
                        max_count);
            }
            continue;
        }

        size = (int)problems[j];

        options.iterations = numRepeats[j];
        MPI_Barrier(MPI_COMM_WORLD);

        /* One timer around the whole batch; there are no warm-up iterations. */
        t_start = MPI_Wtime();
        for(i=0; i < options.iterations; i++) {
#ifdef ENABLE_MLSL
            MLSL_CALL(mlsl_distribution_all_reduce(distribution, sendbuf, recvbuf, size, DT_FLOAT, RT_SUM, GT_DATA, &request));
            MLSL_CALL(mlsl_environment_wait(env, request));
#else
            MPI_Allreduce(sendbuf, recvbuf, size, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
#endif
        }

        t_stop = MPI_Wtime();
        timer = t_stop-t_start;

        /* Mean time per call on this rank, scaled by 1e3. */
        latency = (double)(timer * 1e3) / options.iterations;

        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free_buffer(sendbuf, options.accel);
    free_buffer(recvbuf, options.accel);

    FINALIZE();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}
Beispiel #3
0
/*
 * Benchmark driver registered as "osu_iscatterv".
 *
 * For each message size it runs two timed loops over MPI_Iscatterv rooted at
 * rank 0: first the call followed immediately by MPI_Wait, then the same
 * call with dummy_compute() placed between posting the request and waiting
 * on it. The accumulated timings of the second loop, together with the
 * latency measured by the first, are passed to calculate_and_print_stats().
 *
 * Returns EXIT_SUCCESS after a completed run. Exits with EXIT_FAILURE on bad
 * usage, when fewer than two processes are present, or when accelerator
 * init/cleanup reports an error; exits with EXIT_SUCCESS after printing the
 * help or version message.
 */
int main(int argc, char *argv[])
{
    /* Make stdout unbuffered. */
    setbuf(stdout, NULL);
    int i = 0, rank, size, disp;
    int numprocs;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double tcomp = 0.0, tcomp_total=0.0, latency_in_secs=0.0;
    double test_time = 0.0, test_total = 0.0;
    double timer=0.0;
    double wait_time = 0.0, init_time = 0.0;
    double init_total = 0.0, wait_total = 0.0;
    char *sendbuf=NULL;
    char *recvbuf=NULL;
    int *sdispls=NULL, *sendcounts=NULL;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_iscatterv");
    enable_accel_support();
    /* Parsed before MPI_Init; the result is acted on further down. */
    po_ret = process_options(argc, argv);

    /* Device setup is attempted only for a clean parse with an accelerator selected. */
    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Request request;
    MPI_Status status;

    /* Handle the parse result now that the rank can be passed to the printers. */
    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /*
     * The root's send buffer holds one message per rank, so shrink the
     * per-rank message size until max_message_size * numprocs fits within
     * max_mem_limit.
     */
    if ((options.max_message_size * numprocs) > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit / numprocs;
    }
    
    /*
     * The count/displacement arrays and the send buffer are allocated on
     * rank 0 only; every other rank keeps the NULL pointers set above.
     */
    if (0 == rank) {
        if (allocate_buffer((void**)&sendcounts, numprocs*sizeof(int), none)) {
            fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        if (allocate_buffer((void**)&sdispls, numprocs*sizeof(int), none)) {
            fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        bufsize = options.max_message_size * numprocs;
        if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) {
            fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        /* NOTE(review): presumably fills the buffer with the value 1 — confirm against set_buffer(). */
        set_buffer(sendbuf, options.accel, 1, bufsize);
    }

    /* Every rank, including the root, receives one message. */
    if (allocate_buffer((void**)&recvbuf, options.max_message_size,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 0, options.max_message_size);

    print_preamble_nbc(rank);

    /*
     * Sweep message sizes, doubling each pass.
     * NOTE(review): a min_message_size of 0 would never advance (0 * 2 == 0);
     * confirm that process_options() rules that value out.
     */
    for(size=options.min_message_size; size <=options.max_message_size; size *= 2) {
        /*
         * Large messages use their own warm-up and iteration counts. The
         * iteration count is not switched back inside this loop; size only
         * grows, so every later pass takes the same branch.
         */
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = SKIP_LARGE;
            options.iterations = options.iterations_large;
        }
        else {
            options.skip = SKIP;
        }
        /* Root: every rank gets `size` elements, laid out back to back in sendbuf. */
        if (0 == rank) {
            disp =0;
            for ( i = 0; i < numprocs; i++) {
                sendcounts[i] = size;
                sdispls[i] = disp;
                disp += size;
            }
        }
	
        MPI_Barrier(MPI_COMM_WORLD);
        
        timer = 0.0;     
        
        /* Loop 1: the scatter followed immediately by the wait, with no work in between. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            
            MPI_Iscatterv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf,
                      size, MPI_CHAR, 0, MPI_COMM_WORLD, &request);
            MPI_Wait(&request,&status);

            t_stop = MPI_Wtime();
            
            /* Warm-up iterations run but do not contribute to the total. */
            if(i>=options.skip){
                timer += t_stop-t_start;
            } 
            MPI_Barrier(MPI_COMM_WORLD);
        }  
        
        MPI_Barrier(MPI_COMM_WORLD);

        /* Mean time per call on this rank, scaled by 1e6. */
        latency = (timer * 1e6) / options.iterations;
        
        /* The same mean without the scaling; used to drive the second loop. */
        latency_in_secs = timer/options.iterations;

        /*
         * NOTE(review): presumably prepares whatever dummy_compute() works
         * on, based on the measured latency — confirm against init_arrays().
         */
        init_arrays(latency_in_secs);
        
        /* Recomputes the same counts and displacements as above; size has not changed. */
        if (0 == rank) {
            disp =0;
            for ( i = 0; i < numprocs; i++) {
                sendcounts[i] = size;
                sdispls[i] = disp;
                disp += size;
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        timer = 0.0; tcomp_total = 0; tcomp = 0;        
        init_total = 0.0; wait_total = 0.0;
        /* Comma expression: both assignments are performed. */
        test_time = 0.0, test_total = 0.0;

        /*
         * Loop 2: the same scatter, with dummy_compute() between posting the
         * request and waiting on it. Each iteration records:
         *   init_time - time spent inside the MPI_Iscatterv call,
         *   tcomp     - wall time spent around dummy_compute(),
         *   test_time - the value returned by dummy_compute(),
         *   wait_time - time spent inside MPI_Wait,
         *   t_stop - t_start - the whole iteration.
         */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();

            init_time = MPI_Wtime();
            MPI_Iscatterv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf,
                      size, MPI_CHAR, 0, MPI_COMM_WORLD, &request);
            init_time = MPI_Wtime() - init_time;

            tcomp = MPI_Wtime();             
            /*
             * NOTE(review): dummy_compute() receives the pending request;
             * presumably it polls it while computing — confirm.
             */
            test_time = dummy_compute(latency_in_secs, &request); 
            tcomp = MPI_Wtime() - tcomp;

            wait_time = MPI_Wtime();
            MPI_Wait(&request,&status);
            wait_time = MPI_Wtime() - wait_time;

            t_stop = MPI_Wtime();
            
            /* Warm-up iterations run but do not contribute to any total. */
            if(i>=options.skip){
                timer += t_stop-t_start;
                tcomp_total += tcomp;
                test_total += test_time;
                init_total += init_time;
                wait_total += wait_time;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }  
       
        MPI_Barrier (MPI_COMM_WORLD);
        
        /* `latency` is from loop 1; every other figure is from loop 2. */
        calculate_and_print_stats(rank, size, numprocs,
                                  timer, latency,
                                  test_total, tcomp_total,
                                  wait_total, init_total);
    }  
    
    /* Release the root-only allocations, then the buffer every rank owns. */
    if (0 == rank) {
        free_buffer(sendcounts, none);
        free_buffer(sdispls, none);
        free_buffer(sendbuf, options.accel);
    }
    free_buffer(recvbuf, options.accel);

    MPI_Finalize();

    /* Accelerator teardown happens after MPI has been finalized. */
    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }
   
    return EXIT_SUCCESS;
}
Beispiel #4
0
/*
 * Benchmark driver registered as "osu_barrier".
 *
 * Times repeated MPI_Barrier calls on MPI_COMM_WORLD using the "large"
 * warm-up and iteration counts from the options, then reduces the per-rank
 * mean to min / max / average across ranks and hands the result to
 * print_stats().
 *
 * Returns EXIT_SUCCESS after a completed run and EXIT_FAILURE when fewer
 * than two processes are present. Exits with EXIT_FAILURE on bad usage or
 * an accelerator init error; exits with EXIT_SUCCESS after printing the
 * help or version message.
 *
 * NOTE(review): init_accel() may be called below, but this function makes
 * no matching cleanup call before returning — confirm that is intentional.
 */
int main(int argc, char *argv[])
{
    int iter = 0, my_rank;
    int nprocs;
    double t_avg = 0.0, t_max = 0.0, t_min = 0.0;
    double local_latency = 0.0, begin = 0.0, end = 0.0;
    double elapsed = 0.0;
    int opt_status;

    set_header(HEADER);
    set_benchmark_name("osu_barrier");
    enable_accel_support();
    opt_status = process_options(argc, argv);

    /* Device setup is attempted only for a clean parse with an accelerator selected. */
    if (po_okay == opt_status && none != options.accel && init_accel()) {
        fprintf(stderr, "Error initializing device\n");
        exit(EXIT_FAILURE);
    }

    options.show_size = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* Act on the parse result now that the rank can be passed to the printers. */
    if (po_bad_usage == opt_status) {
        print_bad_usage_message(my_rank);
        MPI_Finalize();
        exit(EXIT_FAILURE);
    } else if (po_help_message == opt_status) {
        print_help_message(my_rank);
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    } else if (po_version_message == opt_status) {
        print_version_message(my_rank);
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    }

    if (nprocs < 2) {
        if (0 == my_rank) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();

        return EXIT_FAILURE;
    }

    print_preamble(my_rank);

    /* This benchmark always runs with the "large" warm-up and iteration counts. */
    options.skip = options.skip_large;
    options.iterations = options.iterations_large;

    iter = 0;
    while (iter < options.iterations + options.skip) {
        begin = MPI_Wtime();
        MPI_Barrier(MPI_COMM_WORLD);
        end = MPI_Wtime();

        /* Warm-up iterations run but do not contribute to the total. */
        if (iter >= options.skip) {
            elapsed += end - begin;
        }

        iter++;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Mean time per barrier on this rank, scaled by 1e6. */
    local_latency = (elapsed * 1e6) / options.iterations;

    MPI_Reduce(&local_latency, &t_min, 1, MPI_DOUBLE, MPI_MIN, 0,
               MPI_COMM_WORLD);
    MPI_Reduce(&local_latency, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0,
               MPI_COMM_WORLD);
    MPI_Reduce(&local_latency, &t_avg, 1, MPI_DOUBLE, MPI_SUM, 0,
               MPI_COMM_WORLD);
    t_avg /= nprocs;

    print_stats(my_rank, 0, t_avg, t_min, t_max);
    MPI_Finalize();

    return EXIT_SUCCESS;
}