コード例 #1
0
ファイル: libperf.c プロジェクト: xinzhao3/ucx
/**
 * Run one performance test as described by @a params.
 *
 * The request is validated (a test command must be selected and the API must
 * be UCT or UCP), a test context is allocated, reset and set up through the
 * per-API function table, and the test is executed. In single-threaded mode
 * an optional warmup run precedes the measured run; otherwise the work is
 * handed to ucx_perf_thread_spawn().
 *
 * @param params  Test parameters.
 * @param result  Filled by ucx_perf_calc_result() when the single-threaded
 *                measured run succeeds.
 *
 * @return UCS_ERR_INVALID_PARAM for a rejected request, UCS_ERR_NO_MEMORY if
 *         the context cannot be allocated, otherwise the status produced by
 *         setup / run / thread spawn.
 */
ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result)
{
    ucx_perf_context_t *perf;
    ucs_status_t rc;

    /* Nothing has been acquired yet, so validation failures return directly */
    if (params->command == UCX_PERF_CMD_LAST) {
        ucs_error("Test is not selected");
        return UCS_ERR_INVALID_PARAM;
    }

    if (!((params->api == UCX_PERF_API_UCT) || (params->api == UCX_PERF_API_UCP))) {
        ucs_error("Invalid test API parameter (should be UCT or UCP)");
        return UCS_ERR_INVALID_PARAM;
    }

    perf = malloc(sizeof(*perf));
    if (perf == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    ucx_perf_test_reset(perf, params);

    rc = ucx_perf_funcs[params->api].setup(perf, params);
    if (rc != UCS_OK) {
        goto release_ctx;
    }

    /* Multi-threaded mode is fully delegated */
    if (params->thread_mode != UCS_THREAD_MODE_SINGLE) {
        rc = ucx_perf_thread_spawn(perf, result);
        goto teardown;
    }

    /* Optional warmup pass; a failure here skips the measured run */
    if (params->warmup_iter > 0) {
        ucx_perf_set_warmup(perf, params);
        rc = ucx_perf_funcs[params->api].run(perf);
        if (rc != UCS_OK) {
            goto teardown;
        }

        rte_call(perf, barrier);
        ucx_perf_test_reset(perf, params);
    }

    /* Measured run; the barrier is invoked whether or not the run succeeded */
    rc = ucx_perf_funcs[params->api].run(perf);
    rte_call(perf, barrier);
    if (rc == UCS_OK) {
        ucx_perf_calc_result(perf, result);
        rte_call(perf, report, result, perf->params.report_arg, 1);
    }

teardown:
    ucx_perf_funcs[params->api].cleanup(perf);
release_ctx:
    free(perf);
    return rc;
}
コード例 #2
0
ファイル: libperf.c プロジェクト: bbenton/ucx
/**
 * Set up a test context and run the test on params->thread_count threads.
 *
 * Each thread gets its own copy of the parameters and of the set-up context,
 * with the send/receive buffer pointers advanced by
 * (thread index * params->message_size). All threads share one status array
 * and one pthread barrier sized for the full thread count.
 *
 * @param params  Test parameters; thread_count must be positive.
 * @param result  Not used by this function.
 *
 * @return UCS_ERR_INVALID_PARAM for a non-positive thread count,
 *         UCS_ERR_NO_MEMORY if per-thread state or the barrier cannot be
 *         created, the setup status if setup fails, otherwise the status of
 *         the last failing thread (or the setup status if none failed).
 */
static int ucx_perf_thread_spawn(ucx_perf_params_t* params,
                                 ucx_perf_result_t* result) {
    ucx_perf_context_t perf;
    ucs_status_t status;
    ucx_perf_thread_context_t* tctx;
    ucs_status_t* statuses;
    pthread_barrier_t tbarrier;
    int ti;
    int nti = params->thread_count;
    int ret;

    /* calloc(0, ...) may legitimately return NULL and a barrier cannot be
     * created for zero participants, so reject this case explicitly */
    if (nti <= 0) {
        ucs_error("Invalid thread count: %d", nti);
        return UCS_ERR_INVALID_PARAM;
    }

    tctx = calloc(nti, sizeof(*tctx));
    statuses = calloc(nti, sizeof(*statuses));
    if ((tctx == NULL) || (statuses == NULL)) {
        ucs_error("Failed to allocate state for %d test threads", nti);
        status = UCS_ERR_NO_MEMORY;
        goto out_free;
    }

    ret = pthread_barrier_init(&tbarrier, NULL, nti);
    if (ret != 0) {
        ucs_error("pthread_barrier_init() failed with error %d", ret);
        status = UCS_ERR_NO_MEMORY;
        goto out_free;
    }

    ucx_perf_test_reset(&perf, params);
    status = ucx_perf_funcs[params->api].setup(&perf, params);
    if (UCS_OK != status) {
        goto out_cleanup;
    }

    for (ti = 0; ti < nti; ti++) {
        tctx[ti].tid = ti;
        tctx[ti].ntid = nti;
        tctx[ti].tbarrier = &tbarrier;
        tctx[ti].statuses = statuses;
        tctx[ti].params = *params;
        tctx[ti].perf = perf;
        /* Doctor the src and dst buffers to make them thread specific */
        tctx[ti].perf.send_buffer += ti * params->message_size;
        tctx[ti].perf.recv_buffer += ti * params->message_size;
        /* NOTE(review): the pthread_create() return value is not checked.
         * A failed creation would leave fewer than nti threads to reach the
         * barrier initialized above, and the join loop below would operate
         * on a thread id that was never started - confirm how this should
         * be handled. */
        pthread_create(&tctx[ti].pt, NULL,
                       ucx_perf_thread_run_test, (void*)&tctx[ti]);
    }

    for (ti = 0; ti < nti; ti++) {
        pthread_join(tctx[ti].pt, NULL);
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid, ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }

    ucx_perf_funcs[params->api].cleanup(&perf);

out_cleanup:
    /* Reached only after the barrier was successfully initialized */
    pthread_barrier_destroy(&tbarrier);
out_free:
    /* free(NULL) is a no-op, so a partial allocation is handled here too */
    free(statuses);
    free(tctx);

    return status;
}
コード例 #3
0
ファイル: libperf.c プロジェクト: brminich/ucx
/**
 * Set up a test context and run the test inside an OpenMP parallel region.
 *
 * The OpenMP thread count is set from params->thread_count. Inside the
 * region every thread fills its own slot of the per-thread context array
 * with a copy of the parameters and of the set-up context, advances the
 * send/receive buffer pointers by (thread number * params->message_size),
 * records the same value as the context offset, and calls
 * ucx_perf_thread_run_test() on that slot.
 *
 * @param params  Test parameters; thread_count must be positive.
 * @param result  Not used by this function.
 *
 * @return UCS_ERR_INVALID_PARAM for a non-positive thread count,
 *         UCS_ERR_NO_MEMORY if per-thread state cannot be allocated, the
 *         setup status if setup fails, otherwise the status of the last
 *         failing thread (or UCS_OK if none failed).
 */
static int ucx_perf_thread_spawn(ucx_perf_params_t* params,
                                 ucx_perf_result_t* result) {
    ucx_perf_context_t perf;
    ucs_status_t status = UCS_OK;
    ucx_perf_thread_context_t* tctx;
    ucs_status_t* statuses;
    int ti, nti;

    /* Validate before handing the value to the OpenMP runtime; also
     * calloc(0, ...) may legitimately return NULL */
    nti = params->thread_count;
    if (nti <= 0) {
        ucs_error("Invalid thread count: %d", nti);
        return UCS_ERR_INVALID_PARAM;
    }

    omp_set_num_threads(nti);

    tctx = calloc(nti, sizeof(*tctx));
    statuses = calloc(nti, sizeof(*statuses));
    if ((tctx == NULL) || (statuses == NULL)) {
        ucs_error("Failed to allocate state for %d test threads", nti);
        status = UCS_ERR_NO_MEMORY;
        goto out_cleanup;
    }

    ucx_perf_test_reset(&perf, params);
    status = ucx_perf_funcs[params->api].setup(&perf, params);
    if (UCS_OK != status) {
        goto out_cleanup;
    }

#pragma omp parallel private(ti)
{
    ti = omp_get_thread_num();
    tctx[ti].tid = ti;
    tctx[ti].ntid = nti;
    tctx[ti].statuses = statuses;
    tctx[ti].params = *params;
    tctx[ti].perf = perf;
    /* Doctor the src and dst buffers to make them thread specific */
    tctx[ti].perf.send_buffer += ti * params->message_size;
    tctx[ti].perf.recv_buffer += ti * params->message_size;
    tctx[ti].perf.offset = ti * params->message_size;
    ucx_perf_thread_run_test((void*)&tctx[ti]);
}
    /* Report every failed thread; the last failure becomes the result */
    for (ti = 0; ti < nti; ti++) {
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid, ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }

    ucx_perf_funcs[params->api].cleanup(&perf);

out_cleanup:
    /* free(NULL) is a no-op, so a partial allocation is handled here too */
    free(statuses);
    free(tctx);

    return status;
}
コード例 #4
0
ファイル: libperf.c プロジェクト: bbenton/ucx
/* Return nonzero if any of the first `count` entries differs from UCS_OK. */
static int ucx_perf_thread_any_failed(const ucs_status_t* statuses, int count) {
    int idx;

    for (idx = 0; idx < count; ++idx) {
        if (statuses[idx] != UCS_OK) {
            return 1;
        }
    }
    return 0;
}

/**
 * Thread entry point: optional warmup run followed by the measured run.
 *
 * Every thread stores the status of its runs in its own slot of the shared
 * status array and synchronizes with the other threads on the shared pthread
 * barrier. After each synchronized run all threads inspect the whole status
 * array and stop if any thread failed. Thread 0 additionally invokes the RTE
 * barrier, resets its context after warmup, and calculates and reports the
 * result after the measured run.
 *
 * @param arg  Pointer to this thread's ucx_perf_thread_context_t.
 *
 * @return Pointer to this thread's slot in the shared status array.
 */
static void* ucx_perf_thread_run_test(void* arg) {
    ucx_perf_thread_context_t* tctx = (ucx_perf_thread_context_t*) arg;
    ucx_perf_context_t* perf = &tctx->perf;
    ucx_perf_params_t* params = &tctx->params;
    ucx_perf_result_t* result = &tctx->result;
    pthread_barrier_t* tbarrier = tctx->tbarrier;
    ucs_status_t* statuses = tctx->statuses;
    const int tid = tctx->tid;
    const int is_first = (0 == tid);

    if (params->warmup_iter > 0) {
        ucx_perf_set_warmup(perf, params);
        statuses[tid] = ucx_perf_funcs[params->api].run(perf);

        /* All warmup statuses are written once every thread passes here */
        pthread_barrier_wait(tbarrier);
        if (ucx_perf_thread_any_failed(statuses, tctx->ntid)) {
            return &statuses[tid];
        }

        /* NOTE(review): only thread 0 resets, and it resets its own context
         * copy - confirm the other threads do not need a reset as well. */
        if (is_first) {
            rte_call(perf, barrier);
            ucx_perf_test_reset(perf, params);
        }
    }

    /* Run test */
    pthread_barrier_wait(tbarrier);
    statuses[tid] = ucx_perf_funcs[params->api].run(perf);
    pthread_barrier_wait(tbarrier);
    if (ucx_perf_thread_any_failed(statuses, tctx->ntid)) {
        return &statuses[tid];
    }

    if (is_first) {
        rte_call(perf, barrier);
        /* Assuming all threads are fairly treated, only thread 0 reports.
           TODO: aggregate reports */
        ucx_perf_calc_result(perf, result);
        rte_call(perf, report, result, 1);
    }

    return &statuses[tid];
}