static int ucx_perf_thread_spawn(ucx_perf_params_t* params,
                                 ucx_perf_result_t* result)
{
    ucx_perf_context_t perf;
    ucs_status_t status = UCS_OK;
    int ti, nti;

    omp_set_num_threads(params->thread_count);
    nti = params->thread_count;

    ucx_perf_thread_context_t* tctx =
        calloc(nti, sizeof(ucx_perf_thread_context_t));
    ucs_status_t* statuses = calloc(nti, sizeof(ucs_status_t));

    ucx_perf_test_reset(&perf, params);
    status = ucx_perf_funcs[params->api].setup(&perf, params);
    if (UCS_OK != status) {
        goto out_cleanup;
    }

#pragma omp parallel private(ti)
{
    ti = omp_get_thread_num();
    tctx[ti].tid      = ti;
    tctx[ti].ntid     = nti;
    tctx[ti].statuses = statuses;
    tctx[ti].params   = *params;
    tctx[ti].perf     = perf;
    /* Doctor the src and dst buffers to make them thread specific */
    tctx[ti].perf.send_buffer += ti * params->message_size;
    tctx[ti].perf.recv_buffer += ti * params->message_size;
    tctx[ti].perf.offset       = ti * params->message_size;
    ucx_perf_thread_run_test((void*)&tctx[ti]);
}

    for (ti = 0; ti < nti; ti++) {
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid,
                      ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }

    ucx_perf_funcs[params->api].cleanup(&perf);

out_cleanup:
    free(statuses);
    free(tctx);
    return status;
}
static int ucx_perf_thread_spawn(ucx_perf_context_t *perf,
                                 ucx_perf_result_t* result)
{
    ucx_perf_thread_context_t* tctx;
    ucs_status_t* statuses;
    size_t message_size;
    ucs_status_t status;
    int ti, nti;

    message_size = ucx_perf_get_message_size(&perf->params);
    omp_set_num_threads(perf->params.thread_count);
    nti = perf->params.thread_count;

    tctx     = calloc(nti, sizeof(ucx_perf_thread_context_t));
    statuses = calloc(nti, sizeof(ucs_status_t));
    if ((tctx == NULL) || (statuses == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto out_free;
    }

#pragma omp parallel private(ti)
{
    ti = omp_get_thread_num();
    tctx[ti].tid      = ti;
    tctx[ti].ntid     = nti;
    tctx[ti].statuses = statuses;
    tctx[ti].perf     = *perf;
    /* Doctor the src and dst buffers to make them thread specific */
    tctx[ti].perf.send_buffer += ti * message_size;
    tctx[ti].perf.recv_buffer += ti * message_size;
    tctx[ti].perf.offset       = ti * message_size;
    ucx_perf_thread_run_test((void*)&tctx[ti]);
}

    status = UCS_OK;
    for (ti = 0; ti < nti; ti++) {
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid,
                      ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }

out_free:
    free(statuses);
    free(tctx);
    return status;
}
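Both versions hand each OpenMP thread a private copy of the perf context plus a slot in a shared status array, then aggregate the per-thread statuses after the parallel region. The sketch below is illustrative only: it shows a minimal per-thread context layout consistent with the fields assigned above (the `params` member is used only by the first version), not the actual ucx_perf_thread_context_t definition in UCX, which may contain additional members.

/* Illustrative sketch, not the real UCX definition: a layout that would
 * satisfy the assignments made by ucx_perf_thread_spawn() above. */
typedef struct ucx_perf_thread_context {
    int                 tid;        /* this thread's index, 0..ntid-1      */
    int                 ntid;       /* total number of spawned threads     */
    ucs_status_t        *statuses;  /* shared array, one slot per thread;
                                       each thread writes statuses[tid]    */
    ucx_perf_params_t   params;     /* private copy of the test parameters
                                       (first version only)                */
    ucx_perf_context_t  perf;       /* private copy of the perf context,
                                       with send/recv buffers and offset
                                       shifted by tid * message_size       */
} ucx_perf_thread_context_t;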