static int mca_pml_ucx_send_worker_address(void) { ucp_address_t *address; ucs_status_t status; size_t addrlen; int rc; status = ucp_worker_get_address(ompi_pml_ucx.ucp_worker, &address, &addrlen); if (UCS_OK != status) { PML_UCX_ERROR("Failed to get worker address"); return OMPI_ERROR; } OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen); if (OMPI_SUCCESS != rc) { PML_UCX_ERROR("Open MPI couldn't distribute EP connection details"); return OMPI_ERROR; } ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address); return OMPI_SUCCESS; }
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf, uint64_t features) { const size_t buffer_size = 2048; ucx_perf_ep_info_t info, *remote_info; unsigned group_size, i, group_index; ucp_address_t *address; size_t address_length = 0; ucp_ep_params_t ep_params; ucs_status_t status; struct iovec vec[3]; void *rkey_buffer; void *req = NULL; void *buffer; group_size = rte_call(perf, group_size); group_index = rte_call(perf, group_index); status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status)); } goto err; } info.ucp.addr_len = address_length; info.recv_buffer = (uintptr_t)perf->recv_buffer; vec[0].iov_base = &info; vec[0].iov_len = sizeof(info); vec[1].iov_base = address; vec[1].iov_len = address_length; if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) { status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh, &rkey_buffer, &info.rkey_size); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status)); } ucp_worker_release_address(perf->ucp.worker, address); goto err; } vec[2].iov_base = rkey_buffer; vec[2].iov_len = info.rkey_size; rte_call(perf, post_vec, vec, 3, &req); ucp_rkey_buffer_release(rkey_buffer); } else { info.rkey_size = 0; rte_call(perf, post_vec, vec, 2, &req); } ucp_worker_release_address(perf->ucp.worker, address); rte_call(perf, exchange_vec, req); perf->ucp.peers = calloc(group_size, sizeof(*perf->uct.peers)); if (perf->ucp.peers == NULL) { goto err; } buffer = malloc(buffer_size); if (buffer == NULL) { ucs_error("Failed to allocate RTE receive buffer"); status = UCS_ERR_NO_MEMORY; goto err_destroy_eps; } for (i = 0; i < group_size; ++i) { if (i == group_index) { continue; } rte_call(perf, recv, i, buffer, buffer_size, req); remote_info = buffer; address = (void*)(remote_info + 1); rkey_buffer = (void*)address + remote_info->ucp.addr_len; perf->ucp.peers[i].remote_addr = remote_info->recv_buffer; ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = address; status = ucp_ep_create(perf->ucp.worker, &ep_params, &perf->ucp.peers[i].ep); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status)); } goto err_free_buffer; } if (remote_info->rkey_size > 0) { status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer, &perf->ucp.peers[i].rkey); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_fatal("ucp_rkey_unpack() failed: %s", ucs_status_string(status)); } goto err_free_buffer; } } else { perf->ucp.peers[i].rkey = NULL; } } free(buffer); status = ucp_perf_test_exchange_status(perf, UCS_OK); if (status != UCS_OK) { ucp_perf_test_destroy_eps(perf, group_size); } return status; err_free_buffer: free(buffer); err_destroy_eps: ucp_perf_test_destroy_eps(perf, group_size); err: (void)ucp_perf_test_exchange_status(perf, status); return status; }
int main(int argc, char **argv) { /* UCP temporary vars */ ucp_params_t ucp_params; ucp_config_t *config; ucs_status_t status; /* UCP handler objects */ ucp_context_h ucp_context; ucp_worker_h ucp_worker; /* OOB connection vars */ uint64_t addr_len = 0; char *server = NULL; int oob_sock = -1; int ret = -1; /* Parse the command line */ if (parse_cmd(argc, argv, &server) != UCS_OK) { goto err; } /* UCP initialization */ status = ucp_config_read(NULL, NULL, &config); if (status != UCS_OK) { goto err; } ucp_params.features = UCP_FEATURE_TAG; if (ucp_test_mode == TEST_MODE_WAIT || ucp_test_mode == TEST_MODE_EVENTFD) { ucp_params.features |= UCP_FEATURE_WAKEUP; } ucp_params.request_size = sizeof(struct ucx_context); ucp_params.request_init = request_init; ucp_params.request_cleanup = NULL; status = ucp_init(&ucp_params, config, &ucp_context); ucp_config_print(config, stdout, NULL, UCS_CONFIG_PRINT_CONFIG); ucp_config_release(config); if (status != UCS_OK) { goto err; } status = ucp_worker_create(ucp_context, UCS_THREAD_MODE_SINGLE, &ucp_worker); if (status != UCS_OK) { goto err_cleanup; } status = ucp_worker_get_address(ucp_worker, &local_addr, &local_addr_len); if (status != UCS_OK) { goto err_worker; } printf("[0x%x] local address length: %zu\n", (unsigned int)pthread_self(), local_addr_len); /* OOB connection establishment */ if (server) { peer_addr_len = local_addr_len; oob_sock = run_client(server); if (oob_sock < 0) { goto err_addr; } ret = recv(oob_sock, &addr_len, sizeof(addr_len), 0); if (ret < 0) { fprintf(stderr, "failed to receive address length\n"); goto err_addr; } peer_addr_len = addr_len; peer_addr = malloc(peer_addr_len); if (!peer_addr) { fprintf(stderr, "unable to allocate memory\n"); goto err_addr; } ret = recv(oob_sock, peer_addr, peer_addr_len, 0); if (ret < 0) { fprintf(stderr, "failed to receive address\n"); goto err_peer_addr; } } else { oob_sock = run_server(); if (oob_sock < 0) { goto err_peer_addr; } addr_len = local_addr_len; ret = send(oob_sock, &addr_len, sizeof(addr_len), 0); if (ret < 0 || ret != sizeof(addr_len)) { fprintf(stderr, "failed to send address length\n"); goto err_peer_addr; } ret = send(oob_sock, local_addr, local_addr_len, 0); if (ret < 0 || ret != local_addr_len) { fprintf(stderr, "failed to send address\n"); goto err_peer_addr; } } close(oob_sock); ret = run_test(server, ucp_worker); err_peer_addr: free(peer_addr); err_addr: ucp_worker_release_address(ucp_worker, local_addr); err_worker: ucp_worker_destroy(ucp_worker); err_cleanup: ucp_cleanup(ucp_context); err: return ret; }
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags, uint64_t features) { ucp_config_t *config; ucs_status_t status; ucp_context_h context; ucp_worker_h worker; ucp_params_t params; ucp_worker_params_t worker_params; ucp_ep_params_t ep_params; ucp_address_t *address; size_t address_length; ucp_ep_h ep; status = ucp_config_read(NULL, NULL, &config); if (status != UCS_OK) { return; } memset(¶ms, 0, sizeof(params)); params.features = features; status = ucp_init(¶ms, config, &context); if (status != UCS_OK) { printf("<Failed to create UCP context>\n"); goto out_release_config; } if (print_opts & PRINT_UCP_CONTEXT) { ucp_context_print_info(context, stdout); } if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) { goto out_cleanup_context; } worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; worker_params.thread_mode = UCS_THREAD_MODE_MULTI; status = ucp_worker_create(context, &worker_params, &worker); if (status != UCS_OK) { printf("<Failed to create UCP worker>\n"); goto out_cleanup_context; } if (print_opts & PRINT_UCP_WORKER) { ucp_worker_print_info(worker, stdout); } if (print_opts & PRINT_UCP_EP) { status = ucp_worker_get_address(worker, &address, &address_length); if (status != UCS_OK) { printf("<Failed to get UCP worker address>\n"); goto out_destroy_worker; } ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = address; status = ucp_ep_create(worker, &ep_params, &ep); ucp_worker_release_address(worker, address); if (status != UCS_OK) { printf("<Failed to get create UCP endpoint>\n"); goto out_destroy_worker; } ucp_ep_print_info(ep, stdout); ucp_ep_destroy(ep); } out_destroy_worker: ucp_worker_destroy(worker); out_cleanup_context: ucp_cleanup(context); out_release_config: ucp_config_release(config); }
int mca_spml_ucx_add_procs(oshmem_proc_t** procs, size_t nprocs) { size_t i, n; int rc = OSHMEM_ERROR; int my_rank = oshmem_my_proc_id(); ucs_status_t err; ucp_address_t *wk_local_addr; size_t wk_addr_len; int *wk_roffs, *wk_rsizes; char *wk_raddrs; mca_spml_ucx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx.ucp_peers))); if (NULL == mca_spml_ucx.ucp_peers) { goto error; } err = ucp_worker_get_address(mca_spml_ucx.ucp_worker, &wk_local_addr, &wk_addr_len); if (err != UCS_OK) { goto error; } dump_address(my_rank, (char *)wk_local_addr, wk_addr_len); rc = oshmem_shmem_xchng(wk_local_addr, wk_addr_len, nprocs, (void **)&wk_raddrs, &wk_roffs, &wk_rsizes); if (rc != OSHMEM_SUCCESS) { goto error; } opal_progress_register(spml_ucx_progress); /* Get the EP connection requests for all the processes from modex */ for (n = 0; n < nprocs; ++n) { i = (my_rank + n) % nprocs; dump_address(i, (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]); err = ucp_ep_create(mca_spml_ucx.ucp_worker, (ucp_address_t *)(wk_raddrs + wk_roffs[i]), &mca_spml_ucx.ucp_peers[i].ucp_conn); if (UCS_OK != err) { SPML_ERROR("ucp_ep_create failed!!!\n"); goto error2; } procs[i]->num_transports = 1; procs[i]->transport_ids = spml_ucx_transport_ids; } ucp_worker_release_address(mca_spml_ucx.ucp_worker, wk_local_addr); free(wk_raddrs); free(wk_rsizes); free(wk_roffs); SPML_VERBOSE(50, "*** ADDED PROCS ***"); return OSHMEM_SUCCESS; error2: for (i = 0; i < nprocs; ++i) { if (mca_spml_ucx.ucp_peers[i].ucp_conn) { ucp_ep_destroy(mca_spml_ucx.ucp_peers[i].ucp_conn); } } if (mca_spml_ucx.ucp_peers) free(mca_spml_ucx.ucp_peers); if (wk_raddrs) free(wk_raddrs); if (wk_rsizes) free(wk_rsizes); if (wk_roffs) free(wk_roffs); if (mca_spml_ucx.ucp_peers) free(mca_spml_ucx.ucp_peers); error: rc = OSHMEM_ERR_OUT_OF_RESOURCE; SPML_ERROR("add procs FAILED rc=%d", rc); return rc; }
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf, uint64_t features) { unsigned group_size, i, group_index; ucp_address_t *address; size_t address_length = 0; ucs_status_t status; struct iovec vec[3]; void *rkey_buffer; size_t rkey_size; void *req = NULL; int iov_len; group_size = rte_call(perf, group_size); group_index = rte_call(perf, group_index); status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status)); } goto err; } vec[0].iov_base = address; vec[0].iov_len = address_length; vec[1].iov_base = &perf->recv_buffer; vec[1].iov_len = sizeof(uintptr_t); if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) { status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh, &rkey_buffer, &rkey_size); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status)); } ucp_worker_release_address(perf->ucp.worker, address); goto err; } vec[2].iov_base = rkey_buffer; vec[2].iov_len = rkey_size; iov_len = 3; } else { rkey_buffer = NULL; iov_len = 2; } rte_call(perf, post_vec, vec, iov_len, &req); if (rkey_buffer != NULL) { ucp_rkey_buffer_release(rkey_buffer); } ucp_worker_release_address(perf->ucp.worker, address); rte_call(perf, exchange_vec, req); perf->ucp.peers = calloc(group_size, sizeof(*perf->uct.peers)); if (perf->ucp.peers == NULL) { goto err; } for (i = 0; i < group_size; ++i) { if (i == group_index) { continue; } address = malloc(address_length); rkey_buffer = NULL; vec[0].iov_base = address; vec[0].iov_len = address_length; vec[1].iov_base = &perf->ucp.peers[i].remote_addr; vec[1].iov_len = sizeof(uintptr_t); if (iov_len > 2) { rkey_buffer = malloc(rkey_size); vec[2].iov_base = rkey_buffer; vec[2].iov_len = rkey_size; } rte_call(perf, recv_vec, i, vec, iov_len, req); status = ucp_ep_create(perf->ucp.worker, address, &perf->ucp.peers[i].ep); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status)); } free(rkey_buffer); free(address); goto err_destroy_eps; } free(address); if (iov_len > 2) { status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer, &perf->ucp.peers[i].rkey); if (status != UCS_OK) { if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { ucs_error("ucp_rkey_unpack() failed: %s", ucs_status_string(status)); } free(rkey_buffer); goto err_destroy_eps; } } else { perf->ucp.peers[i].rkey = NULL; } free(rkey_buffer); } status = ucp_perf_test_exchange_status(perf, UCS_OK); if (status != UCS_OK) { ucp_perf_test_destroy_eps(perf, group_size); } return status; err_destroy_eps: ucp_perf_test_destroy_eps(perf, group_size); err: (void)ucp_perf_test_exchange_status(perf, status); return status; }
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags, uint64_t ctx_features, const ucp_ep_params_t *base_ep_params, size_t estimated_num_eps, unsigned dev_type_bitmap, const char *mem_size) { ucp_config_t *config; ucs_status_t status; ucs_status_ptr_t status_ptr; ucp_context_h context; ucp_worker_h worker; ucp_params_t params; ucp_worker_params_t worker_params; ucp_ep_params_t ep_params; ucp_address_t *address; size_t address_length; resource_usage_t usage; ucp_ep_h ep; status = ucp_config_read(NULL, NULL, &config); if (status != UCS_OK) { return; } memset(¶ms, 0, sizeof(params)); params.field_mask = UCP_PARAM_FIELD_FEATURES | UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; params.features = ctx_features; params.estimated_num_eps = estimated_num_eps; get_resource_usage(&usage); if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SELF))) { ucp_config_modify(config, "SELF_DEVICES", ""); } if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SHM))) { ucp_config_modify(config, "SHM_DEVICES", ""); } if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_NET))) { ucp_config_modify(config, "NET_DEVICES", ""); } status = ucp_init(¶ms, config, &context); if (status != UCS_OK) { printf("<Failed to create UCP context>\n"); goto out_release_config; } if ((print_opts & PRINT_MEM_MAP) && (mem_size != NULL)) { ucp_mem_print_info(mem_size, context, stdout); } if (print_opts & PRINT_UCP_CONTEXT) { ucp_context_print_info(context, stdout); print_resource_usage(&usage, "UCP context"); } if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) { goto out_cleanup_context; } worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; worker_params.thread_mode = UCS_THREAD_MODE_MULTI; get_resource_usage(&usage); status = ucp_worker_create(context, &worker_params, &worker); if (status != UCS_OK) { printf("<Failed to create UCP worker>\n"); goto out_cleanup_context; } if (print_opts & PRINT_UCP_WORKER) { ucp_worker_print_info(worker, stdout); print_resource_usage(&usage, "UCP worker"); } if (print_opts & PRINT_UCP_EP) { status = ucp_worker_get_address(worker, &address, &address_length); if (status != UCS_OK) { printf("<Failed to get UCP worker address>\n"); goto out_destroy_worker; } ep_params = *base_ep_params; ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = address; status = ucp_ep_create(worker, &ep_params, &ep); ucp_worker_release_address(worker, address); if (status != UCS_OK) { printf("<Failed to create UCP endpoint>\n"); goto out_destroy_worker; } ucp_ep_print_info(ep, stdout); status_ptr = ucp_disconnect_nb(ep); if (UCS_PTR_IS_PTR(status_ptr)) { do { ucp_worker_progress(worker); status = ucp_request_test(status_ptr, NULL); } while (status == UCS_INPROGRESS); ucp_request_release(status_ptr); } } out_destroy_worker: ucp_worker_destroy(worker); out_cleanup_context: ucp_cleanup(context); out_release_config: ucp_config_release(config); }
int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) { size_t i, j, n; int rc = OSHMEM_ERROR; int my_rank = oshmem_my_proc_id(); ucs_status_t err; ucp_address_t *wk_local_addr; size_t wk_addr_len; int *wk_roffs = NULL; int *wk_rsizes = NULL; char *wk_raddrs = NULL; ucp_ep_params_t ep_params; mca_spml_ucx_ctx_default.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx_ctx_default.ucp_peers))); if (NULL == mca_spml_ucx_ctx_default.ucp_peers) { goto error; } err = ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len); if (err != UCS_OK) { goto error; } dump_address(my_rank, (char *)wk_local_addr, wk_addr_len); rc = oshmem_shmem_xchng(wk_local_addr, wk_addr_len, nprocs, (void **)&wk_raddrs, &wk_roffs, &wk_rsizes); if (rc != OSHMEM_SUCCESS) { goto error; } opal_progress_register(spml_ucx_default_progress); mca_spml_ucx.remote_addrs_tbl = (char **)calloc(nprocs, sizeof(char *)); memset(mca_spml_ucx.remote_addrs_tbl, 0, nprocs * sizeof(char *)); /* Get the EP connection requests for all the processes from modex */ for (n = 0; n < nprocs; ++n) { i = (my_rank + n) % nprocs; dump_address(i, (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]); ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)(wk_raddrs + wk_roffs[i]); err = ucp_ep_create(mca_spml_ucx_ctx_default.ucp_worker, &ep_params, &mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn); if (UCS_OK != err) { SPML_UCX_ERROR("ucp_ep_create(proc=%zu/%zu) failed: %s", n, nprocs, ucs_status_string(err)); goto error2; } OSHMEM_PROC_DATA(procs[i])->num_transports = 1; OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids; for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) { mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL; } mca_spml_ucx.remote_addrs_tbl[i] = (char *)malloc(wk_rsizes[i]); memcpy(mca_spml_ucx.remote_addrs_tbl[i], (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]); } ucp_worker_release_address(mca_spml_ucx_ctx_default.ucp_worker, wk_local_addr); free(wk_raddrs); free(wk_rsizes); free(wk_roffs); SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***"); return OSHMEM_SUCCESS; error2: for (i = 0; i < nprocs; ++i) { if (mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn) { ucp_ep_destroy(mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn); } if (mca_spml_ucx.remote_addrs_tbl[i]) { free(mca_spml_ucx.remote_addrs_tbl[i]); } } if (mca_spml_ucx_ctx_default.ucp_peers) free(mca_spml_ucx_ctx_default.ucp_peers); if (mca_spml_ucx.remote_addrs_tbl) free(mca_spml_ucx.remote_addrs_tbl); free(wk_raddrs); free(wk_rsizes); free(wk_roffs); error: rc = OSHMEM_ERR_OUT_OF_RESOURCE; SPML_UCX_ERROR("add procs FAILED rc=%d", rc); return rc; }
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { ompi_osc_ucx_module_t *module = NULL; char *name = NULL; long values[2]; int ret = OMPI_SUCCESS; ucs_status_t status; int i, comm_size = ompi_comm_size(comm); int is_eps_ready; bool eps_created = false, worker_created = false; ucp_address_t *my_addr = NULL; size_t my_addr_len; char *recv_buf = NULL; void *rkey_buffer = NULL, *state_rkey_buffer = NULL; size_t rkey_buffer_size, state_rkey_buffer_size; void *state_base = NULL; void * my_info = NULL; size_t my_info_len; int disps[comm_size]; int rkey_sizes[comm_size]; /* the osc/sm component is the exclusive provider for support for * shared memory windows */ if (flavor == MPI_WIN_FLAVOR_SHARED) { return OMPI_ERR_NOT_SUPPORTED; } /* if UCP worker has never been initialized before, init it first */ if (mca_osc_ucx_component.ucp_worker == NULL) { ucp_worker_params_t worker_params; ucp_worker_attr_t worker_attr; memset(&worker_params, 0, sizeof(ucp_worker_h)); worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true) ? UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE; status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params, &(mca_osc_ucx_component.ucp_worker)); if (UCS_OK != status) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_worker_create failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } /* query UCP worker attributes */ worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr); if (UCS_OK != status) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_worker_query failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } if (mca_osc_ucx_component.enable_mpi_threads == true && worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucx does not support multithreading\n", __FILE__, __LINE__); ret = OMPI_ERROR; goto error; } worker_created = true; } /* create module structure */ module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t)); if (module == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } /* fill in the function pointer part */ memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t)); ret = ompi_comm_dup(comm, &module->comm); if (ret != OMPI_SUCCESS) { goto error; } asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm)); ompi_win_set_name(win, name); free(name); /* share everyone's displacement units. Only do an allgather if strictly necessary, since it requires O(p) state. */ values[0] = disp_unit; values[1] = -disp_unit; ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG, MPI_MIN, module->comm, module->comm->c_coll->coll_allreduce_module); if (OMPI_SUCCESS != ret) { goto error; } if (values[0] == -values[1]) { /* everyone has the same disp_unit, we do not need O(p) space */ module->disp_unit = disp_unit; } else { /* different disp_unit sizes, allocate O(p) space to store them */ module->disp_unit = -1; module->disp_units = calloc(comm_size, sizeof(int)); if (module->disp_units == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT, module->disp_units, 1, MPI_INT, module->comm, module->comm->c_coll->coll_allgather_module); if (OMPI_SUCCESS != ret) { goto error; } } /* exchange endpoints if necessary */ is_eps_ready = 1; for (i = 0; i < comm_size; i++) { if (OSC_UCX_GET_EP(module->comm, i) == NULL) { is_eps_ready = 0; break; } } ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1, MPI_INT, MPI_LAND, module->comm, module->comm->c_coll->coll_allreduce_module); if (OMPI_SUCCESS != ret) { goto error; } if (!is_eps_ready) { status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker, &my_addr, &my_addr_len); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_worker_get_address failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } ret = allgather_len_and_info(my_addr, (int)my_addr_len, &recv_buf, disps, module->comm); if (ret != OMPI_SUCCESS) { goto error; } for (i = 0; i < comm_size; i++) { if (OSC_UCX_GET_EP(module->comm, i) == NULL) { ucp_ep_params_t ep_params; ucp_ep_h ep; memset(&ep_params, 0, sizeof(ucp_ep_params_t)); ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]); status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_ep_create failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep; } } ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr); my_addr = NULL; free(recv_buf); recv_buf = NULL; eps_created = true; } ret = mem_map(base, size, &(module->memh), module, flavor); if (ret != OMPI_SUCCESS) { goto error; } state_base = (void *)&(module->state); ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t), &(module->state_memh), module, MPI_WIN_FLAVOR_CREATE); if (ret != OMPI_SUCCESS) { goto error; } module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t)); if (module->win_info_array == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t)); if (module->state_info_array == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh, &rkey_buffer, &rkey_buffer_size); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_rkey_pack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh, &state_rkey_buffer, &state_rkey_buffer_size); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_rkey_pack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size; my_info = malloc(my_info_len); if (my_info == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } memcpy(my_info, base, sizeof(uint64_t)); memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t)); memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size); memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size), state_rkey_buffer, state_rkey_buffer_size); ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm); if (ret != OMPI_SUCCESS) { goto error; } ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT, rkey_sizes, 1, MPI_INT, comm, comm->c_coll->coll_allgather_module); if (OMPI_SUCCESS != ret) { goto error; } for (i = 0; i < comm_size; i++) { ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); assert(ep != NULL); memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t)); memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], sizeof(uint64_t)); status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]), &((module->win_info_array[i]).rkey)); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_ep_rkey_unpack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]), &((module->state_info_array[i]).rkey)); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_ep_rkey_unpack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } } free(my_info); free(recv_buf); ucp_rkey_buffer_release(rkey_buffer); ucp_rkey_buffer_release(state_rkey_buffer); module->state.lock = TARGET_LOCK_UNLOCKED; module->state.post_index = 0; memset((void *)module->state.post_state, 0, sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX); module->state.complete_count = 0; module->state.req_flag = 0; module->state.acc_lock = TARGET_LOCK_UNLOCKED; module->epoch_type.access = NONE_EPOCH; module->epoch_type.exposure = NONE_EPOCH; module->lock_count = 0; module->post_count = 0; module->start_group = NULL; module->post_group = NULL; OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); OBJ_CONSTRUCT(&module->pending_posts, opal_list_t); module->global_ops_num = 0; module->per_target_ops_nums = calloc(comm_size, sizeof(int)); module->start_grp_ranks = NULL; module->lock_all_is_nocheck = false; ret = opal_hash_table_init(&module->outstanding_locks, comm_size); if (ret != OPAL_SUCCESS) { goto error; } win->w_osc_module = &module->super; /* sync with everyone */ ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); if (ret != OMPI_SUCCESS) { goto error; } return ret; error: if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr); if (recv_buf) free(recv_buf); if (my_info) free(my_info); for (i = 0; i < comm_size; i++) { if ((module->win_info_array[i]).rkey != NULL) { ucp_rkey_destroy((module->win_info_array[i]).rkey); } if ((module->state_info_array[i]).rkey != NULL) { ucp_rkey_destroy((module->state_info_array[i]).rkey); } } if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer); if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer); if (module->win_info_array) free(module->win_info_array); if (module->state_info_array) free(module->state_info_array); if (module->disp_units) free(module->disp_units); if (module->comm) ompi_comm_free(&module->comm); if (module->per_target_ops_nums) free(module->per_target_ops_nums); if (eps_created) { for (i = 0; i < comm_size; i++) { ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); ucp_ep_destroy(ep); } } if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker); if (module) free(module); return ret; }