int mca_pml_ucx_init(void)
{
    ucs_status_t status;
    int rc;

    PML_UCX_VERBOSE(1, "mca_pml_ucx_init");

    /* TODO check MPI thread mode */
    /* Note: this snippet uses the older ucp_worker_create() signature that
     * took the thread mode directly instead of a ucp_worker_params_t. */
    status = ucp_worker_create(ompi_pml_ucx.ucp_context, UCS_THREAD_MODE_SINGLE,
                               &ompi_pml_ucx.ucp_worker);
    if (UCS_OK != status) {
        return OMPI_ERROR;
    }

    rc = mca_pml_ucx_send_worker_address();
    if (rc < 0) {
        /* release the worker created above instead of leaking it */
        ucp_worker_destroy(ompi_pml_ucx.ucp_worker);
        return rc;
    }

    /* Initialize the free lists */
    OBJ_CONSTRUCT(&ompi_pml_ucx.persistent_reqs, mca_pml_ucx_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_ucx.convs, mca_pml_ucx_freelist_t);

    /* Create a completed request to be returned from isend */
    OBJ_CONSTRUCT(&ompi_pml_ucx.completed_send_req, ompi_request_t);
    mca_pml_ucx_completed_request_init(&ompi_pml_ucx.completed_send_req);

    opal_progress_register(mca_pml_ucx_progress);

    PML_UCX_VERBOSE(2, "created ucp context %p, worker %p",
                    (void *)ompi_pml_ucx.ucp_context,
                    (void *)ompi_pml_ucx.ucp_worker);
    return OMPI_SUCCESS;
}
static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params)
{
    ucp_params_t ucp_params;
    ucp_config_t *config;
    ucs_status_t status;
    uint64_t features;

    status = ucp_perf_test_check_params(params, &features);
    if (status != UCS_OK) {
        goto err;
    }

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        goto err;
    }

    ucp_params.features        = features;
    ucp_params.request_size    = 0;
    ucp_params.request_init    = NULL;
    ucp_params.request_cleanup = NULL;

    status = ucp_init(&ucp_params, config, &perf->ucp.context);
    ucp_config_release(config);
    if (status != UCS_OK) {
        goto err;
    }

    /* Older ucp_worker_create() signature: thread mode passed directly */
    status = ucp_worker_create(perf->ucp.context, params->thread_mode,
                               &perf->ucp.worker);
    if (status != UCS_OK) {
        goto err_cleanup;
    }

    status = ucp_perf_test_alloc_mem(perf, params);
    if (status != UCS_OK) {
        ucs_warn("ucp test failed to allocate memory");
        goto err_destroy_worker;
    }

    status = ucp_perf_test_setup_endpoints(perf, features);
    if (status != UCS_OK) {
        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("Failed to setup endpoints: %s", ucs_status_string(status));
        }
        goto err_free_mem;
    }

    return UCS_OK;

err_free_mem:
    ucp_perf_test_free_mem(perf);
err_destroy_worker:
    ucp_worker_destroy(perf->ucp.worker);
err_cleanup:
    ucp_cleanup(perf->ucp.context);
err:
    return status;
}
static int spml_ucx_init(void)
{
    ucp_worker_params_t params;
    ucs_status_t err;

    params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    params.thread_mode = UCS_THREAD_MODE_SINGLE;

    err = ucp_worker_create(mca_spml_ucx.ucp_context, &params,
                            &mca_spml_ucx.ucp_worker);
    if (UCS_OK != err) {
        return OSHMEM_ERROR;
    }

    return OSHMEM_SUCCESS;
}
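For contrast with the older call sites above, a minimal sketch of the params-based worker creation with its matching teardown. It assumes only an already-initialized ucp_context_h; the helper name is illustrative, not taken from any of the snippets here:

#include <ucp/api/ucp.h>

/* Create a single-threaded UCP worker on an existing context.
 * Sketch only: field_mask tells UCX which fields of the params
 * struct are valid, so unset fields need not be initialized. */
static ucs_status_t create_worker(ucp_context_h context, ucp_worker_h *worker_p)
{
    ucp_worker_params_t params;

    params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    params.thread_mode = UCS_THREAD_MODE_SINGLE;
    return ucp_worker_create(context, &params, worker_p);
}

/* The worker is later released with ucp_worker_destroy(worker), before
 * ucp_cleanup(context), as the teardown paths in the other snippets do. */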
int mlx_ep_open(struct fid_domain *domain, struct fi_info *info,
                struct fid_ep **fid, void *context)
{
    struct mlx_ep *ep;
    struct mlx_domain *u_domain;
    int ofi_status = FI_SUCCESS;
    ucs_status_t status = UCS_OK;
    ucp_worker_params_t worker_params;

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

    u_domain = container_of(domain, struct mlx_domain, u_domain.domain_fid);

    ep = (struct mlx_ep *)calloc(1, sizeof(struct mlx_ep));
    if (!ep) {
        return -ENOMEM;
    }

    ofi_status = ofi_endpoint_init(domain, &mlx_util_prov, info, &ep->ep,
                                   context, mlx_ep_progress);
    if (ofi_status) {
        goto free_ep;
    }

    status = ucp_worker_create(u_domain->context, &worker_params, &(ep->worker));
    if (status != UCS_OK) {
        ofi_status = MLX_TRANSLATE_ERRCODE(status);
        ofi_atomic_dec32(&(u_domain->u_domain.ref));
        goto free_ep;
    }

    ep->ep.ep_fid.fid.ops = &mlx_fi_ops;
    ep->ep.ep_fid.ops     = &mlx_ep_ops;
    ep->ep.ep_fid.cm      = &mlx_cm_ops;
    ep->ep.ep_fid.tagged  = &mlx_tagged_ops;
    ep->ep.flags          = info->mode;
    ep->ep.caps           = u_domain->u_domain.info_domain_caps;

    *fid = &(ep->ep.ep_fid);
    return FI_SUCCESS;

free_ep:
    free(ep);
    return ofi_status;
}
static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params)
{
    ucp_params_t ucp_params;
    ucp_worker_params_t worker_params;
    ucp_config_t *config;
    ucs_status_t status;

    ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES;
    ucp_params.features   = 0;

    status = ucp_perf_test_fill_params(params, &ucp_params);
    if (status != UCS_OK) {
        goto err;
    }

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        goto err;
    }

    status = ucp_init(&ucp_params, config, &perf->ucp.context);
    ucp_config_release(config);
    if (status != UCS_OK) {
        goto err;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = params->thread_mode;

    status = ucp_worker_create(perf->ucp.context, &worker_params,
                               &perf->ucp.worker);
    if (status != UCS_OK) {
        goto err_cleanup;
    }

    status = ucp_perf_test_alloc_mem(perf, params);
    if (status != UCS_OK) {
        ucs_warn("ucp test failed to allocate memory");
        goto err_destroy_worker;
    }

    status = ucp_perf_test_setup_endpoints(perf, ucp_params.features);
    if (status != UCS_OK) {
        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("Failed to setup endpoints: %s", ucs_status_string(status));
        }
        goto err_free_mem;
    }

    return UCS_OK;

err_free_mem:
    ucp_perf_test_free_mem(perf);
err_destroy_worker:
    ucp_worker_destroy(perf->ucp.worker);
err_cleanup:
    ucp_cleanup(perf->ucp.context);
err:
    return status;
}
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
                    uint64_t features)
{
    ucp_config_t *config;
    ucs_status_t status;
    ucp_context_h context;
    ucp_worker_h worker;
    ucp_params_t params;
    ucp_worker_params_t worker_params;
    ucp_ep_params_t ep_params;
    ucp_address_t *address;
    size_t address_length;
    ucp_ep_h ep;

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        return;
    }

    memset(&params, 0, sizeof(params));
    params.features = features;

    status = ucp_init(&params, config, &context);
    if (status != UCS_OK) {
        printf("<Failed to create UCP context>\n");
        goto out_release_config;
    }

    if (print_opts & PRINT_UCP_CONTEXT) {
        ucp_context_print_info(context, stdout);
    }

    if (!(print_opts & (PRINT_UCP_WORKER | PRINT_UCP_EP))) {
        goto out_cleanup_context;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

    status = ucp_worker_create(context, &worker_params, &worker);
    if (status != UCS_OK) {
        printf("<Failed to create UCP worker>\n");
        goto out_cleanup_context;
    }

    if (print_opts & PRINT_UCP_WORKER) {
        ucp_worker_print_info(worker, stdout);
    }

    if (print_opts & PRINT_UCP_EP) {
        status = ucp_worker_get_address(worker, &address, &address_length);
        if (status != UCS_OK) {
            printf("<Failed to get UCP worker address>\n");
            goto out_destroy_worker;
        }

        /* create a loopback endpoint to this worker's own address */
        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = address;

        status = ucp_ep_create(worker, &ep_params, &ep);
        ucp_worker_release_address(worker, address);
        if (status != UCS_OK) {
            printf("<Failed to create UCP endpoint>\n");
            goto out_destroy_worker;
        }

        ucp_ep_print_info(ep, stdout);
        ucp_ep_destroy(ep);
    }

out_destroy_worker:
    ucp_worker_destroy(worker);
out_cleanup_context:
    ucp_cleanup(context);
out_release_config:
    ucp_config_release(config);
}
int main(int argc, char **argv)
{
    /* UCP temporary vars */
    ucp_params_t ucp_params;
    ucp_config_t *config;
    ucs_status_t status;

    /* UCP handler objects */
    ucp_context_h ucp_context;
    ucp_worker_h ucp_worker;

    /* OOB connection vars */
    uint64_t addr_len = 0;
    char *server = NULL;
    int oob_sock = -1;
    int ret = -1;

    /* Parse the command line */
    if (parse_cmd(argc, argv, &server) != UCS_OK) {
        goto err;
    }

    /* UCP initialization */
    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        goto err;
    }

    ucp_params.features = UCP_FEATURE_TAG;
    if (ucp_test_mode == TEST_MODE_WAIT || ucp_test_mode == TEST_MODE_EVENTFD) {
        ucp_params.features |= UCP_FEATURE_WAKEUP;
    }
    ucp_params.request_size    = sizeof(struct ucx_context);
    ucp_params.request_init    = request_init;
    ucp_params.request_cleanup = NULL;

    status = ucp_init(&ucp_params, config, &ucp_context);

    ucp_config_print(config, stdout, NULL, UCS_CONFIG_PRINT_CONFIG);
    ucp_config_release(config);
    if (status != UCS_OK) {
        goto err;
    }

    /* Older ucp_worker_create() signature: thread mode passed directly */
    status = ucp_worker_create(ucp_context, UCS_THREAD_MODE_SINGLE, &ucp_worker);
    if (status != UCS_OK) {
        goto err_cleanup;
    }

    status = ucp_worker_get_address(ucp_worker, &local_addr, &local_addr_len);
    if (status != UCS_OK) {
        goto err_worker;
    }

    printf("[0x%x] local address length: %zu\n",
           (unsigned int)pthread_self(), local_addr_len);

    /* OOB connection establishment */
    if (server) {
        peer_addr_len = local_addr_len;

        oob_sock = run_client(server);
        if (oob_sock < 0) {
            goto err_addr;
        }

        ret = recv(oob_sock, &addr_len, sizeof(addr_len), 0);
        if (ret < 0) {
            fprintf(stderr, "failed to receive address length\n");
            goto err_addr;
        }

        peer_addr_len = addr_len;
        peer_addr = malloc(peer_addr_len);
        if (!peer_addr) {
            fprintf(stderr, "unable to allocate memory\n");
            goto err_addr;
        }

        ret = recv(oob_sock, peer_addr, peer_addr_len, 0);
        if (ret < 0) {
            fprintf(stderr, "failed to receive address\n");
            goto err_peer_addr;
        }
    } else {
        oob_sock = run_server();
        if (oob_sock < 0) {
            goto err_peer_addr;
        }

        addr_len = local_addr_len;
        ret = send(oob_sock, &addr_len, sizeof(addr_len), 0);
        if (ret < 0 || (size_t)ret != sizeof(addr_len)) {
            fprintf(stderr, "failed to send address length\n");
            goto err_peer_addr;
        }

        ret = send(oob_sock, local_addr, local_addr_len, 0);
        if (ret < 0 || (size_t)ret != local_addr_len) {
            fprintf(stderr, "failed to send address\n");
            goto err_peer_addr;
        }
    }

    close(oob_sock);

    ret = run_test(server, ucp_worker);

err_peer_addr:
    free(peer_addr);
err_addr:
    ucp_worker_release_address(ucp_worker, local_addr);
err_worker:
    ucp_worker_destroy(ucp_worker);
err_cleanup:
    ucp_cleanup(ucp_context);
err:
    return ret;
}
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
                    uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
                    size_t estimated_num_eps, unsigned dev_type_bitmap,
                    const char *mem_size)
{
    ucp_config_t *config;
    ucs_status_t status;
    ucs_status_ptr_t status_ptr;
    ucp_context_h context;
    ucp_worker_h worker;
    ucp_params_t params;
    ucp_worker_params_t worker_params;
    ucp_ep_params_t ep_params;
    ucp_address_t *address;
    size_t address_length;
    resource_usage_t usage;
    ucp_ep_h ep;

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        return;
    }

    memset(&params, 0, sizeof(params));
    params.field_mask        = UCP_PARAM_FIELD_FEATURES |
                               UCP_PARAM_FIELD_ESTIMATED_NUM_EPS;
    params.features          = ctx_features;
    params.estimated_num_eps = estimated_num_eps;

    get_resource_usage(&usage);

    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SELF))) {
        ucp_config_modify(config, "SELF_DEVICES", "");
    }
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SHM))) {
        ucp_config_modify(config, "SHM_DEVICES", "");
    }
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_NET))) {
        ucp_config_modify(config, "NET_DEVICES", "");
    }

    status = ucp_init(&params, config, &context);
    if (status != UCS_OK) {
        printf("<Failed to create UCP context>\n");
        goto out_release_config;
    }

    if ((print_opts & PRINT_MEM_MAP) && (mem_size != NULL)) {
        ucp_mem_print_info(mem_size, context, stdout);
    }

    if (print_opts & PRINT_UCP_CONTEXT) {
        ucp_context_print_info(context, stdout);
        print_resource_usage(&usage, "UCP context");
    }

    if (!(print_opts & (PRINT_UCP_WORKER | PRINT_UCP_EP))) {
        goto out_cleanup_context;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

    get_resource_usage(&usage);

    status = ucp_worker_create(context, &worker_params, &worker);
    if (status != UCS_OK) {
        printf("<Failed to create UCP worker>\n");
        goto out_cleanup_context;
    }

    if (print_opts & PRINT_UCP_WORKER) {
        ucp_worker_print_info(worker, stdout);
        print_resource_usage(&usage, "UCP worker");
    }

    if (print_opts & PRINT_UCP_EP) {
        status = ucp_worker_get_address(worker, &address, &address_length);
        if (status != UCS_OK) {
            printf("<Failed to get UCP worker address>\n");
            goto out_destroy_worker;
        }

        ep_params = *base_ep_params;
        ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address     = address;

        status = ucp_ep_create(worker, &ep_params, &ep);
        ucp_worker_release_address(worker, address);
        if (status != UCS_OK) {
            printf("<Failed to create UCP endpoint>\n");
            goto out_destroy_worker;
        }

        ucp_ep_print_info(ep, stdout);

        status_ptr = ucp_disconnect_nb(ep);
        if (UCS_PTR_IS_PTR(status_ptr)) {
            do {
                ucp_worker_progress(worker);
                status = ucp_request_test(status_ptr, NULL);
            } while (status == UCS_INPROGRESS);
            ucp_request_release(status_ptr);
        }
    }

out_destroy_worker:
    ucp_worker_destroy(worker);
out_cleanup_context:
    ucp_cleanup(context);
out_release_config:
    ucp_config_release(config);
}
int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
{
    mca_spml_ucx_ctx_t *ucx_ctx;
    ucp_worker_params_t params;
    ucp_ep_params_t ep_params;
    size_t i, j, nprocs = oshmem_num_procs();
    ucs_status_t err;
    spml_ucx_mkey_t *ucx_mkey;
    sshmem_mkey_t *mkey;
    int rc = OSHMEM_ERROR;

    ucx_ctx = malloc(sizeof(mca_spml_ucx_ctx_t));
    if (NULL == ucx_ctx) {
        /* the original did not check this allocation */
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }
    ucx_ctx->options = options;

    params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE ||
        options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) {
        params.thread_mode = UCS_THREAD_MODE_SINGLE;
    } else {
        params.thread_mode = UCS_THREAD_MODE_MULTI;
    }

    err = ucp_worker_create(mca_spml_ucx.ucp_context, &params,
                            &ucx_ctx->ucp_worker);
    if (UCS_OK != err) {
        free(ucx_ctx);
        return OSHMEM_ERROR;
    }

    ucx_ctx->ucp_peers = (ucp_peer_t *)calloc(nprocs, sizeof(*(ucx_ctx->ucp_peers)));
    if (NULL == ucx_ctx->ucp_peers) {
        goto error;
    }

    if (mca_spml_ucx.active_array.ctxs_count == 0) {
        opal_progress_register(spml_ucx_ctx_progress);
    }

    for (i = 0; i < nprocs; i++) {
        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]);

        err = ucp_ep_create(ucx_ctx->ucp_worker, &ep_params,
                            &ucx_ctx->ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            SPML_ERROR("ucp_ep_create(proc=%zu/%zu) failed: %s", i, nprocs,
                       ucs_status_string(err));
            goto error2;
        }

        for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
            mkey     = &memheap_map->mem_segs[j].mkeys_cache[i][0];
            ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key;
            err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
                                     mkey->u.data, &ucx_mkey->rkey);
            if (UCS_OK != err) {
                SPML_UCX_ERROR("failed to unpack rkey");
                goto error2;
            }
            mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
        }
    }

    SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex);
    _ctx_add(&mca_spml_ucx.active_array, ucx_ctx);
    SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);

    (*ctx) = (shmem_ctx_t)ucx_ctx;
    return OSHMEM_SUCCESS;

error2:
    for (i = 0; i < nprocs; i++) {
        if (ucx_ctx->ucp_peers[i].ucp_conn) {
            ucp_ep_destroy(ucx_ctx->ucp_peers[i].ucp_conn);
        }
    }
    if (ucx_ctx->ucp_peers) {
        free(ucx_ctx->ucp_peers);
    }
error:
    ucp_worker_destroy(ucx_ctx->ucp_worker);
    free(ucx_ctx);
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_ERROR("ctx create FAILED rc=%d", rc);
    return rc;
}
static int component_select(struct ompi_win_t *win, void **base, size_t size,
                            int disp_unit, struct ompi_communicator_t *comm,
                            struct opal_info_t *info, int flavor, int *model)
{
    ompi_osc_ucx_module_t *module = NULL;
    char *name = NULL;
    long values[2];
    int ret = OMPI_SUCCESS;
    ucs_status_t status;
    int i, comm_size = ompi_comm_size(comm);
    int is_eps_ready;
    bool eps_created = false, worker_created = false;
    ucp_address_t *my_addr = NULL;
    size_t my_addr_len;
    char *recv_buf = NULL;
    void *rkey_buffer = NULL, *state_rkey_buffer = NULL;
    size_t rkey_buffer_size, state_rkey_buffer_size;
    void *state_base = NULL;
    void *my_info = NULL;
    size_t my_info_len;
    int disps[comm_size];
    int rkey_sizes[comm_size];

    /* the osc/sm component is the exclusive provider for support for
     * shared memory windows */
    if (flavor == MPI_WIN_FLAVOR_SHARED) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* if UCP worker has never been initialized before, init it first */
    if (mca_osc_ucx_component.ucp_worker == NULL) {
        ucp_worker_params_t worker_params;
        ucp_worker_attr_t worker_attr;

        /* zero the whole params struct (the original memset used
         * sizeof(ucp_worker_h), which only cleared part of it) */
        memset(&worker_params, 0, sizeof(worker_params));
        worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
        worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true)
                                    ? UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE;

        status = ucp_worker_create(mca_osc_ucx_component.ucp_context,
                                   &worker_params,
                                   &(mca_osc_ucx_component.ucp_worker));
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_create failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        /* query UCP worker attributes */
        worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE;
        status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr);
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_query failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        if (mca_osc_ucx_component.enable_mpi_threads == true &&
            worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucx does not support multithreading\n",
                                __FILE__, __LINE__);
            ret = OMPI_ERROR;
            goto error;
        }

        worker_created = true;
    }

    /* create module structure */
    module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t));
    if (module == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t));

    ret = ompi_comm_dup(comm, &module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
     * strictly necessary, since it requires O(p) state. */
    values[0] = disp_unit;
    values[1] = -disp_unit;
    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG,
                                               MPI_MIN, module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (values[0] == -values[1]) {
        /* everyone has the same disp_unit, we do not need O(p) space */
        module->disp_unit = disp_unit;
    } else {
        /* different disp_unit sizes, allocate O(p) space to store them */
        module->disp_unit  = -1;
        module->disp_units = calloc(comm_size, sizeof(int));
        if (module->disp_units == NULL) {
            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
            goto error;
        }

        ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT,
                                                   module->disp_units, 1, MPI_INT,
                                                   module->comm,
                                                   module->comm->c_coll->coll_allgather_module);
        if (OMPI_SUCCESS != ret) {
            goto error;
        }
    }

    /* exchange endpoints if necessary */
    is_eps_ready = 1;
    for (i = 0; i < comm_size; i++) {
        if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
            is_eps_ready = 0;
            break;
        }
    }

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1,
                                               MPI_INT, MPI_LAND, module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (!is_eps_ready) {
        status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker,
                                        &my_addr, &my_addr_len);
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_get_address failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        ret = allgather_len_and_info(my_addr, (int)my_addr_len, &recv_buf,
                                     disps, module->comm);
        if (ret != OMPI_SUCCESS) {
            goto error;
        }

        for (i = 0; i < comm_size; i++) {
            if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
                ucp_ep_params_t ep_params;
                ucp_ep_h ep;

                memset(&ep_params, 0, sizeof(ucp_ep_params_t));
                ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
                ep_params.address    = (ucp_address_t *)&(recv_buf[disps[i]]);
                status = ucp_ep_create(mca_osc_ucx_component.ucp_worker,
                                       &ep_params, &ep);
                if (status != UCS_OK) {
                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                        "%s:%d: ucp_ep_create failed: %d\n",
                                        __FILE__, __LINE__, status);
                    ret = OMPI_ERROR;
                    goto error;
                }

                ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep;
            }
        }

        ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
        my_addr = NULL;
        free(recv_buf);
        recv_buf = NULL;
        eps_created = true;
    }

    ret = mem_map(base, size, &(module->memh), module, flavor);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    state_base = (void *)&(module->state);
    ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t),
                  &(module->state_memh), module, MPI_WIN_FLAVOR_CREATE);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->win_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->state_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh,
                           &rkey_buffer, &rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh,
                           &state_rkey_buffer, &state_rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size;
    my_info = malloc(my_info_len);
    if (my_info == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    memcpy(my_info, base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size);
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size),
           state_rkey_buffer, state_rkey_buffer_size);

    ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps,
                                 module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT,
                                       rkey_sizes, 1, MPI_INT, comm,
                                       comm->c_coll->coll_allgather_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    for (i = 0; i < comm_size; i++) {
        ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
        assert(ep != NULL);

        memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]],
               sizeof(uint64_t));
        memcpy(&(module->state_info_array[i]).addr,
               &recv_buf[disps[i] + sizeof(uint64_t)], sizeof(uint64_t));

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]),
                                    &((module->win_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        status = ucp_ep_rkey_unpack(ep,
                                    &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]),
                                    &((module->state_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }
    }

    free(my_info);
    free(recv_buf);
    ucp_rkey_buffer_release(rkey_buffer);
    ucp_rkey_buffer_release(state_rkey_buffer);

    module->state.lock       = TARGET_LOCK_UNLOCKED;
    module->state.post_index = 0;
    memset((void *)module->state.post_state, 0,
           sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX);
    module->state.complete_count = 0;
    module->state.req_flag       = 0;
    module->state.acc_lock       = TARGET_LOCK_UNLOCKED;
    module->epoch_type.access    = NONE_EPOCH;
    module->epoch_type.exposure  = NONE_EPOCH;
    module->lock_count  = 0;
    module->post_count  = 0;
    module->start_group = NULL;
    module->post_group  = NULL;
    OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->pending_posts, opal_list_t);
    module->global_ops_num       = 0;
    module->per_target_ops_nums  = calloc(comm_size, sizeof(int));
    module->start_grp_ranks      = NULL;
    module->lock_all_is_nocheck  = false;

    ret = opal_hash_table_init(&module->outstanding_locks, comm_size);
    if (ret != OPAL_SUCCESS) {
        goto error;
    }

    win->w_osc_module = &module->super;

    /* sync with everyone */
    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    return ret;

error:
    if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
    if (recv_buf) free(recv_buf);
    if (my_info) free(my_info);
    if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer);
    if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer);
    if (module) {
        /* guard module-owned state: the original unconditionally dereferenced
         * module (and its info arrays) here, which crashes when the failure
         * happened before those allocations; it also freed module->comm
         * before the endpoint-destroy loop that still uses it */
        if (module->win_info_array) {
            for (i = 0; i < comm_size; i++) {
                if ((module->win_info_array[i]).rkey != NULL) {
                    ucp_rkey_destroy((module->win_info_array[i]).rkey);
                }
            }
            free(module->win_info_array);
        }
        if (module->state_info_array) {
            for (i = 0; i < comm_size; i++) {
                if ((module->state_info_array[i]).rkey != NULL) {
                    ucp_rkey_destroy((module->state_info_array[i]).rkey);
                }
            }
            free(module->state_info_array);
        }
        if (module->disp_units) free(module->disp_units);
        if (module->per_target_ops_nums) free(module->per_target_ops_nums);
        if (eps_created) {
            for (i = 0; i < comm_size; i++) {
                ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
                ucp_ep_destroy(ep);
            }
        }
        if (module->comm) ompi_comm_free(&module->comm);
        free(module);
    }
    if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker);
    return ret;
}
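Taken together, the snippets share one goto-based unwind idiom: each successfully acquired resource adds a cleanup label, and a failure jumps to the label for the last resource acquired, so teardown runs in reverse order of setup. A condensed, self-contained sketch of that skeleton follows; the function and label names are illustrative, and only UCP calls that appear in the snippets above are used:

#include <stdio.h>
#include <ucp/api/ucp.h>

/* Sketch: bring up a UCP context plus worker, unwinding on failure. */
static ucs_status_t setup(ucp_context_h *context_p, ucp_worker_h *worker_p)
{
    ucp_params_t ctx_params;
    ucp_worker_params_t worker_params;
    ucp_config_t *config;
    ucs_status_t status;

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        goto err;                 /* nothing acquired yet */
    }

    ctx_params.field_mask = UCP_PARAM_FIELD_FEATURES;
    ctx_params.features   = UCP_FEATURE_TAG;

    status = ucp_init(&ctx_params, config, context_p);
    ucp_config_release(config);   /* config is only needed by ucp_init() */
    if (status != UCS_OK) {
        goto err;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_SINGLE;

    status = ucp_worker_create(*context_p, &worker_params, worker_p);
    if (status != UCS_OK) {
        goto err_cleanup;         /* context exists, worker does not */
    }

    return UCS_OK;

err_cleanup:
    ucp_cleanup(*context_p);
err:
    fprintf(stderr, "UCP setup failed: %s\n", ucs_status_string(status));
    return status;
}

Successful callers later tear down in the same reverse order: ucp_worker_destroy() on the worker, then ucp_cleanup() on the context.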