/**
 * Unpack the remote key received for segment @p segno of PE @p pe into the
 * per-peer key slot of the given context and cache it.
 *
 * @param ctx    Context handle; treated as a mca_spml_ucx_ctx_t pointer.
 * @param mkey   Received memory key; mkey->u.data holds the packed rkey.
 * @param segno  Segment index inside the peer's mkeys table.
 * @param pe     Index of the peer in ucp_peers.
 * @param tr_id  Not used by this implementation.
 *
 * On unpack failure the error is logged and oshmem_shmem_abort(-1) is called.
 */
void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
{
    mca_spml_ucx_ctx_t *target_ctx = (mca_spml_ucx_ctx_t *)ctx;
    spml_ucx_mkey_t *slot = &target_ctx->ucp_peers[pe].mkeys[segno].key;
    ucs_status_t rc;

    rc = ucp_ep_rkey_unpack(target_ctx->ucp_peers[pe].ucp_conn,
                            mkey->u.data,
                            &slot->rkey);
    if (rc != UCS_OK) {
        SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(rc));
        oshmem_shmem_abort(-1);
        return;
    }

    /* Only the default context publishes its slot through the mkey itself. */
    if (&mca_spml_ucx_ctx_default == target_ctx) {
        mkey->spml_context = slot;
    }

    mca_spml_ucx_cache_mkey(target_ctx, mkey, segno, pe);
}
/**
 * Allocate a UCX key object, unpack the packed rkey carried in @p mkey for
 * peer @p pe into it, and attach it to mkey->spml_context.
 *
 * @param mkey  Received memory key; mkey->u.data holds the packed rkey.
 * @param pe    Index of the peer in mca_spml_ucx.ucp_peers.
 *
 * Any failure is logged and followed by oshmem_shmem_abort(-1).
 */
void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, int pe)
{
    spml_ucx_mkey_t *ucx_mkey;
    ucs_status_t err;

    ucx_mkey = (spml_ucx_mkey_t *)malloc(sizeof(*ucx_mkey));
    if (!ucx_mkey) {
        SPML_ERROR("not enough memory to allocate mkey");
        goto error_fatal;
    }

    err = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[pe].ucp_conn,
                             mkey->u.data,
                             &ucx_mkey->rkey);
    if (UCS_OK != err) {
        SPML_ERROR("failed to unpack rkey");
        goto error_fatal;
    }

    mkey->spml_context = ucx_mkey;
    return;

error_fatal:
    /* Release the key object before aborting so it is not leaked should the
     * abort call return; ucx_mkey is NULL here when the allocation failed. */
    free(ucx_mkey);
    oshmem_shmem_abort(-1);
    return;
}
/**
 * Refresh the cached remote key for a dynamic window on @p target.
 *
 * Reads the target's attached-region table (a 64-bit count followed by
 * OMPI_OSC_UCX_ATTACH_MAX region descriptors) with ucp_get_nbi + flush,
 * locates the region containing @p remote_addr, and unpacks that region's
 * rkey into module->win_info_array[target].rkey.
 *
 * @param remote_addr  Address on the target that must fall in an attached region.
 * @param module       OSC UCX module owning the per-target info arrays.
 * @param ep           Endpoint connected to @p target.
 * @param target       Index of the target in the info arrays.
 *
 * @return OMPI_ERROR on any failure; otherwise the final UCX status (UCS_OK),
 *         returned unchanged as the original code did.
 */
static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module_t *module,
                                       ucp_ep_h ep, int target)
{
    ucp_rkey_h state_rkey = (module->state_info_array)[target].rkey;
    uint64_t remote_state_addr = (module->state_info_array)[target].addr
                                 + OSC_UCX_STATE_DYNAMIC_WIN_CNT_OFFSET;
    size_t len = sizeof(uint64_t)
                 + sizeof(ompi_osc_dynamic_win_info_t) * OMPI_OSC_UCX_ATTACH_MAX;
    char *temp_buf;
    ompi_osc_dynamic_win_info_t *temp_dynamic_wins;
    uint64_t raw_win_count = 0;
    int win_count, contain, insert = -1;
    int ret = OMPI_ERROR;
    ucs_status_t status;

    temp_buf = malloc(len);
    if (temp_buf == NULL) {
        return OMPI_ERROR;
    }

    if ((module->win_info_array[target]).rkey_init == true) {
        ucp_rkey_destroy((module->win_info_array[target]).rkey);
        /* Must be an assignment: leaving the flag set after the destroy would
         * make a later call destroy the same rkey again. */
        (module->win_info_array[target]).rkey_init = false;
    }

    status = ucp_get_nbi(ep, (void *)temp_buf, len, remote_state_addr, state_rkey);
    if (status != UCS_OK && status != UCS_INPROGRESS) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_get_nbi failed: %d\n",
                            __FILE__, __LINE__, status);
        goto cleanup;
    }

    status = ucp_ep_flush(ep);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_ep_flush failed: %d\n",
                            __FILE__, __LINE__, status);
        goto cleanup;
    }

    /* The count travels as 64 bits; copy it into a 64-bit local so the
     * memcpy cannot write past a narrower int. */
    memcpy(&raw_win_count, temp_buf, sizeof(raw_win_count));
    assert(raw_win_count > 0 && raw_win_count <= OMPI_OSC_UCX_ATTACH_MAX);
    win_count = (int)raw_win_count;

    temp_dynamic_wins = (ompi_osc_dynamic_win_info_t *)(temp_buf + sizeof(uint64_t));
    contain = ompi_osc_find_attached_region_position(temp_dynamic_wins, 0, win_count,
                                                     remote_addr, 1, &insert);
    assert(contain >= 0 && contain < win_count);

    status = ucp_ep_rkey_unpack(ep, temp_dynamic_wins[contain].rkey_buffer,
                                &((module->win_info_array[target]).rkey));
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                            __FILE__, __LINE__, status);
        goto cleanup;
    }

    (module->win_info_array[target]).rkey_init = true;
    ret = (int)status; /* status is UCS_OK at this point */

cleanup:
    /* Single exit so the scratch buffer is released on every path. */
    free(temp_buf);
    return ret;
}
/**
 * Unpack the remote key received for segment @p segno of PE @p pe into the
 * component-wide per-peer key slot, attach the slot to the mkey and cache it.
 *
 * @param mkey   Received memory key; mkey->u.data holds the packed rkey.
 * @param segno  Segment index inside the peer's mkeys table.
 * @param pe     Index of the peer in mca_spml_ucx.ucp_peers.
 * @param tr_id  Not used by this implementation.
 *
 * On unpack failure the error is logged and oshmem_shmem_abort(-1) is called.
 */
void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
{
    spml_ucx_mkey_t *slot = &mca_spml_ucx.ucp_peers[pe].mkeys[segno].key;
    ucs_status_t rc;

    rc = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[pe].ucp_conn,
                            mkey->u.data,
                            &slot->rkey);
    if (rc != UCS_OK) {
        SPML_ERROR("failed to unpack rkey");
        oshmem_shmem_abort(-1);
        return;
    }

    mkey->spml_context = slot;
    mca_spml_ucx_cache_mkey(mkey, segno, pe);
}
/**
 * Exchange connection data with every peer in the RTE group and build one
 * UCP endpoint (plus an unpacked rkey, when one was sent) per peer.
 *
 * The local message is an info header, followed by the worker address and,
 * when any of the RMA/AMO features is requested, the packed rkey of the
 * receive buffer. Each peer's message is received into a fixed-size scratch
 * buffer and parsed with the same layout.
 *
 * @param perf      Perf context; perf->ucp.peers is allocated here.
 * @param features  UCP feature mask deciding whether an rkey is exchanged.
 *
 * @return UCS_OK on success, otherwise the first error encountered. The
 *         final status is always passed through ucp_perf_test_exchange_status().
 */
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf, uint64_t features)
{
    const size_t buffer_size = 2048;
    ucx_perf_ep_info_t info, *remote_info;
    unsigned group_size, i, group_index;
    ucp_address_t *address;
    size_t address_length = 0;
    ucp_ep_params_t ep_params;
    ucs_status_t status;
    struct iovec vec[3];
    void *rkey_buffer;
    void *req = NULL;
    void *buffer;

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length);
    if (status != UCS_OK) {
        if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status));
        }
        goto err;
    }

    info.ucp.addr_len = address_length;
    info.recv_buffer  = (uintptr_t)perf->recv_buffer;

    vec[0].iov_base = &info;
    vec[0].iov_len  = sizeof(info);
    vec[1].iov_base = address;
    vec[1].iov_len  = address_length;

    if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) {
        status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh,
                               &rkey_buffer, &info.rkey_size);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status));
            }
            ucp_worker_release_address(perf->ucp.worker, address);
            goto err;
        }

        vec[2].iov_base = rkey_buffer;
        vec[2].iov_len  = info.rkey_size;
        rte_call(perf, post_vec, vec, 3, &req);
        ucp_rkey_buffer_release(rkey_buffer);
    } else {
        info.rkey_size = 0;
        rte_call(perf, post_vec, vec, 2, &req);
    }

    ucp_worker_release_address(perf->ucp.worker, address);
    rte_call(perf, exchange_vec, req);

    /* Size the array by the UCP peer element, not the UCT one. */
    perf->ucp.peers = calloc(group_size, sizeof(*perf->ucp.peers));
    if (perf->ucp.peers == NULL) {
        /* status is still UCS_OK here; without this the failure would be
         * reported to the caller (and to the other ranks) as success. */
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    buffer = malloc(buffer_size);
    if (buffer == NULL) {
        ucs_error("Failed to allocate RTE receive buffer");
        status = UCS_ERR_NO_MEMORY;
        goto err_destroy_eps;
    }

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        rte_call(perf, recv, i, buffer, buffer_size, req);

        /* Received layout: info header | worker address | packed rkey */
        remote_info = buffer;
        address     = (void*)(remote_info + 1);
        rkey_buffer = (char*)address + remote_info->ucp.addr_len;
        perf->ucp.peers[i].remote_addr = remote_info->recv_buffer;

        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = address;

        status = ucp_ep_create(perf->ucp.worker, &ep_params, &perf->ucp.peers[i].ep);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status));
            }
            goto err_free_buffer;
        }

        if (remote_info->rkey_size > 0) {
            status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer,
                                        &perf->ucp.peers[i].rkey);
            if (status != UCS_OK) {
                if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                    /* Logged with ucs_error like every other failure in this
                     * function so the cleanup path below is taken.
                     * NOTE(review): the original used ucs_fatal here, which
                     * presumably does not return - confirm intent. */
                    ucs_error("ucp_rkey_unpack() failed: %s", ucs_status_string(status));
                }
                goto err_free_buffer;
            }
        } else {
            perf->ucp.peers[i].rkey = NULL;
        }
    }

    free(buffer);

    status = ucp_perf_test_exchange_status(perf, UCS_OK);
    if (status != UCS_OK) {
        ucp_perf_test_destroy_eps(perf, group_size);
    }
    return status;

err_free_buffer:
    free(buffer);
err_destroy_eps:
    ucp_perf_test_destroy_eps(perf, group_size);
err:
    (void)ucp_perf_test_exchange_status(perf, status);
    return status;
}
sshmem_mkey_t *mca_spml_ucx_register(void* addr, size_t size, uint64_t shmid, int *count) { sshmem_mkey_t *mkeys; ucs_status_t err; spml_ucx_mkey_t *ucx_mkey; size_t len; *count = 0; mkeys = (sshmem_mkey_t *) calloc(1, sizeof(*mkeys)); if (!mkeys) { return NULL ; } ucx_mkey = (spml_ucx_mkey_t *)malloc(sizeof(*ucx_mkey)); if (!ucx_mkey) { goto error_out; } mkeys[0].spml_context = ucx_mkey; err = ucp_mem_map(mca_spml_ucx.ucp_context, &addr, size, 0, &ucx_mkey->mem_h); if (UCS_OK != err) { goto error_out1; } err = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, &mkeys[0].u.data, &len); if (UCS_OK != err) { goto error_unmap; } if (len >= 0xffff) { SPML_ERROR("packed rkey is too long: %llu >= %d", (unsigned long long)len, 0xffff); oshmem_shmem_abort(-1); } err = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[oshmem_group_self->my_pe].ucp_conn, mkeys[0].u.data, &ucx_mkey->rkey); if (UCS_OK != err) { SPML_ERROR("failed to unpack rkey"); goto error_unmap; } mkeys[0].len = len; mkeys[0].va_base = addr; *count = 1; return mkeys; error_unmap: ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h); error_out1: free(ucx_mkey); error_out: free(mkeys); return NULL ; }
/**
 * Exchange connection data with every peer in the RTE group and build one
 * UCP endpoint (plus an unpacked rkey, when exchanged) per peer.
 *
 * The local message is the worker address, the receive-buffer pointer and,
 * when any of the RMA/AMO features is requested, the packed rkey. Receive
 * buffers for each peer are sized with the LOCAL address and rkey lengths,
 * so this code relies on all peers producing blobs of the same size.
 *
 * @param perf      Perf context; perf->ucp.peers is allocated here.
 * @param features  UCP feature mask deciding whether an rkey is exchanged.
 *
 * @return UCS_OK on success, otherwise the first error encountered. The
 *         final status is always passed through ucp_perf_test_exchange_status().
 */
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf, uint64_t features)
{
    unsigned group_size, i, group_index;
    ucp_address_t *address;
    size_t address_length = 0;
    ucs_status_t status;
    struct iovec vec[3];
    void *rkey_buffer;
    size_t rkey_size;
    void *req = NULL;
    int iov_len;

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length);
    if (status != UCS_OK) {
        if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status));
        }
        goto err;
    }

    vec[0].iov_base = address;
    vec[0].iov_len  = address_length;
    vec[1].iov_base = &perf->recv_buffer;
    vec[1].iov_len  = sizeof(uintptr_t);

    if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) {
        status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh,
                               &rkey_buffer, &rkey_size);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status));
            }
            ucp_worker_release_address(perf->ucp.worker, address);
            goto err;
        }
        vec[2].iov_base = rkey_buffer;
        vec[2].iov_len  = rkey_size;
        iov_len = 3;
    } else {
        rkey_buffer = NULL;
        iov_len = 2;
    }

    rte_call(perf, post_vec, vec, iov_len, &req);

    if (rkey_buffer != NULL) {
        ucp_rkey_buffer_release(rkey_buffer);
    }
    ucp_worker_release_address(perf->ucp.worker, address);

    rte_call(perf, exchange_vec, req);

    /* Size the array by the UCP peer element, not the UCT one. */
    perf->ucp.peers = calloc(group_size, sizeof(*perf->ucp.peers));
    if (perf->ucp.peers == NULL) {
        /* status is still UCS_OK here; without this the failure would be
         * reported to the caller (and to the other ranks) as success. */
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        address = malloc(address_length);
        if (address == NULL) {
            status = UCS_ERR_NO_MEMORY;
            goto err_destroy_eps;
        }
        rkey_buffer = NULL;

        vec[0].iov_base = address;
        vec[0].iov_len  = address_length;
        vec[1].iov_base = &perf->ucp.peers[i].remote_addr;
        vec[1].iov_len  = sizeof(uintptr_t);

        if (iov_len > 2) {
            rkey_buffer = malloc(rkey_size);
            if (rkey_buffer == NULL) {
                free(address);
                status = UCS_ERR_NO_MEMORY;
                goto err_destroy_eps;
            }
            vec[2].iov_base = rkey_buffer;
            vec[2].iov_len  = rkey_size;
        }

        rte_call(perf, recv_vec, i, vec, iov_len, req);

        status = ucp_ep_create(perf->ucp.worker, address, &perf->ucp.peers[i].ep);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status));
            }
            free(rkey_buffer);
            free(address);
            goto err_destroy_eps;
        }
        free(address);

        if (iov_len > 2) {
            status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer,
                                        &perf->ucp.peers[i].rkey);
            if (status != UCS_OK) {
                if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                    ucs_error("ucp_rkey_unpack() failed: %s", ucs_status_string(status));
                }
                free(rkey_buffer);
                goto err_destroy_eps;
            }
        } else {
            perf->ucp.peers[i].rkey = NULL;
        }

        free(rkey_buffer);
    }

    status = ucp_perf_test_exchange_status(perf, UCS_OK);
    if (status != UCS_OK) {
        ucp_perf_test_destroy_eps(perf, group_size);
    }
    return status;

err_destroy_eps:
    ucp_perf_test_destroy_eps(perf, group_size);
err:
    (void)ucp_perf_test_exchange_status(perf, status);
    return status;
}
/**
 * Create a new SHMEM communication context backed by its own UCP worker.
 *
 * A worker is created (single-threaded when the library runs in
 * SHMEM_THREAD_SINGLE or the context is PRIVATE/SERIALIZED, multi-threaded
 * otherwise), an endpoint is opened to every PE, and for every PE and memheap
 * segment the cached packed rkey is unpacked on the new endpoint. The context
 * is then added to the component's active array.
 *
 * @param options  SHMEM_CTX_* option bits.
 * @param ctx      Out: the created context handle (written only on success).
 *
 * @return OSHMEM_SUCCESS on success; OSHMEM_ERROR if the worker cannot be
 *         created; OSHMEM_ERR_OUT_OF_RESOURCE for every other failure.
 */
int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
{
    mca_spml_ucx_ctx_t *ucx_ctx;
    ucp_worker_params_t params;
    ucp_ep_params_t ep_params;
    size_t i, j, nprocs = oshmem_num_procs();
    ucs_status_t err;
    spml_ucx_mkey_t *ucx_mkey;
    sshmem_mkey_t *mkey;
    int rc = OSHMEM_ERROR;

    ucx_ctx = malloc(sizeof(mca_spml_ucx_ctx_t));
    if (NULL == ucx_ctx) {
        /* Nothing else has been acquired yet, so return directly. */
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }
    ucx_ctx->options = options;

    params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE ||
        options & SHMEM_CTX_PRIVATE ||
        options & SHMEM_CTX_SERIALIZED) {
        params.thread_mode = UCS_THREAD_MODE_SINGLE;
    } else {
        params.thread_mode = UCS_THREAD_MODE_MULTI;
    }

    err = ucp_worker_create(mca_spml_ucx.ucp_context, &params, &ucx_ctx->ucp_worker);
    if (UCS_OK != err) {
        free(ucx_ctx);
        return OSHMEM_ERROR;
    }

    /* calloc so that ucp_conn is NULL for peers not yet connected; the
     * error2 path relies on that to know which endpoints to destroy. */
    ucx_ctx->ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ucx_ctx->ucp_peers)));
    if (NULL == ucx_ctx->ucp_peers) {
        goto error;
    }

    if (mca_spml_ucx.active_array.ctxs_count == 0) {
        opal_progress_register(spml_ucx_ctx_progress);
    }

    for (i = 0; i < nprocs; i++) {
        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]);

        err = ucp_ep_create(ucx_ctx->ucp_worker, &ep_params,
                            &ucx_ctx->ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            /* i and nprocs are size_t, so they need %zu rather than %d. */
            SPML_ERROR("ucp_ep_create(proc=%zu/%zu) failed: %s", i, nprocs,
                       ucs_status_string(err));
            goto error2;
        }

        for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
            mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0];
            ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key;
            err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
                                     mkey->u.data,
                                     &ucx_mkey->rkey);
            if (UCS_OK != err) {
                SPML_UCX_ERROR("failed to unpack rkey");
                goto error2;
            }
            mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
        }
    }

    SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex);
    _ctx_add(&mca_spml_ucx.active_array, ucx_ctx);
    SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);

    (*ctx) = (shmem_ctx_t)ucx_ctx;
    return OSHMEM_SUCCESS;

error2:
    for (i = 0; i < nprocs; i++) {
        if (ucx_ctx->ucp_peers[i].ucp_conn) {
            ucp_ep_destroy(ucx_ctx->ucp_peers[i].ucp_conn);
        }
    }
    free(ucx_ctx->ucp_peers);

error:
    ucp_worker_destroy(ucx_ctx->ucp_worker);
    free(ucx_ctx);
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_ERROR("ctx create FAILED rc=%d", rc);
    return rc;
}
sshmem_mkey_t *mca_spml_ucx_register(void* addr, size_t size, uint64_t shmid, int *count) { sshmem_mkey_t *mkeys; ucs_status_t status; spml_ucx_mkey_t *ucx_mkey; size_t len; ucp_mem_map_params_t mem_map_params; int segno; map_segment_t *mem_seg; unsigned flags; int my_pe = oshmem_my_proc_id(); *count = 0; mkeys = (sshmem_mkey_t *) calloc(1, sizeof(*mkeys)); if (!mkeys) { return NULL; } segno = memheap_find_segnum(addr); mem_seg = memheap_find_seg(segno); ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[my_pe].mkeys[segno].key; mkeys[0].spml_context = ucx_mkey; /* if possible use mem handle already created by ucx allocator */ if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) { flags = 0; if (mca_spml_ucx.heap_reg_nb && memheap_is_va_in_segment(addr, HEAP_SEG_INDEX)) { flags = UCP_MEM_MAP_NONBLOCK; } mem_map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | UCP_MEM_MAP_PARAM_FIELD_LENGTH | UCP_MEM_MAP_PARAM_FIELD_FLAGS; mem_map_params.address = addr; mem_map_params.length = size; mem_map_params.flags = flags; status = ucp_mem_map(mca_spml_ucx.ucp_context, &mem_map_params, &ucx_mkey->mem_h); if (UCS_OK != status) { goto error_out; } } else { ucx_mkey->mem_h = (ucp_mem_h)mem_seg->context; } status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, &mkeys[0].u.data, &len); if (UCS_OK != status) { goto error_unmap; } if (len >= 0xffff) { SPML_UCX_ERROR("packed rkey is too long: %llu >= %d", (unsigned long long)len, 0xffff); oshmem_shmem_abort(-1); } status = ucp_ep_rkey_unpack(mca_spml_ucx_ctx_default.ucp_peers[oshmem_group_self->my_pe].ucp_conn, mkeys[0].u.data, &ucx_mkey->rkey); if (UCS_OK != status) { SPML_UCX_ERROR("failed to unpack rkey"); goto error_unmap; } mkeys[0].len = len; mkeys[0].va_base = addr; *count = 1; mca_spml_ucx_cache_mkey(&mca_spml_ucx_ctx_default, &mkeys[0], segno, my_pe); return mkeys; error_unmap: ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h); error_out: free(mkeys); return NULL ; }
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { ompi_osc_ucx_module_t *module = NULL; char *name = NULL; long values[2]; int ret = OMPI_SUCCESS; ucs_status_t status; int i, comm_size = ompi_comm_size(comm); int is_eps_ready; bool eps_created = false, worker_created = false; ucp_address_t *my_addr = NULL; size_t my_addr_len; char *recv_buf = NULL; void *rkey_buffer = NULL, *state_rkey_buffer = NULL; size_t rkey_buffer_size, state_rkey_buffer_size; void *state_base = NULL; void * my_info = NULL; size_t my_info_len; int disps[comm_size]; int rkey_sizes[comm_size]; /* the osc/sm component is the exclusive provider for support for * shared memory windows */ if (flavor == MPI_WIN_FLAVOR_SHARED) { return OMPI_ERR_NOT_SUPPORTED; } /* if UCP worker has never been initialized before, init it first */ if (mca_osc_ucx_component.ucp_worker == NULL) { ucp_worker_params_t worker_params; ucp_worker_attr_t worker_attr; memset(&worker_params, 0, sizeof(ucp_worker_h)); worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true) ? 
UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE; status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params, &(mca_osc_ucx_component.ucp_worker)); if (UCS_OK != status) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_worker_create failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } /* query UCP worker attributes */ worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr); if (UCS_OK != status) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_worker_query failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } if (mca_osc_ucx_component.enable_mpi_threads == true && worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucx does not support multithreading\n", __FILE__, __LINE__); ret = OMPI_ERROR; goto error; } worker_created = true; } /* create module structure */ module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t)); if (module == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } /* fill in the function pointer part */ memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t)); ret = ompi_comm_dup(comm, &module->comm); if (ret != OMPI_SUCCESS) { goto error; } asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm)); ompi_win_set_name(win, name); free(name); /* share everyone's displacement units. Only do an allgather if strictly necessary, since it requires O(p) state. 
*/ values[0] = disp_unit; values[1] = -disp_unit; ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG, MPI_MIN, module->comm, module->comm->c_coll->coll_allreduce_module); if (OMPI_SUCCESS != ret) { goto error; } if (values[0] == -values[1]) { /* everyone has the same disp_unit, we do not need O(p) space */ module->disp_unit = disp_unit; } else { /* different disp_unit sizes, allocate O(p) space to store them */ module->disp_unit = -1; module->disp_units = calloc(comm_size, sizeof(int)); if (module->disp_units == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT, module->disp_units, 1, MPI_INT, module->comm, module->comm->c_coll->coll_allgather_module); if (OMPI_SUCCESS != ret) { goto error; } } /* exchange endpoints if necessary */ is_eps_ready = 1; for (i = 0; i < comm_size; i++) { if (OSC_UCX_GET_EP(module->comm, i) == NULL) { is_eps_ready = 0; break; } } ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1, MPI_INT, MPI_LAND, module->comm, module->comm->c_coll->coll_allreduce_module); if (OMPI_SUCCESS != ret) { goto error; } if (!is_eps_ready) { status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker, &my_addr, &my_addr_len); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_worker_get_address failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } ret = allgather_len_and_info(my_addr, (int)my_addr_len, &recv_buf, disps, module->comm); if (ret != OMPI_SUCCESS) { goto error; } for (i = 0; i < comm_size; i++) { if (OSC_UCX_GET_EP(module->comm, i) == NULL) { ucp_ep_params_t ep_params; ucp_ep_h ep; memset(&ep_params, 0, sizeof(ucp_ep_params_t)); ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]); status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep); if (status != UCS_OK) { 
opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_ep_create failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep; } } ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr); my_addr = NULL; free(recv_buf); recv_buf = NULL; eps_created = true; } ret = mem_map(base, size, &(module->memh), module, flavor); if (ret != OMPI_SUCCESS) { goto error; } state_base = (void *)&(module->state); ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t), &(module->state_memh), module, MPI_WIN_FLAVOR_CREATE); if (ret != OMPI_SUCCESS) { goto error; } module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t)); if (module->win_info_array == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t)); if (module->state_info_array == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh, &rkey_buffer, &rkey_buffer_size); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_rkey_pack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh, &state_rkey_buffer, &state_rkey_buffer_size); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_rkey_pack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size; my_info = malloc(my_info_len); if (my_info == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } memcpy(my_info, base, sizeof(uint64_t)); memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t)); memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), 
rkey_buffer, rkey_buffer_size); memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size), state_rkey_buffer, state_rkey_buffer_size); ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm); if (ret != OMPI_SUCCESS) { goto error; } ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT, rkey_sizes, 1, MPI_INT, comm, comm->c_coll->coll_allgather_module); if (OMPI_SUCCESS != ret) { goto error; } for (i = 0; i < comm_size; i++) { ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); assert(ep != NULL); memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t)); memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], sizeof(uint64_t)); status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]), &((module->win_info_array[i]).rkey)); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_ep_rkey_unpack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]), &((module->state_info_array[i]).rkey)); if (status != UCS_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_ep_rkey_unpack failed: %d\n", __FILE__, __LINE__, status); ret = OMPI_ERROR; goto error; } } free(my_info); free(recv_buf); ucp_rkey_buffer_release(rkey_buffer); ucp_rkey_buffer_release(state_rkey_buffer); module->state.lock = TARGET_LOCK_UNLOCKED; module->state.post_index = 0; memset((void *)module->state.post_state, 0, sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX); module->state.complete_count = 0; module->state.req_flag = 0; module->state.acc_lock = TARGET_LOCK_UNLOCKED; module->epoch_type.access = NONE_EPOCH; module->epoch_type.exposure = NONE_EPOCH; module->lock_count = 0; module->post_count = 0; module->start_group = NULL; module->post_group = NULL; 
OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); OBJ_CONSTRUCT(&module->pending_posts, opal_list_t); module->global_ops_num = 0; module->per_target_ops_nums = calloc(comm_size, sizeof(int)); module->start_grp_ranks = NULL; module->lock_all_is_nocheck = false; ret = opal_hash_table_init(&module->outstanding_locks, comm_size); if (ret != OPAL_SUCCESS) { goto error; } win->w_osc_module = &module->super; /* sync with everyone */ ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); if (ret != OMPI_SUCCESS) { goto error; } return ret; error: if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr); if (recv_buf) free(recv_buf); if (my_info) free(my_info); for (i = 0; i < comm_size; i++) { if ((module->win_info_array[i]).rkey != NULL) { ucp_rkey_destroy((module->win_info_array[i]).rkey); } if ((module->state_info_array[i]).rkey != NULL) { ucp_rkey_destroy((module->state_info_array[i]).rkey); } } if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer); if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer); if (module->win_info_array) free(module->win_info_array); if (module->state_info_array) free(module->state_info_array); if (module->disp_units) free(module->disp_units); if (module->comm) ompi_comm_free(&module->comm); if (module->per_target_ops_nums) free(module->per_target_ops_nums); if (eps_created) { for (i = 0; i < comm_size; i++) { ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); ucp_ep_destroy(ep); } } if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker); if (module) free(module); return ret; }