예제 #1
0
파일: libperf.c 프로젝트: xinzhao3/ucx
static void ucp_perf_test_destroy_eps(ucx_perf_context_t* perf,
                                      unsigned group_size)
{
    ucs_status_ptr_t    *reqs;
    ucp_tag_recv_info_t info;
    ucs_status_t        status;
    unsigned i;

    reqs = calloc(sizeof(*reqs), group_size);

    for (i = 0; i < group_size; ++i) {
        if (perf->ucp.peers[i].rkey != NULL) {
            ucp_rkey_destroy(perf->ucp.peers[i].rkey);
        }
        if (perf->ucp.peers[i].ep != NULL) {
            reqs[i] = ucp_disconnect_nb(perf->ucp.peers[i].ep);
        }
    }

    for (i = 0; i < group_size; ++i) {
        if (!UCS_PTR_IS_PTR(reqs[i])) {
            continue;
        }

        do {
            ucp_worker_progress(perf->ucp.worker);
            status = ucp_request_test(reqs[i], &info);
        } while (status == UCS_INPROGRESS);
        ucp_request_release(reqs[i]);
    }

    free(reqs);
    free(perf->ucp.peers);
}
예제 #2
0
int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
{
    spml_ucx_mkey_t   *ucx_mkey;
    map_segment_t *mem_seg;
    int segno;
    int my_pe = oshmem_my_proc_id();

    MCA_SPML_CALL(quiet(oshmem_ctx_default));
    if (!mkeys)
        return OSHMEM_SUCCESS;

    if (!mkeys[0].spml_context)
        return OSHMEM_SUCCESS;

    mem_seg  = memheap_find_va(mkeys[0].va_base);
    ucx_mkey = (spml_ucx_mkey_t*)mkeys[0].spml_context;

    if (OPAL_UNLIKELY(NULL == mem_seg)) {
        return OSHMEM_ERROR;
    }
    
    if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) {
        ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h);
    }
    ucp_rkey_destroy(ucx_mkey->rkey);
    ucx_mkey->rkey = NULL;

    if (0 < mkeys[0].len) {
        ucp_rkey_buffer_release(mkeys[0].u.data);
    }

    free(mkeys);

    return OSHMEM_SUCCESS;
}
예제 #3
0
int ompi_osc_ucx_free(struct ompi_win_t *win) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
    int i, ret = OMPI_SUCCESS;

    if ((module->epoch_type.access != NONE_EPOCH && module->epoch_type.access != FENCE_EPOCH)
        || module->epoch_type.exposure != NONE_EPOCH) {
        ret = OMPI_ERR_RMA_SYNC;
    }

    if (module->start_group != NULL || module->post_group != NULL) {
        ret = OMPI_ERR_RMA_SYNC;
    }

    assert(module->global_ops_num == 0);
    assert(module->lock_count == 0);
    assert(opal_list_is_empty(&module->pending_posts) == true);
    OBJ_DESTRUCT(&module->outstanding_locks);
    OBJ_DESTRUCT(&module->pending_posts);

    while (module->state.lock != TARGET_LOCK_UNLOCKED) {
        /* not sure if this is required */
        ucp_worker_progress(mca_osc_ucx_component.ucp_worker);
    }

    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);

    for (i = 0; i < ompi_comm_size(module->comm); i++) {
        ucp_rkey_destroy((module->win_info_array[i]).rkey);
        ucp_rkey_destroy((module->state_info_array[i]).rkey);
    }
    free(module->win_info_array);
    free(module->state_info_array);

    free(module->per_target_ops_nums);

    ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->memh);
    ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->state_memh);

    if (module->disp_units) free(module->disp_units);
    ompi_comm_free(&module->comm);

    free(module);

    return ret;
}
예제 #4
0
void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey)
{
    spml_ucx_mkey_t   *ucx_mkey;

    if (!mkey->spml_context) {
        return;
    }
    ucx_mkey = (spml_ucx_mkey_t *)(mkey->spml_context);
    ucp_rkey_destroy(ucx_mkey->rkey);
}
예제 #5
0
파일: osc_ucx_comm.c 프로젝트: bgoglin/ompi
static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module_t *module,
                                       ucp_ep_h ep, int target) {
    ucp_rkey_h state_rkey = (module->state_info_array)[target].rkey;
    uint64_t remote_state_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_DYNAMIC_WIN_CNT_OFFSET;
    size_t len = sizeof(uint64_t) + sizeof(ompi_osc_dynamic_win_info_t) * OMPI_OSC_UCX_ATTACH_MAX;
    char *temp_buf = malloc(len);
    ompi_osc_dynamic_win_info_t *temp_dynamic_wins;
    int win_count, contain, insert = -1;
    ucs_status_t status;

    if ((module->win_info_array[target]).rkey_init == true) {
        ucp_rkey_destroy((module->win_info_array[target]).rkey);
        (module->win_info_array[target]).rkey_init == false;
    }

    status = ucp_get_nbi(ep, (void *)temp_buf, len, remote_state_addr, state_rkey);
    if (status != UCS_OK && status != UCS_INPROGRESS) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_get_nbi failed: %d\n",
                            __FILE__, __LINE__, status);
        return OMPI_ERROR;
    }

    status = ucp_ep_flush(ep);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_ep_flush failed: %d\n",
                            __FILE__, __LINE__, status);
        return OMPI_ERROR;
    }

    memcpy(&win_count, temp_buf, sizeof(uint64_t));
    assert(win_count > 0 && win_count <= OMPI_OSC_UCX_ATTACH_MAX);

    temp_dynamic_wins = (ompi_osc_dynamic_win_info_t *)(temp_buf + sizeof(uint64_t));
    contain = ompi_osc_find_attached_region_position(temp_dynamic_wins, 0, win_count,
                                                     remote_addr, 1, &insert);
    assert(contain >= 0 && contain < win_count);

    status = ucp_ep_rkey_unpack(ep, temp_dynamic_wins[contain].rkey_buffer,
                                &((module->win_info_array[target]).rkey));
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                            __FILE__, __LINE__, status);
        return OMPI_ERROR;
    }

    (module->win_info_array[target]).rkey_init = true;

    free(temp_buf);

    return status;
}
예제 #6
0
파일: libperf.c 프로젝트: brminich/ucx
static void ucp_perf_test_destroy_eps(ucx_perf_context_t* perf,
                                      unsigned group_size)
{
    unsigned i;

    for (i = 0; i < group_size; ++i) {
        if (perf->ucp.peers[i].rkey != NULL) {
            ucp_rkey_destroy(perf->ucp.peers[i].rkey);
        }
        if (perf->ucp.peers[i].ep != NULL) {
            ucp_ep_destroy(perf->ucp.peers[i].ep);
        }
    }
    free(perf->ucp.peers);
}
예제 #7
0
파일: ucp_rkey.c 프로젝트: vspetrov/ucx
ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer, ucp_rkey_h *rkey_p)
{
    unsigned remote_pd_index, remote_pd_gap;
    unsigned rkey_index;
    unsigned pd_count;
    ucs_status_t status;
    ucp_rkey_h rkey;
    uint8_t pd_size;
    ucp_pd_map_t pd_map;
    void *p;

    /* Count the number of remote PDs in the rkey buffer */
    p = rkey_buffer;

    /* Read remote PD map */
    pd_map   = *(ucp_pd_map_t*)p;

    ucs_trace("unpacking rkey with pd_map 0x%x", pd_map);

    if (pd_map == 0) {
        /* Dummy key return ok */
        *rkey_p = &ucp_mem_dummy_rkey;
        return UCS_OK;
    }

    pd_count = ucs_count_one_bits(pd_map);
    p       += sizeof(ucp_pd_map_t);

    /* Allocate rkey handle which holds UCT rkeys for all remote PDs.
     * We keep all of them to handle a future transport switch.
     */
    rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->uct[0]) * pd_count), "ucp_rkey");
    if (rkey == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    rkey->pd_map    = 0;
    remote_pd_index = 0; /* Index of remote PD */
    rkey_index      = 0; /* Index of the rkey in the array */

    /* Unpack rkey of each UCT PD */
    while (pd_map > 0) {
        pd_size = *((uint8_t*)p++);

        /* Use bit operations to iterate through the indices of the remote PDs
         * as provided in the pd_map. pd_map always holds a bitmap of PD indices
         * that remain to be used. Every time we find the "gap" until the next
         * valid PD index using ffs operation. If some rkeys cannot be unpacked,
         * we remove them from the local map.
         */
        remote_pd_gap    = ucs_ffs64(pd_map); /* Find the offset for next PD index */
        remote_pd_index += remote_pd_gap;      /* Calculate next index of remote PD*/
        pd_map >>= remote_pd_gap;                   /* Remove the gap from the map */
        ucs_assert(pd_map & 1);

        /* Unpack only reachable rkeys */
        if (UCS_BIT(remote_pd_index) & ucp_ep_config(ep)->key.reachable_pd_map) {
            ucs_assert(rkey_index < pd_count);
            status = uct_rkey_unpack(p, &rkey->uct[rkey_index]);
            if (status != UCS_OK) {
                ucs_error("Failed to unpack remote key from remote pd[%d]: %s",
                          remote_pd_index, ucs_status_string(status));
                goto err_destroy;
            }

            ucs_trace("rkey[%d] for remote pd %d is 0x%lx", rkey_index,
                      remote_pd_index, rkey->uct[rkey_index].rkey);
            rkey->pd_map |= UCS_BIT(remote_pd_index);
            ++rkey_index;
        }

        ++remote_pd_index;
        pd_map >>= 1;
        p += pd_size;
    }

    if (rkey->pd_map == 0) {
        ucs_debug("The unpacked rkey from the destination is unreachable");
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy;
    }

    *rkey_p = rkey;
    return UCS_OK;

err_destroy:
    ucp_rkey_destroy(rkey);
err:
    return status;
}
예제 #8
0
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                            struct ompi_communicator_t *comm, struct opal_info_t *info,
                            int flavor, int *model) {
    ompi_osc_ucx_module_t *module = NULL;
    char *name = NULL;
    long values[2];
    int ret = OMPI_SUCCESS;
    ucs_status_t status;
    int i, comm_size = ompi_comm_size(comm);
    int is_eps_ready;
    bool eps_created = false, worker_created = false;
    ucp_address_t *my_addr = NULL;
    size_t my_addr_len;
    char *recv_buf = NULL;
    void *rkey_buffer = NULL, *state_rkey_buffer = NULL;
    size_t rkey_buffer_size, state_rkey_buffer_size;
    void *state_base = NULL;
    void * my_info = NULL;
    size_t my_info_len;
    int disps[comm_size];
    int rkey_sizes[comm_size];

    /* the osc/sm component is the exclusive provider for support for
     * shared memory windows */
    if (flavor == MPI_WIN_FLAVOR_SHARED) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* if UCP worker has never been initialized before, init it first */
    if (mca_osc_ucx_component.ucp_worker == NULL) {
        ucp_worker_params_t worker_params;
        ucp_worker_attr_t worker_attr;

        memset(&worker_params, 0, sizeof(ucp_worker_h));
        worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
        worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true)
                                    ? UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE;
        status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params,
                                   &(mca_osc_ucx_component.ucp_worker));
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_create failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        /* query UCP worker attributes */
        worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE;
        status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr);
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_query failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        if (mca_osc_ucx_component.enable_mpi_threads == true &&
            worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucx does not support multithreading\n",
                                __FILE__, __LINE__);
            ret = OMPI_ERROR;
            goto error;
        }

        worker_created = true;
    }

    /* create module structure */
    module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t));
    if (module == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t));

    ret = ompi_comm_dup(comm, &module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
       strictly necessary, since it requires O(p) state. */
    values[0] = disp_unit;
    values[1] = -disp_unit;

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG,
                                               MPI_MIN, module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (values[0] == -values[1]) { /* everyone has the same disp_unit, we do not need O(p) space */
        module->disp_unit = disp_unit;
    } else { /* different disp_unit sizes, allocate O(p) space to store them */
        module->disp_unit = -1;
        module->disp_units = calloc(comm_size, sizeof(int));
        if (module->disp_units == NULL) {
            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
            goto error;
        }

        ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT,
                                                   module->disp_units, 1, MPI_INT,
                                                   module->comm,
                                                   module->comm->c_coll->coll_allgather_module);
        if (OMPI_SUCCESS != ret) {
            goto error;
        }
    }

    /* exchange endpoints if necessary */
    is_eps_ready = 1;
    for (i = 0; i < comm_size; i++) {
        if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
            is_eps_ready = 0;
            break;
        }
    }

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1, MPI_INT,
                                               MPI_LAND,
                                               module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (!is_eps_ready) {
        status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker,
                                        &my_addr, &my_addr_len);
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_get_address failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        ret = allgather_len_and_info(my_addr, (int)my_addr_len,
                                     &recv_buf, disps, module->comm);
        if (ret != OMPI_SUCCESS) {
            goto error;
        }

        for (i = 0; i < comm_size; i++) {
            if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
                ucp_ep_params_t ep_params;
                ucp_ep_h ep;
                memset(&ep_params, 0, sizeof(ucp_ep_params_t));
                ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
                ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]);
                status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep);
                if (status != UCS_OK) {
                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                        "%s:%d: ucp_ep_create failed: %d\n",
                                        __FILE__, __LINE__, status);
                    ret = OMPI_ERROR;
                    goto error;
                }

                ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep;
            }
        }

        ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
        my_addr = NULL;
        free(recv_buf);
        recv_buf = NULL;

        eps_created = true;
    }

    ret = mem_map(base, size, &(module->memh), module, flavor);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    state_base = (void *)&(module->state);
    ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t), &(module->state_memh),
                  module, MPI_WIN_FLAVOR_CREATE);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->win_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->state_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh,
                           &rkey_buffer, &rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh,
                           &state_rkey_buffer, &state_rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size;
    my_info = malloc(my_info_len);
    if (my_info == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    memcpy(my_info, base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size);
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size),
           state_rkey_buffer, state_rkey_buffer_size);

    ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT,
                                       rkey_sizes, 1, MPI_INT, comm,
                                       comm->c_coll->coll_allgather_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    for (i = 0; i < comm_size; i++) {
        ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
        assert(ep != NULL);

        memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t));
        memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], 
               sizeof(uint64_t));

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]),
                                    &((module->win_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]),
                                    &((module->state_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }
    }

    free(my_info);
    free(recv_buf);

    ucp_rkey_buffer_release(rkey_buffer);
    ucp_rkey_buffer_release(state_rkey_buffer);

    module->state.lock = TARGET_LOCK_UNLOCKED;
    module->state.post_index = 0;
    memset((void *)module->state.post_state, 0, sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX);
    module->state.complete_count = 0;
    module->state.req_flag = 0;
    module->state.acc_lock = TARGET_LOCK_UNLOCKED;
    module->epoch_type.access = NONE_EPOCH;
    module->epoch_type.exposure = NONE_EPOCH;
    module->lock_count = 0;
    module->post_count = 0;
    module->start_group = NULL;
    module->post_group = NULL;
    OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->pending_posts, opal_list_t);
    module->global_ops_num = 0;
    module->per_target_ops_nums = calloc(comm_size, sizeof(int));
    module->start_grp_ranks = NULL;
    module->lock_all_is_nocheck = false;

    ret = opal_hash_table_init(&module->outstanding_locks, comm_size);
    if (ret != OPAL_SUCCESS) {
        goto error;
    }

    win->w_osc_module = &module->super;

    /* sync with everyone */

    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    return ret;

 error:
    if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
    if (recv_buf) free(recv_buf);
    if (my_info) free(my_info);
    for (i = 0; i < comm_size; i++) {
        if ((module->win_info_array[i]).rkey != NULL) {
            ucp_rkey_destroy((module->win_info_array[i]).rkey);
        }
        if ((module->state_info_array[i]).rkey != NULL) {
            ucp_rkey_destroy((module->state_info_array[i]).rkey);
        }
    }
    if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer);
    if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer);
    if (module->win_info_array) free(module->win_info_array);
    if (module->state_info_array) free(module->state_info_array);
    if (module->disp_units) free(module->disp_units);
    if (module->comm) ompi_comm_free(&module->comm);
    if (module->per_target_ops_nums) free(module->per_target_ops_nums);
    if (eps_created) {
        for (i = 0; i < comm_size; i++) {
            ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
            ucp_ep_destroy(ep);
        }
    }
    if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker);
    if (module) free(module);
    return ret;
}