예제 #1
0
파일: pml_ucx.c 프로젝트: 00datman/ompi
static int mca_pml_ucx_send_worker_address(void)
{
    ucp_address_t *address;
    ucs_status_t status;
    size_t addrlen;
    int rc;

    status = ucp_worker_get_address(ompi_pml_ucx.ucp_worker, &address, &addrlen);
    if (UCS_OK != status) {
        PML_UCX_ERROR("Failed to get worker address");
        return OMPI_ERROR;
    }

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen);
    if (OMPI_SUCCESS != rc) {
        PML_UCX_ERROR("Open MPI couldn't distribute EP connection details");
        return OMPI_ERROR;
    }

    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address);

    return OMPI_SUCCESS;
}
예제 #2
0
파일: libperf.c 프로젝트: xinzhao3/ucx
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf,
                                                  uint64_t features)
{
    const size_t buffer_size = 2048;
    ucx_perf_ep_info_t info, *remote_info;
    unsigned group_size, i, group_index;
    ucp_address_t *address;
    size_t address_length = 0;
    ucp_ep_params_t ep_params;
    ucs_status_t status;
    struct iovec vec[3];
    void *rkey_buffer;
    void *req = NULL;
    void *buffer;

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length);
    if (status != UCS_OK) {
        if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status));
        }
        goto err;
    }

    info.ucp.addr_len  = address_length;
    info.recv_buffer   = (uintptr_t)perf->recv_buffer;

    vec[0].iov_base    = &info;
    vec[0].iov_len     = sizeof(info);
    vec[1].iov_base    = address;
    vec[1].iov_len     = address_length;

    if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) {
        status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh,
                               &rkey_buffer, &info.rkey_size);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status));
            }
            ucp_worker_release_address(perf->ucp.worker, address);
            goto err;
        }

        vec[2].iov_base = rkey_buffer;
        vec[2].iov_len  = info.rkey_size;
        rte_call(perf, post_vec, vec, 3, &req);
        ucp_rkey_buffer_release(rkey_buffer);
    } else {
        info.rkey_size  = 0;
        rte_call(perf, post_vec, vec, 2, &req);
    }

    ucp_worker_release_address(perf->ucp.worker, address);
    rte_call(perf, exchange_vec, req);

    perf->ucp.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->ucp.peers == NULL) {
        goto err;
    }

    buffer = malloc(buffer_size);
    if (buffer == NULL) {
        ucs_error("Failed to allocate RTE receive buffer");
        status = UCS_ERR_NO_MEMORY;
        goto err_destroy_eps;
    }

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        rte_call(perf, recv, i, buffer, buffer_size, req);

        remote_info = buffer;
        address     = (void*)(remote_info + 1);
        rkey_buffer = (void*)address + remote_info->ucp.addr_len;
        perf->ucp.peers[i].remote_addr = remote_info->recv_buffer;

        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = address;

        status = ucp_ep_create(perf->ucp.worker, &ep_params, &perf->ucp.peers[i].ep);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status));
            }
            goto err_free_buffer;
        }

        if (remote_info->rkey_size > 0) {
            status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer,
                                        &perf->ucp.peers[i].rkey);
            if (status != UCS_OK) {
                if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                    ucs_fatal("ucp_rkey_unpack() failed: %s", ucs_status_string(status));
                }
                goto err_free_buffer;
            }
        } else {
            perf->ucp.peers[i].rkey = NULL;
        }
    }

    free(buffer);

    status = ucp_perf_test_exchange_status(perf, UCS_OK);
    if (status != UCS_OK) {
        ucp_perf_test_destroy_eps(perf, group_size);
    }
    return status;

err_free_buffer:
    free(buffer);
err_destroy_eps:
    ucp_perf_test_destroy_eps(perf, group_size);
err:
    (void)ucp_perf_test_exchange_status(perf, status);
    return status;
}
예제 #3
0
int main(int argc, char **argv)
{
    /* UCP temporary vars */
    ucp_params_t ucp_params;
    ucp_config_t *config;
    ucs_status_t status;

    /* UCP handler objects */
    ucp_context_h ucp_context;
    ucp_worker_h ucp_worker;

    /* OOB connection vars */
    uint64_t addr_len = 0;
    char *server = NULL;
    int oob_sock = -1;
    int ret = -1;

    /* Parse the command line */
    if (parse_cmd(argc, argv, &server) != UCS_OK) {
        goto err;
    }
    /* UCP initialization */
    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        goto err;
    }

    ucp_params.features = UCP_FEATURE_TAG;
    if (ucp_test_mode == TEST_MODE_WAIT || ucp_test_mode == TEST_MODE_EVENTFD) {
        ucp_params.features |= UCP_FEATURE_WAKEUP;
    }
    ucp_params.request_size    = sizeof(struct ucx_context);
    ucp_params.request_init    = request_init;
    ucp_params.request_cleanup = NULL;

    status = ucp_init(&ucp_params, config, &ucp_context);

    ucp_config_print(config, stdout, NULL, UCS_CONFIG_PRINT_CONFIG);

    ucp_config_release(config);
    if (status != UCS_OK) {
        goto err;
    }

    status = ucp_worker_create(ucp_context, UCS_THREAD_MODE_SINGLE, &ucp_worker);
    if (status != UCS_OK) {
        goto err_cleanup;
    }

    status = ucp_worker_get_address(ucp_worker, &local_addr, &local_addr_len);
    if (status != UCS_OK) {
        goto err_worker;
    }

    printf("[0x%x] local address length: %zu\n",
           (unsigned int)pthread_self(), local_addr_len);

    /* OOB connection establishment */
    if (server) {
        peer_addr_len = local_addr_len;

        oob_sock = run_client(server);
        if (oob_sock < 0) {
            goto err_addr;
        }

        ret = recv(oob_sock, &addr_len, sizeof(addr_len), 0);
        if (ret < 0) {
            fprintf(stderr, "failed to receive address length\n");
            goto err_addr;
        }

        peer_addr_len = addr_len;
        peer_addr = malloc(peer_addr_len);
        if (!peer_addr) {
            fprintf(stderr, "unable to allocate memory\n");
            goto err_addr;
        }

        ret = recv(oob_sock, peer_addr, peer_addr_len, 0);
        if (ret < 0) {
            fprintf(stderr, "failed to receive address\n");
            goto err_peer_addr;
        }
    } else {
        oob_sock = run_server();
        if (oob_sock < 0) {
            goto err_peer_addr;
        }

        addr_len = local_addr_len;
        ret = send(oob_sock, &addr_len, sizeof(addr_len), 0);
        if (ret < 0 || ret != sizeof(addr_len)) {
            fprintf(stderr, "failed to send address length\n");
            goto err_peer_addr;
        }

        ret = send(oob_sock, local_addr, local_addr_len, 0);
        if (ret < 0 || ret != local_addr_len) {
            fprintf(stderr, "failed to send address\n");
            goto err_peer_addr;
        }
    }
    close(oob_sock);

    ret = run_test(server, ucp_worker);

err_peer_addr:
    free(peer_addr);

err_addr:
    ucp_worker_release_address(ucp_worker, local_addr);

err_worker:
    ucp_worker_destroy(ucp_worker);

err_cleanup:
    ucp_cleanup(ucp_context);

err:
    return ret;
}
예제 #4
0
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
                    uint64_t features)
{
    ucp_config_t *config;
    ucs_status_t status;
    ucp_context_h context;
    ucp_worker_h worker;
    ucp_params_t params;
    ucp_worker_params_t worker_params;
    ucp_ep_params_t ep_params;
    ucp_address_t *address;
    size_t address_length;
    ucp_ep_h ep;

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        return;
    }

    memset(&params, 0, sizeof(params));
    params.features = features;
    status = ucp_init(&params, config, &context);
    if (status != UCS_OK) {
        printf("<Failed to create UCP context>\n");
        goto out_release_config;
    }

    if (print_opts & PRINT_UCP_CONTEXT) {
        ucp_context_print_info(context, stdout);
    }

    if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) {
        goto out_cleanup_context;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

    status = ucp_worker_create(context, &worker_params, &worker);
    if (status != UCS_OK) {
        printf("<Failed to create UCP worker>\n");
        goto out_cleanup_context;
    }

    if (print_opts & PRINT_UCP_WORKER) {
        ucp_worker_print_info(worker, stdout);
    }

    if (print_opts & PRINT_UCP_EP) {
        status = ucp_worker_get_address(worker, &address, &address_length);
        if (status != UCS_OK) {
            printf("<Failed to get UCP worker address>\n");
            goto out_destroy_worker;
        }

        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = address;

        status = ucp_ep_create(worker, &ep_params, &ep);
        ucp_worker_release_address(worker, address);
        if (status != UCS_OK) {
            printf("<Failed to get create UCP endpoint>\n");
            goto out_destroy_worker;
        }

        ucp_ep_print_info(ep, stdout);

        ucp_ep_destroy(ep);
    }

out_destroy_worker:
    ucp_worker_destroy(worker);
out_cleanup_context:
    ucp_cleanup(context);
out_release_config:
    ucp_config_release(config);
}
예제 #5
0
파일: spml_ucx.c 프로젝트: 00datman/ompi
int mca_spml_ucx_add_procs(oshmem_proc_t** procs, size_t nprocs)
{
    size_t i, n;
    int rc = OSHMEM_ERROR;
    int my_rank = oshmem_my_proc_id();
    ucs_status_t err;
    ucp_address_t *wk_local_addr;
    size_t wk_addr_len;
    int *wk_roffs, *wk_rsizes;
    char *wk_raddrs;


    mca_spml_ucx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx.ucp_peers)));
    if (NULL == mca_spml_ucx.ucp_peers) {
        goto error;
    }

    err = ucp_worker_get_address(mca_spml_ucx.ucp_worker, &wk_local_addr, &wk_addr_len);
    if (err != UCS_OK) {
        goto error;
    }
    dump_address(my_rank, (char *)wk_local_addr, wk_addr_len);

    rc = oshmem_shmem_xchng(wk_local_addr, wk_addr_len, nprocs,
            (void **)&wk_raddrs, &wk_roffs, &wk_rsizes);
    if (rc != OSHMEM_SUCCESS) {
        goto error;
    }

    opal_progress_register(spml_ucx_progress);

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {
        i = (my_rank + n) % nprocs;
        dump_address(i, (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]);
        err = ucp_ep_create(mca_spml_ucx.ucp_worker, 
                (ucp_address_t *)(wk_raddrs + wk_roffs[i]),
                &mca_spml_ucx.ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            SPML_ERROR("ucp_ep_create failed!!!\n");
            goto error2;
        }
        procs[i]->num_transports = 1;
        procs[i]->transport_ids = spml_ucx_transport_ids;
    }

    ucp_worker_release_address(mca_spml_ucx.ucp_worker, wk_local_addr);
    free(wk_raddrs);
    free(wk_rsizes);
    free(wk_roffs);

    SPML_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

error2:
    for (i = 0; i < nprocs; ++i) {
         if (mca_spml_ucx.ucp_peers[i].ucp_conn) {
             ucp_ep_destroy(mca_spml_ucx.ucp_peers[i].ucp_conn);
         }
    }
    if (mca_spml_ucx.ucp_peers) 
        free(mca_spml_ucx.ucp_peers);
    if (wk_raddrs)
        free(wk_raddrs);
    if (wk_rsizes)
        free(wk_rsizes);
    if (wk_roffs)
        free(wk_roffs);
    if (mca_spml_ucx.ucp_peers)
        free(mca_spml_ucx.ucp_peers);
error:
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_ERROR("add procs FAILED rc=%d", rc);
    return rc;

}
예제 #6
0
파일: libperf.c 프로젝트: bbenton/ucx
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf,
                                                  uint64_t features)
{
    unsigned group_size, i, group_index;
    ucp_address_t *address;
    size_t address_length = 0;
    ucs_status_t status;
    struct iovec vec[3];
    void *rkey_buffer;
    size_t rkey_size;
    void *req = NULL;
    int iov_len;

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length);
    if (status != UCS_OK) {
        if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status));
        }
        goto err;
    }

    vec[0].iov_base = address;
    vec[0].iov_len  = address_length;
    vec[1].iov_base = &perf->recv_buffer;
    vec[1].iov_len  = sizeof(uintptr_t);

    if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) {
        status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh,
                               &rkey_buffer, &rkey_size);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status));
            }
            ucp_worker_release_address(perf->ucp.worker, address);
            goto err;
        }

        vec[2].iov_base = rkey_buffer;
        vec[2].iov_len  = rkey_size;
        iov_len = 3;
    } else {
        rkey_buffer     = NULL;
        iov_len = 2;
    }

    rte_call(perf, post_vec, vec, iov_len, &req);

    if (rkey_buffer != NULL) {
        ucp_rkey_buffer_release(rkey_buffer);
    }
    ucp_worker_release_address(perf->ucp.worker, address);

    rte_call(perf, exchange_vec, req);

    perf->ucp.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->ucp.peers == NULL) {
        goto err;
    }

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        address         = malloc(address_length);
        rkey_buffer     = NULL;

        vec[0].iov_base = address;
        vec[0].iov_len  = address_length;
        vec[1].iov_base = &perf->ucp.peers[i].remote_addr;
        vec[1].iov_len  = sizeof(uintptr_t);

        if (iov_len > 2) {
            rkey_buffer     = malloc(rkey_size);
            vec[2].iov_base = rkey_buffer;
            vec[2].iov_len  = rkey_size;
        }

        rte_call(perf, recv_vec, i, vec, iov_len, req);

        status = ucp_ep_create(perf->ucp.worker, address, &perf->ucp.peers[i].ep);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status));
            }
            free(rkey_buffer);
            free(address);
            goto err_destroy_eps;
        }

        free(address);

        if (iov_len > 2) {
            status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer,
                                        &perf->ucp.peers[i].rkey);
            if (status != UCS_OK) {
                if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                    ucs_error("ucp_rkey_unpack() failed: %s", ucs_status_string(status));
                }
                free(rkey_buffer);
                goto err_destroy_eps;
            }
        } else {
            perf->ucp.peers[i].rkey = NULL;
        }

        free(rkey_buffer);
    }

    status = ucp_perf_test_exchange_status(perf, UCS_OK);
    if (status != UCS_OK) {
        ucp_perf_test_destroy_eps(perf, group_size);
    }
    return status;

err_destroy_eps:
    ucp_perf_test_destroy_eps(perf, group_size);
err:
    (void)ucp_perf_test_exchange_status(perf, status);
    return status;
}
예제 #7
0
파일: proto_info.c 프로젝트: openucx/ucx
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
                    uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
                    size_t estimated_num_eps, unsigned dev_type_bitmap,
                    const char *mem_size)
{
    ucp_config_t *config;
    ucs_status_t status;
    ucs_status_ptr_t status_ptr;
    ucp_context_h context;
    ucp_worker_h worker;
    ucp_params_t params;
    ucp_worker_params_t worker_params;
    ucp_ep_params_t ep_params;
    ucp_address_t *address;
    size_t address_length;
    resource_usage_t usage;
    ucp_ep_h ep;

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        return;
    }

    memset(&params, 0, sizeof(params));
    params.field_mask        = UCP_PARAM_FIELD_FEATURES |
                               UCP_PARAM_FIELD_ESTIMATED_NUM_EPS;
    params.features          = ctx_features;
    params.estimated_num_eps = estimated_num_eps;

    get_resource_usage(&usage);

    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SELF))) {
        ucp_config_modify(config, "SELF_DEVICES", "");
    }
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SHM))) {
        ucp_config_modify(config, "SHM_DEVICES", "");
    }
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_NET))) {
        ucp_config_modify(config, "NET_DEVICES", "");
    }

    status = ucp_init(&params, config, &context);
    if (status != UCS_OK) {
        printf("<Failed to create UCP context>\n");
        goto out_release_config;
    }

    if ((print_opts & PRINT_MEM_MAP) && (mem_size != NULL)) {
        ucp_mem_print_info(mem_size, context, stdout);
    }

    if (print_opts & PRINT_UCP_CONTEXT) {
        ucp_context_print_info(context, stdout);
        print_resource_usage(&usage, "UCP context");
    }

    if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) {
        goto out_cleanup_context;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

    get_resource_usage(&usage);

    status = ucp_worker_create(context, &worker_params, &worker);
    if (status != UCS_OK) {
        printf("<Failed to create UCP worker>\n");
        goto out_cleanup_context;
    }

    if (print_opts & PRINT_UCP_WORKER) {
        ucp_worker_print_info(worker, stdout);
        print_resource_usage(&usage, "UCP worker");
    }

    if (print_opts & PRINT_UCP_EP) {
        status = ucp_worker_get_address(worker, &address, &address_length);
        if (status != UCS_OK) {
            printf("<Failed to get UCP worker address>\n");
            goto out_destroy_worker;
        }

        ep_params             = *base_ep_params;

        ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address     = address;

        status = ucp_ep_create(worker, &ep_params, &ep);
        ucp_worker_release_address(worker, address);
        if (status != UCS_OK) {
            printf("<Failed to create UCP endpoint>\n");
            goto out_destroy_worker;
        }

        ucp_ep_print_info(ep, stdout);

        status_ptr = ucp_disconnect_nb(ep);
        if (UCS_PTR_IS_PTR(status_ptr)) {
            do {
                ucp_worker_progress(worker);
                status = ucp_request_test(status_ptr, NULL);
            } while (status == UCS_INPROGRESS);
            ucp_request_release(status_ptr);
        }
    }

out_destroy_worker:
    ucp_worker_destroy(worker);
out_cleanup_context:
    ucp_cleanup(context);
out_release_config:
    ucp_config_release(config);
}
예제 #8
0
int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    size_t i, j, n;
    int rc = OSHMEM_ERROR;
    int my_rank = oshmem_my_proc_id();
    ucs_status_t err;
    ucp_address_t *wk_local_addr;
    size_t wk_addr_len;
    int *wk_roffs = NULL;
    int *wk_rsizes = NULL;
    char *wk_raddrs = NULL;
    ucp_ep_params_t ep_params;


    mca_spml_ucx_ctx_default.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx_ctx_default.ucp_peers)));
    if (NULL == mca_spml_ucx_ctx_default.ucp_peers) {
        goto error;
    }

    err = ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len);
    if (err != UCS_OK) {
        goto error;
    }
    dump_address(my_rank, (char *)wk_local_addr, wk_addr_len);

    rc = oshmem_shmem_xchng(wk_local_addr, wk_addr_len, nprocs,
                            (void **)&wk_raddrs, &wk_roffs, &wk_rsizes);
    if (rc != OSHMEM_SUCCESS) {
        goto error;
    }

    opal_progress_register(spml_ucx_default_progress);

    mca_spml_ucx.remote_addrs_tbl = (char **)calloc(nprocs, sizeof(char *));
    memset(mca_spml_ucx.remote_addrs_tbl, 0, nprocs * sizeof(char *));

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {
        i = (my_rank + n) % nprocs;
        dump_address(i, (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]);

        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = (ucp_address_t *)(wk_raddrs + wk_roffs[i]);

        err = ucp_ep_create(mca_spml_ucx_ctx_default.ucp_worker, &ep_params,
                            &mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            SPML_UCX_ERROR("ucp_ep_create(proc=%zu/%zu) failed: %s", n, nprocs,
                           ucs_status_string(err));
            goto error2;
        }

        OSHMEM_PROC_DATA(procs[i])->num_transports = 1;
        OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids;

        for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
            mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL;
        }

        mca_spml_ucx.remote_addrs_tbl[i] = (char *)malloc(wk_rsizes[i]);
        memcpy(mca_spml_ucx.remote_addrs_tbl[i], (char *)(wk_raddrs + wk_roffs[i]),
               wk_rsizes[i]);
    }

    ucp_worker_release_address(mca_spml_ucx_ctx_default.ucp_worker, wk_local_addr);
    free(wk_raddrs);
    free(wk_rsizes);
    free(wk_roffs);

    SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

error2:
    for (i = 0; i < nprocs; ++i) {
         if (mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn) {
             ucp_ep_destroy(mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn);
         }
         if (mca_spml_ucx.remote_addrs_tbl[i]) {
             free(mca_spml_ucx.remote_addrs_tbl[i]);
         }
    }
    if (mca_spml_ucx_ctx_default.ucp_peers)
        free(mca_spml_ucx_ctx_default.ucp_peers);
    if (mca_spml_ucx.remote_addrs_tbl)
        free(mca_spml_ucx.remote_addrs_tbl);
    free(wk_raddrs);
    free(wk_rsizes);
    free(wk_roffs);
error:
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_UCX_ERROR("add procs FAILED rc=%d", rc);
    return rc;

}
예제 #9
0
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                            struct ompi_communicator_t *comm, struct opal_info_t *info,
                            int flavor, int *model) {
    ompi_osc_ucx_module_t *module = NULL;
    char *name = NULL;
    long values[2];
    int ret = OMPI_SUCCESS;
    ucs_status_t status;
    int i, comm_size = ompi_comm_size(comm);
    int is_eps_ready;
    bool eps_created = false, worker_created = false;
    ucp_address_t *my_addr = NULL;
    size_t my_addr_len;
    char *recv_buf = NULL;
    void *rkey_buffer = NULL, *state_rkey_buffer = NULL;
    size_t rkey_buffer_size, state_rkey_buffer_size;
    void *state_base = NULL;
    void * my_info = NULL;
    size_t my_info_len;
    int disps[comm_size];
    int rkey_sizes[comm_size];

    /* the osc/sm component is the exclusive provider for support for
     * shared memory windows */
    if (flavor == MPI_WIN_FLAVOR_SHARED) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* if UCP worker has never been initialized before, init it first */
    if (mca_osc_ucx_component.ucp_worker == NULL) {
        ucp_worker_params_t worker_params;
        ucp_worker_attr_t worker_attr;

        memset(&worker_params, 0, sizeof(ucp_worker_h));
        worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
        worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true)
                                    ? UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE;
        status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params,
                                   &(mca_osc_ucx_component.ucp_worker));
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_create failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        /* query UCP worker attributes */
        worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE;
        status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr);
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_query failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        if (mca_osc_ucx_component.enable_mpi_threads == true &&
            worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucx does not support multithreading\n",
                                __FILE__, __LINE__);
            ret = OMPI_ERROR;
            goto error;
        }

        worker_created = true;
    }

    /* create module structure */
    module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t));
    if (module == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t));

    ret = ompi_comm_dup(comm, &module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
       strictly necessary, since it requires O(p) state. */
    values[0] = disp_unit;
    values[1] = -disp_unit;

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG,
                                               MPI_MIN, module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (values[0] == -values[1]) { /* everyone has the same disp_unit, we do not need O(p) space */
        module->disp_unit = disp_unit;
    } else { /* different disp_unit sizes, allocate O(p) space to store them */
        module->disp_unit = -1;
        module->disp_units = calloc(comm_size, sizeof(int));
        if (module->disp_units == NULL) {
            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
            goto error;
        }

        ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT,
                                                   module->disp_units, 1, MPI_INT,
                                                   module->comm,
                                                   module->comm->c_coll->coll_allgather_module);
        if (OMPI_SUCCESS != ret) {
            goto error;
        }
    }

    /* exchange endpoints if necessary */
    is_eps_ready = 1;
    for (i = 0; i < comm_size; i++) {
        if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
            is_eps_ready = 0;
            break;
        }
    }

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1, MPI_INT,
                                               MPI_LAND,
                                               module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (!is_eps_ready) {
        status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker,
                                        &my_addr, &my_addr_len);
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_get_address failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        ret = allgather_len_and_info(my_addr, (int)my_addr_len,
                                     &recv_buf, disps, module->comm);
        if (ret != OMPI_SUCCESS) {
            goto error;
        }

        for (i = 0; i < comm_size; i++) {
            if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
                ucp_ep_params_t ep_params;
                ucp_ep_h ep;
                memset(&ep_params, 0, sizeof(ucp_ep_params_t));
                ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
                ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]);
                status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep);
                if (status != UCS_OK) {
                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                        "%s:%d: ucp_ep_create failed: %d\n",
                                        __FILE__, __LINE__, status);
                    ret = OMPI_ERROR;
                    goto error;
                }

                ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep;
            }
        }

        ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
        my_addr = NULL;
        free(recv_buf);
        recv_buf = NULL;

        eps_created = true;
    }

    ret = mem_map(base, size, &(module->memh), module, flavor);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    state_base = (void *)&(module->state);
    ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t), &(module->state_memh),
                  module, MPI_WIN_FLAVOR_CREATE);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->win_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->state_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh,
                           &rkey_buffer, &rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh,
                           &state_rkey_buffer, &state_rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size;
    my_info = malloc(my_info_len);
    if (my_info == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    memcpy(my_info, base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size);
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size),
           state_rkey_buffer, state_rkey_buffer_size);

    ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT,
                                       rkey_sizes, 1, MPI_INT, comm,
                                       comm->c_coll->coll_allgather_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    for (i = 0; i < comm_size; i++) {
        ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
        assert(ep != NULL);

        memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t));
        memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], 
               sizeof(uint64_t));

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]),
                                    &((module->win_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]),
                                    &((module->state_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }
    }

    free(my_info);
    free(recv_buf);

    ucp_rkey_buffer_release(rkey_buffer);
    ucp_rkey_buffer_release(state_rkey_buffer);

    module->state.lock = TARGET_LOCK_UNLOCKED;
    module->state.post_index = 0;
    memset((void *)module->state.post_state, 0, sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX);
    module->state.complete_count = 0;
    module->state.req_flag = 0;
    module->state.acc_lock = TARGET_LOCK_UNLOCKED;
    module->epoch_type.access = NONE_EPOCH;
    module->epoch_type.exposure = NONE_EPOCH;
    module->lock_count = 0;
    module->post_count = 0;
    module->start_group = NULL;
    module->post_group = NULL;
    OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->pending_posts, opal_list_t);
    module->global_ops_num = 0;
    module->per_target_ops_nums = calloc(comm_size, sizeof(int));
    module->start_grp_ranks = NULL;
    module->lock_all_is_nocheck = false;

    ret = opal_hash_table_init(&module->outstanding_locks, comm_size);
    if (ret != OPAL_SUCCESS) {
        goto error;
    }

    win->w_osc_module = &module->super;

    /* sync with everyone */

    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    return ret;

 error:
    if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
    if (recv_buf) free(recv_buf);
    if (my_info) free(my_info);
    for (i = 0; i < comm_size; i++) {
        if ((module->win_info_array[i]).rkey != NULL) {
            ucp_rkey_destroy((module->win_info_array[i]).rkey);
        }
        if ((module->state_info_array[i]).rkey != NULL) {
            ucp_rkey_destroy((module->state_info_array[i]).rkey);
        }
    }
    if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer);
    if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer);
    if (module->win_info_array) free(module->win_info_array);
    if (module->state_info_array) free(module->state_info_array);
    if (module->disp_units) free(module->disp_units);
    if (module->comm) ompi_comm_free(&module->comm);
    if (module->per_target_ops_nums) free(module->per_target_ops_nums);
    if (eps_created) {
        for (i = 0; i < comm_size; i++) {
            ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
            ucp_ep_destroy(ep);
        }
    }
    if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker);
    if (module) free(module);
    return ret;
}