Beispiel #1
0
ucs_status_t ucp_ep_create_stub(ucp_worker_h worker, uint64_t dest_uuid,
                                const char *message, ucp_ep_h *ep_p)
{
    ucs_status_t status;
    ucp_ep_op_t optype;
    ucp_ep_h ep = NULL;

    status = ucp_ep_new(worker, dest_uuid, "??", message, &ep);
    if (status != UCS_OK) {
        goto err;
    }

    for (optype = 0; optype < UCP_EP_OP_LAST; ++optype) {
        status = ucp_stub_ep_create(ep, optype, 0, NULL, &ep->uct_eps[optype]);
        if (status != UCS_OK) {
            goto err_destroy_uct_eps;
        }
    }

    *ep_p = ep;
    return UCS_OK;

err_destroy_uct_eps:
    for (optype = 0; optype < UCP_EP_OP_LAST; ++optype) {
        if (ep->uct_eps[optype] != NULL) {
            uct_ep_destroy(ep->uct_eps[optype]);
        }
    }
    ucp_ep_delete(ep);
err:
    return status;
}
Beispiel #2
0
void ucp_ep_wireup_stop(ucp_ep_h ep)
{
    ucp_worker_h worker = ep->worker;

    ucs_trace_func("ep=%p", ep);

    if (ep->uct.next_ep != NULL) {
        while (uct_ep_flush(ep->uct.next_ep) != UCS_OK) {
            ucp_worker_progress(ep->worker);
        }
        uct_ep_destroy(ep->uct.next_ep);
    }

    if (ep->wireup_ep != NULL) {
        while (uct_ep_flush(ep->wireup_ep) != UCS_OK) {
            ucp_worker_progress(ep->worker);
        }
        uct_ep_destroy(ep->wireup_ep);
    }

    UCS_ASYNC_BLOCK(&worker->async);
    sglib_hashed_ucp_ep_t_delete(worker->ep_hash, ep);
    UCS_ASYNC_UNBLOCK(&worker->async);
}
Beispiel #3
0
static void ucp_ep_destory_uct_eps(ucp_ep_h ep)
{
    ucp_lane_index_t lane;
    uct_ep_h uct_ep;

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        uct_ep = ep->uct_eps[lane];
        if (uct_ep == NULL) {
            continue;
        }
        uct_ep_pending_purge(uct_ep, ucp_request_release_pending_send, NULL);
        ucs_debug("destroy ep %p lane %d uct_ep %p", ep, lane, uct_ep);
        uct_ep_destroy(uct_ep);
    }
}
Beispiel #4
0
static void ucp_ep_destory_uct_eps(ucp_ep_h ep)
{
    uct_ep_h uct_ep;
    int optype;

    for (optype = 0; optype < UCP_EP_OP_LAST; ++optype) {
        if (!ucp_ep_is_op_primary(ep, optype)) {
            continue;
        }

        uct_ep = ep->uct_eps[optype];
        uct_ep_pending_purge(uct_ep, ucp_ep_pending_req_release);
        ucs_debug("destroy ep %p op %d uct_ep %p", ep, optype, uct_ep);
        uct_ep_destroy(uct_ep);
    }
}
Beispiel #5
0
static ucs_status_t ucp_dummy_ep_send_func(uct_ep_h uct_ep)
{
    ucp_dummy_ep_t *dummy_ep = ucs_derived_of(uct_ep, ucp_dummy_ep_t);
    ucp_ep_h ep = dummy_ep->ep;

    /*
     * We switch the endpoint in this function (instead in wireup code) since
     * this is guaranteed to run from the main thread.
     */
    sched_yield();
    ucs_async_check_miss(&ep->worker->async);
    if (ep->state & UCP_EP_STATE_REMOTE_CONNECTED) {
        ep->uct.ep      = ep->uct.next_ep;
        ep->uct.next_ep = NULL;
        uct_ep_destroy(&dummy_ep->super);
    }
    return UCS_ERR_NO_RESOURCE;
}
Beispiel #6
0
void ucp_ep_destroy_internal(ucp_ep_h ep, const char *message)
{
    ucp_lane_index_t lane;
    uct_ep_h uct_ep;

    ucs_debug("destroy ep %p%s", ep, message);

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        uct_ep = ep->uct_eps[lane];
        if (uct_ep == NULL) {
            continue;
        }

        uct_ep_pending_purge(uct_ep, ucp_destroyed_ep_pending_purge, ep);
        ucs_debug("destroy ep %p lane[%d]=%p", ep, lane, uct_ep);
        uct_ep_destroy(uct_ep);
    }

    ucs_free(ep);
}
Beispiel #7
0
ucs_status_t ucp_ep_create_stub(ucp_worker_h worker, uint64_t dest_uuid,
                                const char *message, ucp_ep_h *ep_p)
{
    ucs_status_t status;
    ucp_ep_config_key_t key;
    ucp_ep_h ep = NULL;

    status = ucp_ep_new(worker, dest_uuid, "??", message, &ep);
    if (status != UCS_OK) {
        goto err;
    }

    /* all operations will use the first lane, which is a stub endpoint */
    memset(&key, 0, sizeof(key));
    key.rma_lane_map     = 1;
    key.amo_lane_map     = 1;
    key.reachable_md_map = 0; /* TODO */
    key.am_lane          = 0;
    key.rndv_lane        = 0;
    key.wireup_msg_lane  = 0;
    key.lanes[0]         = UCP_NULL_RESOURCE;
    key.num_lanes        = 1;
    memset(key.amo_lanes, UCP_NULL_LANE, sizeof(key.amo_lanes));

    ep->cfg_index        = ucp_worker_get_ep_config(worker, &key);
    ep->am_lane          = 0;

    status = ucp_stub_ep_create(ep, &ep->uct_eps[0]);
    if (status != UCS_OK) {
        goto err_destroy_uct_eps;
    }

    *ep_p = ep;
    return UCS_OK;

err_destroy_uct_eps:
    uct_ep_destroy(ep->uct_eps[0]);
    ucp_ep_delete(ep);
err:
    return status;
}
Beispiel #8
0
static void uct_perf_test_cleanup_endpoints(ucx_perf_context_t *perf)
{
    unsigned group_size, group_index, i;

    rte_call(perf, barrier);

    uct_iface_set_am_handler(perf->uct.iface, UCT_PERF_TEST_AM_ID, NULL, NULL, UCT_AM_CB_FLAG_SYNC);

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    for (i = 0; i < group_size; ++i) {
        if (i != group_index) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
            if (perf->uct.peers[i].ep) {
                uct_ep_destroy(perf->uct.peers[i].ep);
            }
        }
    }
    free(perf->uct.peers);
}
Beispiel #9
0
ucs_status_t ucp_ep_wireup_start(ucp_ep_h ep, ucp_address_t *address)
{
    ucp_worker_h worker = ep->worker;
    struct sockaddr *am_short_addr;
    ucp_rsc_index_t wireup_rsc_index;
    struct sockaddr *wireup_addr;
    uct_iface_attr_t *iface_attr;
    uct_iface_h iface;
    ucp_rsc_index_t dst_rsc_index, wireup_dst_rsc_index;
    ucp_rsc_index_t wireup_dst_pd_index;
    ucs_status_t status;

    UCS_ASYNC_BLOCK(&worker->async);

    ep->dest_uuid = ucp_address_uuid(address);
    sglib_hashed_ucp_ep_t_add(worker->ep_hash, ep);

    ucs_debug("connecting 0x%"PRIx64"->0x%"PRIx64, worker->uuid, ep->dest_uuid);

    /*
     * Select best transport for active messages
     */
    status = ucp_pick_best_wireup(worker, address, ucp_am_short_score_func,
                                  &ep->uct.rsc_index, &dst_rsc_index,
                                  &ep->uct.dst_pd_index, &am_short_addr,
                                  &ep->uct.reachable_pds,
                                  "short_am");
    if (status != UCS_OK) {
        ucs_error("No transport for short active message");
        goto err;
    }

    iface      = worker->ifaces[ep->uct.rsc_index];
    iface_attr = &worker->iface_attrs[ep->uct.rsc_index];

    /*
     * If the selected transport can be connected directly, do it.
     */
    if (iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_ep_create_connected(iface, am_short_addr, &ep->uct.next_ep);
        if (status != UCS_OK) {
            ucs_debug("failed to create ep");
            goto err;
        }

        ep->state |= UCP_EP_STATE_LOCAL_CONNECTED;
        ucp_ep_remote_connected(ep);
        goto out;
    }

    /*
     * If we cannot connect the selected transport directly, select another
     * transport for doing the wireup.
     */
    status = ucp_pick_best_wireup(worker, address, ucp_wireup_score_func,
                                  &wireup_rsc_index, &wireup_dst_rsc_index,
                                  &wireup_dst_pd_index, &wireup_addr,
                                  &ep->uct.reachable_pds,
                                  "wireup");
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ep_create_connected(worker->ifaces[wireup_rsc_index],
                                     wireup_addr, &ep->wireup_ep);
    if (status != UCS_OK) {
        goto err;
    }

    if (!(iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP)) {
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy_wireup_ep;
    }

    /*
     * Until the transport is connected, send operations should return NO_RESOURCE.
     * Plant a dummy endpoint object which will do it.
     */
    status = UCS_CLASS_NEW(ucp_dummy_ep_t, &ep->uct.ep, ep);
    if (status != UCS_OK) {
        goto err_destroy_wireup_ep;
    }

    /*
     * Create endpoint for the transport we need to wire-up.
     */
    status = uct_ep_create(iface, &ep->uct.next_ep);
    if (status != UCS_OK) {
        goto err_destroy_uct_ep;
    }

    /*
     * Send initial connection request for wiring-up the transport.
     */
    status = ucp_ep_wireup_send(ep, ep->wireup_ep, UCP_AM_ID_CONN_REQ,
                                dst_rsc_index);
    if (status != UCS_OK) {
        goto err_destroy_next_ep;
    }

out:
    UCS_ASYNC_UNBLOCK(&worker->async);
    return UCS_OK;

err_destroy_next_ep:
    uct_ep_destroy(ep->uct.next_ep);
err_destroy_uct_ep:
    uct_ep_destroy(ep->uct.ep);
err_destroy_wireup_ep:
    uct_ep_destroy(ep->wireup_ep);
err:
    sglib_hashed_ucp_ep_t_delete(worker->ep_hash, ep);
    UCS_ASYNC_UNBLOCK(&worker->async);
    return status;
}
Beispiel #10
0
int main(int argc, char **argv)
{
    /* MPI is initially used to swap the endpoint and interface addresses so each
     * process has knowledge of the others. */
    int partner;
    int size, rank;
    uct_device_addr_t *own_dev, *peer_dev;
    uct_iface_addr_t *own_iface, *peer_iface;
    uct_ep_addr_t *own_ep, *peer_ep;
    ucs_status_t status;          /* status codes for UCS */
    uct_ep_h ep;                  /* Remote endpoint */
    ucs_async_context_t async;    /* Async event context manages times and fd notifications */
    uint8_t id = 0;
    void *arg;
    const char *tl_name = NULL;
    const char *dev_name = NULL;
    struct iface_info if_info;
    int exit_fail = 1;

    optind = 1;
    if (3 == argc) {
        dev_name = argv[1];
        tl_name  = argv[2];
    } else {
        printf("Usage: %s (<dev-name> <tl-name>)\n", argv[0]);
        fflush(stdout);
        return 1;
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) {
        fprintf(stderr, "Failed to create enough mpi processes\n");
        goto out;
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (0 == rank) {
        partner = 1;
    } else if (1 == rank) {
        partner = 0;
    } else {
        /* just wait for other processes in MPI_Finalize */
        exit_fail = 0;
        goto out;
    }

    /* Initialize context */
    status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD);
    CHKERR_JUMP(UCS_OK != status, "init async context", out);

    /* Create a worker object */
    status = uct_worker_create(&async, UCS_THREAD_MODE_SINGLE, &if_info.worker);
    CHKERR_JUMP(UCS_OK != status, "create worker", out_cleanup_async);

    /* Search for the desired transport */
    status = dev_tl_lookup(dev_name, tl_name, &if_info);
    CHKERR_JUMP(UCS_OK != status, "find supported device and transport", out_destroy_worker);

    /* Expect that addr len is the same on both peers */
    own_dev = (uct_device_addr_t*)calloc(2, if_info.attr.device_addr_len);
    CHKERR_JUMP(NULL == own_dev, "allocate memory for dev addrs", out_destroy_iface);
    peer_dev = (uct_device_addr_t*)((char*)own_dev + if_info.attr.device_addr_len);

    own_iface = (uct_iface_addr_t*)calloc(2, if_info.attr.iface_addr_len);
    CHKERR_JUMP(NULL == own_iface, "allocate memory for if addrs", out_free_dev_addrs);
    peer_iface = (uct_iface_addr_t*)((char*)own_iface + if_info.attr.iface_addr_len);

    /* Get device address */
    status = uct_iface_get_device_address(if_info.iface, own_dev);
    CHKERR_JUMP(UCS_OK != status, "get device address", out_free_if_addrs);

    MPI_Sendrecv(own_dev, if_info.attr.device_addr_len, MPI_BYTE, partner, 0,
                 peer_dev, if_info.attr.device_addr_len, MPI_BYTE, partner,0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    status = uct_iface_is_reachable(if_info.iface, peer_dev, NULL);
    CHKERR_JUMP(0 == status, "reach the peer", out_free_if_addrs);

    /* Get interface address */
    if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(if_info.iface, own_iface);
        CHKERR_JUMP(UCS_OK != status, "get interface address", out_free_if_addrs);

        MPI_Sendrecv(own_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner, 0,
                     peer_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner,0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    /* Again, expect that ep addr len is the same on both peers */
    own_ep = (uct_ep_addr_t*)calloc(2, if_info.attr.ep_addr_len);
    CHKERR_JUMP(NULL == own_ep, "allocate memory for ep addrs", out_free_if_addrs);
    peer_ep = (uct_ep_addr_t*)((char*)own_ep + if_info.attr.ep_addr_len);

    if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        /* Create new endpoint */
        status = uct_ep_create(if_info.iface, &ep);
        CHKERR_JUMP(UCS_OK != status, "create endpoint", out_free_ep_addrs);

        /* Get endpoint address */
        status = uct_ep_get_address(ep, own_ep);
        CHKERR_JUMP(UCS_OK != status, "get endpoint address", out_free_ep);
    }

    MPI_Sendrecv(own_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0,
                 peer_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        /* Connect endpoint to a remote endpoint */
        status = uct_ep_connect_to_ep(ep, peer_dev, peer_ep);
        MPI_Barrier(MPI_COMM_WORLD);
    } else if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        /* Create an endpoint which is connected to a remote interface */
        status = uct_ep_create_connected(if_info.iface, peer_dev, peer_iface, &ep);
    } else {
        status = UCS_ERR_UNSUPPORTED;
    }
    CHKERR_JUMP(UCS_OK != status, "connect endpoint", out_free_ep);

    /*Set active message handler */
    status = uct_iface_set_am_handler(if_info.iface, id, hello_world, arg, UCT_AM_CB_FLAG_SYNC);
    CHKERR_JUMP(UCS_OK != status, "set callback", out_free_ep);

    if (0 == rank) {
        uint64_t header;
        char payload[8];
        unsigned length = sizeof(payload);
        /* Send active message to remote endpoint */
        status = uct_ep_am_short(ep, id, header, payload, length);
        CHKERR_JUMP(UCS_OK != status, "send active msg", out_free_ep);
    } else if (1 == rank) {
        while (holder) {
            /* Explicitly progress any outstanding active message requests */
            uct_worker_progress(if_info.worker);
        }
    }

    /* Everything is fine, we need to call MPI_Finalize rather than MPI_Abort */
    exit_fail = 0;

out_free_ep:
    uct_ep_destroy(ep);
out_free_ep_addrs:
    free(own_ep);
out_free_if_addrs:
    free(own_iface);
out_free_dev_addrs:
    free(own_dev);
out_destroy_iface:
    uct_iface_close(if_info.iface);
    uct_md_close(if_info.pd);
out_destroy_worker:
    uct_worker_destroy(if_info.worker);
out_cleanup_async:
    ucs_async_context_cleanup(&async);
out:
    (0 == exit_fail) ? MPI_Finalize() : MPI_Abort(MPI_COMM_WORLD, 1);
    return exit_fail;
}
Beispiel #11
0
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    const size_t buffer_size = 2048;
    ucx_perf_ep_info_t info, *remote_info;
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_md_attr_t md_attr;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *buffer;
    void *req;

    buffer = malloc(buffer_size);
    if (buffer == NULL) {
        ucs_error("Failed to allocate RTE buffer");
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err_free;
    }

    status = uct_md_query(perf->uct.md, &md_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_md_query: %s", ucs_status_string(status));
        goto err_free;
    }

    if (md_attr.cap.flags & (UCT_MD_FLAG_ALLOC|UCT_MD_FLAG_REG)) {
        info.rkey_size      = md_attr.rkey_packed_size;
    } else {
        info.rkey_size      = 0;
    }
    info.uct.dev_addr_len   = iface_attr.device_addr_len;
    info.uct.iface_addr_len = iface_attr.iface_addr_len;
    info.uct.ep_addr_len    = iface_attr.ep_addr_len;
    info.recv_buffer        = (uintptr_t)perf->recv_buffer;

    rkey_buffer             = buffer;
    dev_addr                = (void*)rkey_buffer + info.rkey_size;
    iface_addr              = (void*)dev_addr    + info.uct.dev_addr_len;
    ep_addr                 = (void*)iface_addr  + info.uct.iface_addr_len;
    ucs_assert_always((void*)ep_addr + info.uct.ep_addr_len <= buffer + buffer_size);

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    status = uct_iface_get_address(perf->uct.iface, iface_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_address: %s", ucs_status_string(status));
        goto err_free;
    }

    if (info.rkey_size > 0) {
        status = uct_md_mkey_pack(perf->uct.md, perf->uct.recv_mem.memh, rkey_buffer);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status));
            goto err_free;
        }
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }

            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    vec[0].iov_base         = &info;
    vec[0].iov_len          = sizeof(info);
    vec[1].iov_base         = buffer;
    vec[1].iov_len          = info.rkey_size + info.uct.dev_addr_len +
                              info.uct.iface_addr_len + info.uct.ep_addr_len;

    rte_call(perf, post_vec, vec, 2, &req);
    rte_call(perf, exchange_vec, req);

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        rte_call(perf, recv, i, buffer, buffer_size, req);

        remote_info = buffer;
        rkey_buffer = remote_info + 1;
        dev_addr    = (void*)rkey_buffer + remote_info->rkey_size;
        iface_addr  = (void*)dev_addr    + remote_info->uct.dev_addr_len;
        ep_addr     = (void*)iface_addr  + remote_info->uct.iface_addr_len;
        perf->uct.peers[i].remote_addr = remote_info->recv_buffer;

        if (!uct_iface_is_reachable(perf->uct.iface, dev_addr,
                                    remote_info->uct.iface_addr_len ?
                                    iface_addr : NULL)) {
            ucs_error("Destination is unreachable");
            status = UCS_ERR_UNREACHABLE;
            goto err_destroy_eps;
        }

        if (remote_info->rkey_size > 0) {
            status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_rkey_unpack: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
        } else {
            perf->uct.peers[i].rkey.handle = NULL;
            perf->uct.peers[i].rkey.type   = NULL;
            perf->uct.peers[i].rkey.rkey   = UCT_INVALID_RKEY;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr, ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr, iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }
    }
    uct_perf_iface_flush_b(perf);

    free(buffer);
    rte_call(perf, barrier);
    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(buffer);
err:
    return status;
}
Beispiel #12
0
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count,
                                   const ucp_address_entry_t *address_list,
                                   uint8_t *addr_indices)
{
    ucp_worker_h worker = ep->worker;
    ucp_ep_config_key_t key;
    uint16_t new_cfg_index;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uint8_t conn_flag;
    char str[32];

    ucs_trace("ep %p: initialize lanes", ep);

    status = ucp_wireup_select_lanes(ep, address_count, address_list,
                                     addr_indices, &key);
    if (status != UCS_OK) {
        goto err;
    }

    key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map;

    new_cfg_index = ucp_worker_get_ep_config(worker, &key);
    if ((ep->cfg_index == new_cfg_index)) {
        return UCS_OK; /* No change */
    }

    if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) {
        /*
         * TODO handle a case where we have to change lanes and reconfigure the ep:
         *
         * - if we already have uct ep connected to an address - move it to the new lane index
         * - if we don't yet have connection to an address - create it
         * - if an existing lane is not connected anymore - delete it (possibly)
         * - if the configuration has changed - replay all pending operations on all lanes -
         *   need that every pending callback would return, in case of failure, the number
         *   of lane it wants to be queued on.
         */
        ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep, ep->cfg_index,
                  new_cfg_index);
        ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, "old", NULL);
        ucp_wireup_print_config(worker->context, &key, "new", NULL);
        ucs_fatal("endpoint reconfiguration not supported yet");
    }

    ep->cfg_index = new_cfg_index;
    ep->am_lane   = key.am_lane;

    snprintf(str, sizeof(str), "ep %p", ep);
    ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str,
                            addr_indices);

    ucs_trace("ep %p: connect lanes", ep);

    /* establish connections on all underlying endpoints */
    conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED;
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        status = ucp_wireup_connect_lane(ep, lane, address_count, address_list,
                                         addr_indices[lane]);
        if (status != UCS_OK) {
            goto err;
        }

        if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) {
            conn_flag = 0; /* If we have a p2p transport, we're not connected */
        }
    }

    ep->flags |= conn_flag;
    return UCS_OK;

err:
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ep->uct_eps[lane] != NULL) {
            uct_ep_destroy(ep->uct_eps[lane]);
            ep->uct_eps[lane] = NULL;
        }
    }
    return status;
}
Beispiel #13
0
int main(int argc, char **argv)
{
	/* MPI is initially used to swap the endpoint and interface addresses so each
	 * process has knowledge of the others. */
	MPI_Status mpi_status;
	int partner;
	int size;
	struct sockaddr *ep_addr; /* Endpoint address */
	struct sockaddr *iface_addr; /* Interface address */
	ucs_status_t status; /* status codes for UCS */
	ucs_thread_mode_t thread_mode = UCS_THREAD_MODE_SINGLE; /* Specifies thread sharing mode of an object */
	uct_ep_h ep; /* Remote endpoint */
	void *arg;

	MPI_Init(NULL, NULL);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	if (size < 2) {
		fprintf(stderr, "Failed to create enough mpi processes.\n");fflush(stderr);	
		return 1;
	}
	
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	if (0 == rank) { 
		partner = 1; 
	} else if (1 == rank) { 
		partner = 0; 
	} else { 
		MPI_Finalize(); 
		return 0; 
	}

	/* Initialize context */
	status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to init async context.\n");fflush(stderr);
		goto out;
	}	 

	/* Create a worker object */ 
	status = uct_worker_create(&async, thread_mode, &worker);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to create worker.\n");fflush(stderr);
		goto out_cleanup_async;
	}	 

	/* The device and tranport names are determined by latency */
	status = dev_tl_lookup();
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to find supported device and transport\n");fflush(stderr);
		goto out_destroy_worker;
	}

	iface_addr = calloc(1, iface_attr.iface_addr_len);
	ep_addr = calloc(1, iface_attr.ep_addr_len);
	if ((NULL == iface_addr) || (NULL == ep_addr)) { 
		goto out_destroy_iface;
	}

	/* Get interface address */
	status = uct_iface_get_address(iface, iface_addr);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to get interface address.\n");fflush(stderr);
		goto out_free;
	}	 
	
	if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
		/* Create new endpoint */
		status = uct_ep_create(iface, &ep);	
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to create endpoint.\n");fflush(stderr);
			goto out_free;
		}	 	
		/* Get endpoint address */
		status = uct_ep_get_address(ep, ep_addr);	
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to get endpoint address.\n");fflush(stderr);
			goto out_free_ep;
		}	 	
	}

	/* Communicate interface and endpoint addresses to corresponding process */
	MPI_Send(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD);
	MPI_Recv(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status);
	MPI_Send(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD);
	MPI_Recv(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status);

	if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
		/* Connect endpoint to a remote endpoint */
		status = uct_ep_connect_to_ep(ep, ep_addr);
	} else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
		/* Create an endpoint which is connected to a remote interface */
		status = uct_ep_create_connected(iface, iface_addr, &ep);
	} else status = UCS_ERR_UNSUPPORTED;
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to connect endpoint\n");fflush(stderr);
		goto out_free_ep;
	}

	uint8_t id = 0; /* Tag for active message */
	/*Set active message handler */
	status = uct_iface_set_am_handler(iface, id, hello_world, arg);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to set callback.\n");fflush(stderr);
		goto out_free_ep;
	}	 	
	
	if (0 == rank) {
		uint64_t header;
		char payload[8];
		unsigned length = sizeof(payload);
		/* Send active message to remote endpoint */
		status = uct_ep_am_short(ep, id, header, payload, length);  		
	} else if (1 == rank) {
		while (holder) { 
			/* Explicitly progress any outstanding active message requests */
			uct_worker_progress(worker);
		}
	}

out_free_ep:
	uct_ep_destroy(ep);
out_free:
	free(iface_addr);
	free(ep_addr);
out_destroy_iface:
	uct_iface_close(iface);
	uct_pd_close(pd);
out_destroy_worker:
	uct_worker_destroy(worker);
out_cleanup_async:
	ucs_async_context_cleanup(&async);
out:
	MPI_Finalize();
	return 0;
}
Beispiel #14
0
void ucp_ep_destroy_uct_ep_safe(ucp_ep_h ep, uct_ep_h uct_ep)
{
    ucs_assert_always(uct_ep != NULL);
    uct_ep_pending_purge(uct_ep, ucp_pending_req_release);
    uct_ep_destroy(uct_ep);
}
Beispiel #15
0
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_pd_attr_t pd_attr;
    unsigned long va;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *req;

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err;
    }

    status = uct_pd_query(perf->uct.pd, &pd_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_pd_query: %s", ucs_status_string(status));
        goto err;
    }

    dev_addr    = calloc(1, iface_attr.device_addr_len);
    iface_addr  = calloc(1, iface_attr.iface_addr_len);
    ep_addr     = calloc(1, iface_attr.ep_addr_len);
    rkey_buffer = calloc(1, pd_attr.rkey_packed_size);
    if ((iface_addr == NULL) || (ep_addr == NULL) || (rkey_buffer == NULL)) {
        goto err_free;
    }

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(perf->uct.iface, iface_addr);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_iface_get_address: %s", ucs_status_string(status));
            goto err_free;
        }
    }

    status = uct_pd_mkey_pack(perf->uct.pd, perf->uct.recv_mem.memh, rkey_buffer);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status));
        goto err_free;
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }

            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    va                  = (uintptr_t)perf->recv_buffer;
    vec[0].iov_base     = &va;
    vec[0].iov_len      = sizeof(va);
    vec[1].iov_base     = rkey_buffer;
    vec[1].iov_len      = pd_attr.rkey_packed_size;
    vec[2].iov_base     = dev_addr;
    vec[2].iov_len      = iface_attr.device_addr_len;
    vec[3].iov_base     = iface_addr;
    vec[3].iov_len      = iface_attr.iface_addr_len;
    vec[4].iov_base     = ep_addr;
    vec[4].iov_len      = iface_attr.ep_addr_len;

    rte_call(perf, post_vec, vec, 5, &req);
    rte_call(perf, exchange_vec, req);

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }
        vec[0].iov_base     = &va;
        vec[0].iov_len      = sizeof(va);
        vec[1].iov_base     = rkey_buffer;
        vec[1].iov_len      = pd_attr.rkey_packed_size;
        vec[2].iov_base     = dev_addr;
        vec[2].iov_len      = iface_attr.device_addr_len;
        vec[3].iov_base     = iface_addr;
        vec[3].iov_len      = iface_attr.iface_addr_len;
        vec[4].iov_base     = ep_addr;
        vec[4].iov_len      = iface_attr.ep_addr_len;

        rte_call(perf, recv_vec, i, vec, 5, req);

        perf->uct.peers[i].remote_addr = va;
        status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_unpack: %s", ucs_status_string(status));
            return status;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr, ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr, iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }
    }
    uct_perf_iface_flush_b(perf);

    rte_call(perf, barrier);

    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);

    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
err:
    return status;
}