Beispiel #1
0
static UCS_F_ALWAYS_INLINE ucs_status_t 
uct_rc_verbs_iface_poll_rx(uct_rc_verbs_iface_t *iface)
{
    uct_ib_iface_recv_desc_t *desc;
    uct_rc_hdr_t *hdr;
    struct ibv_wc wc[UCT_IB_MAX_WC];
    int i, ret;

    ret = ibv_poll_cq(iface->super.super.recv_cq, UCT_IB_MAX_WC, wc);
    if (ret > 0) {
        for (i = 0; i < ret; ++i) {
            if (ucs_unlikely(wc[i].status != IBV_WC_SUCCESS)) {
                ucs_fatal("Receive completion with error: %s", ibv_wc_status_str(wc[i].status));
            }

            UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_RX_COMPLETION, 1);

            desc = (void*)wc[i].wr_id;
            uct_ib_iface_desc_received(&iface->super.super, desc, wc[i].byte_len, 1);

            hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc);
            uct_ib_log_recv_completion(IBV_QPT_RC, &wc[i], hdr, uct_rc_ep_am_packet_dump);

            uct_rc_iface_invoke_am(&iface->super, hdr, wc[i].byte_len, desc);
        }

        iface->super.rx.available += ret;
        return UCS_OK;
    } else if (ret == 0) {
        uct_rc_verbs_iface_post_recv(iface, 0);
        return UCS_ERR_NO_PROGRESS;
    } else {
        ucs_fatal("Failed to poll receive CQ");
    }
}
Beispiel #2
0
static inline ucs_status_t uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface)
{
    uct_ib_iface_recv_desc_t *desc;
    struct ibv_wc wc[UCT_IB_MAX_WC];
    int i, ret;
    char *packet;


    ret = ibv_poll_cq(iface->super.super.recv_cq, UCT_IB_MAX_WC, wc);
    if (ret == 0) {
        return UCS_ERR_NO_PROGRESS;
    } 
    if (ucs_unlikely(ret < 0)) {
        ucs_fatal("Failed to poll receive CQ");
    }

    for (i = 0; i < ret; ++i) {
        if (ucs_unlikely(wc[i].status != IBV_WC_SUCCESS)) {
            ucs_fatal("Receive completion with error: %s", ibv_wc_status_str(wc[i].status));
        }

        desc = (void*)wc[i].wr_id;
        ucs_trace_data("pkt rcvd: buf=%p len=%d", desc, wc[i].byte_len);
        packet = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc);
        VALGRIND_MAKE_MEM_DEFINED(packet, wc[i].byte_len);

        uct_ud_ep_process_rx(&iface->super, 
                             (uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
                             wc[i].byte_len - UCT_IB_GRH_LEN,
                             (uct_ud_recv_skb_t *)desc); 
    }
    iface->super.rx.available += ret;
    uct_ud_verbs_iface_post_recv(iface);
    return UCS_OK;
}
Beispiel #3
0
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags, int max_log_sge)
{
    struct ibv_send_wr *bad_wr;
    int ret;

    uct_rc_txqp_check(&ep->super.txqp);

    if (!(send_flags & IBV_SEND_SIGNALED)) {
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                                 IBV_SEND_SIGNALED);
    }
    if (wr->opcode == IBV_WR_RDMA_READ) {
        send_flags |= uct_rc_ep_atomic_fence(&iface->super, &ep->fi,
                                             IBV_SEND_FENCE);
    }

    wr->send_flags = send_flags;
    wr->wr_id      = uct_rc_txqp_unsignaled(&ep->super.txqp);

    uct_ib_log_post_send(&iface->super.super, ep->super.txqp.qp, wr, max_log_sge,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_packet_dump : NULL);

    ret = ibv_post_send(ep->super.txqp.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt, &iface->super, send_flags & IBV_SEND_SIGNALED);
}
Beispiel #4
0
static ucs_status_t ucs_async_handler_dispatch(ucs_async_handler_t *handler)
{
    ucs_async_context_t *async;
    ucs_async_mode_t mode;
    ucs_status_t status;

    mode  = handler->mode;
    async = handler->async;
    if (async != NULL) {
        async->last_wakeup = ucs_get_time();
    }
    if (async == NULL) {
        ucs_trace_async("calling async handler " UCS_ASYNC_HANDLER_FMT,
                        UCS_ASYNC_HANDLER_ARG(handler));
        handler->cb(handler->id, handler->arg);
    } else if (ucs_async_method_call(mode, context_try_block, async)) {
        ucs_trace_async("calling async handler " UCS_ASYNC_HANDLER_FMT,
                        UCS_ASYNC_HANDLER_ARG(handler));
        handler->cb(handler->id, handler->arg);
        ucs_async_method_call(mode, context_unblock, async);
    } else /* async != NULL */ {
        ucs_trace_async("missed " UCS_ASYNC_HANDLER_FMT ", last_wakeup %lu",
                        UCS_ASYNC_HANDLER_ARG(handler), async->last_wakeup);
        if (ucs_atomic_cswap32(&handler->missed, 0, 1) == 0) {
            status = ucs_mpmc_queue_push(&async->missed, handler->id);
            if (status != UCS_OK) {
                ucs_fatal("Failed to push event %d to miss queue: %s",
                          handler->id, ucs_status_string(status));
            }
        }
        return UCS_ERR_NO_PROGRESS;
    }
    return UCS_OK;
}
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags)
{
    struct ibv_send_wr *bad_wr;
    int ret;

    uct_rc_txqp_check(&ep->super.txqp);

    if (!(send_flags & IBV_SEND_SIGNALED)) {
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                                 IBV_SEND_SIGNALED);
    }
    wr->send_flags = send_flags;
    wr->wr_id      = uct_rc_txqp_unsignaled(&ep->super.txqp);

    uct_ib_log_post_send(&iface->super.super, ep->super.txqp.qp, wr,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_am_packet_dump : NULL);

    UCT_IB_INSTRUMENT_RECORD_SEND_WR_LEN("uct_rc_verbs_ep_post_send", wr);

    ret = ibv_post_send(ep->super.txqp.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt, &iface->super, send_flags & IBV_SEND_SIGNALED);
}
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_exp_post_send(uct_rc_verbs_ep_t *ep, struct ibv_exp_send_wr *wr,
                           uint64_t signal)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_txqp_check(&ep->super.txqp);

    struct ibv_exp_send_wr *bad_wr;
    int ret;

    signal |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                         IBV_EXP_SEND_SIGNALED);
    wr->exp_send_flags = signal;
    wr->wr_id          = uct_rc_txqp_unsignaled(&ep->super.txqp);

    uct_ib_log_exp_post_send(&iface->super.super, ep->super.txqp.qp, wr,
                             (wr->exp_opcode == IBV_EXP_WR_SEND) ? uct_rc_ep_am_packet_dump : NULL);

    UCT_IB_INSTRUMENT_RECORD_SEND_EXP_WR_LEN("uct_rc_verbs_exp_post_send", wr);

    ret = ibv_exp_post_send(ep->super.txqp.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_exp_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt, &iface->super, signal);
}
Beispiel #7
0
size_t ucp_tag_rndv_rts_pack(void *dest, void *arg)
{
    ucp_request_t *sreq              = arg;   /* send request */
    ucp_rndv_rts_hdr_t *rndv_rts_hdr = dest;
    ucp_worker_h worker              = sreq->send.ep->worker;
    ssize_t packed_rkey_size;

    rndv_rts_hdr->super.tag        = sreq->send.tag.tag;
    rndv_rts_hdr->sreq.reqptr      = (uintptr_t)sreq;
    rndv_rts_hdr->sreq.sender_uuid = worker->uuid;
    rndv_rts_hdr->size             = sreq->send.length;

    /* Pack remote keys (which can be empty list) */
    if (UCP_DT_IS_CONTIG(sreq->send.datatype) &&
        ucp_rndv_is_get_zcopy(sreq, worker->context->config.ext.rndv_mode)) {
        /* pack rkey, ask target to do get_zcopy */
        rndv_rts_hdr->address = (uintptr_t)sreq->send.buffer;
        packed_rkey_size = ucp_rkey_pack_uct(worker->context,
                                             sreq->send.state.dt.dt.contig.md_map,
                                             sreq->send.state.dt.dt.contig.memh,
                                             rndv_rts_hdr + 1);
        if (packed_rkey_size < 0) {
            ucs_fatal("failed to pack rendezvous remote key: %s",
                      ucs_status_string(packed_rkey_size));
        }
    } else {
        rndv_rts_hdr->address = 0;
        packed_rkey_size      = 0;
    }

    return sizeof(*rndv_rts_hdr) + packed_rkey_size;
}
Beispiel #8
0
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_exp_post_send(uct_rc_verbs_ep_t *ep, struct ibv_exp_send_wr *wr,
                           int signal)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_exp_send_wr *bad_wr;
    int ret;

    if (!signal) {
         signal = uct_rc_iface_tx_moderation(&iface->super, &ep->super,
                                             IBV_EXP_SEND_SIGNALED);
    }
    wr->exp_send_flags |= signal;
    wr->wr_id          = ep->super.unsignaled;

    uct_ib_log_exp_post_send(ep->super.qp, wr,
                             (wr->exp_opcode == IBV_EXP_WR_SEND) ? uct_rc_ep_am_packet_dump : NULL);

    ret = ibv_exp_post_send(ep->super.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_exp_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_ep_posted(ep, signal);
}
Beispiel #9
0
unsigned uct_rc_verbs_iface_post_recv_always(uct_rc_iface_t *iface, unsigned max)
{
    struct ibv_recv_wr *bad_wr;
    uct_ib_recv_wr_t *wrs;
    unsigned count;
    int ret;

    wrs  = ucs_alloca(sizeof *wrs  * max);

    count = uct_ib_iface_prepare_rx_wrs(&iface->super, &iface->rx.mp,
                                        wrs, max);
    if (ucs_unlikely(count == 0)) {
        return 0;
    }

    UCT_IB_INSTRUMENT_RECORD_RECV_WR_LEN("uct_rc_iface_post_recv_always",
                                         &wrs[0].ibwr);
    ret = ibv_post_srq_recv(iface->rx.srq, &wrs[0].ibwr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_srq_recv() returned %d: %m", ret);
    }
    iface->rx.available -= count;

    return count;
}
Beispiel #10
0
void ucs_global_opts_init()
{
    ucs_status_t status;

    status = ucs_config_parser_fill_opts(&ucs_global_opts, ucs_global_opts_table,
                                         NULL, NULL, 1);
    if (status != UCS_OK) {
        ucs_fatal("failed to parse global configuration - aborting");
    }
}
Beispiel #11
0
void
ucp_tag_eager_sync_zcopy_req_complete(ucp_request_t *req, ucs_status_t status)
{
    if (req->send.state.dt.offset == req->send.length) {
        ucp_request_send_buffer_dereg(req); /* TODO register+lane change */
        ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED,
                                      status);
    } else if (status != UCS_OK) {
        ucs_fatal("error handling is not supported with tag-sync protocol");
    }
}
Beispiel #12
0
static UCS_F_ALWAYS_INLINE void 
uct_rc_verbs_iface_poll_tx(uct_rc_verbs_iface_t *iface)
{
    struct ibv_wc wc[UCT_IB_MAX_WC];
    uct_rc_verbs_ep_t *ep;
    uct_rc_iface_send_op_t *op;
    unsigned count;
    uint16_t sn;
    int i, ret;

    ret = ibv_poll_cq(iface->super.super.send_cq, UCT_IB_MAX_WC, wc);
    if (ucs_unlikely(ret <= 0)) {
        if (ucs_unlikely(ret < 0)) {
            ucs_fatal("Failed to poll send CQ");
        }
        return;
    }

    for (i = 0; i < ret; ++i) {
        if (ucs_unlikely(wc[i].status != IBV_WC_SUCCESS)) {
            ucs_fatal("Send completion with error: %s", ibv_wc_status_str(wc[i].status));
        }

        UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_TX_COMPLETION, 1);

        ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, wc[i].qp_num), uct_rc_verbs_ep_t);
        ucs_assert(ep != NULL);

        count = wc[i].wr_id + 1; /* Number of sends with WC completes in batch */
        ep->super.available         += count;
        ep->tx.completion_count     += count;
        ++iface->super.tx.cq_available;

        sn = ep->tx.completion_count;
        ucs_queue_for_each_extract(op, &ep->super.outstanding, queue,
                                   UCS_CIRCULAR_COMPARE16(op->sn, <=, sn)) {
            op->handler(op);
        }
    }
}
Beispiel #13
0
void *uct_mm_ep_attach_remote_seg(uct_mm_ep_t *ep, uct_mm_iface_t *iface, uct_mm_fifo_element_t *elem)
{
    uct_mm_remote_seg_t *remote_seg, search;
    ucs_status_t status;

    /* take the mmid of the chunk that the desc belongs to, (the desc that the fifo_elem
     * is 'assigned' to), and check if the ep has already attached to it.
     */
    search.mmid = elem->desc_mmid;
    remote_seg = sglib_hashed_uct_mm_remote_seg_t_find_member(ep->remote_segments_hash, &search);
    if (remote_seg == NULL) {
        /* not in the hash. attach to the memory the mmid refers to. the attach call
         * will return the base address of the mmid's chunk -
         * save this base address in a hash table (which maps mmid to base address). */
        remote_seg = ucs_malloc(sizeof(*remote_seg), "mm_desc");
        if (remote_seg == NULL) {
            ucs_fatal("Failed to allocated memory for a remote segment identifier. %m");
        }

        status = uct_mm_md_mapper_ops(iface->super.md)->attach(elem->desc_mmid,
                                                               elem->desc_mpool_size,
                                                               elem->desc_chunk_base_addr,
                                                               &remote_seg->address,
                                                               &remote_seg->cookie,
                                                               iface->path);
        if (status != UCS_OK) {
            ucs_fatal("Failed to attach to remote mmid:%zu. %s ",
                      elem->desc_mmid, ucs_status_string(status));
        }

        remote_seg->mmid   = elem->desc_mmid;
        remote_seg->length = elem->desc_mpool_size;

        /* put the base address into the ep's hash table */
        sglib_hashed_uct_mm_remote_seg_t_add(ep->remote_segments_hash, remote_seg);
    }

    return remote_seg->address;

}
Beispiel #14
0
static inline void uct_ud_verbs_iface_poll_tx(uct_ud_verbs_iface_t *iface)
{
    struct ibv_wc wc;
    int ret;

    ret = ibv_poll_cq(iface->super.super.send_cq, 1, &wc);
    if (ucs_unlikely(ret < 0)) {
        ucs_fatal("Failed to poll send CQ");
        return;
    }

    if (ret == 0) {
        return;
    }

    if (ucs_unlikely(wc.status != IBV_WC_SUCCESS)) {
        ucs_fatal("Send completion (wr_id=0x%0X with error: %s ", (unsigned)wc.wr_id, ibv_wc_status_str(wc.status));
        return;
    }

    iface->super.tx.available += UCT_UD_TX_MODERATION + 1;
}
Beispiel #15
0
static void uct_cm_iface_outstanding_remove(uct_cm_iface_t* iface,
                                            struct ib_cm_id* id)
{
    unsigned index;

    for (index = 0; index < iface->num_outstanding; ++index) {
        if (iface->outstanding[index] == id) {
            iface->outstanding[index] = uct_cm_iface_outstanding_pop(iface);
            return;
        }
    }

    ucs_fatal("outstanding cm id %p not found", id);
}
Beispiel #16
0
static void uct_tcp_ep_epoll_ctl(uct_tcp_ep_t *ep, int op)
{
    uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                            uct_tcp_iface_t);
    struct epoll_event epoll_event;
    int ret;

    memset(&epoll_event, 0, sizeof(epoll_event));
    epoll_event.data.ptr = ep;
    epoll_event.events   = ep->events;
    ret = epoll_ctl(iface->epfd, op, ep->fd, &epoll_event);
    if (ret < 0) {
        ucs_fatal("epoll_ctl(epfd=%d, op=%d, fd=%d) failed: %m",
                  iface->epfd, op, ep->fd);
    }
}
Beispiel #17
0
static void
ucs_config_parser_print_opts_recurs(FILE *stream, const void *opts,
                                    const ucs_config_field_t *fields,
                                    unsigned flags, const char *env_prefix,
                                    const char *table_prefix)
{
    const ucs_config_field_t *field, *aliased_field;
    size_t alias_table_offset;
    const char *prefix;

    prefix = table_prefix == NULL ? "" : table_prefix;

    for (field = fields; field->name; ++field) {
        if (ucs_config_is_table_field(field)) {
            /* Parse with sub-table prefix */
            if (table_prefix == NULL) {
                ucs_config_parser_print_opts_recurs(stream, opts + field->offset,
                                                    field->parser.arg, flags,
                                                    env_prefix, field->name);
            } else {
                ucs_config_parser_print_opts_recurs(stream, opts + field->offset,
                                                    field->parser.arg, flags,
                                                    env_prefix, table_prefix);
            }
        } else if (ucs_config_is_alias_field(field)) {
            if (flags & UCS_CONFIG_PRINT_HIDDEN) {
                aliased_field = ucs_config_find_aliased_field(fields, field,
                                                              &alias_table_offset);
                if (aliased_field == NULL) {
                    ucs_fatal("could not find aliased field of %s", field->name);
                }
                ucs_config_parser_print_field(stream,
                                              opts + alias_table_offset,
                                              env_prefix, table_prefix,
                                              field->name, aliased_field,
                                              flags, "(alias of %s%s%s)",
                                              env_prefix, table_prefix,
                                              aliased_field->name);
            }
        } else {
            ucs_config_parser_print_field(stream, opts, env_prefix, prefix,
                                          field->name, field, flags, NULL);
        }
     }

}
Beispiel #18
0
ucs_status_t ucp_tag_send(ucp_ep_h ep, const void *buffer, size_t length,
                          ucp_tag_t tag)
{
    ucp_worker_h worker = ep->worker;
    ucs_status_t status;

retry:
    if (ucs_likely(length < ep->config.max_short_tag)) {
        UCS_STATIC_ASSERT(sizeof(ucp_tag_t) == sizeof(uint64_t));
        status = uct_ep_am_short(ep->uct_ep, UCP_AM_ID_EAGER_ONLY, tag,
                                 buffer, length);
        if (status == UCS_ERR_NO_RESOURCE) {
            ucp_worker_progress(worker);
            goto retry;
        }
        return status;
    }

    ucs_fatal("unsupported");
}
Beispiel #19
0
void uct_dc_ep_set_failed(ucs_class_t *ep_cls, uct_dc_iface_t *iface,
                          uint32_t qp_num)
{
    uint8_t dci = uct_dc_iface_dci_find(iface, qp_num);
    uct_dc_ep_t *ep = iface->tx.dcis[dci].ep;

    if (!ep) {
        return;
    }

    uct_rc_txqp_purge_outstanding(&iface->tx.dcis[dci].txqp,
                                  UCS_ERR_ENDPOINT_TIMEOUT, 0);
    uct_set_ep_failed(ep_cls, &ep->super.super,
                      &iface->super.super.super.super);
    if (UCS_OK != uct_dc_iface_dci_reconnect(iface, &iface->tx.dcis[dci].txqp)) {
        ucs_fatal("Unsuccessful reconnect of DC QP #%u", qp_num);
    }
    uct_rc_txqp_available_set(&iface->tx.dcis[dci].txqp,
                              iface->super.config.tx_qp_len);
}
Beispiel #20
0
static UCS_F_NOINLINE void
uct_ud_verbs_iface_post_recv_always(uct_ud_verbs_iface_t *iface, int max)
{
    struct ibv_recv_wr *bad_wr;
    uct_ib_recv_wr_t *wrs;
    unsigned count;
    int ret;

    wrs  = ucs_alloca(sizeof *wrs  * max);

    count = uct_ib_iface_prepare_rx_wrs(&iface->super.super, &iface->super.rx.mp,
                                        wrs, max);
    if (count == 0) {
        return;
    }

    ret = ibv_post_recv(iface->super.qp, &wrs[0].ibwr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_recv() returned %d: %m", ret);
    }
    iface->super.rx.available -= count;
}
Beispiel #21
0
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags)
{
    struct ibv_send_wr *bad_wr;
    int ret;

    if (!(send_flags & IBV_SEND_SIGNALED)) {
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super,
                                                 IBV_SEND_SIGNALED);
    }
    wr->send_flags = send_flags;
    wr->wr_id      = ep->super.unsignaled;

    uct_ib_log_post_send(ep->super.qp, wr,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_am_packet_dump : NULL);

    ret = ibv_post_send(ep->super.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_ep_posted(ep, send_flags & IBV_SEND_SIGNALED);
}
Beispiel #22
0
void __ucs_wtimer_add(ucs_twheel_t *t, ucs_wtimer_t *timer, ucs_time_t delta)
{
    uint64_t slot;

    timer->is_active = 1;
    slot = delta>>t->res_order;
    if (ucs_unlikely(slot == 0)) {
        /* nothing really wrong with adding timer to the current slot. However
         * we want to guard against the case we spend to much time in hi res
         * timer processing */
        ucs_fatal("Timer resolution is too low. Min resolution %lf usec, wanted %lf usec",
                ucs_time_to_usec(t->res), ucs_time_to_usec(delta));
    }
    ucs_assert(slot > 0);

    if (ucs_unlikely(slot >= t->num_slots)) {
        slot = t->num_slots - 1;
    }

    slot = (t->current + slot) % t->num_slots;
    ucs_assert(slot != t->current);

    ucs_list_add_tail(&t->wheel[slot], &timer->list);
}
Beispiel #23
0
static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf,
                                                  uint64_t features)
{
    const size_t buffer_size = 2048;
    ucx_perf_ep_info_t info, *remote_info;
    unsigned group_size, i, group_index;
    ucp_address_t *address;
    size_t address_length = 0;
    ucp_ep_params_t ep_params;
    ucs_status_t status;
    struct iovec vec[3];
    void *rkey_buffer;
    void *req = NULL;
    void *buffer;

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length);
    if (status != UCS_OK) {
        if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status));
        }
        goto err;
    }

    info.ucp.addr_len  = address_length;
    info.recv_buffer   = (uintptr_t)perf->recv_buffer;

    vec[0].iov_base    = &info;
    vec[0].iov_len     = sizeof(info);
    vec[1].iov_base    = address;
    vec[1].iov_len     = address_length;

    if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) {
        status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh,
                               &rkey_buffer, &info.rkey_size);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status));
            }
            ucp_worker_release_address(perf->ucp.worker, address);
            goto err;
        }

        vec[2].iov_base = rkey_buffer;
        vec[2].iov_len  = info.rkey_size;
        rte_call(perf, post_vec, vec, 3, &req);
        ucp_rkey_buffer_release(rkey_buffer);
    } else {
        info.rkey_size  = 0;
        rte_call(perf, post_vec, vec, 2, &req);
    }

    ucp_worker_release_address(perf->ucp.worker, address);
    rte_call(perf, exchange_vec, req);

    perf->ucp.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->ucp.peers == NULL) {
        goto err;
    }

    buffer = malloc(buffer_size);
    if (buffer == NULL) {
        ucs_error("Failed to allocate RTE receive buffer");
        status = UCS_ERR_NO_MEMORY;
        goto err_destroy_eps;
    }

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        rte_call(perf, recv, i, buffer, buffer_size, req);

        remote_info = buffer;
        address     = (void*)(remote_info + 1);
        rkey_buffer = (void*)address + remote_info->ucp.addr_len;
        perf->ucp.peers[i].remote_addr = remote_info->recv_buffer;

        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = address;

        status = ucp_ep_create(perf->ucp.worker, &ep_params, &perf->ucp.peers[i].ep);
        if (status != UCS_OK) {
            if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status));
            }
            goto err_free_buffer;
        }

        if (remote_info->rkey_size > 0) {
            status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer,
                                        &perf->ucp.peers[i].rkey);
            if (status != UCS_OK) {
                if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                    ucs_fatal("ucp_rkey_unpack() failed: %s", ucs_status_string(status));
                }
                goto err_free_buffer;
            }
        } else {
            perf->ucp.peers[i].rkey = NULL;
        }
    }

    free(buffer);

    status = ucp_perf_test_exchange_status(perf, UCS_OK);
    if (status != UCS_OK) {
        ucp_perf_test_destroy_eps(perf, group_size);
    }
    return status;

err_free_buffer:
    free(buffer);
err_destroy_eps:
    ucp_perf_test_destroy_eps(perf, group_size);
err:
    (void)ucp_perf_test_exchange_status(perf, status);
    return status;
}
Beispiel #24
0
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count,
                                   const ucp_address_entry_t *address_list,
                                   uint8_t *addr_indices)
{
    ucp_worker_h worker = ep->worker;
    ucp_ep_config_key_t key;
    uint16_t new_cfg_index;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uint8_t conn_flag;
    char str[32];

    ucs_trace("ep %p: initialize lanes", ep);

    status = ucp_wireup_select_lanes(ep, address_count, address_list,
                                     addr_indices, &key);
    if (status != UCS_OK) {
        goto err;
    }

    key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map;

    new_cfg_index = ucp_worker_get_ep_config(worker, &key);
    if ((ep->cfg_index == new_cfg_index)) {
        return UCS_OK; /* No change */
    }

    if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) {
        /*
         * TODO handle a case where we have to change lanes and reconfigure the ep:
         *
         * - if we already have uct ep connected to an address - move it to the new lane index
         * - if we don't yet have connection to an address - create it
         * - if an existing lane is not connected anymore - delete it (possibly)
         * - if the configuration has changed - replay all pending operations on all lanes -
         *   need that every pending callback would return, in case of failure, the number
         *   of lane it wants to be queued on.
         */
        ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep, ep->cfg_index,
                  new_cfg_index);
        ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, "old", NULL);
        ucp_wireup_print_config(worker->context, &key, "new", NULL);
        ucs_fatal("endpoint reconfiguration not supported yet");
    }

    ep->cfg_index = new_cfg_index;
    ep->am_lane   = key.am_lane;

    snprintf(str, sizeof(str), "ep %p", ep);
    ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str,
                            addr_indices);

    ucs_trace("ep %p: connect lanes", ep);

    /* establish connections on all underlying endpoints */
    conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED;
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        status = ucp_wireup_connect_lane(ep, lane, address_count, address_list,
                                         addr_indices[lane]);
        if (status != UCS_OK) {
            goto err;
        }

        if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) {
            conn_flag = 0; /* If we have a p2p transport, we're not connected */
        }
    }

    ep->flags |= conn_flag;
    return UCS_OK;

err:
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ep->uct_eps[lane] != NULL) {
            uct_ep_destroy(ep->uct_eps[lane]);
            ep->uct_eps[lane] = NULL;
        }
    }
    return status;
}
Beispiel #25
0
/*
 * Generic data-pointer posting function.
 * Parameters which are not relevant to the opcode are ignored.
 *
 *            +--------+-----+-------+--------+-------+
 * SEND       | CTRL   | INL | am_id | am_hdr | DPSEG |
 *            +--------+-----+---+---+----+----+------+
 * RDMA_WRITE | CTRL   | RADDR   | DPSEG  |
 *            +--------+---------+--------+-------+
 * ATOMIC     | CTRL   | RADDR   | ATOMIC | DPSEG |
 *            +--------+---------+--------+-------+
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_ep_dptr_post(uct_rc_mlx5_ep_t *ep, unsigned opcode_flags,
                         const void *buffer, unsigned length, uint32_t *lkey_p,
                         /* SEND */ uint8_t am_id, const void *am_hdr, unsigned am_hdr_len,
                         /* RDMA/ATOMIC */ uint64_t remote_addr, uct_rkey_t rkey,
                         /* ATOMIC */ uint64_t compare_mask, uint64_t compare, uint64_t swap_add,
                         int signal)
{
    struct mlx5_wqe_ctrl_seg                     *ctrl;
    struct mlx5_wqe_raddr_seg                    *raddr;
    struct mlx5_wqe_atomic_seg                   *atomic;
    struct mlx5_wqe_data_seg                     *dptr;
    struct mlx5_wqe_inl_data_seg                 *inl;
    struct uct_ib_mlx5_atomic_masked_cswap32_seg *masked_cswap32;
    struct uct_ib_mlx5_atomic_masked_fadd32_seg  *masked_fadd32;
    struct uct_ib_mlx5_atomic_masked_cswap64_seg *masked_cswap64;

    uct_rc_mlx5_iface_t *iface;
    uct_rc_hdr_t        *rch;
    unsigned            wqe_size, inl_seg_size;
    uint8_t             opmod;

    iface = ucs_derived_of(ep->super.super.super.iface, uct_rc_mlx5_iface_t);
    if (!signal) {
        signal = uct_rc_iface_tx_moderation(&iface->super, &ep->super,
                                            MLX5_WQE_CTRL_CQ_UPDATE);
    } else {
        ucs_assert(signal == MLX5_WQE_CTRL_CQ_UPDATE);
    }

    opmod = 0;
    ctrl = ep->tx.wq.curr;
    switch (opcode_flags) {
    case MLX5_OPCODE_SEND:
        inl_seg_size     = ucs_align_up_pow2(sizeof(*inl) + sizeof(*rch) + am_hdr_len,
                                             UCT_IB_MLX5_WQE_SEG_SIZE);

        ucs_assert(sizeof(*ctrl) + inl_seg_size + sizeof(*dptr) <=
                   UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB);
        ucs_assert(length + sizeof(*rch) + am_hdr_len <=
                   iface->super.super.config.seg_size);

        /* Inline segment with AM ID and header */
        inl              = (void*)(ctrl + 1);
        inl->byte_count  = htonl((sizeof(*rch) + am_hdr_len) | MLX5_INLINE_SEG);
        rch              = (void*)(inl + 1);
        rch->am_id       = am_id;

        uct_ib_mlx5_inline_copy(rch + 1, am_hdr, am_hdr_len, &ep->tx.wq);

        /* Data segment with payload */
        if (length == 0) {
            wqe_size     = sizeof(*ctrl) + inl_seg_size;
        } else {
            wqe_size     = sizeof(*ctrl) + inl_seg_size + sizeof(*dptr);
            dptr         = (void*)(ctrl + 1) + inl_seg_size;
            if (ucs_unlikely((void*)dptr >= ep->tx.wq.qend)) {
                dptr = (void*)dptr - (ep->tx.wq.qend - ep->tx.wq.qstart);
            }

            ucs_assert((void*)dptr       >= ep->tx.wq.qstart);
            ucs_assert((void*)(dptr + 1) <= ep->tx.wq.qend);
            uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p);
        }
        break;

    case MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW:
        /* Data segment only */
        ucs_assert(length < (2ul << 30));
        ucs_assert(length <= iface->super.super.config.seg_size);

        wqe_size         = sizeof(*ctrl) + sizeof(*dptr);
        uct_ib_mlx5_set_data_seg((void*)(ctrl + 1), buffer, length, *lkey_p);
        break;

    case MLX5_OPCODE_RDMA_READ:
    case MLX5_OPCODE_RDMA_WRITE:
        /* Set RDMA segment */
        ucs_assert(length <= UCT_IB_MAX_MESSAGE_SIZE);

        raddr            = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        /* Data segment */
        if (length == 0) {
            wqe_size     = sizeof(*ctrl) + sizeof(*raddr);
        } else {
            wqe_size     = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*dptr);
            uct_ib_mlx5_set_data_seg((void*)(raddr + 1), buffer, length, *lkey_p);
        }
        break;

    case MLX5_OPCODE_ATOMIC_FA:
    case MLX5_OPCODE_ATOMIC_CS:
        ucs_assert(length == sizeof(uint64_t));
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        atomic            = (void*)(raddr + 1);
        if (opcode_flags == MLX5_OPCODE_ATOMIC_CS) {
            atomic->compare = compare;
        }
        atomic->swap_add  = swap_add;

        uct_ib_mlx5_set_data_seg((void*)(atomic + 1), buffer, length, *lkey_p);
        wqe_size          = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*atomic) +
                            sizeof(*dptr);
        break;

    case MLX5_OPCODE_ATOMIC_MASKED_CS:
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        switch (length) {
        case sizeof(uint32_t):
            opmod                        = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2);
            masked_cswap32 = (void*)(raddr + 1);
            masked_cswap32->swap         = swap_add;
            masked_cswap32->compare      = compare;
            masked_cswap32->swap_mask    = (uint32_t)-1;
            masked_cswap32->compare_mask = compare_mask;
            dptr                         = (void*)(masked_cswap32 + 1);
            wqe_size                     = sizeof(*ctrl) + sizeof(*raddr) +
                                           sizeof(*masked_cswap32) + sizeof(*dptr);
            break;
        case sizeof(uint64_t):
            opmod                        = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(3); /* Ext. atomic, size 2**3 */
            masked_cswap64 = (void*)(raddr + 1);
            masked_cswap64->swap         = swap_add;
            masked_cswap64->compare      = compare;
            masked_cswap64->swap_mask    = (uint64_t)-1;
            masked_cswap64->compare_mask = compare_mask;
            dptr                         = (void*)(masked_cswap64 + 1);
            wqe_size                     = sizeof(*ctrl) + sizeof(*raddr) +
                                           sizeof(*masked_cswap64) + sizeof(*dptr);

            /* Handle QP wrap-around. It cannot happen in the middle of
             * masked-cswap segment, because it's still in the first BB.
             */
            ucs_assert((void*)dptr <= ep->tx.wq.qend);
            if (dptr == ep->tx.wq.qend) {
                dptr = ep->tx.wq.qstart;
            } else {
                ucs_assert((void*)masked_cswap64 < ep->tx.wq.qend);
            }
            break;
        default:
            ucs_assert(0);
        }

        uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p);
        break;

     case MLX5_OPCODE_ATOMIC_MASKED_FA:
        ucs_assert(length == sizeof(uint32_t));
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        opmod                         = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2);
        masked_fadd32                 = (void*)(raddr + 1);
        masked_fadd32->add            = swap_add;
        masked_fadd32->filed_boundary = 0;

        uct_ib_mlx5_set_data_seg((void*)(masked_fadd32 + 1), buffer, length,
                                 *lkey_p);
        wqe_size                      = sizeof(*ctrl) + sizeof(*raddr) +
                                        sizeof(*masked_fadd32) + sizeof(*dptr);
        break;

    default:
        ucs_fatal("invalid send opcode");
    }

    uct_rc_mlx5_post_send(ep, ctrl, (opcode_flags & UCT_RC_MLX5_OPCODE_MASK),
                          opmod, signal, wqe_size);
}
Beispiel #26
0
void uct_ud_ep_process_rx(uct_ud_iface_t *iface, uct_ud_neth_t *neth, unsigned byte_len,
                          uct_ud_recv_skb_t *skb)
{
    uint32_t dest_id;
    uint32_t is_am, am_id;
    uct_ud_ep_t *ep = 0; /* todo: check why gcc complaints about uninitialized var */
    ucs_frag_list_ooo_type_t ooo_type;

    dest_id = uct_ud_neth_get_dest_id(neth);
    am_id   = uct_ud_neth_get_am_id(neth);
    is_am   = neth->packet_type & UCT_UD_PACKET_FLAG_AM;

    if (ucs_unlikely(dest_id == UCT_UD_EP_NULL_ID)) {
        /* must be connection request packet */
        uct_ud_iface_log_rx(iface, NULL, neth, byte_len);
        uct_ud_ep_rx_creq(iface, neth);
        goto out;
    }
    else if (ucs_unlikely(!ucs_ptr_array_lookup(&iface->eps, dest_id, ep) ||
                     ep->ep_id != dest_id)) {
        uct_ud_iface_log_rx(iface, ep, neth, byte_len);
        /* TODO: in the future just drop the packet */
        ucs_fatal("Faied to find ep(%d)", dest_id);
        goto out;
    } 

    uct_ud_iface_log_rx(iface, ep, neth, byte_len);
    ucs_assert(ep->ep_id != UCT_UD_EP_NULL_ID);
    UCT_UD_EP_HOOK_CALL_RX(ep, neth);
    
    uct_ud_ep_process_ack(iface, ep, neth->ack_psn);

    if (ucs_unlikely(neth->packet_type & UCT_UD_PACKET_FLAG_ACK_REQ)) {
        uct_ud_iface_queue_pending(iface, ep, UCT_UD_EP_OP_ACK);
    }

    if (ucs_unlikely(!is_am && (byte_len == sizeof(*neth)))) {
        goto out;
    }

    ooo_type = ucs_frag_list_insert(&ep->rx.ooo_pkts, &skb->ooo_elem, neth->psn);
    if (ucs_unlikely(ooo_type != UCS_FRAG_LIST_INSERT_FAST)) {
        ucs_fatal("Out of order is not implemented: got %d", ooo_type);
        goto out;
    }
    
    if (ucs_unlikely(!is_am && (neth->packet_type & UCT_UD_PACKET_FLAG_CTL))) {
        uct_ud_ep_rx_ctl(iface, ep, (uct_ud_ctl_hdr_t *)(neth + 1));
        goto out;
    }

    if (ucs_unlikely(!is_am && (neth->packet_type & UCT_UD_PACKET_FLAG_PUT))) {
        uct_ud_ep_rx_put(neth, byte_len);
        goto out;
    }

    uct_ib_iface_invoke_am(&iface->super, am_id, neth + 1,
                           byte_len - sizeof(*neth), &skb->super);
    return;

out:
    ucs_mpool_put(skb);
}