Example #1
static UCS_F_ALWAYS_INLINE void 
uct_rc_mlx5_iface_poll_tx(uct_rc_mlx5_iface_t *iface)
{
    uct_rc_iface_send_op_t *op;
    struct mlx5_cqe64 *cqe;
    uct_rc_mlx5_ep_t *ep;
    unsigned qp_num;
    uint16_t hw_ci;

    /* Peek the next TX completion; NULL means nothing new on the CQ */
    cqe = uct_ib_mlx5_get_cqe(&iface->tx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        return;
    }

    UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_TX_COMPLETION, 1);

    /* Order the CQE field reads after the ownership/validity check */
    ucs_memory_cpu_load_fence();

    /* The QP number in the CQE tells us which endpoint completed a send */
    qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
    ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, qp_num), uct_rc_mlx5_ep_t);
    ucs_assert(ep != NULL);

    /* wqe_counter is the HW consumer index: recompute the free WQE BBs
     * (building blocks) on this QP and return one CQ credit */
    hw_ci = ntohs(cqe->wqe_counter);
    ep->super.available = uct_ib_mlx5_txwq_update_bb(&ep->tx.wq, hw_ci);
    ++iface->super.tx.cq_available;

    /* Complete every outstanding op whose sequence number has been reached,
     * using a wraparound-safe 16-bit comparison */
    ucs_queue_for_each_extract(op, &ep->super.outstanding, queue,
                               UCS_CIRCULAR_COMPARE16(op->sn, <=, hw_ci)) {
        op->handler(op);
    }
}
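
A note on the completion loop above: wqe_counter is a 16-bit value that wraps around, so the "op->sn <= hw_ci" test has to be wraparound-safe, which is what UCS_CIRCULAR_COMPARE16 provides. The standalone sketch below shows the usual way such a comparison works; the helper name and the exact expression are illustrative, not the actual UCS macro definition.

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "a <= b" for 16-bit sequence numbers: the signed
 * difference treats values slightly behind b as smaller even across the
 * 0xffff -> 0x0000 wrap. Illustrative only, not UCS_CIRCULAR_COMPARE16. */
static int seq16_before_or_equal(uint16_t a, uint16_t b)
{
    return (int16_t)(a - b) <= 0;
}

int main(void)
{
    /* An op with sn=0xfffe completed by hw_ci=0x0001 (counter wrapped) */
    printf("%d\n", seq16_before_or_equal(0xfffe, 0x0001)); /* 1: retire it */
    /* An op with sn=0x0005 is still ahead of hw_ci=0x0001 */
    printf("%d\n", seq16_before_or_equal(0x0005, 0x0001)); /* 0: keep it */
    return 0;
}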
Example #2
static UCS_F_ALWAYS_INLINE void 
uct_ud_mlx5_iface_poll_tx(uct_ud_mlx5_iface_t *iface)
{
    struct mlx5_cqe64 *cqe;

    cqe = uct_ib_mlx5_get_cqe(&iface->tx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        return;
    }
    uct_ib_mlx5_log_cqe(cqe);
    /* A single TX CQE frees every WQE BB up to the reported wqe_counter */
    iface->super.tx.available = uct_ib_mlx5_txwq_update_bb(&iface->tx.wq, ntohs(cqe->wqe_counter));
}
Example #3
static UCS_F_ALWAYS_INLINE 
ucs_status_t uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface)
{
    struct mlx5_cqe64 *cqe;
    uint16_t ci;
    uct_ib_iface_recv_desc_t *desc;
    uint32_t len;
    void *packet;
    ucs_status_t status;

    /* Locate the receive buffer the next completion will land in and
     * prefetch past the GRH, where the UD header starts */
    ci     = iface->rx.wq.cq_wqe_counter & iface->rx.wq.mask;
    packet = (void *)ntohll(iface->rx.wq.wqes[ci].addr);
    ucs_prefetch(packet + UCT_IB_GRH_LEN);
    desc   = (uct_ib_iface_recv_desc_t *)(packet - iface->super.super.config.rx_hdr_offset);

    cqe = uct_ib_mlx5_get_cqe(&iface->rx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        status = UCS_ERR_NO_PROGRESS;
        goto out;
    }
    uct_ib_mlx5_log_cqe(cqe);
    ucs_assert(0 == (cqe->op_own & 
               (MLX5_INLINE_SCATTER_32|MLX5_INLINE_SCATTER_64)));
    ucs_assert(ntohs(cqe->wqe_counter) == iface->rx.wq.cq_wqe_counter);

    /* One RX slot is free to repost; advance the SW consumer index */
    iface->super.rx.available++;
    iface->rx.wq.cq_wqe_counter++;

    len = ntohl(cqe->byte_cnt);
    VALGRIND_MAKE_MEM_DEFINED(packet, len);

    uct_ud_ep_process_rx(&iface->super,
                         (uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
                         len - UCT_IB_GRH_LEN,
                         (uct_ud_recv_skb_t *)desc);
    status = UCS_OK;

out:
    if (iface->super.rx.available >= iface->super.config.rx_max_batch) {
        /* Always try to post receive buffers here; otherwise, if the
         * receiver is slow and there is always a CQE to process, we can
         * run out of RX WQEs.
         */
        uct_ud_mlx5_iface_post_recv(iface);
    }
    return status;
}
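
The comment in the out: path above captures an important detail: reposting is attempted on every poll, including the no-CQE path, and only fires once rx.available reaches rx_max_batch, so the doorbell cost is amortized without letting the RX queue run dry. Here is a minimal standalone sketch of that batching pattern; the types and names are hypothetical, not the uct_ud_mlx5 data structures.

#include <stdio.h>

/* Toy receive queue: 'available' counts slots that were consumed and are
 * waiting to be handed back to the HW. Hypothetical illustration only. */
typedef struct {
    unsigned available;     /* consumed RX slots not yet reposted */
    unsigned rx_max_batch;  /* repost threshold                   */
    unsigned posted;        /* total WQEs handed back to the HW   */
} toy_rxq_t;

/* Called on every poll, whether or not a completion was found */
static void toy_rxq_post_batch(toy_rxq_t *rxq)
{
    if (rxq->available < rxq->rx_max_batch) {
        return;             /* not enough free slots to amortize the doorbell */
    }
    rxq->posted    += rxq->available;
    rxq->available  = 0;    /* one doorbell covers the whole batch */
}

int main(void)
{
    toy_rxq_t rxq = { .available = 0, .rx_max_batch = 16, .posted = 0 };

    for (int i = 0; i < 40; i++) {  /* simulate 40 received packets */
        rxq.available++;            /* one slot freed per completion */
        toy_rxq_post_batch(&rxq);   /* mirrors the call in the out: path */
    }
    printf("posted=%u still free=%u\n", rxq.posted, rxq.available);
    return 0;
}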
Example #4
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_iface_poll_rx(uct_rc_mlx5_iface_t *iface)
{
    struct mlx5_wqe_srq_next_seg *seg;
    uct_rc_mlx5_recv_desc_t *desc;
    uct_rc_hdr_t *hdr;
    struct mlx5_cqe64 *cqe;
    unsigned byte_len;
    uint16_t wqe_ctr_be;
    uint16_t max_batch;
    ucs_status_t status;

    cqe = uct_ib_mlx5_get_cqe(&iface->rx.cq, iface->rx.cq.cqe_size_log);
    if (cqe == NULL) {
        /* No CQE - nothing to process, but still post receives on the way out */
        status = UCS_ERR_NO_PROGRESS;
        goto done;
    }

    UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_RX_COMPLETION, 1);

    ucs_assert(!ucs_queue_is_empty(&iface->rx.desc_q));
    ucs_memory_cpu_load_fence();

    desc     = ucs_queue_pull_elem_non_empty(&iface->rx.desc_q, uct_rc_mlx5_recv_desc_t, queue);
    byte_len = ntohl(cqe->byte_cnt);

    uct_ib_iface_desc_received(&iface->super.super, &desc->super, byte_len,
                               !(cqe->op_own & (MLX5_INLINE_SCATTER_32|MLX5_INLINE_SCATTER_64)));

    /* Get a pointer to AM header (after which comes the payload)
     * Support cases of inline scatter by pointing directly to CQE.
     */
    if (cqe->op_own & MLX5_INLINE_SCATTER_32) {
        hdr = (uct_rc_hdr_t*)cqe;
        UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_RC_MLX5_IFACE_STAT_RX_INL_32, 1);
    } else if (cqe->op_own & MLX5_INLINE_SCATTER_64) {
        hdr = (uct_rc_hdr_t*)(cqe - 1);
        UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_RC_MLX5_IFACE_STAT_RX_INL_64, 1);
    } else {
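
This example is cut short in the listing, but the inline-scatter branches it does show are worth a note: for 32-byte inline scatter the received data is written into the reported CQE itself, and for 64-byte inline scatter the code reads it starting one 64-byte CQE slot earlier (cqe - 1). The standalone sketch below illustrates just that addressing; the slot type is a hypothetical stand-in, not the mlx5 CQE definition.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a 64-byte CQE slot laid out in a ring */
typedef struct { uint8_t b[64]; } slot64_t;

int main(void)
{
    slot64_t ring[4];            /* a tiny CQ ring of 64-byte slots */
    slot64_t *cqe = &ring[2];    /* the completion we just polled   */

    void *inl32 = cqe;           /* 32-byte scatter: data starts at the CQE */
    void *inl64 = cqe - 1;       /* 64-byte scatter: one full slot earlier  */

    printf("cqe=%p inl32=%p inl64=%p (delta=%td bytes)\n",
           (void *)cqe, inl32, inl64, (char *)cqe - (char *)inl64);
    return 0;
}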
Example #5
static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_poll_tx(uct_dc_mlx5_iface_t *iface)
{
    uint8_t dci;
    struct mlx5_cqe64 *cqe;
    uint32_t qp_num;
    uint16_t hw_ci;
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    cqe = uct_ib_mlx5_get_cqe(&iface->super.super.super, &iface->mlx5_common.tx.cq,
                              iface->mlx5_common.tx.cq.cqe_size_log);
    if (cqe == NULL) {
        return;
    }
    UCS_STATS_UPDATE_COUNTER(iface->super.super.stats, UCT_RC_IFACE_STAT_TX_COMPLETION, 1);

    ucs_memory_cpu_load_fence();

    ucs_assertv(!(cqe->op_own & (MLX5_INLINE_SCATTER_32|MLX5_INLINE_SCATTER_64)),
                "tx inline scatter not supported");

    qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
    /* Find which DCI this completion belongs to and grab its txqp/txwq */
    dci = uct_dc_iface_dci_find(&iface->super, qp_num);
    txqp = &iface->super.tx.dcis[dci].txqp;
    txwq = &iface->dci_wqs[dci];

    hw_ci = ntohs(cqe->wqe_counter);
    uct_rc_txqp_available_set(txqp, uct_ib_mlx5_txwq_update_bb(txwq, hw_ci));
    uct_rc_txqp_completion(txqp, hw_ci);
    iface->super.super.tx.cq_available++;

    /* Release the DCI and give pending requests a chance to progress:
     * first endpoints waiting for a free DCI, then work queued per DCI */
    uct_dc_iface_dci_put(&iface->super, dci);
    if (uct_dc_iface_dci_can_alloc(&iface->super)) {
        ucs_arbiter_dispatch(&iface->super.super.tx.arbiter, 1,
                             uct_dc_iface_dci_do_pending_wait, NULL);
    }
    ucs_arbiter_dispatch(&iface->super.tx.dci_arbiter, 1,
                         uct_dc_iface_dci_do_pending_tx, NULL);
}
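
For context on the last few calls: the DC transport multiplexes many endpoints over a small pool of DC initiators (DCIs), so after retiring completions the DCI is handed back and endpoints that were blocked waiting for one get another chance to send. A minimal free-list sketch of that put / can-alloc pattern follows; the names are hypothetical, not the uct_dc API.

#include <stdio.h>

#define TOY_NUM_DCIS 4

/* Toy DCI pool: a stack of free indices. Hypothetical illustration of a
 * put / can-alloc pool, not the uct_dc_iface data structures. */
typedef struct {
    int stack[TOY_NUM_DCIS];
    int top;                 /* number of free DCIs currently in the pool */
} toy_dci_pool_t;

static void toy_pool_init(toy_dci_pool_t *pool)
{
    for (int i = 0; i < TOY_NUM_DCIS; i++) {
        pool->stack[i] = i;
    }
    pool->top = TOY_NUM_DCIS;
}

static int toy_dci_can_alloc(const toy_dci_pool_t *pool)
{
    return pool->top > 0;
}

static int toy_dci_get(toy_dci_pool_t *pool)
{
    return toy_dci_can_alloc(pool) ? pool->stack[--pool->top] : -1;
}

static void toy_dci_put(toy_dci_pool_t *pool, int dci)
{
    pool->stack[pool->top++] = dci;  /* release after TX completion */
}

int main(void)
{
    toy_dci_pool_t pool;
    toy_pool_init(&pool);

    int a = toy_dci_get(&pool);      /* endpoint A grabs a DCI to send */
    int b = toy_dci_get(&pool);      /* endpoint B grabs another       */
    printf("A=%d B=%d free=%d\n", a, b, pool.top);

    toy_dci_put(&pool, a);           /* A's completion arrived: return it */
    if (toy_dci_can_alloc(&pool)) {
        printf("a blocked endpoint could be dispatched now (free=%d)\n",
               pool.top);
    }
    return 0;
}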