Example #1
static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_rc_verbs_iface_config_t *config =
                    ucs_derived_of(tl_config, uct_rc_verbs_iface_config_t);
    struct ibv_exp_device_attr *dev_attr;
    size_t am_hdr_size;
    ucs_status_t status;
    struct ibv_qp_cap cap;
    struct ibv_qp *qp;

    extern uct_iface_ops_t uct_rc_verbs_iface_ops;
    UCS_CLASS_CALL_SUPER_INIT(uct_rc_iface_t, &uct_rc_verbs_iface_ops, pd,
                              worker, dev_name, rx_headroom, 0, &config->super);

    /* Initialize inline work request */
    memset(&self->inl_am_wr, 0, sizeof(self->inl_am_wr));
    self->inl_am_wr.sg_list                 = self->inl_sge;
    self->inl_am_wr.num_sge                 = 2;
    self->inl_am_wr.opcode                  = IBV_WR_SEND;
    self->inl_am_wr.send_flags              = IBV_SEND_INLINE;

    memset(&self->inl_rwrite_wr, 0, sizeof(self->inl_rwrite_wr));
    self->inl_rwrite_wr.sg_list             = self->inl_sge;
    self->inl_rwrite_wr.num_sge             = 1;
    self->inl_rwrite_wr.opcode              = IBV_WR_RDMA_WRITE;
    self->inl_rwrite_wr.send_flags          = IBV_SEND_SIGNALED | IBV_SEND_INLINE;

    memset(self->inl_sge, 0, sizeof(self->inl_sge));

    /* Configuration */
    am_hdr_size = ucs_max(config->max_am_hdr, sizeof(uct_rc_hdr_t));
    self->config.short_desc_size = ucs_max(UCT_RC_MAX_ATOMIC_SIZE, am_hdr_size);
    dev_attr = &uct_ib_iface_device(&self->super.super)->dev_attr;
    if (IBV_EXP_HAVE_ATOMIC_HCA(dev_attr) || IBV_EXP_HAVE_ATOMIC_GLOB(dev_attr)) {
        self->config.atomic32_handler = uct_rc_ep_atomic_handler_32_be0;
        self->config.atomic64_handler = uct_rc_ep_atomic_handler_64_be0;
    } else if (IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(dev_attr)) {
        self->config.atomic32_handler = uct_rc_ep_atomic_handler_32_be1;
        self->config.atomic64_handler = uct_rc_ep_atomic_handler_64_be1;
    }

    /* Create a dummy QP in order to find out max_inline */
    status = uct_rc_iface_qp_create(&self->super, &qp, &cap);
    if (status != UCS_OK) {
        goto err;
    }
    ibv_destroy_qp(qp);
    self->config.max_inline = cap.max_inline_data;

    /* Create mempool for short send descriptors (AM headers and atomic replies) */
    status = uct_iface_mpool_create(&self->super.super.super.super,
                                    sizeof(uct_rc_iface_send_desc_t) +
                                        self->config.short_desc_size,
                                    sizeof(uct_rc_iface_send_desc_t),
                                    UCS_SYS_CACHE_LINE_SIZE,
                                    &config->super.super.tx.mp,
                                    self->super.config.tx_qp_len,
                                    uct_rc_iface_send_desc_init,
                                    "rc_verbs_short_desc", &self->short_desc_mp);
    if (status != UCS_OK) {
        goto err;
    }

    while (self->super.rx.available > 0) {
        if (uct_rc_verbs_iface_post_recv(self, 1) == 0) {
            ucs_error("failed to post receives");
            status = UCS_ERR_NO_MEMORY;
            goto err_destroy_short_desc_mp;
        }
    }

    ucs_notifier_chain_add(&worker->progress_chain, uct_rc_verbs_iface_progress,
                           self);
    return UCS_OK;

err_destroy_short_desc_mp:
    ucs_mpool_destroy(self->short_desc_mp);
err:
    return status;
}
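For symmetry, the matching cleanup function would undo these steps in reverse order. A minimal sketch, assuming this UCX revision pairs ucs_notifier_chain_add() with a ucs_notifier_chain_remove() counterpart and that the worker is reachable from the interface (the field path below is illustrative, not taken from the code above):

static UCS_CLASS_CLEANUP_FUNC(uct_rc_verbs_iface_t)
{
    uct_worker_h worker = self->super.super.super.worker; /* illustrative path */

    /* stop progressing this interface before tearing down its resources */
    ucs_notifier_chain_remove(&worker->progress_chain,
                              uct_rc_verbs_iface_progress, self);
    ucs_mpool_destroy(self->short_desc_mp);
}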
Example #2
static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer, size_t length,
                                                      uct_mem_h memh, uint64_t remote_addr,
                                                      uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_fetch_desc_t *fma = NULL;
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *rdma = NULL;

    size_t fma_length, rdma_length, aligned_fma_remote_start;

    uint64_t fma_remote_start, rdma_remote_start;
    ucs_status_t post_result;


    rdma_length = length - iface->config.fma_seg_size;
    fma_length = iface->config.fma_seg_size;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get,
                             rdma, return UCS_ERR_NO_RESOURCE);

    rdma_remote_start = remote_addr;
    fma_remote_start = rdma_remote_start + rdma_length;
    aligned_fma_remote_start = ucs_align_up_pow2(fma_remote_start, UGNI_GET_ALIGN);

    uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, aligned_fma_remote_start,
                            rkey, fma_length, ep, comp,
                            uct_ugni_unalign_fma_composed_cb, NULL, NULL);

    fma->tail = aligned_fma_remote_start - fma_remote_start;

    uct_ugni_format_unaligned_rdma(rdma, GNI_POST_RDMA_GET, buffer, rdma_remote_start, memh, rkey,
                                   rdma_length + fma->tail, ep, iface->super.local_cq, comp,
                                   uct_ugni_unalign_rdma_composed_cb);

    fma->head = rdma;
    rdma->head = fma;
    fma->network_completed_bytes = rdma->network_completed_bytes = 0;
    fma->user_buffer = rdma->user_buffer = buffer;
    fma->expected_bytes = rdma->expected_bytes = fma->super.desc.length + rdma->super.desc.length;

    ucs_trace_data("Posting split GET ZCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"] and GNI_PostRdma of size %"PRIx64" (%lu)"
                   " from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, length,
                   (void *)fma->super.desc.local_addr,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2,
                   rdma->super.desc.length, length,
                   (void *)rdma->super.desc.local_addr,
                   (void *)rdma->super.desc.remote_addr,
                   rdma->super.desc.remote_mem_hndl.qword1,
                   rdma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length);
    post_result = uct_ugni_post_fma(iface, ep, &(fma->super), UCS_INPROGRESS);
    if ((post_result != UCS_OK) && (post_result != UCS_INPROGRESS)) {
        ucs_mpool_put(rdma);
        return post_result;
    }
    return uct_ugni_post_rdma(iface, ep, &(rdma->super));
}
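To make the split arithmetic concrete, here is the partitioning with made-up numbers (the buffer sizes and the 4-byte UGNI_GET_ALIGN are assumptions for illustration only):

/* Illustrative walk-through of the composed-GET partitioning */
size_t   length      = 10000;                           /* total GET size             */
size_t   fma_seg     = 2048;                            /* iface->config.fma_seg_size */
uint64_t remote_addr = 4099;

size_t   rdma_length = length - fma_seg;                /* 7952 bytes via RDMA        */
uint64_t fma_start   = remote_addr + rdma_length;       /* 4099 + 7952 = 12051        */
uint64_t aligned     = ucs_align_up_pow2(fma_start, 4); /* rounds up to 12052         */
size_t   tail        = aligned - fma_start;             /* 1 byte, re-fetched by the
                                                           RDMA leg (rdma_length+tail) */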
Example #3
/*
 * Generic data-pointer posting function.
 * Parameters which are not relevant to the opcode are ignored.
 *
 *            +--------+-----+-------+--------+-------+
 * SEND       | CTRL   | INL | am_id | am_hdr | DPSEG |
 *            +--------+-----+-------+--------+-------+
 * RDMA_WRITE | CTRL   | RADDR   | DPSEG |
 *            +--------+---------+-------+
 * ATOMIC     | CTRL   | RADDR   | ATOMIC | DPSEG |
 *            +--------+---------+--------+-------+
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_dptr_post(uct_rc_mlx5_ep_t *ep, unsigned opcode_flags,
                         const void *buffer, unsigned length, uint32_t *lkey_p,
                         /* SEND */ uint8_t am_id, const void *am_hdr, unsigned am_hdr_len,
                         /* RDMA/ATOMIC */ uint64_t remote_addr, uct_rkey_t rkey,
                         /* ATOMIC */ uint64_t compare_mask, uint64_t compare, uint64_t swap_add,
                         int signal)
{
    struct mlx5_wqe_ctrl_seg                     *ctrl;
    struct mlx5_wqe_raddr_seg                    *raddr;
    struct mlx5_wqe_atomic_seg                   *atomic;
    struct mlx5_wqe_data_seg                     *dptr;
    struct mlx5_wqe_inl_data_seg                 *inl;
    struct uct_ib_mlx5_atomic_masked_cswap32_seg *masked_cswap32;
    struct uct_ib_mlx5_atomic_masked_fadd32_seg  *masked_fadd32;
    struct uct_ib_mlx5_atomic_masked_cswap64_seg *masked_cswap64;

    uct_rc_mlx5_iface_t *iface;
    uct_rc_hdr_t        *rch;
    unsigned            wqe_size, inl_seg_size;
    uint8_t             opmod;

    iface = ucs_derived_of(ep->super.super.super.iface, uct_rc_mlx5_iface_t);
    if (!signal) {
        signal = uct_rc_iface_tx_moderation(&iface->super, &ep->super,
                                            MLX5_WQE_CTRL_CQ_UPDATE);
    } else {
        ucs_assert(signal == MLX5_WQE_CTRL_CQ_UPDATE);
    }

    opmod = 0;
    ctrl = ep->tx.seg;
    switch (opcode_flags) {
    case MLX5_OPCODE_SEND:
        UCT_CHECK_LENGTH(length + sizeof(*rch) + am_hdr_len,
                         iface->super.super.config.seg_size, "am_zcopy payload");

        inl_seg_size     = ucs_align_up_pow2(sizeof(*inl) + sizeof(*rch) + am_hdr_len,
                                             UCT_IB_MLX5_WQE_SEG_SIZE);
        UCT_CHECK_LENGTH(sizeof(*ctrl) + inl_seg_size + sizeof(*dptr),
                         UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB, "am_zcopy header");

        /* Inline segment with AM ID and header */
        inl              = (void*)(ctrl + 1);
        inl->byte_count  = htonl((sizeof(*rch) + am_hdr_len) | MLX5_INLINE_SEG);
        rch              = (void*)(inl + 1);
        rch->am_id       = am_id;

        uct_rc_mlx5_inline_copy(rch + 1, am_hdr, am_hdr_len, ep);

        /* Data segment with payload */
        if (length == 0) {
            wqe_size     = sizeof(*ctrl) + inl_seg_size;
        } else {
            wqe_size     = sizeof(*ctrl) + inl_seg_size + sizeof(*dptr);
            dptr         = (void*)(ctrl + 1) + inl_seg_size;
            if (ucs_unlikely((void*)dptr >= ep->tx.qend)) {
                dptr = (void*)dptr - (ep->tx.qend - ep->tx.qstart);
            }

            ucs_assert((void*)dptr       >= ep->tx.qstart);
            ucs_assert((void*)(dptr + 1) <= ep->tx.qend);
            uct_rc_mlx5_ep_set_dptr_seg(dptr, buffer, length, *lkey_p);
        }
        break;

    case MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW:
        /* Data segment only */
        UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size,
                         "send");
        ucs_assert(length < (2ul << 30));

        wqe_size         = sizeof(*ctrl) + sizeof(*dptr);
        uct_rc_mlx5_ep_set_dptr_seg((void*)(ctrl + 1), buffer, length, *lkey_p);
        break;

    case MLX5_OPCODE_RDMA_READ:
    case MLX5_OPCODE_RDMA_WRITE:
        /* Set RDMA segment */
        UCT_CHECK_LENGTH(length, UCT_IB_MAX_MESSAGE_SIZE, "put/get");

        raddr            = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        /* Data segment */
        if (length == 0) {
            wqe_size     = sizeof(*ctrl) + sizeof(*raddr);
        } else {
            wqe_size     = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*dptr);
            uct_rc_mlx5_ep_set_dptr_seg((void*)(raddr + 1), buffer, length, *lkey_p);
        }
        break;

    case MLX5_OPCODE_ATOMIC_FA:
    case MLX5_OPCODE_ATOMIC_CS:
        ucs_assert(length == sizeof(uint64_t));
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        atomic            = (void*)(raddr + 1);
        if (opcode_flags == MLX5_OPCODE_ATOMIC_CS) {
            atomic->compare = compare;
        }
        atomic->swap_add  = swap_add;

        uct_rc_mlx5_ep_set_dptr_seg((void*)(atomic + 1), buffer, length, *lkey_p);
        wqe_size          = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*atomic) +
                            sizeof(*dptr);
        break;

    case MLX5_OPCODE_ATOMIC_MASKED_CS:
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        switch (length) {
        case sizeof(uint32_t):
            opmod                        = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2);
            masked_cswap32 = (void*)(raddr + 1);
            masked_cswap32->swap         = swap_add;
            masked_cswap32->compare      = compare;
            masked_cswap32->swap_mask    = (uint32_t)-1;
            masked_cswap32->compare_mask = compare_mask;
            dptr                         = (void*)(masked_cswap32 + 1);
            wqe_size                     = sizeof(*ctrl) + sizeof(*raddr) +
                                           sizeof(*masked_cswap32) + sizeof(*dptr);
            break;
        case sizeof(uint64_t):
            opmod                        = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(3); /* Ext. atomic, size 2**3 */
            masked_cswap64 = (void*)(raddr + 1);
            masked_cswap64->swap         = swap_add;
            masked_cswap64->compare      = compare;
            masked_cswap64->swap_mask    = (uint64_t)-1;
            masked_cswap64->compare_mask = compare_mask;
            dptr                         = (void*)(masked_cswap64 + 1);
            wqe_size                     = sizeof(*ctrl) + sizeof(*raddr) +
                                           sizeof(*masked_cswap64) + sizeof(*dptr);

            /* Handle QP wrap-around. It cannot happen in the middle of
             * masked-cswap segment, because it's still in the first BB.
             */
            ucs_assert((void*)dptr <= ep->tx.qend);
            if (dptr == ep->tx.qend) {
                dptr = ep->tx.qstart;
            } else {
                ucs_assert((void*)masked_cswap64 < ep->tx.qend);
            }
            break;
        default:
            ucs_assert(0);
        }

        uct_rc_mlx5_ep_set_dptr_seg(dptr, buffer, length, *lkey_p);
        break;

    case MLX5_OPCODE_ATOMIC_MASKED_FA:
        ucs_assert(length == sizeof(uint32_t));
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        opmod                         = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2);
        masked_fadd32                 = (void*)(raddr + 1);
        masked_fadd32->add            = swap_add;
        masked_fadd32->filed_boundary = 0;

        uct_rc_mlx5_ep_set_dptr_seg((void*)(masked_fadd32 + 1), buffer, length,
                                    *lkey_p);
        wqe_size                      = sizeof(*ctrl) + sizeof(*raddr) +
                                        sizeof(*masked_fadd32) + sizeof(*dptr);
        break;

    default:
        return UCS_ERR_INVALID_PARAM;
    }

    uct_rc_mlx5_post_send(ep, ctrl, (opcode_flags & UCT_RC_MLX5_OPCODE_MASK),
                          opmod, signal, wqe_size);
    return UCS_OK;
}
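As a worked instance of the SEND branch above, with the usual 16-byte mlx5 control/data segments, the 4-byte inline header, and assuming sizeof(uct_rc_hdr_t) == 1:

/* SEND, am_hdr_len = 8, non-empty payload:
 *   inl_seg_size = ucs_align_up_pow2(4 + 1 + 8, UCT_IB_MLX5_WQE_SEG_SIZE) = 16
 *   wqe_size     = sizeof(*ctrl) + inl_seg_size + sizeof(*dptr)
 *                = 16 + 16 + 16 = 48 bytes, i.e. it fits in one 64-byte send WQE BB
 */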
Example #4
File: mm_ep.c  Project: brminich/ucx
static inline int uct_mm_ep_has_tx_resources(uct_mm_ep_t *ep)
{
    uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t);
    return UCT_MM_EP_IS_ABLE_TO_SEND(ep->fifo_ctl->head, ep->cached_tail,
                                     iface->config.fifo_size);
}
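A typical caller checks this predicate before touching the FIFO; a minimal sketch under the usual UCT send-path conventions (the function below is illustrative, not part of the file):

static ucs_status_t uct_mm_ep_send_sketch(uct_mm_ep_t *ep)
{
    if (!uct_mm_ep_has_tx_resources(ep)) {
        /* remote FIFO is full: the caller may queue a pending request and retry */
        return UCS_ERR_NO_RESOURCE;
    }
    /* ... write the message into the next FIFO element and advance the head ... */
    return UCS_OK;
}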
Example #5
static UCS_CLASS_INIT_FUNC(uct_ugni_smsg_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    gni_return_t ugni_rc;
    unsigned int bytes_per_mbox;
    gni_smsg_attr_t smsg_attr;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_smsg_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.smsg_seg_size = 2048;
    self->config.rx_headroom  = params->rx_headroom;
    self->config.smsg_max_retransmit = 16;
    self->config.smsg_max_credit = 8;
    self->smsg_id = 0;

    smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
    smsg_attr.mbox_maxcredit = self->config.smsg_max_credit;
    smsg_attr.msg_maxsize = self->config.smsg_seg_size;

    ugni_rc = GNI_SmsgBufferSizeNeeded(&smsg_attr, &bytes_per_mbox);
    if (ugni_rc != GNI_RC_SUCCESS) {
        ucs_error("Smsg buffer size calculation failed");
        status = UCS_ERR_INVALID_PARAM;
        goto exit;
    }

    /* use bytes_per_mbox only after the call is known to have succeeded */
    self->bytes_per_mbox = ucs_align_up_pow2(bytes_per_mbox, ucs_get_page_size());

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            self->config.smsg_seg_size + sizeof(uct_ugni_smsg_desc_t),
                            0,
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_desc_mpool_ops,
                            "UGNI-SMSG-DESC");

    if (UCS_OK != status) {
        ucs_error("Desc Mpool creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_mbox,
                            0,
                            self->bytes_per_mbox + sizeof(uct_ugni_smsg_mbox_t),
                            sizeof(uct_ugni_smsg_mbox_t),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_mbox_mpool_ops,
                            "UGNI-SMSG-MBOX");

    if (UCS_OK != status) {
        ucs_error("Mbox Mpool creation failed");
        goto clean_desc;
    }

    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
                             self->user_desc, self->user_desc = NULL);

    status = ugni_smsg_activate_iface(self);
    if (UCS_OK != status) {
        ucs_error("Failed to activate the interface");
        goto clean_mbox;
    }

    ugni_rc = GNI_SmsgSetMaxRetrans(self->super.nic_handle, self->config.smsg_max_retransmit);

    if (ugni_rc != GNI_RC_SUCCESS) {
        ucs_error("Smsg setting max retransmit count failed.");
        status = UCS_ERR_INVALID_PARAM;
        goto clean_iface;
    }

    /* TBD: eventually uct_ugni_progress has to be moved to the UDT
     * layer so that each uGNI layer will have its own progress */
    uct_worker_progress_register(worker, uct_ugni_smsg_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

 clean_iface:
    ugni_smsg_deactivate_iface(self);
 clean_mbox:
    if (self->user_desc != NULL) {
        ucs_mpool_put(self->user_desc);
    }
    ucs_mpool_cleanup(&self->free_mbox, 1);
 clean_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
 exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}
Example #6
static void uct_ugni_udt_progress(void *arg)
{
    uint32_t rem_addr, rem_id;
    uint64_t id;
    void *payload;
    void *user_desc;

    ucs_status_t status;

    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;
    uct_ugni_udt_iface_t *iface = (uct_ugni_udt_iface_t *)arg;
    uct_ugni_udt_ep_t *ep;

    gni_ep_handle_t ugni_ep;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;

    pthread_mutex_lock(&uct_ugni_global_lock);
    ugni_rc = GNI_PostDataProbeById(iface->super.nic_handle, &id);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        if (GNI_RC_NO_MATCH != ugni_rc) {
            ucs_error("GNI_PostDataProbeById , Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
        }
        goto exit;
    }

    if (UCT_UGNI_UDT_ANY == id) {
        /* New incoming message */
        ep = NULL;
        ugni_ep = iface->ep_any;
        desc = iface->desc_any;
    } else {
        /* Ack message */
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        if (ucs_unlikely(NULL == ep)) {
            ucs_error("Can not lookup ep with id %"PRIx64,id);
            goto exit;
        }
        ugni_ep = ep->super.ep;
        desc = ep->posted_desc;
    }

    ugni_rc = GNI_EpPostDataWaitById(ugni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        goto exit;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);
    payload = uct_ugni_udt_get_rpayload(desc, iface);
    user_desc = uct_ugni_udt_get_user_desc(desc, iface);

    if (UCT_UGNI_UDT_ANY == id) {
        /* New incoming message */
        ucs_assert_always(header->type == UCT_UGNI_UDT_PAYLOAD);
        uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
                           header->am_id, payload, header->length, "RX: AM");
        status = uct_iface_invoke_am(&iface->super.super, header->am_id, payload,
                                     header->length, user_desc);
        if (UCS_OK != status) {
            uct_ugni_udt_desc_t *new_desc;
            /* set iface for a later release call */
            uct_recv_desc_iface(user_desc) = &iface->super.super.super;
            /* Allocate a new element */
            UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                                     new_desc, goto exit);
            /* set the new desc */
            iface->desc_any = new_desc;
        }
Example #7
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const uct_iface_params_t *params,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, params->dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->ops                      = ops;

    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) +
                                             params->rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset -
                                     params->rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                             config->rx.queue_len / 4);
    self->config.port_num          = port_num;
    self->config.sl                = config->sl;
    self->config.gid_index         = config->gid_index;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_device_query_gid(dev, self->config.port_num,
                                     self->config.gid_index, &self->gid);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("ibv_create_comp_channel() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl,
                                    &self->recv_cq);
    if (status != UCS_OK) {
        goto err_destroy_send_cq;
    }

    /* Address scope and size */
    if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) {
        if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) {
            self->addr_type = UCT_IB_ADDRESS_TYPE_ETH;
        } else {
            self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix);
        }
    } else {
        ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST);
        self->addr_type = config->addr_type;
    }

    self->addr_size  = uct_ib_address_size(self->addr_type);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
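To make the offset arithmetic concrete, suppose rx_headroom = 40, rx_priv_len = 0, rx_hdr_len = 16, sizeof(uct_ib_iface_recv_desc_t) == 8 and sizeof(uct_am_recv_desc_t) == 16 (illustrative sizes only):

/* rx_payload_offset  = 8 + ucs_max(16 + 40, 0 + 16) = 64
 * rx_hdr_offset      = 64 - 16                      = 48
 * rx_headroom_offset = 64 - 40                      = 24
 * The user headroom starts at offset 24 and the network header at 48,
 * both immediately ahead of the payload at offset 64.
 */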
Example #8
static UCS_CLASS_CLEANUP_FUNC(uct_ugni_smsg_ep_t)
{
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(self->super.super.super.iface, uct_ugni_smsg_iface_t);
    uct_ugni_smsg_mbox_dereg(iface, self->smsg_attr);
    ucs_mpool_put(self->smsg_attr);
}
Example #9
static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_rdma_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_rdma_iface_config_t);
    ucs_status_t status;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, pd, worker, dev_name, &uct_ugni_rdma_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.fma_seg_size  = UCT_UGNI_MAX_FMA;
    self->config.rdma_max_size = UCT_UGNI_MAX_RDMA;

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            sizeof(uct_ugni_base_desc_t),
                            0,                            /* alignment offset */
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_rdma_desc_mpool_ops,
                            "UGNI-DESC-ONLY");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc_get,
                            0,
                            sizeof(uct_ugni_rdma_fetch_desc_t),
                            0,                            /* alignment offset */
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_rdma_desc_mpool_ops,
                            "UGNI-GET-DESC-ONLY");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_desc;
    }

    status = ucs_mpool_init(&self->free_desc_buffer,
                            0,
                            sizeof(uct_ugni_base_desc_t) + self->config.fma_seg_size,
                            sizeof(uct_ugni_base_desc_t), /* alignment offset */
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_rdma_desc_mpool_ops,
                            "UGNI-DESC-BUFFER");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_desc_get;
    }

    status = uct_iface_mpool_init(&self->super.super,
                                  &self->free_desc_famo,
                                  sizeof(uct_ugni_rdma_fetch_desc_t) + 8,
                                  sizeof(uct_ugni_rdma_fetch_desc_t),/* alignment offset */
                                  UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                  &config->mpool,               /* mpool config */
                                  128,                          /* grow */
                                  uct_ugni_base_desc_key_init,  /* memory/key init */
                                  "UGNI-DESC-FAMO");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_buffer;
    }

    status = uct_iface_mpool_init(&self->super.super,
                                  &self->free_desc_get_buffer,
                                  sizeof(uct_ugni_rdma_fetch_desc_t) +
                                  self->config.fma_seg_size,
                                  sizeof(uct_ugni_rdma_fetch_desc_t), /* alignment offset */
                                  UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                  &config->mpool,               /* mpool config */
                                  128,                          /* grow */
                                  uct_ugni_base_desc_key_init,  /* memory/key init */
                                  "UGNI-DESC-GET");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_famo;
    }

    status = ugni_activate_iface(&self->super);
    if (UCS_OK != status) {
        ucs_error("Failed to activate the interface");
        goto clean_get_buffer;
    }

    if(GNI_DEVICE_ARIES == self->super.dev->type) {
        uct_ugni_rdma_iface_ops.ep_atomic_swap64    = uct_ugni_ep_atomic_swap64;
        uct_ugni_rdma_iface_ops.ep_atomic_add32     = uct_ugni_ep_atomic_add32;
        uct_ugni_rdma_iface_ops.ep_atomic_fadd32    = uct_ugni_ep_atomic_fadd32;
        uct_ugni_rdma_iface_ops.ep_atomic_cswap32   = uct_ugni_ep_atomic_cswap32;
        uct_ugni_rdma_iface_ops.ep_atomic_swap32    = uct_ugni_ep_atomic_swap32;
    }

    /* TBD: eventually uct_ugni_progress has to be moved to the RDMA
     * layer so that each uGNI layer will have its own progress */
    uct_worker_progress_register(worker, uct_ugni_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

clean_get_buffer:
    ucs_mpool_cleanup(&self->free_desc_get_buffer, 1);
clean_famo:
    ucs_mpool_cleanup(&self->free_desc_famo, 1);
clean_buffer:
    ucs_mpool_cleanup(&self->free_desc_buffer, 1);
clean_desc_get:
    ucs_mpool_cleanup(&self->free_desc_get, 1);
clean_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}
Example #10
File: ib_md.c  Project: alinask/ucx
static void uct_ib_memh_free(uct_ib_mem_t *memh)
{
    ucs_free(memh);
}

static uct_ib_mem_t *uct_ib_memh_alloc(void)
{
    return ucs_calloc(1, sizeof(uct_ib_mem_t), "ib_memh");
}

static ucs_status_t
uct_ib_mem_alloc_internal(uct_md_h uct_md, size_t *length_p, void **address_p,
                          uct_ib_mem_t *memh UCS_MEMTRACK_ARG)
{
    uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t);
    struct ibv_exp_reg_mr_in in = {
        md->pd,
        NULL,
        ucs_memtrack_adjust_alloc_size(*length_p),
        UCT_IB_MEM_ACCESS_FLAGS | IBV_EXP_ACCESS_ALLOCATE_MR,
        0
    };

    memh->mr = ibv_exp_reg_mr(&in);
    if (memh->mr == NULL) {
        ucs_error("ibv_exp_reg_mr(in={NULL, length=%Zu, flags=0x%lx}) failed: %m",
                  ucs_memtrack_adjust_alloc_size(*length_p),
                  (unsigned long)(UCT_IB_MEM_ACCESS_FLAGS | IBV_EXP_ACCESS_ALLOCATE_MR));
        return UCS_ERR_IO_ERROR;
    }
Example #11
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const char *dev_name, unsigned rx_headroom,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->port_num                 = port_num;
    self->sl                       = config->sl;
    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                     config->rx.queue_len / 4);
    self->ops                      = ops;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_gid(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("Failed to create completion channel: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len,
                                  NULL, self->comp_channel, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_comp_channel;
    }

    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }

    self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len,
                                  NULL, self->comp_channel, 0);
    ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);

    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    /* Address scope and size */
    self->addr_scope = uct_ib_address_scope(self->gid.global.subnet_prefix);
    self->addr_size  = uct_ib_address_size(self->addr_scope);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Example #12
File: dc_mlx5.c  Project: yosefe/ucx
ucs_status_t uct_dc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
                                    uct_rc_fc_request_t *req)
{
    uintptr_t sender_ep;
    uct_ib_iface_t *ib_iface;
    uct_ib_mlx5_base_av_t av;
    uct_dc_fc_request_t *dc_req;
    uct_dc_mlx5_ep_t *dc_mlx5_ep;
    uct_dc_ep_t *dc_ep         = ucs_derived_of(tl_ep, uct_dc_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    ucs_assert((sizeof(uint8_t) + sizeof(sender_ep)) <=
                UCT_IB_MLX5_AV_FULL_SIZE);

    UCT_DC_CHECK_RES(&iface->super, dc_ep);
    UCT_DC_MLX5_IFACE_TXQP_GET(iface, dc_ep, txqp, txwq);

    if (op == UCT_RC_EP_FC_PURE_GRANT) {
        ucs_assert(req != NULL);
        dc_req    = ucs_derived_of(req, uct_dc_fc_request_t);
        sender_ep = (uintptr_t)dc_req->sender_ep;
        ib_iface  = &iface->super.super.super;

        /* Note av initialization is copied from exp verbs */
        av.stat_rate_sl = ib_iface->config.sl; /* (attr->static_rate << 4) | attr->sl */
        av.fl_mlid      = ib_iface->path_bits[0] & 0x7f;

        /* lid in dc_req is in BE already  */
        av.rlid         = dc_req->lid | htons(ib_iface->path_bits[0]);
        av.dqp_dct      = htonl(dc_req->dct_num);

        if (!iface->ud_common.config.compact_av) {
            av.dqp_dct |= UCT_IB_MLX5_EXTENDED_UD_AV;
        }

        uct_rc_mlx5_txqp_inline_post(&iface->super.super, IBV_EXP_QPT_DC_INI,
                                     txqp, txwq, MLX5_OPCODE_SEND,
                                     NULL, 0, op, sender_ep, 0,
                                     0, 0,
                                     &av, uct_ib_mlx5_wqe_av_size(&av));
    } else {
        ucs_assert(op == UCT_RC_EP_FC_FLAG_HARD_REQ);
        sender_ep    = (uintptr_t)dc_ep;
        dc_mlx5_ep   = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);

        UCS_STATS_UPDATE_COUNTER(dc_ep->fc.stats,
                                 UCT_RC_FC_STAT_TX_HARD_REQ, 1);

        uct_rc_mlx5_txqp_inline_post(&iface->super.super, IBV_EXP_QPT_DC_INI,
                                     txqp, txwq, MLX5_OPCODE_SEND_IMM,
                                     NULL, 0, op, sender_ep,
                                     iface->super.rx.dct->dct_num,
                                     0, 0,
                                     &dc_mlx5_ep->av,
                                     uct_ib_mlx5_wqe_av_size(&dc_mlx5_ep->av));
    }

    return UCS_OK;
}
Example #13
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *ops, uct_pd_h pd,
                    uct_worker_h worker, const char *dev_name, unsigned rx_headroom,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(pd, uct_ib_pd_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, ops, pd, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_iface_find_port(dev, dev_name, &port_num);
    if (status != UCS_OK) {
        ucs_error("Failed to find port %s: %s", dev_name, ucs_status_string(status));
        goto err;
    }

    self->port_num                 = port_num;
    self->sl                       = config->sl;
    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom;
    self->config.seg_size          = config->super.max_bcopy;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_gid(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    /* TODO comp_channel */
    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len, NULL, NULL, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }

    self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len,
                                  NULL, NULL, 0);
    ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);

    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Example #14
static ucs_status_t uct_mm_iface_query(uct_iface_h tl_iface,
                                       uct_iface_attr_t *iface_attr)
{
    uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t);
    memset(iface_attr, 0, sizeof(uct_iface_attr_t));

    /* default values for all shared memory transports */
    iface_attr->cap.put.max_short       = UINT_MAX;
    iface_attr->cap.put.max_bcopy       = SIZE_MAX;
    iface_attr->cap.put.min_zcopy       = 0;
    iface_attr->cap.put.max_zcopy       = SIZE_MAX;
    iface_attr->cap.put.opt_zcopy_align = UCS_SYS_CACHE_LINE_SIZE;
    iface_attr->cap.put.align_mtu       = iface_attr->cap.put.opt_zcopy_align;
    iface_attr->cap.put.max_iov         = 1;

    iface_attr->cap.get.max_bcopy       = SIZE_MAX;
    iface_attr->cap.get.min_zcopy       = 0;
    iface_attr->cap.get.max_zcopy       = SIZE_MAX;
    iface_attr->cap.get.opt_zcopy_align = UCS_SYS_CACHE_LINE_SIZE;
    iface_attr->cap.get.align_mtu       = iface_attr->cap.get.opt_zcopy_align;
    iface_attr->cap.get.max_iov         = 1;

    iface_attr->cap.am.max_short        = iface->config.fifo_elem_size -
                                          sizeof(uct_mm_fifo_element_t);
    iface_attr->cap.am.max_bcopy        = iface->config.seg_size;
    iface_attr->cap.am.min_zcopy        = 0;
    iface_attr->cap.am.max_zcopy        = 0;
    iface_attr->cap.am.opt_zcopy_align  = UCS_SYS_CACHE_LINE_SIZE;
    iface_attr->cap.am.align_mtu        = iface_attr->cap.am.opt_zcopy_align;
    iface_attr->cap.am.max_iov          = 1;

    iface_attr->iface_addr_len          = sizeof(uct_mm_iface_addr_t);
    iface_attr->device_addr_len         = UCT_SM_IFACE_DEVICE_ADDR_LEN;
    iface_attr->ep_addr_len             = 0;
    iface_attr->max_conn_priv           = 0;
    iface_attr->cap.flags               = UCT_IFACE_FLAG_PUT_SHORT           |
                                          UCT_IFACE_FLAG_PUT_BCOPY           |
                                          UCT_IFACE_FLAG_ATOMIC_CPU          |
                                          UCT_IFACE_FLAG_GET_BCOPY           |
                                          UCT_IFACE_FLAG_AM_SHORT            |
                                          UCT_IFACE_FLAG_AM_BCOPY            |
                                          UCT_IFACE_FLAG_PENDING             |
                                          UCT_IFACE_FLAG_CB_SYNC             |
                                          UCT_IFACE_FLAG_EVENT_SEND_COMP     |
                                          UCT_IFACE_FLAG_EVENT_RECV_SIG      |
                                          UCT_IFACE_FLAG_CONNECT_TO_IFACE;

    iface_attr->cap.atomic32.op_flags   =
    iface_attr->cap.atomic64.op_flags   = UCS_BIT(UCT_ATOMIC_OP_ADD)         |
                                          UCS_BIT(UCT_ATOMIC_OP_AND)         |
                                          UCS_BIT(UCT_ATOMIC_OP_OR)          |
                                          UCS_BIT(UCT_ATOMIC_OP_XOR);
    iface_attr->cap.atomic32.fop_flags  =
    iface_attr->cap.atomic64.fop_flags  = UCS_BIT(UCT_ATOMIC_OP_ADD)         |
                                          UCS_BIT(UCT_ATOMIC_OP_AND)         |
                                          UCS_BIT(UCT_ATOMIC_OP_OR)          |
                                          UCS_BIT(UCT_ATOMIC_OP_XOR)         |
                                          UCS_BIT(UCT_ATOMIC_OP_SWAP)        |
                                          UCS_BIT(UCT_ATOMIC_OP_CSWAP);

    iface_attr->latency.overhead        = 80e-9; /* 80 ns */
    iface_attr->latency.growth          = 0;
    iface_attr->bandwidth               = 12179 * 1024.0 * 1024.0;
    iface_attr->overhead                = 10e-9; /* 10 ns */
    iface_attr->priority                = uct_mm_md_mapper_ops(iface->super.md)->get_priority();
    return UCS_OK;
}
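Consumers read these attributes back through the generic query entry point; a minimal usage sketch with the public UCT API, given an already-opened uct_iface_h iface:

uct_iface_attr_t attr;
ucs_status_t status;

status = uct_iface_query(iface, &attr);  /* dispatches to uct_mm_iface_query */
if ((status == UCS_OK) && (attr.cap.flags & UCT_IFACE_FLAG_AM_SHORT)) {
    /* short active messages of up to attr.cap.am.max_short bytes are usable */
}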
Example #15
static void uct_cm_iface_release_desc(uct_iface_t *tl_iface, void *desc)
{
    uct_cm_iface_t *iface = ucs_derived_of(tl_iface, uct_cm_iface_t);
    ucs_free(desc - iface->super.config.rx_headroom_offset);
}
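This hook runs when a receive descriptor that an active-message callback kept (by returning UCS_INPROGRESS) is finally released by the user. A sketch of that flow, assuming the AM callback signature and release entry point of this UCX generation (names vary across versions):

static ucs_status_t my_am_cb(void *arg, void *data, size_t length, void *desc)
{
    /* keep the descriptor: the transport must not recycle it yet */
    return UCS_INPROGRESS;
}

/* later, once the data has been consumed, the user releases it, e.g.:
 *     uct_iface_release_am_desc(iface, desc);
 * which ends up in uct_cm_iface_release_desc() above */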
Example #16
static void uct_rc_verbs_ep_am_zcopy_handler(uct_rc_iface_send_op_t *op)
{
    uct_rc_iface_send_desc_t *desc = ucs_derived_of(op, uct_rc_iface_send_desc_t);
    uct_invoke_completion(desc->super.user_comp);
    ucs_mpool_put(desc);
}
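The user_comp invoked here is the uct_completion_t that the caller passed to the zcopy send. A minimal setup sketch (the callback name is illustrative; fields as in this UCX generation's uct_completion_t):

static void my_send_done(uct_completion_t *comp)
{
    /* the payload buffer may be reused from this point on */
}

uct_completion_t comp = {
    .func  = my_send_done,
    .count = 1,            /* invoked once the single send completes */
};
/* pass &comp as the completion argument of the zcopy call */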
Example #17
static UCS_CLASS_INIT_FUNC(uct_cm_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_cm_iface_config_t *config = ucs_derived_of(tl_config, uct_cm_iface_config_t);
    ucs_status_t status;
    int ret;

    ucs_trace_func("");

    UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &uct_cm_iface_ops, pd, worker,
                              dev_name, rx_headroom, 0 /* rx_priv_len */,
                              0 /* rx_hdr_len */, 1 /* tx_cq_len */,
                              IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, /* mss */
                              &config->super);

    if (worker->async == NULL) {
        ucs_error("cm interface requires a worker with an async context");
        return UCS_ERR_INVALID_PARAM;
    }

    self->service_id          = (uint32_t)(ucs_generate_uuid((uintptr_t)self) &
                                             (~IB_CM_ASSIGN_SERVICE_ID_MASK));
    self->num_outstanding     = 0;

    self->config.timeout_ms   = (int)(config->timeout * 1e3 + 0.5);
    self->config.max_outstanding = config->max_outstanding;
    self->config.retry_count  = ucs_min(config->retry_count, UINT8_MAX);
    self->notify_q.head       = NULL;
    ucs_queue_head_init(&self->notify_q);

    self->outstanding = ucs_calloc(self->config.max_outstanding,
                                   sizeof(*self->outstanding),
                                   "cm_outstanding");
    if (self->outstanding == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    self->cmdev = ib_cm_open_device(uct_ib_iface_device(&self->super)->ibv_context);
    if (self->cmdev == NULL) {
        ucs_error("ib_cm_open_device() failed: %m. Check if ib_ucm.ko module is loaded.");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_outstanding;
    }

    status = ucs_sys_fcntl_modfl(self->cmdev->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_close_device;
    }

    ret = ib_cm_create_id(self->cmdev, &self->listen_id, self);
    if (ret) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_NO_DEVICE;
        goto err_close_device;
    }

    ret = ib_cm_listen(self->listen_id, self->service_id, 0);
    if (ret) {
        ucs_error("ib_cm_listen() failed: %m");
        status = UCS_ERR_INVALID_ADDR;
        goto err_destroy_id;
    }

    if (config->async_mode == UCS_ASYNC_MODE_SIGNAL) {
        ucs_warn("ib_cm fd does not support SIGIO");
    }

    status = ucs_async_set_event_handler(config->async_mode, self->cmdev->fd,
                                         POLLIN, uct_cm_iface_event_handler, self,
                                         worker->async);
    if (status != UCS_OK) {
        ucs_error("failed to set event handler");
        goto err_destroy_id;
    }

    ucs_debug("listening for SIDR service_id 0x%x on fd %d", self->service_id,
              self->cmdev->fd);
    return UCS_OK;

err_destroy_id:
    ib_cm_destroy_id(self->listen_id);
err_close_device:
    ib_cm_close_device(self->cmdev);
err_free_outstanding:
    ucs_free(self->outstanding);
err:
    return status;
}
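The timeout_ms conversion above is plain round-to-nearest from a fractional-second config value to milliseconds:

/* config->timeout = 0.0615 s  ->  (int)(0.0615 * 1e3 + 0.5) = (int)62.0 = 62 ms */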
Example #18
static ucs_status_t uct_ib_mlx5dv_create_ksm(uct_ib_md_t *ibmd,
                                             uct_ib_mem_t *ib_memh,
                                             off_t offset)
{
    uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t);
    uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t);
    uint32_t out[UCT_IB_MLX5DV_ST_SZ_DW(create_mkey_out)] = {};
    struct ibv_mr *mr = memh->super.mr;
    ucs_status_t status = UCS_OK;
    struct mlx5dv_pd dvpd = {};
    struct mlx5dv_obj dv = {};
    size_t reg_length, length, inlen;
    int list_size, i;
    void *mkc, *klm;
    uint32_t *in;
    intptr_t addr;

    if (!(md->flags & UCT_IB_MLX5_MD_FLAG_KSM)) {
        return UCS_ERR_UNSUPPORTED;
    }

    reg_length = UCT_IB_MD_MAX_MR_SIZE;
    addr       = (intptr_t)mr->addr & ~(reg_length - 1);
    length     = mr->length + (intptr_t)mr->addr - addr;
    list_size  = ucs_div_round_up(length, reg_length);
    inlen      = UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_in) +
                 UCT_IB_MLX5DV_ST_SZ_BYTES(klm) * list_size;

    in         = ucs_calloc(1, inlen, "mkey mailbox");
    if (in == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    dv.pd.in   = md->super.pd;
    dv.pd.out  = &dvpd;
    mlx5dv_init_obj(&dv, MLX5DV_OBJ_PD);

    UCT_IB_MLX5DV_SET(create_mkey_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_MKEY);
    mkc = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    UCT_IB_MLX5DV_SET(mkc, mkc, access_mode_1_0, UCT_IB_MLX5_MKC_ACCESS_MODE_KSM);
    UCT_IB_MLX5DV_SET(mkc, mkc, a, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, rw, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, rr, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, lw, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, lr, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, pd, dvpd.pdn);
    UCT_IB_MLX5DV_SET(mkc, mkc, translations_octword_size, list_size);
    UCT_IB_MLX5DV_SET(mkc, mkc, log_entity_size, ucs_ilog2(reg_length));
    UCT_IB_MLX5DV_SET(mkc, mkc, qpn, 0xffffff);
    UCT_IB_MLX5DV_SET(mkc, mkc, mkey_7_0, offset & 0xff);
    UCT_IB_MLX5DV_SET64(mkc, mkc, start_addr, addr + offset);
    UCT_IB_MLX5DV_SET64(mkc, mkc, len, length);
    UCT_IB_MLX5DV_SET(create_mkey_in, in, translations_octword_actual_size, list_size);

    klm = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
    for (i = 0; i < list_size; i++) {
        if (i == list_size - 1) {
            UCT_IB_MLX5DV_SET(klm, klm, byte_count, length % reg_length);
        } else {
            UCT_IB_MLX5DV_SET(klm, klm, byte_count, reg_length);
        }
        UCT_IB_MLX5DV_SET(klm, klm, mkey, mr->lkey);
        UCT_IB_MLX5DV_SET64(klm, klm, address, addr + (i * reg_length));
        klm += UCT_IB_MLX5DV_ST_SZ_BYTES(klm);
    }

    memh->atomic_dvmr = mlx5dv_devx_obj_create(md->super.dev.ibv_context, in, inlen,
                                               out, sizeof(out));
    if (memh->atomic_dvmr == NULL) {
        ucs_debug("CREATE_MKEY KSM failed: %m");
        status = UCS_ERR_UNSUPPORTED;
        md->flags &= ~UCT_IB_MLX5_MD_FLAG_KSM;
        goto out;
    }

    memh->super.atomic_rkey =
        (UCT_IB_MLX5DV_GET(create_mkey_out, out, mkey_index) << 8) |
        (offset & 0xff);

    ucs_debug("KSM registered memory %p..%p offset 0x%lx on %s rkey 0x%x",
              mr->addr, mr->addr + mr->length, offset, uct_ib_device_name(&md->super.dev),
              memh->super.atomic_rkey);
out:
    ucs_free(in);
    return status;
}
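The masking and rounding above rely on reg_length being a power of two. For readability, pretend reg_length were 0x1000 (the real UCT_IB_MD_MAX_MR_SIZE is far larger):

/* mr->addr = 0x12345, mr->length = 0x4000:
 *   addr      = 0x12345 & ~0xfff                 = 0x12000
 *   length    = 0x4000 + 0x12345 - 0x12000       = 0x4345
 *   list_size = ucs_div_round_up(0x4345, 0x1000) = 5
 * Five KLM entries cover the region; the last one gets
 * byte_count = length % reg_length = 0x345.
 */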
Example #19
ucs_status_t uct_rc_verbs_ep_fence(uct_ep_h tl_ep, unsigned flags)
{
    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);

    return uct_rc_ep_fence(tl_ep, &ep->fi, 1);
}
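Applications reach this through the generic endpoint fence entry point; a minimal sketch, given a connected uct_ep_h ep:

ucs_status_t status;

/* order all sends posted so far on ep before anything posted afterwards */
status = uct_ep_fence(ep, 0);    /* dispatches to uct_rc_verbs_ep_fence */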
Example #20
static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t rc;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, pd, worker, dev_name, &uct_ugni_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.fma_seg_size  = UCT_UGNI_MAX_FMA;
    self->config.rdma_max_size = UCT_UGNI_MAX_RDMA;

    rc = ucs_mpool_create("UGNI-DESC-ONLY", sizeof(uct_ugni_base_desc_t),
                          0,                            /* alignment offset */
                          UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                          128,                          /* grow */
                          config->mpool.max_bufs,       /* max buffers */
                          &self->super.super,           /* iface */
                          ucs_mpool_hugetlb_malloc,     /* allocation hooks */
                          ucs_mpool_hugetlb_free,       /* free hook */
                          uct_ugni_base_desc_init,      /* init func */
                          NULL, &self->free_desc);
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto exit;
    }

    rc = ucs_mpool_create("UGNI-GET-DESC-ONLY", sizeof(uct_ugni_fetch_desc_t),
                          0,                            /* alignment offset */
                          UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                          128,                          /* grow */
                          config->mpool.max_bufs,       /* max buffers */
                          &self->super.super,           /* iface */
                          ucs_mpool_hugetlb_malloc,     /* allocation hooks */
                          ucs_mpool_hugetlb_free,       /* free hook */
                          uct_ugni_base_desc_init,      /* init func */
                          NULL, &self->free_desc_get);
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_desc;
    }

    rc = ucs_mpool_create("UGNI-DESC-BUFFER",
                          sizeof(uct_ugni_base_desc_t) +
                          self->config.fma_seg_size,
                          sizeof(uct_ugni_base_desc_t), /* alignment offset */
                          UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                          128 ,                         /* grow */
                          config->mpool.max_bufs,       /* max buffers */
                          &self->super.super,           /* iface */
                          ucs_mpool_hugetlb_malloc,     /* allocation hooks */
                          ucs_mpool_hugetlb_free,       /* free hook */
                          uct_ugni_base_desc_init,      /* init func */
                          NULL, &self->free_desc_buffer);
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_desc_get;
    }

    rc = uct_iface_mpool_create(&self->super.super.super,
                                sizeof(uct_ugni_fetch_desc_t) + 8,
                                sizeof(uct_ugni_fetch_desc_t),  /* alignment offset */
                                UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                &config->mpool,               /* mpool config */
                                128,                          /* grow */
                                uct_ugni_base_desc_key_init,  /* memory/key init */
                                "UGNI-DESC-FAMO",             /* name */
                                &self->free_desc_famo);       /* mpool */
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_buffer;
    }

    rc = uct_iface_mpool_create(&self->super.super.super,
                                sizeof(uct_ugni_fetch_desc_t) +
                                self->config.fma_seg_size,
                                sizeof(uct_ugni_fetch_desc_t), /* alignment offset */
                                UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                &config->mpool,               /* mpool config */
                                128,                          /* grow */
                                uct_ugni_base_desc_key_init,  /* memory/key init */
                                "UGNI-DESC-GET",              /* name */
                                &self->free_desc_get_buffer); /* mpool */
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_famo;
    }

    rc = ugni_activate_iface(&self->super);
    if (UCS_OK != rc) {
        ucs_error("Failed to activate the interface");
        goto clean_get_buffer;
    }

    /* TBD: eventually uct_ugni_progress has to be moved to the rdma layer
     * so that each ugni layer will have its own progress */
    ucs_notifier_chain_add(&worker->progress_chain, uct_ugni_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

clean_get_buffer:
    ucs_mpool_destroy(self->free_desc_get_buffer);
clean_famo:
    ucs_mpool_destroy(self->free_desc_famo);
clean_buffer:
    ucs_mpool_destroy(self->free_desc_buffer);
clean_desc_get:
    ucs_mpool_destroy(self->free_desc_get);
clean_desc:
    ucs_mpool_destroy(self->free_desc);
exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return rc;
}
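
The init function above acquires five mempools plus the activated interface and unwinds them in reverse through a ladder of goto labels. Below is a minimal, self-contained sketch of that cleanup idiom; the resource names are hypothetical and plain malloc stands in for the UCX mpool calls:

#include <stdlib.h>

/* Each acquisition gets an unwind label; a failure jumps to the label
 * that releases everything acquired so far, in reverse order. */
static int init_resources(void)
{
    void *desc_pool, *buf_pool;

    desc_pool = malloc(64);            /* stands in for the first mpool */
    if (desc_pool == NULL) {
        goto err;
    }

    buf_pool = malloc(64);             /* stands in for the second mpool */
    if (buf_pool == NULL) {
        goto err_free_desc_pool;
    }

    /* ... further acquisitions, each adding one unwind label below ... */
    free(buf_pool);
    free(desc_pool);
    return 0;

err_free_desc_pool:
    free(desc_pool);
err:
    return -1;
}

int main(void)
{
    return init_resources() == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}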
Example No. 21
File: mm_ep.c Project: brminich/ucx
static UCS_CLASS_INIT_FUNC(uct_mm_ep_t, uct_iface_t *tl_iface,
                           const uct_device_addr_t *dev_addr,
                           const uct_iface_addr_t *iface_addr)
{
    uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t);
    const uct_mm_iface_addr_t *addr = (const void*)iface_addr;
    ucs_status_t status;
    size_t size_to_attach;
    uct_mm_fifo_ctl_t *remote_fifo_ctl;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);

    /* Connect to the remote address (remote FIFO): attach its memory */
    size_to_attach = UCT_MM_GET_FIFO_SIZE(iface);
    status =
        uct_mm_md_mapper_ops(iface->super.md)->attach(addr->id,
                                                      size_to_attach,
                                                      (void *)addr->vaddr,
                                                      &self->mapped_desc.address,
                                                      &self->mapped_desc.cookie,
                                                      iface->path);
    if (status != UCS_OK) {
        ucs_error("failed to connect to remote peer with mm. remote mm_id: %zu",
                   addr->id);
        return status;
    }

    self->mapped_desc.length     = size_to_attach;
    self->mapped_desc.mmid       = addr->id;

    /* Set the fifo ctl to a dummy struct. It will switch to the real fifo_ctl,
     * which points to the remote peer's fifo, once the connection establishment
     * message is sent successfully on the unix socket */
    self->fifo_ctl = &iface->dummy_fifo_ctl;

    self->cached_tail = self->fifo_ctl->tail;

    /* set the ep->fifo ptr to point to the beginning of the fifo elements at
     * the remote peer */
    uct_mm_set_fifo_elems_ptr(self->mapped_desc.address, &self->fifo);

    remote_fifo_ctl   = uct_mm_set_fifo_ctl(self->mapped_desc.address);
    self->cached_signal_addrlen  = remote_fifo_ctl->signal_addrlen;
    self->cached_signal_sockaddr = remote_fifo_ctl->signal_sockaddr;

    self->cbq_elem_on = 0;

    /* Send connect message to remote side so it will start polling */
    status = uct_mm_ep_signal_remote(self, UCT_MM_IFACE_SIGNAL_CONNECT);
    if (status != UCS_OK) {
        uct_mm_md_mapper_ops(iface->super.md)->detach(&self->mapped_desc);
        return status;
    }

    /* Initialize the hash that keeps the base addresses of the remote memory
     * chunks holding the descriptors for bcopy. */
    sglib_hashed_uct_mm_remote_seg_t_init(self->remote_segments_hash);

    ucs_arbiter_group_init(&self->arb_group);

    /* Register for send side progress */
    uct_worker_progress_register(iface->super.worker, uct_mm_iface_progress,
                                 iface);

    ucs_debug("mm: ep connected: %p, to remote_shmid: %zu", self, addr->id);

    return UCS_OK;
}
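
The dummy fifo_ctl above lets the endpoint read a valid tail before the connection handshake completes. A minimal sketch of that placeholder pattern, with hypothetical types rather than the mm transport's real ones:

#include <stdio.h>

/* Control block normally shared with the remote peer */
typedef struct { unsigned head, tail; } fifo_ctl_t;

typedef struct {
    fifo_ctl_t *fifo_ctl;     /* points at the dummy until connected */
    fifo_ctl_t  dummy_ctl;    /* all-zero placeholder, always readable */
} ep_t;

static void ep_init(ep_t *ep)
{
    ep->dummy_ctl = (fifo_ctl_t){0, 0};
    ep->fifo_ctl  = &ep->dummy_ctl;   /* safe to dereference immediately */
}

static void ep_connected(ep_t *ep, fifo_ctl_t *remote)
{
    ep->fifo_ctl = remote;            /* switch to the real control block */
}

int main(void)
{
    fifo_ctl_t remote = {3, 1};
    ep_t ep;

    ep_init(&ep);
    printf("tail before connect: %u\n", ep.fifo_ctl->tail);
    ep_connected(&ep, &remote);
    printf("tail after connect:  %u\n", ep.fifo_ctl->tail);
    return 0;
}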
Example No. 22
/* Signature reconstructed: this excerpt begins mid-function. In UCX this is
 * the IB memory-region deregistration helper. */
static ucs_status_t uct_ib_dereg_mr(struct ibv_mr *mr)
{
    int ret;

    ret = ibv_dereg_mr(mr);
    if (ret != 0) {
        ucs_error("ibv_dereg_mr() failed: %m");
        return UCS_ERR_IO_ERROR;
    }

    return UCS_OK;
}

static ucs_status_t uct_ib_mem_alloc(uct_pd_h uct_pd, size_t *length_p, void **address_p,
                                     uct_mem_h *memh_p UCS_MEMTRACK_ARG)
{
    uct_ib_pd_t *pd = ucs_derived_of(uct_pd, uct_ib_pd_t);
    struct ibv_exp_reg_mr_in in = {
        pd->pd,
        NULL,
        ucs_memtrack_adjust_alloc_size(*length_p),
        UCT_IB_MEM_ACCESS_FLAGS | IBV_EXP_ACCESS_ALLOCATE_MR,
        0
    };
    struct ibv_mr *mr;

    mr = ibv_exp_reg_mr(&in);
    if (mr == NULL) {
        ucs_error("ibv_exp_reg_mr(in={NULL, length=%Zu, flags=0x%lx}) failed: %m",
                  ucs_memtrack_adjust_alloc_size(*length_p),
                  (unsigned long)(UCT_IB_MEM_ACCESS_FLAGS | IBV_EXP_ACCESS_ALLOCATE_MR));
        return UCS_ERR_IO_ERROR;
    }

    /* Tail reconstructed: the excerpt was cut off here. On success, report
     * the adjusted length and the address the HCA allocated. */
    *address_p = mr->addr;
    *length_p  = mr->length;
    *memh_p    = mr;
    return UCS_OK;
}
Example No. 23
static UCS_CLASS_INIT_FUNC(uct_ud_mlx5_iface_t,
                           uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_ud_iface_config_t *config = ucs_derived_of(tl_config,
                                                   uct_ud_iface_config_t);
    ucs_status_t status;
    int i;

    ucs_trace_func("");

    UCS_CLASS_CALL_SUPER_INIT(uct_ud_iface_t, &uct_ud_mlx5_iface_ops,
                              pd, worker,
                              dev_name, rx_headroom, 0, config);

    status = uct_ib_mlx5_get_cq(self->super.super.send_cq, &self->tx.cq);
    if (status != UCS_OK) {
        return status;
    }
    if (uct_ib_mlx5_cqe_size(&self->tx.cq) != sizeof(struct mlx5_cqe64)) {
        ucs_error("TX CQE size (%d) is not %d", 
                  uct_ib_mlx5_cqe_size(&self->tx.cq),
                  (int)sizeof(struct mlx5_cqe64));
        return UCS_ERR_IO_ERROR;
    }

    status = uct_ib_mlx5_get_cq(self->super.super.recv_cq, &self->rx.cq);
    if (status != UCS_OK) {
        return status;
    }
    if (uct_ib_mlx5_cqe_size(&self->rx.cq) != sizeof(struct mlx5_cqe64)) {
        ucs_error("RX CQE size (%d) is not %d", 
                  uct_ib_mlx5_cqe_size(&self->rx.cq),
                  (int)sizeof(struct mlx5_cqe64));
        return UCS_ERR_IO_ERROR;
    }

    status = uct_ib_mlx5_get_txwq(self->super.qp, &self->tx.wq);
    if (status != UCS_OK) {
        return status;
    }
    self->super.tx.available = self->tx.wq.bb_max;

    status = uct_ib_mlx5_get_rxwq(self->super.qp, &self->rx.wq);
    if (status != UCS_OK) {
        return status;
    }

    /* write buffer sizes */
    for (i = 0; i <= self->rx.wq.mask; i++) {
        self->rx.wq.wqes[i].byte_count = htonl(self->super.super.config.rx_payload_offset + 
                                               self->super.super.config.seg_size);
    }
    while (self->super.rx.available >= self->super.config.rx_max_batch) {
        uct_ud_mlx5_iface_post_recv(self);
    }

    /* TODO: add progress on first ep creation */
    uct_worker_progress_register(worker, uct_ud_mlx5_iface_progress, self);

    return UCS_OK;
}
Example No. 24
void uct_ud_ep_destroy_connected(uct_ud_ep_t *ep, const uct_sockaddr_ib_t *addr)
{
    uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t);
    uct_ud_iface_cep_rollback(iface, addr, ep);
    uct_ud_ep_disconnect_from_iface(&ep->super.super);
}
Example No. 25
static ucs_status_t uct_gdr_copy_md_open(const char *md_name,
                                         const uct_md_config_t *uct_md_config,
                                         uct_md_h *md_p)
{
    const uct_gdr_copy_md_config_t *md_config = ucs_derived_of(uct_md_config,
                                                               uct_gdr_copy_md_config_t);
    ucs_status_t status;
    uct_gdr_copy_md_t *md;
    ucs_rcache_params_t rcache_params;

    md = ucs_malloc(sizeof(uct_gdr_copy_md_t), "uct_gdr_copy_md_t");
    if (NULL == md) {
        ucs_error("failed to allocate memory for uct_gdr_copy_md_t");
        return UCS_ERR_NO_MEMORY;
    }

    md->super.ops = &md_ops;
    md->super.component = &uct_gdr_copy_md_component;
    md->rcache = NULL;
    md->reg_cost = md_config->uc_reg_cost;

    md->gdrcpy_ctx = gdr_open();
    if (md->gdrcpy_ctx == NULL) {
        ucs_error("failed to open gdr copy");
        status = UCS_ERR_IO_ERROR;
        goto err_free_md;
    }

    if (md_config->enable_rcache != UCS_NO) {
        rcache_params.region_struct_size = sizeof(uct_gdr_copy_rcache_region_t);
        rcache_params.alignment          = md_config->rcache.alignment;
        rcache_params.max_alignment      = UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN;
        rcache_params.ucm_event_priority = md_config->rcache.event_prio;
        rcache_params.context            = md;
        rcache_params.ops                = &uct_gdr_copy_rcache_ops;
        status = ucs_rcache_create(&rcache_params, "gdr_copy", NULL, &md->rcache);
        if (status == UCS_OK) {
            md->super.ops         = &md_rcache_ops;
            md->reg_cost.overhead = 0;
            md->reg_cost.growth   = 0;
        } else {
            ucs_assert(md->rcache == NULL);
            if (md_config->enable_rcache == UCS_YES) {
                status = UCS_ERR_IO_ERROR;
                goto err_close_gdr;
            } else {
                ucs_debug("could not create registration cache for: %s",
                          ucs_status_string(status));
            }
        }
    }

    *md_p = (uct_md_h) md;
    status = UCS_OK;
out:
    return status;
err_close_gdr:
    gdr_close(md->gdrcpy_ctx);
err_free_md:
    ucs_free(md);
    goto out;
}
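
uct_gdr_copy_md_open treats enable_rcache as a tri-state: off, try (fall back silently), or mandatory. A hedged standalone sketch of that decision logic, with hypothetical names mirroring UCS_NO/UCS_TRY/UCS_YES and a stub standing in for ucs_rcache_create():

#include <stdio.h>

typedef enum { FEATURE_NO, FEATURE_TRY, FEATURE_YES } feature_mode_t;

/* Stand-in for ucs_rcache_create(); 'available' simulates success/failure */
static int try_enable_feature(int available)
{
    return available ? 0 : -1;
}

static int open_with_optional_feature(feature_mode_t mode, int available)
{
    if (mode != FEATURE_NO) {
        if (try_enable_feature(available) == 0) {
            printf("feature enabled\n");
        } else if (mode == FEATURE_YES) {
            /* mandatory: fail the whole open, like goto err_close_gdr above */
            printf("mandatory feature unavailable\n");
            return -1;
        } else {
            /* FEATURE_TRY: note the fallback and continue without it */
            printf("feature unavailable, continuing without it\n");
        }
    }
    return 0;
}

int main(void)
{
    open_with_optional_feature(FEATURE_TRY, 0);         /* falls back */
    return open_with_optional_feature(FEATURE_YES, 1);  /* succeeds */
}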
Example No. 26
static UCS_CLASS_INIT_FUNC(uct_ugni_udt_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    uct_ugni_udt_desc_t *desc;
    gni_return_t ugni_rc;
    int rc;

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_udt_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.udt_seg_size = GNI_DATAGRAM_MAXSIZE;
    self->config.rx_headroom  = params->rx_headroom;
    self->release_desc.cb     = uct_ugni_udt_iface_release_desc;

    status = ucs_async_pipe_create(&self->event_pipe);
    if (UCS_OK != status) {
        ucs_error("Pipe creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            uct_ugni_udt_get_diff(self) + self->config.udt_seg_size * 2,
                            uct_ugni_udt_get_diff(self),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_udt_desc_mpool_ops,
                            "UGNI-UDT-DESC");

    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_pipe;
    }

    ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(self), NULL, &self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        status = UCS_ERR_NO_DEVICE;
        goto clean_free_desc;
    }

    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc, desc,
                             status = UCS_ERR_NO_RESOURCE; goto clean_ep);
    ucs_debug("First wildcard desc is %p", desc);

    /* Init any desc */
    self->desc_any = desc;
    status = uct_ugni_udt_ep_any_post(self);
    if (UCS_OK != status) {
        /* We can't continue if we can't post the first receive */
        ucs_error("Failed to post wildcard request");
        goto clean_any_desc;
    }

    status = ucs_async_set_event_handler(self->super.super.worker->async->mode,
                                         ucs_async_pipe_rfd(&self->event_pipe),
                                         POLLIN,
                                         uct_ugni_proccess_datagram_pipe,
                                         self, self->super.super.worker->async);
    if (UCS_OK != status) {
        goto clean_cancel_desc;
    }

    pthread_mutex_init(&self->device_lock, NULL);
    pthread_cond_init(&self->device_condition, NULL);
    self->events_ready = 0;

    rc = pthread_create(&self->event_thread, NULL, uct_ugni_udt_device_thread, self);
    if (0 != rc) {
        ucs_error("Failed to create device event thread");
        status = UCS_ERR_IO_ERROR;
        goto clean_remove_event;
    }

    return UCS_OK;

 clean_remove_event:
    /* Remove the async handler only; the pipe itself is destroyed at clean_pipe */
    ucs_async_remove_handler(ucs_async_pipe_rfd(&self->event_pipe), 1);
 clean_cancel_desc:
    uct_ugni_udt_clean_wildcard(self);
 clean_any_desc:
    ucs_mpool_put(self->desc_any);
 clean_ep:
    ugni_rc = GNI_EpDestroy(self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_warn("GNI_EpDestroy failed, Error status: %s %d",
                 gni_err_str[ugni_rc], ugni_rc);
    }
 clean_free_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
 clean_pipe:
    ucs_async_pipe_destroy(&self->event_pipe);
 exit:
    uct_ugni_cleanup_base_iface(&self->super);
    ucs_error("Failed to activate interface");
    return status;
}
Example No. 27
/*
 * Generic inline posting function.
 * Parameters which are not relevant to the opcode are ignored.
 *
 *            +--------+-----+-------+--------+------------
 * SEND       | CTRL   | INL | am_id | am_hdr | payload ...
 *            +--------+-----+-------+--------+------------
 *
 *            +--------+-------+-----+--------------------
 * RDMA_WRITE | CTRL   | RADDR | INL | payload ...
 *            +--------+-------+-----+--------------------
 *
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_inline_post(uct_rc_mlx5_ep_t *ep, unsigned opcode,
                           const void *buffer, unsigned length,
                           /* SEND */ uint8_t am_id, uint64_t am_hdr,
                           /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey
                           )
{
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                uct_rc_mlx5_iface_t);

    struct mlx5_wqe_ctrl_seg     *ctrl;
    struct mlx5_wqe_raddr_seg    *raddr;
    struct mlx5_wqe_inl_data_seg *inl;
    uct_rc_am_short_hdr_t        *am;
    unsigned wqe_size;
    unsigned sig_flag;

    ctrl = ep->tx.seg;
    UCT_RC_MLX5_CHECK_RES(iface, ep);

    switch (opcode) {
    case MLX5_OPCODE_SEND:
        /* Set inline segment which has AM id, AM header, and AM payload */
        wqe_size         = sizeof(*ctrl) + sizeof(*inl) + sizeof(*am) + length;
        UCT_CHECK_LENGTH(wqe_size, UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                         "am_short");
        inl              = (void*)(ctrl + 1);
        inl->byte_count  = htonl((length + sizeof(*am)) | MLX5_INLINE_SEG);
        am               = (void*)(inl + 1);
        am->rc_hdr.am_id = am_id;
        am->am_hdr       = am_hdr;
        uct_rc_mlx5_inline_copy(am + 1, buffer, length, ep);
        sig_flag         = uct_rc_iface_tx_moderation(&iface->super, &ep->super,
                                                      MLX5_WQE_CTRL_CQ_UPDATE);
        break;

    case MLX5_OPCODE_RDMA_WRITE:
        /* Set RDMA segment */
        if (length == 0) {
            wqe_size     = sizeof(*ctrl) + sizeof(*raddr);
        } else {
            wqe_size     = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*inl) + length;
        }
        UCT_CHECK_LENGTH(wqe_size, UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                        "put_short");
        raddr            = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, rdma_raddr, rdma_rkey);
        inl              = (void*)(raddr + 1);
        inl->byte_count  = htonl(length | MLX5_INLINE_SEG);
        uct_rc_mlx5_inline_copy(inl + 1, buffer, length, ep);
        sig_flag         = MLX5_WQE_CTRL_CQ_UPDATE;
        break;

    case MLX5_OPCODE_NOP:
        /* Empty inline segment */
        wqe_size         = sizeof(*ctrl);
        inl              = (void*)(ctrl + 1);
        inl->byte_count  = htonl(MLX5_INLINE_SEG);
        sig_flag         = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_WQE_CTRL_FENCE;
        break;

    default:
        return UCS_ERR_INVALID_PARAM;
    }

    uct_rc_mlx5_post_send(ep, ctrl, opcode, 0, sig_flag, wqe_size);
    return UCS_OK;
}
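
Every branch above tags its inline data segment by OR-ing MLX5_INLINE_SEG into byte_count before byte-swapping to wire order. A standalone illustration of that encoding; the 0x80000000 value follows the mlx5 hardware ABI, while the sizes here are made up for the example:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define MLX5_INLINE_SEG 0x80000000u  /* bit 31 marks an inline segment */

int main(void)
{
    unsigned am_hdr_size = 16;   /* hypothetical sizeof(*am) */
    unsigned payload_len = 32;
    uint32_t byte_count  = htonl((payload_len + am_hdr_size) | MLX5_INLINE_SEG);

    /* Decoding recovers the length by masking the inline flag back out */
    printf("wire: 0x%08x, length: %u\n", byte_count,
           ntohl(byte_count) & ~MLX5_INLINE_SEG);
    return 0;
}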
Example No. 28
ucs_status_t uct_dc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
                                    uct_rc_fc_request_t *req)
{
    uct_dc_mlx5_ep_t *dc_ep    = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_ib_iface_t *ib_iface   = &iface->super.super.super;
    struct ibv_ah_attr ah_attr = {.is_global = 0};
    uct_dc_fc_sender_data_t sender;
    uct_dc_fc_request_t *dc_req;
    struct mlx5_wqe_av mlx5_av;
    uct_ib_mlx5_base_av_t av;
    ucs_status_t status;
    uintptr_t sender_ep;
    struct ibv_ah *ah;

    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    ucs_assert((sizeof(uint8_t) + sizeof(sender_ep)) <=
                UCT_IB_MLX5_AV_FULL_SIZE);

    UCT_DC_MLX5_CHECK_RES(iface, dc_ep);
    UCT_DC_MLX5_IFACE_TXQP_GET(iface, dc_ep, txqp, txwq);

    dc_req = ucs_derived_of(req, uct_dc_fc_request_t);

    if (op == UCT_RC_EP_FC_PURE_GRANT) {
        ucs_assert(req != NULL);

        sender_ep = (uintptr_t)dc_req->sender.ep;

        /* TODO: look at common code with uct_ud_mlx5_iface_get_av */
        if (dc_req->sender.global.is_global) {
            uct_ib_iface_fill_ah_attr_from_gid_lid(ib_iface, dc_req->lid,
                                                   ucs_unaligned_ptr(&dc_req->sender.global.gid),
                                                   ib_iface->path_bits[0], &ah_attr);

            status = uct_ib_iface_create_ah(ib_iface, &ah_attr, &ah);
            if (status != UCS_OK) {
                return status;
            }

            uct_ib_mlx5_get_av(ah, &mlx5_av);
        }

        /* Note av initialization is copied from exp verbs */
        av.stat_rate_sl = ib_iface->config.sl; /* (attr->static_rate << 4) | attr->sl */
        av.fl_mlid      = ib_iface->path_bits[0] & 0x7f;

        /* lid in dc_req is in BE already  */
        av.rlid         = uct_ib_iface_is_roce(ib_iface) ? 0 :
                          (dc_req->lid | htons(ib_iface->path_bits[0]));
        av.dqp_dct      = htonl(dc_req->dct_num);
        uct_dc_mlx5_iface_set_av_sport(iface, &av, dc_req->dct_num);

        if (!iface->ud_common.config.compact_av || ah_attr.is_global) {
            av.dqp_dct |= UCT_IB_MLX5_EXTENDED_UD_AV;
        }

        uct_rc_mlx5_txqp_inline_post(&iface->super, UCT_IB_QPT_DCI,
                                     txqp, txwq, MLX5_OPCODE_SEND,
                                     &av /*dummy*/, 0, op, sender_ep, 0,
                                     0, 0,
                                     &av, ah_attr.is_global ? mlx5_av_grh(&mlx5_av) : NULL,
                                     uct_ib_mlx5_wqe_av_size(&av), 0, INT_MAX);
    } else {
        ucs_assert(op == UCT_RC_EP_FC_FLAG_HARD_REQ);
        sender.ep               = (uint64_t)dc_ep;
        sender.global.gid       = ib_iface->gid;
        sender.global.is_global = dc_ep->flags & UCT_DC_MLX5_EP_FLAG_GRH;

        UCS_STATS_UPDATE_COUNTER(dc_ep->fc.stats,
                                 UCT_RC_FC_STAT_TX_HARD_REQ, 1);

        uct_rc_mlx5_txqp_inline_post(&iface->super, UCT_IB_QPT_DCI,
                                     txqp, txwq, MLX5_OPCODE_SEND_IMM,
                                     &sender.global, sizeof(sender.global), op, sender.ep,
                                     uct_dc_mlx5_get_dct_num(iface),
                                     0, 0,
                                     &dc_ep->av,
                                     uct_dc_mlx5_ep_get_grh(dc_ep),
                                     uct_ib_mlx5_wqe_av_size(&dc_ep->av),
                                     MLX5_WQE_CTRL_SOLICITED, INT_MAX);
    }

    return UCS_OK;
}


UCS_CLASS_INIT_FUNC(uct_dc_mlx5_ep_t, uct_dc_mlx5_iface_t *iface, const uct_dc_mlx5_iface_addr_t *if_addr,
                    uct_ib_mlx5_base_av_t *av)
{
    uint32_t remote_dctn;

    ucs_trace_func("");

    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super.super.super);

    self->atomic_mr_offset = uct_ib_md_atomic_offset(if_addr->atomic_mr_id);
    remote_dctn            = uct_ib_unpack_uint24(if_addr->qp_num);

    memcpy(&self->av, av, sizeof(*av));
    self->av.dqp_dct      |= htonl(remote_dctn);
    uct_dc_mlx5_iface_set_av_sport(iface, &self->av, remote_dctn);

    return uct_dc_mlx5_ep_basic_init(iface, self);
}
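
The endpoint init above ORs a 24-bit remote DCT number, unpacked from the interface address, into av.dqp_dct. A small self-contained sketch of the three-byte pack/unpack, written to mirror what uct_ib_unpack_uint24 does; the pack helper is an assumption for symmetry, not quoted from UCX:

#include <stdint.h>
#include <stdio.h>

/* IB QP/DCT numbers are 24-bit, so addresses carry them as three bytes */
static void pack_uint24(uint8_t buf[3], uint32_t val)
{
    buf[0] = (val >> 0)  & 0xff;
    buf[1] = (val >> 8)  & 0xff;
    buf[2] = (val >> 16) & 0xff;
}

static uint32_t unpack_uint24(const uint8_t buf[3])
{
    return buf[0] | ((uint32_t)buf[1] << 8) | ((uint32_t)buf[2] << 16);
}

int main(void)
{
    uint8_t buf[3];

    pack_uint24(buf, 0xabcdef);
    printf("roundtrip: 0x%06x\n", (unsigned)unpack_uint24(buf));  /* 0xabcdef */
    return 0;
}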
Example No. 29
File: cm_ep.c Project: LenaO/ucx
ssize_t uct_cm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t am_id,
                           uct_pack_callback_t pack_cb, void *arg)
{
    uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t);
    uct_cm_ep_t *ep = ucs_derived_of(tl_ep, uct_cm_ep_t);
    struct ib_cm_sidr_req_param req;
    struct ibv_sa_path_rec path;
    struct ib_cm_id *id;
    ucs_status_t status;
    uct_cm_hdr_t *hdr;
    size_t payload_len;
    size_t total_len;
    int ret;

    UCT_CHECK_AM_ID(am_id);

    uct_cm_enter(iface);

    if (iface->num_outstanding >= iface->config.max_outstanding) {
        status = UCS_ERR_NO_RESOURCE;
        goto err;
    }

    /* Allocate temporary contiguous buffer */
    hdr = ucs_malloc(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, "cm_send_buf");
    if (hdr == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    payload_len = pack_cb(hdr + 1, arg);
    hdr->am_id  = am_id;
    hdr->length = payload_len;
    total_len   = sizeof(*hdr) + payload_len;
    ucs_assertv(total_len <= IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE,
                "total_len=%zu", total_len);

    status = uct_cm_ep_fill_path_rec(ep, &path);
    if (status != UCS_OK) {
        goto err_free;
    }

    /* Fill SIDR request */
    memset(&req, 0, sizeof req);
    req.path             = &path;
    req.service_id       = ep->dest_addr.id;
    req.timeout_ms       = iface->config.timeout_ms;
    req.private_data     = hdr;
    req.private_data_len = total_len;
    req.max_cm_retries   = iface->config.retry_count;

    /* Create temporary ID for this message. Will be released when getting REP. */
    ret = ib_cm_create_id(iface->cmdev, &id, NULL);
    if (ret) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free;
    }

    uct_cm_dump_path(&path);

    ret = ib_cm_send_sidr_req(id, &req);
    if (ret) {
        ucs_error("ib_cm_send_sidr_req() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_id;
    }

    iface->outstanding[iface->num_outstanding++] = id;
    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, payload_len);
    uct_cm_leave(iface);

    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_SEND, hdr,
                            "TX: SIDR_REQ [dlid %d svc 0x%"PRIx64"]",
                            ntohs(path.dlid), req.service_id);
    ucs_free(hdr);
    return payload_len;

err_destroy_id:
    ib_cm_destroy_id(id);
err_free:
    ucs_free(hdr);
err:
    uct_cm_leave(iface);
    return status;
}
Example No. 30
static UCS_F_ALWAYS_INLINE ssize_t
uct_ugni_udt_ep_am_common_send(const unsigned is_short, uct_ugni_udt_ep_t *ep, uct_ugni_udt_iface_t *iface,
                               uint8_t am_id, unsigned length, uint64_t header,
                               const void *payload, uct_pack_callback_t pack_cb, void *arg)
{
    gni_return_t ugni_rc;
    uint16_t msg_length;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *sheader, *rheader;
    ssize_t packed_length;

    UCT_CHECK_AM_ID(am_id);
    if (ucs_unlikely(NULL != ep->posted_desc)) {
        UCT_TL_IFACE_STAT_TX_NO_DESC(&iface->super.super);
        return UCS_ERR_NO_RESOURCE;
    }
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                             desc, return UCS_ERR_NO_RESOURCE);

    rheader = uct_ugni_udt_get_rheader(desc, iface);
    rheader->type = UCT_UGNI_UDT_EMPTY;

    sheader = uct_ugni_udt_get_sheader(desc, iface);

    if (is_short) {
        uint64_t *hdr = (uint64_t *)uct_ugni_udt_get_spayload(desc, iface);
        *hdr = header;
        memcpy((void*)(hdr + 1), payload, length);
        sheader->length = length + sizeof(header);
        msg_length = sheader->length + sizeof(*sheader);
        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, SHORT, sizeof(header) + length);
    } else {
        packed_length = pack_cb((void *)uct_ugni_udt_get_spayload(desc, iface),
                                arg);
        sheader->length = packed_length;
        msg_length = sheader->length + sizeof(*sheader);
        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, BCOPY, packed_length);
    }

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, am_id,
                       uct_ugni_udt_get_spayload(desc, iface), sheader->length,
                       is_short ? "TX: AM_SHORT" : "TX: AM_BCOPY");

    sheader->am_id = am_id;
    sheader->type = UCT_UGNI_UDT_PAYLOAD;

    ucs_assert_always(sheader->length <= GNI_DATAGRAM_MAXSIZE);

    pthread_mutex_lock(&uct_ugni_global_lock);
    ugni_rc = GNI_EpPostDataWId(ep->super.ep,
                                sheader, msg_length,
                                rheader, (uint16_t)iface->config.udt_seg_size,
                                ep->super.hash_key);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    UCT_UGNI_UDT_CHECK_RC(ugni_rc);

    ep->posted_desc = desc;
    ++ep->super.outstanding;
    ++iface->super.outstanding;
    return is_short ? UCS_OK : packed_length;
}
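
The common send path above packs the datagram in one of two ways: a fixed 8-byte header plus caller payload for am_short, or a caller callback that writes directly into the buffer and reports the length for am_bcopy. A standalone sketch of the two packing schemes; the names are illustrative, not UCT API:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef size_t (*pack_cb_t)(void *dest, void *arg);

/* "short" scheme: 64-bit header first, payload copied right behind it */
static size_t pack_short(void *dest, uint64_t header,
                         const void *payload, size_t length)
{
    uint64_t *hdr = dest;

    *hdr = header;
    memcpy(hdr + 1, payload, length);
    return sizeof(header) + length;
}

/* "bcopy" scheme: the callback owns the layout and reports the length */
static size_t example_pack_cb(void *dest, void *arg)
{
    size_t len = strlen(arg) + 1;

    memcpy(dest, arg, len);
    return len;
}

int main(void)
{
    char buf[128];
    uint32_t payload = 0x11223344;
    pack_cb_t cb = example_pack_cb;

    printf("short: %zu bytes, bcopy: %zu bytes\n",
           pack_short(buf, 0x1234, &payload, sizeof(payload)),
           cb(buf, "hello"));
    return 0;
}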