Пример #1
0
ucs_status_t ucp_put(ucp_ep_h ep, const void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    ssize_t packed_len;
    ucp_lane_index_t lane;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);
        if (length <= rma_config->max_put_short) {
            status = uct_ep_put_short(ep->uct_eps[lane], buffer, length,
                                      remote_addr, uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ucp_ep_config(ep)->bcopy_thresh) {
                frag_length = ucs_min(length, rma_config->max_put_short);
                status = uct_ep_put_short(ep->uct_eps[lane], buffer, frag_length,
                                          remote_addr, uct_rkey);
            } else {
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length = frag_length =
                                ucs_min(length, rma_config->max_put_bcopy);
                packed_len = uct_ep_put_bcopy(ep->uct_eps[lane], ucp_memcpy_pack,
                                              &pack_ctx, remote_addr, uct_rkey);
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }
            if (ucs_likely(status == UCS_OK)) {
                length      -= frag_length;
                if (length == 0) {
                    break;
                }

                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break;
            }
        }
        ucp_worker_progress(ep->worker);
    }

    return status;
}
Пример #2
0
ucs_status_t ucp_rma_put(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    if (length == 0) {
        return UCS_OK;
    }
    if (ENABLE_PARAMS_CHECK && (buffer == NULL)) {
        return UCS_ERR_INVALID_PARAM;
    }

    uct_rkey = UCP_RMA_RKEY_LOOKUP(ep, rkey);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        if (length <= ep->config.max_short_put) {
            status = uct_ep_put_short(ep->uct.ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ep->worker->context->config.bcopy_thresh) {
                frag_length = ucs_min(length, ep->config.max_short_put);
                status = uct_ep_put_short(ep->uct.ep, buffer, frag_length, remote_addr,
                                          uct_rkey);
            } else {
                frag_length = ucs_min(length, ep->config.max_bcopy_put);
                status = uct_ep_put_bcopy(ep->uct.ep, (uct_pack_callback_t)memcpy,
                                          (void*)buffer, frag_length, remote_addr,
                                          uct_rkey);
            }
            if (ucs_likely(status == UCS_OK)) {
                length      -= frag_length;
                if (length == 0) {
                    break;
                }

                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break;
            }
        }
        ucp_worker_progress(ep->worker);
    }

    return status;
}
Пример #3
0
static UCS_F_ALWAYS_INLINE ucs_status_t
ucp_tag_send_try(ucp_ep_h ep, const void *buffer, size_t count,
                 ucp_datatype_t datatype, ucp_tag_t tag)
{
    size_t length;

    if (ucs_likely((datatype & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_CONTIG)) {
        length = ucp_contig_dt_length(datatype, count);
        if (ucs_likely(length <= ep->config.max_short_egr)) {
            return ucp_tag_send_eager_short(ep, tag, buffer, length);
        }
    }

    return UCS_ERR_NO_RESOURCE; /* Fallback to slower progress */
}
Пример #4
0
static inline ucs_status_t uct_mm_iface_process_recv(uct_mm_iface_t *iface,
                                                     uct_mm_fifo_element_t* elem)
{
    ucs_status_t status;
    void         *data;

    if (ucs_likely(elem->flags & UCT_MM_FIFO_ELEM_FLAG_INLINE)) {
        /* read short (inline) messages from the FIFO elements */
        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, elem->am_id,
                           elem + 1, elem->length, "RX: AM_SHORT");
        status = uct_mm_iface_invoke_am(iface, elem->am_id, elem + 1,
                                        elem->length, 0);
    } else {
        /* read bcopy messages from the receive descriptors */
        VALGRIND_MAKE_MEM_DEFINED(elem->desc_chunk_base_addr + elem->desc_offset,
                                  elem->length);

        data = elem->desc_chunk_base_addr + elem->desc_offset;

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, elem->am_id,
                           data, elem->length, "RX: AM_BCOPY");

        status = uct_mm_iface_invoke_am(iface, elem->am_id, data, elem->length,
                                        UCT_CB_FLAG_DESC);
        if (status != UCS_OK) {
            /* assign a new receive descriptor to this FIFO element.*/
            uct_mm_assign_desc_to_fifo_elem(iface, elem, 0);
        }
    }
    return status;
}
Пример #5
0
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,
                                     unsigned header_length, const void *payload,
                                     size_t length, uct_mem_h memh,
                                     uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    ucs_status_t status;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    UCT_CHECK_LENGTH(sizeof(struct mlx5_wqe_ctrl_seg) +
                     sizeof(struct mlx5_wqe_data_seg) +
                     sizeof(struct mlx5_wqe_inl_data_seg) +
                     sizeof(uct_rc_hdr_t) + header_length,
                     UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "am zcopy");
    UCT_CHECK_LENGTH(header_length + length + sizeof(uct_rc_hdr_t),
                     ucs_derived_of(tl_ep->iface, uct_ib_iface_t)->config.seg_size,
                     "am_zcopy");
    UCT_CHECK_LENGTH(header_length + length, UCT_IB_MAX_MESSAGE_SIZE, "am_zcopy");

    status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, payload, length, memh,
                                       id, header, header_length, 0, 0, 0, comp);
    if (ucs_likely(status >= 0)) {
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + length);
        UCT_RC_UPDATE_FC_WND(&ep->super);
    }
    return status;
}
Пример #6
0
ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload,
                                      unsigned length, uint64_t remote_addr,
                                      uct_rkey_t rkey)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    ucs_status_t status;

    if (ucs_likely((length <= UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
#endif
        return uct_dc_mlx5_ep_put_short_inline(tl_ep, payload, length, remote_addr, rkey);
#if HAVE_IBV_EXP_DM
    }

    UCT_CHECK_LENGTH(length, 0, iface->super.dm.seg_len, "put_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);
    status = uct_dc_mlx5_ep_short_dm(ep, NULL, 0, payload, length,
                                     MLX5_OPCODE_RDMA_WRITE,
                                     MLX5_WQE_CTRL_CQ_UPDATE,
                                     remote_addr, rkey);
    if (UCS_STATUS_IS_ERR(status)) {
        return status;
    }
    UCT_TL_EP_STAT_OP(&ep->super, PUT, SHORT, length);
    return UCS_OK;
#endif
}
Пример #7
0
static ucs_arbiter_cb_result_t uct_dc_ep_abriter_purge_cb(ucs_arbiter_t *arbiter,
                                                          ucs_arbiter_elem_t *elem,
                                                          void *arg)
{
    uct_purge_cb_args_t  *cb_args   = arg;
    uct_pending_purge_callback_t cb = cb_args->cb;
    uct_pending_req_t *req          = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_rc_fc_request_t *freq       = ucs_derived_of(req, uct_rc_fc_request_t);
    uct_dc_ep_t *ep                 = ucs_container_of(ucs_arbiter_elem_group(elem),
                                                       uct_dc_ep_t, arb_group);

    if (ucs_likely(req->func != uct_dc_iface_fc_grant)){
        if (cb != NULL) {
            cb(req, cb_args->arg);
        } else {
            ucs_warn("ep=%p cancelling user pending request %p", ep, req);
        }
    } else {
        /* User callback should not be called for FC messages.
         * Just return pending request memory to the pool */
        ucs_mpool_put(freq);
    }

    return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
}
Пример #8
0
ucs_status_t uct_dc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
                                            const void *data, size_t length)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_rc_mlx5_dm_copy_data_t cache;
    ucs_status_t status;

    if (ucs_likely((sizeof(struct ibv_exp_tmh) + length <=
                    UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
#endif
        return uct_dc_mlx5_ep_tag_eager_short_inline(tl_ep, tag, data, length);
#if HAVE_IBV_EXP_DM
    }

    UCT_CHECK_LENGTH(length + sizeof(struct ibv_exp_tmh), 0,
                     iface->super.dm.seg_len, "tag_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);

    uct_rc_mlx5_fill_tmh(ucs_unaligned_ptr(&cache.tm_hdr), tag, 0, IBV_EXP_TMH_EAGER);

    status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.tm_hdr), data,
                                     length, MLX5_OPCODE_SEND,
                                     MLX5_WQE_CTRL_SOLICITED | MLX5_WQE_CTRL_CQ_UPDATE,
                                     0, 0);
    if (!UCS_STATUS_IS_ERR(status)) {
        UCT_TL_EP_STAT_OP(&ep->super, TAG, SHORT, length);
    }

    return status;
#endif
}
Пример #9
0
static UCS_F_ALWAYS_INLINE void
ucp_request_release_common(void *request, uint8_t cb_flag, const char *debug_name)
{
    ucp_request_t *req = (ucp_request_t*)request - 1;
    ucp_worker_h UCS_V_UNUSED worker = ucs_container_of(ucs_mpool_obj_owner(req),
                                                        ucp_worker_t, req_mp);
    uint16_t flags;

    UCP_THREAD_CS_ENTER_CONDITIONAL(&worker->mt_lock);

    flags = req->flags;
    ucs_trace_req("%s request %p (%p) "UCP_REQUEST_FLAGS_FMT, debug_name,
                  req, req + 1, UCP_REQUEST_FLAGS_ARG(flags));

    ucs_assert(!(flags & UCP_REQUEST_DEBUG_FLAG_EXTERNAL));
    ucs_assert(!(flags & UCP_REQUEST_FLAG_RELEASED));

    if (ucs_likely(flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_request_put(req);
    } else {
        req->flags = (flags | UCP_REQUEST_FLAG_RELEASED) & ~cb_flag;
    }

    UCP_THREAD_CS_EXIT_CONDITIONAL(&worker->mt_lock);
}
Пример #10
0
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucp_lane_index_t lane;
    uct_rkey_t uct_rkey;
    ucs_status_t status;

    UCP_RMA_CHECK_PARAMS(buffer, length);
    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);

    if (length <= rma_config->max_get_bcopy) {
        status = uct_ep_get_bcopy(ep->uct_eps[lane],
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)buffer,
                                  length,
                                  remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
            /* Return on error or success */
            return status;
        }
    }

    ucp_rma_start_nbi(ep, buffer, length, remote_addr, rkey,
                      ucp_progress_get_nbi);
    return UCS_INPROGRESS;
}
Пример #11
0
ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucp_lane_index_t lane;
    uct_rkey_t uct_rkey;
    ucs_status_t status;

    UCP_RMA_CHECK_PARAMS(buffer, length);
    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);

    /* Fast path for a single short message */
    if (length <= rma_config->max_put_short) {
        status = uct_ep_put_short(ep->uct_eps[lane], buffer, length, remote_addr,
                                  uct_rkey);
        if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
            /* Return on error or success */
            return status;
        }
    }

    ucp_rma_start_nbi(ep, buffer, length, remote_addr, rkey,
                        ucp_progress_put_nbi);
    return UCS_INPROGRESS;
}
Пример #12
0
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;
    size_t length;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    if (ucs_likely((datatype & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_CONTIG)) {
        length = ucp_contig_dt_length(datatype, count);
        UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                              "ucp_tag_send_nb (eager - start)",
                              buffer, length);
        if (ucs_likely(length <= ucp_ep_config(ep)->max_eager_short)) {
            status = ucp_tag_send_eager_short(ep, tag, buffer, length);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                                      "ucp_tag_send_nb (eager - finish)",
                                      buffer, length);
                return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
            }
        }
    }

    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_nb", req,
                          ucp_dt_length(datatype, count, buffer, &req->send.state));

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            ucp_ep_config(ep)->max_eager_short,
                            ucp_ep_config(ep)->zcopy_thresh,
                            ucp_ep_config(ep)->rndv_thresh,
                            cb, &ucp_tag_eager_proto);
}
Пример #13
0
ucs_status_t ucp_rma_get(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    uct_completion_t comp;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    if (length == 0) {
        return UCS_OK;
    }

    uct_rkey = UCP_RMA_RKEY_LOOKUP(ep, rkey);

    comp.count = 1;

    for (;;) {

        /* Push out all fragments, and request completion only for the last
         * fragment.
         */
        frag_length = ucs_min(ep->config.max_bcopy_get, length);
        status = uct_ep_get_bcopy(ep->uct.ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, &comp);
        if (ucs_likely(status == UCS_OK)) {
            goto posted;
        } else if (status == UCS_INPROGRESS) {
            ++comp.count;
            goto posted;
        } else if (status == UCS_ERR_NO_RESOURCE) {
            goto retry;
        } else {
            return status;
        }

posted:
        length      -= frag_length;
        if (length == 0) {
            break;
        }

        buffer      += frag_length;
        remote_addr += frag_length;
retry:
        ucp_worker_progress(ep->worker);
    }

    /* coverity[loop_condition] */
    while (comp.count > 1) {
        ucp_worker_progress(ep->worker);
    };
    return UCS_OK;
}
Пример #14
0
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
    for (;;) {
        if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
            /* Should be replaced with bcopy */
            packed_len = ucs_min(req->send.length, rma_config->max_put_short);
            status = uct_ep_put_short(uct_ep,
                                      req->send.buffer,
                                      packed_len,
                                      req->send.rma.remote_addr,
                                      uct_rkey);
        } else {
            /* We don't do it right now, but in future we have to add
             * an option to use zcopy
             */
            ucp_memcpy_pack_context_t pack_ctx;
            pack_ctx.src    = req->send.buffer;
            pack_ctx.length =
                ucs_min(req->send.length, rma_config->max_put_bcopy);
            packed_len = uct_ep_put_bcopy(uct_ep,
                                          ucp_memcpy_pack,
                                          &pack_ctx,
                                          req->send.rma.remote_addr,
                                          uct_rkey);
            status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
        }

        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            req->send.length -= packed_len;
            if (req->send.length == 0) {
                ucp_request_complete(req, void);
                break;
            }

            req->send.buffer += packed_len;
            req->send.rma.remote_addr += packed_len;
        } else {
            break;
        }
    }

    return status;
}
Пример #15
0
ucs_status_t uct_mm_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, size_t length,
                                 uint64_t remote_addr, uct_rkey_t rkey)
{
    if (ucs_likely(length != 0)) {
        pack_cb((void *)(rkey + remote_addr), arg, length);
        uct_mm_trace_data(remote_addr, rkey, "PUT_BCOPY [size %zu]", length);
    } else {
        ucs_trace_data("PUT_BCOPY [zero-length]");
    }
    return UCS_OK;
}
Пример #16
0
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    uct_ep_h uct_ep;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
        frag_length = ucs_min(rma_config->max_get_bcopy, length);
        status = uct_ep_get_bcopy(uct_ep,
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)buffer,
                                  frag_length,
                                  remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            length -= frag_length;
            buffer += frag_length;
            remote_addr += frag_length;
            if (length == 0) {
                break;
            }
        } else if (ucs_unlikely(status == UCS_ERR_NO_RESOURCE)) {
            /* Out of resources - adding request for later schedule */
            ucp_request_t *req;
            req = ucs_mpool_get_inline(&ep->worker->req_mp);
            if (req == NULL) {
                /* can't allocate memory for request - abort */
                status = UCS_ERR_NO_MEMORY;
                break;
            }
            ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                rkey, ucp_progress_get_nbi);

            /* Mark it as in progress */
            status = UCS_INPROGRESS;
            break;
        } else {
            /* Error */
            break;
        }
    }

    return status;
}
Пример #17
0
ucs_status_t uct_mm_ep_get_bcopy(uct_ep_h tl_ep, uct_unpack_callback_t unpack_cb,
                                 void *arg, size_t length,
                                 uint64_t remote_addr, uct_rkey_t rkey,
                                 uct_completion_t *comp)
{
    if (ucs_likely(0 != length)) {
        unpack_cb(arg, (void *)(rkey + remote_addr), length);
        uct_mm_trace_data(remote_addr, rkey, "GET_BCOPY [length %zu]", length);
    } else {
        ucs_trace_data("GET_BCOPY [zero-length]");
    }
    return UCS_OK;
}
Пример #18
0
ucs_status_t uct_mm_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                 unsigned length, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    if (ucs_likely(length != 0)) {
        memcpy((void *)(rkey + remote_addr), buffer, length);
        uct_mm_trace_data(remote_addr, rkey, "PUT_SHORT [buffer %p size %u]",
                          buffer, length);
    } else {
        ucs_trace_data("PUT_SHORT [zero-length]");
    }
    return UCS_OK;
}
Пример #19
0
ucs_status_t uct_sm_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                 unsigned length, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    if (ucs_likely(length != 0)) {
        memcpy((void *)(rkey + remote_addr), buffer, length);
        uct_sm_ep_trace_data(remote_addr, rkey, "PUT_SHORT [buffer %p size %u]",
                             buffer, length);
    } else {
        ucs_trace_data("PUT_SHORT [zero-length]");
    }
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);
    return UCS_OK;
}
Пример #20
0
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    return ucp_tag_send_slow(ep, buffer, count, datatype, tag, cb);
}
Пример #21
0
void uct_ugni_progress(void *arg)
{
    gni_cq_entry_t  event_data = 0;
    gni_post_descriptor_t *event_post_desc_ptr;
    uct_ugni_base_desc_t *desc;
    uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg;
    gni_return_t ugni_rc;

    ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data);
    if (GNI_RC_NOT_DONE == ugni_rc) {
        goto out;
    }

    if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) {
        ucs_error("GNI_CqGetEvent falied. Error status %s %d ",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }

    ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr);
    if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) {
        ucs_error("GNI_GetCompleted falied. Error status %s %d %d",
                  gni_err_str[ugni_rc], ugni_rc, GNI_RC_TRANSACTION_ERROR);
        return;
    }

    desc = (uct_ugni_base_desc_t *)event_post_desc_ptr;
    ucs_trace_async("Completion received on %p", desc);

    if (NULL != desc->comp_cb) {
        uct_invoke_completion(desc->comp_cb, UCS_OK);
    }
    --iface->outstanding;
    --desc->ep->outstanding;

    if (ucs_likely(0 == desc->not_ready_to_free)) {
        ucs_mpool_put(desc);
    }

    uct_ugni_ep_check_flush(desc->ep);

out:
    /* have a go a processing the pending queue */
    ucs_arbiter_dispatch(&iface->arbiter, 1, uct_ugni_ep_process_pending, NULL);
    return;
}
Пример #22
0
ucs_status_t uct_rc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                     const void *payload, unsigned length)
{
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    ucs_status_t status;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    status = uct_rc_mlx5_ep_inline_post(ep, MLX5_OPCODE_SEND, payload, length,
                                        id, hdr, 0, 0);
    if (ucs_likely(status >= 0)) {
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
        UCT_RC_UPDATE_FC_WND(&ep->super);
    }
    return status;
}
Пример #23
0
ucs_status_t ucp_tag_send(ucp_ep_h ep, const void *buffer, size_t length,
                          ucp_tag_t tag)
{
    ucp_worker_h worker = ep->worker;
    ucs_status_t status;

retry:
    if (ucs_likely(length < ep->config.max_short_tag)) {
        UCS_STATIC_ASSERT(sizeof(ucp_tag_t) == sizeof(uint64_t));
        status = uct_ep_am_short(ep->uct_ep, UCP_AM_ID_EAGER_ONLY, tag,
                                 buffer, length);
        if (status == UCS_ERR_NO_RESOURCE) {
            ucp_worker_progress(worker);
            goto retry;
        }
        return status;
    }

    ucs_fatal("unsupported");
}
Пример #24
0
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
    for (;;) {
        frag_length = ucs_min(rma_config->max_get_bcopy, req->send.length);
        status = uct_ep_get_bcopy(uct_ep,
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)req->send.buffer,
                                  frag_length,
                                  req->send.rma.remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            req->send.length -= frag_length;
            req->send.buffer += frag_length;
            req->send.rma.remote_addr += frag_length;
            if (req->send.length == 0) {
                /* Get was posted */
                ucp_request_complete(req, void);
                status = UCS_OK;
                break;
            }
        } else {
            /* Error - abort */
            break;
        }
    }

    return status;
}
Пример #25
0
void uct_ugni_progress(void *arg)
{
    gni_cq_entry_t  event_data = 0;
    gni_post_descriptor_t *event_post_desc_ptr;
    uct_ugni_base_desc_t *desc;
    uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg;
    gni_return_t ugni_rc;

    ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data);
    if (GNI_RC_NOT_DONE == ugni_rc) {
        return;
    }

    if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) {
        ucs_error("GNI_CqGetEvent falied. Error status %s %d ",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }

    ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr);
    if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) {
        ucs_error("GNI_GetCompleted falied. Error status %s %d %d",
                  gni_err_str[ugni_rc], ugni_rc, GNI_RC_TRANSACTION_ERROR);
        return;
    }

    desc = (uct_ugni_base_desc_t *)event_post_desc_ptr;
    ucs_trace_async("Completion received on %p", desc);

    if (NULL != desc->comp_cb) {
        uct_invoke_completion(desc->comp_cb);
    }
    --iface->outstanding;
    --desc->ep->outstanding;

    if (ucs_likely(desc->not_ready_to_free == 0)) {
        ucs_mpool_put(desc);
    }
    return;
}
Пример #26
0
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s", buffer,
                  count, tag, ucp_ep_peer_name(ep));

    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    VALGRIND_MAKE_MEM_DEFINED(req + 1, ep->worker->context->config.request.size);

    req->flags   = 0;
    req->cb.send = cb;

    status = ucp_tag_send_start_req(ep, buffer, count, datatype, tag, req);
    if (status != UCS_OK) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    if (!(req->flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_ep_add_pending(ep, ep->uct_ep, req);
        ucp_worker_progress(ep->worker);
    }

    ucs_trace_req("send_nb returning request %p", req);
    return req + 1;
}
Пример #27
0
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,
                                     unsigned header_length, const uct_iov_t *iov,
                                     size_t iovcnt, uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    ucs_status_t status;

    UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV,
                       "uct_rc_mlx5_ep_am_zcopy");
    UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt),
                               iface->super.config.seg_size, 0);
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, iov, iovcnt,
                                       id, header, header_length, 0, 0, 0, comp);
    if (ucs_likely(status >= 0)) {
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                          header_length + uct_iov_total_length(iov, iovcnt));
        UCT_RC_UPDATE_FC_WND(iface, &ep->super, id);
    }
    return status;
}
Пример #28
0
ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                     const void *buffer, unsigned length)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    ucs_status_t status;
    uct_rc_mlx5_dm_copy_data_t cache;

    if (ucs_likely((sizeof(uct_rc_mlx5_am_short_hdr_t) + length <=
                    UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
#endif
        return uct_dc_mlx5_ep_am_short_inline(tl_ep, id, hdr, buffer, length);
#if HAVE_IBV_EXP_DM
    }

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(length + sizeof(uct_rc_mlx5_am_short_hdr_t), 0,
                     iface->super.dm.seg_len, "am_short");
    UCT_DC_CHECK_RES_AND_FC(iface, ep);

    uct_rc_mlx5_am_hdr_fill(&cache.am_hdr.rc_hdr, id);
    cache.am_hdr.am_hdr = hdr;

    status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.am_hdr), buffer, length,
                                     MLX5_OPCODE_SEND,
                                     MLX5_WQE_CTRL_SOLICITED | MLX5_WQE_CTRL_CQ_UPDATE,
                                     0, 0);
    if (UCS_STATUS_IS_ERR(status)) {
        return status;
    }
    UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(cache.am_hdr) + length);
    UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc);
    return UCS_OK;
#endif
}
Пример #29
0
static UCS_F_ALWAYS_INLINE void
ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count,
                    uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask,
                    ucp_request_t *req, uint32_t req_flags, ucp_tag_recv_callback_t cb,
                    ucp_recv_desc_t *rdesc, const char *debug_name)
{
    unsigned common_flags = UCP_REQUEST_FLAG_RECV | UCP_REQUEST_FLAG_EXPECTED;
    ucp_eager_first_hdr_t *eagerf_hdr;
    ucp_request_queue_t *req_queue;
    uct_memory_type_t mem_type;
    size_t hdr_len, recv_len;
    ucs_status_t status;
    uint64_t msg_id;

    ucp_trace_req(req, "%s buffer %p dt 0x%lx count %zu tag %"PRIx64"/%"PRIx64,
                  debug_name, buffer, datatype, count, tag, tag_mask);

    /* First, check the fast path case - single fragment
     * in this case avoid initializing most of request fields
     * */
    if (ucs_likely((rdesc != NULL) && (rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY))) {
        UCS_PROFILE_REQUEST_EVENT(req, "eager_only_match", 0);
        UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags);
        UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP);

        if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) {
            ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags);
        }

        req->flags                    = UCP_REQUEST_FLAG_RECV | req_flags;
        hdr_len                       = rdesc->payload_offset;
        recv_len                      = rdesc->length - hdr_len;
        req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc);
        req->recv.tag.info.length     = recv_len;

        ucp_memory_type_detect_mds(worker->context, buffer, recv_len, &mem_type);

        status = ucp_dt_unpack_only(worker, buffer, count, datatype, mem_type,
                                    (void*)(rdesc + 1) + hdr_len, recv_len, 1);
        ucp_recv_desc_release(rdesc);

        if (req_flags & UCP_REQUEST_FLAG_CALLBACK) {
            cb(req + 1, status, &req->recv.tag.info);
        }
        ucp_tag_recv_request_completed(req, status, &req->recv.tag.info,
                                       debug_name);
        return;
    }

    /* Initialize receive request */
    req->status             = UCS_OK;
    req->recv.worker        = worker;
    req->recv.buffer        = buffer;
    req->recv.datatype      = datatype;

    ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count);

    if (!UCP_DT_IS_CONTIG(datatype)) {
        common_flags       |= UCP_REQUEST_FLAG_BLOCK_OFFLOAD;
    }

    req->flags              = common_flags | req_flags;
    req->recv.length        = ucp_dt_length(datatype, count, buffer,
                                            &req->recv.state);

    ucp_memory_type_detect_mds(worker->context, buffer, req->recv.length, &mem_type);

    req->recv.mem_type      = mem_type;
    req->recv.tag.tag       = tag;
    req->recv.tag.tag_mask  = tag_mask;
    req->recv.tag.cb        = cb;
    if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_REQ)) {
        req->recv.tag.info.sender_tag = 0;
    }

    if (ucs_unlikely(rdesc == NULL)) {
        /* If not found on unexpected, wait until it arrives.
         * If was found but need this receive request for later completion, save it */
        req_queue = ucp_tag_exp_get_queue(&worker->tm, tag, tag_mask);

        /* If offload supported, post this tag to transport as well.
         * TODO: need to distinguish the cases when posting is not needed. */
        ucp_tag_offload_try_post(worker, req, req_queue);

        ucp_tag_exp_push(&worker->tm, req_queue, req);

        ucs_trace_req("%s returning expected request %p (%p)", debug_name, req,
                      req + 1);
        return;
    }

    /* Check rendezvous case */
    if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_RNDV)) {
        ucp_rndv_matched(worker, req, (void*)(rdesc + 1));
        UCP_WORKER_STAT_RNDV(worker, UNEXP);
        ucp_recv_desc_release(rdesc);
        return;
    }

    if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) {
        ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags);
    }

    UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags);
    ucs_assert(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER);
    eagerf_hdr                    = (void*)(rdesc + 1);
    req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc);
    req->recv.tag.info.length     =
    req->recv.tag.remaining       = eagerf_hdr->total_len;

    /* process first fragment */
    UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP);
    msg_id = eagerf_hdr->msg_id;
    status = ucp_tag_recv_request_process_rdesc(req, rdesc, 0);
    ucs_assert(status == UCS_INPROGRESS);

    /* process additional fragments */
    ucp_tag_frag_list_process_queue(&worker->tm, req, msg_id
                                    UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP));
}
Пример #30
0
/* Endpoint operations */
static inline void uct_ugni_invoke_orig_comp(uct_ugni_rdma_fetch_desc_t *fma, ucs_status_t status)
{
    if (ucs_likely(NULL != fma->orig_comp_cb)) {
        uct_invoke_completion(fma->orig_comp_cb, status);
    }
}