/*
 * Blocking put: writes 'length' bytes from 'buffer' to 'remote_addr' on the
 * peer reached through 'ep', using remote key 'rkey'. Fragments the message
 * per the current RMA lane limits and progresses the worker until everything
 * has been posted (or a fatal error occurs).
 */
ucs_status_t ucp_put(ucp_ep_h ep, const void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    ssize_t packed_len;
    ucp_lane_index_t lane;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);
        if (length <= rma_config->max_put_short) {
            /* Whole remainder fits in a single short put */
            status = uct_ep_put_short(ep->uct_eps[lane], buffer, length,
                                      remote_addr, uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ucp_ep_config(ep)->bcopy_thresh) {
                /* Below bcopy threshold - send a short fragment */
                frag_length = ucs_min(length, rma_config->max_put_short);
                status = uct_ep_put_short(ep->uct_eps[lane], buffer,
                                          frag_length, remote_addr, uct_rkey);
            } else {
                /* Pack a bcopy fragment; a negative packed_len carries the
                 * error status */
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length = frag_length =
                    ucs_min(length, rma_config->max_put_bcopy);
                packed_len = uct_ep_put_bcopy(ep->uct_eps[lane],
                                              ucp_memcpy_pack, &pack_ctx,
                                              remote_addr, uct_rkey);
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }
            if (ucs_likely(status == UCS_OK)) {
                length -= frag_length;
                if (length == 0) {
                    break;
                }
                /* Advance to the next fragment */
                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break; /* fatal error */
            }
        }
        /* Out of transport resources - progress the worker and retry */
        ucp_worker_progress(ep->worker);
    }
    return status;
}
/*
 * Blocking put over the single configured transport endpoint: fragments the
 * message per the endpoint's short/bcopy limits and progresses the worker
 * until everything has been posted.
 */
ucs_status_t ucp_rma_put(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    if (length == 0) {
        return UCS_OK; /* nothing to send */
    }
    if (ENABLE_PARAMS_CHECK && (buffer == NULL)) {
        return UCS_ERR_INVALID_PARAM;
    }

    uct_rkey = UCP_RMA_RKEY_LOOKUP(ep, rkey);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        if (length <= ep->config.max_short_put) {
            status = uct_ep_put_short(ep->uct.ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ep->worker->context->config.bcopy_thresh) {
                frag_length = ucs_min(length, ep->config.max_short_put);
                status = uct_ep_put_short(ep->uct.ep, buffer, frag_length,
                                          remote_addr, uct_rkey);
            } else {
                frag_length = ucs_min(length, ep->config.max_bcopy_put);
                /* NOTE(review): casting memcpy to uct_pack_callback_t assumes
                 * a compatible (dest, src, length) signature - confirm against
                 * the uct_pack_callback_t declaration */
                status = uct_ep_put_bcopy(ep->uct.ep,
                                          (uct_pack_callback_t)memcpy,
                                          (void*)buffer, frag_length,
                                          remote_addr, uct_rkey);
            }
            if (ucs_likely(status == UCS_OK)) {
                length -= frag_length;
                if (length == 0) {
                    break;
                }
                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break; /* fatal error */
            }
        }
        /* No transport resources - progress and retry */
        ucp_worker_progress(ep->worker);
    }
    return status;
}
/* Attempt a fast-path eager-short tagged send. Returns UCS_ERR_NO_RESOURCE
 * when the message does not qualify (non-contiguous datatype or payload too
 * large), so the caller falls through to the slow path. */
static UCS_F_ALWAYS_INLINE ucs_status_t
ucp_tag_send_try(ucp_ep_h ep, const void *buffer, size_t count,
                 ucp_datatype_t datatype, ucp_tag_t tag)
{
    size_t payload_size;

    /* Only contiguous datatypes qualify for the fast path */
    if (ucs_unlikely((datatype & UCP_DATATYPE_CLASS_MASK) !=
                     UCP_DATATYPE_CONTIG)) {
        return UCS_ERR_NO_RESOURCE; /* Fallback to slower progress */
    }

    payload_size = ucp_contig_dt_length(datatype, count);
    if (ucs_unlikely(payload_size > ep->config.max_short_egr)) {
        return UCS_ERR_NO_RESOURCE; /* Fallback to slower progress */
    }

    return ucp_tag_send_eager_short(ep, tag, buffer, payload_size);
}
/*
 * Process one received shared-memory FIFO element: dispatch inline (short)
 * payloads directly from the element, or bcopy payloads from the attached
 * receive descriptor. Returns the active-message handler's status.
 */
static inline ucs_status_t
uct_mm_iface_process_recv(uct_mm_iface_t *iface, uct_mm_fifo_element_t* elem)
{
    ucs_status_t status;
    void *data;

    if (ucs_likely(elem->flags & UCT_MM_FIFO_ELEM_FLAG_INLINE)) {
        /* read short (inline) messages from the FIFO elements */
        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, elem->am_id,
                           elem + 1, elem->length, "RX: AM_SHORT");
        status = uct_mm_iface_invoke_am(iface, elem->am_id, elem + 1,
                                        elem->length, 0);
    } else {
        /* read bcopy messages from the receive descriptors */
        VALGRIND_MAKE_MEM_DEFINED(elem->desc_chunk_base_addr +
                                  elem->desc_offset, elem->length);
        data = elem->desc_chunk_base_addr + elem->desc_offset;
        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, elem->am_id,
                           data, elem->length, "RX: AM_BCOPY");
        status = uct_mm_iface_invoke_am(iface, elem->am_id, data, elem->length,
                                        UCT_CB_FLAG_DESC);
        if (status != UCS_OK) {
            /* the handler kept the descriptor - assign a new receive
             * descriptor to this FIFO element. */
            uct_mm_assign_desc_to_fifo_elem(iface, elem, 0);
        }
    }
    return status;
}
/*
 * RC mlx5 active-message zcopy (contiguous-payload variant): posts a SEND
 * WQE carrying an inline header plus a zero-copy payload registered via
 * 'memh'. Completion is reported through 'comp'.
 */
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                     const void *header,
                                     unsigned header_length,
                                     const void *payload, size_t length,
                                     uct_mem_h memh, uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    ucs_status_t status;

    UCT_CHECK_AM_ID(id);
    /* Flow-control window must allow a send on this AM id */
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);
    /* ctrl + data + inline segments + RC header + AM header must fit within
     * the maximal number of send WQE basic blocks */
    UCT_CHECK_LENGTH(sizeof(struct mlx5_wqe_ctrl_seg) +
                     sizeof(struct mlx5_wqe_data_seg) +
                     sizeof(struct mlx5_wqe_inl_data_seg) +
                     sizeof(uct_rc_hdr_t) + header_length,
                     UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB, "am zcopy");
    /* Total message must fit in one receive segment and the IB message limit */
    UCT_CHECK_LENGTH(header_length + length + sizeof(uct_rc_hdr_t),
                     ucs_derived_of(tl_ep->iface, uct_ib_iface_t)->config.seg_size,
                     "am_zcopy");
    UCT_CHECK_LENGTH(header_length + length, UCT_IB_MAX_MESSAGE_SIZE,
                     "am_zcopy");

    status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, payload, length,
                                       memh, id, header, header_length,
                                       0, 0, 0, comp);
    if (ucs_likely(status >= 0)) {
        /* UCS_OK / UCS_INPROGRESS: record stats and consume an FC credit */
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                          header_length + length);
        UCT_RC_UPDATE_FC_WND(&ep->super);
    }
    return status;
}
/*
 * DC mlx5 short put: takes the inline-WQE path when the payload fits (or
 * device memory is unavailable); otherwise stages the data through device
 * memory (DM) and posts an RDMA write.
 */
ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload,
                                      unsigned length, uint64_t remote_addr,
                                      uct_rkey_t rkey)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    ucs_status_t status;

    if (ucs_likely((length <=
                    UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
#endif
        /* without DM support this is the only path */
        return uct_dc_mlx5_ep_put_short_inline(tl_ep, payload, length,
                                               remote_addr, rkey);
#if HAVE_IBV_EXP_DM
    }

    UCT_CHECK_LENGTH(length, 0, iface->super.dm.seg_len, "put_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);
    /* Copy through device memory, requesting a CQ update on completion */
    status = uct_dc_mlx5_ep_short_dm(ep, NULL, 0, payload, length,
                                     MLX5_OPCODE_RDMA_WRITE,
                                     MLX5_WQE_CTRL_CQ_UPDATE, remote_addr,
                                     rkey);
    if (UCS_STATUS_IS_ERR(status)) {
        return status;
    }
    UCT_TL_EP_STAT_OP(&ep->super, PUT, SHORT, length);
    return UCS_OK;
#endif
}
/* Arbiter purge callback for a DC endpoint: hands regular pending requests
 * to the user's purge callback (or warns if none), and recycles internal
 * flow-control grant requests back to their memory pool. Always removes the
 * element from the arbiter. */
static ucs_arbiter_cb_result_t
uct_dc_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, ucs_arbiter_elem_t *elem,
                           void *arg)
{
    uct_purge_cb_args_t *cb_args = arg;
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_dc_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem),
                                       uct_dc_ep_t, arb_group);

    if (ucs_unlikely(req->func == uct_dc_iface_fc_grant)) {
        /* User callback should not be called for FC messages.
         * Just return pending request memory to the pool */
        ucs_mpool_put(ucs_derived_of(req, uct_rc_fc_request_t));
    } else if (cb_args->cb != NULL) {
        cb_args->cb(req, cb_args->arg);
    } else {
        ucs_warn("ep=%p cancelling user pending request %p", ep, req);
    }

    return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
}
/*
 * DC mlx5 eager-short tag send: takes the inline-WQE path when TMH + data
 * fit (or device memory is unavailable); otherwise stages the message
 * through device memory.
 */
ucs_status_t uct_dc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
                                            const void *data, size_t length)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_rc_mlx5_dm_copy_data_t cache;
    ucs_status_t status;

    if (ucs_likely((sizeof(struct ibv_exp_tmh) + length <=
                    UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
#endif
        /* without DM support this is the only path */
        return uct_dc_mlx5_ep_tag_eager_short_inline(tl_ep, tag, data,
                                                     length);
#if HAVE_IBV_EXP_DM
    }

    UCT_CHECK_LENGTH(length + sizeof(struct ibv_exp_tmh), 0,
                     iface->super.dm.seg_len, "tag_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);

    /* Build the tag-matching header locally, then copy via device memory */
    uct_rc_mlx5_fill_tmh(ucs_unaligned_ptr(&cache.tm_hdr), tag, 0,
                         IBV_EXP_TMH_EAGER);
    status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.tm_hdr), data,
                                     length, MLX5_OPCODE_SEND,
                                     MLX5_WQE_CTRL_SOLICITED |
                                     MLX5_WQE_CTRL_CQ_UPDATE, 0, 0);
    if (!UCS_STATUS_IS_ERR(status)) {
        UCT_TL_EP_STAT_OP(&ep->super, TAG, SHORT, length);
    }
    return status;
#endif
}
/*
 * Common implementation for releasing a user-visible request handle.
 * If the request has already completed, it is returned to the memory pool
 * immediately; otherwise it is marked released (clearing 'cb_flag') so it
 * will be freed when completion happens.
 */
static UCS_F_ALWAYS_INLINE void
ucp_request_release_common(void *request, uint8_t cb_flag,
                           const char *debug_name)
{
    /* the user pointer immediately follows the internal request struct */
    ucp_request_t *req = (ucp_request_t*)request - 1;
    ucp_worker_h UCS_V_UNUSED worker =
        ucs_container_of(ucs_mpool_obj_owner(req), ucp_worker_t, req_mp);
    uint16_t flags;

    UCP_THREAD_CS_ENTER_CONDITIONAL(&worker->mt_lock);

    flags = req->flags;
    ucs_trace_req("%s request %p (%p) "UCP_REQUEST_FLAGS_FMT, debug_name,
                  req, req + 1, UCP_REQUEST_FLAGS_ARG(flags));

    /* releasing an external request, or double-release, is a caller error */
    ucs_assert(!(flags & UCP_REQUEST_DEBUG_FLAG_EXTERNAL));
    ucs_assert(!(flags & UCP_REQUEST_FLAG_RELEASED));

    if (ucs_likely(flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_request_put(req);
    } else {
        req->flags = (flags | UCP_REQUEST_FLAG_RELEASED) & ~cb_flag;
    }

    UCP_THREAD_CS_EXIT_CONDITIONAL(&worker->mt_lock);
}
/*
 * Non-blocking get: tries a single bcopy get on the fast path; if the
 * transport cannot take it now, schedules a request to be progressed later
 * and returns UCS_INPROGRESS.
 */
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucp_lane_index_t lane;
    uct_rkey_t uct_rkey;
    ucs_status_t status;

    UCP_RMA_CHECK_PARAMS(buffer, length);
    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);

    if (length <= rma_config->max_get_bcopy) {
        /* Fast path: single-fragment get, unpacked with plain memcpy */
        status = uct_ep_get_bcopy(ep->uct_eps[lane],
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, length, remote_addr,
                                  uct_rkey, NULL);
        if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
            /* Return on error or success */
            return status;
        }
    }

    /* Slow path: queue the operation for later progress */
    ucp_rma_start_nbi(ep, buffer, length, remote_addr, rkey,
                      ucp_progress_get_nbi);
    return UCS_INPROGRESS;
}
/*
 * Non-blocking put: tries a single short put on the fast path; if the
 * transport cannot take it now, schedules a request to be progressed later
 * and returns UCS_INPROGRESS.
 */
ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucp_lane_index_t lane;
    uct_rkey_t uct_rkey;
    ucs_status_t status;

    UCP_RMA_CHECK_PARAMS(buffer, length);
    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);

    /* Fast path for a single short message */
    if (length <= rma_config->max_put_short) {
        status = uct_ep_put_short(ep->uct_eps[lane], buffer, length,
                                  remote_addr, uct_rkey);
        if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
            /* Return on error or success */
            return status;
        }
    }

    /* Slow path: queue the operation for later progress */
    ucp_rma_start_nbi(ep, buffer, length, remote_addr, rkey,
                      ucp_progress_put_nbi);
    return UCS_INPROGRESS;
}
/*
 * Non-blocking tagged send: tries the eager-short fast path for contiguous
 * data; otherwise allocates a request and dispatches it through the eager
 * protocol (bcopy/zcopy/rendezvous chosen by the endpoint thresholds).
 */
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer,
                                 size_t count, uintptr_t datatype,
                                 ucp_tag_t tag, ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;
    size_t length;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    if (ucs_likely((datatype & UCP_DATATYPE_CLASS_MASK) ==
                   UCP_DATATYPE_CONTIG)) {
        length = ucp_contig_dt_length(datatype, count);
        UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                              "ucp_tag_send_nb (eager - start)",
                              buffer, length);
        if (ucs_likely(length <= ucp_ep_config(ep)->max_eager_short)) {
            status = ucp_tag_send_eager_short(ep, tag, buffer, length);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                                      "ucp_tag_send_nb (eager - finish)",
                                      buffer, length);
                return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
            }
        }
    }

    /* Slow path: allocate and initialize a request */
    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_nb", req,
                          ucp_dt_length(datatype, count, buffer,
                                        &req->send.state));

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count, ucp_ep_config(ep)->max_eager_short,
                            ucp_ep_config(ep)->zcopy_thresh,
                            ucp_ep_config(ep)->rndv_thresh, cb,
                            &ucp_tag_eager_proto);
}
/*
 * Blocking get: fragments the transfer into bcopy gets, tracks outstanding
 * fragments with a local uct_completion_t counter, and progresses the worker
 * until all of them complete.
 */
ucs_status_t ucp_rma_get(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    uct_completion_t comp;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    if (length == 0) {
        return UCS_OK;
    }

    uct_rkey = UCP_RMA_RKEY_LOOKUP(ep, rkey);

    /* base count is 1; each in-progress fragment adds one, and the
     * completion callback decrements back toward 1 */
    comp.count = 1;

    for (;;) {
        /* Push out all fragments, and request completion only for the last
         * fragment.
         */
        frag_length = ucs_min(ep->config.max_bcopy_get, length);
        status = uct_ep_get_bcopy(ep->uct.ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, &comp);
        if (ucs_likely(status == UCS_OK)) {
            goto posted;     /* completed immediately */
        } else if (status == UCS_INPROGRESS) {
            ++comp.count;    /* completion will arrive asynchronously */
            goto posted;
        } else if (status == UCS_ERR_NO_RESOURCE) {
            goto retry;      /* progress and retry the same fragment */
        } else {
            return status;   /* fatal error */
        }

posted:
        length -= frag_length;
        if (length == 0) {
            break;
        }
        buffer      += frag_length;
        remote_addr += frag_length;
retry:
        ucp_worker_progress(ep->worker);
    }

    /* Wait until every asynchronously-posted fragment has completed */
    /* coverity[loop_condition] */
    while (comp.count > 1) {
        ucp_worker_progress(ep->worker);
    };
    return UCS_OK;
}
/*
 * Pending-queue progress callback for put_nbi: keeps posting fragments
 * (short or bcopy) until the request has been fully sent, or the transport
 * reports an error / runs out of resources (in which case the request stays
 * pending and this callback will be invoked again).
 */
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
    for (;;) {
        if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
            /* Should be replaced with bcopy */
            packed_len = ucs_min(req->send.length, rma_config->max_put_short);
            status = uct_ep_put_short(uct_ep, req->send.buffer, packed_len,
                                      req->send.rma.remote_addr, uct_rkey);
        } else {
            /* We don't do it right now, but in future we have to add
             * an option to use zcopy
             */
            ucp_memcpy_pack_context_t pack_ctx;
            pack_ctx.src    = req->send.buffer;
            pack_ctx.length = ucs_min(req->send.length,
                                      rma_config->max_put_bcopy);
            packed_len = uct_ep_put_bcopy(uct_ep, ucp_memcpy_pack, &pack_ctx,
                                          req->send.rma.remote_addr,
                                          uct_rkey);
            /* a negative packed_len carries the error status */
            status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
        }
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            req->send.length -= packed_len;
            if (req->send.length == 0) {
                ucp_request_complete(req, void);
                break;
            }
            /* Advance to the next fragment */
            req->send.buffer          += packed_len;
            req->send.rma.remote_addr += packed_len;
        } else {
            break; /* no resources or error - report to the pending engine */
        }
    }
    return status;
}
/* Shared-memory bcopy put: the remote segment is mapped locally at
 * (rkey + remote_addr), so the pack callback writes into it directly.
 * Always completes immediately. */
ucs_status_t uct_mm_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, size_t length,
                                 uint64_t remote_addr, uct_rkey_t rkey)
{
    if (ucs_unlikely(length == 0)) {
        ucs_trace_data("PUT_BCOPY [zero-length]");
        return UCS_OK;
    }

    pack_cb((void *)(rkey + remote_addr), arg, length);
    uct_mm_trace_data(remote_addr, rkey, "PUT_BCOPY [size %zu]", length);
    return UCS_OK;
}
/*
 * Non-blocking multi-fragment get: posts bcopy gets until the whole length
 * has been initiated; when the transport runs out of resources, queues a
 * request holding the remaining work and returns UCS_INPROGRESS.
 */
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    uct_ep_h uct_ep;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    for (;;) {
        /* Re-resolve each iteration: a transport switch may change lanes */
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
        frag_length = ucs_min(rma_config->max_get_bcopy, length);
        status = uct_ep_get_bcopy(uct_ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            length      -= frag_length;
            buffer      += frag_length;
            remote_addr += frag_length;
            if (length == 0) {
                break;
            }
        } else if (ucs_unlikely(status == UCS_ERR_NO_RESOURCE)) {
            /* Out of resources - adding request for later schedule */
            ucp_request_t *req;
            req = ucs_mpool_get_inline(&ep->worker->req_mp);
            if (req == NULL) {
                /* can't allocate memory for request - abort */
                status = UCS_ERR_NO_MEMORY;
                break;
            }
            ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                rkey, ucp_progress_get_nbi);
            /* Mark it as in progress */
            status = UCS_INPROGRESS;
            break;
        } else {
            /* Error */
            break;
        }
    }
    return status;
}
/* Shared-memory bcopy get: the remote segment is mapped locally at
 * (rkey + remote_addr), so the unpack callback reads it directly.
 * Always completes immediately; 'comp' is never invoked. */
ucs_status_t uct_mm_ep_get_bcopy(uct_ep_h tl_ep,
                                 uct_unpack_callback_t unpack_cb, void *arg,
                                 size_t length, uint64_t remote_addr,
                                 uct_rkey_t rkey, uct_completion_t *comp)
{
    if (ucs_unlikely(length == 0)) {
        ucs_trace_data("GET_BCOPY [zero-length]");
        return UCS_OK;
    }

    unpack_cb(arg, (void *)(rkey + remote_addr), length);
    uct_mm_trace_data(remote_addr, rkey, "GET_BCOPY [length %zu]", length);
    return UCS_OK;
}
/* Shared-memory short put: copies the payload straight into the remote
 * segment, which is mapped locally at (rkey + remote_addr). */
ucs_status_t uct_mm_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                 unsigned length, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    if (ucs_unlikely(length == 0)) {
        ucs_trace_data("PUT_SHORT [zero-length]");
        return UCS_OK;
    }

    memcpy((void *)(rkey + remote_addr), buffer, length);
    uct_mm_trace_data(remote_addr, rkey, "PUT_SHORT [buffer %p size %u]",
                      buffer, length);
    return UCS_OK;
}
/* Shared-memory short put with statistics: copies the payload into the
 * locally-mapped remote segment and records a PUT/SHORT stat. */
ucs_status_t uct_sm_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                 unsigned length, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    if (ucs_unlikely(length == 0)) {
        ucs_trace_data("PUT_SHORT [zero-length]");
    } else {
        memcpy((void *)(rkey + remote_addr), buffer, length);
        uct_sm_ep_trace_data(remote_addr, rkey,
                             "PUT_SHORT [buffer %p size %u]", buffer, length);
    }
    /* the stat is recorded even for zero-length puts, as in the original */
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT,
                      length);
    return UCS_OK;
}
/* Non-blocking tagged send: attempts the eager-short fast path, and only
 * drops to the slow path when the transport has no resources for it. */
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer,
                                 size_t count, uintptr_t datatype,
                                 ucp_tag_t tag, ucp_send_callback_t cb)
{
    ucs_status_t status;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_unlikely(status == UCS_ERR_NO_RESOURCE)) {
        return ucp_tag_send_slow(ep, buffer, count, datatype, tag, cb);
    }

    return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
}
void uct_ugni_progress(void *arg) { gni_cq_entry_t event_data = 0; gni_post_descriptor_t *event_post_desc_ptr; uct_ugni_base_desc_t *desc; uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg; gni_return_t ugni_rc; ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data); if (GNI_RC_NOT_DONE == ugni_rc) { goto out; } if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) { ucs_error("GNI_CqGetEvent falied. Error status %s %d ", gni_err_str[ugni_rc], ugni_rc); return; } ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr); if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) { ucs_error("GNI_GetCompleted falied. Error status %s %d %d", gni_err_str[ugni_rc], ugni_rc, GNI_RC_TRANSACTION_ERROR); return; } desc = (uct_ugni_base_desc_t *)event_post_desc_ptr; ucs_trace_async("Completion received on %p", desc); if (NULL != desc->comp_cb) { uct_invoke_completion(desc->comp_cb, UCS_OK); } --iface->outstanding; --desc->ep->outstanding; if (ucs_likely(0 == desc->not_ready_to_free)) { ucs_mpool_put(desc); } uct_ugni_ep_check_flush(desc->ep); out: /* have a go a processing the pending queue */ ucs_arbiter_dispatch(&iface->arbiter, 1, uct_ugni_ep_process_pending, NULL); return; }
/*
 * RC mlx5 short active message: posts the 64-bit AM header and the payload
 * inline in the send WQE. Updates stats and the flow-control window on
 * success.
 */
ucs_status_t uct_rc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                     uint64_t hdr, const void *payload,
                                     unsigned length)
{
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    ucs_status_t status;

    UCT_CHECK_AM_ID(id);
    /* Flow-control window must allow a send on this AM id */
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    status = uct_rc_mlx5_ep_inline_post(ep, MLX5_OPCODE_SEND, payload,
                                        length, id, hdr, 0, 0);
    if (ucs_likely(status >= 0)) {
        /* non-negative means success (error statuses are negative) */
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT,
                          sizeof(hdr) + length);
        UCT_RC_UPDATE_FC_WND(&ep->super);
    }
    return status;
}
/*
 * Blocking eager-only tagged send: retries the short AM (progressing the
 * worker in between) until transport resources are available. Messages at
 * or above the short threshold are not supported by this implementation.
 */
ucs_status_t ucp_tag_send(ucp_ep_h ep, const void *buffer, size_t length,
                          ucp_tag_t tag)
{
    ucp_worker_h worker = ep->worker;
    ucs_status_t status;

retry:
    if (ucs_likely(length < ep->config.max_short_tag)) {
        /* the tag rides in the 64-bit AM header slot */
        UCS_STATIC_ASSERT(sizeof(ucp_tag_t) == sizeof(uint64_t));
        status = uct_ep_am_short(ep->uct_ep, UCP_AM_ID_EAGER_ONLY, tag,
                                 buffer, length);
        if (status == UCS_ERR_NO_RESOURCE) {
            ucp_worker_progress(worker);
            goto retry;
        }
        return status;
    }

    ucs_fatal("unsupported");
}
/*
 * Pending-queue progress callback for get_nbi: posts bcopy gets for the
 * remaining length; completes the request once everything has been posted,
 * or returns the error/no-resource status so the request stays pending.
 */
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
    for (;;) {
        frag_length = ucs_min(rma_config->max_get_bcopy, req->send.length);
        status = uct_ep_get_bcopy(uct_ep, (uct_unpack_callback_t)memcpy,
                                  (void*)req->send.buffer, frag_length,
                                  req->send.rma.remote_addr, uct_rkey, NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            req->send.length          -= frag_length;
            req->send.buffer          += frag_length;
            req->send.rma.remote_addr += frag_length;
            if (req->send.length == 0) {
                /* Get was posted */
                ucp_request_complete(req, void);
                status = UCS_OK;
                break;
            }
        } else {
            /* Error - abort */
            break;
        }
    }
    return status;
}
void uct_ugni_progress(void *arg) { gni_cq_entry_t event_data = 0; gni_post_descriptor_t *event_post_desc_ptr; uct_ugni_base_desc_t *desc; uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg; gni_return_t ugni_rc; ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data); if (GNI_RC_NOT_DONE == ugni_rc) { return; } if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) { ucs_error("GNI_CqGetEvent falied. Error status %s %d ", gni_err_str[ugni_rc], ugni_rc); return; } ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr); if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) { ucs_error("GNI_GetCompleted falied. Error status %s %d %d", gni_err_str[ugni_rc], ugni_rc, GNI_RC_TRANSACTION_ERROR); return; } desc = (uct_ugni_base_desc_t *)event_post_desc_ptr; ucs_trace_async("Completion received on %p", desc); if (NULL != desc->comp_cb) { uct_invoke_completion(desc->comp_cb); } --iface->outstanding; --desc->ep->outstanding; if (ucs_likely(desc->not_ready_to_free == 0)) { ucs_mpool_put(desc); } return; }
/*
 * Non-blocking tagged send: tries the eager-short fast path, then allocates
 * a request from the worker pool and starts the send; the request is queued
 * on the transport pending list if it did not complete immediately.
 */
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer,
                                 size_t count, uintptr_t datatype,
                                 ucp_tag_t tag, ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s",
                  buffer, count, tag, ucp_ep_peer_name(ep));

    /* Fast path: try to send immediately as eager-short */
    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    VALGRIND_MAKE_MEM_DEFINED(req + 1,
                              ep->worker->context->config.request.size);

    req->flags   = 0;
    req->cb.send = cb;

    status = ucp_tag_send_start_req(ep, buffer, count, datatype, tag, req);
    if (status != UCS_OK) {
        /* FIX: return the unused request to the memory pool - the original
         * leaked it on this error path. */
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    if (!(req->flags & UCP_REQUEST_FLAG_COMPLETED)) {
        /* Not completed inline - queue for transport progress */
        ucp_ep_add_pending(ep, ep->uct_ep, req);
        ucp_worker_progress(ep->worker);
    }

    ucs_trace_req("send_nb returning request %p", req);
    return req + 1; /* user-visible handle follows the request struct */
}
/*
 * RC mlx5 active-message zcopy (iov variant): posts a SEND WQE with an
 * inline header and a scatter-gather payload of up to
 * UCT_IB_MLX5_AM_ZCOPY_MAX_IOV entries. Completion is reported via 'comp'.
 */
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                     const void *header,
                                     unsigned header_length,
                                     const uct_iov_t *iov, size_t iovcnt,
                                     uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    ucs_status_t status;

    UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV,
                       "uct_rc_mlx5_ep_am_zcopy");
    UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length,
                               uct_iov_total_length(iov, iovcnt),
                               iface->super.config.seg_size, 0);
    /* Flow-control window must allow a send on this AM id */
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, iov, iovcnt,
                                       id, header, header_length, 0, 0, 0,
                                       comp);
    if (ucs_likely(status >= 0)) {
        /* UCS_OK / UCS_INPROGRESS: record stats and consume an FC credit */
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                          header_length + uct_iov_total_length(iov, iovcnt));
        UCT_RC_UPDATE_FC_WND(iface, &ep->super, id);
    }
    return status;
}
/*
 * DC mlx5 short active message: takes the inline-WQE path when header +
 * payload fit (or device memory is unavailable); otherwise stages through
 * device memory. Updates stats and the FC window on success.
 */
ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                     uint64_t hdr, const void *buffer,
                                     unsigned length)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    ucs_status_t status;
    uct_rc_mlx5_dm_copy_data_t cache;

    if (ucs_likely((sizeof(uct_rc_mlx5_am_short_hdr_t) + length <=
                    UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
#endif
        /* without DM support this is the only path */
        return uct_dc_mlx5_ep_am_short_inline(tl_ep, id, hdr, buffer,
                                              length);
#if HAVE_IBV_EXP_DM
    }

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(length + sizeof(uct_rc_mlx5_am_short_hdr_t), 0,
                     iface->super.dm.seg_len, "am_short");
    UCT_DC_CHECK_RES_AND_FC(iface, ep);

    /* Build the AM header in a local cache, then copy via device memory */
    uct_rc_mlx5_am_hdr_fill(&cache.am_hdr.rc_hdr, id);
    cache.am_hdr.am_hdr = hdr;

    status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.am_hdr),
                                     buffer, length, MLX5_OPCODE_SEND,
                                     MLX5_WQE_CTRL_SOLICITED |
                                     MLX5_WQE_CTRL_CQ_UPDATE, 0, 0);
    if (UCS_STATUS_IS_ERR(status)) {
        return status;
    }
    UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(cache.am_hdr) + length);
    UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc);
    return UCS_OK;
#endif
}
/*
 * Common tag-receive entry point: matches 'rdesc' (an already-arrived
 * unexpected message, or NULL) against the receive request 'req', and
 * either completes the receive inline (eager-only fast path), starts
 * rendezvous/multi-fragment handling, or queues the request on the
 * expected-receive queue.
 */
static UCS_F_ALWAYS_INLINE void
ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count,
                    uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask,
                    ucp_request_t *req, uint32_t req_flags,
                    ucp_tag_recv_callback_t cb, ucp_recv_desc_t *rdesc,
                    const char *debug_name)
{
    unsigned common_flags = UCP_REQUEST_FLAG_RECV | UCP_REQUEST_FLAG_EXPECTED;
    ucp_eager_first_hdr_t *eagerf_hdr;
    ucp_request_queue_t *req_queue;
    uct_memory_type_t mem_type;
    size_t hdr_len, recv_len;
    ucs_status_t status;
    uint64_t msg_id;

    ucp_trace_req(req,
                  "%s buffer %p dt 0x%lx count %zu tag %"PRIx64"/%"PRIx64,
                  debug_name, buffer, datatype, count, tag, tag_mask);

    /* First, check the fast path case - single fragment
     * in this case avoid initializing most of request fields
     * */
    if (ucs_likely((rdesc != NULL) &&
                   (rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY))) {
        UCS_PROFILE_REQUEST_EVENT(req, "eager_only_match", 0);
        UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags);
        UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP);

        if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) {
            /* sender of a sync-eager message is waiting for an ack */
            ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags);
        }

        req->flags                    = UCP_REQUEST_FLAG_RECV | req_flags;
        hdr_len                       = rdesc->payload_offset;
        recv_len                      = rdesc->length - hdr_len;
        req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc);
        req->recv.tag.info.length     = recv_len;

        ucp_memory_type_detect_mds(worker->context, buffer, recv_len,
                                   &mem_type);

        status = ucp_dt_unpack_only(worker, buffer, count, datatype,
                                    mem_type, (void*)(rdesc + 1) + hdr_len,
                                    recv_len, 1);
        ucp_recv_desc_release(rdesc);

        if (req_flags & UCP_REQUEST_FLAG_CALLBACK) {
            cb(req + 1, status, &req->recv.tag.info);
        }
        ucp_tag_recv_request_completed(req, status, &req->recv.tag.info,
                                       debug_name);
        return;
    }

    /* Initialize receive request */
    req->status             = UCS_OK;
    req->recv.worker        = worker;
    req->recv.buffer        = buffer;
    req->recv.datatype      = datatype;
    ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count);
    if (!UCP_DT_IS_CONTIG(datatype)) {
        /* non-contiguous receives cannot be offloaded to the transport */
        common_flags       |= UCP_REQUEST_FLAG_BLOCK_OFFLOAD;
    }
    req->flags              = common_flags | req_flags;
    req->recv.length        = ucp_dt_length(datatype, count, buffer,
                                            &req->recv.state);
    ucp_memory_type_detect_mds(worker->context, buffer, req->recv.length,
                               &mem_type);
    req->recv.mem_type      = mem_type;
    req->recv.tag.tag       = tag;
    req->recv.tag.tag_mask  = tag_mask;
    req->recv.tag.cb        = cb;
    if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_REQ)) {
        req->recv.tag.info.sender_tag = 0;
    }

    if (ucs_unlikely(rdesc == NULL)) {
        /* If not found on unexpected, wait until it arrives.
         * If was found but need this receive request for later completion, save it */
        req_queue = ucp_tag_exp_get_queue(&worker->tm, tag, tag_mask);

        /* If offload supported, post this tag to transport as well.
         * TODO: need to distinguish the cases when posting is not needed. */
        ucp_tag_offload_try_post(worker, req, req_queue);

        ucp_tag_exp_push(&worker->tm, req_queue, req);

        ucs_trace_req("%s returning expected request %p (%p)", debug_name,
                      req, req + 1);
        return;
    }

    /* Check rendezvous case */
    if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_RNDV)) {
        ucp_rndv_matched(worker, req, (void*)(rdesc + 1));
        UCP_WORKER_STAT_RNDV(worker, UNEXP);
        ucp_recv_desc_release(rdesc);
        return;
    }

    if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) {
        ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags);
    }

    /* Multi-fragment eager: the first fragment carries the total length */
    UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags);
    ucs_assert(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER);
    eagerf_hdr                    = (void*)(rdesc + 1);
    req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc);
    req->recv.tag.info.length     =
    req->recv.tag.remaining       = eagerf_hdr->total_len;

    /* process first fragment */
    UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP);
    msg_id = eagerf_hdr->msg_id;
    status = ucp_tag_recv_request_process_rdesc(req, rdesc, 0);
    ucs_assert(status == UCS_INPROGRESS);

    /* process additional fragments */
    ucp_tag_frag_list_process_queue(&worker->tm, req, msg_id
                                    UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP));
}
/* Endpoint operations */

/* Invoke the caller's original completion callback, if one was attached to
 * this fetch descriptor. */
static inline void uct_ugni_invoke_orig_comp(uct_ugni_rdma_fetch_desc_t *fma,
                                             ucs_status_t status)
{
    if (ucs_unlikely(fma->orig_comp_cb == NULL)) {
        return;
    }
    uct_invoke_completion(fma->orig_comp_cb, status);
}