ucs_arbiter_cb_result_t uct_ugni_ep_process_pending(ucs_arbiter_t *arbiter,
                                                    ucs_arbiter_elem_t *elem,
                                                    void *arg)
{
    uct_ugni_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem),
                                         uct_ugni_ep_t, arb_group);
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t rc;

    ep->arb_sched = 1;
    ucs_trace_data("progressing pending request %p", req);
    rc = req->func(req);
    ep->arb_sched = 0;
    ucs_trace_data("status returned from progress pending: %s",
                   ucs_status_string(rc));

    if (UCS_OK == rc) {
        /* sent successfully. remove from the arbiter */
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    } else if (UCS_INPROGRESS == rc) {
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    } else {
        /* couldn't send. keep this request in the arbiter until the next time
         * this function is called */
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }
}
ucs_arbiter_cb_result_t uct_mm_ep_process_pending(ucs_arbiter_t *arbiter,
                                                  ucs_arbiter_elem_t *elem,
                                                  void *arg)
{
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t status;
    uct_mm_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem),
                                       uct_mm_ep_t, arb_group);

    /* update the local tail with its actual value from the remote peer
     * making sure that the pending sends would use the real tail value */
    ucs_memory_cpu_load_fence();
    ep->cached_tail = ep->fifo_ctl->tail;

    if (!uct_mm_ep_has_tx_resources(ep)) {
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }

    status = req->func(req);
    ucs_trace_data("progress pending request %p returned %s", req,
                   ucs_status_string(status));

    if (status == UCS_OK) {
        /* sent successfully. remove from the arbiter */
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    } else if (status == UCS_INPROGRESS) {
        /* sent but not completed, keep in the arbiter */
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    } else {
        /* couldn't send. keep this request in the arbiter until the next time
         * this function is called */
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }
}
static ucs_arbiter_cb_result_t uct_dc_ep_abriter_purge_cb(ucs_arbiter_t *arbiter,
                                                          ucs_arbiter_elem_t *elem,
                                                          void *arg)
{
    uct_purge_cb_args_t *cb_args    = arg;
    uct_pending_purge_callback_t cb = cb_args->cb;
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_rc_fc_request_t *freq = ucs_derived_of(req, uct_rc_fc_request_t);
    uct_dc_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem),
                                       uct_dc_ep_t, arb_group);

    if (ucs_likely(req->func != uct_dc_iface_fc_grant)) {
        if (cb != NULL) {
            cb(req, cb_args->arg);
        } else {
            ucs_warn("ep=%p cancelling user pending request %p", ep, req);
        }
    } else {
        /* User callback should not be called for FC messages.
         * Just return pending request memory to the pool */
        ucs_mpool_put(freq);
    }

    return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
}
/**
 * Dispatch requests waiting for tx resources.
 */
ucs_arbiter_cb_result_t
uct_dc_mlx5_iface_dci_do_pending_tx(ucs_arbiter_t *arbiter,
                                    ucs_arbiter_elem_t *elem,
                                    void *arg)
{
    uct_dc_mlx5_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem),
                                            uct_dc_mlx5_ep_t, arb_group);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                                uct_dc_mlx5_iface_t);
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t status;

    if (!uct_rc_iface_has_tx_resources(&iface->super.super)) {
        return UCS_ARBITER_CB_RESULT_STOP;
    }

    status = req->func(req);
    ucs_trace_data("progress pending request %p returned: %s", req,
                   ucs_status_string(status));

    if (status == UCS_OK) {
        /* For dcs* policies release the dci if this is the last elem in the
         * group and the dci has no outstanding operations, e.g. the pending
         * callback did not send anything (uct_ep_flush or just returned OK) */
        if (ucs_arbiter_elem_is_last(&ep->arb_group, elem)) {
            uct_dc_mlx5_iface_dci_free(iface, ep);
        }
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    }
    if (status == UCS_INPROGRESS) {
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    }
    if (!uct_dc_mlx5_iface_dci_ep_can_send(ep)) {
        /* Deschedule the group even if FC is the only missing resource; it
         * will be scheduled again when credits arrive.
         * We can't deschedule a group with the rand policy if non-FC
         * resources are missing, since it would never be scheduled again. */
        if (uct_dc_mlx5_iface_is_dci_rand(iface) &&
            uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
            return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
        } else {
            return UCS_ARBITER_CB_RESULT_DESCHED_GROUP;
        }
    }

    ucs_assertv(!uct_rc_iface_has_tx_resources(&iface->super.super),
                "pending callback returned error but send resources are available");
    return UCS_ARBITER_CB_RESULT_STOP;
}
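/*
 * Illustrative sketch (not part of the original sources): pending-dispatch
 * callbacks like the ones above are normally driven from the transport's
 * progress path through ucs_arbiter_dispatch(), which walks the scheduled
 * groups and acts on each callback's return value (remove the element, move
 * to the next group, reschedule, deschedule, or stop). The iface type and
 * the 'arbiter' field name below are assumptions made for this example only.
 */
static void example_dispatch_pending(uct_ugni_iface_t *iface)
{
    /* dispatch at most one pending element per endpoint group */
    ucs_arbiter_dispatch(&iface->arbiter, 1, uct_ugni_ep_process_pending, NULL);
}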
void ucp_tag_eager_sync_zcopy_completion(uct_completion_t *self,
                                         ucs_status_t status)
{
    ucp_request_t *req;

    req = ucs_container_of(self, ucp_request_t, send.state.uct_comp);
    ucp_tag_eager_sync_zcopy_req_complete(req, status);
}
static void uct_mm_ep_signal_remote_slow_path_callback(ucs_callbackq_slow_elem_t *self)
{
    uct_mm_ep_t *ep = ucs_container_of(self, uct_mm_ep_t, cbq_elem);

    uct_mm_ep_signal_remote(ep, UCT_MM_IFACE_SIGNAL_CONNECT);
}
static ucs_status_t ucp_tag_eager_am_handler(void *arg, void *data, size_t length,
                                             void *desc)
{
    ucp_context_h context   = arg;
    ucp_recv_desc_t *rdesc  = desc;
    ucp_recv_request_t *rreq;
    ucs_queue_iter_t iter;
    ucp_tag_t tag;

    ucs_assert(length >= sizeof(ucp_tag_t));
    tag = *(ucp_tag_t*)data;

    /* Search in expected queue */
    iter = ucs_queue_iter_begin(&context->tag.expected);
    while (!ucs_queue_iter_end(&context->tag.expected, iter)) {
        rreq = ucs_container_of(*iter, ucp_recv_request_t, queue);
        if (ucp_tag_is_match(tag, rreq->tag, rreq->tag_mask)) {
            ucs_queue_del_iter(&context->tag.expected, iter);
            rreq->status = ucp_tag_matched(rreq->buffer, rreq->length, tag,
                                           data + sizeof(ucp_tag_t),
                                           length - sizeof(ucp_tag_t),
                                           &rreq->comp);
            return UCS_OK;
        }
        /* not matched - advance to the next expected request */
        iter = ucs_queue_iter_next(iter);
    }

    if (data != rdesc + 1) {
        memcpy(rdesc + 1, data, length);
    }

    rdesc->length = length;
    ucs_queue_push(&context->tag.unexpected, &rdesc->queue);
    return UCS_INPROGRESS;
}
ucs_status_t ucp_ep_pending_req_release(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);

    ucp_request_complete(req, req->cb.send, UCS_ERR_CANCELED);
    return UCS_OK;
}
static ucs_arbiter_cb_result_t uct_mm_ep_abriter_purge_cb(ucs_arbiter_t *arbiter,
                                                          ucs_arbiter_elem_t *elem,
                                                          void *arg)
{
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_purge_cb_args_t *cb_args    = arg;
    uct_pending_purge_callback_t cb = cb_args->cb;
    uct_mm_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem),
                                       uct_mm_ep_t, arb_group);

    if (cb != NULL) {
        cb(req, cb_args->arg);
    } else {
        ucs_warn("ep=%p canceling user pending request %p", ep, req);
    }

    return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
}
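/*
 * Illustrative sketch (not part of the original sources): purge callbacks of
 * the form above are typically handed to ucs_arbiter_group_purge() from the
 * endpoint's pending_purge entry point, with the user callback and its
 * argument wrapped in a uct_purge_cb_args_t. The iface type and the 'arbiter'
 * field name below are assumptions made for this example only.
 */
static void example_pending_purge(uct_mm_iface_t *iface, uct_mm_ep_t *ep,
                                  uct_pending_purge_callback_t cb, void *arg)
{
    uct_purge_cb_args_t args = {cb, arg};

    /* invoke the purge callback for every pending element of this endpoint */
    ucs_arbiter_group_purge(&iface->arbiter, &ep->arb_group,
                            uct_mm_ep_abriter_purge_cb, &args);
}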
static void uct_ugni_unalign_fma_composed_cb(uct_completion_t *self, ucs_status_t status)
{
    uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *)
        ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp);
    uct_ugni_rdma_fetch_desc_t *head_fma = fma;

    ucs_assert(head_fma->network_completed_bytes == 0 &&
               head_fma->expected_bytes != 0);

    head_fma->network_completed_bytes += head_fma->super.desc.length;
    ucs_assert(head_fma->network_completed_bytes <= head_fma->expected_bytes);

    /* Check if the message is completed */
    if (head_fma->network_completed_bytes == head_fma->expected_bytes) {
        assemble_composed_unaligned(head_fma);
        /* Call the original callback and skip padding */
        uct_ugni_invoke_orig_comp(head_fma, status);
        if (head_fma->head != NULL) {
            head_fma->head->super.not_ready_to_free = 0;
            ucs_mpool_put(head_fma->head);
        }
    } else {
        head_fma->super.not_ready_to_free = 1;
    }
}
static UCS_F_ALWAYS_INLINE void
ucp_request_release_common(void *request, uint8_t cb_flag, const char *debug_name)
{
    ucp_request_t *req = (ucp_request_t*)request - 1;
    ucp_worker_h UCS_V_UNUSED worker = ucs_container_of(ucs_mpool_obj_owner(req),
                                                        ucp_worker_t, req_mp);
    uint16_t flags;

    UCP_THREAD_CS_ENTER_CONDITIONAL(&worker->mt_lock);

    flags = req->flags;
    ucs_trace_req("%s request %p (%p) "UCP_REQUEST_FLAGS_FMT, debug_name, req,
                  req + 1, UCP_REQUEST_FLAGS_ARG(flags));

    ucs_assert(!(flags & UCP_REQUEST_DEBUG_FLAG_EXTERNAL));
    ucs_assert(!(flags & UCP_REQUEST_FLAG_RELEASED));

    if (ucs_likely(flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_request_put(req);
    } else {
        req->flags = (flags | UCP_REQUEST_FLAG_RELEASED) & ~cb_flag;
    }

    UCP_THREAD_CS_EXIT_CONDITIONAL(&worker->mt_lock);
}
ucs_status_t ucp_wireup_msg_progress(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_h ep        = req->send.ep;
    ssize_t packed_len;

    if (req->send.wireup.type == UCP_WIREUP_MSG_REQUEST) {
        if (ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED) {
            ucs_trace("ep %p: not sending wireup message - remote already connected",
                      ep);
            goto out;
        }
    }

    /* send the active message */
    if (req->send.wireup.type == UCP_WIREUP_MSG_ACK) {
        req->send.lane = ucp_ep_get_am_lane(ep);
    } else {
        req->send.lane = ucp_ep_get_wireup_msg_lane(ep);
    }

    packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_WIREUP,
                                 ucp_wireup_msg_pack, req);
    if (packed_len < 0) {
        if (packed_len != UCS_ERR_NO_RESOURCE) {
            ucs_error("failed to send wireup: %s",
                      ucs_status_string((ucs_status_t)packed_len));
        }
        return (ucs_status_t)packed_len;
    }

out:
    ucp_request_complete_send(req, UCS_OK);
    return UCS_OK;
}
static void ucp_ep_flush_resume_slow_path_callback(ucs_callbackq_slow_elem_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.flush.cbq_elem);

    ucp_ep_flush_slow_path_remove(req);
    ucp_ep_flush_progress(req);
    ucp_flush_check_completion(req);
}
static ucs_arbiter_cb_result_t uct_ugni_ep_abriter_purge_cb(ucs_arbiter_t *arbiter,
                                                            ucs_arbiter_elem_t *elem,
                                                            void *arg)
{
    uct_pending_req_t *req     = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_pending_callback_t cb  = arg;

    cb(req);
    return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
}
static void uct_ugni_amo_unpack32(uct_completion_t *self, ucs_status_t status)
{
    uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *)
        ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp);

    /* Call the original callback and skip padding */
    *(uint32_t *)fma->user_buffer = *(uint32_t *)(fma + 1);
    uct_ugni_invoke_orig_comp(fma, status);
}
static ucs_status_t ucp_tag_eager_contig_zcopy_single(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_eager_hdr_t hdr;

    hdr.super.tag = req->send.tag;
    return ucp_do_am_zcopy_single(self, UCP_AM_ID_EAGER_ONLY, &hdr, sizeof(hdr),
                                  ucp_tag_eager_contig_zcopy_req_complete);
}
static void uct_ugni_flush_cb(uct_completion_t *self, ucs_status_t status)
{
    uct_ugni_flush_group_t *group = ucs_container_of(self, uct_ugni_flush_group_t,
                                                     flush_comp);

    ucs_trace("group=%p, parent=%p, user_comp=%p", group, group->parent,
              group->user_comp);

    uct_invoke_completion(group->user_comp, UCS_OK);
    uct_ugni_check_flush(group->parent);
    uct_ugni_put_flush_group(group);
}
static ucs_status_t ucp_tag_eager_sync_contig_bcopy_single(uct_pending_req_t *self)
{
    ucs_status_t status = ucp_do_am_bcopy_single(self, UCP_AM_ID_EAGER_SYNC_ONLY,
                                                 ucp_tag_pack_eager_sync_only_contig);
    if (status == UCS_OK) {
        ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
        ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED);
    }
    return status;
}
static ucs_status_t ucp_tag_eager_contig_bcopy_single(uct_pending_req_t *self)
{
    ucs_status_t status = ucp_do_am_bcopy_single(self, UCP_AM_ID_EAGER_ONLY,
                                                 ucp_tag_pack_eager_only_contig);
    if (status == UCS_OK) {
        ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
        ucp_request_complete(req, req->cb.send, UCS_OK);
    }
    return status;
}
static void uct_ugni_udt_iface_release_desc(uct_recv_desc_t *self, void *desc)
{
    uct_ugni_udt_desc_t *ugni_desc;
    uct_ugni_udt_iface_t *iface = ucs_container_of(self, uct_ugni_udt_iface_t,
                                                   release_desc);

    ugni_desc = (uct_ugni_udt_desc_t *)((uct_recv_desc_t *)desc - 1);
    ucs_assert_always(NULL != ugni_desc);
    uct_ugni_udt_reset_desc(ugni_desc, iface);
    ucs_mpool_put(ugni_desc);
}
static void uct_ugni_unalign_fma_get_cb(uct_completion_t *self, ucs_status_t status)
{
    uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *)
        ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp);

    /* Call the original callback and skip padding */
    fma->super.unpack_cb(fma->user_buffer, (char *)(fma + 1) + fma->padding,
                         fma->super.desc.length - fma->padding - fma->tail);
    uct_ugni_invoke_orig_comp(fma, status);
}
static ucs_status_t ucp_tag_eager_bcopy_single(uct_pending_req_t *self)
{
    ucs_status_t status = ucp_do_am_bcopy_single(self, UCP_AM_ID_EAGER_ONLY,
                                                 ucp_tag_pack_eager_only_dt);
    if (status == UCS_OK) {
        ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
        ucp_request_send_generic_dt_finish(req);
        ucp_request_complete_send(req, UCS_OK);
    }
    return status;
}
ucs_status_t uct_ib_mlx5dv_init_obj(uct_ib_mlx5dv_t *obj, uint64_t obj_type)
{
    ucs_status_t ret = UCS_OK;

    if (obj_type & MLX5DV_OBJ_QP) {
        ret = uct_ib_mlx5_get_qp_info(obj->dv.qp.in,
                                      ucs_container_of(obj->dv.qp.out,
                                                       uct_ib_mlx5dv_qp_t, dv));
    }

    if (!ret && (obj_type & MLX5DV_OBJ_CQ)) {
        ret = uct_ib_mlx5_get_cq(obj->dv.cq.in,
                                 ucs_container_of(obj->dv.cq.out,
                                                  uct_ib_mlx5dv_cq_t, dv));
    }

    if (!ret && (obj_type & MLX5DV_OBJ_SRQ)) {
        ret = uct_ib_mlx5_get_srq_info(obj->dv.srq.in,
                                       ucs_container_of(obj->dv.srq.out,
                                                        uct_ib_mlx5dv_srq_t, dv));
    }

    return ret;
}
static ucs_status_t ucp_tag_eager_sync_contig_zcopy_single(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_eager_sync_hdr_t hdr;

    hdr.super.super.tag = req->send.tag;
    hdr.req.sender_uuid = req->send.ep->worker->uuid;
    hdr.req.reqptr      = (uintptr_t)req;
    return ucp_do_am_zcopy_single(self, UCP_AM_ID_EAGER_SYNC_ONLY, &hdr, sizeof(hdr),
                                  ucp_tag_eager_sync_contig_zcopy_req_complete);
}
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);

    for (;;) {
        if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
            /* Should be replaced with bcopy */
            packed_len = ucs_min(req->send.length, rma_config->max_put_short);
            status = uct_ep_put_short(uct_ep, req->send.buffer, packed_len,
                                      req->send.rma.remote_addr, uct_rkey);
        } else {
            /* We don't do it right now, but in future we have to add
             * an option to use zcopy */
            ucp_memcpy_pack_context_t pack_ctx;
            pack_ctx.src    = req->send.buffer;
            pack_ctx.length = ucs_min(req->send.length, rma_config->max_put_bcopy);
            packed_len = uct_ep_put_bcopy(uct_ep, ucp_memcpy_pack, &pack_ctx,
                                          req->send.rma.remote_addr, uct_rkey);
            status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
        }

        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            req->send.length -= packed_len;
            if (req->send.length == 0) {
                ucp_request_complete(req, void);
                break;
            }

            req->send.buffer          += packed_len;
            req->send.rma.remote_addr += packed_len;
        } else {
            break;
        }
    }

    return status;
}
static void free_composed_desc(void *arg)
{
    uct_ugni_rdma_fetch_desc_t *desc = (uct_ugni_rdma_fetch_desc_t*)arg;
    uct_ugni_rdma_fetch_desc_t *fma  = ucs_container_of(desc->super.comp_cb,
                                                        uct_ugni_rdma_fetch_desc_t,
                                                        tmp);
    uct_ugni_rdma_fetch_desc_t *rdma = fma->head;

    if (0 == --rdma->tmp.count) {
        fma->super.free_cb = rdma->super.free_cb = ucs_mpool_put;
        ucs_mpool_put(fma);
        ucs_mpool_put(rdma);
    }
}
static void ucp_ep_flush_completion(uct_completion_t *self, ucs_status_t status)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct_comp);

    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));

    if (status == UCS_OK) {
        req->status = status;
    }

    ucp_ep_flush_progress(req);
    ucp_flush_check_completion(req);
}
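/*
 * Illustrative sketch (not part of the original sources): a completion
 * callback such as ucp_ep_flush_completion() is reached through a
 * uct_completion_t that the request owner initializes with a callback and a
 * counter; each finished UCT operation decrements the counter via
 * uct_invoke_completion(), and the callback fires when it reaches zero. The
 * helper name and counter value below are assumptions for this example only.
 */
static void example_init_flush_completion(ucp_request_t *req, int uct_ops)
{
    req->send.uct_comp.func  = ucp_ep_flush_completion; /* runs on the last completion */
    req->send.uct_comp.count = uct_ops;                 /* one credit per outstanding op */
}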
static ucs_status_t ucp_tag_eager_sync_bcopy_single(uct_pending_req_t *self)
{
    ucs_status_t status = ucp_do_am_bcopy_single(self, UCP_AM_ID_EAGER_SYNC_ONLY,
                                                 ucp_tag_pack_eager_sync_only_dt);
    if (status == UCS_OK) {
        ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
        ucp_request_send_generic_dt_finish(req);
        ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED,
                                      UCS_OK);
    } else if (status == UCP_STATUS_PENDING_SWITCH) {
        status = UCS_OK;
    }
    return status;
}
static ucs_status_t ucp_tag_eager_contig_zcopy_multi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_eager_first_hdr_t first_hdr;

    first_hdr.super.super.tag = req->send.tag;
    first_hdr.total_len       = req->send.length;
    return ucp_do_am_zcopy_multi(self,
                                 UCP_AM_ID_EAGER_FIRST,
                                 UCP_AM_ID_EAGER_MIDDLE,
                                 UCP_AM_ID_EAGER_LAST,
                                 &first_hdr, sizeof(first_hdr),
                                 &first_hdr.super, sizeof(first_hdr.super),
                                 ucp_tag_eager_contig_zcopy_req_complete);
}
static ucs_status_t ucp_tag_eager_contig_short(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_t *ep       = req->send.ep;
    ucs_status_t status;

    status = ucp_tag_send_eager_short(ep, req->send.tag, req->send.buffer,
                                      req->send.length);
    if (status != UCS_OK) {
        return status;
    }

    ucp_request_complete(req, req->cb.send, UCS_OK);
    return UCS_OK;
}