static UCS_F_ALWAYS_INLINE void ucp_request_release_common(void *request, uint8_t cb_flag, const char *debug_name) { ucp_request_t *req = (ucp_request_t*)request - 1; ucp_worker_h UCS_V_UNUSED worker = ucs_container_of(ucs_mpool_obj_owner(req), ucp_worker_t, req_mp); uint16_t flags; UCP_THREAD_CS_ENTER_CONDITIONAL(&worker->mt_lock); flags = req->flags; ucs_trace_req("%s request %p (%p) "UCP_REQUEST_FLAGS_FMT, debug_name, req, req + 1, UCP_REQUEST_FLAGS_ARG(flags)); ucs_assert(!(flags & UCP_REQUEST_DEBUG_FLAG_EXTERNAL)); ucs_assert(!(flags & UCP_REQUEST_FLAG_RELEASED)); if (ucs_likely(flags & UCP_REQUEST_FLAG_COMPLETED)) { ucp_request_put(req); } else { req->flags = (flags | UCP_REQUEST_FLAG_RELEASED) & ~cb_flag; } UCP_THREAD_CS_EXIT_CONDITIONAL(&worker->mt_lock); }
/*
 * Select a send protocol for a tagged-send request and start it.
 *
 * @param req              Initialized send request (buffer/length/datatype set).
 * @param count            Number of elements to send.
 * @param msg_config       Per-endpoint message size configuration.
 * @param rndv_rma_thresh  Rendezvous-over-RMA threshold.
 * @param rndv_am_thresh   Rendezvous-over-AM threshold.
 * @param cb               User completion callback, installed only if the
 *                         request does not complete immediately.
 * @param proto            Eager protocol descriptor.
 *
 * @return UCS_STATUS_PTR(status) if the request completed (or failed to
 *         start) immediately, otherwise the public request handle (req + 1).
 */
static UCS_F_ALWAYS_INLINE ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count,
                 const ucp_ep_msg_config_t* msg_config, size_t rndv_rma_thresh,
                 size_t rndv_am_thresh, ucp_send_callback_t cb,
                 const ucp_proto_t *proto)
{
    size_t seg_size     = (msg_config->max_bcopy - proto->only_hdr_size);
    size_t rndv_thresh  = ucp_tag_get_rndv_threshold(req, count,
                                                     msg_config->max_iov,
                                                     rndv_rma_thresh,
                                                     rndv_am_thresh, seg_size);
    size_t zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, count,
                                                        rndv_thresh);
    ssize_t max_short   = ucp_proto_get_short_max(req, msg_config);
    ucs_status_t status;

    ucs_trace_req("select tag request(%p) progress algorithm datatype=%lx "
                  "buffer=%p length=%zu max_short=%zd rndv_thresh=%zu "
                  "zcopy_thresh=%zu",
                  req, req->send.datatype, req->send.buffer, req->send.length,
                  max_short, rndv_thresh, zcopy_thresh);

    status = ucp_request_send_start(req, max_short, zcopy_thresh, seg_size,
                                    rndv_thresh, proto);
    if (ucs_unlikely(status != UCS_OK)) {
        if (status == UCS_ERR_NO_PROGRESS) {
            /* Message is too long for eager - fall back to RMA/AM rendezvous */
            ucs_assert(req->send.length >= rndv_thresh);
            status = ucp_tag_send_start_rndv(req);
        }
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
    }

    ucp_request_send_tag_stat(req);

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucp_request_put(req);
        return UCS_STATUS_PTR(status);
    }

    /* FIX: terminate the macro invocation with ';'. If
     * ucp_request_set_callback() is a do { } while (0)-style macro the
     * original statement does not compile; if it is a brace-block macro the
     * extra ';' is a harmless null statement. */
    ucp_request_set_callback(req, send.cb, cb);
    ucs_trace_req("returning send request %p", req);
    return req + 1;
}
/*
 * Mark a tagged-receive request as completed.
 *
 * @param req       Receive request to complete.
 * @param status    Completion status stored into the request.
 * @param info      Receive info (sender tag, length) - used for tracing only.
 * @param function  Caller name, used only for tracing.
 *
 * Sets COMPLETED on the request; if the user has already released it
 * (RELEASED flag set), returns it to the memory pool.
 */
static UCS_F_ALWAYS_INLINE void
ucp_tag_recv_request_completed(ucp_request_t *req, ucs_status_t status,
                               ucp_tag_recv_info_t *info, const char *function)
{
    ucs_trace_req("%s returning completed request %p (%p) stag 0x%"PRIx64" len %zu, %s",
                  function, req, req + 1, info->sender_tag, info->length,
                  ucs_status_string(status));

    req->status = status;

    /* FIX: record the profiling event before the request can be returned to
     * the pool. The original emitted it after ucp_request_put(), referencing
     * a request that may already have been recycled. */
    UCS_PROFILE_REQUEST_EVENT(req, "complete_recv", 0);

    if ((req->flags |= UCP_REQUEST_FLAG_COMPLETED) & UCP_REQUEST_FLAG_RELEASED) {
        /* User already released the handle - free it now */
        ucp_request_put(req);
    }
}
/*
 * Pending-queue progress callback for a non-blocking-implicit PUT.
 *
 * Sends the next fragment of the request (short for small messages, bcopy
 * otherwise) and advances the request state.
 *
 * @return UCS_OK when the whole buffer has been sent (request freed),
 *         UCS_INPROGRESS when more fragments remain, or the transport
 *         error status on failure.
 */
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t frag_len;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, req->send.lane, uct_rkey, rma_config);

    if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
        /* Should be replaced with bcopy */
        frag_len = ucs_min(req->send.length, rma_config->max_put_short);
        status   = uct_ep_put_short(ep->uct_eps[req->send.lane],
                                    req->send.buffer, frag_len,
                                    req->send.rma.remote_addr, uct_rkey);
    } else {
        /* We don't do it right now, but in future we have to add
         * an option to use zcopy */
        ucp_memcpy_pack_context_t pack_ctx;
        pack_ctx.src    = req->send.buffer;
        pack_ctx.length = ucs_min(req->send.length, rma_config->max_put_bcopy);
        frag_len        = uct_ep_put_bcopy(ep->uct_eps[req->send.lane],
                                           ucp_memcpy_pack, &pack_ctx,
                                           req->send.rma.remote_addr, uct_rkey);
        /* bcopy returns packed length on success, negative status on error */
        status          = (frag_len > 0) ? UCS_OK : (ucs_status_t)frag_len;
    }

    if ((status != UCS_OK) && (status != UCS_INPROGRESS)) {
        return status;
    }

    /* Fragment was accepted by the transport */
    req->send.length -= frag_len;
    if (req->send.length == 0) {
        ucp_request_put(req, UCS_OK);
        return UCS_OK;
    }

    req->send.buffer          += frag_len;
    req->send.rma.remote_addr += frag_len;
    return UCS_INPROGRESS;
}
/*
 * Slow-path callback invoked once an endpoint flush request has finished.
 *
 * Ordering here is deliberate and must be preserved:
 *   1. Remove the slow-path element from the worker callback queue.
 *   2. Invoke the flush-done notification (flushed_cb).
 *   3. Only then complete/release the request - releasing earlier could
 *      free the request while its slow-path element is still queued.
 *
 * NOTE(review): ucp_request_put() is called here with two arguments, while
 * other paths in this file use the single-argument form - confirm the
 * intended overload/macro against its definition.
 */
static void ucp_ep_flushed_slow_path_callback(ucs_callbackq_slow_elem_t *self)
{
    /* Recover the request embedding this slow-path queue element */
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.flush.cbq_elem);
    ucp_ep_h ep = req->send.ep;

    /* A completed request must not still be sitting on the slow path */
    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));

    ucs_trace("flush req %p ep %p remove from uct_worker %p", req, ep,
              ep->worker->uct);
    ucp_ep_flush_slow_path_remove(req);
    req->send.flush.flushed_cb(req);

    /* Complete send request from here, to avoid releasing the request while
     * slow-path element is still pending */
    ucs_trace_req("completing flush request %p (%p) with status %s", req,
                  req + 1, ucs_status_string(req->status));
    ucp_request_put(req, req->status);
}
/*
 * Pending-queue progress callback for a non-blocking-implicit GET.
 *
 * Posts one bcopy fragment (up to max_get_bcopy bytes), unpacking straight
 * into the user buffer via memcpy, and advances the request state.
 *
 * @return UCS_OK when the final fragment has been posted (request freed),
 *         UCS_INPROGRESS when more fragments remain, or the transport
 *         error status on failure.
 */
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t chunk;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, req->send.lane, uct_rkey, rma_config);

    chunk  = ucs_min(rma_config->max_get_bcopy, req->send.length);
    status = uct_ep_get_bcopy(ep->uct_eps[req->send.lane],
                              (uct_unpack_callback_t)memcpy,
                              (void*)req->send.buffer, chunk,
                              req->send.rma.remote_addr, uct_rkey, NULL);
    if ((status != UCS_OK) && (status != UCS_INPROGRESS)) {
        return status;
    }

    /* Fragment was initiated - advance the request window */
    req->send.length          -= chunk;
    req->send.buffer          += chunk;
    req->send.rma.remote_addr += chunk;

    if (req->send.length == 0) {
        /* Last fragment was posted */
        ucp_request_put(req, UCS_OK);
        return UCS_OK;
    }
    return UCS_INPROGRESS;
}