static UCS_F_NOINLINE ucs_status_ptr_t
ucp_tag_send_slow(ucp_ep_h ep, const void *buffer, size_t count,
                  uintptr_t datatype, ucp_tag_t tag, ucp_send_callback_t cb)
{
    ucp_request_t *req;
    ucs_status_t status;

    req = ucs_mpool_get_inline(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    VALGRIND_MAKE_MEM_DEFINED(req + 1, ep->worker->context->config.request.size);

    req->flags   = 0;
    req->cb.send = cb;

    status = ucp_tag_send_start_req(ep, buffer, count, datatype, tag, req);
    if (status != UCS_OK) {
        /* Release the request before returning the error status, so the
         * failed path does not leak it back to the caller. */
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    if (!(req->flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_ep_add_pending(ep, ep->uct_ep, req);
        ucp_worker_progress(ep->worker);
    }

    ucs_trace_req("send_nb returning request %p", req);
    return req + 1;
}
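/*
 * The 'req + 1' return value above is the request-handle convention used
 * throughout these functions: the application receives a pointer just past
 * the internal ucp_request_t, and internal code recovers the request with
 * pointer arithmetic. A minimal sketch of that convention follows; the
 * helper name is hypothetical and not part of UCP.
 */
static inline ucp_request_t *request_from_handle(void *handle)
{
    /* Step one ucp_request_t back from the user-visible handle. */
    return (ucp_request_t*)handle - 1;
}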
static UCS_F_ALWAYS_INLINE ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count,
                 const ucp_ep_msg_config_t* msg_config,
                 size_t rndv_rma_thresh, size_t rndv_am_thresh,
                 ucp_send_callback_t cb, const ucp_proto_t *proto)
{
    size_t seg_size     = (msg_config->max_bcopy - proto->only_hdr_size);
    size_t rndv_thresh  = ucp_tag_get_rndv_threshold(req, count,
                                                     msg_config->max_iov,
                                                     rndv_rma_thresh,
                                                     rndv_am_thresh, seg_size);
    size_t zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, count,
                                                        rndv_thresh);
    ssize_t max_short   = ucp_proto_get_short_max(req, msg_config);
    ucs_status_t status;

    ucs_trace_req("select tag request(%p) progress algorithm datatype=%lx "
                  "buffer=%p length=%zu max_short=%zd rndv_thresh=%zu "
                  "zcopy_thresh=%zu",
                  req, req->send.datatype, req->send.buffer, req->send.length,
                  max_short, rndv_thresh, zcopy_thresh);

    status = ucp_request_send_start(req, max_short, zcopy_thresh, seg_size,
                                    rndv_thresh, proto);
    if (ucs_unlikely(status != UCS_OK)) {
        if (status == UCS_ERR_NO_PROGRESS) {
            /* RMA/AM rendezvous */
            ucs_assert(req->send.length >= rndv_thresh);
            status = ucp_tag_send_start_rndv(req);
        }
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
    }

    ucp_request_send_tag_stat(req);

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucp_request_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucp_request_set_callback(req, send.cb, cb);
    ucs_trace_req("returning send request %p", req);
    return req + 1;
}
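/*
 * The three thresholds computed above partition message lengths into the
 * available send protocols. The sketch below shows the resulting selection
 * order; it is illustrative only, since the real decision is made inside
 * ucp_request_send_start and also accounts for the datatype class and
 * memory-registration costs:
 *
 *   length <= max_short     -> eager short (inlined into the UCT call)
 *   length <  zcopy_thresh  -> eager bcopy (copied to a bounce buffer)
 *   length <  rndv_thresh   -> eager zcopy (zero-copy from registered memory)
 *   otherwise               -> rendezvous  (RTS/RTR handshake, RMA or AM)
 */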
static ucs_status_ptr_t ucp_disconnect_nb_internal(ucp_ep_h ep)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_debug("disconnect ep %p", ep);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    /*
     * Flush operation can be queued on the pending queue of only one of the
     * lanes (indicated by req->send.lane) and scheduled for completion on any
     * number of lanes. req->send.uct_comp.count keeps track of how many lanes
     * are not flushed yet, and when it reaches zero, it means all lanes are
     * flushed. req->send.flush.lanes keeps track of which lanes we still have
     * to start flush on.
     * If a flush is completed from a pending/completion callback, we need to
     * schedule slow-path callback to release the endpoint later, since a UCT
     * endpoint cannot be released from pending/completion callback context.
     */
    req->flags                  = 0;
    req->status                 = UCS_OK;
    req->send.ep                = ep;
    req->send.flush.flushed_cb  = ucp_ep_disconnected;
    req->send.flush.lanes       = UCS_MASK(ucp_ep_num_lanes(ep));
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    req->send.flush.cbq_elem_on = 0;
    req->send.lane              = UCP_NULL_LANE;
    req->send.uct.func          = ucp_ep_flush_progress_pending;
    req->send.uct_comp.func     = ucp_ep_flush_completion;
    req->send.uct_comp.count    = ucp_ep_num_lanes(ep);

    ucp_ep_flush_progress(req);

    if (req->send.uct_comp.count == 0) {
        status = req->status;
        ucp_ep_disconnected(req);
        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
                      ep, req, ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("ep %p: return inprogress flush request %p (%p)", ep, req,
                  req + 1);
    return req + 1;
}
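/*
 * The comment in ucp_disconnect_nb_internal describes two pieces of
 * bookkeeping: a bitmask of lanes on which flush has not yet been started,
 * and a count of lanes that have not yet completed. A self-contained sketch
 * of that pattern follows; all names are illustrative and are not the UCP
 * internals.
 */
#include <stdint.h>

typedef struct {
    uint64_t pending_lanes; /* bitmask, e.g. UCS_MASK(num_lanes) initially */
    int      incomplete;    /* num_lanes initially */
} flush_state_t;

/* Start flush on the next pending lane; returns its index, or -1 if none. */
static int flush_start_next(flush_state_t *s)
{
    if (s->pending_lanes == 0) {
        return -1;
    }
    int lane = __builtin_ctzll(s->pending_lanes); /* lowest set bit */
    s->pending_lanes &= ~(1ull << lane);
    return lane;
}

/* Called when one lane completes; returns 1 when all lanes are flushed. */
static int flush_lane_done(flush_state_t *s)
{
    return --s->incomplete == 0;
}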
static inline ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count, ssize_t max_short,
                 size_t zcopy_thresh, size_t rndv_thresh,
                 ucp_send_callback_t cb, const ucp_proto_t *proto)
{
    ucs_status_t status;

    switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) {
    case UCP_DATATYPE_CONTIG:
        status = ucp_tag_req_start_contig(req, count, max_short, zcopy_thresh,
                                          rndv_thresh, proto);
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
        break;

    case UCP_DATATYPE_IOV:
        status = ucp_tag_req_start_iov(req, count, max_short, zcopy_thresh,
                                       rndv_thresh, proto);
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
        break;

    case UCP_DATATYPE_GENERIC:
        ucp_tag_req_start_generic(req, count, rndv_thresh, proto);
        break;

    default:
        ucs_error("Invalid data type");
        return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM);
    }

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_start_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("returning send request %p", req);
    req->send.cb = cb;
    return req + 1;
}
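/*
 * The switch above dispatches on the class bits of the datatype value. For
 * example, a contiguous datatype built with the public ucp_dt_make_contig()
 * macro carries UCP_DATATYPE_CONTIG in its low bits and takes the first
 * branch (a sketch of the caller-side usage, not code from this file):
 */
static void datatype_class_example(void)
{
    /* Contiguous datatype of 4-byte elements; its low bits encode
     * UCP_DATATYPE_CONTIG, so ucp_tag_send_req takes the contig path. */
    ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint32_t));
    (void)datatype;
}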
ucs_status_ptr_t ucp_tag_send_sync_nb(ucp_ep_h ep, const void *buffer,
                                      size_t count, ucp_datatype_t datatype,
                                      ucp_tag_t tag, ucp_send_callback_t cb)
{
    ucp_request_t *req;

    ucs_trace_req("send_sync_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_sync_nb",
                          req, ucp_dt_length(datatype, count, buffer,
                                             &req->send.state));

    /* Remote side needs to send reply, so have it connect to us */
    ucp_ep_connect_remote(ep);

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            -1, /* disable short method */
                            ucp_ep_config(ep)->sync_zcopy_thresh,
                            ucp_ep_config(ep)->sync_rndv_thresh,
                            cb, &ucp_tag_eager_sync_proto);
}
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;
    size_t length;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    if (ucs_likely((datatype & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_CONTIG)) {
        length = ucp_contig_dt_length(datatype, count);
        UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                              "ucp_tag_send_nb (eager - start)",
                              buffer, length);
        if (ucs_likely(length <= ucp_ep_config(ep)->max_eager_short)) {
            status = ucp_tag_send_eager_short(ep, tag, buffer, length);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                                      "ucp_tag_send_nb (eager - finish)",
                                      buffer, length);
                return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
            }
        }
    }

    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_nb", req,
                          ucp_dt_length(datatype, count, buffer,
                                        &req->send.state));

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            ucp_ep_config(ep)->max_eager_short,
                            ucp_ep_config(ep)->zcopy_thresh,
                            ucp_ep_config(ep)->rndv_thresh,
                            cb, &ucp_tag_eager_proto);
}
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    return ucp_tag_send_slow(ep, buffer, count, datatype, tag, cb);
}
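/*
 * From the application side, the status-pointer convention of
 * ucp_tag_send_nb yields three outcomes: NULL (UCS_OK encodes to NULL, i.e.
 * the send completed in place), an error encoded as a pointer, or an
 * in-progress request handle that completes via the callback. A minimal
 * usage sketch under those assumptions; the polling loop is illustrative,
 * and a real application would typically track completion from its callback.
 */
static void send_cb_example(void *request, ucs_status_t status)
{
    /* UCP invokes this when the request finishes; a real application would
     * record 'status' and mark the operation complete here. */
}

static ucs_status_t blocking_send_example(ucp_worker_h worker, ucp_ep_h ep,
                                          const void *buf, size_t count,
                                          ucp_datatype_t dt, ucp_tag_t tag)
{
    ucs_status_ptr_t sreq = ucp_tag_send_nb(ep, buf, count, dt, tag,
                                            send_cb_example);

    if (UCS_PTR_IS_ERR(sreq)) {
        return UCS_PTR_STATUS(sreq);  /* failed immediately */
    }
    if (sreq == NULL) {
        return UCS_OK;                /* completed in place */
    }
    /* In progress: drive the worker until the request completes, then
     * release the handle back to the request pool. */
    while (!ucp_request_is_completed(sreq)) {
        ucp_worker_progress(worker);
    }
    ucp_request_release(sreq);
    return UCS_OK;
}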