size_t ucp_tag_rndv_rts_pack(void *dest, void *arg) { ucp_request_t *sreq = arg; /* send request */ ucp_rndv_rts_hdr_t *rndv_rts_hdr = dest; ucp_worker_h worker = sreq->send.ep->worker; ssize_t packed_rkey_size; rndv_rts_hdr->super.tag = sreq->send.tag.tag; rndv_rts_hdr->sreq.reqptr = (uintptr_t)sreq; rndv_rts_hdr->sreq.sender_uuid = worker->uuid; rndv_rts_hdr->size = sreq->send.length; /* Pack remote keys (which can be empty list) */ if (UCP_DT_IS_CONTIG(sreq->send.datatype) && ucp_rndv_is_get_zcopy(sreq, worker->context->config.ext.rndv_mode)) { /* pack rkey, ask target to do get_zcopy */ rndv_rts_hdr->address = (uintptr_t)sreq->send.buffer; packed_rkey_size = ucp_rkey_pack_uct(worker->context, sreq->send.state.dt.dt.contig.md_map, sreq->send.state.dt.dt.contig.memh, rndv_rts_hdr + 1); if (packed_rkey_size < 0) { ucs_fatal("failed to pack rendezvous remote key: %s", ucs_status_string(packed_rkey_size)); } } else { rndv_rts_hdr->address = 0; packed_rkey_size = 0; } return sizeof(*rndv_rts_hdr) + packed_rkey_size; }
static size_t ucp_tag_rndv_pack_rkey(ucp_request_t *sreq, ucp_rndv_rts_hdr_t *rndv_rts_hdr) { ucp_ep_h ep = sreq->send.ep; size_t packed_rkey = 0; ucs_status_t status; ucs_assert(UCP_DT_IS_CONTIG(sreq->send.datatype)); /* Check if the sender needs to register the send buffer - * is its datatype contiguous and does the receive side need it */ if ((ucp_ep_is_rndv_lane_present(ep)) && (ucp_ep_rndv_md_flags(ep) & UCT_MD_FLAG_NEED_RKEY)) { status = ucp_request_send_buffer_reg(sreq, ucp_ep_get_rndv_get_lane(ep)); ucs_assert_always(status == UCS_OK); /* if the send buffer was registered, send the rkey */ UCS_PROFILE_CALL(uct_md_mkey_pack, ucp_ep_md(ep, ucp_ep_get_rndv_get_lane(ep)), sreq->send.state.dt.contig.memh, rndv_rts_hdr + 1); rndv_rts_hdr->flags |= UCP_RNDV_RTS_FLAG_PACKED_RKEY; packed_rkey = ucp_ep_md_attr(ep, ucp_ep_get_rndv_get_lane(ep))->rkey_packed_size; } return packed_rkey; }
static void ucp_rndv_rma_request_send_buffer_dereg(ucp_request_t *sreq) { if ((UCP_DT_IS_CONTIG(sreq->send.datatype)) && (ucp_ep_is_rndv_lane_present(sreq->send.ep))) { ucp_request_send_buffer_dereg(sreq, ucp_ep_get_rndv_get_lane(sreq->send.ep)); } }
static size_t ucp_tag_rndv_rts_pack(void *dest, void *arg) { ucp_request_t *sreq = arg; /* the sender's request */ ucp_rndv_rts_hdr_t *rndv_rts_hdr = dest; size_t packed_len = sizeof(*rndv_rts_hdr);; rndv_rts_hdr->flags = 0; rndv_rts_hdr->super.tag = sreq->send.tag; /* reqptr holds the original sreq */ rndv_rts_hdr->sreq.reqptr = (uintptr_t)sreq; rndv_rts_hdr->sreq.sender_uuid = sreq->send.ep->worker->uuid; rndv_rts_hdr->size = sreq->send.length; if (UCP_DT_IS_CONTIG(sreq->send.datatype)) { rndv_rts_hdr->address = (uintptr_t) sreq->send.buffer; packed_len += ucp_tag_rndv_pack_rkey(sreq, rndv_rts_hdr); } else if (UCP_DT_IS_GENERIC(sreq->send.datatype)) { rndv_rts_hdr->address = 0; } /* For rndv emulation based on AM rndv (send-recv), only the size of the rts * header is returned */ return packed_len; }
static UCS_F_ALWAYS_INLINE void ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count, uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask, ucp_request_t *req, uint32_t req_flags, ucp_tag_recv_callback_t cb, ucp_recv_desc_t *rdesc, const char *debug_name) { unsigned common_flags = UCP_REQUEST_FLAG_RECV | UCP_REQUEST_FLAG_EXPECTED; ucp_eager_first_hdr_t *eagerf_hdr; ucp_request_queue_t *req_queue; uct_memory_type_t mem_type; size_t hdr_len, recv_len; ucs_status_t status; uint64_t msg_id; ucp_trace_req(req, "%s buffer %p dt 0x%lx count %zu tag %"PRIx64"/%"PRIx64, debug_name, buffer, datatype, count, tag, tag_mask); /* First, check the fast path case - single fragment * in this case avoid initializing most of request fields * */ if (ucs_likely((rdesc != NULL) && (rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY))) { UCS_PROFILE_REQUEST_EVENT(req, "eager_only_match", 0); UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags); UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP); if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) { ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags); } req->flags = UCP_REQUEST_FLAG_RECV | req_flags; hdr_len = rdesc->payload_offset; recv_len = rdesc->length - hdr_len; req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc); req->recv.tag.info.length = recv_len; ucp_memory_type_detect_mds(worker->context, buffer, recv_len, &mem_type); status = ucp_dt_unpack_only(worker, buffer, count, datatype, mem_type, (void*)(rdesc + 1) + hdr_len, recv_len, 1); ucp_recv_desc_release(rdesc); if (req_flags & UCP_REQUEST_FLAG_CALLBACK) { cb(req + 1, status, &req->recv.tag.info); } ucp_tag_recv_request_completed(req, status, &req->recv.tag.info, debug_name); return; } /* Initialize receive request */ req->status = UCS_OK; req->recv.worker = worker; req->recv.buffer = buffer; req->recv.datatype = datatype; ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count); if (!UCP_DT_IS_CONTIG(datatype)) { common_flags |= UCP_REQUEST_FLAG_BLOCK_OFFLOAD; } req->flags = common_flags | req_flags; req->recv.length = ucp_dt_length(datatype, count, buffer, &req->recv.state); ucp_memory_type_detect_mds(worker->context, buffer, req->recv.length, &mem_type); req->recv.mem_type = mem_type; req->recv.tag.tag = tag; req->recv.tag.tag_mask = tag_mask; req->recv.tag.cb = cb; if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_REQ)) { req->recv.tag.info.sender_tag = 0; } if (ucs_unlikely(rdesc == NULL)) { /* If not found on unexpected, wait until it arrives. * If was found but need this receive request for later completion, save it */ req_queue = ucp_tag_exp_get_queue(&worker->tm, tag, tag_mask); /* If offload supported, post this tag to transport as well. * TODO: need to distinguish the cases when posting is not needed. */ ucp_tag_offload_try_post(worker, req, req_queue); ucp_tag_exp_push(&worker->tm, req_queue, req); ucs_trace_req("%s returning expected request %p (%p)", debug_name, req, req + 1); return; } /* Check rendezvous case */ if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_RNDV)) { ucp_rndv_matched(worker, req, (void*)(rdesc + 1)); UCP_WORKER_STAT_RNDV(worker, UNEXP); ucp_recv_desc_release(rdesc); return; } if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) { ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags); } UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags); ucs_assert(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER); eagerf_hdr = (void*)(rdesc + 1); req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc); req->recv.tag.info.length = req->recv.tag.remaining = eagerf_hdr->total_len; /* process first fragment */ UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP); msg_id = eagerf_hdr->msg_id; status = ucp_tag_recv_request_process_rdesc(req, rdesc, 0); ucs_assert(status == UCS_INPROGRESS); /* process additional fragments */ ucp_tag_frag_list_process_queue(&worker->tm, req, msg_id UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP)); }