/*
 * Post a non-blocking *synchronous* tagged send on endpoint @ep: the
 * operation completes only after the remote side has matched the message.
 *
 * @param ep        Destination endpoint.
 * @param buffer    Payload to send.
 * @param count     Number of datatype elements in @buffer.
 * @param datatype  UCP datatype descriptor for the payload.
 * @param tag       Message tag.
 * @param cb        Completion callback for the returned request.
 *
 * @return Request pointer on success, or UCS_STATUS_PTR(UCS_ERR_NO_MEMORY)
 *         if a request could not be allocated.
 */
ucs_status_ptr_t ucp_tag_send_sync_nb(ucp_ep_h ep, const void *buffer,
                                      size_t count, ucp_datatype_t datatype,
                                      ucp_tag_t tag, ucp_send_callback_t cb)
{
    ucp_request_t *req;

    ucs_trace_req("send_sync_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_sync_nb",
                          req,
                          ucp_dt_length(datatype, count, buffer,
                                        &req->send.state));

    /* Remote side needs to send reply, so have it connect to us */
    ucp_ep_connect_remote(ep);

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            -1, /* disable short method */
                            ucp_ep_config(ep)->sync_zcopy_thresh,
                            ucp_ep_config(ep)->sync_rndv_thresh,
                            cb, &ucp_tag_eager_sync_proto);
}
/*
 * Blocking PUT: write @length bytes from @buffer to [remote_addr,
 * remote_addr + length) on the peer, fragmenting as needed and calling
 * ucp_worker_progress() until every fragment has been accepted by the
 * transport.
 *
 * @return UCS_OK on success, otherwise the first transport error.
 */
ucs_status_t ucp_put(ucp_ep_h ep, const void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    ssize_t packed_len;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        if (length <= ucp_ep_config(ep)->put.max_short) {
            /* whole message fits in a single short put */
            status = uct_ep_put_short(ep->uct_ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break; /* success or hard error - done either way */
            }
        } else {
            if (length <= ucp_ep_config(ep)->put.bcopy_thresh) {
                /* below bcopy threshold: send one short-sized fragment */
                frag_length = ucs_min(length, ucp_ep_config(ep)->put.max_short);
                status = uct_ep_put_short(ep->uct_ep, buffer, frag_length,
                                          remote_addr, uct_rkey);
            } else {
                /* bcopy fragment of up to put.max_bcopy bytes */
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length = frag_length =
                    ucs_min(length, ucp_ep_config(ep)->put.max_bcopy);
                packed_len = uct_ep_put_bcopy(ep->uct_ep, ucp_memcpy_pack,
                                              &pack_ctx, remote_addr, uct_rkey);
                /* negative packed_len encodes an error status */
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }
            if (ucs_likely(status == UCS_OK)) {
                length -= frag_length;
                if (length == 0) {
                    break; /* everything sent */
                }

                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break; /* hard error - abort */
            }
        }
        /* NO_RESOURCE (or partial progress): drive the transport and retry */
        ucp_worker_progress(ep->worker);
    }
    return status;
}
/*
 * Pending-queue callback that (re)tries the remaining fragments of a queued
 * non-blocking PUT request.
 *
 * @return UCS_OK after the last fragment was sent (the request is completed),
 *         otherwise the last transport status - e.g. UCS_ERR_NO_RESOURCE
 *         keeps the request on the pending queue for a later retry.
 */
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucs_status_t status;
    ssize_t packed_len;
    ucp_request_t *req  = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_t *ep        = req->send.ep;
    uct_rkey_t uct_rkey = UCP_RKEY_LOOKUP(ep, req->send.rma.rkey);

    for (;;) {
        if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
            /* Should be replaced with bcopy */
            packed_len = ucs_min(req->send.length,
                                 ucp_ep_config(ep)->put.max_short);
            status = uct_ep_put_short(ep->uct_ep, req->send.buffer, packed_len,
                                      req->send.rma.remote_addr, uct_rkey);
        } else {
            /* We don't do it right now, but in future we have to add
             * an option to use zcopy */
            ucp_memcpy_pack_context_t pack_ctx;
            pack_ctx.src    = req->send.buffer;
            pack_ctx.length = ucs_min(req->send.length,
                                      ucp_ep_config(ep)->put.max_bcopy);
            packed_len = uct_ep_put_bcopy(ep->uct_ep, ucp_memcpy_pack,
                                          &pack_ctx, req->send.rma.remote_addr,
                                          uct_rkey);
            /* negative packed_len encodes an error status */
            status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
        }
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* advance the request past the fragment that was just sent */
            req->send.length -= packed_len;
            if (req->send.length == 0) {
                ucp_request_complete(req, void);
                break;
            }

            req->send.buffer          += packed_len;
            req->send.rma.remote_addr += packed_len;
        } else {
            break; /* NO_RESOURCE or hard error - stop and report */
        }
    }
    return status;
}
/*
 * Connect the local side of every point-to-point lane of @ep to its remote
 * transport address. @tli maps each lane to an index in @address_list.
 * Lanes whose transport is not p2p (e.g. connected via iface) are skipped.
 *
 * @return UCS_OK, or the first uct_ep_connect_to_ep() failure.
 */
static ucs_status_t
ucp_wireup_connect_local(ucp_ep_h ep, const uint8_t *tli,
                         unsigned address_count,
                         const ucp_address_entry_t *address_list)
{
    ucp_worker_h worker = ep->worker;
    const ucp_address_entry_t *address;
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane, amo_index;
    ucs_status_t status;
    ucp_md_map_t UCS_V_UNUSED md_map;

    ucs_trace("ep %p: connect local transports", ep);

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        rsc_index = ucp_ep_get_rsc_index(ep, lane);
        if (!ucp_worker_is_tl_p2p(worker, rsc_index)) {
            /* only p2p transports need an explicit ep-to-ep connect */
            continue;
        }

        address = &address_list[tli[lane]];
        ucs_assert(address->tl_addr_len > 0);

        /* Check that if the lane is used for RMA/AMO, destination md index matches */
        md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.rma_lane_map,
                                       lane);
        ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)),
                    "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane],
                    md_map, address->md_index);

        amo_index = ucp_ep_get_amo_lane_index(&ucp_ep_config(ep)->key, lane);
        if (amo_index != UCP_NULL_LANE) {
            md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.amo_lane_map,
                                           amo_index);
            ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)),
                        "lane=%d ai=%d md_map=0x%x md_index=%d", lane,
                        tli[lane], md_map, address->md_index);
        }

        status = uct_ep_connect_to_ep(ep->uct_eps[lane], address->dev_addr,
                                      address->ep_addr);
        if (status != UCS_OK) {
            return status;
        }
    }
    return UCS_OK;
}
/*
 * Non-blocking tagged send. Small contiguous messages may complete inline
 * via the eager-short protocol; everything else allocates a request and
 * goes through generic protocol selection.
 *
 * @return UCS_STATUS_PTR(UCS_OK) when the send completed immediately, a
 *         request pointer when it is in progress, or an error status
 *         pointer (e.g. UCS_ERR_NO_MEMORY).
 */
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;
    size_t length;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    /* Fast path: contiguous payload small enough for eager-short needs no
     * request allocation at all. */
    if (ucs_likely((datatype & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_CONTIG)) {
        length = ucp_contig_dt_length(datatype, count);
        UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                              "ucp_tag_send_nb (eager - start)", buffer,
                              length);
        if (ucs_likely(length <= ucp_ep_config(ep)->max_eager_short)) {
            status = ucp_tag_send_eager_short(ep, tag, buffer, length);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                                      "ucp_tag_send_nb (eager - finish)",
                                      buffer, length);
                return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
            }
        }
    }

    /* Slow path: allocate a request and pick a protocol by length/thresholds */
    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_nb", req,
                          ucp_dt_length(datatype, count, buffer,
                                        &req->send.state));

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            ucp_ep_config(ep)->max_eager_short,
                            ucp_ep_config(ep)->zcopy_thresh,
                            ucp_ep_config(ep)->rndv_thresh,
                            cb, &ucp_tag_eager_proto);
}
/*
 * bcopy pack callback for a middle fragment of a generic-datatype eager
 * message: writes the eager header, then packs as much payload as fits in
 * one active-message buffer. Returns the total number of bytes written.
 */
static size_t ucp_tag_pack_eager_middle_generic(void *dest, void *arg)
{
    ucp_request_t *sreq      = arg;
    ucp_eager_hdr_t *ehdr    = dest;
    size_t payload_max       = ucp_ep_config(sreq->send.ep)->max_am_bcopy -
                               sizeof(*ehdr);

    ehdr->super.tag = sreq->send.tag;
    return sizeof(*ehdr) +
           ucp_request_generic_dt_pack(sreq, ehdr + 1, payload_max);
}
/*
 * bcopy pack callback for a middle fragment of a contiguous eager message:
 * writes the eager header and memcpy's one full fragment of payload from
 * the current send offset. Returns the total number of bytes written.
 */
static size_t ucp_tag_pack_eager_middle_contig(void *dest, void *arg)
{
    ucp_request_t *sreq   = arg;
    ucp_eager_hdr_t *ehdr = dest;
    size_t payload;

    payload = ucp_ep_config(sreq->send.ep)->max_am_bcopy - sizeof(*ehdr);
    ucs_debug("pack eager_middle paylen %zu", payload);

    ehdr->super.tag = sreq->send.tag;
    memcpy(ehdr + 1, sreq->send.buffer + sreq->send.state.offset, payload);
    return sizeof(*ehdr) + payload;
}
/*
 * Choose the send protocol for a contiguous-datatype tagged send and set up
 * the request's progress/completion callbacks. Protocol is selected by
 * length: short <= max_short < bcopy < zcopy_thresh <= eager-zcopy, and
 * length >= rndv_thresh switches to rendezvous.
 *
 * @return UCS_OK, or an error from rendezvous start / buffer registration.
 */
static ucs_status_t ucp_tag_req_start_contig(ucp_request_t *req, size_t count,
                                             ssize_t max_short,
                                             size_t zcopy_thresh,
                                             size_t rndv_thresh,
                                             const ucp_proto_t *proto)
{
    ucp_ep_config_t *config = ucp_ep_config(req->send.ep);
    size_t only_hdr_size    = proto->only_hdr_size;
    ucs_status_t status;
    size_t max_zcopy;
    ssize_t length;

    length           = ucp_contig_dt_length(req->send.datatype, count);
    req->send.length = length;

    if (length <= max_short) {
        /* short */
        req->send.uct.func = proto->contig_short;
    } else if (length >= rndv_thresh) {
        /* rendezvous */
        status = ucp_tag_send_start_rndv(req);
        if (status != UCS_OK) {
            return status;
        }
    } else if (length < zcopy_thresh) {
        /* bcopy */
        if (req->send.length <= config->max_am_bcopy - only_hdr_size) {
            req->send.uct.func = proto->bcopy_single;
        } else {
            req->send.uct.func = proto->bcopy_multi;
        }
    } else {
        /* eager zcopy */
        status = ucp_request_send_buffer_reg(req,
                                             ucp_ep_get_am_lane(req->send.ep));
        if (status != UCS_OK) {
            return status;
        }

        req->send.uct_comp.func = proto->contig_zcopy_completion;

        max_zcopy = config->max_am_zcopy;
        if (req->send.length <= max_zcopy - only_hdr_size) {
            /* single fragment: one completion expected */
            req->send.uct_comp.count = 1;
            req->send.uct.func       = proto->contig_zcopy_single;
        } else {
            /* calculate number of zcopy fragments */
            req->send.uct_comp.count = 1 +
                (length + proto->first_hdr_size - proto->mid_hdr_size - 1) /
                (max_zcopy - proto->mid_hdr_size);
            req->send.uct.func = proto->contig_zcopy_multi;
        }
    }
    return UCS_OK;
}
/*
 * Blocking GET: read @length bytes from [remote_addr, remote_addr + length)
 * on the peer into @buffer, fragmenting by get.max_bcopy, then spin on
 * worker progress until every outstanding fragment has completed.
 *
 * @return UCS_OK on success, otherwise the first transport error.
 */
ucs_status_t ucp_get(ucp_ep_h ep, void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    uct_completion_t comp;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    comp.count = 1;

    for (;;) {
        /* Push out all fragments, and request completion only for the last
         * fragment.
         */
        frag_length = ucs_min(ucp_ep_config(ep)->get.max_bcopy, length);
        status = uct_ep_get_bcopy(ep->uct_ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, &comp);
        if (ucs_likely(status == UCS_OK)) {
            goto posted;
        } else if (status == UCS_INPROGRESS) {
            /* fragment outstanding - its completion will decrement the count */
            ++comp.count;
            goto posted;
        } else if (status == UCS_ERR_NO_RESOURCE) {
            goto retry;
        } else {
            return status;
        }

posted:
        /* fragment accepted - advance to the next one */
        length -= frag_length;
        if (length == 0) {
            break;
        }

        buffer      += frag_length;
        remote_addr += frag_length;
retry:
        ucp_worker_progress(ep->worker);
    }

    /* wait for all in-progress fragments to finish */
    /* coverity[loop_condition] */
    while (comp.count > 1) {
        ucp_worker_progress(ep->worker);
    }
    return UCS_OK;
}
/*
 * Non-blocking implicit GET: read @length bytes from [remote_addr,
 * remote_addr + length) on the peer into @buffer, fragmenting by
 * get.max_bcopy.
 *
 * @return UCS_OK when all fragments were issued, UCS_INPROGRESS when the
 *         transport ran out of resources and a pending request was queued
 *         (ucp_progress_get_nbi resumes it), UCS_ERR_NO_MEMORY if the
 *         pending request could not be allocated, or a transport error.
 */
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    size_t frag_length;
    uct_rkey_t uct_rkey;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    /* Fix: perform the rkey lookup exactly once, and only after the
     * parameters were validated. Previously the lookup ran twice - once in
     * the declaration initializer (before UCP_RMA_CHECK_PARAMS) and again
     * after it. */
    uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    for (;;) {
        /* re-read the fragment limit every iteration - the configuration can
         * change on a transport switch */
        frag_length = ucs_min(ucp_ep_config(ep)->get.max_bcopy, length);
        status = uct_ep_get_bcopy(ep->uct_ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            length      -= frag_length;
            buffer      += frag_length;
            remote_addr += frag_length;
            if (length == 0) {
                break;
            }
        } else if (ucs_unlikely(status == UCS_ERR_NO_RESOURCE)) {
            /* Out of resources - adding request for later schedule */
            ucp_request_t *req;
            req = ucs_mpool_get_inline(&ep->worker->req_mp);
            if (req == NULL) {
                /* can't allocate memory for request - abort */
                status = UCS_ERR_NO_MEMORY;
                break;
            }
            ucp_add_pending_rma(req, ep, buffer, length, remote_addr, rkey,
                                ucp_progress_get_nbi);

            /* Mark it as in progress */
            status = UCS_INPROGRESS;
            break;
        } else {
            /* Error */
            break;
        }
    }
    return status;
}
/*
 * bcopy pack callback for a middle fragment of an eager message with a
 * non-contiguous datatype: writes the eager header and packs one fragment
 * of payload starting at the request's current offset. Returns the total
 * number of bytes written into @dest.
 */
static size_t ucp_tag_pack_eager_middle_dt(void *dest, void *arg)
{
    ucp_request_t *sreq   = arg;
    ucp_eager_hdr_t *ehdr = dest;
    size_t payload        = ucp_ep_config(sreq->send.ep)->am.max_bcopy -
                            sizeof(*ehdr);

    ucs_debug("pack eager_middle paylen %zu offset %zu", payload,
              sreq->send.state.offset);

    ehdr->super.tag = sreq->send.tag;
    return sizeof(*ehdr) +
           ucp_dt_pack(sreq->send.datatype, ehdr + 1, sreq->send.buffer,
                       &sreq->send.state, payload);
}
/*
 * bcopy pack callback for the first fragment of a multi-fragment contiguous
 * eager message: fills the first-fragment header (tag + total length) and
 * copies one full fragment of payload. Returns the number of bytes written.
 */
static size_t ucp_tag_pack_eager_first_contig(void *dest, void *arg)
{
    ucp_request_t *sreq         = arg;
    ucp_eager_first_hdr_t *ehdr = dest;
    size_t payload = ucp_ep_config(sreq->send.ep)->max_am_bcopy -
                     sizeof(*ehdr);

    /* first fragment starts at offset 0 and never holds the whole message */
    ucs_assert(sreq->send.state.offset == 0);
    ucs_assert(sreq->send.length > payload);

    ehdr->super.super.tag = sreq->send.tag;
    ehdr->total_len       = sreq->send.length;
    memcpy(ehdr + 1, sreq->send.buffer, payload);
    return sizeof(*ehdr) + payload;
}
/*
 * bcopy pack callback for the first fragment of a multi-fragment eager
 * message with a generic datatype: fills the first-fragment header (tag +
 * total length) and packs as much payload as fits. Returns the number of
 * bytes written.
 */
static size_t ucp_tag_pack_eager_first_generic(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *ehdr = dest;
    ucp_request_t *sreq         = arg;
    size_t room, packed;

    /* first fragment always starts at the beginning of the message */
    ucs_assert(sreq->send.state.offset == 0);

    room                  = ucp_ep_config(sreq->send.ep)->max_am_bcopy -
                            sizeof(*ehdr);
    ehdr->super.super.tag = sreq->send.tag;
    ehdr->total_len       = sreq->send.length;

    /* a first-fragment is only used when the message does not fit whole */
    ucs_assert(sreq->send.length > room);

    packed = ucp_request_generic_dt_pack(sreq, ehdr + 1, room);
    return sizeof(*ehdr) + packed;
}
/*
 * Initialize the send-side fields of a tagged-send request: endpoint,
 * payload description, tag, packing state, total length, and the lane
 * configured for tag traffic.
 * NOTE(review): @flags are stored verbatim into req->flags - their
 * semantics depend on the caller's request-flag definitions.
 */
static UCS_F_ALWAYS_INLINE void
ucp_tag_send_req_init(ucp_request_t* req, ucp_ep_h ep, const void* buffer,
                      uintptr_t datatype, size_t count, ucp_tag_t tag,
                      uint16_t flags)
{
    req->flags         = flags;
    req->send.ep       = ep;
    req->send.buffer   = buffer;
    req->send.datatype = datatype;
    req->send.tag      = tag;
    ucp_request_send_state_init(req, datatype, count);
    /* total payload length derived from the datatype, count and pack state */
    req->send.length = ucp_dt_length(req->send.datatype, count,
                                     req->send.buffer, &req->send.state.dt);
    req->send.lane = ucp_ep_config(ep)->tag.lane;
}
/*
 * Print a human-readable description of endpoint @ep to @stream: a header,
 * the peer identification, and the endpoint configuration.
 * The peer name is only available when compiled with ENABLE_DEBUG_DATA.
 */
void ucp_ep_print_info(ucp_ep_h ep, FILE *stream)
{
    fprintf(stream, "#\n");
    fprintf(stream, "# UCP endpoint\n");
    fprintf(stream, "#\n");
    fprintf(stream, "# peer: %s%suuid 0x%"PRIx64"\n",
/* peer name (and separator) only exist in debug-data builds */
#if ENABLE_DEBUG_DATA
            ucp_ep_peer_name(ep), ", ",
#else
            "", "",
#endif
            ep->dest_uuid);
    ucp_ep_config_print(stream, ep->worker, ucp_ep_config(ep), NULL);
    fprintf(stream, "#\n");
}
/*
 * bcopy pack callback for the first fragment of a multi-fragment eager
 * message with a (possibly non-contiguous) datatype: fills the
 * first-fragment header and packs one fragment via ucp_dt_pack().
 * Returns the number of bytes written into @dest.
 */
static size_t ucp_tag_pack_eager_first_dt(void *dest, void *arg)
{
    ucp_request_t *sreq         = arg;
    ucp_eager_first_hdr_t *ehdr = dest;
    size_t payload;

    payload = ucp_ep_config(sreq->send.ep)->am.max_bcopy - sizeof(*ehdr);

    ehdr->super.super.tag = sreq->send.tag;
    ehdr->total_len       = sreq->send.length;

    ucs_debug("pack eager_first paylen %zu", payload);

    /* first fragment: packing starts at offset 0, message spans fragments */
    ucs_assert(sreq->send.state.offset == 0);
    ucs_assert(sreq->send.length > payload);

    return sizeof(*ehdr) +
           ucp_dt_pack(sreq->send.datatype, ehdr + 1, sreq->send.buffer,
                       &sreq->send.state, payload);
}
/*
 * bcopy pack callback for the first fragment of a synchronous contiguous
 * eager message: fills the sync-first header (tag, total length, and the
 * sender identification needed for the remote acknowledgement) and copies
 * one fragment of payload. Returns the number of bytes written.
 */
static size_t ucp_tag_pack_eager_sync_first_contig(void *dest, void *arg)
{
    ucp_eager_sync_first_hdr_t *ehdr = dest;
    ucp_request_t *sreq              = arg;
    size_t payload = ucp_ep_config(sreq->send.ep)->max_am_bcopy -
                     sizeof(*ehdr);

    ehdr->super.super.super.tag = sreq->send.tag;
    ehdr->super.total_len       = sreq->send.length;
    /* remote side acks back to this worker/request */
    ehdr->req.sender_uuid       = sreq->send.ep->worker->uuid;
    ehdr->req.reqptr            = (uintptr_t)sreq;

    ucs_debug("pack eager_sync_first paylen %zu", payload);
    ucs_assert(sreq->send.state.offset == 0);
    ucs_assert(sreq->send.length > payload);

    memcpy(ehdr + 1, sreq->send.buffer, payload);
    return sizeof(*ehdr) + payload;
}
/*
 * Set up a tagged send of an IOV datatype: compute the total payload
 * length, reset the IOV packing state, and pick the single- or
 * multi-fragment bcopy protocol. Always succeeds.
 * (max_short / zcopy_thresh / rndv_thresh are unused for IOV sends.)
 */
static ucs_status_t ucp_tag_req_start_iov(ucp_request_t *req, size_t count,
                                          ssize_t max_short,
                                          size_t zcopy_thresh,
                                          size_t rndv_thresh,
                                          const ucp_proto_t *proto)
{
    ucp_ep_config_t *cfg = ucp_ep_config(req->send.ep);

    req->send.length                     = ucp_dt_iov_length(req->send.buffer,
                                                             count);
    req->send.state.dt.iov.iov_offset    = 0;
    req->send.state.dt.iov.iovcnt_offset = 0;

    /* bcopy: single fragment if header + payload fits in one AM buffer */
    req->send.uct.func =
        (req->send.length <= cfg->max_am_bcopy - proto->only_hdr_size) ?
        proto->bcopy_single : proto->bcopy_multi;

    return UCS_OK;
}
/*
 * Set up a tagged send of a generic datatype: start the user's pack
 * routine, record the packing state and the packed length, and pick the
 * single- or multi-fragment bcopy protocol.
 * (rndv_thresh is unused for generic-datatype sends.)
 */
static void ucp_tag_req_start_generic(ucp_request_t *req, size_t count,
                                      size_t rndv_thresh,
                                      const ucp_proto_t *progress)
{
    ucp_ep_config_t *cfg    = ucp_ep_config(req->send.ep);
    ucp_dt_generic_t *dtg   = ucp_dt_generic(req->send.datatype);
    void *pack_state;
    size_t packed;

    /* user-provided pack routine owns the opaque state */
    pack_state = dtg->ops.start_pack(dtg->context, req->send.buffer, count);
    req->send.state.dt.generic.state = pack_state;

    packed           = dtg->ops.packed_size(pack_state);
    req->send.length = packed;

    req->send.uct.func =
        (packed <= cfg->max_am_bcopy - progress->only_hdr_size) ?
        progress->bcopy_single : progress->bcopy_multi;
}
/*
 * Pending-queue callback that (re)issues the remaining bcopy GET fragments
 * of a queued request.
 *
 * @return UCS_OK after the last fragment was posted (the request is
 *         completed), otherwise the transport status - e.g.
 *         UCS_ERR_NO_RESOURCE keeps the request on the pending queue.
 */
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucs_status_t status;
    size_t frag_length;
    ucp_request_t *req  = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_t *ep        = req->send.ep;
    ucp_rkey_h rkey     = req->send.rma.rkey;
    uct_rkey_t uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    for (;;) {
        frag_length = ucs_min(ucp_ep_config(ep)->get.max_bcopy,
                              req->send.length);
        status = uct_ep_get_bcopy(ep->uct_ep, (uct_unpack_callback_t)memcpy,
                                  (void*)req->send.buffer, frag_length,
                                  req->send.rma.remote_addr, uct_rkey, NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            req->send.length          -= frag_length;
            req->send.buffer          += frag_length;
            req->send.rma.remote_addr += frag_length;
            if (req->send.length == 0) {
                /* Get was posted */
                ucp_request_complete(req, void);
                status = UCS_OK;
                break;
            }
        } else {
            /* Error - abort */
            break;
        }
    }
    return status;
}
/*
 * Pack the transport endpoint address of @ep for resource @tl_index into
 * @addr by finding the operation whose configured resource matches.
 *
 * @return UCS_OK (or the uct_ep_get_address status) when a matching
 *         transport is found, UCS_ERR_INVALID_ADDR otherwise (a bug).
 */
static ucs_status_t ucp_address_pack_ep_address(ucp_ep_h ep,
                                                ucp_rsc_index_t tl_index,
                                                uct_ep_addr_t *addr)
{
    ucp_ep_op_t op;

    for (op = 0; op < UCP_EP_OP_LAST; ++op) {
        if (ucp_ep_config(ep)->rscs[op] != tl_index) {
            continue;
        }
        /*
         * If this is a stub endpoint, it will return the underlying next_ep
         * address, and the length will be correct because the resource index
         * is of the next_ep.
         */
        return uct_ep_get_address(ep->uct_eps[op], addr);
    }

    ucs_bug("provided ucp_ep without required transport");
    return UCS_ERR_INVALID_ADDR;
}
/*
 * Return the rendezvous-protocol threshold for the request's datatype
 * class: min(rma, am) for contiguous/IOV data, am-only for generic
 * datatypes, and @seg_size for oversized IOVs under tag offload (which
 * forces software rendezvous). SIZE_MAX on an invalid datatype.
 */
static UCS_F_ALWAYS_INLINE size_t
ucp_tag_get_rndv_threshold(const ucp_request_t *req, size_t count,
                           size_t max_iov, size_t rndv_rma_thresh,
                           size_t rndv_am_thresh, size_t seg_size)
{
    switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) {
    case UCP_DATATYPE_IOV:
        if ((count > max_iov) &&
            ucp_ep_is_tag_offload_enabled(ucp_ep_config(req->send.ep))) {
            /* Make sure SW RNDV will be used, because tag offload does
             * not support multi-packet eager protocols. */
            return seg_size;
        }
        /* Fall through */
    case UCP_DATATYPE_CONTIG:
        return ucs_min(rndv_rma_thresh, rndv_am_thresh);
    case UCP_DATATYPE_GENERIC:
        return rndv_am_thresh;
    default:
        ucs_error("Invalid data type %lx", req->send.datatype);
    }
    return SIZE_MAX;
}
/*
 * Unpack a serialized remote-key buffer into a new ucp_rkey handle holding
 * the UCT rkeys of all remote PDs that are reachable from @ep.
 * Buffer layout: [pd_map][pd_size, packed rkey]... for each bit set in
 * pd_map, in ascending PD-index order.
 *
 * @return UCS_OK and *rkey_p set (a dummy rkey if pd_map == 0),
 *         UCS_ERR_NO_MEMORY, UCS_ERR_UNREACHABLE if no PD is reachable,
 *         or the first uct_rkey_unpack failure.
 */
ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer,
                                ucp_rkey_h *rkey_p)
{
    unsigned remote_pd_index, remote_pd_gap;
    unsigned rkey_index;
    unsigned pd_count;
    ucs_status_t status;
    ucp_rkey_h rkey;
    uint8_t pd_size;
    ucp_pd_map_t pd_map;
    void *p;

    /* Count the number of remote PDs in the rkey buffer */
    p = rkey_buffer;

    /* Read remote PD map */
    pd_map = *(ucp_pd_map_t*)p;

    ucs_trace("unpacking rkey with pd_map 0x%x", pd_map);

    if (pd_map == 0) {
        /* Dummy key return ok */
        *rkey_p = &ucp_mem_dummy_rkey;
        return UCS_OK;
    }

    pd_count = ucs_count_one_bits(pd_map);
    p += sizeof(ucp_pd_map_t);

    /* Allocate rkey handle which holds UCT rkeys for all remote PDs.
     * We keep all of them to handle a future transport switch.
     */
    rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->uct[0]) * pd_count),
                      "ucp_rkey");
    if (rkey == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    rkey->pd_map    = 0;
    remote_pd_index = 0; /* Index of remote PD */
    rkey_index      = 0; /* Index of the rkey in the array */

    /* Unpack rkey of each UCT PD */
    while (pd_map > 0) {
        pd_size = *((uint8_t*)p++);

        /* Use bit operations to iterate through the indices of the remote PDs
         * as provided in the pd_map. pd_map always holds a bitmap of PD indices
         * that remain to be used. Every time we find the "gap" until the next
         * valid PD index using ffs operation. If some rkeys cannot be unpacked,
         * we remove them from the local map.
         */
        remote_pd_gap = ucs_ffs64(pd_map); /* Find the offset for next PD index */
        remote_pd_index += remote_pd_gap;  /* Calculate next index of remote PD*/
        pd_map >>= remote_pd_gap;          /* Remove the gap from the map */
        ucs_assert(pd_map & 1);

        /* Unpack only reachable rkeys */
        if (UCS_BIT(remote_pd_index) & ucp_ep_config(ep)->key.reachable_pd_map) {
            ucs_assert(rkey_index < pd_count);
            status = uct_rkey_unpack(p, &rkey->uct[rkey_index]);
            if (status != UCS_OK) {
                ucs_error("Failed to unpack remote key from remote pd[%d]: %s",
                          remote_pd_index, ucs_status_string(status));
                goto err_destroy;
            }

            ucs_trace("rkey[%d] for remote pd %d is 0x%lx", rkey_index,
                      remote_pd_index, rkey->uct[rkey_index].rkey);
            rkey->pd_map |= UCS_BIT(remote_pd_index);
            ++rkey_index;
        }

        /* advance past the consumed PD bit and its packed rkey bytes */
        ++remote_pd_index;
        pd_map >>= 1;
        p += pd_size;
    }

    if (rkey->pd_map == 0) {
        ucs_debug("The unpacked rkey from the destination is unreachable");
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy;
    }

    *rkey_p = rkey;
    return UCS_OK;

err_destroy:
    ucp_rkey_destroy(rkey);
err:
    return status;
}
/*
 * Select a lane configuration for @ep from the remote address list and
 * connect all underlying UCT endpoints. Sets UCP_EP_FLAG_LOCAL_CONNECTED
 * only when no p2p transport is involved. Reconfiguring an already
 * configured non-stub endpoint is not supported (fatal).
 *
 * @return UCS_OK, or an error status (all uct endpoints are destroyed on
 *         failure).
 */
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count,
                                   const ucp_address_entry_t *address_list,
                                   uint8_t *addr_indices)
{
    ucp_worker_h worker = ep->worker;
    ucp_ep_config_key_t key;
    uint16_t new_cfg_index;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uint8_t conn_flag;
    char str[32];

    ucs_trace("ep %p: initialize lanes", ep);

    status = ucp_wireup_select_lanes(ep, address_count, address_list,
                                     addr_indices, &key);
    if (status != UCS_OK) {
        goto err;
    }

    /* never lose reachability information accumulated so far */
    key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map;

    new_cfg_index = ucp_worker_get_ep_config(worker, &key);
    if ((ep->cfg_index == new_cfg_index)) {
        return UCS_OK; /* No change */
    }

    if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) {
        /*
         * TODO handle a case where we have to change lanes and reconfigure the ep:
         *
         * - if we already have uct ep connected to an address - move it to the new lane index
         * - if we don't yet have connection to an address - create it
         * - if an existing lane is not connected anymore - delete it (possibly)
         * - if the configuration has changed - replay all pending operations on all lanes -
         *   need that every pending callback would return, in case of failure, the number
         *   of lane it wants to be queued on.
         */
        ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep,
                  ep->cfg_index, new_cfg_index);
        ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key,
                                "old", NULL);
        ucp_wireup_print_config(worker->context, &key, "new", NULL);
        ucs_fatal("endpoint reconfiguration not supported yet");
    }

    ep->cfg_index = new_cfg_index;
    ep->am_lane   = key.am_lane;

    snprintf(str, sizeof(str), "ep %p", ep);
    ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str,
                            addr_indices);

    ucs_trace("ep %p: connect lanes", ep);

    /* establish connections on all underlying endpoints */
    conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED;
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        status = ucp_wireup_connect_lane(ep, lane, address_count, address_list,
                                         addr_indices[lane]);
        if (status != UCS_OK) {
            goto err;
        }
        if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) {
            conn_flag = 0; /* If we have a p2p transport, we're not connected */
        }
    }
    ep->flags |= conn_flag;
    return UCS_OK;

err:
    /* tear down everything created so far */
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ep->uct_eps[lane] != NULL) {
            uct_ep_destroy(ep->uct_eps[lane]);
            ep->uct_eps[lane] = NULL;
        }
    }
    return status;
}
/*
 * Non-blocking implicit PUT: write @length bytes from @buffer to the peer
 * at @remote_addr, fragmenting by the RMA configuration. On transport
 * resource exhaustion a pending request is queued (resumed by
 * ucp_progress_put_nbi) and UCS_INPROGRESS is returned.
 *
 * @return UCS_OK when fully sent, UCS_INPROGRESS when queued,
 *         UCS_ERR_NO_MEMORY if the pending request could not be allocated,
 *         or a transport error.
 */
ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    ucp_request_t *req;
    uct_ep_h uct_ep;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    for (;;) {
        /* re-resolve every iteration - the lane/rkey may change on a
         * transport switch */
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
        if (length <= rma_config->max_put_short) {
            /* Fast path for a single short message */
            status = uct_ep_put_short(uct_ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                /* Return on error or success */
                break;
            } else {
                /* Out of resources - adding request for later schedule */
                req = ucs_mpool_get_inline(&ep->worker->req_mp);
                if (req == NULL) {
                    status = UCS_ERR_NO_MEMORY;
                    break;
                }
                ucp_add_pending_rma(req, ep, uct_ep, buffer, length,
                                    remote_addr, rkey, ucp_progress_put_nbi);
                status = UCS_INPROGRESS;
                break;
            }
        } else {
            /* Fragmented put */
            if (length <= ucp_ep_config(ep)->bcopy_thresh) {
                /* TBD: Should be replaced with bcopy */
                packed_len = ucs_min(length, rma_config->max_put_short);
                status = uct_ep_put_short(uct_ep, buffer, packed_len,
                                          remote_addr, uct_rkey);
            } else {
                /* TBD: Use z-copy */
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length = ucs_min(length, rma_config->max_put_bcopy);
                packed_len = uct_ep_put_bcopy(uct_ep, ucp_memcpy_pack,
                                              &pack_ctx, remote_addr, uct_rkey);
                /* negative packed_len encodes an error status */
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }

            if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
                length -= packed_len;
                if (length == 0) {
                    /* Put is completed - return success */
                    break;
                }

                buffer      += packed_len;
                remote_addr += packed_len;
            } else if (status == UCS_ERR_NO_RESOURCE) {
                /* Out of resources - adding request for later schedule */
                req = ucs_mpool_get_inline(&ep->worker->req_mp);
                if (req == NULL) {
                    status = UCS_ERR_NO_MEMORY;
                    break;
                }
                ucp_add_pending_rma(req, ep, uct_ep, buffer, length,
                                    remote_addr, rkey, ucp_progress_put_nbi);
                status = UCS_INPROGRESS;
                break;
            } else {
                /* Return - Error occured */
                break;
            }
        }
    }

    return status;
}
/*
 * Return nonzero iff @optype has its own transport resource on @ep, i.e.
 * a resource is configured for it and it is not a duplicate of another
 * operation's resource.
 */
int ucp_ep_is_op_primary(ucp_ep_h ep, ucp_ep_op_t optype)
{
    ucp_ep_config_t *cfg = ucp_ep_config(ep);

    if (cfg->rscs[optype] == UCP_NULL_RESOURCE) {
        return 0; /* no resource configured for this operation */
    }

    /* primary iff it is not marked as a duplicate of another operation */
    return cfg->dups[optype] == UCP_EP_OP_LAST;
}