static ucs_status_ptr_t ucp_disconnect_nb_internal(ucp_ep_h ep)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_debug("disconnect ep %p", ep);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    /*
     * Flush operation can be queued on the pending queue of only one of the
     * lanes (indicated by req->send.lane) and scheduled for completion on any
     * number of lanes. req->send.uct_comp.count keeps track of how many lanes
     * are not flushed yet, and when it reaches zero, it means all lanes are
     * flushed. req->send.flush.lanes keeps track of which lanes we still have
     * to start flush on.
     * If a flush is completed from a pending/completion callback, we need to
     * schedule a slow-path callback to release the endpoint later, since a UCT
     * endpoint cannot be released from pending/completion callback context.
     */
    req->flags                  = 0;
    req->status                 = UCS_OK;
    req->send.ep                = ep;
    req->send.flush.flushed_cb  = ucp_ep_disconnected;
    req->send.flush.lanes       = UCS_MASK(ucp_ep_num_lanes(ep));
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    req->send.flush.cbq_elem_on = 0;
    req->send.lane              = UCP_NULL_LANE;
    req->send.uct.func          = ucp_ep_flush_progress_pending;
    req->send.uct_comp.func     = ucp_ep_flush_completion;
    req->send.uct_comp.count    = ucp_ep_num_lanes(ep);

    ucp_ep_flush_progress(req);

    if (req->send.uct_comp.count == 0) {
        status = req->status;
        ucp_ep_disconnected(req);
        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
                      ep, req, ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("ep %p: return inprogress flush request %p (%p)", ep, req,
                  req + 1);
    return req + 1;
}

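/*
 * A minimal caller-side sketch (not part of the original file) of how the
 * ucs_status_ptr_t returned above could be consumed: an error pointer means
 * immediate failure, a plain status means the flush completed inline, and a
 * request handle (req + 1) must be progressed to completion. The wrapper name
 * my_ep_disconnect_sync() is hypothetical, and this assumes a request-test
 * call such as ucp_request_is_completed() and ucp_request_release() are
 * available in the surrounding public API.
 */
static ucs_status_t my_ep_disconnect_sync(ucp_ep_h ep)
{
    ucp_worker_h worker    = ep->worker; /* ep may be freed once flushed */
    ucs_status_ptr_t reqp  = ucp_disconnect_nb_internal(ep);

    if (UCS_PTR_IS_ERR(reqp)) {
        return UCS_PTR_STATUS(reqp);   /* failed before starting the flush */
    }
    if (!UCS_PTR_IS_PTR(reqp)) {
        return UCS_OK;                 /* all lanes flushed inline */
    }

    /* Drive the worker until the flush request completes */
    while (!ucp_request_is_completed(reqp)) {
        ucp_worker_progress(worker);
    }
    ucp_request_release(reqp);
    return UCS_OK;
}
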
static void ucp_wireup_ep_remote_connected(ucp_ep_h ep)
{
    ucp_worker_h worker = ep->worker;
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane;

    ucs_trace("ep %p: remote connected", ep);

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        rsc_index = ucp_ep_get_rsc_index(ep, lane);
        if (ucp_worker_is_tl_p2p(worker, rsc_index)) {
            ucp_stub_ep_remote_connected(ep->uct_eps[lane]);
        }
    }
}

static void ucp_ep_destroy_uct_eps(ucp_ep_h ep)
{
    ucp_lane_index_t lane;
    uct_ep_h uct_ep;

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        uct_ep = ep->uct_eps[lane];
        if (uct_ep == NULL) {
            continue;
        }
        /* Release pending sends before destroying the transport endpoint */
        uct_ep_pending_purge(uct_ep, ucp_request_release_pending_send, NULL);
        ucs_debug("destroy ep %p lane %d uct_ep %p", ep, lane, uct_ep);
        uct_ep_destroy(uct_ep);
    }
}

static ucs_status_t ucp_wireup_connect_local(ucp_ep_h ep, const uint8_t *tli,
                                             unsigned address_count,
                                             const ucp_address_entry_t *address_list)
{
    ucp_worker_h worker = ep->worker;
    const ucp_address_entry_t *address;
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane, amo_index;
    ucs_status_t status;
    ucp_md_map_t UCS_V_UNUSED md_map;

    ucs_trace("ep %p: connect local transports", ep);

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        rsc_index = ucp_ep_get_rsc_index(ep, lane);
        if (!ucp_worker_is_tl_p2p(worker, rsc_index)) {
            continue;
        }

        address = &address_list[tli[lane]];
        ucs_assert(address->tl_addr_len > 0);

        /* If the lane is used for RMA/AMO, check that the destination md index
         * matches */
        md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.rma_lane_map, lane);
        ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)),
                    "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane],
                    md_map, address->md_index);

        amo_index = ucp_ep_get_amo_lane_index(&ucp_ep_config(ep)->key, lane);
        if (amo_index != UCP_NULL_LANE) {
            md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.amo_lane_map,
                                           amo_index);
            ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)),
                        "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane],
                        md_map, address->md_index);
        }

        status = uct_ep_connect_to_ep(ep->uct_eps[lane], address->dev_addr,
                                      address->ep_addr);
        if (status != UCS_OK) {
            return status;
        }
    }

    return UCS_OK;
}

ucs_status_t ucp_ep_flush(ucp_ep_h ep)
{
    ucp_lane_index_t lane;
    ucs_status_t status;

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        for (;;) {
            status = uct_ep_flush(ep->uct_eps[lane], 0, NULL);
            if (status == UCS_OK) {
                break;
            } else if ((status != UCS_INPROGRESS) &&
                       (status != UCS_ERR_NO_RESOURCE)) {
                return status;
            }
            /* Flush is not done yet - progress the worker and retry */
            ucp_worker_progress(ep->worker);
        }
    }
    return UCS_OK;
}

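/*
 * Hedged usage sketch: because ucp_ep_flush() above progresses the worker
 * until every lane reports UCS_OK, it can serve as a completion fence after
 * non-blocking operations. my_put_and_flush() is a hypothetical helper and
 * assumes the non-blocking put entry point ucp_put_nbi() is available in the
 * surrounding API.
 */
static ucs_status_t my_put_and_flush(ucp_ep_h ep, const void *buffer,
                                     size_t length, uint64_t remote_addr,
                                     ucp_rkey_h rkey)
{
    ucs_status_t status;

    /* may return UCS_INPROGRESS, meaning the data is still in flight */
    status = ucp_put_nbi(ep, buffer, length, remote_addr, rkey);
    if ((status != UCS_OK) && (status != UCS_INPROGRESS)) {
        return status;
    }

    /* block until the put (and anything else outstanding) completes */
    return ucp_ep_flush(ep);
}
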
static ucs_status_t ucp_address_pack_ep_address(ucp_ep_h ep,
                                                ucp_rsc_index_t tl_index,
                                                uct_ep_addr_t *addr)
{
    ucp_lane_index_t lane;

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ucp_ep_get_rsc_index(ep, lane) == tl_index) {
            /*
             * If this is a wireup endpoint, it will return the underlying
             * next_ep address, and the length will be correct because the
             * resource index is that of the next_ep.
             */
            return uct_ep_get_address(ep->uct_eps[lane], addr);
        }
    }

    ucs_bug("provided ucp_ep without required transport");
    return UCS_ERR_INVALID_ADDR;
}

void ucp_ep_destroy_internal(ucp_ep_h ep, const char *message)
{
    ucp_lane_index_t lane;
    uct_ep_h uct_ep;

    ucs_debug("destroy ep %p%s", ep, message);

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        uct_ep = ep->uct_eps[lane];
        if (uct_ep == NULL) {
            continue;
        }
        uct_ep_pending_purge(uct_ep, ucp_destroyed_ep_pending_purge, ep);
        ucs_debug("destroy ep %p lane[%d]=%p", ep, lane, uct_ep);
        uct_ep_destroy(uct_ep);
    }

    ucs_free(ep);
}

ucs_status_t ucp_wireup_send_request(ucp_ep_h ep)
{
    ucp_worker_h worker = ep->worker;
    ucp_rsc_index_t rsc_tli[UCP_MAX_LANES];
    ucp_rsc_index_t rsc_index;
    uint64_t tl_bitmap = 0;
    ucp_lane_index_t lane;
    ucs_status_t status;

    if (ep->flags & UCP_EP_FLAG_CONNECT_REQ_SENT) {
        return UCS_OK;
    }

    ucs_assert_always(!ucp_ep_is_stub(ep));

    for (lane = 0; lane < UCP_MAX_LANES; ++lane) {
        if (lane < ucp_ep_num_lanes(ep)) {
            rsc_index     = ucp_ep_get_rsc_index(ep, lane);
            rsc_tli[lane] = ucp_worker_is_tl_p2p(worker, rsc_index) ?
                            rsc_index : UCP_NULL_RESOURCE;
            tl_bitmap    |= UCS_BIT(rsc_index);
        } else {
            rsc_tli[lane] = UCP_NULL_RESOURCE;
        }
    }

    /* TODO make sure such a lane would exist */
    rsc_index = ucp_stub_ep_get_aux_rsc_index(
                    ep->uct_eps[ucp_ep_get_wireup_msg_lane(ep)]);
    if (rsc_index != UCP_NULL_RESOURCE) {
        tl_bitmap |= UCS_BIT(rsc_index);
    }

    ucs_debug("ep %p: send wireup request (flags=0x%x)", ep, ep->flags);
    status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_REQUEST, tl_bitmap, rsc_tli);
    ep->flags |= UCP_EP_FLAG_CONNECT_REQ_SENT;
    return status;
}

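/*
 * Worked illustration (hypothetical values, not from the original source):
 * suppose the endpoint has two lanes, lane 0 on a p2p transport with resource
 * index 0 and lane 1 on a connect-to-iface transport with resource index 2.
 * The request built above would then carry
 *
 *     rsc_tli   = { 0, UCP_NULL_RESOURCE, UCP_NULL_RESOURCE, ... }
 *     tl_bitmap = UCS_BIT(0) | UCS_BIT(2) = 0x5
 *
 * so tl_bitmap names every resource the peer may need addresses for, while
 * rsc_tli tells the peer which p2p resource, if any, serves each lane.
 */
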
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count,
                                   const ucp_address_entry_t *address_list,
                                   uint8_t *addr_indices)
{
    ucp_worker_h worker = ep->worker;
    ucp_ep_config_key_t key;
    uint16_t new_cfg_index;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uint8_t conn_flag;
    char str[32];

    ucs_trace("ep %p: initialize lanes", ep);

    status = ucp_wireup_select_lanes(ep, address_count, address_list,
                                     addr_indices, &key);
    if (status != UCS_OK) {
        goto err;
    }

    key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map;

    new_cfg_index = ucp_worker_get_ep_config(worker, &key);
    if (ep->cfg_index == new_cfg_index) {
        return UCS_OK; /* No change */
    }

    if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) {
        /*
         * TODO handle a case where we have to change lanes and reconfigure the ep:
         *
         * - if we already have a uct ep connected to an address - move it to the new lane index
         * - if we don't yet have a connection to an address - create it
         * - if an existing lane is not connected anymore - delete it (possibly)
         * - if the configuration has changed - replay all pending operations on
         *   all lanes - this requires that every pending callback return, in case
         *   of failure, the index of the lane it wants to be queued on.
         */
        ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep, ep->cfg_index,
                  new_cfg_index);
        ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, "old",
                                NULL);
        ucp_wireup_print_config(worker->context, &key, "new", NULL);
        ucs_fatal("endpoint reconfiguration not supported yet");
    }

    ep->cfg_index = new_cfg_index;
    ep->am_lane   = key.am_lane;

    snprintf(str, sizeof(str), "ep %p", ep);
    ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str,
                            addr_indices);

    ucs_trace("ep %p: connect lanes", ep);

    /* establish connections on all underlying endpoints */
    conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED;
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        status = ucp_wireup_connect_lane(ep, lane, address_count, address_list,
                                         addr_indices[lane]);
        if (status != UCS_OK) {
            goto err;
        }
        if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) {
            conn_flag = 0; /* if we have a p2p transport, we're not yet connected */
        }
    }
    ep->flags |= conn_flag;
    return UCS_OK;

err:
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ep->uct_eps[lane] != NULL) {
            uct_ep_destroy(ep->uct_eps[lane]);
            ep->uct_eps[lane] = NULL;
        }
    }
    return status;
}

static void ucp_wireup_process_request(ucp_worker_h worker,
                                       const ucp_wireup_msg_t *msg,
                                       uint64_t uuid, const char *peer_name,
                                       unsigned address_count,
                                       const ucp_address_entry_t *address_list)
{
    ucp_ep_h ep = ucp_worker_ep_find(worker, uuid);
    ucp_rsc_index_t rsc_tli[UCP_MAX_LANES];
    uint8_t addr_indices[UCP_MAX_LANES];
    ucp_lane_index_t lane, remote_lane;
    ucp_rsc_index_t rsc_index;
    ucs_status_t status;
    uint64_t tl_bitmap = 0;

    ucs_trace("ep %p: got wireup request from %s", ep, peer_name);

    /* Create a new endpoint if one does not exist */
    if (ep == NULL) {
        status = ucp_ep_new(worker, uuid, peer_name, "remote-request", &ep);
        if (status != UCS_OK) {
            return;
        }
    }

    /* Initialize lanes (possibly destroying existing lanes) */
    status = ucp_wireup_init_lanes(ep, address_count, address_list,
                                   addr_indices);
    if (status != UCS_OK) {
        return;
    }

    /* Connect p2p addresses to remote endpoint */
    if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) {
        status = ucp_wireup_connect_local(ep, addr_indices, address_count,
                                          address_list);
        if (status != UCS_OK) {
            return;
        }

        ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED;

        /* Construct the list that tells the remote side with which address we
         * have connected to each of its lanes.
         */
        memset(rsc_tli, -1, sizeof(rsc_tli));
        for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
            rsc_index = ucp_ep_get_rsc_index(ep, lane);
            for (remote_lane = 0; remote_lane < UCP_MAX_LANES; ++remote_lane) {
                /* If 'lane' has connected to 'remote_lane' - i.e. the address
                 * entry we used for 'lane' is the one the peer advertised for
                 * its 'remote_lane' ...
                 */
                if (addr_indices[lane] == msg->tli[remote_lane]) {
                    ucs_assert(ucp_worker_is_tl_p2p(worker, rsc_index));
                    rsc_tli[remote_lane] = rsc_index;
                    tl_bitmap           |= UCS_BIT(rsc_index);
                }
            }
        }

        ucs_trace("ep %p: sending wireup reply", ep);
        status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_REPLY, tl_bitmap,
                                     rsc_tli);
        if (status != UCS_OK) {
            return;
        }
    }
}