static void ucp_wireup_ep_remote_connected(ucp_ep_h ep) { ucp_worker_h worker = ep->worker; ucp_rsc_index_t rsc_index; ucp_lane_index_t lane; ucs_trace("ep %p: remote connected", ep); for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { rsc_index = ucp_ep_get_rsc_index(ep, lane); if (ucp_worker_is_tl_p2p(worker, rsc_index)) { ucp_stub_ep_remote_connected(ep->uct_eps[lane]); } } }
static ucs_status_t ucp_wireup_connect_local(ucp_ep_h ep, const uint8_t *tli, unsigned address_count, const ucp_address_entry_t *address_list) { ucp_worker_h worker = ep->worker; const ucp_address_entry_t *address; ucp_rsc_index_t rsc_index; ucp_lane_index_t lane, amo_index; ucs_status_t status; ucp_md_map_t UCS_V_UNUSED md_map; ucs_trace("ep %p: connect local transports", ep); for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { rsc_index = ucp_ep_get_rsc_index(ep, lane); if (!ucp_worker_is_tl_p2p(worker, rsc_index)) { continue; } address = &address_list[tli[lane]]; ucs_assert(address->tl_addr_len > 0); /* Check that if the lane is used for RMA/AMO, destination md index matches */ md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.rma_lane_map, lane); ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)), "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane], md_map, address->md_index); amo_index = ucp_ep_get_amo_lane_index(&ucp_ep_config(ep)->key, lane); if (amo_index != UCP_NULL_LANE) { md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.amo_lane_map, amo_index); ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)), "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane], md_map, address->md_index); } status = uct_ep_connect_to_ep(ep->uct_eps[lane], address->dev_addr, address->ep_addr); if (status != UCS_OK) { return status; } } return UCS_OK; }
static ucs_status_t ucp_address_pack_ep_address(ucp_ep_h ep, ucp_rsc_index_t tl_index, uct_ep_addr_t *addr) { ucp_lane_index_t lane; for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { if (ucp_ep_get_rsc_index(ep, lane) == tl_index) { /* * If this is a wireup endpoint, it will return the underlying next_ep * address, and the length will be correct because the resource index * is of the next_ep. */ return uct_ep_get_address(ep->uct_eps[lane], addr); } } ucs_bug("provided ucp_ep without required transport"); return UCS_ERR_INVALID_ADDR; }
ucs_status_t ucp_wireup_send_request(ucp_ep_h ep) { ucp_worker_h worker = ep->worker; ucp_rsc_index_t rsc_tli[UCP_MAX_LANES]; ucp_rsc_index_t rsc_index; uint64_t tl_bitmap = 0; ucp_lane_index_t lane; ucs_status_t status; if (ep->flags & UCP_EP_FLAG_CONNECT_REQ_SENT) { return UCS_OK; } ucs_assert_always(!ucp_ep_is_stub(ep)); for (lane = 0; lane < UCP_MAX_LANES; ++lane) { if (lane < ucp_ep_num_lanes(ep)) { rsc_index = ucp_ep_get_rsc_index(ep, lane); rsc_tli[lane] = ucp_worker_is_tl_p2p(worker, rsc_index) ? rsc_index : UCP_NULL_RESOURCE; tl_bitmap |= UCS_BIT(rsc_index); } else { rsc_tli[lane] = UCP_NULL_RESOURCE; } } /* TODO make sure such lane would exist */ rsc_index = ucp_stub_ep_get_aux_rsc_index( ep->uct_eps[ucp_ep_get_wireup_msg_lane(ep)]); if (rsc_index != UCP_NULL_RESOURCE) { tl_bitmap |= UCS_BIT(rsc_index); } ucs_debug("ep %p: send wireup request (flags=0x%x)", ep, ep->flags); status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_REQUEST, tl_bitmap, rsc_tli); ep->flags |= UCP_EP_FLAG_CONNECT_REQ_SENT; return status; }
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count, const ucp_address_entry_t *address_list, uint8_t *addr_indices) { ucp_worker_h worker = ep->worker; ucp_ep_config_key_t key; uint16_t new_cfg_index; ucp_lane_index_t lane; ucs_status_t status; uint8_t conn_flag; char str[32]; ucs_trace("ep %p: initialize lanes", ep); status = ucp_wireup_select_lanes(ep, address_count, address_list, addr_indices, &key); if (status != UCS_OK) { goto err; } key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map; new_cfg_index = ucp_worker_get_ep_config(worker, &key); if ((ep->cfg_index == new_cfg_index)) { return UCS_OK; /* No change */ } if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) { /* * TODO handle a case where we have to change lanes and reconfigure the ep: * * - if we already have uct ep connected to an address - move it to the new lane index * - if we don't yet have connection to an address - create it * - if an existing lane is not connected anymore - delete it (possibly) * - if the configuration has changed - replay all pending operations on all lanes - * need that every pending callback would return, in case of failure, the number * of lane it wants to be queued on. */ ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep, ep->cfg_index, new_cfg_index); ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, "old", NULL); ucp_wireup_print_config(worker->context, &key, "new", NULL); ucs_fatal("endpoint reconfiguration not supported yet"); } ep->cfg_index = new_cfg_index; ep->am_lane = key.am_lane; snprintf(str, sizeof(str), "ep %p", ep); ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str, addr_indices); ucs_trace("ep %p: connect lanes", ep); /* establish connections on all underlying endpoints */ conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED; for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { status = ucp_wireup_connect_lane(ep, lane, address_count, address_list, addr_indices[lane]); if (status != UCS_OK) { goto err; } if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) { conn_flag = 0; /* If we have a p2p transport, we're not connected */ } } ep->flags |= conn_flag; return UCS_OK; err: for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { if (ep->uct_eps[lane] != NULL) { uct_ep_destroy(ep->uct_eps[lane]); ep->uct_eps[lane] = NULL; } } return status; }
static ucs_status_t ucp_wireup_connect_lane(ucp_ep_h ep, ucp_lane_index_t lane, unsigned address_count, const ucp_address_entry_t *address_list, unsigned addr_index) { ucp_worker_h worker = ep->worker; ucp_rsc_index_t rsc_index = ucp_ep_get_rsc_index(ep, lane); uct_iface_attr_t *iface_attr = &worker->iface_attrs[rsc_index]; uct_ep_h new_uct_ep; ucs_status_t status; /* * if the selected transport can be connected directly to the remote * interface, just create a connected UCT endpoint. */ if ((iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) && ((ep->uct_eps[lane] == NULL) || ucp_stub_ep_test(ep->uct_eps[lane]))) { /* create an endpoint connected to the remote interface */ ucs_assert(address_list[addr_index].tl_addr_len > 0); status = uct_ep_create_connected(worker->ifaces[rsc_index], address_list[addr_index].dev_addr, address_list[addr_index].iface_addr, &new_uct_ep); if (status != UCS_OK) { return status; } /* If ep already exists, it's a stub, and we need to update its next_ep * instead of replacing it. */ if (ep->uct_eps[lane] == NULL) { ucs_trace("ep %p: assign uct_ep[%d]=%p", ep, lane, new_uct_ep); ep->uct_eps[lane] = new_uct_ep; } else { ucs_trace("ep %p: assign set stub_ep[%d]=%p next to %p", ep, lane, ep->uct_eps[lane], new_uct_ep); ucp_stub_ep_set_next_ep(ep->uct_eps[lane], new_uct_ep); ucp_stub_ep_remote_connected(ep->uct_eps[lane]); } return UCS_OK; } /* * create a stub endpoint which will start connection establishment * protocol using an auxiliary transport. */ if (iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* If ep already exists, it's a stub, and we need to start auxiliary * wireup on that stub. */ if (ep->uct_eps[lane] == NULL) { ucs_trace("ep %p: create stub_ep[%d]=%p", ep, lane, ep->uct_eps[lane]); status = ucp_stub_ep_create(ep, &ep->uct_eps[lane]); if (status != UCS_OK) { return status; } } ucs_trace("ep %p: connect stub_ep[%d]=%p", ep, lane, ep->uct_eps[lane]); return ucp_stub_ep_connect(ep->uct_eps[lane], ucp_ep_get_rsc_index(ep, lane), lane == ucp_ep_get_wireup_msg_lane(ep), address_count, address_list); } return UCS_ERR_UNREACHABLE; }
static void ucp_wireup_process_request(ucp_worker_h worker, const ucp_wireup_msg_t *msg, uint64_t uuid, const char *peer_name, unsigned address_count, const ucp_address_entry_t *address_list) { ucp_ep_h ep = ucp_worker_ep_find(worker, uuid); ucp_rsc_index_t rsc_tli[UCP_MAX_LANES]; uint8_t addr_indices[UCP_MAX_LANES]; ucp_lane_index_t lane, remote_lane; ucp_rsc_index_t rsc_index; ucs_status_t status; uint64_t tl_bitmap = 0; ucs_trace("ep %p: got wireup request from %s", ep, peer_name); /* Create a new endpoint if does not exist */ if (ep == NULL) { status = ucp_ep_new(worker, uuid, peer_name, "remote-request", &ep); if (status != UCS_OK) { return; } } /* Initialize lanes (possible destroy existing lanes) */ status = ucp_wireup_init_lanes(ep, address_count, address_list, addr_indices); if (status != UCS_OK) { return; } /* Connect p2p addresses to remote endpoint */ if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) { status = ucp_wireup_connect_local(ep, addr_indices, address_count, address_list); if (status != UCS_OK) { return; } ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED; /* Construct the list that tells the remote side with which address we * have connected to each of its lanes. */ memset(rsc_tli, -1, sizeof(rsc_tli)); for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { rsc_index = ucp_ep_get_rsc_index(ep, lane); for (remote_lane = 0; remote_lane < UCP_MAX_LANES; ++remote_lane) { /* If 'lane' has connected to 'remote_lane' ... */ if (addr_indices[lane] == msg->tli[remote_lane]) { ucs_assert(ucp_worker_is_tl_p2p(worker, rsc_index)); rsc_tli[remote_lane] = rsc_index; tl_bitmap |= UCS_BIT(rsc_index); } } } ucs_trace("ep %p: sending wireup reply", ep); status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_REPLY, tl_bitmap, rsc_tli); if (status != UCS_OK) { return; } } }
int ucp_ep_is_stub(ucp_ep_h ep) { return ucp_ep_get_rsc_index(ep, 0) == UCP_NULL_RESOURCE; }