static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    const size_t buffer_size = 2048;
    ucx_perf_ep_info_t info, *remote_info;
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_md_attr_t md_attr;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *buffer;
    void *req;

    buffer = malloc(buffer_size);
    if (buffer == NULL) {
        ucs_error("Failed to allocate RTE buffer");
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err_free;
    }

    status = uct_md_query(perf->uct.md, &md_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_md_query: %s", ucs_status_string(status));
        goto err_free;
    }

    info.rkey_size          = md_attr.rkey_packed_size;
    info.uct.dev_addr_len   = iface_attr.device_addr_len;
    info.uct.iface_addr_len = iface_attr.iface_addr_len;
    info.uct.ep_addr_len    = iface_attr.ep_addr_len;
    info.recv_buffer        = (uintptr_t)perf->recv_buffer;

    /* Carve the exchange buffer into rkey / device / iface / ep address regions */
    rkey_buffer = buffer;
    dev_addr    = (void*)rkey_buffer + info.rkey_size;
    iface_addr  = (void*)dev_addr    + info.uct.dev_addr_len;
    ep_addr     = (void*)iface_addr  + info.uct.iface_addr_len;
    ucs_assert_always((void*)ep_addr + info.uct.ep_addr_len <= buffer + buffer_size);

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(perf->uct.iface, iface_addr);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_iface_get_address: %s",
                      ucs_status_string(status));
            goto err_free;
        }
    }

    status = uct_md_mkey_pack(perf->uct.md, perf->uct.recv_mem.memh, rkey_buffer);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_md_mkey_pack: %s", ucs_status_string(status));
        goto err_free;
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }

            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }

            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    /* Send local endpoint info and packed addresses to all peers */
    vec[0].iov_base = &info;
    vec[0].iov_len  = sizeof(info);
    vec[1].iov_base = buffer;
    vec[1].iov_len  = info.rkey_size + info.uct.dev_addr_len +
                      info.uct.iface_addr_len + info.uct.ep_addr_len;

    rte_call(perf, post_vec, vec, 2, &req);
    rte_call(perf, exchange_vec, req);

    /* Receive remote endpoint info from each peer and connect to it */
    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        rte_call(perf, recv, i, buffer, buffer_size, req);

        remote_info = buffer;
        rkey_buffer = remote_info + 1;
        dev_addr    = (void*)rkey_buffer + remote_info->rkey_size;
        iface_addr  = (void*)dev_addr    + remote_info->uct.dev_addr_len;
        ep_addr     = (void*)iface_addr  + remote_info->uct.iface_addr_len;
        perf->uct.peers[i].remote_addr = remote_info->recv_buffer;

        status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_unpack: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr, ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr, iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }
    }

    uct_perf_iface_flush_b(perf);
    free(buffer);
    rte_call(perf, barrier);
    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(buffer);
err:
    return status;
}
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_pd_attr_t pd_attr;
    unsigned long va;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *req;

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err;
    }

    status = uct_pd_query(perf->uct.pd, &pd_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_pd_query: %s", ucs_status_string(status));
        goto err;
    }

    dev_addr    = calloc(1, iface_attr.device_addr_len);
    iface_addr  = calloc(1, iface_attr.iface_addr_len);
    ep_addr     = calloc(1, iface_attr.ep_addr_len);
    rkey_buffer = calloc(1, pd_attr.rkey_packed_size);
    if ((dev_addr == NULL) || (iface_addr == NULL) || (ep_addr == NULL) ||
        (rkey_buffer == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(perf->uct.iface, iface_addr);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_iface_get_address: %s",
                      ucs_status_string(status));
            goto err_free;
        }
    }

    status = uct_pd_mkey_pack(perf->uct.pd, perf->uct.recv_mem.memh, rkey_buffer);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_pd_mkey_pack: %s", ucs_status_string(status));
        goto err_free;
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }

            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }

            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    /* Send local address/rkey data to all peers */
    va = (uintptr_t)perf->recv_buffer;
    vec[0].iov_base = &va;
    vec[0].iov_len  = sizeof(va);
    vec[1].iov_base = rkey_buffer;
    vec[1].iov_len  = pd_attr.rkey_packed_size;
    vec[2].iov_base = dev_addr;
    vec[2].iov_len  = iface_attr.device_addr_len;
    vec[3].iov_base = iface_addr;
    vec[3].iov_len  = iface_attr.iface_addr_len;
    vec[4].iov_base = ep_addr;
    vec[4].iov_len  = iface_attr.ep_addr_len;

    rte_call(perf, post_vec, vec, 5, &req);
    rte_call(perf, exchange_vec, req);

    /* Receive remote address/rkey data from each peer and connect to it */
    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        vec[0].iov_base = &va;
        vec[0].iov_len  = sizeof(va);
        vec[1].iov_base = rkey_buffer;
        vec[1].iov_len  = pd_attr.rkey_packed_size;
        vec[2].iov_base = dev_addr;
        vec[2].iov_len  = iface_attr.device_addr_len;
        vec[3].iov_base = iface_addr;
        vec[3].iov_len  = iface_attr.iface_addr_len;
        vec[4].iov_base = ep_addr;
        vec[4].iov_len  = iface_attr.ep_addr_len;

        rte_call(perf, recv_vec, i, vec, 5, req);

        perf->uct.peers[i].remote_addr = va;
        status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_unpack: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr, ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr, iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }
    }

    uct_perf_iface_flush_b(perf);
    rte_call(perf, barrier);
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
err:
    return status;
}
ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer, ucp_rkey_h *rkey_p)
{
    unsigned remote_md_index, remote_md_gap;
    unsigned rkey_index;
    unsigned md_count;
    ucs_status_t status;
    ucp_rkey_h rkey;
    uint8_t md_size;
    ucp_md_map_t md_map;
    void *p;

    /* Count the number of remote MDs in the rkey buffer */
    p = rkey_buffer;

    /* Read remote MD map */
    md_map = *(ucp_md_map_t*)p;

    ucs_trace("unpacking rkey with md_map 0x%x", md_map);

    if (md_map == 0) {
        /* Dummy key return ok */
        *rkey_p = &ucp_mem_dummy_rkey;
        return UCS_OK;
    }

    md_count = ucs_count_one_bits(md_map);
    p       += sizeof(ucp_md_map_t);

    /* Allocate rkey handle which holds UCT rkeys for all remote MDs.
     * We keep all of them to handle a future transport switch.
     */
    rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->uct[0]) * md_count), "ucp_rkey");
    if (rkey == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    rkey->md_map    = 0;
    remote_md_index = 0; /* Index of remote MD */
    rkey_index      = 0; /* Index of the rkey in the array */

    /* Unpack rkey of each UCT MD */
    while (md_map > 0) {
        md_size = *((uint8_t*)p++);

        /* Use bit operations to iterate through the indices of the remote MDs
         * as provided in the md_map. md_map always holds a bitmap of MD indices
         * that remain to be used. Every time we find the "gap" until the next
         * valid MD index using ffs operation. If some rkeys cannot be unpacked,
         * we remove them from the local map.
         */
        remote_md_gap    = ucs_ffs64(md_map); /* Find the offset for next MD index */
        remote_md_index += remote_md_gap;     /* Calculate next index of remote MD */
        md_map         >>= remote_md_gap;     /* Remove the gap from the map */
        ucs_assert(md_map & 1);
        ucs_assert_always(remote_md_index <= UCP_MD_INDEX_BITS);

        /* Unpack only reachable rkeys */
        if (UCS_BIT(remote_md_index) & ucp_ep_config(ep)->key.reachable_md_map) {
            ucs_assert(rkey_index < md_count);
            status = uct_rkey_unpack(p, &rkey->uct[rkey_index]);
            if (status != UCS_OK) {
                ucs_error("Failed to unpack remote key from remote md[%d]: %s",
                          remote_md_index, ucs_status_string(status));
                goto err_destroy;
            }

            ucs_trace("rkey[%d] for remote md %d is 0x%lx", rkey_index,
                      remote_md_index, rkey->uct[rkey_index].rkey);
            rkey->md_map |= UCS_BIT(remote_md_index);
            ++rkey_index;
        }

        ++remote_md_index;
        md_map >>= 1;
        p += md_size;
    }

    if (rkey->md_map == 0) {
        ucs_debug("The unpacked rkey from the destination is unreachable");
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy;
    }

    *rkey_p = rkey;
    return UCS_OK;

err_destroy:
    ucp_rkey_destroy(rkey);
err:
    return status;
}
ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer, ucp_rkey_h *rkey_p)
{
    unsigned remote_pd_index, remote_pd_gap;
    unsigned rkey_index;
    unsigned pd_count;
    ucs_status_t status;
    ucp_rkey_h rkey;
    uint8_t pd_size;
    uint64_t pd_map;
    void *p;

    /* Count the number of remote PDs in the rkey buffer */
    p = rkey_buffer;

    /* Read remote PD map */
    pd_map   = *(uint64_t*)p;
    pd_count = ucs_count_one_bits(pd_map);
    p       += sizeof(uint64_t);

    /* Allocate rkey handle which holds UCT rkeys for all remote PDs.
     * We keep all of them to handle a future transport switch.
     */
    rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->uct[0]) * pd_count), "ucp_rkey");
    if (rkey == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    rkey->pd_map    = 0;
    remote_pd_index = 0; /* Index of remote PD */
    rkey_index      = 0; /* Index of the rkey in the array */

    /* Unpack rkey of each UCT PD */
    ucs_trace("unpacking rkey with pd_map 0x%"PRIx64, pd_map);
    while (pd_map > 0) {
        pd_size = *((uint8_t*)p++);

        /* Use bit operations to iterate through the indices of the remote PDs
         * as provided in the pd_map. pd_map always holds a bitmap of PD indices
         * that remain to be used. Every time we find the "gap" until the next
         * valid PD index using ffs operation. If some rkeys cannot be unpacked,
         * we remove them from the local map.
         */
        remote_pd_gap    = ucs_ffs64(pd_map); /* Find the offset for next PD index */
        remote_pd_index += remote_pd_gap;     /* Calculate next index of remote PD */
        pd_map         >>= remote_pd_gap;     /* Remove the gap from the map */
        ucs_assert(pd_map & 1);

        /* Unpack only reachable rkeys */
        if (ep->uct.reachable_pds & UCS_BIT(remote_pd_index)) {
            ucs_assert(rkey_index < pd_count);
            status = uct_rkey_unpack(p, &rkey->uct[rkey_index]);
            if (status != UCS_OK) {
                ucs_error("Failed to unpack remote key from remote pd[%d]: %s",
                          remote_pd_index, ucs_status_string(status));
                goto err_destroy;
            }

            ucs_trace("rkey[%d] for remote pd %d is 0x%lx", rkey_index,
                      remote_pd_index, rkey->uct[rkey_index].rkey);
            rkey->pd_map |= UCS_BIT(remote_pd_index);
            ++rkey_index;
        }

        ++remote_pd_index;
        pd_map >>= 1;
        p += pd_size;
    }

    *rkey_p = rkey;
    return UCS_OK;

err_destroy:
    ucp_rkey_destroy(rkey);
err:
    return status;
}
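/*
 * Illustrative sketch, not part of the UCX sources: both versions of
 * ucp_ep_rkey_unpack() above walk a bitmap of remote memory/protection-domain
 * indices by repeatedly finding the lowest set bit (ucs_ffs64) and shifting
 * the map. The minimal standalone program below demonstrates that iteration
 * pattern on a hard-coded example map, using the compiler builtin
 * __builtin_ctzll() in place of ucs_ffs64(); the map value and the printing
 * are assumptions made for this example only.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t md_map = 0x29;  /* example bitmap: indices 0, 3 and 5 are present */
    unsigned index  = 0;     /* index of the current remote domain */

    while (md_map > 0) {
        /* Gap (number of zero bits) until the next set bit in the map */
        unsigned gap = (unsigned)__builtin_ctzll(md_map);

        index  += gap;   /* advance to the next valid domain index */
        md_map >>= gap;  /* drop the gap; the lowest bit is now set */

        printf("remote domain index %u\n", index);  /* prints 0, 3, 5 */

        ++index;         /* step past the bit we just consumed */
        md_map >>= 1;    /* remove it from the map */
    }
    return 0;
}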