/**
 * Pack the remote keys of all PDs registered in @a memh into a newly
 * allocated, self-describing buffer.
 *
 * Buffer layout: [uint64_t pd_map][uint8_t size, rkey bytes] per set PD bit.
 *
 * @param context        UCP context the memory handle belongs to.
 * @param memh           Memory handle whose per-PD keys are packed.
 * @param rkey_buffer_p  Filled with the allocated buffer (caller frees).
 * @param size_p         Filled with the total buffer size in bytes.
 *
 * @return UCS_OK, UCS_ERR_NO_MEMORY on allocation failure, or the error
 *         returned by uct_pd_mkey_pack().
 */
ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,
                           void **rkey_buffer_p, size_t *size_p)
{
    unsigned pd_index, uct_memh_index;
    void *rkey_buffer, *p;
    size_t size, pd_size;
    ucs_status_t status;

    ucs_trace("packing rkeys for buffer %p memh %p pd_map 0x%"PRIx64,
              memh->address, memh, memh->pd_map);

    /* Compute the worst-case packed size: the pd_map word plus, for every
     * PD, a one-byte length prefix and its packed rkey. */
    size = sizeof(uint64_t);
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        size += sizeof(uint8_t);
        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        /* The per-key size must fit the uint8_t length prefix below */
        ucs_assert_always(pd_size < UINT8_MAX);
        size += pd_size;
    }

    rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer");
    if (rkey_buffer == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    p = rkey_buffer;

    /* Write the PD map */
    *(uint64_t*)p = memh->pd_map;
    p += sizeof(uint64_t);

    /* Write both size and rkey_buffer for each UCT rkey */
    uct_memh_index = 0;
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        if (!(memh->pd_map & UCS_BIT(pd_index))) {
            continue;
        }

        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        *((uint8_t*)p++) = pd_size;
        /* BUGFIX: the pack status was previously ignored, which could hand
         * the caller a buffer containing a garbage rkey. */
        status = uct_pd_mkey_pack(context->pds[pd_index],
                                  memh->uct[uct_memh_index], p);
        if (status != UCS_OK) {
            ucs_free(rkey_buffer);
            return status;
        }
        ++uct_memh_index;
        p += pd_size;
    }

    *rkey_buffer_p = rkey_buffer;
    *size_p        = size;
    return UCS_OK;
}
/**
 * Pack the remote keys of all PDs registered in @a memh into a newly
 * allocated, self-describing buffer.
 *
 * Buffer layout: [ucp_pd_map_t pd_map][uint8_t size, rkey bytes] per set bit.
 * A zero-length (dummy) memh is packed as the shared dummy key instead of an
 * allocated buffer.
 *
 * @param context        UCP context the memory handle belongs to.
 * @param memh           Memory handle whose per-PD keys are packed.
 * @param rkey_buffer_p  Filled with the packed buffer.
 * @param size_p         Filled with the total buffer size in bytes.
 *
 * @return UCS_OK, UCS_ERR_NO_MEMORY, UCS_ERR_UNSUPPORTED if no PD produced a
 *         key, or the error returned by uct_pd_mkey_pack().
 */
ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,
                           void **rkey_buffer_p, size_t *size_p)
{
    unsigned pd_index, uct_memh_index;
    void *rkey_buffer, *p;
    size_t size, pd_size;
    ucs_status_t status;
    char UCS_V_UNUSED buf[128];

    /* NOTE(review): 0x%x assumes pd_map fits an unsigned int — confirm the
     * width of ucp_pd_map_t; a 64-bit map would need PRIx64 here. */
    ucs_trace("packing rkeys for buffer %p memh %p pd_map 0x%x",
              memh->address, memh, memh->pd_map);

    if (memh->length == 0) {
        /* dummy memh, return dummy key */
        *rkey_buffer_p = &ucp_mem_dummy_buffer;
        *size_p        = sizeof(ucp_mem_dummy_buffer);
        return UCS_OK;
    }

    /* Compute the worst-case packed size: the pd_map word plus, for every
     * PD, a one-byte length prefix and its packed rkey. */
    size = sizeof(ucp_pd_map_t);
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        size += sizeof(uint8_t);
        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        /* The per-key size must fit the uint8_t length prefix below */
        ucs_assert_always(pd_size < UINT8_MAX);
        size += pd_size;
    }

    rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer");
    if (rkey_buffer == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    p = rkey_buffer;

    /* Write the PD map */
    *(ucp_pd_map_t*)p = memh->pd_map;
    p += sizeof(ucp_pd_map_t);

    /* Write both size and rkey_buffer for each UCT rkey */
    uct_memh_index = 0;
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        if (!(memh->pd_map & UCS_BIT(pd_index))) {
            continue;
        }

        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        *((uint8_t*)p++) = pd_size;
        /* BUGFIX: the pack status was previously ignored, which could hand
         * the caller a buffer containing a garbage rkey. */
        status = uct_pd_mkey_pack(context->pds[pd_index],
                                  memh->uct[uct_memh_index], p);
        if (status != UCS_OK) {
            goto err_destroy;
        }
        ucs_trace("rkey[%d]=%s for pd[%d]=%s", uct_memh_index,
                  ucs_log_dump_hex(p, pd_size, buf, sizeof(buf)),
                  pd_index, context->pd_rscs[pd_index].pd_name);
        ++uct_memh_index;
        p += pd_size;
    }

    /* No PD could pack a key for this region - nothing usable to return */
    if (uct_memh_index == 0) {
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy;
    }

    *rkey_buffer_p = rkey_buffer;
    *size_p        = size;
    return UCS_OK;

err_destroy:
    ucs_free(rkey_buffer);
err:
    return status;
}
/**
 * Establish connectivity between all peers of the perf test: exchange device,
 * iface and ep addresses plus the local rkey via the RTE, then connect an
 * endpoint to every other group member.
 *
 * @param perf  Perf-test context; on success perf->uct.peers holds one
 *              connected endpoint, remote address and unpacked rkey per peer.
 *
 * @return UCS_OK on success; on failure all partially-created endpoints,
 *         rkeys and buffers are released and an error status is returned.
 */
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_pd_attr_t pd_attr;
    unsigned long va;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *req;

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err;
    }

    status = uct_pd_query(perf->uct.pd, &pd_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_pd_query: %s", ucs_status_string(status));
        goto err;
    }

    dev_addr    = calloc(1, iface_attr.device_addr_len);
    iface_addr  = calloc(1, iface_attr.iface_addr_len);
    ep_addr     = calloc(1, iface_attr.ep_addr_len);
    rkey_buffer = calloc(1, pd_attr.rkey_packed_size);
    /* BUGFIX: dev_addr was previously not NULL-checked, and the failure path
     * returned the stale UCS_OK left over from uct_pd_query(). */
    if ((dev_addr == NULL) || (iface_addr == NULL) || (ep_addr == NULL) ||
        (rkey_buffer == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(perf->uct.iface, iface_addr);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_iface_get_address: %s",
                      ucs_status_string(status));
            goto err_free;
        }
    }

    status = uct_pd_mkey_pack(perf->uct.pd, perf->uct.recv_mem.memh, rkey_buffer);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status));
        goto err_free;
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    /* calloc zeroes the array, so the cleanup loop below can tell created
     * endpoints/rkeys apart from untouched slots. */
    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        /* BUGFIX: previously fell through with status == UCS_OK */
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }
            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    /* Publish our buffer VA, rkey and addresses to the other peers */
    va              = (uintptr_t)perf->recv_buffer;
    vec[0].iov_base = &va;
    vec[0].iov_len  = sizeof(va);
    vec[1].iov_base = rkey_buffer;
    vec[1].iov_len  = pd_attr.rkey_packed_size;
    vec[2].iov_base = dev_addr;
    vec[2].iov_len  = iface_attr.device_addr_len;
    vec[3].iov_base = iface_addr;
    vec[3].iov_len  = iface_attr.iface_addr_len;
    vec[4].iov_base = ep_addr;
    vec[4].iov_len  = iface_attr.ep_addr_len;

    rte_call(perf, post_vec, vec, 5, &req);
    rte_call(perf, exchange_vec, req);

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        /* The buffers are reused for every peer; recv_vec overwrites them */
        vec[0].iov_base = &va;
        vec[0].iov_len  = sizeof(va);
        vec[1].iov_base = rkey_buffer;
        vec[1].iov_len  = pd_attr.rkey_packed_size;
        vec[2].iov_base = dev_addr;
        vec[2].iov_len  = iface_attr.device_addr_len;
        vec[3].iov_base = iface_addr;
        vec[3].iov_len  = iface_attr.iface_addr_len;
        vec[4].iov_base = ep_addr;
        vec[4].iov_len  = iface_attr.ep_addr_len;

        rte_call(perf, recv_vec, i, vec, 5, req);

        perf->uct.peers[i].remote_addr = va;
        status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_unpack: %s",
                      ucs_status_string(status));
            /* BUGFIX: was a bare `return status`, leaking peers, endpoints
             * and the address buffers. */
            goto err_destroy_eps;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr,
                                          ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr,
                                             iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s",
                      ucs_status_string(status));
            goto err_destroy_eps;
        }
    }

    uct_perf_iface_flush_b(perf);
    rte_call(perf, barrier);

    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    /* free(NULL) is a no-op, so partially-failed allocation is safe here */
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
err:
    return status;
}