/*
 * Create a "stub" ucp endpoint: a placeholder whose per-operation uct
 * endpoints are all stub endpoints, used before the real transports are
 * wired up.
 *
 * @param worker     Worker to create the endpoint on.
 * @param dest_uuid  UUID of the destination worker.
 * @param message    Debug string describing why the ep is created.
 * @param ep_p       Filled with the new endpoint on success.
 *
 * @return UCS_OK on success, otherwise the first failing status.
 */
ucs_status_t ucp_ep_create_stub(ucp_worker_h worker, uint64_t dest_uuid,
                                const char *message, ucp_ep_h *ep_p)
{
    ucs_status_t status;
    ucp_ep_op_t optype;
    ucp_ep_h ep = NULL;

    status = ucp_ep_new(worker, dest_uuid, "??", message, &ep);
    if (status != UCS_OK) {
        goto err;
    }

    /* Plant a stub uct endpoint in every operation slot */
    for (optype = 0; optype < UCP_EP_OP_LAST; ++optype) {
        status = ucp_stub_ep_create(ep, optype, 0, NULL, &ep->uct_eps[optype]);
        if (status != UCS_OK) {
            goto err_destroy_uct_eps;
        }
    }

    *ep_p = ep;
    return UCS_OK;

err_destroy_uct_eps:
    /* NOTE(review): this loop walks ALL op slots, including those past the
     * one that failed — it relies on ucp_ep_new() zero-initializing
     * uct_eps[] and on ucp_stub_ep_create() not leaving garbage in its
     * output slot on failure; verify both. */
    for (optype = 0; optype < UCP_EP_OP_LAST; ++optype) {
        if (ep->uct_eps[optype] != NULL) {
            uct_ep_destroy(ep->uct_eps[optype]);
        }
    }
    ucp_ep_delete(ep);
err:
    return status;
}
/*
 * Stop wireup for an endpoint: flush and destroy the auxiliary transport
 * endpoints, then remove the endpoint from the worker's hash.
 *
 * The flush loops busy-wait, driving worker progress until each endpoint
 * reports a completed flush — this guarantees no wireup traffic is still
 * in flight when the endpoints are destroyed.
 */
void ucp_ep_wireup_stop(ucp_ep_h ep)
{
    ucp_worker_h worker = ep->worker;

    ucs_trace_func("ep=%p", ep);

    if (ep->uct.next_ep != NULL) {
        /* spin until the pending traffic on the next-ep is flushed */
        while (uct_ep_flush(ep->uct.next_ep) != UCS_OK) {
            ucp_worker_progress(ep->worker);
        }
        uct_ep_destroy(ep->uct.next_ep);
    }

    if (ep->wireup_ep != NULL) {
        /* same for the auxiliary wireup endpoint */
        while (uct_ep_flush(ep->wireup_ep) != UCS_OK) {
            ucp_worker_progress(ep->worker);
        }
        uct_ep_destroy(ep->wireup_ep);
    }

    /* hash removal must be done with async callbacks blocked */
    UCS_ASYNC_BLOCK(&worker->async);
    sglib_hashed_ucp_ep_t_delete(worker->ep_hash, ep);
    UCS_ASYNC_UNBLOCK(&worker->async);
}
/* Destroy every per-lane uct endpoint of 'ep', releasing any pending
 * send requests queued on each one first. NULL lane slots are skipped. */
static void ucp_ep_destory_uct_eps(ucp_ep_h ep)
{
    ucp_lane_index_t idx;
    uct_ep_h tl_ep;

    for (idx = 0; idx < ucp_ep_num_lanes(ep); ++idx) {
        tl_ep = ep->uct_eps[idx];
        if (tl_ep != NULL) {
            /* drain pending queue before the endpoint goes away */
            uct_ep_pending_purge(tl_ep, ucp_request_release_pending_send, NULL);
            ucs_debug("destroy ep %p lane %d uct_ep %p", ep, idx, tl_ep);
            uct_ep_destroy(tl_ep);
        }
    }
}
/* Destroy the primary uct endpoint of every operation type on 'ep',
 * purging queued pending requests first. Non-primary slots (aliases of a
 * primary endpoint) are skipped so each uct ep is destroyed only once. */
static void ucp_ep_destory_uct_eps(ucp_ep_h ep)
{
    int op;
    uct_ep_h tl_ep;

    for (op = 0; op < UCP_EP_OP_LAST; ++op) {
        if (ucp_ep_is_op_primary(ep, op)) {
            tl_ep = ep->uct_eps[op];
            /* release requests still queued on this endpoint */
            uct_ep_pending_purge(tl_ep, ucp_ep_pending_req_release);
            ucs_debug("destroy ep %p op %d uct_ep %p", ep, op, tl_ep);
            uct_ep_destroy(tl_ep);
        }
    }
}
/*
 * Send function of the "dummy" uct endpoint that is planted while wireup is
 * in progress. It never sends anything: it always returns NO_RESOURCE so
 * the caller keeps retrying, and meanwhile checks whether the real
 * transport has become connected.
 */
static ucs_status_t ucp_dummy_ep_send_func(uct_ep_h uct_ep)
{
    ucp_dummy_ep_t *dummy_ep = ucs_derived_of(uct_ep, ucp_dummy_ep_t);
    ucp_ep_h ep = dummy_ep->ep;

    /*
     * We switch the endpoint in this function (instead in wireup code) since
     * this is guaranteed to run from the main thread.
     */
    sched_yield();
    ucs_async_check_miss(&ep->worker->async);
    if (ep->state & UCP_EP_STATE_REMOTE_CONNECTED) {
        /* Promote the wired-up transport ep to be the active one, and
         * destroy this dummy ep — note this destroys 'uct_ep' itself, so
         * nothing may touch it afterwards. */
        ep->uct.ep = ep->uct.next_ep;
        ep->uct.next_ep = NULL;
        uct_ep_destroy(&dummy_ep->super);
    }
    /* Caller will re-queue the operation and retry later */
    return UCS_ERR_NO_RESOURCE;
}
/* Release all transport resources of 'ep' and free the endpoint itself.
 * Pending requests on each lane are purged via the destroyed-ep callback
 * before the uct endpoint is destroyed. 'message' is appended to the
 * debug trace. */
void ucp_ep_destroy_internal(ucp_ep_h ep, const char *message)
{
    ucp_lane_index_t idx;
    uct_ep_h tl_ep;

    ucs_debug("destroy ep %p%s", ep, message);

    for (idx = 0; idx < ucp_ep_num_lanes(ep); ++idx) {
        tl_ep = ep->uct_eps[idx];
        if (tl_ep != NULL) {
            /* hand still-queued requests to the purge callback */
            uct_ep_pending_purge(tl_ep, ucp_destroyed_ep_pending_purge, ep);
            ucs_debug("destroy ep %p lane[%d]=%p", ep, idx, tl_ep);
            uct_ep_destroy(tl_ep);
        }
    }

    ucs_free(ep);
}
/*
 * Create a "stub" ucp endpoint with a single lane backed by a stub uct
 * endpoint. All operation types are mapped onto lane 0 until the real
 * transports are selected and wired up.
 *
 * @param worker     Worker to create the endpoint on.
 * @param dest_uuid  UUID of the destination worker.
 * @param message    Debug string describing why the ep is created.
 * @param ep_p       Filled with the new endpoint on success.
 *
 * @return UCS_OK on success, otherwise the first failing status.
 */
ucs_status_t ucp_ep_create_stub(ucp_worker_h worker, uint64_t dest_uuid,
                                const char *message, ucp_ep_h *ep_p)
{
    ucs_status_t status;
    ucp_ep_config_key_t key;
    ucp_ep_h ep = NULL;

    status = ucp_ep_new(worker, dest_uuid, "??", message, &ep);
    if (status != UCS_OK) {
        goto err;
    }

    /* all operations will use the first lane, which is a stub endpoint */
    memset(&key, 0, sizeof(key));
    key.rma_lane_map     = 1;
    key.amo_lane_map     = 1;
    key.reachable_md_map = 0; /* TODO */
    key.am_lane          = 0;
    key.rndv_lane        = 0;
    key.wireup_msg_lane  = 0;
    key.lanes[0]         = UCP_NULL_RESOURCE;
    key.num_lanes        = 1;
    /* NOTE(review): memset fills every byte with UCP_NULL_LANE — assumes
     * UCP_NULL_LANE fits in a single byte and amo_lanes elements are
     * byte-sized; verify against the type definitions. */
    memset(key.amo_lanes, UCP_NULL_LANE, sizeof(key.amo_lanes));

    ep->cfg_index = ucp_worker_get_ep_config(worker, &key);
    ep->am_lane   = 0;

    status = ucp_stub_ep_create(ep, &ep->uct_eps[0]);
    if (status != UCS_OK) {
        goto err_destroy_uct_eps;
    }

    *ep_p = ep;
    return UCS_OK;

err_destroy_uct_eps:
    /* NOTE(review): reached only when ucp_stub_ep_create() failed — assumes
     * it leaves a valid (or NULL-safe) handle in uct_eps[0]; confirm. */
    uct_ep_destroy(ep->uct_eps[0]);
    ucp_ep_delete(ep);
err:
    return status;
}
/* Tear down the per-peer endpoints and rkeys created for the perf test.
 * Synchronizes with all ranks first so nobody is still sending, then
 * unregisters the AM handler and releases every remote peer's resources. */
static void uct_perf_test_cleanup_endpoints(ucx_perf_context_t *perf)
{
    unsigned nranks, self, peer;

    /* make sure all ranks are done before dismantling anything */
    rte_call(perf, barrier);

    /* detach the test's active-message handler */
    uct_iface_set_am_handler(perf->uct.iface, UCT_PERF_TEST_AM_ID, NULL, NULL,
                             UCT_AM_CB_FLAG_SYNC);

    nranks = rte_call(perf, group_size);
    self   = rte_call(perf, group_index);

    for (peer = 0; peer < nranks; ++peer) {
        if (peer == self) {
            continue; /* no endpoint to ourselves */
        }
        uct_rkey_release(&perf->uct.peers[peer].rkey);
        if (perf->uct.peers[peer].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[peer].ep);
        }
    }

    free(perf->uct.peers);
}
/*
 * Start connection establishment ("wireup") of a ucp endpoint toward the
 * worker described by 'address'.
 *
 * Flow:
 *   1. Pick the best transport for short active messages.
 *   2. If it supports direct iface-to-iface connection, connect immediately.
 *   3. Otherwise pick an auxiliary "wireup" transport, plant a dummy uct
 *      endpoint (so sends return NO_RESOURCE meanwhile), create the real
 *      transport endpoint, and send a connection request over the wireup ep.
 *
 * The whole sequence runs with the worker async lock held.
 *
 * @return UCS_OK on success; on failure all partially created endpoints are
 *         destroyed and the ep is removed from the worker hash.
 */
ucs_status_t ucp_ep_wireup_start(ucp_ep_h ep, ucp_address_t *address)
{
    ucp_worker_h worker = ep->worker;
    struct sockaddr *am_short_addr;
    ucp_rsc_index_t wireup_rsc_index;
    struct sockaddr *wireup_addr;
    uct_iface_attr_t *iface_attr;
    uct_iface_h iface;
    ucp_rsc_index_t dst_rsc_index, wireup_dst_rsc_index;
    ucp_rsc_index_t wireup_dst_pd_index;
    ucs_status_t status;

    UCS_ASYNC_BLOCK(&worker->async);

    /* register the endpoint under the peer's uuid before connecting */
    ep->dest_uuid = ucp_address_uuid(address);
    sglib_hashed_ucp_ep_t_add(worker->ep_hash, ep);

    ucs_debug("connecting 0x%"PRIx64"->0x%"PRIx64, worker->uuid, ep->dest_uuid);

    /*
     * Select best transport for active messages
     */
    status = ucp_pick_best_wireup(worker, address, ucp_am_short_score_func,
                                  &ep->uct.rsc_index, &dst_rsc_index,
                                  &ep->uct.dst_pd_index, &am_short_addr,
                                  &ep->uct.reachable_pds, "short_am");
    if (status != UCS_OK) {
        ucs_error("No transport for short active message");
        goto err;
    }

    iface      = worker->ifaces[ep->uct.rsc_index];
    iface_attr = &worker->iface_attrs[ep->uct.rsc_index];

    /*
     * If the selected transport can be connected directly, do it.
     */
    if (iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_ep_create_connected(iface, am_short_addr,
                                         &ep->uct.next_ep);
        if (status != UCS_OK) {
            ucs_debug("failed to create ep");
            goto err;
        }
        ep->state |= UCP_EP_STATE_LOCAL_CONNECTED;
        ucp_ep_remote_connected(ep);
        goto out;
    }

    /*
     * If we cannot connect the selected transport directly, select another
     * transport for doing the wireup.
     */
    status = ucp_pick_best_wireup(worker, address, ucp_wireup_score_func,
                                  &wireup_rsc_index, &wireup_dst_rsc_index,
                                  &wireup_dst_pd_index, &wireup_addr,
                                  &ep->uct.reachable_pds, "wireup");
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ep_create_connected(worker->ifaces[wireup_rsc_index],
                                     wireup_addr, &ep->wireup_ep);
    if (status != UCS_OK) {
        goto err;
    }

    /* the main transport must support ep-to-ep connection for this path */
    if (!(iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP)) {
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy_wireup_ep;
    }

    /*
     * Until the transport is connected, send operations should return NO_RESOURCE.
     * Plant a dummy endpoint object which will do it.
     */
    status = UCS_CLASS_NEW(ucp_dummy_ep_t, &ep->uct.ep, ep);
    if (status != UCS_OK) {
        goto err_destroy_wireup_ep;
    }

    /*
     * Create endpoint for the transport we need to wire-up.
     */
    status = uct_ep_create(iface, &ep->uct.next_ep);
    if (status != UCS_OK) {
        goto err_destroy_uct_ep;
    }

    /*
     * Send initial connection request for wiring-up the transport.
     */
    status = ucp_ep_wireup_send(ep, ep->wireup_ep, UCP_AM_ID_CONN_REQ,
                                dst_rsc_index);
    if (status != UCS_OK) {
        goto err_destroy_next_ep;
    }

out:
    UCS_ASYNC_UNBLOCK(&worker->async);
    return UCS_OK;

    /* unwind in reverse creation order */
err_destroy_next_ep:
    uct_ep_destroy(ep->uct.next_ep);
err_destroy_uct_ep:
    uct_ep_destroy(ep->uct.ep);
err_destroy_wireup_ep:
    uct_ep_destroy(ep->wireup_ep);
err:
    sglib_hashed_ucp_ep_t_delete(worker->ep_hash, ep);
    UCS_ASYNC_UNBLOCK(&worker->async);
    return status;
}
int main(int argc, char **argv) { /* MPI is initially used to swap the endpoint and interface addresses so each * process has knowledge of the others. */ int partner; int size, rank; uct_device_addr_t *own_dev, *peer_dev; uct_iface_addr_t *own_iface, *peer_iface; uct_ep_addr_t *own_ep, *peer_ep; ucs_status_t status; /* status codes for UCS */ uct_ep_h ep; /* Remote endpoint */ ucs_async_context_t async; /* Async event context manages times and fd notifications */ uint8_t id = 0; void *arg; const char *tl_name = NULL; const char *dev_name = NULL; struct iface_info if_info; int exit_fail = 1; optind = 1; if (3 == argc) { dev_name = argv[1]; tl_name = argv[2]; } else { printf("Usage: %s (<dev-name> <tl-name>)\n", argv[0]); fflush(stdout); return 1; } MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { fprintf(stderr, "Failed to create enough mpi processes\n"); goto out; } MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (0 == rank) { partner = 1; } else if (1 == rank) { partner = 0; } else { /* just wait for other processes in MPI_Finalize */ exit_fail = 0; goto out; } /* Initialize context */ status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD); CHKERR_JUMP(UCS_OK != status, "init async context", out); /* Create a worker object */ status = uct_worker_create(&async, UCS_THREAD_MODE_SINGLE, &if_info.worker); CHKERR_JUMP(UCS_OK != status, "create worker", out_cleanup_async); /* Search for the desired transport */ status = dev_tl_lookup(dev_name, tl_name, &if_info); CHKERR_JUMP(UCS_OK != status, "find supported device and transport", out_destroy_worker); /* Expect that addr len is the same on both peers */ own_dev = (uct_device_addr_t*)calloc(2, if_info.attr.device_addr_len); CHKERR_JUMP(NULL == own_dev, "allocate memory for dev addrs", out_destroy_iface); peer_dev = (uct_device_addr_t*)((char*)own_dev + if_info.attr.device_addr_len); own_iface = (uct_iface_addr_t*)calloc(2, if_info.attr.iface_addr_len); CHKERR_JUMP(NULL == own_iface, 
"allocate memory for if addrs", out_free_dev_addrs); peer_iface = (uct_iface_addr_t*)((char*)own_iface + if_info.attr.iface_addr_len); /* Get device address */ status = uct_iface_get_device_address(if_info.iface, own_dev); CHKERR_JUMP(UCS_OK != status, "get device address", out_free_if_addrs); MPI_Sendrecv(own_dev, if_info.attr.device_addr_len, MPI_BYTE, partner, 0, peer_dev, if_info.attr.device_addr_len, MPI_BYTE, partner,0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); status = uct_iface_is_reachable(if_info.iface, peer_dev, NULL); CHKERR_JUMP(0 == status, "reach the peer", out_free_if_addrs); /* Get interface address */ if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { status = uct_iface_get_address(if_info.iface, own_iface); CHKERR_JUMP(UCS_OK != status, "get interface address", out_free_if_addrs); MPI_Sendrecv(own_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner, 0, peer_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner,0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } /* Again, expect that ep addr len is the same on both peers */ own_ep = (uct_ep_addr_t*)calloc(2, if_info.attr.ep_addr_len); CHKERR_JUMP(NULL == own_ep, "allocate memory for ep addrs", out_free_if_addrs); peer_ep = (uct_ep_addr_t*)((char*)own_ep + if_info.attr.ep_addr_len); if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Create new endpoint */ status = uct_ep_create(if_info.iface, &ep); CHKERR_JUMP(UCS_OK != status, "create endpoint", out_free_ep_addrs); /* Get endpoint address */ status = uct_ep_get_address(ep, own_ep); CHKERR_JUMP(UCS_OK != status, "get endpoint address", out_free_ep); } MPI_Sendrecv(own_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0, peer_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Connect endpoint to a remote endpoint */ status = uct_ep_connect_to_ep(ep, peer_dev, peer_ep); MPI_Barrier(MPI_COMM_WORLD); } else if (if_info.attr.cap.flags & 
UCT_IFACE_FLAG_CONNECT_TO_IFACE) { /* Create an endpoint which is connected to a remote interface */ status = uct_ep_create_connected(if_info.iface, peer_dev, peer_iface, &ep); } else { status = UCS_ERR_UNSUPPORTED; } CHKERR_JUMP(UCS_OK != status, "connect endpoint", out_free_ep); /*Set active message handler */ status = uct_iface_set_am_handler(if_info.iface, id, hello_world, arg, UCT_AM_CB_FLAG_SYNC); CHKERR_JUMP(UCS_OK != status, "set callback", out_free_ep); if (0 == rank) { uint64_t header; char payload[8]; unsigned length = sizeof(payload); /* Send active message to remote endpoint */ status = uct_ep_am_short(ep, id, header, payload, length); CHKERR_JUMP(UCS_OK != status, "send active msg", out_free_ep); } else if (1 == rank) { while (holder) { /* Explicitly progress any outstanding active message requests */ uct_worker_progress(if_info.worker); } } /* Everything is fine, we need to call MPI_Finalize rather than MPI_Abort */ exit_fail = 0; out_free_ep: uct_ep_destroy(ep); out_free_ep_addrs: free(own_ep); out_free_if_addrs: free(own_iface); out_free_dev_addrs: free(own_dev); out_destroy_iface: uct_iface_close(if_info.iface); uct_md_close(if_info.pd); out_destroy_worker: uct_worker_destroy(if_info.worker); out_cleanup_async: ucs_async_context_cleanup(&async); out: (0 == exit_fail) ? MPI_Finalize() : MPI_Abort(MPI_COMM_WORLD, 1); return exit_fail; }
/*
 * Exchange rkeys and device/iface/ep addresses with every member of the
 * perf-test group through the RTE, and establish a connected endpoint to
 * each remote peer.
 *
 * Fix vs. the original: if the calloc of perf->uct.peers failed, the code
 * jumped to err_free with 'status' still holding UCS_OK from the previous
 * successful call, so the function reported success on an out-of-memory
 * failure. It now sets UCS_ERR_NO_MEMORY explicitly.
 *
 * @return UCS_OK on success; on failure all created endpoints, unpacked
 *         rkeys and buffers are released.
 */
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    const size_t buffer_size = 2048;
    ucx_perf_ep_info_t info, *remote_info;
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_md_attr_t md_attr;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *buffer;
    void *req;

    buffer = malloc(buffer_size);
    if (buffer == NULL) {
        ucs_error("Failed to allocate RTE buffer");
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err_free;
    }

    status = uct_md_query(perf->uct.md, &md_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_md_query: %s", ucs_status_string(status));
        goto err_free;
    }

    /* only pack an rkey if the MD can actually register/allocate memory */
    if (md_attr.cap.flags & (UCT_MD_FLAG_ALLOC|UCT_MD_FLAG_REG)) {
        info.rkey_size = md_attr.rkey_packed_size;
    } else {
        info.rkey_size = 0;
    }
    info.uct.dev_addr_len   = iface_attr.device_addr_len;
    info.uct.iface_addr_len = iface_attr.iface_addr_len;
    info.uct.ep_addr_len    = iface_attr.ep_addr_len;
    info.recv_buffer        = (uintptr_t)perf->recv_buffer;

    /* carve the exchange buffer: [rkey][dev_addr][iface_addr][ep_addr] */
    rkey_buffer = buffer;
    dev_addr    = (void*)rkey_buffer + info.rkey_size;
    iface_addr  = (void*)dev_addr + info.uct.dev_addr_len;
    ep_addr     = (void*)iface_addr + info.uct.iface_addr_len;
    ucs_assert_always((void*)ep_addr + info.uct.ep_addr_len <=
                      buffer + buffer_size);

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    status = uct_iface_get_address(perf->uct.iface, iface_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (info.rkey_size > 0) {
        status = uct_md_mkey_pack(perf->uct.md, perf->uct.recv_mem.memh,
                                  rkey_buffer);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status));
            goto err_free;
        }
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        /* BUGFIX: previously fell through with a stale UCS_OK status */
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }
            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    /* publish our info + packed addresses to the group */
    vec[0].iov_base = &info;
    vec[0].iov_len  = sizeof(info);
    vec[1].iov_base = buffer;
    vec[1].iov_len  = info.rkey_size + info.uct.dev_addr_len +
                      info.uct.iface_addr_len + info.uct.ep_addr_len;

    rte_call(perf, post_vec, vec, 2, &req);
    rte_call(perf, exchange_vec, req);

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        rte_call(perf, recv, i, buffer, buffer_size, req);

        /* unpack the remote side's info using the same buffer layout */
        remote_info = buffer;
        rkey_buffer = remote_info + 1;
        dev_addr    = (void*)rkey_buffer + remote_info->rkey_size;
        iface_addr  = (void*)dev_addr + remote_info->uct.dev_addr_len;
        ep_addr     = (void*)iface_addr + remote_info->uct.iface_addr_len;
        perf->uct.peers[i].remote_addr = remote_info->recv_buffer;

        if (!uct_iface_is_reachable(perf->uct.iface, dev_addr,
                                    remote_info->uct.iface_addr_len ?
                                    iface_addr : NULL)) {
            ucs_error("Destination is unreachable");
            status = UCS_ERR_UNREACHABLE;
            goto err_destroy_eps;
        }

        if (remote_info->rkey_size > 0) {
            status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_rkey_unpack: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
        } else {
            perf->uct.peers[i].rkey.handle = NULL;
            perf->uct.peers[i].rkey.type   = NULL;
            perf->uct.peers[i].rkey.rkey   = UCT_INVALID_RKEY;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr,
                                          ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr,
                                             iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s",
                      ucs_status_string(status));
            goto err_destroy_eps;
        }
    }

    uct_perf_iface_flush_b(perf);

    free(buffer);
    rte_call(perf, barrier);
    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(buffer);
err:
    return status;
}
/*
 * Select a lane configuration for 'ep' based on the remote address list,
 * switch the endpoint to that configuration, and connect each lane's uct
 * endpoint. If the computed configuration equals the current one, this is
 * a no-op. Reconfiguring an already-configured, non-stub endpoint is not
 * supported and aborts with ucs_fatal.
 *
 * @param ep             Endpoint to initialize.
 * @param address_count  Number of entries in address_list.
 * @param address_list   Remote address entries to connect to.
 * @param addr_indices   Per-lane index into address_list (filled by lane
 *                       selection).
 *
 * @return UCS_OK on success; on failure all lane endpoints created so far
 *         are destroyed and their slots reset to NULL.
 */
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count,
                                   const ucp_address_entry_t *address_list,
                                   uint8_t *addr_indices)
{
    ucp_worker_h worker = ep->worker;
    ucp_ep_config_key_t key;
    uint16_t new_cfg_index;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uint8_t conn_flag;
    char str[32];

    ucs_trace("ep %p: initialize lanes", ep);

    status = ucp_wireup_select_lanes(ep, address_count, address_list,
                                     addr_indices, &key);
    if (status != UCS_OK) {
        goto err;
    }

    /* keep previously discovered reachability information */
    key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map;

    new_cfg_index = ucp_worker_get_ep_config(worker, &key);
    if ((ep->cfg_index == new_cfg_index)) {
        return UCS_OK; /* No change */
    }

    if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) {
        /*
         * TODO handle a case where we have to change lanes and reconfigure the ep:
         *
         * - if we already have uct ep connected to an address - move it to the new lane index
         * - if we don't yet have connection to an address - create it
         * - if an existing lane is not connected anymore - delete it (possibly)
         * - if the configuration has changed - replay all pending operations on all lanes -
         *   need that every pending callback would return, in case of failure, the number
         *   of lane it wants to be queued on.
         */
        ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep,
                  ep->cfg_index, new_cfg_index);
        ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key,
                                "old", NULL);
        ucp_wireup_print_config(worker->context, &key, "new", NULL);
        ucs_fatal("endpoint reconfiguration not supported yet");
    }

    ep->cfg_index = new_cfg_index;
    ep->am_lane   = key.am_lane;

    snprintf(str, sizeof(str), "ep %p", ep);
    ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str,
                            addr_indices);

    ucs_trace("ep %p: connect lanes", ep);

    /* establish connections on all underlying endpoints */
    conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED;
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        status = ucp_wireup_connect_lane(ep, lane, address_count, address_list,
                                         addr_indices[lane]);
        if (status != UCS_OK) {
            goto err;
        }

        if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) {
            conn_flag = 0; /* If we have a p2p transport, we're not connected */
        }
    }
    ep->flags |= conn_flag;

    return UCS_OK;

err:
    /* destroy whatever lane endpoints were already created */
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ep->uct_eps[lane] != NULL) {
            uct_ep_destroy(ep->uct_eps[lane]);
            ep->uct_eps[lane] = NULL;
        }
    }
    return status;
}
int main(int argc, char **argv) { /* MPI is initially used to swap the endpoint and interface addresses so each * process has knowledge of the others. */ MPI_Status mpi_status; int partner; int size; struct sockaddr *ep_addr; /* Endpoint address */ struct sockaddr *iface_addr; /* Interface address */ ucs_status_t status; /* status codes for UCS */ ucs_thread_mode_t thread_mode = UCS_THREAD_MODE_SINGLE; /* Specifies thread sharing mode of an object */ uct_ep_h ep; /* Remote endpoint */ void *arg; MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { fprintf(stderr, "Failed to create enough mpi processes.\n");fflush(stderr); return 1; } MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (0 == rank) { partner = 1; } else if (1 == rank) { partner = 0; } else { MPI_Finalize(); return 0; } /* Initialize context */ status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD); if (UCS_OK != status) { fprintf(stderr, "Failed to init async context.\n");fflush(stderr); goto out; } /* Create a worker object */ status = uct_worker_create(&async, thread_mode, &worker); if (UCS_OK != status) { fprintf(stderr, "Failed to create worker.\n");fflush(stderr); goto out_cleanup_async; } /* The device and tranport names are determined by latency */ status = dev_tl_lookup(); if (UCS_OK != status) { fprintf(stderr, "Failed to find supported device and transport\n");fflush(stderr); goto out_destroy_worker; } iface_addr = calloc(1, iface_attr.iface_addr_len); ep_addr = calloc(1, iface_attr.ep_addr_len); if ((NULL == iface_addr) || (NULL == ep_addr)) { goto out_destroy_iface; } /* Get interface address */ status = uct_iface_get_address(iface, iface_addr); if (UCS_OK != status) { fprintf(stderr, "Failed to get interface address.\n");fflush(stderr); goto out_free; } if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Create new endpoint */ status = uct_ep_create(iface, &ep); if (UCS_OK != status) { fprintf(stderr, "Failed to create endpoint.\n");fflush(stderr); goto 
out_free; } /* Get endpoint address */ status = uct_ep_get_address(ep, ep_addr); if (UCS_OK != status) { fprintf(stderr, "Failed to get endpoint address.\n");fflush(stderr); goto out_free_ep; } } /* Communicate interface and endpoint addresses to corresponding process */ MPI_Send(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD); MPI_Recv(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status); MPI_Send(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD); MPI_Recv(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status); if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Connect endpoint to a remote endpoint */ status = uct_ep_connect_to_ep(ep, ep_addr); } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { /* Create an endpoint which is connected to a remote interface */ status = uct_ep_create_connected(iface, iface_addr, &ep); } else status = UCS_ERR_UNSUPPORTED; if (UCS_OK != status) { fprintf(stderr, "Failed to connect endpoint\n");fflush(stderr); goto out_free_ep; } uint8_t id = 0; /* Tag for active message */ /*Set active message handler */ status = uct_iface_set_am_handler(iface, id, hello_world, arg); if (UCS_OK != status) { fprintf(stderr, "Failed to set callback.\n");fflush(stderr); goto out_free_ep; } if (0 == rank) { uint64_t header; char payload[8]; unsigned length = sizeof(payload); /* Send active message to remote endpoint */ status = uct_ep_am_short(ep, id, header, payload, length); } else if (1 == rank) { while (holder) { /* Explicitly progress any outstanding active message requests */ uct_worker_progress(worker); } } out_free_ep: uct_ep_destroy(ep); out_free: free(iface_addr); free(ep_addr); out_destroy_iface: uct_iface_close(iface); uct_pd_close(pd); out_destroy_worker: uct_worker_destroy(worker); out_cleanup_async: ucs_async_context_cleanup(&async); out: MPI_Finalize(); return 0; }
/*
 * Destroy a transport endpoint, first purging any pending requests queued
 * on it so they are released rather than leaked.
 *
 * @param ep      Owning ucp endpoint (currently unused in this body).
 * @param uct_ep  Transport endpoint to destroy; must not be NULL
 *                (enforced by the assertion below).
 */
void ucp_ep_destroy_uct_ep_safe(ucp_ep_h ep, uct_ep_h uct_ep)
{
    ucs_assert_always(uct_ep != NULL);
    /* release queued pending requests before the endpoint goes away */
    uct_ep_pending_purge(uct_ep, ucp_pending_req_release);
    uct_ep_destroy(uct_ep);
}
/*
 * Exchange rkey and device/iface/ep addresses with every member of the
 * perf-test group and establish a connected endpoint to each remote peer
 * (older PD-based variant).
 *
 * Fixes vs. the original:
 *  - allocation failures (address buffers, rkey buffer, peers array) left
 *    'status' holding UCS_OK from the last successful call, so the
 *    function reported success on out-of-memory; status is now set to
 *    UCS_ERR_NO_MEMORY. The dev_addr allocation was also never checked.
 *  - a uct_rkey_unpack failure inside the peer loop did 'return status;',
 *    leaking the peers array, all created endpoints, previously unpacked
 *    rkeys and the four address buffers; it now unwinds through
 *    err_destroy_eps.
 *
 * @return UCS_OK on success, error status otherwise.
 */
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_pd_attr_t pd_attr;
    unsigned long va;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *req;

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err;
    }

    status = uct_pd_query(perf->uct.pd, &pd_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_pd_query: %s", ucs_status_string(status));
        goto err;
    }

    dev_addr    = calloc(1, iface_attr.device_addr_len);
    iface_addr  = calloc(1, iface_attr.iface_addr_len);
    ep_addr     = calloc(1, iface_attr.ep_addr_len);
    rkey_buffer = calloc(1, pd_attr.rkey_packed_size);
    /* BUGFIX: dev_addr was never checked, and status was left UCS_OK */
    if ((dev_addr == NULL) || (iface_addr == NULL) || (ep_addr == NULL) ||
        (rkey_buffer == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(perf->uct.iface, iface_addr);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_iface_get_address: %s",
                      ucs_status_string(status));
            goto err_free;
        }
    }

    status = uct_pd_mkey_pack(perf->uct.pd, perf->uct.recv_mem.memh,
                              rkey_buffer);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status));
        goto err_free;
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        /* BUGFIX: previously fell through with a stale UCS_OK status */
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }
            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s",
                          ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    /* publish our buffer VA, rkey and addresses to the group */
    va = (uintptr_t)perf->recv_buffer;
    vec[0].iov_base = &va;
    vec[0].iov_len  = sizeof(va);
    vec[1].iov_base = rkey_buffer;
    vec[1].iov_len  = pd_attr.rkey_packed_size;
    vec[2].iov_base = dev_addr;
    vec[2].iov_len  = iface_attr.device_addr_len;
    vec[3].iov_base = iface_addr;
    vec[3].iov_len  = iface_attr.iface_addr_len;
    vec[4].iov_base = ep_addr;
    vec[4].iov_len  = iface_attr.ep_addr_len;

    rte_call(perf, post_vec, vec, 5, &req);
    rte_call(perf, exchange_vec, req);

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }

        /* receive the remote side's data into the same staging buffers */
        vec[0].iov_base = &va;
        vec[0].iov_len  = sizeof(va);
        vec[1].iov_base = rkey_buffer;
        vec[1].iov_len  = pd_attr.rkey_packed_size;
        vec[2].iov_base = dev_addr;
        vec[2].iov_len  = iface_attr.device_addr_len;
        vec[3].iov_base = iface_addr;
        vec[3].iov_len  = iface_attr.iface_addr_len;
        vec[4].iov_base = ep_addr;
        vec[4].iov_len  = iface_attr.ep_addr_len;

        rte_call(perf, recv_vec, i, vec, 5, req);

        perf->uct.peers[i].remote_addr = va;
        status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_unpack: %s",
                      ucs_status_string(status));
            /* BUGFIX: was 'return status;', leaking peers, endpoints,
             * unpacked rkeys and the address buffers */
            goto err_destroy_eps;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr,
                                          ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr,
                                             iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s",
                      ucs_status_string(status));
            goto err_destroy_eps;
        }
    }

    uct_perf_iface_flush_b(perf);

    rte_call(perf, barrier);
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
err:
    return status;
}