static void uct_perf_cleanup(ucx_perf_context_t *perf) { uct_perf_test_cleanup_endpoints(perf); uct_perf_test_free_mem(perf); uct_iface_close(perf->uct.iface); uct_pd_close(perf->uct.pd); uct_worker_destroy(perf->uct.worker); ucs_async_context_cleanup(&perf->uct.async); }
static ucs_status_t print_tl_info(uct_md_h md, const char *tl_name, uct_tl_resource_desc_t *resources, unsigned num_resources, int print_opts, ucs_config_print_flags_t print_flags) { ucs_async_context_t async; uct_worker_h worker; ucs_status_t status; unsigned i; status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD); if (status != UCS_OK) { return status; } /* coverity[alloc_arg] */ status = uct_worker_create(&async, UCS_THREAD_MODE_MULTI, &worker); if (status != UCS_OK) { goto out; } printf("#\n"); printf("# Transport: %s\n", tl_name); printf("#\n"); if (num_resources == 0) { printf("# (No supported devices found)\n"); } for (i = 0; i < num_resources; ++i) { ucs_assert(!strcmp(tl_name, resources[i].tl_name)); print_iface_info(worker, md, &resources[i]); } uct_worker_destroy(worker); out: ucs_async_context_cleanup(&async); return status; }
int main(int argc, char **argv) { /* MPI is initially used to swap the endpoint and interface addresses so each * process has knowledge of the others. */ int partner; int size, rank; uct_device_addr_t *own_dev, *peer_dev; uct_iface_addr_t *own_iface, *peer_iface; uct_ep_addr_t *own_ep, *peer_ep; ucs_status_t status; /* status codes for UCS */ uct_ep_h ep; /* Remote endpoint */ ucs_async_context_t async; /* Async event context manages times and fd notifications */ uint8_t id = 0; void *arg; const char *tl_name = NULL; const char *dev_name = NULL; struct iface_info if_info; int exit_fail = 1; optind = 1; if (3 == argc) { dev_name = argv[1]; tl_name = argv[2]; } else { printf("Usage: %s (<dev-name> <tl-name>)\n", argv[0]); fflush(stdout); return 1; } MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { fprintf(stderr, "Failed to create enough mpi processes\n"); goto out; } MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (0 == rank) { partner = 1; } else if (1 == rank) { partner = 0; } else { /* just wait for other processes in MPI_Finalize */ exit_fail = 0; goto out; } /* Initialize context */ status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD); CHKERR_JUMP(UCS_OK != status, "init async context", out); /* Create a worker object */ status = uct_worker_create(&async, UCS_THREAD_MODE_SINGLE, &if_info.worker); CHKERR_JUMP(UCS_OK != status, "create worker", out_cleanup_async); /* Search for the desired transport */ status = dev_tl_lookup(dev_name, tl_name, &if_info); CHKERR_JUMP(UCS_OK != status, "find supported device and transport", out_destroy_worker); /* Expect that addr len is the same on both peers */ own_dev = (uct_device_addr_t*)calloc(2, if_info.attr.device_addr_len); CHKERR_JUMP(NULL == own_dev, "allocate memory for dev addrs", out_destroy_iface); peer_dev = (uct_device_addr_t*)((char*)own_dev + if_info.attr.device_addr_len); own_iface = (uct_iface_addr_t*)calloc(2, if_info.attr.iface_addr_len); CHKERR_JUMP(NULL == own_iface, "allocate memory for if addrs", out_free_dev_addrs); peer_iface = (uct_iface_addr_t*)((char*)own_iface + if_info.attr.iface_addr_len); /* Get device address */ status = uct_iface_get_device_address(if_info.iface, own_dev); CHKERR_JUMP(UCS_OK != status, "get device address", out_free_if_addrs); MPI_Sendrecv(own_dev, if_info.attr.device_addr_len, MPI_BYTE, partner, 0, peer_dev, if_info.attr.device_addr_len, MPI_BYTE, partner,0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); status = uct_iface_is_reachable(if_info.iface, peer_dev, NULL); CHKERR_JUMP(0 == status, "reach the peer", out_free_if_addrs); /* Get interface address */ if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { status = uct_iface_get_address(if_info.iface, own_iface); CHKERR_JUMP(UCS_OK != status, "get interface address", out_free_if_addrs); MPI_Sendrecv(own_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner, 0, peer_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner,0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } /* Again, expect that ep addr len is the same on both peers */ own_ep = (uct_ep_addr_t*)calloc(2, if_info.attr.ep_addr_len); CHKERR_JUMP(NULL == own_ep, "allocate memory for ep addrs", out_free_if_addrs); peer_ep = (uct_ep_addr_t*)((char*)own_ep + if_info.attr.ep_addr_len); if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Create new endpoint */ status = uct_ep_create(if_info.iface, &ep); CHKERR_JUMP(UCS_OK != status, "create endpoint", out_free_ep_addrs); /* Get endpoint address */ status = uct_ep_get_address(ep, own_ep); CHKERR_JUMP(UCS_OK != status, "get endpoint address", out_free_ep); } MPI_Sendrecv(own_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0, peer_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Connect endpoint to a remote endpoint */ status = uct_ep_connect_to_ep(ep, peer_dev, peer_ep); MPI_Barrier(MPI_COMM_WORLD); } else if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { /* Create an endpoint which is connected to a remote interface */ status = uct_ep_create_connected(if_info.iface, peer_dev, peer_iface, &ep); } else { status = UCS_ERR_UNSUPPORTED; } CHKERR_JUMP(UCS_OK != status, "connect endpoint", out_free_ep); /*Set active message handler */ status = uct_iface_set_am_handler(if_info.iface, id, hello_world, arg, UCT_AM_CB_FLAG_SYNC); CHKERR_JUMP(UCS_OK != status, "set callback", out_free_ep); if (0 == rank) { uint64_t header; char payload[8]; unsigned length = sizeof(payload); /* Send active message to remote endpoint */ status = uct_ep_am_short(ep, id, header, payload, length); CHKERR_JUMP(UCS_OK != status, "send active msg", out_free_ep); } else if (1 == rank) { while (holder) { /* Explicitly progress any outstanding active message requests */ uct_worker_progress(if_info.worker); } } /* Everything is fine, we need to call MPI_Finalize rather than MPI_Abort */ exit_fail = 0; out_free_ep: uct_ep_destroy(ep); out_free_ep_addrs: free(own_ep); out_free_if_addrs: free(own_iface); out_free_dev_addrs: free(own_dev); out_destroy_iface: uct_iface_close(if_info.iface); uct_md_close(if_info.pd); out_destroy_worker: uct_worker_destroy(if_info.worker); out_cleanup_async: ucs_async_context_cleanup(&async); out: (0 == exit_fail) ? MPI_Finalize() : MPI_Abort(MPI_COMM_WORLD, 1); return exit_fail; }
static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params) { uct_iface_config_t *iface_config; ucs_status_t status; uct_iface_params_t iface_params = { .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, .mode.device.tl_name = params->uct.tl_name, .mode.device.dev_name = params->uct.dev_name, .stats_root = ucs_stats_get_root(), .rx_headroom = 0 }; UCS_CPU_ZERO(&iface_params.cpu_mask); status = ucs_async_context_init(&perf->uct.async, params->async_mode); if (status != UCS_OK) { goto out; } status = uct_worker_create(&perf->uct.async, params->thread_mode, &perf->uct.worker); if (status != UCS_OK) { goto out_cleanup_async; } status = uct_perf_create_md(perf); if (status != UCS_OK) { goto out_destroy_worker; } status = uct_md_iface_config_read(perf->uct.md, params->uct.tl_name, NULL, NULL, &iface_config); if (status != UCS_OK) { goto out_destroy_md; } status = uct_iface_open(perf->uct.md, perf->uct.worker, &iface_params, iface_config, &perf->uct.iface); uct_config_release(iface_config); if (status != UCS_OK) { ucs_error("Failed to open iface: %s", ucs_status_string(status)); goto out_destroy_md; } status = uct_perf_test_check_capabilities(params, perf->uct.iface); if (status != UCS_OK) { goto out_iface_close; } status = uct_perf_test_alloc_mem(perf, params); if (status != UCS_OK) { goto out_iface_close; } status = uct_perf_test_setup_endpoints(perf); if (status != UCS_OK) { ucs_error("Failed to setup endpoints: %s", ucs_status_string(status)); goto out_free_mem; } uct_iface_progress_enable(perf->uct.iface, UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); return UCS_OK; out_free_mem: uct_perf_test_free_mem(perf); out_iface_close: uct_iface_close(perf->uct.iface); out_destroy_md: uct_md_close(perf->uct.md); out_destroy_worker: uct_worker_destroy(perf->uct.worker); out_cleanup_async: ucs_async_context_cleanup(&perf->uct.async); out: return status; } static void uct_perf_cleanup(ucx_perf_context_t *perf) { uct_perf_test_cleanup_endpoints(perf); uct_perf_test_free_mem(perf); uct_iface_close(perf->uct.iface); uct_md_close(perf->uct.md); uct_worker_destroy(perf->uct.worker); ucs_async_context_cleanup(&perf->uct.async); }
int main(int argc, char **argv) { /* MPI is initially used to swap the endpoint and interface addresses so each * process has knowledge of the others. */ MPI_Status mpi_status; int partner; int size; struct sockaddr *ep_addr; /* Endpoint address */ struct sockaddr *iface_addr; /* Interface address */ ucs_status_t status; /* status codes for UCS */ ucs_thread_mode_t thread_mode = UCS_THREAD_MODE_SINGLE; /* Specifies thread sharing mode of an object */ uct_ep_h ep; /* Remote endpoint */ void *arg; MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { fprintf(stderr, "Failed to create enough mpi processes.\n");fflush(stderr); return 1; } MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (0 == rank) { partner = 1; } else if (1 == rank) { partner = 0; } else { MPI_Finalize(); return 0; } /* Initialize context */ status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD); if (UCS_OK != status) { fprintf(stderr, "Failed to init async context.\n");fflush(stderr); goto out; } /* Create a worker object */ status = uct_worker_create(&async, thread_mode, &worker); if (UCS_OK != status) { fprintf(stderr, "Failed to create worker.\n");fflush(stderr); goto out_cleanup_async; } /* The device and tranport names are determined by latency */ status = dev_tl_lookup(); if (UCS_OK != status) { fprintf(stderr, "Failed to find supported device and transport\n");fflush(stderr); goto out_destroy_worker; } iface_addr = calloc(1, iface_attr.iface_addr_len); ep_addr = calloc(1, iface_attr.ep_addr_len); if ((NULL == iface_addr) || (NULL == ep_addr)) { goto out_destroy_iface; } /* Get interface address */ status = uct_iface_get_address(iface, iface_addr); if (UCS_OK != status) { fprintf(stderr, "Failed to get interface address.\n");fflush(stderr); goto out_free; } if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Create new endpoint */ status = uct_ep_create(iface, &ep); if (UCS_OK != status) { fprintf(stderr, "Failed to create endpoint.\n");fflush(stderr); goto out_free; } /* Get endpoint address */ status = uct_ep_get_address(ep, ep_addr); if (UCS_OK != status) { fprintf(stderr, "Failed to get endpoint address.\n");fflush(stderr); goto out_free_ep; } } /* Communicate interface and endpoint addresses to corresponding process */ MPI_Send(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD); MPI_Recv(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status); MPI_Send(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD); MPI_Recv(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status); if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { /* Connect endpoint to a remote endpoint */ status = uct_ep_connect_to_ep(ep, ep_addr); } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { /* Create an endpoint which is connected to a remote interface */ status = uct_ep_create_connected(iface, iface_addr, &ep); } else status = UCS_ERR_UNSUPPORTED; if (UCS_OK != status) { fprintf(stderr, "Failed to connect endpoint\n");fflush(stderr); goto out_free_ep; } uint8_t id = 0; /* Tag for active message */ /*Set active message handler */ status = uct_iface_set_am_handler(iface, id, hello_world, arg); if (UCS_OK != status) { fprintf(stderr, "Failed to set callback.\n");fflush(stderr); goto out_free_ep; } if (0 == rank) { uint64_t header; char payload[8]; unsigned length = sizeof(payload); /* Send active message to remote endpoint */ status = uct_ep_am_short(ep, id, header, payload, length); } else if (1 == rank) { while (holder) { /* Explicitly progress any outstanding active message requests */ uct_worker_progress(worker); } } out_free_ep: uct_ep_destroy(ep); out_free: free(iface_addr); free(ep_addr); out_destroy_iface: uct_iface_close(iface); uct_pd_close(pd); out_destroy_worker: uct_worker_destroy(worker); out_cleanup_async: ucs_async_context_cleanup(&async); out: MPI_Finalize(); return 0; }
static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params) { uct_iface_config_t *iface_config; ucs_status_t status; uct_iface_params_t iface_params = { .tl_name = params->uct.tl_name, .dev_name = params->uct.dev_name, .rx_headroom = 0 }; status = ucs_async_context_init(&perf->uct.async, params->async_mode); if (status != UCS_OK) { goto out; } status = uct_worker_create(&perf->uct.async, params->thread_mode, &perf->uct.worker); if (status != UCS_OK) { goto out_cleanup_async; } status = uct_perf_create_md(perf); if (status != UCS_OK) { goto out_destroy_worker; } status = uct_iface_config_read(params->uct.tl_name, NULL, NULL, &iface_config); if (status != UCS_OK) { goto out_destroy_md; } status = uct_iface_open(perf->uct.md, perf->uct.worker, &iface_params, iface_config, &perf->uct.iface); uct_config_release(iface_config); if (status != UCS_OK) { ucs_error("Failed to open iface: %s", ucs_status_string(status)); goto out_destroy_md; } status = uct_perf_test_check_capabilities(params, perf->uct.iface); if (status != UCS_OK) { goto out_iface_close; } status = uct_perf_test_alloc_mem(perf, params); if (status != UCS_OK) { goto out_iface_close; } status = uct_perf_test_setup_endpoints(perf); if (status != UCS_OK) { ucs_error("Failed to setup endpoints: %s", ucs_status_string(status)); goto out_free_mem; } return UCS_OK; out_free_mem: uct_perf_test_free_mem(perf); out_iface_close: uct_iface_close(perf->uct.iface); out_destroy_md: uct_md_close(perf->uct.md); out_destroy_worker: uct_worker_destroy(perf->uct.worker); out_cleanup_async: ucs_async_context_cleanup(&perf->uct.async); out: return status; } static void uct_perf_cleanup(ucx_perf_context_t *perf) { uct_perf_test_cleanup_endpoints(perf); uct_perf_test_free_mem(perf); uct_iface_close(perf->uct.iface); uct_md_close(perf->uct.md); uct_worker_destroy(perf->uct.worker); ucs_async_context_cleanup(&perf->uct.async); }
static void print_iface_info(uct_worker_h worker, uct_md_h md, uct_tl_resource_desc_t *resource) { uct_iface_config_t *iface_config; uct_iface_attr_t iface_attr; ucs_status_t status; uct_iface_h iface; char buf[200] = {0}; uct_iface_params_t iface_params = { .tl_name = resource->tl_name, .dev_name = resource->dev_name, .rx_headroom = 0 }; status = uct_iface_config_read(resource->tl_name, NULL, NULL, &iface_config); if (status != UCS_OK) { return; } printf("# Device: %s\n", resource->dev_name); status = uct_iface_open(md, worker, &iface_params, iface_config, &iface); uct_config_release(iface_config); if (status != UCS_OK) { printf("# < failed to open interface >\n"); return; } printf("#\n"); printf("# capabilities:\n"); status = uct_iface_query(iface, &iface_attr); if (status != UCS_OK) { printf("# < failed to query interface >\n"); } else { printf("# bandwidth: %-.2f MB/sec\n", iface_attr.bandwidth / (1024 * 1024)); printf("# latency: %-.0f nsec\n", iface_attr.latency * 1e9); printf("# overhead: %-.0f nsec\n", iface_attr.overhead * 1e9); PRINT_CAP(PUT_SHORT, iface_attr.cap.flags, iface_attr.cap.put.max_short); PRINT_CAP(PUT_BCOPY, iface_attr.cap.flags, iface_attr.cap.put.max_bcopy); PRINT_CAP(PUT_ZCOPY, iface_attr.cap.flags, iface_attr.cap.put.max_zcopy); PRINT_CAP(GET_BCOPY, iface_attr.cap.flags, iface_attr.cap.get.max_bcopy); PRINT_CAP(GET_ZCOPY, iface_attr.cap.flags, iface_attr.cap.get.max_zcopy); PRINT_CAP(AM_SHORT, iface_attr.cap.flags, iface_attr.cap.am.max_short); PRINT_CAP(AM_BCOPY, iface_attr.cap.flags, iface_attr.cap.am.max_bcopy); PRINT_CAP(AM_ZCOPY, iface_attr.cap.flags, iface_attr.cap.am.max_zcopy); if (iface_attr.cap.flags & (UCT_IFACE_FLAG_AM_BCOPY|UCT_IFACE_FLAG_AM_ZCOPY)) { printf("# am header: %s\n", size_limit_to_str(iface_attr.cap.am.max_hdr)); } PRINT_ATOMIC_CAP(ATOMIC_ADD, iface_attr.cap.flags); PRINT_ATOMIC_CAP(ATOMIC_FADD, iface_attr.cap.flags); PRINT_ATOMIC_CAP(ATOMIC_SWAP, iface_attr.cap.flags); PRINT_ATOMIC_CAP(ATOMIC_CSWAP, iface_attr.cap.flags); buf[0] = '\0'; if (iface_attr.cap.flags & (UCT_IFACE_FLAG_CONNECT_TO_EP | UCT_IFACE_FLAG_CONNECT_TO_IFACE)) { if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { strncat(buf, " to ep,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { strncat(buf, " to iface,", sizeof(buf) - 1); } buf[strlen(buf) - 1] = '\0'; } else { strncat(buf, " none", sizeof(buf) - 1); } printf("# connection:%s\n", buf); printf("# device address: %zu bytes\n", iface_attr.device_addr_len); if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { printf("# iface address: %zu bytes\n", iface_attr.iface_addr_len); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { printf("# ep address: %zu bytes\n", iface_attr.ep_addr_len); } buf[0] = '\0'; if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF | UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF | UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF | UCT_IFACE_FLAG_ERRHANDLE_AM_ID | UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM | UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF | UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF | UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF)) { strncat(buf, " buffer (", sizeof(buf) - 1); if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF) { strncat(buf, "short,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF) { strncat(buf, "bcopy,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF) { strncat(buf, "zcopy,", sizeof(buf) - 1); } buf[strlen(buf) - 1] = '\0'; strncat(buf, "),", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_AM_ID) { strncat(buf, " active-message id,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM) { strncat(buf, " remote access,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE) { strncat(buf, " peer failure,", sizeof(buf) - 1); } buf[strlen(buf) - 1] = '\0'; } else { strncat(buf, " none", sizeof(buf) - 1); } printf("# error handling:%s\n", buf); } uct_iface_close(iface); printf("#\n"); } static ucs_status_t print_tl_info(uct_md_h md, const char *tl_name, uct_tl_resource_desc_t *resources, unsigned num_resources, int print_opts, ucs_config_print_flags_t print_flags) { ucs_async_context_t async; uct_worker_h worker; ucs_status_t status; unsigned i; status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD); if (status != UCS_OK) { return status; } /* coverity[alloc_arg] */ status = uct_worker_create(&async, UCS_THREAD_MODE_MULTI, &worker); if (status != UCS_OK) { goto out; } printf("#\n"); printf("# Transport: %s\n", tl_name); printf("#\n"); if (num_resources == 0) { printf("# (No supported devices found)\n"); } for (i = 0; i < num_resources; ++i) { ucs_assert(!strcmp(tl_name, resources[i].tl_name)); print_iface_info(worker, md, &resources[i]); } uct_worker_destroy(worker); out: ucs_async_context_cleanup(&async); return status; }
static void print_iface_info(uct_worker_h worker, uct_md_h md, uct_tl_resource_desc_t *resource) { uct_iface_config_t *iface_config; uct_iface_attr_t iface_attr; ucs_status_t status; uct_iface_h iface; char buf[200] = {0}; uct_iface_params_t iface_params = { .field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | UCT_IFACE_PARAM_FIELD_DEVICE | UCT_IFACE_PARAM_FIELD_STATS_ROOT | UCT_IFACE_PARAM_FIELD_RX_HEADROOM | UCT_IFACE_PARAM_FIELD_CPU_MASK, .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, .mode.device.tl_name = resource->tl_name, .mode.device.dev_name = resource->dev_name, .stats_root = ucs_stats_get_root(), .rx_headroom = 0 }; UCS_CPU_ZERO(&iface_params.cpu_mask); status = uct_md_iface_config_read(md, resource->tl_name, NULL, NULL, &iface_config); if (status != UCS_OK) { return; } printf("# Device: %s\n", resource->dev_name); status = uct_iface_open(md, worker, &iface_params, iface_config, &iface); uct_config_release(iface_config); if (status != UCS_OK) { printf("# < failed to open interface >\n"); return; } printf("#\n"); printf("# capabilities:\n"); status = uct_iface_query(iface, &iface_attr); if (status != UCS_OK) { printf("# < failed to query interface >\n"); } else { printf("# bandwidth: %-.2f MB/sec\n", iface_attr.bandwidth / UCS_MBYTE); printf("# latency: %-.0f nsec", iface_attr.latency.overhead * 1e9); if (iface_attr.latency.growth > 0) { printf(" + %.0f * N\n", iface_attr.latency.growth * 1e9); } else { printf("\n"); } printf("# overhead: %-.0f nsec\n", iface_attr.overhead * 1e9); PRINT_CAP(PUT_SHORT, iface_attr.cap.flags, iface_attr.cap.put.max_short); PRINT_CAP(PUT_BCOPY, iface_attr.cap.flags, iface_attr.cap.put.max_bcopy); PRINT_ZCAP(PUT_ZCOPY, iface_attr.cap.flags, iface_attr.cap.put.min_zcopy, iface_attr.cap.put.max_zcopy, iface_attr.cap.put.max_iov); if (iface_attr.cap.flags & UCT_IFACE_FLAG_PUT_ZCOPY) { printf("# put_opt_zcopy_align: %s\n", size_limit_to_str(0, iface_attr.cap.put.opt_zcopy_align)); printf("# put_align_mtu: %s\n", size_limit_to_str(0, iface_attr.cap.put.align_mtu)); } PRINT_CAP(GET_SHORT, iface_attr.cap.flags, iface_attr.cap.get.max_short); PRINT_CAP(GET_BCOPY, iface_attr.cap.flags, iface_attr.cap.get.max_bcopy); PRINT_ZCAP(GET_ZCOPY, iface_attr.cap.flags, iface_attr.cap.get.min_zcopy, iface_attr.cap.get.max_zcopy, iface_attr.cap.get.max_iov); if (iface_attr.cap.flags & UCT_IFACE_FLAG_GET_ZCOPY) { printf("# get_opt_zcopy_align: %s\n", size_limit_to_str(0, iface_attr.cap.get.opt_zcopy_align)); printf("# get_align_mtu: %s\n", size_limit_to_str(0, iface_attr.cap.get.align_mtu)); } PRINT_CAP(AM_SHORT, iface_attr.cap.flags, iface_attr.cap.am.max_short); PRINT_CAP(AM_BCOPY, iface_attr.cap.flags, iface_attr.cap.am.max_bcopy); PRINT_ZCAP(AM_ZCOPY, iface_attr.cap.flags, iface_attr.cap.am.min_zcopy, iface_attr.cap.am.max_zcopy, iface_attr.cap.am.max_iov); if (iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) { printf("# am_opt_zcopy_align: %s\n", size_limit_to_str(0, iface_attr.cap.am.opt_zcopy_align)); printf("# am_align_mtu: %s\n", size_limit_to_str(0, iface_attr.cap.am.align_mtu)); printf("# am header: %s\n", size_limit_to_str(0, iface_attr.cap.am.max_hdr)); } PRINT_CAP(TAG_EAGER_SHORT, iface_attr.cap.flags, iface_attr.cap.tag.eager.max_short); PRINT_CAP(TAG_EAGER_BCOPY, iface_attr.cap.flags, iface_attr.cap.tag.eager.max_bcopy); PRINT_ZCAP(TAG_EAGER_ZCOPY, iface_attr.cap.flags, 0, iface_attr.cap.tag.eager.max_zcopy, iface_attr.cap.tag.eager.max_iov); if (iface_attr.cap.flags & UCT_IFACE_FLAG_TAG_RNDV_ZCOPY) { PRINT_ZCAP_NO_CHECK(TAG_RNDV_ZCOPY, 0, iface_attr.cap.tag.rndv.max_zcopy, iface_attr.cap.tag.rndv.max_iov); printf("# rndv private header: %s\n", size_limit_to_str(0, iface_attr.cap.tag.rndv.max_hdr)); } if (iface_attr.cap.flags & (UCT_IFACE_FLAG_TAG_EAGER_SHORT | UCT_IFACE_FLAG_TAG_EAGER_BCOPY | UCT_IFACE_FLAG_TAG_EAGER_ZCOPY | UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) { PRINT_ZCAP_NO_CHECK(TAG_RECV, iface_attr.cap.tag.recv.min_recv, iface_attr.cap.tag.recv.max_zcopy, iface_attr.cap.tag.recv.max_iov); printf("# tag_max_outstanding: %s\n", size_limit_to_str(0, iface_attr.cap.tag.recv.max_outstanding)); } if (iface_attr.cap.atomic32.op_flags || iface_attr.cap.atomic64.op_flags || iface_attr.cap.atomic32.fop_flags || iface_attr.cap.atomic64.fop_flags) { if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_DEVICE) { printf("# domain: device\n"); } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) { printf("# domain: cpu\n"); } PRINT_ATOMIC_POST(ADD, iface_attr.cap); PRINT_ATOMIC_POST(AND, iface_attr.cap); PRINT_ATOMIC_POST(OR, iface_attr.cap); PRINT_ATOMIC_POST(XOR, iface_attr.cap); PRINT_ATOMIC_FETCH(ADD, iface_attr.cap, "f"); PRINT_ATOMIC_FETCH(AND, iface_attr.cap, "f"); PRINT_ATOMIC_FETCH(OR, iface_attr.cap, "f"); PRINT_ATOMIC_FETCH(XOR, iface_attr.cap, "f"); PRINT_ATOMIC_FETCH(SWAP , iface_attr.cap, ""); PRINT_ATOMIC_FETCH(CSWAP, iface_attr.cap, ""); } buf[0] = '\0'; if (iface_attr.cap.flags & (UCT_IFACE_FLAG_CONNECT_TO_EP | UCT_IFACE_FLAG_CONNECT_TO_IFACE)) { if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { strncat(buf, " to ep,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { strncat(buf, " to iface,", sizeof(buf) - 1); } buf[strlen(buf) - 1] = '\0'; } else { strncat(buf, " none", sizeof(buf) - 1); } printf("# connection:%s\n", buf); printf("# priority: %d\n", iface_attr.priority); printf("# device address: %zu bytes\n", iface_attr.device_addr_len); if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { printf("# iface address: %zu bytes\n", iface_attr.iface_addr_len); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { printf("# ep address: %zu bytes\n", iface_attr.ep_addr_len); } buf[0] = '\0'; if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF | UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF | UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF | UCT_IFACE_FLAG_ERRHANDLE_AM_ID | UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM | UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF | UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF | UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF)) { strncat(buf, " buffer (", sizeof(buf) - 1); if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF) { strncat(buf, "short,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF) { strncat(buf, "bcopy,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF) { strncat(buf, "zcopy,", sizeof(buf) - 1); } buf[strlen(buf) - 1] = '\0'; strncat(buf, "),", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_AM_ID) { strncat(buf, " active-message id,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM) { strncat(buf, " remote access,", sizeof(buf) - 1); } if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE) { strncat(buf, " peer failure,", sizeof(buf) - 1); } buf[strlen(buf) - 1] = '\0'; } else { strncat(buf, " none", sizeof(buf) - 1); } printf("# error handling:%s\n", buf); } uct_iface_close(iface); printf("#\n"); } static ucs_status_t print_tl_info(uct_md_h md, const char *tl_name, uct_tl_resource_desc_t *resources, unsigned num_resources, int print_opts, ucs_config_print_flags_t print_flags) { ucs_async_context_t async; uct_worker_h worker; ucs_status_t status; unsigned i; status = ucs_async_context_init(&async, UCS_ASYNC_THREAD_LOCK_TYPE); if (status != UCS_OK) { return status; } /* coverity[alloc_arg] */ status = uct_worker_create(&async, UCS_THREAD_MODE_SINGLE, &worker); if (status != UCS_OK) { goto out; } printf("#\n"); printf("# Transport: %s\n", tl_name); printf("#\n"); if (num_resources == 0) { printf("# (No supported devices found)\n"); } for (i = 0; i < num_resources; ++i) { ucs_assert(!strcmp(tl_name, resources[i].tl_name)); print_iface_info(worker, md, &resources[i]); } uct_worker_destroy(worker); out: ucs_async_context_cleanup(&async); return status; }