/*
 * Find the protection domain which exposes the transport/device pair requested
 * in perf->params.uct and keep it open in perf->uct.pd.
 *
 * Every protection domain returned by uct_query_pd_resources() is opened and
 * its transport resources are compared against the requested names. The first
 * domain with a matching resource is left open and stored in the context; all
 * other domains opened along the way are closed.
 *
 * Returns UCS_OK when a match was found, UCS_ERR_NO_DEVICE when no resource
 * matches, or the status of the first UCT call that failed.
 */
static ucs_status_t uct_perf_create_pd(ucx_perf_context_t *perf)
{
    uct_pd_resource_desc_t *pd_list;
    uct_tl_resource_desc_t *tl_list;
    const uct_pd_resource_desc_t *pd_desc;
    uct_pd_config_t *config;
    unsigned pd_count, tl_count;
    unsigned pd_idx, tl_idx;
    uct_pd_h candidate;
    ucs_status_t rc;
    int matched;

    rc = uct_query_pd_resources(&pd_list, &pd_count);
    if (rc != UCS_OK) {
        /* Nothing was acquired yet, so there is nothing to release */
        return rc;
    }

    for (pd_idx = 0; pd_idx < pd_count; pd_idx++) {
        pd_desc = &pd_list[pd_idx];

        rc = uct_pd_config_read(pd_desc->pd_name, NULL, NULL, &config);
        if (rc != UCS_OK) {
            goto cleanup;
        }

        /* The configuration is only needed for the open call itself */
        rc = uct_pd_open(pd_desc->pd_name, config, &candidate);
        uct_config_release(config);
        if (rc != UCS_OK) {
            goto cleanup;
        }

        rc = uct_pd_query_tl_resources(candidate, &tl_list, &tl_count);
        if (rc != UCS_OK) {
            uct_pd_close(candidate);
            goto cleanup;
        }

        /* Stop scanning at the first resource matching both names */
        matched = 0;
        for (tl_idx = 0; (tl_idx < tl_count) && !matched; tl_idx++) {
            matched = (strcmp(perf->params.uct.tl_name,
                              tl_list[tl_idx].tl_name) == 0) &&
                      (strcmp(perf->params.uct.dev_name,
                              tl_list[tl_idx].dev_name) == 0);
        }

        if (matched) {
            /* Keep the domain open: it is handed over to the perf context */
            uct_release_tl_resource_list(tl_list);
            perf->uct.pd = candidate;
            rc = UCS_OK;
            goto cleanup;
        }

        uct_pd_close(candidate);
        uct_release_tl_resource_list(tl_list);
    }

    ucs_error("Cannot use transport %s on device %s", perf->params.uct.tl_name,
              perf->params.uct.dev_name);
    rc = UCS_ERR_NO_DEVICE;

cleanup:
    uct_release_pd_resource_list(pd_list);
    return rc;
}
/* Device and transport to be used are determined by minimum latency */ static ucs_status_t dev_tl_lookup() { int i; int j; uint64_t min_latency = UINT64_MAX; int pd_index = -1; int tl_index = -1; ucs_status_t status; uct_pd_resource_desc_t *pd_resources; /* Protection domain resource descriptor */ uct_tl_resource_desc_t *tl_resources; /*Communication resource descriptor */ unsigned num_pd_resources; /* Number of protected domain */ unsigned num_tl_resources; /* Number of transport resources resource objects created */ uct_pd_config_t *pd_config; status = uct_query_pd_resources(&pd_resources, &num_pd_resources); if (UCS_OK != status) { fprintf(stderr, "Failed to query for protected domain resources.\n"); goto out1; } /* Iterate through protected domain resources */ for (i = 0; i < num_pd_resources; ++i) { status = uct_pd_config_read(pd_resources[i].pd_name, NULL, NULL, &pd_config); if (status != UCS_OK) { goto release1; } status = uct_pd_open(pd_resources[i].pd_name, pd_config, &pd); uct_config_release(pd_config); if (UCS_OK != status) { fprintf(stderr, "Failed to open protected domain.\n"); fflush(stderr); goto release1; } status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources); if (UCS_OK != status) { fprintf(stderr, "Failed to query transport resources.\n"); fflush(stderr); uct_pd_close(pd); goto release1; } /* Go through each available transport resource for a particular protected domain * and keep track of the fastest latency */ for (j = 0; j < num_tl_resources; ++j) { status = resource_supported(tl_resources[j].dev_name, tl_resources[j].tl_name, 1); if (UCS_OK == status) { if (tl_resources[j].latency < min_latency) { min_latency = tl_resources[j].latency; pd_index = i; tl_index = j; } } } uct_release_tl_resource_list(tl_resources); uct_pd_close(pd); } /* Check if any valid device/transport found */ if ((-1 == pd_index) || (-1 == tl_index)) { uct_release_pd_resource_list(pd_resources); return UCS_ERR_UNSUPPORTED; } /* IMPORTANT: Certain 
functions that operate on an interface rely on a pointer to the protection domain that created it */ /* Reopen new protection domain and */ status = uct_pd_config_read(pd_resources[i].pd_name, NULL, NULL, &pd_config); if (status != UCS_OK) { goto release1; } status = uct_pd_open(pd_resources[pd_index].pd_name, &pd); uct_config_release(pd_config); if (UCS_OK != status) { fprintf(stderr, "Failed to open final protected domain.\n"); fflush(stderr); goto release1; } /* Open new tranport resources */ status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources); if (UCS_OK != status) { fprintf(stderr, "Failed to query final transport resources.\n"); fflush(stderr); uct_pd_close(pd); goto release1; } /* Call resource_supported() again to set the interface */ status = resource_supported(tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name, 0); if (UCS_OK != status) { fprintf(stderr, "Failed to initialize final interface.\n"); fflush(stderr); uct_pd_close(pd); return status; } printf("Using %s with %s.\n", tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name);fflush(stdout); uct_release_tl_resource_list(tl_resources); release1: uct_release_pd_resource_list(pd_resources); out1: return status; }
/*
 * Discover the protection domains (PDs) and transport resources to be used by
 * a UCP context and record them in 'context'.
 *
 * The function validates that the configured device and transport lists are
 * not empty, queries the available PD resources, opens every PD and calls
 * ucp_add_tl_resources() for it. A PD is kept (stored in context->pds and
 * counted in context->num_pds) only when at least one transport resource was
 * selected on it; otherwise it is closed immediately.
 *
 * @param context  Context whose pd_rscs/pds/pd_attrs/num_pds and
 *                 tl_rscs/num_tls fields are reset and filled.
 * @param config   Configuration providing the device lists and the TL list.
 *
 * @return UCS_OK on success. UCS_ERR_NO_ELEM when a configured list is empty,
 *         UCS_ERR_NO_DEVICE when no PD or no matching transport exists,
 *         UCS_ERR_EXCEEDS_LIMIT when too many PDs or transports are present,
 *         UCS_ERR_NO_MEMORY on allocation failure, or the status of a failing
 *         UCT call. On failure after the context arrays were initialized,
 *         ucp_free_resources() is called on the context.
 */
static ucs_status_t ucp_fill_resources(ucp_context_h context,
                                       const ucp_config_t *config)
{
    unsigned num_tl_resources;
    unsigned num_pd_resources;
    uct_pd_resource_desc_t *pd_rscs;
    ucs_status_t status;
    ucp_rsc_index_t i;
    unsigned pd_index;
    uct_pd_h pd;
    uct_pd_config_t *pd_config;
    uint64_t masks[UCT_DEVICE_TYPE_LAST] = {0};

    /* If the user's device lists are all empty, nothing can ever match */
    if ((0 == config->devices[UCT_DEVICE_TYPE_NET].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_SHM].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_ACC].count)) {
        ucs_error("The device lists are empty. Please specify the devices you would like to use "
                  "or omit the UCX_*_DEVICES so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* If the user's TLs list is empty, nothing can ever match */
    if (0 == config->tls.count) {
        ucs_error("The TLs list is empty. Please specify the transports you would like to use "
                  "or omit the UCX_TLS so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* List protection domain resources */
    status = uct_query_pd_resources(&pd_rscs, &num_pd_resources);
    if (status != UCS_OK) {
        goto err;
    }

    /* Error check: Make sure there is at least one PD */
    if (num_pd_resources == 0) {
        ucs_error("No pd resources found");
        status = UCS_ERR_NO_DEVICE;
        goto err_release_pd_resources;
    }

    if (num_pd_resources >= UCP_MAX_PDS) {
        ucs_error("Only up to %ld PDs are supported", UCP_MAX_PDS);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_release_pd_resources;
    }

    /* Reset all resource fields before the first allocation, so that the
     * error path can hand a consistent context to ucp_free_resources() */
    context->num_pds  = 0;
    context->pd_rscs  = NULL;
    context->pds      = NULL;
    context->pd_attrs = NULL;
    context->num_tls  = 0;
    context->tl_rscs  = NULL;

    /* Allocate array of PD resources we would actually use */
    context->pd_rscs = ucs_calloc(num_pd_resources, sizeof(*context->pd_rscs),
                                  "ucp_pd_resources");
    if (context->pd_rscs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of protection domains */
    context->pds = ucs_calloc(num_pd_resources, sizeof(*context->pds), "ucp_pds");
    if (context->pds == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of protection domains attributes */
    context->pd_attrs = ucs_calloc(num_pd_resources, sizeof(*context->pd_attrs),
                                   "ucp_pd_attrs");
    if (context->pd_attrs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Open all protection domains, keep only those which have at least one TL
     * resource selected on them. pd_index is the next free slot in the context
     * arrays and always equals context->num_pds. */
    pd_index = 0;
    for (i = 0; i < num_pd_resources; ++i) {
        status = uct_pd_config_read(pd_rscs[i].pd_name, NULL, NULL, &pd_config);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        status = uct_pd_open(pd_rscs[i].pd_name, pd_config, &pd);
        uct_config_release(pd_config);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        context->pd_rscs[pd_index] = pd_rscs[i];
        context->pds[pd_index]     = pd;

        /* Save PD attributes */
        status = uct_pd_query(pd, &context->pd_attrs[pd_index]);
        if (status != UCS_OK) {
            goto err_close_pd;
        }

        /* Add communication resources of each PD */
        status = ucp_add_tl_resources(context, pd, pd_index, config,
                                      &num_tl_resources, masks);
        if (status != UCS_OK) {
            goto err_close_pd;
        }

        /* If the PD does not have transport resources, don't use it */
        if (num_tl_resources > 0) {
            ++pd_index;
            ++context->num_pds;
        } else {
            ucs_debug("closing pd %s because it has no selected transport resources",
                      pd_rscs[i].pd_name);
            uct_pd_close(pd);
        }
    }

    /* Error check: Make sure there is at least one transport */
    if (0 == context->num_tls) {
        ucs_error("There are no available resources matching the configured criteria");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_context_resources;
    }

    /* Notify the user if there are devices from the command line that are not available */
    ucp_check_unavailable_devices(config->devices, masks);

    /* Error check: Make sure there are not too many transports */
    if (context->num_tls >= UCP_MAX_RESOURCES) {
        ucs_error("Exceeded resources limit (%u requested, up to %d are supported)",
                  context->num_tls, UCP_MAX_RESOURCES);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_free_context_resources;
    }

    uct_release_pd_resource_list(pd_rscs);
    return UCS_OK;

err_close_pd:
    /* The PD opened in the current iteration is not yet counted in
     * context->num_pds, so it has to be closed here, just like the
     * "no transport resources" branch above does.
     * NOTE(review): assumes ucp_free_resources() only closes the first
     * context->num_pds domains - confirm against its implementation. */
    uct_pd_close(pd);
err_free_context_resources:
    ucp_free_resources(context);
err_release_pd_resources:
    uct_release_pd_resource_list(pd_rscs);
err:
    return status;
}