/*
 * Find and open the protection domain that exposes the transport/device
 * pair named in perf->params.uct.
 *
 * Every protection domain reported by uct_query_pd_resources() is opened in
 * turn and its transport resources are compared (by tl_name and dev_name)
 * against the requested pair. On the first match the opened domain is stored
 * in perf->uct.pd and left open. Domains that do not match are closed again.
 *
 * Returns UCS_OK when a matching domain was stored, the failing status when a
 * query/config/open call fails, or UCS_ERR_NO_DEVICE when nothing matched.
 */
static ucs_status_t uct_perf_create_pd(ucx_perf_context_t *perf)
{
    uct_pd_resource_desc_t *pd_list;
    uct_tl_resource_desc_t *tl_list;
    uct_pd_config_t *config;
    unsigned pd_count, tl_count;
    unsigned pd_idx, tl_idx;
    ucs_status_t rc;
    uct_pd_h domain;
    int matched;

    rc = uct_query_pd_resources(&pd_list, &pd_count);
    if (rc != UCS_OK) {
        /* Nothing was acquired yet, so there is nothing to release. */
        return rc;
    }

    for (pd_idx = 0; pd_idx < pd_count; ++pd_idx) {
        rc = uct_pd_config_read(pd_list[pd_idx].pd_name, NULL, NULL, &config);
        if (rc != UCS_OK) {
            goto release_pd_list;
        }

        /* The configuration is only needed for the open call itself. */
        rc = uct_pd_open(pd_list[pd_idx].pd_name, config, &domain);
        uct_config_release(config);
        if (rc != UCS_OK) {
            goto release_pd_list;
        }

        rc = uct_pd_query_tl_resources(domain, &tl_list, &tl_count);
        if (rc != UCS_OK) {
            uct_pd_close(domain);
            goto release_pd_list;
        }

        /* Scan this domain's transports until the requested pair shows up. */
        matched = 0;
        for (tl_idx = 0; (tl_idx < tl_count) && !matched; ++tl_idx) {
            matched = (strcmp(perf->params.uct.tl_name,
                              tl_list[tl_idx].tl_name) == 0) &&
                      (strcmp(perf->params.uct.dev_name,
                              tl_list[tl_idx].dev_name) == 0);
        }

        if (matched) {
            /* Keep the domain open: it is handed over to the perf context. */
            uct_release_tl_resource_list(tl_list);
            perf->uct.pd = domain;
            rc = UCS_OK;
            goto release_pd_list;
        }

        uct_pd_close(domain);
        uct_release_tl_resource_list(tl_list);
    }

    ucs_error("Cannot use transport %s on device %s",
              perf->params.uct.tl_name, perf->params.uct.dev_name);
    rc = UCS_ERR_NO_DEVICE;

release_pd_list:
    uct_release_pd_resource_list(pd_list);
    return rc;
}
/*
 * Append the transport resources of one protection domain to the context.
 *
 * Queries the transport resources of `pd`, grows context->tl_rscs so it can
 * hold all of them, and copies in only those accepted by
 * ucp_is_resource_enabled() (which also receives `masks`). Each copied entry
 * is tagged with `pd_index`, and context->num_tls is advanced per entry.
 *
 * *num_resources_p is set to the number of entries added (0 on failure or
 * when the domain reports no resources).
 *
 * Returns UCS_OK on success (including the "no resources" case), the query
 * status if the query fails, or UCS_ERR_NO_MEMORY if the array cannot grow.
 */
static ucs_status_t ucp_add_tl_resources(ucp_context_h context, uct_pd_h pd,
                                         ucp_rsc_index_t pd_index,
                                         const ucp_config_t *config,
                                         unsigned *num_resources_p,
                                         uint64_t *masks)
{
    uct_tl_resource_desc_t *tl_list;
    ucp_tl_resource_desc_t *grown;
    ucp_tl_resource_desc_t *slot;
    unsigned tl_count;
    ucs_status_t rc;
    ucp_rsc_index_t idx;

    *num_resources_p = 0;

    /* Ask the transport layer what this domain offers. */
    rc = uct_pd_query_tl_resources(pd, &tl_list, &tl_count);
    if (rc != UCS_OK) {
        ucs_error("Failed to query resources: %s", ucs_status_string(rc));
        return rc;
    }

    if (tl_count == 0) {
        ucs_debug("No tl resources found for pd %s",
                  context->pd_rscs[pd_index].pd_name);
        uct_release_tl_resource_list(tl_list);
        return UCS_OK;
    }

    /* Reserve room for the worst case where every resource is enabled. */
    grown = ucs_realloc(context->tl_rscs,
                        sizeof(*context->tl_rscs) *
                        (context->num_tls + tl_count),
                        "ucp resources");
    if (grown == NULL) {
        ucs_error("Failed to allocate resources");
        uct_release_tl_resource_list(tl_list);
        return UCS_ERR_NO_MEMORY;
    }
    context->tl_rscs = grown;

    /* Keep only the resources the user configuration allows. */
    for (idx = 0; idx < tl_count; ++idx) {
        if (!ucp_is_resource_enabled(&tl_list[idx], config, masks)) {
            continue;
        }

        slot           = &context->tl_rscs[context->num_tls];
        slot->tl_rsc   = tl_list[idx];
        slot->pd_index = pd_index;
        ++context->num_tls;
        ++(*num_resources_p);
    }

    uct_release_tl_resource_list(tl_list);
    return UCS_OK;
}
/* Device and transport to be used are determined by minimum latency */ static ucs_status_t dev_tl_lookup() { int i; int j; uint64_t min_latency = UINT64_MAX; int pd_index = -1; int tl_index = -1; ucs_status_t status; uct_pd_resource_desc_t *pd_resources; /* Protection domain resource descriptor */ uct_tl_resource_desc_t *tl_resources; /*Communication resource descriptor */ unsigned num_pd_resources; /* Number of protected domain */ unsigned num_tl_resources; /* Number of transport resources resource objects created */ status = uct_query_pd_resources(&pd_resources, &num_pd_resources); if (UCS_OK != status) { fprintf(stderr, "Failed to query for protected domain resources.\n"); goto out1; } /* Iterate through protected domain resources */ for (i = 0; i < num_pd_resources; ++i) { status = uct_pd_open(pd_resources[i].pd_name, &pd); if (UCS_OK != status) { fprintf(stderr, "Failed to open protected domain.\n"); fflush(stderr); goto release1; } status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources); if (UCS_OK != status) { fprintf(stderr, "Failed to query transport resources.\n"); fflush(stderr); uct_pd_close(pd); goto release1; } /* Go through each available transport resource for a particular protected domain * and keep track of the fastest latency */ for (j = 0; j < num_tl_resources; ++j) { status = resource_supported(tl_resources[j].dev_name, tl_resources[j].tl_name, 1); if (UCS_OK == status) { if (tl_resources[j].latency < min_latency) { min_latency = tl_resources[j].latency; pd_index = i; tl_index = j; } } } uct_release_tl_resource_list(tl_resources); uct_pd_close(pd); } /* Check if any valid device/transport found */ if ((-1 == pd_index) || (-1 == tl_index)) { uct_release_pd_resource_list(pd_resources); return UCS_ERR_UNSUPPORTED; } /* IMPORTANT: Certain functions that operate on an interface rely on a pointer to the protection domain that created it */ /* Reopen new protection domain and */ status = uct_pd_open(pd_resources[pd_index].pd_name, 
&pd); if (UCS_OK != status) { fprintf(stderr, "Failed to open final protected domain.\n"); fflush(stderr); goto release1; } /* Open new tranport resources */ status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources); if (UCS_OK != status) { fprintf(stderr, "Failed to query final transport resources.\n"); fflush(stderr); uct_pd_close(pd); goto release1; } /* Call resource_supported() again to set the interface */ status = resource_supported(tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name, 0); if (UCS_OK != status) { fprintf(stderr, "Failed to initialize final interface.\n"); fflush(stderr); uct_pd_close(pd); return status; } printf("Using %s with %s.\n", tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name);fflush(stdout); uct_release_tl_resource_list(tl_resources); release1: uct_release_pd_resource_list(pd_resources); out1: return status; }
static ucs_status_t ucp_add_tl_resources(ucp_context_h context, uct_pd_h pd, ucp_rsc_index_t pd_index, const ucp_config_t *config, unsigned *num_resources_p) { uint64_t used_devices_mask, mask, config_devices_mask; uct_tl_resource_desc_t *tl_resources; ucp_tl_resource_desc_t *tmp; unsigned num_resources; ucs_status_t status; ucp_rsc_index_t i; *num_resources_p = 0; /* check what are the available uct resources */ status = uct_pd_query_tl_resources(pd, &tl_resources, &num_resources); if (status != UCS_OK) { ucs_error("Failed to query resources: %s", ucs_status_string(status)); goto err; } if (num_resources == 0) { ucs_debug("No tl resources found for pd %s", context->pd_rscs[pd_index].pd_name); goto out_free_resources; } tmp = ucs_realloc(context->tl_rscs, sizeof(*context->tl_rscs) * (context->num_tls + num_resources), "ucp resources"); if (tmp == NULL) { ucs_error("Failed to allocate resources"); status = UCS_ERR_NO_MEMORY; goto err_free_resources; } /* mask of all devices from configuration which were used */ used_devices_mask = 0; /* copy only the resources enabled by user configuration */ context->tl_rscs = tmp; for (i = 0; i < num_resources; ++i) { if (ucp_is_resource_enabled(&tl_resources[i], config, &mask)) { context->tl_rscs[context->num_tls].tl_rsc = tl_resources[i]; context->tl_rscs[context->num_tls].pd_index = pd_index; ++context->num_tls; used_devices_mask |= mask; ++(*num_resources_p); } } /* if all devices should be used, check that */ config_devices_mask = UCS_MASK_SAFE(config->devices.count); if (config->force_all_devices && (used_devices_mask != config_devices_mask)) { i = ucs_ffs64(used_devices_mask ^ config_devices_mask); ucs_error("device %s is not available", config->devices.names[i]); status = UCS_ERR_NO_DEVICE; goto err_free_resources; } out_free_resources: uct_release_tl_resource_list(tl_resources); return UCS_OK; err_free_resources: uct_release_tl_resource_list(tl_resources); err: return status; }