/**
 * Open the memory domain that provides the transport/device pair requested in
 * perf->params.uct.
 *
 * Every memory domain reported by uct_query_md_resources() is opened in turn
 * and its transport resources are compared, by exact name, against
 * perf->params.uct.tl_name and perf->params.uct.dev_name. The first memory
 * domain with a matching resource is left open and stored in perf->uct.md;
 * all others are closed again.
 *
 * @param perf  Perf context; params.uct.{tl_name,dev_name} are read and
 *              uct.md is written on success.
 *
 * @return UCS_OK when a matching memory domain was found,
 *         UCS_ERR_NO_DEVICE when none matched, or the error returned by the
 *         failing UCT call.
 */
static ucs_status_t uct_perf_create_md(ucx_perf_context_t *perf)
{
    uct_md_resource_desc_t *md_list;
    uct_tl_resource_desc_t *tl_list;
    uct_md_config_t *md_config;
    unsigned md_count, md_idx;
    unsigned tl_count, tl_idx;
    ucs_status_t status;
    uct_md_h md;
    int found;

    status = uct_query_md_resources(&md_list, &md_count);
    if (status != UCS_OK) {
        return status;
    }

    for (md_idx = 0; md_idx < md_count; ++md_idx) {
        status = uct_md_config_read(md_list[md_idx].md_name, NULL, NULL,
                                    &md_config);
        if (status != UCS_OK) {
            goto out_release_md_resources;
        }

        /* The configuration is only needed while opening the MD */
        status = uct_md_open(md_list[md_idx].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            goto out_release_md_resources;
        }

        status = uct_md_query_tl_resources(md, &tl_list, &tl_count);
        if (status != UCS_OK) {
            uct_md_close(md);
            goto out_release_md_resources;
        }

        /* Scan the transports of this MD until the requested pair shows up */
        found = 0;
        for (tl_idx = 0; (tl_idx < tl_count) && !found; ++tl_idx) {
            found = (strcmp(perf->params.uct.tl_name,
                            tl_list[tl_idx].tl_name) == 0) &&
                    (strcmp(perf->params.uct.dev_name,
                            tl_list[tl_idx].dev_name) == 0);
        }

        if (found) {
            /* Keep the MD open: it is handed over to the perf context */
            uct_release_tl_resource_list(tl_list);
            perf->uct.md = md;
            status       = UCS_OK;
            goto out_release_md_resources;
        }

        uct_md_close(md);
        uct_release_tl_resource_list(tl_list);
    }

    ucs_error("Cannot use transport %s on device %s", perf->params.uct.tl_name,
              perf->params.uct.dev_name);
    status = UCS_ERR_NO_DEVICE;

out_release_md_resources:
    uct_release_md_resource_list(md_list);
    return status;
}
/* Device and transport to be used are determined by minimum latency */ static ucs_status_t dev_tl_lookup(const char *dev_name, const char *tl_name, struct iface_info *iface_p) { int i; int j; ucs_status_t status; uct_md_resource_desc_t *md_resources; /* Memory domain resource descriptor */ uct_tl_resource_desc_t *tl_resources; /*Communication resource descriptor */ unsigned num_md_resources; /* Number of protected domain */ unsigned num_tl_resources; /* Number of transport resources resource objects created */ uct_md_config_t *md_config; status = uct_query_md_resources(&md_resources, &num_md_resources); CHKERR_JUMP(UCS_OK != status, "query for protected domain resources", error_ret); /* Iterate through protected domain resources */ for (i = 0; i < num_md_resources; ++i) { status = uct_md_config_read(md_resources[i].md_name, NULL, NULL, &md_config); CHKERR_JUMP(UCS_OK != status, "read PD config", release_pd); status = uct_md_open(md_resources[i].md_name, md_config, &iface_p->pd); uct_config_release(md_config); CHKERR_JUMP(UCS_OK != status, "open protected domains", release_pd); status = uct_md_query_tl_resources(iface_p->pd, &tl_resources, &num_tl_resources); CHKERR_JUMP(UCS_OK != status, "query transport resources", close_pd); /* Go through each available transport and find the proper name */ for (j = 0; j < num_tl_resources; ++j) { if (!strcmp(dev_name, tl_resources[j].dev_name) && !strcmp(tl_name, tl_resources[j].tl_name)) { status = init_iface(tl_resources[j].dev_name, tl_resources[j].tl_name, iface_p); if (UCS_OK == status) { printf("Using %s with %s.\n", tl_resources[j].dev_name, tl_resources[j].tl_name); fflush(stdout); uct_release_tl_resource_list(tl_resources); goto release_pd; } } } uct_release_tl_resource_list(tl_resources); uct_md_close(iface_p->pd); } fprintf(stderr, "No supported (dev/tl) found (%s/%s)\n", dev_name, tl_name); status = UCS_ERR_UNSUPPORTED; release_pd: uct_release_md_resource_list(md_resources); error_ret: return status; close_pd: 
uct_md_close(iface_p->pd); goto release_pd; }
/**
 * Print information about every memory domain resource reported by
 * uct_query_md_resources().
 *
 * @param print_opts   Forwarded unchanged to print_md_info().
 * @param print_flags  Forwarded unchanged to print_md_info().
 * @param req_tl_name  Forwarded unchanged to print_md_info().
 *
 * If the resource query fails, a single diagnostic line is printed to stdout
 * and nothing else is done.
 */
void print_uct_info(int print_opts, ucs_config_print_flags_t print_flags,
                    const char *req_tl_name)
{
    uct_md_resource_desc_t *md_list;
    unsigned md_count;
    unsigned idx;

    if (uct_query_md_resources(&md_list, &md_count) != UCS_OK) {
        printf("# < failed to query MD resources >\n");
        return;
    }

    for (idx = 0; idx != md_count; ++idx) {
        print_md_info(md_list[idx].md_name, print_opts, print_flags,
                      req_tl_name);
    }

    uct_release_md_resource_list(md_list);
}
/**
 * Discover the memory domains and transport resources the context will use.
 *
 * All memory domains reported by uct_query_md_resources() are sorted by name
 * and opened. A memory domain is kept in the context arrays (md_rscs, mds,
 * md_attrs) only if ucp_add_tl_resources() selected at least one transport
 * resource on it; otherwise it is closed again.
 *
 * @param context  Context whose num_mds, md_rscs, mds, md_attrs, num_tls and
 *                 tl_rscs fields are (re)initialized and filled.
 * @param config   Configuration providing the device lists and the TLs list.
 *
 * @return UCS_OK on success. On failure:
 *         - UCS_ERR_NO_ELEM        the device lists or the TLs list are empty;
 *         - UCS_ERR_NO_DEVICE      no memory domain or no transport was found;
 *         - UCS_ERR_NO_MEMORY      an array allocation failed;
 *         - UCS_ERR_EXCEEDS_LIMIT  too many memory domains or transports;
 *         - or the status of the failing UCT/UCP call.
 *         Once the context arrays have been allocated, every failure path
 *         goes through ucp_free_resources().
 */
static ucs_status_t ucp_fill_resources(ucp_context_h context,
                                       const ucp_config_t *config)
{
    unsigned num_tl_resources;
    unsigned num_md_resources;
    uct_md_resource_desc_t *md_rscs;
    ucs_status_t status;
    /* Same type as num_md_resources, so the loop bound can always be reached
     * (NOTE(review): ucp_rsc_index_t may be narrower than unsigned). */
    unsigned i;
    unsigned md_index;
    uct_md_h md;
    uct_md_config_t *md_config;
    uint64_t masks[UCT_DEVICE_TYPE_LAST] = {0};

    /* If the user's device lists are all empty, nothing can match */
    if ((0 == config->devices[UCT_DEVICE_TYPE_NET].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_SHM].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_ACC].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_SELF].count)) {
        ucs_error("The device lists are empty. Please specify the devices you would like to use "
                  "or omit the UCX_*_DEVICES so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* If the user's TLs list is empty, nothing can match */
    if (0 == config->tls.count) {
        ucs_error("The TLs list is empty. Please specify the transports you would like to use "
                  "or omit the UCX_TLS so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* List memory domain resources */
    status = uct_query_md_resources(&md_rscs, &num_md_resources);
    if (status != UCS_OK) {
        goto err;
    }

    /* Error check: Make sure there is at least one MD (checked before sorting
     * so that qsort() is never handed an empty list) */
    if (num_md_resources == 0) {
        ucs_error("No md resources found");
        status = UCS_ERR_NO_DEVICE;
        goto err_release_md_resources;
    }

    /* Sort md's by name, to increase the likelihood of reusing the same ep
     * configuration (since remote md map is part of the key).
     */
    qsort(md_rscs, num_md_resources, sizeof(*md_rscs), ucp_md_rsc_compare_name);

    context->num_mds  = 0;
    context->md_rscs  = NULL;
    context->mds      = NULL;
    context->md_attrs = NULL;
    context->num_tls  = 0;
    context->tl_rscs  = NULL;

    /* Allocate array of MD resources we would actually use */
    context->md_rscs = ucs_calloc(num_md_resources, sizeof(*context->md_rscs),
                                  "ucp_md_resources");
    if (context->md_rscs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of memory domains */
    context->mds = ucs_calloc(num_md_resources, sizeof(*context->mds), "ucp_mds");
    if (context->mds == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of memory domains attributes */
    context->md_attrs = ucs_calloc(num_md_resources, sizeof(*context->md_attrs),
                                   "ucp_md_attrs");
    if (context->md_attrs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Open all memory domains, keep only those which have at least one TL
     * resources selected on them.
     */
    md_index = 0;
    for (i = 0; i < num_md_resources; ++i) {
        status = uct_md_config_read(md_rscs[i].md_name, NULL, NULL, &md_config);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* The configuration is only needed while opening the MD */
        status = uct_md_open(md_rscs[i].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* Tentatively place the MD in the next free slot; the slot is only
         * committed (md_index/num_mds advanced) if transports are selected. */
        context->md_rscs[md_index] = md_rscs[i];
        context->mds[md_index]     = md;

        /* Save MD attributes */
        status = uct_md_query(md, &context->md_attrs[md_index]);
        if (status != UCS_OK) {
            goto err_close_md;
        }

        /* Add communication resources of each MD */
        status = ucp_add_tl_resources(context, md, md_index, config,
                                      &num_tl_resources, masks);
        if (status != UCS_OK) {
            goto err_close_md;
        }

        /* If the MD does not have transport resources, don't use it */
        if (num_tl_resources > 0) {
            ++md_index;
            ++context->num_mds;
        } else {
            ucs_debug("closing md %s because it has no selected transport resources",
                      md_rscs[i].md_name);
            uct_md_close(md);
        }
    }

    /* Error check: Make sure there is at least one transport */
    if (0 == context->num_tls) {
        ucs_error("There are no available resources matching the configured criteria");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_context_resources;
    }

    /* The context arrays and the kept MDs are live here, so this failure must
     * release them like every other failure after allocation. */
    if (context->num_mds > UCP_MD_INDEX_BITS) {
        ucs_error("Only up to %d memory domains are supported (have: %d)",
                  UCP_MD_INDEX_BITS, context->num_mds);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_free_context_resources;
    }

    /* Notify the user if there are devices from the command line that are not available */
    ucp_check_unavailable_devices(config->devices, masks);

    /* Error check: Make sure there are not too many transports */
    if (context->num_tls >= UCP_MAX_RESOURCES) {
        ucs_error("Exceeded resources limit (%u requested, up to %d are supported)",
                  context->num_tls, UCP_MAX_RESOURCES);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_free_context_resources;
    }

    status = ucp_check_tl_names(context);
    if (status != UCS_OK) {
        goto err_free_context_resources;
    }

    uct_release_md_resource_list(md_rscs);
    return UCS_OK;

err_close_md:
    /* The MD being processed is not yet counted in context->num_mds, so it is
     * closed explicitly here.
     * NOTE(review): assumes ucp_free_resources() only closes the MDs counted
     * in num_mds - confirm against its implementation. */
    uct_md_close(md);
err_free_context_resources:
    ucp_free_resources(context);
err_release_md_resources:
    uct_release_md_resource_list(md_rscs);
err:
    return status;
}
/**
 * Print the memory domain component configuration followed by the interface
 * configuration of every distinct transport found on the system.
 *
 * Every memory domain reported by uct_query_md_resources() is opened and its
 * transport resources are collected into a list of unique transport names.
 * Memory domains whose configuration cannot be read, which cannot be opened,
 * or whose transports cannot be queried are skipped. For each collected name
 * the interface configuration is read and printed to stdout.
 *
 * @param print_flags  Flags passed to the configuration printing functions.
 * @param tl_name      If not NULL, only the transport with exactly this name
 *                     is printed; if NULL, all transports are printed.
 */
void print_uct_config(ucs_config_print_flags_t print_flags, const char *tl_name)
{
    uct_md_resource_desc_t *md_resources;
    unsigned md_rsc_index, num_md_resources;
    uct_tl_resource_desc_t *tl_resources;
    unsigned tl_rsc_index, num_tl_resources;
    uct_iface_config_t *config;
    char tl_names[UINT8_MAX][UCT_TL_NAME_MAX];
    const unsigned max_tls = sizeof(tl_names) / sizeof(tl_names[0]);
    char cfg_title[UCT_TL_NAME_MAX + 128];
    const char *rsc_tl_name;
    unsigned i, num_tls;
    ucs_status_t status;
    uct_md_h md;
    uct_md_config_t *md_config;

    status = uct_query_md_resources(&md_resources, &num_md_resources);
    if (status != UCS_OK) {
        return;
    }

    uct_md_component_config_print(print_flags);

    num_tls = 0;
    for (md_rsc_index = 0; md_rsc_index < num_md_resources; ++md_rsc_index) {
        status = uct_md_config_read(md_resources[md_rsc_index].md_name, NULL,
                                    NULL, &md_config);
        if (status != UCS_OK) {
            continue;
        }

        /* The configuration is only needed while opening the MD */
        status = uct_md_open(md_resources[md_rsc_index].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            continue;
        }

        status = uct_md_query_tl_resources(md, &tl_resources, &num_tl_resources);
        if (status != UCS_OK) {
            uct_md_close(md);
            continue;
        }

        for (tl_rsc_index = 0; tl_rsc_index < num_tl_resources; ++tl_rsc_index) {
            rsc_tl_name = tl_resources[tl_rsc_index].tl_name;

            /* If the user selected a specific transport, ignore all others */
            if ((tl_name != NULL) && strcmp(tl_name, rsc_tl_name)) {
                continue;
            }

            /* Make sure this transport was not inserted to the array before */
            for (i = 0; i < num_tls; ++i) {
                if (!strcmp(tl_names[i], rsc_tl_name)) {
                    break;
                }
            }
            if (i < num_tls) {
                continue;
            }

            /* Never write past the end of tl_names: transports beyond its
             * capacity are not recorded. */
            if (num_tls >= max_tls) {
                continue;
            }

            /* snprintf always NUL-terminates, unlike strncpy, so the stored
             * name is safe to pass to strcmp() and "%s" later on. */
            snprintf(tl_names[num_tls], sizeof(tl_names[num_tls]), "%s",
                     rsc_tl_name);
            ++num_tls;
        }

        uct_release_tl_resource_list(tl_resources);
        uct_md_close(md);
    }

    uct_release_md_resource_list(md_resources);

    for (i = 0; i < num_tls; ++i) {
        snprintf(cfg_title, sizeof(cfg_title), "%s transport configuration",
                 tl_names[i]);
        status = uct_iface_config_read(tl_names[i], NULL, NULL, &config);
        if (status != UCS_OK) {
            printf("# < Failed to read configuration >\n");
            continue;
        }

        uct_config_print(config, stdout, cfg_title, print_flags);
        uct_config_release(config);
    }
}