Beispiel #1
0
static ucs_status_t uct_perf_create_md(ucx_perf_context_t *perf)
{
    uct_md_resource_desc_t *md_resources;
    uct_tl_resource_desc_t *tl_resources;
    unsigned i, num_md_resources;
    unsigned j, num_tl_resources;
    ucs_status_t status;
    uct_md_h md;
    uct_md_config_t *md_config;

    status = uct_query_md_resources(&md_resources, &num_md_resources);
    if (status != UCS_OK) {
        goto out;
    }

    for (i = 0; i < num_md_resources; ++i) {
        status = uct_md_config_read(md_resources[i].md_name, NULL, NULL, &md_config);
        if (status != UCS_OK) {
            goto out_release_md_resources;
        }

        status = uct_md_open(md_resources[i].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            goto out_release_md_resources;
        }

        status = uct_md_query_tl_resources(md, &tl_resources, &num_tl_resources);
        if (status != UCS_OK) {
            uct_md_close(md);
            goto out_release_md_resources;
        }

        for (j = 0; j < num_tl_resources; ++j) {
            if (!strcmp(perf->params.uct.tl_name,  tl_resources[j].tl_name) &&
                !strcmp(perf->params.uct.dev_name, tl_resources[j].dev_name))
            {
                uct_release_tl_resource_list(tl_resources);
                perf->uct.md = md;
                status = UCS_OK;
                goto out_release_md_resources;
            }
        }

        uct_md_close(md);
        uct_release_tl_resource_list(tl_resources);
    }

    ucs_error("Cannot use transport %s on device %s", perf->params.uct.tl_name,
              perf->params.uct.dev_name);
    status = UCS_ERR_NO_DEVICE;

out_release_md_resources:
    uct_release_md_resource_list(md_resources);
out:
    return status;
}
Beispiel #2
0
static ucs_status_t ucp_add_tl_resources(ucp_context_h context,
                                         uct_pd_h pd, ucp_rsc_index_t pd_index,
                                         const ucp_config_t *config,
                                         unsigned *num_resources_p,
                                         uint64_t *masks)
{
    uct_tl_resource_desc_t *tl_resources;
    ucp_tl_resource_desc_t *tmp;
    unsigned num_resources;
    ucs_status_t status;
    ucp_rsc_index_t i;

    *num_resources_p = 0;

    /* check what are the available uct resources */
    status = uct_pd_query_tl_resources(pd, &tl_resources, &num_resources);
    if (status != UCS_OK) {
        ucs_error("Failed to query resources: %s", ucs_status_string(status));
        goto err;
    }

    if (num_resources == 0) {
        ucs_debug("No tl resources found for pd %s", context->pd_rscs[pd_index].pd_name);
        goto out_free_resources;
    }

    tmp = ucs_realloc(context->tl_rscs,
                      sizeof(*context->tl_rscs) * (context->num_tls + num_resources),
                      "ucp resources");
    if (tmp == NULL) {
        ucs_error("Failed to allocate resources");
        status = UCS_ERR_NO_MEMORY;
        goto err_free_resources;
    }

    /* copy only the resources enabled by user configuration */
    context->tl_rscs = tmp;
    for (i = 0; i < num_resources; ++i) {
        if (ucp_is_resource_enabled(&tl_resources[i], config, masks)) {
            context->tl_rscs[context->num_tls].tl_rsc   = tl_resources[i];
            context->tl_rscs[context->num_tls].pd_index = pd_index;
            ++context->num_tls;
            ++(*num_resources_p);
        }
    }

out_free_resources:
    uct_release_tl_resource_list(tl_resources);
    return UCS_OK;

err_free_resources:
    uct_release_tl_resource_list(tl_resources);
err:
    return status;
}
Beispiel #3
0
/* Device and transport to be used are determined by minimum latency */
static ucs_status_t dev_tl_lookup(const char *dev_name, const char *tl_name, struct iface_info *iface_p)
{
    int i;
    int j;
    ucs_status_t status;
    uct_md_resource_desc_t *md_resources; /* Memory domain resource descriptor */
    uct_tl_resource_desc_t *tl_resources; /*Communication resource descriptor */
    unsigned num_md_resources; /* Number of protected domain */
    unsigned num_tl_resources; /* Number of transport resources resource objects created */
    uct_md_config_t *md_config;

    status = uct_query_md_resources(&md_resources, &num_md_resources);
    CHKERR_JUMP(UCS_OK != status, "query for protected domain resources", error_ret);

    /* Iterate through protected domain resources */
    for (i = 0; i < num_md_resources; ++i) {
        status = uct_md_config_read(md_resources[i].md_name, NULL, NULL, &md_config);
        CHKERR_JUMP(UCS_OK != status, "read PD config", release_pd);

        status = uct_md_open(md_resources[i].md_name, md_config, &iface_p->pd);
        uct_config_release(md_config);
        CHKERR_JUMP(UCS_OK != status, "open protected domains", release_pd);

        status = uct_md_query_tl_resources(iface_p->pd, &tl_resources, &num_tl_resources);
        CHKERR_JUMP(UCS_OK != status, "query transport resources", close_pd);

        /* Go through each available transport and find the proper name */
        for (j = 0; j < num_tl_resources; ++j) {
            if (!strcmp(dev_name, tl_resources[j].dev_name) &&
                !strcmp(tl_name, tl_resources[j].tl_name)) {
                status = init_iface(tl_resources[j].dev_name, tl_resources[j].tl_name, iface_p);
                if (UCS_OK == status) {
                    printf("Using %s with %s.\n", tl_resources[j].dev_name, tl_resources[j].tl_name);
                    fflush(stdout);
                    uct_release_tl_resource_list(tl_resources);
                    goto release_pd;
                }
            }
        }
        uct_release_tl_resource_list(tl_resources);
        uct_md_close(iface_p->pd);
    }

    fprintf(stderr, "No supported (dev/tl) found (%s/%s)\n", dev_name, tl_name);
    status = UCS_ERR_UNSUPPORTED;

release_pd:
    uct_release_md_resource_list(md_resources);
error_ret:
    return status;
close_pd:
    uct_md_close(iface_p->pd);
    goto release_pd;
}
Beispiel #4
0
/* Device and transport to be used are determined by minimum latency */
static ucs_status_t dev_tl_lookup()
{
	int i;
	int j;
	uint64_t min_latency = UINT64_MAX;
	int pd_index = -1;
	int tl_index = -1;
	ucs_status_t status;
	uct_pd_resource_desc_t *pd_resources; /* Protection domain resource descriptor */
	uct_tl_resource_desc_t *tl_resources; /*Communication resource descriptor */
	unsigned num_pd_resources; /* Number of protected domain */
	unsigned num_tl_resources; /* Number of transport resources resource objects created */

	status = uct_query_pd_resources(&pd_resources, &num_pd_resources);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to query for protected domain resources.\n");	
		goto out1;
	}

	/* Iterate through protected domain resources */
	for (i = 0; i < num_pd_resources; ++i) {
		status = uct_pd_open(pd_resources[i].pd_name, &pd);
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to open protected domain.\n"); fflush(stderr);
			goto release1;
		}

		status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources);	
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to query transport resources.\n"); fflush(stderr);
			uct_pd_close(pd);
			goto release1;
		}
		
		/* Go through each available transport resource for a particular protected domain 
		 * and keep track of the fastest latency */
		for (j = 0; j < num_tl_resources; ++j) {
			status = resource_supported(tl_resources[j].dev_name, tl_resources[j].tl_name, 1);	
			if (UCS_OK == status) {
				if (tl_resources[j].latency < min_latency) {
					min_latency = tl_resources[j].latency;
					pd_index = i;
					tl_index = j;
				}
			}
		}

		uct_release_tl_resource_list(tl_resources);
		uct_pd_close(pd);
	}

	/* Check if any valid device/transport found */
	if ((-1 == pd_index) || (-1 == tl_index)) {
		uct_release_pd_resource_list(pd_resources);
		return UCS_ERR_UNSUPPORTED;
	}

	/* IMPORTANT: Certain functions that operate on an interface rely on a pointer to the protection domain that created it */
	/* Reopen new protection domain and */
	status = uct_pd_open(pd_resources[pd_index].pd_name, &pd);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to open final protected domain.\n"); fflush(stderr);
		goto release1;
	}

	/* Open new tranport resources */
	status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to query final transport resources.\n"); fflush(stderr);
		uct_pd_close(pd);
		goto release1;
	}

	/* Call resource_supported() again to set the interface */
	status = resource_supported(tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name, 0);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to initialize final interface.\n"); fflush(stderr);
		uct_pd_close(pd);
		return status;	
	}

	printf("Using %s with %s.\n", tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name);fflush(stdout);

	uct_release_tl_resource_list(tl_resources);
release1:
	uct_release_pd_resource_list(pd_resources);
out1:
	return status;
}
Beispiel #5
0
static ucs_status_t ucp_add_tl_resources(ucp_context_h context,
        uct_pd_h pd, ucp_rsc_index_t pd_index,
        const ucp_config_t *config,
        unsigned *num_resources_p)
{
    uint64_t used_devices_mask, mask, config_devices_mask;
    uct_tl_resource_desc_t *tl_resources;
    ucp_tl_resource_desc_t *tmp;
    unsigned num_resources;
    ucs_status_t status;
    ucp_rsc_index_t i;

    *num_resources_p = 0;

    /* check what are the available uct resources */
    status = uct_pd_query_tl_resources(pd, &tl_resources, &num_resources);
    if (status != UCS_OK) {
        ucs_error("Failed to query resources: %s", ucs_status_string(status));
        goto err;
    }

    if (num_resources == 0) {
        ucs_debug("No tl resources found for pd %s", context->pd_rscs[pd_index].pd_name);
        goto out_free_resources;
    }

    tmp = ucs_realloc(context->tl_rscs,
                      sizeof(*context->tl_rscs) * (context->num_tls + num_resources),
                      "ucp resources");
    if (tmp == NULL) {
        ucs_error("Failed to allocate resources");
        status = UCS_ERR_NO_MEMORY;
        goto err_free_resources;
    }

    /* mask of all devices from configuration which were used */
    used_devices_mask = 0;

    /* copy only the resources enabled by user configuration */
    context->tl_rscs = tmp;
    for (i = 0; i < num_resources; ++i) {
        if (ucp_is_resource_enabled(&tl_resources[i], config, &mask)) {
            context->tl_rscs[context->num_tls].tl_rsc   = tl_resources[i];
            context->tl_rscs[context->num_tls].pd_index = pd_index;
            ++context->num_tls;
            used_devices_mask |= mask;
            ++(*num_resources_p);
        }
    }

    /* if all devices should be used, check that */
    config_devices_mask = UCS_MASK_SAFE(config->devices.count);
    if (config->force_all_devices && (used_devices_mask != config_devices_mask)) {
        i = ucs_ffs64(used_devices_mask ^ config_devices_mask);
        ucs_error("device %s is not available", config->devices.names[i]);
        status = UCS_ERR_NO_DEVICE;
        goto err_free_resources;
    }

out_free_resources:
    uct_release_tl_resource_list(tl_resources);
    return UCS_OK;

err_free_resources:
    uct_release_tl_resource_list(tl_resources);
err:
    return status;
}
Beispiel #6
0
void print_uct_config(ucs_config_print_flags_t print_flags, const char *tl_name)
{
    uct_md_resource_desc_t *md_resources;
    unsigned md_rsc_index, num_md_resources;
    uct_tl_resource_desc_t *tl_resources;
    unsigned tl_rsc_index, num_tl_resources;
    uct_iface_config_t *config;
    char tl_names[UINT8_MAX][UCT_TL_NAME_MAX];
    char cfg_title[UCT_TL_NAME_MAX + 128];
    unsigned i, num_tls;
    ucs_status_t status;
    uct_md_h md;
    uct_md_config_t *md_config;

    status = uct_query_md_resources(&md_resources, &num_md_resources);
    if (status != UCS_OK) {
        return;
    }

    uct_md_component_config_print(print_flags);

    num_tls = 0;
    for (md_rsc_index = 0; md_rsc_index < num_md_resources; ++md_rsc_index) {

        status = uct_md_config_read(md_resources[md_rsc_index].md_name, NULL,
                                    NULL, &md_config);
        if (status != UCS_OK) {
            continue;
        }

        status = uct_md_open(md_resources[md_rsc_index].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            continue;
        }

        status = uct_md_query_tl_resources(md, &tl_resources, &num_tl_resources);
        if (status != UCS_OK) {
            uct_md_close(md);
            continue;
        }

        for (tl_rsc_index = 0; tl_rsc_index < num_tl_resources; ++tl_rsc_index) {
            i = 0;
            while (i < num_tls) {
                if (!strcmp(tl_names[i], tl_resources[tl_rsc_index].tl_name)) {
                    break;
                }
                ++i;
            }

            /* Make sure this transport is not inserted to the array before, and
             * if user selects a specific transport - also make sure this is it.
             */
            if ((i == num_tls) &&
                ((tl_name == NULL) || !strcmp(tl_name, tl_resources[tl_rsc_index].tl_name)))
            {
                strncpy(tl_names[num_tls], tl_resources[tl_rsc_index].tl_name,
                        UCT_TL_NAME_MAX);
                ++num_tls;
            }
        }

        uct_release_tl_resource_list(tl_resources);
        uct_md_close(md);
    }

    uct_release_md_resource_list(md_resources);

    for (i = 0; i < num_tls; ++i) {
        snprintf(cfg_title, sizeof(cfg_title), "%s transport configuration",
                 tl_names[i]);
        status = uct_iface_config_read(tl_names[i], NULL, NULL, &config);
        if (status != UCS_OK) {
            printf("# < Failed to read configuration >\n");
            continue;
        }

        uct_config_print(config, stdout, cfg_title, print_flags);
        uct_config_release(config);
    }

}
Beispiel #7
0
static void print_md_info(const char *md_name, int print_opts,
                          ucs_config_print_flags_t print_flags,
                          const char *req_tl_name)
{
    uct_tl_resource_desc_t *resources, tmp;
    unsigned resource_index, j, num_resources, count;
    ucs_status_t status;
    const char *tl_name;
    uct_md_config_t *md_config;
    uct_md_attr_t md_attr;
    uct_md_h md;

    status = uct_md_config_read(md_name, NULL, NULL, &md_config);
    if (status != UCS_OK) {
        goto out;
    }

    status = uct_md_open(md_name, md_config, &md);
    uct_config_release(md_config);
    if (status != UCS_OK) {
        printf("# < failed to open memory domain %s >\n", md_name);
        goto out;
    }

    status = uct_md_query_tl_resources(md, &resources, &num_resources);
    if (status != UCS_OK) {
        printf("#   < failed to query memory domain resources >\n");
        goto out_close_md;
    }

    if (req_tl_name != NULL) {
        resource_index = 0;
        while (resource_index < num_resources) {
            if (!strcmp(resources[resource_index].tl_name, req_tl_name)) {
                break;
            }
            ++resource_index;
        }
        if (resource_index == num_resources) {
            /* no selected transport on the MD */
            goto out_free_list;
        }
    }

    status = uct_md_query(md, &md_attr);
    if (status != UCS_OK) {
        printf("# < failed to query memory domain >\n");
        goto out_free_list;
    } else {
        printf("#\n");
        printf("# Memory domain: %s\n", md_name);
        printf("#   component:        %s\n", md_attr.component_name);
        if (md_attr.cap.flags & UCT_MD_FLAG_ALLOC) {
            printf("#   allocate:         %s\n",
                   size_limit_to_str(md_attr.cap.max_alloc));
        }
        if (md_attr.cap.flags & UCT_MD_FLAG_REG) {
            printf("#   register:         %s, cost: %.0f",
                   size_limit_to_str(md_attr.cap.max_reg),
                   md_attr.reg_cost.overhead * 1e9);
            if (md_attr.reg_cost.growth * 1e9 > 1e-3) {
                printf("+(%.3f*<SIZE>)", md_attr.reg_cost.growth * 1e9);
            }
            printf(" nsec\n");
        }
        printf("#   remote key:       %zu bytes\n", md_attr.rkey_packed_size);
    }

    if (num_resources == 0) {
        printf("#   < no supported devices found >\n");
        goto out_free_list;
    }

    resource_index = 0;
    while (resource_index < num_resources) {
        /* Gather all resources for this transport */
        tl_name = resources[resource_index].tl_name;
        count = 1;
        for (j = resource_index + 1; j < num_resources; ++j) {
            if (!strcmp(tl_name, resources[j].tl_name)) {
                tmp = resources[count + resource_index];
                resources[count + resource_index] = resources[j];
                resources[j] = tmp;
                ++count;
            }
        }

        if ((req_tl_name == NULL) || !strcmp(tl_name, req_tl_name)) {
            print_tl_info(md, tl_name, &resources[resource_index], count,
                          print_opts, print_flags);
        }

        resource_index += count;
    }

out_free_list:
    uct_release_tl_resource_list(resources);
out_close_md:
    uct_md_close(md);
out:
    ;
}