Beispiel #1
0
/*
 * Find and open the memory domain that offers the transport/device pair
 * requested in perf->params.uct.
 *
 * Each memory domain returned by uct_query_md_resources() is opened in turn
 * and its transport resources are compared (exact string match) against the
 * requested tl_name and dev_name. The first domain with a matching resource
 * is left open and stored in perf->uct.md; every other domain opened along
 * the way is closed again.
 *
 * Returns UCS_OK on a match, UCS_ERR_NO_DEVICE when no domain offers the
 * pair, or the status of the first UCT call that failed.
 */
static ucs_status_t uct_perf_create_md(ucx_perf_context_t *perf)
{
    uct_md_resource_desc_t *md_list;
    uct_tl_resource_desc_t *tl_list;
    unsigned md_idx, md_count;
    unsigned tl_idx, tl_count;
    uct_md_config_t *md_config;
    ucs_status_t status;
    uct_md_h md;
    int found;

    status = uct_query_md_resources(&md_list, &md_count);
    if (UCS_OK != status) {
        return status;
    }

    for (md_idx = 0; md_idx < md_count; ++md_idx) {
        status = uct_md_config_read(md_list[md_idx].md_name, NULL, NULL, &md_config);
        if (UCS_OK != status) {
            goto out_release_md_list;
        }

        /* The config object is only needed for the open call itself */
        status = uct_md_open(md_list[md_idx].md_name, md_config, &md);
        uct_config_release(md_config);
        if (UCS_OK != status) {
            goto out_release_md_list;
        }

        status = uct_md_query_tl_resources(md, &tl_list, &tl_count);
        if (UCS_OK != status) {
            uct_md_close(md);
            goto out_release_md_list;
        }

        /* Stop scanning at the first resource matching both names */
        found = 0;
        for (tl_idx = 0; (tl_idx < tl_count) && !found; ++tl_idx) {
            found = (0 == strcmp(perf->params.uct.tl_name,  tl_list[tl_idx].tl_name)) &&
                    (0 == strcmp(perf->params.uct.dev_name, tl_list[tl_idx].dev_name));
        }

        if (found) {
            /* Keep this domain open: ownership passes to the perf context */
            uct_release_tl_resource_list(tl_list);
            perf->uct.md = md;
            status = UCS_OK;
            goto out_release_md_list;
        }

        uct_md_close(md);
        uct_release_tl_resource_list(tl_list);
    }

    ucs_error("Cannot use transport %s on device %s", perf->params.uct.tl_name,
              perf->params.uct.dev_name);
    status = UCS_ERR_NO_DEVICE;

out_release_md_list:
    uct_release_md_resource_list(md_list);
    return status;
}
Beispiel #2
0
/*
 * Locate the memory domain that exposes the requested device/transport pair
 * and open an interface on it.
 *
 * Every memory domain is opened in turn (the handle is stored in
 * iface_p->pd) and its transport resources are compared by exact name match
 * against dev_name and tl_name. For a matching resource init_iface() is
 * called; if it succeeds the memory domain is left open for the caller and
 * UCS_OK is returned. Domains that do not yield a working interface are
 * closed again.
 *
 * Returns UCS_OK on success, UCS_ERR_UNSUPPORTED when no domain provides a
 * usable dev/tl pair, or the status of the UCT call that failed.
 */
static ucs_status_t dev_tl_lookup(const char *dev_name, const char *tl_name, struct iface_info *iface_p)
{
    /* Indices are unsigned to match the counts they are compared against */
    unsigned i;
    unsigned j;
    ucs_status_t status;
    uct_md_resource_desc_t *md_resources; /* Memory domain resource descriptors */
    uct_tl_resource_desc_t *tl_resources; /* Transport resource descriptors */
    unsigned num_md_resources; /* Number of memory domain resources */
    unsigned num_tl_resources; /* Number of transport resources on the current domain */
    uct_md_config_t *md_config;

    status = uct_query_md_resources(&md_resources, &num_md_resources);
    CHKERR_JUMP(UCS_OK != status, "query for protected domain resources", error_ret);

    /* Iterate through memory domain resources */
    for (i = 0; i < num_md_resources; ++i) {
        status = uct_md_config_read(md_resources[i].md_name, NULL, NULL, &md_config);
        CHKERR_JUMP(UCS_OK != status, "read PD config", release_pd);

        /* The config object is only needed for the open call itself */
        status = uct_md_open(md_resources[i].md_name, md_config, &iface_p->pd);
        uct_config_release(md_config);
        CHKERR_JUMP(UCS_OK != status, "open protected domains", release_pd);

        status = uct_md_query_tl_resources(iface_p->pd, &tl_resources, &num_tl_resources);
        CHKERR_JUMP(UCS_OK != status, "query transport resources", close_pd);

        /* Go through each available transport and find the requested names */
        for (j = 0; j < num_tl_resources; ++j) {
            if (!strcmp(dev_name, tl_resources[j].dev_name) &&
                !strcmp(tl_name, tl_resources[j].tl_name)) {
                status = init_iface(tl_resources[j].dev_name, tl_resources[j].tl_name, iface_p);
                if (UCS_OK == status) {
                    printf("Using %s with %s.\n", tl_resources[j].dev_name, tl_resources[j].tl_name);
                    fflush(stdout);
                    uct_release_tl_resource_list(tl_resources);
                    /* Success: iface_p->pd stays open for the caller */
                    goto release_pd;
                }
                /* init_iface failed: keep scanning the remaining resources */
            }
        }
        uct_release_tl_resource_list(tl_resources);
        uct_md_close(iface_p->pd);
    }

    fprintf(stderr, "No supported (dev/tl) found (%s/%s)\n", dev_name, tl_name);
    status = UCS_ERR_UNSUPPORTED;
    goto release_pd;

close_pd:
    uct_md_close(iface_p->pd);
release_pd:
    uct_release_md_resource_list(md_resources);
error_ret:
    return status;
}
Beispiel #3
0
/*
 * Release the resource tables held by a UCP context.
 *
 * Frees the transport resource array, closes every non-NULL memory domain
 * among the first context->num_mds entries of context->mds, and then frees
 * the MD attribute, MD handle and MD descriptor arrays.
 */
static void ucp_free_resources(ucp_context_t *context)
{
    ucp_rsc_index_t md_index = 0;

    ucs_free(context->tl_rscs);

    while (md_index < context->num_mds) {
        if (NULL != context->mds[md_index]) {
            uct_md_close(context->mds[md_index]);
        }
        ++md_index;
    }

    ucs_free(context->md_attrs);
    ucs_free(context->mds);
    ucs_free(context->md_rscs);
}
Beispiel #4
0
/*
 * UCT active-message example between MPI ranks 0 and 1.
 *
 * Usage: <prog> <dev-name> <tl-name>
 *
 * MPI is used only to exchange device, interface and endpoint addresses so
 * that each of the two participating ranks can connect to the other. Rank 0
 * then sends one short active message; rank 1 progresses the worker until
 * the file-level `holder` flag becomes zero. Ranks other than 0 and 1 just
 * wait in MPI_Finalize.
 *
 * Returns 0 on success; on failure MPI_Abort is called with code 1.
 */
int main(int argc, char **argv)
{
    int partner;
    int size, rank;
    uct_device_addr_t *own_dev, *peer_dev;
    uct_iface_addr_t *own_iface, *peer_iface;
    uct_ep_addr_t *own_ep, *peer_ep;
    ucs_status_t status;          /* status codes for UCS */
    uct_ep_h ep = NULL;           /* Remote endpoint; NULL until successfully created */
    ucs_async_context_t async;    /* Async event context manages times and fd notifications */
    uint8_t id = 0;
    void *arg = NULL;             /* No user argument is passed to the AM handler */
    const char *tl_name = NULL;
    const char *dev_name = NULL;
    struct iface_info if_info;
    int reachable;
    int exit_fail = 1;

    optind = 1;
    if (3 == argc) {
        dev_name = argv[1];
        tl_name  = argv[2];
    } else {
        printf("Usage: %s (<dev-name> <tl-name>)\n", argv[0]);
        fflush(stdout);
        return 1;
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) {
        fprintf(stderr, "Failed to create enough mpi processes\n");
        goto out;
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (0 == rank) {
        partner = 1;
    } else if (1 == rank) {
        partner = 0;
    } else {
        /* just wait for other processes in MPI_Finalize */
        exit_fail = 0;
        goto out;
    }

    /* Initialize context */
    status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD);
    CHKERR_JUMP(UCS_OK != status, "init async context", out);

    /* Create a worker object */
    status = uct_worker_create(&async, UCS_THREAD_MODE_SINGLE, &if_info.worker);
    CHKERR_JUMP(UCS_OK != status, "create worker", out_cleanup_async);

    /* Search for the desired transport */
    status = dev_tl_lookup(dev_name, tl_name, &if_info);
    CHKERR_JUMP(UCS_OK != status, "find supported device and transport", out_destroy_worker);

    /* Each buffer holds our own address followed by the peer's address.
     * Expect that addr len is the same on both peers.
     * NOTE(review): calloc with a zero length may return NULL on some
     * platforms, which would be reported here as an allocation failure. */
    own_dev = (uct_device_addr_t*)calloc(2, if_info.attr.device_addr_len);
    CHKERR_JUMP(NULL == own_dev, "allocate memory for dev addrs", out_destroy_iface);
    peer_dev = (uct_device_addr_t*)((char*)own_dev + if_info.attr.device_addr_len);

    own_iface = (uct_iface_addr_t*)calloc(2, if_info.attr.iface_addr_len);
    CHKERR_JUMP(NULL == own_iface, "allocate memory for if addrs", out_free_dev_addrs);
    peer_iface = (uct_iface_addr_t*)((char*)own_iface + if_info.attr.iface_addr_len);

    /* Get device address */
    status = uct_iface_get_device_address(if_info.iface, own_dev);
    CHKERR_JUMP(UCS_OK != status, "get device address", out_free_if_addrs);

    MPI_Sendrecv(own_dev, if_info.attr.device_addr_len, MPI_BYTE, partner, 0,
                 peer_dev, if_info.attr.device_addr_len, MPI_BYTE, partner,0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    /* The reachability query yields a truth value, not a ucs_status_t */
    reachable = uct_iface_is_reachable(if_info.iface, peer_dev, NULL);
    CHKERR_JUMP(0 == reachable, "reach the peer", out_free_if_addrs);

    /* Get interface address */
    if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(if_info.iface, own_iface);
        CHKERR_JUMP(UCS_OK != status, "get interface address", out_free_if_addrs);

        MPI_Sendrecv(own_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner, 0,
                     peer_iface, if_info.attr.iface_addr_len, MPI_BYTE, partner,0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    /* Again, expect that ep addr len is the same on both peers */
    own_ep = (uct_ep_addr_t*)calloc(2, if_info.attr.ep_addr_len);
    CHKERR_JUMP(NULL == own_ep, "allocate memory for ep addrs", out_free_if_addrs);
    peer_ep = (uct_ep_addr_t*)((char*)own_ep + if_info.attr.ep_addr_len);

    if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        /* Create new endpoint */
        status = uct_ep_create(if_info.iface, &ep);
        CHKERR_JUMP(UCS_OK != status, "create endpoint", out_free_ep_addrs);

        /* Get endpoint address */
        status = uct_ep_get_address(ep, own_ep);
        CHKERR_JUMP(UCS_OK != status, "get endpoint address", out_free_ep);
    }

    MPI_Sendrecv(own_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0,
                 peer_ep, if_info.attr.ep_addr_len, MPI_BYTE, partner, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        /* Connect endpoint to a remote endpoint */
        status = uct_ep_connect_to_ep(ep, peer_dev, peer_ep);
        MPI_Barrier(MPI_COMM_WORLD);
    } else if (if_info.attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        /* Create an endpoint which is connected to a remote interface */
        status = uct_ep_create_connected(if_info.iface, peer_dev, peer_iface, &ep);
        if (UCS_OK != status) {
            /* Do not trust the output handle after a failed create */
            ep = NULL;
        }
    } else {
        status = UCS_ERR_UNSUPPORTED;
    }
    CHKERR_JUMP(UCS_OK != status, "connect endpoint", out_free_ep);

    /* Set active message handler */
    status = uct_iface_set_am_handler(if_info.iface, id, hello_world, arg, UCT_AM_CB_FLAG_SYNC);
    CHKERR_JUMP(UCS_OK != status, "set callback", out_free_ep);

    if (0 == rank) {
        /* Zero-filled so that the bytes put on the wire are well defined */
        uint64_t header = 0;
        char payload[8] = {0};
        unsigned length = sizeof(payload);
        /* Send active message to remote endpoint */
        status = uct_ep_am_short(ep, id, header, payload, length);
        CHKERR_JUMP(UCS_OK != status, "send active msg", out_free_ep);
    } else if (1 == rank) {
        while (holder) {
            /* Explicitly progress any outstanding active message requests */
            uct_worker_progress(if_info.worker);
        }
    }

    /* Everything is fine, we need to call MPI_Finalize rather than MPI_Abort */
    exit_fail = 0;

out_free_ep:
    /* This label is also reached when no endpoint was ever created */
    if (NULL != ep) {
        uct_ep_destroy(ep);
    }
out_free_ep_addrs:
    free(own_ep);
out_free_if_addrs:
    free(own_iface);
out_free_dev_addrs:
    free(own_dev);
out_destroy_iface:
    uct_iface_close(if_info.iface);
    uct_md_close(if_info.pd);
out_destroy_worker:
    uct_worker_destroy(if_info.worker);
out_cleanup_async:
    ucs_async_context_cleanup(&async);
out:
    if (0 == exit_fail) {
        MPI_Finalize();
    } else {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    return exit_fail;
}
Beispiel #5
0
/*
 * Populate a UCP context with the memory domains and transport resources
 * selected by the configuration.
 *
 * All memory domains reported by UCT are opened in name order; a domain is
 * kept in context->mds only if ucp_add_tl_resources() selected at least one
 * transport resource on it, otherwise it is closed again.
 *
 * @param context  Context whose md_rscs/mds/md_attrs/tl_rscs tables and
 *                 num_mds/num_tls counters are (re)initialized and filled.
 * @param config   Configuration holding the device and transport lists.
 *
 * @return UCS_OK on success. On any failure every resource acquired by this
 *         function (open memory domains, context tables, the MD resource
 *         list) is released and an error status is returned:
 *         UCS_ERR_NO_ELEM for empty device/TL lists, UCS_ERR_NO_DEVICE when
 *         no MD or no transport is available, UCS_ERR_NO_MEMORY on
 *         allocation failure, UCS_ERR_EXCEEDS_LIMIT when too many MDs or
 *         transports were found, or the status of the failing UCT/UCP call.
 */
static ucs_status_t ucp_fill_resources(ucp_context_h context,
                                       const ucp_config_t *config)
{
    unsigned num_tl_resources;
    unsigned num_md_resources;
    uct_md_resource_desc_t *md_rscs;
    ucs_status_t status;
    unsigned i; /* same type as num_md_resources, which bounds it */
    unsigned md_index;
    uct_md_h md;
    uct_md_config_t *md_config;
    uint64_t masks[UCT_DEVICE_TYPE_LAST] = {0};

    /* if we got here then num_resources > 0.
     * if the user's device list is empty, there is no match */
    if ((0 == config->devices[UCT_DEVICE_TYPE_NET].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_SHM].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_ACC].count) &&
        (0 == config->devices[UCT_DEVICE_TYPE_SELF].count)) {
        ucs_error("The device lists are empty. Please specify the devices you would like to use "
                  "or omit the UCX_*_DEVICES so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* if we got here then num_resources > 0.
     * if the user's tls list is empty, there is no match */
    if (0 == config->tls.count) {
        ucs_error("The TLs list is empty. Please specify the transports you would like to use "
                  "or omit the UCX_TLS so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* List memory domain resources */
    status = uct_query_md_resources(&md_rscs, &num_md_resources);
    if (status != UCS_OK) {
        goto err;
    }

    /* Error check: Make sure there is at least one MD. Done before sorting
     * so that qsort() is never handed an empty list. */
    if (num_md_resources == 0) {
        ucs_error("No md resources found");
        status = UCS_ERR_NO_DEVICE;
        goto err_release_md_resources;
    }

    /* Sort md's by name, to increase the likelihood of reusing the same ep
     * configuration (since remote md map is part of the key).
     */
    qsort(md_rscs, num_md_resources, sizeof(*md_rscs), ucp_md_rsc_compare_name);

    context->num_mds  = 0;
    context->md_rscs  = NULL;
    context->mds      = NULL;
    context->md_attrs = NULL;
    context->num_tls  = 0;
    context->tl_rscs  = NULL;

    /* Allocate array of MD resources we would actually use */
    context->md_rscs = ucs_calloc(num_md_resources, sizeof(*context->md_rscs),
                                  "ucp_md_resources");
    if (context->md_rscs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of memory domains */
    context->mds = ucs_calloc(num_md_resources, sizeof(*context->mds), "ucp_mds");
    if (context->mds == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of memory domains attributes */
    context->md_attrs = ucs_calloc(num_md_resources, sizeof(*context->md_attrs),
                                   "ucp_md_attrs");
    if (context->md_attrs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Open all memory domains, keep only those which have at least one TL
     * resources selected on them.
     */
    md_index = 0;
    for (i = 0; i < num_md_resources; ++i) {
        status = uct_md_config_read(md_rscs[i].md_name, NULL, NULL, &md_config);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        status = uct_md_open(md_rscs[i].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* From here until num_mds is incremented, `md` is open but not yet
         * counted, so ucp_free_resources() will not close it: failures must
         * go through err_close_md. */
        context->md_rscs[md_index] = md_rscs[i];
        context->mds[md_index]     = md;

        /* Save MD attributes */
        status = uct_md_query(md, &context->md_attrs[md_index]);
        if (status != UCS_OK) {
            goto err_close_md;
        }

        /* Add communication resources of each MD */
        status = ucp_add_tl_resources(context, md, md_index, config,
                                      &num_tl_resources, masks);
        if (status != UCS_OK) {
            goto err_close_md;
        }

        /* If the MD does not have transport resources, don't use it */
        if (num_tl_resources > 0) {
            ++md_index;
            ++context->num_mds;
        } else {
            ucs_debug("closing md %s because it has no selected transport resources",
                      md_rscs[i].md_name);
            uct_md_close(md);
        }
    }

    /* Error check: Make sure there is at least one transport */
    if (0 == context->num_tls) {
        ucs_error("There are no available resources matching the configured criteria");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_context_resources;
    }

    if (context->num_mds > UCP_MD_INDEX_BITS) {
        ucs_error("Only up to %d memory domains are supported (have: %d)",
                  UCP_MD_INDEX_BITS, context->num_mds);
        status = UCS_ERR_EXCEEDS_LIMIT;
        /* The context tables and open MDs exist at this point and must be
         * released as well */
        goto err_free_context_resources;
    }

    /* Notify the user if there are devices from the command line that are not available */
    ucp_check_unavailable_devices(config->devices, masks);

    /* Error check: Make sure there are not too many transports */
    if (context->num_tls >= UCP_MAX_RESOURCES) {
        ucs_error("Exceeded resources limit (%u requested, up to %d are supported)",
                  context->num_tls, UCP_MAX_RESOURCES);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_free_context_resources;
    }

    status = ucp_check_tl_names(context);
    if (status != UCS_OK) {
        goto err_free_context_resources;
    }

    uct_release_md_resource_list(md_rscs);
    return UCS_OK;

err_close_md:
    /* Only reached from inside the loop, for an MD not yet counted in num_mds */
    uct_md_close(md);
err_free_context_resources:
    ucp_free_resources(context);
err_release_md_resources:
    uct_release_md_resource_list(md_rscs);
err:
    return status;
}
Beispiel #6
0
/*
 * Bring up everything a UCT performance test needs, in this order: async
 * context, worker, memory domain, interface, test memory and endpoints.
 * Finally send and receive progress is enabled on the interface.
 *
 * The interface is opened in device mode on the transport and device named
 * in params->uct. If any step fails, the steps that already succeeded are
 * undone in reverse order and the failing status is returned.
 *
 * Returns UCS_OK on success.
 */
static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params)
{
    uct_iface_params_t iface_params = {
        .open_mode            = UCT_IFACE_OPEN_MODE_DEVICE,
        .mode.device.tl_name  = params->uct.tl_name,
        .mode.device.dev_name = params->uct.dev_name,
        .stats_root           = ucs_stats_get_root(),
        .rx_headroom          = 0
    };
    uct_iface_config_t *iface_config;
    ucs_status_t status;

    UCS_CPU_ZERO(&iface_params.cpu_mask);

    status = ucs_async_context_init(&perf->uct.async, params->async_mode);
    if (UCS_OK != status) {
        return status;
    }

    status = uct_worker_create(&perf->uct.async, params->thread_mode,
                               &perf->uct.worker);
    if (UCS_OK != status) {
        goto err_cleanup_async;
    }

    /* Opens the memory domain and stores it in perf->uct.md */
    status = uct_perf_create_md(perf);
    if (UCS_OK != status) {
        goto err_destroy_worker;
    }

    status = uct_md_iface_config_read(perf->uct.md, params->uct.tl_name, NULL,
                                      NULL, &iface_config);
    if (UCS_OK != status) {
        goto err_close_md;
    }

    /* The config object is released right after the open attempt, whether
     * or not it succeeded */
    status = uct_iface_open(perf->uct.md, perf->uct.worker, &iface_params,
                            iface_config, &perf->uct.iface);
    uct_config_release(iface_config);
    if (UCS_OK != status) {
        ucs_error("Failed to open iface: %s", ucs_status_string(status));
        goto err_close_md;
    }

    status = uct_perf_test_check_capabilities(params, perf->uct.iface);
    if (UCS_OK != status) {
        goto err_close_iface;
    }

    status = uct_perf_test_alloc_mem(perf, params);
    if (UCS_OK != status) {
        goto err_close_iface;
    }

    status = uct_perf_test_setup_endpoints(perf);
    if (UCS_OK != status) {
        ucs_error("Failed to setup endpoints: %s", ucs_status_string(status));
        goto err_free_mem;
    }

    uct_iface_progress_enable(perf->uct.iface,
                              UCT_PROGRESS_SEND | UCT_PROGRESS_RECV);

    return UCS_OK;

err_free_mem:
    uct_perf_test_free_mem(perf);
err_close_iface:
    uct_iface_close(perf->uct.iface);
err_close_md:
    uct_md_close(perf->uct.md);
err_destroy_worker:
    uct_worker_destroy(perf->uct.worker);
err_cleanup_async:
    ucs_async_context_cleanup(&perf->uct.async);
    return status;
}

/*
 * Tear down the UCT objects held in perf->uct.
 *
 * The calls run in the reverse of the order in which uct_perf_setup()
 * creates the objects: endpoints, test memory, interface, memory domain,
 * worker and finally the async context.
 */
static void uct_perf_cleanup(ucx_perf_context_t *perf)
{
    uct_perf_test_cleanup_endpoints(perf);
    uct_perf_test_free_mem(perf);
    uct_iface_close(perf->uct.iface);
    uct_md_close(perf->uct.md);
    uct_worker_destroy(perf->uct.worker);
    ucs_async_context_cleanup(&perf->uct.async);
}
Beispiel #7
0
/*
 * Bring up everything a UCT performance test needs, in this order: async
 * context, worker, memory domain, interface, test memory and endpoints.
 *
 * The interface is opened on the transport and device named in params->uct.
 * If any step fails, the steps that already succeeded are undone in reverse
 * order and the failing status is returned.
 *
 * Returns UCS_OK on success.
 */
static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params)
{
    uct_iface_params_t iface_params = {
        .tl_name     = params->uct.tl_name,
        .dev_name    = params->uct.dev_name,
        .rx_headroom = 0
    };
    uct_iface_config_t *iface_config;
    ucs_status_t status;

    status = ucs_async_context_init(&perf->uct.async, params->async_mode);
    if (UCS_OK != status) {
        return status;
    }

    status = uct_worker_create(&perf->uct.async, params->thread_mode,
                               &perf->uct.worker);
    if (UCS_OK != status) {
        goto err_cleanup_async;
    }

    /* Opens the memory domain and stores it in perf->uct.md */
    status = uct_perf_create_md(perf);
    if (UCS_OK != status) {
        goto err_destroy_worker;
    }

    status = uct_iface_config_read(params->uct.tl_name, NULL, NULL, &iface_config);
    if (UCS_OK != status) {
        goto err_close_md;
    }

    /* The config object is released right after the open attempt, whether
     * or not it succeeded */
    status = uct_iface_open(perf->uct.md, perf->uct.worker, &iface_params,
                            iface_config, &perf->uct.iface);
    uct_config_release(iface_config);
    if (UCS_OK != status) {
        ucs_error("Failed to open iface: %s", ucs_status_string(status));
        goto err_close_md;
    }

    status = uct_perf_test_check_capabilities(params, perf->uct.iface);
    if (UCS_OK != status) {
        goto err_close_iface;
    }

    status = uct_perf_test_alloc_mem(perf, params);
    if (UCS_OK != status) {
        goto err_close_iface;
    }

    status = uct_perf_test_setup_endpoints(perf);
    if (UCS_OK != status) {
        ucs_error("Failed to setup endpoints: %s", ucs_status_string(status));
        goto err_free_mem;
    }

    return UCS_OK;

err_free_mem:
    uct_perf_test_free_mem(perf);
err_close_iface:
    uct_iface_close(perf->uct.iface);
err_close_md:
    uct_md_close(perf->uct.md);
err_destroy_worker:
    uct_worker_destroy(perf->uct.worker);
err_cleanup_async:
    ucs_async_context_cleanup(&perf->uct.async);
    return status;
}

/*
 * Tear down the UCT objects held in perf->uct.
 *
 * The calls run in the reverse of the order in which uct_perf_setup()
 * creates the objects: endpoints, test memory, interface, memory domain,
 * worker and finally the async context.
 */
static void uct_perf_cleanup(ucx_perf_context_t *perf)
{
    uct_perf_test_cleanup_endpoints(perf);
    uct_perf_test_free_mem(perf);
    uct_iface_close(perf->uct.iface);
    uct_md_close(perf->uct.md);
    uct_worker_destroy(perf->uct.worker);
    ucs_async_context_cleanup(&perf->uct.async);
}
Beispiel #8
0
/*
 * Print the UCT configuration: first the MD component configuration, then
 * the interface configuration of every distinct transport found on any
 * memory domain that could be opened.
 *
 * @param print_flags  Flags forwarded to the configuration printers.
 * @param tl_name      If not NULL, only the transport with exactly this
 *                     name is printed; NULL prints all transports.
 *
 * Memory domains whose configuration cannot be read, which cannot be opened
 * or whose transport resources cannot be queried are skipped. At most
 * UINT8_MAX distinct transport names are collected; further names are
 * ignored.
 */
void print_uct_config(ucs_config_print_flags_t print_flags, const char *tl_name)
{
    uct_md_resource_desc_t *md_resources;
    unsigned md_rsc_index, num_md_resources;
    uct_tl_resource_desc_t *tl_resources;
    unsigned tl_rsc_index, num_tl_resources;
    uct_iface_config_t *config;
    char tl_names[UINT8_MAX][UCT_TL_NAME_MAX];
    const unsigned max_tls = sizeof(tl_names) / sizeof(tl_names[0]);
    char cfg_title[UCT_TL_NAME_MAX + 128];
    unsigned i, num_tls;
    ucs_status_t status;
    uct_md_h md;
    uct_md_config_t *md_config;

    status = uct_query_md_resources(&md_resources, &num_md_resources);
    if (status != UCS_OK) {
        return;
    }

    uct_md_component_config_print(print_flags);

    /* Pass 1: collect the set of distinct transport names */
    num_tls = 0;
    for (md_rsc_index = 0; md_rsc_index < num_md_resources; ++md_rsc_index) {

        status = uct_md_config_read(md_resources[md_rsc_index].md_name, NULL,
                                    NULL, &md_config);
        if (status != UCS_OK) {
            continue;
        }

        status = uct_md_open(md_resources[md_rsc_index].md_name, md_config, &md);
        uct_config_release(md_config);
        if (status != UCS_OK) {
            continue;
        }

        status = uct_md_query_tl_resources(md, &tl_resources, &num_tl_resources);
        if (status != UCS_OK) {
            uct_md_close(md);
            continue;
        }

        for (tl_rsc_index = 0; tl_rsc_index < num_tl_resources; ++tl_rsc_index) {
            /* Look for this transport among the names collected so far */
            i = 0;
            while (i < num_tls) {
                if (!strcmp(tl_names[i], tl_resources[tl_rsc_index].tl_name)) {
                    break;
                }
                ++i;
            }

            /* Make sure this transport is not inserted to the array before, and
             * if user selects a specific transport - also make sure this is it.
             * Also never write past the end of the fixed-size name table.
             */
            if ((i == num_tls) && (num_tls < max_tls) &&
                ((tl_name == NULL) || !strcmp(tl_name, tl_resources[tl_rsc_index].tl_name)))
            {
                /* snprintf always NUL-terminates, unlike strncpy */
                snprintf(tl_names[num_tls], sizeof(tl_names[num_tls]), "%s",
                         tl_resources[tl_rsc_index].tl_name);
                ++num_tls;
            }
        }

        uct_release_tl_resource_list(tl_resources);
        uct_md_close(md);
    }

    uct_release_md_resource_list(md_resources);

    /* Pass 2: print the interface configuration of each collected transport */
    for (i = 0; i < num_tls; ++i) {
        snprintf(cfg_title, sizeof(cfg_title), "%s transport configuration",
                 tl_names[i]);
        status = uct_iface_config_read(tl_names[i], NULL, NULL, &config);
        if (status != UCS_OK) {
            printf("# < Failed to read configuration >\n");
            continue;
        }

        uct_config_print(config, stdout, cfg_title, print_flags);
        uct_config_release(config);
    }

}
Beispiel #9
0
/*
 * Print a summary of one memory domain and of the transports available on
 * it.
 *
 * @param md_name      Name of the memory domain to open and describe.
 * @param print_opts   Options forwarded to print_tl_info().
 * @param print_flags  Flags forwarded to print_tl_info().
 * @param req_tl_name  If not NULL, the domain is printed only if it has a
 *                     resource with exactly this transport name, and only
 *                     that transport is listed.
 *
 * Prints the domain's component name, allocation/registration capabilities
 * and packed remote key size, then calls print_tl_info() once per transport
 * with all of that transport's resources. Failures to open or query the
 * domain are reported on stdout; nothing is returned to the caller.
 */
static void print_md_info(const char *md_name, int print_opts,
                          ucs_config_print_flags_t print_flags,
                          const char *req_tl_name)
{
    uct_tl_resource_desc_t *tl_list, swap_tmp;
    unsigned first, scan, tl_count, group_len;
    uct_md_config_t *md_config;
    uct_md_attr_t md_attr;
    const char *tl_name;
    ucs_status_t status;
    uct_md_h md;

    status = uct_md_config_read(md_name, NULL, NULL, &md_config);
    if (UCS_OK != status) {
        return;
    }

    status = uct_md_open(md_name, md_config, &md);
    uct_config_release(md_config);
    if (UCS_OK != status) {
        printf("# < failed to open memory domain %s >\n", md_name);
        return;
    }

    status = uct_md_query_tl_resources(md, &tl_list, &tl_count);
    if (UCS_OK != status) {
        printf("#   < failed to query memory domain resources >\n");
        goto out_close_md;
    }

    if (NULL != req_tl_name) {
        /* Skip this domain entirely unless it offers the requested transport */
        for (first = 0; first < tl_count; ++first) {
            if (0 == strcmp(tl_list[first].tl_name, req_tl_name)) {
                break;
            }
        }
        if (first == tl_count) {
            /* no selected transport on the MD */
            goto out_free_list;
        }
    }

    status = uct_md_query(md, &md_attr);
    if (UCS_OK != status) {
        printf("# < failed to query memory domain >\n");
        goto out_free_list;
    }

    printf("#\n");
    printf("# Memory domain: %s\n", md_name);
    printf("#   component:        %s\n", md_attr.component_name);
    if (md_attr.cap.flags & UCT_MD_FLAG_ALLOC) {
        printf("#   allocate:         %s\n",
               size_limit_to_str(md_attr.cap.max_alloc));
    }
    if (md_attr.cap.flags & UCT_MD_FLAG_REG) {
        printf("#   register:         %s, cost: %.0f",
               size_limit_to_str(md_attr.cap.max_reg),
               md_attr.reg_cost.overhead * 1e9);
        /* The size-dependent term is shown only when it is non-negligible */
        if (md_attr.reg_cost.growth * 1e9 > 1e-3) {
            printf("+(%.3f*<SIZE>)", md_attr.reg_cost.growth * 1e9);
        }
        printf(" nsec\n");
    }
    printf("#   remote key:       %zu bytes\n", md_attr.rkey_packed_size);

    if (0 == tl_count) {
        printf("#   < no supported devices found >\n");
        goto out_free_list;
    }

    for (first = 0; first < tl_count; first += group_len) {
        /* Gather all resources for this transport: move every later entry
         * with the same name up so the group is contiguous from `first` */
        tl_name   = tl_list[first].tl_name;
        group_len = 1;
        for (scan = first + 1; scan < tl_count; ++scan) {
            if (0 != strcmp(tl_name, tl_list[scan].tl_name)) {
                continue;
            }
            swap_tmp                    = tl_list[first + group_len];
            tl_list[first + group_len]  = tl_list[scan];
            tl_list[scan]               = swap_tmp;
            ++group_len;
        }

        if ((NULL == req_tl_name) || (0 == strcmp(tl_name, req_tl_name))) {
            print_tl_info(md, tl_name, &tl_list[first], group_len,
                          print_opts, print_flags);
        }
    }

out_free_list:
    uct_release_tl_resource_list(tl_list);
out_close_md:
    uct_md_close(md);
}