Example #1
ucs_status_t uct_iface_mem_alloc(uct_iface_h tl_iface, size_t length,
                                 const char *name, uct_allocated_memory_t *mem)
{
    uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t);
    uct_pd_attr_t pd_attr;
    ucs_status_t status;

    status = uct_mem_alloc(length, iface->config.alloc_methods,
                           iface->config.num_alloc_methods, &iface->pd, 1,
                           name, mem);
    if (status != UCS_OK) {
        goto err;
    }

    /* If the memory was not allocated using PD, register it */
    if (mem->method != UCT_ALLOC_METHOD_PD) {

        status = uct_pd_query(iface->pd, &pd_attr);
        if (status != UCS_OK) {
            goto err_free;
        }

        /* If PD does not support registration, allow only the PD method */
        if (!(pd_attr.cap.flags & UCT_PD_FLAG_REG)) {
            ucs_error("%s pd does not supprt registration, so cannot use any allocation "
                      "method except 'pd'", iface->pd->component->name);
            status = UCS_ERR_NO_MEMORY;
            goto err_free;
        }

        status = uct_pd_mem_reg(iface->pd, mem->address, mem->length, &mem->memh);
        if (status != UCS_OK) {
            goto err_free;
        }

        mem->pd = iface->pd;
    }

    return UCS_OK;

err_free:
    uct_mem_free(mem);
err:
    return status;
}
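A minimal caller sketch for the function above, not part of the library source: 'iface' is assumed to be a valid, already-opened uct_iface_h, and uct_iface_mem_free() is assumed to be the matching cleanup counterpart that also deregisters memory registered by uct_iface_mem_alloc().

#include <string.h>
#include <uct/api/uct.h>

/* Usage sketch (assumption, not library code): 'iface' must be a valid,
 * already-opened uct_iface_h. */
static ucs_status_t example_use_iface_mem(uct_iface_h iface)
{
    uct_allocated_memory_t mem;
    ucs_status_t status;

    /* Allocate (and, if needed, register) at least 4096 bytes */
    status = uct_iface_mem_alloc(iface, 4096, "example_buffer", &mem);
    if (status != UCS_OK) {
        return status;
    }

    /* mem.address/mem.length describe the buffer; mem.memh holds the
     * registration handle when mem.method != UCT_ALLOC_METHOD_PD */
    memset(mem.address, 0, mem.length);

    /* Assumed counterpart that deregisters (if needed) and frees */
    uct_iface_mem_free(&mem);
    return UCS_OK;
}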
Example #2
ucs_status_t uct_mem_alloc(size_t min_length, uct_alloc_method_t *methods,
                           unsigned num_methods, uct_pd_h *pds, unsigned num_pds,
                           const char *alloc_name, uct_allocated_memory_t *mem)
{
    uct_alloc_method_t *method;
    uct_pd_attr_t pd_attr;
    ucs_status_t status;
    size_t alloc_length;
    unsigned pd_index;
    uct_mem_h memh;
    uct_pd_h pd;
    void *address;
    int shmid;

    if (min_length == 0) {
        ucs_error("Allocation length cannot be 0");
        return UCS_ERR_INVALID_PARAM;
    }

    if (num_methods == 0) {
        ucs_error("No allocation methods provided");
        return UCS_ERR_INVALID_PARAM;
    }

    for (method = methods; method < methods + num_methods; ++method) {
        ucs_debug("trying allocation method %s", uct_alloc_method_names[*method]);

        switch (*method) {
        case UCT_ALLOC_METHOD_PD:
            /* Allocate with one of the specified protection domains */
            for (pd_index = 0; pd_index < num_pds; ++pd_index) {
                pd = pds[pd_index];
                status = uct_pd_query(pd, &pd_attr);
                if (status != UCS_OK) {
                    ucs_error("Failed to query PD");
                    return status;
                }

                /* Check if PD supports allocation */
                if (!(pd_attr.cap.flags & UCT_PD_FLAG_ALLOC)) {
                    continue;
                }

                /* Allocate memory using the PD.
                 * If the allocation fails, it's considered an error and we don't
                 * fall-back, because this PD already exposed support for memory
                 * allocation.
                 */
                alloc_length = min_length;
                status = uct_pd_mem_alloc(pd, &alloc_length, &address,
                                          alloc_name, &memh);
                if (status != UCS_OK) {
                    ucs_error("failed to allocate %zu bytes using pd %s: %s",
                              alloc_length, pd->component->name,
                              ucs_status_string(status));
                    return status;
                }
                mem->pd   = pd;
                mem->memh = memh;
                goto allocated;

            }
            break;

        case UCT_ALLOC_METHOD_HEAP:
            /* Allocate aligned memory using libc allocator */
            alloc_length = min_length;
            address = ucs_memalign(UCS_SYS_CACHE_LINE_SIZE, alloc_length
                                   UCS_MEMTRACK_VAL);
            if (address != NULL) {
                goto allocated_without_pd;
            }

            ucs_debug("failed to allocate %zu bytes from the heap", alloc_length);
            break;

        case UCT_ALLOC_METHOD_MMAP:
            /* Request memory from operating system using mmap() */
            alloc_length = ucs_align_up_pow2(min_length, ucs_get_page_size());
            address = ucs_mmap(NULL, alloc_length, PROT_READ|PROT_WRITE,
                               MAP_PRIVATE|MAP_ANON, -1, 0 UCS_MEMTRACK_VAL);
            if (address != MAP_FAILED) {
                goto allocated_without_pd;
            }

            ucs_debug("failed to mmap %zu bytes: %m", alloc_length);
            break;

        case UCT_ALLOC_METHOD_HUGE:
            /* Allocate huge pages */
            alloc_length = min_length;
            status = ucs_sysv_alloc(&alloc_length, &address, SHM_HUGETLB, &shmid
                                    UCS_MEMTRACK_VAL);
            if (status == UCS_OK) {
                goto allocated_without_pd;
            }

            ucs_debug("failed to allocate %zu bytes from hugetlb: %s",
                      min_length, ucs_status_string(status));
            break;

        default:
            ucs_error("Invalid allocation method %d", *method);
            return UCS_ERR_INVALID_PARAM;
        }
    }

    ucs_debug("Could not allocate memory with any of the provided methods");
    return UCS_ERR_NO_MEMORY;

allocated_without_pd:
    mem->pd      = NULL;
    mem->memh    = UCT_INVALID_MEM_HANDLE;
allocated:
    ucs_debug("allocated %zu bytes at %p using %s", alloc_length, address,
              (mem->pd == NULL) ? uct_alloc_method_names[*method]
                                : mem->pd->component->name);
    mem->address = address;
    mem->length  = alloc_length;
    mem->method  = *method;
    return UCS_OK;
}
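A hypothetical direct caller of uct_mem_alloc(), showing how the method preference list is passed. 'pd' is assumed to be an opened uct_pd_h; the order below means "try PD allocation first, then fall back to anonymous mmap".

#include <uct/api/uct.h>

/* Usage sketch (assumption, not library code): 'pd' must be a valid uct_pd_h */
static ucs_status_t example_alloc_with_fallback(uct_pd_h pd, size_t length)
{
    /* Preference order: PD allocation first, then anonymous mmap */
    uct_alloc_method_t methods[] = { UCT_ALLOC_METHOD_PD, UCT_ALLOC_METHOD_MMAP };
    uct_allocated_memory_t mem;
    ucs_status_t status;

    status = uct_mem_alloc(length, methods, 2, &pd, 1, "example", &mem);
    if (status != UCS_OK) {
        return status;
    }

    /* mem.method records which method actually succeeded; memory allocated
     * without a PD is not registered (mem.pd == NULL) */
    uct_mem_free(&mem);
    return UCS_OK;
}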
Example #3
static ucs_status_t ucp_fill_resources(ucp_context_h context,
                                       const ucp_config_t *config)
{
    unsigned num_tl_resources;
    unsigned num_pd_resources;
    uct_pd_resource_desc_t *pd_rscs;
    ucs_status_t status;
    ucp_rsc_index_t i;
    unsigned pd_index;
    uct_pd_h pd;

    /* if we got here then num_resources > 0.
     * if the user's device list is empty, there is no match */
    if (0 == config->devices.count) {
        ucs_error("The device list is empty. Please specify the devices you would like to use "
                  "or omit the UCX_DEVICES so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* if we got here then num_resources > 0.
     * if the user's tls list is empty, there is no match */
    if (0 == config->tls.count) {
        ucs_error("The TLs list is empty. Please specify the transports you would like to use "
                  "or omit the UCX_TLS so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* List protection domain resources */
    status = uct_query_pd_resources(&pd_rscs, &num_pd_resources);
    if (status != UCS_OK) {
        goto err;
    }

    /* Error check: Make sure there is at least one PD */
    if (num_pd_resources == 0) {
        ucs_error("No pd resources found");
        status = UCS_ERR_NO_DEVICE;
        goto err_release_pd_resources;
    }

    if (num_pd_resources >= UCP_MAX_PDS) {
        ucs_error("Only up to %ld PDs are supported", UCP_MAX_PDS);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_release_pd_resources;
    }

    context->num_pds  = 0;
    context->pd_rscs  = NULL;
    context->pds      = NULL;
    context->pd_attrs = NULL;
    context->num_tls  = 0;
    context->tl_rscs  = NULL;

    /* Allocate array of PD resources we would actually use */
    context->pd_rscs = ucs_calloc(num_pd_resources, sizeof(*context->pd_rscs),
                                  "ucp_pd_resources");
    if (context->pd_rscs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of protection domains */
    context->pds = ucs_calloc(num_pd_resources, sizeof(*context->pds), "ucp_pds");
    if (context->pds == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of protection domains attributes */
    context->pd_attrs = ucs_calloc(num_pd_resources, sizeof(*context->pd_attrs),
                                   "ucp_pd_attrs");
    if (context->pd_attrs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Open all protection domains, keep only those which have at least one TL
     * resources selected on them.
     */
    pd_index = 0;
    for (i = 0; i < num_pd_resources; ++i) {
        status = uct_pd_open(pd_rscs[i].pd_name, &pd);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        context->pd_rscs[pd_index] = pd_rscs[i];
        context->pds[pd_index]     = pd;

        /* Save PD attributes */
        status = uct_pd_query(pd, &context->pd_attrs[pd_index]);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* Add communication resources of each PD */
        status = ucp_add_tl_resources(context, pd, pd_index, config,
                                      &num_tl_resources);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* If the PD does not have transport resources, don't use it */
        if (num_tl_resources > 0) {
            ++pd_index;
            ++context->num_pds;
        } else {
            ucs_debug("closing pd %s because it has no selected transport resources",
                      pd_rscs[i].pd_name);
            uct_pd_close(pd);
        }
    }

    /* Error check: Make sure there is at least one transport */
    if (0 == context->num_tls) {
        ucs_error("There are no available resources matching the configured criteria");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_context_resources;
    }

    /* Error check: Make sure there are not too many transports */
    if (context->num_tls >= UCP_MAX_TLS) {
        ucs_error("Exceeded resources limit (%u requested, up to %d are supported)",
                  context->num_tls, UCP_MAX_TLS);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_free_context_resources;
    }

    uct_release_pd_resource_list(pd_rscs);
    return UCS_OK;

err_free_context_resources:
    ucp_free_resources(context);
err_release_pd_resources:
    uct_release_pd_resource_list(pd_rscs);
err:
    return status;
}
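ucp_fill_resources() is a static UCP helper, but the UCT calls it builds on can be exercised directly. Below is a reduced sketch of the same enumerate-open-query-close pattern, using only the UCT entry points that already appear above; it is an illustration, not library code.

#include <stdio.h>
#include <uct/api/uct.h>

/* Sketch: enumerate PD resources and print each PD's capability flags,
 * mirroring the pattern used by ucp_fill_resources() above. */
static ucs_status_t example_list_pds(void)
{
    uct_pd_resource_desc_t *pd_rscs;
    unsigned num_pd_resources, i;
    uct_pd_attr_t pd_attr;
    ucs_status_t status;
    uct_pd_h pd;

    status = uct_query_pd_resources(&pd_rscs, &num_pd_resources);
    if (status != UCS_OK) {
        return status;
    }

    for (i = 0; i < num_pd_resources; ++i) {
        status = uct_pd_open(pd_rscs[i].pd_name, &pd);
        if (status != UCS_OK) {
            continue;   /* skip PDs that fail to open */
        }

        if (uct_pd_query(pd, &pd_attr) == UCS_OK) {
            printf("pd %s: reg=%d alloc=%d\n", pd_rscs[i].pd_name,
                   !!(pd_attr.cap.flags & UCT_PD_FLAG_REG),
                   !!(pd_attr.cap.flags & UCT_PD_FLAG_ALLOC));
        }
        uct_pd_close(pd);
    }

    uct_release_pd_resource_list(pd_rscs);
    return UCS_OK;
}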
Example #4
static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf)
{
    unsigned group_size, i, group_index;
    uct_device_addr_t *dev_addr;
    uct_iface_addr_t *iface_addr;
    uct_ep_addr_t *ep_addr;
    uct_iface_attr_t iface_attr;
    uct_pd_attr_t pd_attr;
    unsigned long va;
    void *rkey_buffer;
    ucs_status_t status;
    struct iovec vec[5];
    void *req;

    status = uct_iface_query(perf->uct.iface, &iface_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status));
        goto err;
    }

    status = uct_pd_query(perf->uct.pd, &pd_attr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_pd_query: %s", ucs_status_string(status));
        goto err;
    }

    dev_addr    = calloc(1, iface_attr.device_addr_len);
    iface_addr  = calloc(1, iface_attr.iface_addr_len);
    ep_addr     = calloc(1, iface_attr.ep_addr_len);
    rkey_buffer = calloc(1, pd_attr.rkey_packed_size);
    if ((dev_addr == NULL) || (iface_addr == NULL) || (ep_addr == NULL) ||
        (rkey_buffer == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    status = uct_iface_get_device_address(perf->uct.iface, dev_addr);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_iface_get_device_address: %s",
                  ucs_status_string(status));
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        status = uct_iface_get_address(perf->uct.iface, iface_addr);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_iface_get_address: %s", ucs_status_string(status));
            goto err_free;
        }
    }

    status = uct_pd_mkey_pack(perf->uct.pd, perf->uct.recv_mem.memh, rkey_buffer);
    if (status != UCS_OK) {
        ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status));
        goto err_free;
    }

    group_size  = rte_call(perf, group_size);
    group_index = rte_call(perf, group_index);

    perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers));
    if (perf->uct.peers == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free;
    }

    if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
        for (i = 0; i < group_size; ++i) {
            if (i == group_index) {
                continue;
            }

            status = uct_ep_create(perf->uct.iface, &perf->uct.peers[i].ep);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_create: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
            status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr);
            if (status != UCS_OK) {
                ucs_error("Failed to uct_ep_get_address: %s", ucs_status_string(status));
                goto err_destroy_eps;
            }
        }
    }

    va                  = (uintptr_t)perf->recv_buffer;
    vec[0].iov_base     = &va;
    vec[0].iov_len      = sizeof(va);
    vec[1].iov_base     = rkey_buffer;
    vec[1].iov_len      = pd_attr.rkey_packed_size;
    vec[2].iov_base     = dev_addr;
    vec[2].iov_len      = iface_attr.device_addr_len;
    vec[3].iov_base     = iface_addr;
    vec[3].iov_len      = iface_attr.iface_addr_len;
    vec[4].iov_base     = ep_addr;
    vec[4].iov_len      = iface_attr.ep_addr_len;

    rte_call(perf, post_vec, vec, 5, &req);
    rte_call(perf, exchange_vec, req);

    for (i = 0; i < group_size; ++i) {
        if (i == group_index) {
            continue;
        }
        vec[0].iov_base     = &va;
        vec[0].iov_len      = sizeof(va);
        vec[1].iov_base     = rkey_buffer;
        vec[1].iov_len      = pd_attr.rkey_packed_size;
        vec[2].iov_base     = dev_addr;
        vec[2].iov_len      = iface_attr.device_addr_len;
        vec[3].iov_base     = iface_addr;
        vec[3].iov_len      = iface_attr.iface_addr_len;
        vec[4].iov_base     = ep_addr;
        vec[4].iov_len      = iface_attr.ep_addr_len;

        rte_call(perf, recv_vec, i, vec, 5, req);

        perf->uct.peers[i].remote_addr = va;
        status = uct_rkey_unpack(rkey_buffer, &perf->uct.peers[i].rkey);
        if (status != UCS_OK) {
            ucs_error("Failed to uct_rkey_unpack: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }

        if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
            status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr, ep_addr);
        } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
            status = uct_ep_create_connected(perf->uct.iface, dev_addr, iface_addr,
                                             &perf->uct.peers[i].ep);
        } else {
            status = UCS_ERR_UNSUPPORTED;
        }
        if (status != UCS_OK) {
            ucs_error("Failed to connect endpoint: %s", ucs_status_string(status));
            goto err_destroy_eps;
        }
    }
    uct_perf_iface_flush_b(perf);

    rte_call(perf, barrier);

    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);

    return UCS_OK;

err_destroy_eps:
    for (i = 0; i < group_size; ++i) {
        if (perf->uct.peers[i].rkey.type != NULL) {
            uct_rkey_release(&perf->uct.peers[i].rkey);
        }
        if (perf->uct.peers[i].ep != NULL) {
            uct_ep_destroy(perf->uct.peers[i].ep);
        }
    }
    free(perf->uct.peers);
err_free:
    free(ep_addr);
    free(iface_addr);
    free(dev_addr);
    free(rkey_buffer);
err:
    return status;
}