Beispiel #1
0
/**
 * Release every UCT rkey stored in @a rkey and free the handle itself.
 *
 * The number of entries in rkey->uct[] is the number of bits set in
 * rkey->pd_map; entries are released in ascending array order.
 */
void ucp_rkey_destroy(ucp_rkey_h rkey)
{
    const unsigned rkey_count = ucs_count_one_bits(rkey->pd_map);
    unsigned idx              = 0;

    while (idx < rkey_count) {
        uct_rkey_release(&rkey->uct[idx]);
        ++idx;
    }

    ucs_free(rkey);
}
Beispiel #2
0
/**
 * Return the UCT rkey in @a rkey that matches the destination PD of @a ep.
 */
static inline uct_rkey_t ucp_lookup_uct_rkey(ucp_ep_h ep, ucp_rkey_h rkey)
{
    /*
     * rkey->uct[] is a compact array, so the slot we need equals the number
     * of PDs present in pd_map whose index is lower than the endpoint's
     * destination PD index: keep only those lower bits and popcount them.
     * TODO save the mask in ep->uct, to avoid the shift operation.
     */
    const unsigned slot =
        ucs_count_one_bits(rkey->pd_map & UCS_MASK(ep->uct.dst_pd_index));

    return rkey->uct[slot].rkey;
}
Beispiel #3
0
/**
 * Release every UCT rkey stored in @a rkey and free the handle itself.
 *
 * The shared dummy rkey object (ucp_mem_dummy_rkey) is left untouched: it is
 * neither released nor freed. For any other handle, the number of entries in
 * rkey->uct[] is the number of bits set in rkey->pd_map.
 */
void ucp_rkey_destroy(ucp_rkey_h rkey)
{
    unsigned rkey_count;
    unsigned idx;

    if (rkey != &ucp_mem_dummy_rkey) {
        rkey_count = ucs_count_one_bits(rkey->pd_map);

        for (idx = 0; idx != rkey_count; idx++) {
            uct_rkey_release(&rkey->uct[idx]);
        }

        ucs_free(rkey);
    }
}
Beispiel #4
0
/**
 * Build a UCP rkey handle from a packed rkey buffer.
 *
 * Buffer layout as consumed here: a ucp_pd_map_t bitmap of remote PD indices,
 * then, for every bit set in that bitmap (lowest index first), one length
 * byte followed by that many bytes of packed UCT rkey.
 *
 * Only rkeys whose remote PD index is set in the endpoint configuration's
 * reachable_pd_map are unpacked; the others are skipped over.
 *
 * @param ep           Endpoint whose configuration supplies reachable_pd_map.
 * @param rkey_buffer  Packed rkey buffer to read.
 * @param rkey_p       Filled with the resulting handle on success. When the
 *                     packed bitmap is zero this is &ucp_mem_dummy_rkey.
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the handle cannot be
 *         allocated, the status of uct_rkey_unpack() if it fails, or
 *         UCS_ERR_UNREACHABLE if none of the packed rkeys was reachable.
 */
ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer, ucp_rkey_h *rkey_p)
{
    void *cursor        = rkey_buffer;              /* Read position in the buffer */
    ucp_pd_map_t pd_map = *(ucp_pd_map_t*)cursor;   /* Remote PDs still to visit */
    unsigned remote_idx = 0;                        /* Index of remote PD */
    unsigned rkey_index = 0;                        /* Next free slot in rkey->uct[] */
    unsigned pd_count;
    unsigned gap;
    uint8_t entry_len;
    ucs_status_t status;
    ucp_rkey_h rkey;

    ucs_trace("unpacking rkey with pd_map 0x%x", pd_map);

    /* An empty map denotes the dummy key; nothing to allocate or unpack */
    if (pd_map == 0) {
        *rkey_p = &ucp_mem_dummy_rkey;
        return UCS_OK;
    }

    pd_count = ucs_count_one_bits(pd_map);
    cursor  += sizeof(ucp_pd_map_t);

    /* One handle with room for a UCT rkey per remote PD; all of them are kept
     * so that a later transport switch can still find its rkey.
     */
    rkey = ucs_malloc(sizeof(*rkey) + (pd_count * sizeof(rkey->uct[0])), "ucp_rkey");
    if (rkey == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    rkey->pd_map = 0;

    /* Each iteration handles one set bit of pd_map. The step expression drops
     * the bit just handled and moves the cursor past its packed rkey.
     */
    for (; pd_map > 0; ++remote_idx, pd_map >>= 1, cursor += entry_len) {
        /* Length byte that precedes this PD's packed rkey */
        entry_len = *(uint8_t*)cursor;
        cursor   += 1;

        /* pd_map holds the PD indices that remain, shifted so that bit 0
         * corresponds to remote_idx. Skip the run of zero bits ("gap") to
         * land on the next PD that is actually present.
         */
        gap         = ucs_ffs64(pd_map);
        remote_idx += gap;
        pd_map    >>= gap;
        ucs_assert(pd_map & 1);

        /* Unreachable PDs are left out of the local map */
        if (!(UCS_BIT(remote_idx) & ucp_ep_config(ep)->key.reachable_pd_map)) {
            continue;
        }

        ucs_assert(rkey_index < pd_count);
        status = uct_rkey_unpack(cursor, &rkey->uct[rkey_index]);
        if (status != UCS_OK) {
            ucs_error("Failed to unpack remote key from remote pd[%d]: %s",
                      remote_idx, ucs_status_string(status));
            goto err_destroy;
        }

        ucs_trace("rkey[%d] for remote pd %d is 0x%lx", rkey_index,
                  remote_idx, rkey->uct[rkey_index].rkey);
        rkey->pd_map |= UCS_BIT(remote_idx);
        ++rkey_index;
    }

    if (rkey->pd_map != 0) {
        *rkey_p = rkey;
        return UCS_OK;
    }

    ucs_debug("The unpacked rkey from the destination is unreachable");
    status = UCS_ERR_UNREACHABLE;

err_destroy:
    /* rkey->pd_map only has bits for rkeys that were unpacked successfully,
     * so the destroy routine is given exactly those entries.
     */
    ucp_rkey_destroy(rkey);
    return status;
}
Beispiel #5
0
/**
 * Look up the entry of @a order that belongs to transport @a tl_index.
 *
 * @a order is indexed by the position of the transport inside @a tl_bitmap,
 * i.e. by the number of bits set in @a tl_bitmap below @a tl_index.
 */
static unsigned ucp_wireup_address_index(const unsigned *order,
                                         uint64_t tl_bitmap,
                                         ucp_rsc_index_t tl_index)
{
    /* Transports in the bitmap with a resource index lower than tl_index */
    const uint64_t lower_tls = tl_bitmap & UCS_MASK(tl_index);

    return order[ucs_count_one_bits(lower_tls)];
}
Beispiel #6
0
/**
 * Serialize the worker address into @a buffer.
 *
 * Bytes are written in this order:
 *  - worker->uuid (uint64_t), then the worker name via
 *    ucp_address_pack_string().
 *  - If @a num_devices is 0: a single UCP_NULL_RESOURCE byte and nothing else.
 *  - Otherwise, for every entry of @a devices:
 *      - 1 byte : MD index, OR'ed with UCP_ADDRESS_FLAG_EMPTY (device has no
 *                 transports), UCP_ADDRESS_FLAG_MD_ALLOC and
 *                 UCP_ADDRESS_FLAG_MD_REG according to the MD capability flags.
 *      - 1 byte : device address length, OR'ed with UCP_ADDRESS_FLAG_LAST on
 *                 the last device.
 *      - device address (the cursor advances by dev->dev_addr_len).
 *      - for every transport i whose bit is set in dev->tl_bitmap:
 *          - uint16_t transport name checksum
 *          - packed interface attributes
 *            (sizeof(ucp_address_packed_iface_attr_t) bytes)
 *          - 1 byte : iface address length, OR'ed with UCP_ADDRESS_FLAG_LAST
 *                     when i == ucs_ilog2(dev->tl_bitmap), and with
 *                     UCP_ADDRESS_FLAG_EP_ADDR when an ep address follows
 *          - iface address
 *          - optionally 1 byte ep address length + ep address; only when the
 *            interface lacks UCT_IFACE_FLAG_CONNECT_TO_IFACE and @a ep is
 *            not NULL.
 *
 * @param worker       Worker whose uuid, name and interfaces are packed.
 * @param ep           Endpoint used to pack ep addresses; may be NULL, in
 *                     which case no ep address is written.
 * @param buffer       Destination buffer.
 * @param size         Expected total number of bytes written. It is only
 *                     checked by the assertion at the end; no bounds checks
 *                     are made while packing.
 * @param tl_bitmap    Bitmap used to compute the slot in @a order for each
 *                     transport.
 * @param order        If not NULL, order[popcount(tl_bitmap & UCS_MASK(i))]
 *                     receives the sequence number in which transport i was
 *                     packed.
 * @param devices      Array of devices to pack.
 * @param num_devices  Number of entries in @a devices.
 *
 * @return UCS_OK on success; the failing status of
 *         uct_iface_get_device_address(), uct_iface_get_address() or
 *         ucp_address_pack_ep_address(); or UCS_ERR_INVALID_ADDR when a
 *         transport has neither UCT_IFACE_FLAG_CONNECT_TO_IFACE nor
 *         UCT_IFACE_FLAG_CONNECT_TO_EP. On error the buffer may already be
 *         partially written.
 */
static ucs_status_t ucp_address_do_pack(ucp_worker_h worker, ucp_ep_h ep,
                                        void *buffer, size_t size,
                                        uint64_t tl_bitmap, unsigned *order,
                                        const ucp_address_packed_device_t *devices,
                                        ucp_rsc_index_t num_devices)
{
    ucp_context_h context = worker->context;
    const ucp_address_packed_device_t *dev;
    uct_iface_attr_t *iface_attr;
    ucp_rsc_index_t md_index;
    ucs_status_t status;
    ucp_rsc_index_t i;
    size_t iface_addr_len;
    size_t ep_addr_len;
    uint64_t md_flags;
    unsigned index;
    void *ptr;
    uint8_t *iface_addr_len_ptr;

    /* ptr is the write cursor; index counts transports packed so far */
    ptr = buffer;
    index = 0;

    /* Header: worker uuid followed by the worker name */
    *(uint64_t*)ptr = worker->uuid;
    ptr += sizeof(uint64_t);
    ptr = ucp_address_pack_string(ucp_worker_get_name(worker), ptr);

    /* No devices: emit a single marker byte instead of a device list */
    if (num_devices == 0) {
        *((uint8_t*)ptr) = UCP_NULL_RESOURCE;
        ++ptr;
        goto out;
    }

    for (dev = devices; dev < devices + num_devices; ++dev) {

        /* MD index */
        md_index       = context->tl_rscs[dev->rsc_index].md_index;
        md_flags       = context->tl_mds[md_index].attr.cap.flags;
        /* The index shares its byte with flag bits, so it must fit the mask */
        ucs_assert_always(!(md_index & ~UCP_ADDRESS_FLAG_MD_MASK));

        *(uint8_t*)ptr = md_index |
                         ((dev->tl_bitmap == 0)          ? UCP_ADDRESS_FLAG_EMPTY    : 0) |
                         ((md_flags & UCT_MD_FLAG_ALLOC) ? UCP_ADDRESS_FLAG_MD_ALLOC : 0) |
                         ((md_flags & UCT_MD_FLAG_REG)   ? UCP_ADDRESS_FLAG_MD_REG   : 0);
        ++ptr;

        /* Device address length */
        ucs_assert(dev->dev_addr_len < UCP_ADDRESS_FLAG_LAST);
        *(uint8_t*)ptr = dev->dev_addr_len | ((dev == (devices + num_devices - 1)) ?
                                              UCP_ADDRESS_FLAG_LAST : 0);
        ++ptr;

        /* Device address */
        status = uct_iface_get_device_address(worker->ifaces[dev->rsc_index].iface,
                                              (uct_device_addr_t*)ptr);
        if (status != UCS_OK) {
            return status;
        }

        ucp_address_memchek(ptr, dev->dev_addr_len,
                            &context->tl_rscs[dev->rsc_index].tl_rsc);
        ptr += dev->dev_addr_len;

        /* Pack every transport that belongs to this device */
        for (i = 0; i < context->num_tls; ++i) {

            if (!(UCS_BIT(i) & dev->tl_bitmap)) {
                continue;
            }

            /* Transport name checksum */
            *(uint16_t*)ptr = context->tl_rscs[i].tl_name_csum;
            ptr += sizeof(uint16_t);

            /* Transport information */
            ucp_address_pack_iface_attr(ptr, &worker->ifaces[i].attr,
                                        worker->atomic_tls & UCS_BIT(i));
            ucp_address_memchek(ptr, sizeof(ucp_address_packed_iface_attr_t),
                                &context->tl_rscs[dev->rsc_index].tl_rsc);
            ptr += sizeof(ucp_address_packed_iface_attr_t);

            iface_attr = &worker->ifaces[i].attr;

            /* A transport must be connectable either to an iface or to an ep */
            if (!(iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) &&
                !(iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP)) {
                return UCS_ERR_INVALID_ADDR;
            }

            /* Pack iface address */
            iface_addr_len = iface_attr->iface_addr_len;
            /* The length shares its byte with the EP_ADDR and LAST flags */
            ucs_assert(iface_addr_len < UCP_ADDRESS_FLAG_EP_ADDR);

            /* The address goes at ptr + 1; the byte at ptr is reserved for
             * the length/flags and is written right after. */
            status = uct_iface_get_address(worker->ifaces[i].iface,
                                           (uct_iface_addr_t*)(ptr + 1));
            if (status != UCS_OK) {
                return status;
            }
            ucp_address_memchek(ptr + 1, iface_addr_len,
                                &context->tl_rscs[dev->rsc_index].tl_rsc);
            /* Remember where the length byte is so that the EP_ADDR flag can
             * still be OR'ed in below, after ptr has moved on.
             * LAST is set when i == ucs_ilog2(dev->tl_bitmap) - presumably the
             * highest transport index of this device; confirm ucs_ilog2(). */
            iface_addr_len_ptr  = ptr;
            *iface_addr_len_ptr = iface_addr_len | ((i == ucs_ilog2(dev->tl_bitmap)) ?
                                                    UCP_ADDRESS_FLAG_LAST : 0);
            ptr += 1 + iface_addr_len;

            /* Pack ep address if present */
            if (!(iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) &&
                (ep != NULL)) {
                *iface_addr_len_ptr |= UCP_ADDRESS_FLAG_EP_ADDR;

                /* One length byte, then the ep address itself */
                ep_addr_len = iface_attr->ep_addr_len;
                ucs_assert(ep_addr_len < UINT8_MAX);
                *(uint8_t*)ptr = ep_addr_len;

                status      = ucp_address_pack_ep_address(ep, i, ptr + 1);
                if (status != UCS_OK) {
                    return status;
                }
                ucp_address_memchek(ptr + 1, ep_addr_len,
                                    &context->tl_rscs[dev->rsc_index].tl_rsc);
                ptr += 1 + ep_addr_len;
            }

            /* Save the address index of this transport */
            if (order != NULL) {
                order[ucs_count_one_bits(tl_bitmap & UCS_MASK(i))] = index;
            }

            ucs_trace("pack addr[%d] : "UCT_TL_RESOURCE_DESC_FMT
                      " md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e "
                      "lat_ovh: %e dev_priority %d",
                      index,
                      UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[i].tl_rsc),
                      md_flags, worker->ifaces[i].attr.cap.flags,
                      worker->ifaces[i].attr.bandwidth,
                      worker->ifaces[i].attr.overhead,
                      worker->ifaces[i].attr.latency.overhead,
                      worker->ifaces[i].attr.priority);
            ++index;
        }
    }

out:
    /* The caller-provided size must match exactly what was written.
     * NOTE(review): ptr - buffer is a pointer difference (ptrdiff_t) printed
     * with "%zd"; "%td" would be the matching specifier - confirm. */
    ucs_assertv(buffer + size == ptr, "buffer=%p size=%zu ptr=%p ptr-buffer=%zd",
                buffer, size, ptr, ptr - buffer);
    return UCS_OK;
}
Beispiel #7
0
/**
 * Serialize the worker address into @a buffer.
 *
 * Bytes are written in this order:
 *  - worker->uuid (uint64_t), then the worker name via
 *    ucp_address_pack_string().
 *  - If @a num_devices is 0: a single UCP_NULL_RESOURCE byte and nothing else.
 *  - Otherwise, for every entry of @a devices:
 *      - 1 byte : PD index, OR'ed with UCP_ADDRESS_FLAG_EMPTY when the device
 *                 has no transports.
 *      - 1 byte : device address length, OR'ed with UCP_ADDRESS_FLAG_LAST on
 *                 the last device.
 *      - device address (the cursor advances by dev->dev_addr_len).
 *      - for every transport i whose bit is set in dev->tl_bitmap:
 *          - transport name via ucp_address_pack_string()
 *          - 1 byte : transport address length, OR'ed with
 *                     UCP_ADDRESS_FLAG_LAST when i == ucs_ilog2(dev->tl_bitmap)
 *          - transport address: the iface address if the interface has
 *            UCT_IFACE_FLAG_CONNECT_TO_IFACE; otherwise, if it has
 *            UCT_IFACE_FLAG_CONNECT_TO_EP, the ep address, or nothing
 *            (length 0) when @a ep is NULL.
 *
 * @param worker       Worker whose uuid, name and interfaces are packed.
 * @param ep           Endpoint used to pack ep addresses; may be NULL.
 * @param buffer       Destination buffer.
 * @param size         Expected total number of bytes written. It is only
 *                     checked by the assertion at the end; no bounds checks
 *                     are made while packing.
 * @param tl_bitmap    Bitmap used to compute the slot in @a order for each
 *                     transport.
 * @param order        If not NULL, order[popcount(tl_bitmap & UCS_MASK(i))]
 *                     receives the sequence number in which transport i was
 *                     packed.
 * @param devices      Array of devices to pack.
 * @param num_devices  Number of entries in @a devices.
 *
 * @return UCS_OK on success; the failing status of
 *         uct_iface_get_device_address(), uct_iface_get_address() or
 *         ucp_address_pack_ep_address(); or UCS_ERR_INVALID_ADDR when a
 *         transport has neither UCT_IFACE_FLAG_CONNECT_TO_IFACE nor
 *         UCT_IFACE_FLAG_CONNECT_TO_EP. On error the buffer may already be
 *         partially written.
 */
static ucs_status_t ucp_address_do_pack(ucp_worker_h worker, ucp_ep_h ep,
                                        void *buffer, size_t size,
                                        uint64_t tl_bitmap, unsigned *order,
                                        const ucp_address_packed_device_t *devices,
                                        ucp_rsc_index_t num_devices)
{
    ucp_context_h context = worker->context;
    const ucp_address_packed_device_t *dev;
    uct_iface_attr_t *iface_attr;
    ucs_status_t status;
    ucp_rsc_index_t i;
    size_t tl_addr_len;
    unsigned index;
    void *ptr;

    /* ptr is the write cursor; index numbers the transports stored in order[] */
    ptr = buffer;
    index = 0;

    /* Header: worker uuid followed by the worker name */
    *(uint64_t*)ptr = worker->uuid;
    ptr += sizeof(uint64_t);
    ptr = ucp_address_pack_string(ucp_worker_get_name(worker), ptr);

    /* No devices: emit a single marker byte instead of a device list */
    if (num_devices == 0) {
        *((uint8_t*)ptr) = UCP_NULL_RESOURCE;
        ++ptr;
        goto out;
    }

    for (dev = devices; dev < devices + num_devices; ++dev) {

        /* PD index */
        *(uint8_t*)ptr = context->tl_rscs[dev->rsc_index].pd_index |
                         ((dev->tl_bitmap == 0) ? UCP_ADDRESS_FLAG_EMPTY : 0);
        ++ptr;

        /* Device address length */
        ucs_assert(dev->dev_addr_len < UCP_ADDRESS_FLAG_LAST);
        *(uint8_t*)ptr = dev->dev_addr_len | ((dev == (devices + num_devices - 1)) ?
                                              UCP_ADDRESS_FLAG_LAST : 0);
        ++ptr;

        /* Device address */
        status = uct_iface_get_device_address(worker->ifaces[dev->rsc_index],
                                              (uct_device_addr_t*)ptr);
        if (status != UCS_OK) {
            return status;
        }

        ptr += dev->dev_addr_len;

        /* Pack every transport that belongs to this device */
        for (i = 0; i < context->num_tls; ++i) {

            if (!(UCS_BIT(i) & dev->tl_bitmap)) {
                continue;
            }

            /* Transport name */
            ptr = ucp_address_pack_string(context->tl_rscs[i].tl_rsc.tl_name, ptr);

            /* Transport address length */
            /* The address itself is written at ptr + 1; the byte at ptr is
             * reserved for the length/flags and is filled in further below. */
            iface_attr = &worker->iface_attrs[i];
            if (iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
                tl_addr_len = iface_attr->iface_addr_len;
                status = uct_iface_get_address(worker->ifaces[i],
                                               (uct_iface_addr_t*)(ptr + 1));
            } else if (iface_attr->cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
                if (ep == NULL) {
                    /* No endpoint to take an address from: pack zero length */
                    tl_addr_len = 0;
                    status      = UCS_OK;
                } else {
                    tl_addr_len = iface_attr->ep_addr_len;
                    status      = ucp_address_pack_ep_address(ep, i, ptr + 1);
                }
            } else {
                /* tl_addr_len stays unset here; the function returns just
                 * below, before it would be read. */
                status      = UCS_ERR_INVALID_ADDR;
            }
            if (status != UCS_OK) {
                return status;
            }

            ucp_address_memchek(ptr + 1, tl_addr_len,
                                &context->tl_rscs[dev->rsc_index].tl_rsc);

            /* Save the address index of this transport */
            /* index only advances when order is supplied; it has no other use */
            if (order != NULL) {
                order[ucs_count_one_bits(tl_bitmap & UCS_MASK(i))] = index++;
            }

            /* Length byte; LAST is set when i == ucs_ilog2(dev->tl_bitmap) -
             * presumably the highest transport index of this device; confirm
             * ucs_ilog2(). */
            ucs_assert(tl_addr_len < UCP_ADDRESS_FLAG_LAST);
            *(uint8_t*)ptr = tl_addr_len | ((i == ucs_ilog2(dev->tl_bitmap)) ?
                                            UCP_ADDRESS_FLAG_LAST : 0);
            ptr += 1 + tl_addr_len;
        }
    }

out:
    /* The caller-provided size must match exactly what was written */
    ucs_assertv(buffer + size == ptr, "buffer=%p size=%zu ptr=%p", buffer, size,
                ptr);
    return UCS_OK;
}