Example #1
0
ucs_status_ptr_t ucp_tag_send_sync_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                      ucp_datatype_t datatype, ucp_tag_t tag,
                                      ucp_send_callback_t cb)
{
    ucp_request_t *req;

    ucs_trace_req("send_sync_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_sync_nb", req,
                          ucp_dt_length(datatype, count, buffer, &req->send.state));

    /* Remote side needs to send reply, so have it connect to us */
    ucp_ep_connect_remote(ep);

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            -1, /* disable short method */
                            ucp_ep_config(ep)->sync_zcopy_thresh,
                            ucp_ep_config(ep)->sync_rndv_thresh,
                            cb, &ucp_tag_eager_sync_proto);
}
Example #2
0
ucs_status_t ucp_put(ucp_ep_h ep, const void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    ssize_t packed_len;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        if (length <= ucp_ep_config(ep)->put.max_short) {
            status = uct_ep_put_short(ep->uct_ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ucp_ep_config(ep)->put.bcopy_thresh) {
                frag_length = ucs_min(length, ucp_ep_config(ep)->put.max_short);
                status = uct_ep_put_short(ep->uct_ep, buffer, frag_length, remote_addr,
                                          uct_rkey);
            } else {
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length = frag_length =
                                ucs_min(length, ucp_ep_config(ep)->put.max_bcopy);
                packed_len = uct_ep_put_bcopy(ep->uct_ep, ucp_memcpy_pack, &pack_ctx,
                                              remote_addr, uct_rkey);
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }
            if (ucs_likely(status == UCS_OK)) {
                length      -= frag_length;
                if (length == 0) {
                    break;
                }

                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break;
            }
        }
        ucp_worker_progress(ep->worker);
    }

    return status;
}
Example #3
0
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucs_status_t status;
    ssize_t packed_len;

    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);

    ucp_ep_t *ep = req->send.ep;
    uct_rkey_t uct_rkey = UCP_RKEY_LOOKUP(ep, req->send.rma.rkey);

    for (;;) {
        if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
            /* Should be replaced with bcopy */
            packed_len = ucs_min(req->send.length, ucp_ep_config(ep)->put.max_short);
            status = uct_ep_put_short(ep->uct_ep,
                                      req->send.buffer,
                                      packed_len,
                                      req->send.rma.remote_addr,
                                      uct_rkey);
        } else {
            /* We don't do it right now, but in future we have to add
             * an option to use zcopy
             */
            ucp_memcpy_pack_context_t pack_ctx;
            pack_ctx.src    = req->send.buffer;
            pack_ctx.length =
                ucs_min(req->send.length, ucp_ep_config(ep)->put.max_bcopy);
            packed_len = uct_ep_put_bcopy(ep->uct_ep,
                                          ucp_memcpy_pack,
                                          &pack_ctx,
                                          req->send.rma.remote_addr,
                                          uct_rkey);
            status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
        }

        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            req->send.length -= packed_len;
            if (req->send.length == 0) {
                ucp_request_complete(req, void);
                break;
            }

            req->send.buffer += packed_len;
            req->send.rma.remote_addr += packed_len;
        } else {
            break;
        }
    }

    return status;
}
Example #4
0
static ucs_status_t ucp_wireup_connect_local(ucp_ep_h ep, const uint8_t *tli,
                                             unsigned address_count,
                                             const ucp_address_entry_t *address_list)
{
    ucp_worker_h worker = ep->worker;
    const ucp_address_entry_t *address;
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane, amo_index;
    ucs_status_t status;
    ucp_md_map_t UCS_V_UNUSED md_map;

    ucs_trace("ep %p: connect local transports", ep);

    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        rsc_index = ucp_ep_get_rsc_index(ep, lane);
        if (!ucp_worker_is_tl_p2p(worker, rsc_index)) {
            continue;
        }

        address = &address_list[tli[lane]];
        ucs_assert(address->tl_addr_len > 0);

        /* Check that if the lane is used for RMA/AMO, destination md index matches */
        md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.rma_lane_map, lane);
        ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)),
                    "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane],
                    md_map, address->md_index);

        amo_index = ucp_ep_get_amo_lane_index(&ucp_ep_config(ep)->key, lane);
        if (amo_index != UCP_NULL_LANE) {
            md_map = ucp_lane_map_get_lane(ucp_ep_config(ep)->key.amo_lane_map,
                                           amo_index);
            ucs_assertv((md_map == 0) || (md_map == UCS_BIT(address->md_index)),
                        "lane=%d ai=%d md_map=0x%x md_index=%d", lane, tli[lane],
                        md_map, address->md_index);
        }

        status = uct_ep_connect_to_ep(ep->uct_eps[lane], address->dev_addr,
                                      address->ep_addr);
        if (status != UCS_OK) {
            return status;
        }
    }

    return UCS_OK;
}
Example #5
0
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;
    size_t length;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p",
                  buffer, count, tag, ucp_ep_peer_name(ep), cb);

    if (ucs_likely((datatype & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_CONTIG)) {
        length = ucp_contig_dt_length(datatype, count);
        UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                              "ucp_tag_send_nb (eager - start)",
                              buffer, length);
        if (ucs_likely(length <= ucp_ep_config(ep)->max_eager_short)) {
            status = ucp_tag_send_eager_short(ep, tag, buffer, length);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX,
                                      "ucp_tag_send_nb (eager - finish)",
                                      buffer, length);
                return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
            }
        }
    }

    req = ucp_request_get(ep->worker);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    UCS_INSTRUMENT_RECORD(UCS_INSTRUMENT_TYPE_UCP_TX, "ucp_tag_send_nb", req,
                          ucp_dt_length(datatype, count, buffer, &req->send.state));

    ucp_tag_send_req_init(req, ep, buffer, datatype, tag);

    return ucp_tag_send_req(req, count,
                            ucp_ep_config(ep)->max_eager_short,
                            ucp_ep_config(ep)->zcopy_thresh,
                            ucp_ep_config(ep)->rndv_thresh,
                            cb, &ucp_tag_eager_proto);
}
Example #6
0
static size_t ucp_tag_pack_eager_middle_generic(void *dest, void *arg)
{
    ucp_eager_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t max_length;

    max_length     = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    hdr->super.tag = req->send.tag;
    return sizeof(*hdr) + ucp_request_generic_dt_pack(req, hdr + 1, max_length);
}
Example #7
0
static size_t ucp_tag_pack_eager_middle_contig(void *dest, void *arg)
{
    ucp_eager_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length         = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    ucs_debug("pack eager_middle paylen %zu", length);
    hdr->super.tag = req->send.tag;
    memcpy(hdr + 1, req->send.buffer + req->send.state.offset, length);
    return sizeof(*hdr) + length;
}
Example #8
0
static ucs_status_t ucp_tag_req_start_contig(ucp_request_t *req, size_t count,
                                             ssize_t max_short, size_t zcopy_thresh,
                                             size_t rndv_thresh,
                                             const ucp_proto_t *proto)
{
    ucp_ep_config_t *config = ucp_ep_config(req->send.ep);
    size_t only_hdr_size = proto->only_hdr_size;
    ucs_status_t status;
    size_t max_zcopy;
    ssize_t length;

    length           = ucp_contig_dt_length(req->send.datatype, count);
    req->send.length = length;

    if (length <= max_short) {
        /* short */
        req->send.uct.func = proto->contig_short;
    } else if (length >= rndv_thresh) {
        /* rendezvous */
        status = ucp_tag_send_start_rndv(req);
        if (status != UCS_OK) {
            return status;
        }
    } else if (length < zcopy_thresh) {
        /* bcopy */
        if (req->send.length <= config->max_am_bcopy - only_hdr_size) {
            req->send.uct.func = proto->bcopy_single;
        } else {
            req->send.uct.func = proto->bcopy_multi;
        }
    } else {
        /* eager zcopy */
        status = ucp_request_send_buffer_reg(req, ucp_ep_get_am_lane(req->send.ep));
        if (status != UCS_OK) {
            return status;
        }

        req->send.uct_comp.func = proto->contig_zcopy_completion;

        max_zcopy = config->max_am_zcopy;
        if (req->send.length <= max_zcopy - only_hdr_size) {
            req->send.uct_comp.count = 1;
            req->send.uct.func = proto->contig_zcopy_single;
        } else {
            /* calculate number of zcopy fragments */
            req->send.uct_comp.count = 1 +
                    (length + proto->first_hdr_size - proto->mid_hdr_size - 1) /
                    (max_zcopy - proto->mid_hdr_size);
            req->send.uct.func = proto->contig_zcopy_multi;
        }
    }
    return UCS_OK;
}
Example #9
0
ucs_status_t ucp_get(ucp_ep_h ep, void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    uct_completion_t comp;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    comp.count = 1;

    for (;;) {

        /* Push out all fragments, and request completion only for the last
         * fragment.
         */
        frag_length = ucs_min(ucp_ep_config(ep)->get.max_bcopy, length);
        status = uct_ep_get_bcopy(ep->uct_ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, &comp);
        if (ucs_likely(status == UCS_OK)) {
            goto posted;
        } else if (status == UCS_INPROGRESS) {
            ++comp.count;
            goto posted;
        } else if (status == UCS_ERR_NO_RESOURCE) {
            goto retry;
        } else {
            return status;
        }

posted:
        length      -= frag_length;
        if (length == 0) {
            break;
        }

        buffer      += frag_length;
        remote_addr += frag_length;
retry:
        ucp_worker_progress(ep->worker);
    }

    /* coverity[loop_condition] */
    while (comp.count > 1) {
        ucp_worker_progress(ep->worker);
    }
    return UCS_OK;
}
Example #10
0
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    size_t frag_length;

    uct_rkey_t uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    UCP_RMA_CHECK_PARAMS(buffer, length);
    uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    for (;;) {
        frag_length = ucs_min(ucp_ep_config(ep)->get.max_bcopy, length);
        status = uct_ep_get_bcopy(ep->uct_ep,
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)buffer,
                                  frag_length,
                                  remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            length -= frag_length;
            buffer += frag_length;
            remote_addr += frag_length;
            if (length == 0) {
                break;
            }
        } else if (ucs_unlikely(status == UCS_ERR_NO_RESOURCE)) {
            /* Out of resources - adding request for later schedule */
            ucp_request_t *req;
            req = ucs_mpool_get_inline(&ep->worker->req_mp);
            if (req == NULL) {
                /* can't allocate memory for request - abort */
                status = UCS_ERR_NO_MEMORY;
                break;
            }
            ucp_add_pending_rma(req, ep, buffer, length, remote_addr,
                                rkey, ucp_progress_get_nbi);

            /* Mark it as in progress */
            status = UCS_INPROGRESS;
            break;
        } else {
            /* Error */
            break;
        }
    }

    return status;
}
Example #11
0
static size_t ucp_tag_pack_eager_middle_dt(void *dest, void *arg)
{
    ucp_eager_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length         = ucp_ep_config(req->send.ep)->am.max_bcopy - sizeof(*hdr);
    ucs_debug("pack eager_middle paylen %zu offset %zu", length,
              req->send.state.offset);
    hdr->super.tag = req->send.tag;
    return sizeof(*hdr) + ucp_dt_pack(req->send.datatype, hdr + 1,
                                      req->send.buffer, &req->send.state,
                                      length);
}
Example #12
0
static size_t ucp_tag_pack_eager_first_contig(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length               = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    hdr->super.super.tag = req->send.tag;
    hdr->total_len       = req->send.length;

    ucs_assert(req->send.state.offset == 0);
    ucs_assert(req->send.length > length);
    memcpy(hdr + 1, req->send.buffer, length);
    return sizeof(*hdr) + length;
}
Example #13
0
static size_t ucp_tag_pack_eager_first_generic(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t max_length, length;

    ucs_assert(req->send.state.offset == 0);

    max_length           = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    hdr->super.super.tag = req->send.tag;
    hdr->total_len       = req->send.length;

    ucs_assert(req->send.length > max_length);
    length = ucp_request_generic_dt_pack(req, hdr + 1, max_length);
    return sizeof(*hdr) + length;
}
Example #14
0
static UCS_F_ALWAYS_INLINE void
ucp_tag_send_req_init(ucp_request_t* req, ucp_ep_h ep, const void* buffer,
                      uintptr_t datatype, size_t count, ucp_tag_t tag,
                      uint16_t flags)
{
    req->flags             = flags;
    req->send.ep           = ep;
    req->send.buffer       = buffer;
    req->send.datatype     = datatype;
    req->send.tag          = tag;
    ucp_request_send_state_init(req, datatype, count);
    req->send.length       = ucp_dt_length(req->send.datatype, count,
                                           req->send.buffer,
                                           &req->send.state.dt);
    req->send.lane         = ucp_ep_config(ep)->tag.lane;
}
Example #15
0
File: ucp_ep.c Project: alex--m/ucx
void ucp_ep_print_info(ucp_ep_h ep, FILE *stream)
{
    fprintf(stream, "#\n");
    fprintf(stream, "# UCP endpoint\n");
    fprintf(stream, "#\n");

    fprintf(stream, "#               peer: %s%suuid 0x%"PRIx64"\n",
#if ENABLE_DEBUG_DATA
            ucp_ep_peer_name(ep), ", ",
#else
            "", "",
#endif
            ep->dest_uuid);

    ucp_ep_config_print(stream, ep->worker, ucp_ep_config(ep), NULL);

    fprintf(stream, "#\n");
}
Example #16
0
static size_t ucp_tag_pack_eager_first_dt(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length               = ucp_ep_config(req->send.ep)->am.max_bcopy -
                                         sizeof(*hdr);
    hdr->super.super.tag = req->send.tag;
    hdr->total_len       = req->send.length;

    ucs_debug("pack eager_first paylen %zu", length);
    ucs_assert(req->send.state.offset == 0);
    ucs_assert(req->send.length > length);
    return sizeof(*hdr) + ucp_dt_pack(req->send.datatype, hdr + 1,
                                      req->send.buffer, &req->send.state,
                                      length);
}
Example #17
0
static size_t ucp_tag_pack_eager_sync_first_contig(void *dest, void *arg)
{
    ucp_eager_sync_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length                     = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    hdr->super.super.super.tag = req->send.tag;
    hdr->super.total_len       = req->send.length;
    hdr->req.sender_uuid       = req->send.ep->worker->uuid;
    hdr->req.reqptr            = (uintptr_t)req;

    ucs_debug("pack eager_sync_first paylen %zu", length);
    ucs_assert(req->send.state.offset == 0);
    ucs_assert(req->send.length > length);
    memcpy(hdr + 1, req->send.buffer, length);
    return sizeof(*hdr) + length;
}
Example #18
0
static ucs_status_t ucp_tag_req_start_iov(ucp_request_t *req, size_t count,
                                          ssize_t max_short, size_t zcopy_thresh,
                                          size_t rndv_thresh,
                                          const ucp_proto_t *proto)
{
    ucp_ep_config_t *config = ucp_ep_config(req->send.ep);
    size_t only_hdr_size = proto->only_hdr_size;

    req->send.length = ucp_dt_iov_length(req->send.buffer, count);
    req->send.state.dt.iov.iovcnt_offset = 0;
    req->send.state.dt.iov.iov_offset    = 0;

    /* bcopy */
    if (req->send.length <= config->max_am_bcopy - only_hdr_size) {
        req->send.uct.func = proto->bcopy_single;
    } else {
        req->send.uct.func = proto->bcopy_multi;
    }
    return UCS_OK;
}
Example #19
0
static void ucp_tag_req_start_generic(ucp_request_t *req, size_t count,
                                      size_t rndv_thresh,
                                      const ucp_proto_t *progress)
{
    ucp_ep_config_t *config = ucp_ep_config(req->send.ep);
    ucp_dt_generic_t *dt_gen;
    size_t length;
    void *state;

    dt_gen = ucp_dt_generic(req->send.datatype);
    state = dt_gen->ops.start_pack(dt_gen->context, req->send.buffer, count);

    req->send.state.dt.generic.state = state;
    req->send.length = length = dt_gen->ops.packed_size(state);

    if (length <= config->max_am_bcopy - progress->only_hdr_size) {
        req->send.uct.func = progress->bcopy_single;
    } else {
        req->send.uct.func = progress->bcopy_multi;
    }
}
Example #20
0
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucs_status_t status;
    size_t frag_length;

    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);

    ucp_ep_t *ep = req->send.ep;
    ucp_rkey_h rkey = req->send.rma.rkey;
    uct_rkey_t uct_rkey = UCP_RKEY_LOOKUP(ep, rkey);

    for (;;) {
        frag_length = ucs_min(ucp_ep_config(ep)->get.max_bcopy, req->send.length);
        status = uct_ep_get_bcopy(ep->uct_ep,
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)req->send.buffer,
                                  frag_length,
                                  req->send.rma.remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            req->send.length -= frag_length;
            req->send.buffer += frag_length;
            req->send.rma.remote_addr += frag_length;
            if (req->send.length == 0) {
                /* Get was posted */
                ucp_request_complete(req, void);
                status = UCS_OK;
                break;
            }
        } else {
            /* Error - abort */
            break;
        }
    }

    return status;
}
Example #21
0
static ucs_status_t
ucp_address_pack_ep_address(ucp_ep_h ep, ucp_rsc_index_t tl_index,
                            uct_ep_addr_t *addr)
{
    ucp_ep_op_t optype;
    uct_ep_h uct_ep;

    for (optype = 0; optype < UCP_EP_OP_LAST; ++optype) {
        uct_ep = ep->uct_eps[optype];
        if (ucp_ep_config(ep)->rscs[optype] == tl_index) {
            /*
             * If this is a stub endpoint, it will return the underlying next_ep
             * address, and the length will be correct because the resource index
             * is of the next_ep.
             */
            return uct_ep_get_address(uct_ep, addr);
        }
    }

    ucs_bug("provided ucp_ep without required transport");
    return UCS_ERR_INVALID_ADDR;
}
Example #22
0
static UCS_F_ALWAYS_INLINE size_t
ucp_tag_get_rndv_threshold(const ucp_request_t *req, size_t count,
                           size_t max_iov, size_t rndv_rma_thresh,
                           size_t rndv_am_thresh, size_t seg_size)
{
    switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) {
    case UCP_DATATYPE_IOV: 
        if ((count > max_iov) &&
            ucp_ep_is_tag_offload_enabled(ucp_ep_config(req->send.ep))) {
            /* Make sure SW RNDV will be used, because tag offload does
             * not support multi-packet eager protocols. */
            return seg_size;
        }
        /* Fall through */
    case UCP_DATATYPE_CONTIG: 
        return ucs_min(rndv_rma_thresh, rndv_am_thresh);
    case UCP_DATATYPE_GENERIC:
        return rndv_am_thresh;
    default:
        ucs_error("Invalid data type %lx", req->send.datatype);
    }
 
    return SIZE_MAX;
}
Example #23
0
ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer, ucp_rkey_h *rkey_p)
{
    unsigned remote_pd_index, remote_pd_gap;
    unsigned rkey_index;
    unsigned pd_count;
    ucs_status_t status;
    ucp_rkey_h rkey;
    uint8_t pd_size;
    ucp_pd_map_t pd_map;
    void *p;

    /* Count the number of remote PDs in the rkey buffer */
    p = rkey_buffer;

    /* Read remote PD map */
    pd_map   = *(ucp_pd_map_t*)p;

    ucs_trace("unpacking rkey with pd_map 0x%x", pd_map);

    if (pd_map == 0) {
        /* Dummy key return ok */
        *rkey_p = &ucp_mem_dummy_rkey;
        return UCS_OK;
    }

    pd_count = ucs_count_one_bits(pd_map);
    p       += sizeof(ucp_pd_map_t);

    /* Allocate rkey handle which holds UCT rkeys for all remote PDs.
     * We keep all of them to handle a future transport switch.
     */
    rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->uct[0]) * pd_count), "ucp_rkey");
    if (rkey == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    rkey->pd_map    = 0;
    remote_pd_index = 0; /* Index of remote PD */
    rkey_index      = 0; /* Index of the rkey in the array */

    /* Unpack rkey of each UCT PD */
    while (pd_map > 0) {
        pd_size = *((uint8_t*)p++);

        /* Use bit operations to iterate through the indices of the remote PDs
         * as provided in the pd_map. pd_map always holds a bitmap of PD indices
         * that remain to be used. Every time we find the "gap" until the next
         * valid PD index using ffs operation. If some rkeys cannot be unpacked,
         * we remove them from the local map.
         */
        remote_pd_gap    = ucs_ffs64(pd_map); /* Find the offset for next PD index */
        remote_pd_index += remote_pd_gap;      /* Calculate next index of remote PD*/
        pd_map >>= remote_pd_gap;                   /* Remove the gap from the map */
        ucs_assert(pd_map & 1);

        /* Unpack only reachable rkeys */
        if (UCS_BIT(remote_pd_index) & ucp_ep_config(ep)->key.reachable_pd_map) {
            ucs_assert(rkey_index < pd_count);
            status = uct_rkey_unpack(p, &rkey->uct[rkey_index]);
            if (status != UCS_OK) {
                ucs_error("Failed to unpack remote key from remote pd[%d]: %s",
                          remote_pd_index, ucs_status_string(status));
                goto err_destroy;
            }

            ucs_trace("rkey[%d] for remote pd %d is 0x%lx", rkey_index,
                      remote_pd_index, rkey->uct[rkey_index].rkey);
            rkey->pd_map |= UCS_BIT(remote_pd_index);
            ++rkey_index;
        }

        ++remote_pd_index;
        pd_map >>= 1;
        p += pd_size;
    }

    if (rkey->pd_map == 0) {
        ucs_debug("The unpacked rkey from the destination is unreachable");
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy;
    }

    *rkey_p = rkey;
    return UCS_OK;

err_destroy:
    ucp_rkey_destroy(rkey);
err:
    return status;
}
Example #24
0
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned address_count,
                                   const ucp_address_entry_t *address_list,
                                   uint8_t *addr_indices)
{
    ucp_worker_h worker = ep->worker;
    ucp_ep_config_key_t key;
    uint16_t new_cfg_index;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uint8_t conn_flag;
    char str[32];

    ucs_trace("ep %p: initialize lanes", ep);

    status = ucp_wireup_select_lanes(ep, address_count, address_list,
                                     addr_indices, &key);
    if (status != UCS_OK) {
        goto err;
    }

    key.reachable_md_map |= ucp_ep_config(ep)->key.reachable_md_map;

    new_cfg_index = ucp_worker_get_ep_config(worker, &key);
    if ((ep->cfg_index == new_cfg_index)) {
        return UCS_OK; /* No change */
    }

    if ((ep->cfg_index != 0) && !ucp_ep_is_stub(ep)) {
        /*
         * TODO handle a case where we have to change lanes and reconfigure the ep:
         *
         * - if we already have uct ep connected to an address - move it to the new lane index
         * - if we don't yet have connection to an address - create it
         * - if an existing lane is not connected anymore - delete it (possibly)
         * - if the configuration has changed - replay all pending operations on all lanes -
         *   need that every pending callback would return, in case of failure, the number
         *   of lane it wants to be queued on.
         */
        ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep, ep->cfg_index,
                  new_cfg_index);
        ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, "old", NULL);
        ucp_wireup_print_config(worker->context, &key, "new", NULL);
        ucs_fatal("endpoint reconfiguration not supported yet");
    }

    ep->cfg_index = new_cfg_index;
    ep->am_lane   = key.am_lane;

    snprintf(str, sizeof(str), "ep %p", ep);
    ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str,
                            addr_indices);

    ucs_trace("ep %p: connect lanes", ep);

    /* establish connections on all underlying endpoints */
    conn_flag = UCP_EP_FLAG_LOCAL_CONNECTED;
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        status = ucp_wireup_connect_lane(ep, lane, address_count, address_list,
                                         addr_indices[lane]);
        if (status != UCS_OK) {
            goto err;
        }

        if (ucp_worker_is_tl_p2p(worker, ucp_ep_get_rsc_index(ep, lane))) {
            conn_flag = 0; /* If we have a p2p transport, we're not connected */
        }
    }

    ep->flags |= conn_flag;
    return UCS_OK;

err:
    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
        if (ep->uct_eps[lane] != NULL) {
            uct_ep_destroy(ep->uct_eps[lane]);
            ep->uct_eps[lane] = NULL;
        }
    }
    return status;
}
Example #25
0
ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    ucp_request_t *req;
    uct_ep_h uct_ep;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
        if (length <= rma_config->max_put_short) {
            /* Fast path for a single short message */
            status = uct_ep_put_short(uct_ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                /* Return on error or success */
                break;
            } else {
                /* Out of resources - adding request for later schedule */
                req = ucs_mpool_get_inline(&ep->worker->req_mp);
                if (req == NULL) {
                    status = UCS_ERR_NO_MEMORY;
                    break;
                }
                ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                    rkey, ucp_progress_put_nbi);
                status = UCS_INPROGRESS;
                break;
            }
        } else {
            /* Fragmented put */
            if (length <= ucp_ep_config(ep)->bcopy_thresh) {
                /* TBD: Should be replaced with bcopy */
                packed_len = ucs_min(length, rma_config->max_put_short);
                status = uct_ep_put_short(uct_ep, buffer, packed_len, remote_addr,
                                          uct_rkey);
            } else {
                /* TBD: Use z-copy */
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length =
                    ucs_min(length, rma_config->max_put_bcopy);
                packed_len = uct_ep_put_bcopy(uct_ep, ucp_memcpy_pack, &pack_ctx,
                                              remote_addr, uct_rkey);
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }

            if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
                length -= packed_len;
                if (length == 0) {
                    /* Put is completed - return success */
                    break;
                }

                buffer += packed_len;
                remote_addr += packed_len;
            } else if (status == UCS_ERR_NO_RESOURCE) {
                /* Out of resources - adding request for later schedule */
                req = ucs_mpool_get_inline(&ep->worker->req_mp);
                if (req == NULL) {
                    status = UCS_ERR_NO_MEMORY;
                    break;
                }
                ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                    rkey, ucp_progress_put_nbi);
                status = UCS_INPROGRESS;
                break;
            } else {
                /* Return - Error occured */
                break;
            }
        }
    }

    return status;
}
Example #26
0
File: ucp_ep.c Project: bbenton/ucx
int ucp_ep_is_op_primary(ucp_ep_h ep, ucp_ep_op_t optype)
{
    ucp_ep_config_t *config = ucp_ep_config(ep);
    return (config->rscs[optype] != UCP_NULL_RESOURCE) && /* exists */
           (config->dups[optype] == UCP_EP_OP_LAST);      /* not a duplicate */
}