Example #1
0
ucs_status_t ucp_rma_put(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    if (length == 0) {
        return UCS_OK;
    }
    if (ENABLE_PARAMS_CHECK && (buffer == NULL)) {
        return UCS_ERR_INVALID_PARAM;
    }

    uct_rkey = UCP_RMA_RKEY_LOOKUP(ep, rkey);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        if (length <= ep->config.max_short_put) {
            status = uct_ep_put_short(ep->uct.ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ep->worker->context->config.bcopy_thresh) {
                frag_length = ucs_min(length, ep->config.max_short_put);
                status = uct_ep_put_short(ep->uct.ep, buffer, frag_length, remote_addr,
                                          uct_rkey);
            } else {
                frag_length = ucs_min(length, ep->config.max_bcopy_put);
                status = uct_ep_put_bcopy(ep->uct.ep, (uct_pack_callback_t)memcpy,
                                          (void*)buffer, frag_length, remote_addr,
                                          uct_rkey);
            }
            if (ucs_likely(status == UCS_OK)) {
                length      -= frag_length;
                if (length == 0) {
                    break;
                }

                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break;
            }
        }
        ucp_worker_progress(ep->worker);
    }

    return status;
}
Example #2
0
ucs_status_t ucp_put(ucp_ep_h ep, const void *buffer, size_t length,
                     uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    ssize_t packed_len;
    ucp_lane_index_t lane;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    /* Loop until all message has been sent.
     * We re-check the configuration on every iteration, because it can be
     * changed by transport switch.
     */
    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, lane, uct_rkey, rma_config);
        if (length <= rma_config->max_put_short) {
            status = uct_ep_put_short(ep->uct_eps[lane], buffer, length,
                                      remote_addr, uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                break;
            }
        } else {
            if (length <= ucp_ep_config(ep)->bcopy_thresh) {
                frag_length = ucs_min(length, rma_config->max_put_short);
                status = uct_ep_put_short(ep->uct_eps[lane], buffer, frag_length,
                                          remote_addr, uct_rkey);
            } else {
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length = frag_length =
                                ucs_min(length, rma_config->max_put_bcopy);
                packed_len = uct_ep_put_bcopy(ep->uct_eps[lane], ucp_memcpy_pack,
                                              &pack_ctx, remote_addr, uct_rkey);
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }
            if (ucs_likely(status == UCS_OK)) {
                length      -= frag_length;
                if (length == 0) {
                    break;
                }

                buffer      += frag_length;
                remote_addr += frag_length;
            } else if (status != UCS_ERR_NO_RESOURCE) {
                break;
            }
        }
        ucp_worker_progress(ep->worker);
    }

    return status;
}
Example #3
0
static ucs_status_t ucp_progress_put_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
    for (;;) {
        if (req->send.length <= ep->worker->context->config.ext.bcopy_thresh) {
            /* Should be replaced with bcopy */
            packed_len = ucs_min(req->send.length, rma_config->max_put_short);
            status = uct_ep_put_short(uct_ep,
                                      req->send.buffer,
                                      packed_len,
                                      req->send.rma.remote_addr,
                                      uct_rkey);
        } else {
            /* We don't do it right now, but in future we have to add
             * an option to use zcopy
             */
            ucp_memcpy_pack_context_t pack_ctx;
            pack_ctx.src    = req->send.buffer;
            pack_ctx.length =
                ucs_min(req->send.length, rma_config->max_put_bcopy);
            packed_len = uct_ep_put_bcopy(uct_ep,
                                          ucp_memcpy_pack,
                                          &pack_ctx,
                                          req->send.rma.remote_addr,
                                          uct_rkey);
            status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
        }

        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            req->send.length -= packed_len;
            if (req->send.length == 0) {
                ucp_request_complete(req, void);
                break;
            }

            req->send.buffer += packed_len;
            req->send.rma.remote_addr += packed_len;
        } else {
            break;
        }
    }

    return status;
}
Example #4
0
void uct_ib_log_dump_sg_list(struct ibv_sge *sg_list, int num_sge, uint64_t inline_bitmap,
                             uct_log_data_dump_func_t data_dump, char *buf, size_t max)
{
    char data[256];
    size_t total_len       = 0;
    size_t total_valid_len = 0;;
    char *s    = buf;
    char *ends = buf + max;
    void *pd   = data;
    size_t len;
    int i;

    for (i = 0; i < num_sge; ++i) {
        if (inline_bitmap & UCS_BIT(i)) {
            snprintf(s, ends - s, " [inl len %d]", sg_list[i].length);
        } else {
            snprintf(s, ends - s, " [va 0x%"PRIx64" len %d lkey 0x%x]",
                     sg_list[i].addr, sg_list[i].length, sg_list[i].lkey);
        }

        len = ucs_min(sg_list[i].length, (void*)data + sizeof(data) - pd);
        memcpy(pd, (void*)sg_list[i].addr, len);

        s               += strlen(s);
        pd              += len;
        total_len       += len;
        total_valid_len += sg_list[i].length;
    }

    if (data_dump != NULL) {
        data_dump(data, total_len, total_valid_len, s, ends - s);
    }
}
Example #5
0
File: dt_iov.c Project: openucx/ucx
size_t ucp_dt_iov_scatter(ucp_dt_iov_t *iov, size_t iovcnt, const void *src,
                          size_t length, size_t *iov_offset, size_t *iovcnt_offset)
{
    size_t item_len, item_len_to_copy;
    size_t length_it = 0;

    while ((length_it < length) && (*iovcnt_offset < iovcnt)) {
        item_len         = iov[*iovcnt_offset].length;
        item_len_to_copy = ucs_min(ucs_max((ssize_t)(item_len - *iov_offset), 0),
                                   length - length_it);
        ucs_assert(*iov_offset <= item_len);

        memcpy(iov[*iovcnt_offset].buffer + *iov_offset, src + length_it,
               item_len_to_copy);
        length_it += item_len_to_copy;

        ucs_assert(length_it <= length);
        if (length_it < length) {
            *iov_offset = 0;
            ++(*iovcnt_offset);
        } else {
            *iov_offset += item_len_to_copy;
        }
    }
    return length_it;
}
Example #6
0
static void *ucm_realloc(void *oldptr, size_t size, const void *caller)
{
    void *newptr;
    size_t oldsz;
    int foreign;

    ucm_malloc_hook_state.hook_called = 1;
    if (oldptr != NULL) {
        foreign = !ucm_malloc_address_remove_if_managed(oldptr, "realloc");
        if (RUNNING_ON_VALGRIND || foreign) {
            /*  If pointer was created by original malloc(), allocate the new pointer
             * with the new heap, and copy out the data. Then, release the old pointer.
             *  We do the same if we are running with valgrind, so we could use client
             * requests properly.
             */
            newptr = ucm_dlmalloc(size);
            ucm_malloc_allocated(newptr, size, "realloc");

            oldsz = ucm_malloc_hook_state.usable_size(oldptr);
            memcpy(newptr, oldptr, ucs_min(size, oldsz));

            if (foreign) {
                ucm_release_foreign_block(oldptr, ucm_malloc_hook_state.free, "realloc");
            } else{
                ucm_mem_free(oldptr, oldsz);
            }
            return newptr;
        }
    }

    newptr = ucm_dlrealloc(oldptr, size);
    ucm_malloc_allocated(newptr, size, "realloc");
    return newptr;
}
Example #7
0
File: timerq.c Project: openucx/ucx
ucs_status_t ucs_timerq_remove(ucs_timer_queue_t *timerq, int timer_id)
{
    ucs_status_t status;
    ucs_timer_t *ptr;

    ucs_trace_func("timerq=%p timer_id=%d", timerq, timer_id);

    status = UCS_ERR_NO_ELEM;

    pthread_spin_lock(&timerq->lock);
    timerq->min_interval = UCS_TIME_INFINITY;
    ptr = timerq->timers;
    while (ptr < timerq->timers + timerq->num_timers) {
        if (ptr->id == timer_id) {
            *ptr = timerq->timers[--timerq->num_timers];
            status = UCS_OK;
        } else {
            timerq->min_interval = ucs_min(timerq->min_interval, ptr->interval);
            ++ptr;
        }
    }

    /* TODO realloc - shrink */
    if (timerq->num_timers == 0) {
        ucs_assert(timerq->min_interval == UCS_TIME_INFINITY);
        free(timerq->timers);
        timerq->timers = NULL;
    } else {
        ucs_assert(timerq->min_interval != UCS_TIME_INFINITY);
    }

    pthread_spin_unlock(&timerq->lock);
    return status;
}
Example #8
0
File: ucp_ep.c Project: alex--m/ucx
static void ucp_ep_config_print_tag_proto(FILE *stream, const char *name,
                                          size_t max_eager_short,
                                          size_t zcopy_thresh, size_t rndv_thresh)
{
    size_t max_bcopy;

    fprintf(stream, "# %18s: 0", name);
    if (max_eager_short > 0) {
        fprintf(stream, "..<egr/short>..%zu" , max_eager_short + 1);
    }
    max_bcopy = ucs_min(zcopy_thresh, rndv_thresh);
    if (max_eager_short < max_bcopy) {
        fprintf(stream, "..<egr/bcopy>..%zu", max_bcopy);
    }
    if (zcopy_thresh < rndv_thresh) {
        fprintf(stream, "..<egr/zcopy>..");
        if (rndv_thresh < SIZE_MAX) {
            fprintf(stream, "%zu", rndv_thresh);
        }
    }
    if (rndv_thresh < SIZE_MAX) {
        fprintf(stream, "..<rndv>..");
    }
    fprintf(stream, "(inf)\n");
}
Example #9
0
static double ucp_wireup_rma_score_func(const uct_md_attr_t *md_attr,
                                        const uct_iface_attr_t *iface_attr,
                                        const ucp_wireup_iface_attr_t *remote_iface_attr)
{
    /* best for 4k messages */
    return 1e-3 / (iface_attr->latency + iface_attr->overhead +
                    (4096.0 / ucs_min(iface_attr->bandwidth, remote_iface_attr->bandwidth)));
}
Example #10
0
size_t ucs_config_memunits_get(size_t config_size, size_t auto_size,
                               size_t max_size)
{
    if (config_size == UCS_CONFIG_MEMUNITS_AUTO) {
        return auto_size;
    } else {
        return ucs_min(config_size, max_size);
    }
}
Example #11
0
/* Unpack a string and return pointer to next storage byte */
static const void* ucp_address_unpack_string(const void *src, char *s, size_t max)
{
    size_t length, avail;

    ucs_assert(max >= 1);
    length   = *(const uint8_t*)src;
    avail    = ucs_min(length, max - 1);
    memcpy(s, src + 1, avail);
    s[avail]  = '\0';
    return src + length + 1;
}
Example #12
0
ucs_status_t ucp_rma_get(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    uct_completion_t comp;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    if (length == 0) {
        return UCS_OK;
    }

    uct_rkey = UCP_RMA_RKEY_LOOKUP(ep, rkey);

    comp.count = 1;

    for (;;) {

        /* Push out all fragments, and request completion only for the last
         * fragment.
         */
        frag_length = ucs_min(ep->config.max_bcopy_get, length);
        status = uct_ep_get_bcopy(ep->uct.ep, (uct_unpack_callback_t)memcpy,
                                  (void*)buffer, frag_length, remote_addr,
                                  uct_rkey, &comp);
        if (ucs_likely(status == UCS_OK)) {
            goto posted;
        } else if (status == UCS_INPROGRESS) {
            ++comp.count;
            goto posted;
        } else if (status == UCS_ERR_NO_RESOURCE) {
            goto retry;
        } else {
            return status;
        }

posted:
        length      -= frag_length;
        if (length == 0) {
            break;
        }

        buffer      += frag_length;
        remote_addr += frag_length;
retry:
        ucp_worker_progress(ep->worker);
    }

    /* coverity[loop_condition] */
    while (comp.count > 1) {
        ucp_worker_progress(ep->worker);
    };
    return UCS_OK;
}
Example #13
0
ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    uct_ep_h uct_ep;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
        frag_length = ucs_min(rma_config->max_get_bcopy, length);
        status = uct_ep_get_bcopy(uct_ep,
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)buffer,
                                  frag_length,
                                  remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            length -= frag_length;
            buffer += frag_length;
            remote_addr += frag_length;
            if (length == 0) {
                break;
            }
        } else if (ucs_unlikely(status == UCS_ERR_NO_RESOURCE)) {
            /* Out of resources - adding request for later schedule */
            ucp_request_t *req;
            req = ucs_mpool_get_inline(&ep->worker->req_mp);
            if (req == NULL) {
                /* can't allocate memory for request - abort */
                status = UCS_ERR_NO_MEMORY;
                break;
            }
            ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                rkey, ucp_progress_get_nbi);

            /* Mark it as in progress */
            status = UCS_INPROGRESS;
            break;
        } else {
            /* Error */
            break;
        }
    }

    return status;
}
Example #14
0
static ucs_status_t uct_posix_test_mem(size_t length, int shm_fd)
{
    int *buf;
    int chunk_size = 256 * 1024;
    ucs_status_t status = UCS_OK;
    size_t size_to_write, remaining;
    ssize_t single_write;

    buf = ucs_malloc(chunk_size, "write buffer");
    if (buf == NULL) {
        ucs_error("Failed to allocate memory for testing space for backing file.");
        status = UCS_ERR_NO_MEMORY;
        goto out;
    }

    memset(buf, 0, chunk_size);
    if (lseek(shm_fd, 0, SEEK_SET) < 0) {
        ucs_error("lseek failed. %m");
        status = UCS_ERR_IO_ERROR;
        goto out_free_buf;
    }

    remaining = length;
    while (remaining > 0) {
        size_to_write = ucs_min(remaining, chunk_size);
        single_write = write(shm_fd, buf, size_to_write);
        if (single_write < 0) {
            ucs_error("Failed to write %zu bytes. %m", size_to_write);
            status = UCS_ERR_IO_ERROR;
            goto out_free_buf;
        }
        if (single_write != size_to_write) {
            ucs_error("Not enough memory to write total of %zu bytes. "
                      "Please check that /dev/shm or the directory you specified has "
                      "more available memory.", length);
            status = UCS_ERR_NO_MEMORY;
            goto out_free_buf;
        }
        remaining -= size_to_write;
    }

out_free_buf:
    ucs_free(buf);

out:
    return status;
}
Example #15
0
static ucs_status_t uct_cm_iface_query(uct_iface_h tl_iface,
                                       uct_iface_attr_t *iface_attr)
{
    size_t mtu;

    mtu = ucs_min(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - sizeof(uct_cm_hdr_t),
                  UINT8_MAX);

    memset(iface_attr, 0, sizeof(*iface_attr));
    iface_attr->cap.am.max_bcopy      = mtu;
    iface_attr->iface_addr_len        = sizeof(uct_sockaddr_ib_t);
    iface_attr->ep_addr_len           = 0;
    iface_attr->cap.flags             = /* UCT_IFACE_FLAG_AM_BCOPY | enable only in case of emergency */
                                        UCT_IFACE_FLAG_PENDING |
                                        UCT_IFACE_FLAG_CONNECT_TO_IFACE;
    return UCS_OK;
}
Example #16
0
static uint64_t __sumup_host_name(unsigned prime_index)
{
    uint64_t sum, n;
    const char *p;
    unsigned i;

    sum = 0;
    i = prime_index;
    p = ucs_get_host_name();
    while (*p != '\0') {
        n = 0;
        strncpy((char*)&n, p, sizeof(n));
        sum += ucs_get_prime(i) * n;
        ++i;
        p += ucs_min(sizeof(n), strlen(p));
    }
    return sum;
}
Example #17
0
File: timerq.c Project: openucx/ucx
ucs_status_t ucs_timerq_add(ucs_timer_queue_t *timerq, int timer_id,
                            ucs_time_t interval)
{
    ucs_status_t status;
    ucs_timer_t *ptr;

    ucs_trace_func("timerq=%p interval=%.2fus timer_id=%d", timerq,
                   ucs_time_to_usec(interval), timer_id);

    pthread_spin_lock(&timerq->lock);

    /* Make sure ID is unique */
    for (ptr = timerq->timers; ptr < timerq->timers + timerq->num_timers; ++ptr) {
        if (ptr->id == timer_id) {
            status = UCS_ERR_ALREADY_EXISTS;
            goto out_unlock;
        }
    }

    /* Resize timer array */
    ptr = ucs_realloc(timerq->timers, (timerq->num_timers + 1) * sizeof(ucs_timer_t),
                      "timerq");
    if (ptr == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto out_unlock;
    }
    timerq->timers = ptr;
    ++timerq->num_timers;
    timerq->min_interval = ucs_min(interval, timerq->min_interval);
    ucs_assert(timerq->min_interval != UCS_TIME_INFINITY);

    /* Initialize the new timer */
    ptr = &timerq->timers[timerq->num_timers - 1];
    ptr->expiration = 0; /* will fire the next time sweep is called */
    ptr->interval   = interval;
    ptr->id         = timer_id;

    status = UCS_OK;

out_unlock:
    pthread_spin_unlock(&timerq->lock);
    return status;
}
Example #18
0
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;
    uct_ep_h uct_ep;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
    for (;;) {
        frag_length = ucs_min(rma_config->max_get_bcopy, req->send.length);
        status = uct_ep_get_bcopy(uct_ep,
                                  (uct_unpack_callback_t)memcpy,
                                  (void*)req->send.buffer,
                                  frag_length,
                                  req->send.rma.remote_addr,
                                  uct_rkey,
                                  NULL);
        if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
            /* Get was initiated */
            req->send.length -= frag_length;
            req->send.buffer += frag_length;
            req->send.rma.remote_addr += frag_length;
            if (req->send.length == 0) {
                /* Get was posted */
                ucp_request_complete(req, void);
                status = UCS_OK;
                break;
            }
        } else {
            /* Error - abort */
            break;
        }
    }

    return status;
}
Example #19
0
static UCS_CLASS_INIT_FUNC(uct_rc_mlx5_ep_t, uct_iface_h tl_iface)
{
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_t);
    ucs_status_t status;

    UCS_CLASS_CALL_SUPER_INIT(uct_rc_ep_t, &iface->super);

    status = uct_ib_mlx5_txwq_init(iface->super.super.super.worker, &self->tx.wq,
                                   self->super.txqp.qp);
    if (status != UCS_OK) {
        ucs_error("Failed to get mlx5 QP information");
        return status;
    }

    self->qp_num       = self->super.txqp.qp->qp_num;
    self->tx.wq.bb_max = ucs_min(self->tx.wq.bb_max, iface->tx.bb_max);
    uct_rc_txqp_available_set(&self->super.txqp, self->tx.wq.bb_max);

    uct_worker_progress_register(iface->super.super.super.worker,
                                 uct_rc_mlx5_iface_progress, iface);
    return UCS_OK;
}
Example #20
0
ucs_status_t uct_ib_iface_recv_mpool_init(uct_ib_iface_t *iface,
                                          const uct_ib_iface_config_t *config,
                                          const char *name, ucs_mpool_t *mp)
{
    unsigned grow;

    if (config->rx.queue_len < 1024) {
        grow = 1024;
    } else {
        /* We want to have some free (+10%) elements to avoid mem pool expansion */
        grow = ucs_min( (int)(1.1 * config->rx.queue_len + 0.5),
                        config->rx.mp.max_bufs);
    }

    return uct_iface_mpool_init(&iface->super, mp,
                                iface->config.rx_payload_offset + iface->config.seg_size,
                                iface->config.rx_hdr_offset,
                                UCS_SYS_CACHE_LINE_SIZE,
                                &config->rx.mp, grow,
                                uct_ib_iface_recv_desc_init,
                                name);
}
Example #21
0
static ucs_status_t uct_cm_iface_query(uct_iface_h tl_iface,
                                       uct_iface_attr_t *iface_attr)
{
    uct_cm_iface_t *iface = ucs_derived_of(tl_iface, uct_cm_iface_t);
    size_t mtu;

    uct_ib_iface_query(&iface->super, 32 /* TODO */, iface_attr);
    iface_attr->overhead = 1200e-9;

    mtu = ucs_min(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - sizeof(uct_cm_hdr_t),
                  UINT8_MAX);

    iface_attr->cap.am.max_bcopy      = mtu;
    iface_attr->iface_addr_len        = sizeof(uint32_t);
    iface_attr->ep_addr_len           = 0;
    iface_attr->cap.flags             = UCT_IFACE_FLAG_AM_BCOPY |
                                        UCT_IFACE_FLAG_AM_DUP |
                                        UCT_IFACE_FLAG_PENDING |
                                        UCT_IFACE_FLAG_AM_CB_ASYNC |
                                        UCT_IFACE_FLAG_CONNECT_TO_IFACE;
    return UCS_OK;
}
Example #22
0
static UCS_F_ALWAYS_INLINE size_t
ucp_tag_get_rndv_threshold(const ucp_request_t *req, size_t count,
                           size_t max_iov, size_t rndv_rma_thresh,
                           size_t rndv_am_thresh, size_t seg_size)
{
    switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) {
    case UCP_DATATYPE_IOV: 
        if ((count > max_iov) &&
            ucp_ep_is_tag_offload_enabled(ucp_ep_config(req->send.ep))) {
            /* Make sure SW RNDV will be used, because tag offload does
             * not support multi-packet eager protocols. */
            return seg_size;
        }
        /* Fall through */
    case UCP_DATATYPE_CONTIG: 
        return ucs_min(rndv_rma_thresh, rndv_am_thresh);
    case UCP_DATATYPE_GENERIC:
        return rndv_am_thresh;
    default:
        ucs_error("Invalid data type %lx", req->send.datatype);
    }
 
    return SIZE_MAX;
}
Example #23
0
static ucs_status_t ucp_progress_get_nbi(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_rkey_h rkey    = req->send.rma.rkey;
    ucp_ep_t *ep       = req->send.ep;
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    size_t frag_length;

    UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, req->send.lane, uct_rkey, rma_config);

    frag_length = ucs_min(rma_config->max_get_bcopy, req->send.length);
    status = uct_ep_get_bcopy(ep->uct_eps[req->send.lane],
                              (uct_unpack_callback_t)memcpy,
                              (void*)req->send.buffer,
                              frag_length,
                              req->send.rma.remote_addr,
                              uct_rkey,
                              NULL);
    if ((status == UCS_OK) || (status == UCS_INPROGRESS)) {
        /* Get was initiated */
        req->send.length          -= frag_length;
        req->send.buffer          += frag_length;
        req->send.rma.remote_addr += frag_length;
        if (req->send.length == 0) {
            /* Get was posted */
            ucp_request_put(req, UCS_OK);
            return UCS_OK;
        } else {
            return UCS_INPROGRESS;
        }
    } else {
        return status;
    }
}
Example #24
0
static void ucx_perf_set_warmup(ucx_perf_context_t* perf, ucx_perf_params_t* params)
{
    perf->max_iter = ucs_min(params->warmup_iter, params->max_iter / 10);
    perf->report_interval = -1;
}
Example #25
0
ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length,
                         uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_ep_rma_config_t *rma_config;
    ucs_status_t status;
    uct_rkey_t uct_rkey;
    ssize_t packed_len;
    ucp_request_t *req;
    uct_ep_h uct_ep;

    UCP_RMA_CHECK_PARAMS(buffer, length);

    for (;;) {
        UCP_EP_RESOLVE_RKEY_RMA(ep, rkey, uct_ep, uct_rkey, rma_config);
        if (length <= rma_config->max_put_short) {
            /* Fast path for a single short message */
            status = uct_ep_put_short(uct_ep, buffer, length, remote_addr,
                                      uct_rkey);
            if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
                /* Return on error or success */
                break;
            } else {
                /* Out of resources - adding request for later schedule */
                req = ucs_mpool_get_inline(&ep->worker->req_mp);
                if (req == NULL) {
                    status = UCS_ERR_NO_MEMORY;
                    break;
                }
                ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                    rkey, ucp_progress_put_nbi);
                status = UCS_INPROGRESS;
                break;
            }
        } else {
            /* Fragmented put */
            if (length <= ucp_ep_config(ep)->bcopy_thresh) {
                /* TBD: Should be replaced with bcopy */
                packed_len = ucs_min(length, rma_config->max_put_short);
                status = uct_ep_put_short(uct_ep, buffer, packed_len, remote_addr,
                                          uct_rkey);
            } else {
                /* TBD: Use z-copy */
                ucp_memcpy_pack_context_t pack_ctx;
                pack_ctx.src    = buffer;
                pack_ctx.length =
                    ucs_min(length, rma_config->max_put_bcopy);
                packed_len = uct_ep_put_bcopy(uct_ep, ucp_memcpy_pack, &pack_ctx,
                                              remote_addr, uct_rkey);
                status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
            }

            if (ucs_likely(status == UCS_OK || status == UCS_INPROGRESS)) {
                length -= packed_len;
                if (length == 0) {
                    /* Put is completed - return success */
                    break;
                }

                buffer += packed_len;
                remote_addr += packed_len;
            } else if (status == UCS_ERR_NO_RESOURCE) {
                /* Out of resources - adding request for later schedule */
                req = ucs_mpool_get_inline(&ep->worker->req_mp);
                if (req == NULL) {
                    status = UCS_ERR_NO_MEMORY;
                    break;
                }
                ucp_add_pending_rma(req, ep, uct_ep, buffer, length, remote_addr,
                                    rkey, ucp_progress_put_nbi);
                status = UCS_INPROGRESS;
                break;
            } else {
                /* Return - Error occured */
                break;
            }
        }
    }

    return status;
}
Example #26
0
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
                                uct_iface_attr_t *iface_attr)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    static const unsigned ib_port_widths[] = {
        [0] = 1,
        [1] = 4,
        [2] = 8,
        [3] = 12
    };
    uint8_t active_width, active_speed, active_mtu;
    double encoding, signal_rate, wire_speed;
    size_t mtu, width, extra_pkt_len;
    int i = 0;

    active_width = uct_ib_iface_port_attr(iface)->active_width;
    active_speed = uct_ib_iface_port_attr(iface)->active_speed;
    active_mtu   = uct_ib_iface_port_attr(iface)->active_mtu;

    /* Get active width */
    if (!ucs_is_pow2(active_width) ||
        (active_width < 1) || (ucs_ilog2(active_width) > 3))
    {
        ucs_error("Invalid active_width on %s:%d: %d",
                  UCT_IB_IFACE_ARG(iface), active_width);
        return UCS_ERR_IO_ERROR;
    }

    memset(iface_attr, 0, sizeof(*iface_attr));

    iface_attr->device_addr_len = iface->addr_size;

    switch (active_speed) {
    case 1: /* SDR */
        iface_attr->latency = 5000e-9;
        signal_rate         = 2.5e9;
        encoding            = 8.0/10.0;
        break;
    case 2: /* DDR */
        iface_attr->latency = 2500e-9;
        signal_rate         = 5.0e9;
        encoding            = 8.0/10.0;
        break;
    case 4: /* QDR */
        iface_attr->latency = 1300e-9;
        signal_rate         = 10.0e9;
        encoding            = 8.0/10.0;
        break;
    case 8: /* FDR10 */
        iface_attr->latency = 700e-9;
        signal_rate         = 10.3125e9;
        encoding            = 64.0/66.0;
        break;
    case 16: /* FDR */
        iface_attr->latency = 700e-9;
        signal_rate         = 14.0625e9;
        encoding            = 64.0/66.0;
        break;
    case 32: /* EDR */
        iface_attr->latency = 600e-9;
        signal_rate         = 25.78125e9;
        encoding            = 64.0/66.0;
        break;
    default:
        ucs_error("Invalid active_speed on %s:%d: %d",
                  UCT_IB_IFACE_ARG(iface), active_speed);
        return UCS_ERR_IO_ERROR;
    }

    /* Wire speed calculation: Width * SignalRate * Encoding */
    width                 = ib_port_widths[ucs_ilog2(active_width)];
    wire_speed            = (width * signal_rate * encoding) / 8.0;

    /* Calculate packet overhead  */
    mtu                   = ucs_min(uct_ib_mtu_value(active_mtu),
                                    iface->config.seg_size);

    extra_pkt_len = UCT_IB_BTH_LEN + xport_hdr_len +  UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN;

    if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(iface))) {
        extra_pkt_len += UCT_IB_GRH_LEN + UCT_IB_ROCE_LEN;
    } else {
        /* TODO check if UCT_IB_DELIM_LEN is present in RoCE as well */
        extra_pkt_len += UCT_IB_LRH_LEN;
    }

    iface_attr->bandwidth = (wire_speed * mtu) / (mtu + extra_pkt_len);

    /* Set priority of current device */
    iface_attr->priority = 0;
    while (uct_ib_device_info_table[i].vendor_part_id != 0) {
        if (uct_ib_device_info_table[i].vendor_part_id == dev->dev_attr.vendor_part_id) {
            iface_attr->priority = uct_ib_device_info_table[i].priority;
            break;
        }
        i++;
    }

    return UCS_OK;
}
Example #27
0
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const uct_iface_params_t *params,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, params->dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->ops                      = ops;

    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) +
                                             params->rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset -
                                     params->rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                             config->rx.queue_len / 4);
    self->config.port_num          = port_num;
    self->config.sl                = config->sl;
    self->config.gid_index         = config->gid_index;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_device_query_gid(dev, self->config.port_num,
                                     self->config.gid_index, &self->gid);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("ibv_create_comp_channel() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl,
                                    &self->recv_cq);
    if (status != UCS_OK) {
        goto err_destroy_send_cq;
    }

    /* Address scope and size */
    if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) {
        if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) {
            self->addr_type = UCT_IB_ADDRESS_TYPE_ETH;
        } else {
            self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix);
        }
    } else {
        ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST);
        self->addr_type = config->addr_type;
    }

    self->addr_size  = uct_ib_address_size(self->addr_type);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Example #28
0
static ucs_status_t uct_ib_iface_create_cq(uct_ib_iface_t *iface, int cq_length,
                                           size_t inl, struct ibv_cq **cq_p)
{
    static const char *cqe_size_env_var = "MLX5_CQE_SIZE";
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    const char *cqe_size_env_value;
    size_t cqe_size_min, cqe_size;
    char cqe_size_buf[32];
    ucs_status_t status;
    struct ibv_cq *cq;
    int env_var_added = 0;
    int ret;

    cqe_size_min       = (inl > 32) ? 128 : 64;
    cqe_size_env_value = getenv(cqe_size_env_var);

    if (cqe_size_env_value != NULL) {
        cqe_size = atol(cqe_size_env_value);
        if (cqe_size < cqe_size_min) {
            ucs_error("%s is set to %zu, but at least %zu is required (inl: %zu)",
                      cqe_size_env_var, cqe_size, cqe_size_min, inl);
            status = UCS_ERR_INVALID_PARAM;
            goto out;
        }
    } else {
        /* CQE size is not defined by the environment, set it according to inline
         * size and cache line size.
         */
        cqe_size = ucs_max(cqe_size_min, UCS_SYS_CACHE_LINE_SIZE);
        cqe_size = ucs_max(cqe_size, 64);  /* at least 64 */
        cqe_size = ucs_min(cqe_size, 128); /* at most 128 */
        snprintf(cqe_size_buf, sizeof(cqe_size_buf),"%zu", cqe_size);
        ucs_debug("%s: setting %s=%s", uct_ib_device_name(dev), cqe_size_env_var,
                  cqe_size_buf);
        ret = ibv_exp_setenv(dev->ibv_context, cqe_size_env_var, cqe_size_buf, 1);
        if (ret) {
            ucs_error("ibv_exp_setenv(%s=%s) failed: %m", cqe_size_env_var,
                      cqe_size_buf);
            status = UCS_ERR_INVALID_PARAM;
            goto out;
        }

        env_var_added = 1;
    }

    cq = ibv_create_cq(dev->ibv_context, cq_length, NULL, iface->comp_channel, 0);
    if (cq == NULL) {
        ucs_error("ibv_create_cq(cqe=%d) failed: %m", cq_length);
        status = UCS_ERR_IO_ERROR;
        goto out_unsetenv;
    }

    *cq_p = cq;
    status = UCS_OK;

out_unsetenv:
    if (env_var_added) {
        /* if we created a new environment variable, remove it */
        ret = ibv_exp_unsetenv(dev->ibv_context, cqe_size_env_var);
        if (ret) {
            ucs_warn("unsetenv(%s) failed: %m", cqe_size_env_var);
        }
    }
out:
    return status;
}
Example #29
0
void ucs_fill_filename_template(const char *tmpl, char *buf, size_t max)
{
    char *p, *end;
    const char *pf, *pp;
    size_t length;
    time_t t;

    p = buf;
    end = buf + max - 1;
    *end = 0;
    pf = tmpl;
    while (*pf != 0 && p < end) {
        pp = strchr(pf, '%');
        if (pp == NULL) {
            strncpy(p, pf, end - p);
            p = end;
            break;
        }

        length = ucs_min(pp - pf, end - p);
        strncpy(p, pf, length);
        p += length;

        switch (*(pp + 1)) {
        case 'p':
            snprintf(p, end - p, "%d", getpid());
            pf = pp + 2;
            p += strlen(p);
            break;
        case 'h':
            snprintf(p, end - p, "%s", ucs_get_host_name());
            pf = pp + 2;
            p += strlen(p);
            break;
        case 'c':
            snprintf(p, end - p, "%02d", ucs_get_first_cpu());
            pf = pp + 2;
            p += strlen(p);
            break;
        case 't':
            t = time(NULL);
            strftime(p, end - p, "%Y-%m-%d-%H:%M:%S", localtime(&t));
            pf = pp + 2;
            p += strlen(p);
            break;
        case 'u':
            snprintf(p, end - p, "%s", basename(ucs_get_user_name()));
            pf = pp + 2;
            p += strlen(p);
            break;
        case 'e':
            snprintf(p, end - p, "%s", basename(ucs_get_exe()));
            pf = pp + 2;
            p += strlen(p);
            break;
        default:
            *(p++) = *pp;
            pf = pp + 1;
            break;
        }

        p += strlen(p);
    }
    *p = 0;
}
Example #30
0
static UCS_CLASS_INIT_FUNC(uct_cm_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_cm_iface_config_t *config = ucs_derived_of(tl_config, uct_cm_iface_config_t);
    ucs_status_t status;
    int ret;

    ucs_trace_func("");

    UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &uct_cm_iface_ops, pd, worker,
                              dev_name, rx_headroom, 0 /* rx_priv_len */,
                              0 /* rx_hdr_len */, 1 /* tx_cq_len */,
                              IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, /* mss */
                              &config->super);

    if (worker->async == NULL) {
        ucs_error("cm must have async!=NULL");
        return UCS_ERR_INVALID_PARAM;
    }

    self->service_id          = (uint32_t)(ucs_generate_uuid((uintptr_t)self) &
                                             (~IB_CM_ASSIGN_SERVICE_ID_MASK));
    self->num_outstanding     = 0;

    self->config.timeout_ms   = (int)(config->timeout * 1e3 + 0.5);
    self->config.max_outstanding = config->max_outstanding;
    self->config.retry_count  = ucs_min(config->retry_count, UINT8_MAX);
    self->notify_q.head       = NULL;
    ucs_queue_head_init(&self->notify_q);

    self->outstanding = ucs_calloc(self->config.max_outstanding,
                                   sizeof(*self->outstanding),
                                   "cm_outstanding");
    if (self->outstanding == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    self->cmdev = ib_cm_open_device(uct_ib_iface_device(&self->super)->ibv_context);
    if (self->cmdev == NULL) {
        ucs_error("ib_cm_open_device() failed: %m. Check if ib_ucm.ko module is loaded.");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_outstanding;
    }

    status = ucs_sys_fcntl_modfl(self->cmdev->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_close_device;
    }

    ret = ib_cm_create_id(self->cmdev, &self->listen_id, self);
    if (ret) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_NO_DEVICE;
        goto err_close_device;
    }

    ret = ib_cm_listen(self->listen_id, self->service_id, 0);
    if (ret) {
        ucs_error("ib_cm_listen() failed: %m");
        status = UCS_ERR_INVALID_ADDR;
        goto err_destroy_id;
    }

    if (config->async_mode == UCS_ASYNC_MODE_SIGNAL) {
        ucs_warn("ib_cm fd does not support SIGIO");
    }

    status = ucs_async_set_event_handler(config->async_mode, self->cmdev->fd,
                                         POLLIN, uct_cm_iface_event_handler, self,
                                         worker->async);
    if (status != UCS_OK) {
        ucs_error("failed to set event handler");
        goto err_destroy_id;
    }

    ucs_debug("listening for SIDR service_id 0x%x on fd %d", self->service_id,
              self->cmdev->fd);
    return UCS_OK;

err_destroy_id:
    ib_cm_destroy_id(self->listen_id);
err_close_device:
    ib_cm_close_device(self->cmdev);
err_free_outstanding:
    ucs_free(self->outstanding);
err:
    return status;
}