Example #1
static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    ucs_status_t status;
    uct_self_iface_config_t *self_config = NULL;

    ucs_trace_func("Creating a loop-back transport self=%p rxh=%lu",
                   self, params->rx_headroom);

    if (strcmp(params->dev_name, UCT_SELF_NAME) != 0) {
        ucs_error("No device was found: %s", params->dev_name);
        return UCS_ERR_NO_DEVICE;
    }

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker,
                              tl_config UCS_STATS_ARG(params->stats_root)
                              UCS_STATS_ARG(UCT_SELF_NAME));

    self_config = ucs_derived_of(tl_config, uct_self_iface_config_t);

    self->id              = ucs_generate_uuid((uintptr_t)self);
    self->rx_headroom     = params->rx_headroom;
    self->data_length     = self_config->super.max_bcopy;
    self->release_desc.cb = uct_self_iface_release_desc;

    /* create a memory pool for data transferred */
    status = uct_iface_mpool_init(&self->super,
                                  &self->msg_desc_mp,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom +
                                                            self->data_length,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom,
                                  UCS_SYS_CACHE_LINE_SIZE,
                                  &self_config->mp,
                                  256,
                                  ucs_empty_function,
                                  "self_msg_desc");
    if (UCS_OK != status) {
        ucs_error("Failed to create a memory pool for the loop-back transport");
        goto err;
    }

    /* set the message descriptor for the loop-back */
    self->msg_cur_desc = ucs_mpool_get(&self->msg_desc_mp);
    if (NULL == self->msg_cur_desc) {
        ucs_error("Failed to get the first descriptor in loop-back MP storage");
        status = UCS_ERR_NO_RESOURCE;
        goto destroy_mpool;
    }
    /* annotate only after the NULL check, so valgrind is not asked to mark
     * memory at a NULL address as defined */
    VALGRIND_MAKE_MEM_DEFINED(self->msg_cur_desc, sizeof(*self->msg_cur_desc));

    ucs_debug("Created a loop-back iface. id=0x%lx, desc=%p, len=%u, tx_hdr=%lu",
              self->id, self->msg_cur_desc, self->data_length, self->rx_headroom);
    return UCS_OK;

destroy_mpool:
    ucs_mpool_cleanup(&self->msg_desc_mp, 1);
err:
    return status;
}
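
The descriptor obtained from the pool above is cached in msg_cur_desc, so the
teardown path has to return it before destroying the pool. A minimal sketch of
the matching cleanup, assuming the standard UCS_CLASS_CLEANUP_FUNC convention
(the actual teardown function is not part of this example):

static UCS_CLASS_CLEANUP_FUNC(uct_self_iface_t)
{
    /* return the cached descriptor, then destroy the pool; the second
     * argument of ucs_mpool_cleanup() enables leak checking */
    if (self->msg_cur_desc != NULL) {
        ucs_mpool_put(self->msg_cur_desc);
    }
    ucs_mpool_cleanup(&self->msg_desc_mp, 1);
}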
Example #2
File: ucp_ep.c Project: alex--m/ucx
static ucs_status_ptr_t ucp_disconnect_nb_internal(ucp_ep_h ep)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_debug("disconnect ep %p", ep);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    /*
     * A flush operation can be queued on the pending queue of only one lane
     * (indicated by req->send.lane), but may be scheduled for completion on
     * any number of lanes. req->send.uct_comp.count tracks how many lanes
     * are not yet flushed; when it reaches zero, all lanes are flushed.
     * req->send.flush.lanes tracks which lanes a flush still has to be
     * started on.
     *
     * If a flush completes from a pending/completion callback, we must
     * schedule a slow-path callback to release the endpoint later, since a
     * UCT endpoint cannot be released from pending/completion callback
     * context.
     */
    req->flags                  = 0;
    req->status                 = UCS_OK;
    req->send.ep                = ep;
    req->send.flush.flushed_cb  = ucp_ep_disconnected;
    req->send.flush.lanes       = UCS_MASK(ucp_ep_num_lanes(ep));
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    req->send.flush.cbq_elem_on = 0;
    req->send.lane              = UCP_NULL_LANE;
    req->send.uct.func          = ucp_ep_flush_progress_pending;
    req->send.uct_comp.func     = ucp_ep_flush_completion;
    req->send.uct_comp.count    = ucp_ep_num_lanes(ep);

    ucp_ep_flush_progress(req);

    if (req->send.uct_comp.count == 0) {
        status = req->status;
        ucp_ep_disconnected(req);
        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
                      ep, req, ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("ep %p: return inprogress flush request %p (%p)", ep, req,
                  req + 1);
    return req + 1;
}
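
The function encodes its whole result in one pointer: failures come back as
UCS_STATUS_PTR(status), immediate completion as UCS_STATUS_PTR(UCS_OK) (which
is NULL), and an in-flight flush as req + 1, the user-visible handle placed
right after the internal ucp_request_t. A minimal caller-side sketch, assuming
the standard UCS_PTR_IS_ERR/UCS_PTR_STATUS helpers:

void *request = ucp_disconnect_nb_internal(ep);

if (UCS_PTR_IS_ERR(request)) {
    ucs_error("disconnect failed: %s",
              ucs_status_string(UCS_PTR_STATUS(request)));
} else if (request == NULL) {
    /* all lanes flushed synchronously; the endpoint is already released */
} else {
    /* flush in progress; keep progressing the worker until it completes */
}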
Example #3
/*
 * @param [in] rsc_tli  Resource index for every lane.
 */
static ucs_status_t ucp_wireup_msg_send(ucp_ep_h ep, uint8_t type,
                                        uint64_t tl_bitmap,
                                        const ucp_rsc_index_t *rsc_tli)
{
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane;
    unsigned order[UCP_MAX_LANES + 1];
    ucp_request_t* req;
    ucs_status_t status;
    void *address;

    ucs_assert(ep->cfg_index != (uint8_t)-1);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    req->flags                   = UCP_REQUEST_FLAG_RELEASED;
    req->send.ep                 = ep;
    req->send.cb                 = ucp_wireup_msg_send_completion;
    req->send.wireup.type        = type;
    req->send.uct.func           = ucp_wireup_msg_progress;

    /* pack all addresses */
    status = ucp_address_pack(ep->worker, ep, tl_bitmap, order,
                              &req->send.length, &address);
    if (status != UCS_OK) {
        ucs_error("failed to pack address: %s", ucs_status_string(status));
        ucs_mpool_put(req); /* the request was never started; return it */
        return status;
    }

    req->send.buffer = address;

    /* send the address indices that the remote side should connect to */
    for (lane = 0; lane < UCP_MAX_LANES; ++lane) {
        rsc_index = rsc_tli[lane];
        if (rsc_index != UCP_NULL_RESOURCE) {
            req->send.wireup.tli[lane] = ucp_wireup_address_index(order,
                                                                  tl_bitmap,
                                                                  rsc_index);
        } else {
            req->send.wireup.tli[lane] = -1;
        }
    }

    ucp_request_start_send(req);
    return UCS_OK;
}
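
The loop above translates each lane's resource index into the position of that
resource's address inside the packed blob. A sketch of one plausible
implementation of such a lookup, assuming UCS_MASK() and the
ucs_count_one_bits() population-count helper (the real
ucp_wireup_address_index() may differ):

/* Sketch (assumption): the address of resource 'rsc_index' is the N-th one
 * packed, where N is the number of lower-order bits set in the bitmap,
 * remapped through 'order'. */
static unsigned address_index_sketch(const unsigned *order, uint64_t tl_bitmap,
                                     ucp_rsc_index_t rsc_index)
{
    return order[ucs_count_one_bits(tl_bitmap & UCS_MASK(rsc_index))];
}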
Example #4
ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count,
                                 uintptr_t datatype, ucp_tag_t tag,
                                 ucp_send_callback_t cb)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s", buffer,
                  count, tag, ucp_ep_peer_name(ep));

    status = ucp_tag_send_try(ep, buffer, count, datatype, tag);
    if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) {
        return UCS_STATUS_PTR(status); /* UCS_OK also goes here */
    }

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    VALGRIND_MAKE_MEM_DEFINED(req + 1, ep->worker->context->config.request.size);

    req->flags   = 0;
    req->cb.send = cb;

    status = ucp_tag_send_start_req(ep, buffer, count, datatype, tag, req);
    if (status != UCS_OK) {
        return UCS_STATUS_PTR(status);
    }

    if (!(req->flags & UCP_REQUEST_FLAG_COMPLETED)) {
        ucp_ep_add_pending(ep, ep->uct_ep, req);
        ucp_worker_progress(ep->worker);
    }

    ucs_trace_req("send_nb returning request %p", req);
    return req + 1;
}
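
A typical caller drives the worker until the callback fires. A minimal sketch,
where send_cb and the 'done' flag are hypothetical user code matching the
ucp_send_callback_t signature:

static volatile int done = 0;

static void send_cb(void *request, ucs_status_t status)
{
    done = 1; /* the request handle (req + 1) is passed back here */
}

static void send_example(ucp_worker_h worker, ucp_ep_h ep, const void *buf,
                         size_t count, ucp_tag_t tag)
{
    void *sreq = ucp_tag_send_nb(ep, buf, count, ucp_dt_make_contig(1), tag,
                                 send_cb);
    if (UCS_PTR_IS_ERR(sreq)) {
        ucs_error("send failed: %s", ucs_status_string(UCS_PTR_STATUS(sreq)));
    } else if (sreq != NULL) {
        while (!done) {
            ucp_worker_progress(worker); /* progress until send_cb runs */
        }
        ucp_request_release(sreq);
    }
    /* sreq == NULL means the send completed immediately with UCS_OK */
}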
Example #5
ucs_status_t uct_rc_verbs_iface_common_init(uct_rc_verbs_iface_common_t *iface,
                                            uct_rc_iface_t *rc_iface,
                                            uct_rc_verbs_iface_common_config_t *config,
                                            uct_rc_iface_config_t *rc_config,
                                            size_t max_hdr_size)
{
    ucs_status_t status;

    memset(iface->inl_sge, 0, sizeof(iface->inl_sge));

    /* Configuration */
    iface->config.short_desc_size = ucs_max(UCT_RC_MAX_ATOMIC_SIZE, max_hdr_size);

    /* Create AM headers and Atomic mempool */
    status = uct_iface_mpool_init(&rc_iface->super.super,
                                  &iface->short_desc_mp,
                                  sizeof(uct_rc_iface_send_desc_t) +
                                      iface->config.short_desc_size,
                                  sizeof(uct_rc_iface_send_desc_t),
                                  UCS_SYS_CACHE_LINE_SIZE,
                                  &rc_config->super.tx.mp,
                                  rc_iface->config.tx_qp_len,
                                  uct_rc_iface_send_desc_init,
                                  "rc_verbs_short_desc");
    if (status != UCS_OK) {
        return status;
    }

    iface->config.notag_hdr_size = 0;

    iface->am_inl_hdr = ucs_mpool_get(&iface->short_desc_mp);
    if (iface->am_inl_hdr == NULL) {
        ucs_error("Failed to allocate AM short header");
        ucs_mpool_cleanup(&iface->short_desc_mp, 1); /* avoid leaking the pool */
        return UCS_ERR_NO_MEMORY;
    }

    return UCS_OK;
}
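
The two size arguments passed to uct_iface_mpool_init() describe one pool
element: the total size (the descriptor plus short_desc_size bytes of payload)
and the alignment offset (the descriptor size), so the payload that follows
the descriptor starts on a cache-line boundary. An illustrative sketch of the
implied layout (an assumption; the pool does not use a named struct):

/* Sketch (assumption): logical layout of one short_desc_mp element. */
struct short_desc_elem {
    uct_rc_iface_send_desc_t desc; /* bookkeeping header */
    /* iface->config.short_desc_size bytes of payload follow here, aligned
     * to UCS_SYS_CACHE_LINE_SIZE via the align_offset argument */
};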
Example #6
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src, size_t length,
                                   int direction, cudaStream_t stream,
                                   ucs_queue_head_t *outstanding_queue,
                                   uct_completion_t *comp)
{
    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
    uct_cuda_copy_event_desc_t *cuda_event;
    ucs_status_t status;

    if (!length) {
        return UCS_OK;
    }

    cuda_event = ucs_mpool_get(&iface->cuda_event_desc);
    if (ucs_unlikely(cuda_event == NULL)) {
        ucs_error("Failed to allocate cuda event object");
        return UCS_ERR_NO_MEMORY;
    }

    status = UCT_CUDA_FUNC(cudaMemcpyAsync(dst, src, length, direction, stream));
    if (UCS_OK != status) {
        ucs_mpool_put(cuda_event); /* copy was not issued; return the descriptor */
        return UCS_ERR_IO_ERROR;
    }

    status = UCT_CUDA_FUNC(cudaEventRecord(cuda_event->event, stream));
    if (UCS_OK != status) {
        ucs_mpool_put(cuda_event); /* event was not recorded; return the descriptor */
        return UCS_ERR_IO_ERROR;
    }
    ucs_queue_push(outstanding_queue, &cuda_event->queue);
    cuda_event->comp = comp;

    ucs_trace("cuda async issued :%p dst:%p, src:%p  len:%ld",
             cuda_event, dst, src, length);
    return UCS_INPROGRESS;
}
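
The events queued above are reaped later on the progress path: each recorded
event is polled, and once the copy behind it has finished, its completion is
invoked and the descriptor goes back to the pool. A minimal sketch, assuming
the standard ucs_queue helpers and a uct_invoke_completion(comp, status)
callback invoker:

static unsigned cuda_copy_progress_sketch(ucs_queue_head_t *outstanding_queue)
{
    uct_cuda_copy_event_desc_t *cuda_event;
    unsigned count = 0;

    while (!ucs_queue_is_empty(outstanding_queue)) {
        cuda_event = ucs_queue_head_elem_non_empty(outstanding_queue,
                                                   uct_cuda_copy_event_desc_t,
                                                   queue);
        if (cudaEventQuery(cuda_event->event) != cudaSuccess) {
            break; /* copies complete in order; stop at the first pending one */
        }
        ucs_queue_pull_non_empty(outstanding_queue);
        if (cuda_event->comp != NULL) {
            uct_invoke_completion(cuda_event->comp, UCS_OK);
        }
        ucs_mpool_put(cuda_event); /* return the descriptor to the pool */
        ++count;
    }
    return count;
}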
Example #7
File: mpool.c Project: bbenton/ucx
void *ucs_mpool_get_grow(ucs_mpool_t *mp)
{
    size_t chunk_size, chunk_padding;
    ucs_mpool_data_t *data = mp->data;
    ucs_mpool_chunk_t *chunk;
    ucs_mpool_elem_t *elem;
    ucs_status_t status;
    unsigned i;
    void *ptr;

    if (data->quota == 0) {
        return NULL;
    }

    chunk_size = data->chunk_size;
    status = data->ops->chunk_alloc(mp, &chunk_size, &ptr);
    if (status != UCS_OK) {
        ucs_error("Failed to allocate memory pool chunk: %s", ucs_status_string(status));
        return NULL;
    }

    /* Calculate padding, and update element count according to allocated size */
    chunk            = ptr;
    chunk_padding    = ucs_padding((uintptr_t)(chunk + 1) + data->align_offset,
                                   data->alignment);
    chunk->elems     = (void*)(chunk + 1) + chunk_padding;
    chunk->num_elems = (chunk_size - chunk_padding - sizeof(*chunk)) /
                       ucs_mpool_elem_total_size(data);

    ucs_debug("mpool %s: allocated chunk %p of %lu bytes with %u elements",
              ucs_mpool_name(mp), chunk, chunk_size, chunk->num_elems);

    for (i = 0; i < chunk->num_elems; ++i) {
        elem         = ucs_mpool_chunk_elem(data, chunk, i);
        if (data->ops->obj_init != NULL) {
            data->ops->obj_init(mp, elem + 1, chunk);
        }

        ucs_mpool_add_to_freelist(mp, elem, 0);
        if (mp->data->tail == NULL) {
            mp->data->tail = elem;
        }
    }

    chunk->next  = data->chunks;
    data->chunks = chunk;

    if (data->quota == UINT_MAX) {
        /* Infinite memory pool */
    } else if (data->quota >= chunk->num_elems) {
        data->quota -= chunk->num_elems;
    } else {
        data->quota = 0;
    }

    VALGRIND_MAKE_MEM_NOACCESS(chunk + 1, chunk_size - sizeof(*chunk));

    ucs_assert(mp->freelist != NULL); /* Should not recurse */
    return ucs_mpool_get(mp);
}
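
The padding arithmetic above rounds the first element up so that the data at
align_offset lands on an aligned boundary. An equivalent of the ucs_padding()
helper, shown for clarity (a self-contained sketch, not the UCS definition):

/* bytes needed to round 'size' up to a multiple of 'alignment', e.g. with a
 * 64-byte cache line: padding(100, 64) == 28 (100 + 28 == 128), and
 * padding(128, 64) == 0 */
static size_t padding_sketch(size_t size, size_t alignment)
{
    return (alignment - (size % alignment)) % alignment;
}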
Example #8
static uct_ugni_flush_group_t *uct_ugni_new_flush_group(uct_ugni_iface_t *iface)
{
    return ucs_mpool_get(&iface->flush_pool);
}
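
As in all the examples above, an element obtained with ucs_mpool_get() must
eventually go back to its pool. A sketch of the matching release helper,
assuming a hypothetical uct_ugni_put_flush_group():

static void uct_ugni_put_flush_group(uct_ugni_flush_group_t *group)
{
    ucs_mpool_put(group); /* recycle the flush group into iface->flush_pool */
}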