Exemple #1
0
static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    ucs_status_t status;
    uct_self_iface_config_t *self_config = 0;

    ucs_trace_func("Creating a loop-back transport self=%p rxh=%lu",
                   self, params->rx_headroom);

    if (strcmp(params->dev_name, UCT_SELF_NAME) != 0) {
        ucs_error("No device was found: %s", params->dev_name);
        return UCS_ERR_NO_DEVICE;
    }

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker,
                              tl_config UCS_STATS_ARG(params->stats_root)
                              UCS_STATS_ARG(UCT_SELF_NAME));

    self_config = ucs_derived_of(tl_config, uct_self_iface_config_t);

    self->id              = ucs_generate_uuid((uintptr_t)self);
    self->rx_headroom     = params->rx_headroom;
    self->data_length     = self_config->super.max_bcopy;
    self->release_desc.cb = uct_self_iface_release_desc;

    /* create a memory pool for data transferred */
    status = uct_iface_mpool_init(&self->super,
                                  &self->msg_desc_mp,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom +
                                                            self->data_length,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom,
                                  UCS_SYS_CACHE_LINE_SIZE,
                                  &self_config->mp,
                                  256,
                                  ucs_empty_function,
                                  "self_msg_desc");
    if (UCS_OK != status) {
        ucs_error("Failed to create a memory pool for the loop-back transport");
        goto err;
    }

    /* set the message descriptor for the loop-back */
    self->msg_cur_desc = ucs_mpool_get(&self->msg_desc_mp);
    VALGRIND_MAKE_MEM_DEFINED(self->msg_cur_desc, sizeof(*(self->msg_cur_desc)));
    if (NULL == self->msg_cur_desc) {
        ucs_error("Failed to get the first descriptor in loop-back MP storage");
        status = UCS_ERR_NO_RESOURCE;
        goto destroy_mpool;
    }

    ucs_debug("Created a loop-back iface. id=0x%lx, desc=%p, len=%u, tx_hdr=%lu",
              self->id, self->msg_cur_desc, self->data_length, self->rx_headroom);
    return UCS_OK;

destroy_mpool:
    ucs_mpool_cleanup(&self->msg_desc_mp, 1);
err:
    return status;
}
Exemple #2
0
static UCS_CLASS_INIT_FUNC(uct_knem_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    ucs_assert(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE);

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_knem_iface_ops, md, worker,
                              params, tl_config UCS_STATS_ARG(params->stats_root)
                              UCS_STATS_ARG(UCT_KNEM_TL_NAME));
    self->knem_md = (uct_knem_md_t *)md;
    uct_sm_get_max_iov(); /* to initialize ucs_get_max_iov static variable */

    return UCS_OK;
}
Exemple #3
0
static UCS_CLASS_INIT_FUNC(uct_cma_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_cma_iface_ops, pd, worker,
                              tl_config UCS_STATS_ARG(NULL));
    return UCS_OK;
}
Exemple #4
0
static UCS_CLASS_INIT_FUNC(uct_gdr_copy_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_gdr_copy_iface_ops, md, worker,
                              params, tl_config UCS_STATS_ARG(params->stats_root)
                              UCS_STATS_ARG(UCT_GDR_COPY_TL_NAME));

    if (strncmp(params->mode.device.dev_name,
                UCT_CUDA_DEV_NAME, strlen(UCT_CUDA_DEV_NAME)) != 0) {
        ucs_error("No device was found: %s", params->mode.device.dev_name);
        return UCS_ERR_NO_DEVICE;
    }

    self->id = ucs_generate_uuid((uintptr_t)self);

    return UCS_OK;
}
Exemple #5
0
static UCS_F_ALWAYS_INLINE ucs_status_t
ucp_eager_tagged_handler(void *arg, void *data, size_t length, unsigned am_flags,
                         uint16_t flags, uint16_t hdr_len, uint16_t priv_length)
{
    ucp_worker_h worker        = arg;
    ucp_eager_hdr_t *eager_hdr = data;
    ucp_eager_first_hdr_t *eagerf_hdr;
    ucp_recv_desc_t *rdesc;
    ucp_request_t *req;
    ucs_status_t status;
    ucp_tag_t recv_tag;
    size_t recv_len;

    ucs_assert(length >= hdr_len);
    ucs_assert(flags & UCP_RECV_DESC_FLAG_EAGER);

    recv_tag = eager_hdr->super.tag;
    recv_len = length - hdr_len;

    req = ucp_tag_exp_search(&worker->tm, recv_tag);
    if (req != NULL) {
        ucp_eager_expected_handler(worker, req, data, recv_len, recv_tag, flags);

        if (flags & UCP_RECV_DESC_FLAG_EAGER_SYNC) {
            ucp_tag_eager_sync_send_ack(worker, data, flags);
        }

        if (flags & UCP_RECV_DESC_FLAG_EAGER_ONLY) {
            req->recv.tag.info.length = recv_len;
            status = ucp_request_recv_data_unpack(req, data + hdr_len, recv_len,
                                                  0, 1);
            ucp_request_complete_tag_recv(req, status);
        } else {
            eagerf_hdr                = data;
            req->recv.tag.info.length =
            req->recv.tag.remaining   = eagerf_hdr->total_len;

            status = ucp_tag_request_process_recv_data(req, data + hdr_len,
                                                       recv_len, 0, 0);
            ucs_assert(status == UCS_INPROGRESS);

            ucp_tag_frag_list_process_queue(&worker->tm, req, eagerf_hdr->msg_id
                                            UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_EXP));
        }

        status = UCS_OK;
    } else {
        status = ucp_recv_desc_init(worker, data, length, 0, am_flags, hdr_len,
                                    flags, priv_length, &rdesc);
        if (!UCS_STATUS_IS_ERR(status)) {
            ucp_tag_unexp_recv(&worker->tm, rdesc, recv_tag);
        }
    }

    return status;
}
Exemple #6
0
static void uct_ud_ep_reset(uct_ud_ep_t *ep)
{
    ep->tx.psn         = 1;
    /* TODO: configurable max window size */
    ep->tx.max_psn     = ep->tx.psn + UCT_UD_MAX_WINDOW;
    ep->tx.acked_psn   = 0;
    ep->tx.pending.ops = UCT_UD_EP_OP_NONE;
    ucs_queue_head_init(&ep->tx.window);

    ep->rx.acked_psn = 0;
    ucs_frag_list_init(ep->tx.psn-1, &ep->rx.ooo_pkts, 0 /*TODO: ooo support */
                       UCS_STATS_ARG(ep->rx.stats));
}
Exemple #7
0
static UCS_CLASS_INIT_FUNC(uct_cuda_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_cuda_iface_ops, pd, worker,
                              tl_config UCS_STATS_ARG(NULL));

    if (strcmp(dev_name, UCT_CUDA_DEV_NAME) != 0) {
        ucs_error("No device was found: %s", dev_name);
        return UCS_ERR_NO_DEVICE;
    }

    return UCS_OK;
}
Exemple #8
0
static UCS_CLASS_INIT_FUNC(uct_ugni_smsg_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    gni_return_t ugni_rc;
    unsigned int bytes_per_mbox;
    gni_smsg_attr_t smsg_attr;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_smsg_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.smsg_seg_size = 2048;
    self->config.rx_headroom  = params->rx_headroom;
    self->config.smsg_max_retransmit = 16;
    self->config.smsg_max_credit = 8;
    self->smsg_id = 0;

    smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
    smsg_attr.mbox_maxcredit = self->config.smsg_max_credit;
    smsg_attr.msg_maxsize = self->config.smsg_seg_size;

    ugni_rc = GNI_SmsgBufferSizeNeeded(&(smsg_attr), &bytes_per_mbox);
    self->bytes_per_mbox = ucs_align_up_pow2(bytes_per_mbox, ucs_get_page_size());

    if (ugni_rc != GNI_RC_SUCCESS) {
        ucs_error("Smsg buffer size calculation failed");
        status = UCS_ERR_INVALID_PARAM;
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            self->config.smsg_seg_size + sizeof(uct_ugni_smsg_desc_t),
                            0,
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128           ,               /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_desc_mpool_ops,
                            "UGNI-SMSG-DESC");

    if (UCS_OK != status) {
        ucs_error("Desc Mpool creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_mbox,
                            0,
                            self->bytes_per_mbox + sizeof(uct_ugni_smsg_mbox_t),
                            sizeof(uct_ugni_smsg_mbox_t),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_mbox_mpool_ops,
                            "UGNI-SMSG-MBOX");

    if (UCS_OK != status) {
        ucs_error("Mbox Mpool creation failed");
        goto clean_desc;
    }

    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
                             self->user_desc, self->user_desc = NULL);

    status = ugni_smsg_activate_iface(self);
    if (UCS_OK != status) {
        ucs_error("Failed to activate the interface");
        goto clean_mbox;
    }

    ugni_rc = GNI_SmsgSetMaxRetrans(self->super.nic_handle, self->config.smsg_max_retransmit);

    if (ugni_rc != GNI_RC_SUCCESS) {
        ucs_error("Smsg setting max retransmit count failed.");
        status = UCS_ERR_INVALID_PARAM;
        goto clean_iface;
    }

    /* TBD: eventually the uct_ugni_progress has to be moved to
     * udt layer so each ugni layer will have own progress */
    uct_worker_progress_register(worker, uct_ugni_smsg_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

 clean_iface:
    ugni_smsg_deactivate_iface(self);
 clean_desc:
    ucs_mpool_put(self->user_desc);
    ucs_mpool_cleanup(&self->free_desc, 1);
 clean_mbox:
    ucs_mpool_cleanup(&self->free_mbox, 1);
 exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}
Exemple #9
0
static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_rdma_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_rdma_iface_config_t);
    ucs_status_t status;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, pd, worker, dev_name, &uct_ugni_rdma_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.fma_seg_size  = UCT_UGNI_MAX_FMA;
    self->config.rdma_max_size = UCT_UGNI_MAX_RDMA;

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            sizeof(uct_ugni_base_desc_t),
                            0,                            /* alignment offset */
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_rdma_desc_mpool_ops,
                            "UGNI-DESC-ONLY");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc_get,
                            0,
                            sizeof(uct_ugni_rdma_fetch_desc_t),
                            0,                            /* alignment offset */
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128 ,                         /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_rdma_desc_mpool_ops,
                            "UGNI-GET-DESC-ONLY");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_desc;
    }

    status = ucs_mpool_init(&self->free_desc_buffer,
                            0,
                            sizeof(uct_ugni_base_desc_t) + self->config.fma_seg_size,
                            sizeof(uct_ugni_base_desc_t), /* alignment offset */
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128 ,                         /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_rdma_desc_mpool_ops,
                            "UGNI-DESC-BUFFER");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_desc_get;
    }

    status = uct_iface_mpool_init(&self->super.super,
                                  &self->free_desc_famo,
                                  sizeof(uct_ugni_rdma_fetch_desc_t) + 8,
                                  sizeof(uct_ugni_rdma_fetch_desc_t),/* alignment offset */
                                  UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                  &config->mpool,               /* mpool config */
                                  128 ,                         /* grow */
                                  uct_ugni_base_desc_key_init,  /* memory/key init */
                                  "UGNI-DESC-FAMO");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_buffer;
    }

    status = uct_iface_mpool_init(&self->super.super,
                                  &self->free_desc_get_buffer,
                                  sizeof(uct_ugni_rdma_fetch_desc_t) +
                                  self->config.fma_seg_size,
                                  sizeof(uct_ugni_rdma_fetch_desc_t), /* alignment offset */
                                  UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                  &config->mpool,               /* mpool config */
                                  128 ,                         /* grow */
                                  uct_ugni_base_desc_key_init,  /* memory/key init */
                                  "UGNI-DESC-GET");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_famo;
    }

    status = ugni_activate_iface(&self->super);
    if (UCS_OK != status) {
        ucs_error("Failed to activate the interface");
        goto clean_get_buffer;
    }

    if(GNI_DEVICE_ARIES == self->super.dev->type) {
        uct_ugni_rdma_iface_ops.ep_atomic_swap64    = uct_ugni_ep_atomic_swap64;
        uct_ugni_rdma_iface_ops.ep_atomic_add32     = uct_ugni_ep_atomic_add32;
        uct_ugni_rdma_iface_ops.ep_atomic_fadd32    = uct_ugni_ep_atomic_fadd32;
        uct_ugni_rdma_iface_ops.ep_atomic_cswap32   = uct_ugni_ep_atomic_cswap32;
        uct_ugni_rdma_iface_ops.ep_atomic_swap32    = uct_ugni_ep_atomic_swap32;
    }

    /* TBD: eventually the uct_ugni_progress has to be moved to 
     * rdma layer so each ugni layer will have own progress */
    uct_worker_progress_register(worker, uct_ugni_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

clean_get_buffer:
    ucs_mpool_cleanup(&self->free_desc_get_buffer, 1);
clean_famo:
    ucs_mpool_cleanup(&self->free_desc_famo, 1);
clean_buffer:
    ucs_mpool_cleanup(&self->free_desc_buffer, 1);
clean_desc_get:
    ucs_mpool_cleanup(&self->free_desc_get, 1);
clean_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}
Exemple #10
0
static UCS_F_ALWAYS_INLINE void
ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count,
                    uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask,
                    ucp_request_t *req, uint32_t req_flags, ucp_tag_recv_callback_t cb,
                    ucp_recv_desc_t *rdesc, const char *debug_name)
{
    unsigned common_flags = UCP_REQUEST_FLAG_RECV | UCP_REQUEST_FLAG_EXPECTED;
    ucp_eager_first_hdr_t *eagerf_hdr;
    ucp_request_queue_t *req_queue;
    uct_memory_type_t mem_type;
    size_t hdr_len, recv_len;
    ucs_status_t status;
    uint64_t msg_id;

    ucp_trace_req(req, "%s buffer %p dt 0x%lx count %zu tag %"PRIx64"/%"PRIx64,
                  debug_name, buffer, datatype, count, tag, tag_mask);

    /* First, check the fast path case - single fragment
     * in this case avoid initializing most of request fields
     * */
    if (ucs_likely((rdesc != NULL) && (rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY))) {
        UCS_PROFILE_REQUEST_EVENT(req, "eager_only_match", 0);
        UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags);
        UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP);

        if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) {
            ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags);
        }

        req->flags                    = UCP_REQUEST_FLAG_RECV | req_flags;
        hdr_len                       = rdesc->payload_offset;
        recv_len                      = rdesc->length - hdr_len;
        req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc);
        req->recv.tag.info.length     = recv_len;

        ucp_memory_type_detect_mds(worker->context, buffer, recv_len, &mem_type);

        status = ucp_dt_unpack_only(worker, buffer, count, datatype, mem_type,
                                    (void*)(rdesc + 1) + hdr_len, recv_len, 1);
        ucp_recv_desc_release(rdesc);

        if (req_flags & UCP_REQUEST_FLAG_CALLBACK) {
            cb(req + 1, status, &req->recv.tag.info);
        }
        ucp_tag_recv_request_completed(req, status, &req->recv.tag.info,
                                       debug_name);
        return;
    }

    /* Initialize receive request */
    req->status             = UCS_OK;
    req->recv.worker        = worker;
    req->recv.buffer        = buffer;
    req->recv.datatype      = datatype;

    ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count);

    if (!UCP_DT_IS_CONTIG(datatype)) {
        common_flags       |= UCP_REQUEST_FLAG_BLOCK_OFFLOAD;
    }

    req->flags              = common_flags | req_flags;
    req->recv.length        = ucp_dt_length(datatype, count, buffer,
                                            &req->recv.state);

    ucp_memory_type_detect_mds(worker->context, buffer, req->recv.length, &mem_type);

    req->recv.mem_type      = mem_type;
    req->recv.tag.tag       = tag;
    req->recv.tag.tag_mask  = tag_mask;
    req->recv.tag.cb        = cb;
    if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_REQ)) {
        req->recv.tag.info.sender_tag = 0;
    }

    if (ucs_unlikely(rdesc == NULL)) {
        /* If not found on unexpected, wait until it arrives.
         * If was found but need this receive request for later completion, save it */
        req_queue = ucp_tag_exp_get_queue(&worker->tm, tag, tag_mask);

        /* If offload supported, post this tag to transport as well.
         * TODO: need to distinguish the cases when posting is not needed. */
        ucp_tag_offload_try_post(worker, req, req_queue);

        ucp_tag_exp_push(&worker->tm, req_queue, req);

        ucs_trace_req("%s returning expected request %p (%p)", debug_name, req,
                      req + 1);
        return;
    }

    /* Check rendezvous case */
    if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_RNDV)) {
        ucp_rndv_matched(worker, req, (void*)(rdesc + 1));
        UCP_WORKER_STAT_RNDV(worker, UNEXP);
        ucp_recv_desc_release(rdesc);
        return;
    }

    if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) {
        ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags);
    }

    UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags);
    ucs_assert(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER);
    eagerf_hdr                    = (void*)(rdesc + 1);
    req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc);
    req->recv.tag.info.length     =
    req->recv.tag.remaining       = eagerf_hdr->total_len;

    /* process first fragment */
    UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP);
    msg_id = eagerf_hdr->msg_id;
    status = ucp_tag_recv_request_process_rdesc(req, rdesc, 0);
    ucs_assert(status == UCS_INPROGRESS);

    /* process additional fragments */
    ucp_tag_frag_list_process_queue(&worker->tm, req, msg_id
                                    UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP));
}
Exemple #11
0
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const uct_iface_params_t *params,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, params->dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->ops                      = ops;

    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) +
                                             params->rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset -
                                     params->rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                             config->rx.queue_len / 4);
    self->config.port_num          = port_num;
    self->config.sl                = config->sl;
    self->config.gid_index         = config->gid_index;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_device_query_gid(dev, self->config.port_num,
                                     self->config.gid_index, &self->gid);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("ibv_create_comp_channel() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl,
                                    &self->recv_cq);
    if (status != UCS_OK) {
        goto err_destroy_send_cq;
    }

    /* Address scope and size */
    if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) {
        if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) {
            self->addr_type = UCT_IB_ADDRESS_TYPE_ETH;
        } else {
            self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix);
        }
    } else {
        ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST);
        self->addr_type = config->addr_type;
    }

    self->addr_size  = uct_ib_address_size(self->addr_type);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Exemple #12
0
static UCS_CLASS_INIT_FUNC(uct_ugni_udt_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    uct_ugni_udt_desc_t *desc;
    gni_return_t ugni_rc;
    int rc;

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_udt_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.udt_seg_size = GNI_DATAGRAM_MAXSIZE;
    self->config.rx_headroom  = params->rx_headroom;
    self->release_desc.cb     = uct_ugni_udt_iface_release_desc;

    status = ucs_async_pipe_create(&self->event_pipe);
    if (UCS_OK != status) {
        ucs_error("Pipe creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            uct_ugni_udt_get_diff(self) + self->config.udt_seg_size * 2,
                            uct_ugni_udt_get_diff(self),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_udt_desc_mpool_ops,
                            "UGNI-UDT-DESC");

    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_pipe;
    }

    ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(self), NULL, &self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        status = UCS_ERR_NO_DEVICE;
        goto clean_free_desc;
    }

    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
                             desc, goto clean_ep);
    ucs_debug("First wildcard desc is %p", desc);

    /* Init any desc */
    self->desc_any = desc;
    status = uct_ugni_udt_ep_any_post(self);
    if (UCS_OK != status) {
        /* We can't continue if we can't post the first receive */
        ucs_error("Failed to post wildcard request");
        goto clean_any_desc;
    }

    status = ucs_async_set_event_handler(self->super.super.worker->async->mode,
                                         ucs_async_pipe_rfd(&self->event_pipe),
                                         POLLIN,
                                         uct_ugni_proccess_datagram_pipe,
                                         self, self->super.super.worker->async);
                                 
    if (UCS_OK != status) {
        goto clean_cancel_desc;
    }

    pthread_mutex_init(&self->device_lock, NULL);
    pthread_cond_init(&self->device_condition, NULL);
    self->events_ready = 0;

    rc = pthread_create(&self->event_thread, NULL, uct_ugni_udt_device_thread, self);
    if(0 != rc) {
        goto clean_remove_event;
    }

    return UCS_OK;

 clean_remove_event:
    ucs_async_pipe_destroy(&self->event_pipe);
 clean_cancel_desc:
    uct_ugni_udt_clean_wildcard(self);
 clean_any_desc:
    ucs_mpool_put(self->desc_any);
 clean_ep:
    ugni_rc = GNI_EpDestroy(self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_warn("GNI_EpDestroy failed, Error status: %s %d",
                 gni_err_str[ugni_rc], ugni_rc);
    }
 clean_free_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
 clean_pipe:
    ucs_async_pipe_destroy(&self->event_pipe);
 exit:
    uct_ugni_cleanup_base_iface(&self->super);
    ucs_error("Failed to activate interface");
    return status;
}
Exemple #13
0
static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_pd_h pd, uct_worker_h worker,
                           const char *dev_name, size_t rx_headroom,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t rc;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, pd, worker, dev_name, &uct_ugni_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.fma_seg_size  = UCT_UGNI_MAX_FMA;
    self->config.rdma_max_size = UCT_UGNI_MAX_RDMA;

    rc = ucs_mpool_create("UGNI-DESC-ONLY", sizeof(uct_ugni_base_desc_t),
                          0,                            /* alignment offset */
                          UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                          128 ,                         /* grow */
                          config->mpool.max_bufs,       /* max buffers */
                          &self->super.super,           /* iface */
                          ucs_mpool_hugetlb_malloc,     /* allocation hooks */
                          ucs_mpool_hugetlb_free,       /* free hook */
                          uct_ugni_base_desc_init,      /* init func */
                          NULL , &self->free_desc);
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto exit;
    }

    rc = ucs_mpool_create("UGNI-GET-DESC-ONLY", sizeof(uct_ugni_fetch_desc_t),
                          0,                            /* alignment offset */
                          UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                          128 ,                         /* grow */
                          config->mpool.max_bufs,       /* max buffers */
                          &self->super.super,           /* iface */
                          ucs_mpool_hugetlb_malloc,     /* allocation hooks */
                          ucs_mpool_hugetlb_free,       /* free hook */
                          uct_ugni_base_desc_init,      /* init func */
                          NULL , &self->free_desc_get);
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_desc;
    }

    rc = ucs_mpool_create("UGNI-DESC-BUFFER",
                          sizeof(uct_ugni_base_desc_t) +
                          self->config.fma_seg_size,
                          sizeof(uct_ugni_base_desc_t), /* alignment offset */
                          UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                          128 ,                         /* grow */
                          config->mpool.max_bufs,       /* max buffers */
                          &self->super.super,           /* iface */
                          ucs_mpool_hugetlb_malloc,     /* allocation hooks */
                          ucs_mpool_hugetlb_free,       /* free hook */
                          uct_ugni_base_desc_init,      /* init func */
                          NULL , &self->free_desc_buffer);
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_desc_get;
    }

    rc = uct_iface_mpool_create(&self->super.super.super,
                                sizeof(uct_ugni_fetch_desc_t) + 8,
                                sizeof(uct_ugni_fetch_desc_t),  /* alignment offset */
                                UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                &config->mpool,               /* mpool config */
                                128 ,                         /* grow */
                                uct_ugni_base_desc_key_init,  /* memory/key init */
                                "UGNI-DESC-FAMO",             /* name */
                                &self->free_desc_famo);       /* mpool */
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_buffer;
    }

    rc = uct_iface_mpool_create(&self->super.super.super,
                                sizeof(uct_ugni_fetch_desc_t) +
                                self->config.fma_seg_size,
                                sizeof(uct_ugni_fetch_desc_t), /* alignment offset */
                                UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                                &config->mpool,               /* mpool config */
                                128 ,                         /* grow */
                                uct_ugni_base_desc_key_init,  /* memory/key init */
                                "UGNI-DESC-GET",              /* name */
                                &self->free_desc_get_buffer); /* mpool */
    if (UCS_OK != rc) {
        ucs_error("Mpool creation failed");
        goto clean_famo;
    }

    rc = ugni_activate_iface(&self->super);
    if (UCS_OK != rc) {
        ucs_error("Failed to activate the interface");
        goto clean_get_buffer;
    }

    /* TBD: eventually the uct_ugni_progress has to be moved to 
     * rdma layer so each ugni layer will have own progress */
    ucs_notifier_chain_add(&worker->progress_chain, uct_ugni_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

clean_get_buffer:
    ucs_mpool_destroy(self->free_desc_get_buffer);
clean_famo:
    ucs_mpool_destroy(self->free_desc_famo);
clean_buffer:
    ucs_mpool_destroy(self->free_desc_buffer);
clean_desc_get:
    ucs_mpool_destroy(self->free_desc_get);
clean_desc:
    ucs_mpool_destroy(self->free_desc);
exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return rc;
}
Exemple #14
0
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const char *dev_name, unsigned rx_headroom,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->port_num                 = port_num;
    self->sl                       = config->sl;
    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                     config->rx.queue_len / 4);
    self->ops                      = ops;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_gid(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("Failed to create completion channel: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len,
                                  NULL, self->comp_channel, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_comp_channel;
    }

    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }

    self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len,
                                  NULL, self->comp_channel, 0);
    ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);

    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    /* Address scope and size */
    self->addr_scope = uct_ib_address_scope(self->gid.global.subnet_prefix);
    self->addr_size  = uct_ib_address_size(self->addr_scope);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Exemple #15
0
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *ops, uct_pd_h pd,
                    uct_worker_h worker, const char *dev_name, unsigned rx_headroom,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(pd, uct_ib_pd_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, ops, pd, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_iface_find_port(dev, dev_name, &port_num);
    if (status != UCS_OK) {
        ucs_error("Failed to find port %s: %s", dev_name, ucs_status_string(status));
        goto err;
    }

    self->port_num                 = port_num;
    self->sl                       = config->sl;
    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom;
    self->config.seg_size          = config->super.max_bcopy;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_gid(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    /* TODO comp_channel */
    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len, NULL, NULL, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }

    self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len,
                                  NULL, NULL, 0);
    ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);

    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}