static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, const uct_iface_config_t *tl_config) { ucs_status_t status; uct_self_iface_config_t *self_config = 0; ucs_trace_func("Creating a loop-back transport self=%p rxh=%lu", self, params->rx_headroom); if (strcmp(params->dev_name, UCT_SELF_NAME) != 0) { ucs_error("No device was found: %s", params->dev_name); return UCS_ERR_NO_DEVICE; } UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker, tl_config UCS_STATS_ARG(params->stats_root) UCS_STATS_ARG(UCT_SELF_NAME)); self_config = ucs_derived_of(tl_config, uct_self_iface_config_t); self->id = ucs_generate_uuid((uintptr_t)self); self->rx_headroom = params->rx_headroom; self->data_length = self_config->super.max_bcopy; self->release_desc.cb = uct_self_iface_release_desc; /* create a memory pool for data transferred */ status = uct_iface_mpool_init(&self->super, &self->msg_desc_mp, sizeof(uct_recv_desc_t) + self->rx_headroom + self->data_length, sizeof(uct_recv_desc_t) + self->rx_headroom, UCS_SYS_CACHE_LINE_SIZE, &self_config->mp, 256, ucs_empty_function, "self_msg_desc"); if (UCS_OK != status) { ucs_error("Failed to create a memory pool for the loop-back transport"); goto err; } /* set the message descriptor for the loop-back */ self->msg_cur_desc = ucs_mpool_get(&self->msg_desc_mp); VALGRIND_MAKE_MEM_DEFINED(self->msg_cur_desc, sizeof(*(self->msg_cur_desc))); if (NULL == self->msg_cur_desc) { ucs_error("Failed to get the first descriptor in loop-back MP storage"); status = UCS_ERR_NO_RESOURCE; goto destroy_mpool; } ucs_debug("Created a loop-back iface. id=0x%lx, desc=%p, len=%u, tx_hdr=%lu", self->id, self->msg_cur_desc, self->data_length, self->rx_headroom); return UCS_OK; destroy_mpool: ucs_mpool_cleanup(&self->msg_desc_mp, 1); err: return status; }
static UCS_CLASS_INIT_FUNC(uct_knem_iface_t, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, const uct_iface_config_t *tl_config) { ucs_assert(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE); UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_knem_iface_ops, md, worker, params, tl_config UCS_STATS_ARG(params->stats_root) UCS_STATS_ARG(UCT_KNEM_TL_NAME)); self->knem_md = (uct_knem_md_t *)md; uct_sm_get_max_iov(); /* to initialize ucs_get_max_iov static variable */ return UCS_OK; }
static UCS_CLASS_INIT_FUNC(uct_cma_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_cma_iface_ops, pd, worker, tl_config UCS_STATS_ARG(NULL)); return UCS_OK; }
static UCS_CLASS_INIT_FUNC(uct_gdr_copy_iface_t, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, const uct_iface_config_t *tl_config) { UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_gdr_copy_iface_ops, md, worker, params, tl_config UCS_STATS_ARG(params->stats_root) UCS_STATS_ARG(UCT_GDR_COPY_TL_NAME)); if (strncmp(params->mode.device.dev_name, UCT_CUDA_DEV_NAME, strlen(UCT_CUDA_DEV_NAME)) != 0) { ucs_error("No device was found: %s", params->mode.device.dev_name); return UCS_ERR_NO_DEVICE; } self->id = ucs_generate_uuid((uintptr_t)self); return UCS_OK; }
static UCS_F_ALWAYS_INLINE ucs_status_t ucp_eager_tagged_handler(void *arg, void *data, size_t length, unsigned am_flags, uint16_t flags, uint16_t hdr_len, uint16_t priv_length) { ucp_worker_h worker = arg; ucp_eager_hdr_t *eager_hdr = data; ucp_eager_first_hdr_t *eagerf_hdr; ucp_recv_desc_t *rdesc; ucp_request_t *req; ucs_status_t status; ucp_tag_t recv_tag; size_t recv_len; ucs_assert(length >= hdr_len); ucs_assert(flags & UCP_RECV_DESC_FLAG_EAGER); recv_tag = eager_hdr->super.tag; recv_len = length - hdr_len; req = ucp_tag_exp_search(&worker->tm, recv_tag); if (req != NULL) { ucp_eager_expected_handler(worker, req, data, recv_len, recv_tag, flags); if (flags & UCP_RECV_DESC_FLAG_EAGER_SYNC) { ucp_tag_eager_sync_send_ack(worker, data, flags); } if (flags & UCP_RECV_DESC_FLAG_EAGER_ONLY) { req->recv.tag.info.length = recv_len; status = ucp_request_recv_data_unpack(req, data + hdr_len, recv_len, 0, 1); ucp_request_complete_tag_recv(req, status); } else { eagerf_hdr = data; req->recv.tag.info.length = req->recv.tag.remaining = eagerf_hdr->total_len; status = ucp_tag_request_process_recv_data(req, data + hdr_len, recv_len, 0, 0); ucs_assert(status == UCS_INPROGRESS); ucp_tag_frag_list_process_queue(&worker->tm, req, eagerf_hdr->msg_id UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_EXP)); } status = UCS_OK; } else { status = ucp_recv_desc_init(worker, data, length, 0, am_flags, hdr_len, flags, priv_length, &rdesc); if (!UCS_STATUS_IS_ERR(status)) { ucp_tag_unexp_recv(&worker->tm, rdesc, recv_tag); } } return status; }
static void uct_ud_ep_reset(uct_ud_ep_t *ep) { ep->tx.psn = 1; /* TODO: configurable max window size */ ep->tx.max_psn = ep->tx.psn + UCT_UD_MAX_WINDOW; ep->tx.acked_psn = 0; ep->tx.pending.ops = UCT_UD_EP_OP_NONE; ucs_queue_head_init(&ep->tx.window); ep->rx.acked_psn = 0; ucs_frag_list_init(ep->tx.psn-1, &ep->rx.ooo_pkts, 0 /*TODO: ooo support */ UCS_STATS_ARG(ep->rx.stats)); }
static UCS_CLASS_INIT_FUNC(uct_cuda_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_cuda_iface_ops, pd, worker, tl_config UCS_STATS_ARG(NULL)); if (strcmp(dev_name, UCT_CUDA_DEV_NAME) != 0) { ucs_error("No device was found: %s", dev_name); return UCS_ERR_NO_DEVICE; } return UCS_OK; }
static UCS_CLASS_INIT_FUNC(uct_ugni_smsg_iface_t, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, const uct_iface_config_t *tl_config) { uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t); ucs_status_t status; gni_return_t ugni_rc; unsigned int bytes_per_mbox; gni_smsg_attr_t smsg_attr; pthread_mutex_lock(&uct_ugni_global_lock); UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params, &uct_ugni_smsg_iface_ops, &config->super UCS_STATS_ARG(NULL)); /* Setting initial configuration */ self->config.smsg_seg_size = 2048; self->config.rx_headroom = params->rx_headroom; self->config.smsg_max_retransmit = 16; self->config.smsg_max_credit = 8; self->smsg_id = 0; smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; smsg_attr.mbox_maxcredit = self->config.smsg_max_credit; smsg_attr.msg_maxsize = self->config.smsg_seg_size; ugni_rc = GNI_SmsgBufferSizeNeeded(&(smsg_attr), &bytes_per_mbox); self->bytes_per_mbox = ucs_align_up_pow2(bytes_per_mbox, ucs_get_page_size()); if (ugni_rc != GNI_RC_SUCCESS) { ucs_error("Smsg buffer size calculation failed"); status = UCS_ERR_INVALID_PARAM; goto exit; } status = ucs_mpool_init(&self->free_desc, 0, self->config.smsg_seg_size + sizeof(uct_ugni_smsg_desc_t), 0, UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128 , /* grow */ config->mpool.max_bufs, /* max buffers */ &uct_ugni_smsg_desc_mpool_ops, "UGNI-SMSG-DESC"); if (UCS_OK != status) { ucs_error("Desc Mpool creation failed"); goto exit; } status = ucs_mpool_init(&self->free_mbox, 0, self->bytes_per_mbox + sizeof(uct_ugni_smsg_mbox_t), sizeof(uct_ugni_smsg_mbox_t), UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128, /* grow */ config->mpool.max_bufs, /* max buffers */ &uct_ugni_smsg_mbox_mpool_ops, "UGNI-SMSG-MBOX"); if (UCS_OK != status) { ucs_error("Mbox Mpool creation failed"); goto clean_desc; } UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc, self->user_desc, self->user_desc = NULL); status = ugni_smsg_activate_iface(self); if (UCS_OK != status) { ucs_error("Failed to activate the interface"); goto clean_mbox; } ugni_rc = GNI_SmsgSetMaxRetrans(self->super.nic_handle, self->config.smsg_max_retransmit); if (ugni_rc != GNI_RC_SUCCESS) { ucs_error("Smsg setting max retransmit count failed."); status = UCS_ERR_INVALID_PARAM; goto clean_iface; } /* TBD: eventually the uct_ugni_progress has to be moved to * udt layer so each ugni layer will have own progress */ uct_worker_progress_register(worker, uct_ugni_smsg_progress, self); pthread_mutex_unlock(&uct_ugni_global_lock); return UCS_OK; clean_iface: ugni_smsg_deactivate_iface(self); clean_desc: ucs_mpool_put(self->user_desc); ucs_mpool_cleanup(&self->free_desc, 1); clean_mbox: ucs_mpool_cleanup(&self->free_mbox, 1); exit: ucs_error("Failed to activate interface"); pthread_mutex_unlock(&uct_ugni_global_lock); return status; }
static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { uct_ugni_rdma_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_rdma_iface_config_t); ucs_status_t status; pthread_mutex_lock(&uct_ugni_global_lock); UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, pd, worker, dev_name, &uct_ugni_rdma_iface_ops, &config->super UCS_STATS_ARG(NULL)); /* Setting initial configuration */ self->config.fma_seg_size = UCT_UGNI_MAX_FMA; self->config.rdma_max_size = UCT_UGNI_MAX_RDMA; status = ucs_mpool_init(&self->free_desc, 0, sizeof(uct_ugni_base_desc_t), 0, /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128, /* grow */ config->mpool.max_bufs, /* max buffers */ &uct_ugni_rdma_desc_mpool_ops, "UGNI-DESC-ONLY"); if (UCS_OK != status) { ucs_error("Mpool creation failed"); goto exit; } status = ucs_mpool_init(&self->free_desc_get, 0, sizeof(uct_ugni_rdma_fetch_desc_t), 0, /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128 , /* grow */ config->mpool.max_bufs, /* max buffers */ &uct_ugni_rdma_desc_mpool_ops, "UGNI-GET-DESC-ONLY"); if (UCS_OK != status) { ucs_error("Mpool creation failed"); goto clean_desc; } status = ucs_mpool_init(&self->free_desc_buffer, 0, sizeof(uct_ugni_base_desc_t) + self->config.fma_seg_size, sizeof(uct_ugni_base_desc_t), /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128 , /* grow */ config->mpool.max_bufs, /* max buffers */ &uct_ugni_rdma_desc_mpool_ops, "UGNI-DESC-BUFFER"); if (UCS_OK != status) { ucs_error("Mpool creation failed"); goto clean_desc_get; } status = uct_iface_mpool_init(&self->super.super, &self->free_desc_famo, sizeof(uct_ugni_rdma_fetch_desc_t) + 8, sizeof(uct_ugni_rdma_fetch_desc_t),/* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ &config->mpool, /* mpool config */ 128 , /* grow */ uct_ugni_base_desc_key_init, /* memory/key init */ "UGNI-DESC-FAMO"); if (UCS_OK != status) { ucs_error("Mpool creation failed"); goto clean_buffer; } status = uct_iface_mpool_init(&self->super.super, &self->free_desc_get_buffer, sizeof(uct_ugni_rdma_fetch_desc_t) + self->config.fma_seg_size, sizeof(uct_ugni_rdma_fetch_desc_t), /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ &config->mpool, /* mpool config */ 128 , /* grow */ uct_ugni_base_desc_key_init, /* memory/key init */ "UGNI-DESC-GET"); if (UCS_OK != status) { ucs_error("Mpool creation failed"); goto clean_famo; } status = ugni_activate_iface(&self->super); if (UCS_OK != status) { ucs_error("Failed to activate the interface"); goto clean_get_buffer; } if(GNI_DEVICE_ARIES == self->super.dev->type) { uct_ugni_rdma_iface_ops.ep_atomic_swap64 = uct_ugni_ep_atomic_swap64; uct_ugni_rdma_iface_ops.ep_atomic_add32 = uct_ugni_ep_atomic_add32; uct_ugni_rdma_iface_ops.ep_atomic_fadd32 = uct_ugni_ep_atomic_fadd32; uct_ugni_rdma_iface_ops.ep_atomic_cswap32 = uct_ugni_ep_atomic_cswap32; uct_ugni_rdma_iface_ops.ep_atomic_swap32 = uct_ugni_ep_atomic_swap32; } /* TBD: eventually the uct_ugni_progress has to be moved to * rdma layer so each ugni layer will have own progress */ uct_worker_progress_register(worker, uct_ugni_progress, self); pthread_mutex_unlock(&uct_ugni_global_lock); return UCS_OK; clean_get_buffer: ucs_mpool_cleanup(&self->free_desc_get_buffer, 1); clean_famo: ucs_mpool_cleanup(&self->free_desc_famo, 1); clean_buffer: ucs_mpool_cleanup(&self->free_desc_buffer, 1); clean_desc_get: ucs_mpool_cleanup(&self->free_desc_get, 1); clean_desc: ucs_mpool_cleanup(&self->free_desc, 1); exit: ucs_error("Failed to activate interface"); pthread_mutex_unlock(&uct_ugni_global_lock); return status; }
static UCS_F_ALWAYS_INLINE void ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count, uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask, ucp_request_t *req, uint32_t req_flags, ucp_tag_recv_callback_t cb, ucp_recv_desc_t *rdesc, const char *debug_name) { unsigned common_flags = UCP_REQUEST_FLAG_RECV | UCP_REQUEST_FLAG_EXPECTED; ucp_eager_first_hdr_t *eagerf_hdr; ucp_request_queue_t *req_queue; uct_memory_type_t mem_type; size_t hdr_len, recv_len; ucs_status_t status; uint64_t msg_id; ucp_trace_req(req, "%s buffer %p dt 0x%lx count %zu tag %"PRIx64"/%"PRIx64, debug_name, buffer, datatype, count, tag, tag_mask); /* First, check the fast path case - single fragment * in this case avoid initializing most of request fields * */ if (ucs_likely((rdesc != NULL) && (rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY))) { UCS_PROFILE_REQUEST_EVENT(req, "eager_only_match", 0); UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags); UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP); if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) { ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags); } req->flags = UCP_REQUEST_FLAG_RECV | req_flags; hdr_len = rdesc->payload_offset; recv_len = rdesc->length - hdr_len; req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc); req->recv.tag.info.length = recv_len; ucp_memory_type_detect_mds(worker->context, buffer, recv_len, &mem_type); status = ucp_dt_unpack_only(worker, buffer, count, datatype, mem_type, (void*)(rdesc + 1) + hdr_len, recv_len, 1); ucp_recv_desc_release(rdesc); if (req_flags & UCP_REQUEST_FLAG_CALLBACK) { cb(req + 1, status, &req->recv.tag.info); } ucp_tag_recv_request_completed(req, status, &req->recv.tag.info, debug_name); return; } /* Initialize receive request */ req->status = UCS_OK; req->recv.worker = worker; req->recv.buffer = buffer; req->recv.datatype = datatype; ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count); if (!UCP_DT_IS_CONTIG(datatype)) { common_flags |= UCP_REQUEST_FLAG_BLOCK_OFFLOAD; } req->flags = common_flags | req_flags; req->recv.length = ucp_dt_length(datatype, count, buffer, &req->recv.state); ucp_memory_type_detect_mds(worker->context, buffer, req->recv.length, &mem_type); req->recv.mem_type = mem_type; req->recv.tag.tag = tag; req->recv.tag.tag_mask = tag_mask; req->recv.tag.cb = cb; if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_REQ)) { req->recv.tag.info.sender_tag = 0; } if (ucs_unlikely(rdesc == NULL)) { /* If not found on unexpected, wait until it arrives. * If was found but need this receive request for later completion, save it */ req_queue = ucp_tag_exp_get_queue(&worker->tm, tag, tag_mask); /* If offload supported, post this tag to transport as well. * TODO: need to distinguish the cases when posting is not needed. */ ucp_tag_offload_try_post(worker, req, req_queue); ucp_tag_exp_push(&worker->tm, req_queue, req); ucs_trace_req("%s returning expected request %p (%p)", debug_name, req, req + 1); return; } /* Check rendezvous case */ if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_RNDV)) { ucp_rndv_matched(worker, req, (void*)(rdesc + 1)); UCP_WORKER_STAT_RNDV(worker, UNEXP); ucp_recv_desc_release(rdesc); return; } if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) { ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags); } UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags); ucs_assert(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER); eagerf_hdr = (void*)(rdesc + 1); req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc); req->recv.tag.info.length = req->recv.tag.remaining = eagerf_hdr->total_len; /* process first fragment */ UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP); msg_id = eagerf_hdr->msg_id; status = ucp_tag_recv_request_process_rdesc(req, rdesc, 0); ucs_assert(status == UCS_INPROGRESS); /* process additional fragments */ ucp_tag_frag_list_process_queue(&worker->tm, req, msg_id UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP)); }
/** * @param rx_headroom Headroom requested by the user. * @param rx_priv_len Length of transport private data to reserve (0 if unused) * @param rx_hdr_len Length of transport network header. * @param mss Maximal segment size (transport limit). */ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss, const uct_ib_iface_config_t *config) { uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev; ucs_status_t status; uint8_t port_num; UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker, &config->super UCS_STATS_ARG(dev->stats)); status = uct_ib_device_find_port(dev, params->dev_name, &port_num); if (status != UCS_OK) { goto err; } self->ops = ops; self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) + ucs_max(sizeof(uct_am_recv_desc_t) + params->rx_headroom, rx_priv_len + rx_hdr_len); self->config.rx_hdr_offset = self->config.rx_payload_offset - rx_hdr_len; self->config.rx_headroom_offset= self->config.rx_payload_offset - params->rx_headroom; self->config.seg_size = ucs_min(mss, config->super.max_bcopy); self->config.tx_max_poll = config->tx.max_poll; self->config.rx_max_poll = config->rx.max_poll; self->config.rx_max_batch = ucs_min(config->rx.max_batch, config->rx.queue_len / 4); self->config.port_num = port_num; self->config.sl = config->sl; self->config.gid_index = config->gid_index; status = uct_ib_iface_init_pkey(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_device_query_gid(dev, self->config.port_num, self->config.gid_index, &self->gid); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_lmc(self, config); if (status != UCS_OK) { goto err; } self->comp_channel = ibv_create_comp_channel(dev->ibv_context); if (self->comp_channel == NULL) { ucs_error("ibv_create_comp_channel() failed: %m"); status = UCS_ERR_IO_ERROR; goto err_free_path_bits; } status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_destroy_comp_channel; } status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq); if (status != UCS_OK) { goto err_destroy_comp_channel; } status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl, &self->recv_cq); if (status != UCS_OK) { goto err_destroy_send_cq; } /* Address scope and size */ if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) { if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) { self->addr_type = UCT_IB_ADDRESS_TYPE_ETH; } else { self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix); } } else { ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST); self->addr_type = config->addr_type; } self->addr_size = uct_ib_address_size(self->addr_type); ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d", self->config.rx_headroom_offset, self->config.rx_payload_offset, self->config.rx_hdr_offset, self->config.seg_size); return UCS_OK; err_destroy_send_cq: ibv_destroy_cq(self->send_cq); err_destroy_comp_channel: ibv_destroy_comp_channel(self->comp_channel); err_free_path_bits: ucs_free(self->path_bits); err: return status; }
static UCS_CLASS_INIT_FUNC(uct_ugni_udt_iface_t, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, const uct_iface_config_t *tl_config) { uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t); ucs_status_t status; uct_ugni_udt_desc_t *desc; gni_return_t ugni_rc; int rc; UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params, &uct_ugni_udt_iface_ops, &config->super UCS_STATS_ARG(NULL)); /* Setting initial configuration */ self->config.udt_seg_size = GNI_DATAGRAM_MAXSIZE; self->config.rx_headroom = params->rx_headroom; self->release_desc.cb = uct_ugni_udt_iface_release_desc; status = ucs_async_pipe_create(&self->event_pipe); if (UCS_OK != status) { ucs_error("Pipe creation failed"); goto exit; } status = ucs_mpool_init(&self->free_desc, 0, uct_ugni_udt_get_diff(self) + self->config.udt_seg_size * 2, uct_ugni_udt_get_diff(self), UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128, /* grow */ config->mpool.max_bufs, /* max buffers */ &uct_ugni_udt_desc_mpool_ops, "UGNI-UDT-DESC"); if (UCS_OK != status) { ucs_error("Mpool creation failed"); goto clean_pipe; } ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(self), NULL, &self->ep_any); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_EpCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); status = UCS_ERR_NO_DEVICE; goto clean_free_desc; } UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc, desc, goto clean_ep); ucs_debug("First wildcard desc is %p", desc); /* Init any desc */ self->desc_any = desc; status = uct_ugni_udt_ep_any_post(self); if (UCS_OK != status) { /* We can't continue if we can't post the first receive */ ucs_error("Failed to post wildcard request"); goto clean_any_desc; } status = ucs_async_set_event_handler(self->super.super.worker->async->mode, ucs_async_pipe_rfd(&self->event_pipe), POLLIN, uct_ugni_proccess_datagram_pipe, self, self->super.super.worker->async); if (UCS_OK != status) { goto clean_cancel_desc; } pthread_mutex_init(&self->device_lock, NULL); pthread_cond_init(&self->device_condition, NULL); self->events_ready = 0; rc = pthread_create(&self->event_thread, NULL, uct_ugni_udt_device_thread, self); if(0 != rc) { goto clean_remove_event; } return UCS_OK; clean_remove_event: ucs_async_pipe_destroy(&self->event_pipe); clean_cancel_desc: uct_ugni_udt_clean_wildcard(self); clean_any_desc: ucs_mpool_put(self->desc_any); clean_ep: ugni_rc = GNI_EpDestroy(self->ep_any); if (GNI_RC_SUCCESS != ugni_rc) { ucs_warn("GNI_EpDestroy failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); } clean_free_desc: ucs_mpool_cleanup(&self->free_desc, 1); clean_pipe: ucs_async_pipe_destroy(&self->event_pipe); exit: uct_ugni_cleanup_base_iface(&self->super); ucs_error("Failed to activate interface"); return status; }
static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t); ucs_status_t rc; pthread_mutex_lock(&uct_ugni_global_lock); UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, pd, worker, dev_name, &uct_ugni_iface_ops, &config->super UCS_STATS_ARG(NULL)); /* Setting initial configuration */ self->config.fma_seg_size = UCT_UGNI_MAX_FMA; self->config.rdma_max_size = UCT_UGNI_MAX_RDMA; rc = ucs_mpool_create("UGNI-DESC-ONLY", sizeof(uct_ugni_base_desc_t), 0, /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128 , /* grow */ config->mpool.max_bufs, /* max buffers */ &self->super.super, /* iface */ ucs_mpool_hugetlb_malloc, /* allocation hooks */ ucs_mpool_hugetlb_free, /* free hook */ uct_ugni_base_desc_init, /* init func */ NULL , &self->free_desc); if (UCS_OK != rc) { ucs_error("Mpool creation failed"); goto exit; } rc = ucs_mpool_create("UGNI-GET-DESC-ONLY", sizeof(uct_ugni_fetch_desc_t), 0, /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128 , /* grow */ config->mpool.max_bufs, /* max buffers */ &self->super.super, /* iface */ ucs_mpool_hugetlb_malloc, /* allocation hooks */ ucs_mpool_hugetlb_free, /* free hook */ uct_ugni_base_desc_init, /* init func */ NULL , &self->free_desc_get); if (UCS_OK != rc) { ucs_error("Mpool creation failed"); goto clean_desc; } rc = ucs_mpool_create("UGNI-DESC-BUFFER", sizeof(uct_ugni_base_desc_t) + self->config.fma_seg_size, sizeof(uct_ugni_base_desc_t), /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ 128 , /* grow */ config->mpool.max_bufs, /* max buffers */ &self->super.super, /* iface */ ucs_mpool_hugetlb_malloc, /* allocation hooks */ ucs_mpool_hugetlb_free, /* free hook */ uct_ugni_base_desc_init, /* init func */ NULL , &self->free_desc_buffer); if (UCS_OK != rc) { ucs_error("Mpool creation failed"); goto clean_desc_get; } rc = uct_iface_mpool_create(&self->super.super.super, sizeof(uct_ugni_fetch_desc_t) + 8, sizeof(uct_ugni_fetch_desc_t), /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ &config->mpool, /* mpool config */ 128 , /* grow */ uct_ugni_base_desc_key_init, /* memory/key init */ "UGNI-DESC-FAMO", /* name */ &self->free_desc_famo); /* mpool */ if (UCS_OK != rc) { ucs_error("Mpool creation failed"); goto clean_buffer; } rc = uct_iface_mpool_create(&self->super.super.super, sizeof(uct_ugni_fetch_desc_t) + self->config.fma_seg_size, sizeof(uct_ugni_fetch_desc_t), /* alignment offset */ UCS_SYS_CACHE_LINE_SIZE, /* alignment */ &config->mpool, /* mpool config */ 128 , /* grow */ uct_ugni_base_desc_key_init, /* memory/key init */ "UGNI-DESC-GET", /* name */ &self->free_desc_get_buffer); /* mpool */ if (UCS_OK != rc) { ucs_error("Mpool creation failed"); goto clean_famo; } rc = ugni_activate_iface(&self->super); if (UCS_OK != rc) { ucs_error("Failed to activate the interface"); goto clean_get_buffer; } /* TBD: eventually the uct_ugni_progress has to be moved to * rdma layer so each ugni layer will have own progress */ ucs_notifier_chain_add(&worker->progress_chain, uct_ugni_progress, self); pthread_mutex_unlock(&uct_ugni_global_lock); return UCS_OK; clean_get_buffer: ucs_mpool_destroy(self->free_desc_get_buffer); clean_famo: ucs_mpool_destroy(self->free_desc_famo); clean_buffer: ucs_mpool_destroy(self->free_desc_buffer); clean_desc_get: ucs_mpool_destroy(self->free_desc_get); clean_desc: ucs_mpool_destroy(self->free_desc); exit: ucs_error("Failed to activate interface"); pthread_mutex_unlock(&uct_ugni_global_lock); return rc; }
/** * @param rx_headroom Headroom requested by the user. * @param rx_priv_len Length of transport private data to reserve (0 if unused) * @param rx_hdr_len Length of transport network header. * @param mss Maximal segment size (transport limit). */ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md, uct_worker_h worker, const char *dev_name, unsigned rx_headroom, unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss, uct_ib_iface_config_t *config) { uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev; ucs_status_t status; uint8_t port_num; UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker, &config->super UCS_STATS_ARG(dev->stats)); status = uct_ib_device_find_port(dev, dev_name, &port_num); if (status != UCS_OK) { goto err; } self->port_num = port_num; self->sl = config->sl; self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) + ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom, rx_priv_len + rx_hdr_len); self->config.rx_hdr_offset = self->config.rx_payload_offset - rx_hdr_len; self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom; self->config.seg_size = ucs_min(mss, config->super.max_bcopy); self->config.tx_max_poll = config->tx.max_poll; self->config.rx_max_poll = config->rx.max_poll; self->config.rx_max_batch = ucs_min(config->rx.max_batch, config->rx.queue_len / 4); self->ops = ops; status = uct_ib_iface_init_pkey(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_gid(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_lmc(self, config); if (status != UCS_OK) { goto err; } self->comp_channel = ibv_create_comp_channel(dev->ibv_context); if (self->comp_channel == NULL) { ucs_error("Failed to create completion channel: %m"); status = UCS_ERR_IO_ERROR; goto err_free_path_bits; } status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_destroy_comp_channel; } /* TODO inline scatter for send SQ */ self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len, NULL, self->comp_channel, 0); if (self->send_cq == NULL) { ucs_error("Failed to create send cq: %m"); status = UCS_ERR_IO_ERROR; goto err_destroy_comp_channel; } if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) { ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1); } self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len, NULL, self->comp_channel, 0); ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1); if (self->recv_cq == NULL) { ucs_error("Failed to create recv cq: %m"); status = UCS_ERR_IO_ERROR; goto err_destroy_send_cq; } if (!uct_ib_device_is_port_ib(dev, self->port_num)) { ucs_error("Unsupported link layer"); status = UCS_ERR_UNSUPPORTED; goto err_destroy_recv_cq; } /* Address scope and size */ self->addr_scope = uct_ib_address_scope(self->gid.global.subnet_prefix); self->addr_size = uct_ib_address_size(self->addr_scope); ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d", self->config.rx_headroom_offset, self->config.rx_payload_offset, self->config.rx_hdr_offset, self->config.seg_size); return UCS_OK; err_destroy_recv_cq: ibv_destroy_cq(self->recv_cq); err_destroy_send_cq: ibv_destroy_cq(self->send_cq); err_destroy_comp_channel: ibv_destroy_comp_channel(self->comp_channel); err_free_path_bits: ucs_free(self->path_bits); err: return status; }
/** * @param rx_headroom Headroom requested by the user. * @param rx_priv_len Length of transport private data to reserve (0 if unused) * @param rx_hdr_len Length of transport network header. * @param mss Maximal segment size (transport limit). */ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *ops, uct_pd_h pd, uct_worker_h worker, const char *dev_name, unsigned rx_headroom, unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss, uct_ib_iface_config_t *config) { uct_ib_device_t *dev = &ucs_derived_of(pd, uct_ib_pd_t)->dev; ucs_status_t status; uint8_t port_num; UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, ops, pd, worker, &config->super UCS_STATS_ARG(dev->stats)); status = uct_ib_iface_find_port(dev, dev_name, &port_num); if (status != UCS_OK) { ucs_error("Failed to find port %s: %s", dev_name, ucs_status_string(status)); goto err; } self->port_num = port_num; self->sl = config->sl; self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) + ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom, rx_priv_len + rx_hdr_len); self->config.rx_hdr_offset = self->config.rx_payload_offset - rx_hdr_len; self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom; self->config.seg_size = config->super.max_bcopy; status = uct_ib_iface_init_pkey(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_gid(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_lmc(self, config); if (status != UCS_OK) { goto err; } /* TODO comp_channel */ /* TODO inline scatter for send SQ */ self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len, NULL, NULL, 0); if (self->send_cq == NULL) { ucs_error("Failed to create send cq: %m"); status = UCS_ERR_IO_ERROR; goto err_free_path_bits; } if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) { ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1); } self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len, NULL, NULL, 0); ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1); if (self->recv_cq == NULL) { ucs_error("Failed to create recv cq: %m"); status = UCS_ERR_IO_ERROR; goto err_destroy_send_cq; } if (!uct_ib_device_is_port_ib(dev, self->port_num)) { ucs_error("Unsupported link layer"); status = UCS_ERR_UNSUPPORTED; goto err_destroy_recv_cq; } ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d", self->config.rx_headroom_offset, self->config.rx_payload_offset, self->config.rx_hdr_offset, self->config.seg_size); return UCS_OK; err_destroy_recv_cq: ibv_destroy_cq(self->recv_cq); err_destroy_send_cq: ibv_destroy_cq(self->send_cq); err_free_path_bits: ucs_free(self->path_bits); err: return status; }