static UCS_CLASS_INIT_FUNC(uct_tcp_ep_t, uct_tcp_iface_t *iface, int fd, const struct sockaddr_in *dest_addr) { ucs_status_t status; UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super) self->buf = ucs_malloc(iface->config.buf_size, "tcp_buf"); if (self->buf == NULL) { return UCS_ERR_NO_MEMORY; } self->events = 0; self->offset = 0; self->length = 0; ucs_queue_head_init(&self->pending_q); if (fd == -1) { status = ucs_tcpip_socket_create(&self->fd); if (status != UCS_OK) { goto err; } /* TODO use non-blocking connect */ status = uct_tcp_socket_connect(self->fd, dest_addr); if (status != UCS_OK) { goto err_close; } } else { self->fd = fd; } status = ucs_sys_fcntl_modfl(self->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_close; } status = uct_tcp_iface_set_sockopt(iface, self->fd); if (status != UCS_OK) { goto err_close; } uct_tcp_ep_epoll_ctl(self, EPOLL_CTL_ADD); UCS_ASYNC_BLOCK(iface->super.worker->async); ucs_list_add_tail(&iface->ep_list, &self->list); UCS_ASYNC_UNBLOCK(iface->super.worker->async); ucs_debug("tcp_ep %p: created on iface %p, fd %d", self, iface, self->fd); return UCS_OK; err_close: close(self->fd); err: return status; }
/** * @param rx_headroom Headroom requested by the user. * @param rx_priv_len Length of transport private data to reserve (0 if unused) * @param rx_hdr_len Length of transport network header. * @param mss Maximal segment size (transport limit). */ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss, const uct_ib_iface_config_t *config) { uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev; ucs_status_t status; uint8_t port_num; UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker, &config->super UCS_STATS_ARG(dev->stats)); status = uct_ib_device_find_port(dev, params->dev_name, &port_num); if (status != UCS_OK) { goto err; } self->ops = ops; self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) + ucs_max(sizeof(uct_am_recv_desc_t) + params->rx_headroom, rx_priv_len + rx_hdr_len); self->config.rx_hdr_offset = self->config.rx_payload_offset - rx_hdr_len; self->config.rx_headroom_offset= self->config.rx_payload_offset - params->rx_headroom; self->config.seg_size = ucs_min(mss, config->super.max_bcopy); self->config.tx_max_poll = config->tx.max_poll; self->config.rx_max_poll = config->rx.max_poll; self->config.rx_max_batch = ucs_min(config->rx.max_batch, config->rx.queue_len / 4); self->config.port_num = port_num; self->config.sl = config->sl; self->config.gid_index = config->gid_index; status = uct_ib_iface_init_pkey(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_device_query_gid(dev, self->config.port_num, self->config.gid_index, &self->gid); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_lmc(self, config); if (status != UCS_OK) { goto err; } self->comp_channel = ibv_create_comp_channel(dev->ibv_context); if (self->comp_channel == NULL) { ucs_error("ibv_create_comp_channel() failed: %m"); status = UCS_ERR_IO_ERROR; goto err_free_path_bits; } status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_destroy_comp_channel; } status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq); if (status != UCS_OK) { goto err_destroy_comp_channel; } status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl, &self->recv_cq); if (status != UCS_OK) { goto err_destroy_send_cq; } /* Address scope and size */ if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) { if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) { self->addr_type = UCT_IB_ADDRESS_TYPE_ETH; } else { self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix); } } else { ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST); self->addr_type = config->addr_type; } self->addr_size = uct_ib_address_size(self->addr_type); ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d", self->config.rx_headroom_offset, self->config.rx_payload_offset, self->config.rx_hdr_offset, self->config.seg_size); return UCS_OK; err_destroy_send_cq: ibv_destroy_cq(self->send_cq); err_destroy_comp_channel: ibv_destroy_comp_channel(self->comp_channel); err_free_path_bits: ucs_free(self->path_bits); err: return status; }
static UCS_CLASS_INIT_FUNC(uct_cm_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { uct_cm_iface_config_t *config = ucs_derived_of(tl_config, uct_cm_iface_config_t); ucs_status_t status; int ret; ucs_trace_func(""); UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &uct_cm_iface_ops, pd, worker, dev_name, rx_headroom, 0 /* rx_priv_len */, 0 /* rx_hdr_len */, 1 /* tx_cq_len */, IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, /* mss */ &config->super); if (worker->async == NULL) { ucs_error("cm must have async!=NULL"); return UCS_ERR_INVALID_PARAM; } self->service_id = (uint32_t)(ucs_generate_uuid((uintptr_t)self) & (~IB_CM_ASSIGN_SERVICE_ID_MASK)); self->num_outstanding = 0; self->config.timeout_ms = (int)(config->timeout * 1e3 + 0.5); self->config.max_outstanding = config->max_outstanding; self->config.retry_count = ucs_min(config->retry_count, UINT8_MAX); self->notify_q.head = NULL; ucs_queue_head_init(&self->notify_q); self->outstanding = ucs_calloc(self->config.max_outstanding, sizeof(*self->outstanding), "cm_outstanding"); if (self->outstanding == NULL) { status = UCS_ERR_NO_MEMORY; goto err; } self->cmdev = ib_cm_open_device(uct_ib_iface_device(&self->super)->ibv_context); if (self->cmdev == NULL) { ucs_error("ib_cm_open_device() failed: %m. Check if ib_ucm.ko module is loaded."); status = UCS_ERR_NO_DEVICE; goto err_free_outstanding; } status = ucs_sys_fcntl_modfl(self->cmdev->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_close_device; } ret = ib_cm_create_id(self->cmdev, &self->listen_id, self); if (ret) { ucs_error("ib_cm_create_id() failed: %m"); status = UCS_ERR_NO_DEVICE; goto err_close_device; } ret = ib_cm_listen(self->listen_id, self->service_id, 0); if (ret) { ucs_error("ib_cm_listen() failed: %m"); status = UCS_ERR_INVALID_ADDR; goto err_destroy_id; } if (config->async_mode == UCS_ASYNC_MODE_SIGNAL) { ucs_warn("ib_cm fd does not support SIGIO"); } status = ucs_async_set_event_handler(config->async_mode, self->cmdev->fd, POLLIN, uct_cm_iface_event_handler, self, worker->async); if (status != UCS_OK) { ucs_error("failed to set event handler"); goto err_destroy_id; } ucs_debug("listening for SIDR service_id 0x%x on fd %d", self->service_id, self->cmdev->fd); return UCS_OK; err_destroy_id: ib_cm_destroy_id(self->listen_id); err_close_device: ib_cm_close_device(self->cmdev); err_free_outstanding: ucs_free(self->outstanding); err: return status; }
/** * @param rx_headroom Headroom requested by the user. * @param rx_priv_len Length of transport private data to reserve (0 if unused) * @param rx_hdr_len Length of transport network header. * @param mss Maximal segment size (transport limit). */ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md, uct_worker_h worker, const char *dev_name, unsigned rx_headroom, unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss, uct_ib_iface_config_t *config) { uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev; ucs_status_t status; uint8_t port_num; UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker, &config->super UCS_STATS_ARG(dev->stats)); status = uct_ib_device_find_port(dev, dev_name, &port_num); if (status != UCS_OK) { goto err; } self->port_num = port_num; self->sl = config->sl; self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) + ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom, rx_priv_len + rx_hdr_len); self->config.rx_hdr_offset = self->config.rx_payload_offset - rx_hdr_len; self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom; self->config.seg_size = ucs_min(mss, config->super.max_bcopy); self->config.tx_max_poll = config->tx.max_poll; self->config.rx_max_poll = config->rx.max_poll; self->config.rx_max_batch = ucs_min(config->rx.max_batch, config->rx.queue_len / 4); self->ops = ops; status = uct_ib_iface_init_pkey(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_gid(self, config); if (status != UCS_OK) { goto err; } status = uct_ib_iface_init_lmc(self, config); if (status != UCS_OK) { goto err; } self->comp_channel = ibv_create_comp_channel(dev->ibv_context); if (self->comp_channel == NULL) { ucs_error("Failed to create completion channel: %m"); status = UCS_ERR_IO_ERROR; goto err_free_path_bits; } status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_destroy_comp_channel; } /* TODO inline scatter for send SQ */ self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len, NULL, self->comp_channel, 0); if (self->send_cq == NULL) { ucs_error("Failed to create send cq: %m"); status = UCS_ERR_IO_ERROR; goto err_destroy_comp_channel; } if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) { ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1); } self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len, NULL, self->comp_channel, 0); ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1); if (self->recv_cq == NULL) { ucs_error("Failed to create recv cq: %m"); status = UCS_ERR_IO_ERROR; goto err_destroy_send_cq; } if (!uct_ib_device_is_port_ib(dev, self->port_num)) { ucs_error("Unsupported link layer"); status = UCS_ERR_UNSUPPORTED; goto err_destroy_recv_cq; } /* Address scope and size */ self->addr_scope = uct_ib_address_scope(self->gid.global.subnet_prefix); self->addr_size = uct_ib_address_size(self->addr_scope); ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d", self->config.rx_headroom_offset, self->config.rx_payload_offset, self->config.rx_hdr_offset, self->config.seg_size); return UCS_OK; err_destroy_recv_cq: ibv_destroy_cq(self->recv_cq); err_destroy_send_cq: ibv_destroy_cq(self->send_cq); err_destroy_comp_channel: ibv_destroy_comp_channel(self->comp_channel); err_free_path_bits: ucs_free(self->path_bits); err: return status; }