/**
 * Common InfiniBand interface constructor.
 *
 * Looks up the port named by @a dev_name, computes the receive descriptor
 * layout, initializes pkey / gid / lmc settings, and creates a completion
 * channel plus send and receive completion queues attached to it.
 *
 * @param ops          IB interface operations; the pointer is stored in the
 *                     interface and ops->super is passed to the base class.
 * @param md           Memory domain; accessed as a uct_ib_md_t to reach the
 *                     IB device.
 * @param worker       Worker passed on to the base interface.
 * @param dev_name     Name passed to uct_ib_device_find_port().
 * @param rx_headroom  Headroom requested by the user.
 * @param rx_priv_len  Length of transport private data to reserve
 *                     (0 if unused).
 * @param rx_hdr_len   Length of transport network header.
 * @param tx_cq_len    Number of entries requested for the send CQ.
 * @param mss          Maximal segment size (transport limit).
 * @param config       Interface configuration.
 *
 * @return UCS_OK on success. UCS_ERR_IO_ERROR if the completion channel or
 *         one of the CQs cannot be created, UCS_ERR_UNSUPPORTED if
 *         uct_ib_device_is_port_ib() rejects the port, or the status
 *         returned by a failing helper.
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const char *dev_name,
                    unsigned rx_headroom, unsigned rx_priv_len,
                    unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss,
                    uct_ib_iface_config_t *config)
{
    uct_ib_device_t *ib_dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t found_port;
    size_t user_area;
    unsigned xport_area;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(ib_dev->stats));

    status = uct_ib_device_find_port(ib_dev, dev_name, &found_port);
    if (status != UCS_OK) {
        goto err;
    }

    self->ops      = ops;
    self->port_num = found_port;
    self->sl       = config->sl;

    /*
     * Receive descriptor layout: the payload starts after the IB receive
     * descriptor plus whichever is larger - the active message descriptor
     * with user headroom, or the transport private data with its header.
     */
    user_area  = sizeof(uct_am_recv_desc_t) + rx_headroom;
    xport_area = rx_priv_len + rx_hdr_len;
    self->config.rx_payload_offset  = sizeof(uct_ib_iface_recv_desc_t) +
                                      ucs_max(user_area, xport_area);
    self->config.rx_hdr_offset      = self->config.rx_payload_offset -
                                      rx_hdr_len;
    self->config.rx_headroom_offset = self->config.rx_payload_offset -
                                      rx_headroom;

    /* Segment size is bounded by both the transport and the configuration */
    self->config.seg_size     = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll  = config->tx.max_poll;
    self->config.rx_max_poll  = config->rx.max_poll;
    self->config.rx_max_batch = ucs_min(config->rx.max_batch,
                                        config->rx.queue_len / 4);

    /* Run the pkey, gid and lmc setup in order; stop at the first failure */
    status = uct_ib_iface_init_pkey(self, config);
    if (status == UCS_OK) {
        status = uct_ib_iface_init_gid(self, config);
    }
    if (status == UCS_OK) {
        status = uct_ib_iface_init_lmc(self, config);
    }
    if (status != UCS_OK) {
        goto err;
    }

    /* From here on self->path_bits must be released on failure
     * (presumably allocated by uct_ib_iface_init_lmc() - TODO confirm) */
    self->comp_channel = ibv_create_comp_channel(ib_dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("Failed to create completion channel: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    /* Presumably switches the channel fd to non-blocking mode - confirm
     * against ucs_sys_fcntl_modfl() */
    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(ib_dev->ibv_context, tx_cq_len, NULL,
                                  self->comp_channel, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_comp_channel;
    }

    /*
     * MLX5_CQE_SIZE is raised to 128 only while the receive CQ is created
     * and only when the requested inline receive size exceeds 32; it is set
     * back to 64 right after the creation attempt, whether or not it
     * succeeded.
     */
    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(ib_dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }
    self->recv_cq = ibv_create_cq(ib_dev->ibv_context, config->rx.queue_len,
                                  NULL, self->comp_channel, 0);
    ibv_exp_setenv(ib_dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);
    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(ib_dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    /* Address scope and size */
    self->addr_scope = uct_ib_address_scope(self->gid.global.subnet_prefix);
    self->addr_size  = uct_ib_address_size(self->addr_scope);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

    /* Cleanup runs in reverse order of acquisition */
err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
/**
 * Fill interface attributes that are derived from the IB port state.
 *
 * Once the port width has been validated, @a iface_attr is zeroed and its
 * device_addr_len, latency and bandwidth fields are set.
 *
 * @param iface          IB interface to query.
 * @param xport_hdr_len  Transport header length; counted as part of the
 *                       per-packet overhead in the bandwidth estimate.
 * @param iface_attr     Attributes structure to fill.
 *
 * @return UCS_OK on success, UCS_ERR_UNSUPPORTED if
 *         uct_ib_device_is_port_ib() rejects the port, UCS_ERR_IO_ERROR if
 *         the port reports an active width or active speed that is not
 *         handled here.
 */
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
                                uct_iface_attr_t *iface_attr)
{
    /* Number of lanes, indexed by log2 of the port's active_width value */
    static const unsigned lanes_by_log2_width[] = {1, 4, 8, 12};
    uct_ib_device_t *ib_dev = uct_ib_iface_device(iface);
    uint8_t width_code, speed_code, mtu_code;
    double lane_rate, coding_ratio, raw_rate;
    size_t payload_len, lanes, overhead_len;
    int width_ok;

    if (!uct_ib_device_is_port_ib(ib_dev, iface->port_num)) {
        return UCS_ERR_UNSUPPORTED;
    }

    width_code = uct_ib_iface_port_attr(iface)->active_width;
    speed_code = uct_ib_iface_port_attr(iface)->active_speed;
    mtu_code   = uct_ib_iface_port_attr(iface)->active_mtu;

    /* The width must be a non-zero power of two whose log2 indexes the
     * lanes table; ucs_ilog2() is evaluated only after the first two checks
     * pass */
    width_ok = ucs_is_pow2(width_code) && (width_code >= 1) &&
               (ucs_ilog2(width_code) <= 3);
    if (!width_ok) {
        ucs_error("Invalid active_width on %s:%d: %d",
                  uct_ib_device_name(ib_dev), iface->port_num, width_code);
        return UCS_ERR_IO_ERROR;
    }

    memset(iface_attr, 0, sizeof(*iface_attr));
    iface_attr->device_addr_len = iface->addr_size;

    /* Per-speed latency, per-lane signal rate and line encoding ratio */
    switch (speed_code) {
    case 1: /* SDR */
        iface_attr->latency = 5000e-9;
        lane_rate           = 2.5e9;
        coding_ratio        = 8.0/10.0;
        break;
    case 2: /* DDR */
        iface_attr->latency = 2500e-9;
        lane_rate           = 5.0e9;
        coding_ratio        = 8.0/10.0;
        break;
    case 4: /* QDR */
        iface_attr->latency = 1300e-9;
        lane_rate           = 10.0e9;
        coding_ratio        = 8.0/10.0;
        break;
    case 8: /* FDR10 */
        iface_attr->latency = 700e-9;
        lane_rate           = 10.3125e9;
        coding_ratio        = 64.0/66.0;
        break;
    case 16: /* FDR */
        iface_attr->latency = 700e-9;
        lane_rate           = 14.0625e9;
        coding_ratio        = 64.0/66.0;
        break;
    case 32: /* EDR */
        iface_attr->latency = 600e-9;
        lane_rate           = 25.78125e9;
        coding_ratio        = 64.0/66.0;
        break;
    default:
        ucs_error("Invalid active_speed on %s:%d: %d",
                  uct_ib_device_name(ib_dev), iface->port_num, speed_code);
        return UCS_ERR_IO_ERROR;
    }

    /* Raw rate in bytes per second: lanes * signal rate * encoding / 8 */
    lanes    = lanes_by_log2_width[ucs_ilog2(width_code)];
    raw_rate = (lanes * lane_rate * coding_ratio) / 8.0;

    /* Payload carried per packet is limited by both the port MTU and the
     * interface segment size */
    payload_len  = ucs_min(uct_ib_mtu_value(mtu_code), iface->config.seg_size);

    /* Bytes sent per packet in addition to the payload */
    overhead_len = UCT_IB_LRH_LEN + UCT_IB_BTH_LEN + xport_hdr_len +
                   UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN;

    /* Scale the raw rate by the payload fraction of each packet */
    iface_attr->bandwidth = (raw_rate * payload_len) /
                            (payload_len + overhead_len);
    return UCS_OK;
}
/**
 * Common InfiniBand interface constructor.
 *
 * Looks up the port named by @a dev_name, computes the receive descriptor
 * layout, initializes pkey / gid / lmc settings, and creates the send and
 * receive completion queues.
 *
 * @param ops          Interface operations, passed to the base interface.
 * @param pd           Protection domain; accessed as a uct_ib_pd_t to reach
 *                     the IB device.
 * @param worker       Worker passed on to the base interface.
 * @param dev_name     Name passed to uct_ib_iface_find_port().
 * @param rx_headroom  Headroom requested by the user.
 * @param rx_priv_len  Length of transport private data to reserve
 *                     (0 if unused).
 * @param rx_hdr_len   Length of transport network header.
 * @param tx_cq_len    Number of entries requested for the send CQ.
 * @param mss          Maximal segment size (transport limit). The interface
 *                     segment size never exceeds this value.
 * @param config       Interface configuration.
 *
 * @return UCS_OK on success. UCS_ERR_IO_ERROR if one of the CQs cannot be
 *         created, UCS_ERR_UNSUPPORTED if uct_ib_device_is_port_ib() rejects
 *         the port, or the status returned by a failing helper.
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *ops, uct_pd_h pd,
                    uct_worker_h worker, const char *dev_name,
                    unsigned rx_headroom, unsigned rx_priv_len,
                    unsigned rx_hdr_len, unsigned tx_cq_len, size_t mss,
                    uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(pd, uct_ib_pd_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, ops, pd, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_iface_find_port(dev, dev_name, &port_num);
    if (status != UCS_OK) {
        ucs_error("Failed to find port %s: %s", dev_name,
                  ucs_status_string(status));
        goto err;
    }

    self->port_num = port_num;
    self->sl       = config->sl;

    /*
     * Receive descriptor layout: the payload starts after the IB receive
     * descriptor plus whichever is larger - the active message descriptor
     * with user headroom, or the transport private data with its header.
     */
    self->config.rx_payload_offset  = sizeof(uct_ib_iface_recv_desc_t) +
                                      ucs_max(sizeof(uct_am_recv_desc_t) +
                                              rx_headroom,
                                              rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset      = self->config.rx_payload_offset -
                                      rx_hdr_len;
    self->config.rx_headroom_offset = self->config.rx_payload_offset -
                                      rx_headroom;

    /* The configured bcopy size must not exceed the transport's maximal
     * segment size, so take the smaller of the two */
    self->config.seg_size = (mss < config->super.max_bcopy) ?
                            mss : config->super.max_bcopy;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_gid(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    /* From here on self->path_bits must be released on failure
     * (presumably allocated by uct_ib_iface_init_lmc() - TODO confirm) */

    /* TODO comp_channel */
    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len, NULL, NULL, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    /*
     * MLX5_CQE_SIZE is raised to 128 only while the receive CQ is created
     * and only when the requested inline receive size exceeds 32; it is set
     * back to 64 right after the creation attempt, whether or not it
     * succeeded.
     */
    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }
    self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len,
                                  NULL, NULL, 0);
    ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);
    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

    /* Cleanup runs in reverse order of acquisition */
err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}