/*
 * Connect a UD endpoint to a remote endpoint address.
 *
 * The remote peer is identified by the (lid, qp_num, ep id) triple carried in
 * the IB sockaddr. The endpoint must not already be connected (asserted).
 */
ucs_status_t uct_ud_ep_connect_to_ep(uct_ud_ep_t *ep, const struct sockaddr *addr)
{
    uct_ud_iface_t          *iface   = ucs_derived_of(ep->super.super.iface,
                                                      uct_ud_iface_t);
    uct_ib_device_t         *dev     = uct_ib_iface_device(&iface->super);
    const uct_sockaddr_ib_t *ib_addr = (uct_sockaddr_ib_t *)addr;

    ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID);
    ucs_trace_func("");

    /* Record the remote endpoint id and QP number, then reset local state */
    ep->dest_ep_id = ib_addr->id;
    ep->dest_qpn   = ib_addr->qp_num;
    uct_ud_ep_reset(ep);

    ucs_debug("%s:%d slid=%d qpn=%d ep=%u connected to dlid=%d qpn=%d ep=%u",
              ibv_get_device_name(dev->ibv_context->device),
              iface->super.port_num,
              dev->port_attr[iface->super.port_num-dev->first_port].lid,
              iface->qp->qp_num, ep->ep_id,
              ib_addr->lid, ep->dest_qpn, ep->dest_ep_id);
    return UCS_OK;
}
/*
 * Post a tag-matching rendezvous request (RTS) for a zero-copy operation.
 *
 * Builds the rendezvous header on the stack (ucs_alloca of tm.rndv_hdr_size)
 * and validates three constraints before posting:
 *  - at most one iov entry (iovcnt <= 1);
 *  - the user header fits the device's max_rndv_priv_size;
 *  - user header + rendezvous header fit within the QP's max inline size.
 * It then fills the inline RNDV SGE list with the tag, operation index, the
 * iov buffer's rkey/address/length and the user header, and posts the RTS as
 * an inline send.
 *
 * Returns the operation index (obtained from uct_rc_verbs_iface_tag_get_op_id
 * together with 'comp') cast to ucs_status_ptr_t, serving as the handle for
 * this pending rendezvous operation.
 *
 * NOTE(review): no send-resource availability check is visible here before
 * posting - presumably handled by the caller or inside
 * uct_rc_verbs_ep_post_send; confirm.
 */
ucs_status_ptr_t uct_rc_verbs_ep_tag_rndv_zcopy(uct_ep_h tl_ep, uct_tag_t tag, const void *header, unsigned header_length, const uct_iov_t *iov, size_t iovcnt, uct_completion_t *comp) { uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t); uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t); void *hdr = ucs_alloca(iface->tm.rndv_hdr_size); uct_ib_device_t *dev = uct_ib_iface_device(&iface->super.super); uint32_t op_index; UCT_CHECK_PARAM_PTR(iovcnt <= 1ul, "Wrong iovcnt in uct_rc_verbs_ep_tag_rndv_zcopy %lu", iovcnt); UCT_CHECK_PARAM_PTR(header_length <= IBV_DEVICE_TM_CAPS(dev, max_rndv_priv_size), "Invalid hdr len in uct_rc_verbs_ep_tag_rndv_zcopy %u", header_length); UCT_CHECK_PARAM_PTR((header_length + iface->tm.rndv_hdr_size) <= iface->verbs_common.config.max_inline, "Invalid RTS len in uct_rc_verbs_ep_tag_rndv_zcopy %u", header_length + iface->tm.rndv_hdr_size); op_index = uct_rc_verbs_iface_tag_get_op_id(iface, comp); uct_rc_verbs_iface_fill_inl_rndv_sge(iface, hdr, tag, op_index, ((uct_ib_mem_t*)iov->memh)->mr->rkey, iov->buffer, uct_iov_get_length(iov), header, header_length); uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_am_wr, IBV_SEND_INLINE); return (ucs_status_ptr_t)((uint64_t)op_index); }
/*
 * Flush outstanding operations on an RC-verbs endpoint.
 *
 * Returns UCS_OK when nothing is in flight, UCS_ERR_NO_RESOURCE when send
 * resources are exhausted, and UCS_INPROGRESS when a completion is pending;
 * in the latter case 'comp' is attached to the send queue and invoked when
 * the last posted operation completes.
 */
ucs_status_t uct_rc_verbs_ep_flush(uct_ep_h tl_ep, unsigned flags,
                                   uct_completion_t *comp)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_verbs_ep_t    *ep    = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    ucs_status_t          status;

    if (!uct_rc_iface_has_tx_resources(&iface->super)) {
        return UCS_ERR_NO_RESOURCE;
    }

    /* All work requests available => nothing is outstanding */
    if (uct_rc_txqp_available(&ep->super.txqp) == iface->config.tx_max_wr) {
        UCT_TL_EP_STAT_FLUSH(&ep->super.super);
        return UCS_OK;
    }

    if (uct_rc_txqp_unsignaled(&ep->super.txqp) != 0) {
        /* Post a signaled dummy operation so the tail of the send queue
         * generates a completion: NOP if the device supports it, otherwise
         * a zero-length put. */
        if (IBV_DEVICE_HAS_NOP(&uct_ib_iface_device(&iface->super.super)->dev_attr)) {
            status = uct_rc_verbs_ep_nop(ep);
        } else {
            status = uct_rc_verbs_ep_put_short(tl_ep, NULL, 0, 0, 0);
        }
        if (status != UCS_OK) {
            return status;
        }
    } else if (!uct_rc_ep_has_tx_resources(&ep->super)) {
        return UCS_ERR_NO_RESOURCE;
    }

    uct_rc_txqp_add_send_comp(&iface->super, &ep->super.txqp, comp,
                              ep->txcnt.pi);
    UCT_TL_EP_STAT_FLUSH_WAIT(&ep->super.super);
    return UCS_INPROGRESS;
}
/*
 * Pack this interface's device address (gid + port lid) into dev_addr.
 */
ucs_status_t uct_ib_iface_get_device_address(uct_iface_h tl_iface,
                                             uct_device_addr_t *dev_addr)
{
    uct_ib_iface_t *ib_iface = ucs_derived_of(tl_iface, uct_ib_iface_t);

    uct_ib_address_pack(uct_ib_iface_device(ib_iface), ib_iface->addr_type,
                        &ib_iface->gid, uct_ib_iface_port_attr(ib_iface)->lid,
                        (void*)dev_addr);
    return UCS_OK;
}
/*
 * Query interface capabilities for the RC-verbs transport.
 *
 * Starts from the generic RC capabilities (uct_rc_iface_query) and then fills
 * in verbs-specific limits:
 *  - PUT/AM short limits derive from the QP's max inline data size;
 *  - bcopy limits derive from the configured segment size;
 *  - PUT/GET zcopy limits derive from the port's max message size;
 *  - AM limits reserve room for the uct_rc_hdr_t transport header.
 * Atomic capability flags are advertised only when the device reports
 * HCA/global atomics (or big-endian-reply HCA atomics); 32-bit and SWAP
 * variants additionally require extended-atomics support for the
 * corresponding operand size.
 */
static ucs_status_t uct_rc_verbs_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) { uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_verbs_iface_t); struct ibv_exp_device_attr *dev_attr = &uct_ib_iface_device(&iface->super.super)->dev_attr; uct_rc_iface_query(&iface->super, iface_attr); /* PUT */ iface_attr->cap.put.max_short = iface->config.max_inline; iface_attr->cap.put.max_bcopy = iface->super.super.config.seg_size; iface_attr->cap.put.max_zcopy = uct_ib_iface_port_attr(&iface->super.super)->max_msg_sz; /* GET */ iface_attr->cap.get.max_bcopy = iface->super.super.config.seg_size; iface_attr->cap.get.max_zcopy = uct_ib_iface_port_attr(&iface->super.super)->max_msg_sz; /* AM */ iface_attr->cap.am.max_short = iface->config.max_inline - sizeof(uct_rc_hdr_t); iface_attr->cap.am.max_bcopy = iface->super.super.config.seg_size - sizeof(uct_rc_hdr_t); iface_attr->cap.am.max_zcopy = iface->super.super.config.seg_size - sizeof(uct_rc_hdr_t); iface_attr->cap.am.max_hdr = iface->config.short_desc_size - sizeof(uct_rc_hdr_t); /* * Atomics. * Need to make sure device support at least one kind of atomics. */ if (IBV_EXP_HAVE_ATOMIC_HCA(dev_attr) || IBV_EXP_HAVE_ATOMIC_GLOB(dev_attr) || IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(dev_attr)) { iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_ADD64 | UCT_IFACE_FLAG_ATOMIC_FADD64 | UCT_IFACE_FLAG_ATOMIC_CSWAP64; if (uct_rc_verbs_is_ext_atomic_supported(dev_attr, sizeof(uint32_t))) { iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_ADD32 | UCT_IFACE_FLAG_ATOMIC_FADD32 | UCT_IFACE_FLAG_ATOMIC_SWAP32 | UCT_IFACE_FLAG_ATOMIC_CSWAP32; } if (uct_rc_verbs_is_ext_atomic_supported(dev_attr, sizeof(uint64_t))) { iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_SWAP64; } } return UCS_OK; }
/*
 * Create a unicast (non-global) address handle for the given destination lid,
 * using this interface's port and service level. Returns NULL on failure
 * (same as ibv_create_ah).
 */
struct ibv_ah *uct_ib_create_ah(uct_ib_iface_t *iface, uint16_t dlid)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    /* Designated initializer zero-fills all remaining attribute fields */
    struct ibv_ah_attr ah_attr = {
        .port_num  = iface->port_num,
        .sl        = iface->sl,
        .is_global = 0,
        .dlid      = dlid
    };

    return ibv_create_ah(dev->pd, &ah_attr);
}
/*
 * Request a completion-event notification on the given CQ.
 * 'solicited' limits the notification to solicited completions.
 */
static ucs_status_t uct_ib_iface_arm_cq(uct_ib_iface_t *iface,
                                        struct ibv_cq *cq,
                                        int solicited)
{
    if (ibv_req_notify_cq(cq, solicited) != 0) {
        ucs_error("ibv_req_notify_cq(%s:%d, cq) failed: %m",
                  uct_ib_device_name(uct_ib_iface_device(iface)),
                  iface->port_num);
        return UCS_ERR_IO_ERROR;
    }
    return UCS_OK;
}
/*
 * Find the index of the configured pkey in the port's pkey table.
 *
 * Scans the table, skipping send-only entries (membership bit clear), and
 * compares the lower 15 partition bits against config->pkey_value. On match,
 * stores pkey_index/pkey_value on the iface and returns UCS_OK; otherwise
 * returns UCS_ERR_INVALID_PARAM.
 */
static ucs_status_t uct_ib_iface_init_pkey(uct_ib_iface_t *iface,
                                           const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev  = uct_ib_iface_device(iface);
    uint16_t pkey_tbl_len = uct_ib_iface_port_attr(iface)->pkey_tbl_len;
    uint16_t pkey_index, port_pkey, pkey;

    if (config->pkey_value > UCT_IB_PKEY_PARTITION_MASK) {
        ucs_error("Requested pkey 0x%x is invalid, should be in the range 0..0x%x",
                  config->pkey_value, UCT_IB_PKEY_PARTITION_MASK);
        return UCS_ERR_INVALID_PARAM;
    }

    /* get the user's pkey value and find its index in the port's pkey table */
    for (pkey_index = 0; pkey_index < pkey_tbl_len; ++pkey_index) {
        /* get the pkey values from the port's pkeys table */
        if (ibv_query_pkey(dev->ibv_context, iface->config.port_num, pkey_index,
                           &port_pkey))
        {
            ucs_error("ibv_query_pkey("UCT_IB_IFACE_FMT", index=%d) failed: %m",
                      UCT_IB_IFACE_ARG(iface), pkey_index);
            /* BUG FIX: on failure port_pkey is left uninitialized; reading it
             * below would be undefined behavior. Skip this table index. */
            continue;
        }

        pkey = ntohs(port_pkey);
        if (!(pkey & UCT_IB_PKEY_MEMBERSHIP_MASK)) {
            /* send-only member - cannot be used for two-way communication */
            ucs_debug("skipping send-only pkey[%d]=0x%x", pkey_index, pkey);
            continue;
        }

        /* take only the lower 15 bits for the comparison */
        if ((pkey & UCT_IB_PKEY_PARTITION_MASK) == config->pkey_value) {
            iface->pkey_index = pkey_index;
            iface->pkey_value = pkey;
            ucs_debug("using pkey[%d] 0x%x on "UCT_IB_IFACE_FMT,
                      iface->pkey_index, iface->pkey_value,
                      UCT_IB_IFACE_ARG(iface));
            return UCS_OK;
        }
    }

    ucs_error("The requested pkey: 0x%x, cannot be used. "
              "It wasn't found or the configured pkey doesn't have full membership.",
              config->pkey_value);
    return UCS_ERR_INVALID_PARAM;
}
/*
 * Connect a UD endpoint to a remote *interface* address (no remote endpoint
 * id yet - only the remote QP number is recorded).
 */
ucs_status_t uct_ud_ep_connect_to_iface(uct_ud_ep_t *ep,
                                        const uct_sockaddr_ib_t *if_addr)
{
    uct_ud_iface_t  *iface = ucs_derived_of(ep->super.super.iface,
                                            uct_ud_iface_t);
    uct_ib_device_t *dev   = uct_ib_iface_device(&iface->super);

    /* Record the remote QP and reset local endpoint state */
    ep->dest_qpn = if_addr->qp_num;
    uct_ud_ep_reset(ep);

    ucs_debug("%s:%d slid=%d qpn=%d ep_id=%u ep=%p connected to IFACE dlid=%d qpn=%d",
              ibv_get_device_name(dev->ibv_context->device),
              iface->super.port_num,
              dev->port_attr[iface->super.port_num-dev->first_port].lid,
              iface->qp->qp_num, ep->ep_id, ep,
              if_addr->lid, if_addr->qp_num);
    return UCS_OK;
}
/*
 * Query the configured GID index on this port and reject an all-zero GID.
 */
static ucs_status_t uct_ib_iface_init_gid(uct_ib_iface_t *iface,
                                          uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);

    if (ibv_query_gid(dev->ibv_context, iface->port_num, config->gid_index,
                      &iface->gid) != 0) {
        ucs_error("ibv_query_gid(index=%d) failed: %m", config->gid_index);
        return UCS_ERR_INVALID_PARAM;
    }

    /* A zero interface-id + zero subnet-prefix means the entry is unset */
    if ((iface->gid.global.interface_id == 0) &&
        (iface->gid.global.subnet_prefix == 0)) {
        ucs_error("Invalid gid[%d] on %s:%d", config->gid_index,
                  uct_ib_device_name(dev), iface->port_num);
        return UCS_ERR_INVALID_ADDR;
    }

    return UCS_OK;
}
/*
 * Create an address handle from a packed IB address.
 *
 * Fills an ibv_ah_attr from ib_addr/src_path_bits and calls ibv_create_ah on
 * the interface's PD. On failure, logs a description of the attempted
 * attributes (including the destination GID for global routes) and returns
 * UCS_ERR_INVALID_ADDR. On success stores the handle in *ah_p.
 */
ucs_status_t uct_ib_iface_create_ah(uct_ib_iface_t *iface,
                                    const uct_ib_address_t *ib_addr,
                                    uint8_t src_path_bits,
                                    struct ibv_ah **ah_p)
{
    struct ibv_ah_attr ah_attr;
    struct ibv_ah *ah;
    char buf[128];
    char *p, *endp;

    uct_ib_iface_fill_ah_attr(iface, ib_addr, src_path_bits, &ah_attr);
    ah = ibv_create_ah(uct_ib_iface_md(iface)->pd, &ah_attr);

    if (ah == NULL) {
        /* Build a human-readable dump of the failed attributes */
        p    = buf;
        endp = buf + sizeof(buf);
        snprintf(p, endp - p, "dlid=%d sl=%d port=%d path_bits=%d",
                 ah_attr.dlid, ah_attr.sl, ah_attr.port_num,
                 ah_attr.src_path_bits);
        p += strlen(p);

        if (ah_attr.is_global) {
            /* BUG FIX: leading space added so "dgid=" does not run into the
             * preceding "path_bits=%d" field in the log message */
            snprintf(p, endp - p, " dgid=");
            p += strlen(p);
            inet_ntop(AF_INET6, &ah_attr.grh.dgid, p, endp - p);
            p += strlen(p);
            snprintf(p, endp - p, " sgid_index=%d", ah_attr.grh.sgid_index);
        }

        ucs_error("ibv_create_ah(%s) on %s:%d failed: %m", buf,
                  uct_ib_device_name(uct_ib_iface_device(iface)),
                  iface->port_num);
        return UCS_ERR_INVALID_ADDR;
    }

    *ah_p = ah;
    return UCS_OK;
}
/*
 * Flush an RC-verbs endpoint (legacy signature without flags/completion).
 *
 * Returns UCS_OK when nothing is outstanding, otherwise ensures the tail of
 * the send queue will produce a completion and returns UCS_INPROGRESS.
 */
ucs_status_t uct_rc_verbs_ep_flush(uct_ep_h tl_ep)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_verbs_ep_t    *ep    = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    ucs_status_t          status;

    /* Full send-queue availability => nothing in flight */
    if (ep->tx.available == iface->super.config.tx_qp_len) {
        UCT_TL_EP_STAT_FLUSH(&ep->super);
        return UCS_OK;
    }

    if (ep->super.unsignaled != 0) {
        /* Post a signaled dummy operation (NOP if supported, else a
         * zero-length put) so the last work request completes visibly */
        if (IBV_DEVICE_HAS_NOP(&uct_ib_iface_device(&iface->super.super)->dev_attr)) {
            status = uct_rc_verbs_ep_nop(ep);
        } else {
            status = uct_rc_verbs_ep_put_short(tl_ep, NULL, 0, 0, 0);
        }
        if (status != UCS_OK) {
            return status;
        }
    }

    return UCS_INPROGRESS;
}
/*
 * Report whether the device supports compact address vectors
 * (UCT_IB_DEVICE_FLAG_AV). *compact_av is set to 0 or 1.
 */
ucs_status_t uct_ib_mlx5_get_compact_av(uct_ib_iface_t *iface, int *compact_av)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);

    *compact_av = (dev->flags & UCT_IB_DEVICE_FLAG_AV) ? 1 : 0;
    return UCS_OK;
}
/*
 * Query common IB interface attributes: latency/bandwidth estimate plus
 * device priority (link-layer-aware variant).
 *
 * Latency and the per-lane signal rate/encoding pair are selected from
 * active_speed (SDR/DDR/QDR/FDR10/FDR/EDR); wire speed = width * signal_rate
 * * encoding / 8 bytes per second. Bandwidth is then de-rated by the
 * per-packet overhead (BTH + transport header + ICRC + VCRC + delimiter,
 * plus GRH+RoCE for Ethernet link layer or LRH otherwise) relative to the
 * effective MTU, which is clamped to the configured segment size. Finally,
 * iface_attr->priority is looked up in uct_ib_device_info_table by the
 * device's vendor_part_id (default 0 when not listed).
 */
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len, uct_iface_attr_t *iface_attr) { uct_ib_device_t *dev = uct_ib_iface_device(iface); static const unsigned ib_port_widths[] = { [0] = 1, [1] = 4, [2] = 8, [3] = 12 }; uint8_t active_width, active_speed, active_mtu; double encoding, signal_rate, wire_speed; size_t mtu, width, extra_pkt_len; int i = 0; active_width = uct_ib_iface_port_attr(iface)->active_width; active_speed = uct_ib_iface_port_attr(iface)->active_speed; active_mtu = uct_ib_iface_port_attr(iface)->active_mtu; /* Get active width */ if (!ucs_is_pow2(active_width) || (active_width < 1) || (ucs_ilog2(active_width) > 3)) { ucs_error("Invalid active_width on %s:%d: %d", UCT_IB_IFACE_ARG(iface), active_width); return UCS_ERR_IO_ERROR; } memset(iface_attr, 0, sizeof(*iface_attr)); iface_attr->device_addr_len = iface->addr_size; switch (active_speed) { case 1: /* SDR */ iface_attr->latency = 5000e-9; signal_rate = 2.5e9; encoding = 8.0/10.0; break; case 2: /* DDR */ iface_attr->latency = 2500e-9; signal_rate = 5.0e9; encoding = 8.0/10.0; break; case 4: /* QDR */ iface_attr->latency = 1300e-9; signal_rate = 10.0e9; encoding = 8.0/10.0; break; case 8: /* FDR10 */ iface_attr->latency = 700e-9; signal_rate = 10.3125e9; encoding = 64.0/66.0; break; case 16: /* FDR */ iface_attr->latency = 700e-9; signal_rate = 14.0625e9; encoding = 64.0/66.0; break; case 32: /* EDR */ iface_attr->latency = 600e-9; signal_rate = 25.78125e9; encoding = 64.0/66.0; break; default: ucs_error("Invalid active_speed on %s:%d: %d", UCT_IB_IFACE_ARG(iface), active_speed); return UCS_ERR_IO_ERROR; } /* Wire speed calculation: Width * SignalRate * Encoding */ width = ib_port_widths[ucs_ilog2(active_width)]; wire_speed = (width * signal_rate * encoding) / 8.0; /* Calculate packet overhead */ mtu = ucs_min(uct_ib_mtu_value(active_mtu), iface->config.seg_size); extra_pkt_len = UCT_IB_BTH_LEN + xport_hdr_len + UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN; if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(iface))) { extra_pkt_len += UCT_IB_GRH_LEN + UCT_IB_ROCE_LEN; } else { /* TODO check if UCT_IB_DELIM_LEN is present in RoCE as well */ extra_pkt_len += UCT_IB_LRH_LEN; } iface_attr->bandwidth = (wire_speed * mtu) / (mtu + extra_pkt_len); /* Set priority of current device */ iface_attr->priority = 0; while (uct_ib_device_info_table[i].vendor_part_id != 0) { if (uct_ib_device_info_table[i].vendor_part_id == dev->dev_attr.vendor_part_id) { iface_attr->priority = uct_ib_device_info_table[i].priority; break; } i++; } return UCS_OK; }
/*
 * Create a completion queue, working around the MLX5_CQE_SIZE environment
 * variable which the mlx5 provider consults at CQ-creation time.
 *
 * - If the user already set MLX5_CQE_SIZE, only validate that it is large
 *   enough for the requested inline receive size (inl > 32 requires
 *   128-byte CQEs, otherwise 64 suffices).
 * - Otherwise, temporarily set it (via ibv_exp_setenv on the device context)
 *   to a value derived from the inline size and the cache line size, clamped
 *   to [64, 128], and remove it again with ibv_exp_unsetenv after
 *   ibv_create_cq returns - whether or not CQ creation succeeded
 *   (out_unsetenv runs on both the success and the failure path).
 *
 * The CQ is bound to iface->comp_channel with completion vector 0.
 */
static ucs_status_t uct_ib_iface_create_cq(uct_ib_iface_t *iface, int cq_length, size_t inl, struct ibv_cq **cq_p) { static const char *cqe_size_env_var = "MLX5_CQE_SIZE"; uct_ib_device_t *dev = uct_ib_iface_device(iface); const char *cqe_size_env_value; size_t cqe_size_min, cqe_size; char cqe_size_buf[32]; ucs_status_t status; struct ibv_cq *cq; int env_var_added = 0; int ret; cqe_size_min = (inl > 32) ? 128 : 64; cqe_size_env_value = getenv(cqe_size_env_var); if (cqe_size_env_value != NULL) { cqe_size = atol(cqe_size_env_value); if (cqe_size < cqe_size_min) { ucs_error("%s is set to %zu, but at least %zu is required (inl: %zu)", cqe_size_env_var, cqe_size, cqe_size_min, inl); status = UCS_ERR_INVALID_PARAM; goto out; } } else { /* CQE size is not defined by the environment, set it according to inline * size and cache line size. */ cqe_size = ucs_max(cqe_size_min, UCS_SYS_CACHE_LINE_SIZE); cqe_size = ucs_max(cqe_size, 64); /* at least 64 */ cqe_size = ucs_min(cqe_size, 128); /* at most 128 */ snprintf(cqe_size_buf, sizeof(cqe_size_buf),"%zu", cqe_size); ucs_debug("%s: setting %s=%s", uct_ib_device_name(dev), cqe_size_env_var, cqe_size_buf); ret = ibv_exp_setenv(dev->ibv_context, cqe_size_env_var, cqe_size_buf, 1); if (ret) { ucs_error("ibv_exp_setenv(%s=%s) failed: %m", cqe_size_env_var, cqe_size_buf); status = UCS_ERR_INVALID_PARAM; goto out; } env_var_added = 1; } cq = ibv_create_cq(dev->ibv_context, cq_length, NULL, iface->comp_channel, 0); if (cq == NULL) { ucs_error("ibv_create_cq(cqe=%d) failed: %m", cq_length); status = UCS_ERR_IO_ERROR; goto out_unsetenv; } *cq_p = cq; status = UCS_OK; out_unsetenv: if (env_var_added) { /* if we created a new environment variable, remove it */ ret = ibv_exp_unsetenv(dev->ibv_context, cqe_size_env_var); if (ret) { ucs_warn("unsetenv(%s) failed: %m", cqe_size_env_var); } } out: return status; }
/*
 * RC-verbs interface constructor.
 *
 * After initializing the RC superclass:
 *  - sets up the two persistent inline work requests: a 2-SGE inline SEND
 *    (header + payload) for short active messages and a 1-SGE signaled
 *    inline RDMA_WRITE for short puts;
 *  - sizes the short descriptor to hold at least the configured AM header
 *    and UCT_RC_MAX_ATOMIC_SIZE;
 *  - selects the 32/64-bit atomic reply handlers according to the device's
 *    atomic capability flags (be0 vs be1 reply endianness);
 *  - creates and immediately destroys a dummy QP solely to discover the
 *    effective max_inline_data for this device/configuration;
 *  - creates the short-descriptor memory pool, pre-posts receive buffers
 *    until rx.available is exhausted, and registers the progress callback
 *    on the worker's progress chain.
 *
 * On failure the short-descriptor mpool is destroyed before returning.
 */
static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { uct_rc_verbs_iface_config_t *config = ucs_derived_of(tl_config, uct_rc_verbs_iface_config_t); struct ibv_exp_device_attr *dev_attr; size_t am_hdr_size; ucs_status_t status; struct ibv_qp_cap cap; struct ibv_qp *qp; extern uct_iface_ops_t uct_rc_verbs_iface_ops; UCS_CLASS_CALL_SUPER_INIT(uct_rc_iface_t, &uct_rc_verbs_iface_ops, pd, worker, dev_name, rx_headroom, 0, &config->super); /* Initialize inline work request */ memset(&self->inl_am_wr, 0, sizeof(self->inl_am_wr)); self->inl_am_wr.sg_list = self->inl_sge; self->inl_am_wr.num_sge = 2; self->inl_am_wr.opcode = IBV_WR_SEND; self->inl_am_wr.send_flags = IBV_SEND_INLINE; memset(&self->inl_rwrite_wr, 0, sizeof(self->inl_rwrite_wr)); self->inl_rwrite_wr.sg_list = self->inl_sge; self->inl_rwrite_wr.num_sge = 1; self->inl_rwrite_wr.opcode = IBV_WR_RDMA_WRITE; self->inl_rwrite_wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; memset(self->inl_sge, 0, sizeof(self->inl_sge)); /* Configuration */ am_hdr_size = ucs_max(config->max_am_hdr, sizeof(uct_rc_hdr_t)); self->config.short_desc_size = ucs_max(UCT_RC_MAX_ATOMIC_SIZE, am_hdr_size); dev_attr = &uct_ib_iface_device(&self->super.super)->dev_attr; if (IBV_EXP_HAVE_ATOMIC_HCA(dev_attr) || IBV_EXP_HAVE_ATOMIC_GLOB(dev_attr)) { self->config.atomic32_handler = uct_rc_ep_atomic_handler_32_be0; self->config.atomic64_handler = uct_rc_ep_atomic_handler_64_be0; } else if (IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(dev_attr)) { self->config.atomic32_handler = uct_rc_ep_atomic_handler_32_be1; self->config.atomic64_handler = uct_rc_ep_atomic_handler_64_be1; } /* Create a dummy QP in order to find out max_inline */ status = uct_rc_iface_qp_create(&self->super, &qp, &cap); if (status != UCS_OK) { goto err; } ibv_destroy_qp(qp); self->config.max_inline = cap.max_inline_data; /* Create AH headers and Atomic mempool */ status = uct_iface_mpool_create(&self->super.super.super.super, sizeof(uct_rc_iface_send_desc_t) + self->config.short_desc_size, sizeof(uct_rc_iface_send_desc_t), UCS_SYS_CACHE_LINE_SIZE, &config->super.super.tx.mp, self->super.config.tx_qp_len, uct_rc_iface_send_desc_init, "rc_verbs_short_desc", &self->short_desc_mp); if (status != UCS_OK) { goto err; } while (self->super.rx.available > 0) { if (uct_rc_verbs_iface_post_recv(self, 1) == 0) { ucs_error("failed to post receives"); status = UCS_ERR_NO_MEMORY; goto err_destroy_short_desc_mp; } } ucs_notifier_chain_add(&worker->progress_chain, uct_rc_verbs_iface_progress, self); return UCS_OK; err_destroy_short_desc_mp: ucs_mpool_destroy(self->short_desc_mp); err: return status; }
/*
 * IB-CM interface constructor (SIDR-based connection management).
 *
 * Requires an async context on the worker (fails with UCS_ERR_INVALID_PARAM
 * otherwise). Generates a pseudo-random service id, allocates the array of
 * outstanding CM requests, opens the CM device (requires the ib_ucm kernel
 * module), switches its fd to non-blocking mode, creates a CM id and listens
 * on the service id, and finally registers the fd with the worker's async
 * event handler (POLLIN -> uct_cm_iface_event_handler). SIGIO-based async
 * mode is only warned about, not rejected.
 *
 * Failure paths unwind in reverse acquisition order via the goto labels:
 * destroy CM id -> close CM device -> free the outstanding array.
 *
 * NOTE(review): "self->notify_q.head = NULL" appears redundant - the
 * following ucs_queue_head_init(&self->notify_q) initializes the queue.
 */
static UCS_CLASS_INIT_FUNC(uct_cm_iface_t, uct_pd_h pd, uct_worker_h worker, const char *dev_name, size_t rx_headroom, const uct_iface_config_t *tl_config) { uct_cm_iface_config_t *config = ucs_derived_of(tl_config, uct_cm_iface_config_t); ucs_status_t status; int ret; ucs_trace_func(""); UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &uct_cm_iface_ops, pd, worker, dev_name, rx_headroom, 0 /* rx_priv_len */, 0 /* rx_hdr_len */, 1 /* tx_cq_len */, IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, /* mss */ &config->super); if (worker->async == NULL) { ucs_error("cm must have async!=NULL"); return UCS_ERR_INVALID_PARAM; } self->service_id = (uint32_t)(ucs_generate_uuid((uintptr_t)self) & (~IB_CM_ASSIGN_SERVICE_ID_MASK)); self->num_outstanding = 0; self->config.timeout_ms = (int)(config->timeout * 1e3 + 0.5); self->config.max_outstanding = config->max_outstanding; self->config.retry_count = ucs_min(config->retry_count, UINT8_MAX); self->notify_q.head = NULL; ucs_queue_head_init(&self->notify_q); self->outstanding = ucs_calloc(self->config.max_outstanding, sizeof(*self->outstanding), "cm_outstanding"); if (self->outstanding == NULL) { status = UCS_ERR_NO_MEMORY; goto err; } self->cmdev = ib_cm_open_device(uct_ib_iface_device(&self->super)->ibv_context); if (self->cmdev == NULL) { ucs_error("ib_cm_open_device() failed: %m. Check if ib_ucm.ko module is loaded."); status = UCS_ERR_NO_DEVICE; goto err_free_outstanding; } status = ucs_sys_fcntl_modfl(self->cmdev->fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_close_device; } ret = ib_cm_create_id(self->cmdev, &self->listen_id, self); if (ret) { ucs_error("ib_cm_create_id() failed: %m"); status = UCS_ERR_NO_DEVICE; goto err_close_device; } ret = ib_cm_listen(self->listen_id, self->service_id, 0); if (ret) { ucs_error("ib_cm_listen() failed: %m"); status = UCS_ERR_INVALID_ADDR; goto err_destroy_id; } if (config->async_mode == UCS_ASYNC_MODE_SIGNAL) { ucs_warn("ib_cm fd does not support SIGIO"); } status = ucs_async_set_event_handler(config->async_mode, self->cmdev->fd, POLLIN, uct_cm_iface_event_handler, self, worker->async); if (status != UCS_OK) { ucs_error("failed to set event handler"); goto err_destroy_id; } ucs_debug("listening for SIDR service_id 0x%x on fd %d", self->service_id, self->cmdev->fd); return UCS_OK; err_destroy_id: ib_cm_destroy_id(self->listen_id); err_close_device: ib_cm_close_device(self->cmdev); err_free_outstanding: ucs_free(self->outstanding); err: return status; }
/*
 * Query common IB interface attributes (variant restricted to IB ports).
 *
 * Returns UCS_ERR_UNSUPPORTED unless the port link layer is InfiniBand
 * (uct_ib_device_is_port_ib). Latency and the per-lane signal rate/encoding
 * pair are selected from active_speed (SDR/DDR/QDR/FDR10/FDR/EDR); wire
 * speed = width * signal_rate * encoding / 8 bytes per second. Bandwidth is
 * de-rated by the fixed IB per-packet overhead (LRH + BTH + transport header
 * + ICRC + VCRC + delimiter) relative to the effective MTU, which is clamped
 * to the configured segment size. Invalid active_width/active_speed values
 * yield UCS_ERR_IO_ERROR.
 */
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len, uct_iface_attr_t *iface_attr) { uct_ib_device_t *dev = uct_ib_iface_device(iface); static const unsigned ib_port_widths[] = { [0] = 1, [1] = 4, [2] = 8, [3] = 12 }; uint8_t active_width, active_speed, active_mtu; double encoding, signal_rate, wire_speed; size_t mtu, width, extra_pkt_len; if (!uct_ib_device_is_port_ib(dev, iface->port_num)) { return UCS_ERR_UNSUPPORTED; } active_width = uct_ib_iface_port_attr(iface)->active_width; active_speed = uct_ib_iface_port_attr(iface)->active_speed; active_mtu = uct_ib_iface_port_attr(iface)->active_mtu; /* Get active width */ if (!ucs_is_pow2(active_width) || (active_width < 1) || (ucs_ilog2(active_width) > 3)) { ucs_error("Invalid active_width on %s:%d: %d", uct_ib_device_name(dev), iface->port_num, active_width); return UCS_ERR_IO_ERROR; } memset(iface_attr, 0, sizeof(*iface_attr)); iface_attr->device_addr_len = iface->addr_size; switch (active_speed) { case 1: /* SDR */ iface_attr->latency = 5000e-9; signal_rate = 2.5e9; encoding = 8.0/10.0; break; case 2: /* DDR */ iface_attr->latency = 2500e-9; signal_rate = 5.0e9; encoding = 8.0/10.0; break; case 4: /* QDR */ iface_attr->latency = 1300e-9; signal_rate = 10.0e9; encoding = 8.0/10.0; break; case 8: /* FDR10 */ iface_attr->latency = 700e-9; signal_rate = 10.3125e9; encoding = 64.0/66.0; break; case 16: /* FDR */ iface_attr->latency = 700e-9; signal_rate = 14.0625e9; encoding = 64.0/66.0; break; case 32: /* EDR */ iface_attr->latency = 600e-9; signal_rate = 25.78125e9; encoding = 64.0/66.0; break; default: ucs_error("Invalid active_speed on %s:%d: %d", uct_ib_device_name(dev), iface->port_num, active_speed); return UCS_ERR_IO_ERROR; } /* Wire speed calculation: Width * SignalRate * Encoding */ width = ib_port_widths[ucs_ilog2(active_width)]; wire_speed = (width * signal_rate * encoding) / 8.0; /* Calculate packet overhead */ mtu = ucs_min(uct_ib_mtu_value(active_mtu), iface->config.seg_size); extra_pkt_len = UCT_IB_LRH_LEN + UCT_IB_BTH_LEN + xport_hdr_len + UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN; iface_attr->bandwidth = (wire_speed * mtu) / (mtu + extra_pkt_len); return UCS_OK; }