Example #1
0
static ucs_status_t uct_ib_iface_init_pkey(uct_ib_iface_t *iface,
        uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    uint16_t pkey_tbl_len = uct_ib_iface_port_attr(iface)->pkey_tbl_len;
    uint16_t pkey_index, port_pkey, pkey;

    if (config->pkey_value > UCT_IB_PKEY_PARTITION_MASK) {
        ucs_error("Requested pkey 0x%x is invalid, should be in the range 0..0x%x",
                  config->pkey_value, UCT_IB_PKEY_PARTITION_MASK);
        return UCS_ERR_INVALID_PARAM;
    }

    /* get the user's pkey value and find its index in the port's pkey table */
    for (pkey_index = 0; pkey_index < pkey_tbl_len; ++pkey_index) {
        /* get the pkey values from the port's pkeys table */
        if (ibv_query_pkey(dev->ibv_context, iface->port_num, pkey_index, &port_pkey)) {
            ucs_error("ibv_query_pkey(%s:%d, index=%d) failed: %m",
                      uct_ib_device_name(dev), iface->port_num, pkey_index);
        }

        pkey = ntohs(port_pkey);
        if (!(pkey & UCT_IB_PKEY_MEMBERSHIP_MASK)) {
            ucs_debug("skipping send-only pkey[%d]=0x%x", pkey_index, pkey);
            continue;
        }

        /* take only the lower 15 bits for the comparison */
        if ((pkey & UCT_IB_PKEY_PARTITION_MASK) == config->pkey_value) {
            iface->pkey_index = pkey_index;
            iface->pkey_value = pkey;
            ucs_debug("using pkey[%d] 0x%x on %s:%d", iface->pkey_index,
                      iface->pkey_value, uct_ib_device_name(dev), iface->port_num);
            return UCS_OK;
        }
    }

    ucs_error("The requested pkey: 0x%x, cannot be used. "
              "It wasn't found or the configured pkey doesn't have full membership.",
              config->pkey_value);
    return UCS_ERR_INVALID_PARAM;
}
Example #2
0
static ucs_status_t uct_ib_iface_arm_cq(uct_ib_iface_t *iface, struct ibv_cq *cq,
                                        int solicited)
{
    int ret;

    ret = ibv_req_notify_cq(cq, solicited);
    if (ret != 0) {
        uct_ib_device_t *dev = uct_ib_iface_device(iface);
        ucs_error("ibv_req_notify_cq(%s:%d, cq) failed: %m",
                  uct_ib_device_name(dev), iface->port_num);
        return UCS_ERR_IO_ERROR;
    }
    return UCS_OK;
}
Example #3
0
static ucs_status_t uct_ib_iface_init_gid(uct_ib_iface_t *iface,
        uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    int ret;

    ret = ibv_query_gid(dev->ibv_context, iface->port_num, config->gid_index,
                        &iface->gid);
    if (ret != 0) {
        ucs_error("ibv_query_gid(index=%d) failed: %m", config->gid_index);
        return UCS_ERR_INVALID_PARAM;
    }

    if ((iface->gid.global.interface_id == 0) && (iface->gid.global.subnet_prefix == 0)) {
        ucs_error("Invalid gid[%d] on %s:%d", config->gid_index,
                  uct_ib_device_name(dev), iface->port_num);
        return UCS_ERR_INVALID_ADDR;
    }

    return UCS_OK;
}
Example #4
0
ucs_status_t uct_ib_iface_create_ah(uct_ib_iface_t *iface,
                                    const uct_ib_address_t *ib_addr,
                                    uint8_t src_path_bits,
                                    struct ibv_ah **ah_p)
{
    struct ibv_ah_attr ah_attr;
    struct ibv_ah *ah;
    char buf[128];
    char *p, *endp;

    uct_ib_iface_fill_ah_attr(iface, ib_addr, src_path_bits, &ah_attr);
    ah = ibv_create_ah(uct_ib_iface_md(iface)->pd, &ah_attr);

    if (ah == NULL) {
        p    = buf;
        endp = buf + sizeof(buf);
        snprintf(p, endp - p, "dlid=%d sl=%d port=%d path_bits=%d",
                 ah_attr.dlid, ah_attr.sl, ah_attr.port_num, ah_attr.src_path_bits);
        p += strlen(p);

        if (ah_attr.is_global) {
            snprintf(p, endp - p, "dgid=");
            p += strlen(p);
            inet_ntop(AF_INET6, &ah_attr.grh.dgid, p, endp - p);
            p += strlen(p);
            snprintf(p, endp - p, " sgid_index=%d", ah_attr.grh.sgid_index);
        }

        ucs_error("ibv_create_ah(%s) on %s:%d failed: %m", buf,
                  uct_ib_device_name(uct_ib_iface_device(iface)), iface->port_num);
        return UCS_ERR_INVALID_ADDR;
    }

    *ah_p = ah;
    return UCS_OK;
}
Example #5
0
static ucs_status_t uct_ib_iface_find_port(uct_ib_device_t *dev,
                                           const char *resource_dev_name,
                                           uint8_t *p_port_num)
{
    const char *ibdev_name;
    unsigned port_num;
    size_t devname_len;
    char *p;

    p = strrchr(resource_dev_name, ':');
    if (p == NULL) {
        goto err; /* Wrong device name format */
    }
    devname_len = p - resource_dev_name;

    ibdev_name = uct_ib_device_name(dev);
    if ((strlen(ibdev_name) != devname_len) ||
        strncmp(ibdev_name, resource_dev_name, devname_len))
    {
        goto err; /* Device name is wrong */
    }

    port_num = strtod(p + 1, &p);
    if (*p != '\0') {
        goto err; /* Failed to parse port number */
    }
    if ((port_num < dev->first_port) || (port_num >= dev->first_port + dev->num_ports)) {
        goto err; /* Port number out of range */
    }

    *p_port_num = port_num;
    return UCS_OK;

err:
    return UCS_ERR_NO_DEVICE;
}
Example #6
0
static ucs_status_t uct_ib_iface_create_cq(uct_ib_iface_t *iface, int cq_length,
                                           size_t inl, struct ibv_cq **cq_p)
{
    static const char *cqe_size_env_var = "MLX5_CQE_SIZE";
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    const char *cqe_size_env_value;
    size_t cqe_size_min, cqe_size;
    char cqe_size_buf[32];
    ucs_status_t status;
    struct ibv_cq *cq;
    int env_var_added = 0;
    int ret;

    cqe_size_min       = (inl > 32) ? 128 : 64;
    cqe_size_env_value = getenv(cqe_size_env_var);

    if (cqe_size_env_value != NULL) {
        cqe_size = atol(cqe_size_env_value);
        if (cqe_size < cqe_size_min) {
            ucs_error("%s is set to %zu, but at least %zu is required (inl: %zu)",
                      cqe_size_env_var, cqe_size, cqe_size_min, inl);
            status = UCS_ERR_INVALID_PARAM;
            goto out;
        }
    } else {
        /* CQE size is not defined by the environment, set it according to inline
         * size and cache line size.
         */
        cqe_size = ucs_max(cqe_size_min, UCS_SYS_CACHE_LINE_SIZE);
        cqe_size = ucs_max(cqe_size, 64);  /* at least 64 */
        cqe_size = ucs_min(cqe_size, 128); /* at most 128 */
        snprintf(cqe_size_buf, sizeof(cqe_size_buf),"%zu", cqe_size);
        ucs_debug("%s: setting %s=%s", uct_ib_device_name(dev), cqe_size_env_var,
                  cqe_size_buf);
        ret = ibv_exp_setenv(dev->ibv_context, cqe_size_env_var, cqe_size_buf, 1);
        if (ret) {
            ucs_error("ibv_exp_setenv(%s=%s) failed: %m", cqe_size_env_var,
                      cqe_size_buf);
            status = UCS_ERR_INVALID_PARAM;
            goto out;
        }

        env_var_added = 1;
    }

    cq = ibv_create_cq(dev->ibv_context, cq_length, NULL, iface->comp_channel, 0);
    if (cq == NULL) {
        ucs_error("ibv_create_cq(cqe=%d) failed: %m", cq_length);
        status = UCS_ERR_IO_ERROR;
        goto out_unsetenv;
    }

    *cq_p = cq;
    status = UCS_OK;

out_unsetenv:
    if (env_var_added) {
        /* if we created a new environment variable, remove it */
        ret = ibv_exp_unsetenv(dev->ibv_context, cqe_size_env_var);
        if (ret) {
            ucs_warn("unsetenv(%s) failed: %m", cqe_size_env_var);
        }
    }
out:
    return status;
}
Example #7
0
static ucs_status_t uct_ib_mlx5dv_create_ksm(uct_ib_md_t *ibmd,
                                             uct_ib_mem_t *ib_memh,
                                             off_t offset)
{
    uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t);
    uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t);
    uint32_t out[UCT_IB_MLX5DV_ST_SZ_DW(create_mkey_out)] = {};
    struct ibv_mr *mr = memh->super.mr;
    ucs_status_t status = UCS_OK;
    struct mlx5dv_pd dvpd = {};
    struct mlx5dv_obj dv = {};
    size_t reg_length, length, inlen;
    int list_size, i;
    void *mkc, *klm;
    uint32_t *in;
    intptr_t addr;

    if (!(md->flags & UCT_IB_MLX5_MD_FLAG_KSM)) {
        return UCS_ERR_UNSUPPORTED;
    }

    reg_length = UCT_IB_MD_MAX_MR_SIZE;
    addr       = (intptr_t)mr->addr & ~(reg_length - 1);
    length     = mr->length + (intptr_t)mr->addr - addr;
    list_size  = ucs_div_round_up(length, reg_length);
    inlen      = UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_in) +
                 UCT_IB_MLX5DV_ST_SZ_BYTES(klm) * list_size;

    in         = ucs_calloc(1, inlen, "mkey mailbox");
    if (in == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    dv.pd.in   = md->super.pd;
    dv.pd.out  = &dvpd;
    mlx5dv_init_obj(&dv, MLX5DV_OBJ_PD);

    UCT_IB_MLX5DV_SET(create_mkey_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_MKEY);
    mkc = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    UCT_IB_MLX5DV_SET(mkc, mkc, access_mode_1_0, UCT_IB_MLX5_MKC_ACCESS_MODE_KSM);
    UCT_IB_MLX5DV_SET(mkc, mkc, a, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, rw, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, rr, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, lw, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, lr, 1);
    UCT_IB_MLX5DV_SET(mkc, mkc, pd, dvpd.pdn);
    UCT_IB_MLX5DV_SET(mkc, mkc, translations_octword_size, list_size);
    UCT_IB_MLX5DV_SET(mkc, mkc, log_entity_size, ucs_ilog2(reg_length));
    UCT_IB_MLX5DV_SET(mkc, mkc, qpn, 0xffffff);
    UCT_IB_MLX5DV_SET(mkc, mkc, mkey_7_0, offset & 0xff);
    UCT_IB_MLX5DV_SET64(mkc, mkc, start_addr, addr + offset);
    UCT_IB_MLX5DV_SET64(mkc, mkc, len, length);
    UCT_IB_MLX5DV_SET(create_mkey_in, in, translations_octword_actual_size, list_size);

    klm = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
    for (i = 0; i < list_size; i++) {
        if (i == list_size - 1) {
            UCT_IB_MLX5DV_SET(klm, klm, byte_count, length % reg_length);
        } else {
            UCT_IB_MLX5DV_SET(klm, klm, byte_count, reg_length);
        }
        UCT_IB_MLX5DV_SET(klm, klm, mkey, mr->lkey);
        UCT_IB_MLX5DV_SET64(klm, klm, address, addr + (i * reg_length));
        klm += UCT_IB_MLX5DV_ST_SZ_BYTES(klm);
    }

    memh->atomic_dvmr = mlx5dv_devx_obj_create(md->super.dev.ibv_context, in, inlen,
                                               out, sizeof(out));
    if (memh->atomic_dvmr == NULL) {
        ucs_debug("CREATE_MKEY KSM failed: %m");
        status = UCS_ERR_UNSUPPORTED;
        md->flags &= ~UCT_IB_MLX5_MD_FLAG_KSM;
        goto out;
    }

    memh->super.atomic_rkey =
        (UCT_IB_MLX5DV_GET(create_mkey_out, out, mkey_index) << 8) |
        (offset & 0xff);

    ucs_debug("KSM registered memory %p..%p offset 0x%lx on %s rkey 0x%x",
              mr->addr, mr->addr + mr->length, offset, uct_ib_device_name(&md->super.dev),
              memh->super.atomic_rkey);
out:
    ucs_free(in);
    return status;
}
Example #8
0
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
                                uct_iface_attr_t *iface_attr)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    static const unsigned ib_port_widths[] = {
        [0] = 1,
        [1] = 4,
        [2] = 8,
        [3] = 12
    };
    uint8_t active_width, active_speed, active_mtu;
    double encoding, signal_rate, wire_speed;
    size_t mtu, width, extra_pkt_len;

    if (!uct_ib_device_is_port_ib(dev, iface->port_num)) {
        return UCS_ERR_UNSUPPORTED;
    }

    active_width = uct_ib_iface_port_attr(iface)->active_width;
    active_speed = uct_ib_iface_port_attr(iface)->active_speed;
    active_mtu   = uct_ib_iface_port_attr(iface)->active_mtu;

    /* Get active width */
    if (!ucs_is_pow2(active_width) ||
            (active_width < 1) || (ucs_ilog2(active_width) > 3))
    {
        ucs_error("Invalid active_width on %s:%d: %d",
                  uct_ib_device_name(dev), iface->port_num, active_width);
        return UCS_ERR_IO_ERROR;
    }

    memset(iface_attr, 0, sizeof(*iface_attr));

    iface_attr->device_addr_len = iface->addr_size;

    switch (active_speed) {
    case 1: /* SDR */
        iface_attr->latency = 5000e-9;
        signal_rate         = 2.5e9;
        encoding            = 8.0/10.0;
        break;
    case 2: /* DDR */
        iface_attr->latency = 2500e-9;
        signal_rate         = 5.0e9;
        encoding            = 8.0/10.0;
        break;
    case 4: /* QDR */
        iface_attr->latency = 1300e-9;
        signal_rate         = 10.0e9;
        encoding            = 8.0/10.0;
        break;
    case 8: /* FDR10 */
        iface_attr->latency = 700e-9;
        signal_rate         = 10.3125e9;
        encoding            = 64.0/66.0;
        break;
    case 16: /* FDR */
        iface_attr->latency = 700e-9;
        signal_rate         = 14.0625e9;
        encoding            = 64.0/66.0;
        break;
    case 32: /* EDR */
        iface_attr->latency = 600e-9;
        signal_rate         = 25.78125e9;
        encoding            = 64.0/66.0;
        break;
    default:
        ucs_error("Invalid active_speed on %s:%d: %d",
                  uct_ib_device_name(dev), iface->port_num, active_speed);
        return UCS_ERR_IO_ERROR;
    }

    /* Wire speed calculation: Width * SignalRate * Encoding */
    width                 = ib_port_widths[ucs_ilog2(active_width)];
    wire_speed            = (width * signal_rate * encoding) / 8.0;

    /* Calculate packet overhead  */
    mtu                   = ucs_min(uct_ib_mtu_value(active_mtu),
                                    iface->config.seg_size);
    extra_pkt_len         = UCT_IB_LRH_LEN + UCT_IB_BTH_LEN + xport_hdr_len +
                            UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN;
    iface_attr->bandwidth = (wire_speed * mtu) / (mtu + extra_pkt_len);
    return UCS_OK;
}
Example #9
0
File: ib_md.c Project: alinask/ucx
static UCS_F_MAYBE_UNUSED
struct ibv_mr *uct_ib_md_create_umr(uct_ib_md_t *md, struct ibv_mr *mr)
{
#if HAVE_EXP_UMR
    struct ibv_exp_mem_region mem_reg;
    struct ibv_exp_send_wr wr, *bad_wr;
    struct ibv_exp_create_mr_in mrin;
    struct ibv_mr *umr;
    struct ibv_wc wc;
    int ret;
    size_t offset;

    if ((md->umr_qp == NULL) || (md->umr_cq == NULL)) {
        return NULL;
    }

    offset = uct_ib_md_umr_offset(uct_ib_md_umr_id(md));
    /* Create memory key */
    memset(&mrin, 0, sizeof(mrin));
    mrin.pd                       = md->pd;

#ifdef HAVE_EXP_UMR_NEW_API
    mrin.attr.create_flags        = IBV_EXP_MR_INDIRECT_KLMS;
    mrin.attr.exp_access_flags    = UCT_IB_MEM_ACCESS_FLAGS;
    mrin.attr.max_klm_list_size   = 1;
#else
    mrin.attr.create_flags        = IBV_MR_NONCONTIG_MEM;
    mrin.attr.access_flags        = UCT_IB_MEM_ACCESS_FLAGS;
    mrin.attr.max_reg_descriptors = 1;
#endif

    umr = ibv_exp_create_mr(&mrin);
    if (!umr) {
        ucs_error("Failed to create modified_mr: %m");
        goto err;
    }

    /* Fill memory list and UMR */
    memset(&wr, 0, sizeof(wr));
    memset(&mem_reg, 0, sizeof(mem_reg));

    mem_reg.base_addr                              = (uintptr_t) mr->addr;
    mem_reg.length                                 = mr->length;

#ifdef HAVE_EXP_UMR_NEW_API
    mem_reg.mr                                     = mr;

    wr.ext_op.umr.umr_type                         = IBV_EXP_UMR_MR_LIST;
    wr.ext_op.umr.mem_list.mem_reg_list            = &mem_reg;
    wr.ext_op.umr.exp_access                       = UCT_IB_MEM_ACCESS_FLAGS;
    wr.ext_op.umr.modified_mr                      = umr;
    wr.ext_op.umr.base_addr                        = (uint64_t) (uintptr_t) mr->addr + offset;

    wr.ext_op.umr.num_mrs                          = 1;
#else
    mem_reg.m_key                                  = mr;

    wr.ext_op.umr.memory_key.mkey_type             = IBV_EXP_UMR_MEM_LAYOUT_NONCONTIG;
    wr.ext_op.umr.memory_key.mem_list.mem_reg_list = &mem_reg;
    wr.ext_op.umr.memory_key.access                = UCT_IB_MEM_ACCESS_FLAGS;
    wr.ext_op.umr.memory_key.modified_mr           = umr;
    wr.ext_op.umr.memory_key.region_base_addr      = mr->addr + offset;

    wr.num_sge                                     = 1;
#endif

    wr.exp_opcode                                  = IBV_EXP_WR_UMR_FILL;
    wr.exp_send_flags                              = IBV_EXP_SEND_INLINE | IBV_EXP_SEND_SIGNALED;

    /* Post UMR */
    ret = ibv_exp_post_send(md->umr_qp, &wr, &bad_wr);
    if (ret) {
        ucs_error("ibv_exp_post_send(UMR_FILL) failed: %m");
        goto err_free_umr;
    }

    /* Wait for send UMR completion */
    for (;;) {
        ret = ibv_poll_cq(md->umr_cq, 1, &wc);
        if (ret < 0) {
            ucs_error("ibv_exp_poll_cq(umr_cq) failed: %m");
            goto err_free_umr;
        }
        if (ret == 1) {
            if (wc.status != IBV_WC_SUCCESS) {
                ucs_error("UMR_FILL completed with error: %s vendor_err %d",
                          ibv_wc_status_str(wc.status), wc.vendor_err);
                goto err_free_umr;
            }
            break;
        }
    }

    ucs_trace("UMR registered memory %p..%p offset 0x%x on %s lkey 0x%x rkey 0x%x",
              mr->addr, mr->addr + mr->length, (unsigned)offset, uct_ib_device_name(&md->dev), umr->lkey,
              umr->rkey);
    return umr;

err_free_umr:
    ibv_dereg_mr(umr);
err:
#endif
    return NULL;
}