Example #1
0
File: cm_ep.c Project: LenaO/ucx
static ucs_status_t uct_cm_ep_fill_path_rec(uct_cm_ep_t *ep,
                                            struct ibv_sa_path_rec *path)
{
    uct_cm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_cm_iface_t);

    path->dgid.global.subnet_prefix = ep->dest_addr.subnet_prefix;
    path->dgid.global.interface_id  = ep->dest_addr.guid;
    path->sgid                      = iface->super.gid;
    path->dlid                      = htons(ep->dest_addr.lid);
    path->slid                      = htons(uct_ib_iface_port_attr(&iface->super)->lid);
    path->raw_traffic               = 0; /* IB traffic */
    path->flow_label                = 0;
    path->hop_limit                 = 0;
    path->traffic_class             = 0;
    path->reversible                = htonl(1); /* IBCM currently only supports reversible paths */
    path->numb_path                 = 0;
    path->pkey                      = ntohs(iface->super.pkey_value);
    path->sl                        = iface->super.sl;
    path->mtu_selector              = 2; /* EQ */
    path->mtu                       = uct_ib_iface_port_attr(&iface->super)->active_mtu;
    path->rate_selector             = 2; /* EQ */
    path->rate                      = IBV_RATE_MAX;
    path->packet_life_time_selector = 2; /* EQ */
    path->packet_life_time          = 0;
    path->preference                = 0; /* Use first path */
    return UCS_OK;
}
Example #2
0
void uct_rc_verbs_iface_common_query(uct_rc_verbs_iface_common_t *verbs_iface,
                                     uct_rc_iface_t *iface, uct_iface_attr_t *iface_attr)
{
    /* PUT */
    iface_attr->cap.put.max_short = verbs_iface->config.max_inline;
    iface_attr->cap.put.max_bcopy = iface->super.config.seg_size;
    iface_attr->cap.put.min_zcopy = 0;
    iface_attr->cap.put.max_zcopy = uct_ib_iface_port_attr(&iface->super)->max_msg_sz;
    iface_attr->cap.put.max_iov   = uct_ib_iface_get_max_iov(&iface->super);

    /* GET */
    iface_attr->cap.get.max_bcopy = iface->super.config.seg_size;
    iface_attr->cap.get.min_zcopy = iface->super.config.max_inl_resp + 1;
    iface_attr->cap.get.max_zcopy = uct_ib_iface_port_attr(&iface->super)->max_msg_sz;
    iface_attr->cap.get.max_iov   = uct_ib_iface_get_max_iov(&iface->super);

    /* AM */
    iface_attr->cap.am.max_short  = verbs_iface->config.max_inline - sizeof(uct_rc_hdr_t);
    iface_attr->cap.am.max_bcopy  = iface->super.config.seg_size - sizeof(uct_rc_hdr_t);
    iface_attr->cap.am.min_zcopy  = 0;
    iface_attr->cap.am.max_zcopy  = iface->super.config.seg_size - sizeof(uct_rc_hdr_t);
    /* The first IOV is reserved for the header */
    iface_attr->cap.am.max_iov    = uct_ib_iface_get_max_iov(&iface->super) - 1;

    /* TODO: may need to change for dc/rc */
    iface_attr->cap.am.max_hdr    = verbs_iface->config.short_desc_size - sizeof(uct_rc_hdr_t);

    iface_attr->cap.flags        |= UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE;

    /* Software overhead */
    iface_attr->overhead          = 75e-9;
}
Example #3
0
static ucs_status_t uct_rc_verbs_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_verbs_iface_t);
    struct ibv_exp_device_attr *dev_attr =
                    &uct_ib_iface_device(&iface->super.super)->dev_attr;

    uct_rc_iface_query(&iface->super, iface_attr);

    /* PUT */
    iface_attr->cap.put.max_short = iface->config.max_inline;
    iface_attr->cap.put.max_bcopy = iface->super.super.config.seg_size;
    iface_attr->cap.put.max_zcopy =
                        uct_ib_iface_port_attr(&iface->super.super)->max_msg_sz;

    /* GET */
    iface_attr->cap.get.max_bcopy = iface->super.super.config.seg_size;
    iface_attr->cap.get.max_zcopy =
                    uct_ib_iface_port_attr(&iface->super.super)->max_msg_sz;

    /* AM */
    iface_attr->cap.am.max_short  = iface->config.max_inline
                                        - sizeof(uct_rc_hdr_t);
    iface_attr->cap.am.max_bcopy  = iface->super.super.config.seg_size
                                        - sizeof(uct_rc_hdr_t);

    iface_attr->cap.am.max_zcopy  = iface->super.super.config.seg_size
                                            - sizeof(uct_rc_hdr_t);
    iface_attr->cap.am.max_hdr    = iface->config.short_desc_size
                                            - sizeof(uct_rc_hdr_t);

    /*
     * Atomics.
     * Need to make sure device support at least one kind of atomics.
     */
    if (IBV_EXP_HAVE_ATOMIC_HCA(dev_attr) ||
        IBV_EXP_HAVE_ATOMIC_GLOB(dev_attr) ||
        IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(dev_attr))
    {
        iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_ADD64 |
                                 UCT_IFACE_FLAG_ATOMIC_FADD64 |
                                 UCT_IFACE_FLAG_ATOMIC_CSWAP64;

        if (uct_rc_verbs_is_ext_atomic_supported(dev_attr, sizeof(uint32_t))) {
            iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_ADD32 |
                                     UCT_IFACE_FLAG_ATOMIC_FADD32 |
                                     UCT_IFACE_FLAG_ATOMIC_SWAP32 |
                                     UCT_IFACE_FLAG_ATOMIC_CSWAP32;
        }

        if (uct_rc_verbs_is_ext_atomic_supported(dev_attr, sizeof(uint64_t))) {
            iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_SWAP64;
        }
    }
    return UCS_OK;
}
Example #4
0
ucs_status_t uct_ib_iface_get_device_address(uct_iface_h tl_iface,
                                             uct_device_addr_t *dev_addr)
{
    uct_ib_iface_t *iface = ucs_derived_of(tl_iface, uct_ib_iface_t);
    uct_ib_address_pack(uct_ib_iface_device(iface), iface->addr_type,
                        &iface->gid, uct_ib_iface_port_attr(iface)->lid,
                        (void*)dev_addr);
    return UCS_OK;
}
Example #5
0
ucs_status_t uct_ib_iface_get_address(uct_iface_h tl_iface, struct sockaddr *addr)
{
    uct_ib_iface_t *iface = ucs_derived_of(tl_iface, uct_ib_iface_t);
    uct_sockaddr_ib_t *ib_addr = (uct_sockaddr_ib_t*)addr;

    /* TODO LMC */
    ib_addr->sib_family    = UCT_AF_INFINIBAND;
    ib_addr->lid           = uct_ib_iface_port_attr(iface)->lid;
    ib_addr->id            = 0;
    ib_addr->guid          = iface->gid.global.interface_id;
    ib_addr->subnet_prefix = iface->gid.global.subnet_prefix;
    ib_addr->qp_num        = 0;
    return UCS_OK;
}
Example #6
0
static ucs_status_t uct_ib_iface_init_pkey(uct_ib_iface_t *iface,
                                           const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    uint16_t pkey_tbl_len = uct_ib_iface_port_attr(iface)->pkey_tbl_len;
    uint16_t pkey_index, port_pkey, pkey;

    if (config->pkey_value > UCT_IB_PKEY_PARTITION_MASK) {
        ucs_error("Requested pkey 0x%x is invalid, should be in the range 0..0x%x",
                  config->pkey_value, UCT_IB_PKEY_PARTITION_MASK);
        return UCS_ERR_INVALID_PARAM;
    }

    /* get the user's pkey value and find its index in the port's pkey table */
    for (pkey_index = 0; pkey_index < pkey_tbl_len; ++pkey_index) {
        /* get the pkey values from the port's pkeys table */
        if (ibv_query_pkey(dev->ibv_context, iface->config.port_num, pkey_index,
                           &port_pkey))
        {
            ucs_error("ibv_query_pkey("UCT_IB_IFACE_FMT", index=%d) failed: %m",
                      UCT_IB_IFACE_ARG(iface), pkey_index);
        }

        pkey = ntohs(port_pkey);
        if (!(pkey & UCT_IB_PKEY_MEMBERSHIP_MASK)) {
            ucs_debug("skipping send-only pkey[%d]=0x%x", pkey_index, pkey);
            continue;
        }

        /* take only the lower 15 bits for the comparison */
        if ((pkey & UCT_IB_PKEY_PARTITION_MASK) == config->pkey_value) {
            iface->pkey_index = pkey_index;
            iface->pkey_value = pkey;
            ucs_debug("using pkey[%d] 0x%x on "UCT_IB_IFACE_FMT, iface->pkey_index,
                      iface->pkey_value, UCT_IB_IFACE_ARG(iface));
            return UCS_OK;
        }
    }

    ucs_error("The requested pkey: 0x%x, cannot be used. "
              "It wasn't found or the configured pkey doesn't have full membership.",
              config->pkey_value);
    return UCS_ERR_INVALID_PARAM;
}
Example #7
0
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
                                uct_iface_attr_t *iface_attr)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    static const unsigned ib_port_widths[] = {
        [0] = 1,
        [1] = 4,
        [2] = 8,
        [3] = 12
    };
    uint8_t active_width, active_speed, active_mtu;
    double encoding, signal_rate, wire_speed;
    size_t mtu, width, extra_pkt_len;
    int i = 0;

    active_width = uct_ib_iface_port_attr(iface)->active_width;
    active_speed = uct_ib_iface_port_attr(iface)->active_speed;
    active_mtu   = uct_ib_iface_port_attr(iface)->active_mtu;

    /* Get active width */
    if (!ucs_is_pow2(active_width) ||
        (active_width < 1) || (ucs_ilog2(active_width) > 3))
    {
        ucs_error("Invalid active_width on %s:%d: %d",
                  UCT_IB_IFACE_ARG(iface), active_width);
        return UCS_ERR_IO_ERROR;
    }

    memset(iface_attr, 0, sizeof(*iface_attr));

    iface_attr->device_addr_len = iface->addr_size;

    switch (active_speed) {
    case 1: /* SDR */
        iface_attr->latency = 5000e-9;
        signal_rate         = 2.5e9;
        encoding            = 8.0/10.0;
        break;
    case 2: /* DDR */
        iface_attr->latency = 2500e-9;
        signal_rate         = 5.0e9;
        encoding            = 8.0/10.0;
        break;
    case 4: /* QDR */
        iface_attr->latency = 1300e-9;
        signal_rate         = 10.0e9;
        encoding            = 8.0/10.0;
        break;
    case 8: /* FDR10 */
        iface_attr->latency = 700e-9;
        signal_rate         = 10.3125e9;
        encoding            = 64.0/66.0;
        break;
    case 16: /* FDR */
        iface_attr->latency = 700e-9;
        signal_rate         = 14.0625e9;
        encoding            = 64.0/66.0;
        break;
    case 32: /* EDR */
        iface_attr->latency = 600e-9;
        signal_rate         = 25.78125e9;
        encoding            = 64.0/66.0;
        break;
    default:
        ucs_error("Invalid active_speed on %s:%d: %d",
                  UCT_IB_IFACE_ARG(iface), active_speed);
        return UCS_ERR_IO_ERROR;
    }

    /* Wire speed calculation: Width * SignalRate * Encoding */
    width                 = ib_port_widths[ucs_ilog2(active_width)];
    wire_speed            = (width * signal_rate * encoding) / 8.0;

    /* Calculate packet overhead  */
    mtu                   = ucs_min(uct_ib_mtu_value(active_mtu),
                                    iface->config.seg_size);

    extra_pkt_len = UCT_IB_BTH_LEN + xport_hdr_len +  UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN;

    if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(iface))) {
        extra_pkt_len += UCT_IB_GRH_LEN + UCT_IB_ROCE_LEN;
    } else {
        /* TODO check if UCT_IB_DELIM_LEN is present in RoCE as well */
        extra_pkt_len += UCT_IB_LRH_LEN;
    }

    iface_attr->bandwidth = (wire_speed * mtu) / (mtu + extra_pkt_len);

    /* Set priority of current device */
    iface_attr->priority = 0;
    while (uct_ib_device_info_table[i].vendor_part_id != 0) {
        if (uct_ib_device_info_table[i].vendor_part_id == dev->dev_attr.vendor_part_id) {
            iface_attr->priority = uct_ib_device_info_table[i].priority;
            break;
        }
        i++;
    }

    return UCS_OK;
}
Example #8
0
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const uct_iface_params_t *params,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, params->dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->ops                      = ops;

    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) +
                                             params->rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset -
                                     params->rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                             config->rx.queue_len / 4);
    self->config.port_num          = port_num;
    self->config.sl                = config->sl;
    self->config.gid_index         = config->gid_index;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_device_query_gid(dev, self->config.port_num,
                                     self->config.gid_index, &self->gid);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("ibv_create_comp_channel() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl,
                                    &self->recv_cq);
    if (status != UCS_OK) {
        goto err_destroy_send_cq;
    }

    /* Address scope and size */
    if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) {
        if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) {
            self->addr_type = UCT_IB_ADDRESS_TYPE_ETH;
        } else {
            self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix);
        }
    } else {
        ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST);
        self->addr_type = config->addr_type;
    }

    self->addr_size  = uct_ib_address_size(self->addr_type);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Example #9
0
static ucs_status_t uct_ib_iface_init_lmc(uct_ib_iface_t *iface,
                                          const uct_ib_iface_config_t *config)
{
    unsigned i, j, num_path_bits;
    unsigned first, last;
    uint8_t lmc;
    int step;

    if (config->lid_path_bits.count == 0) {
        ucs_error("List of path bits must not be empty");
        return UCS_ERR_INVALID_PARAM;
    }

    /* count the number of lid_path_bits */
    num_path_bits = 0;
    for (i = 0; i < config->lid_path_bits.count; i++) {
        num_path_bits += 1 + abs(config->lid_path_bits.ranges[i].first -
                                 config->lid_path_bits.ranges[i].last);
    }

    iface->path_bits = ucs_calloc(1, num_path_bits * sizeof(*iface->path_bits),
                                  "ib_path_bits");
    if (iface->path_bits == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    lmc = uct_ib_iface_port_attr(iface)->lmc;

    /* go over the list of values (ranges) for the lid_path_bits and set them */
    iface->path_bits_count = 0;
    for (i = 0; i < config->lid_path_bits.count; ++i) {

        first = config->lid_path_bits.ranges[i].first;
        last  = config->lid_path_bits.ranges[i].last;

        /* range of values or one value */
        if (first < last) {
            step = 1;
        } else {
            step = -1;
        }

        /* fill the value/s */
        for (j = first; j != (last + step); j += step) {
            if (j >= UCS_BIT(lmc)) {
                ucs_debug("Not using value %d for path_bits - must be < 2^lmc (lmc=%d)",
                          j, lmc);
                if (step == 1) {
                    break;
                } else {
                    continue;
                }
            }

            ucs_assert(iface->path_bits_count <= num_path_bits);
            iface->path_bits[iface->path_bits_count] = j;
            iface->path_bits_count++;
        }
    }

    return UCS_OK;
}
Example #10
0
ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
                                uct_iface_attr_t *iface_attr)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    static const unsigned ib_port_widths[] = {
        [0] = 1,
        [1] = 4,
        [2] = 8,
        [3] = 12
    };
    uint8_t active_width, active_speed, active_mtu;
    double encoding, signal_rate, wire_speed;
    size_t mtu, width, extra_pkt_len;

    if (!uct_ib_device_is_port_ib(dev, iface->port_num)) {
        return UCS_ERR_UNSUPPORTED;
    }

    active_width = uct_ib_iface_port_attr(iface)->active_width;
    active_speed = uct_ib_iface_port_attr(iface)->active_speed;
    active_mtu   = uct_ib_iface_port_attr(iface)->active_mtu;

    /* Get active width */
    if (!ucs_is_pow2(active_width) ||
            (active_width < 1) || (ucs_ilog2(active_width) > 3))
    {
        ucs_error("Invalid active_width on %s:%d: %d",
                  uct_ib_device_name(dev), iface->port_num, active_width);
        return UCS_ERR_IO_ERROR;
    }

    memset(iface_attr, 0, sizeof(*iface_attr));

    iface_attr->device_addr_len = iface->addr_size;

    switch (active_speed) {
    case 1: /* SDR */
        iface_attr->latency = 5000e-9;
        signal_rate         = 2.5e9;
        encoding            = 8.0/10.0;
        break;
    case 2: /* DDR */
        iface_attr->latency = 2500e-9;
        signal_rate         = 5.0e9;
        encoding            = 8.0/10.0;
        break;
    case 4: /* QDR */
        iface_attr->latency = 1300e-9;
        signal_rate         = 10.0e9;
        encoding            = 8.0/10.0;
        break;
    case 8: /* FDR10 */
        iface_attr->latency = 700e-9;
        signal_rate         = 10.3125e9;
        encoding            = 64.0/66.0;
        break;
    case 16: /* FDR */
        iface_attr->latency = 700e-9;
        signal_rate         = 14.0625e9;
        encoding            = 64.0/66.0;
        break;
    case 32: /* EDR */
        iface_attr->latency = 600e-9;
        signal_rate         = 25.78125e9;
        encoding            = 64.0/66.0;
        break;
    default:
        ucs_error("Invalid active_speed on %s:%d: %d",
                  uct_ib_device_name(dev), iface->port_num, active_speed);
        return UCS_ERR_IO_ERROR;
    }

    /* Wire speed calculation: Width * SignalRate * Encoding */
    width                 = ib_port_widths[ucs_ilog2(active_width)];
    wire_speed            = (width * signal_rate * encoding) / 8.0;

    /* Calculate packet overhead  */
    mtu                   = ucs_min(uct_ib_mtu_value(active_mtu),
                                    iface->config.seg_size);
    extra_pkt_len         = UCT_IB_LRH_LEN + UCT_IB_BTH_LEN + xport_hdr_len +
                            UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN;
    iface_attr->bandwidth = (wire_speed * mtu) / (mtu + extra_pkt_len);
    return UCS_OK;
}