Example #1
0
int ipoib_init_qp(struct net_device *dev)
{
    struct ipoib_dev_priv *priv = netdev_priv(dev);
    int ret;
    struct ib_qp_attr qp_attr;
    int attr_mask;

    if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
        return -1;

    qp_attr.qp_state = IB_QPS_INIT;
    qp_attr.qkey = 0;
    qp_attr.port_num = priv->port;
    qp_attr.pkey_index = priv->pkey_index;
    attr_mask =
        IB_QP_QKEY |
        IB_QP_PORT |
        IB_QP_PKEY_INDEX |
        IB_QP_STATE;
    ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
    if (ret) {
        ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
        goto out_fail;
    }

    qp_attr.qp_state = IB_QPS_RTR;
    /* Can't set this in a INIT->RTR transition */
    attr_mask &= ~IB_QP_PORT;
    ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
    if (ret) {
        ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
        goto out_fail;
    }

    qp_attr.qp_state = IB_QPS_RTS;
    qp_attr.sq_psn = 0;
    attr_mask |= IB_QP_SQ_PSN;
    attr_mask &= ~IB_QP_PKEY_INDEX;
    ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
    if (ret) {
        ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
        goto out_fail;
    }

    return 0;

out_fail:
    qp_attr.qp_state = IB_QPS_RESET;
    if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
        ipoib_warn(priv, "Failed to modify QP to RESET state\n");

    return ret;
}
Example #2
0
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
				struct nlattr *tb[], struct nlattr *data[],
				struct netlink_ext_ack *extack)
{
	struct net_device *pdev;
	struct ipoib_dev_priv *ppriv;
	u16 child_pkey;
	int err;

	if (!tb[IFLA_LINK])
		return -EINVAL;

	pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
	if (!pdev || pdev->type != ARPHRD_INFINIBAND)
		return -ENODEV;

	ppriv = ipoib_priv(pdev);

	if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
		ipoib_warn(ppriv, "child creation disallowed for child devices\n");
		return -EINVAL;
	}

	if (!data || !data[IFLA_IPOIB_PKEY]) {
		ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
		child_pkey  = ppriv->pkey;
	} else
		child_pkey  = nla_get_u16(data[IFLA_IPOIB_PKEY]);

	err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev);
	if (err) {
		ipoib_warn(ppriv, "failed to initialize pkey device\n");
		return err;
	}

	err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
			       child_pkey, IPOIB_RTNL_CHILD);
	if (err)
		return err;

	if (data) {
		err = ipoib_changelink(dev, tb, data, extack);
		if (err) {
			unregister_netdevice(dev);
			return err;
		}
	}

	return 0;
}
Example #3
0
static int ipoib_set_coalesce(struct net_device *dev,
			      struct ethtool_coalesce *coal)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;

	/*
	 * These values are saved in the private data and returned
	 * when ipoib_get_coalesce() is called
	 */
	if (coal->rx_coalesce_usecs       > 0xffff ||
	    coal->rx_max_coalesced_frames > 0xffff)
		return -EINVAL;

	ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
			   coal->rx_coalesce_usecs);
	if (ret && ret != -ENOSYS) {
		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
		return ret;
	}

	priv->ethtool.coalesce_usecs       = coal->rx_coalesce_usecs;
	priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;

	return 0;
}
Example #4
0
static int ipoib_set_coalesce(struct net_device *dev,
			      struct ethtool_coalesce *coal)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;

	/*
	 * Since IPoIB uses a single CQ for both rx and tx, we assume
	 * that rx params dictate the configuration.  These values are
	 * saved in the private data and returned when ipoib_get_coalesce()
	 * is called.
	 */
	if (coal->rx_coalesce_usecs       > 0xffff ||
	    coal->rx_max_coalesced_frames > 0xffff)
		return -EINVAL;

	ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
			   coal->rx_coalesce_usecs);
	if (ret && ret != -ENOSYS) {
		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
		return ret;
	}

	coal->tx_coalesce_usecs       = coal->rx_coalesce_usecs;
	coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
	priv->ethtool.coalesce_usecs       = coal->rx_coalesce_usecs;
	priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;

	return 0;
}
static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
					struct ipoib_cm_rx *rx,
					struct ib_recv_wr *wr,
					struct ib_sge *sge, int id)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_recv_wr *bad_wr;
	int i, ret;

	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
		sge[i].addr = rx->rx_ring[id].mapping[i];

	ret = ib_post_recv(rx->qp, wr, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
				      rx->rx_ring[id].mapping);
		dev_kfree_skb_any(rx->rx_ring[id].skb);
		rx->rx_ring[id].skb = NULL;
	}

	return ret;
}
Example #6
0
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr *qp_attr;
	int attr_mask;
	int ret;
	u16 pkey_index;

	ret = -ENOMEM;
	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
	if (!qp_attr)
		goto out;

	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		ret = -ENXIO;
		goto out;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	/* set correct QKey for QP */
	qp_attr->qkey = priv->qkey;
	attr_mask = IB_QP_QKEY;
	ret = ib_modify_qp(priv->qp, qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
		goto out;
	}

	/* attach QP to multicast group */
	down(&priv->mcast_mutex);
	ret = ib_attach_mcast(priv->qp, mgid, mlid);
	up(&priv->mcast_mutex);
	if (ret)
		ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);

out:
	kfree(qp_attr);
	return ret;
}
Example #7
0
int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
{
    struct ipoib_dev_priv *priv = netdev_priv(dev);
    int ret;

    mutex_lock(&priv->mcast_mutex);
    ret = ib_detach_mcast(priv->qp, mgid, mlid);
    mutex_unlock(&priv->mcast_mutex);
    if (ret)
        ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);

    return ret;
}
Example #8
0
int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
		       union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_attr *qp_attr = NULL;
	int ret;
	u16 pkey_index;

	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		ret = -ENXIO;
		goto out;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	if (set_qkey) {
		ret = -ENOMEM;
		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
		if (!qp_attr)
			goto out;

		/* set correct QKey for QP */
		qp_attr->qkey = qkey;
		ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
		if (ret) {
			ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
			goto out;
		}
	}

	/* attach QP to multicast group */
	ret = ib_attach_mcast(priv->qp, mgid, mlid);
	if (ret)
		ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);

out:
	kfree(qp_attr);
	return ret;
}
Example #9
0
static int ipoib_set_tso(struct net_device *dev, u32 data)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	if (data) {
		if (!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
		    (dev->features & NETIF_F_SG) &&
		    (priv->hca_caps & IB_DEVICE_UD_TSO)) {
			dev->features |= NETIF_F_TSO;
		} else {
			ipoib_warn(priv, "can't set TSO on\n");
			return -EOPNOTSUPP;
		}
	} else
		dev->features &= ~NETIF_F_TSO;

	return 0;
}
Example #10
0
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
			       struct nlattr *tb[], struct nlattr *data[])
{
	struct net_device *pdev;
	struct ipoib_dev_priv *ppriv;
	u16 child_pkey;
	int err;

	if (!tb[IFLA_LINK])
		return -EINVAL;

	pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
	if (!pdev)
		return -ENODEV;

	ppriv = netdev_priv(pdev);

	if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
		ipoib_warn(ppriv, "child creation disallowed for child devices\n");
		return -EINVAL;
	}

	if (!data || !data[IFLA_IPOIB_PKEY]) {
		ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
		child_pkey  = ppriv->pkey;
	} else
		child_pkey  = nla_get_u16(data[IFLA_IPOIB_PKEY]);

	if (child_pkey == 0 || child_pkey == 0x8000)
		return -EINVAL;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	child_pkey |= 0x8000;

	err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);

	if (!err && data)
		err = ipoib_changelink(dev, tb, data);
	return err;
}
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
{
	struct ib_send_wr *bad_wr;
	struct ipoib_cm_rx *p;

	/* We only reserved 1 extra slot in CQ for drain WRs, so
	 * make sure we have at most 1 outstanding WR. */
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;

	/*
	 * QPs on flush list are error state.  This way, a "flush
	 * error" WC will be immediately generated for each WR we post.
	 */
	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
		ipoib_warn(priv, "failed to post drain wr\n");

	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}
static int ipoib_set_coalesce(struct net_device *dev,
			      struct ethtool_coalesce *coal)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;

	if (coal->rx_coalesce_usecs       > 0xffff ||
	    coal->rx_max_coalesced_frames > 0xffff)
		return -EINVAL;

	ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
			   coal->rx_coalesce_usecs);
	if (ret && ret != -ENOSYS) {
		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
		return ret;
	}

	priv->ethtool.coalesce_usecs       = coal->rx_coalesce_usecs;
	priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;

	return 0;
}
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_recv_wr *bad_wr;
	int i, ret;

	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	for (i = 0; i < priv->cm.num_frags; ++i)
		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];

	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
				      priv->cm.srq_ring[id].mapping);
		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
		priv->cm.srq_ring[id].skb = NULL;
	}

	return ret;
}
Example #14
0
int ipoib_init_qp(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;
	u16 pkey_index;
	struct ib_qp_attr qp_attr;
	int attr_mask;

	/*
	 * Search through the port P_Key table for the requested pkey value.
	 * The port has to be assigned to the respective IB partition in
	 * advance.
	 */
	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
	if (ret) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		return ret;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.qkey = 0;
	qp_attr.port_num = priv->port;
	qp_attr.pkey_index = pkey_index;
	attr_mask =
	    IB_QP_QKEY |
	    IB_QP_PORT |
	    IB_QP_PKEY_INDEX |
	    IB_QP_STATE;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
		goto out_fail;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	/* Can't set this in a INIT->RTR transition */
	attr_mask &= ~IB_QP_PORT;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
		goto out_fail;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	attr_mask |= IB_QP_SQ_PSN;
	attr_mask &= ~IB_QP_PKEY_INDEX;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
		goto out_fail;
	}

	return 0;

out_fail:
	qp_attr.qp_state = IB_QPS_RESET;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		ipoib_warn(priv, "Failed to modify QP to RESET state\n");

	return ret;
}
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
					   struct ipoib_cm_rx *p)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->recv_cq, /* For drain WR */
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1, /* For drain WR */
		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	if (!ipoib_cm_has_srq(dev)) {
		attr.cap.max_recv_wr  = ipoib_recvq_size;
		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
	}

	return ib_create_qp(priv->pd, &attr);
}

static int ipoib_cm_modify_rx_qp(struct net_device *dev,
				 struct ib_cm_id *cm_id, struct ib_qp *qp,
				 unsigned psn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

	return 0;
}

static void ipoib_cm_init_rx_wr(struct net_device *dev,
				struct ib_recv_wr *wr,
				struct ib_sge *sge)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < priv->cm.num_frags; ++i)
		sge[i].lkey = priv->mr->lkey;

	sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (i = 1; i < priv->cm.num_frags; ++i)
		sge[i].length = PAGE_SIZE;

	wr->next    = NULL;
	wr->sg_list = sge;
	wr->num_sge = priv->cm.num_frags;
}

static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
				   struct ipoib_cm_rx *rx)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
	int ret;
	int i;

<<<<<<< HEAD
	rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
=======