int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; struct ib_qp_attr qp_attr; int attr_mask; if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return -1; qp_attr.qp_state = IB_QPS_INIT; qp_attr.qkey = 0; qp_attr.port_num = priv->port; qp_attr.pkey_index = priv->pkey_index; attr_mask = IB_QP_QKEY | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_STATE; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret); goto out_fail; } qp_attr.qp_state = IB_QPS_RTR; /* Can't set this in a INIT->RTR transition */ attr_mask &= ~IB_QP_PORT; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret); goto out_fail; } qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; attr_mask |= IB_QP_SQ_PSN; attr_mask &= ~IB_QP_PKEY_INDEX; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret); goto out_fail; } return 0; out_fail: qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); return ret; }
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net_device *pdev; struct ipoib_dev_priv *ppriv; u16 child_pkey; int err; if (!tb[IFLA_LINK]) return -EINVAL; pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!pdev || pdev->type != ARPHRD_INFINIBAND) return -ENODEV; ppriv = ipoib_priv(pdev); if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) { ipoib_warn(ppriv, "child creation disallowed for child devices\n"); return -EINVAL; } if (!data || !data[IFLA_IPOIB_PKEY]) { ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n"); child_pkey = ppriv->pkey; } else child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev); if (err) { ipoib_warn(ppriv, "failed to initialize pkey device\n"); return err; } err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), child_pkey, IPOIB_RTNL_CHILD); if (err) return err; if (data) { err = ipoib_changelink(dev, tb, data, extack); if (err) { unregister_netdevice(dev); return err; } } return 0; }
static int ipoib_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; /* * These values are saved in the private data and returned * when ipoib_get_coalesce() is called */ if (coal->rx_coalesce_usecs > 0xffff || coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; return 0; }
static int ipoib_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; /* * Since IPoIB uses a single CQ for both rx and tx, we assume * that rx params dictate the configuration. These values are * saved in the private data and returned when ipoib_get_coalesce() * is called. */ if (coal->rx_coalesce_usecs > 0xffff || coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } coal->tx_coalesce_usecs = coal->rx_coalesce_usecs; coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames; priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; return 0; }
static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, struct ipoib_cm_rx *rx, struct ib_recv_wr *wr, struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < IPOIB_CM_RX_SG; ++i) sge[i].addr = rx->rx_ring[id].mapping[i]; ret = ib_post_recv(rx->qp, wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, rx->rx_ring[id].mapping); dev_kfree_skb_any(rx->rx_ring[id].skb); rx->rx_ring[id].skb = NULL; } return ret; }
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr *qp_attr; int attr_mask; int ret; u16 pkey_index; ret = -ENOMEM; qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); if (!qp_attr) goto out; if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; goto out; } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); /* set correct QKey for QP */ qp_attr->qkey = priv->qkey; attr_mask = IB_QP_QKEY; ret = ib_modify_qp(priv->qp, qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); goto out; } /* attach QP to multicast group */ down(&priv->mcast_mutex); ret = ib_attach_mcast(priv->qp, mgid, mlid); up(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); out: kfree(qp_attr); return ret; }
int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; mutex_lock(&priv->mcast_mutex); ret = ib_detach_mcast(priv->qp, mgid, mlid); mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); return ret; }
int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; goto out; } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); if (set_qkey) { ret = -ENOMEM; qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); if (!qp_attr) goto out; /* set correct QKey for QP */ qp_attr->qkey = qkey; ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); if (ret) { ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); goto out; } } /* attach QP to multicast group */ ret = ib_attach_mcast(priv->qp, mgid, mlid); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); out: kfree(qp_attr); return ret; }
static int ipoib_set_tso(struct net_device *dev, u32 data) { struct ipoib_dev_priv *priv = netdev_priv(dev); if (data) { if (!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) && (dev->features & NETIF_F_SG) && (priv->hca_caps & IB_DEVICE_UD_TSO)) { dev->features |= NETIF_F_TSO; } else { ipoib_warn(priv, "can't set TSO on\n"); return -EOPNOTSUPP; } } else dev->features &= ~NETIF_F_TSO; return 0; }
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_device *pdev; struct ipoib_dev_priv *ppriv; u16 child_pkey; int err; if (!tb[IFLA_LINK]) return -EINVAL; pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!pdev) return -ENODEV; ppriv = netdev_priv(pdev); if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) { ipoib_warn(ppriv, "child creation disallowed for child devices\n"); return -EINVAL; } if (!data || !data[IFLA_IPOIB_PKEY]) { ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n"); child_pkey = ppriv->pkey; } else child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); if (child_pkey == 0 || child_pkey == 0x8000) return -EINVAL; /* * Set the full membership bit, so that we join the right * broadcast group, etc. */ child_pkey |= 0x8000; err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD); if (!err && data) err = ipoib_changelink(dev, tb, data); return err; }
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) { struct ib_send_wr *bad_wr; struct ipoib_cm_rx *p; /* We only reserved 1 extra slot in CQ for drain WRs, so * make sure we have at most 1 outstanding WR. */ if (list_empty(&priv->cm.rx_flush_list) || !list_empty(&priv->cm.rx_drain_list)) return; /* * QPs on flush list are error state. This way, a "flush * error" WC will be immediately generated for each WR we post. */ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) ipoib_warn(priv, "failed to post drain wr\n"); list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); }
static int ipoib_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; if (coal->rx_coalesce_usecs > 0xffff || coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; return 0; }
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, priv->cm.srq_ring[id].mapping); dev_kfree_skb_any(priv->cm.srq_ring[id].skb); priv->cm.srq_ring[id].skb = NULL; } return ret; }
int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; u16 pkey_index; struct ib_qp_attr qp_attr; int attr_mask; /* * Search through the port P_Key table for the requested pkey value. * The port has to be assigned to the respective IB partition in * advance. */ ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index); if (ret) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); return ret; } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); qp_attr.qp_state = IB_QPS_INIT; qp_attr.qkey = 0; qp_attr.port_num = priv->port; qp_attr.pkey_index = pkey_index; attr_mask = IB_QP_QKEY | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_STATE; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret); goto out_fail; } qp_attr.qp_state = IB_QPS_RTR; /* Can't set this in a INIT->RTR transition */ attr_mask &= ~IB_QP_PORT; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret); goto out_fail; } qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; attr_mask |= IB_QP_SQ_PSN; attr_mask &= ~IB_QP_PKEY_INDEX; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret); goto out_fail; } return 0; out_fail: qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); return ret; }
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, struct ipoib_cm_rx *p) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { .event_handler = ipoib_cm_rx_event_handler, .send_cq = priv->recv_cq, /* For drain WR */ .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = 1, /* For drain WR */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, .qp_context = p, }; if (!ipoib_cm_has_srq(dev)) { attr.cap.max_recv_wr = ipoib_recvq_size; attr.cap.max_recv_sge = IPOIB_CM_RX_SG; } return ib_create_qp(priv->pd, &attr); } static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, unsigned psn) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); if (ret) { ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); return ret; } ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); return ret; } qp_attr.qp_state = IB_QPS_RTR; ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); if (ret) { ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); return ret; } qp_attr.rq_psn = psn; ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); return ret; } /* * Current Mellanox HCA firmware won't generate completions * with error for drain WRs unless the QP has been moved to * RTS first. This work-around leaves a window where a QP has * moved to error asynchronously, but this will eventually get * fixed in firmware, so let's not error out if modify QP * fails. */ qp_attr.qp_state = IB_QPS_RTS; ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); if (ret) { ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); return 0; } ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); return 0; } return 0; } static void ipoib_cm_init_rx_wr(struct net_device *dev, struct ib_recv_wr *wr, struct ib_sge *sge) { struct ipoib_dev_priv *priv = netdev_priv(dev); int i; for (i = 0; i < priv->cm.num_frags; ++i) sge[i].lkey = priv->mr->lkey; sge[0].length = IPOIB_CM_HEAD_SIZE; for (i = 1; i < priv->cm.num_frags; ++i) sge[i].length = PAGE_SIZE; wr->next = NULL; wr->sg_list = sge; wr->num_sge = priv->cm.num_frags; } static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct { struct ib_recv_wr wr; struct ib_sge sge[IPOIB_CM_RX_SG]; } *t; int ret; int i; <<<<<<< HEAD rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring); =======