/*
 * Bring the IPoIB UD QP through the required INIT -> RTR -> RTS state
 * sequence so it can send and receive.
 *
 * Returns 0 on success or the ib_modify_qp() error code; on failure the
 * QP is reset (best effort) so a later retry starts from a clean state.
 * NOTE(review): returns bare -1 (not a -errno) when no P_Key has been
 * assigned yet — callers must treat any non-zero value as failure.
 */
int ipoib_init_qp(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;
	struct ib_qp_attr qp_attr;
	int attr_mask;

	/* The P_Key index must already be resolved before arming the QP. */
	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
		return -1;

	/* RESET -> INIT: state, qkey, port and pkey_index are all required. */
	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.qkey = 0;
	qp_attr.port_num = priv->port;
	qp_attr.pkey_index = priv->pkey_index;
	attr_mask =
	    IB_QP_QKEY |
	    IB_QP_PORT |
	    IB_QP_PKEY_INDEX |
	    IB_QP_STATE;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
		goto out_fail;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	/* Can't set this in a INIT->RTR transition */
	attr_mask &= ~IB_QP_PORT;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
		goto out_fail;
	}

	/* RTR -> RTS: add the send PSN, drop the pkey index (RTR-only). */
	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	attr_mask |= IB_QP_SQ_PSN;
	attr_mask &= ~IB_QP_PKEY_INDEX;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
		goto out_fail;
	}

	return 0;

out_fail:
	/* Best-effort reset so the QP is reusable after a failed bring-up. */
	qp_attr.qp_state = IB_QPS_RESET;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		ipoib_warn(priv, "Failed to modify QP to RESET state\n");

	return ret;
}
/*
 * Move a UD QP to the RTS state (INIT -> RTR -> RTS) on port 1 with
 * pkey index 0, then query back the QKey the core assigned and publish
 * it in the file-scope local_info so it can be advertised to peers.
 *
 * Returns 0 on success, 1 on any failure (errors are logged via printk).
 */
static int init_qp (struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int ret, attr_mask;
	struct ib_qp_init_attr init_attr;

	/* RESET -> INIT: state, pkey index, port and qkey are all required. */
	memset (&qp_attr, 0, sizeof (qp_attr));
	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.pkey_index = 0;
	qp_attr.port_num = 1;
	qp_attr.qkey = 0;
	attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY;
	ret = ib_modify_qp (qp, &qp_attr, attr_mask);
	if (ret) {
		printk (KERN_INFO "failed to modify QP to init, ret = %d\n", ret);
		return 1;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	/* Can't set this in a INIT->RTR transition */
	attr_mask &= ~IB_QP_PORT;
	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
	if (ret) {
		printk (KERN_INFO "failed to modify QP to RTR, ret = %d\n", ret);
		return 1;
	}

	/* RTR -> RTS: add the send PSN, drop the pkey index (RTR-only). */
	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	attr_mask |= IB_QP_SQ_PSN;
	attr_mask &= ~IB_QP_PKEY_INDEX;
	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
	if (ret) {
		printk (KERN_INFO "failed to modify QP to RTS, ret = %d\n", ret);
		return 1;
	}

	/* Read back the effective QKey for local_info (shared state). */
	ret = ib_query_qp (qp, &qp_attr, IB_QP_QKEY, &init_attr);
	if (ret) {
		printk (KERN_INFO "failed to query QP: %d\n", ret);
		return 1;
	}
	local_info.qkey = qp_attr.qkey;

	return 0;
}
bool IBInterface::_setAttributeReadyToReceive() { /*Information needed to change the state of a queue pair through the ib_modify_qp call.*/ ib_qp_mod_t attr; memset( &attr, 0, sizeof( ib_qp_mod_t )); attr.req_state = IB_QPS_RTR; attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_4096; attr.state.rtr.dest_qp = _dests.getData()[0].qpn; attr.state.rtr.rq_psn = _dests.getData()[0].psn; attr.state.rtr.resp_res = 1; attr.state.rtr.rnr_nak_timeout = 12; attr.state.rtr.primary_av.grh_valid = 0; attr.state.rtr.primary_av.dlid = _dests.getData()[0].lid; attr.state.rtr.primary_av.sl = 0; attr.state.rtr.primary_av.path_bits = 0; attr.state.rtr.primary_av.port_num = 1; attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS; attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT | IB_MOD_QP_RESP_RES | IB_MOD_QP_PRIMARY_AV; if( ib_modify_qp( _queuePair, &attr ) != IB_SUCCESS ) { LBERROR << "Error during modification Queue pair RTR" << std::endl; return false; } return true; }
bool IBInterface::_setAttributeReadyToSend( ) { /*Information needed to change the state of a queue pair through the ib_modify_qp call.*/ ib_qp_mod_t attr; memset( &attr, 0, sizeof( ib_qp_mod_t )); attr.req_state = IB_QPS_RTS; attr.state.rts.sq_psn = _psn; attr.state.rts.resp_res = 1; attr.state.rts.local_ack_timeout = 14; attr.state.rts.retry_cnt = 7; attr.state.rts.rnr_retry_cnt = 7; attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT | IB_MOD_QP_RETRY_CNT | IB_MOD_QP_LOCAL_ACK_TIMEOUT; if( ib_modify_qp( _queuePair, &attr ) != IB_SUCCESS ) { LBERROR << "Error during modification Queue pair RTS" << std::endl; return false; } return true; }
/*
 * Finish RDS/IB connection setup once the CM reports the connection
 * established (both active and passive side): negotiate the protocol
 * version from the peer's private data, initialize the rings, move the
 * QP to RTS and complete the RDS-level connect.  Connections older than
 * protocol 3.1 are refused and torn down.
 */
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
	const struct rds_ib_connect_private *dp = NULL;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_qp_attr qp_attr;
	int err;

	/* Peer private data (if large enough) carries version and credits. */
	if (event->param.conn.private_data_len >= sizeof(*dp)) {
		dp = event->param.conn.private_data;
		if (dp->dp_protocol_major) {
			rds_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor));
			rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
		}
	}

	if (conn->c_version < RDS_PROTOCOL(3,1)) {
		/* Pre-3.1 peers are no longer supported: drop the connection. */
		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
		       " no longer supported\n", &conn->c_faddr,
		       RDS_PROTOCOL_MAJOR(conn->c_version),
		       RDS_PROTOCOL_MINOR(conn->c_version));
		rds_conn_destroy(conn);
		return;
	} else {
		printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
		       &conn->c_faddr,
		       RDS_PROTOCOL_MAJOR(conn->c_version),
		       RDS_PROTOCOL_MINOR(conn->c_version),
		       ic->i_flowctl ? ", flow control" : "");
	}

	/* Ring init must happen after version negotiation (layout differs). */
	rds_ib_send_init_ring(ic);
	rds_ib_recv_init_ring(ic);
	rds_ib_recv_refill(conn, 1);

	/* Tune RNR behavior, then move the QP to RTS. */
	rds_ib_tune_rnr(ic, &qp_attr);
	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);

	/* Record our local address on the IB device. */
	err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
	if (err)
		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);

	/* Treat the peer's last-seen sequence like a received ACK. */
	if (dp && dp->dp_ack_seq)
		rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);

	rds_connect_complete(conn);
}
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 *
 * Variant that looks up the rds_ib_device via ib_get_client_data and
 * registers the connection with it (rds_ib_add_conn); accepts any
 * negotiated protocol version.
 */
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
	const struct rds_ib_connect_private *dp = NULL;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_device *rds_ibdev;
	struct ib_qp_attr qp_attr;
	int err;

	if (event->param.conn.private_data_len >= sizeof(*dp)) {
		dp = event->param.conn.private_data;

		/* make sure it isn't empty data */
		if (dp->dp_protocol_major) {
			rds_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor));
			rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
		}
	}

	printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
	       &conn->c_faddr,
	       RDS_PROTOCOL_MAJOR(conn->c_version),
	       RDS_PROTOCOL_MINOR(conn->c_version),
	       ic->i_flowctl ? ", flow control" : "");

	/*
	 * Init rings and fill recv. this needs to wait until protocol negotiation
	 * is complete, since ring layout is different from 3.0 to 3.1.
	 */
	rds_ib_send_init_ring(ic);
	rds_ib_recv_init_ring(ic);
	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);

	/* Tune RNR behavior */
	rds_ib_tune_rnr(ic, &qp_attr);

	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);

	/* update ib_device with this local ipaddr & conn */
	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
	err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
	if (err)
		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
	rds_ib_add_conn(rds_ibdev, conn);

	/* If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK. */
	if (dp && dp->dp_ack_seq)
		rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);

	rds_connect_complete(conn);
}
/* * Tune RNR behavior. Without flow control, we use a rather * low timeout, but not the absolute minimum - this should * be tunable. * * We already set the RNR retry count to 7 (which is the * smallest infinite number :-) above. * If flow control is off, we want to change this back to 0 * so that we learn quickly when our credit accounting is * buggy. * * Caller passes in a qp_attr pointer - don't waste stack spacv * by allocation this twice. */ static void rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr) { int ret; attr->min_rnr_timer = IB_RNR_TIMER_000_32; ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER); if (ret) printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret); }
/*
 * Set up this IB interface: register read/write memory blocks with the
 * adapter's protection domain, create the queue pair, move it into the
 * INIT state with RDMA-write/local-write access, query its attributes
 * and cache the local LID of port 1.
 *
 * Returns true on success, false on any failure (errors are logged).
 */
bool IBInterface::create( IBAdapter* adapter, IBCompletionQueue* completionQueue, IBConnection* ibConnection )
{
    _completionQueue = completionQueue;
    _ibConnection = ibConnection;

    // memory block Write
    for ( int i = 0; i < EQ_NUMBLOCKMEMORY ; i++ )
    {
        if ( !_writeBlocks[i]->create( adapter->getProtectionDomain(), EQ_MAXBLOCKBUFFER ))
            return false;
        // memory block Read
        if ( !_readBlocks[i]->create( adapter->getProtectionDomain(), EQ_MAXBLOCKBUFFER ))
            return false;
    }

    _createQueuePair( adapter );

    // Move the fresh queue pair into the INIT state on port 1.
    ib_qp_mod_t queuePairModify;
    memset( &queuePairModify, 0, sizeof( ib_qp_mod_t ));
    queuePairModify.req_state = IB_QPS_INIT;
    queuePairModify.state.init.pkey_index = 0;
    queuePairModify.state.init.primary_port = 1;
    // Indicates the type of access is permitted on resources such as QPs,
    // memory regions and memory windows.
    queuePairModify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE ;

    ib_api_status_t ibStatus;
    ibStatus = ib_modify_qp( _queuePair, &queuePairModify );
    if ( ibStatus != IB_SUCCESS )
    {
        LBERROR << "cannot modify a queue pair" << std::endl;
        return false;
    }

    // Cache the queue pair attributes (e.g. its QP number) for later use.
    ibStatus = ib_query_qp( _queuePair, &_queuePairAttr );
    if ( ibStatus != IB_SUCCESS )
    {
        LBERROR << "cannot query a queue pair" << std::endl;
        return false;
    }

    _dlid = adapter->getLid( 1 );
    return true;
}
/* * Tune RNR behavior. Without flow control, we use a rather * low timeout, but not the absolute minimum - this should * be tunable. * * We already set the RNR retry count to 7 (which is the * smallest infinite number :-) above. * If flow control is off, we want to change this back to 0 * so that we learn quickly when our credit accounting is * buggy. * * Caller passes in a qp_attr pointer - don't waste stack spacv * by allocation this twice. */ static void rdsv3_ib_tune_rnr(struct rdsv3_ib_connection *ic, struct ib_qp_attr *attr) { int ret; RDSV3_DPRINTF2("rdsv3_ib_tune_rnr", "Enter ic: %p attr: %p", ic, attr); attr->min_rnr_timer = IB_RNR_TIMER_000_32; ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER); if (ret) RDSV3_DPRINTF2("rdsv3_ib_tune_rnr", "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d", -ret); }
/*
 * Attach the device's UD QP to the multicast group (mgid, mlid):
 * re-resolve the P_Key index, program the device QKey into the QP and
 * call ib_attach_mcast under the mcast mutex.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -ENXIO when the
 * P_Key is not found, or an ib_modify_qp/ib_attach_mcast error).
 */
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr *qp_attr;
	int attr_mask;
	int ret;
	u16 pkey_index;

	/* qp_attr is heap-allocated to keep this large struct off the stack. */
	ret = -ENOMEM;
	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
	if (!qp_attr)
		goto out;

	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		ret = -ENXIO;
		goto out;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	/* set correct QKey for QP */
	qp_attr->qkey = priv->qkey;
	attr_mask = IB_QP_QKEY;
	ret = ib_modify_qp(priv->qp, qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
		goto out;
	}

	/* attach QP to multicast group */
	down(&priv->mcast_mutex);
	ret = ib_attach_mcast(priv->qp, mgid, mlid);
	up(&priv->mcast_mutex);
	if (ret)
		ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);

out:
	kfree(qp_attr);
	return ret;
}
/*
 * Attach the device's UD QP to the multicast group (mgid, mlid).
 * Newer variant: the caller passes the QKey explicitly and set_qkey
 * says whether the QP's QKey needs (re)programming; qp_attr is only
 * allocated in that case.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -ENXIO when the
 * P_Key is not found, or an ib_modify_qp/ib_attach_mcast error).
 */
int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_attr *qp_attr = NULL;
	int ret;
	u16 pkey_index;

	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		ret = -ENXIO;
		goto out;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	if (set_qkey) {
		/* Heap-allocate the large attr struct only when needed. */
		ret = -ENOMEM;
		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
		if (!qp_attr)
			goto out;

		/* set correct QKey for QP */
		qp_attr->qkey = qkey;
		ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
		if (ret) {
			ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
			goto out;
		}
	}

	/* attach QP to multicast group */
	ret = ib_attach_mcast(priv->qp, mgid, mlid);
	if (ret)
		ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);

out:
	/* kfree(NULL) is a no-op, so this is safe on the early-error paths. */
	kfree(qp_attr);
	return ret;
}
/*
 * Handle the userspace MODIFY_QP uverbs command: copy the command from
 * userspace, look up the QP handle (validating that it belongs to the
 * caller's context), translate every field of the user command into a
 * kernel ib_qp_attr and apply it with ib_modify_qp().
 *
 * Returns in_len on success or a negative errno.
 *
 * Fix: the alternate-path pkey index was copied from cmd.pkey_index
 * instead of cmd.alt_pkey_index, so userspace could never program a
 * different P_Key for the alternate path.
 */
ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len)
{
	struct ib_uverbs_modify_qp cmd;
	struct ib_qp *qp;
	struct ib_qp_attr *attr;
	int ret;

	if (copy_from_user(&cmd, buf, sizeof cmd))
		return -EFAULT;

	/* attr is large; keep it off the kernel stack. */
	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	mutex_lock(&ib_uverbs_idr_mutex);

	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
	if (!qp || qp->uobject->context != file->ucontext) {
		ret = -EINVAL;
		goto out;
	}

	attr->qp_state		  = cmd.qp_state;
	attr->cur_qp_state	  = cmd.cur_qp_state;
	attr->path_mtu		  = cmd.path_mtu;
	attr->path_mig_state	  = cmd.path_mig_state;
	attr->qkey		  = cmd.qkey;
	attr->rq_psn		  = cmd.rq_psn;
	attr->sq_psn		  = cmd.sq_psn;
	attr->dest_qp_num	  = cmd.dest_qp_num;
	attr->qp_access_flags	  = cmd.qp_access_flags;
	attr->pkey_index	  = cmd.pkey_index;
	/* FIX: was cmd.pkey_index — the alternate path has its own index. */
	attr->alt_pkey_index	  = cmd.alt_pkey_index;
	attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
	attr->max_rd_atomic	  = cmd.max_rd_atomic;
	attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
	attr->min_rnr_timer	  = cmd.min_rnr_timer;
	attr->port_num		  = cmd.port_num;
	attr->timeout		  = cmd.timeout;
	attr->retry_cnt		  = cmd.retry_cnt;
	attr->rnr_retry		  = cmd.rnr_retry;
	attr->alt_port_num	  = cmd.alt_port_num;
	attr->alt_timeout	  = cmd.alt_timeout;

	/* Primary path address vector. */
	memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
	attr->ah_attr.grh.flow_label	    = cmd.dest.flow_label;
	attr->ah_attr.grh.sgid_index	    = cmd.dest.sgid_index;
	attr->ah_attr.grh.hop_limit	    = cmd.dest.hop_limit;
	attr->ah_attr.grh.traffic_class	    = cmd.dest.traffic_class;
	attr->ah_attr.dlid		    = cmd.dest.dlid;
	attr->ah_attr.sl		    = cmd.dest.sl;
	attr->ah_attr.src_path_bits	    = cmd.dest.src_path_bits;
	attr->ah_attr.static_rate	    = cmd.dest.static_rate;
	attr->ah_attr.ah_flags		    = cmd.dest.is_global ? IB_AH_GRH : 0;
	attr->ah_attr.port_num		    = cmd.dest.port_num;

	/* Alternate path address vector. */
	memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
	attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
	attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
	attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
	attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
	attr->alt_ah_attr.dlid		    = cmd.alt_dest.dlid;
	attr->alt_ah_attr.sl		    = cmd.alt_dest.sl;
	attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
	attr->alt_ah_attr.static_rate	    = cmd.alt_dest.static_rate;
	attr->alt_ah_attr.ah_flags	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
	attr->alt_ah_attr.port_num	    = cmd.alt_dest.port_num;

	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
	if (ret)
		goto out;

	ret = in_len;

out:
	mutex_unlock(&ib_uverbs_idr_mutex);
	kfree(attr);
	return ret;
}
/*
 * Connect unconnected endpoint.
 *
 * On reconnect (rep_connected != 0) the old cm_id/QP are torn down and
 * recreated first; connection refusals and zero-resource rejections are
 * retried up to RDMA_CONNECT_RETRY_MAX times via the `retry` label.
 * Returns 0 on success or a negative errno (also stored in
 * ep->rep_connected on failure).
 */
int rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		/* Tear down the old connection before rebuilding it. */
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_clean_cq(ep->rep_cq);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = PTR_ERR(id);
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETDOWN;
			goto out;
		}
		/* END TEMP */
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	}

	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		dprintk("RPC: %s: rdma_create_qp failed %i\n", __func__, rc);
		goto out;
	}

	/* XXX Tavor device performs badly with 2K MTU! */
	if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
		struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
		if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
		    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
		     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
			/* Clamp the path MTU to 1K on Tavor HCAs. */
			struct ib_qp_attr attr = {
				.path_mtu = IB_MTU_1024
			};
			rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
		}
	}

	ep->rep_connected = 0;
	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n", __func__, rc);
		goto out;
	}

	/* The CM event handler wakes us once rep_connected changes. */
	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 *
 * Variant that refuses pre-3.1 peers, passes a GFP mask to the recv
 * refill and reads dp_ack_seq via get_unaligned (the private data is
 * not guaranteed to be 8-byte aligned).
 */
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
	const struct rds_ib_connect_private *dp = NULL;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_qp_attr qp_attr;
	int err;

	if (event->param.conn.private_data_len >= sizeof(*dp)) {
		dp = event->param.conn.private_data;

		/* make sure it isn't empty data */
		if (dp->dp_protocol_major) {
			rds_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor));
			rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
		}
	}

	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
		/* Pre-3.1 peers are no longer supported: drop the connection. */
		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
		       " no longer supported\n", &conn->c_faddr,
		       RDS_PROTOCOL_MAJOR(conn->c_version),
		       RDS_PROTOCOL_MINOR(conn->c_version));
		rds_conn_destroy(conn);
		return;
	} else {
		printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
		       &conn->c_faddr,
		       RDS_PROTOCOL_MAJOR(conn->c_version),
		       RDS_PROTOCOL_MINOR(conn->c_version),
		       ic->i_flowctl ? ", flow control" : "");
	}

	/*
	 * Init rings and fill recv. this needs to wait until protocol negotiation
	 * is complete, since ring layout is different from 3.0 to 3.1.
	 */
	rds_ib_send_init_ring(ic);
	rds_ib_recv_init_ring(ic);
	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_ib_recv_refill(conn, 1, GFP_KERNEL);

	/* Tune RNR behavior */
	rds_ib_tune_rnr(ic, &qp_attr);

	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);

	/* update ib_device with this local ipaddr */
	err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
	if (err)
		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);

	/* If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK. */
	if (dp) {
		/* dp structure start is not guaranteed to be 8 bytes aligned.
		 * Since dp_ack_seq is 64-bit extended load operations can be
		 * used so go through get_unaligned to avoid unaligned errors.
		 */
		__be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq);

		if (dp_ack_seq)
			rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq), NULL);
	}

	rds_connect_complete(conn);
}
/*
 * Bring the context's RC QP through INIT -> RTR -> RTS using the remote
 * QPN/PSN/LID stored in ctx, enabling remote read/write/atomic access.
 *
 * Returns 0 on success; CHECK_MSG_RET returns -1 on any transition
 * failure.
 */
static int modify_qp(rdma_ctx_t ctx)
{
	int retval;
	struct ib_qp_attr attr;

	/* RESET -> INIT: enable remote read/write/atomic on port 1. */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = 0;
	attr.port_num = 1;
	attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC;

	LOG_KERN(LOG_INFO, ("Going to INIT..\n"));
	retval = ib_modify_qp(ctx->qp, &attr,
			IB_QP_STATE |
			IB_QP_PKEY_INDEX |
			IB_QP_PORT |
			IB_QP_ACCESS_FLAGS);
	CHECK_MSG_RET(retval == 0, "Error moving to INIT", -1);

	LOG_KERN(LOG_INFO, ("Preparing for RTR. mtu: %d rem_qpn: %d rem_psn: %d rem_lid: %d\n",
			rdma_ib_device.attr.active_mtu, ctx->rem_qpn, ctx->rem_psn, ctx->rem_lid));

	/* INIT -> RTR: point the QP at the remote peer's QPN/PSN/LID. */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;
	attr.path_mtu = rdma_ib_device.attr.active_mtu;
	attr.dest_qp_num = ctx->rem_qpn;
	attr.rq_psn = ctx->rem_psn;
	attr.max_dest_rd_atomic = 1;
	attr.min_rnr_timer = 12;
	attr.ah_attr.dlid = ctx->rem_lid;
	attr.ah_attr.sl = 0; // service level
	attr.ah_attr.src_path_bits = 0;
	attr.ah_attr.port_num = 1;

	LOG_KERN(LOG_INFO, ("Going to RTR..\n"));
	retval = ib_modify_qp(ctx->qp, &attr,
			IB_QP_STATE |
			IB_QP_AV |
			IB_QP_PATH_MTU |
			IB_QP_DEST_QPN |
			IB_QP_RQ_PSN |
			IB_QP_MAX_DEST_RD_ATOMIC |
			IB_QP_MIN_RNR_TIMER);
	CHECK_MSG_RET(retval == 0, "Error moving to RTR", -1);

	/* RTR -> RTS: set timeouts/retries and our send PSN.  attr is
	 * deliberately not re-zeroed; only the masked fields are read. */
	attr.qp_state = IB_QPS_RTS;
	attr.timeout = 14;
	attr.retry_cnt = 7;
	attr.rnr_retry = 6;
	attr.sq_psn = ctx->psn;
	attr.max_rd_atomic = 1;

	LOG_KERN(LOG_INFO, ("Going to RTS..\n"));
	retval = ib_modify_qp(ctx->qp, &attr,
			IB_QP_STATE |
			IB_QP_TIMEOUT |
			IB_QP_RETRY_CNT |
			IB_QP_RNR_RETRY |
			IB_QP_SQ_PSN |
			IB_QP_MAX_QP_RD_ATOMIC);
	CHECK_MSG_RET(retval == 0, "Error moving to RTS", -1);

	return 0;
}
int roq_eth_rem_init_qp(struct net_device *ndev) { struct roq_eth_priv *vdev = netdev_priv(ndev); struct ib_qp_init_attr create_qp_attrs; struct ib_qp_attr qp_attr; enum ib_qp_attr_mask qp_attr_mask; char *argv[] = {"/etc/init.d/post_discovery", NULL}; char *env[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", "LD_LIBRARY_PATH=/lib:/usr/lib", NULL}; int i, rank, size, ret = 0; if (vdev->send_cq == NULL || vdev->recv_cq == NULL || vdev->srq == NULL || vdev->kpd == NULL) { vdev->fix_rem = 1; pr_warn("roq_eth_rem_init: return w/o discovery\n"); return 0; } /* clean old remote qps */ if (vdev->rem_part_size) { for (i = 0; i < vdev->rem_part_size; i++) ib_destroy_qp(vdev->qps_rem[i]); kfree(vdev->qps_rem); } vdev->rem_part_size = RoQ_NetworkSize(vdev->netdesc_rem); rank = roq_tcoords_to_rank(vdev->netdesc, vdev->personality.Network_Config.Acoord, vdev->personality.Network_Config.Bcoord, vdev->personality.Network_Config.Ccoord, vdev->personality.Network_Config.Dcoord, vdev->personality.Network_Config.Ecoord); if (IS_ERR_VALUE(rank)) { ret = -EINVAL; pr_warn("roq_eth_rem_init: invalid rank\n"); goto out; } memset(&create_qp_attrs, 0, sizeof(struct ib_qp_init_attr)); create_qp_attrs.send_cq = vdev->send_cq; create_qp_attrs.recv_cq = vdev->recv_cq; /* set some more parameters */ create_qp_attrs.qp_type = IB_QPT_UD; create_qp_attrs.event_handler = NULL; create_qp_attrs.qp_context = NULL; create_qp_attrs.srq = vdev->srq; create_qp_attrs.cap.max_send_wr = MAX_TX_SKBS; create_qp_attrs.cap.max_recv_wr = 1; create_qp_attrs.cap.max_send_sge = 1; create_qp_attrs.cap.max_recv_sge = 1; create_qp_attrs.cap.max_inline_data = 0; size = sizeof(struct ib_qp *) * vdev->rem_part_size; vdev->qps_rem = (struct ib_qp **)kmalloc(size, GFP_KERNEL); if (!vdev->qps_rem) { pr_warn("roq_eth_rem_init_qp: remote QP alloc failed"); ret = -ENOMEM; goto out; } for (i = 0; i < vdev->rem_part_size; i++) { vdev->qps_rem[i] = ib_create_qp(vdev->kpd, &create_qp_attrs); if 
(IS_ERR(vdev->qps_rem[i])) { pr_warn("roq_eth_rem_init_qp: error creating qp %p\n", vdev->qps_rem[i]); ret = PTR_ERR(vdev->qps_rem[i]); goto out; } } for (i = 0; i < vdev->rem_part_size; i++) { qp_attr_mask = 0; qp_attr_mask |= IB_QP_STATE; qp_attr.qp_state = IB_QPS_RTS; qp_attr_mask |= IB_QP_AV; /* this QP will send to peer rank i (zero based) */ qp_attr.ah_attr.dlid = 0x8000 | i; qp_attr_mask |= IB_QP_DEST_QPN; /* * this QP will send to peer qp num rank + 1 * (QP zero is reserved) */ qp_attr.dest_qp_num = rank + 1; ib_modify_qp(vdev->qps_rem[i], &qp_attr, qp_attr_mask); } ret = call_usermodehelper(argv[0], argv, env, UMH_WAIT_EXEC); out: return ret; }
static int roq_eth_init_qp(struct net_device *ndev) { struct roq_eth_priv *vdev = netdev_priv(ndev); struct ib_qp_init_attr create_qp_attrs; struct ib_device *ibdev = vdev->ibdev; struct ib_qp_attr qp_attr; enum ib_qp_attr_mask qp_attr_mask; struct ib_srq_init_attr srq_attr; int rank, i, size, ret = 0; /* initialize variables */ memset(&create_qp_attrs, 0, sizeof(struct ib_qp_init_attr)); rank = roq_tcoords_to_rank(vdev->netdesc, vdev->personality.Network_Config.Acoord, vdev->personality.Network_Config.Bcoord, vdev->personality.Network_Config.Ccoord, vdev->personality.Network_Config.Dcoord, vdev->personality.Network_Config.Ecoord); if (IS_ERR_VALUE(rank)) { ret = -EINVAL; pr_info("roq_eth_init_qp: invalid rank\n"); goto out; } if ((ret = roq_alloc_rx_buffers(vdev)) != 0) goto out; /* create completion queues */ vdev->send_cq = ib_create_cq(ibdev, roq_eth_tx_ib_compl, NULL, vdev->ndev, MAX_TX_SKBS, 0); if (IS_ERR(vdev->send_cq)) { pr_warn("roq_eth_init_qp: ib_create_cq failed"); ret = PTR_ERR(vdev->send_cq); vdev->send_cq = NULL; goto out; } vdev->recv_cq = ib_create_cq(ibdev, roq_eth_rx_ib_compl, NULL, vdev->ndev, MAX_RX_SKBS, 0); if (IS_ERR(vdev->recv_cq)) { pr_warn("roq_eth_init_qp: ib_create_cq failed"); ret = PTR_ERR(vdev->recv_cq); vdev->recv_cq = NULL; goto out; } create_qp_attrs.send_cq = vdev->send_cq; create_qp_attrs.recv_cq = vdev->recv_cq; /* allocate protection domain and qp array */ vdev->kpd = ib_alloc_pd(ibdev); if (IS_ERR(vdev->kpd)) { pr_warn("roq_eth_init_qp: ib_alloc_pd failed"); ret = PTR_ERR(vdev->kpd); vdev->kpd = NULL; goto out; } memset(&srq_attr, 0, sizeof(struct ib_srq_init_attr)); srq_attr.attr.max_wr = MAX_RX_SKBS; srq_attr.attr.max_sge = 1; vdev->srq = ib_create_srq(vdev->kpd, &srq_attr); if (IS_ERR(vdev->srq)) { pr_warn("roq_eth_init_qp: ib_create_srq failed"); ret = PTR_ERR(vdev->srq); vdev->srq = NULL; goto out; } /* set some more parameters */ create_qp_attrs.qp_type = IB_QPT_UD; create_qp_attrs.event_handler = NULL; 
create_qp_attrs.qp_context = NULL; create_qp_attrs.srq = vdev->srq; create_qp_attrs.cap.max_send_wr = MAX_TX_SKBS; create_qp_attrs.cap.max_recv_wr = 1; create_qp_attrs.cap.max_send_sge = 1; create_qp_attrs.cap.max_recv_sge = 1; create_qp_attrs.cap.max_inline_data = 0; size = sizeof(struct ib_qp *) * vdev->part_size; vdev->qps = kmalloc(size, GFP_KERNEL); if (!vdev->qps) { pr_warn("roq_eth_init_qp: kmalloc failed\n"); goto out; } for (i = 0; i < vdev->part_size; i++) { vdev->qps[i] = ib_create_qp(vdev->kpd, &create_qp_attrs); if (IS_ERR(vdev->qps[i])) { ret = PTR_ERR(vdev->qps[i]); pr_warn("roq_eth_init_qp: ib_create_qp failed: %d", ret); goto out; } } ret = ib_req_notify_cq(vdev->send_cq, IB_CQ_NEXT_COMP); if (ret) goto out; ret = ib_req_notify_cq(vdev->recv_cq, IB_CQ_NEXT_COMP); if (ret) goto out; for (i = 0; i < vdev->part_size; i++) { qp_attr_mask = 0; qp_attr_mask |= IB_QP_STATE; qp_attr.qp_state = IB_QPS_RTS; qp_attr_mask |= IB_QP_AV; /* this QP will send to peer rank i (zero based) */ qp_attr.ah_attr.dlid = i; qp_attr_mask |= IB_QP_DEST_QPN; /* * this QP will send to peer QP num rank + 1 * (QP zero is reserved) */ qp_attr.dest_qp_num = rank + 1; ib_modify_qp(vdev->qps[i], &qp_attr, qp_attr_mask); } /* SETUP RECEIVE QP */ for (i = 0; i < MAX_RX_SKBS; i++) roq_eth_post_recv(vdev, i); if (vdev->fix_rem == 1) { roq_eth_rem_init_qp(ndev); vdev->fix_rem = 0; } out: if (ret) { pr_warn("roq_eth_init_qp: rv = %d\n", ret); roq_eth_cleanup_ofa(vdev); } return ret; }
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 *
 * Solaris RDSv3 variant: additionally spins up the soft-CQ/refill
 * worker threads, arms CQ notification, and registers the connection
 * with the rdsv3_ib_device.  Pre-3.1 peers are refused and destroyed.
 */
void rdsv3_ib_cm_connect_complete(struct rdsv3_connection *conn, struct rdma_cm_event *event)
{
	const struct rdsv3_ib_connect_private *dp = NULL;
	struct rdsv3_ib_connection *ic = conn->c_transport_data;
	struct rdsv3_ib_device *rds_ibdev =
	    ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client);
	struct ib_qp_attr qp_attr;
	int err;

	RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
	    "Enter conn: %p event: %p", conn, event);

	if (event->param.conn.private_data_len >= sizeof (*dp)) {
		dp = event->param.conn.private_data;

		/* make sure it isn't empty data */
		if (dp->dp_protocol_major) {
			rdsv3_ib_set_protocol(conn,
			    RDS_PROTOCOL(dp->dp_protocol_major,
			    dp->dp_protocol_minor));
			rdsv3_ib_set_flow_control(conn, ntohl(dp->dp_credit));
		}
	}

	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
		/* Pre-3.1 peers are no longer supported: drop the connection. */
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "RDS/IB: Connection to %u.%u.%u.%u version %u.%u failed",
		    NIPQUAD(conn->c_faddr),
		    RDS_PROTOCOL_MAJOR(conn->c_version),
		    RDS_PROTOCOL_MINOR(conn->c_version));
		rdsv3_conn_destroy(conn);
		return;
	} else {
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "RDS/IB: connected to %u.%u.%u.%u version %u.%u%s",
		    NIPQUAD(conn->c_faddr),
		    RDS_PROTOCOL_MAJOR(conn->c_version),
		    RDS_PROTOCOL_MINOR(conn->c_version),
		    ic->i_flowctl ? ", flow control" : "");
	}

	/* Completion-handling worker threads bound to this HCA group. */
	ASSERT(ic->i_soft_cq == NULL);
	ic->i_soft_cq = rdsv3_af_intr_thr_create(rdsv3_ib_tasklet_fn,
	    (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp,
	    ic->i_cq->ibt_cq);
	if (rdsv3_enable_snd_cq) {
		ic->i_snd_soft_cq = rdsv3_af_intr_thr_create(
		    rdsv3_ib_snd_tasklet_fn,
		    (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp,
		    ic->i_snd_cq->ibt_cq);
	}
	/* rdsv3_ib_refill_fn is expecting i_max_recv_alloc set */
	ic->i_max_recv_alloc = rdsv3_ib_sysctl_max_recv_allocation;
	ic->i_refill_rq = rdsv3_af_thr_create(rdsv3_ib_refill_fn,
	    (void *)conn, SCQ_WRK_BIND_CPU, rds_ibdev->aft_hcagp);
	rdsv3_af_grp_draw(rds_ibdev->aft_hcagp);

	(void) ib_req_notify_cq(ic->i_cq, IB_CQ_SOLICITED);
	if (rdsv3_enable_snd_cq) {
		(void) ib_req_notify_cq(ic->i_snd_cq, IB_CQ_NEXT_COMP);
	}

	/*
	 * Init rings and fill recv. this needs to wait until protocol
	 * negotiation
	 * is complete, since ring layout is different from 3.0 to 3.1.
	 */
	rdsv3_ib_send_init_ring(ic);
	rdsv3_ib_recv_init_ring(ic);
	/*
	 * Post receive buffers - as a side effect, this will update
	 * the posted credit count.
	 */
	(void) rdsv3_ib_recv_refill(conn, 1);

	/* Tune RNR behavior */
	rdsv3_ib_tune_rnr(ic, &qp_attr);

	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "ib_modify_qp(IB_QP_STATE, RTS): err=%d", err);

	/* update ib_device with this local ipaddr & conn */
	err = rdsv3_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
	if (err)
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "rdsv3_ib_update_ipaddr failed (%d)", err);
	rdsv3_ib_add_conn(rds_ibdev, conn);

	/*
	 * If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK.
	 */
	if (dp && dp->dp_ack_seq)
		rdsv3_send_drop_acked(conn, ntohll(dp->dp_ack_seq), NULL);

	rdsv3_connect_complete(conn);

	RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
	    "Return conn: %p event: %p", conn, event);
}
/*
 * Create the RC QP used for an incoming IPoIB connected-mode (CM)
 * connection.  Both CQs point at the receive CQ because the only send
 * work request this QP ever posts is the drain WR.
 */
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, struct ipoib_cm_rx *p)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->recv_cq, /* For drain WR */
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1, /* For drain WR */
		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	/* Without an SRQ each RX QP needs its own receive resources. */
	if (!ipoib_cm_has_srq(dev)) {
		attr.cap.max_recv_wr = ipoib_recvq_size;
		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
	}

	return ib_create_qp(priv->pd, &attr);
}

/*
 * Walk the passive-side RX QP through INIT -> RTR -> RTS, letting the
 * CM (ib_cm_init_qp_attr) compute the attributes/mask for each step.
 * Returns 0 on success or a negative errno; RTS failures are tolerated
 * (see comment below) and still return 0.
 */
static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, unsigned psn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

	return 0;
}

/*
 * Pre-fill a receive work request template: one SG entry for the head
 * buffer and one page-sized entry per remaining fragment, all under the
 * device MR's lkey.
 */
static void ipoib_cm_init_rx_wr(struct net_device *dev, struct ib_recv_wr *wr, struct ib_sge *sge)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < priv->cm.num_frags; ++i)
		sge[i].lkey = priv->mr->lkey;

	sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (i = 1; i < priv->cm.num_frags; ++i)
		sge[i].length = PAGE_SIZE;

	wr->next = NULL;
	wr->sg_list = sge;
	wr->num_sge = priv->cm.num_frags;
}

/*
 * NOTE(review): the function below is truncated mid-body and contains
 * an UNRESOLVED MERGE CONFLICT (<<<<<<< HEAD / =======) that must be
 * resolved before this file can compile.  Left untouched here.
 */
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
	int ret;
	int i;

<<<<<<< HEAD
	rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
=======
/*
 * Bring the IPoIB UD QP through the required INIT -> RTR -> RTS state
 * sequence.  Newer variant: looks up the P_Key index in the cached port
 * table itself instead of relying on a pre-resolved priv->pkey_index.
 *
 * Returns 0 on success or a negative errno; on failure the QP is reset
 * (best effort) so a later retry starts from a clean state.
 */
int ipoib_init_qp(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;
	u16 pkey_index;
	struct ib_qp_attr qp_attr;
	int attr_mask;

	/*
	 * Search through the port P_Key table for the requested pkey value.
	 * The port has to be assigned to the respective IB partition in
	 * advance.
	 */
	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
	if (ret) {
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		return ret;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	/* RESET -> INIT: state, qkey, port and pkey_index are all required. */
	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.qkey = 0;
	qp_attr.port_num = priv->port;
	qp_attr.pkey_index = pkey_index;
	attr_mask =
	    IB_QP_QKEY |
	    IB_QP_PORT |
	    IB_QP_PKEY_INDEX |
	    IB_QP_STATE;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
		goto out_fail;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	/* Can't set this in a INIT->RTR transition */
	attr_mask &= ~IB_QP_PORT;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
		goto out_fail;
	}

	/* RTR -> RTS: add the send PSN, drop the pkey index (RTR-only). */
	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	attr_mask |= IB_QP_SQ_PSN;
	attr_mask &= ~IB_QP_PKEY_INDEX;
	ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
		goto out_fail;
	}

	return 0;

out_fail:
	/* Best-effort reset so the QP is reusable after a failed bring-up. */
	qp_attr.qp_state = IB_QPS_RESET;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		ipoib_warn(priv, "Failed to modify QP to RESET state\n");

	return ret;
}