/* * Connection established. * We get here for both outgoing and incoming connection. */ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) { const struct rds_ib_connect_private *dp = NULL; struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_device *rds_ibdev; struct ib_qp_attr qp_attr; int err; if (event->param.conn.private_data_len >= sizeof(*dp)) { dp = event->param.conn.private_data; /* make sure it isn't empty data */ if (dp->dp_protocol_major) { rds_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)); rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); } } printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", &conn->c_faddr, RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); /* * Init rings and fill recv. this needs to wait until protocol negotiation * is complete, since ring layout is different from 3.0 to 3.1. */ rds_ib_send_init_ring(ic); rds_ib_recv_init_ring(ic); /* Post receive buffers - as a side effect, this will update * the posted credit count. */ rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); /* Tune RNR behavior */ rds_ib_tune_rnr(ic, &qp_attr); qp_attr.qp_state = IB_QPS_RTS; err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); if (err) printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); /* update ib_device with this local ipaddr & conn */ rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); if (err) printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); rds_ib_add_conn(rds_ibdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ if (dp && dp->dp_ack_seq) rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); rds_connect_complete(conn); }
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rds_ib_setup_qp(struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; int ret, fr_queue_space; /* * It's normal to see a null device if an incoming connection races * with device removal, so we don't print a warning. */ rds_ibdev = rds_ib_get_client_data(dev); if (!rds_ibdev) return -EOPNOTSUPP; /* The fr_queue_space is currently set to 512, to add extra space on * completion queue and send queue. This extra space is used for FRMR * registration and invalidation work requests */ fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1; ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, rds_ib_cq_event_handler, conn, &cq_attr); if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); goto out; } cq_attr.cqe = ic->i_recv_ring.w_nr; ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv, rds_ib_cq_event_handler, conn, &cq_attr); if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); goto out; } /* XXX negotiate max send/recv with remote? */ memset(&attr, 0, sizeof(attr)); attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? */ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), ibdev_to_node(dev)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } rds_ib_recv_init_ack(ic); rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); out: rds_ib_dev_put(rds_ibdev); return ret; }
static int rds_ib_setup_qp(struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct rds_ib_device *rds_ibdev; int ret; rds_ibdev = rds_ib_get_client_data(dev); if (!rds_ibdev) return -EOPNOTSUPP; rds_ib_add_conn(rds_ibdev, conn); if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); ic->i_pd = rds_ibdev->pd; ic->i_mr = rds_ibdev->mr; ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, rds_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1, 0); if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); goto out; } ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, rds_ib_cq_event_handler, conn, ic->i_recv_ring.w_nr, 0); if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); goto out; } memset(&attr, 0, sizeof(attr)); attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), ibdev_to_node(dev)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } rds_ib_recv_init_ack(ic); rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, ic->i_send_cq, ic->i_recv_cq); out: rds_ib_dev_put(rds_ibdev); return ret; }