static void pfmr_remove_one(struct ib_device *device) { struct pfmr_test_ctx *test_ctx = ib_get_client_data(device, &test_client); if (test_ctx) { if (test_ctx->fmr_map_op) { info("MAP FMR OP: %s was removed for IB device %s...\n", test_ctx->fmr_map_op->name, device->name); destroy_kvlop(test_ctx->fmr_map_op); } if (test_ctx->create_fmr_op) { info("Create FMR OP: %s was removed for IB device %s...\n", test_ctx->create_fmr_op->name, device->name); destroy_kvlop(test_ctx->create_fmr_op); } if (test_ctx->destroy_fmr_op) { info("Destroy FMR OP: %s was removed for IB device %s...\n", test_ctx->destroy_fmr_op->name, device->name); destroy_kvlop(test_ctx->destroy_fmr_op); } if (test_ctx->create_dma_op) { info("Create DMA OP: %s was removed for IB device %s...\n", test_ctx->create_dma_op->name, device->name); destroy_kvlop(test_ctx->create_dma_op); } if (test_ctx->destroy_dma_op) { info("Destroy DMA OP: %s was removed for IB device %s...\n", test_ctx->destroy_dma_op->name, device->name); destroy_kvlop(test_ctx->destroy_dma_op); } if (test_ctx->fmr_pool_op) { info("FMR pool OP: %s was removed for IB device %s...\n", test_ctx->fmr_pool_op->name, device->name); destroy_kvlop(test_ctx->fmr_pool_op); } if (test_ctx->cq_op) { info("CQ OP: %s was removed for IB device %s...\n", test_ctx->cq_op->name, device->name); destroy_kvlop(test_ctx->cq_op); } if (test_ctx->qp_op) { info("QP OP: %s was removed for IB device %s...\n", test_ctx->qp_op->name, device->name); destroy_kvlop(test_ctx->qp_op); } if (test_ctx->complete_test_op) { info("Complete test OP: %s was removed for IB device %s...\n", test_ctx->complete_test_op->name, device->name); destroy_kvlop(test_ctx->complete_test_op); } pfmr_destroy_test_ctx(test_ctx); } info("IB device %s removed successfully\n", device->name); }
static void roq_eth_add_one(struct ib_device *ibdev) { struct roq_eth_priv *priv; struct roq_dev *roq_dev = container_of(ibdev, struct roq_dev, ofa_dev); struct net_device *ndev = roq_dev->l2dev; /* roq is represented as RDMA_NODE_RNIC */ if (rdma_node_get_transport(ibdev->node_type) != RDMA_TRANSPORT_IWARP || strncmp(ibdev->name, "roq", IB_DEVICE_NAME_MAX) != 0) return; priv = ib_get_client_data(ibdev, &roq_eth_client); if (priv) { pr_warn("roq_eth_add_one: netdev already allocated!\n"); return; } SET_NETDEV_DEV(ndev, ibdev->dma_device); priv = netdev_priv(ndev); priv->ndev = ndev; priv->ibdev = ibdev; ib_set_client_data(ibdev, &roq_eth_client, priv); return; }
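/*
 * Illustrative sketch (not taken from the roq sources): the add/remove
 * callbacks above are hooked into the IB core through the client
 * interface, which is what makes the ib_set_client_data()/
 * ib_get_client_data() pairing work. The module init/exit function
 * names are hypothetical; the struct ib_client layout is the classic
 * one where remove() receives only the ib_device pointer.
 */
#include <linux/module.h>
#include <rdma/ib_verbs.h>

static struct ib_client roq_eth_client = {
	.name   = "roq_eth",
	.add    = roq_eth_add_one,
	.remove = roq_eth_remove_one,
};

static int __init roq_eth_init_module(void)
{
	/* After this, the core calls roq_eth_add_one() for every device. */
	return ib_register_client(&roq_eth_client);
}

static void __exit roq_eth_cleanup_module(void)
{
	/* Triggers roq_eth_remove_one() for each device still registered. */
	ib_unregister_client(&roq_eth_client);
}

module_init(roq_eth_init_module);
module_exit(roq_eth_cleanup_module);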
/* * Connection established. * We get here for both outgoing and incoming connection. */ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) { const struct rds_ib_connect_private *dp = NULL; struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_device *rds_ibdev; struct ib_qp_attr qp_attr; int err; if (event->param.conn.private_data_len >= sizeof(*dp)) { dp = event->param.conn.private_data; /* make sure it isn't empty data */ if (dp->dp_protocol_major) { rds_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)); rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); } } printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", &conn->c_faddr, RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); /* * Init rings and fill recv. this needs to wait until protocol negotiation * is complete, since ring layout is different from 3.0 to 3.1. */ rds_ib_send_init_ring(ic); rds_ib_recv_init_ring(ic); /* Post receive buffers - as a side effect, this will update * the posted credit count. */ rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); /* Tune RNR behavior */ rds_ib_tune_rnr(ic, &qp_attr); qp_attr.qp_state = IB_QPS_RTS; err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); if (err) printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); /* update ib_device with this local ipaddr & conn */ rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); if (err) printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); rds_ib_add_conn(rds_ibdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ if (dp && dp->dp_ack_seq) rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); rds_connect_complete(conn); }
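/*
 * Sketch of the version handshake used above. The RDS_PROTOCOL macros
 * pack the major number in the high byte and the minor in the low byte;
 * this encoding is inferred from how RDS_PROTOCOL_MAJOR/MINOR are used
 * in the surrounding code and is shown here only for illustration.
 */
#define RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
#define RDS_PROTOCOL_MINOR(v)	((v) & 255)
#define RDS_PROTOCOL(maj, min)	(((maj) << 8) | (min))

/* Hypothetical helper: accept the peer's advertised version, capped at
 * the highest version this side understands. */
static u32 rds_negotiate_version(u8 peer_major, u8 peer_minor, u32 local_max)
{
	u32 peer = RDS_PROTOCOL(peer_major, peer_minor);

	return peer < local_max ? peer : local_max;
}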
static void rdsv3_ib_cm_fill_conn_param(struct rdsv3_connection *conn, struct rdma_conn_param *conn_param, struct rdsv3_ib_connect_private *dp, uint32_t protocol_version, uint32_t max_responder_resources, uint32_t max_initiator_depth) { struct rdsv3_ib_connection *ic = conn->c_transport_data; struct rdsv3_ib_device *rds_ibdev; RDSV3_DPRINTF2("rdsv3_ib_cm_fill_conn_param", "Enter conn: %p conn_param: %p private: %p version: %d", conn, conn_param, dp, protocol_version); (void) memset(conn_param, 0, sizeof (struct rdma_conn_param)); rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client); conn_param->responder_resources = MIN(rds_ibdev->max_responder_resources, max_responder_resources); conn_param->initiator_depth = MIN(rds_ibdev->max_initiator_depth, max_initiator_depth); conn_param->retry_count = min(rdsv3_ib_retry_count, 7); conn_param->rnr_retry_count = 7; if (dp) { (void) memset(dp, 0, sizeof (*dp)); dp->dp_saddr = conn->c_laddr; dp->dp_daddr = conn->c_faddr; dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); dp->dp_protocol_minor_mask = htons(RDSV3_IB_SUPPORTED_PROTOCOLS); dp->dp_ack_seq = rdsv3_ib_piggyb_ack(ic); /* Advertise flow control */ if (ic->i_flowctl) { unsigned int credits; credits = IB_GET_POST_CREDITS( atomic_get(&ic->i_credits)); dp->dp_credit = htonl(credits); atomic_add_32(&ic->i_credits, -IB_SET_POST_CREDITS(credits)); } conn_param->private_data = dp; conn_param->private_data_len = sizeof (*dp); } RDSV3_DPRINTF2("rdsv3_ib_cm_fill_conn_param", "Return conn: %p conn_param: %p private: %p version: %d", conn, conn_param, dp, protocol_version); }
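/*
 * Sketch of the credit word manipulated above: i_credits is a single
 * atomic that keeps the send credits in its low 16 bits and the receive
 * buffers newly posted (but not yet advertised) in its high 16 bits.
 * Advertising flow control therefore means copying the posted count into
 * dp_credit and subtracting IB_SET_POST_CREDITS(credits) from the atomic,
 * exactly as done above. The macro definitions below are assumed from
 * that usage and may differ in detail from the real headers.
 */
#define IB_SET_SEND_CREDITS(v)	((v) & 0xffff)
#define IB_SET_POST_CREDITS(v)	((v) << 16)
#define IB_GET_SEND_CREDITS(v)	((v) & 0xffff)
#define IB_GET_POST_CREDITS(v)	((v) >> 16)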
static void roq_eth_remove_one(struct ib_device *ibdev) { struct roq_eth_priv *priv; if (rdma_node_get_transport(ibdev->node_type) != RDMA_TRANSPORT_IWARP || strncmp(ibdev->name, "roq", IB_DEVICE_NAME_MAX) != 0) return; priv = ib_get_client_data(ibdev, &roq_eth_client); if (!priv) return; priv->ndev->dev.parent = NULL; unregister_netdev(priv->ndev); free_netdev(priv->ndev); }
static int ib_create_path_iter(struct ib_device *device, u8 port_num, union ib_gid *dgid, struct ib_sa_attr_iter *iter) { struct sa_db_device *dev; struct sa_db_port *port; struct ib_sa_attr_list *list; dev = ib_get_client_data(device, &sa_db_client); if (!dev) return -ENODEV; port = &dev->port[port_num - dev->start_port]; read_lock_irqsave(&rwlock, iter->flags); list = find_attr_list(&port->paths, dgid->raw); if (!list) { ib_free_path_iter(iter); return -ENODATA; } iter->iter = &list->iter; return 0; }
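/*
 * Hypothetical counterpart to ib_create_path_iter() above: the iterator
 * is handed back with the rwlock still held via read_lock_irqsave(), so
 * callers must release it through a matching free routine once they are
 * done walking the paths. The body below is inferred from the locking in
 * ib_create_path_iter(), not copied from the original sa_db code.
 */
static void ib_free_path_iter(struct ib_sa_attr_iter *iter)
{
	read_unlock_irqrestore(&rwlock, iter->flags);
}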
/* * Connection established. * We get here for both outgoing and incoming connection. */ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) { const struct rds_iw_connect_private *dp = NULL; struct rds_iw_connection *ic = conn->c_transport_data; struct rds_iw_device *rds_iwdev; int err; if (event->param.conn.private_data_len) { dp = event->param.conn.private_data; rds_iw_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)); rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); } /* update ib_device with this local ipaddr & conn */ rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); if (err) printk(KERN_ERR "rds_iw_update_cm_id failed (%d)\n", err); rds_iw_add_conn(rds_iwdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ if (dp && dp->dp_ack_seq) rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n", &conn->c_laddr, &conn->c_faddr, RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); rds_connect_complete(conn); }
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rds_iw_setup_qp(struct rds_connection *conn) { struct rds_iw_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct rds_iw_device *rds_iwdev; int ret; /* rds_iw_add_one creates a rds_iw_device object per IB device, * and allocates a protection domain, memory range and MR pool * for each. If that fails for any reason, it will not register * the rds_iwdev at all. */ rds_iwdev = ib_get_client_data(dev, &rds_iw_client); if (!rds_iwdev) { if (printk_ratelimit()) printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } /* Protection domain and memory range */ ic->i_pd = rds_iwdev->pd; ic->i_mr = rds_iwdev->mr; ret = rds_iw_init_qp_attrs(&attr, rds_iwdev, &ic->i_send_ring, rds_iw_send_cq_comp_handler, &ic->i_recv_ring, rds_iw_recv_cq_comp_handler, conn); if (ret < 0) goto out; ic->i_send_cq = attr.send_cq; ic->i_recv_cq = attr.recv_cq; /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? */ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } rds_iw_send_init_ring(ic); ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } rds_iw_recv_init_ring(ic); rds_iw_recv_init_ack(ic); /* Post receive buffers - as a side effect, this will update * the posted credit count. */ rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, ic->i_send_cq, ic->i_recv_cq); out: return ret; }
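/*
 * Illustrative teardown (not the original rds_iw_conn_shutdown): every
 * ib_dma_alloc_coherent() and vmalloc() made in rds_iw_setup_qp() above
 * needs a matching release when the connection is torn down. The field
 * and ring names come from the setup code; the helper name itself is
 * hypothetical.
 */
static void rds_iw_free_qp_resources(struct rds_iw_connection *ic,
				     struct ib_device *dev)
{
	if (ic->i_send_hdrs)
		ib_dma_free_coherent(dev,
				     ic->i_send_ring.w_nr * sizeof(struct rds_header),
				     ic->i_send_hdrs, ic->i_send_hdrs_dma);
	if (ic->i_recv_hdrs)
		ib_dma_free_coherent(dev,
				     ic->i_recv_ring.w_nr * sizeof(struct rds_header),
				     ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
	if (ic->i_ack)
		ib_dma_free_coherent(dev, sizeof(struct rds_header),
				     ic->i_ack, ic->i_ack_dma);

	vfree(ic->i_sends);
	ic->i_sends = NULL;
	vfree(ic->i_recvs);
	ic->i_recvs = NULL;
}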
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id) { struct ib_qp_init_attr qp_init_attr = { .event_handler = sdp_qp_event_handler, .cap.max_send_wr = SDP_TX_SIZE, .cap.max_recv_wr = sdp_rx_size, .cap.max_inline_data = sdp_inline_thresh, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, }; struct ib_device *device = id->device; int rc; sdp_dbg(sk, "%s\n", __func__); sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device); sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge); qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES); sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge); qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES); sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge); sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client); if (!sdp_sk(sk)->sdp_dev) { sdp_warn(sk, "SDP not available on device %s\n", device->name); rc = -ENODEV; goto err_rx; } rc = sdp_rx_ring_create(sdp_sk(sk), device); if (rc) goto err_rx; rc = sdp_tx_ring_create(sdp_sk(sk), device); if (rc) goto err_tx; qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq; qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq; rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr); if (rc) { sdp_warn(sk, "Unable to create QP: %d.\n", rc); goto err_qp; } sdp_sk(sk)->qp = id->qp; sdp_sk(sk)->ib_device = device; sdp_sk(sk)->qp_active = 1; sdp_sk(sk)->context.device = device; sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data; sdp_dbg(sk, "%s done\n", __func__); return 0; err_qp: sdp_tx_ring_destroy(sdp_sk(sk)); err_tx: sdp_rx_ring_destroy(sdp_sk(sk)); err_rx: return rc; } static int sdp_get_max_send_frags(u32 buf_size) { return MIN( /* +1 to compensate for unaligned buffers */ (PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1, SDP_MAX_SEND_SGES - 1); } static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id, struct rdma_cm_event *event) { struct sockaddr_in *dst_addr; struct sock *child; const struct sdp_hh *h; int rc = 0; sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id); h = event->param.conn.private_data; SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh); if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) { sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n", h->ipv_cap & HH_IPV_MASK); return -EINVAL; } if (!h->max_adverts) return -EINVAL; #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)) child = sk_clone(sk, GFP_KERNEL); #else child = sk_clone_lock(sk, GFP_KERNEL); #endif if (!child) return -ENOMEM; sdp_init_sock(child); dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; sdp_inet_dport(child) = dst_addr->sin_port; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (inet6_sk(sk)) { struct ipv6_pinfo *newnp; newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child); memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo)); if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) { /* V6 mapped */ sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr; ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF), h->src_addr.ip4.addr); ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF), h->dst_addr.ip4.addr); ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr); } else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) { struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr; struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)&id->route.addr.src_addr; ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr); ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr); 
ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr); } else { sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK); } sdp_inet_daddr(child) = sdp_inet_saddr(child) = sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6; } else #endif { sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr; } #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif __sock_put(child, SOCK_REF_CLONE); down_read(&device_removal_lock); rc = sdp_init_qp(child, id); if (rc) { bh_unlock_sock(child); up_read(&device_removal_lock); sdp_sk(child)->destructed_already = 1; #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_close(child); #endif sk_free(child); return rc; } sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs); atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs); sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4; sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh); sdp_sk(child)->send_frags = sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal); sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size); id->context = child; sdp_sk(child)->id = id; list_add_tail(&sdp_sk(child)->backlog_queue, &sdp_sk(sk)->backlog_queue); sdp_sk(child)->parent = sk; bh_unlock_sock(child); sdp_add_sock(sdp_sk(child)); up_read(&device_removal_lock); sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV); /* child->sk_write_space(child); */ /* child->sk_data_ready(child, 0); */ sk->sk_data_ready(sk); return 0; } static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id, struct rdma_cm_event *event) { const struct sdp_hah *h; struct sockaddr_in *dst_addr; sdp_dbg(sk, "%s\n", __func__); sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED); sdp_set_default_moderation(sdp_sk(sk)); if (sock_flag(sk, SOCK_KEEPOPEN)) sdp_start_keepalive_timer(sk); if (sock_flag(sk, SOCK_DEAD)) return 0; h = event->param.conn.private_data; SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh); sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs); atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs); sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4; sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh); sdp_sk(sk)->send_frags = sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal); sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal, sdp_sk(sk)->send_frags * PAGE_SIZE); sdp_sk(sk)->poll_cq = 1; sk->sk_state_change(sk); sk_wake_async(sk, 0, POLL_OUT); dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; sdp_inet_dport(sk) = dst_addr->sin_port; sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr; #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif return 0; } static int sdp_connected_handler(struct sock *sk) { struct sock *parent; sdp_dbg(sk, "%s\n", __func__); parent = sdp_sk(sk)->parent; BUG_ON(!parent); sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED); #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif sdp_set_default_moderation(sdp_sk(sk)); if (sock_flag(sk, SOCK_KEEPOPEN)) sdp_start_keepalive_timer(sk); if (sock_flag(sk, SOCK_DEAD)) return 0; lock_sock(parent); if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? 
*/ sdp_dbg(sk, "parent is going away.\n"); goto done; } sk_acceptq_added(parent); sdp_dbg(parent, "%s child connection established\n", __func__); list_del_init(&sdp_sk(sk)->backlog_queue); list_add_tail(&sdp_sk(sk)->accept_queue, &sdp_sk(parent)->accept_queue); parent->sk_state_change(parent); sk_wake_async(parent, 0, POLL_OUT); done: release_sock(parent); return 0; } static int sdp_disconnected_handler(struct sock *sk) { struct sdp_sock *ssk = sdp_sk(sk); sdp_dbg(sk, "%s\n", __func__); if (ssk->tx_ring.cq) if (sdp_xmit_poll(ssk, 1)) sdp_post_sends(ssk, 0); if (sk->sk_state == TCP_SYN_RECV) { sdp_connected_handler(sk); if (rcv_nxt(ssk)) return 0; } return -ECONNRESET; } int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rdma_conn_param conn_param; struct sock *parent = NULL; struct sock *child = NULL; struct sock *sk; struct sdp_hah hah; struct sdp_hh hh; int rc = 0, rc2; sk = id->context; if (!sk) { sdp_dbg(NULL, "cm_id is being torn down, event %s\n", rdma_cm_event_str(event->event)); return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ? -EINVAL : 0; } sdp_add_to_history(sk, rdma_cm_event_str(event->event)); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event)); if (!sdp_sk(sk)->id) { sdp_dbg(sk, "socket is being torn down\n"); rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ? -EINVAL : 0; release_sock(sk); return rc; } switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: if (sdp_link_layer_ib_only && rdma_node_get_transport(id->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(id->device, id->port_num) != IB_LINK_LAYER_INFINIBAND) { sdp_dbg(sk, "Link layer is: %d. Only IB link layer " "is allowed\n", rdma_port_get_link_layer(id->device, id->port_num)); rc = -ENETUNREACH; break; } rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT); break; case RDMA_CM_EVENT_ADDR_ERROR: rc = -ENETUNREACH; break; case RDMA_CM_EVENT_ROUTE_RESOLVED: rc = sdp_init_qp(sk, id); if (rc) break; memset(&hh, 0, sizeof hh); hh.bsdh.mid = SDP_MID_HELLO; hh.bsdh.len = htonl(sizeof(struct sdp_hh)); hh.max_adverts = 1; hh.majv_minv = SDP_MAJV_MINV; sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size); hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk))); atomic_set(&sdp_sk(sk)->remote_credits, rx_ring_posted(sdp_sk(sk))); hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags * PAGE_SIZE + sizeof(struct sdp_bsdh)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (inet6_sk(sk)) { struct sockaddr *src_addr = (struct sockaddr *)&id->route.addr.src_addr; struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr; struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr; if (src_addr->sa_family == AF_INET) { /* IPv4 over IPv6 */ ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF), addr4->sin_addr.s_addr); } else { sk->sk_v6_rcv_saddr = addr6->sin6_addr; } inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr; } else #endif { sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; } memset(&conn_param, 0, sizeof conn_param); conn_param.private_data_len = sizeof hh; conn_param.private_data = &hh; conn_param.responder_resources = 4 /* TODO */; conn_param.initiator_depth = 4 /* TODO */; conn_param.retry_count = sdp_retry_count; SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh); if (sdp_apm_enable) { rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST); if (rc) sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc); } rc = rdma_connect(id, &conn_param); break; case 
RDMA_CM_EVENT_ALT_ROUTE_RESOLVED: sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n", id->route.path_rec[1].slid, id->route.path_rec[1].dlid); break; case RDMA_CM_EVENT_ALT_PATH_LOADED: sdp_dbg(sk, "alt route path loaded\n"); break; case RDMA_CM_EVENT_ALT_ROUTE_ERROR: sdp_warn(sk, "alt route resolve error\n"); break; case RDMA_CM_EVENT_ROUTE_ERROR: rc = -ETIMEDOUT; break; case RDMA_CM_EVENT_CONNECT_REQUEST: rc = sdp_connect_handler(sk, id, event); if (rc) { sdp_dbg(sk, "Destroying qp\n"); rdma_reject(id, NULL, 0); break; } child = id->context; atomic_set(&sdp_sk(child)->remote_credits, rx_ring_posted(sdp_sk(child))); memset(&hah, 0, sizeof hah); hah.bsdh.mid = SDP_MID_HELLO_ACK; hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child))); hah.bsdh.len = htonl(sizeof(struct sdp_hah)); hah.majv_minv = SDP_MAJV_MINV; hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec, but just in case */ hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE + sizeof(struct sdp_bsdh)); memset(&conn_param, 0, sizeof conn_param); conn_param.private_data_len = sizeof hah; conn_param.private_data = &hah; conn_param.responder_resources = 4 /* TODO */; conn_param.initiator_depth = 4 /* TODO */; conn_param.retry_count = sdp_retry_count; SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh); rc = rdma_accept(id, &conn_param); if (rc) { sdp_sk(child)->id = NULL; id->qp = NULL; id->context = NULL; parent = sdp_sk(child)->parent; /* TODO: hold ? */ } else if (sdp_apm_enable) { rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST); if (rc2) sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2); } break; case RDMA_CM_EVENT_CONNECT_RESPONSE: rc = sdp_response_handler(sk, id, event); if (rc) { sdp_dbg(sk, "Destroying qp\n"); rdma_reject(id, NULL, 0); } else { rc = rdma_accept(id, NULL); if (!rc && sdp_apm_enable) { rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST); if (rc2) sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2); } } break; case RDMA_CM_EVENT_CONNECT_ERROR: rc = -ETIMEDOUT; break; case RDMA_CM_EVENT_UNREACHABLE: rc = -ENETUNREACH; break; case RDMA_CM_EVENT_REJECTED: rc = -ECONNREFUSED; break; case RDMA_CM_EVENT_ESTABLISHED: sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; rc = sdp_connected_handler(sk); break; case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */ if (sk->sk_state == TCP_LAST_ACK) { sdp_cancel_dreq_wait_timeout(sdp_sk(sk)); sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT); sdp_dbg(sk, "%s: waiting for Infiniband tear down\n", __func__); } sdp_sk(sk)->qp_active = 0; rdma_disconnect(id); if (sk->sk_state != TCP_TIME_WAIT) { if (sk->sk_state == TCP_CLOSE_WAIT) { sdp_dbg(sk, "IB teardown while in " "TCP_CLOSE_WAIT taking reference to " "let close() finish the work\n"); sock_hold(sk, SOCK_REF_CMA); sdp_start_cma_timewait_timeout(sdp_sk(sk), SDP_CMA_TIMEWAIT_TIMEOUT); } sdp_set_error(sk, -EPIPE); rc = sdp_disconnected_handler(sk); } break; case RDMA_CM_EVENT_TIMEWAIT_EXIT: rc = sdp_disconnected_handler(sk); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: rc = -ENETRESET; break; case RDMA_CM_EVENT_ADDR_CHANGE: sdp_dbg(sk, "Got Address change event\n"); rc = 0; break; default: printk(KERN_ERR "SDP: Unexpected CMA event: %d\n", event->event); rc = -ECONNABORTED; break; } sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event)); if (rc && sdp_sk(sk)->id == id) { child = sk; sdp_sk(sk)->id = NULL; id->qp = NULL; id->context = NULL; parent = sdp_sk(sk)->parent; sdp_reset_sk(sk, rc); } release_sock(sk); 
sdp_dbg(sk, "event: %s done. status %d\n", rdma_cm_event_str(event->event), rc); if (parent) { lock_sock(parent); if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */ sdp_dbg(sk, "parent is going away.\n"); child = NULL; goto done; } if (!list_empty(&sdp_sk(child)->backlog_queue)) list_del_init(&sdp_sk(child)->backlog_queue); else child = NULL; done: release_sock(parent); if (child) sdp_common_release(child); } return rc; }
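/*
 * Sketch of how sdp_cma_handler() above is normally attached to a cm_id
 * (an assumption, not a copy of the SDP connect/listen paths). The socket
 * is stored in id->context, which is exactly where the handler recovers
 * it from on every event. The rdma_create_id() signature shown is the
 * pre-namespace one that matches the kernel generation of this code.
 */
static int sdp_create_cm_id(struct sock *sk)
{
	struct rdma_cm_id *id;

	id = rdma_create_id(sdp_cma_handler, sk, RDMA_PS_SDP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);

	sdp_sk(sk)->id = id;
	return 0;
}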
static int rds_iw_setup_qp(struct rds_connection *conn) { struct rds_iw_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct rds_iw_device *rds_iwdev; int ret; /* */ rds_iwdev = ib_get_client_data(dev, &rds_iw_client); if (!rds_iwdev) { printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } /* */ ic->i_pd = rds_iwdev->pd; ic->i_mr = rds_iwdev->mr; ret = rds_iw_init_qp_attrs(&attr, rds_iwdev, &ic->i_send_ring, rds_iw_send_cq_comp_handler, &ic->i_recv_ring, rds_iw_recv_cq_comp_handler, conn); if (ret < 0) goto out; ic->i_send_cq = attr.send_cq; ic->i_recv_cq = attr.recv_cq; /* */ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } rds_iw_send_init_ring(ic); ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } rds_iw_recv_init_ring(ic); rds_iw_recv_init_ack(ic); /* */ rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, ic->i_send_cq, ic->i_recv_cq); out: return ret; }
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rds_ib_setup_qp(struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct rds_ib_device *rds_ibdev; int ret; /* rds_ib_add_one creates a rds_ib_device object per IB device, * and allocates a protection domain, memory range and FMR pool * for each. If that fails for any reason, it will not register * the rds_ibdev at all. */ rds_ibdev = ib_get_client_data(dev, &rds_ib_client); if (rds_ibdev == NULL) { if (printk_ratelimit()) printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; ic->i_mr = rds_ibdev->mr; ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, rds_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1, 0); if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); goto out; } ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, rds_ib_cq_event_handler, conn, ic->i_recv_ring.w_nr, 0); if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); goto out; } /* XXX negotiate max send/recv with remote? */ memset(&attr, 0, sizeof(attr)); attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? 
*/ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (ic->i_send_hdrs == NULL) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (ic->i_recv_hdrs == NULL) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (ic->i_ack == NULL) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); if (ic->i_sends == NULL) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); if (ic->i_recvs == NULL) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); rds_ib_recv_init_ack(ic); rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, ic->i_send_cq, ic->i_recv_cq); out: return ret; }
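/*
 * Sketch (not the original RDS handlers) of the completion-handler
 * pattern the two CQs above rely on: drain the CQ with ib_poll_cq(),
 * then re-arm it with ib_req_notify_cq(). IB_CQ_REPORT_MISSED_EVENTS
 * makes the re-arm report whether completions slipped in while the CQ
 * was unarmed, so the loop never misses work. The helper name and the
 * per-completion processing are placeholders.
 */
static void example_recv_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct ib_wc wc;

	do {
		while (ib_poll_cq(cq, 1, &wc) > 0) {
			/* hand wc off to the connection; placeholder */
			(void) conn;
		}
	} while (ib_req_notify_cq(cq, IB_CQ_SOLICITED |
				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
}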
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rdsv3_ib_setup_qp(struct rdsv3_connection *conn) { struct rdsv3_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct rdsv3_ib_device *rds_ibdev; ibt_send_wr_t *wrp; ibt_wr_ds_t *sgl; int ret, i; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "Enter conn: %p", conn); /* * rdsv3_ib_add_one creates a rdsv3_ib_device object per IB device, * and allocates a protection domain, memory range and FMR pool * for each. If that fails for any reason, it will not register * the rds_ibdev at all. */ rds_ibdev = ib_get_client_data(dev, &rdsv3_ib_client); if (!rds_ibdev) { RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "RDS/IB: No client_data for device %s", dev->name); return (-EOPNOTSUPP); } ic->rds_ibdev = rds_ibdev; if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) rdsv3_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) rdsv3_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; /* * IB_CQ_VECTOR_LEAST_ATTACHED and/or the corresponding feature is * not implemented in Hermon yet, but we can pass it to ib_create_cq() * anyway. */ ic->i_cq = ib_create_cq(dev, rdsv3_ib_cq_comp_handler, rdsv3_ib_cq_event_handler, conn, ic->i_recv_ring.w_nr + ic->i_send_ring.w_nr + 1, rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp)); if (IS_ERR(ic->i_cq)) { ret = PTR_ERR(ic->i_cq); ic->i_cq = NULL; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "ib_create_cq failed: %d", ret); goto out; } if (rdsv3_enable_snd_cq) { ic->i_snd_cq = ib_create_cq(dev, rdsv3_ib_snd_cq_comp_handler, rdsv3_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1, rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp)); if (IS_ERR(ic->i_snd_cq)) { ret = PTR_ERR(ic->i_snd_cq); (void) ib_destroy_cq(ic->i_cq); ic->i_cq = NULL; ic->i_snd_cq = NULL; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "ib_create_cq send cq failed: %d", ret); goto out; } } /* XXX negotiate max send/recv with remote? */ (void) memset(&attr, 0, sizeof (attr)); attr.event_handler = rdsv3_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDSV3_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; if (rdsv3_enable_snd_cq) { attr.send_cq = ic->i_snd_cq; } else { attr.send_cq = ic->i_cq; } attr.recv_cq = ic->i_cq; /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? 
*/ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "rdma_create_qp failed: %d", ret); goto out; } ret = rdsv3_ib_alloc_hdrs(dev, ic); if (ret != 0) { ret = -ENOMEM; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "rdsv3_ib_alloc_hdrs failed: %d", ret); goto out; } ic->i_sends = kmem_alloc(ic->i_send_ring.w_nr * sizeof (struct rdsv3_ib_send_work), KM_NOSLEEP); if (ic->i_sends == NULL) { ret = -ENOMEM; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "send allocation failed: %d", ret); goto out; } (void) memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof (struct rdsv3_ib_send_work)); ic->i_send_wrs = kmem_alloc(ic->i_send_ring.w_nr * (sizeof (ibt_send_wr_t) + RDSV3_IB_MAX_SGE * sizeof (ibt_wr_ds_t)), KM_NOSLEEP); if (ic->i_send_wrs == NULL) { ret = -ENOMEM; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "Send WR allocation failed: %d", ret); goto out; } sgl = (ibt_wr_ds_t *)((uint8_t *)ic->i_send_wrs + (ic->i_send_ring.w_nr * sizeof (ibt_send_wr_t))); for (i = 0; i < ic->i_send_ring.w_nr; i++) { wrp = &ic->i_send_wrs[i]; wrp->wr_sgl = &sgl[i * RDSV3_IB_MAX_SGE]; } ic->i_recvs = kmem_alloc(ic->i_recv_ring.w_nr * sizeof (struct rdsv3_ib_recv_work), KM_NOSLEEP); if (ic->i_recvs == NULL) { ret = -ENOMEM; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "recv allocation failed: %d", ret); goto out; } (void) memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof (struct rdsv3_ib_recv_work)); ic->i_recv_wrs = kmem_alloc(ic->i_recv_ring.w_nr * sizeof (ibt_recv_wr_t), KM_NOSLEEP); if (ic->i_recv_wrs == NULL) { ret = -ENOMEM; RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "Recv WR allocation failed: %d", ret); goto out; } rdsv3_ib_recv_init_ack(ic); RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "conn %p pd %p mr %p cq %p", conn, ic->i_pd, ic->i_mr, ic->i_cq); out: return (ret); }
/* * Connection established. * We get here for both outgoing and incoming connection. */ void rdsv3_ib_cm_connect_complete(struct rdsv3_connection *conn, struct rdma_cm_event *event) { const struct rdsv3_ib_connect_private *dp = NULL; struct rdsv3_ib_connection *ic = conn->c_transport_data; struct rdsv3_ib_device *rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client); struct ib_qp_attr qp_attr; int err; RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete", "Enter conn: %p event: %p", conn, event); if (event->param.conn.private_data_len >= sizeof (*dp)) { dp = event->param.conn.private_data; /* make sure it isn't empty data */ if (dp->dp_protocol_major) { rdsv3_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)); rdsv3_ib_set_flow_control(conn, ntohl(dp->dp_credit)); } } if (conn->c_version < RDS_PROTOCOL(3, 1)) { RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete", "RDS/IB: Connection to %u.%u.%u.%u version %u.%u failed", NIPQUAD(conn->c_faddr), RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version)); rdsv3_conn_destroy(conn); return; } else { RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete", "RDS/IB: connected to %u.%u.%u.%u version %u.%u%s", NIPQUAD(conn->c_faddr), RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); } ASSERT(ic->i_soft_cq == NULL); ic->i_soft_cq = rdsv3_af_intr_thr_create(rdsv3_ib_tasklet_fn, (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp, ic->i_cq->ibt_cq); if (rdsv3_enable_snd_cq) { ic->i_snd_soft_cq = rdsv3_af_intr_thr_create( rdsv3_ib_snd_tasklet_fn, (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp, ic->i_snd_cq->ibt_cq); } /* rdsv3_ib_refill_fn is expecting i_max_recv_alloc set */ ic->i_max_recv_alloc = rdsv3_ib_sysctl_max_recv_allocation; ic->i_refill_rq = rdsv3_af_thr_create(rdsv3_ib_refill_fn, (void *)conn, SCQ_WRK_BIND_CPU, rds_ibdev->aft_hcagp); rdsv3_af_grp_draw(rds_ibdev->aft_hcagp); (void) ib_req_notify_cq(ic->i_cq, IB_CQ_SOLICITED); if (rdsv3_enable_snd_cq) { (void) ib_req_notify_cq(ic->i_snd_cq, IB_CQ_NEXT_COMP); } /* * Init rings and fill recv. this needs to wait until protocol * negotiation * is complete, since ring layout is different from 3.0 to 3.1. */ rdsv3_ib_send_init_ring(ic); rdsv3_ib_recv_init_ring(ic); /* * Post receive buffers - as a side effect, this will update * the posted credit count. */ (void) rdsv3_ib_recv_refill(conn, 1); /* Tune RNR behavior */ rdsv3_ib_tune_rnr(ic, &qp_attr); qp_attr.qp_state = IB_QPS_RTS; err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); if (err) RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete", "ib_modify_qp(IB_QP_STATE, RTS): err=%d", err); /* update ib_device with this local ipaddr & conn */ err = rdsv3_ib_update_ipaddr(rds_ibdev, conn->c_laddr); if (err) RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete", "rdsv3_ib_update_ipaddr failed (%d)", err); rdsv3_ib_add_conn(rds_ibdev, conn); /* * If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ if (dp && dp->dp_ack_seq) rdsv3_send_drop_acked(conn, ntohll(dp->dp_ack_seq), NULL); rdsv3_connect_complete(conn); RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete", "Return conn: %p event: %p", conn, event); }