Example No. 1
static void pfmr_remove_one(struct ib_device *device)
{
	struct pfmr_test_ctx *test_ctx = ib_get_client_data(device,
							    &test_client);
	if (test_ctx) {
		if (test_ctx->fmr_map_op) {
			info("MAP FMR OP: %s was removed for IB device %s...\n",
			     test_ctx->fmr_map_op->name, device->name);
			destroy_kvlop(test_ctx->fmr_map_op);
		}
		if (test_ctx->create_fmr_op) {
			info("Create FMR OP: %s was removed for IB device %s.."
			     ".\n", test_ctx->create_fmr_op->name,
			     device->name);
			destroy_kvlop(test_ctx->create_fmr_op);
		}
		if (test_ctx->destroy_fmr_op) {
			info("Create FMR OP: %s was removed for IB device %s.."
			     ".\n", test_ctx->destroy_fmr_op->name,
			     device->name);
			destroy_kvlop(test_ctx->destroy_fmr_op);
		}
		if (test_ctx->create_dma_op) {
			info("Create DMA OP : %s was removed for IB device %s.."
			     ".\n", test_ctx->create_dma_op->name,
			     device->name);
			destroy_kvlop(test_ctx->create_dma_op);
		}
		if (test_ctx->destroy_dma_op) {
			info("Create FMR OP: %s was removed for IB device %s.."
			     ".\n", test_ctx->destroy_dma_op->name,
			     device->name);
			destroy_kvlop(test_ctx->destroy_dma_op);
		}
		if (test_ctx->fmr_pool_op) {
			info("Create DMA OP : %s was removed for IB device %s.."
			     ".\n", test_ctx->fmr_pool_op->name, device->name);
			destroy_kvlop(test_ctx->fmr_pool_op);
		}
		if (test_ctx->cq_op) {
			info("CQ OP : %s was removed for IB device %s.."
			     ".\n", test_ctx->cq_op->name, device->name);
			destroy_kvlop(test_ctx->cq_op);
		}
		if (test_ctx->qp_op) {
			info("QP OP : %s was removed for IB device %s.."
			     ".\n", test_ctx->qp_op->name, device->name);
			destroy_kvlop(test_ctx->qp_op);
		}
		if (test_ctx->complete_test_op) {
			info("QP OP : %s was removed for IB device %s.."
			     ".\n", test_ctx->complete_test_op->name,
			     device->name);
			destroy_kvlop(test_ctx->complete_test_op);
		}
		pfmr_destroy_test_ctx(test_ctx);
	}
	info("IB device %s removed successfully\n", device->name);
}
Example No. 2
static void
roq_eth_add_one(struct ib_device *ibdev)
{
	struct roq_eth_priv *priv;
	struct roq_dev *roq_dev = container_of(ibdev, struct roq_dev, ofa_dev);
	struct net_device *ndev = roq_dev->l2dev;

	/* roq is represented as RDMA_NODE_RNIC */
	if (rdma_node_get_transport(ibdev->node_type) != RDMA_TRANSPORT_IWARP
	    || strncmp(ibdev->name, "roq", IB_DEVICE_NAME_MAX) != 0)
		return;

	priv = ib_get_client_data(ibdev, &roq_eth_client);
	if (priv) {
		pr_warn("roq_eth_add_one: netdev already allocated!\n");
		return;
	}

	SET_NETDEV_DEV(ndev, ibdev->dma_device);

	priv = netdev_priv(ndev);
	priv->ndev  = ndev;
	priv->ibdev = ibdev;
	ib_set_client_data(ibdev, &roq_eth_client, priv);
}
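
Examples 1 and 2 are the remove and add callbacks of an ib_client: ib_get_client_data() returns whatever the add callback stored with ib_set_client_data() for that (device, client) pair. A minimal sketch of the registration glue these callbacks assume, using the older kernel API whose callbacks take only the device pointer (the module init/exit function names here are illustrative):

static struct ib_client roq_eth_client = {
	.name   = "roq_eth",
	.add    = roq_eth_add_one,
	.remove = roq_eth_remove_one,
};

static int __init roq_eth_init_module(void)
{
	/* Invokes .add for every IB device present now or hotplugged later. */
	return ib_register_client(&roq_eth_client);
}

static void __exit roq_eth_exit_module(void)
{
	/* Invokes .remove for every device the client is still bound to. */
	ib_unregister_client(&roq_eth_client);
}
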
Example No. 3
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 */
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
	const struct rds_ib_connect_private *dp = NULL;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_device *rds_ibdev;
	struct ib_qp_attr qp_attr;
	int err;

	if (event->param.conn.private_data_len >= sizeof(*dp)) {
		dp = event->param.conn.private_data;

		/* make sure it isn't empty data */
		if (dp->dp_protocol_major) {
			rds_ib_set_protocol(conn,
				RDS_PROTOCOL(dp->dp_protocol_major,
				dp->dp_protocol_minor));
			rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
		}
	}

	printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
			&conn->c_faddr,
			RDS_PROTOCOL_MAJOR(conn->c_version),
			RDS_PROTOCOL_MINOR(conn->c_version),
			ic->i_flowctl ? ", flow control" : "");

	/*
	 * Init rings and fill recv. This needs to wait until protocol negotiation
	 * is complete, since ring layout is different from 3.0 to 3.1.
	 */
	rds_ib_send_init_ring(ic);
	rds_ib_recv_init_ring(ic);
	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);

	/* Tune RNR behavior */
	rds_ib_tune_rnr(ic, &qp_attr);

	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);

	/* update ib_device with this local ipaddr & conn */
	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
	err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
	if (err)
		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
	rds_ib_add_conn(rds_ibdev, conn);

	/* If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK. */
	if (dp && dp->dp_ack_seq)
		rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);

	rds_connect_complete(conn);
}
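
For reference, the private data that dp points at above is the RDS connect header exchanged during connection setup; its wire layout in the upstream rds driver is roughly the following (a sketch, field order assumed from net/rds/ib.h; every multi-byte field is big-endian, hence the be32_to_cpu()/be64_to_cpu() conversions in the handler):

struct rds_ib_connect_private {
	/* Add new fields at the end, and don't permute existing fields. */
	__be32	dp_saddr;
	__be32	dp_daddr;
	u8	dp_protocol_major;
	u8	dp_protocol_minor;
	__be16	dp_protocol_minor_mask;	/* bitmask */
	__be32	dp_reserved1;
	__be64	dp_ack_seq;
	__be32	dp_credit;		/* non-zero enables flow control */
};
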
Example No. 4
static void
rdsv3_ib_cm_fill_conn_param(struct rdsv3_connection *conn,
    struct rdma_conn_param *conn_param,
    struct rdsv3_ib_connect_private *dp,
    uint32_t protocol_version,
    uint32_t max_responder_resources,
    uint32_t max_initiator_depth)
{
	struct rdsv3_ib_connection *ic = conn->c_transport_data;
	struct rdsv3_ib_device *rds_ibdev;

	RDSV3_DPRINTF2("rdsv3_ib_cm_fill_conn_param",
	    "Enter conn: %p conn_param: %p private: %p version: %d",
	    conn, conn_param, dp, protocol_version);

	(void) memset(conn_param, 0, sizeof (struct rdma_conn_param));

	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client);

	conn_param->responder_resources =
	    MIN(rds_ibdev->max_responder_resources, max_responder_resources);
	conn_param->initiator_depth =
	    MIN(rds_ibdev->max_initiator_depth, max_initiator_depth);
	conn_param->retry_count = min(rdsv3_ib_retry_count, 7);
	conn_param->rnr_retry_count = 7;

	if (dp) {
		(void) memset(dp, 0, sizeof (*dp));
		dp->dp_saddr = conn->c_laddr;
		dp->dp_daddr = conn->c_faddr;
		dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
		dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
		dp->dp_protocol_minor_mask =
		    htons(RDSV3_IB_SUPPORTED_PROTOCOLS);
		dp->dp_ack_seq = rdsv3_ib_piggyb_ack(ic);

		/* Advertise flow control */
		if (ic->i_flowctl) {
			unsigned int credits;

			credits = IB_GET_POST_CREDITS(
			    atomic_get(&ic->i_credits));
			dp->dp_credit = htonl(credits);
			atomic_add_32(&ic->i_credits,
			    -IB_SET_POST_CREDITS(credits));
		}

		conn_param->private_data = dp;
		conn_param->private_data_len = sizeof (*dp);
	}

	RDSV3_DPRINTF2("rdsv3_ib_cm_fill_conn_param",
	    "Return conn: %p conn_param: %p private: %p version: %d",
	    conn, conn_param, dp, protocol_version);
}
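
A hypothetical passive-side caller of this helper (the wrapper name and argument plumbing here are illustrative): the filled conn_param, whose private_data now carries the connect header, goes straight to rdma_accept(); an active side would hand the same structure to rdma_connect() instead.

static int
rdsv3_ib_accept_sketch(struct rdsv3_connection *conn,
    struct rdma_cm_id *cm_id, struct rdma_cm_event *event,
    uint32_t version)
{
	struct rdma_conn_param conn_param;
	struct rdsv3_ib_connect_private dp_rep;

	/* Mirror the peer's offered limits back through the helper. */
	rdsv3_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
	    event->param.conn.responder_resources,
	    event->param.conn.initiator_depth);

	return (rdma_accept(cm_id, &conn_param));
}
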
Example No. 5
static void
roq_eth_remove_one(struct ib_device *ibdev)
{
	struct roq_eth_priv *priv;

	if (rdma_node_get_transport(ibdev->node_type) != RDMA_TRANSPORT_IWARP
	    || strncmp(ibdev->name, "roq", IB_DEVICE_NAME_MAX) != 0)
		return;

	priv = ib_get_client_data(ibdev, &roq_eth_client);
	if (!priv)
		return;

	priv->ndev->dev.parent = NULL;
	unregister_netdev(priv->ndev);
	free_netdev(priv->ndev);
}
Example No. 6
static int ib_create_path_iter(struct ib_device *device, u8 port_num,
			       union ib_gid *dgid, struct ib_sa_attr_iter *iter)
{
	struct sa_db_device *dev;
	struct sa_db_port *port;
	struct ib_sa_attr_list *list;

	dev = ib_get_client_data(device, &sa_db_client);
	if (!dev)
		return -ENODEV;

	port = &dev->port[port_num - dev->start_port];

	read_lock_irqsave(&rwlock, iter->flags);
	list = find_attr_list(&port->paths, dgid->raw);
	if (!list) {
		ib_free_path_iter(iter);
		return -ENODATA;
	}

	iter->iter = &list->iter;
	return 0;
}
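
Note that on success this function returns with the read lock from read_lock_irqsave() still held, pinning the attribute list while the caller walks the iterator; the matching release is assumed to live in ib_free_path_iter(), along these lines:

/* Assumed counterpart (not shown in this excerpt): drop the read lock
 * using the IRQ flags saved in the iterator at lock time. */
static void ib_free_path_iter(struct ib_sa_attr_iter *iter)
{
	read_unlock_irqrestore(&rwlock, iter->flags);
}
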
Example No. 7
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 */
void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
	const struct rds_iw_connect_private *dp = NULL;
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct rds_iw_device *rds_iwdev;
	int err;

	if (event->param.conn.private_data_len >= sizeof(*dp)) {
		dp = event->param.conn.private_data;

		rds_iw_set_protocol(conn,
				RDS_PROTOCOL(dp->dp_protocol_major,
					dp->dp_protocol_minor));
		rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
	}

	/* update ib_device with this local ipaddr & conn */
	rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
	err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
	if (err)
		printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
	rds_iw_add_conn(rds_iwdev, conn);

	/* If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK. */
	if (dp && dp->dp_ack_seq)
		rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);

	printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n",
			&conn->c_laddr, &conn->c_faddr,
			RDS_PROTOCOL_MAJOR(conn->c_version),
			RDS_PROTOCOL_MINOR(conn->c_version),
			ic->i_flowctl ? ", flow control" : "");

	rds_connect_complete(conn);
}
Example No. 8
/*
 * This needs to be very careful to not leave IS_ERR pointers around for
 * cleanup to trip over.
 */
static int rds_iw_setup_qp(struct rds_connection *conn)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct ib_qp_init_attr attr;
	struct rds_iw_device *rds_iwdev;
	int ret;

	/* rds_iw_add_one creates a rds_iw_device object per IB device,
	 * and allocates a protection domain, memory range and MR pool
	 * for each.  If that fails for any reason, it will not register
	 * the rds_iwdev at all.
	 */
	rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
	if (!rds_iwdev) {
		if (printk_ratelimit())
			printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
					dev->name);
		return -EOPNOTSUPP;
	}

	/* Protection domain and memory range */
	ic->i_pd = rds_iwdev->pd;
	ic->i_mr = rds_iwdev->mr;

	ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
			&ic->i_send_ring, rds_iw_send_cq_comp_handler,
			&ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
			conn);
	if (ret < 0)
		goto out;

	ic->i_send_cq = attr.send_cq;
	ic->i_recv_cq = attr.recv_cq;

	/*
	 * XXX this can fail if max_*_wr is too large?  Are we supposed
	 * to back off until we get a value that the hardware can support?
	 */
	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
	if (ret) {
		rdsdebug("rdma_create_qp failed: %d\n", ret);
		goto out;
	}

	ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_send_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_send_hdrs_dma, GFP_KERNEL);
	if (!ic->i_send_hdrs) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent send failed\n");
		goto out;
	}

	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_recv_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_recv_hdrs_dma, GFP_KERNEL);
	if (!ic->i_recv_hdrs) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent recv failed\n");
		goto out;
	}

	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
				       &ic->i_ack_dma, GFP_KERNEL);
	if (!ic->i_ack) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent ack failed\n");
		goto out;
	}

	ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
	if (!ic->i_sends) {
		ret = -ENOMEM;
		rdsdebug("send allocation failed\n");
		goto out;
	}
	rds_iw_send_init_ring(ic);

	ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
	if (!ic->i_recvs) {
		ret = -ENOMEM;
		rdsdebug("recv allocation failed\n");
		goto out;
	}

	rds_iw_recv_init_ring(ic);
	rds_iw_recv_init_ack(ic);

	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);

	rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
		 ic->i_send_cq, ic->i_recv_cq);

out:
	return ret;
}
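
Per the comment at the top, the out: label deliberately returns without unwinding partial allocations; the connection shutdown path is expected to pair them. A sketch of that teardown (the function name is illustrative; the real one is not in this excerpt):

static void rds_iw_qp_teardown_sketch(struct rds_iw_connection *ic,
				      struct ib_device *dev)
{
	/* Pair each ib_dma_alloc_coherent() with ib_dma_free_coherent()
	 * and each vmalloc() with vfree(); the NULL checks keep this
	 * safe after a partially failed setup. */
	if (ic->i_send_hdrs)
		ib_dma_free_coherent(dev,
				     ic->i_send_ring.w_nr *
					sizeof(struct rds_header),
				     ic->i_send_hdrs, ic->i_send_hdrs_dma);
	if (ic->i_recv_hdrs)
		ib_dma_free_coherent(dev,
				     ic->i_recv_ring.w_nr *
					sizeof(struct rds_header),
				     ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
	if (ic->i_ack)
		ib_dma_free_coherent(dev, sizeof(struct rds_header),
				     ic->i_ack, ic->i_ack_dma);
	vfree(ic->i_sends);
	vfree(ic->i_recvs);
}
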
Example No. 9
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = sdp_rx_size,
		.cap.max_inline_data = sdp_inline_thresh,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge);

	sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_sk(sk)->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
	qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

	rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	sdp_sk(sk)->qp = id->qp;
	sdp_sk(sk)->ib_device = device;
	sdp_sk(sk)->qp_active = 1;
	sdp_sk(sk)->context.device = device;
	sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
	sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
	return rc;
}

static int sdp_get_max_send_frags(u32 buf_size)
{
	return MIN(
		/* +1 to compensate for unaligned buffers */
		(PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
		SDP_MAX_SEND_SGES - 1);
}

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
		       	struct rdma_cm_event *event)
{
	struct sockaddr_in *dst_addr;
	struct sock *child;
	const struct sdp_hh *h;
	int rc = 0;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
		sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
				h->ipv_cap & HH_IPV_MASK);
		return -EINVAL;
	}

	if (!h->max_adverts)
		return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
	child = sk_clone(sk, GFP_KERNEL);
#else
	child = sk_clone_lock(sk, GFP_KERNEL);
#endif
	if (!child)
		return -ENOMEM;

	sdp_init_sock(child);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	if (inet6_sk(sk)) {
		struct ipv6_pinfo *newnp;

		newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);

		memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
		if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
			/* V6 mapped */
			sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
			ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
					h->src_addr.ip4.addr);

			ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
					h->dst_addr.ip4.addr);

			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
		} else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
			struct sockaddr_in6 *src_addr6 =
				(struct sockaddr_in6 *)&id->route.addr.src_addr;

			ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
			ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
		} else {
			sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK);
		}

		sdp_inet_daddr(child) = sdp_inet_saddr(child) =
			sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
	} else
#endif
	{
		sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
	}

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	__sock_put(child, SOCK_REF_CLONE);

	down_read(&device_removal_lock);

	rc = sdp_init_qp(child, id);
	if (rc) {
		bh_unlock_sock(child);
		up_read(&device_removal_lock);
		sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
		sdp_ssk_hist_close(child);
#endif
		sk_free(child);
		return rc;
	}

	sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

	sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
	sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
		sizeof(struct sdp_bsdh);

	sdp_sk(child)->send_frags = sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
	sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

	id->context = child;
	sdp_sk(child)->id = id;

	list_add_tail(&sdp_sk(child)->backlog_queue,
			&sdp_sk(sk)->backlog_queue);
	sdp_sk(child)->parent = sk;

	bh_unlock_sock(child);
	sdp_add_sock(sdp_sk(child));
	up_read(&device_removal_lock);

	sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

	/* child->sk_write_space(child); */
	/* child->sk_data_ready(child, 0); */
	sk->sk_data_ready(sk);

	return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
				struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	sdp_dbg(sk, "%s\n", __func__);

	sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
	sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
	sdp_sk(sk)->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	sdp_sk(sk)->send_frags = sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
	sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
		sdp_sk(sk)->send_frags * PAGE_SIZE);

	sdp_sk(sk)->poll_cq = 1;

	sk->sk_state_change(sk);
	sk_wake_async(sk, 0, POLL_OUT);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(sk) = dst_addr->sin_port;
	sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
	struct sock *parent;
	sdp_dbg(sk, "%s\n", __func__);

	parent = sdp_sk(sk)->parent;
	BUG_ON(!parent);

	sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	lock_sock(parent);
	if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
		sdp_dbg(sk, "parent is going away.\n");
		goto done;
	}

	sk_acceptq_added(parent);
	sdp_dbg(parent, "%s child connection established\n", __func__);
	list_del_init(&sdp_sk(sk)->backlog_queue);
	list_add_tail(&sdp_sk(sk)->accept_queue,
			&sdp_sk(parent)->accept_queue);

	parent->sk_state_change(parent);
	sk_wake_async(parent, 0, POLL_OUT);
done:
	release_sock(parent);

	return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	if (ssk->tx_ring.cq)
		if (sdp_xmit_poll(ssk, 1))
			sdp_post_sends(ssk, 0);

	if (sk->sk_state == TCP_SYN_RECV) {
		sdp_connected_handler(sk);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct sock *parent = NULL;
	struct sock *child = NULL;
	struct sock *sk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0, rc2;

	sk = id->context;
	if (!sk) {
		sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
		       	rdma_cm_event_str(event->event));
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_add_to_history(sk, rdma_cm_event_str(event->event));

	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
	if (!sdp_sk(sk)->id) {
		sdp_dbg(sk, "socket is being torn down\n");
		rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
		release_sock(sk);
		return rc;
	}

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device,
					id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;

		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
				PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (inet6_sk(sk)) {
			struct sockaddr *src_addr = (struct sockaddr *)&id->route.addr.src_addr;
			struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr;
			struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr;

			if (src_addr->sa_family == AF_INET) {
				/* IPv4 over IPv6 */
				ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF),
						addr4->sin_addr.s_addr);
			} else {
				sk->sk_v6_rcv_saddr = addr6->sin6_addr;
			}
			inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
		} else
#endif
		{
			sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
				((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		}
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

		if (sdp_apm_enable) {
			rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc)
				sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
		}

		rc = rdma_connect(id, &conn_param);
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
		sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
				id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
		break;

	case RDMA_CM_EVENT_ALT_PATH_LOADED:
		sdp_dbg(sk, "alt route path loaded\n");
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
		sdp_warn(sk, "alt route resolve error\n");
		break;

	case RDMA_CM_EVENT_ROUTE_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		child = id->context;
		atomic_set(&sdp_sk(child)->remote_credits,
				rx_ring_posted(sdp_sk(child)));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
			sizeof(struct sdp_bsdh));
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			sdp_sk(child)->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			parent = sdp_sk(child)->parent; /* TODO: hold ? */
		} else if (sdp_apm_enable) {
			rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc2)
				sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else {
			rc = rdma_accept(id, NULL);
			if (!rc && sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2);
			}
		}
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		if (sk->sk_state == TCP_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(sdp_sk(sk));

			sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}

		sdp_sk(sk)->qp_active = 0;
		rdma_disconnect(id);

		if (sk->sk_state != TCP_TIME_WAIT) {
			if (sk->sk_state == TCP_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCP_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
				sock_hold(sk, SOCK_REF_CMA);
				sdp_start_cma_timewait_timeout(sdp_sk(sk),
						SDP_CMA_TIMEWAIT_TIMEOUT);

			}
			sdp_set_error(sk, -EPIPE);
			rc = sdp_disconnected_handler(sk);
		}
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		rc = sdp_disconnected_handler(sk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		rc = -ENETRESET;
		break;

	case RDMA_CM_EVENT_ADDR_CHANGE:
		sdp_dbg(sk, "Got Address change event\n");
		rc = 0;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

	if (rc && sdp_sk(sk)->id == id) {
		child = sk;
		sdp_sk(sk)->id = NULL;
		id->qp = NULL;
		id->context = NULL;
		parent = sdp_sk(sk)->parent;
		sdp_reset_sk(sk, rc);
	}

	release_sock(sk);

	sdp_dbg(sk, "event: %s done. status %d\n",
			rdma_cm_event_str(event->event), rc);

	if (parent) {
		lock_sock(parent);
		if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
			sdp_dbg(sk, "parent is going away.\n");
			child = NULL;
			goto done;
		}
		if (!list_empty(&sdp_sk(child)->backlog_queue))
			list_del_init(&sdp_sk(child)->backlog_queue);
		else
			child = NULL;
done:
		release_sock(parent);
		if (child)
			sdp_common_release(child);
	}
	return rc;
}
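
For context, sdp_cma_handler is the event callback installed when the cm_id is created. A hypothetical wiring, using the rdma_create_id() signature from ~3.x kernels (later kernels prepend a struct net * argument):

static int sdp_create_id_sketch(struct sock *sk)
{
	struct rdma_cm_id *id;

	/* The socket becomes id->context, which the handler above reads
	 * back on every event. */
	id = rdma_create_id(sdp_cma_handler, sk, RDMA_PS_SDP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);

	sdp_sk(sk)->id = id;
	return 0;
}
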
Example No. 10
static int rds_iw_setup_qp(struct rds_connection *conn)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct ib_qp_init_attr attr;
	struct rds_iw_device *rds_iwdev;
	int ret;

	/* rds_iw_add_one creates a rds_iw_device object per IB device,
	 * and allocates a protection domain, memory range and MR pool
	 * for each.  If that fails for any reason, it will not register
	 * the rds_iwdev at all.
	 */
	rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
	if (!rds_iwdev) {
		printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
					dev->name);
		return -EOPNOTSUPP;
	}

	/* Protection domain and memory range */
	ic->i_pd = rds_iwdev->pd;
	ic->i_mr = rds_iwdev->mr;

	ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
			&ic->i_send_ring, rds_iw_send_cq_comp_handler,
			&ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
			conn);
	if (ret < 0)
		goto out;

	ic->i_send_cq = attr.send_cq;
	ic->i_recv_cq = attr.recv_cq;

	/*
	 * XXX this can fail if max_*_wr is too large?  Are we supposed
	 * to back off until we get a value that the hardware can support?
	 */
	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
	if (ret) {
		rdsdebug("rdma_create_qp failed: %d\n", ret);
		goto out;
	}

	ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_send_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_send_hdrs_dma, GFP_KERNEL);
	if (!ic->i_send_hdrs) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent send failed\n");
		goto out;
	}

	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_recv_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_recv_hdrs_dma, GFP_KERNEL);
	if (!ic->i_recv_hdrs) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent recv failed\n");
		goto out;
	}

	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
				       &ic->i_ack_dma, GFP_KERNEL);
	if (!ic->i_ack) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent ack failed\n");
		goto out;
	}

	ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
	if (!ic->i_sends) {
		ret = -ENOMEM;
		rdsdebug("send allocation failed\n");
		goto out;
	}
	rds_iw_send_init_ring(ic);

	ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
	if (!ic->i_recvs) {
		ret = -ENOMEM;
		rdsdebug("recv allocation failed\n");
		goto out;
	}

	rds_iw_recv_init_ring(ic);
	rds_iw_recv_init_ack(ic);

	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);

	rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
		 ic->i_send_cq, ic->i_recv_cq);

out:
	return ret;
}
Example No. 11
/*
 * This needs to be very careful to not leave IS_ERR pointers around for
 * cleanup to trip over.
 */
static int rds_ib_setup_qp(struct rds_connection *conn)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct ib_qp_init_attr attr;
	struct rds_ib_device *rds_ibdev;
	int ret;

	/* rds_ib_add_one creates a rds_ib_device object per IB device,
	 * and allocates a protection domain, memory range and FMR pool
	 * for each.  If that fails for any reason, it will not register
	 * the rds_ibdev at all.
	 */
	rds_ibdev = ib_get_client_data(dev, &rds_ib_client);
	if (rds_ibdev == NULL) {
		if (printk_ratelimit())
			printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
					dev->name);
		return -EOPNOTSUPP;
	}

	if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
		rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
	if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
		rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);

	/* Protection domain and memory range */
	ic->i_pd = rds_ibdev->pd;
	ic->i_mr = rds_ibdev->mr;

	ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler,
				     rds_ib_cq_event_handler, conn,
				     ic->i_send_ring.w_nr + 1, 0);
	if (IS_ERR(ic->i_send_cq)) {
		ret = PTR_ERR(ic->i_send_cq);
		ic->i_send_cq = NULL;
		rdsdebug("ib_create_cq send failed: %d\n", ret);
		goto out;
	}

	ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler,
				     rds_ib_cq_event_handler, conn,
				     ic->i_recv_ring.w_nr, 0);
	if (IS_ERR(ic->i_recv_cq)) {
		ret = PTR_ERR(ic->i_recv_cq);
		ic->i_recv_cq = NULL;
		rdsdebug("ib_create_cq recv failed: %d\n", ret);
		goto out;
	}

	ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
	if (ret) {
		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
		goto out;
	}

	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
	if (ret) {
		rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
		goto out;
	}

	/* XXX negotiate max send/recv with remote? */
	memset(&attr, 0, sizeof(attr));
	attr.event_handler = rds_ib_qp_event_handler;
	attr.qp_context = conn;
	/* + 1 to allow for the single ack message */
	attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1;
	attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
	attr.cap.max_send_sge = rds_ibdev->max_sge;
	attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	attr.qp_type = IB_QPT_RC;
	attr.send_cq = ic->i_send_cq;
	attr.recv_cq = ic->i_recv_cq;

	/*
	 * XXX this can fail if max_*_wr is too large?  Are we supposed
	 * to back off until we get a value that the hardware can support?
	 */
	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
	if (ret) {
		rdsdebug("rdma_create_qp failed: %d\n", ret);
		goto out;
	}

	ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_send_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_send_hdrs_dma, GFP_KERNEL);
	if (ic->i_send_hdrs == NULL) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent send failed\n");
		goto out;
	}

	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_recv_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_recv_hdrs_dma, GFP_KERNEL);
	if (ic->i_recv_hdrs == NULL) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent recv failed\n");
		goto out;
	}

	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
				       &ic->i_ack_dma, GFP_KERNEL);
	if (ic->i_ack == NULL) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent ack failed\n");
		goto out;
	}

	ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
	if (ic->i_sends == NULL) {
		ret = -ENOMEM;
		rdsdebug("send allocation failed\n");
		goto out;
	}
	memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));

	ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work));
	if (ic->i_recvs == NULL) {
		ret = -ENOMEM;
		rdsdebug("recv allocation failed\n");
		goto out;
	}
	memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work));

	rds_ib_recv_init_ack(ic);

	rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
		 ic->i_send_cq, ic->i_recv_cq);

out:
	return ret;
}
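
A small note on the vmalloc()+memset() pairs above: since 2.6.37 they can be collapsed into vzalloc(), which returns zeroed memory. A drop-in sketch for the i_sends allocation:

	ic->i_sends = vzalloc(ic->i_send_ring.w_nr *
			      sizeof(struct rds_ib_send_work));
	if (!ic->i_sends) {
		ret = -ENOMEM;
		rdsdebug("send allocation failed\n");
		goto out;
	}
	/* No memset() needed: vzalloc() zeroes the allocation. */
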
Example No. 12
/*
 * This needs to be very careful to not leave IS_ERR pointers around for
 * cleanup to trip over.
 */
static int
rdsv3_ib_setup_qp(struct rdsv3_connection *conn)
{
	struct rdsv3_ib_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct ib_qp_init_attr attr;
	struct rdsv3_ib_device *rds_ibdev;
	ibt_send_wr_t *wrp;
	ibt_wr_ds_t *sgl;
	int ret, i;

	RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "Enter conn: %p", conn);

	/*
	 * rdsv3_ib_add_one creates a rdsv3_ib_device object per IB device,
	 * and allocates a protection domain, memory range and FMR pool
	 * for each.  If that fails for any reason, it will not register
	 * the rds_ibdev at all.
	 */
	rds_ibdev = ib_get_client_data(dev, &rdsv3_ib_client);
	if (!rds_ibdev) {
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "RDS/IB: No client_data for device %s", dev->name);
		return (-EOPNOTSUPP);
	}
	ic->rds_ibdev = rds_ibdev;

	if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
		rdsv3_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
	if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
		rdsv3_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);

	/* Protection domain and memory range */
	ic->i_pd = rds_ibdev->pd;

	/*
	 * IB_CQ_VECTOR_LEAST_ATTACHED and/or the corresponding feature is
	 * not implemented in Hermon yet, but we can pass it to ib_create_cq()
	 * anyway.
	 */
	ic->i_cq = ib_create_cq(dev, rdsv3_ib_cq_comp_handler,
	    rdsv3_ib_cq_event_handler, conn,
	    ic->i_recv_ring.w_nr + ic->i_send_ring.w_nr + 1,
	    rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp));
	if (IS_ERR(ic->i_cq)) {
		ret = PTR_ERR(ic->i_cq);
		ic->i_cq = NULL;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "ib_create_cq failed: %d", ret);
		goto out;
	}
	if (rdsv3_enable_snd_cq) {
		ic->i_snd_cq = ib_create_cq(dev, rdsv3_ib_snd_cq_comp_handler,
		    rdsv3_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1,
		    rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp));
		if (IS_ERR(ic->i_snd_cq)) {
			ret = PTR_ERR(ic->i_snd_cq);
			(void) ib_destroy_cq(ic->i_cq);
			ic->i_cq = NULL;
			ic->i_snd_cq = NULL;
			RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
			    "ib_create_cq send cq failed: %d", ret);
			goto out;
		}
	}

	/* XXX negotiate max send/recv with remote? */
	(void) memset(&attr, 0, sizeof (attr));
	attr.event_handler = rdsv3_ib_qp_event_handler;
	attr.qp_context = conn;
	/* + 1 to allow for the single ack message */
	attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1;
	attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
	attr.cap.max_send_sge = rds_ibdev->max_sge;
	attr.cap.max_recv_sge = RDSV3_IB_RECV_SGE;
	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	attr.qp_type = IB_QPT_RC;
	if (rdsv3_enable_snd_cq) {
		attr.send_cq = ic->i_snd_cq;
	} else {
		attr.send_cq = ic->i_cq;
	}
	attr.recv_cq = ic->i_cq;

	/*
	 * XXX this can fail if max_*_wr is too large?  Are we supposed
	 * to back off until we get a value that the hardware can support?
	 */
	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
	if (ret) {
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "rdma_create_qp failed: %d", ret);
		goto out;
	}

	ret = rdsv3_ib_alloc_hdrs(dev, ic);
	if (ret != 0) {
		ret = -ENOMEM;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "rdsv3_ib_alloc_hdrs failed: %d", ret);
		goto out;
	}

	ic->i_sends = kmem_alloc(ic->i_send_ring.w_nr *
	    sizeof (struct rdsv3_ib_send_work), KM_NOSLEEP);
	if (ic->i_sends == NULL) {
		ret = -ENOMEM;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "send allocation failed: %d", ret);
		goto out;
	}
	(void) memset(ic->i_sends, 0, ic->i_send_ring.w_nr *
	    sizeof (struct rdsv3_ib_send_work));

	ic->i_send_wrs =
	    kmem_alloc(ic->i_send_ring.w_nr * (sizeof (ibt_send_wr_t) +
	    RDSV3_IB_MAX_SGE * sizeof (ibt_wr_ds_t)), KM_NOSLEEP);
	if (ic->i_send_wrs == NULL) {
		ret = -ENOMEM;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "Send WR allocation failed: %d", ret);
		goto out;
	}
	sgl = (ibt_wr_ds_t *)((uint8_t *)ic->i_send_wrs +
	    (ic->i_send_ring.w_nr * sizeof (ibt_send_wr_t)));
	for (i = 0; i < ic->i_send_ring.w_nr; i++) {
		wrp = &ic->i_send_wrs[i];
		wrp->wr_sgl = &sgl[i * RDSV3_IB_MAX_SGE];
	}

	ic->i_recvs = kmem_alloc(ic->i_recv_ring.w_nr *
	    sizeof (struct rdsv3_ib_recv_work), KM_NOSLEEP);
	if (ic->i_recvs == NULL) {
		ret = -ENOMEM;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "recv allocation failed: %d", ret);
		goto out;
	}
	(void) memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr *
	    sizeof (struct rdsv3_ib_recv_work));

	ic->i_recv_wrs =
	    kmem_alloc(ic->i_recv_ring.w_nr * sizeof (ibt_recv_wr_t),
	    KM_NOSLEEP);
	if (ic->i_recv_wrs == NULL) {
		ret = -ENOMEM;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "Recv WR allocation failed: %d", ret);
		goto out;
	}

	rdsv3_ib_recv_init_ack(ic);

	RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "conn %p pd %p mr %p cq %p",
	    conn, ic->i_pd, ic->i_mr, ic->i_cq);

out:
	return (ret);
}
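
The same allocate-then-zero pattern appears here with the illumos allocator; kmem_zalloc() is the pre-zeroed variant, e.g. as a drop-in for the i_sends allocation:

	/* Pre-zeroed allocation; replaces kmem_alloc() + memset(). */
	ic->i_sends = kmem_zalloc(ic->i_send_ring.w_nr *
	    sizeof (struct rdsv3_ib_send_work), KM_NOSLEEP);
	if (ic->i_sends == NULL) {
		ret = -ENOMEM;
		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
		    "send allocation failed: %d", ret);
		goto out;
	}
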
Example No. 13
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 */
void
rdsv3_ib_cm_connect_complete(struct rdsv3_connection *conn,
    struct rdma_cm_event *event)
{
	const struct rdsv3_ib_connect_private *dp = NULL;
	struct rdsv3_ib_connection *ic = conn->c_transport_data;
	struct rdsv3_ib_device *rds_ibdev =
	    ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client);
	struct ib_qp_attr qp_attr;
	int err;

	RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
	    "Enter conn: %p event: %p", conn, event);

	if (event->param.conn.private_data_len >= sizeof (*dp)) {
		dp = event->param.conn.private_data;

		/* make sure it isn't empty data */
		if (dp->dp_protocol_major) {
			rdsv3_ib_set_protocol(conn,
			    RDS_PROTOCOL(dp->dp_protocol_major,
			    dp->dp_protocol_minor));
			rdsv3_ib_set_flow_control(conn,
			    ntohl(dp->dp_credit));
		}
	}

	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "RDS/IB: Connection to %u.%u.%u.%u version %u.%u failed",
		    NIPQUAD(conn->c_faddr),
		    RDS_PROTOCOL_MAJOR(conn->c_version),
		    RDS_PROTOCOL_MINOR(conn->c_version));
		rdsv3_conn_destroy(conn);
		return;
	} else {
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "RDS/IB: connected to %u.%u.%u.%u version %u.%u%s",
		    NIPQUAD(conn->c_faddr),
		    RDS_PROTOCOL_MAJOR(conn->c_version),
		    RDS_PROTOCOL_MINOR(conn->c_version),
		    ic->i_flowctl ? ", flow control" : "");
	}

	ASSERT(ic->i_soft_cq == NULL);
	ic->i_soft_cq = rdsv3_af_intr_thr_create(rdsv3_ib_tasklet_fn,
	    (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp,
	    ic->i_cq->ibt_cq);
	if (rdsv3_enable_snd_cq) {
		ic->i_snd_soft_cq = rdsv3_af_intr_thr_create(
		    rdsv3_ib_snd_tasklet_fn,
		    (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp,
		    ic->i_snd_cq->ibt_cq);
	}
	/* rdsv3_ib_refill_fn is expecting i_max_recv_alloc set */
	ic->i_max_recv_alloc = rdsv3_ib_sysctl_max_recv_allocation;
	ic->i_refill_rq = rdsv3_af_thr_create(rdsv3_ib_refill_fn, (void *)conn,
	    SCQ_WRK_BIND_CPU, rds_ibdev->aft_hcagp);
	rdsv3_af_grp_draw(rds_ibdev->aft_hcagp);

	(void) ib_req_notify_cq(ic->i_cq, IB_CQ_SOLICITED);
	if (rdsv3_enable_snd_cq) {
		(void) ib_req_notify_cq(ic->i_snd_cq, IB_CQ_NEXT_COMP);
	}

	/*
	 * Init rings and fill recv. This needs to wait until protocol
	 * negotiation is complete, since ring layout is different from
	 * 3.0 to 3.1.
	 */
	rdsv3_ib_send_init_ring(ic);
	rdsv3_ib_recv_init_ring(ic);
	/*
	 * Post receive buffers - as a side effect, this will update
	 * the posted credit count.
	 */
	(void) rdsv3_ib_recv_refill(conn, 1);

	/* Tune RNR behavior */
	rdsv3_ib_tune_rnr(ic, &qp_attr);

	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "ib_modify_qp(IB_QP_STATE, RTS): err=%d", err);

	/* update ib_device with this local ipaddr & conn */
	err = rdsv3_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
	if (err)
		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
		    "rdsv3_ib_update_ipaddr failed (%d)", err);
	rdsv3_ib_add_conn(rds_ibdev, conn);

	/*
	 * If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK.
	 */
	if (dp && dp->dp_ack_seq)
		rdsv3_send_drop_acked(conn, ntohll(dp->dp_ack_seq), NULL);

	rdsv3_connect_complete(conn);

	RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
	    "Return conn: %p event: %p",
	    conn, event);
}