Example #1
0
static void rds_ib_tasklet_fn_recv(unsigned long data)
{
	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
	struct rds_connection *conn = ic->conn;
	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
	struct rds_ib_ack_state state;

	if (!rds_ibdev)
		rds_conn_drop(conn);

	rds_ib_stats_inc(s_ib_tasklet_call);

	memset(&state, 0, sizeof(state));
	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);

	if (state.ack_next_valid)
		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
		rds_send_drop_acked(conn, state.ack_recv, NULL);
		ic->i_ack_recv = state.ack_recv;
	}

	if (rds_conn_up(conn))
		rds_ib_attempt_ack(ic);
}
Example #2
0
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
	struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;

	if (wc->status != IB_WC_SUCCESS) {
		frmr->fr_state = FRMR_IS_STALE;
		if (rds_conn_up(ic->conn))
			rds_ib_conn_error(ic->conn,
					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
					  &ic->conn->c_laddr,
					  &ic->conn->c_faddr,
					  wc->status,
					  ib_wc_status_msg(wc->status),
					  wc->vendor_err);
	}

	if (frmr->fr_inv) {
		frmr->fr_state = FRMR_IS_FREE;
		frmr->fr_inv = false;
		atomic_inc(&ic->i_fastreg_wrs);
	} else {
		atomic_inc(&ic->i_fastunreg_wrs);
	}
}
Example #3
0
int rds_tcp_conn_connect(struct rds_connection *conn)
{
	struct socket *sock = NULL;
	struct sockaddr_in src, dest;
	int ret;
	struct rds_tcp_connection *tc = conn->c_transport_data;

	mutex_lock(&tc->t_conn_lock);

	if (rds_conn_up(conn)) {
		mutex_unlock(&tc->t_conn_lock);
		return 0;
	}
	ret = sock_create_kern(rds_conn_net(conn), PF_INET,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (ret < 0)
		goto out;

	rds_tcp_tune(sock);

	src.sin_family = AF_INET;
	src.sin_addr.s_addr = (__force u32)conn->c_laddr;
	src.sin_port = (__force u16)htons(0);

	ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src));
	if (ret) {
		rdsdebug("bind failed with %d at address %pI4\n",
			 ret, &conn->c_laddr);
		goto out;
	}

	dest.sin_family = AF_INET;
	dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
	dest.sin_port = (__force u16)htons(RDS_TCP_PORT);

	/*
	 * once we call connect() we can start getting callbacks and they
	 * own the socket
	 */
	rds_tcp_set_callbacks(sock, conn);
	ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
				 O_NONBLOCK);

	rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
	if (ret == -EINPROGRESS)
		ret = 0;
	if (ret == 0) {
		rds_tcp_keepalive(sock);
		sock = NULL;
	} else {
		rds_tcp_restore_callbacks(sock, conn->c_transport_data);
	}

out:
	mutex_unlock(&tc->t_conn_lock);
	if (sock)
		sock_release(sock);
	return ret;
}
Example #4
0
static void rds_ib_tasklet_fn_send(unsigned long data)
{
	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
	struct rds_connection *conn = ic->conn;

	rds_ib_stats_inc(s_ib_tasklet_call);

	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
	ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);

	if (rds_conn_up(conn) &&
	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    test_bit(0, &conn->c_map_queued)))
		rds_send_xmit(ic->conn);
}
Example #5
0
static void rds_ib_tasklet_fn_send(unsigned long data)
{
	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
	struct rds_connection *conn = ic->conn;

	rds_ib_stats_inc(s_ib_tasklet_call);

	/* if cq has been already reaped, ignore incoming cq event */
	if (atomic_read(&ic->i_cq_quiesce))
		return;

	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
	ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);

	if (rds_conn_up(conn) &&
	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    test_bit(0, &conn->c_map_queued)))
		rds_send_xmit(&ic->conn->c_path[0]);
}
Example #6
0
/*
 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
 * operations performed in the send path.  As the sender allocs and potentially
 * unallocs the next free entry in the ring it doesn't alter which is
 * the next to be freed, which is what this is concerned with.
 */
void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_wc wc;
	struct rds_ib_send_work *send;
	u32 completed;
	u32 oldest;
	u32 i = 0;
	int ret;

	rdsdebug("cq %p conn %p\n", cq, conn);
	rds_ib_stats_inc(s_ib_tx_cq_call);
	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (ret)
		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_ib_stats_inc(s_ib_tx_cq_event);

		if (wc.wr_id == RDS_IB_ACK_WR_ID) {
			if (ic->i_ack_queued + HZ/2 < jiffies)
				rds_ib_stats_inc(s_ib_tx_stalled);
			rds_ib_ack_send_complete(ic);
			continue;
		}

		oldest = rds_ib_ring_oldest(&ic->i_send_ring);

		completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);

		for (i = 0; i < completed; i++) {
			send = &ic->i_sends[oldest];

			/* In the error case, wc.opcode sometimes contains garbage */
			switch (send->s_wr.opcode) {
			case IB_WR_SEND:
				if (send->s_rm)
					rds_ib_send_unmap_rm(ic, send, wc.status);
				break;
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_READ:
				/* Nothing to be done - the SG list will be unmapped
				 * when the SEND completes. */
				break;
			default:
				if (printk_ratelimit())
					printk(KERN_NOTICE
						"RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
						__func__, send->s_wr.opcode);
				break;
			}

			send->s_wr.opcode = 0xdead;
			send->s_wr.num_sge = 1;
			if (send->s_queued + HZ/2 < jiffies)
				rds_ib_stats_inc(s_ib_tx_stalled);

			/* If a RDMA operation produced an error, signal this right
			 * away. If we don't, the subsequent SEND that goes with this
			 * RDMA will be canceled with ERR_WFLUSH, and the application
			 * never learn that the RDMA failed. */
			if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
				struct rds_message *rm;

				rm = rds_send_get_message(conn, send->s_op);
				if (rm)
					rds_ib_send_rdma_complete(rm, wc.status);
			}

			oldest = (oldest + 1) % ic->i_send_ring.w_nr;
		}

		rds_ib_ring_free(&ic->i_send_ring, completed);

		if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
		 || test_bit(0, &conn->c_map_queued))
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);

		/* We expect errors as the qp is drained during shutdown */
		if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
			rds_ib_conn_error(conn,
				"send completion on %pI4 "
				"had status %u, disconnecting and reconnecting\n",
				&conn->c_faddr, wc.status);
		}
	}
}