/*
 * Asynchronous QP event callback.
 *
 * @event: the event being reported; only event->event is examined.
 * @data:  the struct rds_connection this callback was registered with.
 *
 * IB_EVENT_COMM_EST is passed on to the connection manager via rdma_notify().
 * Every other event (IB_EVENT_QP_REQ_ERR and IB_EVENT_QP_FATAL included) is
 * reported through rds_iw_conn_error().
 */
static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
{
	struct rds_connection *conn = data;
	struct rds_iw_connection *ic = conn->c_transport_data;

	rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);

	/* The one event that is not treated as an error. */
	if (event->event == IB_EVENT_COMM_EST) {
		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
		return;
	}

	/* Anything else takes the error path. */
	rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
		event->event, &conn->c_laddr,
		&conn->c_faddr);
}
/* Example #2 */
/*
 * Send completion queue handler.
 *
 * @cq:      the completion queue to re-arm and poll.
 * @context: the struct rds_connection this handler was registered with.
 *
 * Requests notification for the next completion, then polls the CQ one work
 * completion at a time.  LOCAL_INV, FAST_REG and ACK completions are
 * recognised by their wr_id and handled on their own; every other completion
 * retires entries from the send ring, starting at the oldest one, and kicks
 * the send worker if the sender was blocked on a full ring or has mappings
 * queued.
 *
 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
 * operations performed in the send path.  As the sender allocs and potentially
 * unallocs the next free entry in the ring it doesn't alter which is
 * the next to be freed, which is what this is concerned with.
 */
void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_wc wc;
	struct rds_iw_send_work *send;
	u32 completed;
	u32 oldest;
	u32 i;
	int ret;

	rdsdebug("cq %p conn %p\n", cq, conn);
	rds_iw_stats_inc(s_iw_tx_cq_call);
	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (ret)
		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_iw_stats_inc(s_iw_tx_cq_event);

		/*
		 * NOTE(review): this break leaves the poll loop on the first
		 * failed completion, so the wc.status checks further down in
		 * this loop body can only ever observe IB_WC_SUCCESS.  Confirm
		 * whether bailing out here is intended.
		 */
		if (wc.status != IB_WC_SUCCESS) {
			printk(KERN_ERR "WC Error:  status = %d opcode = %d\n", wc.status, wc.opcode);
			break;
		}

		/* Track whether a fast-register MR is currently posted. */
		if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
			ic->i_fastreg_posted = 0;
			continue;
		}

		if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
			ic->i_fastreg_posted = 1;
			continue;
		}

		if (wc.wr_id == RDS_IW_ACK_WR_ID) {
			/* time_after() keeps the comparison valid across a
			 * jiffies wraparound. */
			if (time_after(jiffies, ic->i_ack_queued + HZ/2))
				rds_iw_stats_inc(s_iw_tx_stalled);
			rds_iw_ack_send_complete(ic);
			continue;
		}

		oldest = rds_iw_ring_oldest(&ic->i_send_ring);

		completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);

		/* Retire each completed ring entry, oldest first. */
		for (i = 0; i < completed; i++) {
			send = &ic->i_sends[oldest];

			/* In the error case, wc.opcode sometimes contains garbage */
			switch (send->s_wr.opcode) {
			case IB_WR_SEND:
				if (send->s_rm)
					rds_iw_send_unmap_rm(ic, send, wc.status);
				break;
			case IB_WR_FAST_REG_MR:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_READ_WITH_INV:
				/* Nothing to be done - the SG list will be unmapped
				 * when the SEND completes. */
				break;
			default:
				if (printk_ratelimit())
					printk(KERN_NOTICE
						"RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
						__func__, send->s_wr.opcode);
				break;
			}

			/* Poison the opcode so that a stale entry is caught by
			 * the "unexpected opcode" case above if seen again. */
			send->s_wr.opcode = 0xdead;
			send->s_wr.num_sge = 1;
			/* Wrap-safe check for a send that sat queued for more
			 * than half a second. */
			if (time_after(jiffies, send->s_queued + HZ/2))
				rds_iw_stats_inc(s_iw_tx_stalled);

			/* If a RDMA operation produced an error, signal this right
			 * away. If we don't, the subsequent SEND that goes with this
			 * RDMA will be canceled with ERR_WFLUSH, and the application
			 * never learn that the RDMA failed. */
			if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
				struct rds_message *rm;

				rm = rds_send_get_message(conn, send->s_op);
				if (rm)
					rds_iw_send_rdma_complete(rm, wc.status);
			}

			oldest = (oldest + 1) % ic->i_send_ring.w_nr;
		}

		rds_iw_ring_free(&ic->i_send_ring, completed);

		/* Restart the send worker if it was waiting on ring space or
		 * has map work queued. */
		if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
		 || test_bit(0, &conn->c_map_queued))
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);

		/* We expect errors as the qp is drained during shutdown */
		if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
			rds_iw_conn_error(conn,
				"send completion on %pI4 "
				"had status %u, disconnecting and reconnecting\n",
				&conn->c_faddr, wc.status);
		}
	}
}