Example #1
0
void
sdp_post_keepalive(struct sdp_sock *ssk)
{
	int rc;
	struct ib_send_wr wr, *bad_wr;

	sdp_dbg(ssk->socket, "%s\n", __func__);

	memset(&wr, 0, sizeof(wr));

	wr.next    = NULL;
	wr.wr_id   = 0;
	wr.sg_list = NULL;
	wr.num_sge = 0;
	wr.opcode  = IB_WR_RDMA_WRITE;

	rc = ib_post_send(ssk->qp, &wr, &bad_wr);
	if (rc) {
		sdp_dbg(ssk->socket,
			"ib_post_keepalive failed with status %d.\n", rc);
		sdp_notify(ssk, ECONNRESET);
	}

	sdp_cnt(sdp_keepalive_probes_sent);
}
Example #2
0
/* Like tcp_fin - called when SDP_MID_DISCONNECT is received */
static void
sdp_handle_disconn(struct sdp_sock *ssk)
{

	sdp_dbg(ssk->socket, "%s\n", __func__);

	SDP_WLOCK_ASSERT(ssk);
	if (TCPS_HAVERCVDFIN(ssk->state) == 0)
		socantrcvmore(ssk->socket);

	switch (ssk->state) {
	case TCPS_SYN_RECEIVED:
	case TCPS_ESTABLISHED:
		ssk->state = TCPS_CLOSE_WAIT;
		break;

	case TCPS_FIN_WAIT_1:
		/* Received a reply FIN - start Infiniband tear down */
		sdp_dbg(ssk->socket,
		    "%s: Starting Infiniband tear down sending DREQ\n",
		    __func__);

		sdp_cancel_dreq_wait_timeout(ssk);
		ssk->qp_active = 0;
		if (ssk->id) {
			struct rdma_cm_id *id;

			id = ssk->id;
			SDP_WUNLOCK(ssk);
			rdma_disconnect(id);
			SDP_WLOCK(ssk);
		} else {
			sdp_warn(ssk->socket,
			    "%s: ssk->id is NULL\n", __func__);
			return;
		}
		break;
	case TCPS_TIME_WAIT:
		/* This is a mutual close situation and we've got the DREQ from
		   the peer before the SDP_MID_DISCONNECT */
		break;
	case TCPS_CLOSED:
		/* FIN arrived after IB teardown started - do nothing */
		sdp_dbg(ssk->socket, "%s: fin in state %s\n",
		    __func__, sdp_state_str(ssk->state));
		return;
	default:
		sdp_warn(ssk->socket,
		    "%s: FIN in unexpected state. state=%d\n",
		    __func__, ssk->state);
		break;
	}
}
Example #3
0
void
sdp_tx_ring_destroy(struct sdp_sock *ssk)
{

	sdp_dbg(ssk->socket, "tx ring destroy\n");
	SDP_WLOCK(ssk);
	callout_stop(&ssk->tx_ring.timer);
	callout_stop(&ssk->nagle_timer);
	SDP_WUNLOCK(ssk);
	callout_drain(&ssk->tx_ring.timer);
	callout_drain(&ssk->nagle_timer);

	if (ssk->tx_ring.buffer) {
		sdp_tx_ring_purge(ssk);

		kfree(ssk->tx_ring.buffer);
		ssk->tx_ring.buffer = NULL;
	}

	if (ssk->tx_ring.cq) {
		if (ib_destroy_cq(ssk->tx_ring.cq)) {
			sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
					ssk->tx_ring.cq);
		} else {
			ssk->tx_ring.cq = NULL;
		}
	}

	WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}
Example #4
0
static int
sdp_process_tx_cq(struct sdp_sock *ssk)
{
	struct ib_wc ibwc[SDP_NUM_WC];
	int n, i;
	int wc_processed = 0;

	SDP_WLOCK_ASSERT(ssk);

	if (!ssk->tx_ring.cq) {
		sdp_dbg(ssk->socket, "tx irq on destroyed tx_cq\n");
		return 0;
	}

	do {
		n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
		for (i = 0; i < n; ++i) {
			sdp_process_tx_wc(ssk, ibwc + i);
			wc_processed++;
		}
	} while (n == SDP_NUM_WC);

	if (wc_processed) {
		sdp_post_sends(ssk, M_DONTWAIT);
		sdp_prf1(ssk->socket, NULL, "Waking sendmsg. inflight=%d",
				(u32) tx_ring_posted(ssk));
		sowwakeup(ssk->socket);
	}

	return wc_processed;
}
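
The do/while loop above drains the TX completion queue in batches: it keeps polling as long as every call returns a full batch of SDP_NUM_WC entries and stops on the first short batch, which means the CQ is empty. A minimal user-space sketch of that drain pattern (the batch size and the stub standing in for ib_poll_cq are illustrative only):

/* Stand-alone model of the batch-drain loop in sdp_process_tx_cq().
 * fake_poll_cq() plays the role of ib_poll_cq(): it returns at most
 * 'num' completions per call and reports how many it produced. */
#include <stdio.h>

#define SDP_NUM_WC 4		/* illustrative batch size */

static int pending = 10;	/* pretend 10 completions are queued */

static int fake_poll_cq(int num, int *wc)
{
	int n = pending < num ? pending : num;
	int i;

	for (i = 0; i < n; i++)
		wc[i] = pending - i;	/* dummy completion payload */
	pending -= n;
	return n;
}

int main(void)
{
	int wc[SDP_NUM_WC], n, i, processed = 0;

	/* Poll while every batch comes back full; a short batch means
	 * the queue is drained. */
	do {
		n = fake_poll_cq(SDP_NUM_WC, wc);
		for (i = 0; i < n; i++)
			processed++;	/* sdp_process_tx_wc() would run here */
	} while (n == SDP_NUM_WC);

	printf("processed %d completions\n", processed);	/* prints 10 */
	return 0;
}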
Example #5
0
int
sdp_process_rx(struct sdp_sock *ssk)
{
	int wc_processed = 0;
	int credits_before;

	if (!rx_ring_trylock(&ssk->rx_ring)) {
		sdp_dbg(ssk->socket, "ring destroyed. not polling it\n");
		return 0;
	}

	credits_before = tx_credits(ssk);

	wc_processed = sdp_poll_rx_cq(ssk);
	sdp_prf(ssk->socket, NULL, "processed %d", wc_processed);

	if (wc_processed) {
		sdp_prf(ssk->socket, NULL, "credits:  %d -> %d",
				credits_before, tx_credits(ssk));
		queue_work(rx_comp_wq, &ssk->rx_comp_work);
	}
	sdp_arm_rx_cq(ssk);

	rx_ring_unlock(&ssk->rx_ring);

	return (wc_processed);
}
Example #6
0
static int
sdp_handle_send_comp(struct sdp_sock *ssk, struct ib_wc *wc)
{
	struct mbuf *mb = NULL;
	struct sdp_bsdh *h;

	if (unlikely(wc->status)) {
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			sdp_prf(ssk->socket, mb, "Send completion with error. "
				"Status %d", wc->status);
			sdp_dbg_data(ssk->socket, "Send completion with error. "
				"Status %d\n", wc->status);
			sdp_notify(ssk, ECONNRESET);
		}
	}

	mb = sdp_send_completion(ssk, wc->wr_id);
	if (unlikely(!mb))
		return -1;

	h = mtod(mb, struct sdp_bsdh *);
	sdp_prf1(ssk->socket, mb, "tx completion. mseq:%d", ntohl(h->mseq));
	sdp_dbg(ssk->socket, "tx completion. %p %d mseq:%d\n",
	    mb, mb->m_pkthdr.len, ntohl(h->mseq));
	m_freem(mb);

	return 0;
}
static void sdp_qp_event_handler(struct ib_event *event, void *data)
{
	if (event->event == IB_EVENT_PATH_MIG) {
		sdp_dbg(NULL, "Path migration event\n");
		return;
	}
	sdp_warn(NULL, "unexpected invocation: event: %d, data=%p\n",
			event->event, data);
}
Example #8
0
static inline void
sdp_process_tx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{

	if (likely(wc->wr_id & SDP_OP_SEND)) {
		sdp_handle_send_comp(ssk, wc);
		return;
	}

#ifdef SDP_ZCOPY
	if (wc->wr_id & SDP_OP_RDMA) {
		/* TODO: handle failed RDMA read cqe */

		sdp_dbg_data(ssk->socket,
		    "TX comp: RDMA read. status: %d\n", wc->status);
		sdp_prf1(ssk->socket, NULL, "TX comp: RDMA read");

		if (!ssk->tx_ring.rdma_inflight) {
			sdp_warn(ssk->socket, "ERROR: unexpected RDMA read\n");
			return;
		}

		if (!ssk->tx_ring.rdma_inflight->busy) {
			sdp_warn(ssk->socket,
			    "ERROR: too many RDMA read completions\n");
			return;
		}

		/* Only the last RDMA read WR is signalled. Ordering is
		 * guaranteed, so if the last RDMA read WR has completed,
		 * all earlier ones have completed too. */
		ssk->tx_ring.rdma_inflight->busy = 0;
		sowwakeup(ssk->socket);
		sdp_dbg_data(ssk->socket, "woke up sleepers\n");
		return;
	}
#endif

	/* Keepalive probe sent cleanup */
	sdp_cnt(sdp_keepalive_probes_sent);

	if (likely(!wc->status))
		return;

	sdp_dbg(ssk->socket, " %s consumes KEEPALIVE status %d\n",
			__func__, wc->status);

	if (wc->status == IB_WC_WR_FLUSH_ERR)
		return;

	sdp_notify(ssk, ECONNRESET);
}
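
The dispatch above relies on flag bits folded into the 64-bit wr_id: Examples #15 and #19 post data with `wr_id = ring_head(...) | SDP_OP_SEND`, zero-copy RDMA reads carry SDP_OP_RDMA, and the zeroed keepalive WR from Example #1 carries no flag at all, which is why it falls through to the keepalive branch. A user-space sketch of that tagging scheme; the flag values below are assumptions for illustration, the real definitions live in the driver's headers:

/* Model of the wr_id tagging dispatched by sdp_process_tx_wc(). The flag
 * values are assumed; keeping them above the 32-bit sequence-number space
 * leaves the ring index recoverable by masking, as modeled here. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SDP_OP_SEND	0x400000000ULL	/* assumed value */
#define SDP_OP_RDMA	0x200000000ULL	/* assumed value */

static void classify(uint64_t wr_id)
{
	if (wr_id & SDP_OP_SEND)
		printf("send completion, mseq=%" PRIu64 "\n",
		    wr_id & 0xffffffffULL);
	else if (wr_id & SDP_OP_RDMA)
		printf("RDMA read completion\n");
	else
		printf("keepalive completion (wr_id=%" PRIu64 ")\n", wr_id);
}

int main(void)
{
	classify(42 | SDP_OP_SEND);	/* data send, mseq 42 */
	classify(SDP_OP_RDMA);		/* zero-copy RDMA read */
	classify(0);			/* keepalive probe from Example #1 */
	return 0;
}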
Example #9
0
/* called only from irq */
static struct mbuf *
sdp_process_rx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{
	struct mbuf *mb;
	struct sdp_bsdh *h;
	struct socket *sk = ssk->socket;
	int mseq;

	mb = sdp_recv_completion(ssk, wc->wr_id);
	if (unlikely(!mb))
		return NULL;

	if (unlikely(wc->status)) {
		if (ssk->qp_active && sk) {
			sdp_dbg(sk, "Recv completion with error. "
					"Status %d, vendor: %d\n",
				wc->status, wc->vendor_err);
			sdp_abort(sk);
			ssk->qp_active = 0;
		}
		m_freem(mb);
		return NULL;
	}

	sdp_dbg_data(sk, "Recv completion. ID %d Length %d\n",
			(int)wc->wr_id, wc->byte_len);
	if (unlikely(wc->byte_len < sizeof(struct sdp_bsdh))) {
		sdp_warn(sk, "SDP BUG! byte_len %d < %zd\n",
				wc->byte_len, sizeof(struct sdp_bsdh));
		m_freem(mb);
		return NULL;
	}
	/* Use m_adj to trim the tail of data we didn't use. */
	m_adj(mb, -(mb->m_pkthdr.len - wc->byte_len));
	h = mtod(mb, struct sdp_bsdh *);

	SDP_DUMP_PACKET(ssk->socket, "RX", mb, h);

	ssk->rx_packets++;
	ssk->rx_bytes += mb->m_pkthdr.len;

	mseq = ntohl(h->mseq);
	atomic_set(&ssk->mseq_ack, mseq);
	if (mseq != (int)wc->wr_id)
		sdp_warn(sk, "SDP BUG! mseq %d != wrid %d\n",
				mseq, (int)wc->wr_id);

	return mb;
}
Example #10
0
static void
sdp_rx_irq(struct ib_cq *cq, void *cq_context)
{
	struct socket *sk = cq_context;
	struct sdp_sock *ssk = sdp_sk(sk);

	if (cq != ssk->rx_ring.cq) {
		sdp_dbg(sk, "cq = %p, ssk->cq = %p\n", cq, ssk->rx_ring.cq);
		return;
	}

	SDPSTATS_COUNTER_INC(rx_int_count);

	sdp_prf(sk, NULL, "rx irq");

	sdp_process_rx(ssk);
}
Example #11
0
void
sdp_do_posts(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;
	int xmit_poll_force;
	struct mbuf *mb;

	SDP_WLOCK_ASSERT(ssk);
	if (!ssk->qp_active) {
		sdp_dbg(sk, "QP is deactivated\n");
		return;
	}

	while ((mb = ssk->rx_ctl_q)) {
		ssk->rx_ctl_q = mb->m_nextpkt;
		mb->m_nextpkt = NULL;
		sdp_process_rx_ctl_mb(ssk, mb);
	}

	if (ssk->state == TCPS_TIME_WAIT)
		return;

	if (!ssk->rx_ring.cq || !ssk->tx_ring.cq)
		return;

	sdp_post_recvs(ssk);

	if (tx_ring_posted(ssk))
		sdp_xmit_poll(ssk, 1);

	sdp_post_sends(ssk, M_NOWAIT);

	xmit_poll_force = tx_credits(ssk) < SDP_MIN_TX_CREDITS;

	if (credit_update_needed(ssk) || xmit_poll_force) {
		/* If there is pending TX because we ran out of tx_credits, transmit it. */
		sdp_prf(sk, NULL, "Processing to free pending sends");
		sdp_xmit_poll(ssk, xmit_poll_force);
		sdp_prf(sk, NULL, "Sending credit update");
		sdp_post_sends(ssk, M_NOWAIT);
	}

}
Example #12
0
int
sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
	struct ib_cq *tx_cq;
	int rc = 0;

	sdp_dbg(ssk->socket, "tx ring create\n");
	callout_init_rw(&ssk->tx_ring.timer, &ssk->lock, 0);
	callout_init_rw(&ssk->nagle_timer, &ssk->lock, 0);
	atomic_set(&ssk->tx_ring.head, 1);
	atomic_set(&ssk->tx_ring.tail, 1);

	ssk->tx_ring.buffer = kzalloc(
			sizeof *ssk->tx_ring.buffer * SDP_TX_SIZE, GFP_KERNEL);
	if (!ssk->tx_ring.buffer) {
		rc = -ENOMEM;
		sdp_warn(ssk->socket, "Can't allocate TX Ring size %zd.\n",
			 sizeof(*ssk->tx_ring.buffer) * SDP_TX_SIZE);

		goto out;
	}

	tx_cq = ib_create_cq(device, sdp_tx_irq, sdp_tx_cq_event_handler,
			  ssk, SDP_TX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);

	if (IS_ERR(tx_cq)) {
		rc = PTR_ERR(tx_cq);
		sdp_warn(ssk->socket, "Unable to allocate TX CQ: %d.\n", rc);
		goto err_cq;
	}
	ssk->tx_ring.cq = tx_cq;
	ssk->tx_ring.poll_cnt = 0;
	sdp_arm_tx_cq(ssk);

	return 0;

err_cq:
	kfree(ssk->tx_ring.buffer);
	ssk->tx_ring.buffer = NULL;
out:
	return rc;
}
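
The ring allocated above is addressed by free-running head/tail counters (both start at 1), and sdp_post_send() in Examples #15 and #19 picks the slot with `mseq & (SDP_TX_SIZE - 1)`, so SDP_TX_SIZE must be a power of two and the number of outstanding sends is simply head minus tail. A small user-space model of that bookkeeping, with an illustrative ring size:

/* Model of the tx ring bookkeeping: head advances when a send is posted,
 * tail advances when its completion is reaped, and the buffer slot is the
 * sequence number masked with SDP_TX_SIZE - 1 (power-of-two size only). */
#include <stdio.h>

#define SDP_TX_SIZE 16u		/* illustrative power-of-two ring size */

static unsigned int head = 1, tail = 1;	/* the driver also starts both at 1 */

static unsigned int ring_posted(void)
{
	/* Correct even after the counters wrap: unsigned math is modular. */
	return head - tail;
}

static unsigned int ring_slot(unsigned int seq)
{
	return seq & (SDP_TX_SIZE - 1);
}

int main(void)
{
	int i;

	/* Post three sends, then reap two completions. */
	for (i = 0; i < 3; i++) {
		printf("post mseq=%u -> slot %u\n", head, ring_slot(head));
		head++;
	}
	tail += 2;

	printf("posted=%u slots_left=%u\n",
	    ring_posted(), SDP_TX_SIZE - ring_posted());	/* 1 and 15 */
	return 0;
}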
Example #13
0
int
sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
	struct ib_cq *rx_cq;
	int rc = 0;


	sdp_dbg(ssk->socket, "rx ring create\n");
	INIT_WORK(&ssk->rx_comp_work, sdp_rx_comp_work);
	atomic_set(&ssk->rx_ring.head, 1);
	atomic_set(&ssk->rx_ring.tail, 1);

	ssk->rx_ring.buffer = kmalloc(
			sizeof *ssk->rx_ring.buffer * SDP_RX_SIZE, GFP_KERNEL);
	if (!ssk->rx_ring.buffer) {
		sdp_warn(ssk->socket,
			"Unable to allocate RX Ring size %zd.\n",
			 sizeof(*ssk->rx_ring.buffer) * SDP_RX_SIZE);

		return -ENOMEM;
	}

	rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler,
			  ssk->socket, SDP_RX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);

	if (IS_ERR(rx_cq)) {
		rc = PTR_ERR(rx_cq);
		sdp_warn(ssk->socket, "Unable to allocate RX CQ: %d.\n", rc);
		goto err_cq;
	}

	sdp_sk(ssk->socket)->rx_ring.cq = rx_cq;
	sdp_arm_rx_cq(ssk);

	return 0;

err_cq:
	kfree(ssk->rx_ring.buffer);
	ssk->rx_ring.buffer = NULL;
	return rc;
}
Example #14
0
static int sdp_process_tx_cq(struct sdp_sock *ssk)
{
	struct ib_wc ibwc[SDP_NUM_WC];
	int n, i;
	int wc_processed = 0;

	if (!ssk->tx_ring.cq) {
		sdp_dbg(sk_ssk(ssk), "tx irq on destroyed tx_cq\n");
		return 0;
	}

	do {
		n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
		for (i = 0; i < n; ++i) {
			sdp_process_tx_wc(ssk, ibwc + i);
			wc_processed++;
		}
	} while (n == SDP_NUM_WC);

	if (wc_processed) {
		struct sock *sk = sk_ssk(ssk);
		sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d",
				(u32) tx_ring_posted(ssk));

		sk_mem_reclaim(sk);

		sk_stream_write_space(sk_ssk(ssk));
		if (sk->sk_write_pending &&
				test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
				tx_ring_posted(ssk)) {
			/* a write is pending and still no room in tx queue,
			 * arm tx cq
			 */
			sdp_prf(sk_ssk(ssk), NULL, "pending tx - rearming");
			sdp_arm_tx_cq(sk);
		}

	}

	return wc_processed;
}
Example #15
0
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	unsigned long mseq = ring_head(ssk->tx_ring);
	int i, rc, frags;
	u64 addr;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;

	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_send_wr tx_wr = { NULL };
	u32 send_flags = IB_SEND_SIGNALED;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, skb->len);

	if (!ssk->qp_active)
		goto err;

	ssk->tx_packets++;

	if (h->mid != SDP_MID_SRCAVAIL &&
			h->mid != SDP_MID_DATA &&
			h->mid != SDP_MID_SRCAVAIL_CANCEL) {
		struct sock *sk = sk_ssk(ssk);

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);
	}

	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(sk_ssk(ssk), "SrcAvail cancelled "
					"before being sent!\n");
			SDP_WARN_ON(1);
			sk_wmem_free_skb(sk_ssk(ssk), skb);
			return;
		}
		TX_SRCAVAIL_STATE(skb)->mseq = mseq;
	}

	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(skb->len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack), tx_credits(ssk));

	SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->skb = skb;
	dev = ssk->ib_device;

	if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
		SDPSTATS_COUNTER_INC(inline_sends);
		sge->addr = (u64) skb->data;
		sge->length = skb->len;
		sge->lkey = 0;
		frags = 0;
		tx_req->mapping[0] = 0; /* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
		send_flags |= IB_SEND_INLINE;
	} else {
		addr = ib_dma_map_single(dev, skb->data, skb->len - skb->data_len,
				DMA_TO_DEVICE);
		tx_req->mapping[0] = addr;

		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));

		sge->addr = addr;
		sge->length = skb->len - skb->data_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
		frags = skb_shinfo(skb)->nr_frags;
		for (i = 0; i < frags; ++i) {
			++sge;
			addr = ib_dma_map_page(dev,
					skb_shinfo(skb)->frags[i].page.p,
					skb_shinfo(skb)->frags[i].page_offset,
					skb_shinfo(skb)->frags[i].size,
					DMA_TO_DEVICE);
			BUG_ON(ib_dma_mapping_error(dev, addr));
			tx_req->mapping[i + 1] = addr;
			sge->addr = addr;
			sge->length = skb_shinfo(skb)->frags[i].size;
			sge->lkey = ssk->sdp_dev->mr->lkey;
		}
	}

	tx_wr.next = NULL;
	tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = frags + 1;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = send_flags;
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(sk_ssk(ssk),
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len, DMA_TO_DEVICE);

		sdp_set_error(sk_ssk(ssk), -ECONNRESET);

		goto err;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;

err:
	sk_wmem_free_skb(sk_ssk(ssk), skb);
}
Example #16
0
int sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
{
	/* TODO: nonagle? */
	struct sk_buff *skb;
	int post_count = 0;
	struct sock *sk = sk_ssk(ssk);

	if (unlikely(!ssk->id)) {
		if (sk->sk_send_head) {
			sdp_dbg(sk, "Send on socket without cmid ECONNRESET\n");
			/* TODO: flush send queue? */
			sdp_reset(sk);
		}
		return -ECONNRESET;
	}
again:
	if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
		sdp_xmit_poll(ssk, 1);

	/* Ran out of credits; check whether a credit update arrived. */
	if (unlikely(tx_credits(ssk) <= SDP_MIN_TX_CREDITS)) {
		sdp_poll_rx_cq(ssk);

		if (unlikely(sdp_should_rearm(sk) || !posts_handler(ssk)))
			sdp_arm_rx_cq(sk);
	}

	if (unlikely((ssk->sa_post_rdma_rd_compl || ssk->sa_post_sendsm) &&
			tx_credits(ssk) < SDP_MIN_TX_CREDITS)) {
		sdp_dbg_data(sk, "Run out of credits, can't abort SrcAvail. "
			"RdmaRdCompl: %d SendSm: %d\n",
			ssk->sa_post_rdma_rd_compl, ssk->sa_post_sendsm);
	}

	if (ssk->sa_post_rdma_rd_compl && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		int unreported = ssk->sa_post_rdma_rd_compl;

		skb = sdp_alloc_skb_rdmardcompl(sk, unreported, gfp);
		if (!skb)
			goto no_mem;
		sdp_post_send(ssk, skb);
		post_count++;
		ssk->sa_post_rdma_rd_compl = 0;
	}

	if (ssk->sa_post_sendsm && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_sendsm(sk, gfp);
		if (unlikely(!skb))
			goto no_mem;
		sdp_post_send(ssk, skb);
		ssk->sa_post_sendsm = 0;
		post_count++;
	}

	if (ssk->recv_request &&
	    ring_tail(ssk->rx_ring) >= SDP_MIN_TX_CREDITS &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk)) {
		skb = sdp_alloc_skb_chrcvbuf_ack(sk,
				ssk->recv_frags * PAGE_SIZE, gfp);
		if (!skb)
			goto no_mem;
		ssk->recv_request = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       sk->sk_send_head &&
		sdp_nagle_off(ssk, sk->sk_send_head)) {
		SDPSTATS_COUNTER_INC(send_miss_no_credits);
	}

	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       (skb = sk->sk_send_head) &&
		sdp_nagle_off(ssk, skb)) {
		update_send_head(sk, skb);
		__skb_dequeue(&sk->sk_write_queue);

		sdp_post_send(ssk, skb);

		post_count++;
	}

	if (credit_update_needed(ssk) &&
	    likely((1 << sk->sk_state) &
		    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {

		skb = sdp_alloc_skb_data(sk, 0, gfp);
		if (!skb)
			goto no_mem;

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);

		sdp_post_send(ssk, skb);
		SDPSTATS_COUNTER_INC(post_send_credits);
		post_count++;
	}

	/* Send DisConn if needed.
	 * Do not send DisConn if there is only one credit, for compliance
	 * with CA4-82: if one credit is available, an implementation shall
	 * only send SDP messages that provide additional credits and also
	 * do not contain ULP payload. */
	if (unlikely(ssk->sdp_disconnect) &&
			!sk->sk_send_head &&
			tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_disconnect(sk, gfp);
		if (!skb)
			goto no_mem;
		ssk->sdp_disconnect = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (!sdp_tx_ring_slots_left(ssk) || post_count) {
		if (sdp_xmit_poll(ssk, 1))
			goto again;
	}

no_mem:
	return post_count;
}
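
The function above gates each post on two resources, free TX ring slots and peer receive credits: data is sent only while more than SDP_MIN_TX_CREDITS remain, control messages such as SendSM and RdmaRdCompl may go down to that minimum, and DisConn additionally waits for an empty send queue (the CA4-82 note above). A minimal user-space sketch of those gates, with an assumed value for SDP_MIN_TX_CREDITS:

/* Model of the credit/slot gates in sdp_post_sends(). SDP_MIN_TX_CREDITS
 * here is an assumption for illustration; the real value is defined by the
 * driver. */
#include <stdbool.h>
#include <stdio.h>

#define SDP_MIN_TX_CREDITS 2	/* assumed value */

/* Payload-carrying sends need more than the reserved minimum plus a slot. */
static bool can_post_data(int tx_credits, int tx_slots_left)
{
	return tx_credits > SDP_MIN_TX_CREDITS && tx_slots_left > 0;
}

/* DisConn waits for an empty send queue and the reserved minimum (CA4-82). */
static bool can_post_disconn(int tx_credits, bool send_queue_empty)
{
	return send_queue_empty && tx_credits >= SDP_MIN_TX_CREDITS;
}

int main(void)
{
	printf("credits=3 slots=4: data? %d\n", can_post_data(3, 4));		/* 1 */
	printf("credits=2 slots=4: data? %d\n", can_post_data(2, 4));		/* 0 */
	printf("credits=1 empty queue: disconn? %d\n", can_post_disconn(1, true));	/* 0 */
	printf("credits=2 empty queue: disconn? %d\n", can_post_disconn(2, true));	/* 1 */
	return 0;
}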
Example #17
0
void
sdp_post_sends(struct sdp_sock *ssk, int wait)
{
	struct mbuf *mb;
	int post_count = 0;
	struct socket *sk;
	int low;

	sk = ssk->socket;
	if (unlikely(!ssk->id)) {
		if (sk->so_snd.sb_sndptr) {
			sdp_dbg(ssk->socket,
				"Send on socket without cmid ECONNRESET.\n");
			sdp_notify(ssk, ECONNRESET);
		}
		return;
	}
again:
	if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
		sdp_xmit_poll(ssk,  1);

	if (ssk->recv_request &&
	    ring_tail(ssk->rx_ring) >= ssk->recv_request_head &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk)) {
		mb = sdp_alloc_mb_chrcvbuf_ack(sk,
		    ssk->recv_bytes - SDP_HEAD_SIZE, wait);
		if (mb == NULL)
			goto allocfail;
		ssk->recv_request = 0;
		sdp_post_send(ssk, mb);
		post_count++;
	}

	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk) && sk->so_snd.sb_sndptr &&
	    sdp_nagle_off(ssk, sk->so_snd.sb_sndptr)) {
		SDPSTATS_COUNTER_INC(send_miss_no_credits);
	}

	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk) && (mb = sk->so_snd.sb_sndptr) &&
	    sdp_nagle_off(ssk, mb)) {
		struct mbuf *n;

		SOCKBUF_LOCK(&sk->so_snd);
		sk->so_snd.sb_sndptr = mb->m_nextpkt;
		sk->so_snd.sb_mb = mb->m_nextpkt;
		mb->m_nextpkt = NULL;
		SB_EMPTY_FIXUP(&sk->so_snd);
		for (n = mb; n != NULL; n = n->m_next)
			sbfree(&sk->so_snd, n);
		SOCKBUF_UNLOCK(&sk->so_snd);
		sdp_post_send(ssk, mb);
		post_count++;
	}

	if (credit_update_needed(ssk) && ssk->state >= TCPS_ESTABLISHED &&
	    ssk->state < TCPS_FIN_WAIT_2) {
		mb = sdp_alloc_mb_data(ssk->socket, wait);
		if (mb == NULL)
			goto allocfail;
		sdp_post_send(ssk, mb);

		SDPSTATS_COUNTER_INC(post_send_credits);
		post_count++;
	}

	/* Send DisConn if needed.
	 * Do not send DisConn if there is only one credit, for compliance
	 * with CA4-82: if one credit is available, an implementation shall
	 * only send SDP messages that provide additional credits and also
	 * do not contain ULP payload. */
	if ((ssk->flags & SDP_NEEDFIN) && !sk->so_snd.sb_sndptr &&
	    tx_credits(ssk) > 1) {
		mb = sdp_alloc_mb_disconnect(sk, wait);
		if (mb == NULL)
			goto allocfail;
		ssk->flags &= ~SDP_NEEDFIN;
		sdp_post_send(ssk, mb);
		post_count++;
	}
	low = (sdp_tx_ring_slots_left(ssk) <= SDP_MIN_TX_CREDITS);
	if (post_count || low) {
		if (low)
			sdp_arm_tx_cq(ssk);
		if (sdp_xmit_poll(ssk, low))
			goto again;
	}
	return;

allocfail:
	ssk->nagle_last_unacked = -1;
	callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk);
	return;
}
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = sdp_rx_size,
		.cap.max_inline_data = sdp_inline_thresh,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge);

	sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_sk(sk)->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
	qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

	rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	sdp_sk(sk)->qp = id->qp;
	sdp_sk(sk)->ib_device = device;
	sdp_sk(sk)->qp_active = 1;
	sdp_sk(sk)->context.device = device;
	sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
	sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
	return rc;
}

static int sdp_get_max_send_frags(u32 buf_size)
{
	return MIN(
		/* +1 to compensate for unaligned buffers */
		(PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
		SDP_MAX_SEND_SGES - 1);
}
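
sdp_get_max_send_frags() above caps the page fragments per send: an unaligned buffer of buf_size bytes can straddle one extra page, hence the +1, and the result is limited to SDP_MAX_SEND_SGES - 1 so one scatter/gather entry stays available for the linear header part. The same arithmetic as a runnable user-space sketch, assuming 4 KiB pages and an illustrative SGE limit:

/* Stand-alone version of the sdp_get_max_send_frags() arithmetic. The page
 * size and SDP_MAX_SEND_SGES below are assumptions for illustration. */
#include <stdio.h>

#define PAGE_SHIFT 12				/* assume 4 KiB pages */
#define PAGE_SIZE (1u << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define SDP_MAX_SEND_SGES 9			/* assumed limit */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static unsigned int max_send_frags(unsigned int buf_size)
{
	/* +1 compensates for a buffer that straddles a page boundary. */
	return MIN((PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
	    SDP_MAX_SEND_SGES - 1);
}

int main(void)
{
	printf("%u\n", max_send_frags(4096));	/* 2 */
	printf("%u\n", max_send_frags(65536));	/* capped at 8 */
	return 0;
}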

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
		       	struct rdma_cm_event *event)
{
	struct sockaddr_in *dst_addr;
	struct sock *child;
	const struct sdp_hh *h;
	int rc = 0;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
		sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
				h->ipv_cap & HH_IPV_MASK);
		return -EINVAL;
	}

	if (!h->max_adverts)
		return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
	child = sk_clone(sk, GFP_KERNEL);
#else
	child = sk_clone_lock(sk, GFP_KERNEL);
#endif
	if (!child)
		return -ENOMEM;

	sdp_init_sock(child);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	if (inet6_sk(sk)) {
		struct ipv6_pinfo *newnp;

		newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);

		memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
		if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
			/* V6 mapped */
			sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
			ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
					h->src_addr.ip4.addr);

			ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
					h->dst_addr.ip4.addr);

			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
		} else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
			struct sockaddr_in6 *src_addr6 =
				(struct sockaddr_in6 *)&id->route.addr.src_addr;

			ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
			ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
		} else {
			sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK);
		}

		sdp_inet_daddr(child) = sdp_inet_saddr(child) =
			sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
	} else
#endif
	{
		sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
	}

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	__sock_put(child, SOCK_REF_CLONE);

	down_read(&device_removal_lock);

	rc = sdp_init_qp(child, id);
	if (rc) {
		bh_unlock_sock(child);
		up_read(&device_removal_lock);
		sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
		sdp_ssk_hist_close(child);
#endif
		sk_free(child);
		return rc;
	}

	sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

	sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
	sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
		sizeof(struct sdp_bsdh);

	sdp_sk(child)->send_frags = sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
	sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

	id->context = child;
	sdp_sk(child)->id = id;

	list_add_tail(&sdp_sk(child)->backlog_queue,
			&sdp_sk(sk)->backlog_queue);
	sdp_sk(child)->parent = sk;

	bh_unlock_sock(child);
	sdp_add_sock(sdp_sk(child));
	up_read(&device_removal_lock);

	sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

	/* child->sk_write_space(child); */
	/* child->sk_data_ready(child, 0); */
	sk->sk_data_ready(sk);

	return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
				struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	sdp_dbg(sk, "%s\n", __func__);

	sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
	sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
	sdp_sk(sk)->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	sdp_sk(sk)->send_frags = sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
	sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
		sdp_sk(sk)->send_frags * PAGE_SIZE);

	sdp_sk(sk)->poll_cq = 1;

	sk->sk_state_change(sk);
	sk_wake_async(sk, 0, POLL_OUT);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(sk) = dst_addr->sin_port;
	sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
	struct sock *parent;
	sdp_dbg(sk, "%s\n", __func__);

	parent = sdp_sk(sk)->parent;
	BUG_ON(!parent);

	sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	lock_sock(parent);
	if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
		sdp_dbg(sk, "parent is going away.\n");
		goto done;
	}

	sk_acceptq_added(parent);
	sdp_dbg(parent, "%s child connection established\n", __func__);
	list_del_init(&sdp_sk(sk)->backlog_queue);
	list_add_tail(&sdp_sk(sk)->accept_queue,
			&sdp_sk(parent)->accept_queue);

	parent->sk_state_change(parent);
	sk_wake_async(parent, 0, POLL_OUT);
done:
	release_sock(parent);

	return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	if (ssk->tx_ring.cq)
		if (sdp_xmit_poll(ssk, 1))
			sdp_post_sends(ssk, 0);

	if (sk->sk_state == TCP_SYN_RECV) {
		sdp_connected_handler(sk);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct sock *parent = NULL;
	struct sock *child = NULL;
	struct sock *sk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0, rc2;

	sk = id->context;
	if (!sk) {
		sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
		       	rdma_cm_event_str(event->event));
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_add_to_history(sk, rdma_cm_event_str(event->event));

	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
	if (!sdp_sk(sk)->id) {
		sdp_dbg(sk, "socket is being torn down\n");
		rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
		release_sock(sk);
		return rc;
	}

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device,
					id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;

		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
				PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (inet6_sk(sk)) {
			struct sockaddr *src_addr = (struct sockaddr *)&id->route.addr.src_addr;
			struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr;
			struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr;

			if (src_addr->sa_family == AF_INET) {
				/* IPv4 over IPv6 */
				ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF),
						addr4->sin_addr.s_addr);
			} else {
				sk->sk_v6_rcv_saddr = addr6->sin6_addr;
			}
			inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
		} else
#endif
		{
			sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
				((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		}
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

		if (sdp_apm_enable) {
			rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc)
				sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
		}

		rc = rdma_connect(id, &conn_param);
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
		sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
				id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
		break;

	case RDMA_CM_EVENT_ALT_PATH_LOADED:
		sdp_dbg(sk, "alt route path loaded\n");
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
		sdp_warn(sk, "alt route resolve error\n");
		break;

	case RDMA_CM_EVENT_ROUTE_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		child = id->context;
		atomic_set(&sdp_sk(child)->remote_credits,
				rx_ring_posted(sdp_sk(child)));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
			sizeof(struct sdp_bsdh));
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			sdp_sk(child)->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			parent = sdp_sk(child)->parent; /* TODO: hold ? */
		} else if (sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else {
			rc = rdma_accept(id, NULL);
			if (!rc && sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2);
			}
		}
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		if (sk->sk_state == TCP_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(sdp_sk(sk));

			sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}

		sdp_sk(sk)->qp_active = 0;
		rdma_disconnect(id);

		if (sk->sk_state != TCP_TIME_WAIT) {
			if (sk->sk_state == TCP_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCP_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
				sock_hold(sk, SOCK_REF_CMA);
				sdp_start_cma_timewait_timeout(sdp_sk(sk),
						SDP_CMA_TIMEWAIT_TIMEOUT);

			}
			sdp_set_error(sk, -EPIPE);
			rc = sdp_disconnected_handler(sk);
		}
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		rc = sdp_disconnected_handler(sk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		rc = -ENETRESET;
		break;

	case RDMA_CM_EVENT_ADDR_CHANGE:
		sdp_dbg(sk, "Got Address change event\n");
		rc = 0;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

	if (rc && sdp_sk(sk)->id == id) {
		child = sk;
		sdp_sk(sk)->id = NULL;
		id->qp = NULL;
		id->context = NULL;
		parent = sdp_sk(sk)->parent;
		sdp_reset_sk(sk, rc);
	}

	release_sock(sk);

	sdp_dbg(sk, "event: %s done. status %d\n",
			rdma_cm_event_str(event->event), rc);

	if (parent) {
		lock_sock(parent);
		if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
			sdp_dbg(sk, "parent is going away.\n");
			child = NULL;
			goto done;
		}
		if (!list_empty(&sdp_sk(child)->backlog_queue))
			list_del_init(&sdp_sk(child)->backlog_queue);
		else
			child = NULL;
done:
		release_sock(parent);
		if (child)
			sdp_common_release(child);
	}
	return rc;
}
Example #19
0
void
sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h;
	unsigned long mseq;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge;
	struct ib_send_wr tx_wr = { NULL };
	int i, rc;
	u64 addr;

	if (!ssk->qp_active) {
		m_freem(mb);
		return;
	}

	mseq = ring_head(ssk->tx_ring);
	h = mtod(mb, struct sdp_bsdh *);
	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, mb->m_pkthdr.len);
	ssk->tx_packets++;
	ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
					"before being sent!\n");
			WARN_ON(1);
			m_freem(mb);
			return;
		}
		TX_SRCAVAIL_STATE(mb)->mseq = mseq;
	}
#endif

	if (unlikely(mb->m_flags & M_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(mb->m_pkthdr.len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack));

	SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->mb = mb;
	dev = ssk->ib_device;
	sge = &ibsge[0];
	for (i = 0;  mb != NULL; i++, mb = mb->m_next, sge++) {
		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
		    DMA_TO_DEVICE);
		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));
		BUG_ON(i >= SDP_MAX_SEND_SGES);
		tx_req->mapping[i] = addr;
		sge->addr = addr;
		sge->length = mb->m_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
	}
	tx_wr.next = NULL;
	tx_wr.wr_id = mseq | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = i;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = IB_SEND_SIGNALED;
	if (unlikely(tx_req->mb->m_flags & M_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(ssk->socket,
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

		sdp_notify(ssk, ECONNRESET);
		m_freem(tx_req->mb);
		return;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;
}
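
The mapping loop in the function above turns every mbuf of the chain into one scatter/gather entry and only asserts, via BUG_ON, that the chain fits within SDP_MAX_SEND_SGES. A toy user-space model of that one-SGE-per-mbuf rule (the mbuf type and the limit are stand-ins for illustration):

/* Model of the chain walk in Example #19: each mbuf becomes one
 * scatter/gather entry, so a chain longer than SDP_MAX_SEND_SGES cannot be
 * posted as a single work request. The mbuf type here is a toy stand-in. */
#include <stddef.h>
#include <stdio.h>

#define SDP_MAX_SEND_SGES 9	/* assumed limit */

struct toy_mbuf {
	struct toy_mbuf *m_next;
	size_t m_len;
};

/* Returns the SGE count a chain needs, or -1 if it exceeds the limit. */
static int count_sges(const struct toy_mbuf *mb)
{
	int n = 0;

	for (; mb != NULL; mb = mb->m_next)
		if (++n > SDP_MAX_SEND_SGES)
			return -1;
	return n;
}

int main(void)
{
	struct toy_mbuf c = { NULL, 100 };
	struct toy_mbuf b = { &c, 200 };
	struct toy_mbuf a = { &b, 1448 };

	printf("sges needed: %d\n", count_sges(&a));	/* 3 */
	return 0;
}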
Example #20
0
/* socket lock should be taken before calling this */
static int
sdp_process_rx_ctl_mb(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_bsdh *h;
	struct socket *sk;

	SDP_WLOCK_ASSERT(ssk);
	sk = ssk->socket;
	h = mtod(mb, struct sdp_bsdh *);
	switch (h->mid) {
	case SDP_MID_DATA:
	case SDP_MID_SRCAVAIL:
		sdp_dbg(sk, "DATA after socket rcv was shutdown\n");

		/* got data in RCV_SHUTDOWN */
		if (ssk->state == TCPS_FIN_WAIT_1) {
			sdp_dbg(sk, "RX data when state = FIN_WAIT1\n");
			sdp_notify(ssk, ECONNRESET);
		}
		m_freem(mb);

		break;
#ifdef SDP_ZCOPY
	case SDP_MID_RDMARDCOMPL:
		m_freem(mb);
		break;
	case SDP_MID_SENDSM:
		sdp_handle_sendsm(ssk, ntohl(h->mseq_ack));
		m_freem(mb);
		break;
	case SDP_MID_SRCAVAIL_CANCEL:
		sdp_dbg_data(sk, "Handling SrcAvailCancel\n");
		sdp_prf(sk, NULL, "Handling SrcAvailCancel");
		if (ssk->rx_sa) {
			ssk->srcavail_cancel_mseq = ntohl(h->mseq);
			ssk->rx_sa->flags |= RX_SA_ABORTED;
			ssk->rx_sa = NULL; /* TODO: change it into SDP_MID_DATA and get 
			                      the dirty logic from recvmsg */
		} else {
			sdp_dbg(sk, "Got SrcAvailCancel - "
					"but no SrcAvail in process\n");
		}
		m_freem(mb);
		break;
	case SDP_MID_SINKAVAIL:
		sdp_dbg_data(sk, "Got SinkAvail - not supported: ignored\n");
		sdp_prf(sk, NULL, "Got SinkAvail - not supported: ignored");
		/* FALLTHROUGH */
#endif
	case SDP_MID_ABORT:
		sdp_dbg_data(sk, "Handling ABORT\n");
		sdp_prf(sk, NULL, "Handling ABORT");
		sdp_notify(ssk, ECONNRESET);
		m_freem(mb);
		break;
	case SDP_MID_DISCONN:
		sdp_dbg_data(sk, "Handling DISCONN\n");
		sdp_prf(sk, NULL, "Handling DISCONN");
		sdp_handle_disconn(ssk);
		break;
	case SDP_MID_CHRCVBUF:
		sdp_dbg_data(sk, "Handling RX CHRCVBUF\n");
		sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf *)(h+1));
		m_freem(mb);
		break;
	case SDP_MID_CHRCVBUF_ACK:
		sdp_dbg_data(sk, "Handling RX CHRCVBUF_ACK\n");
		sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)(h+1));
		m_freem(mb);
		break;
	default:
		/* TODO: Handle other messages */
		sdp_warn(sk, "SDP: FIXME MID %d\n", h->mid);
		m_freem(mb);
	}

	return 0;
}
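
All of the examples above read or fill the same base sockets direct header (BSDH) through `mtod(mb, struct sdp_bsdh *)` or `skb_transport_header()`: mid selects the handler in sdp_process_rx_ctl_mb(), bufs advertises posted receive buffers, len carries the message length, and mseq/mseq_ack are the sequence numbers checked in sdp_process_rx_wc(). The sketch below reconstructs only those fields as a user-space illustration; the authoritative layout is the driver's struct sdp_bsdh and the SDP specification, and the field sizes here are an assumption consistent with the htons()/htonl() conversions used in the examples:

/* Illustrative BSDH model covering only the fields the examples touch.
 * The layout is assumed; all multi-byte fields are big-endian on the
 * wire, hence htons()/htonl() on the send side. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct toy_bsdh {
	uint8_t  mid;		/* message id: data, DisConn, SendSM, ...   */
	uint8_t  flags;		/* e.g. SDP_OOB_PRES | SDP_OOB_PEND         */
	uint16_t bufs;		/* receive buffers posted (network order)   */
	uint32_t len;		/* header + payload length (network order)  */
	uint32_t mseq;		/* this message's sequence number           */
	uint32_t mseq_ack;	/* last sequence number seen from the peer  */
};

int main(void)
{
	struct toy_bsdh h = {
		.mid      = 0xff,		/* placeholder message id */
		.flags    = 0,
		.bufs     = htons(64),
		.len      = htonl(sizeof(h) + 1024),
		.mseq     = htonl(42),
		.mseq_ack = htonl(41),
	};

	printf("mid=0x%02x bufs=%u len=%u mseq=%u ack=%u\n",
	    h.mid, ntohs(h.bufs), ntohl(h.len), ntohl(h.mseq),
	    ntohl(h.mseq_ack));
	return 0;
}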