Example No. 1
static struct mbuf *
sdp_send_completion(struct sdp_sock *ssk, int mseq)
{
	struct ib_device *dev;
	struct sdp_buf *tx_req;
	struct mbuf *mb = NULL;
	struct sdp_tx_ring *tx_ring = &ssk->tx_ring;

	if (unlikely(mseq != ring_tail(*tx_ring))) {
		printk(KERN_WARNING "Bogus send completion id %d tail %d\n",
			mseq, ring_tail(*tx_ring));
		goto out;
	}

	dev = ssk->ib_device;
	tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
	mb = tx_req->mb;
	sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

#ifdef SDP_ZCOPY
	/* TODO: AIO and real zcopy code; add their context support here */
	if (BZCOPY_STATE(mb))
		BZCOPY_STATE(mb)->busy--;
#endif

	atomic_inc(&tx_ring->tail);

out:
	return mb;
}
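
This variant indexes the TX ring with ring_tail() and advances the consumer side with atomic_inc(&tx_ring->tail). A minimal sketch of how such ring accessors are commonly defined, assuming head and tail are free-running atomic_t counters and SDP_TX_SIZE is a power of two (the real definitions live in the driver's sdp.h and may differ):

/* Sketch only: free-running head/tail counters; the (mseq & (SDP_TX_SIZE - 1))
 * masking above relies on the ring size being a power of two. */
#define ring_head(ring)		(atomic_read(&(ring).head))
#define ring_tail(ring)		(atomic_read(&(ring).tail))
#define ring_posted(ring)	(ring_head(ring) - ring_tail(ring))
#define tx_ring_posted(ssk)	ring_posted((ssk)->tx_ring)
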
Example No. 2
static struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq)
{
	struct ib_device *dev;
	struct sdp_buf *tx_req;
	struct sk_buff *skb = NULL;
	struct sdp_tx_ring *tx_ring = &ssk->tx_ring;
	if (unlikely(mseq != ring_tail(*tx_ring))) {
		printk(KERN_WARNING "Bogus send completion id %d tail %d\n",
			mseq, ring_tail(*tx_ring));
		goto out;
	}

	dev = ssk->ib_device;
	tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
	skb = tx_req->skb;
	if (!skb)
		goto skip; /* This slot was used by RDMA WR */

	sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len, DMA_TO_DEVICE);

	tx_ring->una_seq += SDP_SKB_CB(skb)->end_seq;

	/* TODO: AIO and real zcopy code; add their context support here */
	if (BZCOPY_STATE(skb))
		BZCOPY_STATE(skb)->busy--;

skip:
	atomic_inc(&tx_ring->tail);

out:
	return skb;
}
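
The Linux variant also tracks unacknowledged sequence space via SDP_SKB_CB(skb)->end_seq and zero-copy state via BZCOPY_STATE(skb). A plausible sketch of the per-skb control block behind those macros, with field names inferred from the usage above rather than taken from the real sdp.h:

/* Sketch: driver-private state overlaid on skb->cb[], following the usual
 * pattern of casting the socket buffer's control block to a private struct. */
struct sdp_skb_cb {
	__u32 seq;			/* first byte of this message in stream space */
	__u32 end_seq;			/* seq + payload length; accumulated into una_seq */
	struct bzcopy_state *bz;	/* zero-copy bookkeeping, NULL for normal sends */
};

#define SDP_SKB_CB(skb)		((struct sdp_skb_cb *)&((skb)->cb[0]))
#define BZCOPY_STATE(skb)	(SDP_SKB_CB(skb)->bz)
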
Example No. 3
void
sdp_tx_ring_destroy(struct sdp_sock *ssk)
{

	sdp_dbg(ssk->socket, "tx ring destroy\n");
	SDP_WLOCK(ssk);
	callout_stop(&ssk->tx_ring.timer);
	callout_stop(&ssk->nagle_timer);
	SDP_WUNLOCK(ssk);
	callout_drain(&ssk->tx_ring.timer);
	callout_drain(&ssk->nagle_timer);

	if (ssk->tx_ring.buffer) {
		sdp_tx_ring_purge(ssk);

		kfree(ssk->tx_ring.buffer);
		ssk->tx_ring.buffer = NULL;
	}

	if (ssk->tx_ring.cq) {
		if (ib_destroy_cq(ssk->tx_ring.cq)) {
			sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
					ssk->tx_ring.cq);
		} else {
			ssk->tx_ring.cq = NULL;
		}
	}

	WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}
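
The stop-under-lock / drain-outside-lock sequence above only works if the callouts were initialized against the same lock, e.g. with callout_init_rw(). A plausible setup counterpart, assuming the per-socket rwlock behind SDP_WLOCK() is ssk->lock (the field name is an assumption):

/* Sketch: callouts tied to the socket's rwlock, so their handlers run with
 * the lock held; callout_drain() must then be called with the lock dropped,
 * exactly as sdp_tx_ring_destroy() does above. */
callout_init_rw(&ssk->tx_ring.timer, &ssk->lock, 0);
callout_init_rw(&ssk->nagle_timer, &ssk->lock, 0);
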
Example No. 4
void sdp_tx_ring_destroy(struct sdp_sock *ssk)
{
	del_timer_sync(&ssk->tx_ring.timer);

	if (ssk->nagle_timer.function)
		del_timer_sync(&ssk->nagle_timer);

	if (ssk->tx_ring.buffer) {
		sdp_tx_ring_purge(ssk);

		kfree(ssk->tx_ring.buffer);
		ssk->tx_ring.buffer = NULL;
	}

	if (ssk->tx_ring.cq) {
		if (ib_destroy_cq(ssk->tx_ring.cq)) {
			sdp_warn(sk_ssk(ssk), "destroy cq(%p) failed\n",
					ssk->tx_ring.cq);
		} else {
			ssk->tx_ring.cq = NULL;
		}
	}

	tasklet_kill(&ssk->tx_ring.tasklet);
	/* tx_cq is destroyed, so no more tx_irq, so no one will schedule this
	 * tasklet. */

	SDP_WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}
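
The Linux teardown above implies a matching setup of the timer and tasklet. A sketch using the legacy timer API suggested by sdp_poll_tx_timeout()'s (unsigned long data) signature; the tasklet handler name sdp_poll_tx_task is an assumption:

/* Sketch: creation-side counterparts of the del_timer_sync()/tasklet_kill()
 * calls above; handler names other than sdp_poll_tx_timeout are assumed. */
setup_timer(&ssk->tx_ring.timer, sdp_poll_tx_timeout, (unsigned long)ssk);
tasklet_init(&ssk->tx_ring.tasklet, sdp_poll_tx_task, (unsigned long)ssk);
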
Example No. 5
void
sdp_rx_ring_destroy(struct sdp_sock *ssk)
{

	cancel_work_sync(&ssk->rx_comp_work);
	rx_ring_destroy_lock(&ssk->rx_ring);

	if (ssk->rx_ring.buffer) {
		sdp_rx_ring_purge(ssk);

		kfree(ssk->rx_ring.buffer);
		ssk->rx_ring.buffer = NULL;
	}

	if (ssk->rx_ring.cq) {
		if (ib_destroy_cq(ssk->rx_ring.cq)) {
			sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
				ssk->rx_ring.cq);
		} else {
			ssk->rx_ring.cq = NULL;
		}
	}

	WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring));
}
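
rx_ring_destroy_lock() is not shown in these examples; a plausible sketch is a flag flipped under a lock so the interrupt path stops touching the ring before its buffers and CQ are freed (the destroyed/destroyed_lock field names are assumptions):

/* Sketch: mark the RX ring as going away before tearing it down. */
static void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring)
{
	write_lock_bh(&rx_ring->destroyed_lock);
	rx_ring->destroyed = 1;
	write_unlock_bh(&rx_ring->destroyed_lock);
}
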
Example No. 6
static void
sdp_handle_resize_request(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf)
{
	if (sdp_resize_buffers(ssk, ntohl(buf->size)) == 0)
		ssk->recv_request_head = ring_head(ssk->rx_ring) + 1;
	else
		ssk->recv_request_head = ring_tail(ssk->rx_ring);
	ssk->recv_request = 1;
}
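
The ntohl(buf->size) conversion implies the resize request arrives as a network-byte-order payload. A minimal sketch of that wire structure, assuming a single 32-bit field as in the SDP ChRcvBuf message:

/* Sketch: payload of a ChRcvBuf / ChRcvBufAck message. */
struct sdp_chrecvbuf {
	__u32 size;	/* requested receive buffer size, network byte order */
} __attribute__((packed));
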
Example No. 7
static void sdp_poll_tx_timeout(unsigned long data)
{
	struct sdp_sock *ssk = (struct sdp_sock *)data;
	struct sock *sk = sk_ssk(ssk);
	u32 inflight, wc_processed;

	sdp_prf1(sk_ssk(ssk), NULL, "TX timeout: inflight=%d, head=%d tail=%d",
		(u32) tx_ring_posted(ssk),
		ring_head(ssk->tx_ring), ring_tail(ssk->tx_ring));

	/* Only process if the socket is not in use */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		sdp_prf(sk_ssk(ssk), NULL, "TX comp: socket is busy");

		if (sdp_tx_handler_select(ssk) && sk->sk_state != TCP_CLOSE &&
				likely(ssk->qp_active)) {
			sdp_prf1(sk, NULL, "schedule a timer");
			mod_timer(&ssk->tx_ring.timer, jiffies + SDP_TX_POLL_TIMEOUT);
		}

		SDPSTATS_COUNTER_INC(tx_poll_busy);
		goto out;
	}

	if (unlikely(!ssk->qp || sk->sk_state == TCP_CLOSE)) {
		SDPSTATS_COUNTER_INC(tx_poll_no_op);
		goto out;
	}

	wc_processed = sdp_process_tx_cq(ssk);
	if (!wc_processed)
		SDPSTATS_COUNTER_INC(tx_poll_miss);
	else {
		sdp_post_sends(ssk, GFP_ATOMIC);
		SDPSTATS_COUNTER_INC(tx_poll_hit);
	}

	inflight = (u32) tx_ring_posted(ssk);
	sdp_prf1(sk_ssk(ssk), NULL, "finished tx proccessing. inflight = %d",
			tx_ring_posted(ssk));

	/* If there are still packets in flight and the timer has not already
	 * been scheduled by the Tx routine then schedule it here to guarantee
	 * completion processing of these packets */
	if (inflight && likely(ssk->qp_active))
		mod_timer(&ssk->tx_ring.timer, jiffies + SDP_TX_POLL_TIMEOUT);

out:
	if (ssk->tx_ring.rdma_inflight && ssk->tx_ring.rdma_inflight->busy) {
		sdp_prf1(sk, NULL, "RDMA is inflight - arming irq");
		sdp_arm_tx_cq(sk);
	}

	bh_unlock_sock(sk);
}
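
When an RDMA transfer is still in flight, the handler re-arms the TX completion queue instead of re-scheduling itself. A plausible sketch of sdp_arm_tx_cq() for the Linux port, assuming the usual ib_req_notify_cq() pattern and an sdp_sk() accessor from struct sock to struct sdp_sock:

/* Sketch: request an interrupt for the next TX completion. */
static inline void sdp_arm_tx_cq(struct sock *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP);
}
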
Example No. 8
static void sdp_tx_ring_purge(struct sdp_sock *ssk)
{
	while (ring_posted(ssk->tx_ring)) {
		struct sk_buff *skb;
		skb = sdp_send_completion(ssk, ring_tail(ssk->tx_ring));
		if (!skb)
			break;
		sk_wmem_free_skb(sk_ssk(ssk), skb);
	}
}
Example No. 9
static void
sdp_tx_ring_purge(struct sdp_sock *ssk)
{
	while (tx_ring_posted(ssk)) {
		struct mbuf *mb;
		mb = sdp_send_completion(ssk, ring_tail(ssk->tx_ring));
		if (!mb)
			break;
		m_freem(mb);
	}
}
Example No. 10
static struct mbuf *
sdp_recv_completion(struct sdp_sock *ssk, int id)
{
	struct sdp_buf *rx_req;
	struct ib_device *dev;
	struct mbuf *mb;

	if (unlikely(id != ring_tail(ssk->rx_ring))) {
		printk(KERN_WARNING "Bogus recv completion id %d tail %d\n",
			id, ring_tail(ssk->rx_ring));
		return NULL;
	}

	dev = ssk->ib_device;
	rx_req = &ssk->rx_ring.buffer[id & (SDP_RX_SIZE - 1)];
	mb = rx_req->mb;
	sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE);

	atomic_inc(&ssk->rx_ring.tail);
	atomic_dec(&ssk->remote_credits);
	return mb;
}
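
Both completion helpers delegate DMA unmapping to sdp_cleanup_sdp_buf(). A sketch of what that might look like for the mbuf variant, assuming each sdp_buf records one DMA mapping per mbuf in the chain (the mapping[] field is an assumption):

/* Sketch: undo the per-segment DMA mappings recorded when the buffer was posted. */
static void
sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf,
    enum dma_data_direction dir)
{
	struct ib_device *dev = ssk->ib_device;
	struct mbuf *mb;
	int i;

	for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++)
		ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir);
}
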
Example No. 11
static void
sdp_poll_tx(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;
	u32 inflight, wc_processed;

	sdp_prf1(ssk->socket, NULL, "TX timeout: inflight=%d, head=%d tail=%d", 
		(u32) tx_ring_posted(ssk),
		ring_head(ssk->tx_ring), ring_tail(ssk->tx_ring));

	if (unlikely(ssk->state == TCPS_CLOSED)) {
		sdp_warn(sk, "Socket is closed\n");
		goto out;
	}

	wc_processed = sdp_process_tx_cq(ssk);
	if (!wc_processed)
		SDPSTATS_COUNTER_INC(tx_poll_miss);
	else
		SDPSTATS_COUNTER_INC(tx_poll_hit);

	inflight = (u32) tx_ring_posted(ssk);
	sdp_prf1(ssk->socket, NULL, "finished tx proccessing. inflight = %d",
	    inflight);

	/* If there are still packets in flight and the timer has not already
	 * been scheduled by the Tx routine then schedule it here to guarantee
	 * completion processing of these packets */
	if (inflight)
		callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT,
		    sdp_poll_tx_timeout, ssk);
out:
#ifdef SDP_ZCOPY
	if (ssk->tx_ring.rdma_inflight && ssk->tx_ring.rdma_inflight->busy) {
		sdp_prf1(sk, NULL, "RDMA is inflight - arming irq");
		sdp_arm_tx_cq(ssk);
	}
#endif
	return;
}
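
callout_reset() above points at sdp_poll_tx_timeout(), which in the FreeBSD port is essentially a thin wrapper that re-enters sdp_poll_tx(). A plausible sketch, assuming the callout was initialized against the socket lock so the handler runs with it held:

/* Sketch: callout wrapper; with a lock-initialized callout the lock is
 * already held here, so only the active/deactivate bookkeeping is needed. */
static void
sdp_poll_tx_timeout(void *data)
{
	struct sdp_sock *ssk = (struct sdp_sock *)data;

	if (!callout_active(&ssk->tx_ring.timer))
		return;
	callout_deactivate(&ssk->tx_ring.timer);
	sdp_poll_tx(ssk);
}
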
Example No. 12
int sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
{
	/* TODO: nonagle? */
	struct sk_buff *skb;
	int post_count = 0;
	struct sock *sk = sk_ssk(ssk);

	if (unlikely(!ssk->id)) {
		if (sk->sk_send_head) {
			sdp_dbg(sk, "Send on socket without cmid ECONNRESET\n");
			/* TODO: flush send queue? */
			sdp_reset(sk);
		}
		return -ECONNRESET;
	}
again:
	if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
		sdp_xmit_poll(ssk, 1);

	/* Run out of credits, check if got a credit update */
	if (unlikely(tx_credits(ssk) <= SDP_MIN_TX_CREDITS)) {
		sdp_poll_rx_cq(ssk);

		if (unlikely(sdp_should_rearm(sk) || !posts_handler(ssk)))
			sdp_arm_rx_cq(sk);
	}

	if (unlikely((ssk->sa_post_rdma_rd_compl || ssk->sa_post_sendsm) &&
			tx_credits(ssk) < SDP_MIN_TX_CREDITS)) {
		sdp_dbg_data(sk, "Run out of credits, can't abort SrcAvail. "
			"RdmaRdCompl: %d SendSm: %d\n",
			ssk->sa_post_rdma_rd_compl, ssk->sa_post_sendsm);
	}

	if (ssk->sa_post_rdma_rd_compl && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		int unreported = ssk->sa_post_rdma_rd_compl;

		skb = sdp_alloc_skb_rdmardcompl(sk, unreported, gfp);
		if (!skb)
			goto no_mem;
		sdp_post_send(ssk, skb);
		post_count++;
		ssk->sa_post_rdma_rd_compl = 0;
	}

	if (ssk->sa_post_sendsm && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_sendsm(sk, gfp);
		if (unlikely(!skb))
			goto no_mem;
		sdp_post_send(ssk, skb);
		ssk->sa_post_sendsm = 0;
		post_count++;
	}

	if (ssk->recv_request &&
	    ring_tail(ssk->rx_ring) >= SDP_MIN_TX_CREDITS &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk)) {
		skb = sdp_alloc_skb_chrcvbuf_ack(sk,
				ssk->recv_frags * PAGE_SIZE, gfp);
		if (!skb)
			goto no_mem;
		ssk->recv_request = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       sk->sk_send_head &&
		sdp_nagle_off(ssk, sk->sk_send_head)) {
		SDPSTATS_COUNTER_INC(send_miss_no_credits);
	}

	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       (skb = sk->sk_send_head) &&
		sdp_nagle_off(ssk, skb)) {
		update_send_head(sk, skb);
		__skb_dequeue(&sk->sk_write_queue);

		sdp_post_send(ssk, skb);

		post_count++;
	}

	if (credit_update_needed(ssk) &&
	    likely((1 << sk->sk_state) &
		    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {

		skb = sdp_alloc_skb_data(sk, 0, gfp);
		if (!skb)
			goto no_mem;

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);

		sdp_post_send(ssk, skb);
		SDPSTATS_COUNTER_INC(post_send_credits);
		post_count++;
	}

	/* send DisConn if needed
	 * Do not send DisConn if there is only 1 credit. Compliance with CA4-82
	 * If one credit is available, an implementation shall only send SDP
	 * messages that provide additional credits and also do not contain ULP
	 * payload. */
	if (unlikely(ssk->sdp_disconnect) &&
			!sk->sk_send_head &&
			tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_disconnect(sk, gfp);
		if (!skb)
			goto no_mem;
		ssk->sdp_disconnect = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (!sdp_tx_ring_slots_left(ssk) || post_count) {
		if (sdp_xmit_poll(ssk, 1))
			goto again;
	}

no_mem:
	return post_count;
}
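
The send loop is gated on tx_credits() and sdp_tx_ring_slots_left(). A sketch of plausible definitions for these helpers, assuming peer-granted credits are kept as an atomic counter in the TX ring (field names are inferred, not taken from the real header):

/* Sketch: peer-granted send credits and remaining free send-ring slots. */
#define tx_credits(ssk)	(atomic_read(&(ssk)->tx_ring.credits))

static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk)
{
	return SDP_TX_SIZE - tx_ring_posted(ssk);
}
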
Example No. 13
void
sdp_post_sends(struct sdp_sock *ssk, int wait)
{
	struct mbuf *mb;
	int post_count = 0;
	struct socket *sk;
	int low;

	sk = ssk->socket;
	if (unlikely(!ssk->id)) {
		if (sk->so_snd.sb_sndptr) {
			sdp_dbg(ssk->socket,
				"Send on socket without cmid ECONNRESET.\n");
			sdp_notify(ssk, ECONNRESET);
		}
		return;
	}
again:
	if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
		sdp_xmit_poll(ssk, 1);

	if (ssk->recv_request &&
	    ring_tail(ssk->rx_ring) >= ssk->recv_request_head &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk)) {
		mb = sdp_alloc_mb_chrcvbuf_ack(sk,
		    ssk->recv_bytes - SDP_HEAD_SIZE, wait);
		if (mb == NULL)
			goto allocfail;
		ssk->recv_request = 0;
		sdp_post_send(ssk, mb);
		post_count++;
	}

	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk) && sk->so_snd.sb_sndptr &&
	    sdp_nagle_off(ssk, sk->so_snd.sb_sndptr)) {
		SDPSTATS_COUNTER_INC(send_miss_no_credits);
	}

	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk) && (mb = sk->so_snd.sb_sndptr) &&
	    sdp_nagle_off(ssk, mb)) {
		struct mbuf *n;

		SOCKBUF_LOCK(&sk->so_snd);
		sk->so_snd.sb_sndptr = mb->m_nextpkt;
		sk->so_snd.sb_mb = mb->m_nextpkt;
		mb->m_nextpkt = NULL;
		SB_EMPTY_FIXUP(&sk->so_snd);
		for (n = mb; n != NULL; n = n->m_next)
			sbfree(&sk->so_snd, n);
		SOCKBUF_UNLOCK(&sk->so_snd);
		sdp_post_send(ssk, mb);
		post_count++;
	}

	if (credit_update_needed(ssk) && ssk->state >= TCPS_ESTABLISHED &&
	    ssk->state < TCPS_FIN_WAIT_2) {
		mb = sdp_alloc_mb_data(ssk->socket, wait);
		if (mb == NULL)
			goto allocfail;
		sdp_post_send(ssk, mb);

		SDPSTATS_COUNTER_INC(post_send_credits);
		post_count++;
	}

	/* send DisConn if needed
	 * Do not send DisConn if there is only 1 credit. Compliance with CA4-82
	 * If one credit is available, an implementation shall only send SDP
	 * messages that provide additional credits and also do not contain ULP
	 * payload. */
	if ((ssk->flags & SDP_NEEDFIN) && !sk->so_snd.sb_sndptr &&
	    tx_credits(ssk) > 1) {
		mb = sdp_alloc_mb_disconnect(sk, wait);
		if (mb == NULL)
			goto allocfail;
		ssk->flags &= ~SDP_NEEDFIN;
		sdp_post_send(ssk, mb);
		post_count++;
	}
	low = (sdp_tx_ring_slots_left(ssk) <= SDP_MIN_TX_CREDITS);
	if (post_count || low) {
		if (low)
			sdp_arm_tx_cq(ssk);
		if (sdp_xmit_poll(ssk, low))
			goto again;
	}
	return;

allocfail:
	ssk->nagle_last_unacked = -1;
	callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk);
	return;
}
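
On allocation failure the FreeBSD port arms the Nagle callout to retry. A rough sketch of a matching sdp_nagle_timeout() retry path; this is only an assumption about its shape (the real handler also drives the Nagle flush itself, and passing M_NOWAIT as the wait argument is likewise assumed):

/* Sketch: retry sdp_post_sends() once the callout fires. */
static void
sdp_nagle_timeout(void *data)
{
	struct sdp_sock *ssk = (struct sdp_sock *)data;

	if (!callout_active(&ssk->nagle_timer))
		return;
	callout_deactivate(&ssk->nagle_timer);
	sdp_post_sends(ssk, M_NOWAIT);
}
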