Example #1
static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff_head *list = &sk->sk_receive_queue;

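	/* Refuse the skb if receive-buffer accounting is already at the limit
	 * or the socket cannot schedule the additional receive memory.
	 */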
	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		return -ENOMEM;

	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;

	skb->dev = NULL;

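	/* Detach the skb from its previous owner and charge its truesize to
	 * this socket's receive-memory accounting.
	 */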
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = kcm_rfree;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, skb->truesize);

	skb_queue_tail(list, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);

	return 0;
}
Example #2
static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
		    int *sg_num_elem, unsigned int *sg_size,
		    int first_coalesce)
{
	struct page_frag *pfrag;
	unsigned int size = *sg_size;
	int num_elem = *sg_num_elem, use = 0, rc = 0;
	struct scatterlist *sge;
	unsigned int orig_offset;

	len -= size;
	pfrag = sk_page_frag(sk);

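	/* Carve 'len' bytes out of the socket's page fragment, charging each
	 * chunk to send-buffer accounting as it is claimed.
	 */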
	while (len > 0) {
		if (!sk_page_frag_refill(sk, pfrag)) {
			rc = -ENOMEM;
			goto out;
		}

		use = min_t(int, len, pfrag->size - pfrag->offset);

		if (!sk_wmem_schedule(sk, use)) {
			rc = -ENOMEM;
			goto out;
		}

		sk_mem_charge(sk, use);
		size += use;
		orig_offset = pfrag->offset;
		pfrag->offset += use;

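		/* Extend the previous scatterlist entry when the new chunk is
		 * contiguous with it in the same page; otherwise start a new
		 * entry, up to MAX_SKB_FRAGS entries.
		 */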
		sge = sg + num_elem - 1;
		if (num_elem > first_coalesce && sg_page(sge) == pfrag->page &&
		    sge->offset + sge->length == orig_offset) {
			sge->length += use;
		} else {
			sge++;
			sg_unmark_end(sge);
			sg_set_page(sge, pfrag->page, use, orig_offset);
			get_page(pfrag->page);
			++num_elem;
			if (num_elem == MAX_SKB_FRAGS) {
				rc = -ENOSPC;
				break;
			}
		}

		len -= use;
	}
	goto out;

out:
	*sg_size = size;
	*sg_num_elem = num_elem;
	return rc;
}
Example #3
static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
			      int length)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
	struct page *pages[MAX_SKB_FRAGS];

	size_t offset;
	ssize_t copied, use;
	int i = 0;
	unsigned int size = ctx->sg_plaintext_size;
	int num_elem = ctx->sg_plaintext_num_elem;
	int rc = 0;
	int maxpages;

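	/* Pin the user pages backing the iterator and reference them from the
	 * plaintext scatterlist instead of copying the data.
	 */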
	while (length > 0) {
		i = 0;
		maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
		if (maxpages == 0) {
			rc = -EFAULT;
			goto out;
		}
		copied = iov_iter_get_pages(from, pages,
					    length,
					    maxpages, &offset);
		if (copied <= 0) {
			rc = -EFAULT;
			goto out;
		}

		iov_iter_advance(from, copied);

		length -= copied;
		size += copied;
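		/* Split the pinned range into per-page scatterlist entries and
		 * charge each piece to the socket's memory accounting.
		 */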
		while (copied) {
			use = min_t(int, copied, PAGE_SIZE - offset);

			sg_set_page(&ctx->sg_plaintext_data[num_elem],
				    pages[i], use, offset);
			sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
			sk_mem_charge(sk, use);

			offset = 0;
			copied -= use;

			++i;
			++num_elem;
		}
	}

out:
	ctx->sg_plaintext_size = size;
	ctx->sg_plaintext_num_elem = num_elem;
	return rc;
}
Example #4
static void
ss_skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

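	/* Initialize the TCP control block, append the skb to the write queue
	 * and charge its truesize to the socket's send-buffer accounting.
	 */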
	skb->csum    = 0;
	tcb->seq     = tcb->end_seq = tp->write_seq;
	tcb->tcp_flags = TCPHDR_ACK;
	tcb->sacked  = 0;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;
}
Example #5
int tls_sw_sendpage(struct sock *sk, struct page *page,
		    int offset, size_t size, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
	int ret = 0;
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	bool eor;
	size_t orig_size = size;
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	struct scatterlist *sg;
	bool full_record;
	int record_room;

	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
		      MSG_SENDPAGE_NOTLAST))
		return -ENOTSUPP;

	/* No MSG_EOR from splice, only look at MSG_MORE */
	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));

	lock_sock(sk);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
		goto sendpage_end;

	/* Call the sk_stream functions to manage the sndbuf mem. */
	while (size > 0) {
		size_t copy, required_size;

		if (sk->sk_err) {
			ret = sk->sk_err;
			goto sendpage_end;
		}

		full_record = false;
		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
		copy = size;
		if (copy >= record_room) {
			copy = record_room;
			full_record = true;
		}
		required_size = ctx->sg_plaintext_size + copy +
			      tls_ctx->overhead_size;

		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;
alloc_payload:
		ret = alloc_encrypted_sg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			copy -= required_size - ctx->sg_plaintext_size;
			full_record = true;
		}

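		/* Take a reference on the caller's page and append it to the
		 * plaintext scatterlist; no data is copied.
		 */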
		get_page(page);
		sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
		sg_set_page(sg, page, copy, offset);
		ctx->sg_plaintext_num_elem++;

		sk_mem_charge(sk, copy);
		offset += copy;
		size -= copy;
		ctx->sg_plaintext_size += copy;
		tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem;

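		/* Close and push the record once it is full, at end-of-record,
		 * or when the scatterlist has no room for more fragments.
		 */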
		if (full_record || eor ||
		    ctx->sg_plaintext_num_elem ==
		    ARRAY_SIZE(ctx->sg_plaintext_data)) {
push_record:
			ret = tls_push_record(sk, flags, record_type);
			if (ret) {
				if (ret == -ENOMEM)
					goto wait_for_memory;

				goto sendpage_end;
			}
		}
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret) {
			trim_both_sgl(sk, ctx->sg_plaintext_size);
			goto sendpage_end;
		}

		if (tls_is_pending_closed_record(tls_ctx))
			goto push_record;

		goto alloc_payload;
	}

sendpage_end:
	if (orig_size > size)
		ret = orig_size - size;
	else
		ret = sk_stream_error(sk, flags, ret);

	release_sock(sk);
	return ret;
}
Example #6
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	unsigned long mseq = ring_head(ssk->tx_ring);
	int i, rc, frags;
	u64 addr;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;

	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_send_wr tx_wr = { NULL };
	u32 send_flags = IB_SEND_SIGNALED;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, skb->len);

	if (!ssk->qp_active)
		goto err;

	ssk->tx_packets++;

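	/* Charge send-buffer accounting for control messages here; data and
	 * SrcAvail skbs are accounted for elsewhere.
	 */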
	if (h->mid != SDP_MID_SRCAVAIL &&
			h->mid != SDP_MID_DATA &&
			h->mid != SDP_MID_SRCAVAIL_CANCEL) {
		struct sock *sk = sk_ssk(ssk);

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);
	}

	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(sk_ssk(ssk), "SrcAvail cancelled "
					"before being sent!\n");
			SDP_WARN_ON(1);
			sk_wmem_free_skb(sk_ssk(ssk), skb);
			return;
		}
		TX_SRCAVAIL_STATE(skb)->mseq = mseq;
	}

	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(skb->len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack), tx_credits(ssk));

	SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->skb = skb;
	dev = ssk->ib_device;

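	/* Small linear skbs are sent inline; otherwise DMA-map the linear head
	 * and every page fragment into the SGE list.
	 */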
	if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
		SDPSTATS_COUNTER_INC(inline_sends);
		sge->addr = (u64) skb->data;
		sge->length = skb->len;
		sge->lkey = 0;
		frags = 0;
		tx_req->mapping[0] = 0; /* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
		send_flags |= IB_SEND_INLINE;
	} else {
		addr = ib_dma_map_single(dev, skb->data, skb->len - skb->data_len,
				DMA_TO_DEVICE);
		tx_req->mapping[0] = addr;

		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));

		sge->addr = addr;
		sge->length = skb->len - skb->data_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
		frags = skb_shinfo(skb)->nr_frags;
		for (i = 0; i < frags; ++i) {
			++sge;
			addr = ib_dma_map_page(dev,
					skb_shinfo(skb)->frags[i].page.p,
					skb_shinfo(skb)->frags[i].page_offset,
					skb_shinfo(skb)->frags[i].size,
					DMA_TO_DEVICE);
			BUG_ON(ib_dma_mapping_error(dev, addr));
			tx_req->mapping[i + 1] = addr;
			sge->addr = addr;
			sge->length = skb_shinfo(skb)->frags[i].size;
			sge->lkey = ssk->sdp_dev->mr->lkey;
		}
	}

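	/* Build the send work request and post it to the QP. */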
	tx_wr.next = NULL;
	tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = frags + 1;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = send_flags;
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(sk_ssk(ssk),
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len, DMA_TO_DEVICE);

		sdp_set_error(sk_ssk(ssk), -ECONNRESET);

		goto err;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;

err:
	sk_wmem_free_skb(sk_ssk(ssk), skb);
}
Example #7
int sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
{
	/* TODO: nonagle? */
	struct sk_buff *skb;
	int post_count = 0;
	struct sock *sk = sk_ssk(ssk);

	if (unlikely(!ssk->id)) {
		if (sk->sk_send_head) {
			sdp_dbg(sk, "Send on socket without cmid ECONNRESET\n");
			/* TODO: flush send queue? */
			sdp_reset(sk);
		}
		return -ECONNRESET;
	}
again:
	if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
		sdp_xmit_poll(ssk, 1);

	/* Run out of credits, check if got a credit update */
	if (unlikely(tx_credits(ssk) <= SDP_MIN_TX_CREDITS)) {
		sdp_poll_rx_cq(ssk);

		if (unlikely(sdp_should_rearm(sk) || !posts_handler(ssk)))
			sdp_arm_rx_cq(sk);
	}

	if (unlikely((ssk->sa_post_rdma_rd_compl || ssk->sa_post_sendsm) &&
			tx_credits(ssk) < SDP_MIN_TX_CREDITS)) {
		sdp_dbg_data(sk, "Run out of credits, can't abort SrcAvail. "
			"RdmaRdCompl: %d SendSm: %d\n",
			ssk->sa_post_rdma_rd_compl, ssk->sa_post_sendsm);
	}

	if (ssk->sa_post_rdma_rd_compl && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		int unreported = ssk->sa_post_rdma_rd_compl;

		skb = sdp_alloc_skb_rdmardcompl(sk, unreported, gfp);
		if (!skb)
			goto no_mem;
		sdp_post_send(ssk, skb);
		post_count++;
		ssk->sa_post_rdma_rd_compl = 0;
	}

	if (ssk->sa_post_sendsm && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_sendsm(sk, gfp);
		if (unlikely(!skb))
			goto no_mem;
		sdp_post_send(ssk, skb);
		ssk->sa_post_sendsm = 0;
		post_count++;
	}

	if (ssk->recv_request &&
	    ring_tail(ssk->rx_ring) >= SDP_MIN_TX_CREDITS &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk)) {
		skb = sdp_alloc_skb_chrcvbuf_ack(sk,
				ssk->recv_frags * PAGE_SIZE, gfp);
		if (!skb)
			goto no_mem;
		ssk->recv_request = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       sk->sk_send_head &&
		sdp_nagle_off(ssk, sk->sk_send_head)) {
		SDPSTATS_COUNTER_INC(send_miss_no_credits);
	}

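	/* Push queued payload skbs while credits and TX ring slots remain. */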
	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       (skb = sk->sk_send_head) &&
		sdp_nagle_off(ssk, skb)) {
		update_send_head(sk, skb);
		__skb_dequeue(&sk->sk_write_queue);

		sdp_post_send(ssk, skb);

		post_count++;
	}

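	/* Post an explicit credit update while the connection is established
	 * or in FIN_WAIT1.
	 */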
	if (credit_update_needed(ssk) &&
	    likely((1 << sk->sk_state) &
		    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {

		skb = sdp_alloc_skb_data(sk, 0, gfp);
		if (!skb)
			goto no_mem;

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);

		sdp_post_send(ssk, skb);
		SDPSTATS_COUNTER_INC(post_send_credits);
		post_count++;
	}

	/* send DisConn if needed
	 * Do not send DisConn if there is only 1 credit. Compliance with CA4-82
	 * If one credit is available, an implementation shall only send SDP
	 * messages that provide additional credits and also do not contain ULP
	 * payload. */
	if (unlikely(ssk->sdp_disconnect) &&
			!sk->sk_send_head &&
			tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_disconnect(sk, gfp);
		if (!skb)
			goto no_mem;
		ssk->sdp_disconnect = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (!sdp_tx_ring_slots_left(ssk) || post_count) {
		if (sdp_xmit_poll(ssk, 1))
			goto again;
	}

no_mem:
	return post_count;
}