static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		return -ENOMEM;

	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;

	skb->dev = NULL;

	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = kcm_rfree;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, skb->truesize);

	skb_queue_tail(list, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);

	return 0;
}
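Every skb queued here carries kcm_rfree as its destructor, so the receive-side charge must be reversed when the skb is freed. A minimal sketch of that uncharge path, assuming the destructor simply mirrors the two accounting operations above (the in-tree kcm_rfree also re-enables the receiver, omitted here):

static void kcm_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	/* Reverse the pair from kcm_queue_rcv_skb(): every truesize
	 * byte added to sk_rmem_alloc and charged must come back out.
	 */
	sk_mem_uncharge(sk, len);
	atomic_sub(len, &sk->sk_rmem_alloc);
}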
static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
		    int *sg_num_elem, unsigned int *sg_size,
		    int first_coalesce)
{
	struct page_frag *pfrag;
	unsigned int size = *sg_size;
	int num_elem = *sg_num_elem, use = 0, rc = 0;
	struct scatterlist *sge;
	unsigned int orig_offset;

	len -= size;
	pfrag = sk_page_frag(sk);

	while (len > 0) {
		if (!sk_page_frag_refill(sk, pfrag)) {
			rc = -ENOMEM;
			goto out;
		}

		use = min_t(int, len, pfrag->size - pfrag->offset);

		if (!sk_wmem_schedule(sk, use)) {
			rc = -ENOMEM;
			goto out;
		}

		sk_mem_charge(sk, use);
		size += use;
		orig_offset = pfrag->offset;
		pfrag->offset += use;

		/* Coalesce with the last entry when the new chunk starts
		 * right where it ends within the same page.
		 */
		sge = sg + num_elem - 1;
		if (num_elem > first_coalesce && sg_page(sge) == pfrag->page &&
		    sge->offset + sge->length == orig_offset) {
			sge->length += use;
		} else {
			sge++;
			sg_unmark_end(sge);
			sg_set_page(sge, pfrag->page, use, orig_offset);
			get_page(pfrag->page);
			++num_elem;
			if (num_elem == MAX_SKB_FRAGS) {
				rc = -ENOSPC;
				break;
			}
		}

		len -= use;
	}

out:
	*sg_size = size;
	*sg_num_elem = num_elem;
	return rc;
}
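Each sk_mem_charge() and get_page() in alloc_sg() needs a matching release when the scatterlist is trimmed or torn down. A sketch of that release side, a hypothetical free_sg_elems() assuming a walk-and-uncharge helper along these lines; note the charges were taken per chunk but page references per element, so releasing per-element lengths balances both:

/* Hypothetical helper: release everything alloc_sg() accumulated.
 * Chunks coalesced into one element sum into sg[i].length, so
 * uncharging per-element lengths balances the per-chunk charges;
 * get_page() was taken exactly once per element.
 */
static void free_sg_elems(struct sock *sk, struct scatterlist *sg,
			  int *sg_num_elem, unsigned int *sg_size)
{
	int i;

	for (i = 0; i < *sg_num_elem; i++) {
		sk_mem_uncharge(sk, sg[i].length);
		put_page(sg_page(&sg[i]));
	}
	*sg_num_elem = 0;
	*sg_size = 0;
}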
static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
			      int length)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
	struct page *pages[MAX_SKB_FRAGS];
	size_t offset;
	ssize_t copied, use;
	int i = 0;
	unsigned int size = ctx->sg_plaintext_size;
	int num_elem = ctx->sg_plaintext_num_elem;
	int rc = 0;
	int maxpages;

	while (length > 0) {
		i = 0;
		maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
		if (maxpages == 0) {
			rc = -EFAULT;
			goto out;
		}
		copied = iov_iter_get_pages(from, pages, length,
					    maxpages, &offset);
		if (copied <= 0) {
			rc = -EFAULT;
			goto out;
		}

		iov_iter_advance(from, copied);

		length -= copied;
		size += copied;
		while (copied) {
			use = min_t(int, copied, PAGE_SIZE - offset);

			sg_set_page(&ctx->sg_plaintext_data[num_elem],
				    pages[i], use, offset);
			sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
			sk_mem_charge(sk, use);

			offset = 0;
			copied -= use;

			++i;
			++num_elem;
		}
	}

out:
	ctx->sg_plaintext_size = size;
	ctx->sg_plaintext_num_elem = num_elem;
	return rc;
}
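The inner loop's page-splitting arithmetic is easy to model in isolation. A standalone userspace sketch (hypothetical numbers, not kernel code) showing how `copied` bytes starting at `offset` within the first page fan out into per-page sg entries:

#include <stdio.h>

#define PAGE_SIZE 4096L

int main(void)
{
	long copied = 10000, offset = 300;	/* e.g. what iov_iter_get_pages() returned */
	int i = 0;

	while (copied) {
		/* same split as the kernel loop: min(copied, PAGE_SIZE - offset) */
		long use = copied < PAGE_SIZE - offset ? copied
						       : PAGE_SIZE - offset;

		printf("sg[%d]: offset %ld, len %ld\n", i, offset, use);
		offset = 0;	/* only the first page starts mid-page */
		copied -= use;
		++i;
	}
	return 0;	/* prints lengths 3796, 4096, 2108 across three entries */
}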
static void ss_skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	skb->csum = 0;
	tcb->seq = tcb->end_seq = tp->write_seq;
	tcb->tcp_flags = TCPHDR_ACK;
	tcb->sacked = 0;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;
}
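The two lines charging skb->truesize here are the standard write-queue entail pattern. For reference, the kernel's release counterpart in include/net/sock.h (as of the kernels this code targets) undoes exactly that pair when an skb leaves the write queue:

static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
	sk->sk_wmem_queued -= skb->truesize;	/* undo sk_wmem_queued += */
	sk_mem_uncharge(sk, skb->truesize);	/* undo sk_mem_charge() */
	__kfree_skb(skb);
}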
int tls_sw_sendpage(struct sock *sk, struct page *page,
		    int offset, size_t size, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
	int ret = 0;
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	bool eor;
	size_t orig_size = size;
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	struct scatterlist *sg;
	bool full_record;
	int record_room;

	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
		      MSG_SENDPAGE_NOTLAST))
		return -ENOTSUPP;

	/* No MSG_EOR from splice, only look at MSG_MORE */
	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));

	lock_sock(sk);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
		goto sendpage_end;

	/* Call the sk_stream functions to manage the sndbuf mem. */
	while (size > 0) {
		size_t copy, required_size;

		if (sk->sk_err) {
			ret = sk->sk_err;
			goto sendpage_end;
		}

		full_record = false;
		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
		copy = size;
		if (copy >= record_room) {
			copy = record_room;
			full_record = true;
		}
		required_size = ctx->sg_plaintext_size + copy +
				tls_ctx->overhead_size;

		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;
alloc_payload:
		ret = alloc_encrypted_sg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			copy -= required_size - ctx->sg_plaintext_size;
			full_record = true;
		}

		get_page(page);
		sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
		sg_set_page(sg, page, copy, offset);
		ctx->sg_plaintext_num_elem++;

		sk_mem_charge(sk, copy);
		offset += copy;
		size -= copy;
		ctx->sg_plaintext_size += copy;
		tls_ctx->pending_open_record_frags =
				ctx->sg_plaintext_num_elem;

		if (full_record || eor ||
		    ctx->sg_plaintext_num_elem ==
		    ARRAY_SIZE(ctx->sg_plaintext_data)) {
push_record:
			ret = tls_push_record(sk, flags, record_type);
			if (ret) {
				if (ret == -ENOMEM)
					goto wait_for_memory;

				goto sendpage_end;
			}
		}
		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret) {
			trim_both_sgl(sk, ctx->sg_plaintext_size);
			goto sendpage_end;
		}

		if (tls_is_pending_closed_record(tls_ctx))
			goto push_record;

		goto alloc_payload;
	}

sendpage_end:
	if (orig_size > size)
		ret = orig_size - size;
	else
		ret = sk_stream_error(sk, flags, ret);

	release_sock(sk);
	return ret;
}
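The -ENOSPC adjustment is the subtle step: when alloc_encrypted_sg() hits the max-sg-elements limit, it still updates ctx->sg_plaintext_size to what it actually obtained, and `copy` is shrunk by the shortfall. A hypothetical worked example with made-up sizes:

#include <stdio.h>

int main(void)
{
	/* hypothetical values for one loop iteration */
	unsigned int sg_plaintext_size = 15000;	/* what the allocation reached */
	unsigned int required_size = 17000;	/* what was asked for */
	unsigned int copy = 8000;		/* payload bytes intended */

	/* same adjustment as the -ENOSPC branch above */
	copy -= required_size - sg_plaintext_size;

	printf("copy adjusted to %u\n", copy);	/* 6000: the 2000-byte shortfall */
	return 0;
}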
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	unsigned long mseq = ring_head(ssk->tx_ring);
	int i, rc, frags;
	u64 addr;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_send_wr tx_wr = { NULL };
	u32 send_flags = IB_SEND_SIGNALED;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, skb->len);

	if (!ssk->qp_active)
		goto err;

	ssk->tx_packets++;

	if (h->mid != SDP_MID_SRCAVAIL &&
	    h->mid != SDP_MID_DATA &&
	    h->mid != SDP_MID_SRCAVAIL_CANCEL) {
		struct sock *sk = sk_ssk(ssk);

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);
	}

	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);

		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(sk_ssk(ssk),
				     "SrcAvail cancelled before being sent!\n");
			SDP_WARN_ON(1);
			sk_wmem_free_skb(sk_ssk(ssk), skb);
			return;
		}
		TX_SRCAVAIL_STATE(skb)->mseq = mseq;
	}

	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(skb->len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
		mid2str(h->mid), rx_ring_posted(ssk), mseq,
		ntohl(h->mseq_ack), tx_credits(ssk));

	SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->skb = skb;
	dev = ssk->ib_device;

	if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
		SDPSTATS_COUNTER_INC(inline_sends);
		sge->addr = (u64) skb->data;
		sge->length = skb->len;
		sge->lkey = 0;
		frags = 0;
		/* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
		tx_req->mapping[0] = 0;
		send_flags |= IB_SEND_INLINE;
	} else {
		addr = ib_dma_map_single(dev, skb->data,
					 skb->len - skb->data_len,
					 DMA_TO_DEVICE);
		tx_req->mapping[0] = addr;

		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));

		sge->addr = addr;
		sge->length = skb->len - skb->data_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
		frags = skb_shinfo(skb)->nr_frags;
		for (i = 0; i < frags; ++i) {
			++sge;
			addr = ib_dma_map_page(dev,
					skb_shinfo(skb)->frags[i].page.p,
					skb_shinfo(skb)->frags[i].page_offset,
					skb_shinfo(skb)->frags[i].size,
					DMA_TO_DEVICE);
			BUG_ON(ib_dma_mapping_error(dev, addr));
			tx_req->mapping[i + 1] = addr;
			sge->addr = addr;
			sge->length = skb_shinfo(skb)->frags[i].size;
			sge->lkey = ssk->sdp_dev->mr->lkey;
		}
	}

	tx_wr.next = NULL;
	tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = frags + 1;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = send_flags;
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(sk_ssk(ssk),
			"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len,
				    DMA_TO_DEVICE);

		sdp_set_error(sk_ssk(ssk), -ECONNRESET);

		goto err;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;

err:
	sk_wmem_free_skb(sk_ssk(ssk), skb);
}
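On ib_post_send() failure, the DMA mappings stashed in tx_req->mapping[] have to be undone before the skb is freed. A sketch of what sdp_cleanup_sdp_buf() presumably does, matching the call signature used above (assumption: the head mapping sits in slot 0, with one slot per frag after it, and 0 in slot 0 marks an inline send with nothing mapped):

static void sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *buf,
				size_t head_size, enum dma_data_direction dir)
{
	struct ib_device *dev = ssk->ib_device;
	struct sk_buff *skb = buf->skb;
	int i;

	/* slot 0 is the linear head; 0 means inline send, nothing mapped */
	if (buf->mapping[0])
		ib_dma_unmap_single(dev, buf->mapping[0], head_size, dir);

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		ib_dma_unmap_page(dev, buf->mapping[i + 1],
				  skb_shinfo(skb)->frags[i].size, dir);
}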
int sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
{
	/* TODO: nonagle? */
	struct sk_buff *skb;
	int post_count = 0;
	struct sock *sk = sk_ssk(ssk);

	if (unlikely(!ssk->id)) {
		if (sk->sk_send_head) {
			sdp_dbg(sk, "Send on socket without cmid ECONNRESET\n");
			/* TODO: flush send queue? */
			sdp_reset(sk);
		}
		return -ECONNRESET;
	}
again:
	if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
		sdp_xmit_poll(ssk, 1);

	/* Run out of credits, check if got a credit update */
	if (unlikely(tx_credits(ssk) <= SDP_MIN_TX_CREDITS)) {
		sdp_poll_rx_cq(ssk);

		if (unlikely(sdp_should_rearm(sk) || !posts_handler(ssk)))
			sdp_arm_rx_cq(sk);
	}

	if (unlikely((ssk->sa_post_rdma_rd_compl || ssk->sa_post_sendsm) &&
		     tx_credits(ssk) < SDP_MIN_TX_CREDITS)) {
		sdp_dbg_data(sk, "Run out of credits, can't abort SrcAvail. "
			     "RdmaRdCompl: %d SendSm: %d\n",
			     ssk->sa_post_rdma_rd_compl, ssk->sa_post_sendsm);
	}

	if (ssk->sa_post_rdma_rd_compl &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		int unreported = ssk->sa_post_rdma_rd_compl;

		skb = sdp_alloc_skb_rdmardcompl(sk, unreported, gfp);
		if (!skb)
			goto no_mem;
		sdp_post_send(ssk, skb);
		post_count++;
		ssk->sa_post_rdma_rd_compl = 0;
	}

	if (ssk->sa_post_sendsm && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_sendsm(sk, gfp);
		if (unlikely(!skb))
			goto no_mem;
		sdp_post_send(ssk, skb);
		ssk->sa_post_sendsm = 0;
		post_count++;
	}

	if (ssk->recv_request &&
	    ring_tail(ssk->rx_ring) >= SDP_MIN_TX_CREDITS &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk)) {
		skb = sdp_alloc_skb_chrcvbuf_ack(sk,
				ssk->recv_frags * PAGE_SIZE, gfp);
		if (!skb)
			goto no_mem;
		ssk->recv_request = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
	    sdp_tx_ring_slots_left(ssk) &&
	    sk->sk_send_head &&
	    sdp_nagle_off(ssk, sk->sk_send_head)) {
		SDPSTATS_COUNTER_INC(send_miss_no_credits);
	}

	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
	       sdp_tx_ring_slots_left(ssk) &&
	       (skb = sk->sk_send_head) &&
	       sdp_nagle_off(ssk, skb)) {
		update_send_head(sk, skb);
		__skb_dequeue(&sk->sk_write_queue);

		sdp_post_send(ssk, skb);

		post_count++;
	}

	if (credit_update_needed(ssk) &&
	    likely((1 << sk->sk_state) &
		   (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {
		skb = sdp_alloc_skb_data(sk, 0, gfp);
		if (!skb)
			goto no_mem;

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);

		sdp_post_send(ssk, skb);

		SDPSTATS_COUNTER_INC(post_send_credits);
		post_count++;
	}

	/* Send DisConn if needed.
	 * Do not send DisConn if there is only 1 credit. Compliance with
	 * CA4-82: if one credit is available, an implementation shall only
	 * send SDP messages that provide additional credits and also do
	 * not contain ULP payload.
	 */
	if (unlikely(ssk->sdp_disconnect) &&
	    !sk->sk_send_head &&
	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
		skb = sdp_alloc_skb_disconnect(sk, gfp);
		if (!skb)
			goto no_mem;
		ssk->sdp_disconnect = 0;
		sdp_post_send(ssk, skb);
		post_count++;
	}

	if (!sdp_tx_ring_slots_left(ssk) || post_count) {
		if (sdp_xmit_poll(ssk, 1))
			goto again;
	}

no_mem:
	return post_count;
}
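The send loop's flow control is credit-based: each sdp_post_send() consumes one peer credit (the atomic_dec of tx_ring.credits) and one tx ring slot, and posting stops once credits drop to SDP_MIN_TX_CREDITS. A standalone userspace model with hypothetical numbers:

#include <stdio.h>

#define SDP_MIN_TX_CREDITS 2

int main(void)
{
	int credits = 6, slots = 4, queued = 10, posted = 0;

	/* mirrors the while condition in sdp_post_sends() */
	while (credits > SDP_MIN_TX_CREDITS && slots > 0 && queued > 0) {
		--credits;	/* sdp_post_send(): atomic_dec(credits) */
		--slots;	/* ...and ring_head advances */
		--queued;
		++posted;
	}
	printf("posted %d, credits left %d, slots left %d\n",
	       posted, credits, slots);	/* 4, 2, 0 */
	return 0;
}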