/*
 * Nagle for SDP: decide whether this skb must be transmitted now or may
 * be coalesced with later writes and flushed by the nagle timer.
 */
static inline int sdp_nagle_off(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	/*
	 * Send immediately when any bypass condition holds: a bzcopy send,
	 * a control message (anything but SDP_MID_DATA), TCP_NODELAY set,
	 * nothing un-acked in flight, more data queued behind this skb,
	 * an already full-sized send, or PSH/URG.
	 */
	int send_now =
		BZCOPY_STATE(skb) ||
		unlikely(h->mid != SDP_MID_DATA) ||
		(ssk->nonagle & TCP_NAGLE_OFF) ||
		!ssk->nagle_last_unacked ||
		skb->next != (struct sk_buff *)&sk_ssk(ssk)->sk_write_queue ||
		skb->len + sizeof(struct sdp_bsdh) >= ssk->xmit_size_goal ||
		(SDP_SKB_CB(skb)->flags & TCPHDR_PSH) ||
		(SDP_SKB_CB(skb)->flags & TCPHDR_URG);

	if (send_now) {
		unsigned long mseq = ring_head(ssk->tx_ring);

		/* Remember the sequence of the newest un-acked send. */
		ssk->nagle_last_unacked = mseq;
	} else {
		/* Defer, but guarantee the data is flushed eventually. */
		if (!timer_pending(&ssk->nagle_timer) && ssk->qp_active) {
			mod_timer(&ssk->nagle_timer,
				  jiffies + SDP_NAGLE_TIMEOUT);
			sdp_dbg_data(sk_ssk(ssk), "Starting nagle timer\n");
		}
	}

	return send_now;
}
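/*
 * Stripped of kernel types, sdp_nagle_off() is a pure predicate: transmit
 * immediately if any bypass condition holds, otherwise defer and let the
 * nagle timer flush the queue. A minimal user-space model of that policy
 * follows; the constants and the pkt struct are hypothetical stand-ins for
 * the socket state, not values taken from the driver.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define XMIT_SIZE_GOAL	8192	/* assumed per-connection coalescing goal */
#define BSDH_SIZE	16	/* SDP base header size */

struct pkt {
	size_t	payload;	/* bytes of user data carried */
	bool	is_data;	/* SDP_MID_DATA vs. a control message */
	bool	urgent;		/* PSH/URG-style flags */
	bool	last_in_queue;	/* nothing queued behind it */
	bool	nothing_unacked;/* the wire is idle */
};

/* Mirrors the send_now expression above: true means "transmit now". */
static bool send_now(const struct pkt *p, bool nagle_disabled)
{
	return !p->is_data ||			/* control always goes out */
	       nagle_disabled ||
	       p->nothing_unacked ||		/* no ack worth waiting for */
	       !p->last_in_queue ||		/* more data follows anyway */
	       p->payload + BSDH_SIZE >= XMIT_SIZE_GOAL ||
	       p->urgent;
}

int main(void)
{
	struct pkt small = { 64, true, false, true, false };
	struct pkt big = { 9000, true, false, true, false };

	printf("small trailing write: %s\n",
	       send_now(&small, false) ? "send" : "defer to nagle timer");
	printf("large write: %s\n",
	       send_now(&big, false) ? "send" : "defer to nagle timer");
	return 0;
}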
static struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq)
{
	struct ib_device *dev;
	struct sdp_buf *tx_req;
	struct sk_buff *skb = NULL;
	struct sdp_tx_ring *tx_ring = &ssk->tx_ring;

	/* Completions must be consumed strictly in ring order. */
	if (unlikely(mseq != ring_tail(*tx_ring))) {
		printk(KERN_WARNING "Bogus send completion id %d tail %d\n",
			mseq, ring_tail(*tx_ring));
		goto out;
	}

	dev = ssk->ib_device;
	tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
	skb = tx_req->skb;
	if (!skb)
		goto skip; /* This slot was used by an RDMA WR */

	sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len,
			DMA_TO_DEVICE);

	tx_ring->una_seq += SDP_SKB_CB(skb)->end_seq;

	/* TODO: AIO and real zcopy code; add their context support here */
	if (BZCOPY_STATE(skb))
		BZCOPY_STATE(skb)->busy--;

skip:
	atomic_inc(&tx_ring->tail);
out:
	return skb;
}
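/*
 * The tx ring indexes a power-of-two buffer with "mseq & (SDP_TX_SIZE - 1)"
 * and tracks occupancy as head - tail on free-running counters. A
 * standalone, single-threaded sketch of that arithmetic follows; the kernel
 * version uses atomics and SDP_TX_SIZE may differ, so names here only
 * mirror the code above.
 */
#include <assert.h>
#include <stdio.h>

#define SDP_TX_SIZE 256			/* must be a power of two */

struct tx_ring {
	unsigned int head;		/* next mseq to post */
	unsigned int tail;		/* oldest un-completed mseq */
	int buffer[SDP_TX_SIZE];	/* stands in for struct sdp_buf[] */
};

static unsigned int ring_posted(const struct tx_ring *r)
{
	/* Unsigned wrap-around keeps this correct after head overflows. */
	return r->head - r->tail;
}

static void post(struct tx_ring *r, int v)
{
	assert(ring_posted(r) < SDP_TX_SIZE);	/* ring must not be full */
	r->buffer[r->head & (SDP_TX_SIZE - 1)] = v;
	r->head++;
}

/* Like sdp_send_completion(): completions consume slots strictly in order. */
static int complete(struct tx_ring *r, unsigned int mseq)
{
	int v;

	if (mseq != r->tail) {
		fprintf(stderr, "bogus completion id %u tail %u\n",
			mseq, r->tail);
		return -1;
	}
	v = r->buffer[mseq & (SDP_TX_SIZE - 1)];
	r->tail++;
	return v;
}

int main(void)
{
	struct tx_ring r = { 0, 0, { 0 } };

	post(&r, 42);
	post(&r, 43);
	printf("completed: %d\n", complete(&r, 0));	/* 42 */
	printf("out of order: %d\n", complete(&r, 5));	/* rejected: -1 */
	return 0;
}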
static inline struct mbuf *
sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	struct sdp_bsdh *h;

	h = mtod(mb, struct sdp_bsdh *);

#ifdef SDP_ZCOPY
	SDP_SKB_CB(mb)->seq = rcv_nxt(ssk);
	if (h->mid == SDP_MID_SRCAVAIL) {
		struct sdp_srcah *srcah = (struct sdp_srcah *)(h + 1);
		struct rx_srcavail_state *rx_sa;
		int mb_len;

		ssk->srcavail_cancel_mseq = 0;

		/* XXX: M_NOWAIT allocation may fail; the result is not
		 * checked before use. */
		ssk->rx_sa = rx_sa = RX_SRCAVAIL_STATE(mb) = kzalloc(
				sizeof(struct rx_srcavail_state), M_NOWAIT);

		rx_sa->mseq = ntohl(h->mseq);
		rx_sa->used = 0;
		rx_sa->len = mb_len = ntohl(srcah->len);
		rx_sa->rkey = ntohl(srcah->rkey);
		rx_sa->vaddr = be64_to_cpu(srcah->vaddr);
		rx_sa->flags = 0;

		if (ssk->tx_sa) {
			sdp_dbg_data(ssk->socket, "got RX SrcAvail while waiting "
					"for TX SrcAvail. waking up TX SrcAvail "
					"to be aborted\n");
			wake_up(sk->sk_sleep);
		}

		atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt);
		sdp_dbg_data(sk, "queueing SrcAvail. mb_len = %d vaddr = %lld\n",
			mb_len, (long long)rx_sa->vaddr);
	} else
#endif
	{
		atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt);
	}

	/* Trim the BSDH so only payload lands in the socket buffer. */
	m_adj(mb, SDP_HEAD_SIZE);
	SOCKBUF_LOCK(&sk->so_rcv);
	if (unlikely(h->flags & SDP_OOB_PRES))
		sdp_urg(ssk, mb);
	sbappend_locked(&sk->so_rcv, mb);
	sorwakeup_locked(sk);
	return mb;
}
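/*
 * The receive path peeks at the BSDH (h->mid, h->flags) and then trims
 * SDP_HEAD_SIZE bytes with m_adj() so only payload reaches the socket
 * buffer. A standalone sketch of the 16-byte wire header and its byte
 * order follows; the layout tracks the SDP specification, but treat this
 * struct definition as an assumption, not the driver's header.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct sdp_bsdh {		/* base sockets direct header, big-endian */
	uint8_t  mid;		/* message id: data, SrcAvail, ... */
	uint8_t  flags;		/* OOB_PRES, OOB_PEND, ... */
	uint16_t bufs;		/* receive buffers the sender has posted */
	uint32_t len;		/* header + payload length */
	uint32_t mseq;		/* sequence number of this message */
	uint32_t mseq_ack;	/* last message seen from the peer */
} __attribute__((packed));

int main(void)
{
	unsigned char wire[16] = { 0 };
	struct sdp_bsdh h;
	uint32_t len_be = htonl(80);

	wire[0] = 0xff;				/* data MID per the spec */
	memcpy(wire + 4, &len_be, sizeof(len_be));

	memcpy(&h, wire, sizeof(h));
	printf("mid %#x len %u payload %zu\n", h.mid, ntohl(h.len),
	       (size_t)(ntohl(h.len) - sizeof(h)));
	return 0;
}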
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	unsigned long mseq = ring_head(ssk->tx_ring);
	int i, rc, frags;
	u64 addr;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_send_wr tx_wr = { NULL };
	u32 send_flags = IB_SEND_SIGNALED;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, skb->len);

	if (!ssk->qp_active)
		goto err;

	ssk->tx_packets++;

	/* Charge socket write memory for everything but data/SrcAvail. */
	if (h->mid != SDP_MID_SRCAVAIL &&
	    h->mid != SDP_MID_DATA &&
	    h->mid != SDP_MID_SRCAVAIL_CANCEL) {
		struct sock *sk = sk_ssk(ssk);

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);
	}

	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(sk_ssk(ssk), "SrcAvail cancelled "
					"before being sent!\n");
			SDP_WARN_ON(1);
			sk_wmem_free_skb(sk_ssk(ssk), skb);
			return;
		}
		TX_SRCAVAIL_STATE(skb)->mseq = mseq;
	}

	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	/* Fill the BSDH: advertise rx credits and piggyback the ack. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(skb->len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
		mid2str(h->mid), rx_ring_posted(ssk), mseq,
		ntohl(h->mseq_ack), tx_credits(ssk));

	SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->skb = skb;
	dev = ssk->ib_device;

	if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
		/* Small linear skb: let the HCA copy it inline, no DMA map. */
		SDPSTATS_COUNTER_INC(inline_sends);
		sge->addr = (u64)(unsigned long)skb->data;
		sge->length = skb->len;
		sge->lkey = 0;
		frags = 0;
		tx_req->mapping[0] = 0;
		/* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
		send_flags |= IB_SEND_INLINE;
	} else {
		/* DMA-map the linear part, then one SGE per page fragment. */
		addr = ib_dma_map_single(dev, skb->data,
				skb->len - skb->data_len, DMA_TO_DEVICE);
		tx_req->mapping[0] = addr;

		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));

		sge->addr = addr;
		sge->length = skb->len - skb->data_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
		frags = skb_shinfo(skb)->nr_frags;
		for (i = 0; i < frags; ++i) {
			++sge;
			addr = ib_dma_map_page(dev,
					skb_shinfo(skb)->frags[i].page.p,
					skb_shinfo(skb)->frags[i].page_offset,
					skb_shinfo(skb)->frags[i].size,
					DMA_TO_DEVICE);
			BUG_ON(ib_dma_mapping_error(dev, addr));
			tx_req->mapping[i + 1] = addr;
			sge->addr = addr;
			sge->length = skb_shinfo(skb)->frags[i].size;
			sge->lkey = ssk->sdp_dev->mr->lkey;
		}
	}

	tx_wr.next = NULL;
	tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = frags + 1;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = send_flags;
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(sk_ssk(ssk),
				"ib_post_send failed with status %d.\n", rc);
		sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len,
				DMA_TO_DEVICE);
		sdp_set_error(sk_ssk(ssk), -ECONNRESET);
		goto err;
	}

	/* Advance the ring, spend one tx credit, record what we advertised. */
	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
	return;

err:
	sk_wmem_free_skb(sk_ssk(ssk), skb);
}
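/*
 * Note how sdp_post_send() spends one tx credit per work request and
 * re-advertises the local receive ring in h->bufs, which the peer uses to
 * refresh its own credits. A deliberately simplified model of that exchange
 * follows; it ignores in-flight messages, which the real credit math
 * accounts for, and all names are illustrative, not the driver's.
 */
#include <stdbool.h>
#include <stdio.h>

struct flow {
	int tx_credits;		/* messages we may still post to the peer */
	int rx_posted;		/* receive buffers we currently have posted */
};

/* Sending consumes a credit and advertises our rx ring in the header. */
static bool try_send(struct flow *f, int *advertised_bufs)
{
	if (f->tx_credits <= 0)
		return false;			/* stall until the peer acks */
	f->tx_credits--;
	*advertised_bufs = f->rx_posted;	/* h->bufs in sdp_post_send() */
	return true;
}

/* Every received header refreshes our view of the peer's rx ring. */
static void on_receive(struct flow *f, int peer_bufs)
{
	f->tx_credits = peer_bufs;
	f->rx_posted--;			/* one of our buffers was consumed */
}

int main(void)
{
	struct flow f = { .tx_credits = 2, .rx_posted = 4 };
	int bufs;

	while (try_send(&f, &bufs))
		printf("sent, advertised %d bufs, %d credits left\n",
		       bufs, f.tx_credits);
	printf("stalled: waiting for the peer's ack\n");

	on_receive(&f, 3);		/* a peer header said bufs = 3 */
	printf("refreshed: %d credits\n", f.tx_credits);
	return 0;
}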