/* Reclaim the mbuf posted at TX slot mseq once its send completion arrives. */
static struct mbuf *
sdp_send_completion(struct sdp_sock *ssk, int mseq)
{
        struct ib_device *dev;
        struct sdp_buf *tx_req;
        struct mbuf *mb = NULL;
        struct sdp_tx_ring *tx_ring = &ssk->tx_ring;

        if (unlikely(mseq != ring_tail(*tx_ring))) {
                printk(KERN_WARNING "Bogus send completion id %d tail %d\n",
                    mseq, ring_tail(*tx_ring));
                goto out;
        }

        dev = ssk->ib_device;
        tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
        mb = tx_req->mb;
        sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

#ifdef SDP_ZCOPY
        /* TODO: AIO and real zcopy code; add their context support here */
        if (BZCOPY_STATE(mb))
                BZCOPY_STATE(mb)->busy--;
#endif

        atomic_inc(&tx_ring->tail);

out:
        return mb;
}
/* Reclaim the skb posted at TX slot mseq once its send completion arrives. */
static struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq)
{
        struct ib_device *dev;
        struct sdp_buf *tx_req;
        struct sk_buff *skb = NULL;
        struct sdp_tx_ring *tx_ring = &ssk->tx_ring;

        if (unlikely(mseq != ring_tail(*tx_ring))) {
                printk(KERN_WARNING "Bogus send completion id %d tail %d\n",
                        mseq, ring_tail(*tx_ring));
                goto out;
        }

        dev = ssk->ib_device;
        tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
        skb = tx_req->skb;
        if (!skb)
                goto skip;      /* This slot was used by RDMA WR */

        sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len,
                        DMA_TO_DEVICE);

        tx_ring->una_seq += SDP_SKB_CB(skb)->end_seq;

        /* TODO: AIO and real zcopy code; add their context support here */
        if (BZCOPY_STATE(skb))
                BZCOPY_STATE(skb)->busy--;

skip:
        atomic_inc(&tx_ring->tail);

out:
        return skb;
}
void
sdp_tx_ring_destroy(struct sdp_sock *ssk)
{

        sdp_dbg(ssk->socket, "tx ring destroy\n");

        SDP_WLOCK(ssk);
        callout_stop(&ssk->tx_ring.timer);
        callout_stop(&ssk->nagle_timer);
        SDP_WUNLOCK(ssk);
        callout_drain(&ssk->tx_ring.timer);
        callout_drain(&ssk->nagle_timer);

        if (ssk->tx_ring.buffer) {
                sdp_tx_ring_purge(ssk);

                kfree(ssk->tx_ring.buffer);
                ssk->tx_ring.buffer = NULL;
        }

        if (ssk->tx_ring.cq) {
                if (ib_destroy_cq(ssk->tx_ring.cq)) {
                        sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
                            ssk->tx_ring.cq);
                } else {
                        ssk->tx_ring.cq = NULL;
                }
        }

        WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}
void sdp_tx_ring_destroy(struct sdp_sock *ssk)
{
        del_timer_sync(&ssk->tx_ring.timer);

        if (ssk->nagle_timer.function)
                del_timer_sync(&ssk->nagle_timer);

        if (ssk->tx_ring.buffer) {
                sdp_tx_ring_purge(ssk);

                kfree(ssk->tx_ring.buffer);
                ssk->tx_ring.buffer = NULL;
        }

        if (ssk->tx_ring.cq) {
                if (ib_destroy_cq(ssk->tx_ring.cq)) {
                        sdp_warn(sk_ssk(ssk), "destroy cq(%p) failed\n",
                                        ssk->tx_ring.cq);
                } else {
                        ssk->tx_ring.cq = NULL;
                }
        }

        tasklet_kill(&ssk->tx_ring.tasklet);
        /* tx_cq is destroyed, so no more tx_irq, so no one will schedule this
         * tasklet. */

        SDP_WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}
void
sdp_rx_ring_destroy(struct sdp_sock *ssk)
{

        cancel_work_sync(&ssk->rx_comp_work);
        rx_ring_destroy_lock(&ssk->rx_ring);

        if (ssk->rx_ring.buffer) {
                sdp_rx_ring_purge(ssk);

                kfree(ssk->rx_ring.buffer);
                ssk->rx_ring.buffer = NULL;
        }

        if (ssk->rx_ring.cq) {
                if (ib_destroy_cq(ssk->rx_ring.cq)) {
                        sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
                            ssk->rx_ring.cq);
                } else {
                        ssk->rx_ring.cq = NULL;
                }
        }

        WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring));
}
/* Handle a ChRcvBuf (change receive buffer size) request from the peer. */
static void
sdp_handle_resize_request(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf)
{
        if (sdp_resize_buffers(ssk, ntohl(buf->size)) == 0)
                ssk->recv_request_head = ring_head(ssk->rx_ring) + 1;
        else
                ssk->recv_request_head = ring_tail(ssk->rx_ring);
        ssk->recv_request = 1;
}
static void sdp_poll_tx_timeout(unsigned long data)
{
        struct sdp_sock *ssk = (struct sdp_sock *)data;
        struct sock *sk = sk_ssk(ssk);
        u32 inflight, wc_processed;

        sdp_prf1(sk_ssk(ssk), NULL, "TX timeout: inflight=%d, head=%d tail=%d",
                (u32) tx_ring_posted(ssk),
                ring_head(ssk->tx_ring), ring_tail(ssk->tx_ring));

        /* Only process if the socket is not in use */
        bh_lock_sock(sk);
        if (sock_owned_by_user(sk)) {
                sdp_prf(sk_ssk(ssk), NULL, "TX comp: socket is busy");

                if (sdp_tx_handler_select(ssk) && sk->sk_state != TCP_CLOSE &&
                                likely(ssk->qp_active)) {
                        sdp_prf1(sk, NULL, "schedule a timer");
                        mod_timer(&ssk->tx_ring.timer,
                                        jiffies + SDP_TX_POLL_TIMEOUT);
                }

                SDPSTATS_COUNTER_INC(tx_poll_busy);
                goto out;
        }

        if (unlikely(!ssk->qp || sk->sk_state == TCP_CLOSE)) {
                SDPSTATS_COUNTER_INC(tx_poll_no_op);
                goto out;
        }

        wc_processed = sdp_process_tx_cq(ssk);
        if (!wc_processed)
                SDPSTATS_COUNTER_INC(tx_poll_miss);
        else {
                sdp_post_sends(ssk, GFP_ATOMIC);
                SDPSTATS_COUNTER_INC(tx_poll_hit);
        }

        inflight = (u32) tx_ring_posted(ssk);
        sdp_prf1(sk_ssk(ssk), NULL, "finished tx processing. inflight = %d",
                        tx_ring_posted(ssk));

        /* If there are still packets in flight and the timer has not already
         * been scheduled by the Tx routine then schedule it here to guarantee
         * completion processing of these packets */
        if (inflight && likely(ssk->qp_active))
                mod_timer(&ssk->tx_ring.timer, jiffies + SDP_TX_POLL_TIMEOUT);

out:
        if (ssk->tx_ring.rdma_inflight && ssk->tx_ring.rdma_inflight->busy) {
                sdp_prf1(sk, NULL, "RDMA is inflight - arming irq");
                sdp_arm_tx_cq(sk);
        }

        bh_unlock_sock(sk);
}
static void sdp_tx_ring_purge(struct sdp_sock *ssk)
{
        while (ring_posted(ssk->tx_ring)) {
                struct sk_buff *skb;

                skb = sdp_send_completion(ssk, ring_tail(ssk->tx_ring));
                if (!skb)
                        break;
                sk_wmem_free_skb(sk_ssk(ssk), skb);
        }
}
static void
sdp_tx_ring_purge(struct sdp_sock *ssk)
{

        while (tx_ring_posted(ssk)) {
                struct mbuf *mb;

                mb = sdp_send_completion(ssk, ring_tail(ssk->tx_ring));
                if (!mb)
                        break;
                m_freem(mb);
        }
}
static struct mbuf *
sdp_recv_completion(struct sdp_sock *ssk, int id)
{
        struct sdp_buf *rx_req;
        struct ib_device *dev;
        struct mbuf *mb;

        if (unlikely(id != ring_tail(ssk->rx_ring))) {
                printk(KERN_WARNING "Bogus recv completion id %d tail %d\n",
                    id, ring_tail(ssk->rx_ring));
                return NULL;
        }

        dev = ssk->ib_device;
        rx_req = &ssk->rx_ring.buffer[id & (SDP_RX_SIZE - 1)];
        mb = rx_req->mb;
        sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE);

        atomic_inc(&ssk->rx_ring.tail);
        atomic_dec(&ssk->remote_credits);
        return mb;
}
static void
sdp_poll_tx(struct sdp_sock *ssk)
{
        struct socket *sk = ssk->socket;
        u32 inflight, wc_processed;

        sdp_prf1(ssk->socket, NULL, "TX timeout: inflight=%d, head=%d tail=%d",
            (u32) tx_ring_posted(ssk),
            ring_head(ssk->tx_ring), ring_tail(ssk->tx_ring));

        if (unlikely(ssk->state == TCPS_CLOSED)) {
                sdp_warn(sk, "Socket is closed\n");
                goto out;
        }

        wc_processed = sdp_process_tx_cq(ssk);
        if (!wc_processed)
                SDPSTATS_COUNTER_INC(tx_poll_miss);
        else
                SDPSTATS_COUNTER_INC(tx_poll_hit);

        inflight = (u32) tx_ring_posted(ssk);
        sdp_prf1(ssk->socket, NULL, "finished tx processing. inflight = %d",
            inflight);

        /* If there are still packets in flight and the timer has not already
         * been scheduled by the Tx routine then schedule it here to guarantee
         * completion processing of these packets */
        if (inflight)
                callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT,
                    sdp_poll_tx_timeout, ssk);
out:
#ifdef SDP_ZCOPY
        if (ssk->tx_ring.rdma_inflight && ssk->tx_ring.rdma_inflight->busy) {
                sdp_prf1(sk, NULL, "RDMA is inflight - arming irq");
                sdp_arm_tx_cq(ssk);
        }
#endif
        return;
}
int sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
{
        /* TODO: nonagle? */
        struct sk_buff *skb;
        int post_count = 0;
        struct sock *sk = sk_ssk(ssk);

        if (unlikely(!ssk->id)) {
                if (sk->sk_send_head) {
                        sdp_dbg(sk, "Send on socket without cmid ECONNRESET\n");
                        /* TODO: flush send queue? */
                        sdp_reset(sk);
                }
                return -ECONNRESET;
        }
again:
        if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
                sdp_xmit_poll(ssk, 1);

        /* Run out of credits, check if got a credit update */
        if (unlikely(tx_credits(ssk) <= SDP_MIN_TX_CREDITS)) {
                sdp_poll_rx_cq(ssk);

                if (unlikely(sdp_should_rearm(sk) || !posts_handler(ssk)))
                        sdp_arm_rx_cq(sk);
        }

        if (unlikely((ssk->sa_post_rdma_rd_compl || ssk->sa_post_sendsm) &&
                        tx_credits(ssk) < SDP_MIN_TX_CREDITS)) {
                sdp_dbg_data(sk, "Run out of credits, can't abort SrcAvail. "
                        "RdmaRdCompl: %d SendSm: %d\n",
                        ssk->sa_post_rdma_rd_compl, ssk->sa_post_sendsm);
        }

        if (ssk->sa_post_rdma_rd_compl && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
                int unreported = ssk->sa_post_rdma_rd_compl;

                skb = sdp_alloc_skb_rdmardcompl(sk, unreported, gfp);
                if (!skb)
                        goto no_mem;
                sdp_post_send(ssk, skb);
                post_count++;
                ssk->sa_post_rdma_rd_compl = 0;
        }

        if (ssk->sa_post_sendsm && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
                skb = sdp_alloc_skb_sendsm(sk, gfp);
                if (unlikely(!skb))
                        goto no_mem;
                sdp_post_send(ssk, skb);
                ssk->sa_post_sendsm = 0;
                post_count++;
        }

        if (ssk->recv_request &&
            ring_tail(ssk->rx_ring) >= SDP_MIN_TX_CREDITS &&
            tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
            sdp_tx_ring_slots_left(ssk)) {
                skb = sdp_alloc_skb_chrcvbuf_ack(sk,
                                ssk->recv_frags * PAGE_SIZE, gfp);
                if (!skb)
                        goto no_mem;
                ssk->recv_request = 0;
                sdp_post_send(ssk, skb);
                post_count++;
        }

        if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
            sdp_tx_ring_slots_left(ssk) &&
            sk->sk_send_head &&
            sdp_nagle_off(ssk, sk->sk_send_head)) {
                SDPSTATS_COUNTER_INC(send_miss_no_credits);
        }

        while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
            sdp_tx_ring_slots_left(ssk) &&
            (skb = sk->sk_send_head) &&
            sdp_nagle_off(ssk, skb)) {
                update_send_head(sk, skb);
                __skb_dequeue(&sk->sk_write_queue);

                sdp_post_send(ssk, skb);

                post_count++;
        }

        if (credit_update_needed(ssk) &&
            likely((1 << sk->sk_state) &
                    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {
                skb = sdp_alloc_skb_data(sk, 0, gfp);
                if (!skb)
                        goto no_mem;

                sk->sk_wmem_queued += skb->truesize;
                sk_mem_charge(sk, skb->truesize);

                sdp_post_send(ssk, skb);
                SDPSTATS_COUNTER_INC(post_send_credits);
                post_count++;
        }

        /* send DisConn if needed
         * Do not send DisConn if there is only 1 credit. Compliance with CA4-82
         * If one credit is available, an implementation shall only send SDP
         * messages that provide additional credits and also do not contain ULP
         * payload. */
        if (unlikely(ssk->sdp_disconnect) &&
            !sk->sk_send_head &&
            tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
                skb = sdp_alloc_skb_disconnect(sk, gfp);
                if (!skb)
                        goto no_mem;
                ssk->sdp_disconnect = 0;
                sdp_post_send(ssk, skb);
                post_count++;
        }

        if (!sdp_tx_ring_slots_left(ssk) || post_count) {
                if (sdp_xmit_poll(ssk, 1))
                        goto again;
        }

no_mem:
        return post_count;
}
void
sdp_post_sends(struct sdp_sock *ssk, int wait)
{
        struct mbuf *mb;
        int post_count = 0;
        struct socket *sk;
        int low;

        sk = ssk->socket;
        if (unlikely(!ssk->id)) {
                if (sk->so_snd.sb_sndptr) {
                        sdp_dbg(ssk->socket,
                            "Send on socket without cmid ECONNRESET.\n");
                        sdp_notify(ssk, ECONNRESET);
                }
                return;
        }
again:
        if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
                sdp_xmit_poll(ssk, 1);

        if (ssk->recv_request &&
            ring_tail(ssk->rx_ring) >= ssk->recv_request_head &&
            tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
            sdp_tx_ring_slots_left(ssk)) {
                mb = sdp_alloc_mb_chrcvbuf_ack(sk,
                    ssk->recv_bytes - SDP_HEAD_SIZE, wait);
                if (mb == NULL)
                        goto allocfail;
                ssk->recv_request = 0;
                sdp_post_send(ssk, mb);
                post_count++;
        }

        if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
            sdp_tx_ring_slots_left(ssk) && sk->so_snd.sb_sndptr &&
            sdp_nagle_off(ssk, sk->so_snd.sb_sndptr)) {
                SDPSTATS_COUNTER_INC(send_miss_no_credits);
        }

        while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
            sdp_tx_ring_slots_left(ssk) && (mb = sk->so_snd.sb_sndptr) &&
            sdp_nagle_off(ssk, mb)) {
                struct mbuf *n;

                SOCKBUF_LOCK(&sk->so_snd);
                sk->so_snd.sb_sndptr = mb->m_nextpkt;
                sk->so_snd.sb_mb = mb->m_nextpkt;
                mb->m_nextpkt = NULL;
                SB_EMPTY_FIXUP(&sk->so_snd);
                for (n = mb; n != NULL; n = n->m_next)
                        sbfree(&sk->so_snd, n);
                SOCKBUF_UNLOCK(&sk->so_snd);
                sdp_post_send(ssk, mb);
                post_count++;
        }

        if (credit_update_needed(ssk) && ssk->state >= TCPS_ESTABLISHED &&
            ssk->state < TCPS_FIN_WAIT_2) {
                mb = sdp_alloc_mb_data(ssk->socket, wait);
                if (mb == NULL)
                        goto allocfail;
                sdp_post_send(ssk, mb);
                SDPSTATS_COUNTER_INC(post_send_credits);
                post_count++;
        }

        /* send DisConn if needed
         * Do not send DisConn if there is only 1 credit. Compliance with CA4-82
         * If one credit is available, an implementation shall only send SDP
         * messages that provide additional credits and also do not contain ULP
         * payload. */
        if ((ssk->flags & SDP_NEEDFIN) && !sk->so_snd.sb_sndptr &&
            tx_credits(ssk) > 1) {
                mb = sdp_alloc_mb_disconnect(sk, wait);
                if (mb == NULL)
                        goto allocfail;
                ssk->flags &= ~SDP_NEEDFIN;
                sdp_post_send(ssk, mb);
                post_count++;
        }

        low = (sdp_tx_ring_slots_left(ssk) <= SDP_MIN_TX_CREDITS);
        if (post_count || low) {
                if (low)
                        sdp_arm_tx_cq(ssk);
                if (sdp_xmit_poll(ssk, low))
                        goto again;
        }
        return;

allocfail:
        ssk->nagle_last_unacked = -1;
        callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk);
        return;
}