void sdp_post_keepalive(struct sdp_sock *ssk)
{
    int rc;
    struct ib_send_wr wr, *bad_wr;

    sdp_dbg(ssk->socket, "%s\n", __func__);

    memset(&wr, 0, sizeof(wr));

    wr.next = NULL;
    wr.wr_id = 0;
    wr.sg_list = NULL;
    wr.num_sge = 0;
    wr.opcode = IB_WR_RDMA_WRITE;

    rc = ib_post_send(ssk->qp, &wr, &bad_wr);
    if (rc) {
        sdp_dbg(ssk->socket,
            "ib_post_keepalive failed with status %d.\n", rc);
        sdp_notify(ssk, ECONNRESET);
    }

    sdp_cnt(sdp_keepalive_probes_sent);
}
/* Like tcp_fin - called when SDP_MID_DISCONNECT is received */
static void sdp_handle_disconn(struct sdp_sock *ssk)
{

    sdp_dbg(ssk->socket, "%s\n", __func__);

    SDP_WLOCK_ASSERT(ssk);
    if (TCPS_HAVERCVDFIN(ssk->state) == 0)
        socantrcvmore(ssk->socket);

    switch (ssk->state) {
    case TCPS_SYN_RECEIVED:
    case TCPS_ESTABLISHED:
        ssk->state = TCPS_CLOSE_WAIT;
        break;

    case TCPS_FIN_WAIT_1:
        /* Received a reply FIN - start Infiniband tear down */
        sdp_dbg(ssk->socket,
            "%s: Starting Infiniband tear down sending DREQ\n",
            __func__);

        sdp_cancel_dreq_wait_timeout(ssk);
        ssk->qp_active = 0;
        if (ssk->id) {
            struct rdma_cm_id *id;

            id = ssk->id;
            SDP_WUNLOCK(ssk);
            rdma_disconnect(id);
            SDP_WLOCK(ssk);
        } else {
            sdp_warn(ssk->socket, "%s: ssk->id is NULL\n", __func__);
            return;
        }
        break;
    case TCPS_TIME_WAIT:
        /* This is a mutual close situation and we've got the DREQ from
           the peer before the SDP_MID_DISCONNECT */
        break;
    case TCPS_CLOSED:
        /* FIN arrived after IB teardown started - do nothing */
        sdp_dbg(ssk->socket, "%s: fin in state %s\n",
            __func__, sdp_state_str(ssk->state));
        return;
    default:
        sdp_warn(ssk->socket, "%s: FIN in unexpected state. state=%d\n",
            __func__, ssk->state);
        break;
    }
}
void sdp_tx_ring_destroy(struct sdp_sock *ssk)
{

    sdp_dbg(ssk->socket, "tx ring destroy\n");

    SDP_WLOCK(ssk);
    callout_stop(&ssk->tx_ring.timer);
    callout_stop(&ssk->nagle_timer);
    SDP_WUNLOCK(ssk);
    callout_drain(&ssk->tx_ring.timer);
    callout_drain(&ssk->nagle_timer);

    if (ssk->tx_ring.buffer) {
        sdp_tx_ring_purge(ssk);
        kfree(ssk->tx_ring.buffer);
        ssk->tx_ring.buffer = NULL;
    }

    if (ssk->tx_ring.cq) {
        if (ib_destroy_cq(ssk->tx_ring.cq)) {
            sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
                ssk->tx_ring.cq);
        } else {
            ssk->tx_ring.cq = NULL;
        }
    }

    WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring));
}
static int sdp_process_tx_cq(struct sdp_sock *ssk)
{
    struct ib_wc ibwc[SDP_NUM_WC];
    int n, i;
    int wc_processed = 0;

    SDP_WLOCK_ASSERT(ssk);

    if (!ssk->tx_ring.cq) {
        sdp_dbg(ssk->socket, "tx irq on destroyed tx_cq\n");
        return 0;
    }

    do {
        n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
        for (i = 0; i < n; ++i) {
            sdp_process_tx_wc(ssk, ibwc + i);
            wc_processed++;
        }
    } while (n == SDP_NUM_WC);

    if (wc_processed) {
        sdp_post_sends(ssk, M_DONTWAIT);
        sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d",
            (u32) tx_ring_posted(ssk));
        sowwakeup(ssk->socket);
    }

    return wc_processed;
}
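/*
 * Poll the RX CQ while holding the ring lock; completed work is handed
 * off to the rx_comp_work workqueue and the CQ is re-armed before the
 * lock is dropped.
 */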
int sdp_process_rx(struct sdp_sock *ssk)
{
    int wc_processed = 0;
    int credits_before;

    if (!rx_ring_trylock(&ssk->rx_ring)) {
        sdp_dbg(ssk->socket, "ring destroyed. not polling it\n");
        return 0;
    }

    credits_before = tx_credits(ssk);

    wc_processed = sdp_poll_rx_cq(ssk);
    sdp_prf(ssk->socket, NULL, "processed %d", wc_processed);

    if (wc_processed) {
        sdp_prf(ssk->socket, NULL, "credits: %d -> %d",
            credits_before, tx_credits(ssk));
        queue_work(rx_comp_wq, &ssk->rx_comp_work);
    }
    sdp_arm_rx_cq(ssk);

    rx_ring_unlock(&ssk->rx_ring);

    return (wc_processed);
}
static int sdp_handle_send_comp(struct sdp_sock *ssk, struct ib_wc *wc)
{
    struct mbuf *mb = NULL;
    struct sdp_bsdh *h;

    if (unlikely(wc->status)) {
        if (wc->status != IB_WC_WR_FLUSH_ERR) {
            sdp_prf(ssk->socket, mb, "Send completion with error. "
                "Status %d", wc->status);
            sdp_dbg_data(ssk->socket, "Send completion with error. "
                "Status %d\n", wc->status);
            sdp_notify(ssk, ECONNRESET);
        }
    }

    mb = sdp_send_completion(ssk, wc->wr_id);
    if (unlikely(!mb))
        return -1;

    h = mtod(mb, struct sdp_bsdh *);
    sdp_prf1(ssk->socket, mb, "tx completion. mseq:%d", ntohl(h->mseq));
    sdp_dbg(ssk->socket, "tx completion. %p %d mseq:%d",
        mb, mb->m_pkthdr.len, ntohl(h->mseq));
    m_freem(mb);

    return 0;
}
static void sdp_qp_event_handler(struct ib_event *event, void *data)
{

    if (event->event == IB_EVENT_PATH_MIG) {
        sdp_dbg(NULL, "Path migration event\n");
        return;
    }
    sdp_warn(NULL, "unexpected invocation: event: %d, data=%p\n",
        event->event, data);
}
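/*
 * Dispatch a single TX completion: data sends go to sdp_handle_send_comp();
 * with SDP_ZCOPY, the completion of the last (signalled) RDMA read WR wakes
 * the sleeping sender; anything else is the zero-length keepalive probe
 * posted by sdp_post_keepalive().
 */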
static inline void sdp_process_tx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{

    if (likely(wc->wr_id & SDP_OP_SEND)) {
        sdp_handle_send_comp(ssk, wc);
        return;
    }

#ifdef SDP_ZCOPY
    if (wc->wr_id & SDP_OP_RDMA) {
        /* TODO: handle failed RDMA read cqe */

        sdp_dbg_data(ssk->socket,
            "TX comp: RDMA read. status: %d\n", wc->status);
        sdp_prf1(sk, NULL, "TX comp: RDMA read");

        if (!ssk->tx_ring.rdma_inflight) {
            sdp_warn(ssk->socket, "ERROR: unexpected RDMA read\n");
            return;
        }

        if (!ssk->tx_ring.rdma_inflight->busy) {
            sdp_warn(ssk->socket,
                "ERROR: too many RDMA read completions\n");
            return;
        }

        /* Only last RDMA read WR is signalled. Order is guaranteed -
         * therefore if Last RDMA read WR is completed - all other
         * have, too */
        ssk->tx_ring.rdma_inflight->busy = 0;
        sowwakeup(ssk->socket);
        sdp_dbg_data(ssk->socket, "woke up sleepers\n");
        return;
    }
#endif

    /* Keepalive probe sent cleanup */
    sdp_cnt(sdp_keepalive_probes_sent);

    if (likely(!wc->status))
        return;

    sdp_dbg(ssk->socket, " %s consumes KEEPALIVE status %d\n",
        __func__, wc->status);

    if (wc->status == IB_WC_WR_FLUSH_ERR)
        return;

    sdp_notify(ssk, ECONNRESET);
}
/* called only from irq */
static struct mbuf *sdp_process_rx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{
    struct mbuf *mb;
    struct sdp_bsdh *h;
    struct socket *sk = ssk->socket;
    int mseq;

    mb = sdp_recv_completion(ssk, wc->wr_id);
    if (unlikely(!mb))
        return NULL;

    if (unlikely(wc->status)) {
        if (ssk->qp_active && sk) {
            sdp_dbg(sk, "Recv completion with error. "
                "Status %d, vendor: %d\n",
                wc->status, wc->vendor_err);
            sdp_abort(sk);
            ssk->qp_active = 0;
        }
        m_freem(mb);
        return NULL;
    }

    sdp_dbg_data(sk, "Recv completion. ID %d Length %d\n",
        (int)wc->wr_id, wc->byte_len);
    if (unlikely(wc->byte_len < sizeof(struct sdp_bsdh))) {
        sdp_warn(sk, "SDP BUG! byte_len %d < %zd\n",
            wc->byte_len, sizeof(struct sdp_bsdh));
        m_freem(mb);
        return NULL;
    }

    /* Use m_adj to trim the tail of data we didn't use. */
    m_adj(mb, -(mb->m_pkthdr.len - wc->byte_len));
    h = mtod(mb, struct sdp_bsdh *);

    SDP_DUMP_PACKET(ssk->socket, "RX", mb, h);

    ssk->rx_packets++;
    ssk->rx_bytes += mb->m_pkthdr.len;

    mseq = ntohl(h->mseq);
    atomic_set(&ssk->mseq_ack, mseq);
    if (mseq != (int)wc->wr_id)
        sdp_warn(sk, "SDP BUG! mseq %d != wrid %d\n",
            mseq, (int)wc->wr_id);

    return mb;
}
static void sdp_rx_irq(struct ib_cq *cq, void *cq_context)
{
    struct socket *sk = cq_context;
    struct sdp_sock *ssk = sdp_sk(sk);

    if (cq != ssk->rx_ring.cq) {
        sdp_dbg(sk, "cq = %p, ssk->cq = %p\n", cq, ssk->rx_ring.cq);
        return;
    }

    SDPSTATS_COUNTER_INC(rx_int_count);

    sdp_prf(sk, NULL, "rx irq");

    sdp_process_rx(ssk);
}
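/*
 * Service loop run with the socket write-locked: drain queued control
 * mbufs, replenish the receive ring, reap TX completions, and post any
 * pending sends or credit updates.
 */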
void sdp_do_posts(struct sdp_sock *ssk)
{
    struct socket *sk = ssk->socket;
    int xmit_poll_force;
    struct mbuf *mb;

    SDP_WLOCK_ASSERT(ssk);
    if (!ssk->qp_active) {
        sdp_dbg(sk, "QP is deactivated\n");
        return;
    }

    while ((mb = ssk->rx_ctl_q)) {
        ssk->rx_ctl_q = mb->m_nextpkt;
        mb->m_nextpkt = NULL;
        sdp_process_rx_ctl_mb(ssk, mb);
    }

    if (ssk->state == TCPS_TIME_WAIT)
        return;

    if (!ssk->rx_ring.cq || !ssk->tx_ring.cq)
        return;

    sdp_post_recvs(ssk);

    if (tx_ring_posted(ssk))
        sdp_xmit_poll(ssk, 1);

    sdp_post_sends(ssk, M_NOWAIT);

    xmit_poll_force = tx_credits(ssk) < SDP_MIN_TX_CREDITS;

    if (credit_update_needed(ssk) || xmit_poll_force) {
        /* if has pending tx because run out of tx_credits - xmit it */
        sdp_prf(sk, NULL, "Processing to free pending sends");
        sdp_xmit_poll(ssk, xmit_poll_force);
        sdp_prf(sk, NULL, "Sending credit update");
        sdp_post_sends(ssk, M_NOWAIT);
    }
}
int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
    struct ib_cq *tx_cq;
    int rc = 0;

    sdp_dbg(ssk->socket, "tx ring create\n");
    callout_init_rw(&ssk->tx_ring.timer, &ssk->lock, 0);
    callout_init_rw(&ssk->nagle_timer, &ssk->lock, 0);
    atomic_set(&ssk->tx_ring.head, 1);
    atomic_set(&ssk->tx_ring.tail, 1);

    ssk->tx_ring.buffer = kzalloc(
        sizeof *ssk->tx_ring.buffer * SDP_TX_SIZE, GFP_KERNEL);
    if (!ssk->tx_ring.buffer) {
        rc = -ENOMEM;
        sdp_warn(ssk->socket, "Can't allocate TX Ring size %zd.\n",
            sizeof(*ssk->tx_ring.buffer) * SDP_TX_SIZE);
        goto out;
    }

    tx_cq = ib_create_cq(device, sdp_tx_irq, sdp_tx_cq_event_handler,
        ssk, SDP_TX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);
    if (IS_ERR(tx_cq)) {
        rc = PTR_ERR(tx_cq);
        sdp_warn(ssk->socket, "Unable to allocate TX CQ: %d.\n", rc);
        goto err_cq;
    }
    ssk->tx_ring.cq = tx_cq;
    ssk->tx_ring.poll_cnt = 0;
    sdp_arm_tx_cq(ssk);

    return 0;

err_cq:
    kfree(ssk->tx_ring.buffer);
    ssk->tx_ring.buffer = NULL;
out:
    return rc;
}
int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
    struct ib_cq *rx_cq;
    int rc = 0;

    sdp_dbg(ssk->socket, "rx ring create\n");
    INIT_WORK(&ssk->rx_comp_work, sdp_rx_comp_work);
    atomic_set(&ssk->rx_ring.head, 1);
    atomic_set(&ssk->rx_ring.tail, 1);

    ssk->rx_ring.buffer = kmalloc(
        sizeof *ssk->rx_ring.buffer * SDP_RX_SIZE, GFP_KERNEL);
    if (!ssk->rx_ring.buffer) {
        sdp_warn(ssk->socket, "Unable to allocate RX Ring size %zd.\n",
            sizeof(*ssk->rx_ring.buffer) * SDP_RX_SIZE);
        return -ENOMEM;
    }

    rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler,
        ssk->socket, SDP_RX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);
    if (IS_ERR(rx_cq)) {
        rc = PTR_ERR(rx_cq);
        sdp_warn(ssk->socket, "Unable to allocate RX CQ: %d.\n", rc);
        goto err_cq;
    }

    sdp_sk(ssk->socket)->rx_ring.cq = rx_cq;

    sdp_arm_rx_cq(ssk);

    return 0;

err_cq:
    kfree(ssk->rx_ring.buffer);
    ssk->rx_ring.buffer = NULL;
    return rc;
}
static int sdp_process_tx_cq(struct sdp_sock *ssk)
{
    struct ib_wc ibwc[SDP_NUM_WC];
    int n, i;
    int wc_processed = 0;

    if (!ssk->tx_ring.cq) {
        sdp_dbg(sk_ssk(ssk), "tx irq on destroyed tx_cq\n");
        return 0;
    }

    do {
        n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
        for (i = 0; i < n; ++i) {
            sdp_process_tx_wc(ssk, ibwc + i);
            wc_processed++;
        }
    } while (n == SDP_NUM_WC);

    if (wc_processed) {
        struct sock *sk = sk_ssk(ssk);

        sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d",
            (u32) tx_ring_posted(ssk));
        sk_mem_reclaim(sk);
        sk_stream_write_space(sk_ssk(ssk));
        if (sk->sk_write_pending &&
            test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
            tx_ring_posted(ssk)) {
            /* a write is pending and still no room in tx queue,
             * arm tx cq */
            sdp_prf(sk_ssk(ssk), NULL, "pending tx - rearming");
            sdp_arm_tx_cq(sk);
        }
    }

    return wc_processed;
}
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
    struct sdp_buf *tx_req;
    struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
    unsigned long mseq = ring_head(ssk->tx_ring);
    int i, rc, frags;
    u64 addr;
    struct ib_device *dev;
    struct ib_send_wr *bad_wr;

    struct ib_sge ibsge[SDP_MAX_SEND_SGES];
    struct ib_sge *sge = ibsge;
    struct ib_send_wr tx_wr = { NULL };
    u32 send_flags = IB_SEND_SIGNALED;

    SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
    SDPSTATS_HIST(send_size, skb->len);

    if (!ssk->qp_active)
        goto err;

    ssk->tx_packets++;

    if (h->mid != SDP_MID_SRCAVAIL &&
        h->mid != SDP_MID_DATA &&
        h->mid != SDP_MID_SRCAVAIL_CANCEL) {
        struct sock *sk = sk_ssk(ssk);

        sk->sk_wmem_queued += skb->truesize;
        sk_mem_charge(sk, skb->truesize);
    }

    if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
        struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);
        if (ssk->tx_sa != tx_sa) {
            sdp_dbg_data(sk_ssk(ssk), "SrcAvail cancelled "
                "before being sent!\n");
            SDP_WARN_ON(1);
            sk_wmem_free_skb(sk_ssk(ssk), skb);
            return;
        }
        TX_SRCAVAIL_STATE(skb)->mseq = mseq;
    }

    if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
        h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
    else
        h->flags = 0;

    h->bufs = htons(rx_ring_posted(ssk));
    h->len = htonl(skb->len);
    h->mseq = htonl(mseq);
    h->mseq_ack = htonl(mseq_ack(ssk));

    sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
        mid2str(h->mid), rx_ring_posted(ssk), mseq,
        ntohl(h->mseq_ack), tx_credits(ssk));

    SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

    tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
    tx_req->skb = skb;
    dev = ssk->ib_device;

    if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
        SDPSTATS_COUNTER_INC(inline_sends);
        sge->addr = (u64) skb->data;
        sge->length = skb->len;
        sge->lkey = 0;
        frags = 0;
        tx_req->mapping[0] = 0; /* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
        send_flags |= IB_SEND_INLINE;
    } else {
        addr = ib_dma_map_single(dev, skb->data, skb->len - skb->data_len,
            DMA_TO_DEVICE);
        tx_req->mapping[0] = addr;

        /* TODO: proper error handling */
        BUG_ON(ib_dma_mapping_error(dev, addr));

        sge->addr = addr;
        sge->length = skb->len - skb->data_len;
        sge->lkey = ssk->sdp_dev->mr->lkey;
        frags = skb_shinfo(skb)->nr_frags;
        for (i = 0; i < frags; ++i) {
            ++sge;
            addr = ib_dma_map_page(dev,
                skb_shinfo(skb)->frags[i].page.p,
                skb_shinfo(skb)->frags[i].page_offset,
                skb_shinfo(skb)->frags[i].size,
                DMA_TO_DEVICE);
            BUG_ON(ib_dma_mapping_error(dev, addr));
            tx_req->mapping[i + 1] = addr;
            sge->addr = addr;
            sge->length = skb_shinfo(skb)->frags[i].size;
            sge->lkey = ssk->sdp_dev->mr->lkey;
        }
    }

    tx_wr.next = NULL;
    tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
    tx_wr.sg_list = ibsge;
    tx_wr.num_sge = frags + 1;
    tx_wr.opcode = IB_WR_SEND;
    tx_wr.send_flags = send_flags;
    if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
        tx_wr.send_flags |= IB_SEND_SOLICITED;

    rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
    if (unlikely(rc)) {
        sdp_dbg(sk_ssk(ssk), "ib_post_send failed with status %d.\n", rc);

        sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len,
            DMA_TO_DEVICE);

        sdp_set_error(sk_ssk(ssk), -ECONNRESET);

        goto err;
    }

    atomic_inc(&ssk->tx_ring.head);
    atomic_dec(&ssk->tx_ring.credits);
    atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

    return;

err:
    sk_wmem_free_skb(sk_ssk(ssk), skb);
}
int sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
{
    /* TODO: nonagle? */
    struct sk_buff *skb;
    int post_count = 0;
    struct sock *sk = sk_ssk(ssk);

    if (unlikely(!ssk->id)) {
        if (sk->sk_send_head) {
            sdp_dbg(sk, "Send on socket without cmid ECONNRESET\n");
            /* TODO: flush send queue? */
            sdp_reset(sk);
        }
        return -ECONNRESET;
    }
again:
    if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
        sdp_xmit_poll(ssk, 1);

    /* Run out of credits, check if got a credit update */
    if (unlikely(tx_credits(ssk) <= SDP_MIN_TX_CREDITS)) {
        sdp_poll_rx_cq(ssk);

        if (unlikely(sdp_should_rearm(sk) || !posts_handler(ssk)))
            sdp_arm_rx_cq(sk);
    }

    if (unlikely((ssk->sa_post_rdma_rd_compl || ssk->sa_post_sendsm) &&
        tx_credits(ssk) < SDP_MIN_TX_CREDITS)) {
        sdp_dbg_data(sk, "Run out of credits, can't abort SrcAvail. "
            "RdmaRdCompl: %d SendSm: %d\n",
            ssk->sa_post_rdma_rd_compl, ssk->sa_post_sendsm);
    }

    if (ssk->sa_post_rdma_rd_compl && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
        int unreported = ssk->sa_post_rdma_rd_compl;

        skb = sdp_alloc_skb_rdmardcompl(sk, unreported, gfp);
        if (!skb)
            goto no_mem;
        sdp_post_send(ssk, skb);
        post_count++;
        ssk->sa_post_rdma_rd_compl = 0;
    }

    if (ssk->sa_post_sendsm && tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
        skb = sdp_alloc_skb_sendsm(sk, gfp);
        if (unlikely(!skb))
            goto no_mem;
        sdp_post_send(ssk, skb);
        ssk->sa_post_sendsm = 0;
        post_count++;
    }

    if (ssk->recv_request &&
        ring_tail(ssk->rx_ring) >= SDP_MIN_TX_CREDITS &&
        tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
        sdp_tx_ring_slots_left(ssk)) {
        skb = sdp_alloc_skb_chrcvbuf_ack(sk,
            ssk->recv_frags * PAGE_SIZE, gfp);
        if (!skb)
            goto no_mem;
        ssk->recv_request = 0;
        sdp_post_send(ssk, skb);
        post_count++;
    }

    if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
        sdp_tx_ring_slots_left(ssk) &&
        sk->sk_send_head &&
        sdp_nagle_off(ssk, sk->sk_send_head)) {
        SDPSTATS_COUNTER_INC(send_miss_no_credits);
    }

    while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
        sdp_tx_ring_slots_left(ssk) &&
        (skb = sk->sk_send_head) &&
        sdp_nagle_off(ssk, skb)) {
        update_send_head(sk, skb);
        __skb_dequeue(&sk->sk_write_queue);

        sdp_post_send(ssk, skb);

        post_count++;
    }

    if (credit_update_needed(ssk) &&
        likely((1 << sk->sk_state) &
            (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {
        skb = sdp_alloc_skb_data(sk, 0, gfp);
        if (!skb)
            goto no_mem;

        sk->sk_wmem_queued += skb->truesize;
        sk_mem_charge(sk, skb->truesize);

        sdp_post_send(ssk, skb);
        SDPSTATS_COUNTER_INC(post_send_credits);
        post_count++;
    }

    /* send DisConn if needed
     * Do not send DisConn if there is only 1 credit. Compliance with CA4-82:
     * If one credit is available, an implementation shall only send SDP
     * messages that provide additional credits and also do not contain ULP
     * payload. */
    if (unlikely(ssk->sdp_disconnect) &&
        !sk->sk_send_head &&
        tx_credits(ssk) >= SDP_MIN_TX_CREDITS) {
        skb = sdp_alloc_skb_disconnect(sk, gfp);
        if (!skb)
            goto no_mem;
        ssk->sdp_disconnect = 0;
        sdp_post_send(ssk, skb);
        post_count++;
    }

    if (!sdp_tx_ring_slots_left(ssk) || post_count) {
        if (sdp_xmit_poll(ssk, 1))
            goto again;
    }

no_mem:
    return post_count;
}
void sdp_post_sends(struct sdp_sock *ssk, int wait)
{
    struct mbuf *mb;
    int post_count = 0;
    struct socket *sk;
    int low;

    sk = ssk->socket;
    if (unlikely(!ssk->id)) {
        if (sk->so_snd.sb_sndptr) {
            sdp_dbg(ssk->socket,
                "Send on socket without cmid ECONNRESET.\n");
            sdp_notify(ssk, ECONNRESET);
        }
        return;
    }
again:
    if (sdp_tx_ring_slots_left(ssk) < SDP_TX_SIZE / 2)
        sdp_xmit_poll(ssk, 1);

    if (ssk->recv_request &&
        ring_tail(ssk->rx_ring) >= ssk->recv_request_head &&
        tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
        sdp_tx_ring_slots_left(ssk)) {
        mb = sdp_alloc_mb_chrcvbuf_ack(sk,
            ssk->recv_bytes - SDP_HEAD_SIZE, wait);
        if (mb == NULL)
            goto allocfail;
        ssk->recv_request = 0;
        sdp_post_send(ssk, mb);
        post_count++;
    }

    if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
        sdp_tx_ring_slots_left(ssk) && sk->so_snd.sb_sndptr &&
        sdp_nagle_off(ssk, sk->so_snd.sb_sndptr)) {
        SDPSTATS_COUNTER_INC(send_miss_no_credits);
    }

    while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
        sdp_tx_ring_slots_left(ssk) && (mb = sk->so_snd.sb_sndptr) &&
        sdp_nagle_off(ssk, mb)) {
        struct mbuf *n;

        SOCKBUF_LOCK(&sk->so_snd);
        sk->so_snd.sb_sndptr = mb->m_nextpkt;
        sk->so_snd.sb_mb = mb->m_nextpkt;
        mb->m_nextpkt = NULL;
        SB_EMPTY_FIXUP(&sk->so_snd);
        for (n = mb; n != NULL; n = n->m_next)
            sbfree(&sk->so_snd, n);
        SOCKBUF_UNLOCK(&sk->so_snd);
        sdp_post_send(ssk, mb);
        post_count++;
    }

    if (credit_update_needed(ssk) && ssk->state >= TCPS_ESTABLISHED &&
        ssk->state < TCPS_FIN_WAIT_2) {
        mb = sdp_alloc_mb_data(ssk->socket, wait);
        if (mb == NULL)
            goto allocfail;
        sdp_post_send(ssk, mb);
        SDPSTATS_COUNTER_INC(post_send_credits);
        post_count++;
    }

    /* send DisConn if needed
     * Do not send DisConn if there is only 1 credit. Compliance with CA4-82:
     * If one credit is available, an implementation shall only send SDP
     * messages that provide additional credits and also do not contain ULP
     * payload. */
    if ((ssk->flags & SDP_NEEDFIN) && !sk->so_snd.sb_sndptr &&
        tx_credits(ssk) > 1) {
        mb = sdp_alloc_mb_disconnect(sk, wait);
        if (mb == NULL)
            goto allocfail;
        ssk->flags &= ~SDP_NEEDFIN;
        sdp_post_send(ssk, mb);
        post_count++;
    }

    low = (sdp_tx_ring_slots_left(ssk) <= SDP_MIN_TX_CREDITS);
    if (post_count || low) {
        if (low)
            sdp_arm_tx_cq(ssk);
        if (sdp_xmit_poll(ssk, low))
            goto again;
    }
    return;

allocfail:
    ssk->nagle_last_unacked = -1;
    callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk);
    return;
}
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
    struct ib_qp_init_attr qp_init_attr = {
        .event_handler = sdp_qp_event_handler,
        .cap.max_send_wr = SDP_TX_SIZE,
        .cap.max_recv_wr = sdp_rx_size,
        .cap.max_inline_data = sdp_inline_thresh,
        .sq_sig_type = IB_SIGNAL_REQ_WR,
        .qp_type = IB_QPT_RC,
    };
    struct ib_device *device = id->device;
    int rc;

    sdp_dbg(sk, "%s\n", __func__);

    sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
    sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

    qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
    sdp_dbg(sk, "Setting max send sge to: %d\n",
        qp_init_attr.cap.max_send_sge);

    qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
    sdp_dbg(sk, "Setting max recv sge to: %d\n",
        qp_init_attr.cap.max_recv_sge);

    sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
    if (!sdp_sk(sk)->sdp_dev) {
        sdp_warn(sk, "SDP not available on device %s\n", device->name);
        rc = -ENODEV;
        goto err_rx;
    }

    rc = sdp_rx_ring_create(sdp_sk(sk), device);
    if (rc)
        goto err_rx;

    rc = sdp_tx_ring_create(sdp_sk(sk), device);
    if (rc)
        goto err_tx;

    qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
    qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

    rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
    if (rc) {
        sdp_warn(sk, "Unable to create QP: %d.\n", rc);
        goto err_qp;
    }
    sdp_sk(sk)->qp = id->qp;
    sdp_sk(sk)->ib_device = device;
    sdp_sk(sk)->qp_active = 1;
    sdp_sk(sk)->context.device = device;
    sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

    sdp_dbg(sk, "%s done\n", __func__);
    return 0;

err_qp:
    sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
    sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
    return rc;
}

static int sdp_get_max_send_frags(u32 buf_size)
{
    return MIN(
        /* +1 to compensate on not aligned buffers */
        (PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
        SDP_MAX_SEND_SGES - 1);
}

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
    struct sockaddr_in *dst_addr;
    struct sock *child;
    const struct sdp_hh *h;
    int rc = 0;

    sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

    h = event->param.conn.private_data;
    SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

    if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
        sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
            h->ipv_cap & HH_IPV_MASK);
        return -EINVAL;
    }

    if (!h->max_adverts)
        return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
    child = sk_clone(sk, GFP_KERNEL);
#else
    child = sk_clone_lock(sk, GFP_KERNEL);
#endif
    if (!child)
        return -ENOMEM;

    sdp_init_sock(child);

    dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
    sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    if (inet6_sk(sk)) {
        struct ipv6_pinfo *newnp;

        newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);
        memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));

        if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
            /* V6 mapped */
            sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
            ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
                h->src_addr.ip4.addr);
            ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
                h->dst_addr.ip4.addr);
            ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
        } else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
            struct sockaddr_in6 *dst_addr6 =
                (struct sockaddr_in6 *)dst_addr;
            struct sockaddr_in6 *src_addr6 =
                (struct sockaddr_in6 *)&id->route.addr.src_addr;

            ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
            ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
            ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
        } else {
            sdp_warn(child, "Bad IPV field: 0x%x\n",
                h->ipv_cap & HH_IPV_MASK);
        }

        sdp_inet_daddr(child) = sdp_inet_saddr(child) =
            sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
    } else
#endif
    {
        sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
    }

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    __sock_put(child, SOCK_REF_CLONE);

    down_read(&device_removal_lock);

    rc = sdp_init_qp(child, id);
    if (rc) {
        bh_unlock_sock(child);
        up_read(&device_removal_lock);
        sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
        sdp_ssk_hist_close(child);
#endif
        sk_free(child);
        return rc;
    }

    sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
    atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

    sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
    sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
        sizeof(struct sdp_bsdh);

    sdp_sk(child)->send_frags =
        sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
    sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

    id->context = child;
    sdp_sk(child)->id = id;

    list_add_tail(&sdp_sk(child)->backlog_queue,
        &sdp_sk(sk)->backlog_queue);
    sdp_sk(child)->parent = sk;

    bh_unlock_sock(child);
    sdp_add_sock(sdp_sk(child));
    up_read(&device_removal_lock);

    sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

    /* child->sk_write_space(child); */
    /* child->sk_data_ready(child, 0); */
    sk->sk_data_ready(sk);

    return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
    const struct sdp_hah *h;
    struct sockaddr_in *dst_addr;

    sdp_dbg(sk, "%s\n", __func__);

    sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
    sdp_set_default_moderation(sdp_sk(sk));

    if (sock_flag(sk, SOCK_KEEPOPEN))
        sdp_start_keepalive_timer(sk);

    if (sock_flag(sk, SOCK_DEAD))
        return 0;

    h = event->param.conn.private_data;
    SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
    sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
    atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
    sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
    sdp_sk(sk)->xmit_size_goal =
        ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
    sdp_sk(sk)->send_frags =
        sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
    sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
        sdp_sk(sk)->send_frags * PAGE_SIZE);

    sdp_sk(sk)->poll_cq = 1;

    sk->sk_state_change(sk);
    sk_wake_async(sk, 0, POLL_OUT);

    dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
    sdp_inet_dport(sk) = dst_addr->sin_port;
    sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
    struct sock *parent;

    sdp_dbg(sk, "%s\n", __func__);

    parent = sdp_sk(sk)->parent;
    BUG_ON(!parent);

    sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    sdp_set_default_moderation(sdp_sk(sk));

    if (sock_flag(sk, SOCK_KEEPOPEN))
        sdp_start_keepalive_timer(sk);

    if (sock_flag(sk, SOCK_DEAD))
        return 0;

    lock_sock(parent);
    if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
        sdp_dbg(sk, "parent is going away.\n");
        goto done;
    }

    sk_acceptq_added(parent);
    sdp_dbg(parent, "%s child connection established\n", __func__);
    list_del_init(&sdp_sk(sk)->backlog_queue);
    list_add_tail(&sdp_sk(sk)->accept_queue,
        &sdp_sk(parent)->accept_queue);

    parent->sk_state_change(parent);
    sk_wake_async(parent, 0, POLL_OUT);
done:
    release_sock(parent);

    return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
    struct sdp_sock *ssk = sdp_sk(sk);

    sdp_dbg(sk, "%s\n", __func__);

    if (ssk->tx_ring.cq)
        if (sdp_xmit_poll(ssk, 1))
            sdp_post_sends(ssk, 0);

    if (sk->sk_state == TCP_SYN_RECV) {
        sdp_connected_handler(sk);

        if (rcv_nxt(ssk))
            return 0;
    }

    return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct sock *parent = NULL;
    struct sock *child = NULL;
    struct sock *sk;
    struct sdp_hah hah;
    struct sdp_hh hh;
    int rc = 0, rc2;

    sk = id->context;
    if (!sk) {
        sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
            rdma_cm_event_str(event->event));
        return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
            -EINVAL : 0;
    }

    sdp_add_to_history(sk, rdma_cm_event_str(event->event));

    lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
    sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
    if (!sdp_sk(sk)->id) {
        sdp_dbg(sk, "socket is being torn down\n");
        rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
            -EINVAL : 0;
        release_sock(sk);
        return rc;
    }

    switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
        if (sdp_link_layer_ib_only &&
            rdma_node_get_transport(id->device->node_type) ==
                RDMA_TRANSPORT_IB &&
            rdma_port_get_link_layer(id->device, id->port_num) !=
                IB_LINK_LAYER_INFINIBAND) {
            sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
                "is allowed\n",
                rdma_port_get_link_layer(id->device, id->port_num));
            rc = -ENETUNREACH;
            break;
        }

        rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
        break;
    case RDMA_CM_EVENT_ADDR_ERROR:
        rc = -ENETUNREACH;
        break;
    case RDMA_CM_EVENT_ROUTE_RESOLVED:
        rc = sdp_init_qp(sk, id);
        if (rc)
            break;
        memset(&hh, 0, sizeof hh);
        hh.bsdh.mid = SDP_MID_HELLO;
        hh.bsdh.len = htonl(sizeof(struct sdp_hh));
        hh.max_adverts = 1;
        hh.majv_minv = SDP_MAJV_MINV;
        sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
        hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
        atomic_set(&sdp_sk(sk)->remote_credits,
            rx_ring_posted(sdp_sk(sk)));
        hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
            PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        if (inet6_sk(sk)) {
            struct sockaddr *src_addr =
                (struct sockaddr *)&id->route.addr.src_addr;
            struct sockaddr_in *addr4 =
                (struct sockaddr_in *)src_addr;
            struct sockaddr_in6 *addr6 =
                (struct sockaddr_in6 *)src_addr;

            if (src_addr->sa_family == AF_INET) {
                /* IPv4 over IPv6 */
                ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0,
                    htonl(0xFFFF), addr4->sin_addr.s_addr);
            } else {
                sk->sk_v6_rcv_saddr = addr6->sin6_addr;
            }
            inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
        } else
#endif
        {
            sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
                ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
        }
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.private_data_len = sizeof hh;
        conn_param.private_data = &hh;
        conn_param.responder_resources = 4 /* TODO */;
        conn_param.initiator_depth = 4 /* TODO */;
        conn_param.retry_count = sdp_retry_count;
        SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

        if (sdp_apm_enable) {
            rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
            if (rc)
                sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
        }

        rc = rdma_connect(id, &conn_param);
        break;

    case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
        sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
            id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
        break;

    case RDMA_CM_EVENT_ALT_PATH_LOADED:
        sdp_dbg(sk, "alt route path loaded\n");
        break;

    case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
        sdp_warn(sk, "alt route resolve error\n");
        break;

    case RDMA_CM_EVENT_ROUTE_ERROR:
        rc = -ETIMEDOUT;
        break;
    case RDMA_CM_EVENT_CONNECT_REQUEST:
        rc = sdp_connect_handler(sk, id, event);
        if (rc) {
            sdp_dbg(sk, "Destroying qp\n");
            rdma_reject(id, NULL, 0);
            break;
        }
        child = id->context;
        atomic_set(&sdp_sk(child)->remote_credits,
            rx_ring_posted(sdp_sk(child)));
        memset(&hah, 0, sizeof hah);
        hah.bsdh.mid = SDP_MID_HELLO_ACK;
        hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
        hah.bsdh.len = htonl(sizeof(struct sdp_hah));
        hah.majv_minv = SDP_MAJV_MINV;
        hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
                                    but just in case */
        hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
            sizeof(struct sdp_bsdh));
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.private_data_len = sizeof hah;
        conn_param.private_data = &hah;
        conn_param.responder_resources = 4 /* TODO */;
        conn_param.initiator_depth = 4 /* TODO */;
        conn_param.retry_count = sdp_retry_count;
        SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
        rc = rdma_accept(id, &conn_param);
        if (rc) {
            sdp_sk(child)->id = NULL;
            id->qp = NULL;
            id->context = NULL;
            parent = sdp_sk(child)->parent; /* TODO: hold ? */
        } else if (sdp_apm_enable) {
            rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
            if (rc2)
                sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
        }
        break;
    case RDMA_CM_EVENT_CONNECT_RESPONSE:
        rc = sdp_response_handler(sk, id, event);
        if (rc) {
            sdp_dbg(sk, "Destroying qp\n");
            rdma_reject(id, NULL, 0);
        } else {
            rc = rdma_accept(id, NULL);
            if (!rc && sdp_apm_enable) {
                rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
                if (rc2)
                    sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
            }
        }
        break;
    case RDMA_CM_EVENT_CONNECT_ERROR:
        rc = -ETIMEDOUT;
        break;
    case RDMA_CM_EVENT_UNREACHABLE:
        rc = -ENETUNREACH;
        break;
    case RDMA_CM_EVENT_REJECTED:
        rc = -ECONNREFUSED;
        break;
    case RDMA_CM_EVENT_ESTABLISHED:
        sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
            ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
        rc = sdp_connected_handler(sk);
        break;
    case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
        if (sk->sk_state == TCP_LAST_ACK) {
            sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
            sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
            sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
                __func__);
        }

        sdp_sk(sk)->qp_active = 0;
        rdma_disconnect(id);

        if (sk->sk_state != TCP_TIME_WAIT) {
            if (sk->sk_state == TCP_CLOSE_WAIT) {
                sdp_dbg(sk, "IB teardown while in "
                    "TCP_CLOSE_WAIT taking reference to "
                    "let close() finish the work\n");
                sock_hold(sk, SOCK_REF_CMA);
                sdp_start_cma_timewait_timeout(sdp_sk(sk),
                    SDP_CMA_TIMEWAIT_TIMEOUT);
            }
            sdp_set_error(sk, -EPIPE);
            rc = sdp_disconnected_handler(sk);
        }
        break;
    case RDMA_CM_EVENT_TIMEWAIT_EXIT:
        rc = sdp_disconnected_handler(sk);
        break;
    case RDMA_CM_EVENT_DEVICE_REMOVAL:
        rc = -ENETRESET;
        break;
    case RDMA_CM_EVENT_ADDR_CHANGE:
        sdp_dbg(sk, "Got Address change event\n");
        rc = 0;
        break;
    default:
        printk(KERN_ERR "SDP: Unexpected CMA event: %d\n", event->event);
        rc = -ECONNABORTED;
        break;
    }

    sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

    if (rc && sdp_sk(sk)->id == id) {
        child = sk;
        sdp_sk(sk)->id = NULL;
        id->qp = NULL;
        id->context = NULL;
        parent = sdp_sk(sk)->parent;
        sdp_reset_sk(sk, rc);
    }

    release_sock(sk);

    sdp_dbg(sk, "event: %s done. status %d\n",
        rdma_cm_event_str(event->event), rc);

    if (parent) {
        lock_sock(parent);
        if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
            sdp_dbg(sk, "parent is going away.\n");
            child = NULL;
            goto done;
        }
        if (!list_empty(&sdp_sk(child)->backlog_queue))
            list_del_init(&sdp_sk(child)->backlog_queue);
        else
            child = NULL;
done:
        release_sock(parent);
        if (child)
            sdp_common_release(child);
    }
    return rc;
}
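/*
 * Map an mbuf chain and post it as a single IB_WR_SEND work request.
 * On success the TX ring head advances and one local credit is consumed;
 * on failure the connection is reset with ECONNRESET.
 */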
void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
    struct sdp_buf *tx_req;
    struct sdp_bsdh *h;
    unsigned long mseq;
    struct ib_device *dev;
    struct ib_send_wr *bad_wr;
    struct ib_sge ibsge[SDP_MAX_SEND_SGES];
    struct ib_sge *sge;
    struct ib_send_wr tx_wr = { NULL };
    int i, rc;
    u64 addr;

    SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
    SDPSTATS_HIST(send_size, mb->len);

    if (!ssk->qp_active) {
        m_freem(mb);
        return;
    }

    mseq = ring_head(ssk->tx_ring);
    h = mtod(mb, struct sdp_bsdh *);
    ssk->tx_packets++;
    ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
    if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
        struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
        if (ssk->tx_sa != tx_sa) {
            sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
                "before being sent!\n");
            WARN_ON(1);
            m_freem(mb);
            return;
        }
        TX_SRCAVAIL_STATE(mb)->mseq = mseq;
    }
#endif

    if (unlikely(mb->m_flags & M_URG))
        h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
    else
        h->flags = 0;

    mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */
    h->bufs = htons(rx_ring_posted(ssk));
    h->len = htonl(mb->m_pkthdr.len);
    h->mseq = htonl(mseq);
    h->mseq_ack = htonl(mseq_ack(ssk));

    sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
        mid2str(h->mid), rx_ring_posted(ssk), mseq,
        ntohl(h->mseq_ack));

    SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

    tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
    tx_req->mb = mb;
    dev = ssk->ib_device;
    sge = &ibsge[0];
    for (i = 0; mb != NULL; i++, mb = mb->m_next, sge++) {
        addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
            DMA_TO_DEVICE);
        /* TODO: proper error handling */
        BUG_ON(ib_dma_mapping_error(dev, addr));
        BUG_ON(i >= SDP_MAX_SEND_SGES);
        tx_req->mapping[i] = addr;
        sge->addr = addr;
        sge->length = mb->m_len;
        sge->lkey = ssk->sdp_dev->mr->lkey;
    }
    tx_wr.next = NULL;
    tx_wr.wr_id = mseq | SDP_OP_SEND;
    tx_wr.sg_list = ibsge;
    tx_wr.num_sge = i;
    tx_wr.opcode = IB_WR_SEND;
    tx_wr.send_flags = IB_SEND_SIGNALED;
    if (unlikely(tx_req->mb->m_flags & M_URG))
        tx_wr.send_flags |= IB_SEND_SOLICITED;

    rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
    if (unlikely(rc)) {
        sdp_dbg(ssk->socket,
            "ib_post_send failed with status %d.\n", rc);
        sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);
        sdp_notify(ssk, ECONNRESET);
        m_freem(tx_req->mb);
        return;
    }

    atomic_inc(&ssk->tx_ring.head);
    atomic_dec(&ssk->tx_ring.credits);
    atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
    return;
}
/* socket lock should be taken before calling this */
static int sdp_process_rx_ctl_mb(struct sdp_sock *ssk, struct mbuf *mb)
{
    struct sdp_bsdh *h;
    struct socket *sk;

    SDP_WLOCK_ASSERT(ssk);

    sk = ssk->socket;
    h = mtod(mb, struct sdp_bsdh *);
    switch (h->mid) {
    case SDP_MID_DATA:
    case SDP_MID_SRCAVAIL:
        sdp_dbg(sk, "DATA after socket rcv was shutdown\n");

        /* got data in RCV_SHUTDOWN */
        if (ssk->state == TCPS_FIN_WAIT_1) {
            sdp_dbg(sk, "RX data when state = FIN_WAIT1\n");
            sdp_notify(ssk, ECONNRESET);
        }
        m_freem(mb);
        break;
#ifdef SDP_ZCOPY
    case SDP_MID_RDMARDCOMPL:
        m_freem(mb);
        break;
    case SDP_MID_SENDSM:
        sdp_handle_sendsm(ssk, ntohl(h->mseq_ack));
        m_freem(mb);
        break;
    case SDP_MID_SRCAVAIL_CANCEL:
        sdp_dbg_data(sk, "Handling SrcAvailCancel\n");
        sdp_prf(sk, NULL, "Handling SrcAvailCancel");
        if (ssk->rx_sa) {
            ssk->srcavail_cancel_mseq = ntohl(h->mseq);
            ssk->rx_sa->flags |= RX_SA_ABORTED;
            ssk->rx_sa = NULL; /* TODO: change it into SDP_MID_DATA and
                                  get the dirty logic from recvmsg */
        } else {
            sdp_dbg(sk, "Got SrcAvailCancel - "
                "but no SrcAvail in process\n");
        }
        m_freem(mb);
        break;
    case SDP_MID_SINKAVAIL:
        sdp_dbg_data(sk, "Got SinkAvail - not supported: ignored\n");
        sdp_prf(sk, NULL, "Got SinkAvail - not supported: ignored");
        /* FALLTHROUGH */
#endif
    case SDP_MID_ABORT:
        sdp_dbg_data(sk, "Handling ABORT\n");
        sdp_prf(sk, NULL, "Handling ABORT");
        sdp_notify(ssk, ECONNRESET);
        m_freem(mb);
        break;
    case SDP_MID_DISCONN:
        sdp_dbg_data(sk, "Handling DISCONN\n");
        sdp_prf(sk, NULL, "Handling DISCONN");
        sdp_handle_disconn(ssk);
        break;
    case SDP_MID_CHRCVBUF:
        sdp_dbg_data(sk, "Handling RX CHRCVBUF\n");
        sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf *)(h+1));
        m_freem(mb);
        break;
    case SDP_MID_CHRCVBUF_ACK:
        sdp_dbg_data(sk, "Handling RX CHRCVBUF_ACK\n");
        sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)(h+1));
        m_freem(mb);
        break;
    default:
        /* TODO: Handle other messages */
        sdp_warn(sk, "SDP: FIXME MID %d\n", h->mid);
        m_freem(mb);
    }

    return 0;
}