static void timer_func(unsigned long dummy)
{
    struct ib_send_wr wr, *bad_wr;
    struct ib_sge sge;
    int ret;
    struct ib_wc wc;
    static int id = 1;

    if (!have_path)
        return;
    if (!have_remote_info)
        return;

    printk(KERN_INFO "verbs_timer: sending datagram to LID = %u, qpn = %x\n",
           remote_info.lid, remote_info.qp_num);

    memset(&wr, 0, sizeof(wr));
    wr.wr_id = id++;
    wr.wr.ud.ah = ah;
    wr.wr.ud.port_num = 1;
    wr.wr.ud.remote_qkey = remote_info.qkey;
    wr.wr.ud.remote_qpn = remote_info.qp_num;
    wr.opcode = IB_WR_SEND;
    wr.sg_list = &sge;
    wr.send_flags = 0;
    wr.num_sge = 1;

    /* sge */
    sge.addr = send_key;
    sge.length = buf_size;
    sge.lkey = mr->lkey;

    ret = ib_post_send(qp, &wr, &bad_wr);
    if (ret)
        printk(KERN_INFO "post_send failed: %d\n", ret);
    else
        printk(KERN_INFO "post_send succeeded\n");

    ret = ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
    printk(KERN_INFO "notify_cq return %d for recv_cq\n", ret);
    /* ret = ib_req_notify_cq(send_cq, IB_CQ_NEXT_COMP); */
    /* printk(KERN_INFO "notify_cq return %d for send_cq\n", ret); */

    ret = ib_poll_cq(recv_cq, 1, &wc);
    printk(KERN_INFO "poll_cq returned %d for recv_cq\n", ret);
    if (ret) {
        printk(KERN_INFO "ID: %llu, status: %d, opcode: %d, len: %u\n",
               wc.wr_id, (int)wc.status, (int)wc.opcode, wc.byte_len);
        verbs_post_recv_req();
    }

    ret = ib_poll_cq(send_cq, 1, &wc);
    printk(KERN_INFO "poll_cq returned %d for send_cq\n", ret);

    mod_timer(&verbs_timer, NEXTJIFF(SEND_INTERVAL));
}
static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
    struct ib_wc wc;

    while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
        rpcrdma_recvcq_process_wc(&wc);
    while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
        rpcrdma_sendcq_process_wc(&wc);
}
static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
    struct ib_wc wc;
    LIST_HEAD(sched_list);

    while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
        rpcrdma_recvcq_process_wc(&wc, &sched_list);
    if (!list_empty(&sched_list))
        rpcrdma_schedule_tasklet(&sched_list);
    while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
        rpcrdma_sendcq_process_wc(&wc);
}
static void krping_rlat_test_server(struct krping_cb *cb)
{
    struct ib_send_wr *bad_wr;
    struct ib_wc wc;
    int ret;

    /* Spin waiting for client's Start STAG/TO/Len */
    while (cb->state < RDMA_READ_ADV) {
        krping_cq_event_handler(cb->cq, cb);
    }

    /* Send STAG/TO/Len to client */
    if (cb->dma_mr)
        krping_format_send(cb, cb->start_addr, cb->dma_mr);
    else
        krping_format_send(cb, cb->start_addr, cb->start_mr);
    ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
    if (ret) {
        log(LOG_ERR, "post send error %d\n", ret);
        return;
    }

    /* Spin waiting for send completion */
    while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
        ;
    if (ret < 0) {
        log(LOG_ERR, "poll error %d\n", ret);
        return;
    }
    if (wc.status) {
        log(LOG_ERR, "send completion error %d\n", wc.status);
        return;
    }

    krping_wait(cb, ERROR);
}
/* Send CQ completion handler: drain the CQ, then re-arm it; re-poll if
 * completions were reported as missed while re-arming. */
static void comp_handler_send(struct ib_cq *cq, void *cq_context)
{
    struct ib_wc wc;
    rdma_ctx_t ctx = (rdma_ctx_t)cq_context;

    LOG_KERN(LOG_INFO, ("COMP HANDLER\n"));

    do {
        while (ib_poll_cq(cq, 1, &wc) > 0) {
            if (wc.status == IB_WC_SUCCESS) {
                LOG_KERN(LOG_INFO, ("IB_WC_SUCCESS\n"));
                LOG_KERN(LOG_INFO, ("OP: %s\n",
                         wc.opcode == IB_WC_RDMA_READ ? "IB_WC_RDMA_READ" :
                         wc.opcode == IB_WC_RDMA_WRITE ? "IB_WC_RDMA_WRITE" :
                         "other"));
                LOG_KERN(LOG_INFO, ("byte_len: %d\n", wc.byte_len));
                LOG_KERN(LOG_INFO, ("Decrementing outstanding requests...\n"));
                ctx->outstanding_requests--;
            } else {
                LOG_KERN(LOG_INFO, ("FAILURE %d\n", wc.status));
            }
        }
    } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0);
}
static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
                           int batch)
{
    int i, n, completed = 0;

    /*
     * budget might be (-1) if the caller does not
     * want to bound this call, thus we need unsigned
     * minimum here.
     */
    while ((n = ib_poll_cq(cq, min_t(u32, batch, budget - completed),
                           wcs)) > 0) {
        for (i = 0; i < n; i++) {
            struct ib_wc *wc = &wcs[i];

            if (wc->wr_cqe)
                wc->wr_cqe->done(cq, wc);
            else
                WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
        }

        completed += n;

        if (n != batch || (budget != -1 && completed >= budget))
            break;
    }

    return completed;
}
/* only from interrupt. */
static int sdp_poll_rx_cq(struct sdp_sock *ssk)
{
    struct ib_cq *cq = ssk->rx_ring.cq;
    struct ib_wc ibwc[SDP_NUM_WC];
    int n, i;
    int wc_processed = 0;
    struct mbuf *mb;

    do {
        n = ib_poll_cq(cq, SDP_NUM_WC, ibwc);
        for (i = 0; i < n; ++i) {
            struct ib_wc *wc = &ibwc[i];

            BUG_ON(!(wc->wr_id & SDP_OP_RECV));
            mb = sdp_process_rx_wc(ssk, wc);
            if (!mb)
                continue;
            sdp_process_rx_mb(ssk, mb);
            wc_processed++;
        }
    } while (n == SDP_NUM_WC);

    if (wc_processed)
        sdp_bzcopy_write_space(ssk);

    return wc_processed;
}
ib_api_status_t IBCompletionQueue::pollCQRead( IN OUT ib_wc_t** const freeWclist,
                                               OUT ib_wc_t** const doneWclist )
{
    eq::base::ScopedMutex mutex( _mutex );
    return ib_poll_cq( getReadHandle(), freeWclist, doneWclist );
}
static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
{
    struct p9_client *client = cq_context;
    struct p9_trans_rdma *rdma = client->trans;
    int ret;
    struct ib_wc wc;

    ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
    while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
        struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;

        switch (c->wc_op) {
        case IB_WC_RECV:
            handle_recv(client, rdma, c, wc.status, wc.byte_len);
            up(&rdma->rq_sem);
            break;

        case IB_WC_SEND:
            handle_send(client, rdma, c, wc.status, wc.byte_len);
            up(&rdma->sq_sem);
            break;

        default:
            pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
                   c->wc_op, wc.opcode, wc.status);
            break;
        }

        kfree(c);
    }
}
static int sdp_process_tx_cq(struct sdp_sock *ssk)
{
    struct ib_wc ibwc[SDP_NUM_WC];
    int n, i;
    int wc_processed = 0;

    SDP_WLOCK_ASSERT(ssk);

    if (!ssk->tx_ring.cq) {
        sdp_dbg(ssk->socket, "tx irq on destroyed tx_cq\n");
        return 0;
    }

    do {
        n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
        for (i = 0; i < n; ++i) {
            sdp_process_tx_wc(ssk, ibwc + i);
            wc_processed++;
        }
    } while (n == SDP_NUM_WC);

    if (wc_processed) {
        sdp_post_sends(ssk, M_DONTWAIT);
        sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d",
                 (u32) tx_ring_posted(ssk));
        sowwakeup(ssk->socket);
    }

    return wc_processed;
}
static int rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
    struct list_head sched_list;
    struct ib_wc *wcs;
    int budget, count, rc;

    INIT_LIST_HEAD(&sched_list);
    budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
    do {
        wcs = ep->rep_recv_wcs;

        rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
        if (rc <= 0)
            goto out_schedule;

        count = rc;
        while (count-- > 0)
            rpcrdma_recvcq_process_wc(wcs++, &sched_list);
    } while (rc == RPCRDMA_POLLSIZE && --budget);
    rc = 0;

out_schedule:
    rpcrdma_schedule_tasklet(&sched_list);
    return rc;
}
/*
 * Send Queue Completion Handler - potentially called on interrupt context.
 *
 * Note that caller must hold a transport reference.
 */
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
    struct svc_rdma_op_ctxt *ctxt = NULL;
    struct ib_wc wc;
    struct ib_cq *cq = xprt->sc_sq_cq;
    int ret;

    if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
        return;

    ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
    atomic_inc(&rdma_stat_sq_poll);
    while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
        if (wc.status != IB_WC_SUCCESS)
            /* Close the transport */
            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);

        /* Decrement used SQ WR count */
        atomic_dec(&xprt->sc_sq_count);
        wake_up(&xprt->sc_send_wait);

        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
        if (ctxt)
            process_context(xprt, ctxt);

        svc_xprt_put(&xprt->sc_xprt);
    }

    if (ctxt)
        atomic_inc(&rdma_stat_sq_prod);
}
static void iser_cq_tasklet_fn(unsigned long data)
{
    struct iser_device *device = (struct iser_device *)data;
    struct ib_cq *cq = device->cq;
    struct ib_wc wc;
    struct iser_desc *desc;
    unsigned long xfer_len;

    while (ib_poll_cq(cq, 1, &wc) == 1) {
        desc = (struct iser_desc *) (unsigned long) wc.wr_id;
        BUG_ON(desc == NULL);

        if (wc.status == IB_WC_SUCCESS) {
            if (desc->type == ISCSI_RX) {
                xfer_len = (unsigned long)wc.byte_len;
                iser_rcv_completion(desc, xfer_len);
            } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */
                iser_snd_completion(desc);
        } else {
            iser_err("comp w. error op %d status %d\n", desc->type, wc.status);
            iser_handle_comp_error(desc);
        }
    }
    /* #warning "it is assumed here that arming CQ only once its empty" *
     * " would not cause interrupts to be missed" */
    ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
static int iser_drain_tx_cq(struct iser_device *device)
{
    struct ib_cq *cq = device->tx_cq;
    struct ib_wc wc;
    struct iser_tx_desc *tx_desc;
    struct iser_conn *ib_conn;
    int completed_tx = 0;

    while (ib_poll_cq(cq, 1, &wc) == 1) {
        tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
        ib_conn = wc.qp->qp_context;
        if (wc.status == IB_WC_SUCCESS) {
            if (wc.opcode == IB_WC_SEND)
                iser_snd_completion(tx_desc, ib_conn);
            else
                iser_err("expected opcode %d got %d\n",
                         IB_WC_SEND, wc.opcode);
        } else {
            iser_err("tx id %llx status %d vend_err %x\n",
                     wc.wr_id, wc.status, wc.vendor_err);
            atomic_dec(&ib_conn->post_send_buf_count);
            iser_handle_comp_error(tx_desc, ib_conn);
        }
        completed_tx++;
    }
    return completed_tx;
}
static void iser_cq_tasklet_fn(unsigned long data)
{
    struct iser_device *device = (struct iser_device *)data;
    struct ib_cq *cq = device->cq;
    struct ib_wc wc;
    struct iser_desc *desc;
    unsigned long xfer_len;

    while (ib_poll_cq(cq, 1, &wc) == 1) {
        desc = (struct iser_desc *) (unsigned long) wc.wr_id;
        BUG_ON(desc == NULL);

        if (wc.status == IB_WC_SUCCESS) {
            if (desc->type == ISCSI_RX) {
                xfer_len = (unsigned long)wc.byte_len;
                iser_rcv_completion(desc, xfer_len);
            } else
                iser_snd_completion(desc);
        } else {
            iser_err("comp w. error op %d status %d\n", desc->type, wc.status);
            iser_handle_comp_error(desc);
        }
    }
    ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
/*
 * Drain any cq, prior to teardown.
 */
static void rpcrdma_clean_cq(struct ib_cq *cq)
{
    struct ib_wc wc;
    int count = 0;

    while (1 == ib_poll_cq(cq, 1, &wc))
        ++count;

    if (count)
        dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
                __func__, count, wc.opcode);
}
/*
 * Send Queue Completion Handler - potentially called on interrupt context.
 *
 * Note that caller must hold a transport reference.
 */
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
    struct svc_rdma_op_ctxt *ctxt = NULL;
    struct ib_wc wc_a[6];
    struct ib_wc *wc;
    struct ib_cq *cq = xprt->sc_sq_cq;
    int ret;

    memset(wc_a, 0, sizeof(wc_a));

    if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
        return;

    ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
    atomic_inc(&rdma_stat_sq_poll);
    while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
        int i;

        for (i = 0; i < ret; i++) {
            wc = &wc_a[i];
            if (wc->status != IB_WC_SUCCESS) {
                dprintk("svcrdma: sq wc err status %s (%d)\n",
                        ib_wc_status_msg(wc->status), wc->status);

                /* Close the transport */
                set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
            }

            /* Decrement used SQ WR count */
            atomic_dec(&xprt->sc_sq_count);
            wake_up(&xprt->sc_send_wait);

            ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc->wr_id;
            if (ctxt)
                process_context(xprt, ctxt);

            svc_xprt_put(&xprt->sc_xprt);
        }
    }

    if (ctxt)
        atomic_inc(&rdma_stat_sq_prod);
}
static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
                     struct ib_wc *wcs, struct rds_ib_ack_state *ack_state)
{
    int nr, i;
    struct ib_wc *wc;

    while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
        for (i = 0; i < nr; i++) {
            wc = wcs + i;
            rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
                     (unsigned long long)wc->wr_id, wc->status,
                     wc->byte_len, be32_to_cpu(wc->ex.imm_data));
            rds_ib_recv_cqe_handler(ic, wc, ack_state);
        }
    }
}
/* The wc array is on stack: automatic memory is always CPU-local.
 *
 * struct ib_wc is 64 bytes, making the poll array potentially
 * large. But this is at the bottom of the call chain. Further
 * substantial work is done in another thread.
 */
static void rpcrdma_recvcq_poll(struct ib_cq *cq)
{
    struct ib_wc *pos, wcs[4];
    int count, rc;

    do {
        pos = wcs;

        rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
        if (rc < 0)
            break;

        count = rc;
        while (count-- > 0)
            rpcrdma_recvcq_process_wc(pos++);
    } while (rc == ARRAY_SIZE(wcs));
}
/*
 * rq_cq_reap - Process the RQ CQ.
 *
 * Take all completing WC off the CQE and enqueue the associated DTO
 * context on the dto_q for the transport.
 *
 * Note that caller must hold a transport reference.
 */
static void rq_cq_reap(struct svcxprt_rdma *xprt)
{
    int ret;
    struct ib_wc wc;
    struct svc_rdma_op_ctxt *ctxt = NULL;

    if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
        return;

    ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
    atomic_inc(&rdma_stat_rq_poll);
    while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
        ctxt->wc_status = wc.status;
        ctxt->byte_len = wc.byte_len;
        svc_rdma_unmap_dma(ctxt);
        if (wc.status != IB_WC_SUCCESS) {
            /* Close the transport */
            dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
            svc_rdma_put_context(ctxt, 1);
            svc_xprt_put(&xprt->sc_xprt);
            continue;
        }
        spin_lock_bh(&xprt->sc_rq_dto_lock);
        list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
        spin_unlock_bh(&xprt->sc_rq_dto_lock);
        svc_xprt_put(&xprt->sc_xprt);
    }

    if (ctxt)
        atomic_inc(&rdma_stat_rq_prod);
    set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);

    /*
     * If data arrived before established event,
     * don't enqueue. This defers RPC I/O until the
     * RDMA connection is complete.
     */
    if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
        svc_xprt_enqueue(&xprt->sc_xprt);
}
static int rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
    struct ib_wc *wcs;
    int budget, count, rc;

    budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
    do {
        wcs = ep->rep_recv_wcs;

        rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
        if (rc <= 0)
            return rc;

        count = rc;
        while (count-- > 0)
            rpcrdma_recvcq_process_wc(wcs++);
    } while (rc == RPCRDMA_POLLSIZE && --budget);

    return 0;
}
static int sdp_process_tx_cq(struct sdp_sock *ssk)
{
    struct ib_wc ibwc[SDP_NUM_WC];
    int n, i;
    int wc_processed = 0;

    if (!ssk->tx_ring.cq) {
        sdp_dbg(sk_ssk(ssk), "tx irq on destroyed tx_cq\n");
        return 0;
    }

    do {
        n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
        for (i = 0; i < n; ++i) {
            sdp_process_tx_wc(ssk, ibwc + i);
            wc_processed++;
        }
    } while (n == SDP_NUM_WC);

    if (wc_processed) {
        struct sock *sk = sk_ssk(ssk);

        sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d",
                 (u32) tx_ring_posted(ssk));
        sk_mem_reclaim(sk);
        sk_stream_write_space(sk_ssk(ssk));
        if (sk->sk_write_pending &&
            test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
            tx_ring_posted(ssk)) {
            /* a write is pending and still no room in tx queue,
             * arm tx cq
             */
            sdp_prf(sk_ssk(ssk), NULL, "pending tx - rearming");
            sdp_arm_tx_cq(sk);
        }
    }

    return wc_processed;
}
static inline int rpcrdma_cq_poll(struct ib_cq *cq)
{
    struct ib_wc wc;
    int rc;

    for (;;) {
        rc = ib_poll_cq(cq, 1, &wc);
        if (rc < 0) {
            dprintk("RPC: %s: ib_poll_cq failed %i\n", __func__, rc);
            return rc;
        }
        if (rc == 0)
            break;

        rpcrdma_event_process(&wc);
    }

    return 0;
}
static void iser_cq_tasklet_fn(unsigned long data)
{
    struct iser_device *device = (struct iser_device *)data;
    struct ib_cq *cq = device->rx_cq;
    struct ib_wc wc;
    struct iser_rx_desc *desc;
    unsigned long xfer_len;
    struct iser_conn *ib_conn;
    int completed_tx, completed_rx;

    completed_tx = completed_rx = 0;

    while (ib_poll_cq(cq, 1, &wc) == 1) {
        desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
        BUG_ON(desc == NULL);
        ib_conn = wc.qp->qp_context;
        if (wc.status == IB_WC_SUCCESS) {
            if (wc.opcode == IB_WC_RECV) {
                xfer_len = (unsigned long)wc.byte_len;
                iser_rcv_completion(desc, xfer_len, ib_conn);
            } else
                iser_err("expected opcode %d got %d\n",
                         IB_WC_RECV, wc.opcode);
        } else {
            if (wc.status != IB_WC_WR_FLUSH_ERR)
                iser_err("rx id %llx status %d vend_err %x\n",
                         wc.wr_id, wc.status, wc.vendor_err);
            ib_conn->post_recv_buf_count--;
            iser_handle_comp_error(NULL, ib_conn);
        }
        completed_rx++;
        if (!(completed_rx & 63))
            completed_tx += iser_drain_tx_cq(device);
    }
    /* #warning "it is assumed here that arming CQ only once its empty" *
     * " would not cause interrupts to be missed" */
    ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

    completed_tx += iser_drain_tx_cq(device);
    iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}
static int isert_poll_cq(struct isert_cq *cq)
{
    int err;
    struct ib_wc *wc, *last_wc;

    TRACE_ENTRY();

    do {
        err = ib_poll_cq(cq->cq, ARRAY_SIZE(cq->wc), cq->wc);
        last_wc = &cq->wc[err];
        for (wc = cq->wc; wc < last_wc; ++wc) {
            if (likely(wc->status == IB_WC_SUCCESS))
                isert_handle_wc(wc);
            else
                isert_handle_wc_error(wc);
        }
    } while (err > 0);

    TRACE_EXIT_RES(err);
    return err;
}
static void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq,
                     struct ib_wc *wcs)
{
    int nr, i;
    struct ib_wc *wc;

    while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
        for (i = 0; i < nr; i++) {
            wc = wcs + i;
            rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
                     (unsigned long long)wc->wr_id, wc->status,
                     wc->byte_len, be32_to_cpu(wc->ex.imm_data));

            if (wc->wr_id <= ic->i_send_ring.w_nr ||
                wc->wr_id == RDS_IB_ACK_WR_ID)
                rds_ib_send_cqe_handler(ic, wc);
            else
                rds_ib_mr_cqe_handler(ic, wc);
        }
    }
}
static int __ib_process_cq(struct ib_cq *cq, int budget)
{
    int i, n, completed = 0;

    while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
        for (i = 0; i < n; i++) {
            struct ib_wc *wc = &cq->wc[i];

            if (wc->wr_cqe)
                wc->wr_cqe->done(cq, wc);
            else
                WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
        }

        completed += n;

        if (n != IB_POLL_BATCH ||
            (budget != -1 && completed >= budget))
            break;
    }

    return completed;
}
/*
 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
 * operations performed in the send path. As the sender allocs and potentially
 * unallocs the next free entry in the ring it doesn't alter which is
 * the next to be freed, which is what this is concerned with.
 */
void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
{
    struct rds_connection *conn = context;
    struct rds_ib_connection *ic = conn->c_transport_data;
    struct ib_wc wc;
    struct rds_ib_send_work *send;
    u32 completed;
    u32 oldest;
    u32 i = 0;
    int ret;

    rdsdebug("cq %p conn %p\n", cq, conn);
    rds_ib_stats_inc(s_ib_tx_cq_call);
    ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
    if (ret)
        rdsdebug("ib_req_notify_cq send failed: %d\n", ret);

    while (ib_poll_cq(cq, 1, &wc) > 0) {
        rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
                 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
                 be32_to_cpu(wc.ex.imm_data));
        rds_ib_stats_inc(s_ib_tx_cq_event);

        if (wc.wr_id == RDS_IB_ACK_WR_ID) {
            if (ic->i_ack_queued + HZ/2 < jiffies)
                rds_ib_stats_inc(s_ib_tx_stalled);
            rds_ib_ack_send_complete(ic);
            continue;
        }

        oldest = rds_ib_ring_oldest(&ic->i_send_ring);

        completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);

        for (i = 0; i < completed; i++) {
            send = &ic->i_sends[oldest];

            /* In the error case, wc.opcode sometimes contains garbage */
            switch (send->s_wr.opcode) {
            case IB_WR_SEND:
                if (send->s_rm)
                    rds_ib_send_unmap_rm(ic, send, wc.status);
                break;
            case IB_WR_RDMA_WRITE:
            case IB_WR_RDMA_READ:
                /* Nothing to be done - the SG list will be unmapped
                 * when the SEND completes. */
                break;
            default:
                if (printk_ratelimit())
                    printk(KERN_NOTICE
                           "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
                           __func__, send->s_wr.opcode);
                break;
            }

            send->s_wr.opcode = 0xdead;
            send->s_wr.num_sge = 1;
            if (send->s_queued + HZ/2 < jiffies)
                rds_ib_stats_inc(s_ib_tx_stalled);

            /* If a RDMA operation produced an error, signal this right
             * away. If we don't, the subsequent SEND that goes with this
             * RDMA will be canceled with ERR_WFLUSH, and the application
             * will never learn that the RDMA failed. */
            if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
                struct rds_message *rm;

                rm = rds_send_get_message(conn, send->s_op);
                if (rm)
                    rds_ib_send_rdma_complete(rm, wc.status);
            }

            oldest = (oldest + 1) % ic->i_send_ring.w_nr;
        }

        rds_ib_ring_free(&ic->i_send_ring, completed);

        if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
            test_bit(0, &conn->c_map_queued))
            queue_delayed_work(rds_wq, &conn->c_send_w, 0);

        /* We expect errors as the qp is drained during shutdown */
        if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
            rds_ib_conn_error(conn,
                "send completion on %pI4 had status %u, "
                "disconnecting and reconnecting\n",
                &conn->c_faddr, wc.status);
        }
    }
}
int64_t IBInterface::postRdmaWrite( const void* buffer, uint32_t numBytes )
{
#ifdef EQ_MEASURE_TIME
    eq::lunchbox::Clock clock;
    clock.reset();
#endif

    ib_api_status_t ibStatus;
    ib_wc_t wc;
    ib_wc_t *wcDone, *wcFree;

    wcFree = &wc;
    wcFree->p_next = 0;
    wcDone = 0;

#ifdef EQ_MEASURE_TIME
    eq::lunchbox::Clock clockWait;
    clockWait.reset();
#endif

    // validation of the send job
    do
    {
        ibStatus = ib_poll_cq( _completionQueue->getWriteHandle(),
                               &wcFree, &wcDone );
        if ( ibStatus == IB_SUCCESS )
        {
            if ( wcDone->status != IB_WCS_SUCCESS )
            {
                LBWARN << "ERROR IN POLL WRITE" << std::endl;
                return -1;
            }

            _writePoll.getData()[wcDone->wr_id] = true;
            wcFree = wcDone;
            wcFree->p_next = 0;
            wcDone = 0;
        }
        else if ( !_writePoll.getData()[ _numBufWrite ] )
        {
            ibStatus = IB_SUCCESS;
        }
    }
    while ( ibStatus == IB_SUCCESS );

#ifdef EQ_MEASURE_TIME
    _timeTotalWriteWait += clockWait.getTimef();
#endif

    uint32_t incBytes = 0;
    uint32_t compt = 0;
    uint32_t size;
    size = LB_MIN( numBytes, EQ_MAXBLOCKBUFFER );

    ib_local_ds_t list;

#ifdef EQ_MEASURE_TIME
    eq::lunchbox::Clock clockCopy;
    clockCopy.reset();
#endif

    //memcpy( _writeBlocks[ _numBufWrite ]->buf.getData(),
    //        buffer, size );
    eq::lunchbox::fastCopy( _writeBlocks[ _numBufWrite ]->buf.getData(),
                            buffer, size );
    list.vaddr = _writeBlocks[ _numBufWrite ]->getVaddr();

#ifdef EQ_MEASURE_TIME
    _timeCopyBufferWrite += clockCopy.getTimef();
#endif

    list.lkey   = _writeBlocks[ _numBufWrite ]->getLocalKey();
    list.length = size;

    // A 64-bit work request identifier that is returned to the consumer
    // as part of the work completion.
    _wr.wr_id    = _numBufWrite;

    // A reference to an array of local data segments used by the send
    // operation.
    _wr.ds_array = &list;

    // Number of local data segments specified by this work request.
    _wr.num_ds   = 1;

    // The type of work request being submitted to the send queue.
    _wr.wr_type  = WR_SEND;

    // A pointer used to chain work requests together. This permits multiple
    // work requests to be posted to a queue pair through a single function
    // call. This value is set to NULL to mark the end of the chain.
    _wr.p_next   = 0;

    // This routine posts a work request to the send queue of a queue pair
    ibStatus = ib_post_send( _queuePair, &_wr, 0 );
    if ( ibStatus != IB_SUCCESS )
    {
        LBWARN << "ERROR IN POST SEND DATA" << std::endl;
        return -1;
    }

    _writePoll.getData()[ _numBufWrite ] = false;

    if ( _numBufWrite == EQ_NUMBLOCKMEMORY - 1 )
        _ibConnection->incWriteInterface();

    _numBufWrite = ( _numBufWrite + 1 ) % EQ_NUMBLOCKMEMORY;

#ifdef EQ_MEASURE_TIME
    _timeTotalWrite += clock.getTimef();
#endif
    return size;
}
/*
 * Send Queue Completion Handler - potentially called on interrupt context.
 *
 * Note that caller must hold a transport reference.
 */
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
    struct svc_rdma_op_ctxt *ctxt = NULL;
    struct ib_wc wc;
    struct ib_cq *cq = xprt->sc_sq_cq;
    int ret;

    if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
        return;

    ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
    atomic_inc(&rdma_stat_sq_poll);
    while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
        xprt = ctxt->xprt;

        svc_rdma_unmap_dma(ctxt);
        if (wc.status != IB_WC_SUCCESS)
            /* Close the transport */
            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);

        /* Decrement used SQ WR count */
        atomic_dec(&xprt->sc_sq_count);
        wake_up(&xprt->sc_send_wait);

        switch (ctxt->wr_op) {
        case IB_WR_SEND:
            svc_rdma_put_context(ctxt, 1);
            break;

        case IB_WR_RDMA_WRITE:
            svc_rdma_put_context(ctxt, 0);
            break;

        case IB_WR_RDMA_READ:
            if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
                struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
                BUG_ON(!read_hdr);
                spin_lock_bh(&xprt->sc_rq_dto_lock);
                set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
                list_add_tail(&read_hdr->dto_q,
                              &xprt->sc_read_complete_q);
                spin_unlock_bh(&xprt->sc_rq_dto_lock);
                svc_xprt_enqueue(&xprt->sc_xprt);
            }
            svc_rdma_put_context(ctxt, 0);
            break;

        default:
            printk(KERN_ERR "svcrdma: unexpected completion type, "
                   "opcode=%d, status=%d\n",
                   wc.opcode, wc.status);
            break;
        }
        svc_xprt_put(&xprt->sc_xprt);
    }

    if (ctxt)
        atomic_inc(&rdma_stat_sq_prod);
}