static void iser_cq_tasklet_fn(unsigned long data) { struct iser_device *device = (struct iser_device *)data; struct ib_cq *cq = device->cq; struct ib_wc wc; struct iser_desc *desc; unsigned long xfer_len; while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); if (wc.status == IB_WC_SUCCESS) { if (desc->type == ISCSI_RX) { xfer_len = (unsigned long)wc.byte_len; iser_rcv_completion(desc, xfer_len); } else iser_snd_completion(desc); } else { iser_err("comp w. error op %d status %d\n",desc->type,wc.status); iser_handle_comp_error(desc); } } ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); }
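The tasklet above is only the bottom half; the CQ completion upcall registered at ib_create_cq() time (see iser_create_device_ib_res() further down) does nothing but schedule it. A minimal sketch of that upcall, assuming the cq_tasklet member shown in the later snippet:

static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_device *device = (struct iser_device *)cq_context;

	/* Defer all polling to iser_cq_tasklet_fn(); the upcall runs in
	 * interrupt context and must stay short. */
	tasklet_schedule(&device->cq_tasklet);
}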
/* * Handle receive completions. * * It is reentrant but processes single events in order to maintain * ordering of receives to keep server credits. * * It is the responsibility of the scheduled tasklet to return * recv buffers to the pool. NOTE: this affects synchronization of * connection shutdown. That is, the structures required for * the completion of the reply handler must remain intact until * all memory has been reclaimed. */ static void rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) { struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; int rc; rc = rpcrdma_recvcq_poll(cq, ep); if (rc) { dprintk("RPC: %s: ib_poll_cq failed: %i\n", __func__, rc); return; } rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); if (rc == 0) return; if (rc < 0) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); return; } rpcrdma_recvcq_poll(cq, ep); }
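The poll / re-arm / poll-again dance above generalizes. Below is a hedged, stand-alone sketch of the same idea (example_drain_and_rearm() and the inline wc handling are placeholders, not xprtrdma code): a positive return from ib_req_notify_cq() with IB_CQ_REPORT_MISSED_EVENTS means completions arrived after the last poll, so the CQ must be drained once more rather than waiting for an interrupt that will never come.

static void example_drain_and_rearm(struct ib_cq *cq)
{
	struct ib_wc wc;
	int rc;

	/* First pass: reap everything already queued. */
	while (ib_poll_cq(cq, 1, &wc) > 0) {
		/* ... dispatch on wc.wr_id / wc.status here ... */
	}

	/* Re-arm; rc > 0 reports completions that slipped in meanwhile,
	 * rc < 0 is an error from the provider. */
	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc > 0) {
		while (ib_poll_cq(cq, 1, &wc) > 0) {
			/* ... same dispatch as above ... */
		}
	}
}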
static void iser_cq_tasklet_fn(unsigned long data) { struct iser_device *device = (struct iser_device *)data; struct ib_cq *cq = device->cq; struct ib_wc wc; struct iser_desc *desc; unsigned long xfer_len; while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); if (wc.status == IB_WC_SUCCESS) { if (desc->type == ISCSI_RX) { xfer_len = (unsigned long)wc.byte_len; iser_rcv_completion(desc, xfer_len); } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ iser_snd_completion(desc); } else { iser_err("comp w. error op %d status %d\n",desc->type,wc.status); iser_handle_comp_error(desc); } } /* #warning "it is assumed here that arming CQ only once its empty" * * " would not cause interrupts to be missed" */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); }
static void rds_ib_tasklet_fn_recv(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; struct rds_ib_device *rds_ibdev = ic->rds_ibdev; struct rds_ib_ack_state state; if (!rds_ibdev) rds_conn_drop(conn); rds_ib_stats_inc(s_ib_tasklet_call); memset(&state, 0, sizeof(state)); poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); if (state.ack_next_valid) rds_ib_set_ack(ic, state.ack_next, state.ack_required); if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { rds_send_drop_acked(conn, state.ack_recv, NULL); ic->i_ack_recv = state.ack_recv; } if (rds_conn_up(conn)) rds_ib_attempt_ack(ic); }
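Note that the receive CQ here is re-armed with IB_CQ_SOLICITED rather than IB_CQ_NEXT_COMP: only completions of messages the peer flagged as solicited raise an interrupt; everything else waits for the next poll. A hedged sketch of the sender-side counterpart (example_post_solicited_send() is illustrative, not RDS code):

static int example_post_solicited_send(struct ib_qp *qp, struct ib_sge *sge)
{
	struct ib_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof(wr));
	wr.opcode = IB_WR_SEND;
	wr.sg_list = sge;
	wr.num_sge = 1;
	/* IB_SEND_SOLICITED is what lets a receiver armed with
	 * IB_CQ_SOLICITED be woken by this message. */
	wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;

	return ib_post_send(qp, &wr, &bad_wr);
}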
/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. */ static void sq_cq_reap(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; struct ib_wc wc; struct ib_cq *cq = xprt->sc_sq_cq; int ret; if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); /* Decrement used SQ WR count */ atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; if (ctxt) process_context(xprt, ctxt); svc_xprt_put(&xprt->sc_xprt); } if (ctxt) atomic_inc(&rdma_stat_sq_prod); }
static void comp_handler_send(struct ib_cq* cq, void* cq_context) { struct ib_wc wc; rdma_ctx_t ctx = (rdma_ctx_t)cq_context; LOG_KERN(LOG_INFO, ("COMP HANDLER\n")); do { while (ib_poll_cq(cq, 1, &wc)> 0) { if (wc.status == IB_WC_SUCCESS) { LOG_KERN(LOG_INFO, ("IB_WC_SUCCESS\n")); LOG_KERN(LOG_INFO, ("OP: %s\n", wc.opcode == IB_WC_RDMA_READ ? "IB_WC_RDMA_READ" : wc.opcode == IB_WC_RDMA_WRITE ? "IB_WC_RDMA_WRITE" : "other")); LOG_KERN(LOG_INFO, ("byte_len: %d\n", wc.byte_len)); LOG_KERN(LOG_INFO, ("Decrementing outstanding requests...\n")); ctx->outstanding_requests--; } else { LOG_KERN(LOG_INFO, ("FAILURE %d\n", wc.status)); } } } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0); }
static void cq_comp_handler(struct ib_cq *cq, void *cq_context) { struct p9_client *client = cq_context; struct p9_trans_rdma *rdma = client->trans; int ret; struct ib_wc wc; ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; switch (c->wc_op) { case IB_WC_RECV: handle_recv(client, rdma, c, wc.status, wc.byte_len); up(&rdma->rq_sem); break; case IB_WC_SEND: handle_send(client, rdma, c, wc.status, wc.byte_len); up(&rdma->sq_sem); break; default: pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", c->wc_op, wc.opcode, wc.status); break; } kfree(c); } }
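The handler recovers its per-request context by casting wc.wr_id back to a pointer. A hedged sketch of the posting side of that cookie convention (example_rdma_context and example_post_recv() are illustrative; 9p's real p9_rdma_context carries more state):

struct example_rdma_context {
	enum ib_wc_opcode wc_op;	/* what we expect this WR to complete as */
	/* ... buffers, DMA addresses, request pointer ... */
};

static int example_post_recv(struct ib_qp *qp, struct ib_sge *sge,
			     struct example_rdma_context *c)
{
	struct ib_recv_wr wr, *bad_wr;

	c->wc_op = IB_WC_RECV;
	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (unsigned long)c;	/* recovered by the cast in the handler */
	wr.sg_list = sge;
	wr.num_sge = 1;

	return ib_post_recv(qp, &wr, &bad_wr);
}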
/* callback function for isert_dev->[cq]->cq_comp_work */ static void isert_cq_comp_work_cb(struct work_struct *work) { struct isert_cq *cq_desc; int ret; TRACE_ENTRY(); cq_desc = container_of(work, struct isert_cq, cq_comp_work); ret = isert_poll_cq(cq_desc); if (unlikely(ret < 0)) { /* poll error */ pr_err("ib_poll_cq failed\n"); goto out; } ib_req_notify_cq(cq_desc->cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); /* * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS, * so we need to make sure we don't miss any events between * last call to ib_poll_cq() and ib_req_notify_cq() */ isert_poll_cq(cq_desc); out: TRACE_EXIT(); return; }
static void timer_func (unsigned long dummy) { struct ib_send_wr wr, *bad_wr; struct ib_sge sge; int ret; struct ib_wc wc; static int id = 1; if (!have_path) return; if (!have_remote_info) return; printk (KERN_INFO "verbs_timer: sending datagram to LID = %u, qpn = %x\n", remote_info.lid, remote_info.qp_num); memset (&wr, 0, sizeof (wr)); wr.wr_id = id++; wr.wr.ud.ah = ah; wr.wr.ud.port_num = 1; wr.wr.ud.remote_qkey = remote_info.qkey; wr.wr.ud.remote_qpn = remote_info.qp_num; wr.opcode = IB_WR_SEND; wr.sg_list = &sge; wr.send_flags = 0; wr.num_sge = 1; /* sge */ sge.addr = send_key; sge.length = buf_size; sge.lkey = mr->lkey; ret = ib_post_send (qp, &wr, &bad_wr); if (ret) printk (KERN_INFO "post_send failed: %d\n", ret); else printk (KERN_INFO "post_send succeeded\n"); ret = ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP); printk (KERN_INFO "notify_cq return %d for recv_cq\n", ret); /* ret = ib_req_notify_cq (send_cq, IB_CQ_NEXT_COMP); */ /* printk (KERN_INFO "notify_cq return %d for send_cq\n", ret); */ ret = ib_poll_cq (recv_cq, 1, &wc); printk (KERN_INFO "poll_cq returned %d for recv_cq\n", ret); if (ret) { printk (KERN_INFO "ID: %llu, status: %d, opcode: %d, len: %u\n", wc.wr_id, (int)wc.status, (int)wc.opcode, wc.byte_len); verbs_post_recv_req (); } ret = ib_poll_cq (send_cq, 1, &wc); printk (KERN_INFO "poll_cq returned %d for send_cq\n", ret); mod_timer (&verbs_timer, NEXTJIFF(SEND_INTERVAL)); }
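The timer calls verbs_post_recv_req() after draining the receive CQ, but that helper is not shown. Below is a hedged sketch of what such a UD receive posting typically looks like (names and parameters are assumptions); note that for UD QPs the HCA prepends a 40-byte GRH, so the receive buffer must include that headroom.

static int example_post_ud_recv(struct ib_qp *qp, u64 dma_addr, u32 len, u32 lkey)
{
	struct ib_recv_wr wr, *bad_wr;
	struct ib_sge sge;

	sge.addr = dma_addr;
	sge.length = len;		/* must cover 40 bytes of GRH plus payload */
	sge.lkey = lkey;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = dma_addr;		/* anything that identifies the buffer */
	wr.sg_list = &sge;
	wr.num_sge = 1;

	return ib_post_recv(qp, &wr, &bad_wr);
}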
/* Handle provider receive completion upcalls. */ static void rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) { do { rpcrdma_recvcq_poll(cq); } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0); }
/** * iser_create_device_ib_res - creates Protection Domain (PD), Completion * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with * the adapter. * * returns 0 on success, -1 on failure */ static int iser_create_device_ib_res(struct iser_device *device) { device->pd = ib_alloc_pd(device->ib_device); if (IS_ERR(device->pd)) goto pd_err; device->rx_cq = ib_create_cq(device->ib_device, iser_cq_callback, iser_cq_event_callback, (void *)device, ISER_MAX_RX_CQ_LEN, 0); if (IS_ERR(device->rx_cq)) goto rx_cq_err; device->tx_cq = ib_create_cq(device->ib_device, NULL, iser_cq_event_callback, (void *)device, ISER_MAX_TX_CQ_LEN, 0); if (IS_ERR(device->tx_cq)) goto tx_cq_err; if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP)) goto cq_arm_err; tasklet_init(&device->cq_tasklet, iser_cq_tasklet_fn, (unsigned long)device); device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); if (IS_ERR(device->mr)) goto dma_mr_err; INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, iser_event_handler); if (ib_register_event_handler(&device->event_handler)) goto handler_err; return 0; handler_err: ib_dereg_mr(device->mr); dma_mr_err: tasklet_kill(&device->cq_tasklet); cq_arm_err: ib_destroy_cq(device->tx_cq); tx_cq_err: ib_destroy_cq(device->rx_cq); rx_cq_err: ib_dealloc_pd(device->pd); pd_err: iser_err("failed to allocate an IB resource\n"); return -1; }
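INIT_IB_EVENT_HANDLER() above binds iser_event_handler to device-wide async events. A minimal sketch of what such a handler looks like (the real iSER handler also reacts to port state changes; this one only logs):

static void iser_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	/* Async events carry the originating device; port_num is only
	 * meaningful for port-scoped events. */
	iser_err("async event %d on device %s port %d\n",
		 event->event, event->device->name, event->element.port_num);
}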
static void rds_ib_tasklet_fn_send(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; rds_ib_stats_inc(s_ib_tasklet_call); poll_scq(ic, ic->i_send_cq, ic->i_send_wc); ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); poll_scq(ic, ic->i_send_cq, ic->i_send_wc); if (rds_conn_up(conn) && (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued))) rds_send_xmit(ic->conn); }
/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. */ static void sq_cq_reap(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; struct ib_wc wc_a[6]; struct ib_wc *wc; struct ib_cq *cq = xprt->sc_sq_cq; int ret; memset(wc_a, 0, sizeof(wc_a)); if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) { int i; for (i = 0; i < ret; i++) { wc = &wc_a[i]; if (wc->status != IB_WC_SUCCESS) { dprintk("svcrdma: sq wc err status %s (%d)\n", ib_wc_status_msg(wc->status), wc->status); /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); } /* Decrement used SQ WR count */ atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); ctxt = (struct svc_rdma_op_ctxt *) (unsigned long)wc->wr_id; if (ctxt) process_context(xprt, ctxt); svc_xprt_put(&xprt->sc_xprt); } } if (ctxt) atomic_inc(&rdma_stat_sq_prod); }
/* * rpcrdma_cq_event_upcall * * This upcall handles recv, send, bind and unbind events. * It is reentrant but processes single events in order to maintain * ordering of receives to keep server credits. * * It is the responsibility of the scheduled tasklet to return * recv buffers to the pool. NOTE: this affects synchronization of * connection shutdown. That is, the structures required for * the completion of the reply handler must remain intact until * all memory has been reclaimed. * * Note that send events are suppressed and do not result in an upcall. */ static void rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) { int rc; rc = rpcrdma_cq_poll(cq); if (rc) return; rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed %i\n", __func__, rc); return; } rpcrdma_cq_poll(cq); }
/* * rq_cq_reap - Process the RQ CQ. * * Take all completing WC off the CQE and enqueue the associated DTO * context on the dto_q for the transport. * * Note that caller must hold a transport reference. */ static void rq_cq_reap(struct svcxprt_rdma *xprt) { int ret; struct ib_wc wc; struct svc_rdma_op_ctxt *ctxt = NULL; if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_rq_poll); while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt->wc_status = wc.status; ctxt->byte_len = wc.byte_len; svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) { /* Close the transport */ dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); svc_xprt_put(&xprt->sc_xprt); continue; } spin_lock_bh(&xprt->sc_rq_dto_lock); list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); spin_unlock_bh(&xprt->sc_rq_dto_lock); svc_xprt_put(&xprt->sc_xprt); } if (ctxt) atomic_inc(&rdma_stat_rq_prod); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); /* * If data arrived before established event, * don't enqueue. This defers RPC I/O until the * RDMA connection is complete. */ if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) svc_xprt_enqueue(&xprt->sc_xprt); }
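rq_cq_reap() only queues completed receive contexts; a consumer later pulls them off sc_rq_dto_q under the same lock. A hedged sketch of that dequeue (example_dequeue_dto() is illustrative; in svcrdma this logic lives in svc_rdma_recvfrom()):

static struct svc_rdma_op_ctxt *example_dequeue_dto(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;

	spin_lock_bh(&xprt->sc_rq_dto_lock);
	if (!list_empty(&xprt->sc_rq_dto_q)) {
		ctxt = list_first_entry(&xprt->sc_rq_dto_q,
					struct svc_rdma_op_ctxt, dto_q);
		list_del_init(&ctxt->dto_q);
	}
	spin_unlock_bh(&xprt->sc_rq_dto_lock);

	return ctxt;
}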
static void rds_ib_tasklet_fn_send(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; rds_ib_stats_inc(s_ib_tasklet_call); /* if cq has been already reaped, ignore incoming cq event */ if (atomic_read(&ic->i_cq_quiesce)) return; poll_scq(ic, ic->i_send_cq, ic->i_send_wc); ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); poll_scq(ic, ic->i_send_cq, ic->i_send_wc); if (rds_conn_up(conn) && (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued))) rds_send_xmit(&ic->conn->c_path[0]); }
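The i_cq_quiesce test lets the shutdown path fence off the tasklets before tearing the connection down. A hedged sketch of that counterpart (the tasklet member names are assumptions; the real RDS shutdown also drains the QP and flushes pending work):

static void example_quiesce_cqs(struct rds_ib_connection *ic)
{
	/* After this, rds_ib_tasklet_fn_send/recv return immediately. */
	atomic_set(&ic->i_cq_quiesce, 1);

	/* Assumed tasklet members; wait for any running instance to finish. */
	tasklet_kill(&ic->i_send_tasklet);
	tasklet_kill(&ic->i_recv_tasklet);
}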
static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) { int ret; cb->pd = ib_alloc_pd(cm_id->device); if (IS_ERR(cb->pd)) { log(LOG_ERR, "ib_alloc_pd failed\n"); return PTR_ERR(cb->pd); } DEBUG_LOG(PFX "created pd %p\n", cb->pd); cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, cb, cb->txdepth * 2, 0); if (IS_ERR(cb->cq)) { log(LOG_ERR, "ib_create_cq failed\n"); ret = PTR_ERR(cb->cq); goto err1; } DEBUG_LOG(PFX "created cq %p\n", cb->cq); if (!cb->wlat && !cb->rlat && !cb->bw) { ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); if (ret) { log(LOG_ERR, "ib_req_notify_cq failed\n"); goto err2; } } ret = krping_create_qp(cb); if (ret) { log(LOG_ERR, "krping_create_qp failed: %d\n", ret); goto err2; } DEBUG_LOG(PFX "created qp %p\n", cb->qp); return 0; err2: ib_destroy_cq(cb->cq); err1: ib_dealloc_pd(cb->pd); return ret; }
static void iser_cq_tasklet_fn(unsigned long data) { struct iser_device *device = (struct iser_device *)data; struct ib_cq *cq = device->rx_cq; struct ib_wc wc; struct iser_rx_desc *desc; unsigned long xfer_len; struct iser_conn *ib_conn; int completed_tx, completed_rx; completed_tx = completed_rx = 0; while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); ib_conn = wc.qp->qp_context; if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_RECV) { xfer_len = (unsigned long)wc.byte_len; iser_rcv_completion(desc, xfer_len, ib_conn); } else iser_err("expected opcode %d got %d\n", IB_WC_RECV, wc.opcode); } else { if (wc.status != IB_WC_WR_FLUSH_ERR) iser_err("rx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); ib_conn->post_recv_buf_count--; iser_handle_comp_error(NULL, ib_conn); } completed_rx++; if (!(completed_rx & 63)) completed_tx += iser_drain_tx_cq(device); } /* #warning "it is assumed here that arming CQ only once its empty" * * " would not cause interrupts to be missed" */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); completed_tx += iser_drain_tx_cq(device); iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); }
ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_req_notify_cq cmd; struct ib_cq *cq; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&ib_uverbs_idr_mutex); cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); if (cq && cq->uobject->context == file->ucontext) { ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); ret = in_len; } mutex_unlock(&ib_uverbs_idr_mutex); return ret; }
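This uverbs command is what userspace reaches through libibverbs. For orientation, a hedged userspace-side sketch (not kernel code): the solicited_only argument maps onto the IB_CQ_SOLICITED versus IB_CQ_NEXT_COMP choice above.

#include <infiniband/verbs.h>

/* Arm a userspace CQ for the next completion; pass 1 instead of 0 to
 * request notification only for solicited completions. */
static int example_arm_cq(struct ibv_cq *cq)
{
	return ibv_req_notify_cq(cq, 0);
}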
/* * The _oldest/_free ring operations here race cleanly with the alloc/unalloc * operations performed in the send path. As the sender allocs and potentially * unallocs the next free entry in the ring it doesn't alter which is * the next to be freed, which is what this is concerned with. */ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) { struct rds_connection *conn = context; struct rds_ib_connection *ic = conn->c_transport_data; struct ib_wc wc; struct rds_ib_send_work *send; u32 completed; u32 oldest; u32 i = 0; int ret; rdsdebug("cq %p conn %p\n", cq, conn); rds_ib_stats_inc(s_ib_tx_cq_call); ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); if (ret) rdsdebug("ib_req_notify_cq send failed: %d\n", ret); while (ib_poll_cq(cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, wc.byte_len, be32_to_cpu(wc.ex.imm_data)); rds_ib_stats_inc(s_ib_tx_cq_event); if (wc.wr_id == RDS_IB_ACK_WR_ID) { if (ic->i_ack_queued + HZ/2 < jiffies) rds_ib_stats_inc(s_ib_tx_stalled); rds_ib_ack_send_complete(ic); continue; } oldest = rds_ib_ring_oldest(&ic->i_send_ring); completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); for (i = 0; i < completed; i++) { send = &ic->i_sends[oldest]; /* In the error case, wc.opcode sometimes contains garbage */ switch (send->s_wr.opcode) { case IB_WR_SEND: if (send->s_rm) rds_ib_send_unmap_rm(ic, send, wc.status); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: /* Nothing to be done - the SG list will be unmapped * when the SEND completes. */ break; default: if (printk_ratelimit()) printk(KERN_NOTICE "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; } send->s_wr.opcode = 0xdead; send->s_wr.num_sge = 1; if (send->s_queued + HZ/2 < jiffies) rds_ib_stats_inc(s_ib_tx_stalled); /* If a RDMA operation produced an error, signal this right * away. If we don't, the subsequent SEND that goes with this * RDMA will be canceled with ERR_WFLUSH, and the application * never learn that the RDMA failed. */ if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { struct rds_message *rm; rm = rds_send_get_message(conn, send->s_op); if (rm) rds_ib_send_rdma_complete(rm, wc.status); } oldest = (oldest + 1) % ic->i_send_ring.w_nr; } rds_ib_ring_free(&ic->i_send_ring, completed); if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued)) queue_delayed_work(rds_wq, &conn->c_send_w, 0); /* We expect errors as the qp is drained during shutdown */ if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { rds_ib_conn_error(conn, "send completion on %pI4 " "had status %u, disconnecting and reconnecting\n", &conn->c_faddr, wc.status); } } }
/* * Create a QP */ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, struct rds_iw_device *rds_iwdev, struct rds_iw_work_ring *send_ring, void (*send_cq_handler)(struct ib_cq *, void *), struct rds_iw_work_ring *recv_ring, void (*recv_cq_handler)(struct ib_cq *, void *), void *context) { struct ib_device *dev = rds_iwdev->dev; unsigned int send_size, recv_size; int ret; /* The offset of 1 is to accommodate the additional ACK WR. */ send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); rds_iw_ring_resize(send_ring, send_size - 1); rds_iw_ring_resize(recv_ring, recv_size - 1); memset(attr, 0, sizeof(*attr)); attr->event_handler = rds_iw_qp_event_handler; attr->qp_context = context; attr->cap.max_send_wr = send_size; attr->cap.max_recv_wr = recv_size; attr->cap.max_send_sge = rds_iwdev->max_sge; attr->cap.max_recv_sge = RDS_IW_RECV_SGE; attr->sq_sig_type = IB_SIGNAL_REQ_WR; attr->qp_type = IB_QPT_RC; attr->send_cq = ib_create_cq(dev, send_cq_handler, rds_iw_cq_event_handler, context, send_size, 0); if (IS_ERR(attr->send_cq)) { ret = PTR_ERR(attr->send_cq); attr->send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); goto out; } attr->recv_cq = ib_create_cq(dev, recv_cq_handler, rds_iw_cq_event_handler, context, recv_size, 0); if (IS_ERR(attr->recv_cq)) { ret = PTR_ERR(attr->recv_cq); attr->recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); goto out; } out: if (ret) { if (attr->send_cq) ib_destroy_cq(attr->send_cq); if (attr->recv_cq) ib_destroy_cq(attr->recv_cq); } return ret; }
/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. */ static void sq_cq_reap(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; struct ib_wc wc; struct ib_cq *cq = xprt->sc_sq_cq; int ret; if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; xprt = ctxt->xprt; svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); /* Decrement used SQ WR count */ atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); switch (ctxt->wr_op) { case IB_WR_SEND: svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: svc_rdma_put_context(ctxt, 0); break; case IB_WR_RDMA_READ: if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; BUG_ON(!read_hdr); spin_lock_bh(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); list_add_tail(&read_hdr->dto_q, &xprt->sc_read_complete_q); spin_unlock_bh(&xprt->sc_rq_dto_lock); svc_xprt_enqueue(&xprt->sc_xprt); } svc_rdma_put_context(ctxt, 0); break; default: printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d, status=%d\n", wc.opcode, wc.status); break; } svc_xprt_put(&xprt->sc_xprt); } if (ctxt) atomic_inc(&rdma_stat_sq_prod); }
/* * Create unconnected endpoint. */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; struct ib_cq_init_attr cq_attr = {}; unsigned int max_qp_wr; int rc, err; if (devattr->max_sge < RPCRDMA_MAX_IOVS) { dprintk("RPC: %s: insufficient sge's available\n", __func__); return -ENOMEM; } if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", __func__); return -ENOMEM; } max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS; /* check provider's send/recv wr limits */ if (cdata->max_requests > max_qp_wr) cdata->max_requests = max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; rc = ia->ri_ops->ro_open(ia, ep, cdata); if (rc) return rc; ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, ep->rep_attr.cap.max_send_wr, ep->rep_attr.cap.max_recv_wr, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ INIT_CQCOUNT(ep); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", __func__, rc); goto out1; } rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); goto out2; } cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", __func__, rc); goto out2; } rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); ib_destroy_cq(recvcq); goto out2; } ep->rep_attr.send_cq = sendcq; ep->rep_attr.recv_cq = recvcq; /* Initialize cma parameters */ /* RPC/RDMA does not use private data */ ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = devattr->max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; return 0; out2: err = ib_destroy_cq(sendcq); if (err) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: if (ia->ri_dma_mr) ib_dereg_mr(ia->ri_dma_mr); return rc; }
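rep_cqinit and INIT_CQCOUNT() above set up a send-signaling throttle: with IB_SIGNAL_REQ_WR on the QP, only work requests carrying IB_SEND_SIGNALED complete into the send CQ, so roughly one send in every rep_cqinit posts is signaled. A hedged sketch of that per-send decision (simplified; the real code wraps the counter in macros, and rep_cqcount is assumed to be the atomic_t that INIT_CQCOUNT initializes):

static void example_throttle_send_signal(struct rpcrdma_ep *ep,
					 struct ib_send_wr *send_wr)
{
	send_wr->send_flags = 0;
	if (atomic_sub_return(1, &ep->rep_cqcount) <= 0) {
		/* Time to let the provider generate a completion so the
		 * send ring can be reaped. */
		atomic_set(&ep->rep_cqcount, ep->rep_cqinit);
		send_wr->send_flags = IB_SEND_SIGNALED;
	}
}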
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) { struct sockaddr_in cl = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), }; int port, err = -EINVAL; for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) { cl.sin_port = htons((ushort)port); err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl); if (err != -EADDRINUSE) break; } return err; } /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance * @addr: IP address string * @args: Mount options string */ static int rdma_create_trans(struct p9_client *client, const char *addr, char *args) { int err; struct p9_rdma_opts opts; struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; struct ib_cq_init_attr cq_attr = {}; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); if (err < 0) return err; /* Create and initialize the RDMA transport structure */ rdma = alloc_rdma(&opts); if (!rdma) return -ENOMEM; /* Create the RDMA CM ID */ rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; /* Associate the client with the transport */ client->trans = rdma; /* Bind to a privileged port if we need to */ if (opts.privport) { err = p9_rdma_bind_privport(rdma); if (err < 0) { pr_err("%s (%d): problem binding to privport: %d\n", __func__, task_pid_nr(current), -err); goto error; } } /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); rdma->addr.sin_port = htons(opts.port); err = rdma_resolve_addr(rdma->cm_id, NULL, (struct sockaddr *)&rdma->addr, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) goto error; /* Resolve the route to the server */ err = rdma_resolve_route(rdma->cm_id, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) goto error; /* Query the device attributes */ err = ib_query_device(rdma->cm_id->device, &devattr); if (err) goto error; /* Create the Completion Queue */ cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, cq_event_handler, client, &cq_attr); if (IS_ERR(rdma->cq)) goto error; ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); /* Create the Protection Domain */ rdma->pd = ib_alloc_pd(rdma->cm_id->device); if (IS_ERR(rdma->pd)) goto error; /* Cache the DMA lkey in the transport */ rdma->dma_mr = NULL; if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) rdma->lkey = rdma->cm_id->device->local_dma_lkey; else { rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(rdma->dma_mr)) goto error; rdma->lkey = rdma->dma_mr->lkey; } /* Create the Queue Pair */ memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = client; qp_attr.cap.max_send_wr = opts.sq_depth; qp_attr.cap.max_recv_wr = opts.rq_depth; qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = rdma->cq; qp_attr.recv_cq = rdma->cq; err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); if (err) goto error; rdma->qp = rdma->cm_id->qp; /* Request a connection */ memset(&conn_param, 0, sizeof(conn_param)); conn_param.private_data = NULL; 
conn_param.private_data_len = 0; conn_param.responder_resources = P9_RDMA_IRD; conn_param.initiator_depth = P9_RDMA_ORD; err = rdma_connect(rdma->cm_id, &conn_param); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; client->status = Connected; return 0; error: rdma_destroy_trans(rdma); return -ENOTCONN; }
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rds_ib_setup_qp(struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct rds_ib_device *rds_ibdev; int ret; /* rds_ib_add_one creates a rds_ib_device object per IB device, * and allocates a protection domain, memory range and FMR pool * for each. If that fails for any reason, it will not register * the rds_ibdev at all. */ rds_ibdev = ib_get_client_data(dev, &rds_ib_client); if (rds_ibdev == NULL) { if (printk_ratelimit()) printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; ic->i_mr = rds_ibdev->mr; ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, rds_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1, 0); if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); goto out; } ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, rds_ib_cq_event_handler, conn, ic->i_recv_ring.w_nr, 0); if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); goto out; } /* XXX negotiate max send/recv with remote? */ memset(&attr, 0, sizeof(attr)); attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? 
*/ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (ic->i_send_hdrs == NULL) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (ic->i_recv_hdrs == NULL) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (ic->i_ack == NULL) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); if (ic->i_sends == NULL) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); if (ic->i_recvs == NULL) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); rds_ib_recv_init_ack(ic); rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, ic->i_send_cq, ic->i_recv_cq); out: return ret; }
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rds_ib_setup_qp(struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; int ret, fr_queue_space; /* * It's normal to see a null device if an incoming connection races * with device removal, so we don't print a warning. */ rds_ibdev = rds_ib_get_client_data(dev); if (!rds_ibdev) return -EOPNOTSUPP; /* The fr_queue_space is currently set to 512, to add extra space on * completion queue and send queue. This extra space is used for FRMR * registration and invalidation work requests */ fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1; ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, rds_ib_cq_event_handler, conn, &cq_attr); if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; rdsdebug("ib_create_cq send failed: %d\n", ret); goto out; } cq_attr.cqe = ic->i_recv_ring.w_nr; ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv, rds_ib_cq_event_handler, conn, &cq_attr); if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; rdsdebug("ib_create_cq recv failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); goto out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); goto out; } /* XXX negotiate max send/recv with remote? */ memset(&attr, 0, sizeof(attr)); attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? 
*/ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); goto out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, ic->i_send_ring.w_nr * sizeof(struct rds_header), &ic->i_send_hdrs_dma, GFP_KERNEL); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); goto out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_ring.w_nr * sizeof(struct rds_header), &ic->i_recv_hdrs_dma, GFP_KERNEL); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); goto out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), &ic->i_ack_dma, GFP_KERNEL); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); goto out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), ibdev_to_node(dev)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } rds_ib_recv_init_ack(ic); rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); out: rds_ib_dev_put(rds_ibdev); return ret; }
/** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance * @addr: IP address string * @args: Mount options string */ static int rdma_create_trans(struct p9_client *client, const char *addr, char *args) { int err; struct p9_rdma_opts opts; struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); if (err < 0) return err; /* Create and initialize the RDMA transport structure */ rdma = alloc_rdma(&opts); if (!rdma) return -ENOMEM; /* Create the RDMA CM ID */ rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); if (IS_ERR(rdma->cm_id)) goto error; /* Associate the client with the transport */ client->trans = rdma; /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); rdma->addr.sin_port = htons(opts.port); err = rdma_resolve_addr(rdma->cm_id, NULL, (struct sockaddr *)&rdma->addr, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) goto error; /* Resolve the route to the server */ err = rdma_resolve_route(rdma->cm_id, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) goto error; /* Query the device attributes */ err = ib_query_device(rdma->cm_id->device, &devattr); if (err) goto error; /* Create the Completion Queue */ rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, cq_event_handler, client, opts.sq_depth + opts.rq_depth + 1, 0); if (IS_ERR(rdma->cq)) goto error; ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); /* Create the Protection Domain */ rdma->pd = ib_alloc_pd(rdma->cm_id->device); if (IS_ERR(rdma->pd)) goto error; /* Cache the DMA lkey in the transport */ rdma->dma_mr = NULL; if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) rdma->lkey = rdma->cm_id->device->local_dma_lkey; else { rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(rdma->dma_mr)) goto error; rdma->lkey = rdma->dma_mr->lkey; } /* Create the Queue Pair */ memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = client; qp_attr.cap.max_send_wr = opts.sq_depth; qp_attr.cap.max_recv_wr = opts.rq_depth; qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = rdma->cq; qp_attr.recv_cq = rdma->cq; err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); if (err) goto error; rdma->qp = rdma->cm_id->qp; /* Request a connection */ memset(&conn_param, 0, sizeof(conn_param)); conn_param.private_data = NULL; conn_param.private_data_len = 0; conn_param.responder_resources = P9_RDMA_IRD; conn_param.initiator_depth = P9_RDMA_ORD; err = rdma_connect(rdma->cm_id, &conn_param); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; client->status = Connected; return 0; error: rdma_destroy_trans(rdma); return -ENOTCONN; }
/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */ static void isert_cq_comp_work_cb(void *ctx) { struct isert_cq *cq_desc = ctx; #else static void isert_cq_comp_work_cb(struct work_struct *work) { struct isert_cq *cq_desc = container_of(work, struct isert_cq, cq_comp_work); #endif int ret; TRACE_ENTRY(); ret = isert_poll_cq(cq_desc); if (unlikely(ret < 0)) { /* poll error */ pr_err("ib_poll_cq failed\n"); goto out; } ib_req_notify_cq(cq_desc->cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); /* * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS, * so we need to make sure we don't miss any events between * last call to ib_poll_cq() and ib_req_notify_cq() */ isert_poll_cq(cq_desc); out: TRACE_EXIT(); return; } static void isert_cq_comp_handler(struct ib_cq *cq, void *context) { struct isert_cq *cq_desc = context; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) queue_work(cq_desc->cq_workqueue, &cq_desc->cq_comp_work); #else queue_work_on(smp_processor_id(), cq_desc->cq_workqueue, &cq_desc->cq_comp_work); #endif } static const char *ib_event_type_str(enum ib_event_type ev_type) { switch (ev_type) { case IB_EVENT_COMM_EST: return "COMM_EST"; case IB_EVENT_QP_FATAL: return "QP_FATAL"; case IB_EVENT_QP_REQ_ERR: return "QP_REQ_ERR"; case IB_EVENT_QP_ACCESS_ERR: return "QP_ACCESS_ERR"; case IB_EVENT_SQ_DRAINED: return "SQ_DRAINED"; case IB_EVENT_PATH_MIG: return "PATH_MIG"; case IB_EVENT_PATH_MIG_ERR: return "PATH_MIG_ERR"; case IB_EVENT_QP_LAST_WQE_REACHED: return "QP_LAST_WQE_REACHED"; case IB_EVENT_CQ_ERR: return "CQ_ERR"; case IB_EVENT_SRQ_ERR: return "SRQ_ERR"; case IB_EVENT_SRQ_LIMIT_REACHED: return "SRQ_LIMIT_REACHED"; case IB_EVENT_PORT_ACTIVE: return "PORT_ACTIVE"; case IB_EVENT_PORT_ERR: return "PORT_ERR"; case IB_EVENT_LID_CHANGE: return "LID_CHANGE"; case IB_EVENT_PKEY_CHANGE: return "PKEY_CHANGE"; case IB_EVENT_SM_CHANGE: return "SM_CHANGE"; case IB_EVENT_CLIENT_REREGISTER: return "CLIENT_REREGISTER"; case IB_EVENT_DEVICE_FATAL: return "DEVICE_FATAL"; default: return "UNKNOWN"; } } static void isert_async_evt_handler(struct ib_event *async_ev, void *context) { struct isert_cq *cq = context; struct isert_device *isert_dev = cq->dev; struct ib_device *ib_dev = isert_dev->ib_dev; char *dev_name = ib_dev->name; enum ib_event_type ev_type = async_ev->event; struct isert_connection *isert_conn; TRACE_ENTRY(); switch (ev_type) { case IB_EVENT_COMM_EST: isert_conn = async_ev->element.qp->qp_context; pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", isert_conn, isert_conn->cm_id, dev_name, ib_event_type_str(IB_EVENT_COMM_EST)); /* force "connection established" event */ rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); break; /* rest of QP-related events */ case IB_EVENT_QP_FATAL: case IB_EVENT_QP_REQ_ERR: case IB_EVENT_QP_ACCESS_ERR: case IB_EVENT_SQ_DRAINED: case IB_EVENT_PATH_MIG: case IB_EVENT_PATH_MIG_ERR: case IB_EVENT_QP_LAST_WQE_REACHED: isert_conn = async_ev->element.qp->qp_context; pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", isert_conn, isert_conn->cm_id, dev_name, ib_event_type_str(ev_type)); break; /* CQ-related events */ case IB_EVENT_CQ_ERR: pr_err("dev:%s CQ evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; /* SRQ events */ case IB_EVENT_SRQ_ERR: case IB_EVENT_SRQ_LIMIT_REACHED: pr_err("dev:%s SRQ evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; /* Port events */ case IB_EVENT_PORT_ACTIVE: case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_PKEY_CHANGE: case IB_EVENT_SM_CHANGE: case 
IB_EVENT_CLIENT_REREGISTER: pr_err("dev:%s port:%d evt: %s\n", dev_name, async_ev->element.port_num, ib_event_type_str(ev_type)); break; /* HCA events */ case IB_EVENT_DEVICE_FATAL: pr_err("dev:%s HCA evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; default: pr_err("dev:%s evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; } TRACE_EXIT(); } static struct isert_device *isert_device_create(struct ib_device *ib_dev) { struct isert_device *isert_dev; struct ib_device_attr *dev_attr; int cqe_num, err; struct ib_pd *pd; struct ib_mr *mr; struct ib_cq *cq; char wq_name[64]; int i, j; TRACE_ENTRY(); isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL); if (unlikely(isert_dev == NULL)) { pr_err("Failed to allocate iser dev\n"); err = -ENOMEM; goto out; } dev_attr = &isert_dev->device_attr; err = ib_query_device(ib_dev, dev_attr); if (unlikely(err)) { pr_err("Failed to query device, err: %d\n", err); goto fail_query; } isert_dev->num_cqs = min_t(int, num_online_cpus(), ib_dev->num_comp_vectors); isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs, GFP_KERNEL); if (unlikely(isert_dev->cq_qps == NULL)) { pr_err("Failed to allocate iser cq_qps\n"); err = -ENOMEM; goto fail_cq_qps; } isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); if (unlikely(isert_dev->cq_desc == NULL)) { pr_err("Failed to allocate %ld bytes for iser cq_desc\n", sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); err = -ENOMEM; goto fail_alloc_cq_desc; } pd = ib_alloc_pd(ib_dev); if (unlikely(IS_ERR(pd))) { err = PTR_ERR(pd); pr_err("Failed to alloc iser dev pd, err:%d\n", err); goto fail_pd; } mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); if (unlikely(IS_ERR(mr))) { err = PTR_ERR(mr); pr_err("Failed to get dma mr, err: %d\n", err); goto fail_mr; } cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES); cqe_num = cqe_num / isert_dev->num_cqs; #ifdef CONFIG_SCST_EXTRACHECKS if (isert_dev->device_attr.max_cqe == 0) pr_err("Zero max_cqe encountered: you may have a compilation problem\n"); #endif for (i = 0; i < isert_dev->num_cqs; ++i) { struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; cq_desc->dev = isert_dev; cq_desc->idx = i; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL); #else INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb); #endif snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name); #else #if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36) cq_desc->cq_workqueue = alloc_workqueue(wq_name, WQ_CPU_INTENSIVE| WQ_RESCUER, 1); #else cq_desc->cq_workqueue = alloc_workqueue(wq_name, WQ_CPU_INTENSIVE| WQ_MEM_RECLAIM, 1); #endif #endif if (unlikely(!cq_desc->cq_workqueue)) { pr_err("Failed to alloc iser cq work queue for dev:%s\n", ib_dev->name); err = -ENOMEM; goto fail_cq; } #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) cq = ib_create_cq(ib_dev, isert_cq_comp_handler, isert_async_evt_handler, cq_desc, /* context */ cqe_num, i); /* completion vector */ #else { struct ib_cq_init_attr ia = { .cqe = cqe_num, .comp_vector = i, }; cq = ib_create_cq(ib_dev, isert_cq_comp_handler, isert_async_evt_handler, cq_desc, /* context */ &ia); } #endif if (unlikely(IS_ERR(cq))) { cq_desc->cq = NULL; err = PTR_ERR(cq); pr_err("Failed to create iser dev cq, err:%d\n", err); goto fail_cq; } cq_desc->cq = cq; err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); if 
(unlikely(err)) { pr_err("Failed to request notify cq, err: %d\n", err); goto fail_cq; } } isert_dev->ib_dev = ib_dev; isert_dev->pd = pd; isert_dev->mr = mr; INIT_LIST_HEAD(&isert_dev->conn_list); lockdep_assert_held(&dev_list_mutex); isert_dev_list_add(isert_dev); pr_info("iser created device:%p\n", isert_dev); return isert_dev; fail_cq: for (j = 0; j <= i; ++j) { if (isert_dev->cq_desc[j].cq) ib_destroy_cq(isert_dev->cq_desc[j].cq); if (isert_dev->cq_desc[j].cq_workqueue) destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue); } ib_dereg_mr(mr); fail_mr: ib_dealloc_pd(pd); fail_pd: vfree(isert_dev->cq_desc); fail_alloc_cq_desc: kfree(isert_dev->cq_qps); fail_cq_qps: fail_query: kfree(isert_dev); out: TRACE_EXIT_RES(err); return ERR_PTR(err); } static void isert_device_release(struct isert_device *isert_dev) { int err, i; TRACE_ENTRY(); lockdep_assert_held(&dev_list_mutex); isert_dev_list_remove(isert_dev); /* remove from global list */ for (i = 0; i < isert_dev->num_cqs; ++i) { struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) /* * cancel_work_sync() was introduced in 2.6.22. We can * only wait until all scheduled work is done. */ flush_workqueue(cq_desc->cq_workqueue); #else cancel_work_sync(&cq_desc->cq_comp_work); #endif err = ib_destroy_cq(cq_desc->cq); if (unlikely(err)) pr_err("Failed to destroy cq, err:%d\n", err); destroy_workqueue(cq_desc->cq_workqueue); } err = ib_dereg_mr(isert_dev->mr); if (unlikely(err)) pr_err("Failed to destroy mr, err:%d\n", err); err = ib_dealloc_pd(isert_dev->pd); if (unlikely(err)) pr_err("Failed to destroy pd, err:%d\n", err); vfree(isert_dev->cq_desc); isert_dev->cq_desc = NULL; kfree(isert_dev->cq_qps); isert_dev->cq_qps = NULL; kfree(isert_dev); TRACE_EXIT(); }
/* * Create unconnected endpoint. */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr devattr; struct ib_cq *sendcq, *recvcq; int rc, err; rc = ib_query_device(ia->ri_id->device, &devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); return rc; } /* check provider's send/recv wr limits */ if (cdata->max_requests > devattr.max_qp_wr) cdata->max_requests = devattr.max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; /* send_cq and recv_cq initialized below */ ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: { int depth = 7; /* Add room for frmr register and invalidate WRs. * 1. FRMR reg WR for head * 2. FRMR invalidate WR for head * 3. N FRMR reg WRs for pagelist * 4. N FRMR invalidate WRs for pagelist * 5. FRMR reg WR for tail * 6. FRMR invalidate WR for tail * 7. The RDMA_SEND WR */ /* Calculate N if the device max FRMR depth is smaller than * RPCRDMA_MAX_DATA_SEGS. */ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { int delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; do { depth += 2; /* FRMR reg + invalidate */ delta -= ia->ri_max_frmr_depth; } while (delta > 0); } ep->rep_attr.cap.max_send_wr *= depth; if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { cdata->max_requests = devattr.max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; } break; } default: break; } ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, ep->rep_attr.cap.max_send_wr, ep->rep_attr.cap.max_recv_wr, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); ep->rep_ia = ia; init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, rpcrdma_cq_async_error_upcall, ep, ep->rep_attr.cap.max_send_wr + 1, 0); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", __func__, rc); goto out1; } rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); goto out2; } recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, rpcrdma_cq_async_error_upcall, ep, ep->rep_attr.cap.max_recv_wr + 1, 0); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", __func__, rc); goto out2; } rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); ib_destroy_cq(recvcq); goto out2; } ep->rep_attr.send_cq = sendcq; ep->rep_attr.recv_cq = recvcq; /* Initialize cma parameters */ /* RPC/RDMA does not use private data */ ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; if 
(devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; return 0; out2: err = ib_destroy_cq(sendcq); if (err) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: return rc; }
/* * Create unconnected endpoint. */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr devattr; int rc, err; rc = ib_query_device(ia->ri_id->device, &devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); return rc; } /* check provider's send/recv wr limits */ if (cdata->max_requests > devattr.max_qp_wr) cdata->max_requests = devattr.max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; /* send_cq and recv_cq initialized below */ ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: /* Add room for frmr register and invalidate WRs */ ep->rep_attr.cap.max_send_wr *= 3; if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) return -EINVAL; break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: /* Add room for mw_binds+unbinds - overkill! */ ep->rep_attr.cap.max_send_wr++; ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) return -EINVAL; break; default: break; } ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, ep->rep_attr.cap.max_send_wr, ep->rep_attr.cap.max_recv_wr, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; switch (ia->ri_memreg_strategy) { case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: ep->rep_cqinit -= RPCRDMA_MAX_SEGS; break; default: break; } if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); ep->rep_ia = ia; init_waitqueue_head(&ep->rep_connect_wait); /* * Create a single cq for receive dto and mw_bind (only ever * care about unbind, really). Send completions are suppressed. * Use single threaded tasklet upcalls to maintain ordering. */ ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, rpcrdma_cq_async_error_upcall, NULL, ep->rep_attr.cap.max_recv_wr + ep->rep_attr.cap.max_send_wr + 1, 0); if (IS_ERR(ep->rep_cq)) { rc = PTR_ERR(ep->rep_cq); dprintk("RPC: %s: ib_create_cq failed: %i\n", __func__, rc); goto out1; } rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); goto out2; } ep->rep_attr.send_cq = ep->rep_cq; ep->rep_attr.recv_cq = ep->rep_cq; /* Initialize cma parameters */ /* RPC/RDMA does not use private data */ ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) ep->rep_remote_cma.responder_resources = 0; else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; return 0; out2: err = ib_destroy_cq(ep->rep_cq); if (err) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: return rc; }