static void krping_free_buffers(struct krping_cb *cb)
{
	DEBUG_LOG(PFX "krping_free_buffers called on cb %p\n", cb);

#if 0
	dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, recv_mapping),
			 sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
	dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, send_mapping),
			 sizeof(cb->send_buf), DMA_BIDIRECTIONAL);
	dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, rdma_mapping),
			 cb->size, DMA_BIDIRECTIONAL);
#endif
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
#if 0
		dma_unmap_single(cb->pd->device->dma_device,
				 pci_unmap_addr(cb, start_mapping),
				 cb->size, DMA_BIDIRECTIONAL);
#endif
		contigfree(cb->start_buf, cb->size, M_DEVBUF);
	}

	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->send_mr);
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->rdma_mr);
		if (!cb->server)
			ib_dereg_mr(cb->start_mr);
	}
}
static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	rc = ib_dereg_mr(f->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, r);
		return rc;
	}

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
			       ia->ri_max_frmr_depth);
	if (IS_ERR(f->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(f->fr_mr), r);
		return PTR_ERR(f->fr_mr);
	}

	dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
	f->fr_state = FRMR_IS_INVALID;
	return 0;
}
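/*
 * Both rpcrdma reset helpers in this section (the one above and
 * __frwr_mr_reset further down) follow the same recovery idiom: deregister
 * the stale MR first, and only put the slot back into service if a
 * replacement can be allocated.  The sketch below distills that pattern
 * using only the core verbs API; struct mr_slot and reset_mr_slot() are
 * hypothetical names, not part of any driver quoted here.
 */
#include <linux/err.h>
#include <rdma/ib_verbs.h>

struct mr_slot {
	struct ib_mr *mr;
};

static int reset_mr_slot(struct ib_pd *pd, struct mr_slot *slot, u32 max_depth)
{
	struct ib_mr *new_mr;
	int rc;

	/* Drop the old registration first; on failure the slot stays orphaned. */
	rc = ib_dereg_mr(slot->mr);
	if (rc)
		return rc;
	slot->mr = NULL;

	/* Allocate a fresh fast-registration MR of the same depth. */
	new_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_depth);
	if (IS_ERR(new_mr))
		return PTR_ERR(new_mr);

	slot->mr = new_mr;
	return 0;
}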
/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	ib_free_cq(ep->rep_attr.recv_cq);
	ib_free_cq(ep->rep_attr.send_cq);

	if (ia->ri_dma_mr) {
		rc = ib_dereg_mr(ia->ri_dma_mr);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
}
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
	struct ib_mr *mr;
	struct scatterlist *sg;
	struct svc_rdma_fastreg_mr *frmr;
	u32 num_sg;

	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
	if (!frmr)
		goto err;

	num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
	mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
	if (IS_ERR(mr))
		goto err_free_frmr;

	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
	if (!sg)
		goto err_free_mr;

	sg_init_table(sg, RPCSVC_MAXPAGES);

	frmr->mr = mr;
	frmr->sg = sg;
	INIT_LIST_HEAD(&frmr->frmr_list);
	return frmr;

 err_free_mr:
	ib_dereg_mr(mr);
 err_free_frmr:
	kfree(frmr);
 err:
	return ERR_PTR(-ENOMEM);
}
static int
__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	struct rpcrdma_frwr *frwr = &mr->frwr;
	int rc;

	rc = ib_dereg_mr(frwr->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, mr);
		return rc;
	}

	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
				  ia->ri_max_frwr_depth);
	if (IS_ERR(frwr->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(frwr->fr_mr), mr);
		return PTR_ERR(frwr->fr_mr);
	}

	dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
	frwr->fr_state = FRWR_IS_INVALID;
	return 0;
}
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *pl;
	struct svc_rdma_fastreg_mr *frmr;

	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
	if (!frmr)
		goto err;

	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
	if (IS_ERR(mr))
		goto err_free_frmr;

	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
					 RPCSVC_MAXPAGES);
	if (IS_ERR(pl))
		goto err_free_mr;

	frmr->mr = mr;
	frmr->page_list = pl;
	INIT_LIST_HEAD(&frmr->frmr_list);
	return frmr;

 err_free_mr:
	ib_dereg_mr(mr);
 err_free_frmr:
	kfree(frmr);
 err:
	return ERR_PTR(-ENOMEM);
}
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	unsigned int depth = ia->ri_max_frmr_depth;
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth);
	if (IS_ERR(f->fr_mr))
		goto out_mr_err;

	r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
	if (!r->mw_sg)
		goto out_list_err;

	sg_init_table(r->mw_sg, depth);
	init_completion(&f->fr_linv_done);
	return 0;

out_mr_err:
	rc = PTR_ERR(f->fr_mr);
	dprintk("RPC: %s: ib_alloc_mr status %i\n",
		__func__, rc);
	return rc;

out_list_err:
	rc = -ENOMEM;
	dprintk("RPC: %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(f->fr_mr);
	return rc;
}
static void ib_sock_mem_fini_common(struct IB_SOCK *sock)
{
	if (sock->is_mem.ism_mr)
		ib_dereg_mr(sock->is_mem.ism_mr);

	if (sock->is_mem.ism_pd)
		ib_dealloc_pd(sock->is_mem.ism_pd);
}
/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	device->rx_cq = ib_create_cq(device->ib_device,
				     iser_cq_callback,
				     iser_cq_event_callback,
				     (void *)device,
				     ISER_MAX_RX_CQ_LEN, 0);
	if (IS_ERR(device->rx_cq))
		goto rx_cq_err;

	device->tx_cq = ib_create_cq(device->ib_device,
				     NULL, iser_cq_event_callback,
				     (void *)device,
				     ISER_MAX_TX_CQ_LEN, 0);
	if (IS_ERR(device->tx_cq))
		goto tx_cq_err;

	if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
		goto cq_arm_err;

	tasklet_init(&device->cq_tasklet,
		     iser_cq_tasklet_fn,
		     (unsigned long)device);

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	tasklet_kill(&device->cq_tasklet);
cq_arm_err:
	ib_destroy_cq(device->tx_cq);
tx_cq_err:
	ib_destroy_cq(device->rx_cq);
rx_cq_err:
	ib_dealloc_pd(device->pd);
pd_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}
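/*
 * The function above shows the usual goto-based unwind idiom for verbs
 * resources: each allocation gets a label, and the error path releases
 * everything acquired so far in reverse order.  A stripped-down sketch of
 * the same idiom for just a PD and a DMA MR follows.  It assumes the older
 * verbs API used above (ib_alloc_pd() without a flags argument and
 * ib_get_dma_mr(), both of which changed or were removed in later kernels),
 * and the example_* names are hypothetical.
 */
#include <linux/err.h>
#include <rdma/ib_verbs.h>

static int example_setup_pd_mr(struct ib_device *ibdev,
			       struct ib_pd **pd_out, struct ib_mr **mr_out)
{
	struct ib_pd *pd;
	struct ib_mr *mr;
	int err;

	pd = ib_alloc_pd(ibdev);
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto err_mr;
	}

	*pd_out = pd;
	*mr_out = mr;
	return 0;

err_mr:
	/* Undo only what was already set up, in reverse order of acquisition. */
	ib_dealloc_pd(pd);
	return err;
}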
void IBMemBlock::close()
{
	if ( _memoryRegion ) {
		ib_api_status_t status = ib_dereg_mr( _memoryRegion );
		_memoryRegion = 0;
	}

	_localKey = 0;
	_remoteKey = 0;
}
static int fi_ib_mr_close(struct fid *fid)
{
	int ret;
	struct fi_ib_mem_desc *md = (struct fi_ib_mem_desc *) fid;

	print_trace("in\n");

	ret = ib_dereg_mr(md->mr);
	if (ret)
		print_err("ib_dereg_mr returned %d\n", ret);

	return ret;
}
static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_fastreg_mr *frmr;

	while (!list_empty(&xprt->sc_frmr_q)) {
		frmr = list_entry(xprt->sc_frmr_q.next,
				  struct svc_rdma_fastreg_mr, frmr_list);
		list_del_init(&frmr->frmr_list);
		ib_dereg_mr(frmr->mr);
		ib_free_fast_reg_page_list(frmr->page_list);
		kfree(frmr);
	}
}
static void iser_free_device_ib_res(struct iser_device *device)
{
	BUG_ON(device->mr == NULL);

	tasklet_kill(&device->cq_tasklet);

	(void)ib_dereg_mr(device->mr);
	(void)ib_destroy_cq(device->cq);
	(void)ib_dealloc_pd(device->pd);

	device->mr = NULL;
	device->cq = NULL;
	device->pd = NULL;
}
static void
frwr_op_release_mr(struct rpcrdma_mw *r)
{
	int rc;

	/* Ensure MW is not on any rl_registered list */
	if (!list_empty(&r->mw_list))
		list_del(&r->mw_list);

	rc = ib_dereg_mr(r->frmr.fr_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
		       r, rc);
	kfree(r->mw_sg);
	kfree(r);
}
static void isert_device_release(struct isert_device *isert_dev) { int err, i; TRACE_ENTRY(); lockdep_assert_held(&dev_list_mutex); isert_dev_list_remove(isert_dev); /* remove from global list */ for (i = 0; i < isert_dev->num_cqs; ++i) { struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) /* * cancel_work_sync() was introduced in 2.6.22. We can * only wait until all scheduled work is done. */ flush_workqueue(cq_desc->cq_workqueue); #else cancel_work_sync(&cq_desc->cq_comp_work); #endif err = ib_destroy_cq(cq_desc->cq); if (unlikely(err)) pr_err("Failed to destroy cq, err:%d\n", err); destroy_workqueue(cq_desc->cq_workqueue); } err = ib_dereg_mr(isert_dev->mr); if (unlikely(err)) pr_err("Failed to destroy mr, err:%d\n", err); err = ib_dealloc_pd(isert_dev->pd); if (unlikely(err)) pr_err("Failed to destroy pd, err:%d\n", err); vfree(isert_dev->cq_desc); isert_dev->cq_desc = NULL; kfree(isert_dev->cq_qps); isert_dev->cq_qps = NULL; kfree(isert_dev); TRACE_EXIT(); }
static void verbs_remove_device (struct ib_device *dev)
{
	printk (KERN_INFO "IB remove device called. Name = %s\n", dev->name);

	if (ah)
		ib_destroy_ah (ah);
	if (qp)
		ib_destroy_qp (qp);
	if (send_cq)
		ib_destroy_cq (send_cq);
	if (recv_cq)
		ib_destroy_cq (recv_cq);
	if (mr)
		ib_dereg_mr (mr);
	if (pd)
		ib_dealloc_pd (pd);
}
void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
{
	struct rds_iw_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		BUG_ON(!send->s_mr);
		ib_dereg_mr(send->s_mr);
		BUG_ON(!send->s_page_list);
		ib_free_fast_reg_page_list(send->s_page_list);
		if (send->s_wr.opcode == 0xdead)
			continue;
		if (send->s_rm)
			rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
		if (send->s_op)
			rds_iw_send_unmap_rdma(ic, send->s_op);
	}
}
/**
 * ib_dealloc_pd - Deallocates a protection domain.
 * @pd: The protection domain to deallocate.
 *
 * It is an error to call this function while any resources in the pd still
 * exist.  The caller is responsible for synchronously destroying them and
 * guaranteeing that no new allocations will happen.
 */
void ib_dealloc_pd(struct ib_pd *pd)
{
	int ret;

	if (pd->local_mr) {
		ret = ib_dereg_mr(pd->local_mr);
		WARN_ON(ret);
		pd->local_mr = NULL;
	}

	/* uverbs manipulates usecnt with proper locking, while the kabi
	   requires the caller to guarantee we can't race here. */
	WARN_ON(atomic_read(&pd->usecnt));

	/* Making dealloc_pd a void return is a WIP, no driver should
	   return an error here. */
	ret = pd->device->dealloc_pd(pd);
	WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
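/*
 * ib_dealloc_pd() warns when the PD's usecnt is still non-zero, so callers
 * must deregister every MR created from the PD before deallocating it, as
 * the consumers elsewhere in this section do.  A minimal sketch of that
 * ordering, assuming the void-returning ib_dealloc_pd() shown above (older
 * kernels return int); struct example_ctx and example_teardown() are
 * illustrative names only.
 */
#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

struct example_ctx {
	struct ib_pd *pd;
	struct ib_mr *mr;
};

static void example_teardown(struct example_ctx *ctx)
{
	int rc;

	if (ctx->mr) {
		/* Drops the reference the MR holds on the PD. */
		rc = ib_dereg_mr(ctx->mr);
		if (rc)
			pr_warn("example: ib_dereg_mr failed: %d\n", rc);
		ctx->mr = NULL;
	}

	if (ctx->pd) {
		/* Safe now that no MRs reference the PD. */
		ib_dealloc_pd(ctx->pd);
		ctx->pd = NULL;
	}
}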
/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
		rdma_destroy_qp(ia->ri_id);
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
		rdma_destroy_id(ia->ri_id);
}
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
	if (!rdma)
		return;

	if (rdma->dma_mr && !IS_ERR(rdma->dma_mr))
		ib_dereg_mr(rdma->dma_mr);

	if (rdma->qp && !IS_ERR(rdma->qp))
		ib_destroy_qp(rdma->qp);

	if (rdma->pd && !IS_ERR(rdma->pd))
		ib_dealloc_pd(rdma->pd);

	if (rdma->cq && !IS_ERR(rdma->cq))
		ib_destroy_cq(rdma->cq);

	if (rdma->cm_id && !IS_ERR(rdma->cm_id))
		rdma_destroy_id(rdma->cm_id);

	kfree(rdma);
}
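/*
 * rdma_destroy_trans() is written to be callable from a partially failed
 * setup path: each member is guarded by a "non-NULL and not an ERR_PTR"
 * test, so fields that were never assigned or that still hold an error
 * pointer from a failed constructor are simply skipped.  A small sketch of
 * the same defensive check using IS_ERR_OR_NULL(); example_put_dma_mr() is
 * a hypothetical helper, not part of the 9p transport.
 */
#include <linux/err.h>
#include <rdma/ib_verbs.h>

static void example_put_dma_mr(struct ib_mr **mr)
{
	/* Covers both "never assigned" (NULL) and "constructor failed" (ERR_PTR). */
	if (!IS_ERR_OR_NULL(*mr))
		ib_dereg_mr(*mr);
	*mr = NULL;
}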
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; struct ib_umem_object *memobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&ib_uverbs_idr_mutex); mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); if (!mr || mr->uobject->context != file->ucontext) goto out; memobj = container_of(mr->uobject, struct ib_umem_object, uobject); ret = ib_dereg_mr(mr); if (ret) goto out; idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); mutex_lock(&file->mutex); list_del(&memobj->uobject.list); mutex_unlock(&file->mutex); ib_umem_release(file->device->ib_dev, &memobj->umem); kfree(memobj); out: mutex_unlock(&ib_uverbs_idr_mutex); return ret ? ret : in_len; }
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
		       unsigned long *unpinned, unsigned int goal)
{
	struct rds_ib_mr *ibmr, *next;
	struct rds_ib_frmr *frmr;
	int ret = 0;
	unsigned int freed = *nfreed;

	/* Post a local invalidation for every MR that still has a DMA mapping */
	list_for_each_entry(ibmr, list, unmap_list) {
		if (ibmr->sg_dma_len)
			ret |= rds_ib_post_inv(ibmr);
	}
	if (ret)
		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

	/* Now we can destroy the DMA mapping and unpin any pages */
	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
		*unpinned += ibmr->sg_len;
		frmr = &ibmr->u.frmr;
		__rds_ib_teardown_mr(ibmr);
		if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
			/* Don't de-allocate if the MR is not free yet */
			if (frmr->fr_state == FRMR_IS_INUSE)
				continue;

			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
			else
				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);

			list_del(&ibmr->unmap_list);
			if (frmr->mr)
				ib_dereg_mr(frmr->mr);
			kfree(ibmr);
			freed++;
		}
	}

	/* Report the updated free count back to the caller */
	*nfreed = freed;
}
static struct isert_device *isert_device_create(struct ib_device *ib_dev) { struct isert_device *isert_dev; struct ib_device_attr *dev_attr; int cqe_num, err; struct ib_pd *pd; struct ib_mr *mr; struct ib_cq *cq; char wq_name[64]; int i, j; TRACE_ENTRY(); isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL); if (unlikely(isert_dev == NULL)) { pr_err("Failed to allocate iser dev\n"); err = -ENOMEM; goto out; } dev_attr = &isert_dev->device_attr; err = ib_query_device(ib_dev, dev_attr); if (unlikely(err)) { pr_err("Failed to query device, err: %d\n", err); goto fail_query; } isert_dev->num_cqs = min_t(int, num_online_cpus(), ib_dev->num_comp_vectors); isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs, GFP_KERNEL); if (unlikely(isert_dev->cq_qps == NULL)) { pr_err("Failed to allocate iser cq_qps\n"); err = -ENOMEM; goto fail_cq_qps; } isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); if (unlikely(isert_dev->cq_desc == NULL)) { pr_err("Failed to allocate %ld bytes for iser cq_desc\n", sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); err = -ENOMEM; goto fail_alloc_cq_desc; } pd = ib_alloc_pd(ib_dev); if (unlikely(IS_ERR(pd))) { err = PTR_ERR(pd); pr_err("Failed to alloc iser dev pd, err:%d\n", err); goto fail_pd; } mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); if (unlikely(IS_ERR(mr))) { err = PTR_ERR(mr); pr_err("Failed to get dma mr, err: %d\n", err); goto fail_mr; } cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES); cqe_num = cqe_num / isert_dev->num_cqs; #ifdef CONFIG_SCST_EXTRACHECKS if (isert_dev->device_attr.max_cqe == 0) pr_err("Zero max_cqe encountered: you may have a compilation problem\n"); #endif for (i = 0; i < isert_dev->num_cqs; ++i) { struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; cq_desc->dev = isert_dev; cq_desc->idx = i; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL); #else INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb); #endif snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name); #else #if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36) cq_desc->cq_workqueue = alloc_workqueue(wq_name, WQ_CPU_INTENSIVE| WQ_RESCUER, 1); #else cq_desc->cq_workqueue = alloc_workqueue(wq_name, WQ_CPU_INTENSIVE| WQ_MEM_RECLAIM, 1); #endif #endif if (unlikely(!cq_desc->cq_workqueue)) { pr_err("Failed to alloc iser cq work queue for dev:%s\n", ib_dev->name); err = -ENOMEM; goto fail_cq; } cq = ib_create_cq(ib_dev, isert_cq_comp_handler, isert_async_evt_handler, cq_desc, /* context */ cqe_num, i); /* completion vector */ if (unlikely(IS_ERR(cq))) { cq_desc->cq = NULL; err = PTR_ERR(cq); pr_err("Failed to create iser dev cq, err:%d\n", err); goto fail_cq; } cq_desc->cq = cq; err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); if (unlikely(err)) { pr_err("Failed to request notify cq, err: %d\n", err); goto fail_cq; } } isert_dev->ib_dev = ib_dev; isert_dev->pd = pd; isert_dev->mr = mr; INIT_LIST_HEAD(&isert_dev->conn_list); lockdep_assert_held(&dev_list_mutex); isert_dev_list_add(isert_dev); pr_info("iser created device:%p\n", isert_dev); return isert_dev; fail_cq: for (j = 0; j <= i; ++j) { if (isert_dev->cq_desc[j].cq) ib_destroy_cq(isert_dev->cq_desc[j].cq); if (isert_dev->cq_desc[j].cq_workqueue) destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue); } ib_dereg_mr(mr); fail_mr: ib_dealloc_pd(pd); fail_pd: 
vfree(isert_dev->cq_desc); fail_alloc_cq_desc: kfree(isert_dev->cq_qps); fail_cq_qps: fail_query: kfree(isert_dev); out: TRACE_EXIT_RES(err); return ERR_PTR(err); }
static int krping_setup_buffers(struct krping_cb *cb) { int ret; struct ib_phys_buf buf; u64 iovbase; DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); if (cb->use_dmamr) { cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE); if (IS_ERR(cb->dma_mr)) { log(LOG_ERR, "reg_dmamr failed\n"); return PTR_ERR(cb->dma_mr); } } else { buf.addr = vtophys(&cb->recv_buf); buf.size = sizeof cb->recv_buf; iovbase = vtophys(&cb->recv_buf); cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_LOCAL_WRITE, &iovbase); if (IS_ERR(cb->recv_mr)) { log(LOG_ERR, "recv_buf reg_mr failed\n"); return PTR_ERR(cb->recv_mr); } buf.addr = vtophys(&cb->send_buf); buf.size = sizeof cb->send_buf; iovbase = vtophys(&cb->send_buf); cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 0, &iovbase); if (IS_ERR(cb->send_mr)) { log(LOG_ERR, "send_buf reg_mr failed\n"); ib_dereg_mr(cb->recv_mr); return PTR_ERR(cb->send_mr); } } /* RNIC adapters have a limit upto which it can register physical memory * If DMA-MR memory mode is set then normally driver registers maximum * supported memory. After that if contigmalloc allocates memory beyond the * specified RNIC limit then Krping may not work. */ if (cb->use_dmamr && cb->memlimit) cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit, PAGE_SIZE, 0); else cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, PAGE_SIZE, 0); if (!cb->rdma_buf) { log(LOG_ERR, "rdma_buf malloc failed\n"); ret = ENOMEM; goto err1; } if (!cb->use_dmamr) { buf.addr = vtophys(cb->rdma_buf); buf.size = cb->size; iovbase = vtophys(cb->rdma_buf); cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE, &iovbase); if (IS_ERR(cb->rdma_mr)) { log(LOG_ERR, "rdma_buf reg_mr failed\n"); ret = PTR_ERR(cb->rdma_mr); goto err2; } } if (!cb->server || cb->wlat || cb->rlat || cb->bw) { if (cb->use_dmamr && cb->memlimit) cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit, PAGE_SIZE, 0); else cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, PAGE_SIZE, 0); if (!cb->start_buf) { log(LOG_ERR, "start_buf malloc failed\n"); ret = ENOMEM; goto err2; } if (!cb->use_dmamr) { unsigned flags = IB_ACCESS_REMOTE_READ; if (cb->wlat || cb->rlat || cb->bw) flags |= IB_ACCESS_REMOTE_WRITE; buf.addr = vtophys(cb->start_buf); buf.size = cb->size; iovbase = vtophys(cb->start_buf); cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, flags, &iovbase); if (IS_ERR(cb->start_mr)) { log(LOG_ERR, "start_buf reg_mr failed\n"); ret = PTR_ERR(cb->start_mr); goto err3; } } } krping_setup_wr(cb); DEBUG_LOG(PFX "allocated & registered buffers...\n"); return 0; err3: contigfree(cb->start_buf, cb->size, M_DEVBUF); if (!cb->use_dmamr) ib_dereg_mr(cb->rdma_mr); err2: contigfree(cb->rdma_buf, cb->size, M_DEVBUF); err1: if (cb->use_dmamr) ib_dereg_mr(cb->dma_mr); else { ib_dereg_mr(cb->recv_mr); ib_dereg_mr(cb->send_mr); } return ret; }
/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */ static void isert_cq_comp_work_cb(void *ctx) { struct isert_cq *cq_desc = ctx; #else static void isert_cq_comp_work_cb(struct work_struct *work) { struct isert_cq *cq_desc = container_of(work, struct isert_cq, cq_comp_work); #endif int ret; TRACE_ENTRY(); ret = isert_poll_cq(cq_desc); if (unlikely(ret < 0)) { /* poll error */ pr_err("ib_poll_cq failed\n"); goto out; } ib_req_notify_cq(cq_desc->cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); /* * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS, * so we need to make sure we don't miss any events between * last call to ib_poll_cq() and ib_req_notify_cq() */ isert_poll_cq(cq_desc); out: TRACE_EXIT(); return; } static void isert_cq_comp_handler(struct ib_cq *cq, void *context) { struct isert_cq *cq_desc = context; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) queue_work(cq_desc->cq_workqueue, &cq_desc->cq_comp_work); #else queue_work_on(smp_processor_id(), cq_desc->cq_workqueue, &cq_desc->cq_comp_work); #endif } static const char *ib_event_type_str(enum ib_event_type ev_type) { switch (ev_type) { case IB_EVENT_COMM_EST: return "COMM_EST"; case IB_EVENT_QP_FATAL: return "QP_FATAL"; case IB_EVENT_QP_REQ_ERR: return "QP_REQ_ERR"; case IB_EVENT_QP_ACCESS_ERR: return "QP_ACCESS_ERR"; case IB_EVENT_SQ_DRAINED: return "SQ_DRAINED"; case IB_EVENT_PATH_MIG: return "PATH_MIG"; case IB_EVENT_PATH_MIG_ERR: return "PATH_MIG_ERR"; case IB_EVENT_QP_LAST_WQE_REACHED: return "QP_LAST_WQE_REACHED"; case IB_EVENT_CQ_ERR: return "CQ_ERR"; case IB_EVENT_SRQ_ERR: return "SRQ_ERR"; case IB_EVENT_SRQ_LIMIT_REACHED: return "SRQ_LIMIT_REACHED"; case IB_EVENT_PORT_ACTIVE: return "PORT_ACTIVE"; case IB_EVENT_PORT_ERR: return "PORT_ERR"; case IB_EVENT_LID_CHANGE: return "LID_CHANGE"; case IB_EVENT_PKEY_CHANGE: return "PKEY_CHANGE"; case IB_EVENT_SM_CHANGE: return "SM_CHANGE"; case IB_EVENT_CLIENT_REREGISTER: return "CLIENT_REREGISTER"; case IB_EVENT_DEVICE_FATAL: return "DEVICE_FATAL"; default: return "UNKNOWN"; } } static void isert_async_evt_handler(struct ib_event *async_ev, void *context) { struct isert_cq *cq = context; struct isert_device *isert_dev = cq->dev; struct ib_device *ib_dev = isert_dev->ib_dev; char *dev_name = ib_dev->name; enum ib_event_type ev_type = async_ev->event; struct isert_connection *isert_conn; TRACE_ENTRY(); switch (ev_type) { case IB_EVENT_COMM_EST: isert_conn = async_ev->element.qp->qp_context; pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", isert_conn, isert_conn->cm_id, dev_name, ib_event_type_str(IB_EVENT_COMM_EST)); /* force "connection established" event */ rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); break; /* rest of QP-related events */ case IB_EVENT_QP_FATAL: case IB_EVENT_QP_REQ_ERR: case IB_EVENT_QP_ACCESS_ERR: case IB_EVENT_SQ_DRAINED: case IB_EVENT_PATH_MIG: case IB_EVENT_PATH_MIG_ERR: case IB_EVENT_QP_LAST_WQE_REACHED: isert_conn = async_ev->element.qp->qp_context; pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", isert_conn, isert_conn->cm_id, dev_name, ib_event_type_str(ev_type)); break; /* CQ-related events */ case IB_EVENT_CQ_ERR: pr_err("dev:%s CQ evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; /* SRQ events */ case IB_EVENT_SRQ_ERR: case IB_EVENT_SRQ_LIMIT_REACHED: pr_err("dev:%s SRQ evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; /* Port events */ case IB_EVENT_PORT_ACTIVE: case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_PKEY_CHANGE: case IB_EVENT_SM_CHANGE: case 
IB_EVENT_CLIENT_REREGISTER: pr_err("dev:%s port:%d evt: %s\n", dev_name, async_ev->element.port_num, ib_event_type_str(ev_type)); break; /* HCA events */ case IB_EVENT_DEVICE_FATAL: pr_err("dev:%s HCA evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; default: pr_err("dev:%s evt: %s\n", dev_name, ib_event_type_str(ev_type)); break; } TRACE_EXIT(); } static struct isert_device *isert_device_create(struct ib_device *ib_dev) { struct isert_device *isert_dev; struct ib_device_attr *dev_attr; int cqe_num, err; struct ib_pd *pd; struct ib_mr *mr; struct ib_cq *cq; char wq_name[64]; int i, j; TRACE_ENTRY(); isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL); if (unlikely(isert_dev == NULL)) { pr_err("Failed to allocate iser dev\n"); err = -ENOMEM; goto out; } dev_attr = &isert_dev->device_attr; err = ib_query_device(ib_dev, dev_attr); if (unlikely(err)) { pr_err("Failed to query device, err: %d\n", err); goto fail_query; } isert_dev->num_cqs = min_t(int, num_online_cpus(), ib_dev->num_comp_vectors); isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs, GFP_KERNEL); if (unlikely(isert_dev->cq_qps == NULL)) { pr_err("Failed to allocate iser cq_qps\n"); err = -ENOMEM; goto fail_cq_qps; } isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); if (unlikely(isert_dev->cq_desc == NULL)) { pr_err("Failed to allocate %ld bytes for iser cq_desc\n", sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); err = -ENOMEM; goto fail_alloc_cq_desc; } pd = ib_alloc_pd(ib_dev); if (unlikely(IS_ERR(pd))) { err = PTR_ERR(pd); pr_err("Failed to alloc iser dev pd, err:%d\n", err); goto fail_pd; } mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); if (unlikely(IS_ERR(mr))) { err = PTR_ERR(mr); pr_err("Failed to get dma mr, err: %d\n", err); goto fail_mr; } cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES); cqe_num = cqe_num / isert_dev->num_cqs; #ifdef CONFIG_SCST_EXTRACHECKS if (isert_dev->device_attr.max_cqe == 0) pr_err("Zero max_cqe encountered: you may have a compilation problem\n"); #endif for (i = 0; i < isert_dev->num_cqs; ++i) { struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; cq_desc->dev = isert_dev; cq_desc->idx = i; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL); #else INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb); #endif snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name); #else #if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36) cq_desc->cq_workqueue = alloc_workqueue(wq_name, WQ_CPU_INTENSIVE| WQ_RESCUER, 1); #else cq_desc->cq_workqueue = alloc_workqueue(wq_name, WQ_CPU_INTENSIVE| WQ_MEM_RECLAIM, 1); #endif #endif if (unlikely(!cq_desc->cq_workqueue)) { pr_err("Failed to alloc iser cq work queue for dev:%s\n", ib_dev->name); err = -ENOMEM; goto fail_cq; } #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) cq = ib_create_cq(ib_dev, isert_cq_comp_handler, isert_async_evt_handler, cq_desc, /* context */ cqe_num, i); /* completion vector */ #else { struct ib_cq_init_attr ia = { .cqe = cqe_num, .comp_vector = i, }; cq = ib_create_cq(ib_dev, isert_cq_comp_handler, isert_async_evt_handler, cq_desc, /* context */ &ia); } #endif if (unlikely(IS_ERR(cq))) { cq_desc->cq = NULL; err = PTR_ERR(cq); pr_err("Failed to create iser dev cq, err:%d\n", err); goto fail_cq; } cq_desc->cq = cq; err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); if 
(unlikely(err)) { pr_err("Failed to request notify cq, err: %d\n", err); goto fail_cq; } } isert_dev->ib_dev = ib_dev; isert_dev->pd = pd; isert_dev->mr = mr; INIT_LIST_HEAD(&isert_dev->conn_list); lockdep_assert_held(&dev_list_mutex); isert_dev_list_add(isert_dev); pr_info("iser created device:%p\n", isert_dev); return isert_dev; fail_cq: for (j = 0; j <= i; ++j) { if (isert_dev->cq_desc[j].cq) ib_destroy_cq(isert_dev->cq_desc[j].cq); if (isert_dev->cq_desc[j].cq_workqueue) destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue); } ib_dereg_mr(mr); fail_mr: ib_dealloc_pd(pd); fail_pd: vfree(isert_dev->cq_desc); fail_alloc_cq_desc: kfree(isert_dev->cq_qps); fail_cq_qps: fail_query: kfree(isert_dev); out: TRACE_EXIT_RES(err); return ERR_PTR(err); } static void isert_device_release(struct isert_device *isert_dev) { int err, i; TRACE_ENTRY(); lockdep_assert_held(&dev_list_mutex); isert_dev_list_remove(isert_dev); /* remove from global list */ for (i = 0; i < isert_dev->num_cqs; ++i) { struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) /* * cancel_work_sync() was introduced in 2.6.22. We can * only wait until all scheduled work is done. */ flush_workqueue(cq_desc->cq_workqueue); #else cancel_work_sync(&cq_desc->cq_comp_work); #endif err = ib_destroy_cq(cq_desc->cq); if (unlikely(err)) pr_err("Failed to destroy cq, err:%d\n", err); destroy_workqueue(cq_desc->cq_workqueue); } err = ib_dereg_mr(isert_dev->mr); if (unlikely(err)) pr_err("Failed to destroy mr, err:%d\n", err); err = ib_dealloc_pd(isert_dev->pd); if (unlikely(err)) pr_err("Failed to destroy pd, err:%d\n", err); vfree(isert_dev->cq_desc); isert_dev->cq_desc = NULL; kfree(isert_dev->cq_qps); isert_dev->cq_qps = NULL; kfree(isert_dev); TRACE_EXIT(); }
/* * Create unconnected endpoint. */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; struct ib_cq_init_attr cq_attr = {}; unsigned int max_qp_wr; int rc, err; if (devattr->max_sge < RPCRDMA_MAX_IOVS) { dprintk("RPC: %s: insufficient sge's available\n", __func__); return -ENOMEM; } if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", __func__); return -ENOMEM; } max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS; /* check provider's send/recv wr limits */ if (cdata->max_requests > max_qp_wr) cdata->max_requests = max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; rc = ia->ri_ops->ro_open(ia, ep, cdata); if (rc) return rc; ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, ep->rep_attr.cap.max_send_wr, ep->rep_attr.cap.max_recv_wr, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ INIT_CQCOUNT(ep); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", __func__, rc); goto out1; } rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); goto out2; } cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", __func__, rc); goto out2; } rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); ib_destroy_cq(recvcq); goto out2; } ep->rep_attr.send_cq = sendcq; ep->rep_attr.recv_cq = recvcq; /* Initialize cma parameters */ /* RPC/RDMA does not use private data */ ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = devattr->max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; return 0; out2: err = ib_destroy_cq(sendcq); if (err) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: if (ia->ri_dma_mr) ib_dereg_mr(ia->ri_dma_mr); return rc; }
static int krping_setup_buffers(struct krping_cb *cb) { int ret; struct ib_phys_buf buf; u64 iovbase; DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); if (cb->use_dmamr) { cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE); if (IS_ERR(cb->dma_mr)) { log(LOG_ERR, "reg_dmamr failed\n"); return PTR_ERR(cb->dma_mr); } } else { buf.addr = vtophys(&cb->recv_buf); buf.size = sizeof cb->recv_buf; iovbase = vtophys(&cb->recv_buf); cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_LOCAL_WRITE, &iovbase); if (IS_ERR(cb->recv_mr)) { log(LOG_ERR, "recv_buf reg_mr failed\n"); return PTR_ERR(cb->recv_mr); } buf.addr = vtophys(&cb->send_buf); buf.size = sizeof cb->send_buf; iovbase = vtophys(&cb->send_buf); cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 0, &iovbase); if (IS_ERR(cb->send_mr)) { log(LOG_ERR, "send_buf reg_mr failed\n"); ib_dereg_mr(cb->recv_mr); return PTR_ERR(cb->send_mr); } } cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, PAGE_SIZE, 0); if (!cb->rdma_buf) { log(LOG_ERR, "rdma_buf malloc failed\n"); ret = ENOMEM; goto err1; } if (!cb->use_dmamr) { buf.addr = vtophys(cb->rdma_buf); buf.size = cb->size; iovbase = vtophys(cb->rdma_buf); cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE, &iovbase); if (IS_ERR(cb->rdma_mr)) { log(LOG_ERR, "rdma_buf reg_mr failed\n"); ret = PTR_ERR(cb->rdma_mr); goto err2; } } if (!cb->server || cb->wlat || cb->rlat || cb->bw) { cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, PAGE_SIZE, 0); if (!cb->start_buf) { log(LOG_ERR, "start_buf malloc failed\n"); ret = ENOMEM; goto err2; } if (!cb->use_dmamr) { unsigned flags = IB_ACCESS_REMOTE_READ; if (cb->wlat || cb->rlat || cb->bw) flags |= IB_ACCESS_REMOTE_WRITE; buf.addr = vtophys(cb->start_buf); buf.size = cb->size; iovbase = vtophys(cb->start_buf); cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, flags, &iovbase); if (IS_ERR(cb->start_mr)) { log(LOG_ERR, "start_buf reg_mr failed\n"); ret = PTR_ERR(cb->start_mr); goto err3; } } } krping_setup_wr(cb); DEBUG_LOG(PFX "allocated & registered buffers...\n"); return 0; err3: contigfree(cb->start_buf, cb->size, M_DEVBUF); if (!cb->use_dmamr) ib_dereg_mr(cb->rdma_mr); err2: contigfree(cb->rdma_buf, cb->size, M_DEVBUF); err1: if (cb->use_dmamr) ib_dereg_mr(cb->dma_mr); else { ib_dereg_mr(cb->recv_mr); ib_dereg_mr(cb->send_mr); } return ret; }
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; struct ib_udata udata; struct ib_umem_object *obj; struct ib_pd *pd; struct ib_mr *mr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; /* * Local write permission is required if remote write or * remote atomic permission is also requested. */ if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; obj->uobject.context = file->ucontext; /* * We ask for writable memory if any access flags other than * "remote read" are set. "Local write" and "remote write" * obviously require write access. "Remote atomic" can do * things like fetch and add, which will modify memory, and * "MW bind" can change permissions by binding a window. */ ret = ib_umem_get(file->device->ib_dev, &obj->umem, (void *) (unsigned long) cmd.start, cmd.length, !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); if (ret) goto err_free; obj->umem.virt_base = cmd.hca_va; mutex_lock(&ib_uverbs_idr_mutex); pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); if (!pd || pd->uobject->context != file->ucontext) { ret = -EINVAL; goto err_up; } if (!pd->device->reg_user_mr) { ret = -ENOSYS; goto err_up; } mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_up; } mr->device = pd->device; mr->pd = pd; mr->uobject = &obj->uobject; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; resp.rkey = mr->rkey; retry: if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) { ret = -ENOMEM; goto err_unreg; } ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id); if (ret == -EAGAIN) goto retry; if (ret) goto err_unreg; resp.mr_handle = obj->uobject.id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_idr; } mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); mutex_unlock(&ib_uverbs_idr_mutex); return in_len; err_idr: idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); err_unreg: ib_dereg_mr(mr); atomic_dec(&pd->usecnt); err_up: mutex_unlock(&ib_uverbs_idr_mutex); ib_umem_release(file->device->ib_dev, &obj->umem); err_free: kfree(obj); return ret; }
/* * Unregister and destroy buffer memory. Need to deal with * partial initialization, so it's callable from failed create. * Must be called before destroying endpoint, as registrations * reference it. */ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { int rc, i; struct rpcrdma_ia *ia = rdmab_to_ia(buf); struct rpcrdma_mw *r; /* clean up in reverse order from create * 1. recv mr memory (mr free, then kfree) * 1a. bind mw memory * 2. send mr memory (mr free, then kfree) * 3. padding (if any) [moved to rpcrdma_ep_destroy] * 4. arrays */ dprintk("RPC: %s: entering\n", __func__); for (i = 0; i < buf->rb_max_requests; i++) { if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { rpcrdma_deregister_internal(ia, buf->rb_recv_bufs[i]->rr_handle, &buf->rb_recv_bufs[i]->rr_iov); kfree(buf->rb_recv_bufs[i]); } if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { while (!list_empty(&buf->rb_mws)) { r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: rc = ib_dereg_mr(r->r.frmr.fr_mr); if (rc) dprintk("RPC: %s:" " ib_dereg_mr" " failed %i\n", __func__, rc); ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); break; case RPCRDMA_MTHCAFMR: rc = ib_dealloc_fmr(r->r.fmr); if (rc) dprintk("RPC: %s:" " ib_dealloc_fmr" " failed %i\n", __func__, rc); break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: rc = ib_dealloc_mw(r->r.mw); if (rc) dprintk("RPC: %s:" " ib_dealloc_mw" " failed %i\n", __func__, rc); break; default: break; } } rpcrdma_deregister_internal(ia, buf->rb_send_bufs[i]->rl_handle, &buf->rb_send_bufs[i]->rl_iov); kfree(buf->rb_send_bufs[i]); } } kfree(buf->rb_pool); }
/* * Create unconnected endpoint. */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_cq *sendcq, *recvcq; unsigned int max_qp_wr; int rc; if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { dprintk("RPC: %s: insufficient sge's available\n", __func__); return -ENOMEM; } if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", __func__); return -ENOMEM; } max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1; /* check provider's send/recv wr limits */ if (cdata->max_requests > max_qp_wr) cdata->max_requests = max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */ rc = ia->ri_ops->ro_open(ia, ep, cdata); if (rc) return rc; ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, ep->rep_attr.cap.max_send_wr, ep->rep_attr.cap.max_recv_wr, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ INIT_CQCOUNT(ep); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); sendcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_send_wr + 1, 0, IB_POLL_SOFTIRQ); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", __func__, rc); goto out1; } recvcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_recv_wr + 1, 0, IB_POLL_SOFTIRQ); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", __func__, rc); goto out2; } ep->rep_attr.send_cq = sendcq; ep->rep_attr.recv_cq = recvcq; /* Initialize cma parameters */ memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); /* RPC/RDMA does not use private data */ ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = ia->ri_device->attrs.max_qp_rd_atom; /* Limit transport retries so client can detect server * GID changes quickly. RPC layer handles re-establishing * transport connection and retransmission. */ ep->rep_remote_cma.retry_count = 6; /* RPC-over-RDMA handles its own flow control. In addition, * make all RNR NAKs visible so we know that RPC-over-RDMA * flow control is working correctly (no NAKs should be seen). */ ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; return 0; out2: ib_free_cq(sendcq); out1: if (ia->ri_dma_mr) ib_dereg_mr(ia->ri_dma_mr); return rc; }