/**
 * ib_alloc_pd - Allocates an unused protection domain.
 * @device: The device on which to allocate the protection domain.
 *
 * A protection domain object provides an association between QPs, shared
 * receive queues, address handles, memory regions, and memory windows.
 *
 * Every PD has a local_dma_lkey which can be used as the lkey value for local
 * memory operations.
 */
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = device->alloc_pd(device, NULL, NULL);
	if (IS_ERR(pd))
		return pd;

	pd->device = device;
	pd->uobject = NULL;
	pd->local_mr = NULL;
	atomic_set(&pd->usecnt, 0);

	if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		pd->local_dma_lkey = device->local_dma_lkey;
	else {
		struct ib_mr *mr;

		mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(mr)) {
			ib_dealloc_pd(pd);
			return (struct ib_pd *)mr;
		}

		pd->local_mr = mr;
		pd->local_dma_lkey = pd->local_mr->lkey;
	}
	return pd;
}
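/*
 * The snippets collected below all repeat the same caller-side pattern around
 * ib_alloc_pd(): allocate a PD, obtain an all-memory DMA MR (or rely on the
 * device's local_dma_lkey), and release both in reverse order. The following
 * is a minimal sketch of that pattern under the old ib_alloc_pd(device) /
 * ib_get_dma_mr() API used throughout this collection; demo_ib_res,
 * demo_setup_ib_res and demo_teardown_ib_res are illustrative names, not
 * taken from any of the drivers quoted here.
 */

/* Hypothetical container for the per-device IB resources. */
struct demo_ib_res {
	struct ib_pd *pd;
	struct ib_mr *mr;
};

/* Allocate a PD and an all-memory DMA MR; unwind on any failure. */
static int demo_setup_ib_res(struct ib_device *device, struct demo_ib_res *res)
{
	int ret;

	res->pd = ib_alloc_pd(device);
	if (IS_ERR(res->pd))
		return PTR_ERR(res->pd);

	res->mr = ib_get_dma_mr(res->pd, IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ);
	if (IS_ERR(res->mr)) {
		ret = PTR_ERR(res->mr);
		ib_dealloc_pd(res->pd);
		res->pd = NULL;
		res->mr = NULL;
		return ret;
	}
	return 0;
}

/* Release in the reverse order of setup: MR first, then the PD. */
static void demo_teardown_ib_res(struct demo_ib_res *res)
{
	if (res->mr)
		ib_dereg_mr(res->mr);
	if (res->pd)
		ib_dealloc_pd(res->pd);
}

/*
 * The access flags mirror the iSER and kfabric examples below; a transport
 * that only needs local access (such as the 9P client) would pass
 * IB_ACCESS_LOCAL_WRITE alone.
 */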
/* This may be done once per device (not per connection). */
static int ib_sock_mem_init_common(struct IB_SOCK *sock)
{
	int ret;
	int access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;

	/* create a protection domain */
	sock->is_mem.ism_pd = ib_alloc_pd(sock->is_id->device);
	if (IS_ERR(sock->is_mem.ism_pd)) {
		printk("can't create a protection domain\n");
		ret = PTR_ERR(sock->is_mem.ism_pd);
		sock->is_mem.ism_pd = NULL;
		return ret;
	}

	sock->is_mem.ism_mr = ib_get_dma_mr(sock->is_mem.ism_pd, access_flags);
	if (IS_ERR(sock->is_mem.ism_mr)) {
		ret = PTR_ERR(sock->is_mem.ism_mr);
		sock->is_mem.ism_mr = NULL;
		printk("Failed ib_get_dma_mr : %d\n", ret);
		return ret;
	}

	return 0;
}
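/*
 * The init path above intentionally leaves the PD in place when MR
 * registration fails, so a matching teardown helper is expected to run on
 * both the error and shutdown paths. The sketch below reuses the IB_SOCK
 * fields from the snippet; ib_sock_mem_fini_common is an assumed name and
 * not part of the original code.
 */
static void ib_sock_mem_fini_common(struct IB_SOCK *sock)
{
	if (sock->is_mem.ism_mr) {
		ib_dereg_mr(sock->is_mem.ism_mr);
		sock->is_mem.ism_mr = NULL;
	}
	if (sock->is_mem.ism_pd) {
		ib_dealloc_pd(sock->is_mem.ism_pd);
		sock->is_mem.ism_pd = NULL;
	}
}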
/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * Returns 0 on success, -1 on failure.
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	device->rx_cq = ib_create_cq(device->ib_device,
				     iser_cq_callback,
				     iser_cq_event_callback,
				     (void *)device,
				     ISER_MAX_RX_CQ_LEN, 0);
	if (IS_ERR(device->rx_cq))
		goto rx_cq_err;

	device->tx_cq = ib_create_cq(device->ib_device,
				     NULL, iser_cq_event_callback,
				     (void *)device,
				     ISER_MAX_TX_CQ_LEN, 0);
	if (IS_ERR(device->tx_cq))
		goto tx_cq_err;

	if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
		goto cq_arm_err;

	tasklet_init(&device->cq_tasklet,
		     iser_cq_tasklet_fn,
		     (unsigned long)device);

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	tasklet_kill(&device->cq_tasklet);
cq_arm_err:
	ib_destroy_cq(device->tx_cq);
tx_cq_err:
	ib_destroy_cq(device->rx_cq);
rx_cq_err:
	ib_dealloc_pd(device->pd);
pd_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}
int fi_ib_mr_reg(struct fid *fid, const void *buf, size_t len,
		 uint64_t access, uint64_t offset, uint64_t requested_key,
		 uint64_t flags, struct fid_mr **mr, void *context)
{
	struct fid_domain *domain = (struct fid_domain *)fid;
	struct fi_ib_mem_desc *md;
	int ret;

	print_trace("in\n");

	if (flags)
		return -FI_EBADFLAGS;

	md = kzalloc(sizeof(*md), GFP_KERNEL);
	if (!md) {
		print_err("kzalloc failed!\n");
		return -FI_ENOMEM;
	}

	md->domain = (struct fi_ib_domain *)domain;
	md->mr_fid.fid.fclass = FI_CLASS_MR;
	md->mr_fid.fid.context = context;
	md->mr_fid.fid.ops = &fi_ib_mr_ops;

	md->mr = ib_get_dma_mr(md->domain->pd,
			       IB_ACCESS_LOCAL_WRITE |
			       IB_ACCESS_REMOTE_WRITE |
			       IB_ACCESS_REMOTE_READ);
	if (IS_ERR(md->mr)) {
		ret = PTR_ERR(md->mr);
		print_err("ib_get_dma_mr returned %d\n", ret);
		goto err;
	}

	md->mr_fid.mem_desc = (void *)(uintptr_t)md->mr->lkey;
	md->mr_fid.key = md->mr->rkey;

	*mr = &md->mr_fid;
	return 0;
err:
	kfree(md);
	return ret;
}
static int rdma_setup(rdma_ctx_t ctx)
{
    // create receive buffer
    ctx->rdma_recv_buffer = kmalloc(RDMA_BUFFER_SIZE, GFP_KERNEL);
    CHECK_MSG_RET(ctx->rdma_recv_buffer != NULL, "Error kmalloc", -1);

    // create memory region
    // ib_get_dma_mr() returns an ERR_PTR on failure, never NULL, so the
    // result must be checked with IS_ERR()
    ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_REMOTE_READ |
                            IB_ACCESS_REMOTE_WRITE |
                            IB_ACCESS_LOCAL_WRITE);
    CHECK_MSG_RET(!IS_ERR(ctx->mr), "Error creating MR", -1);
    ctx->rkey = ctx->mr->rkey;

    // get dma_addr
    ctx->dma_addr = ib_dma_map_single(rdma_ib_device.dev,
                                      ctx->rdma_recv_buffer,
                                      RDMA_BUFFER_SIZE, DMA_BIDIRECTIONAL);
    CHECK_MSG_RET(ib_dma_mapping_error(rdma_ib_device.dev,
                                       ctx->dma_addr) == 0,
                  "Error ib_dma_map_single", -1);

    // modify QP until RTS
    modify_qp(ctx);

    return 0;
}
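// rdma_setup() has no visible counterpart in this excerpt, so the following
// is a hedged sketch of the reverse-order cleanup, assuming the same
// rdma_ctx_t fields and the global rdma_ib_device used above; rdma_teardown
// is an illustrative name, not part of the original module.
static void rdma_teardown(rdma_ctx_t ctx)
{
    if (ctx->dma_addr &&
        !ib_dma_mapping_error(rdma_ib_device.dev, ctx->dma_addr))
        ib_dma_unmap_single(rdma_ib_device.dev, ctx->dma_addr,
                            RDMA_BUFFER_SIZE, DMA_BIDIRECTIONAL);
    if (ctx->mr && !IS_ERR(ctx->mr))
        ib_dereg_mr(ctx->mr);
    kfree(ctx->rdma_recv_buffer);
}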
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
	struct sockaddr_in cl = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int port, err = -EINVAL;

	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
		cl.sin_port = htons((ushort)port);
		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;
	struct ib_cq_init_attr cq_attr = {};

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
				     IB_QPT_RC);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Bind to a privileged port if we need to */
	if (opts.privport) {
		err = p9_rdma_bind_privport(rdma);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport: %d\n",
			       __func__, task_pid_nr(current), -err);
			goto error;
		}
	}

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				&cq_attr);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				opts.sq_depth + opts.rq_depth + 1, 0);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
static void isert_cq_comp_work_cb(void *ctx)
{
	struct isert_cq *cq_desc = ctx;
#else
static void isert_cq_comp_work_cb(struct work_struct *work)
{
	struct isert_cq *cq_desc =
		container_of(work, struct isert_cq, cq_comp_work);
#endif
	int ret;

	TRACE_ENTRY();

	ret = isert_poll_cq(cq_desc);
	if (unlikely(ret < 0)) { /* poll error */
		pr_err("ib_poll_cq failed\n");
		goto out;
	}

	ib_req_notify_cq(cq_desc->cq,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	/*
	 * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS,
	 * so we need to make sure we don't miss any events between
	 * last call to ib_poll_cq() and ib_req_notify_cq()
	 */
	isert_poll_cq(cq_desc);

out:
	TRACE_EXIT();
	return;
}

static void isert_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct isert_cq *cq_desc = context;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
	queue_work(cq_desc->cq_workqueue, &cq_desc->cq_comp_work);
#else
	queue_work_on(smp_processor_id(), cq_desc->cq_workqueue,
		      &cq_desc->cq_comp_work);
#endif
}

static const char *ib_event_type_str(enum ib_event_type ev_type)
{
	switch (ev_type) {
	case IB_EVENT_COMM_EST:
		return "COMM_EST";
	case IB_EVENT_QP_FATAL:
		return "QP_FATAL";
	case IB_EVENT_QP_REQ_ERR:
		return "QP_REQ_ERR";
	case IB_EVENT_QP_ACCESS_ERR:
		return "QP_ACCESS_ERR";
	case IB_EVENT_SQ_DRAINED:
		return "SQ_DRAINED";
	case IB_EVENT_PATH_MIG:
		return "PATH_MIG";
	case IB_EVENT_PATH_MIG_ERR:
		return "PATH_MIG_ERR";
	case IB_EVENT_QP_LAST_WQE_REACHED:
		return "QP_LAST_WQE_REACHED";
	case IB_EVENT_CQ_ERR:
		return "CQ_ERR";
	case IB_EVENT_SRQ_ERR:
		return "SRQ_ERR";
	case IB_EVENT_SRQ_LIMIT_REACHED:
		return "SRQ_LIMIT_REACHED";
	case IB_EVENT_PORT_ACTIVE:
		return "PORT_ACTIVE";
	case IB_EVENT_PORT_ERR:
		return "PORT_ERR";
	case IB_EVENT_LID_CHANGE:
		return "LID_CHANGE";
	case IB_EVENT_PKEY_CHANGE:
		return "PKEY_CHANGE";
	case IB_EVENT_SM_CHANGE:
		return "SM_CHANGE";
	case IB_EVENT_CLIENT_REREGISTER:
		return "CLIENT_REREGISTER";
	case IB_EVENT_DEVICE_FATAL:
		return "DEVICE_FATAL";
	default:
		return "UNKNOWN";
	}
}

static void isert_async_evt_handler(struct ib_event *async_ev, void *context)
{
	struct isert_cq *cq = context;
	struct isert_device *isert_dev = cq->dev;
	struct ib_device *ib_dev = isert_dev->ib_dev;
	char *dev_name = ib_dev->name;
	enum ib_event_type ev_type = async_ev->event;
	struct isert_connection *isert_conn;

	TRACE_ENTRY();

	switch (ev_type) {
	case IB_EVENT_COMM_EST:
		isert_conn = async_ev->element.qp->qp_context;
		pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n",
			isert_conn, isert_conn->cm_id, dev_name,
			ib_event_type_str(IB_EVENT_COMM_EST));
		/* force "connection established" event */
		rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST);
		break;

	/* rest of QP-related events */
	case IB_EVENT_QP_FATAL:
	case IB_EVENT_QP_REQ_ERR:
	case IB_EVENT_QP_ACCESS_ERR:
	case IB_EVENT_SQ_DRAINED:
	case IB_EVENT_PATH_MIG:
	case IB_EVENT_PATH_MIG_ERR:
	case IB_EVENT_QP_LAST_WQE_REACHED:
		isert_conn = async_ev->element.qp->qp_context;
		pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n",
		       isert_conn, isert_conn->cm_id, dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* CQ-related events */
	case IB_EVENT_CQ_ERR:
		pr_err("dev:%s CQ evt: %s\n",
		       dev_name, ib_event_type_str(ev_type));
		break;

	/* SRQ events */
	case IB_EVENT_SRQ_ERR:
	case IB_EVENT_SRQ_LIMIT_REACHED:
		pr_err("dev:%s SRQ evt: %s\n",
		       dev_name, ib_event_type_str(ev_type));
		break;

	/* Port events */
	case IB_EVENT_PORT_ACTIVE:
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_PKEY_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		pr_err("dev:%s port:%d evt: %s\n",
		       dev_name, async_ev->element.port_num,
		       ib_event_type_str(ev_type));
		break;

	/* HCA events */
	case IB_EVENT_DEVICE_FATAL:
		pr_err("dev:%s HCA evt: %s\n",
		       dev_name, ib_event_type_str(ev_type));
		break;

	default:
		pr_err("dev:%s evt: %s\n",
		       dev_name, ib_event_type_str(ev_type));
		break;
	}

	TRACE_EXIT();
}

static struct isert_device *isert_device_create(struct ib_device *ib_dev)
{
	struct isert_device *isert_dev;
	struct ib_device_attr *dev_attr;
	int cqe_num, err;
	struct ib_pd *pd;
	struct ib_mr *mr;
	struct ib_cq *cq;
	char wq_name[64];
	int i, j;

	TRACE_ENTRY();

	isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL);
	if (unlikely(isert_dev == NULL)) {
		pr_err("Failed to allocate iser dev\n");
		err = -ENOMEM;
		goto out;
	}

	dev_attr = &isert_dev->device_attr;
	err = ib_query_device(ib_dev, dev_attr);
	if (unlikely(err)) {
		pr_err("Failed to query device, err: %d\n", err);
		goto fail_query;
	}

	isert_dev->num_cqs = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs,
				    GFP_KERNEL);
	if (unlikely(isert_dev->cq_qps == NULL)) {
		pr_err("Failed to allocate iser cq_qps\n");
		err = -ENOMEM;
		goto fail_cq_qps;
	}

	isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
	if (unlikely(isert_dev->cq_desc == NULL)) {
		pr_err("Failed to allocate %ld bytes for iser cq_desc\n",
		       sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
		err = -ENOMEM;
		goto fail_alloc_cq_desc;
	}

	pd = ib_alloc_pd(ib_dev);
	if (unlikely(IS_ERR(pd))) {
		err = PTR_ERR(pd);
		pr_err("Failed to alloc iser dev pd, err:%d\n", err);
		goto fail_pd;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (unlikely(IS_ERR(mr))) {
		err = PTR_ERR(mr);
		pr_err("Failed to get dma mr, err: %d\n", err);
		goto fail_mr;
	}

	cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES);
	cqe_num = cqe_num / isert_dev->num_cqs;
#ifdef CONFIG_SCST_EXTRACHECKS
	if (isert_dev->device_attr.max_cqe == 0)
		pr_err("Zero max_cqe encountered: you may have a compilation problem\n");
#endif

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

		cq_desc->dev = isert_dev;
		cq_desc->idx = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
#else
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
#endif

		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
#else
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_RESCUER, 1);
#else
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_MEM_RECLAIM, 1);
#endif
#endif
		if (unlikely(!cq_desc->cq_workqueue)) {
			pr_err("Failed to alloc iser cq work queue for dev:%s\n",
			       ib_dev->name);
			err = -ENOMEM;
			goto fail_cq;
		}

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  cqe_num,
				  i); /* completion vector */
#else
		{
		struct ib_cq_init_attr ia = {
			.cqe = cqe_num,
			.comp_vector = i,
		};

		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  &ia);
		}
#endif
		if (unlikely(IS_ERR(cq))) {
			cq_desc->cq = NULL;
			err = PTR_ERR(cq);
			pr_err("Failed to create iser dev cq, err:%d\n", err);
			goto fail_cq;
		}

		cq_desc->cq = cq;
		err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				       IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(err)) {
			pr_err("Failed to request notify cq, err: %d\n", err);
			goto fail_cq;
		}
	}

	isert_dev->ib_dev = ib_dev;
	isert_dev->pd = pd;
	isert_dev->mr = mr;

	INIT_LIST_HEAD(&isert_dev->conn_list);

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_add(isert_dev);

	pr_info("iser created device:%p\n", isert_dev);
	return isert_dev;

fail_cq:
	for (j = 0; j <= i; ++j) {
		if (isert_dev->cq_desc[j].cq)
			ib_destroy_cq(isert_dev->cq_desc[j].cq);
		if (isert_dev->cq_desc[j].cq_workqueue)
			destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue);
	}
	ib_dereg_mr(mr);
fail_mr:
	ib_dealloc_pd(pd);
fail_pd:
	vfree(isert_dev->cq_desc);
fail_alloc_cq_desc:
	kfree(isert_dev->cq_qps);
fail_cq_qps:
fail_query:
	kfree(isert_dev);
out:
	TRACE_EXIT_RES(err);
	return ERR_PTR(err);
}

static void isert_device_release(struct isert_device *isert_dev)
{
	int err, i;

	TRACE_ENTRY();

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_remove(isert_dev); /* remove from global list */

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
		/*
		 * cancel_work_sync() was introduced in 2.6.22. We can
		 * only wait until all scheduled work is done.
		 */
		flush_workqueue(cq_desc->cq_workqueue);
#else
		cancel_work_sync(&cq_desc->cq_comp_work);
#endif

		err = ib_destroy_cq(cq_desc->cq);
		if (unlikely(err))
			pr_err("Failed to destroy cq, err:%d\n", err);

		destroy_workqueue(cq_desc->cq_workqueue);
	}

	err = ib_dereg_mr(isert_dev->mr);
	if (unlikely(err))
		pr_err("Failed to destroy mr, err:%d\n", err);
	err = ib_dealloc_pd(isert_dev->pd);
	if (unlikely(err))
		pr_err("Failed to destroy pd, err:%d\n", err);

	vfree(isert_dev->cq_desc);
	isert_dev->cq_desc = NULL;

	kfree(isert_dev->cq_qps);
	isert_dev->cq_qps = NULL;

	kfree(isert_dev);

	TRACE_EXIT();
}
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	switch (memreg) {
	case RPCRDMA_MEMWINDOWS:
	case RPCRDMA_MEMWINDOWS_ASYNC:
		if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
			dprintk("RPC: %s: MEMWINDOWS registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		if (!ia->ri_id->device->alloc_fmr) {
#if RPCRDMA_PERSISTENT_REGISTRATION
			dprintk("RPC: %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			dprintk("RPC: %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
#endif
		}
		break;
	case RPCRDMA_FRMR:
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
#if RPCRDMA_PERSISTENT_REGISTRATION
			dprintk("RPC: %s: FRMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			dprintk("RPC: %s: FRMR registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
#endif
		}
		break;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_BOUNCEBUFFERS:
	case RPCRDMA_REGISTER:
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_MW_BIND;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n\t"
				"Will continue with degraded performance\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			memreg = RPCRDMA_REGISTER;
			ia->ri_bind_mem = NULL;
		}
		break;
	default:
		printk(KERN_ERR "%s: invalid memory registration mode %d\n",
				__func__, memreg);
		rc = -EINVAL;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	init_completion(&ia->ri_done);

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	if (memreg > RPCRDMA_REGISTER) {
		int mem_priv = IB_ACCESS_LOCAL_WRITE;
		switch (memreg) {
#if RPCRDMA_PERSISTENT_REGISTRATION
		case RPCRDMA_ALLPHYSICAL:
			mem_priv |= IB_ACCESS_REMOTE_WRITE;
			mem_priv |= IB_ACCESS_REMOTE_READ;
			break;
#endif
		case RPCRDMA_MEMWINDOWS_ASYNC:
		case RPCRDMA_MEMWINDOWS:
			mem_priv |= IB_ACCESS_MW_BIND;
			break;
		default:
			break;
		}
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n\t"
				"Will continue with degraded performance\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			memreg = RPCRDMA_REGISTER;
			ia->ri_bind_mem = NULL;
		}
	}

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
out1:
	return rc;
}
static int krping_setup_buffers(struct krping_cb *cb)
{
	int ret;
	struct ib_phys_buf buf;
	u64 iovbase;

	DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);

	if (cb->use_dmamr) {
		cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
					   IB_ACCESS_REMOTE_READ|
					   IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(cb->dma_mr)) {
			log(LOG_ERR, "reg_dmamr failed\n");
			return PTR_ERR(cb->dma_mr);
		}
	} else {
		buf.addr = vtophys(&cb->recv_buf);
		buf.size = sizeof cb->recv_buf;
		iovbase = vtophys(&cb->recv_buf);
		cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
					     IB_ACCESS_LOCAL_WRITE,
					     &iovbase);
		if (IS_ERR(cb->recv_mr)) {
			log(LOG_ERR, "recv_buf reg_mr failed\n");
			return PTR_ERR(cb->recv_mr);
		}

		buf.addr = vtophys(&cb->send_buf);
		buf.size = sizeof cb->send_buf;
		iovbase = vtophys(&cb->send_buf);
		cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 0, &iovbase);
		if (IS_ERR(cb->send_mr)) {
			log(LOG_ERR, "send_buf reg_mr failed\n");
			ib_dereg_mr(cb->recv_mr);
			return PTR_ERR(cb->send_mr);
		}
	}

	/*
	 * RNIC adapters have a limit up to which they can register physical
	 * memory. If DMA-MR mode is set, the driver normally registers the
	 * maximum supported memory; if contigmalloc then allocates memory
	 * beyond that RNIC limit, krping may not work.
	 */
	if (cb->use_dmamr && cb->memlimit)
		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0,
					    cb->memlimit, PAGE_SIZE, 0);
	else
		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0,
					    -1UL, PAGE_SIZE, 0);
	if (!cb->rdma_buf) {
		log(LOG_ERR, "rdma_buf malloc failed\n");
		ret = ENOMEM;
		goto err1;
	}

	if (!cb->use_dmamr) {
		buf.addr = vtophys(cb->rdma_buf);
		buf.size = cb->size;
		iovbase = vtophys(cb->rdma_buf);
		cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
					     IB_ACCESS_REMOTE_READ|
					     IB_ACCESS_REMOTE_WRITE,
					     &iovbase);
		if (IS_ERR(cb->rdma_mr)) {
			log(LOG_ERR, "rdma_buf reg_mr failed\n");
			ret = PTR_ERR(cb->rdma_mr);
			goto err2;
		}
	}

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		if (cb->use_dmamr && cb->memlimit)
			cb->start_buf = contigmalloc(cb->size, M_DEVBUF,
						     M_WAITOK, 0, cb->memlimit,
						     PAGE_SIZE, 0);
		else
			cb->start_buf = contigmalloc(cb->size, M_DEVBUF,
						     M_WAITOK, 0, -1UL,
						     PAGE_SIZE, 0);
		if (!cb->start_buf) {
			log(LOG_ERR, "start_buf malloc failed\n");
			ret = ENOMEM;
			goto err2;
		}

		if (!cb->use_dmamr) {
			unsigned flags = IB_ACCESS_REMOTE_READ;

			if (cb->wlat || cb->rlat || cb->bw)
				flags |= IB_ACCESS_REMOTE_WRITE;

			buf.addr = vtophys(cb->start_buf);
			buf.size = cb->size;
			iovbase = vtophys(cb->start_buf);
			cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
						      flags, &iovbase);
			if (IS_ERR(cb->start_mr)) {
				log(LOG_ERR, "start_buf reg_mr failed\n");
				ret = PTR_ERR(cb->start_mr);
				goto err3;
			}
		}
	}

	krping_setup_wr(cb);
	DEBUG_LOG(PFX "allocated & registered buffers...\n");
	return 0;

err3:
	contigfree(cb->start_buf, cb->size, M_DEVBUF);
	if (!cb->use_dmamr)
		ib_dereg_mr(cb->rdma_mr);
err2:
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
err1:
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->send_mr);
	}
	return ret;
}
static struct isert_device *isert_device_create(struct ib_device *ib_dev)
{
	struct isert_device *isert_dev;
	struct ib_device_attr *dev_attr;
	int cqe_num, err;
	struct ib_pd *pd;
	struct ib_mr *mr;
	struct ib_cq *cq;
	char wq_name[64];
	int i, j;

	TRACE_ENTRY();

	isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL);
	if (unlikely(isert_dev == NULL)) {
		pr_err("Failed to allocate iser dev\n");
		err = -ENOMEM;
		goto out;
	}

	dev_attr = &isert_dev->device_attr;
	err = ib_query_device(ib_dev, dev_attr);
	if (unlikely(err)) {
		pr_err("Failed to query device, err: %d\n", err);
		goto fail_query;
	}

	isert_dev->num_cqs = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs,
				    GFP_KERNEL);
	if (unlikely(isert_dev->cq_qps == NULL)) {
		pr_err("Failed to allocate iser cq_qps\n");
		err = -ENOMEM;
		goto fail_cq_qps;
	}

	isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
	if (unlikely(isert_dev->cq_desc == NULL)) {
		pr_err("Failed to allocate %ld bytes for iser cq_desc\n",
		       sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
		err = -ENOMEM;
		goto fail_alloc_cq_desc;
	}

	pd = ib_alloc_pd(ib_dev);
	if (unlikely(IS_ERR(pd))) {
		err = PTR_ERR(pd);
		pr_err("Failed to alloc iser dev pd, err:%d\n", err);
		goto fail_pd;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (unlikely(IS_ERR(mr))) {
		err = PTR_ERR(mr);
		pr_err("Failed to get dma mr, err: %d\n", err);
		goto fail_mr;
	}

	cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES);
	cqe_num = cqe_num / isert_dev->num_cqs;
#ifdef CONFIG_SCST_EXTRACHECKS
	if (isert_dev->device_attr.max_cqe == 0)
		pr_err("Zero max_cqe encountered: you may have a compilation problem\n");
#endif

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

		cq_desc->dev = isert_dev;
		cq_desc->idx = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
#else
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
#endif

		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
#else
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_RESCUER, 1);
#else
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_MEM_RECLAIM, 1);
#endif
#endif
		if (unlikely(!cq_desc->cq_workqueue)) {
			pr_err("Failed to alloc iser cq work queue for dev:%s\n",
			       ib_dev->name);
			err = -ENOMEM;
			goto fail_cq;
		}

		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  cqe_num,
				  i); /* completion vector */
		if (unlikely(IS_ERR(cq))) {
			cq_desc->cq = NULL;
			err = PTR_ERR(cq);
			pr_err("Failed to create iser dev cq, err:%d\n", err);
			goto fail_cq;
		}

		cq_desc->cq = cq;
		err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				       IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(err)) {
			pr_err("Failed to request notify cq, err: %d\n", err);
			goto fail_cq;
		}
	}

	isert_dev->ib_dev = ib_dev;
	isert_dev->pd = pd;
	isert_dev->mr = mr;

	INIT_LIST_HEAD(&isert_dev->conn_list);

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_add(isert_dev);

	pr_info("iser created device:%p\n", isert_dev);
	return isert_dev;

fail_cq:
	for (j = 0; j <= i; ++j) {
		if (isert_dev->cq_desc[j].cq)
			ib_destroy_cq(isert_dev->cq_desc[j].cq);
		if (isert_dev->cq_desc[j].cq_workqueue)
			destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue);
	}
	ib_dereg_mr(mr);
fail_mr:
	ib_dealloc_pd(pd);
fail_pd:
	vfree(isert_dev->cq_desc);
fail_alloc_cq_desc:
	kfree(isert_dev->cq_qps);
fail_cq_qps:
fail_query:
	kfree(isert_dev);
out:
	TRACE_EXIT_RES(err);
	return ERR_PTR(err);
}
static void verbs_add_device (struct ib_device *dev)
{
	int ret;
	struct ib_qp_init_attr attrs;

	if (ib_dev)
		return;

	/* dirty hack so that ib_dma_map_single does not segfault */
	dev->dma_ops = NULL;
	ib_dev = dev;

	printk (KERN_INFO "IB add device called. Name = %s\n", dev->name);

	ret = ib_query_device (dev, &dev_attr);
	if (ret) {
		printk (KERN_INFO "ib_query_device failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "IB device caps: max_qp %d, max_mcast_grp: %d, max_pkeys: %d\n",
		dev_attr.max_qp, dev_attr.max_mcast_grp,
		(int)dev_attr.max_pkeys);

	/* We'll work with the first port. It's a sample module, anyway. */
	ret = ib_query_port (dev, 1, &port_attr);
	if (ret) {
		printk (KERN_INFO "ib_query_port failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Port info: lid: %u, sm_lid: %u, max_msg_size: %u\n",
		(unsigned)port_attr.lid, (unsigned)port_attr.sm_lid,
		port_attr.max_msg_sz);

	pd = ib_alloc_pd (dev);
	if (IS_ERR (pd)) {
		ret = PTR_ERR (pd);
		printk (KERN_INFO "pd allocation failed: %d\n", ret);
		return;
	}
	printk (KERN_INFO "PD allocated\n");

	mr = ib_get_dma_mr (pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR (mr)) {
		ret = PTR_ERR (mr);
		printk (KERN_INFO "get_dma_mr failed: %d\n", ret);
		return;
	}

	send_cq = ib_create_cq (dev, NULL, NULL, NULL, 1, 1);
	if (IS_ERR (send_cq)) {
		ret = PTR_ERR (send_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	recv_cq = ib_create_cq (dev, verbs_comp_handler_recv, NULL, NULL, 1, 1);
	if (IS_ERR (recv_cq)) {
		ret = PTR_ERR (recv_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP);
	printk (KERN_INFO "CQs allocated\n");

	ib_query_pkey (dev, 1, 0, &pkey);

	/* allocate memory */
	send_buf = kmalloc (buf_size + 40, GFP_KERNEL);
	recv_buf = kmalloc (buf_size + 40, GFP_KERNEL);
	if (!send_buf || !recv_buf) {
		printk (KERN_INFO "Memory allocation error\n");
		return;
	}

	printk (KERN_INFO "Trying to register regions\n");
	if (ib_dev->dma_ops)
		printk (KERN_INFO "DMA ops are defined\n");

	memset (send_buf, 0, buf_size + 40);
	memset (recv_buf, 0, buf_size + 40);

	/* The HCA reads from send_buf and writes into recv_buf. */
	send_key = ib_dma_map_single (ib_dev, send_buf, buf_size,
				      DMA_TO_DEVICE);
	printk (KERN_INFO "send_key obtained %llx\n", send_key);
	recv_key = ib_dma_map_single (ib_dev, recv_buf, buf_size,
				      DMA_FROM_DEVICE);
	printk (KERN_INFO "recv_key obtained %llx\n", recv_key);

	if (ib_dma_mapping_error (ib_dev, send_key)) {
		printk (KERN_INFO "Error mapping send buffer\n");
		return;
	}
	if (ib_dma_mapping_error (ib_dev, recv_key)) {
		printk (KERN_INFO "Error mapping recv buffer\n");
		return;
	}

	memset (&attrs, 0, sizeof (attrs));
	attrs.qp_type = IB_QPT_UD;
	attrs.sq_sig_type = IB_SIGNAL_ALL_WR;
	attrs.event_handler = verbs_qp_event;
	attrs.cap.max_send_wr = CQ_SIZE;
	attrs.cap.max_recv_wr = CQ_SIZE;
	attrs.cap.max_send_sge = 1;
	attrs.cap.max_recv_sge = 1;
	attrs.send_cq = send_cq;
	attrs.recv_cq = recv_cq;

	qp = ib_create_qp (pd, &attrs);
	if (IS_ERR (qp)) {
		ret = PTR_ERR (qp);
		printk (KERN_INFO "qp allocation failed: %d\n", ret);
		return;
	}
	printk (KERN_INFO "Create QP with num %x\n", qp->qp_num);

	if (init_qp (qp)) {
		printk (KERN_INFO "Failed to initialize QP\n");
		return;
	}

	ret = ib_query_gid (ib_dev, 1, 0, &local_info.gid);
	if (ret) {
		printk (KERN_INFO "query_gid failed %d\n", ret);
		return;
	}
	local_info.qp_num = qp->qp_num;
	local_info.lid = port_attr.lid;

	/* now we are ready to send our QP number and other stuff to the other party */
	if (!server_addr) {
		schedule_work (&sock_accept);
		flush_scheduled_work ();
	} else
		exchange_info (server_addr);

	if (!have_remote_info) {
		printk (KERN_INFO "Have no remote info, give up\n");
		return;
	}

	ret = path_rec_lookup_start ();
	if (ret) {
		printk (KERN_INFO "path_rec lookup start failed: %d\n", ret);
		return;
	}

	/* post receive request */
	verbs_post_recv_req ();

	mod_timer (&verbs_timer, NEXTJIFF(1));
}
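/*
 * The sample module only shows the device-add path. The sketch below is a
 * hypothetical remove counterpart that unwinds the module-scope state (qp,
 * CQs, DMA mappings, MR, PD) in reverse order; none of it comes from the
 * original module, and the IS_ERR()/mapping-error guards assume the add
 * path may have bailed out part-way through.
 */
static void verbs_remove_device (struct ib_device *dev)
{
	if (!ib_dev || dev != ib_dev)
		return;

	if (qp && !IS_ERR (qp))
		ib_destroy_qp (qp);
	if (recv_cq && !IS_ERR (recv_cq))
		ib_destroy_cq (recv_cq);
	if (send_cq && !IS_ERR (send_cq))
		ib_destroy_cq (send_cq);

	if (send_key && !ib_dma_mapping_error (ib_dev, send_key))
		ib_dma_unmap_single (ib_dev, send_key, buf_size, DMA_TO_DEVICE);
	if (recv_key && !ib_dma_mapping_error (ib_dev, recv_key))
		ib_dma_unmap_single (ib_dev, recv_key, buf_size, DMA_FROM_DEVICE);
	kfree (send_buf);
	kfree (recv_buf);

	if (mr && !IS_ERR (mr))
		ib_dereg_mr (mr);
	if (pd && !IS_ERR (pd))
		ib_dealloc_pd (pd);

	ib_dev = NULL;
}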
static int krping_setup_buffers(struct krping_cb *cb)
{
	int ret;
	struct ib_phys_buf buf;
	u64 iovbase;

	DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);

	if (cb->use_dmamr) {
		cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
					   IB_ACCESS_REMOTE_READ|
					   IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(cb->dma_mr)) {
			log(LOG_ERR, "reg_dmamr failed\n");
			return PTR_ERR(cb->dma_mr);
		}
	} else {
		buf.addr = vtophys(&cb->recv_buf);
		buf.size = sizeof cb->recv_buf;
		iovbase = vtophys(&cb->recv_buf);
		cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
					     IB_ACCESS_LOCAL_WRITE,
					     &iovbase);
		if (IS_ERR(cb->recv_mr)) {
			log(LOG_ERR, "recv_buf reg_mr failed\n");
			return PTR_ERR(cb->recv_mr);
		}

		buf.addr = vtophys(&cb->send_buf);
		buf.size = sizeof cb->send_buf;
		iovbase = vtophys(&cb->send_buf);
		cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 0, &iovbase);
		if (IS_ERR(cb->send_mr)) {
			log(LOG_ERR, "send_buf reg_mr failed\n");
			ib_dereg_mr(cb->recv_mr);
			return PTR_ERR(cb->send_mr);
		}
	}

	cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
				    PAGE_SIZE, 0);
	if (!cb->rdma_buf) {
		log(LOG_ERR, "rdma_buf malloc failed\n");
		ret = ENOMEM;
		goto err1;
	}

	if (!cb->use_dmamr) {
		buf.addr = vtophys(cb->rdma_buf);
		buf.size = cb->size;
		iovbase = vtophys(cb->rdma_buf);
		cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
					     IB_ACCESS_REMOTE_READ|
					     IB_ACCESS_REMOTE_WRITE,
					     &iovbase);
		if (IS_ERR(cb->rdma_mr)) {
			log(LOG_ERR, "rdma_buf reg_mr failed\n");
			ret = PTR_ERR(cb->rdma_mr);
			goto err2;
		}
	}

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0,
					     -1UL, PAGE_SIZE, 0);
		if (!cb->start_buf) {
			log(LOG_ERR, "start_buf malloc failed\n");
			ret = ENOMEM;
			goto err2;
		}

		if (!cb->use_dmamr) {
			unsigned flags = IB_ACCESS_REMOTE_READ;

			if (cb->wlat || cb->rlat || cb->bw)
				flags |= IB_ACCESS_REMOTE_WRITE;

			buf.addr = vtophys(cb->start_buf);
			buf.size = cb->size;
			iovbase = vtophys(cb->start_buf);
			cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
						      flags, &iovbase);
			if (IS_ERR(cb->start_mr)) {
				log(LOG_ERR, "start_buf reg_mr failed\n");
				ret = PTR_ERR(cb->start_mr);
				goto err3;
			}
		}
	}

	krping_setup_wr(cb);
	DEBUG_LOG(PFX "allocated & registered buffers...\n");
	return 0;

err3:
	contigfree(cb->start_buf, cb->size, M_DEVBUF);
	if (!cb->use_dmamr)
		ib_dereg_mr(cb->rdma_mr);
err2:
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
err1:
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->send_mr);
	}
	return ret;
}
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if (((devattr->device_cap_flags &
		      (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
		    (devattr->max_fast_reg_page_list_len == 0)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
		break;
	case RPCRDMA_ALLPHYSICAL:
		ia->ri_ops = &rpcrdma_physical_memreg_ops;
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC: %s: memory registration strategy is '%s'\n",
		__func__, ia->ri_ops->ro_displayname);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}