/** * rvt_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to * @attr: creation attributes * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * * Called by ib_create_cq() in the generic verbs code. * * Return: pointer to the completion queue or negative errno values * for failure. */ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct rvt_cq *cq; struct rvt_cq_wc *wc; struct ib_cq *ret; u32 sz; unsigned int entries = attr->cqe; if (attr->flags) return ERR_PTR(-EINVAL); if (entries < 1 || entries > rdi->dparms.props.max_cqe) return ERR_PTR(-EINVAL); /* Allocate the completion queue structure. */ cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); /* * Allocate the completion queue entries and head/tail pointers. * This is allocated separately so that it can be resized and * also mapped into user space. * We need to use vmalloc() in order to support mmap and large * numbers of entries. */ sz = sizeof(*wc); if (udata && udata->outlen >= sizeof(__u64)) sz += sizeof(struct ib_uverbs_wc) * (entries + 1); else sz += sizeof(struct ib_wc) * (entries + 1); wc = vmalloc_user(sz); if (!wc) { ret = ERR_PTR(-ENOMEM); goto bail_cq; } /* * Return the address of the WC as the offset to mmap. * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { int err; cq->ip = rvt_create_mmap_info(rdi, sz, context, wc); if (!cq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wc; } err = ib_copy_to_udata(udata, &cq->ip->offset, sizeof(cq->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } spin_lock(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { spin_unlock(&rdi->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_cqs_allocated++; spin_unlock(&rdi->n_cqs_lock); if (cq->ip) { spin_lock_irq(&rdi->pending_lock); list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps); spin_unlock_irq(&rdi->pending_lock); } /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return * an error. */ cq->rdi = rdi; cq->ibcq.cqe = entries; cq->notify = RVT_CQ_NONE; spin_lock_init(&cq->lock); init_kthread_work(&cq->comptask, send_complete); cq->queue = wc; ret = &cq->ibcq; goto done; bail_ip: kfree(cq->ip); bail_wc: vfree(wc); bail_cq: kfree(cq); done: return ret; }
/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * * Queue pair creation is mostly an rvt issue. However, drivers have their own * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * * Return: the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct rvt_qp *qp; int err; struct rvt_swqe *swq = NULL; size_t sz; size_t sg_list_sz; struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; gfp_t gfp; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); /* GFP_NOIO is applicable to RC QP's only */ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && init_attr->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge + init_attr->cap.max_send_wr + init_attr->cap.max_recv_sge + init_attr->cap.max_recv_wr == 0) return ERR_PTR(-EINVAL); } switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: if (init_attr->port_num == 0 || init_attr->port_num > ibpd->device->phys_port_cnt) return ERR_PTR(-EINVAL); case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, gfp, PAGE_KERNEL); else swq = vmalloc_node( (init_attr->cap.max_send_wr + 1) * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (srq->rq.max_sge - 1); } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); if (!qp) goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); /* * Driver needs to set up it's private QP structure and do any * initialization that is needed. */ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); if (!priv) goto bail_qp; qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); if (init_attr->srq) { sz = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); if (udata) qp->r_rq.wq = vmalloc_user( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); else if (gfp == GFP_NOIO) qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, gfp, PAGE_KERNEL); else qp->r_rq.wq = vmalloc_node( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, rdi->dparms.node); if (!qp->r_rq.wq) goto bail_driver_priv; } /* * ib_create_qp() will initialize qp->ibqp * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; rvt_reset_qp(rdi, qp, init_attr->qp_type); break; default: /* Don't support raw QPs */ return ERR_PTR(-EINVAL); } init_attr->cap.max_inline_data = 0; /* * Return the address of the RWQ as the offset to mmap. * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { __u64 offset = 0; err = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (err) { ret = ERR_PTR(err); goto bail_qpn; } } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; qp->ip = rvt_create_mmap_info(rdi, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_qpn; } err = ib_copy_to_udata(udata, &qp->ip->offset, sizeof(qp->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } qp->pid = current->pid; } spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_qps_allocated++; /* * Maintain a busy_jiffies variable that will be added to the timeout * period in mod_retry_timer and add_retry_timer. This busy jiffies * is scaled by the number of rc qps created for the device to reduce * the number of timeouts occurring when there is a large number of * qps. busy_jiffies is incremented every rc qp scaling interval. * The scaling interval is selected based on extensive performance * evaluation of targeted workloads. */ if (init_attr->qp_type == IB_QPT_RC) { rdi->n_rc_qps++; rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; } spin_unlock(&rdi->n_qps_lock); if (qp->ip) { spin_lock_irq(&rdi->pending_lock); list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); spin_unlock_irq(&rdi->pending_lock); } ret = &qp->ibqp; /* * We have our QP and its good, now keep track of what types of opcodes * can be processed on this QP. We do this by keeping track of what the * 3 high order bits of the opcode are. */ switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; break; case IB_QPT_RC: qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; break; case IB_QPT_UC: qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; break; default: ret = ERR_PTR(-EINVAL); goto bail_ip; } return ret; bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: kfree(qp); bail_swq: vfree(swq); return ret; }
/** * rvt_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * * Return: Allocated srq object */ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); struct rvt_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct rvt_ucontext, ibucontext); struct rvt_srq *srq; u32 sz; struct ib_srq *ret; if (srq_init_attr->srq_type != IB_SRQT_BASIC) return ERR_PTR(-EOPNOTSUPP); if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || srq_init_attr->attr.max_wr == 0 || srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) return ERR_PTR(-EINVAL); srq = kzalloc_node(sizeof(*srq), GFP_KERNEL, dev->dparms.node); if (!srq) return ERR_PTR(-ENOMEM); /* * Need to use vmalloc() if we want to support large #s of entries. */ srq->rq.size = srq_init_attr->attr.max_wr + 1; srq->rq.max_sge = srq_init_attr->attr.max_sge; sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct rvt_rwqe); srq->rq.wq = udata ? vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) : vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz, dev->dparms.node); if (!srq->rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_srq; } /* * Return the address of the RWQ as the offset to mmap. * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { int err; u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = rvt_create_mmap_info(dev, s, &ucontext->ibucontext, srq->rq.wq); if (!srq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wq; } err = ib_copy_to_udata(udata, &srq->ip->offset, sizeof(srq->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } /* * ib_create_srq() will initialize srq->ibsrq. */ spin_lock_init(&srq->rq.lock); srq->limit = srq_init_attr->attr.srq_limit; spin_lock(&dev->n_srqs_lock); if (dev->n_srqs_allocated == dev->dparms.props.max_srq) { spin_unlock(&dev->n_srqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } dev->n_srqs_allocated++; spin_unlock(&dev->n_srqs_lock); if (srq->ip) { spin_lock_irq(&dev->pending_lock); list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } return &srq->ibsrq; bail_ip: kfree(srq->ip); bail_wq: vfree(srq->rq.wq); bail_srq: kfree(srq); return ret; }