Example #1
/**
 * rvt_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: pointer to the completion queue on success, or an ERR_PTR-encoded
 * negative errno on failure.
 */
struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
			    const struct ib_cq_init_attr *attr,
			    struct ib_ucontext *context,
			    struct ib_udata *udata)
{
	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
	struct rvt_cq *cq;
	struct rvt_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;
	unsigned int entries = attr->cqe;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
		return ERR_PTR(-EINVAL);

	/* Allocate the completion queue structure. */
	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See rvt_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	}

	spin_lock(&rdi->n_cqs_lock);
	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
		spin_unlock(&rdi->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	rdi->n_cqs_allocated++;
	spin_unlock(&rdi->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&rdi->pending_lock);
		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe,
	 * which is set here. The number of entries provided must be >= the
	 * number requested; otherwise an error is returned.
	 */
	cq->rdi = rdi;
	cq->ibcq.cqe = entries;
	cq->notify = RVT_CQ_NONE;
	spin_lock_init(&cq->lock);
	init_kthread_work(&cq->comptask, send_complete);
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}
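
The sizing logic above deserves a note: the completion ring is a header
(struct rvt_cq_wc, carrying the head/tail indices) followed by entries + 1
slots, with one slot deliberately kept unused so that head == tail always
means "empty" and a full ring remains distinguishable. A minimal sketch of the
same computation, using stand-in types rather than the kernel's definitions:

#include <stddef.h>
#include <stdint.h>

/* Stand-in for rvt_cq_wc; the real layout lives in the rdmavt headers. */
struct wc_header {
	uint32_t head;	/* index of the next slot to fill */
	uint32_t tail;	/* index of the next slot to consume */
};

/*
 * Bytes needed for a completion ring with 'entries' usable slots of
 * 'slot_sz' bytes each. One extra slot keeps "full" and "empty"
 * distinguishable by comparing head and tail alone.
 */
static size_t cq_ring_bytes(uint32_t entries, size_t slot_sz)
{
	return sizeof(struct wc_header) + (size_t)(entries + 1) * slot_sz;
}

rvt_create_cq() picks the slot size per consumer: struct ib_uverbs_wc (a
fixed ABI layout that is safe to mmap into user space) when udata is present,
struct ib_wc otherwise, which is why the same entries + 1 term appears on both
branches.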
Example #2
/**
 * rvt_create_qp - create a queue pair for a device
 * @ibpd: the protection domain whose device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Queue pair creation is mostly an rvt issue. However, drivers have their own
 * unique idea of what queue pair numbers mean. For instance, there is a reserved
 * range for PSM.
 *
 * Return: the queue pair on success, otherwise an ERR_PTR-encoded errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct rvt_qp *qp;
	int err;
	struct rvt_swqe *swq = NULL;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret = ERR_PTR(-ENOMEM);
	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
	void *priv = NULL;
	gfp_t gfp;

	if (!rdi)
		return ERR_PTR(-EINVAL);

	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
		return ERR_PTR(-EINVAL);

	/* GFP_NOIO is applicable to RC QPs only */

	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
	    init_attr->qp_type != IB_QPT_RC)
		return ERR_PTR(-EINVAL);

	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
						GFP_NOIO : GFP_KERNEL;

	/* Check receive queue parameters if no SRQ is specified. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
			return ERR_PTR(-EINVAL);

		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0)
			return ERR_PTR(-EINVAL);
	}

	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > ibpd->device->phys_port_cnt)
			return ERR_PTR(-EINVAL);
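		/* fall through */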
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
		sz = sizeof(struct rvt_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct rvt_swqe);
		if (gfp == GFP_NOIO)
			swq = __vmalloc(
				(init_attr->cap.max_send_wr + 1) * sz,
				gfp, PAGE_KERNEL);
		else
			swq = vmalloc_node(
				(init_attr->cap.max_send_wr + 1) * sz,
				rdi->dparms.node);
		if (!swq)
			return ERR_PTR(-ENOMEM);

		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
		if (!qp)
			goto bail_swq;

		RCU_INIT_POINTER(qp->next, NULL);

		/*
		 * The driver needs to set up its private QP structure and do
		 * any initialization that is needed.
		 */
		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
		if (!priv)
			goto bail_qp;
		qp->priv = priv;
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
		if (init_attr->srq) {
			sz = 0;
		} else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct rvt_rwqe);
			if (udata)
				qp->r_rq.wq = vmalloc_user(
						sizeof(struct rvt_rwq) +
						qp->r_rq.size * sz);
			else if (gfp == GFP_NOIO)
				qp->r_rq.wq = __vmalloc(
						sizeof(struct rvt_rwq) +
						qp->r_rq.size * sz,
						gfp, PAGE_KERNEL);
			else
				qp->r_rq.wq = vmalloc_node(
						sizeof(struct rvt_rwq) +
						qp->r_rq.size * sz,
						rdi->dparms.node);
			if (!qp->r_rq.wq)
				goto bail_driver_priv;
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->r_lock);
		spin_lock_init(&qp->s_hlock);
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_timer(&qp->s_timer);
		qp->s_timer.data = (unsigned long)qp;
		INIT_LIST_HEAD(&qp->rspwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_avail = init_attr->cap.max_send_wr;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = RVT_S_SIGNAL_REQ_WR;

		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
				init_attr->qp_type,
				init_attr->port_num, gfp);
		if (err < 0) {
			ret = ERR_PTR(err);
			goto bail_rq_wq;
		}
		qp->ibqp.qp_num = err;
		qp->port_num = init_attr->port_num;
		rvt_reset_qp(rdi, qp, init_attr->qp_type);
		break;

	default:
		/* Don't support raw QPs */
		return ERR_PTR(-EINVAL);
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See rvt_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_qpn;
			}
		} else {
			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;

			qp->ip = rvt_create_mmap_info(rdi, s,
						      ibpd->uobject->context,
						      qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qpn;
			}

			err = ib_copy_to_udata(udata, &qp->ip->offset,
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
		qp->pid = current->pid;
	}

	spin_lock(&rdi->n_qps_lock);
	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
		spin_unlock(&rdi->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	rdi->n_qps_allocated++;
	/*
	 * Maintain a busy_jiffies variable that will be added to the timeout
	 * period in mod_retry_timer and add_retry_timer. busy_jiffies is
	 * scaled by the number of RC QPs created for the device, to reduce
	 * the number of timeouts occurring when there is a large number of
	 * QPs. It is incremented once every RC QP scaling interval. The
	 * scaling interval is selected based on extensive performance
	 * evaluation of targeted workloads.
	 */
	if (init_attr->qp_type == IB_QPT_RC) {
		rdi->n_rc_qps++;
		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
	}
	spin_unlock(&rdi->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&rdi->pending_lock);
		list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	ret = &qp->ibqp;

	/*
	 * We have our QP and it's good, now keep track of what types of opcodes
	 * can be processed on this QP. We do this by keeping track of what the
	 * 3 high order bits of the opcode are.
	 */
	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_UD:
		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
		break;
	case IB_QPT_RC:
		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
		break;
	case IB_QPT_UC:
		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
		break;
	default:
		ret = ERR_PTR(-EINVAL);
		goto bail_ip;
	}

	return ret;

bail_ip:
	if (qp->ip)
		kref_put(&qp->ip->ref, rvt_release_mmap_info);

bail_qpn:
	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);

bail_rq_wq:
	vfree(qp->r_rq.wq);

bail_driver_priv:
	rdi->driver_f.qp_priv_free(rdi, qp);

bail_qp:
	kfree(qp);

bail_swq:
	vfree(swq);

	return ret;
}
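
For context, a kernel consumer never calls rvt_create_qp() directly; it fills
a struct ib_qp_init_attr and goes through the core verbs entry point, which
dispatches here for rdmavt-backed devices. A hedged caller-side sketch: the
PD and CQ objects are assumed to have been created earlier, and the cap
values are arbitrary as long as they respect the checks at the top of
rvt_create_qp():

#include <rdma/ib_verbs.h>

/* Illustrative only: build an RC QP through the core verbs API. */
static struct ib_qp *example_make_rc_qp(struct ib_pd *pd,
					struct ib_cq *send_cq,
					struct ib_cq *recv_cq)
{
	struct ib_qp_init_attr attr = {
		.send_cq	= send_cq,
		.recv_cq	= recv_cq,
		.cap = {
			.max_send_wr	= 64,	/* must be <= props.max_qp_wr */
			.max_recv_wr	= 64,
			.max_send_sge	= 4,	/* must be <= props.max_sge */
			.max_recv_sge	= 4,
		},
		.sq_sig_type	= IB_SIGNAL_REQ_WR,	/* sets RVT_S_SIGNAL_REQ_WR */
		.qp_type	= IB_QPT_RC,
	};

	/* For an rdmavt device this lands in rvt_create_qp() above. */
	return ib_create_qp(pd, &attr);
}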
Example #3
/**
 * rvt_create_srq - create a shared receive queue
 * @ibpd: the protection domain of the SRQ to create
 * @srq_init_attr: the attributes of the SRQ
 * @udata: data from libibverbs when creating a user SRQ
 *
 * Return: the allocated SRQ object on success, or an ERR_PTR-encoded errno
 * on failure.
 */
struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
			      struct ib_srq_init_attr *srq_init_attr,
			      struct ib_udata *udata)
{
	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
	struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct rvt_ucontext, ibucontext);
	struct rvt_srq *srq;
	u32 sz;
	struct ib_srq *ret;

	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
		return ERR_PTR(-EOPNOTSUPP);

	if (srq_init_attr->attr.max_sge == 0 ||
	    srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge ||
	    srq_init_attr->attr.max_wr == 0 ||
	    srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr)
		return ERR_PTR(-EINVAL);

	srq = kzalloc_node(sizeof(*srq), GFP_KERNEL, dev->dparms.node);
	if (!srq)
		return ERR_PTR(-ENOMEM);

	/*
	 * Need to use vmalloc() if we want to support large numbers of
	 * entries.
	 */
	srq->rq.size = srq_init_attr->attr.max_wr + 1;
	srq->rq.max_sge = srq_init_attr->attr.max_sge;
	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
		sizeof(struct rvt_rwqe);
	srq->rq.wq = udata ?
		vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) :
		vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz,
			     dev->dparms.node);
	if (!srq->rq.wq) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_srq;
	}

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See rvt_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;
		u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;

		srq->ip =
		    rvt_create_mmap_info(dev, s, &ucontext->ibucontext,
					 srq->rq.wq);
		if (!srq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wq;
		}

		err = ib_copy_to_udata(udata, &srq->ip->offset,
				       sizeof(srq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	}

	/*
	 * ib_create_srq() will initialize srq->ibsrq.
	 */
	spin_lock_init(&srq->rq.lock);
	srq->limit = srq_init_attr->attr.srq_limit;

	spin_lock(&dev->n_srqs_lock);
	if (dev->n_srqs_allocated == dev->dparms.props.max_srq) {
		spin_unlock(&dev->n_srqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_srqs_allocated++;
	spin_unlock(&dev->n_srqs_lock);

	if (srq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	return &srq->ibsrq;

bail_ip:
	kfree(srq->ip);
bail_wq:
	vfree(srq->rq.wq);
bail_srq:
	kfree(srq);
	return ret;
}
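
All three constructors in this set end with the same admission idiom: take a
spinlock, compare a per-device counter against the advertised maximum, and
either increment it or fail with -ENOMEM. A reduced sketch of that pattern;
the function name and parameters are stand-ins, not rdmavt fields:

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/errno.h>

/*
 * Illustrative only: the bounded-counter check used by rvt_create_cq()
 * (n_cqs_allocated), rvt_create_qp() (n_qps_allocated) and
 * rvt_create_srq() (n_srqs_allocated).
 */
static int try_account_object(spinlock_t *lock, u32 *allocated, u32 max)
{
	int ret = 0;

	spin_lock(lock);
	if (*allocated == max)
		ret = -ENOMEM;	/* device is at its advertised limit */
	else
		(*allocated)++;
	spin_unlock(lock);

	return ret;
}

Doing the comparison under the lock, rather than reading the counter first,
is what keeps two concurrent creators from both slipping past the limit.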