/*
 * ipath_alloc_qpn - reserve a QP number and hash the QP into the QP table
 * @qpt: the QP table to allocate from and insert into
 * @qp: the queue pair being registered
 * @type: the QP type, passed through to alloc_qpn()
 *
 * On success the allocated number is stored in qp->ibqp.qp_num, the QP is
 * pushed onto the head of its bucket chain (bucket = QPN modulo qpt->max)
 * and the QP reference count is incremented. The table update is done with
 * qpt->lock held and interrupts saved/disabled.
 *
 * Return: 0 on success, otherwise the negative value from alloc_qpn().
 */
static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
			   enum ib_qp_type type)
{
	unsigned long irq_state;
	int bucket;
	int qpn;

	qpn = alloc_qpn(qpt, type);
	if (qpn < 0)
		return qpn;

	qp->ibqp.qp_num = qpn;

	/* Link the QP at the head of its hash chain. */
	spin_lock_irqsave(&qpt->lock, irq_state);
	bucket = qpn % qpt->max;
	qp->next = qpt->table[bucket];
	qpt->table[bucket] = qp;
	/* The table now holds a reference on the QP. */
	atomic_inc(&qp->refcount);
	spin_unlock_irqrestore(&qpt->lock, irq_state);

	return 0;
}
/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * * Queue pair creation is mostly an rvt issue. However, drivers have their own * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * * Return: the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct rvt_qp *qp; int err; struct rvt_swqe *swq = NULL; size_t sz; size_t sg_list_sz; struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; gfp_t gfp; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); /* GFP_NOIO is applicable to RC QP's only */ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && init_attr->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. 
*/ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge + init_attr->cap.max_send_wr + init_attr->cap.max_recv_sge + init_attr->cap.max_recv_wr == 0) return ERR_PTR(-EINVAL); } switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: if (init_attr->port_num == 0 || init_attr->port_num > ibpd->device->phys_port_cnt) return ERR_PTR(-EINVAL); case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, gfp, PAGE_KERNEL); else swq = vmalloc_node( (init_attr->cap.max_send_wr + 1) * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (srq->rq.max_sge - 1); } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); if (!qp) goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); /* * Driver needs to set up it's private QP structure and do any * initialization that is needed. 
*/ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); if (!priv) goto bail_qp; qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); if (init_attr->srq) { sz = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); if (udata) qp->r_rq.wq = vmalloc_user( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); else if (gfp == GFP_NOIO) qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, gfp, PAGE_KERNEL); else qp->r_rq.wq = vmalloc_node( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, rdi->dparms.node); if (!qp->r_rq.wq) goto bail_driver_priv; } /* * ib_create_qp() will initialize qp->ibqp * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; rvt_reset_qp(rdi, qp, init_attr->qp_type); break; default: /* Don't support raw QPs */ return ERR_PTR(-EINVAL); } init_attr->cap.max_inline_data = 0; /* * Return the address of the RWQ as the offset to mmap. * See rvt_mmap() for details. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { __u64 offset = 0; err = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (err) { ret = ERR_PTR(err); goto bail_qpn; } } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; qp->ip = rvt_create_mmap_info(rdi, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_qpn; } err = ib_copy_to_udata(udata, &qp->ip->offset, sizeof(qp->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } qp->pid = current->pid; } spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_qps_allocated++; /* * Maintain a busy_jiffies variable that will be added to the timeout * period in mod_retry_timer and add_retry_timer. This busy jiffies * is scaled by the number of rc qps created for the device to reduce * the number of timeouts occurring when there is a large number of * qps. busy_jiffies is incremented every rc qp scaling interval. * The scaling interval is selected based on extensive performance * evaluation of targeted workloads. */ if (init_attr->qp_type == IB_QPT_RC) { rdi->n_rc_qps++; rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; } spin_unlock(&rdi->n_qps_lock); if (qp->ip) { spin_lock_irq(&rdi->pending_lock); list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); spin_unlock_irq(&rdi->pending_lock); } ret = &qp->ibqp; /* * We have our QP and its good, now keep track of what types of opcodes * can be processed on this QP. We do this by keeping track of what the * 3 high order bits of the opcode are. 
*/ switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; break; case IB_QPT_RC: qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; break; case IB_QPT_UC: qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; break; default: ret = ERR_PTR(-EINVAL); goto bail_ip; } return ret; bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: kfree(qp); bail_swq: vfree(swq); return ret; }
/** * hfi1_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * * Returns the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct hfi1_qp *qp; int err; struct hfi1_swqe *swq = NULL; struct hfi1_ibdev *dev; struct hfi1_devdata *dd; size_t sz; size_t sg_list_sz; struct ib_qp *ret; if (init_attr->cap.max_send_sge > hfi1_max_sges || init_attr->cap.max_send_wr > hfi1_max_qp_wrs || init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > hfi1_max_sges || init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) { ret = ERR_PTR(-EINVAL); goto bail; } if (init_attr->cap.max_send_sge + init_attr->cap.max_send_wr + init_attr->cap.max_recv_sge + init_attr->cap.max_recv_wr == 0) { ret = ERR_PTR(-EINVAL); goto bail; } } switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: if (init_attr->port_num == 0 || init_attr->port_num > ibpd->device->phys_port_cnt) { ret = ERR_PTR(-EINVAL); goto bail; } case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: sz = sizeof(struct hfi1_sge) * init_attr->cap.max_send_sge + sizeof(struct hfi1_swqe); swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; } sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { struct hfi1_srq *srq = to_isrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (srq->rq.max_sge - 1); } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } 
RCU_INIT_POINTER(qp->next, NULL); qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); if (!qp->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; } qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); if (init_attr->srq) sz = 0; else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct hfi1_rwqe); qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + qp->r_rq.size * sz); if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_qp; } } /* * ib_create_qp() will initialize qp->ibqp * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = HFI1_S_SIGNAL_REQ_WR; dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type, init_attr->port_num); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); goto bail_qp; } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; reset_qp(qp, init_attr->qp_type); break; default: /* Don't support raw QPs */ ret = ERR_PTR(-ENOSYS); goto bail; } init_attr->cap.max_inline_data = 0; /* * Return the address of the RWQ as the offset to mmap. * See hfi1_mmap() for details. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { __u64 offset = 0; err = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } else { u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz; qp->ip = hfi1_create_mmap_info(dev, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_ip; } err = ib_copy_to_udata(udata, &(qp->ip->offset), sizeof(qp->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } } spin_lock(&dev->n_qps_lock); if (dev->n_qps_allocated == hfi1_max_qps) { spin_unlock(&dev->n_qps_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } dev->n_qps_allocated++; spin_unlock(&dev->n_qps_lock); if (qp->ip) { spin_lock_irq(&dev->pending_lock); list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } ret = &qp->ibqp; /* * We have our QP and its good, now keep track of what types of opcodes * can be processed on this QP. We do this by keeping track of what the * 3 high order bits of the opcode are. */ switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK; break; case IB_QPT_RC: qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK; break; case IB_QPT_UC: qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK; break; default: ret = ERR_PTR(-EINVAL); goto bail_ip; } goto bail; bail_ip: if (qp->ip) kref_put(&qp->ip->ref, hfi1_release_mmap_info); else vfree(qp->r_rq.wq); free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num); bail_qp: kfree(qp->s_hdr); kfree(qp); bail_swq: vfree(swq); bail: return ret; }