/** * rvt_post_receive - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. * * Return: 0 on success otherwise errno */ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && !qp->ibqp.srq; /* Check that state is OK to post receive. */ if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { *bad_wr = wr; return -EINVAL; } for (; wr; wr = wr->next) { struct rvt_rwqe *wqe; u32 next; int i; if ((unsigned)wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; return -EINVAL; } spin_lock_irqsave(&qp->r_rq.lock, flags); next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; if (next == wq->tail) { spin_unlock_irqrestore(&qp->r_rq.lock, flags); *bad_wr = wr; return -ENOMEM; } if (unlikely(qp_err_flush)) { struct ib_wc wc; memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; wc.wr_id = wr->wr_id; wc.status = IB_WC_WR_FLUSH_ERR; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } else { wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) wqe->sg_list[i] = wr->sg_list[i]; /* * Make sure queue entry is written * before the head index. */ smp_wmb(); wq->head = next; } spin_unlock_irqrestore(&qp->r_rq.lock, flags); } return 0; }
/** * rvt_post_srq_receive - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here * * This may be called from interrupt context. * * Return: 0 on success else errno */ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; unsigned long flags; for (; wr; wr = wr->next) { struct rvt_rwqe *wqe; u32 next; int i; if ((unsigned)wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; return -EINVAL; } spin_lock_irqsave(&srq->rq.lock, flags); wq = srq->rq.wq; next = wq->head + 1; if (next >= srq->rq.size) next = 0; if (next == wq->tail) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; return -ENOMEM; } wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) wqe->sg_list[i] = wr->sg_list[i]; /* Make sure queue entry is written before the head index. */ smp_wmb(); wq->head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } return 0; }
/** * qib_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP * @wr_id_only: update qp->r_wr_id only, not qp->r_sge * * Return -1 if there is a local error, 0 if no RWQE is available, * otherwise return 1. * * Can be called from interrupt level. */ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) { unsigned long flags; struct rvt_rq *rq; struct rvt_rwq *wq; struct rvt_srq *srq; struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; int ret; if (qp->ibqp.srq) { srq = ibsrq_to_rvtsrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; rq = &srq->rq; } else { srq = NULL; handler = NULL; rq = &qp->r_rq; } spin_lock_irqsave(&rq->lock, flags); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ret = 0; goto unlock; } wq = rq->wq; tail = wq->tail; /* Validate tail before using it since it is user writable. */ if (tail >= rq->size) tail = 0; if (unlikely(tail == wq->head)) { ret = 0; goto unlock; } /* Make sure entry is read after head index is read. */ smp_rmb(); wqe = rvt_get_rwqe_ptr(rq, tail); /* * Even though we update the tail index in memory, the verbs * consumer is not supposed to post more entries until a * completion is generated. */ if (++tail >= rq->size) tail = 0; wq->tail = tail; if (!wr_id_only && !qib_init_sge(qp, wqe)) { ret = -1; goto unlock; } qp->r_wr_id = wqe->wr_id; ret = 1; set_bit(RVT_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; /* * Validate head pointer value and compute * the number of remaining WQEs. */ n = wq->head; if (n >= rq->size) n = 0; if (n < tail) n += rq->size - tail; else n -= tail; if (n < srq->limit) { struct ib_event ev; srq->limit = 0; spin_unlock_irqrestore(&rq->lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; handler(&ev, srq->ibsrq.srq_context); goto bail; } } unlock: spin_unlock_irqrestore(&rq->lock, flags); bail: return ret; }
/** * rvt_error_qp - put a QP into the error state * @qp: the QP to put into the error state * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * * Return: true if last WQE event should be generated. * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. */ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) { struct ib_wc wc; int ret = 0; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) goto bail; qp->state = IB_QPS_ERR; if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } if (qp->s_flags & RVT_S_ANY_WAIT_SEND) qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; rdi->driver_f.notify_error_qp(qp); /* Schedule the sending tasklet to drain the send work queue. */ if (ACCESS_ONCE(qp->s_last) != qp->s_head) rdi->driver_f.schedule_send(qp); rvt_clear_mr_refs(qp, 0); memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; wc.status = err; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; if (qp->r_rq.wq) { struct rvt_rwq *wq; u32 head; u32 tail; spin_lock(&qp->r_rq.lock); /* sanity check pointers before trusting them */ wq = qp->r_rq.wq; head = wq->head; if (head >= qp->r_rq.size) head = 0; tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; while (tail != head) { wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) tail = 0; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } wq->tail = tail; spin_unlock(&qp->r_rq.lock); } else if (qp->ibqp.event_handler) { ret = 1; } bail: return ret; }
/** * rvt_modify_srq - modify a shared receive queue * @ibsrq: the SRQ to modify * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify * @udata: user data for libibverbs.so * * Return: 0 on success */ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); struct rvt_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { struct rvt_rwq *owq; struct rvt_rwqe *p; u32 sz, size, n, head, tail; /* Check that the requested sizes are below the limits. */ if ((attr->max_wr > dev->dparms.props.max_srq_wr) || ((attr_mask & IB_SRQ_LIMIT) ? attr->srq_limit : srq->limit) > attr->max_wr) return -EINVAL; sz = sizeof(struct rvt_rwqe) + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; wq = udata ? vmalloc_user(sizeof(struct rvt_rwq) + size * sz) : vzalloc_node(sizeof(struct rvt_rwq) + size * sz, dev->dparms.node); if (!wq) return -ENOMEM; /* Check that we can write the offset to mmap. */ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; __u64 offset = 0; ret = ib_copy_from_udata(&offset_addr, udata, sizeof(offset_addr)); if (ret) goto bail_free; udata->outbuf = (void __user *) (unsigned long)offset_addr; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) goto bail_free; } spin_lock_irq(&srq->rq.lock); /* * validate head and tail pointer values and compute * the number of remaining WQEs. */ owq = srq->rq.wq; head = owq->head; tail = owq->tail; if (head >= srq->rq.size || tail >= srq->rq.size) { ret = -EINVAL; goto bail_unlock; } n = head; if (n < tail) n += srq->rq.size - tail; else n -= tail; if (size <= n) { ret = -EINVAL; goto bail_unlock; } n = 0; p = wq->wq; while (tail != head) { struct rvt_rwqe *wqe; int i; wqe = rvt_get_rwqe_ptr(&srq->rq, tail); p->wr_id = wqe->wr_id; p->num_sge = wqe->num_sge; for (i = 0; i < wqe->num_sge; i++) p->sg_list[i] = wqe->sg_list[i]; n++; p = (struct rvt_rwqe *)((char *)p + sz); if (++tail >= srq->rq.size) tail = 0; } srq->rq.wq = wq; srq->rq.size = size; wq->head = n; wq->tail = 0; if (attr_mask & IB_SRQ_LIMIT) srq->limit = attr->srq_limit; spin_unlock_irq(&srq->rq.lock); vfree(owq); if (srq->ip) { struct rvt_mmap_info *ip = srq->ip; struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device); u32 s = sizeof(struct rvt_rwq) + size * sz; rvt_update_mmap_info(dev, ip, s, wq); /* * Return the offset to mmap. * See rvt_mmap() for details. */ if (udata && udata->inlen >= sizeof(__u64)) { ret = ib_copy_to_udata(udata, &ip->offset, sizeof(ip->offset)); if (ret) return ret; } /* * Put user mapping info onto the pending list * unless it already is on the list. */ spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { spin_lock_irq(&srq->rq.lock); if (attr->srq_limit >= srq->rq.size) ret = -EINVAL; else srq->limit = attr->srq_limit; spin_unlock_irq(&srq->rq.lock); } return ret; bail_unlock: spin_unlock_irq(&srq->rq.lock); bail_free: vfree(wq); return ret; }