void ipath_do_send(unsigned long data) { struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int (*make_req)(struct ipath_qp *qp); unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); goto bail; } if (qp->ibqp.qp_type == IB_QPT_RC) make_req = ipath_make_rc_req; else if (qp->ibqp.qp_type == IB_QPT_UC) make_req = ipath_make_uc_req; else make_req = ipath_make_ud_req; spin_lock_irqsave(&qp->s_lock, flags); if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { spin_unlock_irqrestore(&qp->s_lock, flags); goto bail; } qp->s_flags |= IPATH_S_BUSY; spin_unlock_irqrestore(&qp->s_lock, flags); again: if (qp->s_hdrwords != 0) { if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) { if (ipath_no_bufs_available(qp, dev)) goto bail; } dev->n_unicast_xmit++; qp->s_hdrwords = 0; } if (make_req(qp)) goto again; bail:; }
int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); /* */ spin_lock_irq(&qp->s_lock); if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); qp->s_flags &= ~IPATH_S_ANY_WAIT; spin_unlock_irq(&qp->s_lock); /* */ tasklet_kill(&qp->s_task); wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); } else spin_unlock_irq(&qp->s_lock); ipath_free_qp(&dev->qp_table, qp); if (qp->s_tx) { atomic_dec(&qp->refcount); if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(qp->s_tx->txreq.map_addr); spin_lock_irq(&dev->pending_lock); list_add(&qp->s_tx->txreq.list, &dev->txreq_free); spin_unlock_irq(&dev->pending_lock); qp->s_tx = NULL; } wait_event(qp->wait, !atomic_read(&qp->refcount)); /* */ free_qpn(&dev->qp_table, qp->ibqp.qp_num); spin_lock(&dev->n_qps_lock); dev->n_qps_allocated--; spin_unlock(&dev->n_qps_lock); if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); else vfree(qp->r_rq.wq); kfree(qp->r_ud_sg_list); vfree(qp->s_wq); kfree(qp); return 0; }
/*
 * Take the QP off whatever iowait list it is queued on.
 *
 * If the QP was queued, the reference that is dropped here may be the
 * last one, in which case anyone sleeping on qp->wait is woken.
 * Runs entirely under the device's iowait seqlock (write side).
 */
static void flush_iowait(struct hfi1_qp *qp)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long irq_state;

	write_seqlock_irqsave(&dev->iowait_lock, irq_state);

	if (list_empty(&qp->s_iowait.list))
		goto unlock;

	list_del_init(&qp->s_iowait.list);
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);

unlock:
	write_sequnlock_irqrestore(&dev->iowait_lock, irq_state);
}
/** * ipath_poll_cq - poll for work completion entries * @ibcq: the completion queue to poll * @num_entries: the maximum number of entries to return * @entry: pointer to array where work completions are placed * * Returns the number of completion entries polled. * * This may be called from interrupt context. Also called by ib_poll_cq() * in the generic verbs code. */ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct ipath_cq *cq = to_icq(ibcq); struct ipath_cq_wc *wc; unsigned long flags; int npolled; u32 tail; spin_lock_irqsave(&cq->lock, flags); wc = cq->queue; tail = wc->tail; if (tail > (u32) cq->ibcq.cqe) tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { struct ipath_qp *qp; if (tail == wc->head) break; qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, wc->queue[tail].qp_num); entry->qp = &qp->ibqp; if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); entry->wr_id = wc->queue[tail].wr_id; entry->status = wc->queue[tail].status; entry->opcode = wc->queue[tail].opcode; entry->vendor_err = wc->queue[tail].vendor_err; entry->byte_len = wc->queue[tail].byte_len; entry->imm_data = wc->queue[tail].imm_data; entry->src_qp = wc->queue[tail].src_qp; entry->wc_flags = wc->queue[tail].wc_flags; entry->pkey_index = wc->queue[tail].pkey_index; entry->slid = wc->queue[tail].slid; entry->sl = wc->queue[tail].sl; entry->dlid_path_bits = wc->queue[tail].dlid_path_bits; entry->port_num = wc->queue[tail].port_num; if (tail >= cq->ibcq.cqe) tail = 0; else tail++; } wc->tail = tail; spin_unlock_irqrestore(&cq->lock, flags); return npolled; }
/* * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) { int i, j, ret; struct ib_wc wc; struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct rvt_sge_state *ss; rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); ss = &qp->r_sge; ss->sg_list = qp->r_sg_list; qp->r_len = 0; for (i = j = 0; i < wqe->num_sge; i++) { if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE); if (unlikely(ret <= 0)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; } ss->num_sge = j; ss->total_len = qp->r_len; ret = 1; goto bail; bad_lkey: while (j) { struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; rvt_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr_id; wc.status = IB_WC_LOC_PROT_ERR; wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; /* Signal solicited completion event. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); ret = 0; bail: return ret; }
/** * qib_destroy_srq - destroy a shared receive queue * @ibsrq: the SRQ to destroy */ int qib_destroy_srq(struct ib_srq *ibsrq) { struct qib_srq *srq = to_isrq(ibsrq); struct qib_ibdev *dev = to_idev(ibsrq->device); spin_lock(&dev->n_srqs_lock); dev->n_srqs_allocated--; spin_unlock(&dev->n_srqs_lock); if (srq->ip) kref_put(&srq->ip->ref, qib_release_mmap_info); else vfree(srq->rq.wq); kfree(srq); return 0; }
/* * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) { int i, j, ret; struct ib_wc wc; struct qib_lkey_table *rkt; struct qib_pd *pd; struct qib_sge_state *ss; rkt = &to_idev(qp->ibqp.device)->lk_table; pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); ss = &qp->r_sge; ss->sg_list = qp->r_sg_list; qp->r_len = 0; for (i = j = 0; i < wqe->num_sge; i++) { if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; } ss->num_sge = j; ss->total_len = qp->r_len; ret = 1; goto bail; bad_lkey: while (j) { struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; atomic_dec(&sge->mr->refcount); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr_id; wc.status = IB_WC_LOC_PROT_ERR; wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; /* Signal solicited completion event. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); ret = 0; bail: return ret; }
/** * qib_destroy_cq - destroy a completion queue * @ibcq: the completion queue to destroy. * * Returns 0 for success. * * Called by ib_destroy_cq() in the generic verbs code. */ int qib_destroy_cq(struct ib_cq *ibcq) { struct qib_ibdev *dev = to_idev(ibcq->device); struct qib_cq *cq = to_icq(ibcq); flush_work(&cq->comptask); spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, qib_release_mmap_info); else vfree(cq->queue); kfree(cq); return 0; }
/*
 * Wait until the QP has no PIO sends pending.
 *
 * Does nothing when the QP has no send context.  Otherwise, for as long
 * as iowait_pio_pending() reports work, the "want PIO buffer" interrupt
 * is switched on, iowait_pio_drain() is called, and the interrupt is
 * switched off again.  Each interrupt toggle is done under the device's
 * iowait seqlock; the drain call itself runs without it.
 */
static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibdev *dev;

	if (!priv->s_sendcontext)
		return;

	dev = to_idev(qp->ibqp.device);

	for (;;) {
		if (!iowait_pio_pending(&priv->s_iowait))
			break;

		write_seqlock_irq(&dev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&dev->iowait_lock);

		iowait_pio_drain(&priv->s_iowait);

		write_seqlock_irq(&dev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&dev->iowait_lock);
	}
}
/**
 * hfi1_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success; there is no failure return in this function.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 *
 * A QP that is not already in IB_QPS_RESET is moved there and quiesced:
 * it is removed from the iowait list, its send work and timer are
 * cancelled, SDMA is drained, queued tx requests are flushed, the QP is
 * removed from the lookup table, and the function sleeps until the
 * reference count is zero.  Both QP locks are dropped around the calls
 * that can sleep and retaken afterwards.
 */
int hfi1_destroy_qp(struct ib_qp *ibqp)
{
	struct hfi1_qp *qp = to_iqp(ibqp);
	struct hfi1_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		flush_iowait(qp);
		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
		/* Release both locks (inner first) before anything sleeps. */
		spin_unlock(&qp->s_lock);
		spin_unlock_irq(&qp->r_lock);
		cancel_work_sync(&qp->s_iowait.iowork);
		del_timer_sync(&qp->s_timer);
		iowait_sdma_drain(&qp->s_iowait);
		flush_tx_list(qp);
		remove_qp(dev, qp);
		/* Sleep until every outstanding reference is gone. */
		wait_event(qp->wait, !atomic_read(&qp->refcount));
		/* Retake the locks in the same r_lock -> s_lock order. */
		spin_lock_irq(&qp->r_lock);
		spin_lock(&qp->s_lock);
		clear_mr_refs(qp, 1);
		clear_ahg(qp);
	}
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	/* all user's cleaned up, mark it available */
	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	/* With mmap info attached, the receive queue is released via kref. */
	if (qp->ip)
		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp->s_hdr);
	kfree(qp);
	return 0;
}
/** * ipath_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy * * Returns 0 on success. * * Note that this can be called while the QP is actively sending or * receiving! */ int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; spin_unlock_irqrestore(&qp->s_lock, flags); spin_lock(&dev->n_qps_lock); dev->n_qps_allocated--; spin_unlock(&dev->n_qps_lock); /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); /* Make sure the QP isn't on the timeout list. */ spin_lock_irqsave(&dev->pending_lock, flags); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); spin_unlock_irqrestore(&dev->pending_lock, flags); /* * Make sure that the QP is not in the QPN table so receive * interrupts will discard packets for this QP. XXX Also remove QP * from multicast table. */ if (atomic_read(&qp->refcount) != 0) ipath_free_qp(&dev->qp_table, qp); if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); else vfree(qp->r_rq.wq); vfree(qp->s_wq); kfree(qp); return 0; }
/** * qib_free_lkey - free an lkey * @mr: mr to free from tables */ void qib_free_lkey(struct qib_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; struct qib_ibdev *dev = to_idev(mr->pd->device); struct qib_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); if (!mr->lkey_published) goto out; if (lkey == 0) rcu_assign_pointer(dev->dma_mr, NULL); else { r = lkey >> (32 - ib_qib_lkey_table_size); rcu_assign_pointer(rkt->table[r], NULL); } qib_put_mr(mr); mr->lkey_published = 0; out: spin_unlock_irqrestore(&rkt->lock, flags); }
void hfi1_put_txreq(struct verbs_txreq *tx) { struct hfi1_ibdev *dev; struct rvt_qp *qp; unsigned long flags; unsigned int seq; struct hfi1_qp_priv *priv; qp = tx->qp; dev = to_idev(qp->ibqp.device); if (tx->mr) rvt_put_mr(tx->mr); sdma_txclean(dd_from_dev(dev), &tx->txreq); /* Free verbs_txreq and return to slab cache */ kmem_cache_free(dev->verbs_txreq_cache, tx); do { seq = read_seqbegin(&dev->iowait_lock); if (!list_empty(&dev->txwait)) { struct iowait *wait; write_seqlock_irqsave(&dev->iowait_lock, flags); wait = list_first_entry(&dev->txwait, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ write_sequnlock_irqrestore(&dev->iowait_lock, flags); hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); break; } } while (read_seqretry(&dev->iowait_lock, seq)); }
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct ib_qp *ibqp = &qp->ibqp; struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_devdata *dd = dd_from_dev(dev); u8 sc; if (attr_mask & IB_QP_AV) { sc = ah_to_sc(ibqp->device, &attr->ah_attr); if (sc == 0xf) return -EINVAL; if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) return -EINVAL; if (!qp_to_send_context(qp, sc)) return -EINVAL; } if (attr_mask & IB_QP_ALT_PATH) { sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr); if (sc == 0xf) return -EINVAL; if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) return -EINVAL; if (!qp_to_send_context(qp, sc)) return -EINVAL; } return 0; }
/**
 * qib_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the requested number of entries; must be between 1 and
 *       ib_qib_max_cqes
 * @udata: user data; when present with outlen >= sizeof(__u64), user-format
 *         entries are allocated and the mmap offset is written back
 *
 * A new ring of @cqe + 1 entries is allocated, the unread completions are
 * copied into it starting at index 0 under the CQ lock, and the old ring
 * is freed.
 *
 * Returns 0 for success, -EINVAL if @cqe is out of range or smaller than
 * the number of unread completions, -ENOMEM if the new ring cannot be
 * allocated, or the error from ib_copy_to_udata().
 */
int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *old_wc;
	struct qib_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;

	if (cqe < 1 || cqe > ib_qib_max_cqes) {
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	/* n = number of unread entries, accounting for wrap-around. */
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	/* Refuse to shrink below what is currently queued. */
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	/* Copy unread entries to the front of the new ring; n counts them. */
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct qib_ibdev *dev = to_idev(ibcq->device);
		struct qib_mmap_info *ip = cq->ip;

		qib_update_mmap_info(dev, ip, sz, wc);

		/*
		 * Return the offset to mmap.
		 * See qib_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			/* The new ring is already installed; just report. */
			if (ret)
				goto bail;
		}

		/* Queue the mapping info unless it is already on the list. */
		spin_lock_irq(&dev->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;
	goto bail;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
bail:
	return ret;
}
/** * qib_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to * @entries: the minimum size of the completion queue * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * * Returns a pointer to the completion queue or negative errno values * for failure. * * Called by ib_create_cq() in the generic verbs code. */ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { struct qib_ibdev *dev = to_idev(ibdev); struct qib_cq *cq; struct qib_cq_wc *wc; struct ib_cq *ret; u32 sz; if (entries < 1 || entries > ib_qib_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; } /* Allocate the completion queue structure. */ cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { ret = ERR_PTR(-ENOMEM); goto done; } /* * Allocate the completion queue entries and head/tail pointers. * This is allocated separately so that it can be resized and * also mapped into user space. * We need to use vmalloc() in order to support mmap and large * numbers of entries. */ sz = sizeof(*wc); if (udata && udata->outlen >= sizeof(__u64)) sz += sizeof(struct ib_uverbs_wc) * (entries + 1); else sz += sizeof(struct ib_wc) * (entries + 1); wc = vmalloc_user(sz); if (!wc) { ret = ERR_PTR(-ENOMEM); goto bail_cq; } /* * Return the address of the WC as the offset to mmap. * See qib_mmap() for details. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { int err; cq->ip = qib_create_mmap_info(dev, sz, context, wc); if (!cq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wc; } err = ib_copy_to_udata(udata, &cq->ip->offset, sizeof(cq->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } else cq->ip = NULL; spin_lock(&dev->n_cqs_lock); if (dev->n_cqs_allocated == ib_qib_max_cqs) { spin_unlock(&dev->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } dev->n_cqs_allocated++; spin_unlock(&dev->n_cqs_lock); if (cq->ip) { spin_lock_irq(&dev->pending_lock); list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return * an error. */ cq->ibcq.cqe = entries; cq->notify = IB_CQ_NONE; cq->triggered = 0; spin_lock_init(&cq->lock); INIT_WORK(&cq->comptask, send_complete); wc->head = 0; wc->tail = 0; cq->queue = wc; ret = &cq->ibcq; goto done; bail_ip: kfree(cq->ip); bail_wc: vfree(wc); bail_cq: kfree(cq); done: return ret; }
/**
 * hfi1_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair who's attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so (not referenced in this function)
 *
 * The function works in three phases, the first two under r_lock and
 * s_lock: (1) every requested attribute is validated and any failure
 * jumps to the -EINVAL exit before anything is changed; (2) the state
 * transition and the attribute updates are applied; (3) after the locks
 * are released, the QP is inserted into the lookup table on a
 * RESET -> INIT transition and pending events are delivered.
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		   int attr_mask, struct ib_udata *udata)
{
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int ret;
	u32 pmtu = 0; /* for gcc warning only */
	struct hfi1_devdata *dd;

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);

	/* The caller may override the current state; default is qp->state. */
	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	/* ---- Phase 1: validation only, nothing is modified yet ---- */

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
			goto inval;
		if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
			goto inval;
		if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	/* The port may not be changed on SMI/GSI QPs and must be in range. */
	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > HFI1_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  It is OK to request more
	 * than the active mtu (or even the max_cap, if that has been tuned
	 * to a small mtu).  qp->path_mtu will be set to the lesser of the
	 * requested attribute mtu and the active mtu, for packetizing
	 * messages.
	 * Note that the QP port has to be set in INIT and MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		/* pidx uses the port already stored in the QP. */
		int mtu, pidx = qp->port_num - 1;

		dd = dd_from_dev(dev);
		mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
		if (mtu == -1)
			goto inval;

		if (mtu > dd->pport[pidx].ibmtu)
			pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
		else
			pmtu = attr->path_mtu;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			/* ARMED -> MIGRATED: switch to the alternate path below. */
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else
			goto inval;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
			goto inval;

	/* ---- Phase 2: apply the state transition and attributes ---- */

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			flush_iowait(qp);
			qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
			/* Release both locks before calls that can sleep. */
			spin_unlock(&qp->s_lock);
			spin_unlock_irq(&qp->r_lock);
			/* Stop the sending work queue and retry timer */
			cancel_work_sync(&qp->s_iowait.iowork);
			del_timer_sync(&qp->s_timer);
			iowait_sdma_drain(&qp->s_iowait);
			flush_tx_list(qp);
			remove_qp(dev, qp);
			/* Sleep until every outstanding reference is gone. */
			wait_event(qp->wait, !atomic_read(&qp->refcount));
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_lock);
			clear_mr_refs(qp, 1);
			clear_ahg(qp);
			reset_qp(qp, ibqp->qp_type);
		}
		break;

	case IB_QPS_RTR:
		/* Allow event to re-trigger if QP set to RTR more than once */
		qp->r_flags &= ~HFI1_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		/* Draining is needed only if sends are still outstanding. */
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		/* hfi1_error_qp() reports whether a last-WQE event is due. */
		lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		/* All send-side PSN trackers restart from the new PSN. */
		qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			/* Make the alternate path the primary one. */
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
			qp->s_flags |= HFI1_S_AHG_CLEAR;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU) {
		struct hfi1_ibport *ibp;
		u8 sc, vl;
		u32 mtu;

		/* Limit the MTU further by the MTU of the VL this QP uses. */
		dd = dd_from_dev(dev);
		ibp = &dd->pport[qp->port_num - 1].ibport_data;

		sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
		vl = sc_to_vlt(dd, sc);

		mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
		if (vl < PER_VL_SEND_CONTEXTS)
			mtu = min_t(u32, mtu, dd->vld[vl].mtu);
		pmtu = mtu_to_enum(mtu, OPA_MTU_8192);

		qp->path_mtu = pmtu;
		qp->pmtu = mtu;
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT) {
		qp->timeout = attr->timeout;
		/* 4096 * 2^timeout, divided by 1000, passed as microseconds. */
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	/* ---- Phase 3: work done without the QP locks held ---- */

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		insert_qp(dev, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	/* Every jump to inval is made with both locks held. */
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);
	ret = -EINVAL;

bail:
	return ret;
}
/**
 * qib_modify_srq - modify a shared receive queue
 * @ibsrq: the SRQ to modify
 * @attr: the new attributes of the SRQ
 * @attr_mask: indicates which attributes to modify
 * @udata: user data for libibverbs.so
 *
 * With IB_SRQ_MAX_WR set, a new receive queue of @attr->max_wr + 1
 * entries is allocated, the outstanding WQEs are copied into it under
 * the queue lock, and the old queue is freed; IB_SRQ_LIMIT, if also
 * set, is applied in the same critical section.  With only IB_SRQ_LIMIT
 * set, just the limit is updated.
 *
 * Returns 0 on success, -EINVAL for out-of-range sizes/limits or
 * inconsistent head/tail indices, -ENOMEM if the new queue cannot be
 * allocated, or the error from the udata copy helpers.
 */
int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
		   enum ib_srq_attr_mask attr_mask,
		   struct ib_udata *udata)
{
	struct qib_srq *srq = to_isrq(ibsrq);
	struct qib_rwq *wq;
	int ret = 0;

	if (attr_mask & IB_SRQ_MAX_WR) {
		struct qib_rwq *owq;
		struct qib_rwqe *p;
		u32 sz, size, n, head, tail;

		/* Check that the requested sizes are below the limits. */
		if ((attr->max_wr > ib_qib_max_srq_wrs) ||
		    ((attr_mask & IB_SRQ_LIMIT) ?
		     attr->srq_limit : srq->limit) > attr->max_wr) {
			ret = -EINVAL;
			goto bail;
		}

		/* sz is the size of one WQE including its SGE array. */
		sz = sizeof(struct qib_rwqe) +
			srq->rq.max_sge * sizeof(struct ib_sge);
		size = attr->max_wr + 1;
		wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
		if (!wq) {
			ret = -ENOMEM;
			goto bail;
		}

		/* Check that we can write the offset to mmap. */
		if (udata && udata->inlen >= sizeof(__u64)) {
			__u64 offset_addr;
			__u64 offset = 0;

			/*
			 * The input buffer carries a user address; it is
			 * installed as the output buffer for the copies
			 * back to user space.
			 */
			ret = ib_copy_from_udata(&offset_addr, udata,
						 sizeof(offset_addr));
			if (ret)
				goto bail_free;
			udata->outbuf =
				(void __user *) (unsigned long) offset_addr;
			ret = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (ret)
				goto bail_free;
		}

		spin_lock_irq(&srq->rq.lock);
		/*
		 * validate head and tail pointer values and compute
		 * the number of remaining WQEs.
		 */
		owq = srq->rq.wq;
		head = owq->head;
		tail = owq->tail;
		if (head >= srq->rq.size || tail >= srq->rq.size) {
			ret = -EINVAL;
			goto bail_unlock;
		}
		n = head;
		if (n < tail)
			n += srq->rq.size - tail;
		else
			n -= tail;
		/* The new queue must hold all outstanding WQEs plus one slot. */
		if (size <= n) {
			ret = -EINVAL;
			goto bail_unlock;
		}
		n = 0;
		p = wq->wq;
		/*
		 * Copy outstanding WQEs to the front of the new queue.
		 * NOTE(review): wqe->num_sge is not checked against
		 * srq->rq.max_sge here -- confirm it cannot exceed it.
		 */
		while (tail != head) {
			struct qib_rwqe *wqe;
			int i;

			wqe = get_rwqe_ptr(&srq->rq, tail);
			p->wr_id = wqe->wr_id;
			p->num_sge = wqe->num_sge;
			for (i = 0; i < wqe->num_sge; i++)
				p->sg_list[i] = wqe->sg_list[i];
			n++;
			/* Entries are sz bytes apart, so step by bytes. */
			p = (struct qib_rwqe *)((char *) p + sz);
			if (++tail >= srq->rq.size)
				tail = 0;
		}
		srq->rq.wq = wq;
		srq->rq.size = size;
		wq->head = n;
		wq->tail = 0;
		if (attr_mask & IB_SRQ_LIMIT)
			srq->limit = attr->srq_limit;
		spin_unlock_irq(&srq->rq.lock);

		vfree(owq);

		if (srq->ip) {
			struct qib_mmap_info *ip = srq->ip;
			struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
			u32 s = sizeof(struct qib_rwq) + size * sz;

			qib_update_mmap_info(dev, ip, s, wq);

			/*
			 * Return the offset to mmap.
			 * See qib_mmap() for details.
			 */
			if (udata && udata->inlen >= sizeof(__u64)) {
				ret = ib_copy_to_udata(udata, &ip->offset,
						       sizeof(ip->offset));
				/* The new queue is already installed. */
				if (ret)
					goto bail;
			}

			/*
			 * Put user mapping info onto the pending list
			 * unless it already is on the list.
			 */
			spin_lock_irq(&dev->pending_lock);
			if (list_empty(&ip->pending_mmaps))
				list_add(&ip->pending_mmaps,
					 &dev->pending_mmaps);
			spin_unlock_irq(&dev->pending_lock);
		}
	} else if (attr_mask & IB_SRQ_LIMIT) {
		/* Limit-only change: it must be below the queue size. */
		spin_lock_irq(&srq->rq.lock);
		if (attr->srq_limit >= srq->rq.size)
			ret = -EINVAL;
		else
			srq->limit = attr->srq_limit;
		spin_unlock_irq(&srq->rq.lock);
	}
	goto bail;

bail_unlock:
	spin_unlock_irq(&srq->rq.lock);
bail_free:
	vfree(wq);
bail:
	return ret;
}
/*
 * ipath_ruc_loopback - process send work requests whose destination QP
 * is on this same device
 * @sqp: the sending QP
 *
 * The destination QP is looked up by sqp->remote_qpn.  Each send WQE
 * from s_last up to s_head is then executed directly against the
 * destination QP: the payload is copied from the send SGEs into the
 * receive SGEs (or an atomic is performed), a receive completion is
 * queued when a receive WQE was consumed, and the send is completed.
 *
 * sqp->s_lock is held while the send queue indices and flags are
 * examined and is dropped while data is copied.  The reference on the
 * destination QP is dropped at the end.
 */
static void ipath_ruc_loopback(struct ipath_qp *sqp)
{
	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
	struct ipath_qp *qp;
	struct ipath_swqe *wqe;
	struct ipath_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;

	/* qp may be NULL; the "done" label below accounts for that. */
	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= IPATH_S_BUSY;

again:
	/* Entered with s_lock held.  Nothing left to send? */
	if (sqp->s_last == sqp->s_head)
		goto clr_busy;
	wqe = get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/* Advance s_cur (with wrap) when it points at the WQE being sent. */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	/* No usable destination: count a drop and fail or skip the send. */
	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		/* Only RC reports the failure; other types complete OK. */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof wc);
	send_status = IB_WC_SUCCESS;

	/* Load the send SGE state from the WQE. */
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		if (wqe->length == 0)
			break;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		/*
		 * For a read, the remote region becomes the copy source
		 * (sqp->s_sge) and the WQE's SGEs the destination.
		 */
		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
					    wqe->wr.wr.atomic.remote_addr,
					    wqe->wr.wr.atomic.rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = wqe->wr.wr.atomic.compare_add;
		/* Fetch-and-add stores the pre-add value: result - addend. */
		*(u64 *) sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      sdata, wqe->wr.wr.atomic.swap);
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	/* Copy the payload, walking the source SGE list segment by segment. */
	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		/* Copy no more than the current segment and SGE allow. */
		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			/* This SGE is used up; move on to the next one. */
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			/* Segment used up; step to the MR's next segment. */
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}

	/* No receive completion is generated unless the WRID bit was set. */
	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
		       wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
	/* Reached with s_lock held: complete this send, try the next. */
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	ipath_send_complete(sqp, wqe, send_status);
	goto again;

rnr_nak:
	/* Reached without s_lock: no receive WQE was available. */
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	/* A retry count of 7 is never decremented. */
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
		goto clr_busy;
	/* Park the sender on the RNR queue until its timeout expires. */
	sqp->s_flags |= IPATH_S_WAITING;
	dev->n_rnr_naks++;
	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
	ipath_insert_rnr_queue(sqp);
	goto clr_busy;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	ipath_rc_error(qp, wc.status);

serr:
	/* Reached without s_lock: complete the send with send_status. */
	spin_lock_irqsave(&sqp->s_lock, flags);
	ipath_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~IPATH_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		/* The event handler is called with the lock released. */
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	/* Reached with s_lock held. */
	sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	/* Drop the destination QP reference, waking a waiter on the last. */
	if (qp && atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}
int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; int ret = 0; struct qib_ibdev *dev = to_idev(mr->pd->device); struct qib_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); /* special case for dma_mr lkey == 0 */ if (dma_region) { struct qib_mregion *tmr; tmr = rcu_dereference(dev->dma_mr); if (!tmr) { qib_get_mr(mr); rcu_assign_pointer(dev->dma_mr, mr); mr->lkey_published = 1; } goto success; } /* Find the next available LKEY */ r = rkt->next; n = r; for (;;) { if (rkt->table[r] == NULL) break; r = (r + 1) & (rkt->max - 1); if (r == n) { qib_dbg("LKEY table full\n"); goto bail; } } rkt->next = (r + 1) & (rkt->max - 1); /* * Make sure lkey is never zero which is reserved to indicate an * unrestricted LKEY. */ rkt->gen++; mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) << 8); if (mr->lkey == 0) { mr->lkey |= 1 << 8; rkt->gen++; } qib_get_mr(mr); rcu_assign_pointer(rkt->table[r], mr); mr->lkey_published = 1; success: spin_unlock_irqrestore(&rkt->lock, flags); out: return ret; bail: spin_unlock_irqrestore(&rkt->lock, flags); ret = -ENOMEM; goto out; }
/** * qib_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ */ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { struct qib_ibdev *dev = to_idev(ibpd->device); struct qib_srq *srq; u32 sz; struct ib_srq *ret; if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || srq_init_attr->attr.max_wr == 0 || srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) { ret = ERR_PTR(-EINVAL); goto done; } srq = kmalloc(sizeof(*srq), GFP_KERNEL); if (!srq) { ret = ERR_PTR(-ENOMEM); goto done; } /* * Need to use vmalloc() if we want to support large #s of entries. */ srq->rq.size = srq_init_attr->attr.max_wr + 1; srq->rq.max_sge = srq_init_attr->attr.max_sge; sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct qib_rwqe); srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz); if (!srq->rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_srq; } /* * Return the address of the RWQ as the offset to mmap. * See qib_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { int err; u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz; srq->ip = qib_create_mmap_info(dev, s, ibpd->uobject->context, srq->rq.wq); if (!srq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wq; } err = ib_copy_to_udata(udata, &srq->ip->offset, sizeof(srq->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } else srq->ip = NULL; /* * ib_create_srq() will initialize srq->ibsrq. 
*/ spin_lock_init(&srq->rq.lock); srq->rq.wq->head = 0; srq->rq.wq->tail = 0; srq->limit = srq_init_attr->attr.srq_limit; spin_lock(&dev->n_srqs_lock); if (dev->n_srqs_allocated == ib_qib_max_srqs) { spin_unlock(&dev->n_srqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } dev->n_srqs_allocated++; spin_unlock(&dev->n_srqs_lock); if (srq->ip) { spin_lock_irq(&dev->pending_lock); list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } ret = &srq->ibsrq; goto done; bail_ip: kfree(srq->ip); bail_wq: vfree(srq->rq.wq); bail_srq: kfree(srq); done: return ret; }
/** * ipath_ruc_loopback - handle UC and RC lookback requests * @sqp: the loopback QP * @wc: the work completion entry * * This is called from ipath_do_uc_send() or ipath_do_rc_send() to * forward a WQE addressed to the same HCA. * Note that although we are single threaded due to the tasklet, we still * have to protect against post_send(). We don't have to worry about * receive interrupts since this is a connected protocol and all packets * will pass through here. */ void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc) { struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); struct ipath_qp *qp; struct ipath_swqe *wqe; struct ipath_sge *sge; unsigned long flags; u64 sdata; qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); if (!qp) { dev->n_pkt_drops++; return; } again: spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } /* Get the next send request. */ if (sqp->s_last == sqp->s_head) { /* Send work queue is empty. */ spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. 
*/ wqe = get_swqe_ptr(sqp, sqp->s_last); spin_unlock_irqrestore(&sqp->s_lock, flags); wc->wc_flags = 0; wc->imm_data = 0; sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sg_list = wqe->sg_list + 1; sqp->s_sge.num_sge = wqe->wr.num_sge; sqp->s_len = wqe->length; switch (wqe->wr.opcode) { case IB_WR_SEND_WITH_IMM: wc->wc_flags = IB_WC_WITH_IMM; wc->imm_data = wqe->wr.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: spin_lock_irqsave(&qp->r_rq.lock, flags); if (!ipath_get_rwqe(qp, 0)) { rnr_nak: spin_unlock_irqrestore(&qp->r_rq.lock, flags); /* Handle RNR NAK */ if (qp->ibqp.qp_type == IB_QPT_UC) goto send_comp; if (sqp->s_rnr_retry == 0) { wc->status = IB_WC_RNR_RETRY_EXC_ERR; goto err; } if (sqp->s_rnr_retry_cnt < 7) sqp->s_rnr_retry--; dev->n_rnr_naks++; sqp->s_rnr_timeout = ib_ipath_rnr_table[sqp->s_min_rnr_timer]; ipath_insert_rnr_queue(sqp); goto done; } spin_unlock_irqrestore(&qp->r_rq.lock, flags); break; case IB_WR_RDMA_WRITE_WITH_IMM: wc->wc_flags = IB_WC_WITH_IMM; wc->imm_data = wqe->wr.imm_data; spin_lock_irqsave(&qp->r_rq.lock, flags); if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; spin_unlock_irqrestore(&qp->r_rq.lock, flags); /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_WRITE))) { acc_err: wc->status = IB_WC_REM_ACCESS_ERR; err: wc->wr_id = wqe->wr.wr_id; wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc->vendor_err = 0; wc->byte_len = 0; wc->qp_num = sqp->ibqp.qp_num; wc->src_qp = sqp->remote_qpn; wc->pkey_index = 0; wc->slid = sqp->remote_ah_attr.dlid; wc->sl = sqp->remote_ah_attr.sl; wc->dlid_path_bits = 0; wc->port_num = 0; ipath_sqerror_qp(sqp, wc); goto done; } break; case IB_WR_RDMA_READ: if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto 
acc_err; qp->r_sge.sge = wqe->sg_list[0]; qp->r_sge.sg_list = wqe->sg_list + 1; qp->r_sge.num_sge = wqe->wr.num_sge; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64), wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ sdata = wqe->wr.wr.atomic.swap; spin_lock_irqsave(&dev->pending_lock, flags); qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) *(u64 *) qp->r_sge.sge.vaddr = qp->r_atomic_data + sdata; else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) *(u64 *) qp->r_sge.sge.vaddr = sdata; spin_unlock_irqrestore(&dev->pending_lock, flags); *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; goto send_comp; default: goto done; } sge = &sqp->s_sge.sge; while (sqp->s_len) { u32 len = sqp->s_len; if (len > sge->length) len = sge->length; BUG_ON(len == 0); ipath_copy_sge(&qp->r_sge, sge->vaddr, len); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; if (sge->sge_length == 0) { if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr != NULL) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; } sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; sge->length = sge->mr->map[sge->m]->segs[sge->n].length; } sqp->s_len -= len; } if (wqe->wr.opcode == IB_WR_RDMA_WRITE || wqe->wr.opcode == IB_WR_RDMA_READ) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; else wc->opcode = IB_WC_RECV; wc->wr_id = qp->r_wr_id; wc->status = IB_WC_SUCCESS; wc->vendor_err = 0; wc->byte_len = wqe->length; wc->qp_num = qp->ibqp.qp_num; wc->src_qp = qp->remote_qpn; /* XXX do we know which pkey matched? Only needed for GSI. 
*/ wc->pkey_index = 0; wc->slid = qp->remote_ah_attr.dlid; wc->sl = qp->remote_ah_attr.sl; wc->dlid_path_bits = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_SUCCESS; wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc->vendor_err = 0; wc->byte_len = wqe->length; wc->qp_num = sqp->ibqp.qp_num; wc->src_qp = 0; wc->pkey_index = 0; wc->slid = 0; wc->sl = 0; wc->dlid_path_bits = 0; wc->port_num = 0; ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0); } /* Update s_last now that we are finished with the SWQE */ spin_lock_irqsave(&sqp->s_lock, flags); if (++sqp->s_last >= sqp->s_size) sqp->s_last = 0; spin_unlock_irqrestore(&sqp->s_lock, flags); goto again; done: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); }
/** * ipath_do_ruc_send - perform a send on an RC or UC QP * @data: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP (tasklet). * Otherwise, after we drop the QP s_lock, two threads could send * packets out of order. */ void ipath_do_ruc_send(unsigned long data) { struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; u16 lrh0; u32 nwords; u32 extra_bytes; u32 bth0; u32 bth2; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); struct ipath_other_headers *ohdr; if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) goto bail; if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { ipath_ruc_loopback(qp); goto clear; } ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &qp->s_hdr.u.l.oth; again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { /* * If no PIO bufs are available, return. An interrupt will * call ipath_ib_piobufavail() when one is available. */ if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr, qp->s_cur_size, qp->s_cur_sge)) { ipath_no_bufs_available(qp, dev); goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ qp->s_hdrwords = 0; } /* * The lock is needed to synchronize between setting * qp->s_ack_state, resend timer, and post_send(). */ spin_lock_irqsave(&qp->s_lock, flags); if (!((qp->ibqp.qp_type == IB_QPT_RC) ? ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { /* * Clear the busy bit before unlocking to avoid races with * adding new work queue items and then failing to process * them. */ clear_bit(IPATH_S_BUSY, &qp->s_busy); spin_unlock_irqrestore(&qp->s_lock, flags); goto bail; } spin_unlock_irqrestore(&qp->s_lock, flags); /* Construct the header. 
*/ extra_bytes = (4 - qp->s_cur_size) & 3; nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = IPATH_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = IPATH_LRH_GRH; } lrh0 |= qp->remote_ah_attr.sl << 4; qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); /* Check for more work to do. */ goto again; clear: clear_bit(IPATH_S_BUSY, &qp->s_busy); bail: return; }
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); enum ib_qp_state cur_state, new_state; int lastwqe = 0; int ret; spin_lock_irq(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) goto inval; if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid == 0 || attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) goto inval; if ((attr->ah_attr.ah_flags & IB_AH_GRH) && (attr->ah_attr.grh.sgid_index > 1)) goto inval; } if (attr_mask & IB_QP_PKEY_INDEX) if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) goto inval; if (attr_mask & IB_QP_MIN_RNR_TIMER) if (attr->min_rnr_timer > 31) goto inval; if (attr_mask & IB_QP_PORT) if (attr->port_num == 0 || attr->port_num > ibqp->device->phys_port_cnt) goto inval; /* */ if ((attr_mask & IB_QP_PATH_MTU) && (ib_mtu_enum_to_int(attr->path_mtu) == -1 || (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096))) goto inval; if (attr_mask & IB_QP_PATH_MIG_STATE) if (attr->path_mig_state != IB_MIG_MIGRATED && attr->path_mig_state != IB_MIG_REARM) goto inval; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) goto inval; switch (new_state) { case IB_QPS_RESET: if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); qp->s_flags &= ~IPATH_S_ANY_WAIT; spin_unlock_irq(&qp->s_lock); /* */ tasklet_kill(&qp->s_task); wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); spin_lock_irq(&qp->s_lock); } ipath_reset_qp(qp, ibqp->qp_type); break; case IB_QPS_SQD: qp->s_draining = qp->s_last != qp->s_cur; qp->state = new_state; 
break; case IB_QPS_SQE: if (qp->ibqp.qp_type == IB_QPT_RC) goto inval; qp->state = new_state; break; case IB_QPS_ERR: lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: qp->state = new_state; break; } if (attr_mask & IB_QP_PKEY_INDEX) qp->s_pkey_index = attr->pkey_index; if (attr_mask & IB_QP_DEST_QPN) qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { qp->s_psn = qp->s_next_psn = attr->sq_psn; qp->s_last_psn = qp->s_next_psn - 1; } if (attr_mask & IB_QP_RQ_PSN) qp->r_psn = attr->rq_psn; if (attr_mask & IB_QP_ACCESS_FLAGS) qp->qp_access_flags = attr->qp_access_flags; if (attr_mask & IB_QP_AV) { qp->remote_ah_attr = attr->ah_attr; qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); } if (attr_mask & IB_QP_PATH_MTU) qp->path_mtu = attr->path_mtu; if (attr_mask & IB_QP_RETRY_CNT) qp->s_retry = qp->s_retry_cnt = attr->retry_cnt; if (attr_mask & IB_QP_RNR_RETRY) { qp->s_rnr_retry = attr->rnr_retry; if (qp->s_rnr_retry > 7) qp->s_rnr_retry = 7; qp->s_rnr_retry_cnt = qp->s_rnr_retry; } if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; if (attr_mask & IB_QP_TIMEOUT) qp->timeout = attr->timeout; if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->r_max_rd_atomic = attr->max_dest_rd_atomic; if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; spin_unlock_irq(&qp->s_lock); if (lastwqe) { struct ib_event ev; ev.device = qp->ibqp.device; ev.element.qp = &qp->ibqp; ev.event = IB_EVENT_QP_LAST_WQE_REACHED; qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } ret = 0; goto bail; inval: spin_unlock_irq(&qp->s_lock); ret = -EINVAL; bail: return ret; }
struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct ipath_qp *qp; int err; struct ipath_swqe *swq = NULL; struct ipath_ibdev *dev; size_t sz; size_t sg_list_sz; struct ib_qp *ret; if (init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } if (init_attr->cap.max_send_sge > ib_ipath_max_sges || init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) { ret = ERR_PTR(-EINVAL); goto bail; } /* */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > ib_ipath_max_sges || init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) { ret = ERR_PTR(-EINVAL); goto bail; } if (init_attr->cap.max_send_sge + init_attr->cap.max_send_wr + init_attr->cap.max_recv_sge + init_attr->cap.max_recv_wr == 0) { ret = ERR_PTR(-EINVAL); goto bail; } } switch (init_attr->qp_type) { case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: sz = sizeof(struct ipath_sge) * init_attr->cap.max_send_sge + sizeof(struct ipath_swqe); swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; } sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { struct ipath_srq *srq = to_isrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (srq->rq.max_sge - 1); } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || init_attr->qp_type == IB_QPT_SMI || init_attr->qp_type == IB_QPT_GSI)) { qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); if (!qp->r_ud_sg_list) { ret = ERR_PTR(-ENOMEM); goto bail_qp; } } else qp->r_ud_sg_list = NULL; if (init_attr->srq) { sz = 0; qp->r_rq.size = 0; qp->r_rq.max_sge = 0; qp->r_rq.wq = NULL; init_attr->cap.max_recv_wr = 0; init_attr->cap.max_recv_sge = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 
1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct ipath_rwqe); qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + qp->r_rq.size * sz); if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_sg_list; } } /* */ spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); init_waitqueue_head(&qp->wait_dma); tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = IPATH_S_SIGNAL_REQ_WR; else qp->s_flags = 0; dev = to_idev(ibpd->device); err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); if (err) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); goto bail_sg_list; } qp->ip = NULL; qp->s_tx = NULL; ipath_reset_qp(qp, init_attr->qp_type); break; default: /* */ ret = ERR_PTR(-ENOSYS); goto bail; } init_attr->cap.max_inline_data = 0; /* */ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { __u64 offset = 0; err = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } else { u32 s = sizeof(struct ipath_rwq) + qp->r_rq.size * sz; qp->ip = ipath_create_mmap_info(dev, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_ip; } err = ib_copy_to_udata(udata, &(qp->ip->offset), sizeof(qp->ip->offset)); if (err) { ret = ERR_PTR(err); goto bail_ip; } } } spin_lock(&dev->n_qps_lock); if (dev->n_qps_allocated == ib_ipath_max_qps) { spin_unlock(&dev->n_qps_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } dev->n_qps_allocated++; spin_unlock(&dev->n_qps_lock); if (qp->ip) { spin_lock_irq(&dev->pending_lock); list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } ret = 
&qp->ibqp; goto bail; bail_ip: if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); else vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); free_qpn(&dev->qp_table, qp->ibqp.qp_num); bail_sg_list: kfree(qp->r_ud_sg_list); bail_qp: kfree(qp); bail_swq: vfree(swq); bail: return ret; }
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; int ret = 0; if (qp->state == IB_QPS_ERR) goto bail; qp->state = IB_QPS_ERR; spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); /* */ if (qp->s_last != qp->s_head) ipath_schedule_send(qp); memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; if (qp->r_rq.wq) { struct ipath_rwq *wq; u32 head; u32 tail; spin_lock(&qp->r_rq.lock); /* */ wq = qp->r_rq.wq; head = wq->head; if (head >= qp->r_rq.size) head = 0; tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; while (tail != head) { wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) tail = 0; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wq->tail = tail; spin_unlock(&qp->r_rq.lock); } else if (qp->ibqp.event_handler) ret = 1; bail: return ret; }
/** * ipath_post_rc_send - post RC and UC sends * @qp: the QP to post on * @wr: the work request to send */ int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr) { struct ipath_swqe *wqe; unsigned long flags; u32 next; int i, j; int acc; int ret; /* * Don't allow RDMA reads or atomic operations on UC or * undefined operations. * Make sure buffer is large enough to hold the result for atomics. */ if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) { ret = -EINVAL; goto bail; } } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { ret = -EINVAL; goto bail; } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && (wr->num_sge == 0 || wr->sg_list[0].length < sizeof(u64) || wr->sg_list[0].addr & (sizeof(u64) - 1))) { ret = -EINVAL; goto bail; } /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) { ret = -ENOMEM; goto bail; } spin_lock_irqsave(&qp->s_lock, flags); next = qp->s_head + 1; if (next >= qp->s_size) next = 0; if (next == qp->s_last) { spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EINVAL; goto bail; } wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; wqe->ssn = qp->s_ssn++; wqe->sg_list[0].mr = NULL; wqe->sg_list[0].vaddr = NULL; wqe->sg_list[0].length = 0; wqe->sg_list[0].sge_length = 0; wqe->length = 0; acc = wr->opcode >= IB_WR_RDMA_READ ? 
IB_ACCESS_LOCAL_WRITE : 0; for (i = 0, j = 0; i < wr->num_sge; i++) { if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) { spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EINVAL; goto bail; } if (wr->sg_list[i].length == 0) continue; if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table, &wqe->sg_list[j], &wr->sg_list[i], acc)) { spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EINVAL; goto bail; } wqe->length += wr->sg_list[i].length; j++; } wqe->wr.num_sge = j; qp->s_head = next; spin_unlock_irqrestore(&qp->s_lock, flags); if (qp->ibqp.qp_type == IB_QPT_UC) ipath_do_uc_send((unsigned long) qp); else ipath_do_rc_send((unsigned long) qp); ret = 0; bail: return ret; }
/** * ipath_post_srq_receive - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: the first WR to cause a problem is put here * * This may be called from interrupt context. */ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_srq *srq = to_isrq(ibsrq); struct ipath_ibdev *dev = to_idev(ibsrq->device); unsigned long flags; int ret; for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; int i, j; if (wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&srq->rq.lock, flags); next = srq->rq.head + 1; if (next >= srq->rq.size) next = 0; if (next == srq->rq.tail) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } wqe = get_rwqe_ptr(&srq->rq, srq->rq.head); wqe->wr_id = wr->wr_id; wqe->sg_list[0].mr = NULL; wqe->sg_list[0].vaddr = NULL; wqe->sg_list[0].length = 0; wqe->sg_list[0].sge_length = 0; wqe->length = 0; for (i = 0, j = 0; i < wr->num_sge; i++) { /* Check LKEY */ if (to_ipd(srq->ibsrq.pd)->user && wr->sg_list[i].lkey == 0) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; ret = -EINVAL; goto bail; } if (wr->sg_list[i].length == 0) continue; if (!ipath_lkey_ok(&dev->lk_table, &wqe->sg_list[j], &wr->sg_list[i], IB_ACCESS_LOCAL_WRITE)) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; ret = -EINVAL; goto bail; } wqe->length += wr->sg_list[i].length; j++; } wqe->num_sge = j; srq->rq.head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } ret = 0; bail: return ret; }
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) { struct ipath_cq_wc *wc; unsigned long flags; u32 head; u32 next; spin_lock_irqsave(&cq->lock, flags); /* * Note that the head pointer might be writable by user processes. * Take care to verify it is a sane value. */ wc = cq->queue; head = wc->head; if (head >= (unsigned) cq->ibcq.cqe) { head = cq->ibcq.cqe; next = 0; } else next = head + 1; if (unlikely(next == wc->tail)) { spin_unlock_irqrestore(&cq->lock, flags); if (cq->ibcq.event_handler) { struct ib_event ev; ev.device = cq->ibcq.device; ev.element.cq = &cq->ibcq; ev.event = IB_EVENT_CQ_ERR; cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); } return; } if (cq->ip) { wc->uqueue[head].wr_id = entry->wr_id; wc->uqueue[head].status = entry->status; wc->uqueue[head].opcode = entry->opcode; wc->uqueue[head].vendor_err = entry->vendor_err; wc->uqueue[head].byte_len = entry->byte_len; wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; wc->uqueue[head].qp_num = entry->qp->qp_num; wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; wc->uqueue[head].pkey_index = entry->pkey_index; wc->uqueue[head].slid = entry->slid; wc->uqueue[head].sl = entry->sl; wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; wc->uqueue[head].port_num = entry->port_num; /* Make sure entry is written before the head index. */ smp_wmb(); } else wc->kqueue[head] = *entry; wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && solicited)) { cq->notify = IB_CQ_NONE; cq->triggered++; /* * This will cause send_complete() to be called in * another thread. */ tasklet_hi_schedule(&cq->comptask); } spin_unlock_irqrestore(&cq->lock, flags); if (entry->status != IB_WC_SUCCESS) to_idev(cq->ibcq.device)->n_wqe_errs++; }
/** * ipath_ruc_loopback - handle UC and RC lookback requests * @sqp: the sending QP * * This is called from ipath_do_send() to * forward a WQE addressed to the same HCA. * Note that although we are single threaded due to the tasklet, we still * have to protect against post_send(). We don't have to worry about * receive interrupts since this is a connected protocol and all packets * will pass through here. */ static void ipath_ruc_loopback(struct ipath_qp *sqp) { struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); struct ipath_qp *qp; struct ipath_swqe *wqe; struct ipath_sge *sge; unsigned long flags; struct ib_wc wc; u64 sdata; atomic64_t *maddr; enum ib_wc_status send_status; /* * Note that we check the responder QP state after * checking the requester's state. */ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. */ if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) goto unlock; sqp->s_flags |= IPATH_S_BUSY; again: if (sqp->s_last == sqp->s_head) goto clr_busy; wqe = get_swqe_ptr(sqp, sqp->s_last); /* Return if it is not OK to start a new work reqeust. */ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) goto clr_busy; /* We are in the error state, flush the work request. */ send_status = IB_WC_WR_FLUSH_ERR; goto flush_send; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. * We increment s_cur to indicate s_last is in progress. */ if (sqp->s_last == sqp->s_cur) { if (++sqp->s_cur >= sqp->s_size) sqp->s_cur = 0; } spin_unlock_irqrestore(&sqp->s_lock, flags); if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { dev->n_pkt_drops++; /* * For RC, the requester would timeout and retry so * shortcut the timeouts and just signal too many retries. 
*/ if (sqp->ibqp.qp_type == IB_QPT_RC) send_status = IB_WC_RETRY_EXC_ERR; else send_status = IB_WC_SUCCESS; goto serr; } memset(&wc, 0, sizeof wc); send_status = IB_WC_SUCCESS; sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sg_list = wqe->sg_list + 1; sqp->s_sge.num_sge = wqe->wr.num_sge; sqp->s_len = wqe->length; switch (wqe->wr.opcode) { case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: if (!ipath_get_rwqe(qp, 0)) goto rnr_nak; break; case IB_WR_RDMA_WRITE_WITH_IMM: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto inv_err; if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; break; case IB_WR_RDMA_READ: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; qp->r_sge.sge = wqe->sg_list[0]; qp->r_sge.sg_list = wqe->sg_list + 1; qp->r_sge.num_sge = wqe->wr.num_sge; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = wqe->wr.wr.atomic.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 
(u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->wr.wr.atomic.swap); goto send_comp; default: send_status = IB_WC_LOC_QP_OP_ERR; goto serr; } sge = &sqp->s_sge.sge; while (sqp->s_len) { u32 len = sqp->s_len; if (len > sge->length) len = sge->length; if (len > sge->sge_length) len = sge->sge_length; BUG_ON(len == 0); ipath_copy_sge(&qp->r_sge, sge->vaddr, len); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; if (sge->sge_length == 0) { if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr != NULL) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; } sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; sge->length = sge->mr->map[sge->m]->segs[sge->n].length; } sqp->s_len -= len; } if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; else wc.opcode = IB_WC_RECV; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: spin_lock_irqsave(&sqp->s_lock, flags); flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; ipath_send_complete(sqp, wqe, send_status); goto again; rnr_nak: /* Handle RNR NAK */ if (qp->ibqp.qp_type == IB_QPT_UC) goto send_comp; /* * Note: we don't need the s_lock held since the BUSY flag * makes this single threaded. 
*/ if (sqp->s_rnr_retry == 0) { send_status = IB_WC_RNR_RETRY_EXC_ERR; goto serr; } if (sqp->s_rnr_retry_cnt < 7) sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) goto clr_busy; sqp->s_flags |= IPATH_S_WAITING; dev->n_rnr_naks++; sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); goto clr_busy; inv_err: send_status = IB_WC_REM_INV_REQ_ERR; wc.status = IB_WC_LOC_QP_OP_ERR; goto err; acc_err: send_status = IB_WC_REM_ACCESS_ERR; wc.status = IB_WC_LOC_PROT_ERR; err: /* responder goes to error state */ ipath_rc_error(qp, wc.status); serr: spin_lock_irqsave(&sqp->s_lock, flags); ipath_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); sqp->s_flags &= ~IPATH_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); if (lastwqe) { struct ib_event ev; ev.device = sqp->ibqp.device; ev.element.qp = &sqp->ibqp; ev.event = IB_EVENT_QP_LAST_WQE_REACHED; sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); } goto done; } clr_busy: sqp->s_flags &= ~IPATH_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); }