Example #1
void ipath_do_send(unsigned long data)
{
	struct ipath_qp *qp = (struct ipath_qp *)data;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	int (*make_req)(struct ipath_qp *qp);
	unsigned long flags;

	if ((qp->ibqp.qp_type == IB_QPT_RC ||
	     qp->ibqp.qp_type == IB_QPT_UC) &&
	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
		ipath_ruc_loopback(qp);
		goto bail;
	}

	if (qp->ibqp.qp_type == IB_QPT_RC)
	       make_req = ipath_make_rc_req;
	else if (qp->ibqp.qp_type == IB_QPT_UC)
	       make_req = ipath_make_uc_req;
	else
	       make_req = ipath_make_ud_req;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Return if the QP is already busy, waiting, or not ready to send. */
	if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
	    !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		goto bail;
	}

	qp->s_flags |= IPATH_S_BUSY;

	spin_unlock_irqrestore(&qp->s_lock, flags);

again:
	/* Check for a constructed packet to be sent. */
	if (qp->s_hdrwords != 0) {
		/* If no PIO buffer is available, bail; we get rescheduled later. */
		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
				     qp->s_cur_sge, qp->s_cur_size)) {
			if (ipath_no_bufs_available(qp, dev))
				goto bail;
		}
		dev->n_unicast_xmit++;
		/* Record that we sent the packet and s_hdr is empty. */
		qp->s_hdrwords = 0;
	}

	if (make_req(qp))
		goto again;

bail:;
}
Example #2
int ipath_destroy_qp(struct ib_qp *ibqp)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		spin_lock(&dev->pending_lock);
		if (!list_empty(&qp->timerwait))
			list_del_init(&qp->timerwait);
		if (!list_empty(&qp->piowait))
			list_del_init(&qp->piowait);
		spin_unlock(&dev->pending_lock);
		qp->s_flags &= ~IPATH_S_ANY_WAIT;
		spin_unlock_irq(&qp->s_lock);
		/* Stop the sending tasklet. */
		tasklet_kill(&qp->s_task);
		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
	} else
		spin_unlock_irq(&qp->s_lock);

	ipath_free_qp(&dev->qp_table, qp);

	if (qp->s_tx) {
		atomic_dec(&qp->refcount);
		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
			kfree(qp->s_tx->txreq.map_addr);
		spin_lock_irq(&dev->pending_lock);
		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
		spin_unlock_irq(&dev->pending_lock);
		qp->s_tx = NULL;
	}

	wait_event(qp->wait, !atomic_read(&qp->refcount));

	/* All users are done; mark the QPN available again. */
	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip)
		kref_put(&qp->ip->ref, ipath_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	kfree(qp->r_ud_sg_list);
	vfree(qp->s_wq);
	kfree(qp);
	return 0;
}
Example #3
static void flush_iowait(struct hfi1_qp *qp)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(&qp->s_iowait.list)) {
		list_del_init(&qp->s_iowait.list);
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}
Example #4
/**
 * ipath_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct ipath_cq *cq = to_icq(ibcq);
	struct ipath_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		struct ipath_qp *qp;

		if (tail == wc->head)
			break;

		qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
				      wc->queue[tail].qp_num);
		entry->qp = &qp->ibqp;
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);

		entry->wr_id = wc->queue[tail].wr_id;
		entry->status = wc->queue[tail].status;
		entry->opcode = wc->queue[tail].opcode;
		entry->vendor_err = wc->queue[tail].vendor_err;
		entry->byte_len = wc->queue[tail].byte_len;
		entry->imm_data = wc->queue[tail].imm_data;
		entry->src_qp = wc->queue[tail].src_qp;
		entry->wc_flags = wc->queue[tail].wc_flags;
		entry->pkey_index = wc->queue[tail].pkey_index;
		entry->slid = wc->queue[tail].slid;
		entry->sl = wc->queue[tail].sl;
		entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
		entry->port_num = wc->queue[tail].port_num;
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

	return npolled;
}
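
A note on usage (not part of the driver source): a kernel consumer normally reaches ipath_poll_cq() through the generic ib_poll_cq() wrapper. The sketch below is a minimal, hypothetical polling helper; the name drain_cq and the batch size of 16 are illustrative assumptions.

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper: drain all currently queued completions from a CQ. */
static int drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc[16];
	int n, i, total = 0;

	/* ib_poll_cq() dispatches to the device driver's poll_cq method. */
	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status != IB_WC_SUCCESS)
				pr_warn("wr_id %llu failed with status %d\n",
					(unsigned long long)wc[i].wr_id,
					wc[i].status);
			total++;
		}
	}
	return total;
}
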
Example #5
/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				  NULL, &wqe->sg_list[i],
				  IB_ACCESS_LOCAL_WRITE);
		if (unlikely(ret <= 0))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}
Example #6
/**
 * qib_destroy_srq - destroy a shared receive queue
 * @ibsrq: the SRQ to destroy
 */
int qib_destroy_srq(struct ib_srq *ibsrq)
{
	struct qib_srq *srq = to_isrq(ibsrq);
	struct qib_ibdev *dev = to_idev(ibsrq->device);

	spin_lock(&dev->n_srqs_lock);
	dev->n_srqs_allocated--;
	spin_unlock(&dev->n_srqs_lock);
	if (srq->ip)
		kref_put(&srq->ip->ref, qib_release_mmap_info);
	else
		vfree(srq->rq.wq);
	kfree(srq);

	return 0;
}
Example #7
/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct qib_lkey_table *rkt;
	struct qib_pd *pd;
	struct qib_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->lk_table;
	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		atomic_dec(&sge->mr->refcount);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}
Example #8
/**
 * qib_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int qib_destroy_cq(struct ib_cq *ibcq)
{
	struct qib_ibdev *dev = to_idev(ibcq->device);
	struct qib_cq *cq = to_icq(ibcq);

	flush_work(&cq->comptask);
	spin_lock(&dev->n_cqs_lock);
	dev->n_cqs_allocated--;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, qib_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}
Example #9
static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_ibdev *dev;
	struct hfi1_qp_priv *priv = qp->priv;

	if (!priv->s_sendcontext)
		return;
	dev = to_idev(qp->ibqp.device);
	while (iowait_pio_pending(&priv->s_iowait)) {
		write_seqlock_irq(&dev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&dev->iowait_lock);
		iowait_pio_drain(&priv->s_iowait);
		write_seqlock_irq(&dev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&dev->iowait_lock);
	}
}
Example #10
/**
 * hfi1_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int hfi1_destroy_qp(struct ib_qp *ibqp)
{
	struct hfi1_qp *qp = to_iqp(ibqp);
	struct hfi1_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		flush_iowait(qp);
		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
		spin_unlock(&qp->s_lock);
		spin_unlock_irq(&qp->r_lock);
		cancel_work_sync(&qp->s_iowait.iowork);
		del_timer_sync(&qp->s_timer);
		iowait_sdma_drain(&qp->s_iowait);
		flush_tx_list(qp);
		remove_qp(dev, qp);
		wait_event(qp->wait, !atomic_read(&qp->refcount));
		spin_lock_irq(&qp->r_lock);
		spin_lock(&qp->s_lock);
		clear_mr_refs(qp, 1);
		clear_ahg(qp);
	}
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	/* all users cleaned up, mark it available */
	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip)
		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp->s_hdr);
	kfree(qp);
	return 0;
}
Example #11
/**
 * ipath_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int ipath_destroy_qp(struct ib_qp *ibqp)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_ibdev *dev = to_idev(ibqp->device);
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	qp->state = IB_QPS_ERR;
	spin_unlock_irqrestore(&qp->s_lock, flags);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	/* Stop the sending tasklet. */
	tasklet_kill(&qp->s_task);

	/* Make sure the QP isn't on the timeout list. */
	spin_lock_irqsave(&dev->pending_lock, flags);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	if (!list_empty(&qp->piowait))
		list_del_init(&qp->piowait);
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	/*
	 * Make sure that the QP is not in the QPN table so receive
	 * interrupts will discard packets for this QP.  XXX Also remove QP
	 * from multicast table.
	 */
	if (atomic_read(&qp->refcount) != 0)
		ipath_free_qp(&dev->qp_table, qp);

	if (qp->ip)
		kref_put(&qp->ip->ref, ipath_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp);
	return 0;
}
Example #12
/**
 * qib_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
void qib_free_lkey(struct qib_mregion *mr)
{
	unsigned long flags;
	u32 lkey = mr->lkey;
	u32 r;
	struct qib_ibdev *dev = to_idev(mr->pd->device);
	struct qib_lkey_table *rkt = &dev->lk_table;

	spin_lock_irqsave(&rkt->lock, flags);
	if (!mr->lkey_published)
		goto out;
	if (lkey == 0)
		rcu_assign_pointer(dev->dma_mr, NULL);
	else {
		r = lkey >> (32 - ib_qib_lkey_table_size);
		rcu_assign_pointer(rkt->table[r], NULL);
	}
	qib_put_mr(mr);
	mr->lkey_published = 0;
out:
	spin_unlock_irqrestore(&rkt->lock, flags);
}
Example #13
void hfi1_put_txreq(struct verbs_txreq *tx)
{
	struct hfi1_ibdev *dev;
	struct rvt_qp *qp;
	unsigned long flags;
	unsigned int seq;
	struct hfi1_qp_priv *priv;

	qp = tx->qp;
	dev = to_idev(qp->ibqp.device);

	if (tx->mr)
		rvt_put_mr(tx->mr);

	sdma_txclean(dd_from_dev(dev), &tx->txreq);

	/* Free verbs_txreq and return to slab cache */
	kmem_cache_free(dev->verbs_txreq_cache, tx);

	do {
		seq = read_seqbegin(&dev->iowait_lock);
		if (!list_empty(&dev->txwait)) {
			struct iowait *wait;

			write_seqlock_irqsave(&dev->iowait_lock, flags);
			wait = list_first_entry(&dev->txwait, struct iowait,
						list);
			qp = iowait_to_qp(wait);
			priv = qp->priv;
			list_del_init(&priv->s_iowait.list);
			/* refcount held until actual wake up */
			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
			hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
			break;
		}
	} while (read_seqretry(&dev->iowait_lock, seq));
}
Example #14
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_devdata *dd = dd_from_dev(dev);
	u8 sc;

	if (attr_mask & IB_QP_AV) {
		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	return 0;
}
Example #15
/**
 * qib_resize_cq - change the size of the CQ
 * @ibcq: the completion queue to resize
 * @cqe: the requested new number of completion queue entries
 * @udata: user data for libibverbs.so
 *
 * Returns 0 for success.
 */
int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *old_wc;
	struct qib_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;

	if (cqe < 1 || cqe > ib_qib_max_cqes) {
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct qib_ibdev *dev = to_idev(ibcq->device);
		struct qib_mmap_info *ip = cq->ip;

		qib_update_mmap_info(dev, ip, sz, wc);

		/*
		 * Return the offset to mmap.
		 * See qib_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				goto bail;
		}

		spin_lock_irq(&dev->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;
	goto bail;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
bail:
	return ret;
}
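
For context, resizing is normally requested through the generic ib_resize_cq() entry point; a minimal illustrative caller (grow_cq is a hypothetical name) might look like this:

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

/* Hypothetical caller: grow a CQ to hold at least new_cqe entries. */
static int grow_cq(struct ib_cq *cq, int new_cqe)
{
	int ret;

	/* For a qib device this ends up in qib_resize_cq() above. */
	ret = ib_resize_cq(cq, new_cqe);
	if (ret)
		pr_warn("ib_resize_cq to %d entries failed: %d\n",
			new_cqe, ret);
	return ret;
}
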
Example #16
/**
 * qib_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
			    int comp_vector, struct ib_ucontext *context,
			    struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibdev);
	struct qib_cq *cq;
	struct qib_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;

	if (entries < 1 || entries > ib_qib_max_cqes) {
		ret = ERR_PTR(-EINVAL);
		goto done;
	}

	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		ret = ERR_PTR(-ENOMEM);
		goto done;
	}

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		cq->ip = NULL;

	spin_lock(&dev->n_cqs_lock);
	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
		spin_unlock(&dev->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_cqs_allocated++;
	spin_unlock(&dev->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or return
	 * an error.
	 */
	cq->ibcq.cqe = entries;
	cq->notify = IB_CQ_NONE;
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	INIT_WORK(&cq->comptask, send_complete);
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}
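
For context, a kernel ULP reaches qib_create_cq() through ib_create_cq(). This sketch assumes the older in-kernel verbs signature contemporaneous with the qib/ipath examples here (comp_handler, event_handler, cq_context, cqe, comp_vector); the handler name, the CQ size of 256 and vector 0 are illustrative assumptions.

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static void my_comp_handler(struct ib_cq *cq, void *cq_context)
{
	/* Re-arm notification; actual polling would happen in a work item. */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static struct ib_cq *make_cq(struct ib_device *ibdev, void *ctx)
{
	struct ib_cq *cq;

	/* 256 entries on completion vector 0, no async event handler. */
	cq = ib_create_cq(ibdev, my_comp_handler, NULL, ctx, 256, 0);
	if (IS_ERR(cq))
		return cq;
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	return cq;
}
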
Example #17
/**
 * hfi1_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		   int attr_mask, struct ib_udata *udata)
{
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int ret;
	u32 pmtu = 0; /* for gcc warning only */
	struct hfi1_devdata *dd;

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
			goto inval;
		if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
			goto inval;
		if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > HFI1_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  It is OK to set it greater
	 * than the active MTU (or even the max_cap, if we have tuned that
	 * to a small MTU).  We'll set qp->path_mtu to the lesser of the
	 * requested attribute MTU and the active MTU, for packetizing
	 * messages.
	 * Note that the QP port has to be set in INIT and the MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		int mtu, pidx = qp->port_num - 1;

		dd = dd_from_dev(dev);
		mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
		if (mtu == -1)
			goto inval;

		if (mtu > dd->pport[pidx].ibmtu)
			pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
		else
			pmtu = attr->path_mtu;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else
			goto inval;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			flush_iowait(qp);
			qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
			spin_unlock(&qp->s_lock);
			spin_unlock_irq(&qp->r_lock);
			/* Stop the sending work queue and retry timer */
			cancel_work_sync(&qp->s_iowait.iowork);
			del_timer_sync(&qp->s_timer);
			iowait_sdma_drain(&qp->s_iowait);
			flush_tx_list(qp);
			remove_qp(dev, qp);
			wait_event(qp->wait, !atomic_read(&qp->refcount));
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_lock);
			clear_mr_refs(qp, 1);
			clear_ahg(qp);
			reset_qp(qp, ibqp->qp_type);
		}
		break;

	case IB_QPS_RTR:
		/* Allow event to re-trigger if QP set to RTR more than once */
		qp->r_flags &= ~HFI1_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
			qp->s_flags |= HFI1_S_AHG_CLEAR;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU) {
		struct hfi1_ibport *ibp;
		u8 sc, vl;
		u32 mtu;

		dd = dd_from_dev(dev);
		ibp = &dd->pport[qp->port_num - 1].ibport_data;

		sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
		vl = sc_to_vlt(dd, sc);

		mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
		if (vl < PER_VL_SEND_CONTEXTS)
			mtu = min_t(u32, mtu, dd->vld[vl].mtu);
		pmtu = mtu_to_enum(mtu, OPA_MTU_8192);

		qp->path_mtu = pmtu;
		qp->pmtu = mtu;
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT) {
		qp->timeout = attr->timeout;
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		insert_qp(dev, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);
	ret = -EINVAL;

bail:
	return ret;
}
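
For context, these checks run when a consumer calls the generic ib_modify_qp(); below is a minimal sketch of the RESET-to-INIT transition that this path validates. The port number and access flags are illustrative assumptions, not taken from the driver.

#include <rdma/ib_verbs.h>

static int move_qp_to_init(struct ib_qp *qp)
{
	struct ib_qp_attr attr = {
		.qp_state        = IB_QPS_INIT,
		.pkey_index      = 0,
		.port_num        = 1,
		.qp_access_flags = IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ,
	};

	/* The attr_mask selects which attributes the driver must validate. */
	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_PKEY_INDEX |
			    IB_QP_PORT | IB_QP_ACCESS_FLAGS);
}
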
Example #18
/**
 * qib_modify_srq - modify a shared receive queue
 * @ibsrq: the SRQ to modify
 * @attr: the new attributes of the SRQ
 * @attr_mask: indicates which attributes to modify
 * @udata: user data for libibverbs.so
 */
int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
		   enum ib_srq_attr_mask attr_mask,
		   struct ib_udata *udata)
{
	struct qib_srq *srq = to_isrq(ibsrq);
	struct qib_rwq *wq;
	int ret = 0;

	if (attr_mask & IB_SRQ_MAX_WR) {
		struct qib_rwq *owq;
		struct qib_rwqe *p;
		u32 sz, size, n, head, tail;

		/* Check that the requested sizes are below the limits. */
		if ((attr->max_wr > ib_qib_max_srq_wrs) ||
		    ((attr_mask & IB_SRQ_LIMIT) ?
		     attr->srq_limit : srq->limit) > attr->max_wr) {
			ret = -EINVAL;
			goto bail;
		}

		sz = sizeof(struct qib_rwqe) +
			srq->rq.max_sge * sizeof(struct ib_sge);
		size = attr->max_wr + 1;
		wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
		if (!wq) {
			ret = -ENOMEM;
			goto bail;
		}

		/* Check that we can write the offset to mmap. */
		if (udata && udata->inlen >= sizeof(__u64)) {
			__u64 offset_addr;
			__u64 offset = 0;

			ret = ib_copy_from_udata(&offset_addr, udata,
						 sizeof(offset_addr));
			if (ret)
				goto bail_free;
			udata->outbuf =
				(void __user *) (unsigned long) offset_addr;
			ret = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (ret)
				goto bail_free;
		}

		spin_lock_irq(&srq->rq.lock);
		/*
		 * validate head and tail pointer values and compute
		 * the number of remaining WQEs.
		 */
		owq = srq->rq.wq;
		head = owq->head;
		tail = owq->tail;
		if (head >= srq->rq.size || tail >= srq->rq.size) {
			ret = -EINVAL;
			goto bail_unlock;
		}
		n = head;
		if (n < tail)
			n += srq->rq.size - tail;
		else
			n -= tail;
		if (size <= n) {
			ret = -EINVAL;
			goto bail_unlock;
		}
		n = 0;
		p = wq->wq;
		while (tail != head) {
			struct qib_rwqe *wqe;
			int i;

			wqe = get_rwqe_ptr(&srq->rq, tail);
			p->wr_id = wqe->wr_id;
			p->num_sge = wqe->num_sge;
			for (i = 0; i < wqe->num_sge; i++)
				p->sg_list[i] = wqe->sg_list[i];
			n++;
			p = (struct qib_rwqe *)((char *) p + sz);
			if (++tail >= srq->rq.size)
				tail = 0;
		}
		srq->rq.wq = wq;
		srq->rq.size = size;
		wq->head = n;
		wq->tail = 0;
		if (attr_mask & IB_SRQ_LIMIT)
			srq->limit = attr->srq_limit;
		spin_unlock_irq(&srq->rq.lock);

		vfree(owq);

		if (srq->ip) {
			struct qib_mmap_info *ip = srq->ip;
			struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
			u32 s = sizeof(struct qib_rwq) + size * sz;

			qib_update_mmap_info(dev, ip, s, wq);

			/*
			 * Return the offset to mmap.
			 * See qib_mmap() for details.
			 */
			if (udata && udata->inlen >= sizeof(__u64)) {
				ret = ib_copy_to_udata(udata, &ip->offset,
						       sizeof(ip->offset));
				if (ret)
					goto bail;
			}

			/*
			 * Put user mapping info onto the pending list
			 * unless it already is on the list.
			 */
			spin_lock_irq(&dev->pending_lock);
			if (list_empty(&ip->pending_mmaps))
				list_add(&ip->pending_mmaps,
					 &dev->pending_mmaps);
			spin_unlock_irq(&dev->pending_lock);
		}
	} else if (attr_mask & IB_SRQ_LIMIT) {
		spin_lock_irq(&srq->rq.lock);
		if (attr->srq_limit >= srq->rq.size)
			ret = -EINVAL;
		else
			srq->limit = attr->srq_limit;
		spin_unlock_irq(&srq->rq.lock);
	}
	goto bail;

bail_unlock:
	spin_unlock_irq(&srq->rq.lock);
bail_free:
	vfree(wq);
bail:
	return ret;
}
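
For context, a consumer usually exercises only the IB_SRQ_LIMIT branch above, e.g. to arm the SRQ limit event; here is a minimal sketch through the generic ib_modify_srq() entry point (the helper name is illustrative):

#include <rdma/ib_verbs.h>

/* Hypothetical helper: arm the SRQ limit event at the given watermark. */
static int arm_srq_limit(struct ib_srq *srq, u32 limit)
{
	struct ib_srq_attr attr = {
		.srq_limit = limit,	/* must be below the SRQ size */
	};

	/* Only IB_SRQ_LIMIT is set, so the resize path above is skipped. */
	return ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
}
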
Example #19
static void ipath_ruc_loopback(struct ipath_qp *sqp)
{
	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
	struct ipath_qp *qp;
	struct ipath_swqe *wqe;
	struct ipath_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;

	/* Look up the destination QP; the reference is dropped at "done" below. */
	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if the requester is already busy, waiting, or not ready to send. */
	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= IPATH_S_BUSY;

again:
	if (sqp->s_last == sqp->s_head)
		goto clr_busy;
	wqe = get_swqe_ptr(sqp, sqp->s_last);

	/* If new work requests cannot be processed, either stop or flush this one. */
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
			goto clr_busy;
		
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof wc);
	send_status = IB_WC_SUCCESS;

	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		if (wqe->length == 0)
			break;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
					    wqe->wr.wr.atomic.remote_addr,
					    wqe->wr.wr.atomic.rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform the atomic operation and save the result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = wqe->wr.wr.atomic.compare_add;
		*(u64 *) sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      sdata, wqe->wr.wr.atomic.swap);
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}

	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
		       wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	ipath_send_complete(sqp, wqe, send_status);
	goto again;

rnr_nak:
	/* A UC requester simply completes the WQE; only RC handles RNR retries. */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	/* Give up if the RNR retry count is exhausted. */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
		goto clr_busy;
	sqp->s_flags |= IPATH_S_WAITING;
	dev->n_rnr_naks++;
	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
	ipath_insert_rnr_queue(sqp);
	goto clr_busy;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* Put the responder QP into the error state. */
	ipath_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ipath_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~IPATH_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	if (qp && atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}
Example #20
int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
{
	unsigned long flags;
	u32 r;
	u32 n;
	int ret = 0;
	struct qib_ibdev *dev = to_idev(mr->pd->device);
	struct qib_lkey_table *rkt = &dev->lk_table;

	spin_lock_irqsave(&rkt->lock, flags);

	/* special case for dma_mr lkey == 0 */
	if (dma_region) {
		struct qib_mregion *tmr;

		tmr = rcu_dereference(dev->dma_mr);
		if (!tmr) {
			qib_get_mr(mr);
			rcu_assign_pointer(dev->dma_mr, mr);
			mr->lkey_published = 1;
		}
		goto success;
	}

	/* Find the next available LKEY */
	r = rkt->next;
	n = r;
	for (;;) {
		if (rkt->table[r] == NULL)
			break;
		r = (r + 1) & (rkt->max - 1);
		if (r == n) {
			qib_dbg("LKEY table full\n");
			goto bail;
		}
	}
	rkt->next = (r + 1) & (rkt->max - 1);
	/*
	 * Make sure lkey is never zero which is reserved to indicate an
	 * unrestricted LKEY.
	 */
	rkt->gen++;
	mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
		((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
		 << 8);
	if (mr->lkey == 0) {
		mr->lkey |= 1 << 8;
		rkt->gen++;
	}
	qib_get_mr(mr);
	rcu_assign_pointer(rkt->table[r], mr);
	mr->lkey_published = 1;
success:
	spin_unlock_irqrestore(&rkt->lock, flags);
out:
	return ret;
bail:
	spin_unlock_irqrestore(&rkt->lock, flags);
	ret = -ENOMEM;
	goto out;
}
Example #21
/**
 * qib_create_srq - create a shared receive queue
 * @ibpd: the protection domain of the SRQ to create
 * @srq_init_attr: the attributes of the SRQ
 * @udata: data from libibverbs when creating a user SRQ
 */
struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
			      struct ib_srq_init_attr *srq_init_attr,
			      struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibpd->device);
	struct qib_srq *srq;
	u32 sz;
	struct ib_srq *ret;

	if (srq_init_attr->attr.max_sge == 0 ||
	    srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
	    srq_init_attr->attr.max_wr == 0 ||
	    srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
		ret = ERR_PTR(-EINVAL);
		goto done;
	}

	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
	if (!srq) {
		ret = ERR_PTR(-ENOMEM);
		goto done;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	srq->rq.size = srq_init_attr->attr.max_wr + 1;
	srq->rq.max_sge = srq_init_attr->attr.max_sge;
	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
		sizeof(struct qib_rwqe);
	srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
	if (!srq->rq.wq) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_srq;
	}

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;
		u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;

		srq->ip =
		    qib_create_mmap_info(dev, s, ibpd->uobject->context,
					 srq->rq.wq);
		if (!srq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wq;
		}

		err = ib_copy_to_udata(udata, &srq->ip->offset,
				       sizeof(srq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		srq->ip = NULL;

	/*
	 * ib_create_srq() will initialize srq->ibsrq.
	 */
	spin_lock_init(&srq->rq.lock);
	srq->rq.wq->head = 0;
	srq->rq.wq->tail = 0;
	srq->limit = srq_init_attr->attr.srq_limit;

	spin_lock(&dev->n_srqs_lock);
	if (dev->n_srqs_allocated == ib_qib_max_srqs) {
		spin_unlock(&dev->n_srqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_srqs_allocated++;
	spin_unlock(&dev->n_srqs_lock);

	if (srq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = &srq->ibsrq;
	goto done;

bail_ip:
	kfree(srq->ip);
bail_wq:
	vfree(srq->rq.wq);
bail_srq:
	kfree(srq);
done:
	return ret;
}
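
For context, a kernel caller creates such an SRQ through ib_create_srq(); here is a minimal sketch with illustrative sizes (a kernel caller, so no udata is involved):

#include <linux/err.h>
#include <linux/printk.h>
#include <rdma/ib_verbs.h>

static struct ib_srq *make_srq(struct ib_pd *pd)
{
	struct ib_srq_init_attr init = {
		.attr = {
			.max_wr    = 256,	/* receive WQEs */
			.max_sge   = 2,		/* SGEs per WQE */
			.srq_limit = 0,		/* no limit event armed */
		},
	};
	struct ib_srq *srq;

	srq = ib_create_srq(pd, &init);
	if (IS_ERR(srq))
		pr_err("ib_create_srq failed: %ld\n", PTR_ERR(srq));
	return srq;
}
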
Example #22
/**
 * ipath_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the loopback QP
 * @wc: the work completion entry
 *
 * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
 * forward a WQE addressed to the same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
{
	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
	struct ipath_qp *qp;
	struct ipath_swqe *wqe;
	struct ipath_sge *sge;
	unsigned long flags;
	u64 sdata;

	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
	if (!qp) {
		dev->n_pkt_drops++;
		return;
	}

again:
	spin_lock_irqsave(&sqp->s_lock, flags);

	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		goto done;
	}

	/* Get the next send request. */
	if (sqp->s_last == sqp->s_head) {
		/* Send work queue is empty. */
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		goto done;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 */
	wqe = get_swqe_ptr(sqp, sqp->s_last);
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	wc->wc_flags = 0;
	wc->imm_data = 0;

	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->imm_data = wqe->wr.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		spin_lock_irqsave(&qp->r_rq.lock, flags);
		if (!ipath_get_rwqe(qp, 0)) {
		rnr_nak:
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			/* Handle RNR NAK */
			if (qp->ibqp.qp_type == IB_QPT_UC)
				goto send_comp;
			if (sqp->s_rnr_retry == 0) {
				wc->status = IB_WC_RNR_RETRY_EXC_ERR;
				goto err;
			}
			if (sqp->s_rnr_retry_cnt < 7)
				sqp->s_rnr_retry--;
			dev->n_rnr_naks++;
			sqp->s_rnr_timeout =
				ib_ipath_rnr_table[sqp->s_min_rnr_timer];
			ipath_insert_rnr_queue(sqp);
			goto done;
		}
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->imm_data = wqe->wr.imm_data;
		spin_lock_irqsave(&qp->r_rq.lock, flags);
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (wqe->length == 0)
			break;
		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_WRITE))) {
		acc_err:
			wc->status = IB_WC_REM_ACCESS_ERR;
		err:
			wc->wr_id = wqe->wr.wr_id;
			wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc->vendor_err = 0;
			wc->byte_len = 0;
			wc->qp_num = sqp->ibqp.qp_num;
			wc->src_qp = sqp->remote_qpn;
			wc->pkey_index = 0;
			wc->slid = sqp->remote_ah_attr.dlid;
			wc->sl = sqp->remote_ah_attr.sl;
			wc->dlid_path_bits = 0;
			wc->port_num = 0;
			ipath_sqerror_qp(sqp, wc);
			goto done;
		}
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		sdata = wqe->wr.wr.atomic.swap;
		spin_lock_irqsave(&dev->pending_lock, flags);
		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
		if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			*(u64 *) qp->r_sge.sge.vaddr =
				qp->r_atomic_data + sdata;
		else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
			*(u64 *) qp->r_sge.sge.vaddr = sdata;
		spin_unlock_irqrestore(&dev->pending_lock, flags);
		*(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
		goto send_comp;

	default:
		goto done;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		BUG_ON(len == 0);
		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
	    wqe->wr.opcode == IB_WR_RDMA_READ)
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc->opcode = IB_WC_RECV;
	wc->wr_id = qp->r_wr_id;
	wc->status = IB_WC_SUCCESS;
	wc->vendor_err = 0;
	wc->byte_len = wqe->length;
	wc->qp_num = qp->ibqp.qp_num;
	wc->src_qp = qp->remote_qpn;
	/* XXX do we know which pkey matched? Only needed for GSI. */
	wc->pkey_index = 0;
	wc->slid = qp->remote_ah_attr.dlid;
	wc->sl = qp->remote_ah_attr.sl;
	wc->dlid_path_bits = 0;
	/* Signal completion event if the solicited bit is set. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
		       wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;

	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_SUCCESS;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = wqe->length;
		wc->qp_num = sqp->ibqp.qp_num;
		wc->src_qp = 0;
		wc->pkey_index = 0;
		wc->slid = 0;
		wc->sl = 0;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
	}

	/* Update s_last now that we are finished with the SWQE */
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (++sqp->s_last >= sqp->s_size)
		sqp->s_last = 0;
	spin_unlock_irqrestore(&sqp->s_lock, flags);
	goto again;

done:
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}
Example #23
/**
 * ipath_do_ruc_send - perform a send on an RC or UC QP
 * @data: contains a pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, after we drop the QP s_lock, two threads could send
 * packets out of order.
 */
void ipath_do_ruc_send(unsigned long data)
{
	struct ipath_qp *qp = (struct ipath_qp *)data;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ipath_other_headers *ohdr;

	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
		goto bail;

	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
		ipath_ruc_loopback(qp);
		goto clear;
	}

	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;

again:
	/* Check for a constructed packet to be sent. */
	if (qp->s_hdrwords != 0) {
		/*
		 * If no PIO bufs are available, return.  An interrupt will
		 * call ipath_ib_piobufavail() when one is available.
		 */
		if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
				     (u32 *) &qp->s_hdr, qp->s_cur_size,
				     qp->s_cur_sge)) {
			ipath_no_bufs_available(qp, dev);
			goto bail;
		}
		dev->n_unicast_xmit++;
		/* Record that we sent the packet and s_hdr is empty. */
		qp->s_hdrwords = 0;
	}

	/*
	 * The lock is needed to synchronize between setting
	 * qp->s_ack_state, resend timer, and post_send().
	 */
	spin_lock_irqsave(&qp->s_lock, flags);

	if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
	       ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
	       ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
		/*
		 * Clear the busy bit before unlocking to avoid races with
		 * adding new work queue items and then failing to process
		 * them.
		 */
		clear_bit(IPATH_S_BUSY, &qp->s_busy);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		goto bail;
	}

	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Construct the header. */
	extra_bytes = (4 - qp->s_cur_size) & 3;
	nwords = (qp->s_cur_size + extra_bytes) >> 2;
	lrh0 = IPATH_LRH_BTH;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
						 &qp->remote_ah_attr.grh,
						 qp->s_hdrwords, nwords);
		lrh0 = IPATH_LRH_GRH;
	}
	lrh0 |= qp->remote_ah_attr.sl << 4;
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
				       SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);

	/* Check for more work to do. */
	goto again;

clear:
	clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:
	return;
}
Example #24
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata)
{
	struct ipath_ibdev *dev = to_idev(ibqp->device);
	struct ipath_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	int lastwqe = 0;
	int ret;

	spin_lock_irq(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid == 0 ||
		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
			goto inval;

		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
		    (attr->ah_attr.grh.sgid_index > 1))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	/*
	 * Don't allow an invalid path_mtu value, or an MTU greater than
	 * 2048 unless the 4096-byte MTU (ipath_mtu4096) is enabled.
	 */
	if ((attr_mask & IB_QP_PATH_MTU) &&
		(ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
		(attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
		goto inval;

	if (attr_mask & IB_QP_PATH_MIG_STATE)
		if (attr->path_mig_state != IB_MIG_MIGRATED &&
		    attr->path_mig_state != IB_MIG_REARM)
			goto inval;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			spin_lock(&dev->pending_lock);
			if (!list_empty(&qp->timerwait))
				list_del_init(&qp->timerwait);
			if (!list_empty(&qp->piowait))
				list_del_init(&qp->piowait);
			spin_unlock(&dev->pending_lock);
			qp->s_flags &= ~IPATH_S_ANY_WAIT;
			spin_unlock_irq(&qp->s_lock);
			/* Stop the sending tasklet. */
			tasklet_kill(&qp->s_task);
			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
			spin_lock_irq(&qp->s_lock);
		}
		ipath_reset_qp(qp, ibqp->qp_type);
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_psn = qp->s_next_psn = attr->sq_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
	}

	if (attr_mask & IB_QP_PATH_MTU)
		qp->path_mtu = attr->path_mtu;

	if (attr_mask & IB_QP_RETRY_CNT)
		qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry = attr->rnr_retry;
		if (qp->s_rnr_retry > 7)
			qp->s_rnr_retry = 7;
		qp->s_rnr_retry_cnt = qp->s_rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT)
		qp->timeout = attr->timeout;

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock_irq(&qp->s_lock);

	if (lastwqe) {
		struct ib_event ev;

		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	spin_unlock_irq(&qp->s_lock);
	ret = -EINVAL;

bail:
	return ret;
}
Example #25
struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
			      struct ib_qp_init_attr *init_attr,
			      struct ib_udata *udata)
{
	struct ipath_qp *qp;
	int err;
	struct ipath_swqe *swq = NULL;
	struct ipath_ibdev *dev;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret;

	if (init_attr->create_flags) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
	    init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	/* Check receive queue parameters if there is no SRQ. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
		    init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
	}

	switch (init_attr->qp_type) {
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		sz = sizeof(struct ipath_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct ipath_swqe);
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (swq == NULL) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct ipath_srq *srq = to_isrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
		if (!qp) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_swq;
		}
		if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
		    init_attr->qp_type == IB_QPT_SMI ||
		    init_attr->qp_type == IB_QPT_GSI)) {
			qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
			if (!qp->r_ud_sg_list) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qp;
			}
		} else
			qp->r_ud_sg_list = NULL;
		if (init_attr->srq) {
			sz = 0;
			qp->r_rq.size = 0;
			qp->r_rq.max_sge = 0;
			qp->r_rq.wq = NULL;
			init_attr->cap.max_recv_wr = 0;
			init_attr->cap.max_recv_sge = 0;
		} else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct ipath_rwqe);
			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
					      qp->r_rq.size * sz);
			if (!qp->r_rq.wq) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_sg_list;
			}
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_waitqueue_head(&qp->wait_dma);
		tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
		INIT_LIST_HEAD(&qp->piowait);
		INIT_LIST_HEAD(&qp->timerwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
		else
			qp->s_flags = 0;
		dev = to_idev(ibpd->device);
		err = ipath_alloc_qpn(&dev->qp_table, qp,
				      init_attr->qp_type);
		if (err) {
			ret = ERR_PTR(err);
			vfree(qp->r_rq.wq);
			goto bail_sg_list;
		}
		qp->ip = NULL;
		qp->s_tx = NULL;
		ipath_reset_qp(qp, init_attr->qp_type);
		break;

	default:
		/* No other QP types are supported. */
		ret = ERR_PTR(-ENOSYS);
		goto bail;
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See ipath_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		} else {
			u32 s = sizeof(struct ipath_rwq) +
				qp->r_rq.size * sz;

			qp->ip =
			    ipath_create_mmap_info(dev, s,
						   ibpd->uobject->context,
						   qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_ip;
			}

			err = ib_copy_to_udata(udata, &(qp->ip->offset),
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
	}

	spin_lock(&dev->n_qps_lock);
	if (dev->n_qps_allocated == ib_ipath_max_qps) {
		spin_unlock(&dev->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_qps_allocated++;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = &qp->ibqp;
	goto bail;

bail_ip:
	if (qp->ip)
		kref_put(&qp->ip->ref, ipath_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	ipath_free_qp(&dev->qp_table, qp);
	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_sg_list:
	kfree(qp->r_ud_sg_list);
bail_qp:
	kfree(qp);
bail_swq:
	vfree(swq);
bail:
	return ret;
}
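
The checks at the top of ipath_create_qp() constrain what a caller may pass in struct ib_qp_init_attr: create_flags must be zero and the requested WR/SGE counts must stay within the module limits. A minimal caller-side sketch of reaching this code through the generic ib_create_qp() wrapper; example_create_rc_qp, my_pd and my_cq are hypothetical, and the cap values are merely assumed to be within ib_ipath_max_qp_wrs and ib_ipath_max_sges.

#include <rdma/ib_verbs.h>

static struct ib_qp *example_create_rc_qp(struct ib_pd *my_pd,
					  struct ib_cq *my_cq)
{
	struct ib_qp_init_attr attr = {
		.qp_type	= IB_QPT_RC,
		.sq_sig_type	= IB_SIGNAL_REQ_WR,	/* sets IPATH_S_SIGNAL_REQ_WR */
		.send_cq	= my_cq,
		.recv_cq	= my_cq,
		.cap = {
			.max_send_wr	= 16,	/* assumed <= ib_ipath_max_qp_wrs */
			.max_recv_wr	= 16,
			.max_send_sge	= 2,	/* assumed <= ib_ipath_max_sges */
			.max_recv_sge	= 2,
		},
	};

	/* create_flags is left zero; ipath_create_qp() rejects anything else */
	return ib_create_qp(my_pd, &attr);
}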
Example #26
0
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	int ret = 0;

	if (qp->state == IB_QPS_ERR)
		goto bail;

	qp->state = IB_QPS_ERR;

	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	if (!list_empty(&qp->piowait))
		list_del_init(&qp->piowait);
	spin_unlock(&dev->pending_lock);

	/* Schedule the sending tasklet to drain the send work queue. */
	if (qp->s_last != qp->s_head)
		ipath_schedule_send(qp);

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.opcode = IB_WC_RECV;

	if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
	wc.status = IB_WC_WR_FLUSH_ERR;

	if (qp->r_rq.wq) {
		struct ipath_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);

		/* sanity check pointers since a user might be able to change them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		while (tail != head) {
			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	} else if (qp->ibqp.event_handler)
		ret = 1;

bail:
	return ret;
}
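
The receive-queue flush above clamps head and tail before use because those indices live in memory that can be mapped into user space, and then walks the ring until tail catches up with head. A standalone sketch of that wraparound-drain pattern; ring_drain and flush_one are hypothetical names, and 'size' is the number of slots in the ring.

static unsigned ring_drain(unsigned head, unsigned tail, unsigned size,
			   void (*flush_one)(unsigned idx))
{
	/* clamp untrusted indices, as ipath_error_qp() does */
	if (head >= size)
		head = 0;
	if (tail >= size)
		tail = 0;
	while (tail != head) {
		flush_one(tail);	/* e.g. emit a flush completion */
		if (++tail >= size)
			tail = 0;
	}
	return tail;			/* new tail: the queue is now empty */
}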
Example #27
0
/**
 * ipath_post_rc_send - post RC and UC sends
 * @qp: the QP to post on
 * @wr: the work request to send
 */
int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	unsigned long flags;
	u32 next;
	int i, j;
	int acc;
	int ret;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
			ret = -EINVAL;
			goto bail;
		}
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
		ret = -EINVAL;
		goto bail;
	} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		   (wr->num_sge == 0 ||
		    wr->sg_list[0].length < sizeof(u64) ||
		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
		ret = -EINVAL;
		goto bail;
	}
	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge) {
		ret = -ENOMEM;
		goto bail;
	}
	spin_lock_irqsave(&qp->s_lock, flags);
	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EINVAL;
		goto bail;
	}

	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->ssn = qp->s_ssn++;
	wqe->sg_list[0].mr = NULL;
	wqe->sg_list[0].vaddr = NULL;
	wqe->sg_list[0].length = 0;
	wqe->sg_list[0].sge_length = 0;
	wqe->length = 0;
	acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
	for (i = 0, j = 0; i < wr->num_sge; i++) {
		if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			ret = -EINVAL;
			goto bail;
		}
		if (wr->sg_list[i].length == 0)
			continue;
		if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
				   &wqe->sg_list[j], &wr->sg_list[i],
				   acc)) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			ret = -EINVAL;
			goto bail;
		}
		wqe->length += wr->sg_list[i].length;
		j++;
	}
	wqe->wr.num_sge = j;
	qp->s_head = next;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	if (qp->ibqp.qp_type == IB_QPT_UC)
		ipath_do_uc_send((unsigned long) qp);
	else
		ipath_do_rc_send((unsigned long) qp);

	ret = 0;

bail:
	return ret;
}
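
ipath_post_rc_send() treats the send queue as a ring that always keeps one slot unused: if advancing s_head would land on s_last, head and tail could no longer be told apart from the empty case, so that situation is rejected as "queue full". A small sketch of just that test, as a hypothetical helper rather than driver code.

#include <linux/types.h>

static int send_ring_full(u32 s_head, u32 s_last, u32 s_size)
{
	u32 next = s_head + 1;

	if (next >= s_size)
		next = 0;
	return next == s_last;	/* non-zero: no room for another WQE */
}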
Example #28
0
/**
 * ipath_post_srq_receive - post a receive on a shared receive queue
 * @ibsrq: the SRQ to post the receive on
 * @wr: the list of work requests to post
 * @bad_wr: the first WR to cause a problem is put here
 *
 * This may be called from interrupt context.
 */
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			   struct ib_recv_wr **bad_wr)
{
	struct ipath_srq *srq = to_isrq(ibsrq);
	struct ipath_ibdev *dev = to_idev(ibsrq->device);
	unsigned long flags;
	int ret;

	for (; wr; wr = wr->next) {
		struct ipath_rwqe *wqe;
		u32 next;
		int i, j;

		if (wr->num_sge > srq->rq.max_sge) {
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		spin_lock_irqsave(&srq->rq.lock, flags);
		next = srq->rq.head + 1;
		if (next >= srq->rq.size)
			next = 0;
		if (next == srq->rq.tail) {
			spin_unlock_irqrestore(&srq->rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
		wqe->wr_id = wr->wr_id;
		wqe->sg_list[0].mr = NULL;
		wqe->sg_list[0].vaddr = NULL;
		wqe->sg_list[0].length = 0;
		wqe->sg_list[0].sge_length = 0;
		wqe->length = 0;
		for (i = 0, j = 0; i < wr->num_sge; i++) {
			/* Check LKEY */
			if (to_ipd(srq->ibsrq.pd)->user &&
			    wr->sg_list[i].lkey == 0) {
				spin_unlock_irqrestore(&srq->rq.lock,
						       flags);
				*bad_wr = wr;
				ret = -EINVAL;
				goto bail;
			}
			if (wr->sg_list[i].length == 0)
				continue;
			if (!ipath_lkey_ok(&dev->lk_table,
					   &wqe->sg_list[j],
					   &wr->sg_list[i],
					   IB_ACCESS_LOCAL_WRITE)) {
				spin_unlock_irqrestore(&srq->rq.lock,
						       flags);
				*bad_wr = wr;
				ret = -EINVAL;
				goto bail;
			}
			wqe->length += wr->sg_list[i].length;
			j++;
		}
		wqe->num_sge = j;
		srq->rq.head = next;
		spin_unlock_irqrestore(&srq->rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}
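
A hedged caller-side sketch of how a kernel consumer might reach ipath_post_srq_receive() through the generic ib_post_srq_recv() wrapper. The helper name and the DMA address, length, lkey and wr_id values are hypothetical; note the driver above insists on a non-zero lkey when the PD belongs to a user context.

#include <rdma/ib_verbs.h>

static int example_post_one_srq_recv(struct ib_srq *srq, u64 dma_addr,
				     u32 len, u32 lkey, u64 wr_id)
{
	struct ib_sge sge = {
		.addr	= dma_addr,
		.length	= len,
		.lkey	= lkey,		/* must be non-zero for user PDs */
	};
	struct ib_recv_wr wr = {
		.wr_id	 = wr_id,
		.sg_list = &sge,
		.num_sge = 1,		/* must not exceed srq->rq.max_sge */
	};
	struct ib_recv_wr *bad_wr;

	return ib_post_srq_recv(srq, &wr, &bad_wr);
}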
Example #29
0
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
	struct ipath_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else
		next = head + 1;
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = entry->slid;
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		smp_wmb();
	} else
		wc->kqueue[head] = *entry;
	wc->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
		cq->notify = IB_CQ_NONE;
		cq->triggered++;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		tasklet_hi_schedule(&cq->comptask);
	}

	spin_unlock_irqrestore(&cq->lock, flags);

	if (entry->status != IB_WC_SUCCESS)
		to_idev(cq->ibcq.device)->n_wqe_errs++;
}
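
ipath_cq_enter() is the producer side of a ring shared with a consumer that may be polling from user space: the completion entry is written first, smp_wmb() orders it ahead of the head update, and the full test (next == tail) reserves one slot so head == tail always means empty. A simplified consumer-side sketch pairing with that barrier; this is an illustration of the protocol, not the actual ipath_poll_cq(), 'nslots' would correspond to cq->ibcq.cqe + 1, and the indices are assumed to have been sanity-checked already.

#include <linux/types.h>
#include <asm/barrier.h>	/* smp_rmb(); header location varies by kernel version */

static int cq_ring_consume_one(u32 *headp, u32 *tailp, u32 nslots,
			       void (*handle)(u32 idx))
{
	u32 head = *headp;	/* written by the producer */
	u32 tail = *tailp;	/* owned by this consumer */

	if (tail == head)
		return 0;		/* ring is empty */
	smp_rmb();			/* pairs with the producer's smp_wmb() */
	handle(tail);			/* entry at 'tail' is now stable */
	if (++tail >= nslots)
		tail = 0;
	*tailp = tail;			/* hand the slot back to the producer */
	return 1;
}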
Example #30
0
/**
 * ipath_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from ipath_do_send() to
 * forward a WQE addressed to the same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void ipath_ruc_loopback(struct ipath_qp *sqp)
{
	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
	struct ipath_qp *qp;
	struct ipath_swqe *wqe;
	struct ipath_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;

	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= IPATH_S_BUSY;

again:
	if (sqp->s_last == sqp->s_head)
		goto clr_busy;
	wqe = get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
		 */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof wc);
	send_status = IB_WC_SUCCESS;

	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		if (wqe->length == 0)
			break;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
					    wqe->wr.wr.atomic.remote_addr,
					    wqe->wr.wr.atomic.rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = wqe->wr.wr.atomic.compare_add;
		*(u64 *) sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      sdata, wqe->wr.wr.atomic.swap);
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}

	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
		       wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	ipath_send_complete(sqp, wqe, send_status);
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
		goto clr_busy;
	sqp->s_flags |= IPATH_S_WAITING;
	dev->n_rnr_naks++;
	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
	ipath_insert_rnr_queue(sqp);
	goto clr_busy;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	ipath_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ipath_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~IPATH_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	if (qp && atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}
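
The atomic branch in the loopback path must report the value that was at the target address before the operation: atomic64_add_return() yields the post-add value, so the addend is subtracted back out, while cmpxchg() already returns the prior contents whether or not the swap took place. Isolated below as a hypothetical helper, assuming maddr points at a valid, 8-byte-aligned word that the rkey check has already verified.

#include <linux/types.h>
#include <linux/atomic.h>	/* atomic64_add_return(), cmpxchg(); header names vary by kernel version */

static u64 loopback_atomic_op(atomic64_t *maddr, u64 compare_add, u64 swap,
			      bool is_fetch_add)
{
	if (is_fetch_add)
		/* add_return() gives the new value; undo the add to get the old one */
		return (u64) atomic64_add_return(compare_add, maddr) -
			compare_add;
	/* compare-and-swap: the return value is always the old contents */
	return (u64) cmpxchg((u64 *) maddr, compare_add, swap);
}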