Code example #1
File: mthca_qp.c  Project: BackupTheBerlios/tuxap
int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		    struct ib_send_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	void *wqe;
	void *prev_wqe;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	u32 f0 = 0;
	int ind;
	u8 op0 = 0;

	static const u8 opcode[] = {
		[IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
		[IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
		[IB_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
		[IB_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
		[IB_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
		[IB_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
		[IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
	};
Code example #2
/*
 * Experimental functions
 */
struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
				    struct ib_exp_qp_init_attr *init_attr,
				    struct ib_udata *udata)
{
	int rwqe_size;
	struct ib_qp *qp;
	struct mlx4_ib_qp *mqp;
	int use_inlr;
	struct mlx4_ib_dev *dev;

	if ((init_attr->create_flags & IB_QP_CREATE_ATOMIC_BE_REPLY) &&
	    mlx4_is_little_endian())
		return ERR_PTR(-EINVAL);

	if (init_attr->max_inl_recv && !udata)
		return ERR_PTR(-EINVAL);

	use_inlr = mlx4_ib_qp_has_rq((struct ib_qp_init_attr *)init_attr) &&
		   init_attr->max_inl_recv && pd;
	if (use_inlr) {
		rwqe_size = roundup_pow_of_two(max(1U, init_attr->cap.max_recv_sge)) *
					       sizeof(struct mlx4_wqe_data_seg);
		if (rwqe_size < init_attr->max_inl_recv) {
			dev = to_mdev(pd->device);
			init_attr->max_inl_recv = min(init_attr->max_inl_recv,
						      (u32)(dev->dev->caps.max_rq_sg *
						      sizeof(struct mlx4_wqe_data_seg)));
			init_attr->cap.max_recv_sge = roundup_pow_of_two(init_attr->max_inl_recv) /
						      sizeof(struct mlx4_wqe_data_seg);
		}
	} else {
		init_attr->max_inl_recv = 0;
	}
	qp = mlx4_ib_create_qp(pd, (struct ib_qp_init_attr *)init_attr, udata);
	if (IS_ERR(qp))
		return qp;

	if (use_inlr) {
		mqp = to_mqp(qp);
		mqp->max_inlr_data = 1 << mqp->rq.wqe_shift;
		init_attr->max_inl_recv = mqp->max_inlr_data;
	}

	return qp;
}
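
As a rough illustration of the sizing above: each struct mlx4_wqe_data_seg is 16 bytes, so cap.max_recv_sge = 4 gives a 64-byte RWQE; if the requested max_inl_recv does not fit, the driver caps it at what the device's max_rq_sg segments can hold and grows cap.max_recv_sge accordingly. The sketch below is a hypothetical caller, not from the source; it only touches the fields this function reads, fields other than max_inl_recv and cap are assumed to mirror struct ib_qp_init_attr, and pd, the CQs and udata are assumed to exist already.

/* Hypothetical caller sketch: request inline receive data on an RC QP
 * through the experimental create path above.
 */
static struct ib_qp *example_create_inlr_qp(struct ib_pd *pd,
					    struct ib_cq *send_cq,
					    struct ib_cq *recv_cq,
					    struct ib_udata *udata)
{
	struct ib_exp_qp_init_attr init_attr = {};
	struct ib_qp *qp;

	init_attr.qp_type          = IB_QPT_RC;
	init_attr.send_cq          = send_cq;
	init_attr.recv_cq          = recv_cq;
	init_attr.cap.max_send_wr  = 64;
	init_attr.cap.max_recv_wr  = 64;
	init_attr.cap.max_send_sge = 1;
	init_attr.cap.max_recv_sge = 4;
	init_attr.max_inl_recv     = 256;	/* driver may adjust this */

	qp = mlx4_ib_exp_create_qp(pd, &init_attr, udata);
	if (!IS_ERR(qp))
		/* On success the field holds the granted inline-receive size. */
		pr_info("inline receive size granted: %u\n",
			init_attr.max_inl_recv);
	return qp;
}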
Code example #3
File: mthca_qp.c  Project: BackupTheBerlios/tuxap
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	void *mailbox = NULL;
	struct mthca_qp_param *qp_param;
	struct mthca_qp_context *qp_context;
	u32 req_param, opt_param;
	u8 status;
	int err;

	if (attr_mask & IB_QP_CUR_STATE) {
		if (attr->cur_qp_state != IB_QPS_RTR &&
		    attr->cur_qp_state != IB_QPS_RTS &&
		    attr->cur_qp_state != IB_QPS_SQD &&
		    attr->cur_qp_state != IB_QPS_SQE)
			return -EINVAL;
		else
			cur_state = attr->cur_qp_state;
	} else {
		spin_lock_irq(&qp->lock);
		cur_state = qp->state;
		spin_unlock_irq(&qp->lock);
	}

	if (attr_mask & IB_QP_STATE) {
		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
			return -EINVAL;
		new_state = attr->qp_state;
	} else
		new_state = cur_state;

	if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
		mthca_dbg(dev, "Illegal QP transition "
			  "%d->%d\n", cur_state, new_state);
		return -EINVAL;
	}

	req_param = state_table[cur_state][new_state].req_param[qp->transport];
	opt_param = state_table[cur_state][new_state].opt_param[qp->transport];

	if ((req_param & attr_mask) != req_param) {
		mthca_dbg(dev, "QP transition "
			  "%d->%d missing req attr 0x%08x\n",
			  cur_state, new_state,
			  req_param & ~attr_mask);
		return -EINVAL;
	}

	if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
		mthca_dbg(dev, "QP transition (transport %d) "
			  "%d->%d has extra attr 0x%08x\n",
			  qp->transport,
			  cur_state, new_state,
			  attr_mask & ~(req_param | opt_param |
						 IB_QP_STATE));
		return -EINVAL;
	}

	mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
	if (!mailbox)
		return -ENOMEM;
	qp_param = MAILBOX_ALIGN(mailbox);
	qp_context = &qp_param->context;
	memset(qp_param, 0, sizeof *qp_param);

	qp_context->flags      = cpu_to_be32((to_mthca_state(new_state) << 28) |
					     (to_mthca_st(qp->transport) << 16));
	qp_context->flags     |= cpu_to_be32(MTHCA_QP_BIT_DE);
	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
		qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
	else {
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
		switch (attr->path_mig_state) {
		case IB_MIG_MIGRATED:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
			break;
		case IB_MIG_REARM:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
			break;
		case IB_MIG_ARMED:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
			break;
		}
	}
	/* leave sched_queue as 0 */
	if (qp->transport == MLX || qp->transport == UD)
		qp_context->mtu_msgmax = cpu_to_be32((IB_MTU_2048 << 29) |
						     (11 << 24));
	else if (attr_mask & IB_QP_PATH_MTU) {
		qp_context->mtu_msgmax = cpu_to_be32((attr->path_mtu << 29) |
						     (31 << 24));
	}
	qp_context->usr_page   = cpu_to_be32(MTHCA_KAR_PAGE);
	qp_context->local_qpn  = cpu_to_be32(qp->qpn);
	if (attr_mask & IB_QP_DEST_QPN) {
		qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
	}

	if (qp->transport == MLX)
		qp_context->pri_path.port_pkey |=
			cpu_to_be32(to_msqp(qp)->port << 24);
	else {
		if (attr_mask & IB_QP_PORT) {
			qp_context->pri_path.port_pkey |=
				cpu_to_be32(attr->port_num << 24);
			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
		}
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		qp_context->pri_path.port_pkey |=
			cpu_to_be32(attr->pkey_index);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
	}

	if (attr_mask & IB_QP_AV) {
		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
		qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
			qp_context->pri_path.g_mylmc |= 1 << 7;
			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
			qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
			qp_context->pri_path.sl_tclass_flowlabel =
				cpu_to_be32((attr->ah_attr.sl << 28)                |
					    (attr->ah_attr.grh.traffic_class << 20) |
					    (attr->ah_attr.grh.flow_label));
			memcpy(qp_context->pri_path.rgid,
			       attr->ah_attr.grh.dgid.raw, 16);
		} else {
			qp_context->pri_path.sl_tclass_flowlabel =
				cpu_to_be32(attr->ah_attr.sl << 28);
		}
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
	}

	if (attr_mask & IB_QP_TIMEOUT) {
		qp_context->pri_path.ackto = attr->timeout;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
	}

	/* XXX alt_path */

	/* leave rdd as 0 */
	qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
	/* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
	qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
	qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
					     (MTHCA_FLIGHT_LIMIT << 24) |
					     MTHCA_QP_BIT_SRE           |
					     MTHCA_QP_BIT_SWE           |
					     MTHCA_QP_BIT_SAE);
	if (qp->sq.policy == IB_SIGNAL_ALL_WR)
		qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
	if (attr_mask & IB_QP_RETRY_CNT) {
		qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
						       ffs(attr->max_dest_rd_atomic) - 1 : 0,
						       7) << 21);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
	}

	if (attr_mask & IB_QP_SQ_PSN)
		qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
	qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);

	if (attr_mask & IB_QP_ACCESS_FLAGS) {
		/*
		 * Only enable RDMA/atomics if we have responder
		 * resources set to a non-zero value.
		 */
		if (qp->resp_depth) {
			qp_context->params2 |=
				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
					    MTHCA_QP_BIT_RWE : 0);
			qp_context->params2 |=
				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
					    MTHCA_QP_BIT_RRE : 0);
			qp_context->params2 |=
				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
					    MTHCA_QP_BIT_RAE : 0);
		}

		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
							MTHCA_QP_OPTPAR_RRE |
							MTHCA_QP_OPTPAR_RAE);

		qp->atomic_rd_en = attr->qp_access_flags;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		u8 rra_max;

		if (qp->resp_depth && !attr->max_rd_atomic) {
			/*
			 * Lowering our responder resources to zero.
			 * Turn off RDMA/atomics as responder.
			 * (RWE/RRE/RAE in params2 already zero)
			 */
			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
								MTHCA_QP_OPTPAR_RRE |
								MTHCA_QP_OPTPAR_RAE);
		}

		if (!qp->resp_depth && attr->max_rd_atomic) {
			/*
			 * Increasing our responder resources from
			 * zero.  Turn on RDMA/atomics as appropriate.
			 */
			qp_context->params2 |=
				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
					    MTHCA_QP_BIT_RWE : 0);
			qp_context->params2 |=
				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
					    MTHCA_QP_BIT_RRE : 0);
			qp_context->params2 |=
				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
					    MTHCA_QP_BIT_RAE : 0);

			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
								MTHCA_QP_OPTPAR_RRE |
								MTHCA_QP_OPTPAR_RAE);
		}

		for (rra_max = 0;
		     1 << rra_max < attr->max_rd_atomic &&
			     rra_max < dev->qp_table.rdb_shift;
		     ++rra_max)
			; /* nothing */

		qp_context->params2      |= cpu_to_be32(rra_max << 21);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);

		qp->resp_depth = attr->max_rd_atomic;
	}

	if (qp->rq.policy == IB_SIGNAL_ALL_WR)
		qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
	}
	if (attr_mask & IB_QP_RQ_PSN)
		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

	qp_context->ra_buff_indx = dev->qp_table.rdb_base +
		((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
		 dev->qp_table.rdb_shift);

	qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);

	if (attr_mask & IB_QP_QKEY) {
		qp_context->qkey = cpu_to_be32(attr->qkey);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
	}

	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
			      qp->qpn, 0, qp_param, 0, &status);
	if (status) {
		mthca_warn(dev, "modify QP %d returned status %02x.\n",
			   state_table[cur_state][new_state].trans, status);
		err = -EINVAL;
	}

	if (!err)
		qp->state = new_state;

	kfree(mailbox);

	if (is_sqp(dev, qp))
		store_attrs(to_msqp(qp), attr, attr_mask);

	/*
	 * If we are moving QP0 to RTR, bring the IB link up; if we
	 * are moving QP0 to RESET or ERROR, bring the link back down.
	 */
	if (is_qp0(dev, qp)) {
		if (cur_state != IB_QPS_RTR &&
		    new_state == IB_QPS_RTR)
			init_port(dev, to_msqp(qp)->port);

		if (cur_state != IB_QPS_RESET &&
		    cur_state != IB_QPS_ERR &&
		    (new_state == IB_QPS_RESET ||
		     new_state == IB_QPS_ERR))
			mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
	}

	return err;
}
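
Callers normally reach this function through the ib_modify_qp() verb. Below is a hypothetical sketch (not from the source) of an RC INIT-to-RTR transition with placeholder values; the attribute mask is roughly what the req_param check above demands for that transition.

/* Illustrative caller sketch: drive an RC QP from INIT to RTR via the
 * generic verbs call, which lands in mthca_modify_qp() above.  All
 * values (remote_qpn, remote_psn, remote_lid, port) are placeholders.
 */
static int example_qp_to_rtr(struct ib_qp *qp, u32 remote_qpn,
			     u32 remote_psn, u16 remote_lid, u8 port)
{
	struct ib_qp_attr attr = {
		.qp_state           = IB_QPS_RTR,
		.path_mtu           = IB_MTU_1024,
		.dest_qp_num        = remote_qpn,
		.rq_psn             = remote_psn,
		.max_dest_rd_atomic = 1,
		.min_rnr_timer      = 12,
		.ah_attr = {
			.dlid          = remote_lid,
			.sl            = 0,
			.src_path_bits = 0,
			.port_num      = port,
		},
	};

	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
			    IB_QP_DEST_QPN | IB_QP_RQ_PSN |
			    IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
}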
Code example #4
int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		   struct ibv_recv_wr **bad_wr)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	struct mlx4_wqe_data_seg *scat;
	int ret = 0;
	int nreq;
	int ind;
	int i;

	pthread_spin_lock(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->rq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		scat = get_recv_wqe(qp, ind);

		for (i = 0; i < wr->num_sge; ++i)
			__set_data_seg(scat + i, wr->sg_list + i);

		if (i < qp->rq.max_gs) {
			scat[i].byte_count = 0;
			scat[i].lkey       = htonl(MLX4_INVALID_LKEY);
			scat[i].addr       = 0;
		}

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (nreq) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*qp->db = htonl(qp->rq.head & 0xffff);
	}

	pthread_spin_unlock(&qp->rq.lock);

	return ret;
}
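
For context, a hypothetical userspace caller would reach this routine through ibv_post_recv(). The sketch below is illustrative only and assumes qp, mr and the receive buffer were created elsewhere.

/* Illustrative caller sketch (userspace): post a single receive buffer
 * through libibverbs; ibv_post_recv() dispatches to the provider's
 * mlx4_post_recv() above.
 */
static int example_user_post_recv(struct ibv_qp *qp, struct ibv_mr *mr,
				  void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t) buf,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_recv_wr wr = {
		.wr_id   = (uintptr_t) buf,	/* returned in the completion */
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;

	return ibv_post_recv(qp, &wr, &bad_wr);
}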
Code example #5
int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
			  struct ibv_send_wr **bad_wr)
{
	struct mlx4_context *ctx;
	struct mlx4_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl;
	int ind;
	int nreq;
	int inl = 0;
	int ret = 0;
	int size;
	int i;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->xrcrb_flags =
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 htonl(MLX4_WQE_CTRL_SOLICIT) : 0)   |
			qp->sq_signal_bits;

		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC:
			ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
			/* fall thru */
		case IBV_QPT_RC:
		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IBV_WR_RDMA_READ:
				inl = 1;
				/* fall through */
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;

				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(wqe, wr);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			if (to_mah(wr->wr.ud.ah)->tagged) {
				ctrl->ins_vlan = 1 << 6;
				ctrl->vlan_tag = htons(to_mah(wr->wr.ud.ah)->vlan);
			}

			break;

		default:
			break;
		}

		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			struct mlx4_wqe_inline_seg *seg;
			void *addr;
			int len, seg_len;
			int num_seg;
			int off, to_copy;

			inl = 0;

			seg = wqe;
			wqe += sizeof *seg;
			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
			num_seg = 0;
			seg_len = 0;

			for (i = 0; i < wr->num_sge; ++i) {
				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
				len  = wr->sg_list[i].length;
				inl += len;

				if (inl > qp->max_inline_data) {
					inl = 0;
					ret = -1;
					*bad_wr = wr;
					goto out;
				}

				while (len >= MLX4_INLINE_ALIGN - off) {
					to_copy = MLX4_INLINE_ALIGN - off;
					memcpy(wqe, addr, to_copy);
					len -= to_copy;
					wqe += to_copy;
					addr += to_copy;
					seg_len += to_copy;
					wmb(); /* see comment below */
					seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
					seg_len = 0;
					seg = wqe;
					wqe += sizeof *seg;
					off = sizeof *seg;
					++num_seg;
				}

				memcpy(wqe, addr, len);
				wqe += len;
				seg_len += len;
				off += len;
			}

			if (seg_len) {
				++num_seg;
				/*
				 * Need a barrier here to make sure
				 * all the data is visible before the
				 * byte_count field is set.  Otherwise
				 * the HCA prefetcher could grab the
				 * 64-byte chunk with this inline
				 * segment and get a valid (!=
				 * 0xffffffff) byte count but stale
				 * data, and end up sending the wrong
				 * data.
				 */
				wmb();
				seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
			}

			size += (inl + num_seg * sizeof * seg + 15) / 16;
		} else {
			struct mlx4_wqe_data_seg *seg = wqe;

			for (i = wr->num_sge - 1; i >= 0 ; --i)
				set_data_seg(seg + i, wr->sg_list + i);

			size += wr->num_sge * (sizeof *seg / 16);
		}

		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		wmb();

		ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
			(ind & qp->sq.wqe_cnt ? htonl(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	ctx = to_mctx(ibqp->context);

	if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
		*(uint32_t *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
		/*
		 * Make sure that descriptor is written to memory
		 * before writing to BlueFlame page.
		 */
		wmb();

		++qp->sq.head;

		pthread_spin_lock(&ctx->bf_lock);

		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
			     align(size * 16, 64));
		wc_wmb();

		ctx->bf_offset ^= ctx->bf_buf_size;

		pthread_spin_unlock(&ctx->bf_lock);
	} else if (nreq) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
	}

	if (nreq)
		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));

	pthread_spin_unlock(&qp->sq.lock);

	return ret;
}
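
Likewise, a hypothetical userspace caller reaches this routine through ibv_post_send(); the sketch below posts one signaled SEND and assumes qp, mr and buf already exist.

/* Illustrative caller sketch (userspace): post one signaled SEND
 * through libibverbs; ibv_post_send() dispatches to mlx4_post_send()
 * above.
 */
static int example_user_post_send(struct ibv_qp *qp, struct ibv_mr *mr,
				  void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t) buf,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_send_wr wr = {
		.wr_id      = (uintptr_t) buf,
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode     = IBV_WR_SEND,
		.send_flags = IBV_SEND_SIGNALED,	/* request a completion */
	};
	struct ibv_send_wr *bad_wr;

	return ibv_post_send(qp, &wr, &bad_wr);
}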
Code example #6
File: main.c  Project: 3sOx/asuswrt-merlin
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
				     &to_mqp(ibqp)->mqp, gid->raw);
}
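
This is the provider hook behind the ib_detach_mcast() verb; a hypothetical kernel consumer leaving a group might look like the sketch below, with mgid and mlid assumed to come from an earlier multicast join.

/* Illustrative caller sketch: leave a multicast group.  The core
 * ib_detach_mcast() routes to the device's detach hook, here
 * mlx4_ib_mcg_detach().
 */
static void example_leave_mcast(struct ib_qp *qp, union ib_gid *mgid, u16 mlid)
{
	int ret = ib_detach_mcast(qp, mgid, mlid);

	if (ret)
		pr_warn("multicast detach failed: %d\n", ret);
}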