/*
 * siw_qp_prepare_tx()
 *
 * Prepare TX state for sending out one FPDU. Builds the complete packet
 * if no user data or only immediate data is present.
 *
 * Returns PKT_COMPLETE if a complete packet was built, PKT_FRAGMENTED otherwise.
 */
static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
{
	struct siw_wqe		*wqe = c_tx->wqe;
	u32			*crc = NULL;

	dprint(DBG_TX, "(QP%d):\n", TX_QPID(c_tx));

	switch (wr_type(wqe)) {

	case SIW_WR_RDMA_READ_REQ:
		memcpy(&c_tx->pkt.ctrl,
		       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
		       sizeof(struct iwarp_ctrl));

		c_tx->pkt.rreq.rsvd = 0;
		c_tx->pkt.rreq.ddp_qn = htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
		c_tx->pkt.rreq.ddp_msn =
			htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]);
		c_tx->pkt.rreq.ddp_mo = 0;
		c_tx->pkt.rreq.sink_stag = htonl(wqe->wr.rread.sge[0].lkey);
		c_tx->pkt.rreq.sink_to =
			cpu_to_be64(wqe->wr.rread.sge[0].addr); /* abs addr! */
		c_tx->pkt.rreq.source_stag = htonl(wqe->wr.rread.rtag);
		c_tx->pkt.rreq.source_to = cpu_to_be64(wqe->wr.rread.raddr);
		c_tx->pkt.rreq.read_size = htonl(wqe->bytes);

		dprint(DBG_TX, ": RREQ: Sink: %x, 0x%016llx\n",
			wqe->wr.rread.sge[0].lkey, wqe->wr.rread.sge[0].addr);

		c_tx->ctrl_len = sizeof(struct iwarp_rdma_rreq);
		crc = &c_tx->pkt.rreq_pkt.crc;
		break;

	case SIW_WR_SEND:
		if (wr_flags(wqe) & IB_SEND_SOLICITED)
			memcpy(&c_tx->pkt.ctrl,
			       &iwarp_pktinfo[RDMAP_SEND_SE].ctrl,
			       sizeof(struct iwarp_ctrl));
		else
			memcpy(&c_tx->pkt.ctrl,
			       &iwarp_pktinfo[RDMAP_SEND].ctrl,
			       sizeof(struct iwarp_ctrl));

		c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND;
		c_tx->pkt.send.ddp_msn =
			htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
		c_tx->pkt.send.ddp_mo = 0;
		c_tx->pkt.send.rsvd = 0;

		c_tx->ctrl_len = sizeof(struct iwarp_send);

		if (!wqe->bytes)
			crc = &c_tx->pkt.send_pkt.crc;
		break;

	case SIW_WR_RDMA_WRITE:
		memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_RDMA_WRITE].ctrl,
		       sizeof(struct iwarp_ctrl));

		c_tx->pkt.rwrite.sink_stag = htonl(wqe->wr.write.rtag);
		c_tx->pkt.rwrite.sink_to = cpu_to_be64(wqe->wr.write.raddr);
		c_tx->ctrl_len = sizeof(struct iwarp_rdma_write);

		if (!wqe->bytes)
			crc = &c_tx->pkt.write_pkt.crc;
		break;

	case SIW_WR_RDMA_READ_RESP:
		memcpy(&c_tx->pkt.ctrl,
		       &iwarp_pktinfo[RDMAP_RDMA_READ_RESP].ctrl,
		       sizeof(struct iwarp_ctrl));

		/* rtag is already in network byte order (NBO) */
		c_tx->pkt.rresp.sink_stag = wqe->wr.rresp.rtag;
		c_tx->pkt.rresp.sink_to = cpu_to_be64(wqe->wr.rresp.raddr);

		c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp);

		dprint(DBG_TX, ": RRESP: Sink: %x, 0x%016llx\n",
			wqe->wr.rresp.rtag, wqe->wr.rresp.raddr);

		if (!wqe->bytes)
			crc = &c_tx->pkt.rresp_pkt.crc;
		break;

	default:
		dprint(DBG_ON, "Unsupported WQE type %d\n", wr_type(wqe));
		BUG();
		break;
	}
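	/*
	 * Reset per-FPDU transmit progress: no control bytes sent yet,
	 * payload transmission restarts at the first SGE and page.
	 */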
	c_tx->ctrl_sent = 0;
	c_tx->sge_idx = 0;
	c_tx->sge_off = 0;
	c_tx->pg_idx = 0;
	c_tx->umem_chunk = NULL;

	/*
	 * Compute the complete CRC now if CRC is enabled and the packet
	 * is short (header only).
	 */
	if (crc) {
		*crc = 0;
		if (c_tx->crc_enabled) {
			if (siw_crc_txhdr(c_tx) != 0)
				return -EINVAL;
			crypto_hash_final(&c_tx->mpa_crc_hd, (u8 *)crc);
		}
	}
	c_tx->ctrl_len += MPA_CRC_SIZE;

	/*
	 * Allow sending directly out of the user buffer if the WR is not
	 * signalled, the payload is above the threshold and no CRC is
	 * enabled.
	 * Per RDMA verbs, the application must not change the send buffer
	 * until the work has completed. In iWARP, work completion only
	 * means local delivery to TCP; TCP may still read the buffer for
	 * retransmission. Changing unsent data would also break the CRC,
	 * if applied.
	 * Inline buffers are already out of user control and can be
	 * sent zero-copy.
	 */
	if (zcopy_tx &&
	     (!(wr_flags(wqe) & IB_SEND_SIGNALED) || SIW_INLINED_DATA(wqe)) &&
	     wqe->bytes > SENDPAGE_THRESH &&
	     wr_type(wqe) != SIW_WR_RDMA_READ_REQ)
		c_tx->use_sendpage = 1;
	else
		c_tx->use_sendpage = 0;

	return crc == NULL ? PKT_FRAGMENTED : PKT_COMPLETE;
}
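
A minimal, hypothetical sketch of how the two return codes could be consumed
by a transmit path (in the driver this is done by the SQ processing code
around siw_qp_sq_process(); the sketch only illustrates the return-value
contract, and example_send_hdr()/example_send_payload() are illustrative
placeholders, not driver functions):

static int example_tx_fpdu(struct siw_iwarp_tx *c_tx)
{
	/* example_send_hdr()/_payload(): hypothetical helpers, see note above */
	int rv = siw_qp_prepare_tx(c_tx);

	if (rv == PKT_COMPLETE)
		/* The header buffer already holds the entire FPDU. */
		return example_send_hdr(c_tx);

	if (rv == PKT_FRAGMENTED) {
		/* Send the header first, then the WQE's SGL payload. */
		rv = example_send_hdr(c_tx);
		if (!rv)
			rv = example_send_payload(c_tx);
	}
	return rv;	/* rv < 0: immediate error, e.g. failed CRC setup */
}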
Example #2
/*
 * siw_post_send()
 *
 * Post a list of send WRs to the SQ.
 *
 * @ofa_qp:	OFA QP contained in siw QP
 * @wr:		NULL-terminated list of user WRs
 * @bad_wr:	Points to the failing WR in case of synchronous failure.
 */
int siw_post_send(struct ib_qp *ofa_qp, struct ib_send_wr *wr,
		  struct ib_send_wr **bad_wr)
{
	struct siw_wqe	*wqe = NULL;
	struct siw_qp	*qp = siw_qp_ofa2siw(ofa_qp);

	unsigned long flags;
	int rv = 0;

	dprint(DBG_WR|DBG_TX, "(QP%d): state=%d\n",
		QP_ID(qp), qp->attrs.state);

	/*
	 * Try to acquire the QP state lock. Must be non-blocking
	 * to accommodate the needs of kernel clients.
	 */
	if (!down_read_trylock(&qp->state_lock)) {
		*bad_wr = wr;
		return -ENOTCONN;
	}

	if (qp->attrs.state != SIW_QP_STATE_RTS) {
		dprint(DBG_WR|DBG_ON, "(QP%d): state=%d\n",
			QP_ID(qp), qp->attrs.state);
		up_read(&qp->state_lock);
		*bad_wr = wr;
		return -ENOTCONN;
	}
	dprint(DBG_WR|DBG_TX, "(QP%d): sq_space(#1)=%d\n",
		QP_ID(qp), atomic_read(&qp->sq_space));

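	/*
	 * Walk the posted WR chain; processing stops at the first WR
	 * that fails.
	 */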
	while (wr) {
		wqe = siw_wqe_alloc(qp, opcode_ofa2siw(wr->opcode));
		if (!wqe) {
			dprint(DBG_ON, " siw_wqe_alloc\n");
			rv = -ENOMEM;
			break;
		}
		wr_type(wqe) = opcode_ofa2siw(wr->opcode);
		wr_id(wqe) = wr->wr_id;

		wr_flags(wqe) = wr->send_flags;
		if (qp->attrs.flags & SIW_SIGNAL_ALL_WR)
			wr_flags(wqe) |= IB_SEND_SIGNALED;

		if (wr->num_sge > qp->attrs.sq_max_sges) {
			/*
			 * NOTE: zero length WRs are allowed here; only an
			 * SGE count above sq_max_sges is rejected.
			 */
			dprint(DBG_WR, "(QP%d): Num SGE: %d\n",
				QP_ID(qp), wr->num_sge);
			rv = -EINVAL;
			break;
		}

		switch (wr->opcode) {

		case IB_WR_SEND:
			if (!SIW_INLINED_DATA(wqe)) {
				rv = siw_copy_sgl(wr->sg_list,
						  wqe->wr.send.sge,
						  wr->num_sge);
				wqe->wr.send.num_sge = wr->num_sge;
			} else
				rv = siw_copy_inline_sgl(wr, wqe);

			if (rv < 0) {
				rv = -EINVAL;
				break;
			}
			wqe->bytes = rv;
			break;

		case IB_WR_RDMA_READ:
			/*
			 * The OFED WR restricts the RREAD sink to an SGL
			 * containing one SGE only. We could relax this to
			 * an SGL with multiple elements referring to the
			 * SAME ltag, or even send a private per-rreq tag
			 * referring to a checked local SGL with MULTIPLE
			 * ltags. That would be easy to do...
			 */
			if (wr->num_sge != 1) {
				rv = -EINVAL;
				break;
			}
			rv = siw_copy_sgl(wr->sg_list, wqe->wr.rread.sge, 1);
			/*
			 * NOTE: zero length RREAD is allowed!
			 */
			wqe->wr.rread.raddr = wr->wr.rdma.remote_addr;
			wqe->wr.rread.rtag = wr->wr.rdma.rkey;
			wqe->wr.rread.num_sge = 1;
			wqe->bytes = rv;
			break;

		case IB_WR_RDMA_WRITE:
			if (!SIW_INLINED_DATA(wqe)) {
				rv = siw_copy_sgl(wr->sg_list,
						  wqe->wr.write.sge,
						  wr->num_sge);
				wqe->wr.write.num_sge = wr->num_sge;
			} else
				rv = siw_copy_inline_sgl(wr, wqe);
			/*
			 * NOTE: zero length WRITE is allowed!
			 */
			if (rv < 0) {
				rv = -EINVAL;
				break;
			}
			wqe->wr.write.raddr = wr->wr.rdma.remote_addr;
			wqe->wr.write.rtag = wr->wr.rdma.rkey;
			wqe->bytes = rv;
			break;

		default:
			dprint(DBG_WR|DBG_TX|DBG_ON,
				"(QP%d): Opcode %d not yet implemented\n",
				QP_ID(qp), wr->opcode);
			wqe->wr.sgl.num_sge = 0;
			rv = -ENOSYS;
			break;
		}
		dprint(DBG_WR|DBG_TX, "(QP%d): opcode %d, bytes %d, "
				"flags 0x%x\n",
				QP_ID(qp), wr_type(wqe), wqe->bytes,
				wr_flags(wqe));
		if (rv < 0)
			break;

		wqe->wr_status = SR_WR_QUEUED;

		lock_sq_rxsave(qp, flags);
		list_add_tail(&wqe->list, &qp->sq);
		unlock_sq_rxsave(qp, flags);

		wr = wr->next;
	}
	/*
	 * Send directly if SQ processing is not in progress.
	 * Any immediate errors (rv < 0) do not affect the involved
	 * RI resources (Verbs, 8.3.1) and thus do not prevent SQ
	 * processing if new work is already pending. But rv must be
	 * passed to the caller.
	 */
	lock_sq_rxsave(qp, flags);
	if (tx_wqe(qp) == NULL) {
		struct siw_wqe	*next = siw_next_tx_wqe(qp);
		if (next != NULL) {
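			/*
			 * Activate the next WQE unless it is an RDMA READ
			 * REQ held back by ORD_SUSPEND_SQ(). Non-READ WQEs
			 * are unlinked from the SQ; READ REQs are tracked
			 * via siw_rreq_queue() instead.
			 */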
			if (wr_type(next) != SIW_WR_RDMA_READ_REQ ||
			    !ORD_SUSPEND_SQ(qp)) {
				tx_wqe(qp) = next;
				if (wr_type(next) != SIW_WR_RDMA_READ_REQ)
					list_del_init(&next->list);
				else
					siw_rreq_queue(next, qp);

				unlock_sq_rxsave(qp, flags);

				dprint(DBG_WR|DBG_TX,
					"(QP%d): Direct sending...\n",
					QP_ID(qp));

				if (qp->attrs.flags & SIW_KERNEL_VERBS)
					siw_sq_queue_work(qp);
				else if (siw_qp_sq_process(qp, 1) != 0 &&
				    !(qp->tx_ctx.tx_suspend))
					siw_qp_cm_drop(qp, 0);
			} else
				unlock_sq_rxsave(qp, flags);
		} else
			unlock_sq_rxsave(qp, flags);
	} else
		unlock_sq_rxsave(qp, flags);

	up_read(&qp->state_lock);

	dprint(DBG_WR|DBG_TX, "(QP%d): sq_space(#2)=%d\n", QP_ID(qp),
		atomic_read(&qp->sq_space));
	if (rv >= 0)
		return 0;
	/*
	 * Immediate error
	 */
	dprint(DBG_WR|DBG_ON, "(QP%d): error=%d\n", QP_ID(qp), rv);

	if (wqe != NULL)
		siw_wqe_put(wqe);
	*bad_wr = wr;
	return rv;
}
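
For context, a minimal usage sketch (an assumption, not driver code) of how a
kernel ULP would reach siw_post_send(): it posts through the generic
ib_post_send() verb, which dispatches to the provider. The sketch uses the
legacy ib_send_wr layout seen above (wr.rdma union member); qp, DMA address,
keys and length are placeholders supplied by the caller:

#include <rdma/ib_verbs.h>

static int example_post_rdma_write(struct ib_qp *qp, u64 laddr, u32 lkey,
				   u64 raddr, u32 rkey, u32 len)
{
	struct ib_sge sge = {
		.addr	= laddr,	/* DMA address of the local buffer */
		.length	= len,
		.lkey	= lkey,
	};
	struct ib_send_wr wr = {
		.wr_id		= 1,			/* reported in the CQE */
		.opcode		= IB_WR_RDMA_WRITE,
		.send_flags	= IB_SEND_SIGNALED,	/* request a completion */
		.sg_list	= &sge,
		.num_sge	= 1,
	};
	struct ib_send_wr *bad_wr;

	wr.wr.rdma.remote_addr = raddr;
	wr.wr.rdma.rkey = rkey;

	return ib_post_send(qp, &wr, &bad_wr);
}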