Example #1
File: verbs.c  Project: MaxChina/linux
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC:       %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	rpcrdma_schedule_tasklet(rep);
}
int iser_send_control(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *mdesc = &iser_task->desc;
	unsigned long data_seg_len;
	int err = 0;
	struct iser_device *device;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	data_seg_len = ntoh24(task->hdr->dlength);

	if (data_seg_len > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		if (task != conn->login_task) {
			iser_err("data present on non login task!!!\n");
			goto send_control_error;
		}

		ib_dma_sync_single_for_cpu(device->ib_device,
			iser_conn->login_req_dma, task->data_count,
			DMA_TO_DEVICE);

		memcpy(iser_conn->login_req_buf, task->data, task->data_count);

		ib_dma_sync_single_for_device(device->ib_device,
			iser_conn->login_req_dma, task->data_count,
			DMA_TO_DEVICE);

		tx_dsg->addr    = iser_conn->login_req_dma;
		tx_dsg->length  = task->data_count;
		tx_dsg->lkey    = device->pd->local_dma_lkey;
		mdesc->num_sge = 2;
	}

	if (task == conn->login_task) {
		iser_dbg("op %x dsl %lx, posting login rx buffer\n",
			 task->hdr->opcode, data_seg_len);
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
		err = iser_post_rx_bufs(conn, task->hdr);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return 0;

send_control_error:
	iser_err("conn %p failed err %d\n", conn, err);
	return err;
}
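The login-data copy above illustrates the usual bracketing for CPU writes into a buffer that stays DMA-mapped in the DMA_TO_DEVICE direction: sync the range for the CPU, modify it, then sync it back for the device before it is referenced by a send. Below is a minimal sketch of that pattern, assuming a hypothetical long-lived mapped buffer; fill_mapped_send_buf() and its parameters are illustrative names, not part of the iSER driver.

#include <linux/errno.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper: copy 'len' bytes of payload into a buffer that was
 * mapped once with ib_dma_map_single(..., DMA_TO_DEVICE) and is reused for
 * many sends. */
static int fill_mapped_send_buf(struct ib_device *ibdev, void *buf_va,
				u64 buf_dma, size_t buf_size,
				const void *payload, size_t len)
{
	if (len > buf_size)
		return -EMSGSIZE;

	/* give ownership of the range to the CPU before touching it */
	ib_dma_sync_single_for_cpu(ibdev, buf_dma, len, DMA_TO_DEVICE);

	memcpy(buf_va, payload, len);

	/* hand the range back to the device before posting the send WR */
	ib_dma_sync_single_for_device(ibdev, buf_dma, len, DMA_TO_DEVICE);
	return 0;
}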
Example #3
void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_conn *ib_conn = wc->qp->qp_context;
	struct iser_conn *iser_conn = to_iser_conn(ib_conn);
	struct iser_login_desc *desc = iser_login(wc->wr_cqe);
	struct iscsi_hdr *hdr;
	char *data;
	int length;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		iser_err_comp(wc, "login_rsp");
		return;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
				   desc->rsp_dma, ISER_RX_LOGIN_SIZE,
				   DMA_FROM_DEVICE);

	hdr = desc->rsp + sizeof(struct iser_ctrl);
	data = desc->rsp + ISER_HEADERS_LEN;
	length = wc->byte_len - ISER_HEADERS_LEN;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
		 hdr->itt, length);

	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      desc->rsp_dma, ISER_RX_LOGIN_SIZE,
				      DMA_FROM_DEVICE);

	ib_conn->post_recv_buf_count--;
}
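The receive side uses the mirror-image bracket: the buffer is mapped DMA_FROM_DEVICE, synced for the CPU when the completion arrives, read, and synced back for the device so the same buffer can be reposted. Below is a minimal sketch of that life cycle; handle_rx(), consume_pdu() and RX_SIZE are illustrative names, not driver APIs.

#include <rdma/ib_verbs.h>

#define RX_SIZE 4096			/* illustrative receive buffer size */

/* assumed upper-layer consumer, stubbed for the sketch */
static void consume_pdu(void *buf, u32 len)
{
}

/* Hypothetical receive-completion body: read a buffer that stays mapped
 * DMA_FROM_DEVICE across many receives, then make it device-owned again
 * before the receive WR is reposted. */
static void handle_rx(struct ib_device *ibdev, void *rx_va, u64 rx_dma,
		      u32 byte_len)
{
	ib_dma_sync_single_for_cpu(ibdev, rx_dma, RX_SIZE, DMA_FROM_DEVICE);

	consume_pdu(rx_va, byte_len);

	ib_dma_sync_single_for_device(ibdev, rx_dma, RX_SIZE, DMA_FROM_DEVICE);
	/* the caller would now repost the receive WR that covers rx_dma */
}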
Example #4
File: verbs.c  Project: a2hojsjsjs/linux
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rep->rr_device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);
	prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
	queue_work(rpcrdma_receive_wq, &rep->rr_work);
	return;

out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC:       %s: rep %p: %s\n",
		       __func__, rep, ib_wc_status_msg(wc->status));
	rep->rr_len = RPCRDMA_BAD_LEN;
	goto out_schedule;
}
Example #5
static inline
void rpcrdma_event_process(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;

	dprintk("RPC:       %s: event rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (!rep) /* send or bind completion that we don't care about */
		return;

	if (IB_WC_SUCCESS != wc->status) {
		dprintk("RPC:       %s: %s WC status %X, connection lost\n",
			__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
			 wc->status);
		rep->rr_len = ~0U;
		rpcrdma_schedule_tasklet(rep);
		return;
	}

	switch (wc->opcode) {
	case IB_WC_RECV:
		rep->rr_len = wc->byte_len;
		ib_dma_sync_single_for_cpu(
			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
		/* Keep (only) the most recent credits, after checking validity */
		if (rep->rr_len >= 16) {
			struct rpcrdma_msg *p =
					(struct rpcrdma_msg *) rep->rr_base;
			unsigned int credits = ntohl(p->rm_credit);
			if (credits == 0) {
				dprintk("RPC:       %s: server"
					" dropped credits to 0!\n", __func__);
				/* don't deadlock */
				credits = 1;
			} else if (credits > rep->rr_buffer->rb_max_requests) {
				dprintk("RPC:       %s: server"
					" over-crediting: %d (%d)\n",
					__func__, credits,
					rep->rr_buffer->rb_max_requests);
				credits = rep->rr_buffer->rb_max_requests;
			}
			atomic_set(&rep->rr_buffer->rb_credits, credits);
		}
		/* fall through */
	case IB_WC_BIND_MW:
		rpcrdma_schedule_tasklet(rep);
		break;
	default:
		dprintk("RPC:       %s: unexpected WC event %X\n",
			__func__, wc->opcode);
		break;
	}
}
Example #6
static void isert_dma_sync_data_for_cpu(struct ib_device *ib_dev,
					struct ib_sge *sge, size_t size)
{
	size_t to_sync = size > (PAGE_SIZE - ISER_HDRS_SZ) ?
			 (PAGE_SIZE - ISER_HDRS_SZ) : size;
	ib_dma_sync_single_for_cpu(ib_dev, sge->addr + ISER_HDRS_SZ,
				   to_sync,
				   DMA_FROM_DEVICE);

	size -= to_sync;
	while (size) {
		++sge;
		to_sync = size > PAGE_SIZE ? PAGE_SIZE : size;
		ib_dma_sync_single_for_cpu(ib_dev, sge->addr,
					   to_sync,
					   DMA_FROM_DEVICE);

		size -= to_sync;
	}
}
/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iser_conn	*iser_conn,
				  struct iser_tx_desc	*tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;
	tx_desc->num_sge = 1;
}
/**
 * iser_rcv_completion - recv DTO completion handler
 */
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
			 unsigned long rx_xfer_len,
			 struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct iscsi_hdr *hdr;
	u64 rx_dma;
	int rx_buflen, outstanding, count, err;

	/* differentiate between the login PDU and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));

	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
			rx_xfer_len - ISER_HEADERS_LEN);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/* decrementing conn->post_recv_buf_count only --after-- freeing the    *
	 * task eliminates the need to worry about tasks which are completed    *
	 * in parallel to the execution of iser_conn_term. So the code that     *
	 * waits for the posted rx bufs refcount to become zero handles it all. */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		return;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
}
Example #9
void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_conn *ib_conn = wc->qp->qp_context;
	struct iser_conn *iser_conn = to_iser_conn(ib_conn);
	struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
	struct iscsi_hdr *hdr;
	int length;
	int outstanding, count, err;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		iser_err_comp(wc, "task_rsp");
		return;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
				   desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
				   DMA_FROM_DEVICE);

	hdr = &desc->iscsi_header;
	length = wc->byte_len - ISER_HEADERS_LEN;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
		 hdr->itt, length);

	if (iser_check_remote_inv(iser_conn, wc, hdr)) {
		iscsi_conn_failure(iser_conn->iscsi_conn,
				   ISCSI_ERR_CONN_FAILED);
		return;
	}

	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
				      DMA_FROM_DEVICE);

	/* decrementing conn->post_recv_buf_count only --after-- freeing the    *
	 * task eliminates the need to worry about tasks which are completed    *
	 * in parallel to the execution of iser_conn_term. So the code that     *
	 * waits for the posted rx bufs refcount to become zero handles it all. */
	ib_conn->post_recv_buf_count--;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
}
Example #10
/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iser_conn	*iser_conn,
				  struct iser_tx_desc	*tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
	}
}
Example #11
/**
 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: Completion Queue context
 * @wc: Work Completion object
 *
 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
 * the Receive completion handler could be running.
 */
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *rdma = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_recv_ctxt *ctxt;

	trace_svcrdma_wc_receive(wc);

	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);

	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	if (svc_rdma_post_recv(rdma))
		goto post_err;

	/* All wc fields are now known to be valid */
	ctxt->rc_byte_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
				   ctxt->rc_recv_sge.addr,
				   wc->byte_len, DMA_FROM_DEVICE);

	spin_lock(&rdma->sc_rq_dto_lock);
	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
	spin_unlock(&rdma->sc_rq_dto_lock);
	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
		svc_xprt_enqueue(&rdma->sc_xprt);
	goto out;

flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
post_err:
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
	svc_xprt_enqueue(&rdma->sc_xprt);
out:
	svc_xprt_put(&rdma->sc_xprt);
}
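Both this handler and the ones above recover their per-request context from wc->wr_cqe with container_of(), and they trust the remaining wc fields only after wc->status has been checked. Below is a minimal skeleton of that discipline; struct my_rx_ctxt and my_recv_done() are hypothetical names, not an existing API.

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Hypothetical per-receive context embedding the ib_cqe that was assigned
 * to recv_wr.wr_cqe when the buffer was posted. */
struct my_rx_ctxt {
	struct ib_cqe	cqe;		/* cqe.done = my_recv_done */
	u64		dma_addr;
	u32		byte_len;
};

static void my_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_rx_ctxt *ctxt =
		container_of(wc->wr_cqe, struct my_rx_ctxt, cqe);

	/* only wc->wr_cqe and wc->status are reliable until this check */
	if (wc->status != IB_WC_SUCCESS) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("recv failed: %s\n",
			       ib_wc_status_msg(wc->status));
		return;		/* the tear-down path would release ctxt here */
	}

	/* success: the rest of *wc is now valid */
	ctxt->byte_len = wc->byte_len;
	/* ...sync ctxt->dma_addr for the CPU and hand the data upward... */
}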
Example #12
/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rep->rr_device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);

	rpcrdma_update_granted_credits(rep);

out_schedule:
	queue_work(rpcrdma_receive_wq, &rep->rr_work);
	return;

out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	rep->rr_len = RPCRDMA_BAD_LEN;
	goto out_schedule;
}
Example #13
static void isert_recv_completion_handler(struct isert_wr *wr)
{
	struct isert_cmnd *pdu = wr->pdu;
	struct ib_sge *sge = wr->sge_list;
	struct ib_device *ib_dev = wr->isert_dev->ib_dev;
	int err;

	TRACE_ENTRY();

	ib_dma_sync_single_for_cpu(ib_dev, sge->addr,
				   ISER_HDRS_SZ,
				   DMA_FROM_DEVICE);
	isert_rx_pdu_parse_headers(pdu);
	isert_dma_sync_data_for_cpu(ib_dev, sge,
				    pdu->iscsi.pdu.datasize + pdu->iscsi.pdu.ahssize);

	switch (pdu->isert_opcode) {
	case ISER_ISCSI_CTRL:
		switch (pdu->iscsi_opcode) {
		case ISCSI_OP_NOP_OUT:
			err = isert_pdu_handle_nop_out(pdu);
			break;
		case ISCSI_OP_SCSI_CMD:
			err = isert_pdu_handle_scsi_cmd(pdu);
			break;
		case ISCSI_OP_SCSI_TASK_MGT_MSG:
			err = isert_pdu_handle_tm_func(pdu);
			break;
		case ISCSI_OP_LOGIN_CMD:
			err = isert_pdu_handle_login_req(pdu);
			break;
		case ISCSI_OP_TEXT_CMD:
			err = isert_pdu_handle_text(pdu);
			break;
		case ISCSI_OP_SCSI_DATA_OUT:
			err = isert_pdu_handle_data_out(pdu);
			break;
		case ISCSI_OP_LOGOUT_CMD:
			err = isert_pdu_handle_logout(pdu);
			break;
		case ISCSI_OP_SNACK_CMD:
			err = isert_pdu_handle_snack(pdu);
			break;
		default:
			pr_err("Unexpected iscsi opcode:0x%x\n",
			       pdu->iscsi_opcode);
			err = -EINVAL;
			break;
		}
		break;
	case ISER_HELLO:
		err = isert_pdu_handle_hello_req(pdu);
		break;
	default:
		pr_err("malformed isert_hdr, iser op:%x flags 0x%02x\n",
		       pdu->isert_opcode, pdu->isert_hdr->flags);
		err = -EINVAL;
		break;
	}

	if (unlikely(err)) {
		pr_err("err:%d while handling iser pdu\n", err);
		isert_conn_disconnect(wr->conn);
	}

	TRACE_EXIT();
}
Example #14
int vnic_rx_skb(struct vnic_login *login, struct vnic_rx_ring *ring,
		struct ib_wc *wc, int ip_summed, char *eth_hdr_va)
{
	u64 wr_id = (unsigned int)wc->wr_id;
	struct sk_buff *skb;
	int used_frags;
	char *va = eth_hdr_va;
	int length = wc->byte_len - VNIC_EOIB_HDR_SIZE - VNIC_VLAN_OFFSET(login),
	    linear_length = (length <= SMALL_PACKET_SIZE) ?
	    length : SMALL_PACKET_SIZE, hdr_len = min(length, HEADER_COPY_SIZE),
	    offest = NET_IP_ALIGN + 16;
	struct ib_device *ib_dev = login->port->dev->ca;

	/* alloc a small linear SKB */
	skb = alloc_skb(linear_length + offest, GFP_ATOMIC);
	if (unlikely(!skb))
		return -ENOMEM;

	skb_record_rx_queue(skb, ring->index);
	skb_reserve(skb, offest);

	if (vnic_linear_small_pkt && length <= SMALL_PACKET_SIZE) {
		u64 dma;

		/* We are copying all relevant data to the skb - temporarily
		 * synch buffers for the copy
		 */
		dma = ring->rx_info[wr_id].dma_addr[0] + VNIC_EOIB_HDR_SIZE +
			VNIC_VLAN_OFFSET(login);
		ib_dma_sync_single_for_cpu(ib_dev, dma, length,
					   DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		ib_dma_sync_single_for_device(ib_dev, dma, length,
					      DMA_FROM_DEVICE);
		skb->tail += length;
	} else {
		/* unmap the needed fragments and reallocate them. Fragments that
		 * were not used will not be reused as is. */
		used_frags = vnic_unmap_and_replace_rx(ring, ib_dev,
						       skb_shinfo(skb)->frags,
						       wr_id, wc->byte_len);
		if (!used_frags)
			goto free_and_repost;

		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, hdr_len);
		skb->tail += hdr_len;
		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset +=
		    (VNIC_EOIB_HDR_SIZE + VNIC_VLAN_OFFSET(login) +
		     hdr_len);

		/* Adjust size of first fragment */
		skb_shinfo(skb)->frags[0].size -=
		    (VNIC_EOIB_HDR_SIZE + VNIC_VLAN_OFFSET(login) +
		     hdr_len);
		skb->data_len = length - hdr_len;
	}

	/* update skb fields */
	skb->len = length;
	skb->truesize = length + sizeof(struct sk_buff);
	skb->ip_summed = ip_summed;
	skb->dev = login->dev;
	skb->protocol = eth_type_trans(skb, skb->dev);

	return vnic_rx(login, skb, wc);

free_and_repost:
	dev_kfree_skb(skb);
	return -ENODEV;

}