static int
dpdk_knidev_writer_tx(void *port, struct rte_mbuf *pkt)
{
    struct dpdk_knidev_writer *p = (struct dpdk_knidev_writer *) port;
    struct rte_mbuf *pkt_copy;

    /*
     * The KNI kernel module uses a trick to speed up packet processing. It
     * takes the physical address of a memory pool, converts it to a kernel
     * virtual address with phys_to_virt() and saves that address.
     *
     * Then, in kni_net_rx_normal(), instead of calling phys_to_virt() for
     * each packet, KNI just calculates the difference between the mempool's
     * previously converted physical address and the packet's physical
     * address.
     *
     * This works for mbufs from the same mempool, and also for any mempool
     * allocated from the same physically contiguous memory segment.
     *
     * As soon as a mempool is allocated from another memory segment, the
     * offset calculation breaks and may cause a crash.
     *
     * So we make sure the packet comes from the RSS mempool; if not, we copy
     * it into the RSS mempool.
     */
    if (unlikely(pkt->pool != vr_dpdk.rss_mempool ||
            /* Check that an indirect mbuf's data lies within the RSS mempool. */
            rte_pktmbuf_mtod(pkt, uintptr_t) < vr_dpdk.rss_mempool->elt_va_start ||
            rte_pktmbuf_mtod(pkt, uintptr_t) > vr_dpdk.rss_mempool->elt_va_end
            )) {
        pkt_copy = vr_dpdk_pktmbuf_copy(pkt, vr_dpdk.rss_mempool);
        /* The original mbuf is no longer needed. */
        vr_dpdk_pfree(pkt, VP_DROP_CLONED_ORIGINAL);

        if (unlikely(pkt_copy == NULL)) {
            DPDK_KNIDEV_WRITER_STATS_PKTS_DROP_ADD(p, 1);
            return -1;
        }

        pkt = pkt_copy;
    }

    p->tx_buf[p->tx_buf_count++] = pkt;
    DPDK_KNIDEV_WRITER_STATS_PKTS_IN_ADD(p, 1);
    if (p->tx_buf_count >= p->tx_burst_sz)
        send_burst(p);

    return 0;
}
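
The branch above can be read as a standalone predicate. Below is a minimal sketch of the same check, assuming the pre-16.07 DPDK mempool layout used by this snippet (the elt_va_start/elt_va_end fields); the helper name is hypothetical.

#include <stdbool.h>
#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Hypothetical helper: true if the mbuf belongs to the given mempool and its
 * data pointer falls inside that mempool's virtual address range. Mirrors the
 * condition used in dpdk_knidev_writer_tx() above. */
static inline bool
mbuf_data_in_mempool(const struct rte_mbuf *m, const struct rte_mempool *mp)
{
    uintptr_t data = rte_pktmbuf_mtod(m, uintptr_t);

    return m->pool == mp &&
           data >= mp->elt_va_start &&
           data <= mp->elt_va_end;
}

With such a helper, the condition in dpdk_knidev_writer_tx() reduces to if (unlikely(!mbuf_data_in_mempool(pkt, vr_dpdk.rss_mempool))).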
Example #2
File: mlx5_rxtx.c  Project: 0day-ci/dpdk
/**
 * Iterator function for rte_mempool_walk() to register existing mempools and
 * fill the MP to MR cache of a TX queue.
 *
 * @param[in] mp
 *   Memory Pool to register.
 * @param *arg
 *   Pointer to TX queue structure.
 */
void
txq_mp2mr_iter(const struct rte_mempool *mp, void *arg)
{
	struct txq *txq = arg;
	struct txq_mp2mr_mbuf_check_data data = {
		.mp = mp,
		.ret = -1,
	};

	/* Discard empty mempools. */
	if (mp->size == 0)
		return;
	/* Register the mempool only if the first element looks like an mbuf. */
	rte_mempool_obj_iter((void *)mp->elt_va_start,
			     1,
			     mp->header_size + mp->elt_size + mp->trailer_size,
			     1,
			     mp->elt_pa,
			     mp->pg_num,
			     mp->pg_shift,
			     txq_mp2mr_mbuf_check,
			     &data);
	if (data.ret)
		return;
	txq_mp2mr(txq, mp);
}
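
In the driver, this iterator is meant to be handed to rte_mempool_walk(), which calls it once for every existing mempool; the sketch below shows that call in the context of this file during TX queue setup. The wrapper name is hypothetical, and it assumes the DPDK release this snippet targets, where the walk callback takes a const mempool pointer.

/* Sketch: register every existing mempool with a freshly configured TX queue.
 * rte_mempool_walk() invokes txq_mp2mr_iter() once per mempool. */
static void
txq_register_existing_mempools(struct txq *txq)
{
	rte_mempool_walk(txq_mp2mr_iter, txq);
}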

/**
 * Insert VLAN using mbuf headroom space.
 *
 * @param buf
 *   Buffer for VLAN insertion.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static inline int
insert_vlan_sw(struct rte_mbuf *buf)
{
	uintptr_t addr;
	uint32_t vlan;
	uint16_t head_room_len = rte_pktmbuf_headroom(buf);

	if (head_room_len < 4)
		return EINVAL;

	addr = rte_pktmbuf_mtod(buf, uintptr_t);
	vlan = htonl(0x81000000 | buf->vlan_tci);
	memmove((void *)(addr - 4), (void *)addr, 12);
	memcpy((void *)(addr + 8), &vlan, sizeof(vlan));

	SET_DATA_OFF(buf, head_room_len - 4);
	DATA_LEN(buf) += 4;

	return 0;
}
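
The pointer arithmetic is easier to see outside the mbuf API. The standalone program below reproduces the same shuffle on a plain byte array: the 12 MAC-address bytes move 4 bytes toward the start of a simulated headroom, and the 0x8100 TPID plus TCI land right after them, in front of the original EtherType. The buffer size, addresses and TCI value are illustrative.

#include <arpa/inet.h>  /* htonl() */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	uint8_t frame[32] = {0};        /* scratch buffer */
	uint8_t *data = frame + 4;      /* header starts after 4 bytes of "headroom" */
	uint16_t vlan_tci = 100;        /* VLAN ID 100, PCP/DEI = 0 */
	uint32_t vlan = htonl(0x81000000 | vlan_tci);
	int i;

	/* Untagged header: dst MAC, src MAC, EtherType 0x0800 (IPv4). */
	memcpy(data, "\xaa\xaa\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb\xbb\xbb\x08\x00", 14);

	/* Same two steps as insert_vlan_sw(): shift the MACs 4 bytes into the
	 * headroom, then write the tag at data + 8, i.e. offset 12 of the new
	 * header, leaving the original EtherType right after it. */
	memmove(data - 4, data, 12);
	memcpy(data + 8, &vlan, sizeof(vlan));

	/* Prints: aa x6, bb x6, 81 00 00 64, 08 00 */
	for (i = 0; i < 18; i++)
		printf("%02x ", frame[i]);
	printf("\n");
	return 0;
}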
Example #3
File: mlx5_rxtx.c  Project: 0day-ci/dpdk
#if MLX5_PMD_SGE_WR_N > 1

/* Handle scattered buffers for mlx5_tx_burst(). */
static struct tx_burst_sg_ret {
	unsigned int length;
	unsigned int num;
}
tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
	    struct rte_mbuf *buf, unsigned int elts_head,
	    struct ibv_sge (*sges)[MLX5_PMD_SGE_WR_N])
{
	unsigned int sent_size = 0;
	unsigned int j;
	int linearize = 0;

	/* When there are too many segments, extra segments are
	 * linearized in the last SGE. */
	if (unlikely(segs > RTE_DIM(*sges))) {
		segs = (RTE_DIM(*sges) - 1);
		linearize = 1;
	}
	/* Update element. */
	elt->buf = buf;
	/* Register segments as SGEs. */
	for (j = 0; (j != segs); ++j) {
		struct ibv_sge *sge = &(*sges)[j];
		uint32_t lkey;

		/* Retrieve Memory Region key for this memory pool. */
		lkey = txq_mp2mr(txq, txq_mb2mp(buf));
		if (unlikely(lkey == (uint32_t)-1)) {
			/* MR does not exist. */
			DEBUG("%p: unable to get MP <-> MR association",
			      (void *)txq);
			/* Clean up TX element. */
			elt->buf = NULL;
			goto stop;
		}
		/* Update SGE. */
		sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
		if (txq->priv->vf)
			rte_prefetch0((volatile void *)
				      (uintptr_t)sge->addr);
		sge->length = DATA_LEN(buf);
		sge->lkey = lkey;
		sent_size += sge->length;
		buf = NEXT(buf);
	}
	/* If buf is not NULL here and is not going to be linearized,
	 * nb_segs is not valid. */
	assert(j == segs);
	assert((buf == NULL) || (linearize));
	/* Linearize extra segments. */
	if (linearize) {
		struct ibv_sge *sge = &(*sges)[segs];
		linear_t *linear = &(*txq->elts_linear)[elts_head];
		unsigned int size = linearize_mbuf(linear, buf);

		assert(segs == (RTE_DIM(*sges) - 1));
		if (size == 0) {
			/* Invalid packet. */
			DEBUG("%p: packet too large to be linearized.",
			      (void *)txq);
			/* Clean up TX element. */
			elt->buf = NULL;
			goto stop;
		}
		/* If MLX5_PMD_SGE_WR_N is 1, free mbuf immediately. */
		if (RTE_DIM(*sges) == 1) {
			do {
				struct rte_mbuf *next = NEXT(buf);

				rte_pktmbuf_free_seg(buf);
				buf = next;
			} while (buf != NULL);
			elt->buf = NULL;
		}
		/* Update SGE. */
		sge->addr = (uintptr_t)&(*linear)[0];
		sge->length = size;
		sge->lkey = txq->mr_linear->lkey;
		sent_size += size;
		/* Include last segment. */
		segs++;
	}
	return (struct tx_burst_sg_ret){
		.length = sent_size,
		.num = segs,
	};
stop:
	return (struct tx_burst_sg_ret){
		.length = -1,
		.num = -1,
	};
}

#endif /* MLX5_PMD_SGE_WR_N > 1 */

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	unsigned int elts_head = txq->elts_head;
	const unsigned int elts_n = txq->elts_n;
	unsigned int elts_comp_cd = txq->elts_comp_cd;
	unsigned int elts_comp = 0;
	unsigned int i;
	unsigned int max;
	int err;
	struct rte_mbuf *buf = pkts[0];

	assert(elts_comp_cd != 0);
	/* Prefetch first packet cacheline. */
	rte_prefetch0(buf);
	txq_complete(txq);
	max = (elts_n - (elts_head - txq->elts_tail));
	if (max > elts_n)
		max -= elts_n;
	assert(max >= 1);
	assert(max <= elts_n);
	/* Always leave one free entry in the ring. */
	--max;
	if (max == 0)
		return 0;
	if (max > pkts_n)
		max = pkts_n;
	for (i = 0; (i != max); ++i) {
		struct rte_mbuf *buf_next = pkts[i + 1];
		unsigned int elts_head_next =
			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
		struct txq_elt *elt = &(*txq->elts)[elts_head];
		unsigned int segs = NB_SEGS(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
		unsigned int sent_size = 0;
#endif
		uint32_t send_flags = 0;
#ifdef HAVE_VERBS_VLAN_INSERTION
		int insert_vlan = 0;
#endif /* HAVE_VERBS_VLAN_INSERTION */

		if (i + 1 < max)
			rte_prefetch0(buf_next);
		/* Request TX completion. */
		if (unlikely(--elts_comp_cd == 0)) {
			elts_comp_cd = txq->elts_comp_cd_init;
			++elts_comp;
			send_flags |= IBV_EXP_QP_BURST_SIGNALED;
		}
		/* Should we enable HW CKSUM offload */
		if (buf->ol_flags &
		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
			send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
			/* HW does not support checksum offloads at arbitrary
			 * offsets but automatically recognizes the packet
			 * type. For inner L3/L4 checksums, only VXLAN (UDP)
			 * tunnels are currently supported. */
			if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
				send_flags |= IBV_EXP_QP_BURST_TUNNEL;
		}
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
#ifdef HAVE_VERBS_VLAN_INSERTION
			if (!txq->priv->mps)
				insert_vlan = 1;
			else
#endif /* HAVE_VERBS_VLAN_INSERTION */
			{
				err = insert_vlan_sw(buf);
				if (unlikely(err))
					goto stop;
			}
		}
		if (likely(segs == 1)) {
			uintptr_t addr;
			uint32_t length;
			uint32_t lkey;
			uintptr_t buf_next_addr;

			/* Retrieve buffer information. */
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			length = DATA_LEN(buf);
			/* Update element. */
			elt->buf = buf;
			if (txq->priv->vf)
				rte_prefetch0((volatile void *)
					      (uintptr_t)addr);
			/* Prefetch next buffer data. */
			if (i + 1 < max) {
				buf_next_addr =
					rte_pktmbuf_mtod(buf_next, uintptr_t);
				rte_prefetch0((volatile void *)
					      (uintptr_t)buf_next_addr);
			}
			/* Put packet into send queue. */
#if MLX5_PMD_MAX_INLINE > 0
			if (length <= txq->max_inline) {
#ifdef HAVE_VERBS_VLAN_INSERTION
				if (insert_vlan)
					err = txq->send_pending_inline_vlan
						(txq->qp,
						 (void *)addr,
						 length,
						 send_flags,
						 &buf->vlan_tci);
				else
#endif /* HAVE_VERBS_VLAN_INSERTION */
					err = txq->send_pending_inline
						(txq->qp,
						 (void *)addr,
						 length,
						 send_flags);
			} else
#endif
			{
				/* Retrieve Memory Region key for this
				 * memory pool. */
				lkey = txq_mp2mr(txq, txq_mb2mp(buf));
				if (unlikely(lkey == (uint32_t)-1)) {
					/* MR does not exist. */
					DEBUG("%p: unable to get MP <-> MR"
					      " association", (void *)txq);
					/* Clean up TX element. */
					elt->buf = NULL;
					goto stop;
				}
#ifdef HAVE_VERBS_VLAN_INSERTION
				if (insert_vlan)
					err = txq->send_pending_vlan
						(txq->qp,
						 addr,
						 length,
						 lkey,
						 send_flags,
						 &buf->vlan_tci);
				else
#endif /* HAVE_VERBS_VLAN_INSERTION */
					err = txq->send_pending
						(txq->qp,
						 addr,
						 length,
						 lkey,
						 send_flags);
			}
			if (unlikely(err))
				goto stop;
#ifdef MLX5_PMD_SOFT_COUNTERS
			sent_size += length;
#endif
		} else {
#if MLX5_PMD_SGE_WR_N > 1
			struct ibv_sge sges[MLX5_PMD_SGE_WR_N];
			struct tx_burst_sg_ret ret;

			ret = tx_burst_sg(txq, segs, elt, buf, elts_head,
					  &sges);
			if (ret.length == (unsigned int)-1)
				goto stop;
			/* Put SG list into send queue. */
#ifdef HAVE_VERBS_VLAN_INSERTION
			if (insert_vlan)
				err = txq->send_pending_sg_list_vlan
					(txq->qp,
					 sges,
					 ret.num,
					 send_flags,
					 &buf->vlan_tci);
			else
#endif /* HAVE_VERBS_VLAN_INSERTION */
				err = txq->send_pending_sg_list
					(txq->qp,
					 sges,
					 ret.num,
					 send_flags);
			if (unlikely(err))
				goto stop;
#ifdef MLX5_PMD_SOFT_COUNTERS
			sent_size += ret.length;
#endif
#else /* MLX5_PMD_SGE_WR_N > 1 */
			DEBUG("%p: TX scattered buffers support not"
			      " compiled in", (void *)txq);
			goto stop;
#endif /* MLX5_PMD_SGE_WR_N > 1 */
		}
		elts_head = elts_head_next;
		buf = buf_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += sent_size;
#endif
	}
stop:
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	err = txq->send_flush(txq->qp);
	if (unlikely(err)) {
		/* A nonzero value is not supposed to be returned.
		 * Nothing can be done about it. */
		DEBUG("%p: send_flush() failed with error %d",
		      (void *)txq, err);
	}
	txq->elts_head = elts_head;
	txq->elts_comp += elts_comp;
	txq->elts_comp_cd = elts_comp_cd;
	return i;
}
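
For context, applications never call mlx5_tx_burst() directly: the driver installs it as the device's tx_pkt_burst callback, and it is reached through rte_eth_tx_burst(). A minimal sketch of that application side follows; the function name is hypothetical, the port/queue IDs are illustrative, and unsent packets are simply dropped rather than retried.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Sketch: transmit a burst on port 0, queue 0 and free whatever the driver
 * could not enqueue (the burst callback returns the number actually sent). */
static void
send_burst_port0(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = rte_eth_tx_burst(0, 0, pkts, nb_pkts);

	while (sent < nb_pkts)
		rte_pktmbuf_free(pkts[sent++]);
}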

/**
 * Translate RX completion flags to packet type.
 *
 * @param flags
 *   RX completion flags returned by poll_length_flags().
 *
 * @note: update mlx5_dev_supported_ptypes_get() if anything changes here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(uint32_t flags)
{
	uint32_t pkt_type;

	if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_INNER_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_INNER_L3_IPV6);
	else
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6);
	return pkt_type;
}
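
TRANSPOSE() moves a single flag bit from its position in one flag word to its position in another, which is what lets the completion flags be translated into RTE_PTYPE_* bits without branching per flag. A sketch of how such a macro can be written is shown below (the driver keeps its own definition in a utility header); it assumes both masks are single power-of-two bits.

/* Sketch of a flag-transposing macro: keep bit `from` of `val` and scale it
 * by the ratio of the two masks so it ends up at the position of bit `to`.
 * Assumes `from` and `to` are single power-of-two bits. */
#define TRANSPOSE_SKETCH(val, from, to) \
	(((from) >= (to)) ? \
	 (((val) & (from)) / ((from) / (to))) : \
	 (((val) & (from)) * ((to) / (from))))

For example, TRANSPOSE_SKETCH(flags, IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_L3_IPV4) yields RTE_PTYPE_L3_IPV4 when the completion flag is set and 0 otherwise.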