virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
{
    struct vhost_virtqueue *vq;
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[MAX_PKT_BURST], packet_len = 0;
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
    vq = dev->virtqueue_rx;
    count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* Check that we have enough buffers. */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (count == 0)
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->last_used_idx_res is atomically updated. */
        success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));

    res_cur_idx = res_base_idx;
    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
            dev->device_fh, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
                (vq->size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        /* Get descriptor from available ring. */
        desc = &vq->desc[head[packet_success]];
        /* Prefetch descriptor address. */
        rte_prefetch0(desc);

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr). */
        buff_addr = gpa_to_vva(dev, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address. */
        buff_hdr_addr = buff_addr;
        packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if (desc->flags & VRING_DESC_F_NEXT) {
            desc->len = vq->vhost_hlen;
            desc = &vq->desc[desc->next];
            /* Buffer address translation. */
            buff_addr = gpa_to_vva(dev, desc->addr);
            desc->len = rte_pktmbuf_data_len(buff);
        } else {
            buff_addr += vq->vhost_hlen;
            desc->len = packet_len;
        }

        /* Update used ring with desc information. */
        vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
        vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

        /* Copy mbuf data to buffer. */
        rte_memcpy((void *)(uintptr_t)buff_addr,
                (const void *)buff->pkt.data,
                rte_pktmbuf_data_len(buff));

        res_cur_idx++;
        packet_success++;

        /* Mergeable buffers are disabled, so a header is required per buffer. */
        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
                (const void *)&virtio_hdr, vq->vhost_hlen);

        if (res_cur_idx < res_end_idx) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->used->idx += count;
    vq->last_used_idx = res_end_idx;

    return count;
}
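/*
 * For reference only (these types are already provided by the vhost headers):
 * the per-packet header that the RX paths above prepend for the guest, as laid
 * out in the virtio specification. The six zeros in the virtio_hdr initialiser
 * correspond to the six fields of struct virtio_net_hdr, and the trailing zero
 * to num_buffers; vq->vhost_hlen is sizeof(struct virtio_net_hdr) or, when
 * VIRTIO_NET_F_MRG_RXBUF is negotiated, sizeof(struct virtio_net_hdr_mrg_rxbuf).
 */
struct virtio_net_hdr {
    uint8_t  flags;        /* e.g. VIRTIO_NET_HDR_F_NEEDS_CSUM */
    uint8_t  gso_type;     /* e.g. VIRTIO_NET_HDR_GSO_NONE */
    uint16_t hdr_len;      /* length of the packet headers, for GSO */
    uint16_t gso_size;     /* GSO segment size */
    uint16_t csum_start;   /* offset where checksumming starts */
    uint16_t csum_offset;  /* offset of the checksum field from csum_start */
};

struct virtio_net_hdr_mrg_rxbuf {
    struct virtio_net_hdr hdr;
    uint16_t num_buffers;  /* number of merged RX buffers used for this packet */
};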
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
    struct rte_mbuf **pkts, uint32_t count)
{
    struct vhost_virtqueue *vq;
    uint32_t pkt_idx = 0, entry_success = 0;
    uint16_t avail_idx;
    uint16_t res_base_idx, res_cur_idx;
    uint8_t success = 0;

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
        dev->device_fh);
    if (unlikely(queue_id != VIRTIO_RXQ)) {
        LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
    }

    vq = dev->virtqueue[VIRTIO_RXQ];
    count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

    if (count == 0)
        return 0;

    for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
        uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;

        do {
            /*
             * As many data cores may want access to available
             * buffers, they need to be reserved.
             */
            uint32_t secure_len = 0;
            uint32_t vec_idx = 0;

            res_base_idx = vq->last_used_idx_res;
            res_cur_idx = res_base_idx;

            do {
                avail_idx = *((volatile uint16_t *)&vq->avail->idx);
                if (unlikely(res_cur_idx == avail_idx)) {
                    LOG_DEBUG(VHOST_DATA,
                        "(%"PRIu64") Failed "
                        "to get enough desc from "
                        "vring\n",
                        dev->device_fh);
                    return pkt_idx;
                } else {
                    update_secure_len(vq, res_cur_idx,
                        &secure_len, &vec_idx);
                    res_cur_idx++;
                }
            } while (pkt_len > secure_len);

            /* vq->last_used_idx_res is atomically updated. */
            success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                            res_base_idx,
                            res_cur_idx);
        } while (success == 0);

        entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
            res_cur_idx, pkts[pkt_idx]);

        rte_compiler_barrier();

        /*
         * Wait until it's our turn to add our buffer
         * to the used ring.
         */
        while (unlikely(vq->last_used_idx != res_base_idx))
            rte_pause();

        *(volatile uint16_t *)&vq->used->idx += entry_success;
        vq->last_used_idx = res_cur_idx;

        /* Flush the used->idx update before we read avail->flags. */
        rte_mb();

        /* Kick the guest if necessary. */
        if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
            eventfd_write((int)vq->callfd, 1);
    }

    return count;
}
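/*
 * The reservation loop above keeps calling update_secure_len() until the
 * descriptor chains it has walked can hold pkt_len bytes. That helper is not
 * part of this listing; below is a minimal sketch reconstructed from the DPDK
 * vhost library of this period. The vq->buf_vec fields used here (buf_addr,
 * buf_len, desc_idx) are assumptions based on that code, not shown elsewhere
 * in this listing.
 */
static inline void
update_secure_len(struct vhost_virtqueue *vq, uint32_t id,
    uint32_t *secure_len, uint32_t *vec_idx)
{
    uint16_t wrapped_idx = id & (vq->size - 1);
    uint32_t idx = vq->avail->ring[wrapped_idx];
    uint8_t next_desc;
    uint32_t len = *secure_len;
    uint32_t vec_id = *vec_idx;

    do {
        next_desc = 0;
        /* Accumulate the capacity of this descriptor chain. */
        len += vq->desc[idx].len;
        /* Remember where each descriptor of the chain points. */
        vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
        vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
        vq->buf_vec[vec_id].desc_idx = idx;
        vec_id++;

        if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
            idx = vq->desc[idx].next;
            next_desc = 1;
        }
    } while (next_desc);

    *secure_len = len;
    *vec_idx = vec_id;
}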
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
    struct rte_mbuf **pkts, uint32_t count)
{
    struct vhost_virtqueue *vq;
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[MAX_PKT_BURST];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
    if (unlikely(queue_id != VIRTIO_RXQ)) {
        LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
        return 0;
    }

    vq = dev->virtqueue[VIRTIO_RXQ];
    count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* Check that we have enough buffers. */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (count == 0)
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));

    res_cur_idx = res_base_idx;
    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
            dev->device_fh, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
                (vq->size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring. */
        desc = &vq->desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr). */
        buff_addr = gpa_to_vva(dev, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address. */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if ((desc->flags & VRING_DESC_F_NEXT) &&
            (desc->len == vq->vhost_hlen)) {
            desc = &vq->desc[desc->next];
            /* Buffer address translation. */
            buff_addr = gpa_to_vva(dev, desc->addr);
        } else {
            vb_offset += vq->vhost_hlen;
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
            hdr ? desc->len - vq->vhost_hlen : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer. */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                (const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
                len_to_cpy);
            PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
                len_to_cpy, 0);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes. */
            if (total_copied == pkt_len)
                break;

            /* The current segment completes. */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor is done. */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->desc[desc->next];
                    buff_addr = gpa_to_vva(dev, desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in the vring buffer is not enough. */
                    uncompleted_pkt = 1;
                    break;
                }
            }

            len_to_cpy = RTE_MIN(data_len - offset,
                desc->len - vb_offset);
        }

        /* Update used ring with desc information. */
        vq->used->ring[res_cur_idx & (vq->size - 1)].id =
            head[packet_success];

        /* Drop the packet if it is uncompleted. */
        if (unlikely(uncompleted_pkt == 1))
            vq->used->ring[res_cur_idx & (vq->size - 1)].len =
                vq->vhost_hlen;
        else
            vq->used->ring[res_cur_idx & (vq->size - 1)].len =
                pkt_len + vq->vhost_hlen;

        res_cur_idx++;
        packet_success++;

        if (unlikely(uncompleted_pkt == 1))
            continue;

        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
            (const void *)&virtio_hdr, vq->vhost_hlen);

        PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

        if (res_cur_idx < res_end_idx) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->used->idx += count;
    vq->last_used_idx = res_end_idx;

    /* Flush the used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        eventfd_write((int)vq->callfd, 1);

    return count;
}
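/*
 * Both virtio_dev_rx variants above translate descriptor addresses with
 * gpa_to_vva(), which is not part of this listing. A minimal sketch of that
 * translation, assuming the vhost library's per-device table of guest memory
 * regions (dev->mem); the region field names used here (guest_phys_address,
 * guest_phys_address_end, address_offset) are assumptions based on the
 * library of this period.
 */
static inline uint64_t
gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
{
    struct virtio_memory_regions *region;
    uint32_t regionidx;
    uint64_t vhost_va = 0;

    /* Find the guest memory region containing guest_pa and apply its offset. */
    for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
        region = &dev->mem->regions[regionidx];
        if ((guest_pa >= region->guest_phys_address) &&
            (guest_pa <= region->guest_phys_address_end)) {
            vhost_va = region->address_offset + guest_pa;
            break;
        }
    }
    return vhost_va;
}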
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p,
    vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count)
{
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY))
        return 0;

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->vdv_last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* Check that we have enough buffers. */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (unlikely(count == 0))
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->vdv_last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));

    res_cur_idx = res_base_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d| End Index %d\n",
            __func__, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) &
                (vq->vdv_size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->vdv_desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring. */
        desc = &vq->vdv_desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr). */
        buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl,
                desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address. */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if (likely(desc->flags & VRING_DESC_F_NEXT) &&
            (desc->len == sizeof(struct virtio_net_hdr))) {
            /*
             * TODO: verify that desc->next is sane below.
             */
            desc = &vq->vdv_desc[desc->next];
            /* Buffer address translation. */
            buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl,
                    desc->addr);
        } else {
            vb_offset += sizeof(struct virtio_net_hdr);
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
            hdr ? desc->len - sizeof(struct virtio_net_hdr) : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer. */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                rte_pktmbuf_mtod_offset(buff, const void *, offset),
                len_to_cpy);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes. */
            if (likely(total_copied == pkt_len))
                break;

            /* The current segment completes. */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor is done. */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->vdv_desc[desc->next];
                    buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl,
                            desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in the vring buffer is not enough. */
                    uncompleted_pkt = 1;
                    break;
                }
            }

            len_to_cpy = RTE_MIN(data_len - offset,
                desc->len - vb_offset);
        }

        /* Update used ring with desc information. */
        vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id =
            head[packet_success];

        /* Drop the packet if it is uncompleted. */
        if (unlikely(uncompleted_pkt == 1))
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                sizeof(struct virtio_net_hdr);
        else
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                pkt_len + sizeof(struct virtio_net_hdr);

        res_cur_idx++;
        packet_success++;

        /* TODO: in DPDK 2.1 we do not copy the header
        if (unlikely(uncompleted_pkt == 1))
            continue;
        */

        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
            (const void *)&virtio_hdr, sizeof(struct virtio_net_hdr));

        if (likely(res_cur_idx < res_end_idx)) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->vdv_desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->vdv_last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->vdv_used->idx += count;
    vq->vdv_last_used_idx = res_end_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: vif %d vq %p last_used_idx %d used->idx %d\n",
            __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx,
            vq->vdv_used->idx);

    /* Flush the used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) {
        p->nb_syscalls++;
        eventfd_write(vq->vdv_callfd, 1);
    }

    return count;
}
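/*
 * All of the enqueue paths above copy mbuf data into guest memory, so the
 * caller still owns its mbufs when the call returns. A hypothetical caller
 * sketch for the vRouter variant; the function name and the drop-on-ring-full
 * policy are illustrative, not taken from the vRouter source.
 */
static void
tx_burst_and_free(struct dpdk_virtio_writer *p, vr_dpdk_virtioq_t *vq,
    struct rte_mbuf **pkts, uint32_t n)
{
    uint32_t sent = dpdk_virtio_dev_to_vm_tx_burst(p, vq, pkts, n);
    uint32_t i;

    /* Data has been copied (or the ring was full), so release every mbuf. */
    for (i = 0; i < n; i++)
        rte_pktmbuf_free(pkts[i]);

    (void)sent; /* a real caller would fold this into its TX statistics */
}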