/* test case to time the number of cycles to round-trip a cache line between
 * two cores and back again.
 */
static void
time_cache_line_switch(void)
{
    /* allocate a full cache line for data, we use only first byte of it */
    uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)];

    unsigned i, slaveid = rte_get_next_lcore(rte_lcore_id(), 0, 0);
    volatile uint64_t *pdata = &data[0];

    *pdata = 1;
    rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], slaveid);
    while (*pdata)
        rte_pause();

    const uint64_t start_time = rte_rdtsc();
    for (i = 0; i < (1 << ITER_POWER); i++) {
        while (*pdata)
            rte_pause();
        *pdata = 1;
    }
    const uint64_t end_time = rte_rdtsc();

    while (*pdata)
        rte_pause();
    *pdata = 2;
    rte_eal_wait_lcore(slaveid);

    printf("==== Cache line switch test ===\n");
    printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
           end_time-start_time);
    printf("Ticks per iteration = %"PRIu64"\n\n",
           (end_time-start_time) >> ITER_POWER);
}

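For scale, a purely illustrative calculation (the ITER_POWER value and tick count below are assumed, not from the source): with ITER_POWER = 20 the loop performs 2^20 ≈ 1.05 M hand-offs; if those take 128 M ticks, the test prints 128 M >> 20 ≈ 122 ticks per iteration. Each iteration moves the cache line to the other core and back, so that figure is the cost of one full round trip.
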
static int
order_queue_worker(void *arg)
{
    ORDER_WORKER_INIT;
    struct rte_event ev;

    while (t->err == false) {
        uint16_t event = rte_event_dequeue_burst(dev_id, port, &ev, 1, 0);
        if (!event) {
            if (rte_atomic64_read(outstand_pkts) <= 0)
                break;
            rte_pause();
            continue;
        }

        if (ev.queue_id == 0) { /* from ordered queue */
            order_queue_process_stage_0(&ev);
            while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1)
                rte_pause();
        } else if (ev.queue_id == 1) { /* from atomic queue */
            order_process_stage_1(t, &ev, nb_flows,
                    expected_flow_seq, outstand_pkts);
        } else {
            order_process_stage_invalid(t, &ev);
        }
    }
    return 0;
}

static int
perf_atq_worker(void *arg, const int enable_fwd_latency)
{
    PERF_WORKER_INIT;
    struct rte_event ev;

    while (t->done == false) {
        uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
        if (!event) {
            rte_pause();
            continue;
        }

        if (enable_fwd_latency && !prod_timer_type)
            /* first stage in pipeline, mark ts to compute fwd latency */
            atq_mark_fwd_latency(&ev);

        /* last stage in pipeline */
        if (unlikely((ev.sub_event_type % nb_stages) == laststage)) {
            if (enable_fwd_latency)
                cnt = perf_process_last_stage_latency(pool,
                        &ev, w, bufs, sz, cnt);
            else
                cnt = perf_process_last_stage(pool, &ev, w,
                        bufs, sz, cnt);
        } else {
            atq_fwd_event(&ev, sched_type_list, nb_stages);
            while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1)
                rte_pause();
        }
    }
    return 0;
}

void
rte_distributor_request_pkt_v1705(struct rte_distributor *d,
        unsigned int worker_id, struct rte_mbuf **oldpkt,
        unsigned int count)
{
    struct rte_distributor_buffer *buf = &(d->bufs[worker_id]);
    unsigned int i;
    volatile int64_t *retptr64;

    if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) {
        rte_distributor_request_pkt_v20(d->d_v20, worker_id, oldpkt[0]);
        return;
    }

    retptr64 = &(buf->retptr64[0]);
    /* Spin while handshake bits are set (scheduler clears it) */
    while (unlikely(*retptr64 & RTE_DISTRIB_GET_BUF)) {
        rte_pause();
        uint64_t t = rte_rdtsc() + 100;

        while (rte_rdtsc() < t)
            rte_pause();
    }

    /*
     * OK, if we've got here, then the scheduler has just cleared the
     * handshake bits. Populate the retptrs with returning packets.
     */
    for (i = count; i < RTE_DIST_BURST_SIZE; i++)
        buf->retptr64[i] = 0;

    /* Set Return bit for each packet returned */
    for (i = count; i-- > 0; )
        buf->retptr64[i] =
                (((int64_t)(uintptr_t)(oldpkt[i])) <<
                RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;

    /*
     * Finally, set the GET_BUF to signal to distributor that cache
     * line is ready for processing
     */
    *retptr64 |= RTE_DISTRIB_GET_BUF;
}

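For context, a hedged sketch of the other side of this handshake: the distributor would wait for GET_BUF, decode the returned mbuf pointers from retptr64 (reversing the shift/flag encoding above), and zero the slots so the worker can proceed. harvest_returns() is a hypothetical helper for illustration, not the library's actual code; only the field and flag names come from the function above.

/* Hypothetical distributor-side counterpart of the retptr64 handshake. */
static unsigned int
harvest_returns(struct rte_distributor_buffer *buf, struct rte_mbuf **ret)
{
    unsigned int i, count = 0;

    /* Nothing to collect until the worker raises GET_BUF. */
    if (!(buf->retptr64[0] & RTE_DISTRIB_GET_BUF))
        return 0;

    for (i = 0; i < RTE_DIST_BURST_SIZE; i++) {
        if (buf->retptr64[i] & RTE_DISTRIB_RETURN_BUF)
            ret[count++] = (struct rte_mbuf *)(uintptr_t)
                    (buf->retptr64[i] >> RTE_DISTRIB_FLAG_BITS);
        buf->retptr64[i] = 0;   /* clear handshake bits for the worker */
    }
    return count;
}
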
void
rte_delay_us_block(unsigned int us)
{
    const uint64_t start = rte_get_timer_cycles();
    const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;

    while ((rte_get_timer_cycles() - start) < ticks)
        rte_pause();
}

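The conversion above is ticks = us × hz / 10^6. A minimal usage sketch, assuming an initialized EAL; delay_demo() is illustrative only:

#include <inttypes.h>
#include <stdio.h>
#include <rte_cycles.h>

static void
delay_demo(void)
{
    const uint64_t hz = rte_get_timer_hz();
    const uint64_t before = rte_get_timer_cycles();

    rte_delay_us_block(50);     /* busy-wait roughly 50 us */

    printf("blocked for %"PRIu64" ticks (expected ~%"PRIu64")\n",
           rte_get_timer_cycles() - before, 50 * hz / 1000000);
}
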
/**
 * @brief Pause for a requested time in ns
 */
void DPDKAdapter::StreamInfo::nPause()
{
    const uint64_t start = rte_get_tsc_cycles();

    while ((rte_get_tsc_cycles() - start) < ticksDelay_) {
        rte_pause();
    }
}

/* worker thread used for testing the time to do a round-trip of a cache
 * line between two cores and back again
 */
static void
flip_bit(volatile uint64_t *arg)
{
    uint64_t old_val = 0;

    while (old_val != 2) {
        while (!*arg)
            rte_pause();
        old_val = *arg;
        *arg = 0;
    }
}

static int
perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
{
    PERF_WORKER_INIT;
    uint16_t i;
    /* +1 to avoid prefetch out of array check */
    struct rte_event ev[BURST_SIZE + 1];

    while (t->done == false) {
        uint16_t const nb_rx = rte_event_dequeue_burst(dev, port, ev,
                BURST_SIZE, 0);

        if (!nb_rx) {
            rte_pause();
            continue;
        }

        for (i = 0; i < nb_rx; i++) {
            if (enable_fwd_latency && !prod_timer_type) {
                rte_prefetch0(ev[i+1].event_ptr);
                /* first stage in pipeline.
                 * mark time stamp to compute fwd latency
                 */
                atq_mark_fwd_latency(&ev[i]);
            }

            /* last stage in pipeline */
            if (unlikely((ev[i].sub_event_type % nb_stages)
                        == laststage)) {
                if (enable_fwd_latency)
                    cnt = perf_process_last_stage_latency(
                            pool, &ev[i], w, bufs, sz, cnt);
                else
                    cnt = perf_process_last_stage(pool,
                            &ev[i], w, bufs, sz, cnt);

                ev[i].op = RTE_EVENT_OP_RELEASE;
            } else {
                atq_fwd_event(&ev[i], sched_type_list,
                        nb_stages);
            }
        }

        uint16_t enq;

        enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
        while (enq < nb_rx) {
            enq += rte_event_enqueue_burst(dev, port,
                    ev + enq, nb_rx - enq);
        }
    }
    return 0;
}

static int
test_misc(void)
{
    char memdump[] = "memdump_test";

    /* 129 = 0x81: the lowest set bit is bit 0, so rte_bsf32() returns 0 */
    if (rte_bsf32(129))
        FAIL("rte_bsf32");

    rte_memdump(stdout, "test", memdump, sizeof(memdump));
    rte_hexdump(stdout, "test", memdump, sizeof(memdump));

    rte_pause();

    return 0;
}

static inline int
perf_producer(void *arg)
{
    struct prod_data *p = arg;
    struct test_perf *t = p->t;
    struct evt_options *opt = t->opt;
    const uint8_t dev_id = p->dev_id;
    const uint8_t port = p->port_id;
    struct rte_mempool *pool = t->pool;
    const uint64_t nb_pkts = t->nb_pkts;
    const uint32_t nb_flows = t->nb_flows;
    uint32_t flow_counter = 0;
    uint64_t count = 0;
    struct perf_elt *m;
    struct rte_event ev;

    if (opt->verbose_level > 1)
        printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
               rte_lcore_id(), dev_id, port, p->queue_id);

    ev.event = 0;
    ev.op = RTE_EVENT_OP_NEW;
    ev.queue_id = p->queue_id;
    ev.sched_type = t->opt->sched_type_list[0];
    ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
    ev.event_type = RTE_EVENT_TYPE_CPU;
    ev.sub_event_type = 0; /* stage 0 */

    while (count < nb_pkts && t->done == false) {
        if (rte_mempool_get(pool, (void **)&m) < 0)
            continue;

        ev.flow_id = flow_counter++ % nb_flows;
        ev.event_ptr = m;
        m->timestamp = rte_get_timer_cycles();
        while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
            if (t->done)
                break;
            rte_pause();
            m->timestamp = rte_get_timer_cycles();
        }
        count++;
    }

    return 0;
}

static int
order_queue_worker_burst(void *arg)
{
    ORDER_WORKER_INIT;
    struct rte_event ev[BURST_SIZE];
    uint16_t i;

    while (t->err == false) {
        uint16_t const nb_rx = rte_event_dequeue_burst(dev_id, port, ev,
                BURST_SIZE, 0);

        if (nb_rx == 0) {
            if (rte_atomic64_read(outstand_pkts) <= 0)
                break;
            rte_pause();
            continue;
        }

        for (i = 0; i < nb_rx; i++) {
            if (ev[i].queue_id == 0) { /* from ordered queue */
                order_queue_process_stage_0(&ev[i]);
            } else if (ev[i].queue_id == 1) { /* from atomic queue */
                order_process_stage_1(t, &ev[i], nb_flows,
                        expected_flow_seq, outstand_pkts);
                ev[i].op = RTE_EVENT_OP_RELEASE;
            } else {
                order_process_stage_invalid(t, &ev[i]);
            }
        }

        uint16_t enq;

        enq = rte_event_enqueue_burst(dev_id, port, ev, nb_rx);
        while (enq < nb_rx) {
            enq += rte_event_enqueue_burst(dev_id, port,
                    ev + enq, nb_rx - enq);
        }
    }
    return 0;
}

/*
 * Remove a device from the ovs_dpdk data path. Synchronization occurs through
 * the use of the lcore dev_removal_flag. The device is made volatile here to
 * avoid re-ordering of dev->remove=1, which can cause an infinite loop in the
 * rte_pause loop.
 */
static void
destroy_device(volatile struct virtio_net *dev)
{
    unsigned lcore;

    /* Remove device from ovs_dpdk port. */
    if (vport_vhost_down((struct virtio_net *)dev) < 0) {
        RTE_LOG(INFO, APP,
                "Device could not be removed from ovs_dpdk port %s\n",
                dev->port_name);
        dev->flags &= ~VIRTIO_DEV_RUNNING;
        return;
    }

    /* Set the dev_removal_flag on each lcore. */
    RTE_LCORE_FOREACH(lcore) {
        dev_removal_flag[lcore] = REQUEST_DEV_REMOVAL;
    }

    /*
     * Once each core has set its dev_removal_flag to ACK_DEV_REMOVAL, we can
     * be sure that it can no longer access the device removed from the data
     * path and that the device is no longer in use.
     */
    RTE_LCORE_FOREACH(lcore) {
        while (dev_removal_flag[lcore] != ACK_DEV_REMOVAL) {
            rte_pause();
        }
    }

    dev->flags &= ~VIRTIO_DEV_RUNNING;

    RTE_LOG(INFO, APP,
            "(%"PRIu64") Device has been removed from ovs_dpdk port %s\n",
            dev->device_fh, dev->port_name);
}

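The acknowledging side of this handshake is not part of the snippet above. A hedged sketch of what each forwarding lcore presumably does in its main loop: notice the request, stop touching the device, then acknowledge. check_dev_removal() is hypothetical; only the flag array and constants come from the code above.

/* Hypothetical per-lcore check, run from each datapath loop (assumption). */
static void
check_dev_removal(void)
{
    unsigned lcore = rte_lcore_id();

    if (dev_removal_flag[lcore] == REQUEST_DEV_REMOVAL) {
        /* Drop any cached references to the device before acknowledging. */
        dev_removal_flag[lcore] = ACK_DEV_REMOVAL;
    }
}
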
static uint32_t
virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
{
    struct vhost_virtqueue *vq;
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[MAX_PKT_BURST], packet_len = 0;
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
    vq = dev->virtqueue_rx;
    count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* check that we have enough buffers */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (count == 0)
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->last_used_idx_res is atomically updated. */
        success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));

    res_cur_idx = res_base_idx;
    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
              dev->device_fh, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
                (vq->size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        /* Get descriptor from available ring */
        desc = &vq->desc[head[packet_success]];
        /* Prefetch descriptor address. */
        rte_prefetch0(desc);

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
        buff_addr = gpa_to_vva(dev, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        {
            /* Copy virtio_hdr to packet and increment buffer address */
            buff_hdr_addr = buff_addr;
            packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

            /*
             * If the descriptors are chained the header and data are
             * placed in separate buffers.
             */
            if (desc->flags & VRING_DESC_F_NEXT) {
                desc->len = vq->vhost_hlen;
                desc = &vq->desc[desc->next];
                /* Buffer address translation. */
                buff_addr = gpa_to_vva(dev, desc->addr);
                desc->len = rte_pktmbuf_data_len(buff);
            } else {
                buff_addr += vq->vhost_hlen;
                desc->len = packet_len;
            }
        }

        /* Update used ring with desc information */
        vq->used->ring[res_cur_idx & (vq->size - 1)].id =
                head[packet_success];
        vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

        /* Copy mbuf data to buffer */
        rte_memcpy((void *)(uintptr_t)buff_addr,
                   (const void *)buff->pkt.data,
                   rte_pktmbuf_data_len(buff));

        res_cur_idx++;
        packet_success++;

        /* If mergeable is disabled then a header is required per buffer. */
        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
                   (const void *)&virtio_hdr, vq->vhost_hlen);

        if (res_cur_idx < res_end_idx) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->used->idx += count;
    vq->last_used_idx = res_end_idx;

    return count;
}

static uint32_t
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
              struct rte_mbuf **pkts, uint32_t count)
{
    struct vhost_virtqueue *vq;
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[MAX_PKT_BURST];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
    if (unlikely(queue_id != VIRTIO_RXQ)) {
        LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
        return 0;
    }

    vq = dev->virtqueue[VIRTIO_RXQ];
    count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* check that we have enough buffers */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (count == 0)
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));

    res_cur_idx = res_base_idx;
    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
              dev->device_fh, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
                (vq->size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring */
        desc = &vq->desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
        buff_addr = gpa_to_vva(dev, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if ((desc->flags & VRING_DESC_F_NEXT) &&
            (desc->len == vq->vhost_hlen)) {
            desc = &vq->desc[desc->next];
            /* Buffer address translation. */
            buff_addr = gpa_to_vva(dev, desc->addr);
        } else {
            vb_offset += vq->vhost_hlen;
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
                hdr ? desc->len - vq->vhost_hlen : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                    (const void *)(rte_pktmbuf_mtod(buff, const char *)
                            + offset),
                    len_to_cpy);

            PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
                    len_to_cpy, 0);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes */
            if (total_copied == pkt_len)
                break;

            /* The current segment completes */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor done */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->desc[desc->next];
                    buff_addr = gpa_to_vva(dev, desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in vring buffer is not enough */
                    uncompleted_pkt = 1;
                    break;
                }
            }
            len_to_cpy = RTE_MIN(data_len - offset,
                    desc->len - vb_offset);
        }

        /* Update used ring with desc information */
        vq->used->ring[res_cur_idx & (vq->size - 1)].id =
                head[packet_success];

        /* Drop the packet if it is uncompleted */
        if (unlikely(uncompleted_pkt == 1))
            vq->used->ring[res_cur_idx & (vq->size - 1)].len =
                    vq->vhost_hlen;
        else
            vq->used->ring[res_cur_idx & (vq->size - 1)].len =
                    pkt_len + vq->vhost_hlen;

        res_cur_idx++;
        packet_success++;

        if (unlikely(uncompleted_pkt == 1))
            continue;

        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
                   (const void *)&virtio_hdr, vq->vhost_hlen);

        PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

        if (res_cur_idx < res_end_idx) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->used->idx += count;
    vq->last_used_idx = res_end_idx;

    /* flush used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        eventfd_write((int)vq->callfd, 1);

    return count;
}

static uint32_t
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
                    struct rte_mbuf **pkts, uint32_t count)
{
    struct vhost_virtqueue *vq;
    uint32_t pkt_idx = 0, entry_success = 0;
    uint16_t avail_idx;
    uint16_t res_base_idx, res_cur_idx;
    uint8_t success = 0;

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
              dev->device_fh);
    if (unlikely(queue_id != VIRTIO_RXQ)) {
        LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
    }

    vq = dev->virtqueue[VIRTIO_RXQ];
    count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

    if (count == 0)
        return 0;

    for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
        uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;

        do {
            /*
             * As many data cores may want access to available
             * buffers, they need to be reserved.
             */
            uint32_t secure_len = 0;
            uint32_t vec_idx = 0;

            res_base_idx = vq->last_used_idx_res;
            res_cur_idx = res_base_idx;

            do {
                avail_idx = *((volatile uint16_t *)&vq->avail->idx);
                if (unlikely(res_cur_idx == avail_idx)) {
                    LOG_DEBUG(VHOST_DATA,
                            "(%"PRIu64") Failed to get "
                            "enough desc from vring\n",
                            dev->device_fh);
                    return pkt_idx;
                } else {
                    update_secure_len(vq, res_cur_idx,
                            &secure_len, &vec_idx);
                    res_cur_idx++;
                }
            } while (pkt_len > secure_len);

            /* vq->last_used_idx_res is atomically updated. */
            success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                    res_base_idx, res_cur_idx);
        } while (success == 0);

        entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
                res_cur_idx, pkts[pkt_idx]);

        rte_compiler_barrier();

        /*
         * Wait until it's our turn to add our buffer
         * to the used ring.
         */
        while (unlikely(vq->last_used_idx != res_base_idx))
            rte_pause();

        *(volatile uint16_t *)&vq->used->idx += entry_success;
        vq->last_used_idx = res_cur_idx;

        /* flush used->idx update before we read avail->flags. */
        rte_mb();

        /* Kick the guest if necessary. */
        if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
            eventfd_write((int)vq->callfd, 1);
    }

    return count;
}

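The receive paths above all share one reservation protocol: a cmpset loop claims a private window [res_base_idx, res_end_idx) of the used ring, the copies then proceed without locks, and writers commit in reservation order by spinning until last_used_idx reaches their base. A hedged distillation of just the reservation step, reusing the field names from the snippets above (illustrative, not a drop-in):

/* Hedged distillation of the cmpset reservation used by the rx paths above. */
static uint16_t
reserve_used_window(struct vhost_virtqueue *vq, uint16_t want,
                    uint16_t *base, uint16_t *end)
{
    uint16_t avail, free_entries;

    do {
        *base = vq->last_used_idx_res;
        avail = *(volatile uint16_t *)&vq->avail->idx;
        free_entries = avail - *base;
        if (want > free_entries)
            want = free_entries;    /* shrink to what is available */
        if (want == 0)
            return 0;
        *end = *base + want;
    } while (!rte_atomic16_cmpset(&vq->last_used_idx_res, *base, *end));

    return want;                    /* window [*base, *end) is now ours */
}
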
static uint32_t
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p,
        vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count)
{
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY))
        return 0;

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->vdv_last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* check that we have enough buffers */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (unlikely(count == 0))
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->vdv_last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));

    res_cur_idx = res_base_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d| End Index %d\n",
            __func__, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) &
                (vq->vdv_size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->vdv_desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring */
        desc = &vq->vdv_desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
        buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl,
                desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if (likely(desc->flags & VRING_DESC_F_NEXT) &&
                (desc->len == sizeof(struct virtio_net_hdr))) {
            /* TODO: verify that desc->next is sane below. */
            desc = &vq->vdv_desc[desc->next];
            /* Buffer address translation. */
            buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(
                    vru_cl, desc->addr);
        } else {
            vb_offset += sizeof(struct virtio_net_hdr);
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
                hdr ? desc->len - sizeof(struct virtio_net_hdr)
                    : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                    rte_pktmbuf_mtod_offset(buff, const void *, offset),
                    len_to_cpy);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes */
            if (likely(total_copied == pkt_len))
                break;

            /* The current segment completes */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor done */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->vdv_desc[desc->next];
                    buff_addr = (uintptr_t)
                            vr_dpdk_guest_phys_to_host_virt(
                                    vru_cl, desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in vring buffer is not enough */
                    uncompleted_pkt = 1;
                    break;
                }
            }
            len_to_cpy = RTE_MIN(data_len - offset,
                    desc->len - vb_offset);
        }

        /* Update used ring with desc information */
        vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id =
                head[packet_success];

        /* Drop the packet if it is uncompleted */
        if (unlikely(uncompleted_pkt == 1))
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                    sizeof(struct virtio_net_hdr);
        else
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                    pkt_len + sizeof(struct virtio_net_hdr);

        res_cur_idx++;
        packet_success++;

        /* TODO: in DPDK 2.1 we do not copy the header
        if (unlikely(uncompleted_pkt == 1))
            continue;
        */
        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
                (const void *)&virtio_hdr,
                sizeof(struct virtio_net_hdr));

        if (likely(res_cur_idx < res_end_idx)) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->vdv_desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->vdv_last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->vdv_used->idx += count;
    vq->vdv_last_used_idx = res_end_idx;

    RTE_LOG(DEBUG, VROUTER,
            "%s: vif %d vq %p last_used_idx %d used->idx %d\n",
            __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx,
            vq->vdv_used->idx);

    /* flush used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) {
        p->nb_syscalls++;
        eventfd_write(vq->vdv_callfd, 1);
    }

    return count;
}

/*
 * Write a scattered channel packet to the TX bufring.
 *
 * The offset of this channel packet is written as a 64-bit value
 * immediately after the channel packet itself.
 *
 * The write goes through three stages:
 * 1. Reserve space in the ring buffer for the new data.
 *    The writer atomically moves priv_write_index.
 * 2. Copy the new data into the ring.
 * 3. Update the tail of the ring (visible to the host) that indicates the
 *    next read location. The writer updates write_index.
 */
int vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[],
                     int iovlen, bool *need_sig)
{
    struct vmbus_bufring *vbr = tbr->vbr;
    uint32_t ring_size = tbr->dsize;
    uint32_t old_windex, next_windex, windex, total;
    uint64_t save_windex;
    int i;

    total = 0;
    for (i = 0; i < iovlen; i++)
        total += iov[i].iov_len;
    total += sizeof(save_windex);

    /* Reserve space in ring */
    do {
        uint32_t avail;

        /* Get current free location */
        old_windex = tbr->windex;

        /* Prevent compiler reordering this with calculation */
        rte_compiler_barrier();

        avail = vmbus_br_availwrite(tbr, old_windex);

        /* If not enough space in ring, then tell caller. */
        if (avail <= total)
            return -EAGAIN;

        next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

        /* Atomic update of next write_index for other threads */
    } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

    /* Space from old..new is now reserved */
    windex = old_windex;
    for (i = 0; i < iovlen; i++) {
        windex = vmbus_txbr_copyto(tbr, windex,
                                   iov[i].iov_base, iov[i].iov_len);
    }

    /* Set the offset of the current channel packet. */
    save_windex = ((uint64_t)old_windex) << 32;
    windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
                               sizeof(save_windex));

    /* The region reserved should match the region used */
    RTE_ASSERT(windex == next_windex);

    /* Ensure that data is available before updating host index */
    rte_smp_wmb();

    /* Check in our reservation: wait for our turn to update the host. */
    while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
        rte_pause();

    /* If the host had read all data before this, then we need to signal. */
    *need_sig |= vmbus_txbr_need_signal(tbr, old_windex);

    return 0;
}

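A hedged usage sketch: a caller would package the channel-packet header and payload as an iovec array and retry on -EAGAIN until the host drains the ring. send_one() and the signalling stub are hypothetical; only vmbus_txbr_write()'s signature comes from the code above.

/* Hypothetical caller: retries while the TX bufring is full (assumption). */
static int
send_one(struct vmbus_br *tbr, void *hdr, size_t hlen,
         void *payload, size_t plen)
{
    struct iovec iov[2] = {
        { .iov_base = hdr,     .iov_len = hlen },
        { .iov_base = payload, .iov_len = plen },
    };
    bool need_sig = false;
    int ret;

    while ((ret = vmbus_txbr_write(tbr, iov, 2, &need_sig)) == -EAGAIN)
        rte_pause();        /* ring full: wait for the host to read */

    if (ret == 0 && need_sig) {
        /* signal the host here; the mechanism is outside this sketch */
    }
    return ret;
}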