/*
 * Forward at most 'limit' packets from rxring to txring.
 *
 * The batch is clamped to the space available in both rings.  Each
 * packet is either handed over by swapping the buffer indices of the
 * two slots (when 'zerocopy' is set) or copied into the TX buffer.
 * 'msg' is only used as a prefix in diagnostics.
 *
 * Returns the number of slots moved; head and cur of both rings are
 * advanced past them.
 */
static int
process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
	      u_int limit, const char *msg)
{
	u_int rx_pos, tx_pos, room, moved, left;

	/* warn if any ring flag is set (e.g. NM_REINIT) */
	if (rxring->flags || txring->flags)
		D("%s rxflags %x txflags %x",
		    msg, rxring->flags, txring->flags);

	rx_pos = rxring->cur;	/* RX */
	tx_pos = txring->cur;	/* TX */

	/* never move more than either ring can take */
	room = nm_ring_space(rxring);
	if (limit > room)
		limit = room;
	room = nm_ring_space(txring);
	if (limit > room)
		limit = room;
	moved = limit;

	for (left = limit; left > 0; left--) {
		struct netmap_slot *src = &rxring->slot[rx_pos];
		struct netmap_slot *dst = &txring->slot[tx_pos];

		/* buffer indices below 2 are reported as invalid */
		if (dst->buf_idx < 2 || src->buf_idx < 2) {
			D("wrong index rx[%d] = %d  -> tx[%d] = %d",
			    rx_pos, src->buf_idx, tx_pos, dst->buf_idx);
			sleep(2);
		}

		/* sanitize, then propagate, the packet length */
		if (src->len > 2048) {
			D("wrong len %d rx[%d] -> tx[%d]",
			    src->len, rx_pos, tx_pos);
			src->len = 0;
		} else if (verbose > 1) {
			D("%s send len %d rx[%d] -> tx[%d]",
			    msg, src->len, rx_pos, tx_pos);
		}
		dst->len = src->len;

		if (!zerocopy) {
			char *from = NETMAP_BUF(rxring, src->buf_idx);
			char *to = NETMAP_BUF(txring, dst->buf_idx);

			nm_pkt_copy(from, to, dst->len);
		} else {
			uint32_t spare = dst->buf_idx;

			dst->buf_idx = src->buf_idx;
			src->buf_idx = spare;
			/* tell netmap that both slots now own new buffers */
			dst->flags |= NS_BUF_CHANGED;
			src->flags |= NS_BUF_CHANGED;
		}

		rx_pos = nm_ring_next(rxring, rx_pos);
		tx_pos = nm_ring_next(txring, tx_pos);
	}

	rxring->head = rxring->cur = rx_pos;
	txring->head = txring->cur = tx_pos;

	if (verbose && moved > 0)
		D("%s sent %d packets to %p", msg, moved, txring);

	return (moved);
}
/* Mimics netmap's nm_nextpkt function. This version does not * release buffer in order to avoid extra copies. This has the * disadvantage of potentially holding up the ring if service times * are not equal. * * TODO: explore swapping buffers with a tx and rx ring with a 3rd buffer. * This may prevent packets with exceptionally long service time from * holding up the entire ring... */ static u_char* fp_netmap_nextpkt(struct nm_desc* d, struct nm_pkthdr* hdr) { int ring_i = d->cur_rx_ring; do { /* compute current ring to use */ struct netmap_ring* ring = NETMAP_RXRING(d->nifp, ring_i); if (!nm_ring_empty(ring)) { u_int buf_i = ring->cur; u_int buf_idx = ring->slot[buf_i].buf_idx; u_char* buf_ptr = (u_char*)NETMAP_BUF(ring, buf_idx); // __builtin_prefetch(buf); hdr->ts = ring->ts; hdr->len = hdr->caplen = ring->slot[buf_i].len; ring->cur = nm_ring_next(ring, buf_i); /* we could postpone advancing head if we want * to hold the buffer. This can be supported in * the future. */ // ring->head = ring->cur; d->cur_rx_ring = ring_i; return buf_ptr; } ring_i++; if (ring_i > d->last_rx_ring) ring_i = d->first_rx_ring; } while (ring_i != d->cur_rx_ring); return NULL; /* nothing found */ }
/*
 * Hand at most 'limit' received packets from 'ring' to eth_input().
 *
 * The global g_time is refreshed once per call before any packet is
 * processed.  'dump' is accepted but not used.
 * Returns the number of packets consumed; head and cur are advanced
 * past them.
 */
static int
receive_packets(struct netmap_ring *ring, u_int limit, int dump)
{
	u_int pos, avail, done;

#ifdef TIME_NANO
	clock_gettime(CLOCK_REALTIME, &g_time);
#else
	gettimeofday(&g_time, 0);
#endif

	pos = ring->cur;
	avail = nm_ring_space(ring);
	if (limit > avail)
		limit = avail;

	done = 0;
	while (done < limit) {
		struct netmap_slot *slot = &ring->slot[pos];
		u_char *frame = (u_char*)NETMAP_BUF(ring, slot->buf_idx);

		eth_input(frame, slot->len);
		pos = nm_ring_next(ring, pos);
		done++;
	}
	ring->head = ring->cur = pos;

	return (done);
}
/*
 * Fill the packet batch 'pq' from consecutive slots of 'ring'.
 *
 * Scanning starts at slot index *nj and covers at most 'limit' slots,
 * capped at MAXPKT.  For every slot the buffer pointer, length and slot
 * index are recorded and the cached IP header pointer is cleared.
 *
 * Returns the number of entries written and stores in *nj the index of
 * the slot following the last one consumed.
 */
static int
pkt_create_queue(struct netmap_ring *ring, struct pkt *pq, int limit,
    u_int *nj)
{
	u_int slot_idx = *nj;
	int filled = 0;

	/* Only do batches of MAXPKT */
	if (limit > MAXPKT)
		limit = MAXPKT;

	while (filled < limit) {
		struct netmap_slot *rs = &ring->slot[slot_idx];
		struct pkt *entry = &pq[filled];

		entry->buf = NETMAP_BUF(ring, rs->buf_idx);
		entry->len = rs->len;
		entry->iphdr = NULL;
		entry->slotid = slot_idx;

		slot_idx = nm_ring_next(ring, slot_idx);
		filled++;
	}

	*nj = slot_idx;
	return (filled);
}
/*
 * Endless receive loop for RX ring 'ring_id' of descriptor 'd'.
 *
 * Every packet is shown to filter_packet(); a non-zero verdict marks
 * both the ring (NR_FORWARD) and the slot (NS_FORWARD), otherwise the
 * packet is simply released.  A packets-per-interval counter is printed
 * whenever time() has advanced since the last report.
 *
 * The function never returns; a poll() failure other than EINTR
 * terminates the process with status 3.
 */
static void
receiver(lua_State *L, int cb_ref, struct nm_desc *d, unsigned int ring_id)
{
	struct pollfd pfd;
	struct netmap_ring *ring;
	time_t last_report = time(NULL);
	int seen = 0;

	for (;;) {
		pfd.fd = d->fd;
		pfd.events = POLLIN;

		if (poll(&pfd, 1, 1000) < 0 && errno != EINTR) {
			perror("poll()");
			exit(3);
		}

		if (time(NULL) > last_report) {
			printf("[+] receiving %d pps\n", seen);
			seen = 0;
			last_report = time(NULL);
		}

		ring = NETMAP_RXRING(d->nifp, ring_id);
		while (!nm_ring_empty(ring)) {
			unsigned int pos = ring->cur;
			char *payload = NETMAP_BUF(ring, ring->slot[pos].buf_idx);
			unsigned int payload_len = ring->slot[pos].len;

			seen++;

			if (!filter_packet(L, cb_ref, payload, payload_len)) {
				/* drop packet */
				printf("--- DROP\n");
			} else {
				/* forward packet to kernel */
				ring->flags |= NR_FORWARD;
				ring->slot[pos].flags |= NS_FORWARD;
				printf("+++ PASS\n");
			}

			ring->head = ring->cur = nm_ring_next(ring, pos);
		}
	}
}
/* * Note: this thread is the only one pulling packets off of any * given netmap instance */ static void * receiver(void *arg) { struct virtif_user *viu = arg; struct iovec iov; struct netmap_if *nifp = viu->nm_nifp; struct netmap_ring *ring = NETMAP_RXRING(nifp, 0); struct netmap_slot *slot; struct pollfd pfd; int prv; rumpuser_component_kthread(); for (;;) { pfd.fd = viu->viu_fd; pfd.events = POLLIN; if (viu->viu_dying) { break; } prv = 0; while (nm_ring_empty(ring) && prv == 0) { DPRINTF(("receive pkt via netmap\n")); prv = poll(&pfd, 1, 1000); if (prv > 0 || (prv < 0 && errno != EAGAIN)) break; } #if 0 /* XXX: report non-transient errors */ if (ring->avail == 0) { rv = errno; break; } #endif slot = &ring->slot[ring->cur]; DPRINTF(("got pkt of size %d\n", slot->len)); iov.iov_base = NETMAP_BUF(ring, slot->buf_idx); iov.iov_len = slot->len; /* XXX: allow batch processing */ rumpuser_component_schedule(NULL); VIF_DELIVERPKT(viu->viu_virtifsc, &iov, 1); rumpuser_component_unschedule(); ring->head = ring->cur = nm_ring_next(ring, ring->cur); } rumpuser_component_kthread_release(); return NULL; }
void VIFHYPER_SEND(struct virtif_user *viu, struct iovec *iov, size_t iovlen) { void *cookie = NULL; /* XXXgcc */ struct netmap_if *nifp = viu->nm_nifp; struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); char *p; int retries; int unscheduled = 0; unsigned n; DPRINTF(("sending pkt via netmap len %d\n", (int)iovlen)); for (retries = 10; !(n = nm_ring_space(ring)) && retries > 0; retries--) { struct pollfd pfd; if (!unscheduled) { cookie = rumpuser_component_unschedule(); unscheduled = 1; } pfd.fd = viu->viu_fd; pfd.events = POLLOUT; DPRINTF(("cannot send on netmap, ring full\n")); (void)poll(&pfd, 1, 500 /* ms */); } if (n > 0) { int i, totlen = 0; struct netmap_slot *slot = &ring->slot[ring->cur]; #define MAX_BUF_SIZE 1900 p = NETMAP_BUF(ring, slot->buf_idx); for (i = 0; totlen < MAX_BUF_SIZE && i < iovlen; i++) { int n = iov[i].iov_len; if (totlen + n > MAX_BUF_SIZE) { n = MAX_BUF_SIZE - totlen; DPRINTF(("truncating long pkt")); } memcpy(p + totlen, iov[i].iov_base, n); totlen += n; } #undef MAX_BUF_SIZE slot->len = totlen; ring->head = ring->cur = nm_ring_next(ring, ring->cur); if (ioctl(viu->viu_fd, NIOCTXSYNC, NULL) < 0) perror("NIOCTXSYNC"); } if (unscheduled) rumpuser_component_schedule(cookie); }
/*
 * Queue up to 'count' copies of the frame held in 'm' on 'ring'.
 *
 * The count is clamped to the free space in the ring.  Returns the
 * number of slots filled; head and cur are advanced past them.
 */
static int
test_send(struct netmap_ring *ring, usn_mbuf_t *m, u_int count)
{
	u_int pos = ring->cur;
	u_int room = nm_ring_space(ring);
	u_int queued = 0;

	if (count > room)
		count = room;

	while (queued < count) {
		struct netmap_slot *slot = &ring->slot[pos];
		char *dst = NETMAP_BUF(ring, slot->buf_idx);

		nm_pkt_copy(m->head, dst, m->mlen);
		slot->len = m->mlen;

		pos = nm_ring_next(ring, pos);
		queued++;
	}
	ring->head = ring->cur = pos;

	return (queued);
}
static __inline int pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) { int len = 0; int i = 0; int r; for (r = nmd->cur_rx_ring; ; ) { struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); uint32_t cur, idx; char *buf; size_t left; if (nm_ring_empty(ring)) { r++; if (r > nmd->last_rx_ring) r = nmd->first_rx_ring; if (r == nmd->cur_rx_ring) break; continue; } cur = ring->cur; idx = ring->slot[cur].buf_idx; buf = NETMAP_BUF(ring, idx); left = ring->slot[cur].len; for (i = 0; i < iovcnt && left > 0; i++) { if (iov[i].iov_len > left) iov[i].iov_len = left; memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); len += iov[i].iov_len; left -= iov[i].iov_len; } ring->head = ring->cur = nm_ring_next(ring, cur); nmd->cur_rx_ring = r; ioctl(nmd->fd, NIOCRXSYNC, NULL); break; } for (; i < iovcnt; i++) iov[i].iov_len = 0; return (len); }
int receive_packets(struct netmap_ring *ring) { u_int cur, rx, n; cur = ring->cur; n = nm_ring_space(ring); for (rx = 0; rx < n; rx++) { struct netmap_slot *slot = &ring->slot[cur]; char *p = NETMAP_BUF(ring, slot->buf_idx); // process data consume_pkt((u_char*)p, slot->len); cur = nm_ring_next(ring, cur); } ring->head = ring->cur = cur; return (rx); }
static void netmap_send(void *opaque) { NetmapState *s = opaque; struct netmap_ring *ring = s->rx; /* Keep sending while there are available packets into the netmap RX ring and the forwarding path towards the peer is open. */ while (!nm_ring_empty(ring)) { uint32_t i; uint32_t idx; bool morefrag; int iovcnt = 0; int iovsize; do { i = ring->cur; idx = ring->slot[i].buf_idx; morefrag = (ring->slot[i].flags & NS_MOREFRAG); s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx); s->iov[iovcnt].iov_len = ring->slot[i].len; iovcnt++; ring->cur = ring->head = nm_ring_next(ring, i); } while (!nm_ring_empty(ring) && morefrag); if (unlikely(nm_ring_empty(ring) && morefrag)) { RD(5, "[netmap_send] ran out of slots, with a pending" "incomplete packet\n"); } iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, netmap_send_completed); if (iovsize == 0) { /* The peer does not receive anymore. Packet is queued, stop * reading from the backend until netmap_send_completed() */ netmap_read_poll(s, false); break; } } }
/*
 * Forward the packets of batch 'pq' (n entries) that are marked for
 * forwarding from 'rxring' to 'txring'.
 *
 * Entries with 'fwd' clear are skipped.  Each forwarded packet takes
 * the TX slot at *txidx: the buffers are either swapped (th->zerocopy)
 * or the payload is copied.  On return *txidx holds the next free TX
 * slot index.  The rings' head/cur pointers are not touched here.
 */
static void
pkt_fwd_packets(struct thr_ctx *th, struct netmap_ring *rxring,
    struct netmap_ring *txring, struct pkt *pq, int n, u_int *txidx)
{
	u_int tx_pos = *txidx;
	int idx;

	for (idx = 0; idx < n; idx++) {
		struct netmap_slot *src, *dst;

		if (pq[idx].fwd) {
			src = &rxring->slot[pq[idx].slotid];
			dst = &txring->slot[tx_pos];
			dst->len = src->len;

			if (!th->zerocopy) {
				char *from = NETMAP_BUF(rxring, src->buf_idx);
				char *to = NETMAP_BUF(txring, dst->buf_idx);

				nm_pkt_copy(from, to, dst->len);
			} else {
				uint32_t spare = dst->buf_idx;

				dst->buf_idx = src->buf_idx;
				src->buf_idx = spare;
				/* report the buffer change. */
				dst->flags |= NS_BUF_CHANGED;
				src->flags |= NS_BUF_CHANGED;
			}

			/* one TX slot consumed */
			tx_pos = nm_ring_next(txring, tx_pos);
		}
	}

	*txidx = tx_pos;
}
//#define NO_SWAP static int process_rings(struct netmap_ring *rxring, u_int limit) { u_int j,m = 0; /* print a warning if any of the ring flags is set (e.g. NM_REINIT) */ j = rxring->cur; /* RX */ m = nm_ring_space(rxring); if (m < limit) limit = m; m = 0; while (limit-- > 0) { m ++; j = nm_ring_next(rxring, j); } rxring->head = rxring->cur = j; return (m); }
/*
 * Consume at most 'limit' packets from 'ring'.
 *
 * When 'dump' is non-zero each packet is passed to dump_payload();
 * otherwise the packets are just released.  Returns the number of
 * packets consumed; head and cur are advanced past them.
 */
static int
receive_packets(struct netmap_ring *ring, u_int limit, int dump)
{
	u_int pos = ring->cur;
	u_int avail = nm_ring_space(ring);
	u_int done = 0;

	if (limit > avail)
		limit = avail;

	while (done < limit) {
		struct netmap_slot *slot = &ring->slot[pos];
		char *payload = NETMAP_BUF(ring, slot->buf_idx);

		if (dump)
			dump_payload(payload, slot->len, ring, pos);

		pos = nm_ring_next(ring, pos);
		done++;
	}
	ring->head = ring->cur = pos;

	return (done);
}
/*
 * Transmit one contiguous packet of 'size' bytes on the netmap TX ring.
 *
 * Returns 'size' when the packet was queued and also when it was
 * dropped (no TX ring, or packet larger than a netmap buffer).
 * Returns 0 when the ring has no free slot; in that case write polling
 * is enabled before returning.
 */
static ssize_t netmap_receive(NetClientState *nc,
      const uint8_t *buf, size_t size)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    struct netmap_ring *ring = s->tx;
    struct netmap_slot *slot;
    uint32_t pos;
    uint8_t *dst;

    if (unlikely(!ring)) {
        /* Drop. */
        return size;
    }

    if (unlikely(size > ring->nr_buf_size)) {
        RD(5, "[netmap_receive] drop packet of size %d > %d\n",
                                    (int)size, ring->nr_buf_size);
        return size;
    }

    if (nm_ring_empty(ring)) {
        /* No available slots in the netmap TX ring. */
        netmap_write_poll(s, true);
        return 0;
    }

    pos = ring->cur;
    slot = &ring->slot[pos];
    dst = (uint8_t *)NETMAP_BUF(ring, slot->buf_idx);

    slot->len = size;
    slot->flags = 0;
    pkt_copy(buf, dst, size);

    /* Publish the slot and ask the kernel to transmit it. */
    ring->cur = ring->head = nm_ring_next(ring, pos);
    ioctl(s->nmd->fd, NIOCTXSYNC, NULL);

    return size;
}
static __inline int pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) { int r, i; int len = 0; for (r = nmd->cur_tx_ring; ; ) { struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); uint32_t cur, idx; char *buf; if (nm_ring_empty(ring)) { r++; if (r > nmd->last_tx_ring) r = nmd->first_tx_ring; if (r == nmd->cur_tx_ring) break; continue; } cur = ring->cur; idx = ring->slot[cur].buf_idx; buf = NETMAP_BUF(ring, idx); for (i = 0; i < iovcnt; i++) { if (len + iov[i].iov_len > 2048) break; memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); len += iov[i].iov_len; } ring->slot[cur].len = len; ring->head = ring->cur = nm_ring_next(ring, cur); nmd->cur_tx_ring = r; ioctl(nmd->fd, NIOCTXSYNC, NULL); break; } return (len); }
/*----------------------------------------------------------------------------*/ int32_t netmap_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) { struct netmap_private_context *npc; struct nm_desc *d; npc = (struct netmap_private_context *)ctxt->io_private_context; d = npc->local_nmd[ifidx]; int p = 0; int c, got = 0, ri = d->cur_rx_ring; int n = d->last_rx_ring - d->first_rx_ring + 1; int cnt = MAX_PKT_BURST; for (c = 0; c < n && cnt != got && npc->dev_poll_flag[ifidx]; c++) { /* compute current ring to use */ struct netmap_ring *ring; ri = d->cur_rx_ring + c; if (ri > d->last_rx_ring) ri = d->first_rx_ring; ring = NETMAP_RXRING(d->nifp, ri); for ( ; !nm_ring_empty(ring) && cnt != got; got++) { u_int i = ring->cur; u_int idx = ring->slot[i].buf_idx; npc->rcv_pktbuf[p] = (u_char *)NETMAP_BUF(ring, idx); npc->rcv_pkt_len[p] = ring->slot[i].len; p++; ring->head = ring->cur = nm_ring_next(ring, i); } } d->cur_rx_ring = ri; npc->dev_poll_flag[ifidx] = 0; return p; }
/*
 * Flush the transmit queue of 'port' to its netmap TX rings.
 *
 * port->q (txq[]) holds a batch of port->cur_txq packets that possibly
 * need to be forwarded.  Each entry is either a reference to a slot of
 * another netmap ring (TXQ_IS_SLOT) or an mbuf (TXQ_IS_MBUF).  Entries
 * are placed on the TX rings in ring order; if the rings fill up,
 * NIOCTXSYNC is issued and the scan restarts from the first ring, up to
 * 5 times.  Entries still pending after that are dropped (mbufs are
 * freed).  The queue is always emptied on return.
 *
 * Always returns 0.
 */
int
netmap_fwd(struct my_netmap_port *port)
{
	u_int dr; /* destination ring */
	u_int i = 0;
	const u_int n = port->cur_txq;	/* how many queued packets */
	struct txq_entry *x = port->q;
	int retry = 5;	/* max retries */
	struct nm_desc *dst = port->d;

	if (n == 0) {
		D("nothing to forward to %s", port->ifp.if_xname);
		return 0;
	}

 again:
	/* scan all output rings; dr is the destination ring index */
	for (dr = dst->first_tx_ring; i < n && dr <= dst->last_tx_ring; dr++) {
		struct netmap_ring *ring = NETMAP_TXRING(dst->nifp, dr);

		__builtin_prefetch(ring);
		if (nm_ring_empty(ring))
			continue;
		/*
		 * We have different ways to transfer from src->dst
		 *
		 * src	dst	Now		Eventually (not done)
		 *
		 * PHYS	PHYS	buf swap
		 * PHYS VIRT	NS_INDIRECT
		 * VIRT	PHYS	copy		NS_INDIRECT
		 * VIRT	VIRT	NS_INDIRECT
		 * MBUF	PHYS	copy		NS_INDIRECT
		 * MBUF	VIRT	NS_INDIRECT
		 *
		 * The "eventually" depends on implementing NS_INDIRECT
		 * on physical device drivers.
		 * Note we do not yet differentiate PHYS/VIRT.
		 */
		for  (; i < n && !nm_ring_empty(ring); i++) {
			/* NOTE: this 'dst' (a slot) shadows the outer 'dst' (the descriptor) */
			struct netmap_slot *dst, *src;

			dst = &ring->slot[ring->cur];
			if (x[i].flags == TXQ_IS_SLOT) {
				struct netmap_ring *sr = x[i].ring_or_mbuf;

				src = &sr->slot[x[i].slot_idx];
				dst->len = src->len;

				if (port->can_swap_bufs) {
					/* zero-copy: exchange the two buffers */
					ND("pkt %d len %d", i, src->len);
					u_int tmp = dst->buf_idx;
					/* note: both slots' flags are overwritten, not OR-ed */
					dst->flags = src->flags = NS_BUF_CHANGED;
					dst->buf_idx = src->buf_idx;
					src->buf_idx = tmp;
				} else if (port->peer->allocator_id == 1) { // no indirect
					nm_pkt_copy(NETMAP_BUF(sr, src->buf_idx),
						NETMAP_BUF(ring, dst->buf_idx),
						dst->len);
				} else {
					/* let the destination read the source buffer in place */
					dst->ptr = (uintptr_t)NETMAP_BUF(sr, src->buf_idx);
					dst->flags = NS_INDIRECT;
				}
			} else if (x[i].flags == TXQ_IS_MBUF) {
				struct mbuf *m = (void *)x[i].ring_or_mbuf;

				ND("copy from mbuf");
				dst->len = m->__m_extlen;
				nm_pkt_copy(m->__m_extbuf,
					NETMAP_BUF(ring, dst->buf_idx),
					dst->len);
				FREE_PKT(m);
			} else {
				panic("bad slot");
			}
			/* entry consumed: mark it free and publish the slot */
			x[i].flags = 0;
			ring->head = ring->cur = nm_ring_next(ring, ring->cur);
		}
	}
	if (i < n) {
		/* rings are full: push them out and rescan from the first ring */
		if (retry-- > 0) {
			ioctl(port->d->fd, NIOCTXSYNC);
			goto again;
		}
		RD(1, "%d buffers leftover", n - i);
		/* give up on the rest; only mbufs own memory that must be freed */
		for (;i < n; i++) {
			if (x[i].flags == TXQ_IS_MBUF) {
				FREE_PKT(x[i].ring_or_mbuf);
			}
		}
	}
	port->cur_txq = 0;
	return 0;
}
/* push the packet described by slot rs to the group g. * This may cause other buffers to be pushed down the * chain headed by g. * Return a free buffer. */ uint32_t forward_packet(struct group_des *g, struct netmap_slot *rs) { uint32_t hash = rs->ptr; uint32_t output_port = hash % g->nports; struct port_des *port = &g->ports[output_port]; struct netmap_ring *ring = port->ring; struct overflow_queue *q = port->oq; /* Move the packet to the output pipe, unless there is * either no space left on the ring, or there is some * packet still in the overflow queue (since those must * take precedence over the new one) */ if (nm_ring_space(ring) && (q == NULL || oq_empty(q))) { struct netmap_slot *ts = &ring->slot[ring->cur]; struct netmap_slot old_slot = *ts; uint32_t free_buf; ts->buf_idx = rs->buf_idx; ts->len = rs->len; ts->flags |= NS_BUF_CHANGED; ts->ptr = rs->ptr; ring->head = ring->cur = nm_ring_next(ring, ring->cur); port->ctr.pkts++; forwarded++; if (old_slot.ptr && !g->last) { /* old slot not empty and we are not the last group: * push it further down the chain */ free_buf = forward_packet(g + 1, &old_slot); } else { /* just return the old slot buffer: it is * either empty or already seen by everybody */ free_buf = old_slot.buf_idx; } return free_buf; } /* use the overflow queue, if available */ if (q == NULL || oq_full(q)) { /* no space left on the ring and no overflow queue * available: we are forced to drop the packet */ dropped++; port->ctr.drop++; return rs->buf_idx; } oq_enq(q, rs); /* * we cannot continue down the chain and we need to * return a free buffer now. We take it from the free queue. */ if (oq_empty(freeq)) { /* the free queue is empty. 
Revoke some buffers * from the longest overflow queue */ uint32_t j; struct port_des *lp = &ports[0]; uint32_t max = lp->oq->n; /* let lp point to the port with the longest queue */ for (j = 1; j < glob_arg.output_rings; j++) { struct port_des *cp = &ports[j]; if (cp->oq->n > max) { lp = cp; max = cp->oq->n; } } /* move the oldest BUF_REVOKE buffers from the * lp queue to the free queue */ // XXX optimize this cycle for (j = 0; lp->oq->n && j < BUF_REVOKE; j++) { struct netmap_slot tmp = oq_deq(lp->oq); oq_enq(freeq, &tmp); } ND(1, "revoked %d buffers from %s", j, lq->name); lp->ctr.drop += j; dropped += j; } return oq_deq(freeq).buf_idx; }
int main(int argc, char **argv) { int ch; uint32_t i; int rv; unsigned int iter = 0; glob_arg.ifname[0] = '\0'; glob_arg.output_rings = 0; glob_arg.batch = DEF_BATCH; glob_arg.syslog_interval = DEF_SYSLOG_INT; while ( (ch = getopt(argc, argv, "i:p:b:B:s:")) != -1) { switch (ch) { case 'i': D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); return 1; } if (strncmp(optarg, "netmap:", 7) && strncmp(optarg, "vale", 4)) { sprintf(glob_arg.ifname, "netmap:%s", optarg); } else { strcpy(glob_arg.ifname, optarg); } break; case 'p': if (parse_pipes(optarg)) { usage(); return 1; } break; case 'B': glob_arg.extra_bufs = atoi(optarg); D("requested %d extra buffers", glob_arg.extra_bufs); break; case 'b': glob_arg.batch = atoi(optarg); D("batch is %d", glob_arg.batch); break; case 's': glob_arg.syslog_interval = atoi(optarg); D("syslog interval is %d", glob_arg.syslog_interval); break; default: D("bad option %c %s", ch, optarg); usage(); return 1; } } if (glob_arg.ifname[0] == '\0') { D("missing interface name"); usage(); return 1; } /* extract the base name */ char *nscan = strncmp(glob_arg.ifname, "netmap:", 7) ? 
glob_arg.ifname : glob_arg.ifname + 7; strncpy(glob_arg.base_name, nscan, MAX_IFNAMELEN); for (nscan = glob_arg.base_name; *nscan && !index("-*^{}/@", *nscan); nscan++) ; *nscan = '\0'; if (glob_arg.num_groups == 0) parse_pipes(""); setlogmask(LOG_UPTO(LOG_INFO)); openlog("lb", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); uint32_t npipes = glob_arg.output_rings; pthread_t stat_thread; ports = calloc(npipes + 1, sizeof(struct port_des)); if (!ports) { D("failed to allocate the stats array"); return 1; } struct port_des *rxport = &ports[npipes]; init_groups(); if (pthread_create(&stat_thread, NULL, print_stats, NULL) == -1) { D("unable to create the stats thread: %s", strerror(errno)); return 1; } /* we need base_req to specify pipes and extra bufs */ struct nmreq base_req; memset(&base_req, 0, sizeof(base_req)); base_req.nr_arg1 = npipes; base_req.nr_arg3 = glob_arg.extra_bufs; rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL); if (rxport->nmd == NULL) { D("cannot open %s", glob_arg.ifname); return (1); } else { D("successfully opened %s (tx rings: %u)", glob_arg.ifname, rxport->nmd->req.nr_tx_slots); } uint32_t extra_bufs = rxport->nmd->req.nr_arg3; struct overflow_queue *oq = NULL; /* reference ring to access the buffers */ rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0); if (!glob_arg.extra_bufs) goto run; D("obtained %d extra buffers", extra_bufs); if (!extra_bufs) goto run; /* one overflow queue for each output pipe, plus one for the * free extra buffers */ oq = calloc(npipes + 1, sizeof(struct overflow_queue)); if (!oq) { D("failed to allocated overflow queues descriptors"); goto run; } freeq = &oq[npipes]; rxport->oq = freeq; freeq->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!freeq->slots) { D("failed to allocate the free list"); } freeq->size = extra_bufs; snprintf(freeq->name, MAX_IFNAMELEN, "free queue"); /* * the list of buffers uses the first uint32_t in each buffer * as the index of the next buffer. 
*/ uint32_t scan; for (scan = rxport->nmd->nifp->ni_bufs_head; scan; scan = *(uint32_t *)NETMAP_BUF(rxport->ring, scan)) { struct netmap_slot s; s.buf_idx = scan; ND("freeq <- %d", s.buf_idx); oq_enq(freeq, &s); } if (freeq->n != extra_bufs) { D("something went wrong: netmap reported %d extra_bufs, but the free list contained %d", extra_bufs, freeq->n); return 1; } rxport->nmd->nifp->ni_bufs_head = 0; run: /* we need to create the persistent vale ports */ if (create_custom_ports(rxport->nmd->req.nr_arg2)) { free_buffers(); return 1; } atexit(delete_custom_ports); atexit(free_buffers); int j, t = 0; for (j = 0; j < glob_arg.num_groups; j++) { struct group_des *g = &groups[j]; int k; for (k = 0; k < g->nports; ++k) { struct port_des *p = &g->ports[k]; char interface[25]; sprintf(interface, "netmap:%s{%d/xT", g->pipename, g->first_id + k); D("opening pipe named %s", interface); p->nmd = nm_open(interface, NULL, 0, rxport->nmd); if (p->nmd == NULL) { D("cannot open %s", interface); return (1); } else { D("successfully opened pipe #%d %s (tx slots: %d)", k + 1, interface, p->nmd->req.nr_tx_slots); p->ring = NETMAP_TXRING(p->nmd->nifp, 0); } D("zerocopy %s", (rxport->nmd->mem == p->nmd->mem) ? 
"enabled" : "disabled"); if (extra_bufs) { struct overflow_queue *q = &oq[t + k]; q->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!q->slots) { D("failed to allocate overflow queue for pipe %d", k); /* make all overflow queue management fail */ extra_bufs = 0; } q->size = extra_bufs; snprintf(q->name, MAX_IFNAMELEN, "oq %s{%d", g->pipename, k); p->oq = q; } } t += g->nports; } if (glob_arg.extra_bufs && !extra_bufs) { if (oq) { for (i = 0; i < npipes + 1; i++) { free(oq[i].slots); oq[i].slots = NULL; } free(oq); oq = NULL; } D("*** overflow queues disabled ***"); } sleep(2); struct pollfd pollfd[npipes + 1]; memset(&pollfd, 0, sizeof(pollfd)); signal(SIGINT, sigint_h); while (!do_abort) { u_int polli = 0; iter++; for (i = 0; i < npipes; ++i) { struct netmap_ring *ring = ports[i].ring; if (nm_ring_next(ring, ring->tail) == ring->cur) { /* no need to poll, there are no packets pending */ continue; } pollfd[polli].fd = ports[i].nmd->fd; pollfd[polli].events = POLLOUT; pollfd[polli].revents = 0; ++polli; } pollfd[polli].fd = rxport->nmd->fd; pollfd[polli].events = POLLIN; pollfd[polli].revents = 0; ++polli; //RD(5, "polling %d file descriptors", polli+1); rv = poll(pollfd, polli, 10); if (rv <= 0) { if (rv < 0 && errno != EAGAIN && errno != EINTR) RD(1, "poll error %s", strerror(errno)); continue; } if (oq) { /* try to push packets from the overflow queues * to the corresponding pipes */ for (i = 0; i < npipes; i++) { struct port_des *p = &ports[i]; struct overflow_queue *q = p->oq; struct group_des *g = p->group; uint32_t j, lim; struct netmap_ring *ring; struct netmap_slot *slot; if (oq_empty(q)) continue; ring = p->ring; lim = nm_ring_space(ring); if (!lim) continue; if (q->n < lim) lim = q->n; for (j = 0; j < lim; j++) { struct netmap_slot s = oq_deq(q), tmp; tmp.ptr = 0; slot = &ring->slot[ring->cur]; if (slot->ptr && !g->last) { tmp.buf_idx = forward_packet(g + 1, slot); /* the forwarding may have removed packets * from the current queue */ if (q->n 
< lim) lim = q->n; } else { tmp.buf_idx = slot->buf_idx; } oq_enq(freeq, &tmp); *slot = s; slot->flags |= NS_BUF_CHANGED; ring->cur = nm_ring_next(ring, ring->cur); } ring->head = ring->cur; forwarded += lim; p->ctr.pkts += lim; } } int batch = 0; for (i = rxport->nmd->first_rx_ring; i <= rxport->nmd->last_rx_ring; i++) { struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i); //D("prepare to scan rings"); int next_cur = rxring->cur; struct netmap_slot *next_slot = &rxring->slot[next_cur]; const char *next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); while (!nm_ring_empty(rxring)) { struct netmap_slot *rs = next_slot; struct group_des *g = &groups[0]; // CHOOSE THE CORRECT OUTPUT PIPE uint32_t hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B'); if (hash == 0) { non_ip++; // XXX ?? } rs->ptr = hash | (1UL << 32); // prefetch the buffer for the next round next_cur = nm_ring_next(rxring, next_cur); next_slot = &rxring->slot[next_cur]; next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); __builtin_prefetch(next_buf); // 'B' is just a hashing seed rs->buf_idx = forward_packet(g, rs); rs->flags |= NS_BUF_CHANGED; rxring->head = rxring->cur = next_cur; batch++; if (unlikely(batch >= glob_arg.batch)) { ioctl(rxport->nmd->fd, NIOCRXSYNC, NULL); batch = 0; } ND(1, "Forwarded Packets: %"PRIu64" Dropped packets: %"PRIu64" Percent: %.2f", forwarded, dropped, ((float)dropped / (float)forwarded * 100)); } } } pthread_join(stat_thread, NULL); printf("%"PRIu64" packets forwarded. %"PRIu64" packets dropped. Total %"PRIu64"\n", forwarded, dropped, forwarded + dropped); return 0; }
/*
 * Transmit a packet given as 'iovcnt' fragments on the netmap TX ring.
 *
 * Every iovec fragment is copied into one or more netmap slots (a
 * fragment larger than a netmap buffer is split).  All slots but the
 * last carry NS_MOREFRAG.  Nothing is published until the whole packet
 * has been laid out, so a failure part-way leaves head/cur untouched.
 *
 * Returns the total packet size when the packet was queued or dropped
 * (no TX ring).  Returns 0, after enabling write polling, when the ring
 * does not have enough free slots.
 */
static ssize_t netmap_receive_iov(NetClientState *nc,
                    const struct iovec *iov, int iovcnt)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    struct netmap_ring *ring = s->tx;
    uint32_t last;
    uint32_t idx;
    uint8_t *dst;
    int j;
    uint32_t i;

    if (unlikely(!ring)) {
        /* Drop the packet. */
        return iov_size(iov, iovcnt);
    }

    last = i = ring->cur;

    /* At least one slot per fragment is needed. */
    if (nm_ring_space(ring) < iovcnt) {
        /* Not enough netmap slots. */
        netmap_write_poll(s, true);
        return 0;
    }

    for (j = 0; j < iovcnt; j++) {
        int iov_frag_size = iov[j].iov_len;
        int offset = 0;
        int nm_frag_size;

        /* Split each iovec fragment over more netmap slots, if
           necessary. */
        while (iov_frag_size) {
            nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);

            /*
             * ring->cur is not advanced while the packet is being laid
             * out, so the local cursor must be compared with the ring
             * tail to detect that the free slots are exhausted.
             */
            if (unlikely(i == ring->tail)) {
                /* We ran out of netmap slots while splitting the
                   iovec fragments. */
                netmap_write_poll(s, true);
                return 0;
            }

            idx = ring->slot[i].buf_idx;
            dst = (uint8_t *)NETMAP_BUF(ring, idx);

            ring->slot[i].len = nm_frag_size;
            ring->slot[i].flags = NS_MOREFRAG;
            pkt_copy((const uint8_t *)iov[j].iov_base + offset, dst,
                     nm_frag_size);

            last = i;
            i = nm_ring_next(ring, i);

            offset += nm_frag_size;
            iov_frag_size -= nm_frag_size;
        }
    }
    /* The last slot must not have NS_MOREFRAG set. */
    ring->slot[last].flags &= ~NS_MOREFRAG;

    /* Now update ring->cur and ring->head. */
    ring->cur = ring->head = i;

    ioctl(s->nmd->fd, NIOCTXSYNC, NULL);

    return iov_size(iov, iovcnt);
}
/*
 * Queue the frame held in 'm' on the first TX ring of g_nmd that has a
 * free slot.
 *
 * Once g_config.burst frames have been queued since the last wait, the
 * function first polls the descriptor for POLLOUT (2 s timeout).  When
 * no ring has room, the rings are flushed with NIOCTXSYNC until nothing
 * is pending and the send is retried, three attempts in total.
 *
 * Returns the frame size on success, or a negative error:
 *   -USN_ENULLPTR  'm' is NULL
 *   -USN_EBUSY     no slot became available, or poll() timed out
 *   -USN_EFDPOLL   poll() failed or reported POLLERR
 */
int32
usnet_send_frame(usn_mbuf_t *m)
{
   struct pollfd       fds;
   struct netmap_if   *nifp;
   int32               ret, error;
   u_int               size;
   u_char             *buf;
   int                 attemps = 0;
   int                 i, j;

   // TODO: put a check here
   fds.fd = g_nmd->fd;
   nifp = g_nmd->nifp;

   if ( m == 0 )
      return -USN_ENULLPTR;
   buf = m->head;
   size = m->mlen;

resend:
   if ( attemps==3 )
      return -USN_EBUSY;

   if(g_config.npkts >= g_config.burst ){
      fds.events = POLLOUT;
      fds.revents = 0;
      g_config.npkts = 0;

      ret = poll(&fds, 1, 2000);
      if (ret <= 0 ) {
         // XXX: save pending packets?
         //      as it is easy to reach line rate.
         /* the error code must be set on every path to 'fail' */
         error = (ret == 0) ? -USN_EBUSY : -USN_EFDPOLL;
         goto fail;
      }
      if (fds.revents & POLLERR) {
         /* we were polling for output: report the TX ring state */
         struct netmap_ring *tx = NETMAP_TXRING(nifp, g_nmd->cur_tx_ring);

         (void)tx;
         DEBUG("error on em1, tx [%d,%d,%d]",
               tx->head, tx->cur, tx->tail);
         error = -USN_EFDPOLL;
         goto fail;
      }
      if (fds.revents & POLLOUT) {
         goto send;
      }
      goto flush;
   }
send:
   for (j = g_nmd->first_tx_ring; j <= g_nmd->last_tx_ring; j++) {
      struct netmap_ring *ring;
      uint32_t cur, idx;

      ring = NETMAP_TXRING(nifp, j);

      if (nm_ring_empty(ring)) {
         continue;
      }

      cur = ring->cur;
      idx = ring->slot[cur].buf_idx;
      ring->slot[cur].flags = 0;
      ring->slot[cur].len = size;
      nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
      g_nmd->cur_tx_ring = j;
      /* publish the slot; the kernel sends it on the next sync */
      ring->head = ring->cur = nm_ring_next(ring, cur);

      g_config.npkts++;

      return size;
   }

flush:
   /* flush any remaining packets */
   //printf("flush \n");
   ioctl(fds.fd, NIOCTXSYNC, NULL);

   /* final part: wait all the TX queues to be empty. */
   for (i = g_nmd->first_tx_ring; i <= g_nmd->last_tx_ring; i++) {
      struct netmap_ring *txring = NETMAP_TXRING(nifp, i);

      while (nm_tx_pending(txring)) {
         ioctl(fds.fd, NIOCTXSYNC, NULL);
         usleep(1); /* wait 1 tick */
      }
   }
   attemps++;
   goto resend;

fail:
   printf("send_packet: failed to send\n");
   return error;
}
/*
 * Create and enqueue a batch of up to 'count' packets on 'ring'.
 *
 * Each slot is filled according to 'options': the frame is referenced
 * in place (OPT_INDIRECT), copied (OPT_COPY / OPT_MEMCPY) or the buffer
 * is only prefetched (OPT_PREFETCH).  Slots are grouped in packets of
 * 'nfrags' fragments chained with NS_MOREFRAG; update_addresses() is
 * called on the first fragment of each copied packet.  The last slot of
 * the batch gets NS_REPORT, which tells the driver to generate an
 * interrupt when done, and has NS_MOREFRAG cleared.
 *
 * Returns the number of slots filled; head and cur are advanced.
 */
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
		int size, struct glob_arg *g, u_int count, int options,
		u_int nfrags)
{
	u_int room, done, pos = ring->cur;
	u_int frags_left;

	room = nm_ring_space(ring);
	if (count > room)
		count = room;
	if (count < nfrags) {
		D("truncating packet, no room for frags %d %d",
				count, nfrags);
	}

	frags_left = nfrags;
	for (done = 0; done < count; done++) {
		struct netmap_slot *slot = &ring->slot[pos];
		char *payload = NETMAP_BUF(ring, slot->buf_idx);
		/* true on the first fragment of every packet */
		const int first_frag = (frags_left == nfrags);

		slot->flags = 0;
		if (options & OPT_INDIRECT) {
			slot->flags |= NS_INDIRECT;
			slot->ptr = (uint64_t)frame;
		} else if (options & OPT_COPY) {
			nm_pkt_copy(frame, payload, size);
			if (first_frag)
				update_addresses(pkt, g);
		} else if (options & OPT_MEMCPY) {
			memcpy(payload, frame, size);
			if (first_frag)
				update_addresses(pkt, g);
		} else if (options & OPT_PREFETCH) {
			__builtin_prefetch(payload);
		}
		if (options & OPT_DUMP)
			dump_payload(payload, size, ring, pos);
		slot->len = size;

		/* chain fragments; restart the count after the last one */
		frags_left--;
		if (frags_left > 0)
			slot->flags |= NS_MOREFRAG;
		else
			frags_left = nfrags;

		if (done == count - 1) {
			slot->flags &= ~NS_MOREFRAG;
			slot->flags |= NS_REPORT;
		}
		pos = nm_ring_next(ring, pos);
	}
	ring->head = ring->cur = pos;

	return (done);
}
static void * pinger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; struct netmap_if *nifp = targ->nmd->nifp; int i, rx = 0, n = targ->g->npackets; void *frame; int size; uint32_t sent = 0; struct timespec ts, now, last_print; uint32_t count = 0, min = 1000000000, av = 0; frame = &targ->pkt; frame += sizeof(targ->pkt.vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; if (targ->g->nthreads > 1) { D("can only ping with 1 thread"); return NULL; } clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); now = last_print; while (n == 0 || (int)sent < n) { struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); struct netmap_slot *slot; char *p; for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ slot = &ring->slot[ring->cur]; slot->len = size; p = NETMAP_BUF(ring, slot->buf_idx); if (nm_ring_empty(ring)) { D("-- ouch, cannot send"); } else { struct tstamp *tp; nm_pkt_copy(frame, p, size); clock_gettime(CLOCK_REALTIME_PRECISE, &ts); bcopy(&sent, p+42, sizeof(sent)); tp = (struct tstamp *)(p+46); tp->sec = (uint32_t)ts.tv_sec; tp->nsec = (uint32_t)ts.tv_nsec; sent++; ring->head = ring->cur = nm_ring_next(ring, ring->cur); } } /* should use a parameter to decide how often to send */ if (poll(&pfd, 1, 3000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } /* see what we got back */ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { ring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(ring)) { uint32_t seq; struct tstamp *tp; slot = &ring->slot[ring->cur]; p = NETMAP_BUF(ring, slot->buf_idx); clock_gettime(CLOCK_REALTIME_PRECISE, &now); bcopy(p+42, &seq, sizeof(seq)); tp = (struct tstamp *)(p+46); ts.tv_sec = (time_t)tp->sec; ts.tv_nsec = (long)tp->nsec; ts.tv_sec = now.tv_sec - ts.tv_sec; ts.tv_nsec = now.tv_nsec - ts.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (1) D("seq %d/%d delta 
%d.%09d", seq, sent, (int)ts.tv_sec, (int)ts.tv_nsec); if (ts.tv_nsec < (int)min) min = ts.tv_nsec; count ++; av += ts.tv_nsec; ring->head = ring->cur = nm_ring_next(ring, ring->cur); rx++; } } //D("tx %d rx %d", sent, rx); //usleep(100000); ts.tv_sec = now.tv_sec - last_print.tv_sec; ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (ts.tv_sec >= 1) { D("count %d min %d av %d", count, min, av/count); count = 0; av = 0; min = 100000000; last_print = now; } } return NULL; } /* * reply to ping requests */ static void * ponger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *txring, *rxring; int i, rx = 0, sent = 0, n = targ->g->npackets; if (targ->g->nthreads > 1) { D("can only reply ping with 1 thread"); return NULL; } D("understood ponger %d but don't know how to do it", n); while (n == 0 || sent < n) { uint32_t txcur, txavail; //#define BUSYWAIT #ifdef BUSYWAIT ioctl(pfd.fd, NIOCRXSYNC, NULL); #else if (poll(&pfd, 1, 1000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } #endif txring = NETMAP_TXRING(nifp, 0); txcur = txring->cur; txavail = nm_ring_space(txring); /* see what we got back */ for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { rxring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(rxring)) { uint16_t *spkt, *dpkt; uint32_t cur = rxring->cur; struct netmap_slot *slot = &rxring->slot[cur]; char *src, *dst; src = NETMAP_BUF(rxring, slot->buf_idx); //D("got pkt %p of size %d", src, slot->len); rxring->head = rxring->cur = nm_ring_next(rxring, cur); rx++; if (txavail == 0) continue; dst = NETMAP_BUF(txring, txring->slot[txcur].buf_idx); /* copy... 
*/ dpkt = (uint16_t *)dst; spkt = (uint16_t *)src; nm_pkt_copy(src, dst, slot->len); dpkt[0] = spkt[3]; dpkt[1] = spkt[4]; dpkt[2] = spkt[5]; dpkt[3] = spkt[0]; dpkt[4] = spkt[1]; dpkt[5] = spkt[2]; txring->slot[txcur].len = slot->len; /* XXX swap src dst mac */ txcur = nm_ring_next(txring, txcur); txavail--; sent++; } } txring->head = txring->cur = txcur; targ->count = sent; #ifdef BUSYWAIT ioctl(pfd.fd, NIOCTXSYNC, NULL); #endif //D("tx %d rx %d", sent, rx); } return NULL; } static __inline int timespec_ge(const struct timespec *a, const struct timespec *b) { if (a->tv_sec > b->tv_sec) return (1); if (a->tv_sec < b->tv_sec) return (0); if (a->tv_nsec >= b->tv_nsec) return (1); return (0); } static __inline struct timespec timeval2spec(const struct timeval *a) { struct timespec ts = { .tv_sec = a->tv_sec, .tv_nsec = a->tv_usec * 1000 }; return ts; } static __inline struct timeval timespec2val(const struct timespec *a) { struct timeval tv = { .tv_sec = a->tv_sec, .tv_usec = a->tv_nsec / 1000 }; return tv; } static __inline struct timespec timespec_add(struct timespec a, struct timespec b) { struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; if (ret.tv_nsec >= 1000000000) { ret.tv_sec++; ret.tv_nsec -= 1000000000; } return ret; } static __inline struct timespec timespec_sub(struct timespec a, struct timespec b) { struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; if (ret.tv_nsec < 0) { ret.tv_sec--; ret.tv_nsec += 1000000000; } return ret; } /* * wait until ts, either busy or sleeping if more than 1ms. * Return wakeup time. 
*/ static struct timespec wait_time(struct timespec ts) { for (;;) { struct timespec w, cur; clock_gettime(CLOCK_REALTIME_PRECISE, &cur); w = timespec_sub(ts, cur); if (w.tv_sec < 0) return cur; else if (w.tv_sec > 0 || w.tv_nsec > 1000000) poll(NULL, 0, 1); } } static void * sender_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; struct netmap_if *nifp; struct netmap_ring *txring; int i, n = targ->g->npackets / targ->g->nthreads; int64_t sent = 0; int options = targ->g->options | OPT_COPY; struct timespec nexttime = { 0, 0}; // XXX silence compiler int rate_limit = targ->g->tx_rate; struct pkt *pkt = &targ->pkt; void *frame; int size; if (targ->frame == NULL) { frame = pkt; frame += sizeof(pkt->vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; } else { frame = targ->frame; size = targ->g->pkt_size; } D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); if (setaffinity(targ->thread, targ->affinity)) goto quit; /* main loop.*/ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); if (rate_limit) { targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); targ->tic.tv_nsec = 0; wait_time(targ->tic); nexttime = targ->tic; } if (targ->g->dev_type == DEV_TAP) { D("writing to file desc %d", targ->g->main_fd); for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (write(targ->g->main_fd, frame, size) != -1) sent++; update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } #ifndef NO_PCAP } else if (targ->g->dev_type == DEV_PCAP) { pcap_t *p = targ->g->p; for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (pcap_inject(p, frame, size) != -1) sent++; update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } #endif /* NO_PCAP */ } else { int tosend = 0; int frags = targ->g->frags; nifp = targ->nmd->nifp; while (!targ->cancel && (n == 0 || sent < n)) { if (rate_limit && tosend <= 0) { tosend = targ->g->burst; 
nexttime = timespec_add(nexttime, targ->g->tx_period); wait_time(nexttime); } /* * wait for available room in the send queue(s) */ if (poll(&pfd, 1, 2000) <= 0) { if (targ->cancel) break; D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); // goto quit; } if (pfd.revents & POLLERR) { D("poll error"); goto quit; } /* * scan our queues and send on those with room */ if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { D("drop copy"); options &= ~OPT_COPY; } for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { int m, limit = rate_limit ? tosend : targ->g->burst; if (n > 0 && n - sent < limit) limit = n - sent; txring = NETMAP_TXRING(nifp, i); if (nm_ring_empty(txring)) continue; if (frags > 1) limit = ((limit + frags - 1) / frags) * frags; m = send_packets(txring, pkt, frame, size, targ->g, limit, options, frags); ND("limit %d tail %d frags %d m %d", limit, txring->tail, frags, m); sent += m; targ->count = sent; if (rate_limit) { tosend -= m; if (tosend <= 0) break; } } } /* flush any remaining packets */ D("flush tail %d head %d on thread %p", txring->tail, txring->head, pthread_self()); ioctl(pfd.fd, NIOCTXSYNC, NULL); /* final part: wait all the TX queues to be empty. */ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { txring = NETMAP_TXRING(nifp, i); while (nm_tx_pending(txring)) { RD(5, "pending tx tail %d head %d on ring %d", txring->tail, txring->head, i); ioctl(pfd.fd, NIOCTXSYNC, NULL); usleep(1); /* wait 1 tick */ } } } /* end DEV_NETMAP */ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->completed = 1; targ->count = sent; quit: /* reset the ``used`` flag. 
*/ targ->used = 0; return (NULL); } #ifndef NO_PCAP static void receive_pcap(u_char *user, const struct pcap_pkthdr * h, const u_char * bytes) { int *count = (int *)user; (void)h; /* UNUSED */ (void)bytes; /* UNUSED */ (*count)++; } #endif /* !NO_PCAP */ static int receive_packets(struct netmap_ring *ring, u_int limit, int dump) { u_int cur, rx, n; cur = ring->cur; n = nm_ring_space(ring); if (n < limit) limit = n; for (rx = 0; rx < limit; rx++) { struct netmap_slot *slot = &ring->slot[cur]; char *p = NETMAP_BUF(ring, slot->buf_idx); if (dump) dump_payload(p, slot->len, ring, cur); cur = nm_ring_next(ring, cur); } ring->head = ring->cur = cur; return (rx); } static void * receiver_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; struct netmap_if *nifp; struct netmap_ring *rxring; int i; uint64_t received = 0; if (setaffinity(targ->thread, targ->affinity)) goto quit; D("reading from %s fd %d main_fd %d", targ->g->ifname, targ->fd, targ->g->main_fd); /* unbounded wait for the first packet. */ for (;!targ->cancel;) { i = poll(&pfd, 1, 1000); if (i > 0 && !(pfd.revents & POLLERR)) break; RD(1, "waiting for initial packets, poll returns %d %d", i, pfd.revents); } /* main loop, exit after 1s silence */ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); if (targ->g->dev_type == DEV_TAP) { while (!targ->cancel) { char buf[MAX_BODYSIZE]; /* XXX should we poll ? */ if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) targ->count++; } #ifndef NO_PCAP } else if (targ->g->dev_type == DEV_PCAP) { while (!targ->cancel) { /* XXX should we poll ? */ pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, (u_char *)&targ->count); } #endif /* !NO_PCAP */ } else { int dump = targ->g->options & OPT_DUMP; nifp = targ->nmd->nifp; while (!targ->cancel) { /* Once we started to receive packets, wait at most 1 seconds before quitting. 
*/ if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->toc.tv_sec -= 1; /* Subtract timeout time. */ goto out; } if (pfd.revents & POLLERR) { D("poll err"); goto quit; } for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { int m; rxring = NETMAP_RXRING(nifp, i); if (nm_ring_empty(rxring)) continue; m = receive_packets(rxring, targ->g->burst, dump); received += m; } targ->count = received; } } clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); out: targ->completed = 1; targ->count = received; quit: /* reset the ``used`` flag. */ targ->used = 0; return (NULL); } /* very crude code to print a number in normalized form. * Caller has to make sure that the buffer is large enough. */ static const char * norm(char *buf, double val) { char *units[] = { "", "K", "M", "G", "T" }; u_int i; for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) val /= 1000; sprintf(buf, "%.2f %s", val, units[i]); return buf; } static void tx_output(uint64_t sent, int size, double delta) { double bw, raw_bw, pps; char b1[40], b2[80], b3[80]; printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", (unsigned long long)sent, size, delta); if (delta == 0) delta = 1e-6; if (size < 60) /* correct for min packet size */ size = 60; pps = sent / delta; bw = (8.0 * size * sent) / delta; /* raw packets have4 bytes crc + 20 bytes framing */ raw_bw = (8.0 * (size + 24) * sent) / delta; printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); } static void rx_output(uint64_t received, double delta) { double pps; char b1[40]; printf("Received %llu packets, in %.2f seconds.\n", (unsigned long long) received, delta); if (delta == 0) delta = 1e-6; pps = received / delta; printf("Speed: %spps\n", norm(b1, pps)); }
/* * reply to ping requests */ static void * ponger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; struct netmap_ring *txring, *rxring; int i, rx = 0, sent = 0, n = targ->g->npackets; fds[0].fd = targ->fd; fds[0].events = (POLLIN); if (targ->g->nthreads > 1) { D("can only reply ping with 1 thread"); return NULL; } D("understood ponger %d but don't know how to do it", n); while (n == 0 || sent < n) { uint32_t txcur, txavail; //#define BUSYWAIT #ifdef BUSYWAIT ioctl(fds[0].fd, NIOCRXSYNC, NULL); #else if (poll(fds, 1, 1000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } #endif txring = NETMAP_TXRING(nifp, 0); txcur = txring->cur; txavail = nm_ring_space(txring); /* see what we got back */ for (i = targ->qfirst; i < targ->qlast; i++) { rxring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(rxring)) { uint16_t *spkt, *dpkt; uint32_t cur = rxring->cur; struct netmap_slot *slot = &rxring->slot[cur]; char *src, *dst; src = NETMAP_BUF(rxring, slot->buf_idx); //D("got pkt %p of size %d", src, slot->len); rxring->head = rxring->cur = nm_ring_next(rxring, cur); rx++; if (txavail == 0) continue; dst = NETMAP_BUF(txring, txring->slot[txcur].buf_idx); /* copy... */ dpkt = (uint16_t *)dst; spkt = (uint16_t *)src; pkt_copy(src, dst, slot->len); dpkt[0] = spkt[3]; dpkt[1] = spkt[4]; dpkt[2] = spkt[5]; dpkt[3] = spkt[0]; dpkt[4] = spkt[1]; dpkt[5] = spkt[2]; txring->slot[txcur].len = slot->len; /* XXX swap src dst mac */ txcur = nm_ring_next(txring, txcur); txavail--; sent++; } } txring->head = txring->cur = txcur; targ->count = sent; #ifdef BUSYWAIT ioctl(fds[0].fd, NIOCTXSYNC, NULL); #endif //D("tx %d rx %d", sent, rx); } return NULL; }
static void * pinger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; int i, rx = 0, n = targ->g->npackets; void *frame; int size; frame = &targ->pkt; frame += sizeof(targ->pkt.vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; fds[0].fd = targ->fd; fds[0].events = (POLLIN); static uint32_t sent; struct timespec ts, now, last_print; uint32_t count = 0, min = 1000000000, av = 0; if (targ->g->nthreads > 1) { D("can only ping with 1 thread"); return NULL; } clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); now = last_print; while (n == 0 || (int)sent < n) { struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); struct netmap_slot *slot; char *p; for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ slot = &ring->slot[ring->cur]; slot->len = size; p = NETMAP_BUF(ring, slot->buf_idx); if (nm_ring_empty(ring)) { D("-- ouch, cannot send"); } else { pkt_copy(frame, p, size); clock_gettime(CLOCK_REALTIME_PRECISE, &ts); bcopy(&sent, p+42, sizeof(sent)); bcopy(&ts, p+46, sizeof(ts)); sent++; ring->head = ring->cur = nm_ring_next(ring, ring->cur); } } /* should use a parameter to decide how often to send */ if (poll(fds, 1, 3000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } /* see what we got back */ for (i = targ->qfirst; i < targ->qlast; i++) { ring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(ring)) { uint32_t seq; slot = &ring->slot[ring->cur]; p = NETMAP_BUF(ring, slot->buf_idx); clock_gettime(CLOCK_REALTIME_PRECISE, &now); bcopy(p+42, &seq, sizeof(seq)); bcopy(p+46, &ts, sizeof(ts)); ts.tv_sec = now.tv_sec - ts.tv_sec; ts.tv_nsec = now.tv_nsec - ts.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (1) D("seq %d/%d delta %d.%09d", seq, sent, (int)ts.tv_sec, (int)ts.tv_nsec); if (ts.tv_nsec < (int)min) min = ts.tv_nsec; count ++; av += ts.tv_nsec; ring->head = ring->cur = nm_ring_next(ring, ring->cur); 
rx++; } } //D("tx %d rx %d", sent, rx); //usleep(100000); ts.tv_sec = now.tv_sec - last_print.tv_sec; ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (ts.tv_sec >= 1) { D("count %d min %d av %d", count, min, av/count); count = 0; av = 0; min = 100000000; last_print = now; } } return NULL; }
/* * Read packets from a port, invoke the firewall and possibly * pass them to the peer. * The firewall receives a fake mbuf on the stack that refers * to the netmap slot. In this case the mbuf has two extra fields, * indicating the original buffer and length (buf = NULL if no need * to copy). * We also need to pass the pointer to a peer, though we can use ifp for that. * If the result is accept, no need to copy * and we can just pass the slot to the destination interface. * Otherwise, we need to do an explicit copy. */ int netmap_read(struct sess *sess, void *arg) { struct my_netmap_port *port = arg; u_int si, hdrlen; struct mbuf dm, dm0; struct ip_fw_args args; struct my_netmap_port *peer = port->peer; struct txq_entry *x = peer->q; struct nm_desc *srcp = port->d; bzero(&dm0, sizeof(dm0)); bzero(&args, sizeof(args)); /* scan all rings */ for (si = srcp->first_rx_ring; si <= srcp->last_rx_ring; si++) { struct netmap_ring *ring = NETMAP_RXRING(srcp->nifp, si); __builtin_prefetch(ring); if (nm_ring_empty(ring)) continue; __builtin_prefetch(&ring->slot[ring->cur]); while (!nm_ring_empty(ring)) { u_int src, idx, len; struct netmap_slot *slot; void *buf; /* make sure we have room before looking at the input */ if (peer->cur_txq >= MY_TXQ_LEN) { netmap_fwd(peer); continue; } src = ring->cur; slot = &ring->slot[src]; __builtin_prefetch (slot+1); idx = slot->buf_idx; buf = (u_char *)NETMAP_BUF(ring, idx); if (idx < 2) { D("%s bogus RX index at offset %d", srcp->nifp->ni_name, src); sleep(2); } __builtin_prefetch(buf); ring->head = ring->cur = nm_ring_next(ring, src); /* prepare to invoke the firewall */ dm = dm0; // XXX clear all including tags args.m = &dm; len = slot->len; dm.m_flags = M_STACK; // remember original buf and peer dm.__m_extbuf = buf; dm.__m_extlen = len; dm.__m_peer = peer; /* the routine to call in netisr_dispatch */ dm.__m_callback = netmap_enqueue; /* XXX can we use check_frame ? 
*/ dm.m_pkthdr.rcvif = &port->ifp; hdrlen = ((uint16_t *)buf)[6] == htons(0x8100) ? 18 : 14; ND(1, "hdrlen %d", hdrlen); dm.m_data = buf + hdrlen; // skip mac + vlan hdr if any dm.m_len = dm.m_pkthdr.len = len - hdrlen; ND("slot %d len %d", i, dm.m_len); // XXX ipfw_chk is slightly faster //ret = ipfw_chk(&args); ipfw_check_packet(NULL, &args.m, NULL, PFIL_IN, NULL); if (args.m != NULL) { // ok. forward /* * XXX TODO remember to clean up any tags that * ipfw may have allocated */ u_int dst = peer->cur_txq; x[dst].ring_or_mbuf = ring; x[dst].slot_idx = src; x[dst].flags = TXQ_IS_SLOT; peer->cur_txq++; } ND("exit at slot %d", next_i); } } /* process packets sent to the opposite queue */ if (peer->cur_txq > 0) netmap_fwd(peer); if (port->cur_txq > 0) { // WANT_RUN /* any traffic in this direction ? */ netmap_fwd(port); } ND("done"); return 0; }
u_int move (struct vnfapp * va) { u_int burst, m, j, k; struct vnfin * v = va->data; struct netmap_slot * rx_slot, * tx_slot; struct ether_header * eth; struct ip * ip; j = va->rx_ring->cur; k = va->tx_ring->cur; burst = BURST_MAX; m = nm_ring_space (va->rx_ring); if (m < BURST_MAX) burst = m; m = nm_ring_space (va->tx_ring); if (m < burst) burst = m; m = burst; while (burst-- > 0) { /* netmap zero copy switching */ rx_slot = &va->rx_ring->slot[j]; tx_slot = &va->tx_ring->slot[k]; if (tx_slot->buf_idx < 2 || rx_slot->buf_idx < 2) { D("wrong index rx[%d] = %d -> tx[%d] = %d", j, rx_slot->buf_idx, k, tx_slot->buf_idx); sleep(2); } eth = (struct ether_header *) NETMAP_BUF (va->rx_ring, rx_slot->buf_idx); ip = (struct ip *) (eth + 1); /* drop acl check */ if (find_patricia_entry (tree, &ip->ip_dst, 32)) { goto drop; } #ifdef ZEROCPY u_int idx; eth = (struct ether_header *) NETMAP_BUF (va->rx_ring, rx_slot->buf_idx); MACCOPY (OUTDSTMAC(v), eth->ether_dhost); idx = tx_slot->buf_idx; tx_slot->buf_idx = rx_slot->buf_idx; rx_slot->buf_idx = idx; tx_slot->flags |= NS_BUF_CHANGED; rx_slot->flags |= NS_BUF_CHANGED; tx_slot->len = rx_slot->len; #else char * spkt = NETMAP_BUF (va->rx_ring, rx_slot->buf_idx); char * dpkt = NETMAP_BUF (va->tx_ring, tx_slot->buf_idx); nm_pkt_copy (spkt, dpkt, rx_slot->len); /* change destination mac */ eth = (struct ether_header *) dpkt; MACCOPY (OUTDSTMAC(v), eth->ether_dhost); tx_slot->len = rx_slot->len; #endif drop: j = nm_ring_next (va->rx_ring, j); k = nm_ring_next (va->tx_ring, k); } va->rx_ring->head = va->rx_ring->cur = j; va->tx_ring->head = va->tx_ring->cur = k; if (verbose) D ("rx queue %d send %u packets", va->rx_q, m); return m; }
int main(int argc, char **argv) { int ch; uint32_t i; int rv; unsigned int iter = 0; glob_arg.ifname[0] = '\0'; glob_arg.output_rings = DEF_OUT_PIPES; glob_arg.batch = DEF_BATCH; glob_arg.syslog_interval = DEF_SYSLOG_INT; while ( (ch = getopt(argc, argv, "i:p:b:B:s:")) != -1) { switch (ch) { case 'i': D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); return 1; } if (strncmp(optarg, "netmap:", 7) && strncmp(optarg, "vale", 4)) { sprintf(glob_arg.ifname, "netmap:%s", optarg); } else { strcpy(glob_arg.ifname, optarg); } break; case 'p': glob_arg.output_rings = atoi(optarg); if (glob_arg.output_rings < 1) { D("you must output to at least one pipe"); usage(); return 1; } break; case 'B': glob_arg.extra_bufs = atoi(optarg); D("requested %d extra buffers", glob_arg.extra_bufs); break; case 'b': glob_arg.batch = atoi(optarg); D("batch is %d", glob_arg.batch); break; case 's': glob_arg.syslog_interval = atoi(optarg); D("syslog interval is %d", glob_arg.syslog_interval); break; default: D("bad option %c %s", ch, optarg); usage(); return 1; } } if (glob_arg.ifname[0] == '\0') { D("missing interface name"); usage(); return 1; } setlogmask(LOG_UPTO(LOG_INFO)); openlog("lb", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); uint32_t npipes = glob_arg.output_rings; struct overflow_queue *freeq = NULL; pthread_t stat_thread; ports = calloc(npipes + 1, sizeof(struct port_des)); if (!ports) { D("failed to allocate the stats array"); return 1; } struct port_des *rxport = &ports[npipes]; if (pthread_create(&stat_thread, NULL, print_stats, NULL) == -1) { D("unable to create the stats thread: %s", strerror(errno)); return 1; } /* we need base_req to specify pipes and extra bufs */ struct nmreq base_req; memset(&base_req, 0, sizeof(base_req)); base_req.nr_arg1 = npipes; base_req.nr_arg3 = glob_arg.extra_bufs; rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL); if (rxport->nmd == NULL) { D("cannot open %s", glob_arg.ifname); 
return (1); } else { D("successfully opened %s (tx rings: %u)", glob_arg.ifname, rxport->nmd->req.nr_tx_slots); } uint32_t extra_bufs = rxport->nmd->req.nr_arg3; struct overflow_queue *oq = NULL; /* reference ring to access the buffers */ rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0); if (!glob_arg.extra_bufs) goto run; D("obtained %d extra buffers", extra_bufs); if (!extra_bufs) goto run; /* one overflow queue for each output pipe, plus one for the * free extra buffers */ oq = calloc(npipes + 1, sizeof(struct overflow_queue)); if (!oq) { D("failed to allocated overflow queues descriptors"); goto run; } freeq = &oq[npipes]; rxport->oq = freeq; freeq->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!freeq->slots) { D("failed to allocate the free list"); } freeq->size = extra_bufs; snprintf(freeq->name, MAX_IFNAMELEN, "free queue"); /* * the list of buffers uses the first uint32_t in each buffer * as the index of the next buffer. */ uint32_t scan; for (scan = rxport->nmd->nifp->ni_bufs_head; scan; scan = *(uint32_t *)NETMAP_BUF(rxport->ring, scan)) { struct netmap_slot s; s.buf_idx = scan; ND("freeq <- %d", s.buf_idx); oq_enq(freeq, &s); } atexit(free_buffers); if (freeq->n != extra_bufs) { D("something went wrong: netmap reported %d extra_bufs, but the free list contained %d", extra_bufs, freeq->n); return 1; } rxport->nmd->nifp->ni_bufs_head = 0; run: for (i = 0; i < npipes; ++i) { char interface[25]; sprintf(interface, "%s{%d", glob_arg.ifname, i); D("opening pipe named %s", interface); //ports[i].nmd = nm_open(interface, NULL, NM_OPEN_NO_MMAP | NM_OPEN_ARG3 | NM_OPEN_RING_CFG, rxport->nmd); ports[i].nmd = nm_open(interface, NULL, 0, rxport->nmd); if (ports[i].nmd == NULL) { D("cannot open %s", interface); return (1); } else { D("successfully opened pipe #%d %s (tx slots: %d)", i + 1, interface, ports[i].nmd->req.nr_tx_slots); ports[i].ring = NETMAP_TXRING(ports[i].nmd->nifp, 0); } D("zerocopy %s", (rxport->nmd->mem == ports[i].nmd->mem) ? 
"enabled" : "disabled"); if (extra_bufs) { struct overflow_queue *q = &oq[i]; q->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!q->slots) { D("failed to allocate overflow queue for pipe %d", i); /* make all overflow queue management fail */ extra_bufs = 0; } q->size = extra_bufs; snprintf(q->name, MAX_IFNAMELEN, "oq %d", i); ports[i].oq = q; } } if (glob_arg.extra_bufs && !extra_bufs) { if (oq) { for (i = 0; i < npipes + 1; i++) { free(oq[i].slots); oq[i].slots = NULL; } free(oq); oq = NULL; } D("*** overflow queues disabled ***"); } sleep(2); struct pollfd pollfd[npipes + 1]; memset(&pollfd, 0, sizeof(pollfd)); signal(SIGINT, sigint_h); while (!do_abort) { u_int polli = 0; iter++; for (i = 0; i < npipes; ++i) { struct netmap_ring *ring = ports[i].ring; if (nm_ring_next(ring, ring->tail) == ring->cur) { /* no need to poll, there are no packets pending */ continue; } pollfd[polli].fd = ports[i].nmd->fd; pollfd[polli].events = POLLOUT; pollfd[polli].revents = 0; ++polli; } pollfd[polli].fd = rxport->nmd->fd; pollfd[polli].events = POLLIN; pollfd[polli].revents = 0; ++polli; //RD(5, "polling %d file descriptors", polli+1); rv = poll(pollfd, polli, 10); if (rv <= 0) { if (rv < 0 && errno != EAGAIN && errno != EINTR) RD(1, "poll error %s", strerror(errno)); continue; } if (oq) { /* try to push packets from the overflow queues * to the corresponding pipes */ for (i = 0; i < npipes; i++) { struct port_des *p = &ports[i]; struct overflow_queue *q = p->oq; uint32_t j, lim; struct netmap_ring *ring; struct netmap_slot *slot; if (!q->n) continue; ring = p->ring; lim = nm_ring_space(ring); if (!lim) continue; if (q->n < lim) lim = q->n; for (j = 0; j < lim; j++) { struct netmap_slot s = oq_deq(q); slot = &ring->slot[ring->cur]; oq_enq(freeq, slot); *slot = s; slot->flags |= NS_BUF_CHANGED; ring->cur = nm_ring_next(ring, ring->cur); } ring->head = ring->cur; forwarded += lim; p->ctr.pkts += lim; } } int batch = 0; for (i = rxport->nmd->first_rx_ring; i <= 
rxport->nmd->last_rx_ring; i++) { struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i); //D("prepare to scan rings"); int next_cur = rxring->cur; struct netmap_slot *next_slot = &rxring->slot[next_cur]; const char *next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); while (!nm_ring_empty(rxring)) { struct overflow_queue *q; struct netmap_slot *rs = next_slot; // CHOOSE THE CORRECT OUTPUT PIPE uint32_t hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B'); if (hash == 0) non_ip++; // XXX ?? // prefetch the buffer for the next round next_cur = nm_ring_next(rxring, next_cur); next_slot = &rxring->slot[next_cur]; next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); __builtin_prefetch(next_buf); // 'B' is just a hashing seed uint32_t output_port = hash % glob_arg.output_rings; struct port_des *port = &ports[output_port]; struct netmap_ring *ring = port->ring; uint32_t free_buf; // Move the packet to the output pipe. if (nm_ring_space(ring)) { struct netmap_slot *ts = &ring->slot[ring->cur]; free_buf = ts->buf_idx; ts->buf_idx = rs->buf_idx; ts->len = rs->len; ts->flags |= NS_BUF_CHANGED; ring->head = ring->cur = nm_ring_next(ring, ring->cur); port->ctr.pkts++; forwarded++; goto forward; } /* use the overflow queue, if available */ if (!oq) { dropped++; port->ctr.drop++; goto next; } q = &oq[output_port]; if (!freeq->n) { /* revoke some buffers from the longest overflow queue */ uint32_t j; struct port_des *lp = &ports[0]; uint32_t max = lp->oq->n; for (j = 1; j < npipes; j++) { struct port_des *cp = &ports[j]; if (cp->oq->n > max) { lp = cp; max = cp->oq->n; } } // XXX optimize this cycle for (j = 0; lp->oq->n && j < BUF_REVOKE; j++) { struct netmap_slot tmp = oq_deq(lp->oq); oq_enq(freeq, &tmp); } ND(1, "revoked %d buffers from %s", j, lq->name); lp->ctr.drop += j; dropped += j; } free_buf = oq_deq(freeq).buf_idx; oq_enq(q, rs); forward: rs->buf_idx = free_buf; rs->flags |= NS_BUF_CHANGED; next: rxring->head = rxring->cur = next_cur; batch++; if 
(unlikely(batch >= glob_arg.batch)) { ioctl(rxport->nmd->fd, NIOCRXSYNC, NULL); batch = 0; } ND(1, "Forwarded Packets: %"PRIu64" Dropped packets: %"PRIu64" Percent: %.2f", forwarded, dropped, ((float)dropped / (float)forwarded * 100)); } } } pthread_join(stat_thread, NULL); printf("%"PRIu64" packets forwarded. %"PRIu64" packets dropped. Total %"PRIu64"\n", forwarded, dropped, forwarded + dropped); return 0; }