/*
 * Cleanup handler: return the extra buffers to netmap and close the ports.
 *
 * Every buffer still sitting in an overflow queue (the per-pipe queues and
 * the queue attached to the rx port) is pushed onto the list headed by the
 * rx port's ni_bufs_head. The list is threaded through the buffers
 * themselves: the first uint32_t of a buffer holds the index of the next one.
 * After that, every port descriptor (pipes plus the rx port) is closed.
 */
static void
free_buffers(void)
{
	struct port_des *rxport = &ports[glob_arg.output_rings];
	int returned = 0;
	int idx;

	/* rebuild the netmap free list out of all the overflow queues */
	for (idx = 0; idx < glob_arg.output_rings + 1; idx++) {
		struct port_des *cur_port = &ports[idx];
		struct overflow_queue *queue = cur_port->oq;

		if (queue == NULL)
			continue;

		for (; queue->n; returned++) {
			struct netmap_slot s = oq_deq(queue);
			uint32_t *next_link = (uint32_t *)NETMAP_BUF(cur_port->ring, s.buf_idx);

			/* push the buffer at the head of the list */
			*next_link = rxport->nmd->nifp->ni_bufs_head;
			rxport->nmd->nifp->ni_bufs_head = s.buf_idx;
		}
	}
	D("added %d buffers to netmap free list", returned);

	/* close the pipes and, last, the rx port */
	idx = 0;
	while (idx < glob_arg.output_rings + 1) {
		nm_close(ports[idx].nmd);
		++idx;
	}
}
int main(int argc, char **argv) { int ch; uint32_t i; int rv; unsigned int iter = 0; glob_arg.ifname[0] = '\0'; glob_arg.output_rings = DEF_OUT_PIPES; glob_arg.batch = DEF_BATCH; glob_arg.syslog_interval = DEF_SYSLOG_INT; while ( (ch = getopt(argc, argv, "i:p:b:B:s:")) != -1) { switch (ch) { case 'i': D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); return 1; } if (strncmp(optarg, "netmap:", 7) && strncmp(optarg, "vale", 4)) { sprintf(glob_arg.ifname, "netmap:%s", optarg); } else { strcpy(glob_arg.ifname, optarg); } break; case 'p': glob_arg.output_rings = atoi(optarg); if (glob_arg.output_rings < 1) { D("you must output to at least one pipe"); usage(); return 1; } break; case 'B': glob_arg.extra_bufs = atoi(optarg); D("requested %d extra buffers", glob_arg.extra_bufs); break; case 'b': glob_arg.batch = atoi(optarg); D("batch is %d", glob_arg.batch); break; case 's': glob_arg.syslog_interval = atoi(optarg); D("syslog interval is %d", glob_arg.syslog_interval); break; default: D("bad option %c %s", ch, optarg); usage(); return 1; } } if (glob_arg.ifname[0] == '\0') { D("missing interface name"); usage(); return 1; } setlogmask(LOG_UPTO(LOG_INFO)); openlog("lb", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); uint32_t npipes = glob_arg.output_rings; struct overflow_queue *freeq = NULL; pthread_t stat_thread; ports = calloc(npipes + 1, sizeof(struct port_des)); if (!ports) { D("failed to allocate the stats array"); return 1; } struct port_des *rxport = &ports[npipes]; if (pthread_create(&stat_thread, NULL, print_stats, NULL) == -1) { D("unable to create the stats thread: %s", strerror(errno)); return 1; } /* we need base_req to specify pipes and extra bufs */ struct nmreq base_req; memset(&base_req, 0, sizeof(base_req)); base_req.nr_arg1 = npipes; base_req.nr_arg3 = glob_arg.extra_bufs; rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL); if (rxport->nmd == NULL) { D("cannot open %s", glob_arg.ifname); 
return (1); } else { D("successfully opened %s (tx rings: %u)", glob_arg.ifname, rxport->nmd->req.nr_tx_slots); } uint32_t extra_bufs = rxport->nmd->req.nr_arg3; struct overflow_queue *oq = NULL; /* reference ring to access the buffers */ rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0); if (!glob_arg.extra_bufs) goto run; D("obtained %d extra buffers", extra_bufs); if (!extra_bufs) goto run; /* one overflow queue for each output pipe, plus one for the * free extra buffers */ oq = calloc(npipes + 1, sizeof(struct overflow_queue)); if (!oq) { D("failed to allocated overflow queues descriptors"); goto run; } freeq = &oq[npipes]; rxport->oq = freeq; freeq->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!freeq->slots) { D("failed to allocate the free list"); } freeq->size = extra_bufs; snprintf(freeq->name, MAX_IFNAMELEN, "free queue"); /* * the list of buffers uses the first uint32_t in each buffer * as the index of the next buffer. */ uint32_t scan; for (scan = rxport->nmd->nifp->ni_bufs_head; scan; scan = *(uint32_t *)NETMAP_BUF(rxport->ring, scan)) { struct netmap_slot s; s.buf_idx = scan; ND("freeq <- %d", s.buf_idx); oq_enq(freeq, &s); } atexit(free_buffers); if (freeq->n != extra_bufs) { D("something went wrong: netmap reported %d extra_bufs, but the free list contained %d", extra_bufs, freeq->n); return 1; } rxport->nmd->nifp->ni_bufs_head = 0; run: for (i = 0; i < npipes; ++i) { char interface[25]; sprintf(interface, "%s{%d", glob_arg.ifname, i); D("opening pipe named %s", interface); //ports[i].nmd = nm_open(interface, NULL, NM_OPEN_NO_MMAP | NM_OPEN_ARG3 | NM_OPEN_RING_CFG, rxport->nmd); ports[i].nmd = nm_open(interface, NULL, 0, rxport->nmd); if (ports[i].nmd == NULL) { D("cannot open %s", interface); return (1); } else { D("successfully opened pipe #%d %s (tx slots: %d)", i + 1, interface, ports[i].nmd->req.nr_tx_slots); ports[i].ring = NETMAP_TXRING(ports[i].nmd->nifp, 0); } D("zerocopy %s", (rxport->nmd->mem == ports[i].nmd->mem) ? 
"enabled" : "disabled"); if (extra_bufs) { struct overflow_queue *q = &oq[i]; q->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!q->slots) { D("failed to allocate overflow queue for pipe %d", i); /* make all overflow queue management fail */ extra_bufs = 0; } q->size = extra_bufs; snprintf(q->name, MAX_IFNAMELEN, "oq %d", i); ports[i].oq = q; } } if (glob_arg.extra_bufs && !extra_bufs) { if (oq) { for (i = 0; i < npipes + 1; i++) { free(oq[i].slots); oq[i].slots = NULL; } free(oq); oq = NULL; } D("*** overflow queues disabled ***"); } sleep(2); struct pollfd pollfd[npipes + 1]; memset(&pollfd, 0, sizeof(pollfd)); signal(SIGINT, sigint_h); while (!do_abort) { u_int polli = 0; iter++; for (i = 0; i < npipes; ++i) { struct netmap_ring *ring = ports[i].ring; if (nm_ring_next(ring, ring->tail) == ring->cur) { /* no need to poll, there are no packets pending */ continue; } pollfd[polli].fd = ports[i].nmd->fd; pollfd[polli].events = POLLOUT; pollfd[polli].revents = 0; ++polli; } pollfd[polli].fd = rxport->nmd->fd; pollfd[polli].events = POLLIN; pollfd[polli].revents = 0; ++polli; //RD(5, "polling %d file descriptors", polli+1); rv = poll(pollfd, polli, 10); if (rv <= 0) { if (rv < 0 && errno != EAGAIN && errno != EINTR) RD(1, "poll error %s", strerror(errno)); continue; } if (oq) { /* try to push packets from the overflow queues * to the corresponding pipes */ for (i = 0; i < npipes; i++) { struct port_des *p = &ports[i]; struct overflow_queue *q = p->oq; uint32_t j, lim; struct netmap_ring *ring; struct netmap_slot *slot; if (!q->n) continue; ring = p->ring; lim = nm_ring_space(ring); if (!lim) continue; if (q->n < lim) lim = q->n; for (j = 0; j < lim; j++) { struct netmap_slot s = oq_deq(q); slot = &ring->slot[ring->cur]; oq_enq(freeq, slot); *slot = s; slot->flags |= NS_BUF_CHANGED; ring->cur = nm_ring_next(ring, ring->cur); } ring->head = ring->cur; forwarded += lim; p->ctr.pkts += lim; } } int batch = 0; for (i = rxport->nmd->first_rx_ring; i <= 
rxport->nmd->last_rx_ring; i++) { struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i); //D("prepare to scan rings"); int next_cur = rxring->cur; struct netmap_slot *next_slot = &rxring->slot[next_cur]; const char *next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); while (!nm_ring_empty(rxring)) { struct overflow_queue *q; struct netmap_slot *rs = next_slot; // CHOOSE THE CORRECT OUTPUT PIPE uint32_t hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B'); if (hash == 0) non_ip++; // XXX ?? // prefetch the buffer for the next round next_cur = nm_ring_next(rxring, next_cur); next_slot = &rxring->slot[next_cur]; next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); __builtin_prefetch(next_buf); // 'B' is just a hashing seed uint32_t output_port = hash % glob_arg.output_rings; struct port_des *port = &ports[output_port]; struct netmap_ring *ring = port->ring; uint32_t free_buf; // Move the packet to the output pipe. if (nm_ring_space(ring)) { struct netmap_slot *ts = &ring->slot[ring->cur]; free_buf = ts->buf_idx; ts->buf_idx = rs->buf_idx; ts->len = rs->len; ts->flags |= NS_BUF_CHANGED; ring->head = ring->cur = nm_ring_next(ring, ring->cur); port->ctr.pkts++; forwarded++; goto forward; } /* use the overflow queue, if available */ if (!oq) { dropped++; port->ctr.drop++; goto next; } q = &oq[output_port]; if (!freeq->n) { /* revoke some buffers from the longest overflow queue */ uint32_t j; struct port_des *lp = &ports[0]; uint32_t max = lp->oq->n; for (j = 1; j < npipes; j++) { struct port_des *cp = &ports[j]; if (cp->oq->n > max) { lp = cp; max = cp->oq->n; } } // XXX optimize this cycle for (j = 0; lp->oq->n && j < BUF_REVOKE; j++) { struct netmap_slot tmp = oq_deq(lp->oq); oq_enq(freeq, &tmp); } ND(1, "revoked %d buffers from %s", j, lq->name); lp->ctr.drop += j; dropped += j; } free_buf = oq_deq(freeq).buf_idx; oq_enq(q, rs); forward: rs->buf_idx = free_buf; rs->flags |= NS_BUF_CHANGED; next: rxring->head = rxring->cur = next_cur; batch++; if 
(unlikely(batch >= glob_arg.batch)) { ioctl(rxport->nmd->fd, NIOCRXSYNC, NULL); batch = 0; } ND(1, "Forwarded Packets: %"PRIu64" Dropped packets: %"PRIu64" Percent: %.2f", forwarded, dropped, ((float)dropped / (float)forwarded * 100)); } } } pthread_join(stat_thread, NULL); printf("%"PRIu64" packets forwarded. %"PRIu64" packets dropped. Total %"PRIu64"\n", forwarded, dropped, forwarded + dropped); return 0; }
int main(int argc, char **argv) { int ch; uint32_t i; int rv; unsigned int iter = 0; glob_arg.ifname[0] = '\0'; glob_arg.output_rings = 0; glob_arg.batch = DEF_BATCH; glob_arg.syslog_interval = DEF_SYSLOG_INT; while ( (ch = getopt(argc, argv, "i:p:b:B:s:")) != -1) { switch (ch) { case 'i': D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); return 1; } if (strncmp(optarg, "netmap:", 7) && strncmp(optarg, "vale", 4)) { sprintf(glob_arg.ifname, "netmap:%s", optarg); } else { strcpy(glob_arg.ifname, optarg); } break; case 'p': if (parse_pipes(optarg)) { usage(); return 1; } break; case 'B': glob_arg.extra_bufs = atoi(optarg); D("requested %d extra buffers", glob_arg.extra_bufs); break; case 'b': glob_arg.batch = atoi(optarg); D("batch is %d", glob_arg.batch); break; case 's': glob_arg.syslog_interval = atoi(optarg); D("syslog interval is %d", glob_arg.syslog_interval); break; default: D("bad option %c %s", ch, optarg); usage(); return 1; } } if (glob_arg.ifname[0] == '\0') { D("missing interface name"); usage(); return 1; } /* extract the base name */ char *nscan = strncmp(glob_arg.ifname, "netmap:", 7) ? 
glob_arg.ifname : glob_arg.ifname + 7; strncpy(glob_arg.base_name, nscan, MAX_IFNAMELEN); for (nscan = glob_arg.base_name; *nscan && !index("-*^{}/@", *nscan); nscan++) ; *nscan = '\0'; if (glob_arg.num_groups == 0) parse_pipes(""); setlogmask(LOG_UPTO(LOG_INFO)); openlog("lb", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); uint32_t npipes = glob_arg.output_rings; pthread_t stat_thread; ports = calloc(npipes + 1, sizeof(struct port_des)); if (!ports) { D("failed to allocate the stats array"); return 1; } struct port_des *rxport = &ports[npipes]; init_groups(); if (pthread_create(&stat_thread, NULL, print_stats, NULL) == -1) { D("unable to create the stats thread: %s", strerror(errno)); return 1; } /* we need base_req to specify pipes and extra bufs */ struct nmreq base_req; memset(&base_req, 0, sizeof(base_req)); base_req.nr_arg1 = npipes; base_req.nr_arg3 = glob_arg.extra_bufs; rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL); if (rxport->nmd == NULL) { D("cannot open %s", glob_arg.ifname); return (1); } else { D("successfully opened %s (tx rings: %u)", glob_arg.ifname, rxport->nmd->req.nr_tx_slots); } uint32_t extra_bufs = rxport->nmd->req.nr_arg3; struct overflow_queue *oq = NULL; /* reference ring to access the buffers */ rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0); if (!glob_arg.extra_bufs) goto run; D("obtained %d extra buffers", extra_bufs); if (!extra_bufs) goto run; /* one overflow queue for each output pipe, plus one for the * free extra buffers */ oq = calloc(npipes + 1, sizeof(struct overflow_queue)); if (!oq) { D("failed to allocated overflow queues descriptors"); goto run; } freeq = &oq[npipes]; rxport->oq = freeq; freeq->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!freeq->slots) { D("failed to allocate the free list"); } freeq->size = extra_bufs; snprintf(freeq->name, MAX_IFNAMELEN, "free queue"); /* * the list of buffers uses the first uint32_t in each buffer * as the index of the next buffer. 
*/ uint32_t scan; for (scan = rxport->nmd->nifp->ni_bufs_head; scan; scan = *(uint32_t *)NETMAP_BUF(rxport->ring, scan)) { struct netmap_slot s; s.buf_idx = scan; ND("freeq <- %d", s.buf_idx); oq_enq(freeq, &s); } if (freeq->n != extra_bufs) { D("something went wrong: netmap reported %d extra_bufs, but the free list contained %d", extra_bufs, freeq->n); return 1; } rxport->nmd->nifp->ni_bufs_head = 0; run: /* we need to create the persistent vale ports */ if (create_custom_ports(rxport->nmd->req.nr_arg2)) { free_buffers(); return 1; } atexit(delete_custom_ports); atexit(free_buffers); int j, t = 0; for (j = 0; j < glob_arg.num_groups; j++) { struct group_des *g = &groups[j]; int k; for (k = 0; k < g->nports; ++k) { struct port_des *p = &g->ports[k]; char interface[25]; sprintf(interface, "netmap:%s{%d/xT", g->pipename, g->first_id + k); D("opening pipe named %s", interface); p->nmd = nm_open(interface, NULL, 0, rxport->nmd); if (p->nmd == NULL) { D("cannot open %s", interface); return (1); } else { D("successfully opened pipe #%d %s (tx slots: %d)", k + 1, interface, p->nmd->req.nr_tx_slots); p->ring = NETMAP_TXRING(p->nmd->nifp, 0); } D("zerocopy %s", (rxport->nmd->mem == p->nmd->mem) ? 
"enabled" : "disabled"); if (extra_bufs) { struct overflow_queue *q = &oq[t + k]; q->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!q->slots) { D("failed to allocate overflow queue for pipe %d", k); /* make all overflow queue management fail */ extra_bufs = 0; } q->size = extra_bufs; snprintf(q->name, MAX_IFNAMELEN, "oq %s{%d", g->pipename, k); p->oq = q; } } t += g->nports; } if (glob_arg.extra_bufs && !extra_bufs) { if (oq) { for (i = 0; i < npipes + 1; i++) { free(oq[i].slots); oq[i].slots = NULL; } free(oq); oq = NULL; } D("*** overflow queues disabled ***"); } sleep(2); struct pollfd pollfd[npipes + 1]; memset(&pollfd, 0, sizeof(pollfd)); signal(SIGINT, sigint_h); while (!do_abort) { u_int polli = 0; iter++; for (i = 0; i < npipes; ++i) { struct netmap_ring *ring = ports[i].ring; if (nm_ring_next(ring, ring->tail) == ring->cur) { /* no need to poll, there are no packets pending */ continue; } pollfd[polli].fd = ports[i].nmd->fd; pollfd[polli].events = POLLOUT; pollfd[polli].revents = 0; ++polli; } pollfd[polli].fd = rxport->nmd->fd; pollfd[polli].events = POLLIN; pollfd[polli].revents = 0; ++polli; //RD(5, "polling %d file descriptors", polli+1); rv = poll(pollfd, polli, 10); if (rv <= 0) { if (rv < 0 && errno != EAGAIN && errno != EINTR) RD(1, "poll error %s", strerror(errno)); continue; } if (oq) { /* try to push packets from the overflow queues * to the corresponding pipes */ for (i = 0; i < npipes; i++) { struct port_des *p = &ports[i]; struct overflow_queue *q = p->oq; struct group_des *g = p->group; uint32_t j, lim; struct netmap_ring *ring; struct netmap_slot *slot; if (oq_empty(q)) continue; ring = p->ring; lim = nm_ring_space(ring); if (!lim) continue; if (q->n < lim) lim = q->n; for (j = 0; j < lim; j++) { struct netmap_slot s = oq_deq(q), tmp; tmp.ptr = 0; slot = &ring->slot[ring->cur]; if (slot->ptr && !g->last) { tmp.buf_idx = forward_packet(g + 1, slot); /* the forwarding may have removed packets * from the current queue */ if (q->n 
< lim) lim = q->n; } else { tmp.buf_idx = slot->buf_idx; } oq_enq(freeq, &tmp); *slot = s; slot->flags |= NS_BUF_CHANGED; ring->cur = nm_ring_next(ring, ring->cur); } ring->head = ring->cur; forwarded += lim; p->ctr.pkts += lim; } } int batch = 0; for (i = rxport->nmd->first_rx_ring; i <= rxport->nmd->last_rx_ring; i++) { struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i); //D("prepare to scan rings"); int next_cur = rxring->cur; struct netmap_slot *next_slot = &rxring->slot[next_cur]; const char *next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); while (!nm_ring_empty(rxring)) { struct netmap_slot *rs = next_slot; struct group_des *g = &groups[0]; // CHOOSE THE CORRECT OUTPUT PIPE uint32_t hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B'); if (hash == 0) { non_ip++; // XXX ?? } rs->ptr = hash | (1UL << 32); // prefetch the buffer for the next round next_cur = nm_ring_next(rxring, next_cur); next_slot = &rxring->slot[next_cur]; next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); __builtin_prefetch(next_buf); // 'B' is just a hashing seed rs->buf_idx = forward_packet(g, rs); rs->flags |= NS_BUF_CHANGED; rxring->head = rxring->cur = next_cur; batch++; if (unlikely(batch >= glob_arg.batch)) { ioctl(rxport->nmd->fd, NIOCRXSYNC, NULL); batch = 0; } ND(1, "Forwarded Packets: %"PRIu64" Dropped packets: %"PRIu64" Percent: %.2f", forwarded, dropped, ((float)dropped / (float)forwarded * 100)); } } } pthread_join(stat_thread, NULL); printf("%"PRIu64" packets forwarded. %"PRIu64" packets dropped. Total %"PRIu64"\n", forwarded, dropped, forwarded + dropped); return 0; }
/* push the packet described by slot rs to the group g. * This may cause other buffers to be pushed down the * chain headed by g. * Return a free buffer. */ uint32_t forward_packet(struct group_des *g, struct netmap_slot *rs) { uint32_t hash = rs->ptr; uint32_t output_port = hash % g->nports; struct port_des *port = &g->ports[output_port]; struct netmap_ring *ring = port->ring; struct overflow_queue *q = port->oq; /* Move the packet to the output pipe, unless there is * either no space left on the ring, or there is some * packet still in the overflow queue (since those must * take precedence over the new one) */ if (nm_ring_space(ring) && (q == NULL || oq_empty(q))) { struct netmap_slot *ts = &ring->slot[ring->cur]; struct netmap_slot old_slot = *ts; uint32_t free_buf; ts->buf_idx = rs->buf_idx; ts->len = rs->len; ts->flags |= NS_BUF_CHANGED; ts->ptr = rs->ptr; ring->head = ring->cur = nm_ring_next(ring, ring->cur); port->ctr.pkts++; forwarded++; if (old_slot.ptr && !g->last) { /* old slot not empty and we are not the last group: * push it further down the chain */ free_buf = forward_packet(g + 1, &old_slot); } else { /* just return the old slot buffer: it is * either empty or already seen by everybody */ free_buf = old_slot.buf_idx; } return free_buf; } /* use the overflow queue, if available */ if (q == NULL || oq_full(q)) { /* no space left on the ring and no overflow queue * available: we are forced to drop the packet */ dropped++; port->ctr.drop++; return rs->buf_idx; } oq_enq(q, rs); /* * we cannot continue down the chain and we need to * return a free buffer now. We take it from the free queue. */ if (oq_empty(freeq)) { /* the free queue is empty. 
Revoke some buffers * from the longest overflow queue */ uint32_t j; struct port_des *lp = &ports[0]; uint32_t max = lp->oq->n; /* let lp point to the port with the longest queue */ for (j = 1; j < glob_arg.output_rings; j++) { struct port_des *cp = &ports[j]; if (cp->oq->n > max) { lp = cp; max = cp->oq->n; } } /* move the oldest BUF_REVOKE buffers from the * lp queue to the free queue */ // XXX optimize this cycle for (j = 0; lp->oq->n && j < BUF_REVOKE; j++) { struct netmap_slot tmp = oq_deq(lp->oq); oq_enq(freeq, &tmp); } ND(1, "revoked %d buffers from %s", j, lq->name); lp->ctr.drop += j; dropped += j; } return oq_deq(freeq).buf_idx; }