pcap_t *
pcap_open_live(const char *device, int snaplen, int promisc, int to_ms,
	char *errbuf)
{
	struct nm_desc_t *d;
	int l;

	if (!device) {
		D("missing device name");
		return NULL;
	}

	l = strlen(device) + 1;
	D("request to open %s snaplen %d promisc %d timeout %dms",
		device, snaplen, promisc, to_ms);
	d = nm_open(device, NULL, 0, 0);
	if (d == NULL) {
		D("error opening %s", device);
		return NULL;
	}
	d->to_ms = to_ms;
	d->snaplen = snaplen;
	d->errbuf = errbuf;
	d->promisc = promisc;

	return d;
}
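Every snippet in this collection goes through nm_open(), whose first argument encodes both the port and the ring selection. As a brief, non-authoritative reminder (check the netmap(4) man page for your netmap version), these are the name formats the examples below rely on:

/*
 * nm_open() port-name syntax used in these snippets:
 *
 *   netmap:eth0       all hardware rings of eth0
 *   netmap:eth0-2/R   hardware ring 2 only, restricted to the RX side
 *                     (as in the Lua receiver example)
 *   netmap:eth0^      the host-stack rings (as in the bridge examples)
 *   netmap:eth0{3     master side of netmap pipe 3 (as in the lb examples)
 *   vale0:1           port 1 of the vale0 software switch
 */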
/* Netmap.__init__(), may be called many times, or not called at all. */
static int
NetmapDesc_init(NetmapDesc *self, PyObject *args, PyObject *kwds)
{
	PyObject *dev_name = NULL;
	static char *kwlist[] = {"ifname", "flags", NULL};
	const char *ifname;
	unsigned long flags = 0;
	int ret;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|k", kwlist,
					 &ifname, &flags)) {
		return -1;
	}

	/* Open the netmap device and register an interface. */
	self->nmd = nm_open(ifname, NULL, flags, NULL);
	if (self->nmd == NULL) {
		PyErr_SetString(NetmapError, "nm_open() failed");
		return -1;
	}

	/* Set up the netmap memory layout. The +1s account for the
	 * host rings. */
	ret = NetmapMemory_setup(&self->memory, self->nmd->nifp,
				 self->nmd->req.nr_tx_rings + 1,
				 self->nmd->req.nr_rx_rings + 1);

	return ret;
}
struct pico_device *
pico_netmap_create(char *interface, char *name, uint8_t *mac)
{
	struct pico_device_netmap *netmap;
	char ifname[IFNAMSIZ + 7];

	netmap = PICO_ZALLOC(sizeof(struct pico_device_netmap));
	if (!netmap) {
		return NULL;
	}

	if (pico_device_init((struct pico_device *)netmap, name, mac)) {
		pico_netmap_destroy((struct pico_device *)netmap);
		return NULL;
	}

	sprintf(ifname, "netmap:%s", interface);

	netmap->dev.overhead = 0;
	netmap->conn = nm_open(ifname, NULL, 0, 0);
	if (!netmap->conn) {
		pico_netmap_destroy((struct pico_device *)netmap);
		return NULL;
	}

	netmap->dev.send    = pico_netmap_send;
	netmap->dev.poll    = pico_netmap_poll;
	netmap->dev.destroy = pico_netmap_destroy;

	return (struct pico_device *)netmap;
}
int
main(int argc, char *argv[])
{
	if (argc < 3) {
		fprintf(stderr, "Usage: %s [interface] [RX ring number]\n", argv[0]);
		exit(1);
	}

	struct sigaction sa;
	// Setup the SIGHUP handler
	sa.sa_handler = &handle_signal;
	// Restart the system call, if at all possible
	sa.sa_flags = SA_RESTART;
	// Block every signal during the handler
	sigfillset(&sa.sa_mask);
	// Intercept SIGHUP and SIGINT
	if (sigaction(SIGHUP, &sa, NULL) == -1) {
		perror("Error: cannot handle SIGHUP"); // Should not happen
	}
	if (sigaction(SIGINT, &sa, NULL) == -1) {
		perror("Error: cannot handle SIGINT"); // Should not happen
	}

	L = luaL_newstate();
	luaL_openlibs(L); /* Load Lua libraries */

	// Try loading the file containing the script and run it
	if (luaL_loadfile(L, "script.lua") || lua_pcall(L, 0, 0, 0)) {
		fprintf(stderr, "Couldn't load file: %s\n", lua_tostring(L, -1));
		exit(1);
	}

	lua_getglobal(L, "callback");
	int cb_ref = luaL_ref(L, LUA_REGISTRYINDEX);

	char netmap_ifname[IFNAMSIZ + 21];
	const char *interface;
	unsigned int ring_id;
	struct nm_desc *d;

	interface = argv[1];
	ring_id = atoi(argv[2]);
	snprintf(netmap_ifname, sizeof netmap_ifname, "netmap:%s-%d/R",
		interface, ring_id);

	d = nm_open(netmap_ifname, NULL, 0, 0);
	if (!d) {
		perror("nm_open()");
		exit(2);
	}

	printf("[+] Receiving packets on interface %s, RX ring %d\n",
		interface, ring_id);
	receiver(L, cb_ref, d, ring_id);

	lua_close(L);
	return 0;
}
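receiver() and handle_signal() are not shown above. A minimal sketch of what receiver() could look like, assuming the registered Lua callback takes the raw packet bytes as a string and that handle_signal() sets a do_abort flag (both assumptions, not part of the original):

static volatile int do_abort = 0; /* assumed: set by handle_signal() */

static void
receiver(lua_State *L, int cb_ref, struct nm_desc *d, unsigned int ring_id)
{
	struct pollfd pfd = { .fd = d->fd, .events = POLLIN };

	while (!do_abort) {
		if (poll(&pfd, 1, 1000) <= 0)
			continue;	/* timeout or error, retry */
		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ring_id);
		while (!nm_ring_empty(ring)) {
			struct netmap_slot *slot = &ring->slot[ring->cur];
			char *buf = NETMAP_BUF(ring, slot->buf_idx);
			/* call the registered Lua callback with the packet */
			lua_rawgeti(L, LUA_REGISTRYINDEX, cb_ref);
			lua_pushlstring(L, buf, slot->len);
			if (lua_pcall(L, 1, 0, 0) != 0)
				fprintf(stderr, "callback error: %s\n",
					lua_tostring(L, -1));
			ring->head = ring->cur = nm_ring_next(ring, ring->cur);
		}
	}
}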
struct nm_desc *
usnet_init(struct nm_desc *gg_nmd, const char *dev_name, u_int flags)
{
	struct nmreq        nmr;
	struct nm_desc     *nmd  = NULL;
	struct netmap_if   *nifp = NULL;
	struct netmap_ring *txr, *rxr;

	signal(SIGINT, sigint_h);

	bzero(&nmr, sizeof(nmr));
	strcpy(nmr.nr_name, dev_name);
	// XXX: which netmap flags?
	//nmr.nr_flags = NR_REG_ALL_NIC; //| flags;

	printf("nm_open: %s\n", nmr.nr_name);
	nmd = nm_open(nmr.nr_name, &nmr, 0, NULL);
	if (nmd == NULL) {
		DEBUG("Cannot open interface %s", nmr.nr_name);
		exit(1);
	}

	nifp = nmd->nifp;
	txr  = NETMAP_TXRING(nifp, 0);
	rxr  = NETMAP_RXRING(nifp, 0);

	printf("nmreq info, name=%s, version=%d, flags=%d, memsize=%d,"
	       " ni_tx_rings=%d, ni_rx_rings=%d, num_tx_slots=%d, num_rx_slots=%d\n",
	       nifp->ni_name, nifp->ni_version, nifp->ni_flags, nmd->memsize,
	       nifp->ni_tx_rings, nifp->ni_rx_rings,
	       txr->num_slots, rxr->num_slots);

	memset(&g_config, 0, sizeof(g_config));
	g_config.burst   = 1000;
	g_config.tx_rate = 0;

	memset(&g_ipq, 0, sizeof(g_ipq));

	usnet_init_internal();
	usnet_route_init();
	usnet_network_init();
	usnet_udp_init();
	usnet_ipv4_init();
	usnet_socket_init();

	return nmd;
}
/*----------------------------------------------------------------------------*/
void
netmap_init_handle(struct mtcp_thread_context *ctxt)
{
	struct netmap_private_context *npc;
	char ifname[MAX_IFNAMELEN];
	char nifname[MAX_IFNAMELEN];
	struct netdev_entry **ent;
	int j;

	ent = g_config.mos->netdev_table->ent;

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct netmap_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}
	npc = (struct netmap_private_context *)ctxt->io_private_context;

	/* initialize per-thread netmap interfaces */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
#if 0
		if (if_indextoname(devices_attached[j], ifname) == NULL) {
			TRACE_ERROR("Failed to initialize interface %s with ifidx: %d - "
				    "error string: %s\n",
				    ifname, devices_attached[j], strerror(errno));
			exit(EXIT_FAILURE);
		}
#else
		strcpy(ifname, ent[j]->dev_name);
#endif
		if (unlikely(g_config.mos->num_cores == 1))
			sprintf(nifname, "netmap:%s", ifname);
		else
			sprintf(nifname, "netmap:%s-%d", ifname, ctxt->cpu);

		TRACE_INFO("Opening %s with j: %d (cpu: %d)\n", nifname, j, ctxt->cpu);

		struct nmreq base_nmd;
		memset(&base_nmd, 0, sizeof(base_nmd));
		base_nmd.nr_arg3 = EXTRA_BUFS;

		npc->local_nmd[j] = nm_open(nifname, &base_nmd, 0, NULL);
		if (npc->local_nmd[j] == NULL) {
			TRACE_ERROR("Unable to open %s: %s\n", nifname, strerror(errno));
			exit(EXIT_FAILURE);
		}
	}
}
static int
thr_ctx_setup(struct thr_ctx *th, char *ifa, char *ifb, int zerocopy,
	int burst, struct thr_ctx *parent, int cpuid)
{
	int nm_flags = 0;
	struct nm_desc *p_pa = NULL;

	bzero(th, sizeof(struct thr_ctx));
	th->ifa      = strdup(ifa);
	th->ifb      = strdup(ifb);
	th->zerocopy = zerocopy;
	th->burst    = burst;
	th->cpuid    = cpuid;

	/* Setup netmap rings */
	if (parent) {
		nm_flags |= NM_OPEN_NO_MMAP;
		p_pa = parent->pa;
	}
	th->pa = nm_open(ifa, NULL, nm_flags, p_pa);
	if (th->pa == NULL) {
		D("cannot open %s", ifa);
		exit(1);
	}
	th->pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, th->pa);
	if (th->pb == NULL) {
		D("cannot open %s", ifb);
		nm_close(th->pa);
		exit(1);
	}
	th->zerocopy = th->zerocopy && (th->pa->mem == th->pb->mem);

	/* Done */
	return (0);
}
/* Attempt to open port in netmap mode */
struct fp_device *
fp_netmap_open(const char *name)
{
	/* Build the device. */
	struct fp_netmap_device *dev = fp_allocate(struct fp_netmap_device);
	if (dev == NULL)
		return NULL;
	dev->base.vtbl = &netmap_vtbl;
	dev->handle = nm_open(name, NULL, 0, NULL);
	if (dev->handle == NULL) {
		fprintf(stderr, "error: unable to open netmap device %s\n", name);
		fp_deallocate(dev);
		return NULL;
	}
	return (struct fp_device *)dev;
}
/*
 * Open a netmap device. We assume there is only one queue
 * (which is the case for the VALE bridge).
 */
static struct nm_desc *
netmap_open(const NetdevNetmapOptions *nm_opts, Error **errp)
{
	struct nm_desc *nmd;
	struct nmreq req;

	memset(&req, 0, sizeof(req));

	nmd = nm_open(nm_opts->ifname, &req, NETMAP_NO_TX_POLL, NULL);
	if (nmd == NULL) {
		error_setg_errno(errp, errno, "Failed to nm_open() %s",
				 nm_opts->ifname);
		return NULL;
	}

	return nmd;
}
/*
 * add a netmap port. We add them in pairs, so forwarding occurs
 * between two of them.
 */
void
netmap_add_port(const char *dev)
{
	static struct sess *s1 = NULL;	// XXX stateful; bad!
	struct my_netmap_port *port;
	int l;
	struct sess *s2;

	D("opening netmap device %s", dev);
	l = strlen(dev) + 1;
	if (l >= IFNAMSIZ) {
		D("name %s too long, max %d", dev, IFNAMSIZ - 1);
		sleep(2);
		return;
	}
	port = calloc(1, sizeof(*port));
	if (port == NULL) {
		D("cannot allocate port for %s", dev);
		return;
	}
	port->d = nm_open(dev, NULL, 0, NULL);
	if (port->d == NULL) {
		D("error opening %s", dev);
		kern_free(port);	// XXX compat
		return;
	}
	strncpy(port->ifp.if_xname, dev, IFNAMSIZ - 1);
	port->allocator_id = port->d->req.nr_arg2;
	D("--- mem_id %d", port->allocator_id);
	s2 = new_session(port->d->fd, netmap_read, port, WANT_READ);
	port->sess = s2;
	D("create sess %p my_netmap_port %p", s2, port);
	if (s1 == NULL) {	/* first of a pair */
		s1 = s2;
	} else {		/* second of a pair, cross link */
		struct my_netmap_port *peer = s1->arg;
		port->peer = peer;
		peer->peer = port;
		port->can_swap_bufs = peer->can_swap_bufs =
			(port->allocator_id == peer->allocator_id);
		D("%p %s %d <-> %p %s %d %s",
			port, port->d->req.nr_name, port->allocator_id,
			peer, peer->d->req.nr_name, peer->allocator_id,
			port->can_swap_bufs ? "SWAP" : "COPY");
		s1 = NULL;
	}
}
int
main(int argc, char *argv[])
{
	struct nm_desc *d;
	struct pollfd pfd;
	char buf[2048];
	int count = 0;

	if (argc < 2) {
		usage(argv[0]);
	}

	bzero(&pfd, sizeof(pfd));

	d = nm_open(argv[1], NULL, 0, 0);
	if (d == NULL) {
		fprintf(stderr, "no netmap\n");
		exit(1);
	}
	pfd.fd = d->fd;
	pfd.events = argv[2] && argv[2][0] == 'w' ? POLLOUT : POLLIN;
	fprintf(stderr, "working on %s in %s mode\n", argv[1],
		pfd.events == POLLIN ? "read" : "write");
	for (;;) {
		if (pfd.events == POLLIN) {
			nm_dispatch(d, -1, my_cb, (void *)&count);
		} else {
			if (nm_inject(d, buf, 60) > 0) {
				count++;
				continue;
			}
			fprintf(stderr, "polling after sending %d\n", count);
			count = 0;
		}
		poll(&pfd, 1, 1000);
	}
	nm_close(d);
	return 0;
}
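The my_cb callback passed to nm_dispatch() above is not shown. nm_dispatch() delivers packets to an nm_cb_t function; a minimal sketch that counts packets through the user argument (the body is an assumption, only the signature comes from netmap_user.h):

/* minimal sketch: count packets delivered by nm_dispatch() */
static void
my_cb(u_char *arg, const struct nm_pkthdr *h, const u_char *buf)
{
	int *count = (int *)arg;

	(*count)++;
	fprintf(stderr, "packet %d: %u bytes\n", *count, h->len);
	(void)buf; /* payload starts here, h->len bytes long */
}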
static void
pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
{
	sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
	sc->pci_vtnet_tx = pci_vtnet_netmap_tx;

	sc->vsc_nmd = nm_open(ifname, NULL, 0, 0);
	if (sc->vsc_nmd == NULL) {
		WPRINTF(("open of netmap device %s failed\n", ifname));
		return;
	}

	sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, EVF_READ,
				  pci_vtnet_rx_callback, sc);
	if (sc->vsc_mevp == NULL) {
		WPRINTF(("Could not register event\n"));
		nm_close(sc->vsc_nmd);
		sc->vsc_nmd = NULL;
	}
}
void
test_netmap(usn_mbuf_t *m)
{
	int tosend = 0;
	int n, i;
	int rate_limit = 0;
	int sent = 0;
	struct pollfd pfd = { .fd = g_nmd->fd, .events = POLLOUT };
	struct netmap_if *nifp = g_nmd->nifp;
	struct timeval stime, etime;
	struct nm_desc nmd = *g_nmd;
	struct nm_desc *t_nmd;
	uint64_t nmd_flags = 0;

	// re-open netmap device.
	nmd.req.nr_flags = NR_REG_ONE_NIC;
	nmd.req.nr_ringid = 0;
	printf("interface name:%s, len=%d\n", g_interface, m->mlen);
	t_nmd = nm_open(g_interface, NULL,
			nmd_flags | NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
	if (t_nmd == NULL) {
		printf("Unable to open %s: %s\n", g_interface, strerror(errno));
		return;
	}
	nifp = t_nmd->nifp;
	pfd.fd = t_nmd->fd;
	pfd.events = POLLOUT;

	n = 10000;
	sent = 0;
	g_config.burst = 512;
	printf("g_config.burst=%d\n", g_config.burst);

	gettimeofday(&stime, 0);
	while (sent < n) {
		/*
		 * wait for available room in the send queue(s)
		 */
		if (poll(&pfd, 1, 1000) <= 0) {
			D("poll error/timeout on queue: %s", strerror(errno));
			// goto quit;
		}
		if (pfd.revents & POLLERR) {
			D("poll error");
			goto quit;
		}
		for (i = g_nmd->first_tx_ring; i <= g_nmd->last_tx_ring; i++) {
			int limit = rate_limit ? tosend : g_config.burst;
			int cnt = 0;
			if (n > 0 && n - sent < limit)
				limit = n - sent;
			struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
			if (nm_ring_empty(txring))
				continue;
			cnt = test_send(txring, m, limit);
			DEBUG("limit %d tail %d cnt %d", limit, txring->tail, cnt);
			sent += cnt;
		}
	}

	// print info stats
	gettimeofday(&etime, 0);
	timersub(&etime, &stime, &etime);
	printf("num of sent pkts: %d\n", n);
	printf("total time: %lu (seconds) %lu (microseconds)\n",
		etime.tv_sec, etime.tv_usec);

	/* flush any remaining packets */
	ioctl(pfd.fd, NIOCTXSYNC, NULL);

	/* final part: wait for all the TX queues to be empty. */
	for (i = g_nmd->first_tx_ring; i <= g_nmd->last_tx_ring; i++) {
		struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
		while (nm_tx_pending(txring)) {
			ioctl(pfd.fd, NIOCTXSYNC, NULL);
			usleep(1); /* wait 1 tick */
		}
	}
quit:
	return;
}

/* set the thread affinity. */
int
setaffinity(int i)
{
	cpuset_t cpumask;

	if (i == -1)
		return 0;

	/* Set thread affinity. */
	CPU_ZERO(&cpumask);
	CPU_SET(i, &cpumask);
	if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_CPUSET, -1,
			       sizeof(cpuset_t), &cpumask) != 0) {
		DEBUG("Unable to set affinity: %s", strerror(errno));
		return 1;
	}
	return 0;
}
void
receiver(void)
{
	struct nm_desc *netmap_descriptor;

	u_int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	printf("We have %d cpus\n", num_cpus);

	struct nmreq base_nmd;
	bzero(&base_nmd, sizeof(base_nmd));

	// Magic from pkt-gen.c
	base_nmd.nr_tx_rings = base_nmd.nr_rx_rings = 0;
	base_nmd.nr_tx_slots = base_nmd.nr_rx_slots = 0;

	std::string interface = "netmap:eth4";
	netmap_descriptor = nm_open(interface.c_str(), &base_nmd, 0, NULL);
	if (netmap_descriptor == NULL) {
		printf("Can't open netmap device %s\n", interface.c_str());
		exit(1);
		return;
	}

	printf("Mapped %dKB memory at %p\n",
		netmap_descriptor->req.nr_memsize >> 10, netmap_descriptor->mem);
	printf("We have %d tx and %d rx rings\n",
		netmap_descriptor->req.nr_tx_rings,
		netmap_descriptor->req.nr_rx_rings);

	/* The first open detaches the card from the host protocol stack
	   and may cause a reset of the card, which in turn may take some
	   time for the PHY to reconfigure. We do the open here to have
	   time to reset. */
	int wait_link = 2;
	printf("Wait %d seconds for NIC reset\n", wait_link);
	sleep(wait_link);

	boost::thread *boost_threads_array[num_cpus];
	for (int i = 0; i < num_cpus; i++) {
		struct nm_desc nmd = *netmap_descriptor;
		// This operation is VERY important!
		nmd.self = &nmd;

		uint64_t nmd_flags = 0;
		if (nmd.req.nr_flags != NR_REG_ALL_NIC) {
			printf("Unexpected nr_flags, expected NR_REG_ALL_NIC\n");
		}
		nmd.req.nr_flags = NR_REG_ONE_NIC;
		nmd.req.nr_ringid = i;
		/* Only touch one of the rings (rx is already ok) */
		nmd_flags |= NETMAP_NO_TX_POLL;

		struct nm_desc *new_nmd = nm_open(interface.c_str(), NULL,
			nmd_flags | NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
		if (new_nmd == NULL) {
			printf("Can't open netmap descriptor for ring %d\n", i);
			exit(1);
		}

		printf("My first ring is %d and last ring id is %d, I'm thread %d\n",
			new_nmd->first_rx_ring, new_nmd->last_rx_ring, i);
		printf("Start new thread %d\n", i);
		// Start thread and pass netmap descriptor to it
		boost_threads_array[i] = new boost::thread(netmap_thread, new_nmd, i);
	}

	printf("Wait for thread finish\n");
	// Wait for all threads to complete
	for (int i = 0; i < num_cpus; i++) {
		boost_threads_array[i]->join();
	}
}
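netmap_thread() is left out of the snippet. A minimal sketch of a per-ring worker consistent with how the descriptor is opened above (NR_REG_ONE_NIC, one RX ring per thread); the drain-and-discard body is an assumption:

void netmap_thread(struct nm_desc *nmd, int thread_id)
{
	struct pollfd pfd;
	pfd.fd = nmd->fd;
	pfd.events = POLLIN;

	for (;;) {
		if (poll(&pfd, 1, 1000) <= 0)
			continue; // timeout or error, try again
		for (int r = nmd->first_rx_ring; r <= nmd->last_rx_ring; r++) {
			struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
			while (!nm_ring_empty(ring)) {
				struct netmap_slot *slot = &ring->slot[ring->cur];
				// packet bytes are at NETMAP_BUF(ring, slot->buf_idx),
				// slot->len bytes; a real worker would process them here
				(void)slot;
				ring->head = ring->cur = nm_ring_next(ring, ring->cur);
			}
		}
	}
}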
int
main(int argc, char **argv)
{
	int index = 0;
	int channel_nums = 4;
	int member = 2;
	const char *ifname = NULL;
	struct thread_args_t targs[MAX_RINGS];

	memset(targs, 0x0, sizeof(targs));

	if (argc < 2 || argc > 3) {
		printf("Usage: %s [interface] <channel nums>\n", argv[0]);
		return 1;
	}
	ifname = argv[1];
	if (argc == 3) {
		channel_nums = atoi(argv[2]);
	}
	if (channel_nums < 1 || channel_nums > MAX_RINGS ||
	    (MAX_RINGS % channel_nums) != 0) {
		printf("invalid channel nums.\n");
		return 1;
	}
	member = MAX_RINGS / channel_nums;

	for (index = 0; index < channel_nums; index++) {
		char buff[64];
		memset(buff, 0x0, 64);
		struct thread_args_t *thread_arg = &targs[index];
		unsigned short start = index * member;
		unsigned short end = start + member - 1;

		snprintf(buff, 63, "netmap:%s+%d.%d", ifname, start, end);
		thread_arg->desc = nm_open(buff, NULL,
				NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL, NULL);
		if (thread_arg->desc == NULL) {
			D("cannot open %s", buff);
			return 1;
		}
		printf("%d first_rx_ring:%d last_rx_ring:%d ", index,
			thread_arg->desc->first_rx_ring,
			thread_arg->desc->last_rx_ring);
		printf(" first_tx_ring:%d last_tx_ring:%d\n",
			thread_arg->desc->first_tx_ring,
			thread_arg->desc->last_tx_ring);
		thread_arg->affinity = index;
		pthread_create(&(thread_arg->thread), NULL, run, (void *)thread_arg);
		pthread_detach(thread_arg->thread);
	}

	main_loop_statistics(targs, channel_nums);

	for (index = 0; index < channel_nums; index++) {
		nm_close(targs[index].desc);
	}

	D("exiting");
	return (0);
}
static int
pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user)
{
	int ret;
	struct pcap_netmap *pn = p->priv;
	struct nm_desc *d = pn->d;
	struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 };

	pn->cb = cb;
	pn->cb_arg = user;

	for (;;) {
		if (p->break_loop) {
			p->break_loop = 0;
			return PCAP_ERROR_BREAK;
		}
		/* nm_dispatch won't run forever */
		ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter,
			(void *)p);
		if (ret != 0)
			break;
		errno = 0;
		ret = poll(&pfd, 1, p->opt.timeout);
	}
	return ret;
}

/* XXX need to check the NIOCTXSYNC/poll */
static int
pcap_netmap_inject(pcap_t *p, const void *buf, size_t size)
{
	struct pcap_netmap *pn = p->priv;
	struct nm_desc *d = pn->d;

	return nm_inject(d, buf, size);
}

static int
pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags)
{
	struct pcap_netmap *pn = p->priv;
	struct nm_desc *d = pn->d;
	struct ifreq ifr;
	int error, fd = d->fd;

#ifdef linux
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Error: cannot get device control socket.\n");
		return -1;
	}
#endif /* linux */
	bzero(&ifr, sizeof(ifr));
	strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name));
	switch (what) {
	case SIOCSIFFLAGS:
		/*
		 * The flags we pass in are 32-bit and unsigned.
		 *
		 * On most if not all UN*Xes, ifr_flags is 16-bit and
		 * signed, and the result of assigning a longer
		 * unsigned value to a shorter signed value is
		 * implementation-defined (even if, in practice, it'll
		 * do what's intended on all platforms we support,
		 * i.e. assign the lower 16 bits of the 32-bit
		 * unsigned value). So we mask out the upper 16 bits.
		 */
		ifr.ifr_flags = *if_flags & 0xffff;
#ifdef __FreeBSD__
		/*
		 * In FreeBSD, we need to set the high-order flags,
		 * as we're using IFF_PPROMISC, which is in those bits.
		 *
		 * XXX - DragonFly BSD?
		 */
		ifr.ifr_flagshigh = *if_flags >> 16;
#endif /* __FreeBSD__ */
		break;
	}
	error = ioctl(fd, what, &ifr);
	if (!error) {
		switch (what) {
		case SIOCGIFFLAGS:
			/*
			 * The flags we return are 32-bit.
			 *
			 * On most if not all UN*Xes, ifr_flags is
			 * 16-bit and signed, and will get sign-
			 * extended, so that the upper 16 bits of
			 * those flags will be forced on. So we
			 * mask out the upper 16 bits of the
			 * sign-extended value.
			 */
			*if_flags = ifr.ifr_flags & 0xffff;
#ifdef __FreeBSD__
			/*
			 * In FreeBSD, we need to return the
			 * high-order flags, as we're using
			 * IFF_PPROMISC, which is in those bits.
			 *
			 * XXX - DragonFly BSD?
			 */
			*if_flags |= (ifr.ifr_flagshigh << 16);
#endif /* __FreeBSD__ */
		}
	}
#ifdef linux
	close(fd);
#endif /* linux */
	return error ? -1 : 0;
}

static void
pcap_netmap_close(pcap_t *p)
{
	struct pcap_netmap *pn = p->priv;
	struct nm_desc *d = pn->d;
	uint32_t if_flags = 0;

	if (pn->must_clear_promisc) {
		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
		if (if_flags & IFF_PPROMISC) {
			if_flags &= ~IFF_PPROMISC;
			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
		}
	}
	nm_close(d);
	pcap_cleanup_live_common(p);
}

static int
pcap_netmap_activate(pcap_t *p)
{
	struct pcap_netmap *pn = p->priv;
	struct nm_desc *d;
	uint32_t if_flags = 0;

	d = nm_open(p->opt.device, NULL, 0, NULL);
	if (d == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
			errno, "netmap open: cannot access %s", p->opt.device);
		pcap_cleanup_live_common(p);
		return (PCAP_ERROR);
	}
	if (0)
		fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n",
			__FUNCTION__, p->opt.device, d, d->fd,
			d->first_rx_ring, d->last_rx_ring);
	pn->d = d;
	p->fd = d->fd;

	/*
	 * Turn a negative snapshot value (invalid), a snapshot value of
	 * 0 (unspecified), or a value bigger than the normal maximum
	 * value, into the maximum allowed value.
	 *
	 * If some application really *needs* a bigger snapshot
	 * length, we should just increase MAXIMUM_SNAPLEN.
	 */
	if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
		p->snapshot = MAXIMUM_SNAPLEN;

	if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) {
		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
		if (!(if_flags & IFF_PPROMISC)) {
			pn->must_clear_promisc = 1;
			if_flags |= IFF_PPROMISC;
			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
		}
	}
	p->linktype = DLT_EN10MB;
	p->selectable_fd = p->fd;
	p->read_op = pcap_netmap_dispatch;
	p->inject_op = pcap_netmap_inject;
	p->setfilter_op = install_bpf_program;
	p->setdirection_op = NULL;
	p->set_datalink_op = NULL;
	p->getnonblock_op = pcap_getnonblock_fd;
	p->setnonblock_op = pcap_setnonblock_fd;
	p->stats_op = pcap_netmap_stats;
	p->cleanup_op = pcap_netmap_close;
	return (0);
}

pcap_t *
pcap_netmap_create(const char *device, char *ebuf, int *is_ours)
{
	pcap_t *p;

	*is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4));
	if (!*is_ours)
		return NULL;
	p = pcap_create_common(ebuf, sizeof(struct pcap_netmap));
	if (p == NULL)
		return (NULL);
	p->activate_op = pcap_netmap_activate;
	return (p);
}
static int
netmap_open(odp_pktio_t id ODP_UNUSED, pktio_entry_t *pktio_entry,
	    const char *netdev, odp_pool_t pool)
{
	int i;
	int err;
	int sockfd;
	int mtu;
	uint32_t buf_size;
	pkt_netmap_t *pkt_nm = &pktio_entry->s.pkt_nm;
	struct nm_desc *desc;
	struct netmap_ring *ring;
	odp_pktin_hash_proto_t hash_proto;
	odp_pktio_stats_t cur_stats;

	if (getenv("ODP_PKTIO_DISABLE_NETMAP"))
		return -1;

	if (pool == ODP_POOL_INVALID)
		return -1;

	/* Init pktio entry */
	memset(pkt_nm, 0, sizeof(*pkt_nm));
	pkt_nm->sockfd = -1;
	pkt_nm->pool = pool;

	/* max frame len taking into account the l2-offset */
	pkt_nm->max_frame_len = ODP_CONFIG_PACKET_BUF_LEN_MAX -
				odp_buffer_pool_headroom(pool) -
				odp_buffer_pool_tailroom(pool);

	snprintf(pktio_entry->s.name, sizeof(pktio_entry->s.name), "%s", netdev);
	snprintf(pkt_nm->nm_name, sizeof(pkt_nm->nm_name), "netmap:%s", netdev);

	/* Dummy open here to check if netmap module is available and to read
	 * capability info. */
	desc = nm_open(pkt_nm->nm_name, NULL, 0, NULL);
	if (desc == NULL) {
		ODP_ERR("nm_open(%s) failed\n", pkt_nm->nm_name);
		goto error;
	}

	if (desc->nifp->ni_rx_rings > NM_MAX_DESC) {
		ODP_ERR("Unable to store all rx rings\n");
		nm_close(desc);
		goto error;
	}
	pkt_nm->num_rx_rings = desc->nifp->ni_rx_rings;
	pkt_nm->capa.max_input_queues = PKTIO_MAX_QUEUES;
	if (desc->nifp->ni_rx_rings < PKTIO_MAX_QUEUES)
		pkt_nm->capa.max_input_queues = desc->nifp->ni_rx_rings;

	if (desc->nifp->ni_tx_rings > NM_MAX_DESC) {
		ODP_ERR("Unable to store all tx rings\n");
		nm_close(desc);
		goto error;
	}
	pkt_nm->num_tx_rings = desc->nifp->ni_tx_rings;
	pkt_nm->capa.max_output_queues = PKTIO_MAX_QUEUES;
	if (desc->nifp->ni_tx_rings < PKTIO_MAX_QUEUES)
		pkt_nm->capa.max_output_queues = desc->nifp->ni_tx_rings;

	ring = NETMAP_RXRING(desc->nifp, desc->cur_rx_ring);
	buf_size = ring->nr_buf_size;
	nm_close(desc);

	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
	if (sockfd == -1) {
		ODP_ERR("Cannot get device control socket\n");
		goto error;
	}
	pkt_nm->sockfd = sockfd;

	/* Use either interface MTU (+ ethernet header length) or netmap buffer
	 * size as MTU, whichever is smaller. */
	mtu = mtu_get_fd(pktio_entry->s.pkt_nm.sockfd, pktio_entry->s.name) +
	      ODPH_ETHHDR_LEN;
	if (mtu < 0) {
		ODP_ERR("Unable to read interface MTU\n");
		goto error;
	}
	pkt_nm->mtu = ((uint32_t)mtu < buf_size) ? (uint32_t)mtu : buf_size;

	/* Check if RSS is supported. If not, set 'max_input_queues' to 1. */
	if (rss_conf_get_supported_fd(sockfd, netdev, &hash_proto) == 0) {
		ODP_DBG("RSS not supported\n");
		pkt_nm->capa.max_input_queues = 1;
	}

	err = netmap_do_ioctl(pktio_entry, SIOCGIFFLAGS, 0);
	if (err)
		goto error;
	if ((pkt_nm->if_flags & IFF_UP) == 0)
		ODP_DBG("%s is down\n", pktio_entry->s.name);

	err = mac_addr_get_fd(sockfd, netdev, pkt_nm->if_mac);
	if (err)
		goto error;

	for (i = 0; i < PKTIO_MAX_QUEUES; i++) {
		odp_ticketlock_init(&pkt_nm->rx_desc_ring[i].s.lock);
		odp_ticketlock_init(&pkt_nm->tx_desc_ring[i].s.lock);
	}

	/* netmap uses only ethtool to get statistics counters */
	err = ethtool_stats_get_fd(pktio_entry->s.pkt_nm.sockfd,
				   pktio_entry->s.name, &cur_stats);
	if (err) {
		ODP_ERR("netmap pktio %s does not support statistics counters\n",
			pktio_entry->s.name);
		pktio_entry->s.stats_type = STATS_UNSUPPORTED;
	} else {
		pktio_entry->s.stats_type = STATS_ETHTOOL;
	}

	(void)netmap_stats_reset(pktio_entry);

	return 0;

error:
	netmap_close(pktio_entry);
	return -1;
}
static int
netmap_start(pktio_entry_t *pktio_entry)
{
	pkt_netmap_t *pkt_nm = &pktio_entry->s.pkt_nm;
	netmap_ring_t *desc_ring;
	struct nm_desc base_desc;
	unsigned i;
	unsigned j;
	unsigned num_rx_desc = 0;
	uint64_t flags;
	odp_pktin_mode_t in_mode = pktio_entry->s.param.in_mode;
	odp_pktout_mode_t out_mode = pktio_entry->s.param.out_mode;

	/* If no pktin/pktout queues have been configured, configure one
	 * for each direction. */
	if (!pktio_entry->s.num_in_queue &&
	    in_mode != ODP_PKTIN_MODE_DISABLED) {
		odp_pktin_queue_param_t param;

		odp_pktin_queue_param_init(&param);
		param.num_queues = 1;
		if (odp_pktin_queue_config(pktio_entry->s.handle, &param))
			return -1;
	}
	if (!pktio_entry->s.num_out_queue &&
	    out_mode == ODP_PKTOUT_MODE_DIRECT) {
		odp_pktout_queue_param_t param;

		odp_pktout_queue_param_init(&param);
		param.num_queues = 1;
		if (odp_pktout_queue_config(pktio_entry->s.handle, &param))
			return -1;
	}

	if (pkt_nm->num_rx_desc_rings == pktio_entry->s.num_in_queue &&
	    pkt_nm->num_tx_desc_rings == pktio_entry->s.num_out_queue)
		return (netmap_wait_for_link(pktio_entry) == 1) ? 0 : -1;

	netmap_close_descriptors(pktio_entry);

	/* Map pktin/pktout queues to netmap rings */
	if (pktio_entry->s.num_in_queue) {
		/* In single queue case only one netmap descriptor is
		 * required. */
		num_rx_desc = (pktio_entry->s.num_in_queue == 1) ? 1 :
			      pkt_nm->num_rx_rings;

		map_netmap_rings(pkt_nm->rx_desc_ring,
				 pktio_entry->s.num_in_queue, num_rx_desc);
	}
	if (pktio_entry->s.num_out_queue)
		/* Enough to map only one netmap tx ring per pktout queue */
		map_netmap_rings(pkt_nm->tx_desc_ring,
				 pktio_entry->s.num_out_queue,
				 pktio_entry->s.num_out_queue);

	base_desc.self = &base_desc;
	base_desc.mem = NULL;
	memcpy(base_desc.req.nr_name, pktio_entry->s.name,
	       sizeof(pktio_entry->s.name));
	base_desc.req.nr_flags &= ~NR_REG_MASK;

	if (num_rx_desc == 1)
		base_desc.req.nr_flags |= NR_REG_ALL_NIC;
	else
		base_desc.req.nr_flags |= NR_REG_ONE_NIC;
	base_desc.req.nr_ringid = 0;

	/* Only the first rx descriptor does mmap */
	desc_ring = pkt_nm->rx_desc_ring;
	flags = NM_OPEN_IFNAME | NETMAP_NO_TX_POLL;
	desc_ring[0].s.desc[0] = nm_open(pkt_nm->nm_name, NULL, flags,
					 &base_desc);
	if (desc_ring[0].s.desc[0] == NULL) {
		ODP_ERR("nm_open(%s) failed\n", pkt_nm->nm_name);
		goto error;
	}
	/* Open rest of the rx descriptors (one per netmap ring) */
	flags = NM_OPEN_IFNAME | NETMAP_NO_TX_POLL | NM_OPEN_NO_MMAP;
	for (i = 0; i < pktio_entry->s.num_in_queue; i++)
		for (j = desc_ring[i].s.first; j <= desc_ring[i].s.last; j++) {
			if (i == 0 && j == 0) { /* First already opened */
				if (num_rx_desc > 1)
					continue;
				else
					break;
			}
			base_desc.req.nr_ringid = j;
			desc_ring[i].s.desc[j] = nm_open(pkt_nm->nm_name, NULL,
							 flags, &base_desc);
			if (desc_ring[i].s.desc[j] == NULL) {
				ODP_ERR("nm_open(%s) failed\n",
					pkt_nm->nm_name);
				goto error;
			}
		}
	/* Open tx descriptors */
	desc_ring = pkt_nm->tx_desc_ring;
	flags = NM_OPEN_IFNAME | NM_OPEN_NO_MMAP;
	base_desc.req.nr_flags &= ~NR_REG_MASK;
	base_desc.req.nr_flags |= NR_REG_ONE_NIC;
	for (i = 0; i < pktio_entry->s.num_out_queue; i++)
		for (j = desc_ring[i].s.first; j <= desc_ring[i].s.last; j++) {
			base_desc.req.nr_ringid = j;
			desc_ring[i].s.desc[j] = nm_open(pkt_nm->nm_name, NULL,
							 flags, &base_desc);
			if (desc_ring[i].s.desc[j] == NULL) {
				ODP_ERR("nm_open(%s) failed\n",
					pkt_nm->nm_name);
				goto error;
			}
		}
	pkt_nm->num_rx_desc_rings = pktio_entry->s.num_in_queue;
	pkt_nm->num_tx_desc_rings = pktio_entry->s.num_out_queue;

	/* Wait for the link to come up */
	return (netmap_wait_for_link(pktio_entry) == 1) ? 0 : -1;

error:
	netmap_close_descriptors(pktio_entry);
	return -1;
}
int
main(int argc, char **argv)
{
	struct pollfd pollfd[2];
	int ch;
	char *ifa = NULL, *ifb = NULL;
	int wait_link = 2;
	int win_size_usec = 50;
	unsigned int fifo_size = 10;
	int n;
	int hold = 0;
	struct nmreq base_req;
	uint32_t buf_head = 0;
#ifdef DEDUP_HASH_STAT
	time_t last_hash_output = 0;
#endif

	fprintf(stderr, "%s built %s %s\n\n", argv[0], __DATE__, __TIME__);

	while ((ch = getopt(argc, argv, "hci:vw:W:F:H")) != -1) {
		switch (ch) {
		default:
			D("bad option %c %s", ch, optarg);
			/* fallthrough */
		case 'h':
			usage();
			break;
		case 'i':	/* interface */
			if (ifa == NULL)
				ifa = optarg;
			else if (ifb == NULL)
				ifb = optarg;
			else
				D("%s ignored, already have 2 interfaces", optarg);
			break;
		case 'c':
			zerocopy = 0; /* do not zerocopy */
			break;
		case 'v':
			verbose++;
			break;
		case 'w':
			wait_link = atoi(optarg);
			break;
		case 'W':
			win_size_usec = atoi(optarg);
			break;
		case 'F':
			fifo_size = atoi(optarg);
			break;
		case 'H':
			hold = 1;
			break;
		}
	}

	if (!ifa || !ifb) {
		D("missing interface");
		usage();
	}

	memset(&base_req, 0, sizeof(base_req));
	if (!hold) {
		base_req.nr_arg3 = fifo_size;
	}
	pa = nm_open(ifa, &base_req, 0, NULL);
	if (pa == NULL) {
		D("cannot open %s", ifa);
		return (1);
	}
	if (!hold) {
		if (base_req.nr_arg3 != fifo_size) {
			D("failed to allocate %u extra buffers", fifo_size);
			return (1); // XXX failover to copy?
		} else {
			buf_head = pa->nifp->ni_bufs_head;
		}
	}
	if (pa->first_rx_ring != pa->last_rx_ring) {
		D("%s: too many RX rings (%d)", pa->req.nr_name,
			pa->last_rx_ring - pa->first_rx_ring + 1);
		return (1);
	}
	/* try to reuse the mmap() of the first interface, if possible */
	pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
	if (pb == NULL) {
		D("cannot open %s", ifb);
		nm_close(pa);
		return (1);
	}
	if (pb->first_tx_ring != pb->last_tx_ring) {
		D("%s: too many TX rings (%d)", pb->req.nr_name,
			pb->last_rx_ring - pb->first_rx_ring + 1);
		nm_close(pa);
		return (1);
	}

	memset(&dedup, 0, sizeof(dedup));
	if (dedup_init(&dedup, fifo_size,
			NETMAP_RXRING(pa->nifp, pa->first_rx_ring),
			NETMAP_TXRING(pb->nifp, pb->first_tx_ring)) < 0) {
		D("failed to initialize dedup with fifo_size %u", fifo_size);
		return (1);
	}
	/* out_ring is only valid after dedup_init() */
	dedup.out_slot = dedup.out_ring->slot;

	if (fifo_size >= dedup.out_ring->num_slots - 1) {
		D("fifo_size %u too large (max %u)", fifo_size,
			dedup.out_ring->num_slots - 1);
		return (1);
	}
	if (dedup_set_fifo_buffers(&dedup, NULL, buf_head) != 0) {
		D("failed to set 'hold packets' option");
		return (1);
	}
	pa->nifp->ni_bufs_head = 0;
	atexit(free_buffers);

	/* enable/disable zerocopy */
	dedup.in_memid = pa->req.nr_arg2;
	dedup.out_memid = (zerocopy ? pb->req.nr_arg2 : -1);
	dedup.fifo_memid = hold ? dedup.out_memid : dedup.in_memid;
	D("memids: in %d out %d fifo %d", dedup.in_memid, dedup.out_memid,
		dedup.fifo_memid);
	dedup.win_size.tv_sec = win_size_usec / 1000000;
	dedup.win_size.tv_usec = win_size_usec % 1000000;
	D("win_size %lld+%lld", (long long)dedup.win_size.tv_sec,
		(long long)dedup.win_size.tv_usec);

	/* setup poll(2) array */
	memset(pollfd, 0, sizeof(pollfd));
	pollfd[0].fd = pa->fd;
	pollfd[1].fd = pb->fd;

	D("Wait %d secs for link to come up...", wait_link);
	sleep(wait_link);
	D("Ready to go, %s -> %s", pa->req.nr_name, pb->req.nr_name);

	/* main loop */
	signal(SIGINT, sigint_h);
	n = 0;
	while (!do_abort) {
		int ret;
		struct timeval now;

		pollfd[0].events = pollfd[1].events = 0;
		pollfd[0].revents = pollfd[1].revents = 0;
		if (!n)
			pollfd[0].events = POLLIN;
		else
			pollfd[1].events = POLLOUT;
		/* poll() also causes the kernel to txsync/rxsync the NICs */
		ret = poll(pollfd, 2, 1000);
		gettimeofday(&now, NULL);
		if (ret <= 0 || verbose)
			D("poll %s [0] ev %x %x [1] ev %x %x",
				ret <= 0 ? "timeout" : "ok",
				pollfd[0].events, pollfd[0].revents,
				pollfd[1].events, pollfd[1].revents);
		n = dedup_push_in(&dedup, &now);
#ifdef DEDUP_HASH_STAT
		if (now.tv_sec != last_hash_output) {
			unsigned int i;

			last_hash_output = now.tv_sec;
			printf("buckets: ");
			for (i = 0; i <= dedup.hashmap_mask; i++) {
				if (dedup.hashmap[i].bucket_size)
					printf("%u: %u, ", i,
						dedup.hashmap[i].bucket_size);
			}
			printf("\n");
		}
#endif
	}

	return (0);
}
/*---------------------------------------------------------------------*/
int32_t
netmap_link_iface(void *ctxt, const unsigned char *iface,
		  const uint16_t batchsize, int8_t qid)
{
	TRACE_NETMAP_FUNC_START();
	char nifname[MAX_IFNAMELEN];
	netmap_module_context *nmc = (netmap_module_context *)ctxt;
	netmap_iface_context *nic = NULL;

	/* setting nm-ifname */
	sprintf(nifname, "netmap:%s", iface);

	/* check if the interface has been registered with some other engine */
	netiface *nif = interface_find((char *)iface);
	if (nif == NULL) {
		nic = calloc(1, sizeof(netmap_iface_context));
		if (nic == NULL) {
			TRACE_ERR("Can't allocate memory for "
				  "netmap_iface_context (for %s)\n", iface);
			TRACE_NETMAP_FUNC_END();
			return -1;
		}
		/* resetting base_nmd */
		memset(&nic->base_nmd, 0, sizeof(struct nm_desc));
		/* resetting fd to -1 */
		nic->global_fd = nmc->local_fd = -1;

		/* use some extra rings */
		nic->base_nmd.req.nr_arg3 = NM_EXTRA_BUFS;
		nic->nmd_flags |= NM_OPEN_ARG3;

		nic->global_nmd = nm_open((char *)nifname, NULL,
					  nic->nmd_flags, &nic->base_nmd);
		if (nic->global_nmd == NULL) {
			TRACE_LOG("Unable to open %s: %s\n",
				  iface, strerror(errno));
			free(nic);
			TRACE_NETMAP_FUNC_END();
			return -1;
		}
		nic->global_fd = nic->global_nmd->fd;
		TRACE_DEBUG_LOG("mapped %dKB at %p\n",
				nic->global_nmd->req.nr_memsize >> 10,
				nic->global_nmd->mem);
		TRACE_DEBUG_LOG("zerocopy %s",
				(nic->global_nmd->mem == nic->base_nmd.mem) ?
				"enabled\n" : "disabled\n");

		if (qid != -1) {
			nic->global_nmd->req.nr_flags = NR_REG_ONE_NIC;
			nic->global_nmd->req.nr_ringid = qid;
		}

		/* create interface entry */
		create_interface_entry(iface, (qid == -1) ? NO_QUEUES : HW_QUEUES,
				       IO_NETMAP, nic, nmc->eng);
	} else { /* otherwise check if that interface can be registered */
int
main(int argc, char **argv)
{
	int i;
	struct glob_arg g;
	int ch;
	int wait_link = 2;
	int devqueues = 1;	/* how many device queues */

	bzero(&g, sizeof(g));

	g.main_fd = -1;
	g.td_body = receiver_body;
	g.report_interval = 1000;	/* report interval */
	g.affinity = -1;
	/* ip addresses can also be a range x.x.x.x-x.x.x.y */
	g.src_ip.name = "10.0.0.1";
	g.dst_ip.name = "10.1.0.1";
	g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
	g.src_mac.name = NULL;
	g.pkt_size = 60;
	g.burst = 512;		// default
	g.nthreads = 1;
	g.cpus = 1;
	g.forever = 1;
	g.tx_rate = 0;
	g.frags = 1;
	g.nmr_config = "";
	g.virt_header = 0;

	while ((ch = getopt(argc, argv,
			"a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:P:zZ")) != -1) {
		struct sf *fn;

		switch (ch) {
		default:
			D("bad option %c %s", ch, optarg);
			usage();
			break;
		case 'n':
			g.npackets = atoi(optarg);
			break;
		case 'F':
			i = atoi(optarg);
			if (i < 1 || i > 63) {
				D("invalid frags %d [1..63], ignore", i);
				break;
			}
			g.frags = i;
			break;
		case 'f':
			for (fn = func; fn->key; fn++) {
				if (!strcmp(fn->key, optarg))
					break;
			}
			if (fn->key)
				g.td_body = fn->f;
			else
				D("unrecognised function %s", optarg);
			break;
		case 'o':	/* data generation options */
			g.options = atoi(optarg);
			break;
		case 'a':	/* force affinity */
			g.affinity = atoi(optarg);
			break;
		case 'i':	/* interface */
			/* a prefix of tap: netmap: or pcap: forces the mode.
			 * otherwise we guess */
			D("interface is %s", optarg);
			if (strlen(optarg) > MAX_IFNAMELEN - 8) {
				D("ifname too long %s", optarg);
				break;
			}
			strcpy(g.ifname, optarg);
			if (!strcmp(optarg, "null")) {
				g.dev_type = DEV_NETMAP;
				g.dummy_send = 1;
			} else if (!strncmp(optarg, "tap:", 4)) {
				g.dev_type = DEV_TAP;
				strcpy(g.ifname, optarg + 4);
			} else if (!strncmp(optarg, "pcap:", 5)) {
				g.dev_type = DEV_PCAP;
				strcpy(g.ifname, optarg + 5);
			} else if (!strncmp(optarg, "netmap:", 7) ||
				   !strncmp(optarg, "vale", 4)) {
				g.dev_type = DEV_NETMAP;
			} else if (!strncmp(optarg, "tap", 3)) {
				g.dev_type = DEV_TAP;
			} else { /* prepend netmap: */
				g.dev_type = DEV_NETMAP;
				sprintf(g.ifname, "netmap:%s", optarg);
			}
			break;
		case 'I':
			g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
			break;
		case 'l':	/* pkt_size */
			g.pkt_size = atoi(optarg);
			break;
		case 'd':
			g.dst_ip.name = optarg;
			break;
		case 's':
			g.src_ip.name = optarg;
			break;
		case 'T':	/* report interval */
			g.report_interval = atoi(optarg);
			break;
		case 'w':
			wait_link = atoi(optarg);
			break;
		case 'W': /* XXX changed default */
			g.forever = 0; /* exit rx when there is no traffic */
			break;
		case 'b':	/* burst */
			g.burst = atoi(optarg);
			break;
		case 'c':
			g.cpus = atoi(optarg);
			break;
		case 'p':
			g.nthreads = atoi(optarg);
			break;
		case 'D':	/* destination mac */
			g.dst_mac.name = optarg;
			break;
		case 'S':	/* source mac */
			g.src_mac.name = optarg;
			break;
		case 'v':
			verbose++;
			break;
		case 'R':
			g.tx_rate = atoi(optarg);
			break;
		case 'X':
			g.options |= OPT_DUMP;
			break;
		case 'C':
			g.nmr_config = strdup(optarg);
			break;
		case 'H':
			g.virt_header = atoi(optarg);
			break;
		case 'e':	/* extra bufs */
			g.extra_bufs = atoi(optarg);
			break;
		case 'm':
			if (strcmp(optarg, "tx") == 0) {
				g.options |= OPT_MONITOR_TX;
			} else if (strcmp(optarg, "rx") == 0) {
				g.options |= OPT_MONITOR_RX;
			} else {
				D("unrecognized monitor mode %s", optarg);
			}
			break;
		case 'P':
			g.packet_file = strdup(optarg);
			break;
		case 'z':
			g.options |= OPT_RANDOM_SRC;
			break;
		case 'Z':
			g.options |= OPT_RANDOM_DST;
			break;
		}
	}

	if (strlen(g.ifname) <= 0) {
		D("missing ifname");
		usage();
	}

	i = system_ncpus();
	if (g.cpus < 0 || g.cpus > i) {
		D("%d cpus is too high, have only %d cpus", g.cpus, i);
		usage();
	}
	if (g.cpus == 0)
		g.cpus = i;

	if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
		D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
		usage();
	}

	if (g.src_mac.name == NULL) {
		static char mybuf[20] = "00:00:00:00:00:00";
		/* retrieve source mac address. */
		if (source_hwaddr(g.ifname, mybuf) == -1) {
			D("Unable to retrieve source mac");
			// continue, fail later
		}
		g.src_mac.name = mybuf;
	}
	/* extract address ranges */
	extract_ip_range(&g.src_ip);
	extract_ip_range(&g.dst_ip);
	extract_mac_range(&g.src_mac);
	extract_mac_range(&g.dst_mac);

	if (g.src_ip.start != g.src_ip.end ||
	    g.src_ip.port0 != g.src_ip.port1 ||
	    g.dst_ip.start != g.dst_ip.end ||
	    g.dst_ip.port0 != g.dst_ip.port1)
		g.options |= OPT_COPY;

	if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 &&
	    g.virt_header != VIRT_HDR_2) {
		D("bad virtio-net-header length");
		usage();
	}

	if (g.dev_type == DEV_TAP) {
		D("want to use tap %s", g.ifname);
		g.main_fd = tap_alloc(g.ifname);
		if (g.main_fd < 0) {
			D("cannot open tap %s", g.ifname);
			usage();
		}
#ifndef NO_PCAP
	} else if (g.dev_type == DEV_PCAP) {
		char pcap_errbuf[PCAP_ERRBUF_SIZE];

		pcap_errbuf[0] = '\0'; // init the buffer
		g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
		if (g.p == NULL) {
			D("cannot open pcap on %s", g.ifname);
			usage();
		}
		g.main_fd = pcap_fileno(g.p);
		D("using pcap on %s fileno %d", g.ifname, g.main_fd);
#endif /* !NO_PCAP */
	} else if (g.dummy_send) { /* but DEV_NETMAP */
		D("using a dummy send routine");
	} else {
		struct nmreq base_nmd;

		bzero(&base_nmd, sizeof(base_nmd));

		parse_nmr_config(g.nmr_config, &base_nmd);
		if (g.extra_bufs) {
			base_nmd.nr_arg3 = g.extra_bufs;
		}

		/*
		 * Open the netmap device using nm_open().
		 *
		 * The first open detaches the card from the
		 * protocol stack and may cause a reset of the card,
		 * which in turn may take some time for the PHY to
		 * reconfigure. We do the open here to have time to reset.
		 */
		g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL);
		if (g.nmd == NULL) {
			D("Unable to open %s: %s", g.ifname, strerror(errno));
			goto out;
		}
		g.main_fd = g.nmd->fd;
		D("mapped %dKB at %p", g.nmd->req.nr_memsize >> 10, g.nmd->mem);

		/* get num of queues in tx or rx */
		if (g.td_body == sender_body)
			devqueues = g.nmd->req.nr_tx_rings;
		else
			devqueues = g.nmd->req.nr_rx_rings;

		/* validate provided nthreads. */
		if (g.nthreads < 1 || g.nthreads > devqueues) {
			D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
			// continue, fail later
		}

		if (verbose) {
			struct netmap_if *nifp = g.nmd->nifp;
			struct nmreq *req = &g.nmd->req;

			D("nifp at offset %d, %d tx %d rx region %d",
			    req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
			    req->nr_arg2);
			for (i = 0; i <= req->nr_tx_rings; i++) {
				struct netmap_ring *ring = NETMAP_TXRING(nifp, i);
				D("   TX%d at 0x%lx slots %d", i,
				    (char *)ring - (char *)nifp, ring->num_slots);
			}
			for (i = 0; i <= req->nr_rx_rings; i++) {
				struct netmap_ring *ring = NETMAP_RXRING(nifp, i);
				D("   RX%d at 0x%lx slots %d", i,
				    (char *)ring - (char *)nifp, ring->num_slots);
			}
		}

		/* Print some debug information. */
		fprintf(stdout, "%s %s: %d queues, %d threads and %d cpus.\n",
			(g.td_body == sender_body) ? "Sending on" : "Receiving from",
			g.ifname, devqueues, g.nthreads, g.cpus);
		if (g.td_body == sender_body) {
			fprintf(stdout, "%s -> %s (%s -> %s)\n",
				g.src_ip.name, g.dst_ip.name,
				g.src_mac.name, g.dst_mac.name);
		}

out:
		/* Exit if something went wrong. */
		if (g.main_fd < 0) {
			D("aborting");
			usage();
		}
	}

	if (g.options) {
		D("--- SPECIAL OPTIONS:%s%s%s%s%s\n",
			g.options & OPT_PREFETCH ? " prefetch" : "",
			g.options & OPT_ACCESS ? " access" : "",
			g.options & OPT_MEMCPY ? " memcpy" : "",
			g.options & OPT_INDIRECT ? " indirect" : "",
			g.options & OPT_COPY ? " copy" : "");
	}

	g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
	if (g.tx_rate > 0) {
		/* try to have at least something every second,
		 * reducing the burst size to some 0.01s worth of data
		 * (but no less than one full set of fragments) */
		uint64_t x;
		int lim = (g.tx_rate) / 300;

		if (g.burst > lim)
			g.burst = lim;
		if (g.burst < g.frags)
			g.burst = g.frags;
		x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t)g.tx_rate;
		g.tx_period.tv_nsec = x;
		g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
		g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
	}
	if (g.td_body == sender_body)
		D("Sending %d packets every %ld.%09ld s",
			g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);

	/* Wait for PHY reset. */
	D("Wait %d secs for phy reset", wait_link);
	sleep(wait_link);
	D("Ready...");

	/* Install ^C handler. */
	global_nthreads = g.nthreads;
	signal(SIGINT, sigint_h);

	start_threads(&g);
	main_thread(&g);
	return 0;
}
/*
 * bridge [-v] if1 [if2]
 *
 * If only one name, or the two interfaces are the same,
 * bridges userland and the adapter. Otherwise bridge
 * two interfaces.
 */
int
main(int argc, char **argv)
{
	struct pollfd pollfd[2];
	int ch;
	u_int burst = 1024, wait_link = 4;
	struct nm_desc *pa = NULL, *pb = NULL;
	char *ifa = NULL, *ifb = NULL;
	char ifabuf[64] = { 0 };

	fprintf(stderr, "%s built %s %s\n", argv[0], __DATE__, __TIME__);

	while ((ch = getopt(argc, argv, "b:ci:vw:")) != -1) {
		switch (ch) {
		default:
			D("bad option %c %s", ch, optarg);
			usage();
			break;
		case 'b':	/* burst */
			burst = atoi(optarg);
			break;
		case 'i':	/* interface */
			if (ifa == NULL)
				ifa = optarg;
			else if (ifb == NULL)
				ifb = optarg;
			else
				D("%s ignored, already have 2 interfaces", optarg);
			break;
		case 'c':
			zerocopy = 0; /* do not zerocopy */
			break;
		case 'v':
			verbose++;
			break;
		case 'w':
			wait_link = atoi(optarg);
			break;
		}
	}

	argc -= optind;
	argv += optind;

	if (argc > 1)
		ifa = argv[1];
	if (argc > 2)
		ifb = argv[2];
	if (argc > 3)
		burst = atoi(argv[3]);
	if (!ifb)
		ifb = ifa;
	if (!ifa) {
		D("missing interface");
		usage();
	}
	if (burst < 1 || burst > 8192) {
		D("invalid burst %d, set to 1024", burst);
		burst = 1024;
	}
	if (wait_link > 100) {
		D("invalid wait_link %d, set to 4", wait_link);
		wait_link = 4;
	}
	if (!strcmp(ifa, ifb)) {
		D("same interface, endpoint 0 goes to host");
		snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa);
		ifa = ifabuf;
	} else {
		/* two different interfaces. Take all rings on if1 */
	}
	pa = nm_open(ifa, NULL, 0, NULL);
	if (pa == NULL) {
		D("cannot open %s", ifa);
		return (1);
	}
	// XXX use a single mmap ?
	pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
	if (pb == NULL) {
		D("cannot open %s", ifb);
		nm_close(pa);
		return (1);
	}
	zerocopy = zerocopy && (pa->mem == pb->mem);
	D("------- zerocopy %ssupported", zerocopy ? "" : "NOT ");

	/* setup poll(2) variables. */
	memset(pollfd, 0, sizeof(pollfd));
	pollfd[0].fd = pa->fd;
	pollfd[1].fd = pb->fd;

	D("Wait %d secs for link to come up...", wait_link);
	sleep(wait_link);
	D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.",
		pa->req.nr_name, pa->first_rx_ring, pa->req.nr_rx_rings,
		pb->req.nr_name, pb->first_rx_ring, pb->req.nr_rx_rings);

	/* main loop */
	signal(SIGINT, sigint_h);
	while (!do_abort) {
		int n0, n1, ret;

		pollfd[0].events = pollfd[1].events = 0;
		pollfd[0].revents = pollfd[1].revents = 0;
		n0 = pkt_queued(pa, 0);
		n1 = pkt_queued(pb, 0);
		if (n0)
			pollfd[1].events |= POLLOUT;
		else
			pollfd[0].events |= POLLIN;
		if (n1)
			pollfd[0].events |= POLLOUT;
		else
			pollfd[1].events |= POLLIN;
		ret = poll(pollfd, 2, 2500);
		if (ret <= 0 || verbose)
			D("poll %s [0] ev %x %x rx %d@%d tx %d,"
			  " [1] ev %x %x rx %d@%d tx %d",
				ret <= 0 ? "timeout" : "ok",
				pollfd[0].events, pollfd[0].revents,
				pkt_queued(pa, 0),
				NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur,
				pkt_queued(pa, 1),
				pollfd[1].events, pollfd[1].revents,
				pkt_queued(pb, 0),
				NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur,
				pkt_queued(pb, 1));
		if (ret < 0)
			continue;
		if (pollfd[0].revents & POLLERR) {
			struct netmap_ring *rx =
				NETMAP_RXRING(pa->nifp, pa->cur_rx_ring);
			D("error on fd0, rx [%d,%d,%d)",
				rx->head, rx->cur, rx->tail);
		}
		if (pollfd[1].revents & POLLERR) {
			struct netmap_ring *rx =
				NETMAP_RXRING(pb->nifp, pb->cur_rx_ring);
			D("error on fd1, rx [%d,%d,%d)",
				rx->head, rx->cur, rx->tail);
		}
		if (pollfd[0].revents & POLLOUT) {
			move(pb, pa, burst);
			// XXX we don't need the ioctl
			// ioctl(me[0].fd, NIOCTXSYNC, NULL);
		}
		if (pollfd[1].revents & POLLOUT) {
			move(pa, pb, burst);
			// XXX we don't need the ioctl
			// ioctl(me[1].fd, NIOCTXSYNC, NULL);
		}
	}
	D("exiting");
	nm_close(pb);
	nm_close(pa);
	return (0);
}
static void
start_threads(struct glob_arg *g)
{
	int i;

	targs = calloc(g->nthreads, sizeof(*targs));
	/*
	 * Now create the desired number of threads, each one
	 * using a single descriptor.
	 */
	for (i = 0; i < g->nthreads; i++) {
		struct targ *t = &targs[i];

		bzero(t, sizeof(*t));
		t->fd = -1; /* default, with pcap */
		t->g = g;

		if (g->dev_type == DEV_NETMAP) {
			struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
			uint64_t nmd_flags = 0;
			nmd.self = &nmd;

			if (g->nthreads > 1) {
				if (nmd.req.nr_flags != NR_REG_ALL_NIC) {
					D("invalid nthreads mode %d",
						nmd.req.nr_flags);
					continue;
				}
				nmd.req.nr_flags = NR_REG_ONE_NIC;
				nmd.req.nr_ringid = i;
			}
			/* Only touch one of the rings (rx is already ok) */
			if (g->td_body == receiver_body)
				nmd_flags |= NETMAP_NO_TX_POLL;

			/* register interface. Override ifname and ringid etc. */
			if (g->options & OPT_MONITOR_TX)
				nmd.req.nr_flags |= NR_MONITOR_TX;
			if (g->options & OPT_MONITOR_RX)
				nmd.req.nr_flags |= NR_MONITOR_RX;

			t->nmd = nm_open(t->g->ifname, NULL,
				nmd_flags | NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
			if (t->nmd == NULL) {
				D("Unable to open %s: %s",
					t->g->ifname, strerror(errno));
				continue;
			}
			t->fd = t->nmd->fd;
			set_vnet_hdr_len(t);
		} else {
			targs[i].fd = g->main_fd;
		}
		t->used = 1;
		t->me = i;
		if (g->affinity >= 0) {
			if (g->affinity < g->cpus)
				t->affinity = g->affinity;
			else
				t->affinity = i % g->cpus;
		} else {
			t->affinity = -1;
		}
		/* default, init packets */
		initialize_packet(t);

		if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
			D("Unable to create thread %d: %s", i, strerror(errno));
			t->used = 0;
		}
	}
}
/*
 * bridge [-v] if1 [if2]
 *
 * If only one name, or the two interfaces are the same,
 * bridges userland and the adapter. Otherwise bridge
 * two interfaces.
 */
int
main(int argc, char **argv)
{
	struct pollfd pollfd[2];
	int ch;
	u_int burst = 1024, wait_link = 4;
	struct nm_desc *pa = NULL, *pb = NULL;
	char *ifa = NULL, *ifb = NULL;
	char ifabuf[64] = { 0 };
	int loopback = 0;

	fprintf(stderr, "%s built %s %s\n\n", argv[0], __DATE__, __TIME__);

	while ((ch = getopt(argc, argv, "hb:ci:vw:L")) != -1) {
		switch (ch) {
		default:
			D("bad option %c %s", ch, optarg);
			/* fallthrough */
		case 'h':
			usage();
			break;
		case 'b':	/* burst */
			burst = atoi(optarg);
			break;
		case 'i':	/* interface */
			if (ifa == NULL)
				ifa = optarg;
			else if (ifb == NULL)
				ifb = optarg;
			else
				D("%s ignored, already have 2 interfaces", optarg);
			break;
		case 'c':
			zerocopy = 0; /* do not zerocopy */
			break;
		case 'v':
			verbose++;
			break;
		case 'w':
			wait_link = atoi(optarg);
			break;
		case 'L':
			loopback = 1;
			break;
		}
	}

	argc -= optind;
	argv += optind;

	if (argc > 0)
		ifa = argv[0];
	if (argc > 1)
		ifb = argv[1];
	if (argc > 2)
		burst = atoi(argv[2]);
	if (!ifb)
		ifb = ifa;
	if (!ifa) {
		D("missing interface");
		usage();
	}
	if (burst < 1 || burst > 8192) {
		D("invalid burst %d, set to 1024", burst);
		burst = 1024;
	}
	if (wait_link > 100) {
		D("invalid wait_link %d, set to 4", wait_link);
		wait_link = 4;
	}
	if (!strcmp(ifa, ifb)) {
		if (!loopback) {
			D("same interface, endpoint 0 goes to host");
			snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa);
			ifa = ifabuf;
		} else {
			D("same interface, loopbacking traffic");
		}
	} else {
		/* two different interfaces. Take all rings on if1 */
	}
	pa = nm_open(ifa, NULL, 0, NULL);
	if (pa == NULL) {
		D("cannot open %s", ifa);
		return (1);
	}
	/* try to reuse the mmap() of the first interface, if possible */
	pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
	if (pb == NULL) {
		D("cannot open %s", ifb);
		nm_close(pa);
		return (1);
	}
	zerocopy = zerocopy && (pa->mem == pb->mem);
	D("------- zerocopy %ssupported", zerocopy ? "" : "NOT ");

	/* setup poll(2) array */
	memset(pollfd, 0, sizeof(pollfd));
	pollfd[0].fd = pa->fd;
	pollfd[1].fd = pb->fd;

	D("Wait %d secs for link to come up...", wait_link);
	sleep(wait_link);
	D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.",
		pa->req.nr_name, pa->first_rx_ring, pa->req.nr_rx_rings,
		pb->req.nr_name, pb->first_rx_ring, pb->req.nr_rx_rings);

	/* main loop */
	signal(SIGINT, sigint_h);
	while (!do_abort) {
		int n0, n1, ret;

		pollfd[0].events = pollfd[1].events = 0;
		pollfd[0].revents = pollfd[1].revents = 0;
		n0 = pkt_queued(pa, 0);
		n1 = pkt_queued(pb, 0);
#if defined(_WIN32) || defined(BUSYWAIT)
		if (n0) {
			ioctl(pollfd[1].fd, NIOCTXSYNC, NULL);
			pollfd[1].revents = POLLOUT;
		} else {
			ioctl(pollfd[0].fd, NIOCRXSYNC, NULL);
		}
		if (n1) {
			ioctl(pollfd[0].fd, NIOCTXSYNC, NULL);
			pollfd[0].revents = POLLOUT;
		} else {
			ioctl(pollfd[1].fd, NIOCRXSYNC, NULL);
		}
		ret = 1;
#else
		if (n0)
			pollfd[1].events |= POLLOUT;
		else
			pollfd[0].events |= POLLIN;
		if (n1)
			pollfd[0].events |= POLLOUT;
		else
			pollfd[1].events |= POLLIN;

		/* poll() also causes the kernel to txsync/rxsync the NICs */
		ret = poll(pollfd, 2, 2500);
#endif /* defined(_WIN32) || defined(BUSYWAIT) */
		if (ret <= 0 || verbose)
			D("poll %s [0] ev %x %x rx %d@%d tx %d,"
			  " [1] ev %x %x rx %d@%d tx %d",
				ret <= 0 ? "timeout" : "ok",
				pollfd[0].events, pollfd[0].revents,
				pkt_queued(pa, 0),
				NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur,
				pkt_queued(pa, 1),
				pollfd[1].events, pollfd[1].revents,
				pkt_queued(pb, 0),
				NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur,
				pkt_queued(pb, 1));
		if (ret < 0)
			continue;
		if (pollfd[0].revents & POLLERR) {
			struct netmap_ring *rx =
				NETMAP_RXRING(pa->nifp, pa->cur_rx_ring);
			D("error on fd0, rx [%d,%d,%d)",
				rx->head, rx->cur, rx->tail);
		}
		if (pollfd[1].revents & POLLERR) {
			struct netmap_ring *rx =
				NETMAP_RXRING(pb->nifp, pb->cur_rx_ring);
			D("error on fd1, rx [%d,%d,%d)",
				rx->head, rx->cur, rx->tail);
		}
		if (pollfd[0].revents & POLLOUT)
			move(pb, pa, burst);
		if (pollfd[1].revents & POLLOUT)
			move(pa, pb, burst);
		/* We don't need ioctl(NIOCTXSYNC) on the two file descriptors
		 * here; the kernel will txsync on the next poll(). */
	}
	nm_close(pb);
	nm_close(pa);

	return (0);
}
int main(int argc, char **argv) { int ch; uint32_t i; int rv; unsigned int iter = 0; glob_arg.ifname[0] = '\0'; glob_arg.output_rings = DEF_OUT_PIPES; glob_arg.batch = DEF_BATCH; glob_arg.syslog_interval = DEF_SYSLOG_INT; while ( (ch = getopt(argc, argv, "i:p:b:B:s:")) != -1) { switch (ch) { case 'i': D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); return 1; } if (strncmp(optarg, "netmap:", 7) && strncmp(optarg, "vale", 4)) { sprintf(glob_arg.ifname, "netmap:%s", optarg); } else { strcpy(glob_arg.ifname, optarg); } break; case 'p': glob_arg.output_rings = atoi(optarg); if (glob_arg.output_rings < 1) { D("you must output to at least one pipe"); usage(); return 1; } break; case 'B': glob_arg.extra_bufs = atoi(optarg); D("requested %d extra buffers", glob_arg.extra_bufs); break; case 'b': glob_arg.batch = atoi(optarg); D("batch is %d", glob_arg.batch); break; case 's': glob_arg.syslog_interval = atoi(optarg); D("syslog interval is %d", glob_arg.syslog_interval); break; default: D("bad option %c %s", ch, optarg); usage(); return 1; } } if (glob_arg.ifname[0] == '\0') { D("missing interface name"); usage(); return 1; } setlogmask(LOG_UPTO(LOG_INFO)); openlog("lb", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); uint32_t npipes = glob_arg.output_rings; struct overflow_queue *freeq = NULL; pthread_t stat_thread; ports = calloc(npipes + 1, sizeof(struct port_des)); if (!ports) { D("failed to allocate the stats array"); return 1; } struct port_des *rxport = &ports[npipes]; if (pthread_create(&stat_thread, NULL, print_stats, NULL) == -1) { D("unable to create the stats thread: %s", strerror(errno)); return 1; } /* we need base_req to specify pipes and extra bufs */ struct nmreq base_req; memset(&base_req, 0, sizeof(base_req)); base_req.nr_arg1 = npipes; base_req.nr_arg3 = glob_arg.extra_bufs; rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL); if (rxport->nmd == NULL) { D("cannot open %s", glob_arg.ifname); return (1); } else { D("successfully opened %s (tx rings: %u)", glob_arg.ifname, rxport->nmd->req.nr_tx_slots); } uint32_t extra_bufs = rxport->nmd->req.nr_arg3; struct overflow_queue *oq = NULL; /* reference ring to access the buffers */ rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0); if (!glob_arg.extra_bufs) goto run; D("obtained %d extra buffers", extra_bufs); if (!extra_bufs) goto run; /* one overflow queue for each output pipe, plus one for the * free extra buffers */ oq = calloc(npipes + 1, sizeof(struct overflow_queue)); if (!oq) { D("failed to allocated overflow queues descriptors"); goto run; } freeq = &oq[npipes]; rxport->oq = freeq; freeq->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!freeq->slots) { D("failed to allocate the free list"); } freeq->size = extra_bufs; snprintf(freeq->name, MAX_IFNAMELEN, "free queue"); /* * the list of buffers uses the first uint32_t in each buffer * as the index of the next buffer. 
*/ uint32_t scan; for (scan = rxport->nmd->nifp->ni_bufs_head; scan; scan = *(uint32_t *)NETMAP_BUF(rxport->ring, scan)) { struct netmap_slot s; s.buf_idx = scan; ND("freeq <- %d", s.buf_idx); oq_enq(freeq, &s); } atexit(free_buffers); if (freeq->n != extra_bufs) { D("something went wrong: netmap reported %d extra_bufs, but the free list contained %d", extra_bufs, freeq->n); return 1; } rxport->nmd->nifp->ni_bufs_head = 0; run: for (i = 0; i < npipes; ++i) { char interface[25]; sprintf(interface, "%s{%d", glob_arg.ifname, i); D("opening pipe named %s", interface); //ports[i].nmd = nm_open(interface, NULL, NM_OPEN_NO_MMAP | NM_OPEN_ARG3 | NM_OPEN_RING_CFG, rxport->nmd); ports[i].nmd = nm_open(interface, NULL, 0, rxport->nmd); if (ports[i].nmd == NULL) { D("cannot open %s", interface); return (1); } else { D("successfully opened pipe #%d %s (tx slots: %d)", i + 1, interface, ports[i].nmd->req.nr_tx_slots); ports[i].ring = NETMAP_TXRING(ports[i].nmd->nifp, 0); } D("zerocopy %s", (rxport->nmd->mem == ports[i].nmd->mem) ? "enabled" : "disabled"); if (extra_bufs) { struct overflow_queue *q = &oq[i]; q->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!q->slots) { D("failed to allocate overflow queue for pipe %d", i); /* make all overflow queue management fail */ extra_bufs = 0; } q->size = extra_bufs; snprintf(q->name, MAX_IFNAMELEN, "oq %d", i); ports[i].oq = q; } } if (glob_arg.extra_bufs && !extra_bufs) { if (oq) { for (i = 0; i < npipes + 1; i++) { free(oq[i].slots); oq[i].slots = NULL; } free(oq); oq = NULL; } D("*** overflow queues disabled ***"); } sleep(2); struct pollfd pollfd[npipes + 1]; memset(&pollfd, 0, sizeof(pollfd)); signal(SIGINT, sigint_h); while (!do_abort) { u_int polli = 0; iter++; for (i = 0; i < npipes; ++i) { struct netmap_ring *ring = ports[i].ring; if (nm_ring_next(ring, ring->tail) == ring->cur) { /* no need to poll, there are no packets pending */ continue; } pollfd[polli].fd = ports[i].nmd->fd; pollfd[polli].events = POLLOUT; pollfd[polli].revents = 0; ++polli; } pollfd[polli].fd = rxport->nmd->fd; pollfd[polli].events = POLLIN; pollfd[polli].revents = 0; ++polli; //RD(5, "polling %d file descriptors", polli+1); rv = poll(pollfd, polli, 10); if (rv <= 0) { if (rv < 0 && errno != EAGAIN && errno != EINTR) RD(1, "poll error %s", strerror(errno)); continue; } if (oq) { /* try to push packets from the overflow queues * to the corresponding pipes */ for (i = 0; i < npipes; i++) { struct port_des *p = &ports[i]; struct overflow_queue *q = p->oq; uint32_t j, lim; struct netmap_ring *ring; struct netmap_slot *slot; if (!q->n) continue; ring = p->ring; lim = nm_ring_space(ring); if (!lim) continue; if (q->n < lim) lim = q->n; for (j = 0; j < lim; j++) { struct netmap_slot s = oq_deq(q); slot = &ring->slot[ring->cur]; oq_enq(freeq, slot); *slot = s; slot->flags |= NS_BUF_CHANGED; ring->cur = nm_ring_next(ring, ring->cur); } ring->head = ring->cur; forwarded += lim; p->ctr.pkts += lim; } } int batch = 0; for (i = rxport->nmd->first_rx_ring; i <= rxport->nmd->last_rx_ring; i++) { struct netmap_ring *rxring = NETMAP_RXRING(rxport->nmd->nifp, i); //D("prepare to scan rings"); int next_cur = rxring->cur; struct netmap_slot *next_slot = &rxring->slot[next_cur]; const char *next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); while (!nm_ring_empty(rxring)) { struct overflow_queue *q; struct netmap_slot *rs = next_slot; // CHOOSE THE CORRECT OUTPUT PIPE uint32_t hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B'); if (hash == 0) non_ip++; // XXX ?? 
// prefetch the buffer for the next round next_cur = nm_ring_next(rxring, next_cur); next_slot = &rxring->slot[next_cur]; next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); __builtin_prefetch(next_buf); // 'B' is just a hashing seed uint32_t output_port = hash % glob_arg.output_rings; struct port_des *port = &ports[output_port]; struct netmap_ring *ring = port->ring; uint32_t free_buf; // Move the packet to the output pipe. if (nm_ring_space(ring)) { struct netmap_slot *ts = &ring->slot[ring->cur]; free_buf = ts->buf_idx; ts->buf_idx = rs->buf_idx; ts->len = rs->len; ts->flags |= NS_BUF_CHANGED; ring->head = ring->cur = nm_ring_next(ring, ring->cur); port->ctr.pkts++; forwarded++; goto forward; } /* use the overflow queue, if available */ if (!oq) { dropped++; port->ctr.drop++; goto next; } q = &oq[output_port]; if (!freeq->n) { /* revoke some buffers from the longest overflow queue */ uint32_t j; struct port_des *lp = &ports[0]; uint32_t max = lp->oq->n; for (j = 1; j < npipes; j++) { struct port_des *cp = &ports[j]; if (cp->oq->n > max) { lp = cp; max = cp->oq->n; } } // XXX optimize this cycle for (j = 0; lp->oq->n && j < BUF_REVOKE; j++) { struct netmap_slot tmp = oq_deq(lp->oq); oq_enq(freeq, &tmp); } ND(1, "revoked %d buffers from %s", j, lp->oq->name); lp->ctr.drop += j; dropped += j; } free_buf = oq_deq(freeq).buf_idx; oq_enq(q, rs); forward: rs->buf_idx = free_buf; rs->flags |= NS_BUF_CHANGED; next: rxring->head = rxring->cur = next_cur; batch++; if (unlikely(batch >= glob_arg.batch)) { ioctl(rxport->nmd->fd, NIOCRXSYNC, NULL); batch = 0; } ND(1, "Forwarded Packets: %"PRIu64" Dropped packets: %"PRIu64" Percent: %.2f", forwarded, dropped, ((float)dropped / (float)forwarded * 100)); } } } pthread_join(stat_thread, NULL); printf("%"PRIu64" packets forwarded. %"PRIu64" packets dropped. Total %"PRIu64"\n", forwarded, dropped, forwarded + dropped); return 0; }
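Both lb variants above lean on a small FIFO type, struct overflow_queue, whose helpers (oq_enq, oq_deq, oq_empty) are used but never shown. Below is a minimal sketch of what those helpers could look like, reconstructed purely from how the code touches the fields slots, size, n and name; the actual definitions in lb.c may differ, and the layout here is an assumption (struct netmap_slot comes from net/netmap_user.h, MAX_IFNAMELEN from lb.c itself):

/* a fixed-size ring of netmap slots, used to park packets when a pipe is full */
struct overflow_queue {
	char name[MAX_IFNAMELEN];
	struct netmap_slot *slots;
	uint32_t head;	/* next slot to dequeue */
	uint32_t tail;	/* next free slot */
	uint32_t n;	/* number of queued slots */
	uint32_t size;	/* capacity, == number of extra buffers */
};

static inline void
oq_enq(struct overflow_queue *q, const struct netmap_slot *s)
{
	/* the callers above never enqueue more than q->size slots in total,
	 * so no overflow check is needed here */
	q->slots[q->tail] = *s;
	q->tail = (q->tail + 1 == q->size) ? 0 : q->tail + 1;
	q->n++;
}

static inline struct netmap_slot
oq_deq(struct overflow_queue *q)
{
	struct netmap_slot s = q->slots[q->head];

	q->head = (q->head + 1 == q->size) ? 0 : q->head + 1;
	q->n--;
	return s;
}

static inline int
oq_empty(const struct overflow_queue *q)
{
	return q->n == 0;
}

Queued entries are whole struct netmap_slot values rather than bare buffer indices, which is why the main loop can park an RX slot with oq_enq(q, rs) and later replay it verbatim into a TX slot.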
int main(int argc, char **argv) { int ch; uint32_t i; int rv; unsigned int iter = 0; glob_arg.ifname[0] = '\0'; glob_arg.output_rings = 0; glob_arg.batch = DEF_BATCH; glob_arg.syslog_interval = DEF_SYSLOG_INT; while ( (ch = getopt(argc, argv, "i:p:b:B:s:")) != -1) { switch (ch) { case 'i': D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); return 1; } if (strncmp(optarg, "netmap:", 7) && strncmp(optarg, "vale", 4)) { sprintf(glob_arg.ifname, "netmap:%s", optarg); } else { strcpy(glob_arg.ifname, optarg); } break; case 'p': if (parse_pipes(optarg)) { usage(); return 1; } break; case 'B': glob_arg.extra_bufs = atoi(optarg); D("requested %d extra buffers", glob_arg.extra_bufs); break; case 'b': glob_arg.batch = atoi(optarg); D("batch is %d", glob_arg.batch); break; case 's': glob_arg.syslog_interval = atoi(optarg); D("syslog interval is %d", glob_arg.syslog_interval); break; default: D("bad option %c %s", ch, optarg); usage(); return 1; } } if (glob_arg.ifname[0] == '\0') { D("missing interface name"); usage(); return 1; } /* extract the base name */ char *nscan = strncmp(glob_arg.ifname, "netmap:", 7) ? glob_arg.ifname : glob_arg.ifname + 7; strncpy(glob_arg.base_name, nscan, MAX_IFNAMELEN); for (nscan = glob_arg.base_name; *nscan && !strchr("-*^{}/@", *nscan); nscan++) ; *nscan = '\0'; if (glob_arg.num_groups == 0) parse_pipes(""); setlogmask(LOG_UPTO(LOG_INFO)); openlog("lb", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); uint32_t npipes = glob_arg.output_rings; pthread_t stat_thread; ports = calloc(npipes + 1, sizeof(struct port_des)); if (!ports) { D("failed to allocate the stats array"); return 1; } struct port_des *rxport = &ports[npipes]; init_groups(); rv = pthread_create(&stat_thread, NULL, print_stats, NULL); if (rv != 0) { /* pthread_create returns an error number, never -1, and does not set errno */ D("unable to create the stats thread: %s", strerror(rv)); return 1; } /* we need base_req to specify pipes and extra bufs */ struct nmreq base_req; memset(&base_req, 0, sizeof(base_req)); base_req.nr_arg1 = npipes; base_req.nr_arg3 = glob_arg.extra_bufs; rxport->nmd = nm_open(glob_arg.ifname, &base_req, 0, NULL); if (rxport->nmd == NULL) { D("cannot open %s", glob_arg.ifname); return (1); } else { D("successfully opened %s (tx slots: %u)", glob_arg.ifname, rxport->nmd->req.nr_tx_slots); } uint32_t extra_bufs = rxport->nmd->req.nr_arg3; struct overflow_queue *oq = NULL; /* reference ring to access the buffers */ rxport->ring = NETMAP_RXRING(rxport->nmd->nifp, 0); if (!glob_arg.extra_bufs) goto run; D("obtained %d extra buffers", extra_bufs); if (!extra_bufs) goto run; /* one overflow queue for each output pipe, plus one for the * free extra buffers */ oq = calloc(npipes + 1, sizeof(struct overflow_queue)); if (!oq) { D("failed to allocate the overflow queue descriptors"); goto run; } freeq = &oq[npipes]; rxport->oq = freeq; freeq->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!freeq->slots) { D("failed to allocate the free list"); return 1; } freeq->size = extra_bufs; snprintf(freeq->name, MAX_IFNAMELEN, "free queue"); /* * the list of buffers uses the first uint32_t in each buffer * as the index of the next buffer. 
*/ uint32_t scan; for (scan = rxport->nmd->nifp->ni_bufs_head; scan; scan = *(uint32_t *)NETMAP_BUF(rxport->ring, scan)) { struct netmap_slot s; s.buf_idx = scan; ND("freeq <- %d", s.buf_idx); oq_enq(freeq, &s); } if (freeq->n != extra_bufs) { D("something went wrong: netmap reported %d extra_bufs, but the free list contained %d", extra_bufs, freeq->n); return 1; } rxport->nmd->nifp->ni_bufs_head = 0; run: /* we need to create the persistent vale ports */ if (create_custom_ports(rxport->nmd->req.nr_arg2)) { free_buffers(); return 1; } atexit(delete_custom_ports); atexit(free_buffers); int j, t = 0; for (j = 0; j < glob_arg.num_groups; j++) { struct group_des *g = &groups[j]; int k; for (k = 0; k < g->nports; ++k) { struct port_des *p = &g->ports[k]; char interface[MAX_IFNAMELEN + 16]; snprintf(interface, sizeof(interface), "netmap:%s{%d/xT", g->pipename, g->first_id + k); D("opening pipe named %s", interface); p->nmd = nm_open(interface, NULL, 0, rxport->nmd); if (p->nmd == NULL) { D("cannot open %s", interface); return (1); } else { D("successfully opened pipe #%d %s (tx slots: %d)", k + 1, interface, p->nmd->req.nr_tx_slots); p->ring = NETMAP_TXRING(p->nmd->nifp, 0); } D("zerocopy %s", (rxport->nmd->mem == p->nmd->mem) ? "enabled" : "disabled"); if (extra_bufs) { struct overflow_queue *q = &oq[t + k]; q->slots = calloc(extra_bufs, sizeof(struct netmap_slot)); if (!q->slots) { D("failed to allocate overflow queue for pipe %d", k); /* make all overflow queue management fail */ extra_bufs = 0; } q->size = extra_bufs; snprintf(q->name, MAX_IFNAMELEN, "oq %s{%d", g->pipename, k); p->oq = q; } } t += g->nports; } if (glob_arg.extra_bufs && !extra_bufs) { if (oq) { for (i = 0; i < npipes + 1; i++) { free(oq[i].slots); oq[i].slots = NULL; } free(oq); oq = NULL; } D("*** overflow queues disabled ***"); } sleep(2); struct pollfd pollfd[npipes + 1]; memset(&pollfd, 0, sizeof(pollfd)); signal(SIGINT, sigint_h); while (!do_abort) { u_int polli = 0; iter++; for (i = 0; i < npipes; ++i) { struct netmap_ring *ring = ports[i].ring; if (nm_ring_next(ring, ring->tail) == ring->cur) { /* no need to poll, there are no packets pending */ continue; } pollfd[polli].fd = ports[i].nmd->fd; pollfd[polli].events = POLLOUT; pollfd[polli].revents = 0; ++polli; } pollfd[polli].fd = rxport->nmd->fd; pollfd[polli].events = POLLIN; pollfd[polli].revents = 0; ++polli; //RD(5, "polling %d file descriptors", polli+1); rv = poll(pollfd, polli, 10); if (rv <= 0) { if (rv < 0 && errno != EAGAIN && errno != EINTR) RD(1, "poll error %s", strerror(errno)); continue; } if (oq) { /* try to push packets from the overflow queues * to the corresponding pipes */ for (i = 0; i < npipes; i++) { struct port_des *p = &ports[i]; struct overflow_queue *q = p->oq; struct group_des *g = p->group; uint32_t j, lim; struct netmap_ring *ring; struct netmap_slot *slot; if (oq_empty(q)) continue; ring = p->ring; lim = nm_ring_space(ring); if (!lim) continue; if (q->n < lim) lim = q->n; for (j = 0; j < lim; j++) { struct netmap_slot s = oq_deq(q), tmp; tmp.ptr = 0; slot = &ring->slot[ring->cur]; if (slot->ptr && !g->last) { tmp.buf_idx = forward_packet(g + 1, slot); /* the forwarding may have removed packets * from the current queue */ if (q->n < lim) lim = q->n; } else { tmp.buf_idx = slot->buf_idx; } oq_enq(freeq, &tmp); *slot = s; slot->flags |= NS_BUF_CHANGED; ring->cur = nm_ring_next(ring, ring->cur); } ring->head = ring->cur; forwarded += lim; p->ctr.pkts += lim; } } int batch = 0; for (i = rxport->nmd->first_rx_ring; i <= rxport->nmd->last_rx_ring; i++) { struct netmap_ring 
*rxring = NETMAP_RXRING(rxport->nmd->nifp, i); //D("prepare to scan rings"); int next_cur = rxring->cur; struct netmap_slot *next_slot = &rxring->slot[next_cur]; const char *next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); while (!nm_ring_empty(rxring)) { struct netmap_slot *rs = next_slot; struct group_des *g = &groups[0]; // CHOOSE THE CORRECT OUTPUT PIPE uint32_t hash = pkt_hdr_hash((const unsigned char *)next_buf, 4, 'B'); if (hash == 0) { non_ip++; // XXX ?? } rs->ptr = hash | ((uint64_t)1 << 32); /* 1UL << 32 would be undefined on 32-bit hosts; slot->ptr is 64-bit */ // prefetch the buffer for the next round next_cur = nm_ring_next(rxring, next_cur); next_slot = &rxring->slot[next_cur]; next_buf = NETMAP_BUF(rxring, next_slot->buf_idx); __builtin_prefetch(next_buf); // 'B' is just a hashing seed rs->buf_idx = forward_packet(g, rs); rs->flags |= NS_BUF_CHANGED; rxring->head = rxring->cur = next_cur; batch++; if (unlikely(batch >= glob_arg.batch)) { ioctl(rxport->nmd->fd, NIOCRXSYNC, NULL); batch = 0; } ND(1, "Forwarded Packets: %"PRIu64" Dropped packets: %"PRIu64" Percent: %.2f", forwarded, dropped, ((float)dropped / (float)forwarded * 100)); } } } pthread_join(stat_thread, NULL); printf("%"PRIu64" packets forwarded. %"PRIu64" packets dropped. Total %"PRIu64"\n", forwarded, dropped, forwarded + dropped); return 0; }
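The hot path in both lb versions never copies packet payloads: because each pipe is opened with nm_open(interface, NULL, 0, rxport->nmd), it shares the parent descriptor's memory region, so a packet "moves" between rings by swapping buffer indices and raising NS_BUF_CHANGED. Here is that idiom isolated in a stand-alone sketch; the function name swap_slot is mine, not part of lb.c:

/* Swap the buffer of RX slot *rs into the first free TX slot of ring.
 * Both rings must map the same netmap memory region, otherwise buffer
 * indices are meaningless across them and an explicit copy is needed.
 * Returns 1 if the packet was queued, 0 if the TX ring was full. */
static int
swap_slot(struct netmap_ring *ring, struct netmap_slot *rs)
{
	struct netmap_slot *ts;
	uint32_t free_idx;

	if (nm_ring_space(ring) == 0)
		return 0;	/* full: caller must drop or park the packet */
	ts = &ring->slot[ring->cur];
	free_idx = ts->buf_idx;		/* recycle the old TX buffer */
	ts->buf_idx = rs->buf_idx;	/* hand the RX buffer to the TX ring */
	ts->len = rs->len;
	ts->flags |= NS_BUF_CHANGED;	/* tell the kernel the buffer moved */
	rs->buf_idx = free_idx;		/* give the RX slot a fresh buffer */
	rs->flags |= NS_BUF_CHANGED;
	ring->head = ring->cur = nm_ring_next(ring, ring->cur);
	return 1;
}

When the two descriptors do not share memory (the "zerocopy disabled" case logged above), the same slot bookkeeping still applies, but the payload must first be copied into the TX buffer.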
static int pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user) { int ret; struct pcap_netmap *pn = NM_PRIV(p); struct nm_desc *d = pn->d; struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 }; pn->cb = cb; pn->cb_arg = user; for (;;) { if (p->break_loop) { p->break_loop = 0; return PCAP_ERROR_BREAK; } /* nm_dispatch won't run forever */ ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p); if (ret != 0) break; errno = 0; ret = poll(&pfd, 1, p->the_timeout); } return ret; } /* XXX need to check the NIOCTXSYNC/poll */ static int pcap_netmap_inject(pcap_t *p, const void *buf, size_t size) { struct nm_desc *d = NM_PRIV(p)->d; return nm_inject(d, buf, size); } static int pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags) { struct pcap_netmap *pn = NM_PRIV(p); struct nm_desc *d = pn->d; struct ifreq ifr; int error, fd = d->fd; #ifdef linux fd = socket(AF_INET, SOCK_DGRAM, 0); if (fd < 0) { fprintf(stderr, "Error: cannot get device control socket.\n"); return -1; } #endif /* linux */ bzero(&ifr, sizeof(ifr)); strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name)); switch (what) { case SIOCSIFFLAGS: ifr.ifr_flags = *if_flags; #ifdef __FreeBSD__ ifr.ifr_flagshigh = *if_flags >> 16; #endif /* __FreeBSD__ */ break; } error = ioctl(fd, what, &ifr); if (!error) { switch (what) { case SIOCGIFFLAGS: *if_flags = ifr.ifr_flags; #ifdef __FreeBSD__ *if_flags |= (ifr.ifr_flagshigh << 16); #endif /* __FreeBSD__ */ } } #ifdef linux close(fd); #endif /* linux */ return error ? -1 : 0; } static void pcap_netmap_close(pcap_t *p) { struct pcap_netmap *pn = NM_PRIV(p); struct nm_desc *d = pn->d; uint32_t if_flags = 0; if (pn->must_clear_promisc) { pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */ if (if_flags & IFF_PPROMISC) { if_flags &= ~IFF_PPROMISC; pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags); } } nm_close(d); #ifdef HAVE_NO_PRIV free(pn); SET_PRIV(p, NULL); // unnecessary #endif pcap_cleanup_live_common(p); } static int pcap_netmap_activate(pcap_t *p) { struct pcap_netmap *pn = NM_PRIV(p); struct nm_desc *d = nm_open(p->opt.source, NULL, 0, NULL); uint32_t if_flags = 0; if (d == NULL) { snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "netmap open: cannot access %s: %s\n", p->opt.source, pcap_strerror(errno)); #ifdef HAVE_NO_PRIV free(pn); SET_PRIV(p, NULL); // unnecessary #endif pcap_cleanup_live_common(p); return (PCAP_ERROR); } if (0) fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n", __FUNCTION__, p->opt.source, d, d->fd, d->first_rx_ring, d->last_rx_ring); pn->d = d; p->fd = d->fd; if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) { pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */ if (!(if_flags & IFF_PPROMISC)) { pn->must_clear_promisc = 1; if_flags |= IFF_PPROMISC; pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags); } } p->linktype = DLT_EN10MB; p->selectable_fd = p->fd; p->read_op = pcap_netmap_dispatch; p->inject_op = pcap_netmap_inject; p->setfilter_op = install_bpf_program; p->setdirection_op = NULL; p->set_datalink_op = NULL; p->getnonblock_op = pcap_getnonblock_fd; p->setnonblock_op = pcap_setnonblock_fd; p->stats_op = pcap_netmap_stats; p->cleanup_op = pcap_netmap_close; return (0); } pcap_t * pcap_netmap_create(const char *device, char *ebuf, int *is_ours) { pcap_t *p; *is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4)); if (! 
*is_ours) return NULL; #ifdef HAVE_NO_PRIV { void *pn = calloc(1, sizeof(struct pcap_netmap)); if (pn == NULL) return NULL; p = pcap_create_common(device, ebuf); if (p == NULL) { free(pn); return NULL; } SET_PRIV(p, pn); } #else p = pcap_create_common(device, ebuf, sizeof (struct pcap_netmap)); if (p == NULL) return (NULL); #endif p->activate_op = pcap_netmap_activate; return (p); }
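For reference, this is how the pcap-netmap glue above is reached from an ordinary libpcap program: any device name starting with "netmap:" or "vale" is claimed by pcap_netmap_create(), and pcap_activate() then lands in pcap_netmap_activate(). A minimal, hypothetical usage sketch; the interface name "netmap:em0" and the print_pkt callback are placeholders, and all pcap_* calls are the stock libpcap API:

#include <pcap/pcap.h>
#include <stdio.h>

static void
print_pkt(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes)
{
	(void)user; (void)bytes;
	printf("got %u bytes (caplen %u)\n", h->len, h->caplen);
}

int
main(void)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	/* the "netmap:" prefix routes this to pcap_netmap_create() above */
	pcap_t *p = pcap_create("netmap:em0", errbuf);

	if (p == NULL) {
		fprintf(stderr, "pcap_create: %s\n", errbuf);
		return 1;
	}
	pcap_set_promisc(p, 1);		/* handled via the IFF_PPROMISC dance above */
	pcap_set_timeout(p, 100);	/* ms; used as the poll() timeout in dispatch */
	if (pcap_activate(p) < 0) {	/* negative means error, positive only a warning */
		fprintf(stderr, "pcap_activate: %s\n", pcap_geterr(p));
		pcap_close(p);
		return 1;
	}
	pcap_loop(p, 10, print_pkt, NULL);	/* deliver 10 packets, then exit */
	pcap_close(p);	/* restores the interface flags via pcap_netmap_close() */
	return 0;
}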