bool netmap_tx_queues_empty(void *p) { sendpacket_t *sp = p; struct netmap_ring *txring; assert(sp); txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring); while (NETMAP_TX_RING_EMPTY(txring)) { /* current ring is empty- go to next */ ++sp->cur_tx_ring; if (sp->cur_tx_ring > sp->last_tx_ring) /* last ring */ return true; txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring); } /* * send TX interrupt signal */ ioctl(sp->handle.fd, NIOCTXSYNC, NULL); return false; }
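/*
 * Hedged caller sketch (not from the original source) for netmap_tx_queues_empty()
 * above: once the last packet has been queued, a sender can spin until every TX
 * ring has drained; the helper itself issues NIOCTXSYNC whenever it finds a ring
 * that still holds pending slots.
 */
static void
wait_for_tx_drain(sendpacket_t *sp)
{
    while (!netmap_tx_queues_empty(sp))
        usleep(1000);           /* 1 ms between checks */
}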
/* The exported init function * * ... -net netmap,ifname="..." */ int net_init_netmap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { const NetdevNetmapOptions *netmap_opts = &netdev->u.netmap; struct nm_desc *nmd; NetClientState *nc; Error *err = NULL; NetmapState *s; nmd = netmap_open(netmap_opts, &err); if (err) { error_propagate(errp, err); return -1; } /* Create the object. */ nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); s = DO_UPCAST(NetmapState, nc, nc); s->nmd = nmd; s->tx = NETMAP_TXRING(nmd->nifp, 0); s->rx = NETMAP_RXRING(nmd->nifp, 0); s->vnet_hdr_len = 0; pstrcpy(s->ifname, sizeof(s->ifname), netmap_opts->ifname); netmap_read_poll(s, true); /* Initially only poll for reads. */ return 0; }
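/*
 * Usage note (an assumption, not taken from the QEMU sources): with this
 * backend built in, a guest NIC is typically attached to a netmap port with
 * something like
 *
 *   qemu-system-x86_64 ... -netdev netmap,id=nm0,ifname=netmap:eth0 \
 *                          -device virtio-net-pci,netdev=nm0
 *
 * where "eth0" is a placeholder for the host interface (a VALE port such as
 * "vale0:1" also works).
 */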
void ether_bridge(struct nm_if *nmif, int ring, char *inbuf, int len) { char *buf; struct netmap_if *ifp; struct netmap_ring *nring; struct nm_if *parentif; parentif = NETMAP_PARENTIF(nmif); ifp = parentif->nm_if_ifp; if (NETMAP_HOST_RING(parentif, ring)) nring = netmap_hw_tx_ring(ifp); else nring = NETMAP_TXRING(ifp, ifp->ni_tx_rings); buf = NETMAP_GET_BUF(nring); if (buf == NULL) { DPRINTF("%s: no available buffer for tx (%s).\n", __func__, nmif->nm_if_name); parentif->nm_if_txsync = 1; pktcnt.tx_drop++; return; } /* Copy the payload. */ memcpy(buf, inbuf, len); NETMAP_UPDATE_LEN(nring, len); /* Update the current ring slot. */ NETMAP_RING_NEXT(nring); pktcnt.tx_pkts++; parentif->nm_if_txsync = 1; }
/* move packets from src to destination */ static int move(struct thr_ctx *th, struct nm_desc *src, struct nm_desc *dst, u_int limit) { struct netmap_ring *txring, *rxring; u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring; const char *msg = (src->req.nr_ringid & NETMAP_SW_RING) ? "host->net" : "net->host"; while (si <= src->last_rx_ring && di <= dst->last_tx_ring) { rxring = NETMAP_RXRING(src->nifp, si); txring = NETMAP_TXRING(dst->nifp, di); ND("txring %p rxring %p", txring, rxring); if (nm_ring_empty(rxring)) { si++; continue; } if (nm_ring_empty(txring)) { di++; continue; } m += process_rings(th, rxring, txring, limit, msg); } return (m); }
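/*
 * The move() loop above delegates the per-ring work to process_rings(), which
 * is not shown here.  A minimal sketch of that step, written against the
 * modern head/cur/tail API and assuming copy semantics (a real bridge can also
 * swap buf_idx between rings that share the same memory region for zero-copy);
 * the name forward_ring and its simplifications are mine, not the original
 * process_rings().
 */
static u_int
forward_ring(struct netmap_ring *rxring, struct netmap_ring *txring, u_int limit)
{
    u_int n, m, rx = rxring->cur, tx = txring->cur;

    m = nm_ring_space(rxring);
    if (nm_ring_space(txring) < m)
        m = nm_ring_space(txring);
    if (limit < m)
        m = limit;

    for (n = 0; n < m; n++) {
        struct netmap_slot *rs = &rxring->slot[rx];
        struct netmap_slot *ts = &txring->slot[tx];

        ts->len = rs->len;
        nm_pkt_copy(NETMAP_BUF(rxring, rs->buf_idx),
            NETMAP_BUF(txring, ts->buf_idx), rs->len);

        rx = nm_ring_next(rxring, rx);
        tx = nm_ring_next(txring, tx);
    }
    rxring->head = rxring->cur = rx;
    txring->head = txring->cur = tx;
    return (m);
}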
int pcap_inject(pcap_t *p, const void *buf, size_t size) { struct my_ring *me = p; u_int si; ND("cnt %d", cnt); /* scan all rings */ for (si = me->begin; si < me->end; si++) { struct netmap_ring *ring = NETMAP_TXRING(me->nifp, si); ND("ring has %d pkts", ring->avail); if (ring->avail == 0) continue; u_int i = ring->cur; u_int idx = ring->slot[i].buf_idx; if (idx < 2) { D("%s bogus TX index %d at offset %d", me->nifp->ni_name, idx, i); sleep(2); } u_char *dst = (u_char *)NETMAP_BUF(ring, idx); ring->slot[i].len = size; pkt_copy(buf, dst, size); ring->cur = NETMAP_RING_NEXT(ring, i); ring->avail--; // if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL); return size; } errno = ENOBUFS; return -1; }
void netmap_wait() { int i; struct pollfd pfd = { .fd = g_nmd->fd, .events = POLLOUT }; for (i = g_nmd->first_tx_ring; i <= g_nmd->last_tx_ring; i++) { struct netmap_ring *txring = NETMAP_TXRING(g_nmd->nifp, i); while (nm_tx_pending(txring)) { ioctl(pfd.fd, NIOCTXSYNC, NULL); usleep(1); // wait 1 tick } } } /* sysctl wrapper to return the number of active CPUs */ int system_ncpus(void) { int ncpus; #if defined (__FreeBSD__) int mib[2] = { CTL_HW, HW_NCPU }; size_t len = sizeof(ncpus); /* size of the result buffer, not of mib */ sysctl(mib, 2, &ncpus, &len, NULL, 0); #elif defined(linux) ncpus = sysconf(_SC_NPROCESSORS_ONLN); #else /* others */ ncpus = 1; #endif /* others */ return (ncpus); }
/* move packets from src to destination */ static int move(struct my_ring *src, struct my_ring *dst, u_int limit) { struct netmap_ring *txring, *rxring; u_int m = 0, si = src->begin, di = dst->begin; const char *msg = (src->queueid & NETMAP_SW_RING) ? "host->net" : "net->host"; while (si < src->end && di < dst->end) { rxring = NETMAP_RXRING(src->nifp, si); txring = NETMAP_TXRING(dst->nifp, di); ND("txring %p rxring %p", txring, rxring); if (rxring->avail == 0) { si++; continue; } if (txring->avail == 0) { di++; continue; } m += process_rings(rxring, txring, limit, msg); } return (m); }
struct if_netmap_host_context * if_netmap_register_if(int nmfd, const char *ifname, unsigned int isvale, unsigned int qno) { struct if_netmap_host_context *ctx; ctx = calloc(1, sizeof(struct if_netmap_host_context)); if (NULL == ctx) return (NULL); ctx->fd = nmfd; ctx->isvale = isvale; ctx->ifname = ifname; /* * Disable TCP and checksum offload, which can impact throughput * and also cause packets to be dropped or modified gratuitously. * * Also disable VLAN offload/filtering - we want to talk straight to * the wire. * */ if (!ctx->isvale) { if (0 != if_netmap_set_offload(ctx, 0)) { goto fail; } if (0 != if_netmap_set_promisc(ctx, 1)) { goto fail; } } ctx->req.nr_version = NETMAP_API; ctx->req.nr_ringid = NETMAP_NO_TX_POLL | NETMAP_HW_RING | qno; snprintf(ctx->req.nr_name, sizeof(ctx->req.nr_name), "%s", ifname); if (-1 == ioctl(ctx->fd, NIOCREGIF, &ctx->req)) { goto fail; } ctx->mem = uhi_mmap(NULL, ctx->req.nr_memsize, UHI_PROT_READ | UHI_PROT_WRITE, UHI_MAP_NOCORE | UHI_MAP_SHARED, ctx->fd, 0); if (MAP_FAILED == ctx->mem) { goto fail; } ctx->hw_rx_ring = NETMAP_RXRING(NETMAP_IF(ctx->mem, ctx->req.nr_offset), qno); ctx->hw_tx_ring = NETMAP_TXRING(NETMAP_IF(ctx->mem, ctx->req.nr_offset), qno); /* NIOCREGIF will reset the hardware rings, but the reserved count * might still be non-zero from a previous user's activities */ ctx->hw_rx_ring->reserved = 0; return (ctx); fail: free(ctx); return(NULL); }
/* * how many packets on this set of queues ? */ static int pkt_queued(struct my_ring *me, int tx) { u_int i, tot = 0; ND("me %p begin %d end %d", me, me->begin, me->end); for (i = me->begin; i < me->end; i++) { struct netmap_ring *ring = tx ? NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i); tot += ring->avail; } if (0 && verbose && tot && !tx) D("ring %s %s %s has %d avail at %d", me->ifname, tx ? "tx": "rx", me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ? "host":"net", tot, NETMAP_TXRING(me->nifp, me->begin)->cur); return tot; }
struct nm_desc* usnet_init( struct nm_desc *gg_nmd, const char *dev_name, u_int flags) { struct nmreq nmr; struct nm_desc *nmd = NULL; struct netmap_if *nifp = NULL; struct netmap_ring *txr, *rxr; signal(SIGINT, sigint_h); bzero(&nmr, sizeof(nmr)); strcpy(nmr.nr_name, dev_name); // XXX: which netmap flags? //nmr.nr_flags = NR_REG_ALL_NIC; //| flags; printf("nm_open: %s\n", nmr.nr_name); nmd = nm_open(nmr.nr_name, &nmr, 0, NULL); if ( nmd == NULL ) { DEBUG("Cannot open interface %s", nmr.nr_name); exit(1); } nifp = nmd->nifp; txr = NETMAP_TXRING(nifp, 0); rxr = NETMAP_RXRING(nifp, 0); printf("nmreq info, name=%s, version=%d," " flags=%d, memsize=%d," " ni_tx_rings=%d, ni_rx_rings=%d, num_tx_slots=%d, num_rx_slot=%d \n", nifp->ni_name, nifp->ni_version, nifp->ni_flags, nmd->memsize, nifp->ni_tx_rings, nifp->ni_rx_rings, txr->num_slots, rxr->num_slots); memset(&g_config, 0, sizeof(g_config)); g_config.burst = 1000; g_config.tx_rate = 0; memset(&g_ipq, 0, sizeof(g_ipq)); usnet_init_internal(); usnet_route_init(); usnet_network_init(); usnet_udp_init(); usnet_ipv4_init(); usnet_socket_init(); return nmd; }
int nm_ring (char * ifname, int q, struct netmap_ring ** ring, int x, int w) { int fd; char * mem; struct nmreq nmr; struct netmap_if * nifp; /* open netmap for ring */ fd = open ("/dev/netmap", O_RDWR); if (fd < 0) { D ("unable to open /dev/netmap"); return -1; } memset (&nmr, 0, sizeof (nmr)); strcpy (nmr.nr_name, ifname); nmr.nr_version = NETMAP_API; nmr.nr_ringid = q | (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); if (w) nmr.nr_flags |= NR_REG_ONE_NIC; else nmr.nr_flags |= NR_REG_ALL_NIC; if (vale_rings && strncmp (ifname, "vale", 4) == 0) { nmr.nr_rx_rings = vale_rings; nmr.nr_tx_rings = vale_rings; } if (ioctl (fd, NIOCREGIF, &nmr) < 0) { D ("unable to register interface %s", ifname); return -1; } mem = mmap (NULL, nmr.nr_memsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (mem == MAP_FAILED) { D ("unable to mmap"); return -1; } nifp = NETMAP_IF (mem, nmr.nr_offset); if (x > 0) *ring = NETMAP_TXRING (nifp, q); else *ring = NETMAP_RXRING (nifp, q); return fd; }
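/*
 * Hedged usage sketch for nm_ring() above ("eth0" and the dummy frame are
 * placeholders, assuming net/netmap_user.h is included): grab the TX ring of
 * queue 0, fill one slot and push it out with NIOCTXSYNC.
 */
static void
nm_ring_tx_example(void)
{
    char ifname[] = "eth0";
    struct netmap_ring *txr;
    int fd;

    fd = nm_ring(ifname, 0, &txr, 1 /* TX ring */, 1 /* single queue */);
    if (fd < 0)
        return;
    if (!nm_ring_empty(txr)) {
        struct netmap_slot *slot = &txr->slot[txr->cur];
        char *buf = NETMAP_BUF(txr, slot->buf_idx);

        memset(buf, 0xff, 60);          /* dummy 60-byte frame */
        slot->len = 60;
        txr->head = txr->cur = nm_ring_next(txr, txr->cur);
    }
    ioctl(fd, NIOCTXSYNC, NULL);
}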
void usnet_netmap_flush() { int i; ioctl(g_nmd->fd, NIOCTXSYNC, NULL); /* final part: wait all the TX queues to be empty. */ for (i = g_nmd->first_tx_ring; i <= g_nmd->last_tx_ring; i++) { struct netmap_ring *txring = NETMAP_TXRING(g_nmd->nifp, i); while (nm_tx_pending(txring)) { ioctl(g_nmd->fd, NIOCTXSYNC, NULL); usleep(1); /* wait 1 tick */ } } }
void VIFHYPER_SEND(struct virtif_user *viu, struct iovec *iov, size_t iovlen) { void *cookie = NULL; /* XXXgcc */ struct netmap_if *nifp = viu->nm_nifp; struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); char *p; int retries; int unscheduled = 0; unsigned n; DPRINTF(("sending pkt via netmap len %d\n", (int)iovlen)); for (retries = 10; !(n = nm_ring_space(ring)) && retries > 0; retries--) { struct pollfd pfd; if (!unscheduled) { cookie = rumpuser_component_unschedule(); unscheduled = 1; } pfd.fd = viu->viu_fd; pfd.events = POLLOUT; DPRINTF(("cannot send on netmap, ring full\n")); (void)poll(&pfd, 1, 500 /* ms */); } if (n > 0) { int i, totlen = 0; struct netmap_slot *slot = &ring->slot[ring->cur]; #define MAX_BUF_SIZE 1900 p = NETMAP_BUF(ring, slot->buf_idx); for (i = 0; totlen < MAX_BUF_SIZE && i < iovlen; i++) { int n = iov[i].iov_len; if (totlen + n > MAX_BUF_SIZE) { n = MAX_BUF_SIZE - totlen; DPRINTF(("truncating long pkt")); } memcpy(p + totlen, iov[i].iov_base, n); totlen += n; } #undef MAX_BUF_SIZE slot->len = totlen; ring->head = ring->cur = nm_ring_next(ring, ring->cur); if (ioctl(viu->viu_fd, NIOCTXSYNC, NULL) < 0) perror("NIOCTXSYNC"); } if (unscheduled) rumpuser_component_schedule(cookie); }
static int netmap_port_open(uint32_t idx) { int err; struct netmap_port *port; struct nmreq req; port = ports.p + idx; port->fd = rte_netmap_open("/dev/netmap", O_RDWR); snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); req.nr_version = NETMAP_API; req.nr_ringid = 0; err = rte_netmap_ioctl(port->fd, NIOCGINFO, &req); if (err) { printf("[E] NIOCGINFO ioctl failed (error %d)\n", err); return err; } snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); req.nr_version = NETMAP_API; req.nr_ringid = 0; err = rte_netmap_ioctl(port->fd, NIOCREGIF, &req); if (err) { printf("[E] NIOCREGIF ioctl failed (error %d)\n", err); return err; } /* mmap only once. */ if (ports.mem == NULL) ports.mem = rte_netmap_mmap(NULL, req.nr_memsize, PROT_WRITE | PROT_READ, MAP_PRIVATE, port->fd, 0); if (ports.mem == MAP_FAILED) { printf("[E] NETMAP mmap failed for fd: %d)\n", port->fd); return -ENOMEM; } port->nmif = NETMAP_IF(ports.mem, req.nr_offset); port->tx_ring = NETMAP_TXRING(port->nmif, 0); port->rx_ring = NETMAP_RXRING(port->nmif, 0); return 0; }
/* * how many packets on this set of queues ? * * Receive: how many frames in the receive path. * Transmit: how many slots are available to transmit. */ int pkt_queued(struct nm_desc *d, int tx) { u_int i, tot = 0; if (tx) { for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) { tot += nm_ring_space(NETMAP_TXRING(d->nifp, i)); } } else { for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) { tot += nm_ring_space(NETMAP_RXRING(d->nifp, i)); } } return tot; }
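/*
 * Hedged usage sketch for pkt_queued() above: wait for traffic and then print
 * how many frames are pending on the RX rings versus how many free slots the
 * TX rings still have.
 */
static void
queue_report(struct nm_desc *d)
{
    struct pollfd pfd = { .fd = d->fd, .events = POLLIN };

    if (poll(&pfd, 1, 1000) > 0)
        printf("rx queued %d, tx slots free %d\n",
            pkt_queued(d, 0), pkt_queued(d, 1));
}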
/** * Open netmap ring. * @param[in,out] ring * @param[in] ringid Ring ID. * @param[in] cached_mmap_mem Pointer to already mmapped shared netmap memory. */ int znm_open(struct znm_ring *ring, const char *ifname, uint16_t ringid, void *cached_mmap_mem) { struct nmreq req; ring->fd = open(ZNM_DEVICE, O_RDWR); if (ring->fd < 0) { ZERO_ELOG(LOG_ERR, "Unable to open %s", ZNM_DEVICE); return -1; } memset(&req, 0, sizeof(req)); req.nr_version = NETMAP_API; strncpy(req.nr_name, ifname, sizeof(req.nr_name)); req.nr_ringid = ringid; req.nr_flags = NR_REG_ONE_NIC; if (0 == ioctl(ring->fd, NIOCGINFO, &req)) { ring->memsize = req.nr_memsize; if (0 == ioctl(ring->fd, NIOCREGIF, &req)) { if (NULL != cached_mmap_mem) { ring->mem = cached_mmap_mem; } else { ring->mem = mmap(0, ring->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, ring->fd, 0); ring->own_mmap = 1; } if (MAP_FAILED != ring->mem) { ZERO_LOG(LOG_DEBUG, "Attached to %s HW ring %u", ifname, ringid); ring->nifp = NETMAP_IF(ring->mem, req.nr_offset); ring->tx = NETMAP_TXRING(ring->nifp, ringid); ring->rx = NETMAP_RXRING(ring->nifp, ringid); // Success. return 0; } else { ring->mem = NULL; ZERO_ELOG(LOG_ERR, "Unable to mmap netmap shared memory"); } } else { ZERO_ELOG(LOG_ERR, "Unable to register %s with netmap", ifname); } } else { ZERO_ELOG(LOG_ERR, "Unable to query netmap for '%s' info", ifname); } close(ring->fd); return -1; }
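/*
 * Hedged usage sketch for znm_open() above (interface name and error handling
 * are placeholders): attach to hardware ring pairs 0 and 1 of the same NIC,
 * mmap()ing the shared netmap region only once by handing the first ring's
 * map to the second call through cached_mmap_mem.
 */
static int
znm_open_two_rings(struct znm_ring *r0, struct znm_ring *r1)
{
    if (0 != znm_open(r0, "eth0", 0, NULL))
        return -1;
    if (0 != znm_open(r1, "eth0", 1, r0->mem)) {
        /* caller still has to release r0 (close its fd, munmap) */
        return -1;
    }
    return 0;
}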
int usnet_setup(int argc, char *argv[]) { //struct nm_desc *nmd; char *p; int ret; (void)argc; (void)argv; (void)p; //(void)nmd; setaffinity(0); ret = usnet_get_options(argc, argv); if ( ret < 0 ) { show_help(); exit(0); } g_nmd = usnet_init(g_nmd, (char*)g_interface, 0); if (1) { struct netmap_if *nifp = g_nmd->nifp; struct nmreq *req = &g_nmd->req; int i; D("first_tx_ring=%d, last_tx_ring=%d", g_nmd->first_tx_ring, g_nmd->last_tx_ring); D("nifp at offset %d, %d tx %d rx region %d", req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, req->nr_arg2); for (i = 0; i <= req->nr_tx_rings; i++) { struct netmap_ring *ring = NETMAP_TXRING(nifp, i); D(" TX%d at 0x%p slots %d", i, (void *)((char *)ring - (char *)nifp), ring->num_slots); } for (i = 0; i <= req->nr_rx_rings; i++) { struct netmap_ring *ring = NETMAP_RXRING(nifp, i); D(" RX%d at 0x%p slots %d", i, (void *)((char *)ring - (char *)nifp), ring->num_slots); } } return 0; }
struct netmap_ring * get_ring() { struct netmap_if *nifp; char *arg; unsigned int ringid; /* defaults */ ringid = 0; /* first arg: ring number */ arg = nextarg(); if (!arg) goto doit; ringid = strtoul(arg, NULL, 0); doit: nifp = get_if(); return NETMAP_TXRING(nifp, ringid); }
static int tx_sync_if(uint32_t port) { uint16_t burst; uint32_t i, rc; struct netmap_if *nifp; struct netmap_ring *r; struct rte_mempool *mp; nifp = ports[port].nmif; mp = ports[port].pool; burst = ports[port].tx_burst; rc = 0; for (i = 0; i < nifp->ni_tx_rings + 1; i++) { r = NETMAP_TXRING(nifp, i); tx_sync_ring(r, (uint8_t)port, (uint16_t)i, mp, burst); rc += r->avail; } return (rc); }
static __inline int pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) { int r, i; int len = 0; for (r = nmd->cur_tx_ring; ; ) { struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); uint32_t cur, idx; char *buf; if (nm_ring_empty(ring)) { r++; if (r > nmd->last_tx_ring) r = nmd->first_tx_ring; if (r == nmd->cur_tx_ring) break; continue; } cur = ring->cur; idx = ring->slot[cur].buf_idx; buf = NETMAP_BUF(ring, idx); for (i = 0; i < iovcnt; i++) { if (len + iov[i].iov_len > 2048) break; memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); len += iov[i].iov_len; } ring->slot[cur].len = len; ring->head = ring->cur = nm_ring_next(ring, cur); nmd->cur_tx_ring = r; ioctl(nmd->fd, NIOCTXSYNC, NULL); break; } return (len); }
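/*
 * Companion sketch (an assumption, not taken from bhyve): the reverse helper
 * would pop one frame from the current RX ring into an iovec array.  Scanning
 * the remaining RX rings and handling frames larger than the iovec are
 * omitted for brevity.
 */
static __inline int
netmap_rx_readv_sketch(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
{
    struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
    uint32_t cur, idx;
    char *buf;
    int i, off, len, copy;

    if (nm_ring_empty(ring))
        return (0);

    cur = ring->cur;
    idx = ring->slot[cur].buf_idx;
    buf = NETMAP_BUF(ring, idx);
    len = ring->slot[cur].len;

    for (i = 0, off = 0; i < iovcnt && off < len; i++) {
        copy = len - off;
        if (copy > (int)iov[i].iov_len)
            copy = (int)iov[i].iov_len;
        memcpy(iov[i].iov_base, buf + off, copy);
        off += copy;
    }
    ring->head = ring->cur = nm_ring_next(ring, cur);
    return (off);
}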
int sendpacket_send_netmap(void *p, const u_char *data, size_t len) { sendpacket_t *sp = p; struct netmap_ring *txring; struct netmap_slot *slot; char *pkt; uint32_t cur, avail; if (sp->abort) return 0; txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring); while ((avail = nm_ring_space(txring)) == 0) { /* out of space on current TX queue - go to next */ ++sp->cur_tx_ring; if (sp->cur_tx_ring > sp->last_tx_ring) { /* * out of space on all queues * * we have looped through all configured TX queues * so we have to reset to the first queue and * wait for available space */ sp->cur_tx_ring = sp->first_tx_ring; /* send TX interrupt signal * * On Linux this makes one slot free on the * ring, which increases speed by about 10Mbps. * * But it will never free up all the slots. For * that we must poll and call again. */ ioctl(sp->handle.fd, NIOCTXSYNC, NULL); /* loop again */ return -2; } txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring); } /* * send */ cur = txring->cur; slot = &txring->slot[cur]; slot->flags = 0; pkt = NETMAP_BUF(txring, slot->buf_idx); memcpy(pkt, data, min(len, txring->nr_buf_size)); slot->len = len; if (avail <= 1) slot->flags = NS_REPORT; dbgx(3, "netmap cur=%d slot index=%d flags=0x%x empty=%d avail=%u bufsize=%d\n", cur, slot->buf_idx, slot->flags, NETMAP_TX_RING_EMPTY(txring), nm_ring_space(txring), txring->nr_buf_size); /* let kernel know that packet is available */ cur = NETMAP_RING_NEXT(txring, cur); #ifdef HAVE_NETMAP_RING_HEAD_TAIL txring->head = cur; #else txring->avail--; #endif txring->cur = cur; return len; }
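/*
 * Hedged caller sketch for sendpacket_send_netmap() above: the function
 * returns -2 once every TX queue is full, and its own comment notes that the
 * caller must poll and try again.  A minimal retry loop, assuming the same
 * sendpacket_t layout:
 */
static int
send_with_retry(sendpacket_t *sp, const u_char *frame, size_t len)
{
    struct pollfd pfd = { .fd = sp->handle.fd, .events = POLLOUT };
    int rc;

    while ((rc = sendpacket_send_netmap(sp, frame, len)) == -2 && !sp->abort)
        poll(&pfd, 1, 1000);    /* wait for the kernel to free TX slots */
    return rc;
}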
static void * pinger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; int i, rx = 0, n = targ->g->npackets; void *frame; int size; frame = &targ->pkt; frame += sizeof(targ->pkt.vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; fds[0].fd = targ->fd; fds[0].events = (POLLIN); static uint32_t sent; struct timespec ts, now, last_print; uint32_t count = 0, min = 1000000000, av = 0; if (targ->g->nthreads > 1) { D("can only ping with 1 thread"); return NULL; } clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); now = last_print; while (n == 0 || (int)sent < n) { struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); struct netmap_slot *slot; char *p; for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ slot = &ring->slot[ring->cur]; slot->len = size; p = NETMAP_BUF(ring, slot->buf_idx); if (nm_ring_empty(ring)) { D("-- ouch, cannot send"); } else { pkt_copy(frame, p, size); clock_gettime(CLOCK_REALTIME_PRECISE, &ts); bcopy(&sent, p+42, sizeof(sent)); bcopy(&ts, p+46, sizeof(ts)); sent++; ring->head = ring->cur = nm_ring_next(ring, ring->cur); } } /* should use a parameter to decide how often to send */ if (poll(fds, 1, 3000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } /* see what we got back */ for (i = targ->qfirst; i < targ->qlast; i++) { ring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(ring)) { uint32_t seq; slot = &ring->slot[ring->cur]; p = NETMAP_BUF(ring, slot->buf_idx); clock_gettime(CLOCK_REALTIME_PRECISE, &now); bcopy(p+42, &seq, sizeof(seq)); bcopy(p+46, &ts, sizeof(ts)); ts.tv_sec = now.tv_sec - ts.tv_sec; ts.tv_nsec = now.tv_nsec - ts.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (1) D("seq %d/%d delta %d.%09d", seq, sent, (int)ts.tv_sec, (int)ts.tv_nsec); if (ts.tv_nsec < (int)min) min = ts.tv_nsec; count ++; av += ts.tv_nsec; ring->head = ring->cur = nm_ring_next(ring, ring->cur); rx++; } } //D("tx %d rx %d", sent, rx); //usleep(100000); ts.tv_sec = now.tv_sec - last_print.tv_sec; ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (ts.tv_sec >= 1) { D("count %d min %d av %d", count, min, av/count); count = 0; av = 0; min = 100000000; last_print = now; } } return NULL; }
/** * Inner sendpacket_open() method for using netmap */ void * sendpacket_open_netmap(const char *device, char *errbuf, void *arg) { tcpreplay_t *ctx = (tcpreplay_t*)arg; sendpacket_t *sp = NULL; nmreq_t nmr; char ifname_buf[MAX_IFNAMELEN]; const char *ifname; const char *port = NULL; size_t namelen; u_int32_t nr_ringid = 0; u_int32_t nr_flags = NR_REG_DEFAULT; int is_default = 0; assert(device); assert(errbuf); dbg(1, "sendpacket_open_netmap: using netmap"); bzero(&nmr, sizeof(nmr)); /* prep & return our sp handle */ sp = (sendpacket_t *)safe_malloc(sizeof(sendpacket_t)); if (strlen(device) > MAX_IFNAMELEN - 8) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Interface name is to long: %s\n", device); goto IFACENAME_INVALID; } /* get the version of the netmap driver. If < 0, driver is not installed */ sp->netmap_version = get_netmap_version(); if (sp->netmap_version < 0) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Unable to determine the running netmap version.\n" "See INSTALL document for details on installing or upgrading netmap."); goto NETMAP_NOT_INSTALLED; } /* * Sort out interface names * * ifname (foo, netmap:foo or vale:foo) is the port name * foo bind to a single NIC hardware queue * netmap:foo bind to a single NIC hardware queue * vale:foo bind to the Vale virtual interface * * for netmap version 10+ a suffix can indicate the following: * netmap:foo! bind to all NIC hardware queues (may cause TX reordering) * netmap:foo^ bind to the host (sw) ring pair * netmap:foo* bind to the host (sw) and NIC ring pairs (transparent) * netmap:foo-NN bind to the individual NIC ring pair (queue) where NN = the ring number * netmap:foo{NN bind to the master side of pipe NN * netmap:foo}NN bind to the slave side of pipe NN */ if (strncmp(device, "netmap:", 7) && strncmp(device, "vale", 4)) { snprintf(ifname_buf, sizeof(ifname_buf), "netmap:%s", device); ifname = ifname_buf; } else { ifname = device; } if (!strncmp("vale", device, 4)) sp->is_vale = 1; if (ifname[0] == 'n') ifname += 7; /* scan for a separator */ for (port = ifname; *port && !index("!-*^{}", *port); port++) ; namelen = port - ifname; if (namelen > sizeof(nmr.nr_name)) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Interface name is to long: %s\n", device); goto IFACENAME_INVALID; } /* * Open the netmap device to fetch the number of queues of our * interface. * * The first NIOCREGIF also detaches the card from the * protocol stack and may cause a reset of the card, * which in turn may take some time for the PHY to * reconfigure. */ if ((sp->handle.fd = open("/dev/netmap", O_RDWR)) < 0) { dbg(1, "sendpacket_open_netmap: Unable to access netmap"); snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Unable to access netmap.\n" "See INSTALL to learn which NICs are supported and\n" "how to set up netmap-capable network drivers."); goto OPEN_FAILED; } /* * The nmreq structure must have the NETMAP_API version for the running machine. * However the binary may have been compiled on a different machine than the * running machine. Discover the true netmap API version, and be careful to call * fuctions that are available on all netmap versions. 
*/ if (sp->netmap_version >= 10) { switch (*port) { case '-': /* one NIC */ nr_flags = NR_REG_ONE_NIC; nr_ringid = atoi(port + 1); break; case '*': /* NIC and SW, ignore port */ nr_flags = NR_REG_NIC_SW; if (port[1]) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "invalid netmap port for nic+sw"); goto NETMAP_IF_PARSE_FAIL; } break; case '^': /* only sw ring */ nr_flags = NR_REG_SW; if (port[1]) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "invalid port for sw ring"); goto NETMAP_IF_PARSE_FAIL; } break; case '{': nr_flags = NR_REG_PIPE_MASTER; nr_ringid = atoi(port + 1); break; case '}': nr_flags = NR_REG_PIPE_SLAVE; nr_ringid = atoi(port + 1); break; case '!': nr_flags = NR_REG_ALL_NIC; break; default: /* '\0', no suffix */ nr_flags = NR_REG_ALL_NIC; is_default = 1; break; } if (nr_ringid >= NETMAP_RING_MASK) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "invalid ringid"); goto NETMAP_IF_PARSE_FAIL; } nmr.nr_ringid = nr_ringid; nmr.nr_flags = nr_flags; } nmr.nr_version = sp->netmap_version; memcpy(nmr.nr_name, ifname, namelen); nmr.nr_name[namelen] = '\0'; strlcpy(sp->device, nmr.nr_name, sizeof(sp->device)); /* * Register the interface on the netmap device: from now on, * we can operate on the network interface without any * interference from the legacy network stack. * * Cards take a long time to reset the PHY. */ fprintf(stderr, "Switching network driver for %s to netmap bypass mode... ", sp->device); fflush(NULL); sleep(1); /* ensure message prints when user is connected via ssh */ if (ioctl (sp->handle.fd, NIOCREGIF, &nmr)) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Failure accessing netmap.\n" "\tRequest for netmap version %u failed.\n\tCompiled netmap driver is version %u.\n\tError=%s\n", sp->netmap_version, NETMAP_API, strerror(errno)); goto NETMAP_IF_FAILED; } if (!nmr.nr_memsize) { snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Netmap interface '%s' not configured.\n", device); goto NETMAP_IF_FAILED; } sp->mmap_size = nmr.nr_memsize; sp->mmap_addr = (struct netmap_d *)mmap (0, sp->mmap_size, PROT_WRITE | PROT_READ, MAP_SHARED, sp->handle.fd, 0); if (!sp->mmap_addr || sp->mmap_addr == MAP_FAILED) { snprintf (errbuf, SENDPACKET_ERRBUF_SIZE, "mmap: %s", strerror (errno)); goto MMAP_FAILED; } dbgx(1, "sendpacket_open_netmap: mapping %d Kbytes queues=%d", sp->mmap_size >> 10, nmr.nr_tx_rings); sp->nm_if = NETMAP_IF(sp->mmap_addr, nmr.nr_offset); sp->nmr = nmr; sp->handle_type = SP_TYPE_NETMAP; /* set up ring IDs */ sp->cur_tx_ring = 0; switch(nr_flags) { case NR_REG_DEFAULT: /* only use one queue to prevent TX reordering */ sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = 0; break; case NR_REG_ALL_NIC: if (is_default) { sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = 0; } else { sp->first_tx_ring = sp->cur_tx_ring = 0; sp->last_tx_ring = nmr.nr_tx_rings - 1; } break; case NR_REG_SW: sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = nmr.nr_tx_rings; break; case NR_REG_NIC_SW: sp->first_tx_ring = sp->cur_tx_ring = 0; sp->last_tx_ring = nmr.nr_tx_rings; break; case NR_REG_ONE_NIC: sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = nr_ringid; break; default: sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = 0; } { /* debugging code */ int i; dbgx(1, "%s tx first=%d last=%d num=%d", ifname, sp->first_tx_ring, sp->last_tx_ring, sp->nmr.nr_tx_rings); for (i = 0; i <= sp->nmr.nr_tx_rings; i++) { #ifdef HAVE_NETMAP_RING_HEAD_TAIL dbgx(1, "TX%d 0x%p head=%d cur=%d tail=%d", i, NETMAP_TXRING(sp->nm_if, i), (NETMAP_TXRING(sp->nm_if, i))->head, (NETMAP_TXRING(sp->nm_if, 
i))->cur, (NETMAP_TXRING(sp->nm_if, i))->tail); #else dbgx(1, "TX%d 0x%p cur=%d avail=%d", i, NETMAP_TXRING(sp->nm_if, i), (NETMAP_TXRING(sp->nm_if, i))->cur, (NETMAP_TXRING(sp->nm_if, i))->avail); #endif } } dbgx(2, "Waiting %d seconds for phy reset...", ctx->options->netmap_delay); sleep(ctx->options->netmap_delay); dbg(2, "Ready!"); if (!sp->is_vale) { if (nm_do_ioctl(sp, SIOCGIFFLAGS, 0) < 0) goto NM_DO_IOCTL_FAILED; if ((sp->if_flags & IFF_RUNNING) == 0) { dbgx(1, "sendpacket_open_netmap: %s is not running", sp->device); snprintf (errbuf, SENDPACKET_ERRBUF_SIZE, "interface %s is not running - check cables\n", sp->device); goto NETMAP_IF_NOT_RUNNING; } if ((sp->if_flags & IFF_UP) == 0) { dbgx(1, "%s is down, bringing up...", sp->device); sp->if_flags |= IFF_UP; } /* set promiscuous mode */ sp->if_flags |= IFF_PROMISC; if (nm_do_ioctl(sp, SIOCSIFFLAGS, 0) < 0) goto NM_DO_IOCTL_FAILED; #ifdef linux /* disable: * - generic-segmentation-offload * - tcp-segmentation-offload * - rx-checksumming * - tx-checksumming */ if (nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GGSO) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GTSO) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GRXCSUM) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GTXCSUM) < 0) goto NM_DO_IOCTL_FAILED; sp->data = 0; if (nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_SGSO) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_STSO) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_SRXCSUM) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_STXCSUM)) goto NM_DO_IOCTL_FAILED; #endif } if(sp->abort) goto NETMAP_ABORT; notice("done!"); return sp; NM_DO_IOCTL_FAILED: snprintf (errbuf, SENDPACKET_ERRBUF_SIZE, "nm_do_ioctl: %s", strerror (errno)); NETMAP_IF_NOT_RUNNING: notice("failed!"); NETMAP_ABORT: fprintf(stderr, " Switching network driver for %s to normal mode... ", sp->device); fflush(NULL); munmap(sp->mmap_addr, sp->mmap_size); MMAP_FAILED: #if NETMAP_API < 10 ioctl(sp->handle.fd, NIOCUNREGIF, NULL); #endif NETMAP_IF_FAILED: NETMAP_IF_PARSE_FAIL: close (sp->handle.fd); OPEN_FAILED: safe_free(sp); IFACENAME_INVALID: NETMAP_NOT_INSTALLED: return NULL; }
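/*
 * Hedged usage sketch for sendpacket_open_netmap() above (device strings are
 * only examples): the suffixes documented at the top of the function select
 * the ring set, e.g. "netmap:eth0-0" for hardware ring pair 0, "netmap:eth0^"
 * for the host ring pair, "netmap:eth0!" for all hardware rings, or "vale0:1"
 * for a VALE switch port.
 */
static sendpacket_t *
open_first_hw_ring(tcpreplay_t *ctx)
{
    char errbuf[SENDPACKET_ERRBUF_SIZE];
    sendpacket_t *sp = sendpacket_open_netmap("netmap:eth0-0", errbuf, ctx);

    if (sp == NULL)
        fprintf(stderr, "netmap open failed: %s\n", errbuf);
    return sp;
}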
static int netmap_regif(struct nmreq *req, uint32_t idx, uint8_t port) { struct netmap_if *nmif; struct netmap_ring *ring; uint32_t i, slots, start_ring; int32_t rc; if (ports[port].fd < RTE_DIM(fd_port)) { RTE_LOG(ERR, USER1, "port %hhu already in use by fd: %u\n", port, IDX_TO_FD(ports[port].fd)); return (-EBUSY); } if (fd_port[idx].port != FD_PORT_RSRV) { RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n", IDX_TO_FD(idx)); return (-EBUSY); } nmif = ports[port].nmif; /* setup netmap_if fields. */ memset(nmif, 0, netmap.netif_memsz); /* only ALL rings supported right now. */ if (req->nr_ringid != 0) return (-EINVAL); snprintf(nmif->ni_name, sizeof(nmif->ni_name), "%s", req->nr_name); nmif->ni_version = req->nr_version; /* Netmap uses ni_(r|t)x_rings + 1 */ nmif->ni_rx_rings = ports[port].nr_rx_rings - 1; nmif->ni_tx_rings = ports[port].nr_tx_rings - 1; /* * Setup TX rings and slots. * Refer to the comments in netmap.h for details */ slots = 0; for (i = 0; i < nmif->ni_tx_rings + 1; i++) { nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, PORT_NUM_RINGS, slots); ring = NETMAP_TXRING(nmif, i); netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots); ring->avail = ring->num_slots; slots += ports[port].nr_tx_slots; } /* * Setup RX rings and slots. * Refer to the comments in netmap.h for details */ start_ring = i; for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) { nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, PORT_NUM_RINGS, slots); ring = NETMAP_RXRING(nmif, (i - start_ring)); netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots); ring->avail = 0; slots += ports[port].nr_rx_slots; } if ((rc = rte_eth_dev_start(port)) < 0) { RTE_LOG(ERR, USER1, "Couldn't start ethernet device %s (error %d)\n", req->nr_name, rc); return (rc); } /* setup fdi <--> port relationship. */ ports[port].fd = idx; fd_port[idx].port = port; req->nr_memsize = netmap.mem_sz; req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem; return (0); }
static void * pinger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; struct netmap_if *nifp = targ->nmd->nifp; int i, rx = 0, n = targ->g->npackets; void *frame; int size; uint32_t sent = 0; struct timespec ts, now, last_print; uint32_t count = 0, min = 1000000000, av = 0; frame = &targ->pkt; frame += sizeof(targ->pkt.vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; if (targ->g->nthreads > 1) { D("can only ping with 1 thread"); return NULL; } clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); now = last_print; while (n == 0 || (int)sent < n) { struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); struct netmap_slot *slot; char *p; for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ slot = &ring->slot[ring->cur]; slot->len = size; p = NETMAP_BUF(ring, slot->buf_idx); if (nm_ring_empty(ring)) { D("-- ouch, cannot send"); } else { struct tstamp *tp; nm_pkt_copy(frame, p, size); clock_gettime(CLOCK_REALTIME_PRECISE, &ts); bcopy(&sent, p+42, sizeof(sent)); tp = (struct tstamp *)(p+46); tp->sec = (uint32_t)ts.tv_sec; tp->nsec = (uint32_t)ts.tv_nsec; sent++; ring->head = ring->cur = nm_ring_next(ring, ring->cur); } } /* should use a parameter to decide how often to send */ if (poll(&pfd, 1, 3000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } /* see what we got back */ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { ring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(ring)) { uint32_t seq; struct tstamp *tp; slot = &ring->slot[ring->cur]; p = NETMAP_BUF(ring, slot->buf_idx); clock_gettime(CLOCK_REALTIME_PRECISE, &now); bcopy(p+42, &seq, sizeof(seq)); tp = (struct tstamp *)(p+46); ts.tv_sec = (time_t)tp->sec; ts.tv_nsec = (long)tp->nsec; ts.tv_sec = now.tv_sec - ts.tv_sec; ts.tv_nsec = now.tv_nsec - ts.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (1) D("seq %d/%d delta %d.%09d", seq, sent, (int)ts.tv_sec, (int)ts.tv_nsec); if (ts.tv_nsec < (int)min) min = ts.tv_nsec; count ++; av += ts.tv_nsec; ring->head = ring->cur = nm_ring_next(ring, ring->cur); rx++; } } //D("tx %d rx %d", sent, rx); //usleep(100000); ts.tv_sec = now.tv_sec - last_print.tv_sec; ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; if (ts.tv_nsec < 0) { ts.tv_nsec += 1000000000; ts.tv_sec--; } if (ts.tv_sec >= 1) { D("count %d min %d av %d", count, min, av/count); count = 0; av = 0; min = 100000000; last_print = now; } } return NULL; } /* * reply to ping requests */ static void * ponger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *txring, *rxring; int i, rx = 0, sent = 0, n = targ->g->npackets; if (targ->g->nthreads > 1) { D("can only reply ping with 1 thread"); return NULL; } D("understood ponger %d but don't know how to do it", n); while (n == 0 || sent < n) { uint32_t txcur, txavail; //#define BUSYWAIT #ifdef BUSYWAIT ioctl(pfd.fd, NIOCRXSYNC, NULL); #else if (poll(&pfd, 1, 1000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } #endif txring = NETMAP_TXRING(nifp, 0); txcur = txring->cur; txavail = nm_ring_space(txring); /* see what we got back */ for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { rxring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(rxring)) { uint16_t *spkt, *dpkt; uint32_t cur = rxring->cur; struct netmap_slot *slot = 
&rxring->slot[cur]; char *src, *dst; src = NETMAP_BUF(rxring, slot->buf_idx); //D("got pkt %p of size %d", src, slot->len); rxring->head = rxring->cur = nm_ring_next(rxring, cur); rx++; if (txavail == 0) continue; dst = NETMAP_BUF(txring, txring->slot[txcur].buf_idx); /* copy... */ dpkt = (uint16_t *)dst; spkt = (uint16_t *)src; nm_pkt_copy(src, dst, slot->len); dpkt[0] = spkt[3]; dpkt[1] = spkt[4]; dpkt[2] = spkt[5]; dpkt[3] = spkt[0]; dpkt[4] = spkt[1]; dpkt[5] = spkt[2]; txring->slot[txcur].len = slot->len; /* XXX swap src dst mac */ txcur = nm_ring_next(txring, txcur); txavail--; sent++; } } txring->head = txring->cur = txcur; targ->count = sent; #ifdef BUSYWAIT ioctl(pfd.fd, NIOCTXSYNC, NULL); #endif //D("tx %d rx %d", sent, rx); } return NULL; } static __inline int timespec_ge(const struct timespec *a, const struct timespec *b) { if (a->tv_sec > b->tv_sec) return (1); if (a->tv_sec < b->tv_sec) return (0); if (a->tv_nsec >= b->tv_nsec) return (1); return (0); } static __inline struct timespec timeval2spec(const struct timeval *a) { struct timespec ts = { .tv_sec = a->tv_sec, .tv_nsec = a->tv_usec * 1000 }; return ts; } static __inline struct timeval timespec2val(const struct timespec *a) { struct timeval tv = { .tv_sec = a->tv_sec, .tv_usec = a->tv_nsec / 1000 }; return tv; } static __inline struct timespec timespec_add(struct timespec a, struct timespec b) { struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; if (ret.tv_nsec >= 1000000000) { ret.tv_sec++; ret.tv_nsec -= 1000000000; } return ret; } static __inline struct timespec timespec_sub(struct timespec a, struct timespec b) { struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; if (ret.tv_nsec < 0) { ret.tv_sec--; ret.tv_nsec += 1000000000; } return ret; } /* * wait until ts, either busy or sleeping if more than 1ms. * Return wakeup time. 
*/ static struct timespec wait_time(struct timespec ts) { for (;;) { struct timespec w, cur; clock_gettime(CLOCK_REALTIME_PRECISE, &cur); w = timespec_sub(ts, cur); if (w.tv_sec < 0) return cur; else if (w.tv_sec > 0 || w.tv_nsec > 1000000) poll(NULL, 0, 1); } } static void * sender_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; struct netmap_if *nifp; struct netmap_ring *txring; int i, n = targ->g->npackets / targ->g->nthreads; int64_t sent = 0; int options = targ->g->options | OPT_COPY; struct timespec nexttime = { 0, 0}; // XXX silence compiler int rate_limit = targ->g->tx_rate; struct pkt *pkt = &targ->pkt; void *frame; int size; if (targ->frame == NULL) { frame = pkt; frame += sizeof(pkt->vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; } else { frame = targ->frame; size = targ->g->pkt_size; } D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); if (setaffinity(targ->thread, targ->affinity)) goto quit; /* main loop.*/ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); if (rate_limit) { targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); targ->tic.tv_nsec = 0; wait_time(targ->tic); nexttime = targ->tic; } if (targ->g->dev_type == DEV_TAP) { D("writing to file desc %d", targ->g->main_fd); for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (write(targ->g->main_fd, frame, size) != -1) sent++; update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } #ifndef NO_PCAP } else if (targ->g->dev_type == DEV_PCAP) { pcap_t *p = targ->g->p; for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (pcap_inject(p, frame, size) != -1) sent++; update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } #endif /* NO_PCAP */ } else { int tosend = 0; int frags = targ->g->frags; nifp = targ->nmd->nifp; while (!targ->cancel && (n == 0 || sent < n)) { if (rate_limit && tosend <= 0) { tosend = targ->g->burst; nexttime = timespec_add(nexttime, targ->g->tx_period); wait_time(nexttime); } /* * wait for available room in the send queue(s) */ if (poll(&pfd, 1, 2000) <= 0) { if (targ->cancel) break; D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); // goto quit; } if (pfd.revents & POLLERR) { D("poll error"); goto quit; } /* * scan our queues and send on those with room */ if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { D("drop copy"); options &= ~OPT_COPY; } for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { int m, limit = rate_limit ? tosend : targ->g->burst; if (n > 0 && n - sent < limit) limit = n - sent; txring = NETMAP_TXRING(nifp, i); if (nm_ring_empty(txring)) continue; if (frags > 1) limit = ((limit + frags - 1) / frags) * frags; m = send_packets(txring, pkt, frame, size, targ->g, limit, options, frags); ND("limit %d tail %d frags %d m %d", limit, txring->tail, frags, m); sent += m; targ->count = sent; if (rate_limit) { tosend -= m; if (tosend <= 0) break; } } } /* flush any remaining packets */ D("flush tail %d head %d on thread %p", txring->tail, txring->head, pthread_self()); ioctl(pfd.fd, NIOCTXSYNC, NULL); /* final part: wait all the TX queues to be empty. 
*/ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { txring = NETMAP_TXRING(nifp, i); while (nm_tx_pending(txring)) { RD(5, "pending tx tail %d head %d on ring %d", txring->tail, txring->head, i); ioctl(pfd.fd, NIOCTXSYNC, NULL); usleep(1); /* wait 1 tick */ } } } /* end DEV_NETMAP */ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->completed = 1; targ->count = sent; quit: /* reset the ``used`` flag. */ targ->used = 0; return (NULL); } #ifndef NO_PCAP static void receive_pcap(u_char *user, const struct pcap_pkthdr * h, const u_char * bytes) { int *count = (int *)user; (void)h; /* UNUSED */ (void)bytes; /* UNUSED */ (*count)++; } #endif /* !NO_PCAP */ static int receive_packets(struct netmap_ring *ring, u_int limit, int dump) { u_int cur, rx, n; cur = ring->cur; n = nm_ring_space(ring); if (n < limit) limit = n; for (rx = 0; rx < limit; rx++) { struct netmap_slot *slot = &ring->slot[cur]; char *p = NETMAP_BUF(ring, slot->buf_idx); if (dump) dump_payload(p, slot->len, ring, cur); cur = nm_ring_next(ring, cur); } ring->head = ring->cur = cur; return (rx); } static void * receiver_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; struct netmap_if *nifp; struct netmap_ring *rxring; int i; uint64_t received = 0; if (setaffinity(targ->thread, targ->affinity)) goto quit; D("reading from %s fd %d main_fd %d", targ->g->ifname, targ->fd, targ->g->main_fd); /* unbounded wait for the first packet. */ for (;!targ->cancel;) { i = poll(&pfd, 1, 1000); if (i > 0 && !(pfd.revents & POLLERR)) break; RD(1, "waiting for initial packets, poll returns %d %d", i, pfd.revents); } /* main loop, exit after 1s silence */ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); if (targ->g->dev_type == DEV_TAP) { while (!targ->cancel) { char buf[MAX_BODYSIZE]; /* XXX should we poll ? */ if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) targ->count++; } #ifndef NO_PCAP } else if (targ->g->dev_type == DEV_PCAP) { while (!targ->cancel) { /* XXX should we poll ? */ pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, (u_char *)&targ->count); } #endif /* !NO_PCAP */ } else { int dump = targ->g->options & OPT_DUMP; nifp = targ->nmd->nifp; while (!targ->cancel) { /* Once we started to receive packets, wait at most 1 seconds before quitting. */ if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->toc.tv_sec -= 1; /* Subtract timeout time. */ goto out; } if (pfd.revents & POLLERR) { D("poll err"); goto quit; } for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { int m; rxring = NETMAP_RXRING(nifp, i); if (nm_ring_empty(rxring)) continue; m = receive_packets(rxring, targ->g->burst, dump); received += m; } targ->count = received; } } clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); out: targ->completed = 1; targ->count = received; quit: /* reset the ``used`` flag. */ targ->used = 0; return (NULL); } /* very crude code to print a number in normalized form. * Caller has to make sure that the buffer is large enough. 
*/ static const char * norm(char *buf, double val) { char *units[] = { "", "K", "M", "G", "T" }; u_int i; for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) val /= 1000; sprintf(buf, "%.2f %s", val, units[i]); return buf; } static void tx_output(uint64_t sent, int size, double delta) { double bw, raw_bw, pps; char b1[40], b2[80], b3[80]; printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", (unsigned long long)sent, size, delta); if (delta == 0) delta = 1e-6; if (size < 60) /* correct for min packet size */ size = 60; pps = sent / delta; bw = (8.0 * size * sent) / delta; /* raw packets have 4 bytes crc + 20 bytes framing */ raw_bw = (8.0 * (size + 24) * sent) / delta; printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); } static void rx_output(uint64_t received, double delta) { double pps; char b1[40]; printf("Received %llu packets, in %.2f seconds.\n", (unsigned long long) received, delta); if (delta == 0) delta = 1e-6; pps = received / delta; printf("Speed: %spps\n", norm(b1, pps)); }
int main(int arc, char **argv) { int i; struct glob_arg g; int ch; int wait_link = 2; int devqueues = 1; /* how many device queues */ bzero(&g, sizeof(g)); g.main_fd = -1; g.td_body = receiver_body; g.report_interval = 1000; /* report interval */ g.affinity = -1; /* ip addresses can also be a range x.x.x.x-x.x.x.y */ g.src_ip.name = "10.0.0.1"; g.dst_ip.name = "10.1.0.1"; g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; g.src_mac.name = NULL; g.pkt_size = 60; g.burst = 512; // default g.nthreads = 1; g.cpus = 1; g.forever = 1; g.tx_rate = 0; g.frags = 1; g.nmr_config = ""; g.virt_header = 0; while ( (ch = getopt(arc, argv, "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:P:zZ")) != -1) { struct sf *fn; switch(ch) { default: D("bad option %c %s", ch, optarg); usage(); break; case 'n': g.npackets = atoi(optarg); break; case 'F': i = atoi(optarg); if (i < 1 || i > 63) { D("invalid frags %d [1..63], ignore", i); break; } g.frags = i; break; case 'f': for (fn = func; fn->key; fn++) { if (!strcmp(fn->key, optarg)) break; } if (fn->key) g.td_body = fn->f; else D("unrecognised function %s", optarg); break; case 'o': /* data generation options */ g.options = atoi(optarg); break; case 'a': /* force affinity */ g.affinity = atoi(optarg); break; case 'i': /* interface */ /* a prefix of tap: netmap: or pcap: forces the mode. * otherwise we guess */ D("interface is %s", optarg); if (strlen(optarg) > MAX_IFNAMELEN - 8) { D("ifname too long %s", optarg); break; } strcpy(g.ifname, optarg); if (!strcmp(optarg, "null")) { g.dev_type = DEV_NETMAP; g.dummy_send = 1; } else if (!strncmp(optarg, "tap:", 4)) { g.dev_type = DEV_TAP; strcpy(g.ifname, optarg + 4); } else if (!strncmp(optarg, "pcap:", 5)) { g.dev_type = DEV_PCAP; strcpy(g.ifname, optarg + 5); } else if (!strncmp(optarg, "netmap:", 7) || !strncmp(optarg, "vale", 4)) { g.dev_type = DEV_NETMAP; } else if (!strncmp(optarg, "tap", 3)) { g.dev_type = DEV_TAP; } else { /* prepend netmap: */ g.dev_type = DEV_NETMAP; sprintf(g.ifname, "netmap:%s", optarg); } break; case 'I': g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ break; case 'l': /* pkt_size */ g.pkt_size = atoi(optarg); break; case 'd': g.dst_ip.name = optarg; break; case 's': g.src_ip.name = optarg; break; case 'T': /* report interval */ g.report_interval = atoi(optarg); break; case 'w': wait_link = atoi(optarg); break; case 'W': /* XXX changed default */ g.forever = 0; /* do not exit rx even with no traffic */ break; case 'b': /* burst */ g.burst = atoi(optarg); break; case 'c': g.cpus = atoi(optarg); break; case 'p': g.nthreads = atoi(optarg); break; case 'D': /* destination mac */ g.dst_mac.name = optarg; break; case 'S': /* source mac */ g.src_mac.name = optarg; break; case 'v': verbose++; break; case 'R': g.tx_rate = atoi(optarg); break; case 'X': g.options |= OPT_DUMP; break; case 'C': g.nmr_config = strdup(optarg); break; case 'H': g.virt_header = atoi(optarg); break; case 'e': /* extra bufs */ g.extra_bufs = atoi(optarg); break; case 'm': if (strcmp(optarg, "tx") == 0) { g.options |= OPT_MONITOR_TX; } else if (strcmp(optarg, "rx") == 0) { g.options |= OPT_MONITOR_RX; } else { D("unrecognized monitor mode %s", optarg); } break; case 'P': g.packet_file = strdup(optarg); break; case 'z': g.options |= OPT_RANDOM_SRC; break; case 'Z': g.options |= OPT_RANDOM_DST; break; } } if (strlen(g.ifname) <=0 ) { D("missing ifname"); usage(); } i = system_ncpus(); if (g.cpus < 0 || g.cpus > i) { D("%d cpus is too high, have only %d cpus", g.cpus, i); usage(); } if (g.cpus == 0) g.cpus = i; if (g.pkt_size < 16 
|| g.pkt_size > MAX_PKTSIZE) { D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); usage(); } if (g.src_mac.name == NULL) { static char mybuf[20] = "00:00:00:00:00:00"; /* retrieve source mac address. */ if (source_hwaddr(g.ifname, mybuf) == -1) { D("Unable to retrieve source mac"); // continue, fail later } g.src_mac.name = mybuf; } /* extract address ranges */ extract_ip_range(&g.src_ip); extract_ip_range(&g.dst_ip); extract_mac_range(&g.src_mac); extract_mac_range(&g.dst_mac); if (g.src_ip.start != g.src_ip.end || g.src_ip.port0 != g.src_ip.port1 || g.dst_ip.start != g.dst_ip.end || g.dst_ip.port0 != g.dst_ip.port1) g.options |= OPT_COPY; if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 && g.virt_header != VIRT_HDR_2) { D("bad virtio-net-header length"); usage(); } if (g.dev_type == DEV_TAP) { D("want to use tap %s", g.ifname); g.main_fd = tap_alloc(g.ifname); if (g.main_fd < 0) { D("cannot open tap %s", g.ifname); usage(); } #ifndef NO_PCAP } else if (g.dev_type == DEV_PCAP) { char pcap_errbuf[PCAP_ERRBUF_SIZE]; pcap_errbuf[0] = '\0'; // init the buffer g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); if (g.p == NULL) { D("cannot open pcap on %s", g.ifname); usage(); } g.main_fd = pcap_fileno(g.p); D("using pcap on %s fileno %d", g.ifname, g.main_fd); #endif /* !NO_PCAP */ } else if (g.dummy_send) { /* but DEV_NETMAP */ D("using a dummy send routine"); } else { struct nmreq base_nmd; bzero(&base_nmd, sizeof(base_nmd)); parse_nmr_config(g.nmr_config, &base_nmd); if (g.extra_bufs) { base_nmd.nr_arg3 = g.extra_bufs; } /* * Open the netmap device using nm_open(). * * protocol stack and may cause a reset of the card, * which in turn may take some time for the PHY to * reconfigure. We do the open here to have time to reset. */ g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL); if (g.nmd == NULL) { D("Unable to open %s: %s", g.ifname, strerror(errno)); goto out; } g.main_fd = g.nmd->fd; D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); /* get num of queues in tx or rx */ if (g.td_body == sender_body) devqueues = g.nmd->req.nr_tx_rings; else devqueues = g.nmd->req.nr_rx_rings; /* validate provided nthreads. */ if (g.nthreads < 1 || g.nthreads > devqueues) { D("bad nthreads %d, have %d queues", g.nthreads, devqueues); // continue, fail later } if (verbose) { struct netmap_if *nifp = g.nmd->nifp; struct nmreq *req = &g.nmd->req; D("nifp at offset %d, %d tx %d rx region %d", req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, req->nr_arg2); for (i = 0; i <= req->nr_tx_rings; i++) { struct netmap_ring *ring = NETMAP_TXRING(nifp, i); D(" TX%d at 0x%lx slots %d", i, (char *)ring - (char *)nifp, ring->num_slots); } for (i = 0; i <= req->nr_rx_rings; i++) { struct netmap_ring *ring = NETMAP_RXRING(nifp, i); D(" RX%d at 0x%lx slots %d", i, (char *)ring - (char *)nifp, ring->num_slots); } } /* Print some debug information. */ fprintf(stdout, "%s %s: %d queues, %d threads and %d cpus.\n", (g.td_body == sender_body) ? "Sending on" : "Receiving from", g.ifname, devqueues, g.nthreads, g.cpus); if (g.td_body == sender_body) { fprintf(stdout, "%s -> %s (%s -> %s)\n", g.src_ip.name, g.dst_ip.name, g.src_mac.name, g.dst_mac.name); } out: /* Exit if something went wrong. */ if (g.main_fd < 0) { D("aborting"); usage(); } } if (g.options) { D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", g.options & OPT_PREFETCH ? " prefetch" : "", g.options & OPT_ACCESS ? " access" : "", g.options & OPT_MEMCPY ? " memcpy" : "", g.options & OPT_INDIRECT ? " indirect" : "", g.options & OPT_COPY ? 
" copy" : ""); } g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; if (g.tx_rate > 0) { /* try to have at least something every second, * reducing the burst size to some 0.01s worth of data * (but no less than one full set of fragments) */ uint64_t x; int lim = (g.tx_rate)/300; if (g.burst > lim) g.burst = lim; if (g.burst < g.frags) g.burst = g.frags; x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; g.tx_period.tv_nsec = x; g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; } if (g.td_body == sender_body) D("Sending %d packets every %ld.%09ld s", g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); /* Wait for PHY reset. */ D("Wait %d secs for phy reset", wait_link); sleep(wait_link); D("Ready..."); /* Install ^C handler. */ global_nthreads = g.nthreads; signal(SIGINT, sigint_h); start_threads(&g); main_thread(&g); return 0; }
static void * fio_dns_send(struct fio_context *context) { //uint16_t sport_rang = 5000; uint32_t sip_rang[2]; struct fio_nic *nics[2]; struct pollfd fds[2+1]; uint32_t dirs[2]; int all_move, limit; int m, num_ready, i, j, num_nic = context->num_nic; struct netmap_if *nifp; struct netmap_ring *txring; struct fio_nic *tn; struct fio_poll_data pd; //uint32_t dst_ips[] = {(ntohl((uint32_t)(sysconfig.dst_ip[0].start.s_addr)) & 0xFFFFFF00), // (ntohl((uint32_t)(sysconfig.dst_ip[1].start.s_addr)) & 0xFFFFFF00)}; //uint32_t dst_ips[] = {(ntohl((uint32_t)(sysconfig.src_ip.start.s_addr)) & 0xFFFFFF00), // (ntohl((uint32_t)(sysconfig.src_ip.start.s_addr)) & 0xFFFFFF00)}; char donames[][255] = { "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com", "www.likunxiang.com" }; int donames_len[] = { strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com"), strlen("www.likunxiang.com") }; struct fio_txdata txds[2][24]; char buf_if_necessary[2*24][MAX_PKT_SIZE]; struct pktudp *pkt4test[] = {&g_pkt4test, &g_pkt4test2}; for (j = 0; j < num_nic; j++) { sip_rang[j] = 126; for (i = 0; i < 24; i++) { struct fio_txdata *txd = &txds[j][i]; memset(txd, 0, sizeof(struct fio_txdata)); txd->type = TXDATA_TYPE_NORMAL; //txd->pbuf = txd->buf; txd->pbuf = buf_if_necessary[j*24+i]; txd->pdata = txd->pbuf+g_payload_offset; memcpy(txd->pbuf, (char*)pkt4test[j], sysconfig.pkt_size); int paylen = dns_pack_head(txd->pdata, donames[i], donames_len[i]); txd->size = g_payload_offset+paylen; *(uint16_t*)(txd->pbuf+38) = htons(paylen+8); *(uint16_t*)(txd->pbuf+16) = htons(paylen+8+20); struct pktudp *ppkt = (struct pktudp*)txd->pbuf; //*(uint32_t*)(txd->pbuf+g_sip_offset) = htonl(dst_ips[j]|sip_rang[j]++); *(uint32_t*)(txd->pbuf+g_sip_offset) = sysconfig.src_ip.start.s_addr; bcopy(&sysconfig.src_mac.start, txd->pbuf+g_eth_smac, 6); struct ip *ip = &ppkt->ip; ip->ip_sum = 0; ip->ip_sum = ip_fast_csum((char*)ip, ip->ip_hl); } } for (i = 0; i < num_nic; i++) { nics[i] = &context->nics[i]; dirs[i] = FIO_DIR_TX; sip_rang[i] = 0; } OD( "tid %d Wait %d secs for phy reset", context->me, 5); sleep(5); OD( "tid %d Ready... 
num_nic %d", context->me, num_nic); gettimeofday(&context->tic, NULL); pd.nics = nics; pd.fds = fds; pd.dirs = dirs; pd.num_nic = num_nic; pd.notifyfds = context->notifyfds; pd.num_notify = 0; #ifdef _BREAKh_ int breakh = 0; #endif while (sysconfig.working) { if ( (num_ready=context->vtbl.polls(&pd, 500)) < 0) { gettimeofday(&context->toc, NULL); context->toc.tv_sec -= 1; /* Subtract timeout time. */ OD( "<error> poll error!!!!!"); continue; } #ifdef _BREAKh_ if (breakh) break; #endif if (num_ready == 0) { continue; } for (i = 0; i < num_nic; i++) { tn = nics[i]; fio_nic_send_arpbuf(tn); fio_nic_send_txbuf(tn); if (tn->nictxavl < 1) continue; all_move = 0; m = 0; nifp = tn->nifp; limit = tn->nictxavl; #ifdef _SND_LIMIT_ //D("limit %d", limit); limit = sysconfig.snd_pkts_per_sec; #endif while(limit > 0) { txring = NETMAP_TXRING(nifp, tn->cur_tx_head); if (txring->avail == 0) { tn->cur_tx_head = (tn->cur_tx_head+1 == tn->qtxlast ? tn->qtxfirst : tn->cur_tx_head+1); if (++sip_rang[i] >= 24) sip_rang[i] = 0; } else { m = fio_fill_dns_pkt(txring, &txds[i][sip_rang[i]], limit); all_move += m; limit -= m; } } if (nm_likely(all_move > 0)) { tn->txcount += all_move; tn->nictxavl -= all_move; } } #ifdef _BREAKh_ //breakh = 1; #endif #ifdef _SND_LIMIT_ sleep(2); #endif } /* reset the ``used`` flag. */ context->used = 0; OD( "I'll shutdown"); return NULL; }
int sendpacket_send_netmap(void *p, const u_char *data, size_t len) { int retcode = 0; sendpacket_t *sp = p; struct netmap_ring *txring; struct netmap_slot *slot; char *pkt; uint32_t cur, avail; if (sp->abort) return retcode; txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring); while ((avail = nm_ring_space(txring)) == 0) { /* out of space on current TX queue - go to next */ ++sp->cur_tx_ring; if (sp->cur_tx_ring > sp->last_tx_ring) { /* * out of space on all queues * * we have looped through all configured TX queues * so we have to reset to the first queue and * wait for available space */ struct pollfd pfd; sp->cur_tx_ring = sp->first_tx_ring; /* send TX interrupt signal * * On Linux this makes one slot free on the * ring, which increases speed by about 10Mbps. * * But it will never free up all the slots. For * that we must poll and call again. */ ioctl(sp->handle.fd, NIOCTXSYNC, NULL); pfd.fd = sp->handle.fd; pfd.events = POLLOUT; pfd.revents = 0; if (poll(&pfd, 1, 1000) <= 0) { if (++sp->tx_timeouts == NETMAP_TX_TIMEOUT_SEC) { return -1; } return -2; } sp->tx_timeouts = 0; /* * Do not remove this even though it looks redundant. * Overall performance is increased with this restart * of the TX queue. * * This call increases the number of available slots from * 1 to all that are truly available. */ ioctl(sp->handle.fd, NIOCTXSYNC, NULL); } txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring); } /* * send */ cur = txring->cur; slot = &txring->slot[cur]; slot->flags = 0; pkt = NETMAP_BUF(txring, slot->buf_idx); memcpy(pkt, data, min(len, txring->nr_buf_size)); slot->len = len; if (avail <= 1) slot->flags = NS_REPORT; dbgx(3, "netmap cur=%d slot index=%d flags=0x%x empty=%d avail=%u bufsize=%d\n", cur, slot->buf_idx, slot->flags, NETMAP_TX_RING_EMPTY(txring), nm_ring_space(txring), txring->nr_buf_size); /* let kernel know that packet is available */ cur = NETMAP_RING_NEXT(txring, cur); #ifdef HAVE_NETMAP_RING_HEAD_TAIL txring->head = cur; #else txring->avail--; #endif txring->cur = cur; retcode = len; return retcode; }
/* * reply to ping requests */ static void * ponger_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; struct netmap_ring *txring, *rxring; int i, rx = 0, sent = 0, n = targ->g->npackets; fds[0].fd = targ->fd; fds[0].events = (POLLIN); if (targ->g->nthreads > 1) { D("can only reply ping with 1 thread"); return NULL; } D("understood ponger %d but don't know how to do it", n); while (n == 0 || sent < n) { uint32_t txcur, txavail; //#define BUSYWAIT #ifdef BUSYWAIT ioctl(fds[0].fd, NIOCRXSYNC, NULL); #else if (poll(fds, 1, 1000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } #endif txring = NETMAP_TXRING(nifp, 0); txcur = txring->cur; txavail = nm_ring_space(txring); /* see what we got back */ for (i = targ->qfirst; i < targ->qlast; i++) { rxring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(rxring)) { uint16_t *spkt, *dpkt; uint32_t cur = rxring->cur; struct netmap_slot *slot = &rxring->slot[cur]; char *src, *dst; src = NETMAP_BUF(rxring, slot->buf_idx); //D("got pkt %p of size %d", src, slot->len); rxring->head = rxring->cur = nm_ring_next(rxring, cur); rx++; if (txavail == 0) continue; dst = NETMAP_BUF(txring, txring->slot[txcur].buf_idx); /* copy... */ dpkt = (uint16_t *)dst; spkt = (uint16_t *)src; pkt_copy(src, dst, slot->len); dpkt[0] = spkt[3]; dpkt[1] = spkt[4]; dpkt[2] = spkt[5]; dpkt[3] = spkt[0]; dpkt[4] = spkt[1]; dpkt[5] = spkt[2]; txring->slot[txcur].len = slot->len; /* XXX swap src dst mac */ txcur = nm_ring_next(txring, txcur); txavail--; sent++; } } txring->head = txring->cur = txcur; targ->count = sent; #ifdef BUSYWAIT ioctl(fds[0].fd, NIOCTXSYNC, NULL); #endif //D("tx %d rx %d", sent, rx); } return NULL; }
static __inline struct timespec timeval2spec(const struct timeval *a) { struct timespec ts = { .tv_sec = a->tv_sec, .tv_nsec = a->tv_usec * 1000 }; return ts; } static __inline struct timeval timespec2val(const struct timespec *a) { struct timeval tv = { .tv_sec = a->tv_sec, .tv_usec = a->tv_nsec / 1000 }; return tv; } static __inline struct timespec timespec_add(struct timespec a, struct timespec b) { struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; if (ret.tv_nsec >= 1000000000) { ret.tv_sec++; ret.tv_nsec -= 1000000000; } return ret; } static __inline struct timespec timespec_sub(struct timespec a, struct timespec b) { struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; if (ret.tv_nsec < 0) { ret.tv_sec--; ret.tv_nsec += 1000000000; } return ret; } /* * wait until ts, either busy or sleeping if more than 1ms. * Return wakeup time. */ static struct timespec wait_time(struct timespec ts) { for (;;) { struct timespec w, cur; clock_gettime(CLOCK_REALTIME_PRECISE, &cur); w = timespec_sub(ts, cur); if (w.tv_sec < 0) return cur; else if (w.tv_sec > 0 || w.tv_nsec > 1000000) poll(NULL, 0, 1); } } static void * sender_body(void *data) { struct targ *targ = (struct targ *) data; struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; struct netmap_ring *txring; int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; int options = targ->g->options | OPT_COPY; struct timespec nexttime = { 0, 0}; // XXX silence compiler int rate_limit = targ->g->tx_rate; struct pkt *pkt = &targ->pkt; void *frame; int size; frame = pkt; frame += sizeof(pkt->vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; D("start"); if (setaffinity(targ->thread, targ->affinity)) goto quit; /* setup poll(2) mechanism. */ memset(fds, 0, sizeof(fds)); fds[0].fd = targ->fd; fds[0].events = (POLLOUT); /* main loop.*/ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); if (rate_limit) { targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); targ->tic.tv_nsec = 0; wait_time(targ->tic); nexttime = targ->tic; } if (targ->g->dev_type == DEV_PCAP) { pcap_t *p = targ->g->p; for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (pcap_inject(p, frame, size) != -1) sent++; update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } } else if (targ->g->dev_type == DEV_TAP) { /* tap */ D("writing to file desc %d", targ->g->main_fd); for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (write(targ->g->main_fd, frame, size) != -1) sent++; update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } } else { int tosend = 0; int frags = targ->g->frags; while (!targ->cancel && (n == 0 || sent < n)) { if (rate_limit && tosend <= 0) { tosend = targ->g->burst; nexttime = timespec_add(nexttime, targ->g->tx_period); wait_time(nexttime); } /* * wait for available room in the send queue(s) */ if (poll(fds, 1, 2000) <= 0) { if (targ->cancel) break; D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); goto quit; } if (fds[0].revents & POLLERR) { D("poll error"); goto quit; } /* * scan our queues and send on those with room */ if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { D("drop copy"); options &= ~OPT_COPY; } for (i = targ->qfirst; i < targ->qlast; i++) { int m, limit = rate_limit ? 
tosend : targ->g->burst; if (n > 0 && n - sent < limit) limit = n - sent; txring = NETMAP_TXRING(nifp, i); if (nm_ring_empty(txring)) continue; if (frags > 1) limit = ((limit + frags - 1) / frags) * frags; m = send_packets(txring, pkt, frame, size, targ->g, limit, options, frags); ND("limit %d avail %d frags %d m %d", limit, txring->avail, frags, m); sent += m; targ->count = sent; if (rate_limit) { tosend -= m; if (tosend <= 0) break; } } } /* flush any remaining packets */ ioctl(fds[0].fd, NIOCTXSYNC, NULL); /* final part: wait all the TX queues to be empty. */ for (i = targ->qfirst; i < targ->qlast; i++) { txring = NETMAP_TXRING(nifp, i); while (nm_tx_pending(txring)) { ioctl(fds[0].fd, NIOCTXSYNC, NULL); usleep(1); /* wait 1 tick */ } } } clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->completed = 1; targ->count = sent; quit: /* reset the ``used`` flag. */ targ->used = 0; return (NULL); } static void receive_pcap(u_char *user, const struct pcap_pkthdr * h, const u_char * bytes) { int *count = (int *)user; (void)h; /* UNUSED */ (void)bytes; /* UNUSED */ (*count)++; }