/* test case to time the number of cycles to round-trip a cache line between * two cores and back again. */ static void time_cache_line_switch(void) { /* allocate a full cache line for data, we use only first byte of it */ uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)]; unsigned i, slaveid = rte_get_next_lcore(rte_lcore_id(), 0, 0); volatile uint64_t *pdata = &data[0]; *pdata = 1; rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], slaveid); while (*pdata) rte_pause(); const uint64_t start_time = rte_rdtsc(); for (i = 0; i < (1 << ITER_POWER); i++) { while (*pdata) rte_pause(); *pdata = 1; } const uint64_t end_time = rte_rdtsc(); while (*pdata) rte_pause(); *pdata = 2; rte_eal_wait_lcore(slaveid); printf("==== Cache line switch test ===\n"); printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER), end_time-start_time); printf("Ticks per iteration = %"PRIu64"\n\n", (end_time-start_time) >> ITER_POWER); }
static inline uint8_t efd_get_all_sockets_bitmask(void) { uint8_t all_cpu_sockets_bitmask = 0; unsigned int i; unsigned int next_lcore = rte_get_master_lcore(); const int val_true = 1, val_false = 0; for (i = 0; i < rte_lcore_count(); i++) { all_cpu_sockets_bitmask |= 1 << rte_lcore_to_socket_id(next_lcore); next_lcore = rte_get_next_lcore(next_lcore, val_false, val_true); } return all_cpu_sockets_bitmask; }
int eal_adjust_config(struct internal_config *internal_cfg) { int i; struct rte_config *cfg = rte_eal_get_configuration(); if (internal_config.process_type == RTE_PROC_AUTO) internal_config.process_type = eal_proc_type_detect(); /* default master lcore is the first one */ if (!master_lcore_parsed) cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); /* if no memory amounts were requested, this will result in 0 and * will be overridden later, right after eal_hugepage_info_init() */ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) internal_cfg->memory += internal_cfg->socket_mem[i]; return 0; }
static int paxos_rx_process(struct rte_mbuf *pkt, struct proposer* proposer) { int ret = 0; uint8_t l4_proto = 0; uint16_t outer_header_len; union tunnel_offload_info info = { .data = 0 }; struct udp_hdr *udp_hdr; struct paxos_hdr *paxos_hdr; struct ether_hdr *phdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); parse_ethernet(phdr, &info, &l4_proto); if (l4_proto != IPPROTO_UDP) return -1; udp_hdr = (struct udp_hdr *)((char *)phdr + info.outer_l2_len + info.outer_l3_len); /* if UDP dst port is not either PROPOSER or LEARNER port */ if (!(udp_hdr->dst_port == rte_cpu_to_be_16(PROPOSER_PORT) || udp_hdr->dst_port == rte_cpu_to_be_16(LEARNER_PORT)) && (pkt->packet_type & RTE_PTYPE_TUNNEL_MASK) == 0) return -1; paxos_hdr = (struct paxos_hdr *)((char *)udp_hdr + sizeof(struct udp_hdr)); if (rte_get_log_level() == RTE_LOG_DEBUG) { //rte_hexdump(stdout, "udp", udp_hdr, sizeof(struct udp_hdr)); //rte_hexdump(stdout, "paxos", paxos_hdr, sizeof(struct paxos_hdr)); print_paxos_hdr(paxos_hdr); } int value_len = rte_be_to_cpu_16(paxos_hdr->value_len); struct paxos_value *v = paxos_value_new((char *)paxos_hdr->paxosval, value_len); switch(rte_be_to_cpu_16(paxos_hdr->msgtype)) { case PAXOS_PROMISE: { struct paxos_promise promise = { .iid = rte_be_to_cpu_32(paxos_hdr->inst), .ballot = rte_be_to_cpu_16(paxos_hdr->rnd), .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd), .aid = rte_be_to_cpu_16(paxos_hdr->acptid), .value = *v }; proposer_handle_promise(proposer, &promise); break; } case PAXOS_ACCEPT: { if (first_time) { proposer_preexecute(proposer); first_time = false; } struct paxos_accept acpt = { .iid = rte_be_to_cpu_32(paxos_hdr->inst), .ballot = rte_be_to_cpu_16(paxos_hdr->rnd), .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd), .aid = rte_be_to_cpu_16(paxos_hdr->acptid), .value = *v }; proposer_handle_accept(proposer, &acpt); break; } case PAXOS_ACCEPTED: { struct paxos_accepted ack = { .iid = rte_be_to_cpu_32(paxos_hdr->inst), .ballot = rte_be_to_cpu_16(paxos_hdr->rnd), .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd), .aid = rte_be_to_cpu_16(paxos_hdr->acptid), .value = *v }; proposer_handle_accepted(proposer, &ack); break; } default: break; } outer_header_len = info.outer_l2_len + info.outer_l3_len + sizeof(struct udp_hdr) + sizeof(struct paxos_hdr); rte_pktmbuf_adj(pkt, outer_header_len); return ret; } static uint16_t add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *user_param) { struct proposer* proposer = (struct proposer *)user_param; unsigned i; uint64_t now = rte_rdtsc(); for (i = 0; i < nb_pkts; i++) { pkts[i]->udata64 = now; paxos_rx_process(pkts[i], proposer); } return nb_pkts; } static inline int port_init(uint8_t port, struct rte_mempool *mbuf_pool, struct proposer* proposer) { struct rte_eth_dev_info dev_info; struct rte_eth_txconf *txconf; struct rte_eth_rxconf *rxconf; struct rte_eth_conf port_conf = port_conf_default; const uint16_t rx_rings = 1, tx_rings = 1; int retval; uint16_t q; rte_eth_dev_info_get(port, &dev_info); rxconf = &dev_info.default_rxconf; txconf = &dev_info.default_txconf; txconf->txq_flags &= PKT_TX_IPV4; txconf->txq_flags &= PKT_TX_UDP_CKSUM; if (port >= rte_eth_dev_count()) return -1; retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); if (retval != 0) return retval; for (q = 0; q < rx_rings; q++) { retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, rte_eth_dev_socket_id(port), rxconf, mbuf_pool); if (retval < 0) return retval; } for (q = 0; q < tx_rings; q++) { retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, rte_eth_dev_socket_id(port), txconf); if (retval < 0) return retval; } retval = rte_eth_dev_start(port); if (retval < 0) return retval; struct ether_addr addr; rte_eth_macaddr_get(port, &addr); rte_eth_promiscuous_enable(port); rte_eth_add_rx_callback(port, 0, add_timestamps, proposer); rte_eth_add_tx_callback(port, 0, calc_latency, NULL); return 0; } static void lcore_main(uint8_t port, __rte_unused struct proposer *p) { proposer_preexecute(p); for (;;) { // Check if signal is received if (force_quit) break; struct rte_mbuf *bufs[BURST_SIZE]; const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, BURST_SIZE); if (unlikely(nb_rx == 0)) continue; uint16_t buf; for (buf = 0; buf < nb_rx; buf++) rte_pktmbuf_free(bufs[buf]); } } static __attribute__((noreturn)) int lcore_mainloop(__attribute__((unused)) void *arg) { uint64_t prev_tsc = 0, cur_tsc, diff_tsc; unsigned lcore_id; lcore_id = rte_lcore_id(); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_TIMER, "Starting mainloop on core %u\n", lcore_id); while(1) { cur_tsc = rte_rdtsc(); diff_tsc = cur_tsc - prev_tsc; if (diff_tsc > TIMER_RESOLUTION_CYCLES) { rte_timer_manage(); prev_tsc = cur_tsc; } } } static void report_stat(struct rte_timer *tim, __attribute((unused)) void *arg) { /* print stat */ uint32_t count = rte_atomic32_read(&stat); rte_log(RTE_LOG_INFO, RTE_LOGTYPE_USER8, "Throughput = %8u msg/s\n", count); /* reset stat */ rte_atomic32_set(&stat, 0); /* this timer is automatically reloaded until we decide to stop it */ if (force_quit) rte_timer_stop(tim); } static void check_timeout(struct rte_timer *tim, void *arg) { struct proposer* p = (struct proposer *) arg; unsigned lcore_id = rte_lcore_id(); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "%s() on lcore_id %i\n", __func__, lcore_id); struct paxos_message out; out.type = PAXOS_PREPARE; struct timeout_iterator* iter = proposer_timeout_iterator(p); while(timeout_iterator_prepare(iter, &out.u.prepare)) { rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "%s Send PREPARE inst %d ballot %d\n", __func__, out.u.prepare.iid, out.u.prepare.ballot); send_paxos_message(&out); } out.type = PAXOS_ACCEPT; while(timeout_iterator_accept(iter, &out.u.accept)) { rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "%s: Send ACCEPT inst %d ballot %d\n", __func__, out.u.prepare.iid, out.u.prepare.ballot); send_paxos_message(&out); } timeout_iterator_free(iter); /* this timer is automatically reloaded until we decide to stop it */ if (force_quit) rte_timer_stop(tim); } int main(int argc, char *argv[]) { uint8_t portid = 0; unsigned master_core, lcore_id; signal(SIGTERM, signal_handler); signal(SIGINT, signal_handler); force_quit = false; int proposer_id = 0; if (rte_get_log_level() == RTE_LOG_DEBUG) { paxos_config.verbosity = PAXOS_LOG_DEBUG; } struct proposer *proposer = proposer_new(proposer_id, NUM_ACCEPTORS); first_time = true; /* init EAL */ int ret = rte_eal_init(argc, argv); if (ret < 0) rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); /* init timer structure */ rte_timer_init(&timer); rte_timer_init(&stat_timer); /* load deliver_timer, every 1 s, on a slave lcore, reloaded automatically */ uint64_t hz = rte_get_timer_hz(); /* Call rte_timer_manage every 10ms */ TIMER_RESOLUTION_CYCLES = hz / 100; rte_log(RTE_LOG_INFO, RTE_LOGTYPE_USER1, "Clock: %"PRIu64"\n", hz); /* master core */ master_core = rte_lcore_id(); /* slave core */ lcore_id = rte_get_next_lcore(master_core, 0, 1); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER1, "lcore_id: %d\n", lcore_id); rte_timer_reset(&timer, hz, PERIODICAL, lcore_id, check_timeout, proposer); /* reset timer */ rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id); /* stat core */ lcore_id = rte_get_next_lcore(lcore_id , 0, 1); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER1, "lcore_id: %d\n", lcore_id); rte_timer_reset(&stat_timer, hz, PERIODICAL, lcore_id, report_stat, NULL); /* init RTE timer library */ rte_timer_subsystem_init(); mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); if (mbuf_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot create mbuf_pool\n"); /* reset timer */ rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id); if (port_init(portid, mbuf_pool, proposer) != 0) rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", portid); lcore_main(portid, proposer); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "Free proposer\n"); proposer_free(proposer); return 0; }
/* * Initialize a given port using default settings and with the RX buffers * coming from the mbuf_pool passed as a parameter. * FIXME: Starting with assumption of one thread/core per port */ static inline int uhd_dpdk_port_init(struct uhd_dpdk_port *port, struct rte_mempool *rx_mbuf_pool, unsigned int mtu) { int retval; /* Check for a valid port */ if (port->id >= rte_eth_dev_count()) return -ENODEV; /* Set up Ethernet device with defaults (1 RX ring, 1 TX ring) */ /* FIXME: Check if hw_ip_checksum is possible */ struct rte_eth_conf port_conf = { .rxmode = { .max_rx_pkt_len = mtu, .jumbo_frame = 1, .hw_ip_checksum = 1, } }; retval = rte_eth_dev_configure(port->id, 1, 1, &port_conf); if (retval != 0) return retval; retval = rte_eth_rx_queue_setup(port->id, 0, DEFAULT_RING_SIZE, rte_eth_dev_socket_id(port->id), NULL, rx_mbuf_pool); if (retval < 0) return retval; retval = rte_eth_tx_queue_setup(port->id, 0, DEFAULT_RING_SIZE, rte_eth_dev_socket_id(port->id), NULL); if (retval < 0) goto port_init_fail; /* Create the hash table for the RX sockets */ char name[32]; snprintf(name, sizeof(name), "rx_table_%u", port->id); struct rte_hash_parameters hash_params = { .name = name, .entries = UHD_DPDK_MAX_SOCKET_CNT, .key_len = sizeof(struct uhd_dpdk_ipv4_5tuple), .hash_func = NULL, .hash_func_init_val = 0, }; port->rx_table = rte_hash_create(&hash_params); if (port->rx_table == NULL) { retval = rte_errno; goto port_init_fail; } /* Create ARP table */ snprintf(name, sizeof(name), "arp_table_%u", port->id); hash_params.name = name; hash_params.entries = UHD_DPDK_MAX_SOCKET_CNT; hash_params.key_len = sizeof(uint32_t); hash_params.hash_func = NULL; hash_params.hash_func_init_val = 0; port->arp_table = rte_hash_create(&hash_params); if (port->arp_table == NULL) { retval = rte_errno; goto free_rx_table; } /* Set up list for TX queues */ LIST_INIT(&port->txq_list); /* Start the Ethernet port. */ retval = rte_eth_dev_start(port->id); if (retval < 0) { goto free_arp_table; } /* Display the port MAC address. */ rte_eth_macaddr_get(port->id, &port->mac_addr); RTE_LOG(INFO, EAL, "Port %u MAC: %02x %02x %02x %02x %02x %02x\n", (unsigned)port->id, port->mac_addr.addr_bytes[0], port->mac_addr.addr_bytes[1], port->mac_addr.addr_bytes[2], port->mac_addr.addr_bytes[3], port->mac_addr.addr_bytes[4], port->mac_addr.addr_bytes[5]); struct rte_eth_link link; rte_eth_link_get(port->id, &link); RTE_LOG(INFO, EAL, "Port %u UP: %d\n", port->id, link.link_status); return 0; free_arp_table: rte_hash_free(port->arp_table); free_rx_table: rte_hash_free(port->rx_table); port_init_fail: return rte_errno; } static int uhd_dpdk_thread_init(struct uhd_dpdk_thread *thread, unsigned int id) { if (!ctx || !thread) return -EINVAL; unsigned int socket_id = rte_lcore_to_socket_id(id); thread->id = id; thread->rx_pktbuf_pool = ctx->rx_pktbuf_pools[socket_id]; thread->tx_pktbuf_pool = ctx->tx_pktbuf_pools[socket_id]; LIST_INIT(&thread->port_list); char name[32]; snprintf(name, sizeof(name), "sockreq_ring_%u", id); thread->sock_req_ring = rte_ring_create( name, UHD_DPDK_MAX_PENDING_SOCK_REQS, socket_id, RING_F_SC_DEQ ); if (!thread->sock_req_ring) return -ENOMEM; return 0; } int uhd_dpdk_init(int argc, char **argv, unsigned int num_ports, int *port_thread_mapping, int num_mbufs, int mbuf_cache_size, int mtu) { /* Init context only once */ if (ctx) return 1; if ((num_ports == 0) || (port_thread_mapping == NULL)) { return -EINVAL; } /* Grabs arguments intended for DPDK's EAL */ int ret = rte_eal_init(argc, argv); if (ret < 0) rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); ctx = (struct uhd_dpdk_ctx *) rte_zmalloc("uhd_dpdk_ctx", sizeof(*ctx), rte_socket_id()); if (!ctx) return -ENOMEM; ctx->num_threads = rte_lcore_count(); if (ctx->num_threads <= 1) rte_exit(EXIT_FAILURE, "Error: No worker threads enabled\n"); /* Check that we have ports to send/receive on */ ctx->num_ports = rte_eth_dev_count(); if (ctx->num_ports < 1) rte_exit(EXIT_FAILURE, "Error: Found no ports\n"); if (ctx->num_ports < num_ports) rte_exit(EXIT_FAILURE, "Error: User requested more ports than available\n"); /* Get memory for thread and port data structures */ ctx->threads = rte_zmalloc("uhd_dpdk_thread", RTE_MAX_LCORE*sizeof(struct uhd_dpdk_thread), 0); if (!ctx->threads) rte_exit(EXIT_FAILURE, "Error: Could not allocate memory for thread data\n"); ctx->ports = rte_zmalloc("uhd_dpdk_port", ctx->num_ports*sizeof(struct uhd_dpdk_port), 0); if (!ctx->ports) rte_exit(EXIT_FAILURE, "Error: Could not allocate memory for port data\n"); /* Initialize the thread data structures */ for (int i = rte_get_next_lcore(-1, 1, 0); (i < RTE_MAX_LCORE); i = rte_get_next_lcore(i, 1, 0)) { /* Do one mempool of RX/TX per socket */ unsigned int socket_id = rte_lcore_to_socket_id(i); /* FIXME Probably want to take into account actual number of ports per socket */ if (ctx->tx_pktbuf_pools[socket_id] == NULL) { /* Creates a new mempool in memory to hold the mbufs. * This is done for each CPU socket */ const int mbuf_size = mtu + 2048 + RTE_PKTMBUF_HEADROOM; char name[32]; snprintf(name, sizeof(name), "rx_mbuf_pool_%u", socket_id); ctx->rx_pktbuf_pools[socket_id] = rte_pktmbuf_pool_create( name, ctx->num_ports*num_mbufs, mbuf_cache_size, 0, mbuf_size, socket_id ); snprintf(name, sizeof(name), "tx_mbuf_pool_%u", socket_id); ctx->tx_pktbuf_pools[socket_id] = rte_pktmbuf_pool_create( name, ctx->num_ports*num_mbufs, mbuf_cache_size, 0, mbuf_size, socket_id ); if ((ctx->rx_pktbuf_pools[socket_id]== NULL) || (ctx->tx_pktbuf_pools[socket_id]== NULL)) rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); } if (uhd_dpdk_thread_init(&ctx->threads[i], i) < 0) rte_exit(EXIT_FAILURE, "Error initializing thread %i\n", i); } unsigned master_lcore = rte_get_master_lcore(); /* Assign ports to threads and initialize the port data structures */ for (unsigned int i = 0; i < num_ports; i++) { int thread_id = port_thread_mapping[i]; if (thread_id < 0) continue; if (((unsigned int) thread_id) == master_lcore) RTE_LOG(WARNING, EAL, "User requested master lcore for port %u\n", i); if (ctx->threads[thread_id].id != (unsigned int) thread_id) rte_exit(EXIT_FAILURE, "Requested inactive lcore %u for port %u\n", (unsigned int) thread_id, i); struct uhd_dpdk_port *port = &ctx->ports[i]; port->id = i; port->parent = &ctx->threads[thread_id]; ctx->threads[thread_id].num_ports++; LIST_INSERT_HEAD(&ctx->threads[thread_id].port_list, port, port_entry); /* Initialize port. */ if (uhd_dpdk_port_init(port, port->parent->rx_pktbuf_pool, mtu) != 0) rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n", i); } RTE_LOG(INFO, EAL, "Init DONE!\n"); /* FIXME: Create functions to do this */ RTE_LOG(INFO, EAL, "Starting I/O threads!\n"); for (int i = rte_get_next_lcore(-1, 1, 0); (i < RTE_MAX_LCORE); i = rte_get_next_lcore(i, 1, 0)) { struct uhd_dpdk_thread *t = &ctx->threads[i]; if (!LIST_EMPTY(&t->port_list)) { rte_eal_remote_launch(_uhd_dpdk_driver_main, NULL, ctx->threads[i].id); } } return 0; } /* FIXME: This will be changed once we have functions to handle the threads */ int uhd_dpdk_destroy(void) { if (!ctx) return -ENODEV; struct uhd_dpdk_config_req *req = (struct uhd_dpdk_config_req *) rte_zmalloc(NULL, sizeof(*req), 0); if (!req) return -ENOMEM; req->req_type = UHD_DPDK_LCORE_TERM; for (int i = rte_get_next_lcore(-1, 1, 0); (i < RTE_MAX_LCORE); i = rte_get_next_lcore(i, 1, 0)) { struct uhd_dpdk_thread *t = &ctx->threads[i]; if (LIST_EMPTY(&t->port_list)) continue; if (rte_eal_get_lcore_state(t->id) == FINISHED) continue; pthread_mutex_init(&req->mutex, NULL); pthread_cond_init(&req->cond, NULL); pthread_mutex_lock(&req->mutex); if (rte_ring_enqueue(t->sock_req_ring, req)) { pthread_mutex_unlock(&req->mutex); RTE_LOG(ERR, USER2, "Failed to terminate thread %d\n", i); rte_free(req); return -ENOSPC; } struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 }; pthread_cond_timedwait(&req->cond, &req->mutex, &timeout); pthread_mutex_unlock(&req->mutex); } rte_free(req); return 0; }