static void send_paxos_message(paxos_message *pm) { uint8_t port_id = 0; struct rte_mbuf *created_pkt = rte_pktmbuf_alloc(mbuf_pool); created_pkt->l2_len = sizeof(struct ether_hdr); created_pkt->l3_len = sizeof(struct ipv4_hdr); created_pkt->l4_len = sizeof(struct udp_hdr) + sizeof(paxos_message); craft_new_packet(&created_pkt, IPv4(192,168,4,99), ACCEPTOR_ADDR, PROPOSER_PORT, ACCEPTOR_PORT, sizeof(paxos_message), port_id); /* struct udp_hdr *udp; */ size_t udp_offset = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr); /* udp = rte_pktmbuf_mtod_offset(created_pkt, struct udp_hdr *, udp_offset); */ size_t paxos_offset = udp_offset + sizeof(struct udp_hdr); struct paxos_hdr *px = rte_pktmbuf_mtod_offset(created_pkt, struct paxos_hdr *, paxos_offset); px->msgtype = rte_cpu_to_be_16(pm->type); px->inst = rte_cpu_to_be_32(pm->u.accept.iid); px->rnd = rte_cpu_to_be_16(pm->u.accept.ballot); px->vrnd = rte_cpu_to_be_16(pm->u.accept.value_ballot); px->acptid = 0; rte_memcpy(px->paxosval, pm->u.accept.value.paxos_value_val, pm->u.accept.value.paxos_value_len); created_pkt->ol_flags = PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM; const uint16_t nb_tx = rte_eth_tx_burst(port_id, 0, &created_pkt, 1); if (nb_tx == 0) rte_pktmbuf_free(created_pkt); /* tx_burst takes ownership of transmitted mbufs; free only if nothing was sent */ rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "Send %d messages\n", nb_tx); }
/* byteswap to cpu or network order */ static void bswap_test_data(struct ipv4_7tuple *data, int len, int to_be) { int i; for (i = 0; i < len; i++) { if (to_be) { /* swap all bytes so that they are in network order */ data[i].ip_dst = rte_cpu_to_be_32(data[i].ip_dst); data[i].ip_src = rte_cpu_to_be_32(data[i].ip_src); data[i].port_dst = rte_cpu_to_be_16(data[i].port_dst); data[i].port_src = rte_cpu_to_be_16(data[i].port_src); data[i].vlan = rte_cpu_to_be_16(data[i].vlan); data[i].domain = rte_cpu_to_be_16(data[i].domain); } else { data[i].ip_dst = rte_be_to_cpu_32(data[i].ip_dst); data[i].ip_src = rte_be_to_cpu_32(data[i].ip_src); data[i].port_dst = rte_be_to_cpu_16(data[i].port_dst); data[i].port_src = rte_be_to_cpu_16(data[i].port_src); data[i].vlan = rte_be_to_cpu_16(data[i].vlan); data[i].domain = rte_be_to_cpu_16(data[i].domain); } } }
static void prepare_pkt(struct rte_mbuf *mbuf) { struct ether_hdr *eth_hdr; struct vlan_hdr *vlan1, *vlan2; struct ipv4_hdr *ip_hdr; /* Simulate a classifier */ eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); vlan1 = (struct vlan_hdr *)(&eth_hdr->ether_type); vlan2 = (struct vlan_hdr *)((uintptr_t)&eth_hdr->ether_type + sizeof(struct vlan_hdr)); eth_hdr = (struct ether_hdr *)((uintptr_t)&eth_hdr->ether_type + 2 * sizeof(struct vlan_hdr)); ip_hdr = (struct ipv4_hdr *)((uintptr_t)eth_hdr + sizeof(eth_hdr->ether_type)); vlan1->vlan_tci = rte_cpu_to_be_16(SUBPORT); vlan2->vlan_tci = rte_cpu_to_be_16(PIPE); eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE); rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE, e_RTE_METER_YELLOW); /* 64 byte packet */ mbuf->pkt.pkt_len = 60; mbuf->pkt.data_len = 60; }
void xmit_arp_req(struct gatekeeper_if *iface, const struct ipaddr *addr, const struct ether_addr *ha, uint16_t tx_queue) { struct rte_mbuf *created_pkt; struct ether_hdr *eth_hdr; struct arp_hdr *arp_hdr; size_t pkt_size; struct lls_config *lls_conf = get_lls_conf(); int ret; struct rte_mempool *mp = lls_conf->net->gatekeeper_pktmbuf_pool[ rte_lcore_to_socket_id(lls_conf->lcore_id)]; created_pkt = rte_pktmbuf_alloc(mp); if (created_pkt == NULL) { LLS_LOG(ERR, "Could not alloc a packet for an ARP request\n"); return; } pkt_size = iface->l2_len_out + sizeof(struct arp_hdr); created_pkt->data_len = pkt_size; created_pkt->pkt_len = pkt_size; /* Set-up Ethernet header. */ eth_hdr = rte_pktmbuf_mtod(created_pkt, struct ether_hdr *); ether_addr_copy(&iface->eth_addr, &eth_hdr->s_addr); if (ha == NULL) memset(&eth_hdr->d_addr, 0xFF, ETHER_ADDR_LEN); else ether_addr_copy(ha, &eth_hdr->d_addr); /* Set-up VLAN header. */ if (iface->vlan_insert) fill_vlan_hdr(eth_hdr, iface->vlan_tag_be, ETHER_TYPE_ARP); else eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP); /* Set-up ARP header. */ arp_hdr = pkt_out_skip_l2(iface, eth_hdr); arp_hdr->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER); arp_hdr->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4); arp_hdr->arp_hln = ETHER_ADDR_LEN; arp_hdr->arp_pln = sizeof(struct in_addr); arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REQUEST); ether_addr_copy(&iface->eth_addr, &arp_hdr->arp_data.arp_sha); arp_hdr->arp_data.arp_sip = iface->ip4_addr.s_addr; memset(&arp_hdr->arp_data.arp_tha, 0, ETHER_ADDR_LEN); arp_hdr->arp_data.arp_tip = addr->ip.v4.s_addr; ret = rte_eth_tx_burst(iface->id, tx_queue, &created_pkt, 1); if (ret <= 0) { rte_pktmbuf_free(created_pkt); LLS_LOG(ERR, "Could not transmit an ARP request\n"); } }
static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf) { rte_memcpy(dst, src, sizeof(*dst)); fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); dst->fragment_offset = rte_cpu_to_be_16(fofs); dst->total_length = rte_cpu_to_be_16(len); dst->hdr_checksum = 0; }
void init_hdr_templates(void) { memset(ip_hdr_template, 0, sizeof(ip_hdr_template)); memset(l2_hdr_template, 0, sizeof(l2_hdr_template)); ip_hdr_template[0].version_ihl = IP_VHL_DEF; ip_hdr_template[0].type_of_service = (2 << 2); // default DSCP 2 ip_hdr_template[0].total_length = 0; ip_hdr_template[0].packet_id = 0; ip_hdr_template[0].fragment_offset = IP_DN_FRAGMENT_FLAG; ip_hdr_template[0].time_to_live = IP_DEFTTL; ip_hdr_template[0].next_proto_id = IPPROTO_IP; ip_hdr_template[0].hdr_checksum = 0; ip_hdr_template[0].src_addr = rte_cpu_to_be_32(0x00000000); ip_hdr_template[0].dst_addr = rte_cpu_to_be_32(0x07010101); l2_hdr_template[0].d_addr.addr_bytes[0] = 0x0a; l2_hdr_template[0].d_addr.addr_bytes[1] = 0x00; l2_hdr_template[0].d_addr.addr_bytes[2] = 0x27; l2_hdr_template[0].d_addr.addr_bytes[3] = 0x00; l2_hdr_template[0].d_addr.addr_bytes[4] = 0x00; l2_hdr_template[0].d_addr.addr_bytes[5] = 0x01; l2_hdr_template[0].s_addr.addr_bytes[0] = 0x08; l2_hdr_template[0].s_addr.addr_bytes[1] = 0x00; l2_hdr_template[0].s_addr.addr_bytes[2] = 0x27; l2_hdr_template[0].s_addr.addr_bytes[3] = 0x7d; l2_hdr_template[0].s_addr.addr_bytes[4] = 0xc7; l2_hdr_template[0].s_addr.addr_bytes[5] = 0x68; l2_hdr_template[0].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); return; }
static int init_traffic(struct rte_mempool *mp, struct rte_mbuf **pkts_burst, uint32_t burst_size) { struct ether_hdr pkt_eth_hdr; struct ipv4_hdr pkt_ipv4_hdr; struct udp_hdr pkt_udp_hdr; uint32_t pktlen; static uint8_t src_mac[] = { 0x00, 0xFF, 0xAA, 0xFF, 0xAA, 0xFF }; static uint8_t dst_mac[] = { 0x00, 0xAA, 0xFF, 0xAA, 0xFF, 0xAA }; initialize_eth_header(&pkt_eth_hdr, (struct ether_addr *)src_mac, (struct ether_addr *)dst_mac, 1, 0, 0); pkt_eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); pktlen = initialize_ipv4_header(&pkt_ipv4_hdr, IPV4_ADDR(10, 0, 0, 1), IPV4_ADDR(10, 0, 0, 2), 26); printf("IPv4 pktlen %u\n", pktlen); pktlen = initialize_udp_header(&pkt_udp_hdr, 0, 0, 18); printf("UDP pktlen %u\n", pktlen); return generate_packet_burst(mp, pkts_burst, &pkt_eth_hdr, 0, &pkt_ipv4_hdr, 1, &pkt_udp_hdr, burst_size, PACKET_BURST_GEN_PKT_LEN, 1); }
static inline void __fill_ipv6hdr_frag(struct ipv6_hdr *dst, const struct ipv6_hdr *src, uint16_t len, uint16_t fofs, uint32_t mf) { struct ipv6_extension_fragment *fh; rte_memcpy(dst, src, sizeof(*dst)); dst->payload_len = rte_cpu_to_be_16(len); dst->proto = IPPROTO_FRAGMENT; fh = (struct ipv6_extension_fragment *) ++dst; fh->next_header = src->proto; fh->reserved = 0; fh->frag_data = rte_cpu_to_be_16(RTE_IPV6_SET_FRAG_DATA(fofs, mf)); fh->id = 0; }
void *thr(void* arg) { struct node* n = arg; struct rte_mbuf* restrict pkts[32]; int i; int q = n->queue; int start_sec = cursec(); int rcvd = 0; int sent = 0; init_thread(n->tid, n->core); if (q >= 20) { printf("Somehow, queue beyond 20\n"); } while(1) { /*int recv;*/ i = mbuf_alloc_bulk(pkts, 60, 32); if (i != 0) { printf("Error allocating packets %d\n", i); break; } else { int send, recv; /* Start setting MAC address */ for (i = 0; i < 32; i++) { struct ether_hdr* hdr = rte_pktmbuf_mtod(pkts[i], struct ether_hdr*); hdr->d_addr.addr_bytes[5] = (10 * q) + 1; hdr->s_addr.addr_bytes[5] = (10 * q) + 2; hdr->ether_type = rte_cpu_to_be_16(0x0800); /*rte_mbuf_sanity_check(pkts[i], 1);*/ } send = send_pkts(PORT_OUT, q, pkts, 32); for (i = send; i < 32; i++) { mbuf_free(pkts[i]); } recv = recv_pkts(PORT_IN, q, pkts, 32); rcvd += recv; sent += send; if (cursec() != start_sec) { printf("%d %d rx=%d tx=%d\n", n->core, (cursec() - start_sec), rcvd, sent); /*printf("recv_pkt\n");*/ /*rte_pktmbuf_dump(stdout, pkts[0], 16384);*/ start_sec = cursec(); rcvd = 0; sent = 0; } for (int i = 0; i < recv; i++) { mbuf_free(pkts[i]); } } } printf("Socket ID (%d) is %d. DONE\n", n->core, rte_socket_id()); return NULL; }
static inline size_t get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto) { size_t vlan_offset = 0; if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); vlan_offset = sizeof(struct vlan_hdr); *proto = vlan_hdr->eth_proto; if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { vlan_hdr = vlan_hdr + 1; *proto = vlan_hdr->eth_proto; vlan_offset += sizeof(struct vlan_hdr); } } return vlan_offset; }
/* Add hardware filter */ int vr_dpdk_ethdev_filter_add(struct vr_interface *vif, uint16_t queue_id, unsigned dst_ip, unsigned mpls_label) { struct vr_dpdk_ethdev *ethdev = (struct vr_dpdk_ethdev *)vif->vif_os; uint8_t port_id = ethdev->ethdev_port_id; struct rte_fdir_filter filter; int ret; /* accept 2-byte labels only */ if (mpls_label > 0xffff) return -EINVAL; if (queue_id > VR_DPDK_MAX_NB_RX_QUEUES) return -EINVAL; memset(&filter, 0, sizeof(filter)); filter.iptype = RTE_FDIR_IPTYPE_IPV4; filter.l4type = RTE_FDIR_L4TYPE_UDP; filter.ip_dst.ipv4_addr = dst_ip; filter.port_dst = rte_cpu_to_be_16((uint16_t)VR_MPLS_OVER_UDP_DST_PORT); filter.flex_bytes = rte_cpu_to_be_16((uint16_t)mpls_label); RTE_LOG(DEBUG, VROUTER, "%s: ip_dst=0x%x port_dst=%d flex_bytes=%d\n", __func__, (unsigned)dst_ip, (unsigned)VR_MPLS_OVER_UDP_DST_PORT, (unsigned)mpls_label); if (queue_id >= 0xFF) { RTE_LOG(ERR, VROUTER, " error adding perfect filter for eth device %" PRIu8 ": queue ID %" PRIu16 " is out of range\n", port_id, queue_id); return -EINVAL; } ret = rte_eth_dev_fdir_add_perfect_filter(port_id, &filter, (uint16_t)mpls_label, (uint8_t)queue_id, 0); if (ret == 0) ethdev->ethdev_queue_states[queue_id] = VR_DPDK_QUEUE_FILTERING_STATE; return ret; }
static struct rte_mbuf *build_ip_packet(const char *src_ip, const char *dst_ip, uint16_t data) { struct rte_mempool *mp = pg_get_mempool(); struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp); uint16_t len = sizeof(struct ether_hdr) + sizeof(struct ip) + sizeof(uint16_t); struct ether_hdr *eth; struct ip *ip; uint16_t *payload_ip; pkt->pkt_len = len; pkt->data_len = len; pkt->nb_segs = 1; pkt->next = NULL; /* ethernet header */ eth = rte_pktmbuf_mtod(pkt, struct ether_hdr*); memset(eth, 0, len); eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); /* ipv4 header */ ip = (struct ip *)(eth + 1); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_off = 0; ip->ip_ttl = 64; /* FEAR ! this is CHAOS ! */ ip->ip_p = 16; ip->ip_len = htons(sizeof(struct ip) + 1); ip->ip_src.s_addr = inet_addr(src_ip); ip->ip_dst.s_addr = inet_addr(dst_ip); /* write some data */ payload_ip = (uint16_t *)(ip + 1); *payload_ip = data; return pkt; }
static struct rte_mbuf *build_non_ip_packet(void) { struct rte_mempool *mp = pg_get_mempool(); struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp); uint8_t *payload; struct ether_hdr *eth; uint16_t len = sizeof(struct ether_hdr) + 1; pkt->pkt_len = len; pkt->data_len = len; pkt->nb_segs = 1; pkt->next = NULL; /* ethernet header */ eth = rte_pktmbuf_mtod(pkt, struct ether_hdr*); memset(eth, 0, len); eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP); /* write some data */ payload = (uint8_t *)(eth + 1); *payload = 42; return pkt; }
/* * The function is used to insert or update VLAN tag; * the firmware has state of the firmware tag to insert per TxQ * (controlled by option descriptors), hence, if the tag of the * packet to be sent is different from one remembered by the firmware, * the function will update it */ static unsigned int sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m, efx_desc_t **pend) { uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ? m->vlan_tci : 0); if (this_tag == txq->hw_vlan_tci) return 0; /* * The expression inside SFC_ASSERT() is not desired to be checked in * a non-debug build because it might be too expensive on the data path */ SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled); efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag), *pend); (*pend)++; txq->hw_vlan_tci = this_tag; return 1; }
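/*
 * Illustration (not part of the original file): the idea above is to remember
 * the VLAN TCI last pushed to the hardware and emit an extra option descriptor
 * only when the next packet carries a different tag. The struct, function and
 * emit() callback below are hypothetical names used purely for this sketch;
 * it assumes <stdint.h> and <rte_byteorder.h>.
 */
struct vlan_state { uint16_t hw_vlan_tci; };

static unsigned int
maybe_emit_vlan_descriptor(struct vlan_state *st, uint16_t pkt_vlan_tci,
                           void (*emit)(uint16_t tci_be))
{
    if (pkt_vlan_tci == st->hw_vlan_tci)
        return 0;                          /* hardware already holds this tag */
    emit(rte_cpu_to_be_16(pkt_vlan_tci));  /* one extra descriptor is consumed */
    st->hw_vlan_tci = pkt_vlan_tci;
    return 1;
}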
void app_main_loop_rx_flow(void) { const unsigned lcore_id = rte_lcore_id(); struct rte_mbuf *bufs[RX_BURST_SIZE]; struct rte_mbuf *buf; struct ether_hdr *eth_hdr; struct ipv4_hdr *ipv4_hdr; struct ipv6_hdr *ipv6_hdr; struct tcp_hdr *tcp_hdr; struct udp_hdr *udp_hdr; struct pkt_info pktinfo; int32_t ret; uint16_t i, n_rx, queueid; uint8_t port; port = 0; queueid = (uint16_t) app.lcore_conf[lcore_id].queue_id; RTE_LOG(INFO, FLOWATCHER, "[core %u] packet RX & update flow_table Ready\n", lcore_id); while (!app_quit_signal) { n_rx = rte_eth_rx_burst(port, queueid, bufs, RX_BURST_SIZE); if (unlikely(n_rx == 0)) { port++; if (port >= app.n_ports) port = 0; continue; } app_stat[queueid].rx_count += n_rx; for (i = 0; i < n_rx; i++) { buf = bufs[i]; pktinfo.timestamp = rte_rdtsc(); pktinfo.pktlen = rte_pktmbuf_pkt_len(buf); eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); /* strip vlan_hdr */ if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { /* struct vlan_hdr *vh = (struct vlan_hdr *) &eth_hdr[1]; */ /* buf->ol_flags |= PKT_RX_VLAN_PKT; */ /* buf->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); */ /* memmove(rte_pktmbuf_adj(buf, sizeof(struct vlan_hdr)), */ /* eth_hdr, 2 * ETHER_ADDR_LEN); */ /* eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); */ eth_hdr = (struct ether_hdr *) rte_pktmbuf_adj(buf, sizeof(struct vlan_hdr)); } if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { /* IPv4 */ pktinfo.type = PKT_IP_TYPE_IPV4; ipv4_hdr = (struct ipv4_hdr *) &eth_hdr[1]; pktinfo.key.v4.src_ip = rte_be_to_cpu_32(ipv4_hdr->src_addr); pktinfo.key.v4.dst_ip = rte_be_to_cpu_32(ipv4_hdr->dst_addr); pktinfo.key.v4.proto = ipv4_hdr->next_proto_id; switch (ipv4_hdr->next_proto_id) { case IPPROTO_TCP: tcp_hdr = (struct tcp_hdr *) &ipv4_hdr[1]; pktinfo.key.v4.src_port = rte_be_to_cpu_16(tcp_hdr->src_port); pktinfo.key.v4.dst_port = rte_be_to_cpu_16(tcp_hdr->dst_port); break; case IPPROTO_UDP: udp_hdr = (struct udp_hdr *) &ipv4_hdr[1]; pktinfo.key.v4.src_port = rte_be_to_cpu_16(udp_hdr->src_port); pktinfo.key.v4.dst_port = rte_be_to_cpu_16(udp_hdr->dst_port); break; default: pktinfo.key.v4.src_port = 0; pktinfo.key.v4.dst_port = 0; break; } rte_pktmbuf_free(buf); /* update flow_table_v4 */ ret = update_flow_entry(app.flow_table_v4[queueid], &pktinfo); if (ret == 0) app_stat[queueid].updated_tbl_v4_count++; else app_stat[queueid].miss_updated_tbl_v4_count++; } else if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { /* IPv6 */ pktinfo.type = PKT_IP_TYPE_IPV6; ipv6_hdr = (struct ipv6_hdr *) &eth_hdr[1]; rte_memcpy(pktinfo.key.v6.src_ip, ipv6_hdr->src_addr, 16); rte_memcpy(pktinfo.key.v6.dst_ip, ipv6_hdr->dst_addr, 16); pktinfo.key.v6.proto = ipv6_hdr->proto; switch (ipv6_hdr->proto) { case IPPROTO_TCP: tcp_hdr = (struct tcp_hdr *) &ipv6_hdr[1]; pktinfo.key.v6.src_port = rte_be_to_cpu_16(tcp_hdr->src_port); pktinfo.key.v6.dst_port = rte_be_to_cpu_16(tcp_hdr->dst_port); break; case IPPROTO_UDP: udp_hdr = (struct udp_hdr *) &ipv6_hdr[1]; pktinfo.key.v6.src_port = rte_be_to_cpu_16(udp_hdr->src_port); pktinfo.key.v6.dst_port = rte_be_to_cpu_16(udp_hdr->dst_port); break; default: pktinfo.key.v6.src_port = 0; pktinfo.key.v6.dst_port = 0; break; } rte_pktmbuf_free(buf); /* update flow_table_v6 */ ret = update_flow_entry(app.flow_table_v6[queueid], &pktinfo); if (ret == 0) app_stat[queueid].updated_tbl_v6_count++; else app_stat[queueid].miss_updated_tbl_v6_count++; } else { /* others */ app_stat[queueid].unknown_pkt_count++; rte_pktmbuf_free(buf); continue; } } port++; if (port >= app.n_ports)
port = 0; } RTE_LOG(INFO, FLOWATCHER, "[core %u] packet RX & update flow_table finished\n", lcore_id); }
static int neigh_update(struct nlmsghdr *nlh) { int err; int len = nlh->nlmsg_len; uint32_t index; char *pifname; char if_name[IF_NAMESIZE]; char buf[512] = {0}; struct ndmsg *ndm; struct rtattr *rta; struct rtattr *tb[NDA_MAX+1]; struct nda_cacheinfo *ci = NULL; struct msg_hdr *hdr; struct arp_add *arp_add; struct arp_del *arp_del; struct route_add *rt_add; struct route_del *rt_del; len -= NLMSG_LENGTH(sizeof(*ndm)); if (len < 0) return -1; ndm = NLMSG_DATA(nlh); hdr = (struct msg_hdr *)buf; if (ndm->ndm_type != RTN_UNICAST) return 0; if (AF_INET != ndm->ndm_family && AF_INET6 != ndm->ndm_family) { fastpath_log_debug("family %d error.\n", ndm->ndm_family); return 0; } index = get_port_map(ndm->ndm_ifindex); if (index >= ROUTE_MAX_LINK) { fastpath_log_debug("ifidx %d not concerned\n", ndm->ndm_ifindex); return 0; } pifname = if_indextoname(ndm->ndm_ifindex, if_name); if (pifname == NULL) { fastpath_log_error("%s:get if name by ifindex:%d err\n", __func__, ndm->ndm_ifindex); return -EIO; } rta = (struct rtattr*)((char*)ndm + NLMSG_ALIGN(sizeof(struct ndmsg))); rtattr_parse(tb, NDA_MAX, rta, len); if (NULL == tb[NDA_DST]) { fastpath_log_error( "nda dst is null.\n"); return -EINVAL; } if (NULL != tb[NDA_CACHEINFO]) { ci = RTA_DATA(tb[NDA_CACHEINFO]); } fastpath_log_debug( "%s: neigh update, family %d, ifidx %d, eif%d, state 0x%02x\n", __func__, ndm->ndm_family, ndm->ndm_ifindex, index, ndm->ndm_state); if (ndm->ndm_state & NUD_FAILED || (ci && (ci->ndm_refcnt == 0))) { hdr->cmd = ROUTE_MSG_DEL_NEIGH; arp_del = (struct arp_del *)hdr->data; arp_del->nh_iface = rte_cpu_to_be_32(index); memcpy(&arp_del->nh_ip, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); err = route_send(hdr); if (err != 0) { fastpath_log_error( "neigh_update: send neigh failed\n", __func__); } hdr->cmd = ROUTE_MSG_DEL_NH; rt_del = (struct route_del *)hdr->data; memcpy(&rt_del->ip, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); rt_del->depth = 32; err = route_send(hdr); if (err != 0) { fastpath_log_error( "neigh_update: send nh failed\n"); } } else /* if (ndm->ndm_state & (NUD_REACHABLE | NUD_PERMANENT)) */ { hdr->cmd = ROUTE_MSG_ADD_NEIGH; arp_add = (struct arp_add *)hdr->data; arp_add->nh_iface = rte_cpu_to_be_32(index); memcpy(&arp_add->nh_ip, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); arp_add->type = rte_cpu_to_be_16(NEIGH_TYPE_REACHABLE); if (NULL != tb[NDA_LLADDR]) { memcpy(&arp_add->nh_arp, (char*)RTA_DATA(tb[NDA_LLADDR]), RTA_PAYLOAD(tb[NDA_LLADDR])); } err = route_send(hdr); if (err != 0) { fastpath_log_error( "neigh_update: send neigh failed\n", __func__); } hdr->cmd = ROUTE_MSG_ADD_NH; rt_add = (struct route_add *)hdr->data; memcpy(&rt_add->ip, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); rt_add->depth = 32; memcpy(&rt_add->nh_ip, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); rt_add->nh_iface = rte_cpu_to_be_32(index); err = route_send(hdr); if (err != 0) { fastpath_log_error( "neigh_update: send nh failed\n"); } } #if 0 else {
static int vr_usocket_bind(struct vr_usocket *usockp) { int error = 0; struct sockaddr_in sin; struct sockaddr_un sun; struct sockaddr *addr = NULL; socklen_t addrlen = 0; int optval; bool server; optval = 1; RTE_LOG(DEBUG, USOCK, "%s[%lx]: FD %d setting option\n", __func__, pthread_self(), usockp->usock_fd); if (setsockopt(usockp->usock_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval))) return -errno; switch (usockp->usock_type) { case TCP: sin.sin_family = AF_INET; sin.sin_port = rte_cpu_to_be_16(VR_NETLINK_TCP_PORT); sin.sin_addr.s_addr = INADDR_ANY; addr = (struct sockaddr *)&sin; addrlen = sizeof(sin); server = true; break; case UNIX: sun.sun_family = AF_UNIX; memset(sun.sun_path, 0, sizeof(sun.sun_path)); strncpy(sun.sun_path, VR_NETLINK_UNIX_FILE, sizeof(sun.sun_path) - 1); addr = (struct sockaddr *)&sun; addrlen = sizeof(sun); server = true; mkdir(VR_SOCKET_DIR, VR_SOCKET_DIR_MODE); unlink(sun.sun_path); break; case RAW: sun.sun_family = AF_UNIX; memset(sun.sun_path, 0, sizeof(sun.sun_path)); strncpy(sun.sun_path, VR_PACKET_UNIX_FILE, sizeof(sun.sun_path) - 1); addr = (struct sockaddr *)&sun; addrlen = sizeof(sun); server = false; mkdir(VR_SOCKET_DIR, VR_SOCKET_DIR_MODE); unlink(sun.sun_path); break; default: return -EINVAL; } #ifdef VR_DPDK_USOCK_DUMP RTE_LOG(DEBUG, USOCK, "%s[%lx]: FD %d binding\n", __func__, pthread_self(), usockp->usock_fd); rte_hexdump(stdout, "usock address dump:", addr, addrlen); #endif error = bind(usockp->usock_fd, addr, addrlen); if (error < 0) return error; if (server) { RTE_LOG(DEBUG, USOCK, "%s[%lx]: FD %d listening\n", __func__, pthread_self(), usockp->usock_fd); error = listen(usockp->usock_fd, 1); if (error < 0) return error; usockp->usock_state = LISTENING; } return 0; }
/* * Reassemble fragments into one packet. */ struct rte_mbuf * ipv6_frag_reassemble(const struct ip_frag_pkt *fp) { struct ipv6_hdr *ip_hdr; struct ipv6_extension_fragment *frag_hdr; struct rte_mbuf *m, *prev; uint32_t i, n, ofs, first_len; uint32_t last_len, move_len, payload_len; first_len = fp->frags[IP_FIRST_FRAG_IDX].len; n = fp->last_idx - 1; /*start from the last fragment. */ m = fp->frags[IP_LAST_FRAG_IDX].mb; ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; last_len = fp->frags[IP_LAST_FRAG_IDX].len; payload_len = ofs + last_len; while (ofs != first_len) { prev = m; for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { /* previous fragment found. */ if (fp->frags[i].ofs + fp->frags[i].len == ofs) { ip_frag_chain(fp->frags[i].mb, m); /* update our last fragment and offset. */ m = fp->frags[i].mb; ofs = fp->frags[i].ofs; } } /* error - hole in the packet. */ if (m == prev) { return NULL; } } /* chain with the first fragment. */ ip_frag_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); m = fp->frags[IP_FIRST_FRAG_IDX].mb; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; /* update ipv6 header for the reassembled datagram */ ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); ip_hdr->payload_len = rte_cpu_to_be_16(payload_len); /* * remove fragmentation header. note that per RFC2460, we need to update * the last non-fragmentable header with the "next header" field to contain * type of the first fragmentable header, but we currently don't support * other headers, so we assume there are no other headers and thus update * the main IPv6 header instead. */ move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr); frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1); ip_hdr->proto = frag_hdr->next_header; ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)), rte_pktmbuf_mtod(m, char*), move_len); rte_pktmbuf_adj(m, sizeof(*frag_hdr)); return m; }
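/*
 * Illustration (a sketch added for clarity, not code from the file): the
 * fragment extension header sits between the IPv6 header and the payload,
 * so the reassembly step above shifts the L2 + IPv6 headers forward over it
 * and then trims the now-unused leading bytes with rte_pktmbuf_adj().
 *
 *   before:  | L2 | IPv6 hdr | frag ext hdr | payload ... |
 *   after:          | L2 | IPv6 hdr | payload ... |
 */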
/* * Reassemble fragments into one packet. */ struct rte_mbuf * ipv4_frag_reassemble(struct ip_frag_pkt *fp) { struct ipv4_hdr *ip_hdr; struct rte_mbuf *m, *prev; uint32_t i, n, ofs, first_len; uint32_t curr_idx = 0; first_len = fp->frags[IP_FIRST_FRAG_IDX].len; n = fp->last_idx - 1; /*start from the last fragment. */ m = fp->frags[IP_LAST_FRAG_IDX].mb; ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; curr_idx = IP_LAST_FRAG_IDX; while (ofs != first_len) { prev = m; for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { /* previous fragment found. */ if(fp->frags[i].ofs + fp->frags[i].len == ofs) { /* adjust start of the last fragment data. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[i].mb, m); /* this mbuf should not be accessed directly */ fp->frags[curr_idx].mb = NULL; curr_idx = i; /* update our last fragment and offset. */ m = fp->frags[i].mb; ofs = fp->frags[i].ofs; } } /* error - hole in the packet. */ if (m == prev) { return NULL; } } /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); m = fp->frags[IP_FIRST_FRAG_IDX].mb; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; /* update ipv4 header for the reassmebled packet */ ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size + m->l3_len)); ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset & rte_cpu_to_be_16(IPV4_HDR_DF_FLAG)); ip_hdr->hdr_checksum = 0; return m; }
static int rx_thread(void * param) { struct port_configure * pconf = NULL; /* (struct port_configure *) param; */ struct rte_ring* ring_in = NULL; /* pconf->ring_in; */ struct rte_ring* ring_out = NULL; /* pconf->ring_out; */ uint8_t port_id = 0; /* pconf->portid; */ uint16_t nb_rx_pkts; struct rte_mbuf *m; struct ether_hdr *eth_hdr; struct arp_hdr *arp_hdr; int32_t i, j, k, ret; struct rte_mbuf *pkts[MAX_PKTS_BURST]; int32_t ether_type; struct ipv4_hdr *ip_hdr; struct icmp_hdr* icmp_hdr; struct udp_hdr* udp_hdr; while (!quit_signal) { for (k = 0; k < num_config_port; ++k) { pconf = &ports_conf[k]; ring_in = pconf->ring_in; ring_out = pconf->ring_out; port_id = pconf->portid; nb_rx_pkts = rte_eth_rx_burst(port_id, 0, pkts, MAX_PKTS_BURST); if (unlikely(nb_rx_pkts == 0)) { continue; } for (i = 0; i < nb_rx_pkts; ++i) { m = pkts[i]; eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); ether_type = eth_hdr->ether_type; if (unlikely(rte_cpu_to_be_16(ETHER_TYPE_ARP) == ether_type)) { arp_hdr = (struct arp_hdr *) ((char *) (eth_hdr + 1)); if (arp_hdr->arp_op == rte_cpu_to_be_16(ARP_OP_REQUEST)) { if (arp_hdr->arp_data.arp_tip == (pconf->ip)) { arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); ether_addr_copy(&eth_hdr->s_addr, &eth_hdr->d_addr); ether_addr_copy(&pconf->addr, &eth_hdr->s_addr); ether_addr_copy(&arp_hdr->arp_data.arp_sha, &arp_hdr->arp_data.arp_tha); arp_hdr->arp_data.arp_tip = arp_hdr->arp_data.arp_sip; ether_addr_copy(&pconf->addr, &arp_hdr->arp_data.arp_sha); arp_hdr->arp_data.arp_sip = (pconf->ip); MMENQUEUE(ring_out, m); } else { MMENQUEUE(ring_arp_request, m); } } else if (arp_hdr->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) { MMENQUEUE(ring_arp_reply, m); } else { rte_pktmbuf_free(m); } } else if (likely( rte_cpu_to_be_16(ETHER_TYPE_IPv4) == ether_type)) { ip_hdr = (struct ipv4_hdr *) ((char *) (eth_hdr + 1)); switch (ip_hdr->next_proto_id) { case IPPROTO_ICMP: /* printf("received ping message\n"); */ icmp_hdr = (struct icmp_hdr *) ((unsigned char *) ip_hdr + sizeof(struct ipv4_hdr)); if (unlikely( wrapsum( checksum(icmp_hdr, (m->data_len - sizeof(struct ether_hdr) - sizeof(struct ipv4_hdr)), 0)))) { printf("ICMP checksum error\n"); rte_pktmbuf_free(m); break; } if (unlikely( icmp_hdr->icmp_type == IP_ICMP_ECHO_REQUEST)) { if (ntohl(ip_hdr->dst_addr) == INADDR_BROADCAST) { rte_pktmbuf_free(m); } else { icmp_hdr->icmp_type = IP_ICMP_ECHO_REPLY; icmp_hdr->icmp_cksum = 0; icmp_hdr->icmp_cksum = wrapsum( checksum(icmp_hdr, (m->data_len - sizeof(struct ether_hdr) - sizeof(struct ipv4_hdr)), 0)); inetAddrSwap(&ip_hdr->src_addr, &ip_hdr->dst_addr); ip_hdr->packet_id = htons( ntohs(ip_hdr->packet_id) + m->data_len); ip_hdr->hdr_checksum = 0; ip_hdr->hdr_checksum = wrapsum( checksum(ip_hdr, sizeof(struct ipv4_hdr), 0)); ethAddrSwap(&eth_hdr->d_addr, &eth_hdr->s_addr); MMENQUEUE(ring_out, m); } } else { rte_pktmbuf_free(m); } break; case IPPROTO_UDP: MMENQUEUE(ring_in, m); break; default: rte_pktmbuf_free(m); } } else { rte_pktmbuf_free(m); } } } } return 0; }
static inline void fdir_filter_add(uint8_t port_id, const char *addr, enum rte_eth_fdir_behavior behavior, uint32_t soft_id) { struct rte_eth_fdir_filter entry; uint32_t fdir_ip_addr; int ret = 0; ret = rte_eth_dev_filter_supported(port_id, RTE_ETH_FILTER_FDIR); if (ret < 0) { printf("flow director is not supported on port %u.\n", port_id); return; } memset(&entry, 0, sizeof(struct rte_eth_fdir_filter)); ret = inet_pton(AF_INET, addr, &fdir_ip_addr); if (ret <= 0) { if (ret == 0) { printf("Error: %s is not in presentation format\n", addr); return; } else if (ret == -1) { perror("inet_pton"); return; } } //printf("%d\n", behavior); //printf("%s, %u\n", addr, fdir_ip_addr); entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP; //entry.input.flow_type = RTE_ETH_FLOW_IPV4; entry.input.flow.ip4_flow.dst_ip = fdir_ip_addr; //entry.input.flow.udp4_flow.src_port = rte_cpu_to_be_16(TCP_PORT); //entry.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(TCP_PORT); entry.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(0); entry.input.flow_ext.is_vf = 0; entry.action.behavior = behavior; entry.action.flex_off = 0; entry.action.report_status = RTE_ETH_FDIR_REPORT_ID; if (behavior == RTE_ETH_FDIR_ACCEPT) entry.action.rx_queue = PKT_ACCEPT_QUEUE; else entry.action.rx_queue = PKT_DROP_QUEUE; entry.soft_id = soft_id; ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry); if (ret < 0) printf("flow director programming error: (%s)\n", strerror(-ret)); entry.soft_id = soft_id + 100; entry.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(0x1); ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry); if (ret < 0) printf("flow director programming error: (%s)\n", strerror(-ret)); }
int process_arp(struct lls_config *lls_conf, struct gatekeeper_if *iface, uint16_t tx_queue, struct rte_mbuf *buf, struct ether_hdr *eth_hdr, struct arp_hdr *arp_hdr) { struct ipaddr addr = { .proto = ETHER_TYPE_IPv4, .ip.v4.s_addr = arp_hdr->arp_data.arp_sip, }; struct lls_mod_req mod_req; uint16_t pkt_len = rte_pktmbuf_data_len(buf); /* pkt_in_skip_l2() already called by LLS. */ size_t l2_len = pkt_in_l2_hdr_len(buf); int ret; if (pkt_len < l2_len + sizeof(*arp_hdr)) { LLS_LOG(ERR, "%s interface received ARP packet of size %hu bytes, but it should be at least %zu bytes\n", iface->name, pkt_len, l2_len + sizeof(*arp_hdr)); return -1; } ret = verify_l2_hdr(iface, eth_hdr, buf->l2_type, "ARP"); if (ret < 0) return ret; if (unlikely(arp_hdr->arp_hrd != rte_cpu_to_be_16(ARP_HRD_ETHER) || arp_hdr->arp_pro != rte_cpu_to_be_16(ETHER_TYPE_IPv4) || arp_hdr->arp_hln != ETHER_ADDR_LEN || arp_hdr->arp_pln != sizeof(struct in_addr))) return -1; /* If sip is not in the same subnet as our IP address, drop. */ if (!ipv4_in_subnet(iface, &addr)) return -1; /* Update cache with source resolution, regardless of operation. */ mod_req.cache = &lls_conf->arp_cache; mod_req.addr = addr; ether_addr_copy(&arp_hdr->arp_data.arp_sha, &mod_req.ha); mod_req.port_id = iface->id; mod_req.ts = time(NULL); RTE_VERIFY(mod_req.ts >= 0); lls_process_mod(lls_conf, &mod_req); /* * If it's a Gratuitous ARP or if the target address * is not us, then no response is needed. */ if (is_garp_pkt(arp_hdr) || (iface->ip4_addr.s_addr != arp_hdr->arp_data.arp_tip)) return -1; switch (rte_be_to_cpu_16(arp_hdr->arp_op)) { case ARP_OP_REQUEST: { uint16_t num_tx; /* * We are reusing the frame, but an ARP reply always goes out * the same interface that received it. Therefore, the L2 * space of the frame is the same. If needed, the correct * VLAN tag was set in verify_l2_hdr(). */ /* Set-up Ethernet header. */ ether_addr_copy(ð_hdr->s_addr, ð_hdr->d_addr); ether_addr_copy(&iface->eth_addr, ð_hdr->s_addr); /* Set-up ARP header. */ arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); ether_addr_copy(&arp_hdr->arp_data.arp_sha, &arp_hdr->arp_data.arp_tha); arp_hdr->arp_data.arp_tip = arp_hdr->arp_data.arp_sip; ether_addr_copy(&iface->eth_addr, &arp_hdr->arp_data.arp_sha); arp_hdr->arp_data.arp_sip = iface->ip4_addr.s_addr; /* Need to transmit reply. */ num_tx = rte_eth_tx_burst(iface->id, tx_queue, &buf, 1); if (unlikely(num_tx != 1)) { LLS_LOG(NOTICE, "ARP reply failed\n"); return -1; } return 0; } case ARP_OP_REPLY: /* * No further action required. Could check to make sure * arp_hdr->arp_data.arp_tha is equal to arp->ether_addr, * but there's nothing that can be done if it's wrong anyway. */ return -1; default: LLS_LOG(NOTICE, "%s received an ARP packet with an unknown operation (%hu)\n", __func__, rte_be_to_cpu_16(arp_hdr->arp_op)); return -1; } }
/** * IPv4 fragmentation. * * This function implements the fragmentation of IPv4 packets. * * @param pkt_in * The input packet. * @param pkts_out * Array storing the output fragments. * @param mtu_size * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 * datagrams. This value includes the size of the IPv4 header. * @param pool_direct * MBUF pool used for allocating direct buffers for the output fragments. * @param pool_indirect * MBUF pool used for allocating indirect buffers for the output fragments. * @return * Upon successful completion - number of output fragments placed * in the pkts_out array. * Otherwise - (-1) * <errno>. */ int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, struct rte_mbuf **pkts_out, uint16_t nb_pkts_out, uint16_t mtu_size, struct rte_mempool *pool_direct, struct rte_mempool *pool_indirect) { struct rte_mbuf *in_seg = NULL; struct ipv4_hdr *in_hdr; uint32_t out_pkt_pos, in_seg_data_pos; uint32_t more_in_segs; uint16_t fragment_offset, flag_offset, frag_size; frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); /* Fragment size should be a multiply of 8. */ IP_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *); flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); /* If Don't Fragment flag is set */ if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) return -ENOTSUP; /* Check that pkts_out is big enough to hold all fragments */ if (unlikely(frag_size * nb_pkts_out < (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv4_hdr)))) return -EINVAL; in_seg = pkt_in; in_seg_data_pos = sizeof(struct ipv4_hdr); out_pkt_pos = 0; fragment_offset = 0; more_in_segs = 1; while (likely(more_in_segs)) { struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; uint32_t more_out_segs; struct ipv4_hdr *out_hdr; /* Allocate direct buffer */ out_pkt = rte_pktmbuf_alloc(pool_direct); if (unlikely(out_pkt == NULL)) { __free_fragments(pkts_out, out_pkt_pos); return -ENOMEM; } /* Reserve space for the IP header that will be built later */ out_pkt->data_len = sizeof(struct ipv4_hdr); out_pkt->pkt_len = sizeof(struct ipv4_hdr); out_seg_prev = out_pkt; more_out_segs = 1; while (likely(more_out_segs && more_in_segs)) { struct rte_mbuf *out_seg = NULL; uint32_t len; /* Allocate indirect buffer */ out_seg = rte_pktmbuf_alloc(pool_indirect); if (unlikely(out_seg == NULL)) { rte_pktmbuf_free(out_pkt); __free_fragments(pkts_out, out_pkt_pos); return -ENOMEM; } out_seg_prev->next = out_seg; out_seg_prev = out_seg; /* Prepare indirect buffer */ rte_pktmbuf_attach(out_seg, in_seg); len = mtu_size - out_pkt->pkt_len; if (len > (in_seg->data_len - in_seg_data_pos)) { len = in_seg->data_len - in_seg_data_pos; } out_seg->data_off = in_seg->data_off + in_seg_data_pos; out_seg->data_len = (uint16_t)len; out_pkt->pkt_len = (uint16_t)(len + out_pkt->pkt_len); out_pkt->nb_segs += 1; in_seg_data_pos += len; /* Current output packet (i.e. fragment) done ? */ if (unlikely(out_pkt->pkt_len >= mtu_size)) more_out_segs = 0; /* Current input segment done ? 
*/ if (unlikely(in_seg_data_pos == in_seg->data_len)) { in_seg = in_seg->next; in_seg_data_pos = 0; if (unlikely(in_seg == NULL)) more_in_segs = 0; } } /* Build the IP header */ out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv4_hdr *); __fill_ipv4hdr_frag(out_hdr, in_hdr, (uint16_t)out_pkt->pkt_len, flag_offset, fragment_offset, more_in_segs); fragment_offset = (uint16_t)(fragment_offset + out_pkt->pkt_len - sizeof(struct ipv4_hdr)); out_pkt->ol_flags |= PKT_TX_IP_CKSUM; out_pkt->l3_len = sizeof(struct ipv4_hdr); /* Write the fragment to the output list */ pkts_out[out_pkt_pos] = out_pkt; out_pkt_pos ++; } return out_pkt_pos; }
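/*
 * A minimal usage sketch for the fragmentation API documented above. The MTU
 * value, the array bound and the helper name are assumptions for illustration;
 * error handling is reduced to the essentials. Each returned fragment still
 * needs an L2 header prepended before transmission.
 */
static int
fragment_and_count(struct rte_mbuf *pkt, struct rte_mempool *direct_pool,
                   struct rte_mempool *indirect_pool)
{
    struct rte_mbuf *frags[16];   /* arbitrary bound for this sketch */
    int32_t nb_frags;

    nb_frags = rte_ipv4_fragment_packet(pkt, frags, RTE_DIM(frags),
            1500 /* assumed MTU */, direct_pool, indirect_pool);
    if (nb_frags < 0)
        return nb_frags;          /* e.g. -ENOTSUP when DF is set, -ENOMEM, -EINVAL */

    rte_pktmbuf_free(pkt);        /* caller keeps ownership of the input packet */
    return nb_frags;              /* fragments are in frags[0..nb_frags-1] */
}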
/* * Receive a burst of packets, look for ICMP echo requests, and, if any, * send back ICMP echo replies. */ static void reply_to_icmp_echo_rqsts(struct fwd_stream *fs) { struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; struct rte_mbuf *pkt; struct ether_hdr *eth_h; struct vlan_hdr *vlan_h; struct arp_hdr *arp_h; struct ipv4_hdr *ip_h; struct icmp_hdr *icmp_h; struct ether_addr eth_addr; uint32_t ip_addr; uint16_t nb_rx; uint16_t nb_tx; uint16_t nb_replies; uint16_t eth_type; uint16_t vlan_id; uint16_t arp_op; uint16_t arp_pro; uint8_t i; int l2_len; #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES uint64_t start_tsc; uint64_t end_tsc; uint64_t core_cycles; #endif #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES start_tsc = rte_rdtsc(); #endif /* * First, receive a burst of packets. */ nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); if (unlikely(nb_rx == 0)) return; #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; #endif fs->rx_packets += nb_rx; nb_replies = 0; for (i = 0; i < nb_rx; i++) { pkt = pkts_burst[i]; eth_h = (struct ether_hdr *) pkt->pkt.data; eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type); l2_len = sizeof(struct ether_hdr); if (verbose_level > 0) { printf("\nPort %d pkt-len=%u nb-segs=%u\n", fs->rx_port, pkt->pkt.pkt_len, pkt->pkt.nb_segs); ether_addr_dump(" ETH: src=", &eth_h->s_addr); ether_addr_dump(" dst=", &eth_h->d_addr); } if (eth_type == ETHER_TYPE_VLAN) { vlan_h = (struct vlan_hdr *) ((char *)eth_h + sizeof(struct ether_hdr)); l2_len += sizeof(struct vlan_hdr); eth_type = rte_be_to_cpu_16(vlan_h->eth_proto); if (verbose_level > 0) { vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci) & 0xFFF; printf(" [vlan id=%u]", vlan_id); } } if (verbose_level > 0) { printf(" type=0x%04x\n", eth_type); } /* Reply to ARP requests */ if (eth_type == ETHER_TYPE_ARP) { arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len); arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op); arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro); if (verbose_level > 0) { printf(" ARP: hrd=%d proto=0x%04x hln=%d " "pln=%d op=%u (%s)\n", RTE_BE_TO_CPU_16(arp_h->arp_hrd), arp_pro, arp_h->arp_hln, arp_h->arp_pln, arp_op, arp_op_name(arp_op)); } if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) != ARP_HRD_ETHER) || (arp_pro != ETHER_TYPE_IPv4) || (arp_h->arp_hln != 6) || (arp_h->arp_pln != 4) ) { rte_pktmbuf_free(pkt); if (verbose_level > 0) printf("\n"); continue; } if (verbose_level > 0) { memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_sha, 6); ether_addr_dump(" sha=", &eth_addr); memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_sip, 4); ipv4_addr_dump(" sip=", ip_addr); printf("\n"); memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_tha, 6); ether_addr_dump(" tha=", &eth_addr); memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_tip, 4); ipv4_addr_dump(" tip=", ip_addr); printf("\n"); } if (arp_op != ARP_OP_REQUEST) { rte_pktmbuf_free(pkt); continue; } /* * Build ARP reply. */ /* Use source MAC address as destination MAC address. 
*/ ether_addr_copy(&eth_h->s_addr, &eth_h->d_addr); /* Set source MAC address with MAC address of TX port */ ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_h->s_addr); arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_tha, 6); memcpy(arp_h->arp_data.arp_ip.arp_tha, arp_h->arp_data.arp_ip.arp_sha, 6); memcpy(arp_h->arp_data.arp_ip.arp_sha, &eth_h->s_addr, 6); /* Swap IP addresses in ARP payload */ memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_sip, 4); memcpy(arp_h->arp_data.arp_ip.arp_sip, arp_h->arp_data.arp_ip.arp_tip, 4); memcpy(arp_h->arp_data.arp_ip.arp_tip, &ip_addr, 4); pkts_burst[nb_replies++] = pkt; continue; } if (eth_type != ETHER_TYPE_IPv4) { rte_pktmbuf_free(pkt); continue; } ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len); if (verbose_level > 0) { ipv4_addr_dump(" IPV4: src=", ip_h->src_addr); ipv4_addr_dump(" dst=", ip_h->dst_addr); printf(" proto=%d (%s)\n", ip_h->next_proto_id, ip_proto_name(ip_h->next_proto_id)); } /* * Check if packet is an ICMP echo request. */ icmp_h = (struct icmp_hdr *) ((char *)ip_h + sizeof(struct ipv4_hdr)); if (! ((ip_h->next_proto_id == IPPROTO_ICMP) && (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) && (icmp_h->icmp_code == 0))) { rte_pktmbuf_free(pkt); continue; } if (verbose_level > 0) printf(" ICMP: echo request seq id=%d\n", rte_be_to_cpu_16(icmp_h->icmp_seq_nb)); /* * Prepare ICMP echo reply to be sent back. * - switch ethernet source and destination addresses, * - switch IPv4 source and destination addresses, * - set IP_ICMP_ECHO_REPLY in ICMP header. * No need to re-compute the IP header checksum. * Reset ICMP checksum. */ ether_addr_copy(&eth_h->s_addr, &eth_addr); ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr); ether_addr_copy(&eth_addr, &eth_h->d_addr); ip_addr = ip_h->src_addr; ip_h->src_addr = ip_h->dst_addr; ip_h->dst_addr = ip_addr; icmp_h->icmp_type = IP_ICMP_ECHO_REPLY; icmp_h->icmp_cksum = 0; pkts_burst[nb_replies++] = pkt; } /* Send back ICMP echo replies, if any. */ if (nb_replies > 0) { nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_replies); fs->tx_packets += nb_tx; #ifdef RTE_TEST_PMD_RECORD_BURST_STATS fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; #endif if (unlikely(nb_tx < nb_replies)) { fs->fwd_dropped += (nb_replies - nb_tx); do { rte_pktmbuf_free(pkts_burst[nb_tx]); } while (++nb_tx < nb_replies); } } #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES end_tsc = rte_rdtsc(); core_cycles = (end_tsc - start_tsc); fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); #endif }
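/*
 * Note on the reply above: it zeroes icmp_cksum instead of recomputing it.
 * If a correct reply checksum is wanted, a plain ones'-complement sum over
 * the whole ICMP message can be used. This is a self-contained sketch (the
 * helper name is ours, not a DPDK API); it assumes <stdint.h> and <stddef.h>.
 */
static uint16_t
icmp_cksum_recompute(const void *icmp, size_t len)
{
    const uint16_t *p = icmp;
    uint32_t sum = 0;

    while (len > 1) { sum += *p++; len -= 2; }
    if (len == 1)
        sum += *(const uint8_t *)p;        /* odd trailing byte, zero-padded */
    sum = (sum & 0xffff) + (sum >> 16);    /* fold carries */
    sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}
/* usage: icmp_h->icmp_cksum = 0; icmp_h->icmp_cksum = icmp_cksum_recompute(icmp_h, icmp_len); */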
static int vhost_bdev_scsi_inquiry_command(struct vhost_block_dev *bdev, struct vhost_scsi_task *task) { int hlen = 0; uint32_t alloc_len = 0; uint16_t len = 0; uint16_t *temp16; int pc; int pd; int evpd; int i; uint8_t *buf; struct scsi_cdb_inquiry *inq; inq = (struct scsi_cdb_inquiry *)task->req->cdb; assert(task->iovs_cnt == 1); /* At least 36Bytes for inquiry command */ if (task->data_len < 0x24) goto inq_error; pd = SPC_PERIPHERAL_DEVICE_TYPE_DISK; pc = inq->page_code; evpd = inq->evpd & 0x1; if (!evpd && pc) goto inq_error; if (evpd) { struct scsi_vpd_page *vpage = (struct scsi_vpd_page *) task->iovs[0].iov_base; /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ vpage->peripheral = pd; /* PAGE CODE */ vpage->page_code = pc; switch (pc) { case SPC_VPD_SUPPORTED_VPD_PAGES: hlen = 4; vpage->params[0] = SPC_VPD_SUPPORTED_VPD_PAGES; vpage->params[1] = SPC_VPD_UNIT_SERIAL_NUMBER; vpage->params[2] = SPC_VPD_DEVICE_IDENTIFICATION; len = 3; /* PAGE LENGTH */ vpage->alloc_len = rte_cpu_to_be_16(len); break; case SPC_VPD_UNIT_SERIAL_NUMBER: hlen = 4; strncpy((char *)vpage->params, bdev->name, 32); vpage->alloc_len = rte_cpu_to_be_16(32); break; case SPC_VPD_DEVICE_IDENTIFICATION: buf = vpage->params; struct scsi_desig_desc *desig; hlen = 4; /* NAA designator */ desig = (struct scsi_desig_desc *)buf; desig->code_set = SPC_VPD_CODE_SET_BINARY; desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; desig->type = SPC_VPD_IDENTIFIER_TYPE_NAA; desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT; desig->reserved0 = 0; desig->piv = 1; desig->reserved1 = 0; desig->len = 8; vhost_bdev_scsi_set_naa_ieee_extended(bdev->name, desig->desig); len = sizeof(struct scsi_desig_desc) + 8; buf += sizeof(struct scsi_desig_desc) + desig->len; /* T10 Vendor ID designator */ desig = (struct scsi_desig_desc *)buf; desig->code_set = SPC_VPD_CODE_SET_ASCII; desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; desig->type = SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID; desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT; desig->reserved0 = 0; desig->piv = 1; desig->reserved1 = 0; desig->len = 8 + 16 + 32; strncpy((char *)desig->desig, "INTEL", 8); vhost_strcpy_pad((char *)&desig->desig[8], bdev->product_name, 16, ' '); strncpy((char *)&desig->desig[24], bdev->name, 32); len += sizeof(struct scsi_desig_desc) + 8 + 16 + 32; buf += sizeof(struct scsi_desig_desc) + desig->len; /* SCSI Device Name designator */ desig = (struct scsi_desig_desc *)buf; desig->code_set = SPC_VPD_CODE_SET_UTF8; desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; desig->type = SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME; desig->association = SPC_VPD_ASSOCIATION_TARGET_DEVICE; desig->reserved0 = 0; desig->piv = 1; desig->reserved1 = 0; desig->len = snprintf((char *)desig->desig, 255, "%s", bdev->name); len += sizeof(struct scsi_desig_desc) + desig->len; buf += sizeof(struct scsi_desig_desc) + desig->len; vpage->alloc_len = rte_cpu_to_be_16(len); break; default: goto inq_error; } } else { struct scsi_cdb_inquiry_data *inqdata = (struct scsi_cdb_inquiry_data *)task->iovs[0].iov_base; /* Standard INQUIRY data */ /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ inqdata->peripheral = pd; /* RMB(7) */ inqdata->rmb = 0; /* VERSION */ /* See SPC3/SBC2/MMC4/SAM2 for more details */ inqdata->version = SPC_VERSION_SPC3; /* NORMACA(5) HISUP(4) RESPONSE DATA FORMAT(3-0) */ /* format 2 */ /* hierarchical support */ inqdata->response = 2 | 1 << 4; hlen = 5; /* SCCS(7) ACC(6) TPGS(5-4) 3PC(3) PROTECT(0) */ /* Not support TPGS */ inqdata->flags = 0; /* MULTIP */ 
inqdata->flags2 = 0x10; /* WBUS16(5) SYNC(4) LINKED(3) CMDQUE(1) VS(0) */ /* CMDQUE */ inqdata->flags3 = 0x2; /* T10 VENDOR IDENTIFICATION */ strncpy((char *)inqdata->t10_vendor_id, "INTEL", 8); /* PRODUCT IDENTIFICATION */ strncpy((char *)inqdata->product_id, bdev->product_name, 16); /* PRODUCT REVISION LEVEL */ strncpy((char *)inqdata->product_rev, "0001", 4); /* Standard inquiry data ends here. Only populate * remaining fields if alloc_len indicates enough * space to hold it. */ len = INQ_OFFSET(product_rev) - 5; if (alloc_len >= INQ_OFFSET(vendor)) { /* Vendor specific */ memset(inqdata->vendor, 0x20, 20); len += sizeof(inqdata->vendor); } if (alloc_len >= INQ_OFFSET(ius)) { /* CLOCKING(3-2) QAS(1) IUS(0) */ inqdata->ius = 0; len += sizeof(inqdata->ius); } if (alloc_len >= INQ_OFFSET(reserved)) { /* Reserved */ inqdata->reserved = 0; len += sizeof(inqdata->reserved); } /* VERSION DESCRIPTOR 1-8 */ if (alloc_len >= INQ_OFFSET(reserved) + 2) { temp16 = (uint16_t *)&inqdata->desc[0]; *temp16 = rte_cpu_to_be_16(0x0960); len += 2; } if (alloc_len >= INQ_OFFSET(reserved) + 4) { /* SPC-3 (no version claimed) */ temp16 = (uint16_t *)&inqdata->desc[2]; *temp16 = rte_cpu_to_be_16(0x0300); len += 2; } if (alloc_len >= INQ_OFFSET(reserved) + 6) { /* SBC-2 (no version claimed) */ temp16 = (uint16_t *)&inqdata->desc[4]; *temp16 = rte_cpu_to_be_16(0x0320); len += 2; } if (alloc_len >= INQ_OFFSET(reserved) + 8) { /* SAM-2 (no version claimed) */ temp16 = (uint16_t *)&inqdata->desc[6]; *temp16 = rte_cpu_to_be_16(0x0040); len += 2; } if (alloc_len > INQ_OFFSET(reserved) + 8) { i = alloc_len - (INQ_OFFSET(reserved) + 8); if (i > 30) i = 30; memset(&inqdata->desc[8], 0, i); len += i; } /* ADDITIONAL LENGTH */ inqdata->add_len = len; } /* STATUS GOOD */ scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); return hlen + len; inq_error: scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION, SCSI_SENSE_ILLEGAL_REQUEST, SCSI_ASC_INVALID_FIELD_IN_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); return 0; }
static int app_pipeline_fc_key_convert(struct pipeline_fc_key *key_in, uint8_t *key_out, uint32_t *signature) { uint8_t buffer[PIPELINE_FC_FLOW_KEY_MAX_SIZE]; void *key_buffer = (key_out) ? key_out : buffer; switch (key_in->type) { case FLOW_KEY_QINQ: { struct pkt_key_qinq *qinq = key_buffer; qinq->ethertype_svlan = 0; qinq->svlan = rte_cpu_to_be_16(key_in->key.qinq.svlan); qinq->ethertype_cvlan = 0; qinq->cvlan = rte_cpu_to_be_16(key_in->key.qinq.cvlan); if (signature) *signature = (uint32_t) hash_default_key8(qinq, 8, 0); return 0; } case FLOW_KEY_IPV4_5TUPLE: { struct pkt_key_ipv4_5tuple *ipv4 = key_buffer; ipv4->ttl = 0; ipv4->proto = key_in->key.ipv4_5tuple.proto; ipv4->checksum = 0; ipv4->ip_src = rte_cpu_to_be_32(key_in->key.ipv4_5tuple.ip_src); ipv4->ip_dst = rte_cpu_to_be_32(key_in->key.ipv4_5tuple.ip_dst); ipv4->port_src = rte_cpu_to_be_16(key_in->key.ipv4_5tuple.port_src); ipv4->port_dst = rte_cpu_to_be_16(key_in->key.ipv4_5tuple.port_dst); if (signature) *signature = (uint32_t) hash_default_key16(ipv4, 16, 0); return 0; } case FLOW_KEY_IPV6_5TUPLE: { struct pkt_key_ipv6_5tuple *ipv6 = key_buffer; memset(ipv6, 0, 64); ipv6->payload_length = 0; ipv6->proto = key_in->key.ipv6_5tuple.proto; ipv6->hop_limit = 0; memcpy(&ipv6->ip_src, &key_in->key.ipv6_5tuple.ip_src, 16); memcpy(&ipv6->ip_dst, &key_in->key.ipv6_5tuple.ip_dst, 16); ipv6->port_src = rte_cpu_to_be_16(key_in->key.ipv6_5tuple.port_src); ipv6->port_dst = rte_cpu_to_be_16(key_in->key.ipv6_5tuple.port_dst); if (signature) *signature = (uint32_t) hash_default_key64(ipv6, 64, 0); return 0; } default: return -1; } }
/* * This function learns the MAC address of the device and set init * L2 header and L3 header info. */ int vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m) { int i, ret; struct ether_hdr *pkt_hdr; struct virtio_net *dev = vdev->dev; uint64_t portid = dev->device_fh; struct ipv4_hdr *ip; struct rte_eth_tunnel_filter_conf tunnel_filter_conf; if (unlikely(portid > VXLAN_N_PORTS)) { RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: Not configuring device," "as already have %d ports for VXLAN.", dev->device_fh, VXLAN_N_PORTS); return -1; } /* Learn MAC address of guest device from packet */ pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) { RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing" " MAC address and has not been registered.\n", dev->device_fh); return -1; } for (i = 0; i < ETHER_ADDR_LEN; i++) { vdev->mac_address.addr_bytes[i] = vxdev.port[portid].vport_mac.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i]; vxdev.port[portid].peer_mac.addr_bytes[i] = peer_mac[i]; } memset(&tunnel_filter_conf, 0, sizeof(struct rte_eth_tunnel_filter_conf)); ether_addr_copy(&ports_eth_addr[0], &tunnel_filter_conf.outer_mac); tunnel_filter_conf.filter_type = tep_filter_type[filter_idx]; /* inner MAC */ ether_addr_copy(&vdev->mac_address, &tunnel_filter_conf.inner_mac); tunnel_filter_conf.queue_id = vdev->rx_q; tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q]; if (tep_filter_type[filter_idx] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID) tunnel_filter_conf.inner_vlan = INNER_VLAN_ID; tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN; ret = rte_eth_dev_filter_ctrl(ports[0], RTE_ETH_FILTER_TUNNEL, RTE_ETH_FILTER_ADD, &tunnel_filter_conf); if (ret) { RTE_LOG(ERR, VHOST_DATA, "%d Failed to add device MAC address to cloud filter\n", vdev->rx_q); return -1; } /* Print out inner MAC and VNI info. */ RTE_LOG(INFO, VHOST_DATA, "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n", vdev->rx_q, vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1], vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3], vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5], tenant_id_conf[vdev->rx_q]); vxdev.port[portid].vport_id = portid; for (i = 0; i < 4; i++) { /* Local VTEP IP */ vxdev.port_ip |= vxlan_multicast_ips[portid][i] << (8 * i); /* Remote VTEP IP */ vxdev.port[portid].peer_ip |= vxlan_overlay_ips[portid][i] << (8 * i); } vxdev.out_key = tenant_id_conf[vdev->rx_q]; ether_addr_copy(&vxdev.port[portid].peer_mac, &app_l2_hdr[portid].d_addr); ether_addr_copy(&ports_eth_addr[0], &app_l2_hdr[portid].s_addr); app_l2_hdr[portid].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); ip = &app_ip_hdr[portid]; ip->version_ihl = IP_VHL_DEF; ip->type_of_service = 0; ip->total_length = 0; ip->packet_id = 0; ip->fragment_offset = IP_DN_FRAGMENT_FLAG; ip->time_to_live = IP_DEFTTL; ip->next_proto_id = IPPROTO_UDP; ip->hdr_checksum = 0; ip->src_addr = vxdev.port_ip; ip->dst_addr = vxdev.port[portid].peer_ip; /* Set device as ready for RX. */ vdev->ready = DEVICE_RX; return 0; }
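/*
 * Sketch (added for illustration, not part of the original file): how header
 * templates like app_l2_hdr[]/app_ip_hdr[] prepared above are typically
 * consumed on the TX path. The helper name is hypothetical and the real VXLAN
 * encapsulation would also prepend UDP and VXLAN headers; only the outer
 * Ethernet + IPv4 part is shown here.
 */
static int
prepend_outer_headers(struct rte_mbuf *m, uint64_t portid)
{
    struct ether_hdr *oeth;
    struct ipv4_hdr *oip;

    oeth = (struct ether_hdr *)rte_pktmbuf_prepend(m,
            sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr));
    if (oeth == NULL)
        return -1;                          /* not enough headroom */

    *oeth = app_l2_hdr[portid];             /* prebuilt outer Ethernet header */
    oip = (struct ipv4_hdr *)&oeth[1];
    *oip = app_ip_hdr[portid];              /* prebuilt outer IPv4 header */
    oip->total_length = rte_cpu_to_be_16(rte_pktmbuf_pkt_len(m) -
                                         sizeof(struct ether_hdr));
    return 0;
}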
static int paxos_rx_process(struct rte_mbuf *pkt, struct proposer* proposer) { int ret = 0; uint8_t l4_proto = 0; uint16_t outer_header_len; union tunnel_offload_info info = { .data = 0 }; struct udp_hdr *udp_hdr; struct paxos_hdr *paxos_hdr; struct ether_hdr *phdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); parse_ethernet(phdr, &info, &l4_proto); if (l4_proto != IPPROTO_UDP) return -1; udp_hdr = (struct udp_hdr *)((char *)phdr + info.outer_l2_len + info.outer_l3_len); /* if UDP dst port is not either PROPOSER or LEARNER port */ if (!(udp_hdr->dst_port == rte_cpu_to_be_16(PROPOSER_PORT) || udp_hdr->dst_port == rte_cpu_to_be_16(LEARNER_PORT)) && (pkt->packet_type & RTE_PTYPE_TUNNEL_MASK) == 0) return -1; paxos_hdr = (struct paxos_hdr *)((char *)udp_hdr + sizeof(struct udp_hdr)); if (rte_get_log_level() == RTE_LOG_DEBUG) { //rte_hexdump(stdout, "udp", udp_hdr, sizeof(struct udp_hdr)); //rte_hexdump(stdout, "paxos", paxos_hdr, sizeof(struct paxos_hdr)); print_paxos_hdr(paxos_hdr); } int value_len = rte_be_to_cpu_16(paxos_hdr->value_len); struct paxos_value *v = paxos_value_new((char *)paxos_hdr->paxosval, value_len); switch(rte_be_to_cpu_16(paxos_hdr->msgtype)) { case PAXOS_PROMISE: { struct paxos_promise promise = { .iid = rte_be_to_cpu_32(paxos_hdr->inst), .ballot = rte_be_to_cpu_16(paxos_hdr->rnd), .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd), .aid = rte_be_to_cpu_16(paxos_hdr->acptid), .value = *v }; proposer_handle_promise(proposer, &promise); break; } case PAXOS_ACCEPT: { if (first_time) { proposer_preexecute(proposer); first_time = false; } struct paxos_accept acpt = { .iid = rte_be_to_cpu_32(paxos_hdr->inst), .ballot = rte_be_to_cpu_16(paxos_hdr->rnd), .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd), .aid = rte_be_to_cpu_16(paxos_hdr->acptid), .value = *v }; proposer_handle_accept(proposer, &acpt); break; } case PAXOS_ACCEPTED: { struct paxos_accepted ack = { .iid = rte_be_to_cpu_32(paxos_hdr->inst), .ballot = rte_be_to_cpu_16(paxos_hdr->rnd), .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd), .aid = rte_be_to_cpu_16(paxos_hdr->acptid), .value = *v }; proposer_handle_accepted(proposer, &ack); break; } default: break; } outer_header_len = info.outer_l2_len + info.outer_l3_len + sizeof(struct udp_hdr) + sizeof(struct paxos_hdr); rte_pktmbuf_adj(pkt, outer_header_len); return ret; } static uint16_t add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, uint16_t max_pkts __rte_unused, void *user_param) { struct proposer* proposer = (struct proposer *)user_param; unsigned i; uint64_t now = rte_rdtsc(); for (i = 0; i < nb_pkts; i++) { pkts[i]->udata64 = now; paxos_rx_process(pkts[i], proposer); } return nb_pkts; } static inline int port_init(uint8_t port, struct rte_mempool *mbuf_pool, struct proposer* proposer) { struct rte_eth_dev_info dev_info; struct rte_eth_txconf *txconf; struct rte_eth_rxconf *rxconf; struct rte_eth_conf port_conf = port_conf_default; const uint16_t rx_rings = 1, tx_rings = 1; int retval; uint16_t q; rte_eth_dev_info_get(port, &dev_info); rxconf = &dev_info.default_rxconf; txconf = &dev_info.default_txconf; txconf->txq_flags &= PKT_TX_IPV4; txconf->txq_flags &= PKT_TX_UDP_CKSUM; if (port >= rte_eth_dev_count()) return -1; retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); if (retval != 0) return retval; for (q = 0; q < rx_rings; q++) { retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, rte_eth_dev_socket_id(port), rxconf, mbuf_pool); if (retval < 0) return retval; } for (q = 0; q 
< tx_rings; q++) { retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, rte_eth_dev_socket_id(port), txconf); if (retval < 0) return retval; } retval = rte_eth_dev_start(port); if (retval < 0) return retval; struct ether_addr addr; rte_eth_macaddr_get(port, &addr); rte_eth_promiscuous_enable(port); rte_eth_add_rx_callback(port, 0, add_timestamps, proposer); rte_eth_add_tx_callback(port, 0, calc_latency, NULL); return 0; } static void lcore_main(uint8_t port, __rte_unused struct proposer *p) { proposer_preexecute(p); for (;;) { // Check if signal is received if (force_quit) break; struct rte_mbuf *bufs[BURST_SIZE]; const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, BURST_SIZE); if (unlikely(nb_rx == 0)) continue; uint16_t buf; for (buf = 0; buf < nb_rx; buf++) rte_pktmbuf_free(bufs[buf]); } } static __attribute__((noreturn)) int lcore_mainloop(__attribute__((unused)) void *arg) { uint64_t prev_tsc = 0, cur_tsc, diff_tsc; unsigned lcore_id; lcore_id = rte_lcore_id(); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_TIMER, "Starting mainloop on core %u\n", lcore_id); while(1) { cur_tsc = rte_rdtsc(); diff_tsc = cur_tsc - prev_tsc; if (diff_tsc > TIMER_RESOLUTION_CYCLES) { rte_timer_manage(); prev_tsc = cur_tsc; } } } static void report_stat(struct rte_timer *tim, __attribute((unused)) void *arg) { /* print stat */ uint32_t count = rte_atomic32_read(&stat); rte_log(RTE_LOG_INFO, RTE_LOGTYPE_USER8, "Throughput = %8u msg/s\n", count); /* reset stat */ rte_atomic32_set(&stat, 0); /* this timer is automatically reloaded until we decide to stop it */ if (force_quit) rte_timer_stop(tim); } static void check_timeout(struct rte_timer *tim, void *arg) { struct proposer* p = (struct proposer *) arg; unsigned lcore_id = rte_lcore_id(); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "%s() on lcore_id %i\n", __func__, lcore_id); struct paxos_message out; out.type = PAXOS_PREPARE; struct timeout_iterator* iter = proposer_timeout_iterator(p); while(timeout_iterator_prepare(iter, &out.u.prepare)) { rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "%s Send PREPARE inst %d ballot %d\n", __func__, out.u.prepare.iid, out.u.prepare.ballot); send_paxos_message(&out); } out.type = PAXOS_ACCEPT; while(timeout_iterator_accept(iter, &out.u.accept)) { rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "%s: Send ACCEPT inst %d ballot %d\n", __func__, out.u.prepare.iid, out.u.prepare.ballot); send_paxos_message(&out); } timeout_iterator_free(iter); /* this timer is automatically reloaded until we decide to stop it */ if (force_quit) rte_timer_stop(tim); } int main(int argc, char *argv[]) { uint8_t portid = 0; unsigned master_core, lcore_id; signal(SIGTERM, signal_handler); signal(SIGINT, signal_handler); force_quit = false; int proposer_id = 0; if (rte_get_log_level() == RTE_LOG_DEBUG) { paxos_config.verbosity = PAXOS_LOG_DEBUG; } struct proposer *proposer = proposer_new(proposer_id, NUM_ACCEPTORS); first_time = true; /* init EAL */ int ret = rte_eal_init(argc, argv); if (ret < 0) rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); /* init timer structure */ rte_timer_init(&timer); rte_timer_init(&stat_timer); /* load deliver_timer, every 1 s, on a slave lcore, reloaded automatically */ uint64_t hz = rte_get_timer_hz(); /* Call rte_timer_manage every 10ms */ TIMER_RESOLUTION_CYCLES = hz / 100; rte_log(RTE_LOG_INFO, RTE_LOGTYPE_USER1, "Clock: %"PRIu64"\n", hz); /* master core */ master_core = rte_lcore_id(); /* slave core */ lcore_id = rte_get_next_lcore(master_core, 0, 1); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER1, "lcore_id: %d\n", lcore_id); 
rte_timer_reset(&timer, hz, PERIODICAL, lcore_id, check_timeout, proposer); /* reset timer */ rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id); /* stat core */ lcore_id = rte_get_next_lcore(lcore_id , 0, 1); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER1, "lcore_id: %d\n", lcore_id); rte_timer_reset(&stat_timer, hz, PERIODICAL, lcore_id, report_stat, NULL); /* init RTE timer library */ rte_timer_subsystem_init(); mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS, MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); if (mbuf_pool == NULL) rte_exit(EXIT_FAILURE, "Cannot create mbuf_pool\n"); /* reset timer */ rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id); if (port_init(portid, mbuf_pool, proposer) != 0) rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", portid); lcore_main(portid, proposer); rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "Free proposer\n"); proposer_free(proposer); return 0; }