/* byteswap to cpu or network order */
static void
bswap_test_data(struct ipv4_7tuple *data, int len, int to_be)
{
    int i;

    for (i = 0; i < len; i++) {
        if (to_be) {
            /* swap all bytes so that they are in network order */
            data[i].ip_dst = rte_cpu_to_be_32(data[i].ip_dst);
            data[i].ip_src = rte_cpu_to_be_32(data[i].ip_src);
            data[i].port_dst = rte_cpu_to_be_16(data[i].port_dst);
            data[i].port_src = rte_cpu_to_be_16(data[i].port_src);
            data[i].vlan = rte_cpu_to_be_16(data[i].vlan);
            data[i].domain = rte_cpu_to_be_16(data[i].domain);
        } else {
            data[i].ip_dst = rte_be_to_cpu_32(data[i].ip_dst);
            data[i].ip_src = rte_be_to_cpu_32(data[i].ip_src);
            data[i].port_dst = rte_be_to_cpu_16(data[i].port_dst);
            data[i].port_src = rte_be_to_cpu_16(data[i].port_src);
            data[i].vlan = rte_be_to_cpu_16(data[i].vlan);
            data[i].domain = rte_be_to_cpu_16(data[i].domain);
        }
    }
}
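/*
 * Illustrative sketch (not from the original test code): round-trip a
 * small table of tuples to network order before handing it to a
 * classifier that expects big-endian keys, then back for host-order
 * comparisons. The field values are made up, and the IPv4() helper
 * macro from rte_ip.h is assumed to be available.
 */
static void
bswap_test_data_example(void)
{
    struct ipv4_7tuple data[1] = { {
        .ip_dst = IPv4(192, 168, 0, 1),
        .ip_src = IPv4(10, 0, 0, 1),
        .port_dst = 80,
        .port_src = 1024,
        .vlan = 100,
        .domain = 0,
    } };

    bswap_test_data(data, 1, 1);    /* host -> network order */
    /* ... run lookups against big-endian keys here ... */
    bswap_test_data(data, 1, 0);    /* network -> host order */
}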
static void
process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
{
    /* Assume there is no ethernet header */
    struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *);
    struct ipv6_extension_fragment *frag_hdr;
    uint16_t frag_data = 0;

    frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
    if (frag_hdr != NULL)
        frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);

    /* If it is not a fragmented packet, forward it as is;
     * otherwise try to reassemble. */
    if ((frag_data & RTE_IPV6_FRAG_USED_MASK) == 0)
        p->tx_buf[p->tx_buf_count++] = pkt;
    else {
        struct rte_mbuf *mo;
        struct rte_ip_frag_tbl *tbl = p->frag_tbl;
        struct rte_ip_frag_death_row *dr = &p->death_row;

        pkt->l3_len = sizeof(*pkt_hdr) + sizeof(*frag_hdr);

        /* Process this fragment */
        mo = rte_ipv6_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(),
                pkt_hdr, frag_hdr);
        if (mo != NULL)
            p->tx_buf[p->tx_buf_count++] = mo;

        rte_ip_frag_free_death_row(&p->death_row, 3);
    }
}
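/*
 * Hedged sketch of how the 16-bit frag_data word of the IPv6 fragment
 * extension header decomposes once converted to CPU order: the offset
 * (in 8-byte units) sits in the upper 13 bits, the M (more fragments)
 * flag in bit 0. Uses the RTE_IPV6_GET_FO()/RTE_IPV6_GET_MF() helpers
 * from rte_ip_frag.h; the sample value is made up.
 */
static void
ipv6_frag_data_example(void)
{
    /* offset = 185 (8-byte units), M = 1, as seen in CPU order */
    uint16_t frag_data = (185 << 3) | 1;

    uint16_t fo = RTE_IPV6_GET_FO(frag_data); /* 185 -> byte offset 1480 */
    uint16_t mf = RTE_IPV6_GET_MF(frag_data); /* 1: more fragments follow */

    /* non-zero means this packet is a fragment needing reassembly */
    int is_frag = (frag_data & RTE_IPV6_FRAG_USED_MASK) != 0;

    (void)fo; (void)mf; (void)is_frag;
}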
int
arp_input(struct rte_mbuf *m)
{
    struct arp_hdr *arph;
    int rc = 0; /* was uninitialized: returned as-is when the target IP
                 * does not match and on ARP_OP_REPLY */

    arph = rte_pktmbuf_mtod(m, struct arp_hdr *);

    if (ust_ip_addr != rte_be_to_cpu_32(arph->arp_data.arp_tip))
        goto out;

    switch (rte_be_to_cpu_16(arph->arp_op)) {
    default:
        rc = -EINVAL;
        break;
    case ARP_OP_REQUEST:
        rc = process_request(arph);
        break;
    case ARP_OP_REPLY:
        break;
    }

out:
    rte_pktmbuf_free(m);
    return rc;
}
static void
process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
{
    /* Assume there is no ethernet header */
    struct ipv4_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv4_hdr *);

    /* Get "More fragments" flag and fragment offset */
    uint16_t frag_field = rte_be_to_cpu_16(pkt_hdr->fragment_offset);
    uint16_t frag_offset = (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK);
    uint16_t frag_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG);

    /* If it is not a fragmented packet, forward it as is;
     * otherwise try to reassemble. */
    if ((frag_flag == 0) && (frag_offset == 0))
        p->tx_buf[p->tx_buf_count++] = pkt;
    else {
        struct rte_mbuf *mo;
        struct rte_ip_frag_tbl *tbl = p->frag_tbl;
        struct rte_ip_frag_death_row *dr = &p->death_row;

        pkt->l3_len = sizeof(*pkt_hdr);

        /* Process this fragment */
        mo = rte_ipv4_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(),
                pkt_hdr);
        if (mo != NULL)
            p->tx_buf[p->tx_buf_count++] = mo;

        rte_ip_frag_free_death_row(&p->death_row, 3);
    }
}
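/*
 * Hedged worked example (sample value made up): the 16-bit IPv4
 * fragment_offset field packs flags in the top 3 bits and the offset,
 * in 8-byte units, in the low 13 bits. The masks below are the real
 * rte_ip.h constants (IPV4_HDR_OFFSET_MASK = 0x1fff,
 * IPV4_HDR_MF_FLAG = 0x2000, IPV4_HDR_DF_FLAG = 0x4000); the function
 * name is illustrative only.
 */
static void
ipv4_frag_field_example(void)
{
    uint16_t frag_field = 0x2004;   /* MF set, offset = 4 units */

    uint16_t offset_units = frag_field & IPV4_HDR_OFFSET_MASK; /* 4 */
    uint16_t byte_offset = offset_units * IPV4_HDR_OFFSET_UNITS; /* 32 */
    int more_frags = (frag_field & IPV4_HDR_MF_FLAG) != 0;     /* 1 */
    int dont_frag = (frag_field & IPV4_HDR_DF_FLAG) != 0;      /* 0 */

    (void)byte_offset; (void)more_frags; (void)dont_frag;
}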
/* Copy Flow Director filter to a VIC ipv4 filter (for Cisco VICs
 * without advanced filter support).
 */
void
copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
         __rte_unused struct rte_eth_fdir_masks *masks)
{
    fltr->type = FILTER_IPV4_5TUPLE;
    fltr->u.ipv4.src_addr = rte_be_to_cpu_32(
        input->flow.ip4_flow.src_ip);
    fltr->u.ipv4.dst_addr = rte_be_to_cpu_32(
        input->flow.ip4_flow.dst_ip);
    fltr->u.ipv4.src_port = rte_be_to_cpu_16(
        input->flow.udp4_flow.src_port);
    fltr->u.ipv4.dst_port = rte_be_to_cpu_16(
        input->flow.udp4_flow.dst_port);

    if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_TCP)
        fltr->u.ipv4.protocol = PROTO_TCP;
    else
        fltr->u.ipv4.protocol = PROTO_UDP;

    fltr->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
}
/*
 * Upper layer processing for a received Ethernet packet.
 */
void
ether_demux(struct ifnet *ifp, struct rte_mbuf *m)
{
    struct ether_hdr *eh;
    int isr;
    u_short ether_type;

    //KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));

    eh = rte_pktmbuf_mtod(m, struct ether_hdr *);
    ether_type = rte_be_to_cpu_16(eh->ether_type);
    rte_pktmbuf_adj(m, ETHER_HDR_LEN);

    /*
     * Dispatch frame to upper layer.
     */
    switch (ether_type) {
    case ETHER_TYPE_IPv4:
        isr = NETISR_IP;
        break;
    case ETHER_TYPE_ARP:
        isr = NETISR_ARP;
        break;
#ifdef INET6
    case ETHER_TYPE_IPv6:
        isr = NETISR_IPV6;
        break;
#endif
    default:
        goto discard;
    }
    netisr_dispatch(isr, m);
    return;

discard:
    /*
     * Packet is to be discarded. If netgraph is present,
     * hand the packet to it for last chance processing;
     * otherwise dispose of it.
     */
    rte_pktmbuf_free(m);
}
void
app_main_loop_rx_flow(void)
{
    const unsigned lcore_id = rte_lcore_id();
    struct rte_mbuf *bufs[RX_BURST_SIZE];
    struct rte_mbuf *buf;
    struct ether_hdr *eth_hdr;
    struct ipv4_hdr *ipv4_hdr;
    struct ipv6_hdr *ipv6_hdr;
    struct tcp_hdr *tcp_hdr;
    struct udp_hdr *udp_hdr;
    struct pkt_info pktinfo;
    int32_t ret;
    uint16_t i, n_rx, queueid;
    uint8_t port;

    port = 0;
    queueid = (uint16_t) app.lcore_conf[lcore_id].queue_id;

    RTE_LOG(INFO, FLOWATCHER,
        "[core %u] packet RX & update flow_table Ready\n", lcore_id);

    while (!app_quit_signal) {
        n_rx = rte_eth_rx_burst(port, queueid, bufs, RX_BURST_SIZE);
        if (unlikely(n_rx == 0)) {
            port++;
            if (port >= app.n_ports)
                port = 0;
            continue;
        }
        app_stat[queueid].rx_count += n_rx;

        for (i = 0; i < n_rx; i++) {
            buf = bufs[i];

            pktinfo.timestamp = rte_rdtsc();
            pktinfo.pktlen = rte_pktmbuf_pkt_len(buf);

            eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

            /* strip vlan_hdr */
            if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
                /* struct vlan_hdr *vh = (struct vlan_hdr *) &eth_hdr[1]; */
                /* buf->ol_flags |= PKT_RX_VLAN_PKT; */
                /* buf->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); */
                /* memmove(rte_pktmbuf_adj(buf, sizeof(struct vlan_hdr)), */
                /*         eth_hdr, 2 * ETHER_ADDR_LEN); */
                /* eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); */
                eth_hdr = (struct ether_hdr *)
                    rte_pktmbuf_adj(buf, sizeof(struct vlan_hdr));
            }

            if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                /* IPv4 */
                pktinfo.type = PKT_IP_TYPE_IPV4;
                ipv4_hdr = (struct ipv4_hdr *) &eth_hdr[1];
                pktinfo.key.v4.src_ip = rte_be_to_cpu_32(ipv4_hdr->src_addr);
                pktinfo.key.v4.dst_ip = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
                pktinfo.key.v4.proto = ipv4_hdr->next_proto_id;

                switch (ipv4_hdr->next_proto_id) {
                case IPPROTO_TCP:
                    tcp_hdr = (struct tcp_hdr *) &ipv4_hdr[1];
                    pktinfo.key.v4.src_port = rte_be_to_cpu_16(tcp_hdr->src_port);
                    pktinfo.key.v4.dst_port = rte_be_to_cpu_16(tcp_hdr->dst_port);
                    break;
                case IPPROTO_UDP:
                    udp_hdr = (struct udp_hdr *) &ipv4_hdr[1];
                    pktinfo.key.v4.src_port = rte_be_to_cpu_16(udp_hdr->src_port);
                    pktinfo.key.v4.dst_port = rte_be_to_cpu_16(udp_hdr->dst_port);
                    break;
                default:
                    pktinfo.key.v4.src_port = 0;
                    pktinfo.key.v4.dst_port = 0;
                    break;
                }

                rte_pktmbuf_free(buf);

                /* update flow_table_v4 */
                ret = update_flow_entry(app.flow_table_v4[queueid], &pktinfo);
                if (ret == 0)
                    app_stat[queueid].updated_tbl_v4_count++;
                else
                    app_stat[queueid].miss_updated_tbl_v4_count++;
            } else if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
                /* IPv6 */
                pktinfo.type = PKT_IP_TYPE_IPV6;
                ipv6_hdr = (struct ipv6_hdr *) &eth_hdr[1];
                rte_memcpy(pktinfo.key.v6.src_ip, ipv6_hdr->src_addr, 16);
                rte_memcpy(pktinfo.key.v6.dst_ip, ipv6_hdr->dst_addr, 16);
                pktinfo.key.v6.proto = ipv6_hdr->proto;

                switch (ipv6_hdr->proto) {
                case IPPROTO_TCP:
                    tcp_hdr = (struct tcp_hdr *) &ipv6_hdr[1];
                    pktinfo.key.v6.src_port = rte_be_to_cpu_16(tcp_hdr->src_port);
                    pktinfo.key.v6.dst_port = rte_be_to_cpu_16(tcp_hdr->dst_port);
                    break;
                case IPPROTO_UDP:
                    udp_hdr = (struct udp_hdr *) &ipv6_hdr[1];
                    pktinfo.key.v6.src_port = rte_be_to_cpu_16(udp_hdr->src_port);
                    pktinfo.key.v6.dst_port = rte_be_to_cpu_16(udp_hdr->dst_port);
                    break;
                default:
                    pktinfo.key.v6.src_port = 0;
                    pktinfo.key.v6.dst_port = 0;
                    break;
                }

                rte_pktmbuf_free(buf);

                /* update flow_table_v6 */
                ret = update_flow_entry(app.flow_table_v6[queueid], &pktinfo);
                if (ret == 0)
                    app_stat[queueid].updated_tbl_v6_count++;
                else
                    app_stat[queueid].miss_updated_tbl_v6_count++;
            } else {
                /* others */
                app_stat[queueid].unknown_pkt_count++;
                rte_pktmbuf_free(buf);
                continue;
            }
        }

        port++;
        if (port >= app.n_ports)
            port = 0;
    }

    RTE_LOG(INFO, FLOWATCHER,
        "[core %u] packet RX & update flow_table finished\n", lcore_id);
}
struct rte_mbuf *
rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
    struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
    struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
{
    struct ip_frag_pkt *fp;
    struct ip_frag_key key;
    uint16_t ip_len, ip_ofs;

    rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16);
    rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16);

    key.id = frag_hdr->id;
    key.key_len = IPV6_KEYLEN;

    ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8;

    /*
     * as per RFC2460, payload length contains all extension headers
     * as well. since we don't support anything but frag headers,
     * this is what we remove from the payload len.
     */
    ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);

    IP_FRAG_LOG(DEBUG, "%s:%d:\n"
        "mbuf: %p, tms: %" PRIu64 ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
        "ofs: %u, len: %u, flags: %#x\n"
        "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
        "max_entries: %u, use_entries: %u\n\n",
        __func__, __LINE__,
        mb, tms, IPv6_KEY_BYTES(key.src_dst), key.id, ip_ofs, ip_len,
        frag_hdr->more_frags, tbl, tbl->max_cycles, tbl->entry_mask,
        tbl->max_entries, tbl->use_entries);

    /* try to find/add entry into the fragment's table. */
    fp = ip_frag_find(tbl, dr, &key, tms);
    if (fp == NULL) {
        IP_FRAG_MBUF2DR(dr, mb);
        return NULL;
    }

    IP_FRAG_LOG(DEBUG, "%s:%d:\n"
        "tbl: %p, max_entries: %u, use_entries: %u\n"
        "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
        "start: %" PRIu64 ", total_size: %u, frag_size: %u, last_idx: %u\n\n",
        __func__, __LINE__,
        tbl, tbl->max_entries, tbl->use_entries,
        fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start,
        fp->total_size, fp->frag_size, fp->last_idx);

    /* process the fragmented packet. */
    mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
            MORE_FRAGS(frag_hdr->frag_data));
    ip_frag_inuse(tbl, fp);

    IP_FRAG_LOG(DEBUG, "%s:%d:\n"
        "mbuf: %p\n"
        "tbl: %p, max_entries: %u, use_entries: %u\n"
        "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
        "start: %" PRIu64 ", total_size: %u, frag_size: %u, last_idx: %u\n\n",
        __func__, __LINE__, mb,
        tbl, tbl->max_entries, tbl->use_entries,
        fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start,
        fp->total_size, fp->frag_size, fp->last_idx);

    return mb;
}
static void
log_packet(struct rte_mbuf *m)
{
    char buf[4096];
    int offset = 0;
    int n;
    uint16_t ether_type;
    uint8_t ipproto;
    char *l3_h = NULL;
    struct ether_hdr *eth_h;
    struct ipv4_hdr *ipv4_h = NULL;
    struct ipv6_hdr *ipv6_h = NULL;
    struct udp_hdr *udp_h = NULL;
    struct tcp_hdr *tcp_h = NULL;
    char ip_src_str[INET6_ADDRSTRLEN];
    char ip_dst_str[INET6_ADDRSTRLEN];

    eth_h = rte_pktmbuf_mtod(m, struct ether_hdr *);
    ether_format_addr(buf + offset, ETHER_ADDR_FMT_SIZE, &eth_h->s_addr);
    offset += ETHER_ADDR_FMT_SIZE - 1;
    strcpy(buf + offset, " -> ");
    offset += strlen(" -> ");
    ether_format_addr(buf + offset, ETHER_ADDR_FMT_SIZE, &eth_h->d_addr);
    offset += ETHER_ADDR_FMT_SIZE - 1;
    strcpy(buf + offset, "\n");
    offset += strlen("\n");

    ether_type = rte_be_to_cpu_16(eth_h->ether_type);
    l3_h = (char *)(eth_h + 1);

    switch (ether_type) {
    case ETHER_TYPE_ARP:
        return;
    case ETHER_TYPE_IPv4:
        ipv4_h = (struct ipv4_hdr *)l3_h;
        ipproto = ipv4_h->next_proto_id;
        inet_ntop(AF_INET, &(ipv4_h->src_addr), ip_src_str,
            INET6_ADDRSTRLEN);
        inet_ntop(AF_INET, &(ipv4_h->dst_addr), ip_dst_str,
            INET6_ADDRSTRLEN);
        n = snprintf(buf + offset, 4096 - offset,
            " IPV4 %s -> %s (ttl %d, id %d, tlen: %d, offset %d, flags(%s%s))\n",
            ip_src_str, ip_dst_str,
            ipv4_h->time_to_live,
            rte_be_to_cpu_16(ipv4_h->packet_id),
            rte_be_to_cpu_16(ipv4_h->total_length),
            (rte_be_to_cpu_16(ipv4_h->fragment_offset) &
                IPV4_HDR_OFFSET_MASK) * IPV4_HDR_OFFSET_UNITS,
            (rte_be_to_cpu_16(ipv4_h->fragment_offset) &
                IPV4_HDR_DF_FLAG) ? "DF" : "",
            (rte_be_to_cpu_16(ipv4_h->fragment_offset) &
                IPV4_HDR_MF_FLAG) ? "MF" : "");
        offset += n;
        break;
    case ETHER_TYPE_IPv6:
        ipv6_h = (struct ipv6_hdr *)l3_h;
        ipproto = ipv6_h->proto;
        inet_ntop(AF_INET6, &(ipv6_h->src_addr), ip_src_str,
            INET6_ADDRSTRLEN);
        inet_ntop(AF_INET6, &(ipv6_h->dst_addr), ip_dst_str,
            INET6_ADDRSTRLEN);
        n = snprintf(buf + offset, 4096 - offset,
            " IPV6 %s -> %s (ttl %d)\n",
            ip_src_str, ip_dst_str, ipv6_h->hop_limits);
        offset += n;
        break;
    default:
        return;
    }

    switch (ipproto) {
    case IPPROTO_UDP:
        udp_h = (struct udp_hdr *)(l3_h + m->l3_len);
        snprintf(buf + offset, 4096 - offset, " UDP %d -> %d\n",
            rte_be_to_cpu_16(udp_h->src_port),
            rte_be_to_cpu_16(udp_h->dst_port));
        break;
    case IPPROTO_TCP:
        tcp_h = (struct tcp_hdr *)(l3_h + m->l3_len);
        snprintf(buf + offset, 4096 - offset, " TCP %d -> %d\n",
            rte_be_to_cpu_16(tcp_h->src_port),
            rte_be_to_cpu_16(tcp_h->dst_port));
        break; /* was missing: TCP fell through to default and skipped LOG_RAW */
    default:
        return;
    }

    LOG_RAW(DEBUG, "%s", buf);
}
int
enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params)
{
    struct enic_fdir_node *key;
    struct filter fltr = {0};
    int32_t pos;
    u8 do_free = 0;
    u16 old_fltr_id = 0;
    u32 flowtype_supported;
    u16 flex_bytes;
    u16 queue;

    flowtype_supported = (
        (RTE_ETH_FLOW_NONFRAG_IPV4_TCP == params->input.flow_type) ||
        (RTE_ETH_FLOW_NONFRAG_IPV4_UDP == params->input.flow_type));

    flex_bytes = ((params->input.flow_ext.flexbytes[1] << 8 & 0xFF00) |
        (params->input.flow_ext.flexbytes[0] & 0xFF));

    if (!enic->fdir.hash ||
        (params->input.flow_ext.vlan_tci & 0xFFF) ||
        !flowtype_supported ||
        flex_bytes ||
        params->action.behavior /* drop */) {
        enic->fdir.stats.f_add++;
        return -ENOTSUP;
    }

    queue = params->action.rx_queue;

    /* See if the key is already there in the table */
    pos = rte_hash_del_key(enic->fdir.hash, params);
    switch (pos) {
    case -EINVAL:
        enic->fdir.stats.f_add++;
        return -EINVAL;
    case -ENOENT:
        /* Add a new classifier entry */
        if (!enic->fdir.stats.free) {
            enic->fdir.stats.f_add++;
            return -ENOSPC;
        }
        key = rte_zmalloc("enic_fdir_node",
                  sizeof(struct enic_fdir_node), 0);
        if (!key) {
            enic->fdir.stats.f_add++;
            return -ENOMEM;
        }
        break;
    default:
        /* The entry is already present in the table.
         * Check if there is a change in queue
         */
        key = enic->fdir.nodes[pos];
        enic->fdir.nodes[pos] = NULL;
        if (unlikely(key->rq_index == queue)) {
            /* Nothing to be done */
            enic->fdir.stats.f_add++;
            pos = rte_hash_add_key(enic->fdir.hash, params);
            if (pos < 0) {
                dev_err(enic, "Add hash key failed\n");
                return pos;
            }
            enic->fdir.nodes[pos] = key;
            dev_warning(enic, "FDIR rule is already present\n");
            return 0;
        }

        if (likely(enic->fdir.stats.free)) {
            /* Add the filter and then delete the old one.
             * This is to avoid packets from going into the
             * default queue during the window between
             * delete and add
             */
            do_free = 1;
            old_fltr_id = key->fltr_id;
        } else {
            /* No free slots in the classifier.
             * Delete the filter and add the modified one later
             */
            vnic_dev_classifier(enic->vdev, CLSF_DEL,
                &key->fltr_id, NULL);
            enic->fdir.stats.free++;
        }
        break;
    }

    key->filter = *params;
    key->rq_index = queue;

    fltr.type = FILTER_IPV4_5TUPLE;
    fltr.u.ipv4.src_addr = rte_be_to_cpu_32(
        params->input.flow.ip4_flow.src_ip);
    fltr.u.ipv4.dst_addr = rte_be_to_cpu_32(
        params->input.flow.ip4_flow.dst_ip);
    fltr.u.ipv4.src_port = rte_be_to_cpu_16(
        params->input.flow.udp4_flow.src_port);
    fltr.u.ipv4.dst_port = rte_be_to_cpu_16(
        params->input.flow.udp4_flow.dst_port);

    if (RTE_ETH_FLOW_NONFRAG_IPV4_TCP == params->input.flow_type)
        fltr.u.ipv4.protocol = PROTO_TCP;
    else
        fltr.u.ipv4.protocol = PROTO_UDP;

    fltr.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;

    if (!vnic_dev_classifier(enic->vdev, CLSF_ADD, &queue, &fltr)) {
        key->fltr_id = queue;
    } else {
        dev_err(enic, "Add classifier entry failed\n");
        enic->fdir.stats.f_add++;
        rte_free(key);
        return -1;
    }

    if (do_free)
        vnic_dev_classifier(enic->vdev, CLSF_DEL, &old_fltr_id, NULL);
    else {
        enic->fdir.stats.free--;
        enic->fdir.stats.add++;
    }

    pos = rte_hash_add_key(enic->fdir.hash, params);
    if (pos < 0) {
        dev_err(enic, "Add hash key failed\n");
        return pos;
    }

    enic->fdir.nodes[pos] = key;
    return 0;
}
/*
 * Process new mbuf with fragment of IPV4 packet.
 * Incoming mbuf should have its l2_len/l3_len fields set up correctly.
 * @param tbl
 *   Table where to lookup/add the fragmented packet.
 * @param mb
 *   Incoming mbuf with IPV4 fragment.
 * @param tms
 *   Fragment arrival timestamp.
 * @param ip_hdr
 *   Pointer to the IPV4 header inside the fragment.
 * @return
 *   Pointer to mbuf for reassembled packet, or NULL if:
 *   - an error occurred.
 *   - not all fragments of the packet are collected yet.
 */
struct rte_mbuf *
rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
    struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
    struct ipv4_hdr *ip_hdr)
{
    struct ip_frag_pkt *fp;
    struct ip_frag_key key;
    const unaligned_uint64_t *psd;
    uint16_t ip_len;
    uint16_t flag_offset, ip_ofs, ip_flag;

    flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset);
    ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
    ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);

    psd = (unaligned_uint64_t *)&ip_hdr->src_addr;
    /* use first 8 bytes only */
    key.src_dst[0] = psd[0];
    key.id = ip_hdr->packet_id;
    key.key_len = IPV4_KEYLEN;

    ip_ofs *= IPV4_HDR_OFFSET_UNITS;
    ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) -
        mb->l3_len);

    IP_FRAG_LOG(DEBUG, "%s:%d:\n"
        "mbuf: %p, tms: %" PRIu64 ", key: <%" PRIx64 ", %#x>, "
        "ofs: %u, len: %u, flags: %#x\n"
        "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
        "max_entries: %u, use_entries: %u\n\n",
        __func__, __LINE__,
        mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, ip_flag,
        tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
        tbl->use_entries);

    /* try to find/add entry into the fragment's table. */
    if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
        IP_FRAG_MBUF2DR(dr, mb);
        return NULL;
    }

    IP_FRAG_LOG(DEBUG, "%s:%d:\n"
        "tbl: %p, max_entries: %u, use_entries: %u\n"
        "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
        ", total_size: %u, frag_size: %u, last_idx: %u\n\n",
        __func__, __LINE__,
        tbl, tbl->max_entries, tbl->use_entries,
        fp, fp->key.src_dst[0], fp->key.id, fp->start,
        fp->total_size, fp->frag_size, fp->last_idx);

    /* process the fragmented packet. */
    mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
    ip_frag_inuse(tbl, fp);

    IP_FRAG_LOG(DEBUG, "%s:%d:\n"
        "mbuf: %p\n"
        "tbl: %p, max_entries: %u, use_entries: %u\n"
        "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
        ", total_size: %u, frag_size: %u, last_idx: %u\n\n",
        __func__, __LINE__, mb,
        tbl, tbl->max_entries, tbl->use_entries,
        fp, fp->key.src_dst[0], fp->key.id, fp->start,
        fp->total_size, fp->frag_size, fp->last_idx);

    return mb;
}
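/*
 * Hedged sketch: the IPv4 reassembly key above is built by reading the
 * adjacent src_addr/dst_addr header fields as one unaligned 64-bit
 * word. The snippet below shows the equivalent byte-wise construction;
 * the function name is illustrative and <string.h>/<assert.h> are
 * assumed.
 */
static void
ipv4_frag_key_example(const struct ipv4_hdr *ip_hdr)
{
    uint64_t key_fast, key_slow;

    /* one 8-byte load spanning src_addr (4B) and dst_addr (4B) */
    key_fast = *(const unaligned_uint64_t *)&ip_hdr->src_addr;

    /* equivalent, strictly portable construction */
    memcpy(&key_slow, &ip_hdr->src_addr, sizeof(uint32_t));
    memcpy((uint8_t *)&key_slow + sizeof(uint32_t),
        &ip_hdr->dst_addr, sizeof(uint32_t));

    /* both yield the same 8-byte flow key */
    assert(key_fast == key_slow);
}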
/*
 * Receive a burst of packets, lookup for ICMP echo requests, and, if any,
 * send back ICMP echo replies.
 */
static void
reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
{
    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
    struct rte_mbuf *pkt;
    struct ether_hdr *eth_h;
    struct vlan_hdr *vlan_h;
    struct arp_hdr *arp_h;
    struct ipv4_hdr *ip_h;
    struct icmp_hdr *icmp_h;
    struct ether_addr eth_addr;
    uint32_t ip_addr;
    uint16_t nb_rx;
    uint16_t nb_tx;
    uint16_t nb_replies;
    uint16_t eth_type;
    uint16_t vlan_id;
    uint16_t arp_op;
    uint16_t arp_pro;
    uint8_t i;
    int l2_len;
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
    uint64_t start_tsc;
    uint64_t end_tsc;
    uint64_t core_cycles;
#endif

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
    start_tsc = rte_rdtsc();
#endif

    /*
     * First, receive a burst of packets.
     */
    nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
                 nb_pkt_per_burst);
    if (unlikely(nb_rx == 0))
        return;

#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
    fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
#endif
    fs->rx_packets += nb_rx;
    nb_replies = 0;
    for (i = 0; i < nb_rx; i++) {
        pkt = pkts_burst[i];
        eth_h = (struct ether_hdr *) pkt->pkt.data;
        eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
        l2_len = sizeof(struct ether_hdr);
        if (verbose_level > 0) {
            printf("\nPort %d pkt-len=%u nb-segs=%u\n",
                   fs->rx_port, pkt->pkt.pkt_len, pkt->pkt.nb_segs);
            ether_addr_dump("  ETH:  src=", &eth_h->s_addr);
            ether_addr_dump(" dst=", &eth_h->d_addr);
        }
        if (eth_type == ETHER_TYPE_VLAN) {
            vlan_h = (struct vlan_hdr *)
                ((char *)eth_h + sizeof(struct ether_hdr));
            l2_len += sizeof(struct vlan_hdr);
            eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
            if (verbose_level > 0) {
                vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci) & 0xFFF;
                printf(" [vlan id=%u]", vlan_id);
            }
        }
        if (verbose_level > 0) {
            printf(" type=0x%04x\n", eth_type);
        }

        /* Reply to ARP requests */
        if (eth_type == ETHER_TYPE_ARP) {
            arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len);
            arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op);
            arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro);
            if (verbose_level > 0) {
                printf("  ARP:  hrd=%d proto=0x%04x hln=%d "
                       "pln=%d op=%u (%s)\n",
                       RTE_BE_TO_CPU_16(arp_h->arp_hrd),
                       arp_pro, arp_h->arp_hln,
                       arp_h->arp_pln, arp_op,
                       arp_op_name(arp_op));
            }
            if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) != ARP_HRD_ETHER) ||
                (arp_pro != ETHER_TYPE_IPv4) ||
                (arp_h->arp_hln != 6) ||
                (arp_h->arp_pln != 4)) {
                rte_pktmbuf_free(pkt);
                if (verbose_level > 0)
                    printf("\n");
                continue;
            }
            if (verbose_level > 0) {
                memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_sha, 6);
                ether_addr_dump("        sha=", &eth_addr);
                memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_sip, 4);
                ipv4_addr_dump(" sip=", ip_addr);
                printf("\n");
                memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_tha, 6);
                ether_addr_dump("        tha=", &eth_addr);
                memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_tip, 4);
                ipv4_addr_dump(" tip=", ip_addr);
                printf("\n");
            }
            if (arp_op != ARP_OP_REQUEST) {
                rte_pktmbuf_free(pkt);
                continue;
            }

            /*
             * Build ARP reply.
             */

            /* Use source MAC address as destination MAC address. */
            ether_addr_copy(&eth_h->s_addr, &eth_h->d_addr);
            /* Set source MAC address with MAC address of TX port */
            ether_addr_copy(&ports[fs->tx_port].eth_addr,
                    &eth_h->s_addr);

            arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
            memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_tha, 6);
            memcpy(arp_h->arp_data.arp_ip.arp_tha,
                   arp_h->arp_data.arp_ip.arp_sha, 6);
            memcpy(arp_h->arp_data.arp_ip.arp_sha,
                   &eth_h->s_addr, 6);

            /* Swap IP addresses in ARP payload */
            memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_sip, 4);
            memcpy(arp_h->arp_data.arp_ip.arp_sip,
                   arp_h->arp_data.arp_ip.arp_tip, 4);
            memcpy(arp_h->arp_data.arp_ip.arp_tip, &ip_addr, 4);
            pkts_burst[nb_replies++] = pkt;
            continue;
        }

        if (eth_type != ETHER_TYPE_IPv4) {
            rte_pktmbuf_free(pkt);
            continue;
        }
        ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len);
        if (verbose_level > 0) {
            ipv4_addr_dump("  IPV4: src=", ip_h->src_addr);
            ipv4_addr_dump(" dst=", ip_h->dst_addr);
            printf(" proto=%d (%s)\n",
                   ip_h->next_proto_id,
                   ip_proto_name(ip_h->next_proto_id));
        }

        /*
         * Check if packet is a ICMP echo request.
         */
        icmp_h = (struct icmp_hdr *) ((char *)ip_h +
                          sizeof(struct ipv4_hdr));
        if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
               (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) &&
               (icmp_h->icmp_code == 0))) {
            rte_pktmbuf_free(pkt);
            continue;
        }

        if (verbose_level > 0)
            printf("  ICMP: echo request seq id=%d\n",
                   rte_be_to_cpu_16(icmp_h->icmp_seq_nb));

        /*
         * Prepare ICMP echo reply to be sent back.
         * - switch ethernet source and destinations addresses,
         * - switch IPv4 source and destinations addresses,
         * - set IP_ICMP_ECHO_REPLY in ICMP header.
         * No need to re-compute the IP header checksum.
         * Reset ICMP checksum.
         */
        ether_addr_copy(&eth_h->s_addr, &eth_addr);
        ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr);
        ether_addr_copy(&eth_addr, &eth_h->d_addr);
        ip_addr = ip_h->src_addr;
        ip_h->src_addr = ip_h->dst_addr;
        ip_h->dst_addr = ip_addr;
        icmp_h->icmp_type = IP_ICMP_ECHO_REPLY;
        icmp_h->icmp_cksum = 0;
        pkts_burst[nb_replies++] = pkt;
    }

    /* Send back ICMP echo replies, if any. */
    if (nb_replies > 0) {
        nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
                     pkts_burst, nb_replies);
        fs->tx_packets += nb_tx;
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
        fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
#endif
        if (unlikely(nb_tx < nb_replies)) {
            fs->fwd_dropped += (nb_replies - nb_tx);
            do {
                rte_pktmbuf_free(pkts_burst[nb_tx]);
            } while (++nb_tx < nb_replies);
        }
    }

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
    end_tsc = rte_rdtsc();
    core_cycles = (end_tsc - start_tsc);
    fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
#endif
}
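/*
 * Hedged side note on the "No need to re-compute the IP header checksum"
 * comment above: the Internet checksum is a one's-complement sum of
 * 16-bit words, so swapping src_addr and dst_addr only reorders summands
 * and leaves the checksum unchanged. A minimal self-check follows; the
 * helper names are illustrative and <assert.h> is assumed.
 */
static uint16_t
ip_hdr_sum16(const void *hdr, size_t len)
{
    const uint16_t *w = hdr;
    uint32_t sum = 0;
    size_t i;

    for (i = 0; i < len / 2; i++) {
        sum += w[i];
        sum = (sum & 0xffff) + (sum >> 16); /* fold carries */
    }
    return (uint16_t)sum;
}

/* swap addresses and confirm the header sum is unchanged */
static void
icmp_echo_cksum_check(struct ipv4_hdr *ip_h)
{
    uint16_t before = ip_hdr_sum16(ip_h, sizeof(*ip_h));
    uint32_t tmp = ip_h->src_addr;

    ip_h->src_addr = ip_h->dst_addr;
    ip_h->dst_addr = tmp;
    assert(ip_hdr_sum16(ip_h, sizeof(*ip_h)) == before);
}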
int
process_arp(struct lls_config *lls_conf, struct gatekeeper_if *iface,
    uint16_t tx_queue, struct rte_mbuf *buf, struct ether_hdr *eth_hdr,
    struct arp_hdr *arp_hdr)
{
    struct ipaddr addr = {
        .proto = ETHER_TYPE_IPv4,
        .ip.v4.s_addr = arp_hdr->arp_data.arp_sip,
    };
    struct lls_mod_req mod_req;
    uint16_t pkt_len = rte_pktmbuf_data_len(buf);
    /* pkt_in_skip_l2() already called by LLS. */
    size_t l2_len = pkt_in_l2_hdr_len(buf);
    int ret;

    if (pkt_len < l2_len + sizeof(*arp_hdr)) {
        LLS_LOG(ERR, "%s interface received ARP packet of size %hu bytes, but it should be at least %zu bytes\n",
            iface->name, pkt_len, l2_len + sizeof(*arp_hdr));
        return -1;
    }

    ret = verify_l2_hdr(iface, eth_hdr, buf->l2_type, "ARP");
    if (ret < 0)
        return ret;

    if (unlikely(arp_hdr->arp_hrd != rte_cpu_to_be_16(ARP_HRD_ETHER) ||
            arp_hdr->arp_pro != rte_cpu_to_be_16(ETHER_TYPE_IPv4) ||
            arp_hdr->arp_hln != ETHER_ADDR_LEN ||
            arp_hdr->arp_pln != sizeof(struct in_addr)))
        return -1;

    /* If sip is not in the same subnet as our IP address, drop. */
    if (!ipv4_in_subnet(iface, &addr))
        return -1;

    /* Update cache with source resolution, regardless of operation. */
    mod_req.cache = &lls_conf->arp_cache;
    mod_req.addr = addr;
    ether_addr_copy(&arp_hdr->arp_data.arp_sha, &mod_req.ha);
    mod_req.port_id = iface->id;
    mod_req.ts = time(NULL);
    RTE_VERIFY(mod_req.ts >= 0);
    lls_process_mod(lls_conf, &mod_req);

    /*
     * If it's a Gratuitous ARP or if the target address
     * is not us, then no response is needed.
     */
    if (is_garp_pkt(arp_hdr) ||
            (iface->ip4_addr.s_addr != arp_hdr->arp_data.arp_tip))
        return -1;

    switch (rte_be_to_cpu_16(arp_hdr->arp_op)) {
    case ARP_OP_REQUEST: {
        uint16_t num_tx;

        /*
         * We are reusing the frame, but an ARP reply always goes out
         * the same interface that received it. Therefore, the L2
         * space of the frame is the same. If needed, the correct
         * VLAN tag was set in verify_l2_hdr().
         */

        /* Set-up Ethernet header. */
        ether_addr_copy(&eth_hdr->s_addr, &eth_hdr->d_addr);
        ether_addr_copy(&iface->eth_addr, &eth_hdr->s_addr);

        /* Set-up ARP header. */
        arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
        ether_addr_copy(&arp_hdr->arp_data.arp_sha,
            &arp_hdr->arp_data.arp_tha);
        arp_hdr->arp_data.arp_tip = arp_hdr->arp_data.arp_sip;
        ether_addr_copy(&iface->eth_addr, &arp_hdr->arp_data.arp_sha);
        arp_hdr->arp_data.arp_sip = iface->ip4_addr.s_addr;

        /* Need to transmit reply. */
        num_tx = rte_eth_tx_burst(iface->id, tx_queue, &buf, 1);
        if (unlikely(num_tx != 1)) {
            LLS_LOG(NOTICE, "ARP reply failed\n");
            return -1;
        }
        return 0;
    }
    case ARP_OP_REPLY:
        /*
         * No further action required. Could check to make sure
         * arp_hdr->arp_data.arp_tha is equal to arp->ether_addr,
         * but there's nothing that can be done if it's wrong anyway.
         */
        return -1;
    default:
        LLS_LOG(NOTICE, "%s received an ARP packet with an unknown operation (%hu)\n",
            __func__, rte_be_to_cpu_16(arp_hdr->arp_op));
        return -1;
    }
}
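/*
 * Hedged sketch of the gratuitous-ARP test referenced above: a GARP
 * announcement typically carries the same IPv4 address as both sender
 * and target. The real is_garp_pkt() in the source tree may apply
 * additional checks; this only illustrates the common case.
 */
static inline int
is_garp_pkt_sketch(const struct arp_hdr *arp_hdr)
{
    /* sender IP == target IP -> announcement, no reply needed */
    return arp_hdr->arp_data.arp_sip == arp_hdr->arp_data.arp_tip;
}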
/*
 * Destination IP host (0.0.0.XXX) defines queue
 * Values below define offset to each field from start of frame
 */
#define SUBPORT_OFFSET 7
#define PIPE_OFFSET    9
#define TC_OFFSET      20
#define QUEUE_OFFSET   20
#define COLOR_OFFSET   19

static inline int
get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe,
        uint32_t *traffic_class, uint32_t *queue, uint32_t *color)
{
    uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *);

    *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) &
        (port_params.n_subports_per_port - 1); /* Outer VLAN ID */
    *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) &
        (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */
    *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) &
        (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */
    *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) &
        (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1); /* Destination IP */
    *color = pdata[COLOR_OFFSET] & 0x03; /* Destination IP */

    return 0;
}

void
app_rx_thread(struct thread_conf **confs)
{
int
sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx,
           struct rte_mbuf **in_seg, size_t *in_off,
           efx_desc_t **pend, unsigned int *pkt_descs,
           size_t *pkt_len)
{
    uint8_t *tsoh;
    const struct tcp_hdr *th;
    efsys_dma_addr_t header_paddr;
    uint16_t packet_id;
    uint32_t sent_seq;
    struct rte_mbuf *m = *in_seg;
    size_t nh_off = m->l2_len; /* IP header offset */
    size_t tcph_off = m->l2_len + m->l3_len; /* TCP header offset */
    size_t header_len = m->l2_len + m->l3_len + m->l4_len;
    const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);

    idx += SFC_TSO_OPT_DESCS_NUM;

    /* Packets which have too big headers should be discarded */
    if (unlikely(header_len > SFC_TSOH_STD_LEN))
        return EMSGSIZE;

    /*
     * The TCP header must start at most 208 bytes into the frame.
     * If it starts later than this then the NIC won't realise
     * it's a TCP packet and TSO edits won't be applied
     */
    if (unlikely(tcph_off > encp->enc_tx_tso_tcp_header_offset_limit))
        return EMSGSIZE;

    header_paddr = rte_pktmbuf_iova(m);

    /*
     * Sometimes headers may be split across multiple mbufs. In such cases
     * we need to glue those pieces and store them in some temporary place.
     * Also, packet headers must be contiguous in memory, so that
     * they can be referred to with a single DMA descriptor. EF10 has no
     * limitations on address boundaries crossing by DMA descriptor data.
     */
    if (m->data_len < header_len) {
        tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh;
        sfc_tso_prepare_header(tsoh, header_len, in_seg, in_off);

        header_paddr = rte_malloc_virt2iova((void *)tsoh);
    } else {
        if (m->data_len == header_len) {
            *in_off = 0;
            *in_seg = m->next;
        } else {
            *in_off = header_len;
        }

        tsoh = rte_pktmbuf_mtod(m, uint8_t *);
    }

    /* Handle IP header */
    if (m->ol_flags & PKT_TX_IPV4) {
        const struct ipv4_hdr *iphe4;

        iphe4 = (const struct ipv4_hdr *)(tsoh + nh_off);
        rte_memcpy(&packet_id, &iphe4->packet_id, sizeof(uint16_t));
        packet_id = rte_be_to_cpu_16(packet_id);
    } else if (m->ol_flags & PKT_TX_IPV6) {
        packet_id = 0;
    } else {
        return EINVAL;
    }

    /* Handle TCP header */
    th = (const struct tcp_hdr *)(tsoh + tcph_off);

    rte_memcpy(&sent_seq, &th->sent_seq, sizeof(uint32_t));
    sent_seq = rte_be_to_cpu_32(sent_seq);

    efx_tx_qdesc_tso2_create(txq->common, packet_id, 0, sent_seq,
                 m->tso_segsz, *pend, EFX_TX_FATSOV2_OPT_NDESCS);

    *pend += EFX_TX_FATSOV2_OPT_NDESCS;
    *pkt_descs += EFX_TX_FATSOV2_OPT_NDESCS;

    efx_tx_qdesc_dma_create(txq->common, header_paddr, header_len,
                B_FALSE, (*pend)++);
    (*pkt_descs)++;
    *pkt_len -= header_len;

    return 0;
}
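/*
 * Hedged micro-example of the unaligned-safe pattern used above for
 * packet_id and sent_seq: copy the possibly-unaligned big-endian field
 * into a local variable before byteswapping, instead of dereferencing
 * a cast pointer. Assumes rte_memcpy.h and rte_byteorder.h; the helper
 * name is illustrative.
 */
static uint32_t
read_be32_unaligned(const void *field)
{
    uint32_t v;

    rte_memcpy(&v, field, sizeof(v));
    return rte_be_to_cpu_32(v);
}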
static void
ieee1588_packet_fwd(struct fwd_stream *fs)
{
    struct rte_mbuf *mb;
    struct ether_hdr *eth_hdr;
    struct ptpv2_msg *ptp_hdr;
    uint16_t eth_type;

    /*
     * Receive 1 packet at a time.
     */
    if (rte_eth_rx_burst(fs->rx_port, fs->rx_queue, &mb, 1) == 0)
        return;

    fs->rx_packets += 1;

    /*
     * Check that the received packet is a PTP packet that was detected
     * by the hardware.
     */
    eth_hdr = (struct ether_hdr *)mb->pkt.data;
    eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
    if (! (mb->ol_flags & PKT_RX_IEEE1588_PTP)) {
        if (eth_type == ETHER_TYPE_1588) {
            printf("Port %u Received PTP packet not filtered"
                   " by hardware\n",
                   (unsigned) fs->rx_port);
        } else {
            printf("Port %u Received non PTP packet type=0x%04x "
                   "len=%u\n",
                   (unsigned) fs->rx_port, eth_type,
                   (unsigned) mb->pkt.pkt_len);
        }
        rte_pktmbuf_free(mb);
        return;
    }
    if (eth_type != ETHER_TYPE_1588) {
        printf("Port %u Received NON PTP packet wrongly"
               " detected by hardware\n",
               (unsigned) fs->rx_port);
        rte_pktmbuf_free(mb);
        return;
    }

    /*
     * Check that the received PTP packet is a PTP V2 packet of type
     * PTP_SYNC_MESSAGE.
     */
    ptp_hdr = (struct ptpv2_msg *) ((char *) mb->pkt.data +
                    sizeof(struct ether_hdr));
    if (ptp_hdr->version != 0x02) {
        printf("Port %u Received PTP V2 Ethernet frame with wrong PTP"
               " protocol version 0x%x (should be 0x02)\n",
               (unsigned) fs->rx_port, ptp_hdr->version);
        rte_pktmbuf_free(mb);
        return;
    }
    if (ptp_hdr->msg_id != PTP_SYNC_MESSAGE) {
        printf("Port %u Received PTP V2 Ethernet frame with unexpected"
               " messageID 0x%x (expected 0x0 - PTP_SYNC_MESSAGE)\n",
               (unsigned) fs->rx_port, ptp_hdr->msg_id);
        rte_pktmbuf_free(mb);
        return;
    }
    printf("Port %u IEEE1588 PTP V2 SYNC Message filtered by hardware\n",
           (unsigned) fs->rx_port);

    /*
     * Check that the received PTP packet has been timestamped by the
     * hardware.
     */
    if (! (mb->ol_flags & PKT_RX_IEEE1588_TMST)) {
        printf("Port %u Received PTP packet not timestamped"
               " by hardware\n",
               (unsigned) fs->rx_port);
        rte_pktmbuf_free(mb);
        return;
    }

    /* Check the RX timestamp */
    port_ieee1588_rx_timestamp_check(fs->rx_port);

    /* Forward PTP packet with hardware TX timestamp */
    mb->ol_flags |= PKT_TX_IEEE1588_TMST;
    fs->tx_packets += 1;
    if (rte_eth_tx_burst(fs->rx_port, fs->tx_queue, &mb, 1) == 0) {
        printf("Port %u sent PTP packet dropped\n",
               (unsigned) fs->rx_port);
        fs->fwd_dropped += 1;
        rte_pktmbuf_free(mb);
        return;
    }

    /*
     * Check the TX timestamp.
     */
    port_ieee1588_tx_timestamp_check(fs->rx_port);
}
static int
paxos_rx_process(struct rte_mbuf *pkt, struct proposer *proposer)
{
    int ret = 0;
    uint8_t l4_proto = 0;
    uint16_t outer_header_len;
    union tunnel_offload_info info = { .data = 0 };
    struct udp_hdr *udp_hdr;
    struct paxos_hdr *paxos_hdr;
    struct ether_hdr *phdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);

    parse_ethernet(phdr, &info, &l4_proto);

    if (l4_proto != IPPROTO_UDP)
        return -1;

    udp_hdr = (struct udp_hdr *)((char *)phdr +
            info.outer_l2_len + info.outer_l3_len);

    /* if UDP dst port is not either PROPOSER or LEARNER port */
    if (!(udp_hdr->dst_port == rte_cpu_to_be_16(PROPOSER_PORT) ||
            udp_hdr->dst_port == rte_cpu_to_be_16(LEARNER_PORT)) &&
            (pkt->packet_type & RTE_PTYPE_TUNNEL_MASK) == 0)
        return -1;

    paxos_hdr = (struct paxos_hdr *)((char *)udp_hdr +
            sizeof(struct udp_hdr));

    if (rte_get_log_level() == RTE_LOG_DEBUG) {
        //rte_hexdump(stdout, "udp", udp_hdr, sizeof(struct udp_hdr));
        //rte_hexdump(stdout, "paxos", paxos_hdr, sizeof(struct paxos_hdr));
        print_paxos_hdr(paxos_hdr);
    }

    int value_len = rte_be_to_cpu_16(paxos_hdr->value_len);
    struct paxos_value *v = paxos_value_new((char *)paxos_hdr->paxosval,
            value_len);

    switch (rte_be_to_cpu_16(paxos_hdr->msgtype)) {
    case PAXOS_PROMISE: {
        struct paxos_promise promise = {
            .iid = rte_be_to_cpu_32(paxos_hdr->inst),
            .ballot = rte_be_to_cpu_16(paxos_hdr->rnd),
            .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd),
            .aid = rte_be_to_cpu_16(paxos_hdr->acptid),
            .value = *v,
        };
        proposer_handle_promise(proposer, &promise);
        break;
    }
    case PAXOS_ACCEPT: {
        if (first_time) {
            proposer_preexecute(proposer);
            first_time = false;
        }
        struct paxos_accept acpt = {
            .iid = rte_be_to_cpu_32(paxos_hdr->inst),
            .ballot = rte_be_to_cpu_16(paxos_hdr->rnd),
            .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd),
            .aid = rte_be_to_cpu_16(paxos_hdr->acptid),
            .value = *v,
        };
        proposer_handle_accept(proposer, &acpt);
        break;
    }
    case PAXOS_ACCEPTED: {
        struct paxos_accepted ack = {
            .iid = rte_be_to_cpu_32(paxos_hdr->inst),
            .ballot = rte_be_to_cpu_16(paxos_hdr->rnd),
            .value_ballot = rte_be_to_cpu_16(paxos_hdr->vrnd),
            .aid = rte_be_to_cpu_16(paxos_hdr->acptid),
            .value = *v,
        };
        proposer_handle_accepted(proposer, &ack);
        break;
    }
    default:
        break;
    }

    outer_header_len = info.outer_l2_len + info.outer_l3_len +
        sizeof(struct udp_hdr) + sizeof(struct paxos_hdr);

    rte_pktmbuf_adj(pkt, outer_header_len);

    return ret;
}

static uint16_t
add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
        struct rte_mbuf **pkts, uint16_t nb_pkts,
        uint16_t max_pkts __rte_unused, void *user_param)
{
    struct proposer *proposer = (struct proposer *)user_param;
    unsigned i;
    uint64_t now = rte_rdtsc();

    for (i = 0; i < nb_pkts; i++) {
        pkts[i]->udata64 = now;
        paxos_rx_process(pkts[i], proposer);
    }
    return nb_pkts;
}

static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool,
        struct proposer *proposer)
{
    struct rte_eth_dev_info dev_info;
    struct rte_eth_txconf *txconf;
    struct rte_eth_rxconf *rxconf;
    struct rte_eth_conf port_conf = port_conf_default;
    const uint16_t rx_rings = 1, tx_rings = 1;
    int retval;
    uint16_t q;

    rte_eth_dev_info_get(port, &dev_info);

    rxconf = &dev_info.default_rxconf;
    txconf = &dev_info.default_txconf;

    txconf->txq_flags &= PKT_TX_IPV4;
    txconf->txq_flags &= PKT_TX_UDP_CKSUM;

    if (port >= rte_eth_dev_count())
        return -1;

    retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
    if (retval != 0)
        return retval;

    for (q = 0; q < rx_rings; q++) {
        retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
                rte_eth_dev_socket_id(port), rxconf, mbuf_pool);
        if (retval < 0)
            return retval;
    }

    for (q = 0; q < tx_rings; q++) {
        retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
                rte_eth_dev_socket_id(port), txconf);
        if (retval < 0)
            return retval;
    }

    retval = rte_eth_dev_start(port);
    if (retval < 0)
        return retval;

    struct ether_addr addr;
    rte_eth_macaddr_get(port, &addr);
    rte_eth_promiscuous_enable(port);

    rte_eth_add_rx_callback(port, 0, add_timestamps, proposer);
    rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
    return 0;
}

static void
lcore_main(uint8_t port, __rte_unused struct proposer *p)
{
    proposer_preexecute(p);

    for (;;) {
        // Check if signal is received
        if (force_quit)
            break;
        struct rte_mbuf *bufs[BURST_SIZE];
        const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
                BURST_SIZE);
        if (unlikely(nb_rx == 0))
            continue;
        uint16_t buf;
        for (buf = 0; buf < nb_rx; buf++)
            rte_pktmbuf_free(bufs[buf]);
    }
}

static __attribute__((noreturn)) int
lcore_mainloop(__attribute__((unused)) void *arg)
{
    uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
    unsigned lcore_id;

    lcore_id = rte_lcore_id();
    rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_TIMER,
        "Starting mainloop on core %u\n", lcore_id);

    while (1) {
        cur_tsc = rte_rdtsc();
        diff_tsc = cur_tsc - prev_tsc;
        if (diff_tsc > TIMER_RESOLUTION_CYCLES) {
            rte_timer_manage();
            prev_tsc = cur_tsc;
        }
    }
}

static void
report_stat(struct rte_timer *tim, __attribute((unused)) void *arg)
{
    /* print stat */
    uint32_t count = rte_atomic32_read(&stat);
    rte_log(RTE_LOG_INFO, RTE_LOGTYPE_USER8,
        "Throughput = %8u msg/s\n", count);
    /* reset stat */
    rte_atomic32_set(&stat, 0);
    /* this timer is automatically reloaded until we decide to stop it */
    if (force_quit)
        rte_timer_stop(tim);
}

static void
check_timeout(struct rte_timer *tim, void *arg)
{
    struct proposer *p = (struct proposer *)arg;
    unsigned lcore_id = rte_lcore_id();

    rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8,
        "%s() on lcore_id %i\n", __func__, lcore_id);

    struct paxos_message out;
    out.type = PAXOS_PREPARE;
    struct timeout_iterator *iter = proposer_timeout_iterator(p);
    while (timeout_iterator_prepare(iter, &out.u.prepare)) {
        rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8,
            "%s Send PREPARE inst %d ballot %d\n",
            __func__, out.u.prepare.iid, out.u.prepare.ballot);
        send_paxos_message(&out);
    }
    out.type = PAXOS_ACCEPT;
    while (timeout_iterator_accept(iter, &out.u.accept)) {
        rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8,
            "%s: Send ACCEPT inst %d ballot %d\n",
            __func__, out.u.prepare.iid, out.u.prepare.ballot);
        send_paxos_message(&out);
    }
    timeout_iterator_free(iter);
    /* this timer is automatically reloaded until we decide to stop it */
    if (force_quit)
        rte_timer_stop(tim);
}

int
main(int argc, char *argv[])
{
    uint8_t portid = 0;
    unsigned master_core, lcore_id;

    signal(SIGTERM, signal_handler);
    signal(SIGINT, signal_handler);
    force_quit = false;

    int proposer_id = 0;

    if (rte_get_log_level() == RTE_LOG_DEBUG)
        paxos_config.verbosity = PAXOS_LOG_DEBUG;

    struct proposer *proposer = proposer_new(proposer_id, NUM_ACCEPTORS);
    first_time = true;

    /* init EAL */
    int ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

    /* init timer structure */
    rte_timer_init(&timer);
    rte_timer_init(&stat_timer);

    /* load deliver_timer, every 1 s, on a slave lcore, reloaded automatically */
    uint64_t hz = rte_get_timer_hz();
    /* Call rte_timer_manage every 10ms */
    TIMER_RESOLUTION_CYCLES = hz / 100;
    rte_log(RTE_LOG_INFO, RTE_LOGTYPE_USER1, "Clock: %"PRIu64"\n", hz);

    /* master core */
    master_core = rte_lcore_id();
    /* slave core */
    lcore_id = rte_get_next_lcore(master_core, 0, 1);
    rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER1, "lcore_id: %d\n", lcore_id);
    rte_timer_reset(&timer, hz, PERIODICAL, lcore_id,
        check_timeout, proposer);
    /* reset timer */
    rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id);

    /* stat core */
    lcore_id = rte_get_next_lcore(lcore_id, 0, 1);
    rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER1, "lcore_id: %d\n", lcore_id);
    rte_timer_reset(&stat_timer, hz, PERIODICAL, lcore_id,
        report_stat, NULL);

    /* init RTE timer library */
    rte_timer_subsystem_init();

    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS,
            MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
            rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf_pool\n");

    /* reset timer */
    rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id);

    if (port_init(portid, mbuf_pool, proposer) != 0)
        rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", portid);

    lcore_main(portid, proposer);

    rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8, "Free proposer\n");
    proposer_free(proposer);
    return 0;
}
static int
vhost_bdev_scsi_process_block(struct vhost_block_dev *bdev,
                  struct vhost_scsi_task *task)
{
    uint64_t lba, *temp64;
    uint32_t xfer_len, *temp32;
    uint16_t *temp16;
    uint8_t *cdb = (uint8_t *)task->req->cdb;

    switch (cdb[0]) {
    case SBC_READ_6:
    case SBC_WRITE_6:
        lba = (uint64_t)cdb[1] << 16;
        lba |= (uint64_t)cdb[2] << 8;
        lba |= (uint64_t)cdb[3];
        xfer_len = cdb[4];
        if (xfer_len == 0)
            xfer_len = 256;
        return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);

    case SBC_READ_10:
    case SBC_WRITE_10:
        temp32 = (uint32_t *)&cdb[2];
        lba = rte_be_to_cpu_32(*temp32);
        temp16 = (uint16_t *)&cdb[7];
        xfer_len = rte_be_to_cpu_16(*temp16);
        return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);

    case SBC_READ_12:
    case SBC_WRITE_12:
        temp32 = (uint32_t *)&cdb[2];
        lba = rte_be_to_cpu_32(*temp32);
        temp32 = (uint32_t *)&cdb[6];
        xfer_len = rte_be_to_cpu_32(*temp32);
        return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);

    case SBC_READ_16:
    case SBC_WRITE_16:
        temp64 = (uint64_t *)&cdb[2];
        lba = rte_be_to_cpu_64(*temp64);
        temp32 = (uint32_t *)&cdb[10];
        xfer_len = rte_be_to_cpu_32(*temp32);
        return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);

    case SBC_READ_CAPACITY_10: {
        uint8_t buffer[8];

        if (bdev->blockcnt - 1 > 0xffffffffULL)
            memset(buffer, 0xff, 4);
        else {
            temp32 = (uint32_t *)buffer;
            *temp32 = rte_cpu_to_be_32(bdev->blockcnt - 1);
        }
        temp32 = (uint32_t *)&buffer[4];
        *temp32 = rte_cpu_to_be_32(bdev->blocklen);
        memcpy(task->iovs[0].iov_base, buffer, sizeof(buffer));
        task->resp->status = SCSI_STATUS_GOOD;
        return sizeof(buffer);
    }

    case SBC_SYNCHRONIZE_CACHE_10:
    case SBC_SYNCHRONIZE_CACHE_16:
        task->resp->status = SCSI_STATUS_GOOD;
        return 0;
    }

    scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION,
                 SCSI_SENSE_ILLEGAL_REQUEST,
                 SCSI_ASC_INVALID_FIELD_IN_CDB,
                 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
    return 0;
}
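/*
 * Hedged worked example: SCSI CDB fields are big-endian, so the
 * READ(10)/WRITE(10) parsing above maps bytes 2-5 to the LBA and bytes
 * 7-8 to the transfer length. The sample CDB below is made up (0x28 is
 * the standard READ(10) opcode), and the casts assume the platform
 * tolerates unaligned loads, as the code above does.
 */
static void
sbc_read10_cdb_example(void)
{
    /* READ(10): LBA = 0x00001000, transfer length = 8 blocks */
    uint8_t cdb[10] = {
        0x28,                   /* READ(10) opcode */
        0x00,
        0x00, 0x00, 0x10, 0x00, /* LBA, big-endian */
        0x00,
        0x00, 0x08,             /* transfer length, big-endian */
        0x00,
    };

    uint64_t lba = rte_be_to_cpu_32(*(uint32_t *)&cdb[2]);      /* 4096 */
    uint32_t xfer_len = rte_be_to_cpu_16(*(uint16_t *)&cdb[7]); /* 8 */

    (void)lba; (void)xfer_len;
}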