/* * arp responses from vhostX need to be cross connected. nothing * needs to be done for arp responses from VMs, while responses * from fabric needs to be Xconnected and sent to agent */ static int vr_handle_arp_reply(struct vr_arp *sarp, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { struct vr_interface *vif = pkt->vp_if; struct vr_packet *cloned_pkt; if (vif_mode_xconnect(vif) || vif->vif_type == VIF_TYPE_HOST) return vif_xconnect(vif, pkt, fmd); if (vif->vif_type != VIF_TYPE_PHYSICAL) { if (vif_is_virtual(vif)) { vr_preset(pkt); return vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ARP, NULL); } vr_pfree(pkt, VP_DROP_INVALID_IF); return 0; } cloned_pkt = vr_pclone(pkt); if (cloned_pkt) { vr_preset(cloned_pkt); vif_xconnect(vif, cloned_pkt, fmd); } return vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ARP, NULL); }
unsigned int vr_trap_flow(struct vrouter *router, struct vr_flow_entry *fe, struct vr_packet *pkt, unsigned int index) { unsigned int trap_reason; struct vr_packet *npkt; struct vr_flow_trap_arg ta; npkt = vr_pclone(pkt); if (!npkt) return -ENOMEM; vr_preset(npkt); switch (fe->fe_flags & VR_FLOW_FLAG_TRAP_MASK) { default: trap_reason = AGENT_TRAP_FLOW_MISS; ta.vfta_index = index; if ((fe->fe_type == VP_TYPE_IP) || (fe->fe_type == VP_TYPE_IP6)) ta.vfta_nh_index = fe->fe_key.flow_nh_id; break; } return vr_trap(npkt, fe->fe_vrf, trap_reason, &ta); }
static void vr_flow_init_close(struct vrouter *router, struct vr_flow_entry *flow_e, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { unsigned int flow_index; unsigned int head_room = sizeof(struct agent_hdr) + sizeof(struct vr_eth); struct vr_packet *pkt_c; pkt_c = vr_pclone(pkt); if (!pkt_c) return; vr_preset(pkt_c); if (vr_pcow(pkt_c, head_room)) { vr_pfree(pkt_c, VP_DROP_PCOW_FAIL); return; } flow_index = fmd->fmd_flow_index; vr_trap(pkt_c, fmd->fmd_dvrf, AGENT_TRAP_SESSION_CLOSE, (void *)&flow_index); return; }
static int vr_handle_arp_request(struct vr_arp *sarp, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { bool handled = true; unsigned char dmac[VR_ETHER_ALEN]; mac_response_t arp_result; struct vr_packet *pkt_c; struct vr_interface *vif = pkt->vp_if; arp_result = vif->vif_mac_request(vif, pkt, fmd, dmac); switch (arp_result) { case MR_PROXY: vr_arp_proxy(sarp, pkt, fmd, dmac); break; case MR_XCONNECT: vif_xconnect(pkt->vp_if, pkt, fmd); break; case MR_TRAP_X: pkt_c = vr_pclone(pkt); if (pkt_c) vif_xconnect(pkt->vp_if, pkt_c, fmd); vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ARP, NULL); break; case MR_TRAP: vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ARP, NULL); break; case MR_DROP: vr_pfree(pkt, VP_DROP_INVALID_ARP); break; case MR_FLOOD: default: handled = false; break; } return handled; }
/* * arp responses from vhostX need to be cross connected. nothing * needs to be done for arp responses from VMs, while responses * from fabric needs to be Xconnected and sent to agent */ static int vr_handle_arp_reply(struct vrouter *router, unsigned short vrf, struct vr_arp *sarp, struct vr_packet *pkt) { struct vr_interface *vif = pkt->vp_if; struct vr_packet *cloned_pkt; if (vif_mode_xconnect(vif) || vif->vif_type == VIF_TYPE_HOST) return vif_xconnect(vif, pkt); if (vif->vif_type != VIF_TYPE_PHYSICAL) { vr_pfree(pkt, VP_DROP_INVALID_IF); return 0; } cloned_pkt = vr_pclone(pkt); if (cloned_pkt) { vr_preset(cloned_pkt); vif_xconnect(vif, cloned_pkt); } return vr_trap(pkt, vrf, AGENT_TRAP_ARP, NULL); }
/*
 * Apply the action stored in a flow entry to a packet.
 *
 * Side effects on fmd: fmd_dvrf is set from the flow entry, and may be
 * overridden (with fmd_to_me set) when the VRF-translate flag is on.
 * The packet may be mirrored (on copies of fmd) before the action runs.
 *
 * Returns FLOW_CONSUMED when the packet was freed or otherwise taken
 * over here, FLOW_FORWARD when the caller should keep forwarding it,
 * or whatever vr_flow_nat() returns for NAT flows.
 */
static flow_result_t
vr_flow_action(struct vrouter *router, struct vr_flow_entry *fe,
        unsigned int index, struct vr_packet *pkt,
        struct vr_forwarding_md *fmd)
{
    int valid_src;
    flow_result_t result;
    struct vr_forwarding_md mirror_fmd;
    struct vr_nexthop *src_nh;
    struct vr_packet *pkt_clone;

    fmd->fmd_dvrf = fe->fe_vrf;
    /*
     * for now, we will not use dvrf if VRFT is set, because the RPF
     * check needs to happen in the source vrf
     */
    src_nh = __vrouter_get_nexthop(router, fe->fe_src_nh_index);
    if (!src_nh) {
        vr_pfree(pkt, VP_DROP_INVALID_NH);
        return FLOW_CONSUMED;
    }

    /* reverse-path check against the flow's recorded source nexthop */
    if (src_nh->nh_validate_src) {
        valid_src = src_nh->nh_validate_src(pkt, src_nh, fmd, NULL);
        if (valid_src == NH_SOURCE_INVALID) {
            vr_pfree(pkt, VP_DROP_INVALID_SOURCE);
            return FLOW_CONSUMED;
        }

        /*
         * a mismatch (e.g. source moved to another ECMP member) is not
         * fatal: trap a clone so the agent can resolve the new source,
         * while the original packet continues through the action below
         */
        if (valid_src == NH_SOURCE_MISMATCH) {
            pkt_clone = vr_pclone(pkt);
            if (pkt_clone) {
                vr_preset(pkt_clone);
                if (vr_pcow(pkt_clone, sizeof(struct vr_eth) +
                            sizeof(struct agent_hdr))) {
                    vr_pfree(pkt_clone, VP_DROP_PCOW_FAIL);
                } else {
                    vr_trap(pkt_clone, fmd->fmd_dvrf,
                            AGENT_TRAP_ECMP_RESOLVE, &fmd->fmd_flow_index);
                }
            }
        }
    }

    /* VRF translation: switch the destination vrf for the rest of the path */
    if (fe->fe_flags & VR_FLOW_FLAG_VRFT) {
        if (fmd->fmd_dvrf != fe->fe_dvrf) {
            fmd->fmd_dvrf = fe->fe_dvrf;
            fmd->fmd_to_me = 1;
        }
    }

    /* mirror on private copies of fmd so the main path's fmd is untouched */
    if (fe->fe_flags & VR_FLOW_FLAG_MIRROR) {
        if (fe->fe_mirror_id < VR_MAX_MIRROR_INDICES) {
            mirror_fmd = *fmd;
            mirror_fmd.fmd_ecmp_nh_index = -1;
            vr_mirror(router, fe->fe_mirror_id, pkt, &mirror_fmd);
        }
        if (fe->fe_sec_mirror_id < VR_MAX_MIRROR_INDICES) {
            mirror_fmd = *fmd;
            mirror_fmd.fmd_ecmp_nh_index = -1;
            vr_mirror(router, fe->fe_sec_mirror_id, pkt, &mirror_fmd);
        }
    }

    switch (fe->fe_action) {
    case VR_FLOW_ACTION_DROP:
        vr_pfree(pkt, VP_DROP_FLOW_ACTION_DROP);
        result = FLOW_CONSUMED;
        break;

    case VR_FLOW_ACTION_FORWARD:
        result = FLOW_FORWARD;
        break;

    case VR_FLOW_ACTION_NAT:
        result = vr_flow_nat(fe, pkt, fmd);
        break;

    default:
        /* unknown action in the entry: treat as a drop */
        vr_pfree(pkt, VP_DROP_FLOW_ACTION_INVALID);
        result = FLOW_CONSUMED;
        break;
    }

    return result;
}
/*
 * Entry point for IPv4 packets into flow processing.
 *
 * Extracts a 5-tuple flow key from the packet — handling ICMP errors
 * (key built from the embedded inner header, with ports/addresses
 * swapped to match the original flow) and non-first fragments (ports
 * recovered from the fragment table) — then parses/looks up the flow
 * and forwards, traps, or drops accordingly.
 *
 * Returns 0 when the packet is consumed here; otherwise returns the
 * result of the downstream handler (vr_ip_rcv / vr_flow_forward /
 * vr_trap / vr_flow_lookup).
 */
unsigned int
vr_flow_inet_input(struct vrouter *router, unsigned short vrf,
        struct vr_packet *pkt, unsigned short proto,
        struct vr_forwarding_md *fmd)
{
    struct vr_flow_key key, *key_p = &key;
    struct vr_ip *ip, *icmp_pl_ip = NULL;
    struct vr_fragment *frag;
    unsigned int flow_parse_res;
    unsigned int trap_res = 0;
    unsigned int sip, dip;
    unsigned short *t_hdr, sport, dport;
    unsigned char ip_proto;
    struct vr_icmp *icmph;

    /*
     * interface is in a mode where it wants all packets to be received
     * without doing lookups to figure out whether packets were destined
     * to me or not
     */
    if (pkt->vp_flags & VP_FLAG_TO_ME)
        return vr_ip_rcv(router, pkt, fmd);

    ip = (struct vr_ip *)pkt_network_header(pkt);
    ip_proto = ip->ip_proto;

    /* if the packet is not a fragment, we easily know the sport, and dport */
    if (vr_ip_transport_header_valid(ip)) {
        t_hdr = (unsigned short *)((char *)ip + (ip->ip_hl * 4));
        if (ip_proto == VR_IP_PROTO_ICMP) {
            icmph = (struct vr_icmp *)t_hdr;
            if (vr_icmp_error(icmph)) {
                /*
                 * ICMP error: the flow key must come from the embedded
                 * (inner) IP header that follows the ICMP header
                 */
                icmp_pl_ip = (struct vr_ip *)(icmph + 1);
                ip_proto = icmp_pl_ip->ip_proto;
                t_hdr = (unsigned short *)((char *)icmp_pl_ip +
                        (icmp_pl_ip->ip_hl * 4));
                if (ip_proto == VR_IP_PROTO_ICMP)
                    icmph = (struct vr_icmp *)t_hdr;
            }
        }

        if (ip_proto == VR_IP_PROTO_ICMP) {
            /* echo/echo-reply are keyed by the echo id; others by type */
            if (icmph->icmp_type == VR_ICMP_TYPE_ECHO ||
                    icmph->icmp_type == VR_ICMP_TYPE_ECHO_REPLY) {
                sport = icmph->icmp_eid;
                dport = VR_ICMP_TYPE_ECHO_REPLY;
            } else {
                sport = 0;
                dport = icmph->icmp_type;
            }
        } else {
            if (icmp_pl_ip) {
                /* inner header of an ICMP error: ports swapped to
                 * reconstruct the original flow's direction */
                sport = *(t_hdr + 1);
                dport = *t_hdr;
            } else {
                sport = *t_hdr;
                dport = *(t_hdr + 1);
            }
        }
    } else {
        /* ...else, we need to get it from somewhere */
        flow_parse_res = vr_flow_parse(router, NULL, pkt, &trap_res);
        /* ...and it really matters only if we need to do a flow lookup */
        if (flow_parse_res == VR_FLOW_LOOKUP) {
            /* non-first fragment: ports come from the fragment table */
            frag = vr_fragment_get(router, vrf, ip);
            if (!frag) {
                vr_pfree(pkt, VP_DROP_FRAGMENTS);
                return 0;
            }
            sport = frag->f_sport;
            dport = frag->f_dport;
            if (vr_ip_fragment_tail(ip))
                vr_fragment_del(frag);
        } else {
            /*
             * since there is no other way of deriving a key, set the
             * key_p to NULL, indicating to code below that there is
             * indeed no need for flow lookup
             */
            key_p = NULL;
        }
    }

    if (key_p) {
        /* we have everything to make a key */
        if (icmp_pl_ip) {
            /* inner header: swap addresses to match the original flow */
            sip = icmp_pl_ip->ip_daddr;
            dip = icmp_pl_ip->ip_saddr;
        } else {
            sip = ip->ip_saddr;
            dip = ip->ip_daddr;
        }

        vr_get_flow_key(key_p, fmd->fmd_vlan, pkt,
                sip, dip, ip_proto, sport, dport);
        flow_parse_res = vr_flow_parse(router, key_p, pkt, &trap_res);
        /* remember head-fragment ports so later fragments can be keyed */
        if (flow_parse_res == VR_FLOW_LOOKUP && vr_ip_fragment_head(ip))
            vr_fragment_add(router, vrf, ip, key_p->key_src_port,
                    key_p->key_dst_port);

        if (flow_parse_res == VR_FLOW_BYPASS) {
            return vr_flow_forward(vrf, pkt, proto, fmd);
        } else if (flow_parse_res == VR_FLOW_TRAP) {
            return vr_trap(pkt, vrf, trap_res, NULL);
        }

        return vr_flow_lookup(router, vrf, key_p, pkt, proto, fmd);
    }

    /*
     * ...come here, when there is not enough information to do a
     * flow lookup
     */
    return vr_flow_forward(vrf, pkt, proto, fmd);
}
static int vr_handle_arp_request(struct vrouter *router, unsigned short vrf, struct vr_arp *sarp, struct vr_packet *pkt) { struct vr_packet *cloned_pkt; struct vr_interface *vif = pkt->vp_if; unsigned short proto = htons(VR_ETH_PROTO_ARP); struct vr_eth *eth; struct vr_arp *arp; unsigned int dpa; bool should_proxy = false; /* * still @ l2 level, and hence we can use the mode of the interface * to figure out whether we need to xconnect or not. in the xconnect * mode, just pass it to the peer so that he can handle the arp requests */ if (vif_mode_xconnect(vif)) return vif_xconnect(vif, pkt); should_proxy = vr_should_proxy(vif, sarp->arp_dpa, sarp->arp_spa); /* * if vr should not proxy, all the other arp requests should go out on * the physical interface */ if (vif->vif_type == VIF_TYPE_HOST && !should_proxy) return vif_xconnect(vif, pkt); /* * grat arp from * * VMs - need to be dropped * Fabric - need to be xconnected and also sent to agent * Vhost - xconnected above */ if (vr_grat_arp(sarp)) { if (vif->vif_type == VIF_TYPE_VIRTUAL) { vr_pfree(pkt, VP_DROP_GARP_FROM_VM); return 0; } cloned_pkt = vr_pclone(pkt); if (cloned_pkt) { vr_preset(cloned_pkt); vif_xconnect(vif, cloned_pkt); } return vr_trap(pkt, vrf, AGENT_TRAP_ARP, NULL); } if (should_proxy) { pkt_reset(pkt); eth = (struct vr_eth *)pkt_data(pkt); memcpy(eth->eth_dmac, sarp->arp_sha, VR_ETHER_ALEN); memcpy(eth->eth_smac, vif->vif_mac, VR_ETHER_ALEN); memcpy(ð->eth_proto, &proto, sizeof(proto)); arp = (struct vr_arp *)pkt_pull_tail(pkt, VR_ETHER_HLEN); sarp->arp_op = htons(VR_ARP_OP_REPLY); memcpy(sarp->arp_sha, vif->vif_mac, VR_ETHER_ALEN); memcpy(sarp->arp_dha, eth->eth_dmac, VR_ETHER_ALEN); dpa = sarp->arp_dpa; memcpy(&sarp->arp_dpa, &sarp->arp_spa, sizeof(sarp->arp_dpa)); memcpy(&sarp->arp_spa, &dpa, sizeof(sarp->arp_spa)); memcpy(arp, sarp, sizeof(*sarp)); pkt_pull_tail(pkt, sizeof(*arp)); vif->vif_tx(vif, pkt); } else { /* requests for which vr doesn't have to do anything */ vr_pfree(pkt, 
VP_DROP_INVALID_ARP); } return 0; }
/* * vr_input is called from linux(host) ingress path. we are not allowed to * sleep here. return value should indicate whether the router consumed the * packet or not. if the router did not consume, host will continue with * its packet processing with the same packet. if the router did consume, * host will not touch the packet again. a return of 0 will tell the handler * that router consumed it, while all other return values are passed as is. * maybe we need a return value to host return mapping, but at a later time ? */ unsigned int vr_input(unsigned short vrf, struct vr_interface *vif, struct vr_packet *pkt) { unsigned char *data = pkt_data(pkt); unsigned char *eth = data; unsigned char *dmac = ð[VR_ETHER_DMAC_OFF]; unsigned short eth_proto; struct vr_vlan_hdr *vlan; struct vrouter *router = vif->vif_router; struct vr_forwarding_md fmd; int reason; if (vif->vif_flags & VIF_FLAG_MIRROR_RX) { vr_init_forwarding_md(&fmd); fmd.fmd_dvrf = vif->vif_vrf; vr_mirror(vif->vif_router, vif->vif_mirror_id, pkt, &fmd); } /* * we will optimise for the most likely case i.e that of IPv4. 
need * to see what needs to happen for v6 when it comes */ data = pkt_pull(pkt, VR_ETHER_HLEN); if (!data) { vif_drop_pkt(vif, pkt, 1); return 0; } eth_proto = ntohs(*(unsigned short *)(eth + VR_ETHER_PROTO_OFF)); while (eth_proto == VR_ETH_PROTO_VLAN) { vlan = (struct vr_vlan_hdr *)data; eth_proto = ntohs(vlan->vlan_proto); data = pkt_pull(pkt, sizeof(*vlan)); if (!data) { vif_drop_pkt(vif, pkt, 1); return 0; } } vr_init_forwarding_md(&fmd); pkt_set_network_header(pkt, pkt->vp_data); pkt_set_inner_network_header(pkt, pkt->vp_data); if (eth_proto == VR_ETH_PROTO_IP) { if (vr_from_vm_mss_adj && vr_pkt_from_vm_tcp_mss_adj && (vif->vif_type == VIF_TYPE_VIRTUAL)) { if ((reason = vr_pkt_from_vm_tcp_mss_adj(pkt))) { vr_pfree(pkt, reason); return 0; } } return vr_flow_inet_input(router, vrf, pkt, eth_proto, &fmd); } else if (eth_proto == VR_ETH_PROTO_ARP) return vr_arp_input(router, vrf, pkt); /* rest of the stuff is for slow path and we should be ok doing this */ if (well_known_mac(dmac)) return vr_trap(pkt, vrf, AGENT_TRAP_L2_PROTOCOLS, NULL); return vr_default_input(pkt); }
unsigned int vr_bridge_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { int reason, handled; l4_pkt_type_t l4_type = L4_TYPE_UNKNOWN; unsigned short pull_len, overlay_len = VROUTER_OVERLAY_LEN; int8_t *dmac; struct vr_bridge_entry *be; struct vr_nexthop *nh = NULL; struct vr_vrf_stats *stats; dmac = (int8_t *) pkt_data(pkt); if (pkt->vp_if->vif_flags & VIF_FLAG_MAC_LEARN) { if (vr_bridge_learn(router, pkt, fmd)) { return 0; } } pull_len = 0; if ((pkt->vp_type == VP_TYPE_IP) || (pkt->vp_type == VP_TYPE_IP6) || (pkt->vp_type == VP_TYPE_ARP)) { pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); if (pull_len && !pkt_pull(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PULL); return 0; } } if ((pkt->vp_type == VP_TYPE_IP) || (pkt->vp_type == VP_TYPE_IP6)) { if (fmd->fmd_dscp < 0) { if (pkt->vp_type == VP_TYPE_IP) { fmd->fmd_dscp = vr_inet_get_tos((struct vr_ip *)pkt_network_header(pkt)); } else if (pkt->vp_type == VP_TYPE_IP6) { fmd->fmd_dscp = vr_inet6_get_tos((struct vr_ip6 *)pkt_network_header(pkt)); } } } else { if (fmd->fmd_dotonep < 0) { fmd->fmd_dotonep = vr_vlan_get_tos(pkt_data(pkt)); } } /* Do the bridge lookup for the packets not meant for "me" */ if (!fmd->fmd_to_me) { /* * If DHCP packet coming from VM, Trap it to Agent before doing the bridge * lookup itself */ if (vif_is_virtual(pkt->vp_if)) { if (pkt->vp_type == VP_TYPE_IP) l4_type = vr_ip_well_known_packet(pkt); else if (pkt->vp_type == VP_TYPE_IP6) l4_type = vr_ip6_well_known_packet(pkt); if (l4_type == L4_TYPE_DHCP_REQUEST) { if (pkt->vp_if->vif_flags & VIF_FLAG_DHCP_ENABLED) { vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_L3_PROTOCOLS, NULL); return 0; } } /* * Handle the unicast ARP, coming from VM, not * destined to us. Broadcast ARP requests would be handled * in L2 multicast nexthop. Multicast ARP on fabric * interface also would be handled in L2 multicast nexthop. * Unicast ARP packets on fabric interface would be handled * in plug routines of interface. 
*/ if (!IS_MAC_BMCAST(dmac)) { handled = 0; if (pkt->vp_type == VP_TYPE_ARP) { handled = vr_arp_input(pkt, fmd, dmac); } else if (l4_type == L4_TYPE_NEIGHBOUR_SOLICITATION) { handled = vr_neighbor_input(pkt, fmd, dmac); } if (handled) return 0; } } be = bridge_lookup(dmac, fmd); if (be) nh = be->be_nh; if (!nh || nh->nh_type == NH_DISCARD) { /* If Flooding of unknown unicast not allowed, drop the packet */ if (!vr_unknown_uc_flood(pkt->vp_if, pkt->vp_nh) || IS_MAC_BMCAST(dmac)) { vr_pfree(pkt, VP_DROP_L2_NO_ROUTE); return 0; } be = bridge_lookup(vr_bcast_mac, fmd); nh = be->be_nh; if (!nh) { vr_pfree(pkt, VP_DROP_L2_NO_ROUTE); return 0; } stats = vr_inet_vrf_stats(fmd->fmd_dvrf, pkt->vp_cpu); if (stats) stats->vrf_uuc_floods++; /* Treat this unknown unicast packet as multicast */ pkt->vp_flags |= VP_FLAG_MULTICAST; } if (be) __sync_fetch_and_add(&be->be_packets, 1); if (nh->nh_type != NH_L2_RCV) overlay_len = VROUTER_L2_OVERLAY_LEN; } /* Adjust MSS for V4 and V6 packets */ if ((pkt->vp_type == VP_TYPE_IP) || (pkt->vp_type == VP_TYPE_IP6)) { if (vif_is_virtual(pkt->vp_if) && vr_from_vm_mss_adj && vr_pkt_from_vm_tcp_mss_adj) { if ((reason = vr_pkt_from_vm_tcp_mss_adj(pkt, overlay_len))) { vr_pfree(pkt, reason); return 0; } } if (fmd->fmd_to_me) { handled = vr_l3_input(pkt, fmd); if (!handled) { vr_pfree(pkt, VP_DROP_NOWHERE_TO_GO); } return 0; } } if (pull_len && !pkt_push(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PUSH); return 0; } nh_output(pkt, nh, fmd); return 0; }
/*
 * Learn the source MAC of an incoming packet into the bridge table.
 *
 * Unknown source MACs are added (nexthop inherited from the broadcast
 * entry) and trapped to the agent as AGENT_TRAP_MAC_LEARN; a known MAC
 * whose source fails nexthop validation is flagged as moved and trapped
 * as AGENT_TRAP_MAC_MOVE (once, guarded by VR_BE_MAC_MOVED_FLAG).
 *
 * Returns 0 on the normal paths, -ENOMEM if the bridge entry could not
 * be added or the trap copy could not be made. A non-zero return tells
 * the caller (vr_bridge_input) the packet was consumed.
 */
unsigned int
vr_bridge_learn(struct vrouter *router, struct vr_packet *pkt,
        struct vr_forwarding_md *fmd)
{
    int ret = 0, lock, valid_src;
    unsigned int trap_reason;
    bool trap = false;
    struct vr_eth *eth;
    struct vr_packet *pkt_c;
    struct vr_nexthop *nh = NULL;
    struct vr_bridge_entry *be;

    eth = (struct vr_eth *)pkt_data(pkt);
    if (!eth)
        return 0;

    /* never learn a broadcast/multicast source address */
    if (IS_MAC_BMCAST(eth->eth_smac))
        return 0;

    be = bridge_lookup(eth->eth_smac, fmd);
    if (be) {
        nh = be->be_nh;
    }

    if (!nh) {
        /* unknown source: inherit the broadcast entry's nexthop */
        be = bridge_lookup((uint8_t *)vr_bcast_mac, fmd);
        if (be) {
            nh = be->be_nh;
        }
        if (!nh)
            return 0;

        /* serialize concurrent adds for the same MAC */
        lock = bridge_table_lock(pkt->vp_if, eth->eth_smac);
        if (lock < 0)
            return 0;

        be = bridge_add(0, fmd->fmd_dvrf, eth->eth_smac, nh->nh_id);
        bridge_table_unlock(pkt->vp_if, eth->eth_smac, lock);
        if (!be)
            return -ENOMEM;

        trap_reason = AGENT_TRAP_MAC_LEARN;
        trap = true;
    } else {
        /* known MAC: detect a move via source validation, trap only once */
        if (!(be->be_flags & VR_BE_MAC_MOVED_FLAG) &&
                (nh->nh_validate_src)) {
            valid_src = nh->nh_validate_src(pkt, nh, fmd, NULL);
            if (valid_src != NH_SOURCE_VALID) {
                ret = vr_bridge_set_route_flags(be, VR_BE_MAC_MOVED_FLAG);
                if (!ret) {
                    /* trap the packet for mac move */
                    trap_reason = AGENT_TRAP_MAC_MOVE;
                    trap = true;
                }
                ret = 0;
            }
        }
    }

    __sync_fetch_and_add(&be->be_packets, 1);

    if (trap) {
        pkt_c = pkt_cow(pkt, 0);
        if (!pkt_c) {
            /* copy failed: trap the original packet itself */
            pkt_c = pkt;
            ret = -ENOMEM;
        }
        vr_trap(pkt_c, fmd->fmd_dvrf, trap_reason,
                (void *)&be->be_hentry.hentry_index);
    }

    return ret;
}