/*
 * Build one dump response record (resp) for an mtrie bucket entry.
 *
 * Copies identifying fields from the original dump request (dumper->dump_req),
 * fills in the route attributes from 'ent', and — when the entry carries a
 * bridge index — allocates a MAC buffer and stitches the MAC via
 * vr_bridge_lookup().
 *
 * prefix_len is in bits; the memcpy converts it to bytes via
 * IPBUCKET_LEVEL_BITS. NOTE(review): this assumes IPBUCKET_LEVEL_BITS == 8
 * and that prefix_len is a multiple of it — confirm against the mtrie header.
 *
 * Ownership: resp->rtr_mac (when non-NULL) is allocated here with
 * VR_ROUTE_REQ_MAC_OBJECT; the caller/dump machinery is responsible for
 * freeing it.
 */
static void
mtrie_dumper_make_response(struct vr_message_dumper *dumper, vr_route_req *resp,
        struct ip_bucket_entry *ent, int8_t *prefix, unsigned int prefix_len)
{
    vr_route_req *req = (vr_route_req *)dumper->dump_req;
    struct vr_route_req lreq;

    resp->rtr_vrf_id = req->rtr_vrf_id;
    resp->rtr_family = req->rtr_family;
    memcpy(resp->rtr_prefix, prefix, prefix_len / IPBUCKET_LEVEL_BITS);
    resp->rtr_prefix_size = req->rtr_prefix_size;
    resp->rtr_marker_size = 0;
    resp->rtr_marker = NULL;
    resp->rtr_prefix_len = prefix_len;
    resp->rtr_rid = req->rtr_rid;
    resp->rtr_label_flags = ent->entry_label_flags;
    resp->rtr_label = ent->entry_label;
    resp->rtr_nh_id = ent->entry_nh_p->nh_id;
    resp->rtr_index = ent->entry_bridge_index;

    /* default: no stitched MAC in the response */
    resp->rtr_mac = NULL;
    resp->rtr_mac_size = 0;
    if (resp->rtr_index != VR_BE_INVALID_INDEX) {
        resp->rtr_mac = vr_zalloc(VR_ETHER_ALEN, VR_ROUTE_REQ_MAC_OBJECT);
        /*
         * Fix: check the allocation. The original advertised a
         * VR_ETHER_ALEN-byte MAC (and passed the NULL pointer to
         * vr_bridge_lookup()) even when vr_zalloc() failed.
         */
        if (resp->rtr_mac) {
            resp->rtr_mac_size = VR_ETHER_ALEN;
            lreq.rtr_req.rtr_mac = resp->rtr_mac;
            lreq.rtr_req.rtr_index = resp->rtr_index;
            lreq.rtr_req.rtr_mac_size = VR_ETHER_ALEN;
            vr_bridge_lookup(resp->rtr_vrf_id, &lreq);
        }
    }

    resp->rtr_replace_plen = ent->entry_prefix_len;

    return;
}
/* * Exact-match * returns the next-hop on exact match. NULL otherwise */ static int mtrie_get(unsigned int vrf_id, struct vr_route_req *rt) { struct vr_nexthop *nh; struct vr_route_req breq; vr_route_req *req = &rt->rtr_req; nh = mtrie_lookup(vrf_id, rt); if (nh) req->rtr_nh_id = nh->nh_id; else req->rtr_nh_id = -1; if (req->rtr_index != VR_BE_INVALID_INDEX) { req->rtr_mac = vr_zalloc(VR_ETHER_ALEN, VR_ROUTE_REQ_MAC_OBJECT); req->rtr_mac_size = VR_ETHER_ALEN; breq.rtr_req.rtr_mac = req->rtr_mac; breq.rtr_req.rtr_index = req->rtr_index; breq.rtr_req.rtr_mac_size = VR_ETHER_ALEN; vr_bridge_lookup(req->rtr_vrf_id, &breq); } else { req->rtr_mac_size = 0; req->rtr_mac = NULL; } return 0; }
/* * adds a route to the corresponding vrf table. returns 0 on * success and non-zero otherwise */ static int mtrie_add(struct vr_rtable * _unused, struct vr_route_req *rt) { unsigned int vrf_id = rt->rtr_req.rtr_vrf_id; struct ip_mtrie *mtrie = vrfid_to_mtrie(vrf_id, rt->rtr_req.rtr_family); int ret; struct vr_route_req tmp_req; mtrie = (mtrie ? mtrie : mtrie_alloc_vrf(vrf_id, rt->rtr_req.rtr_family)); if (!mtrie) return -ENOMEM; rt->rtr_nh = vrouter_get_nexthop(rt->rtr_req.rtr_rid, rt->rtr_req.rtr_nh_id); if (!rt->rtr_nh) return -ENOENT; if ((!(rt->rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG)) && (rt->rtr_nh->nh_type == NH_TUNNEL)) { vrouter_put_nexthop(rt->rtr_nh); return -EINVAL; } rt->rtr_req.rtr_index = VR_BE_INVALID_INDEX; if ((rt->rtr_req.rtr_mac_size == VR_ETHER_ALEN) && (!IS_MAC_ZERO(rt->rtr_req.rtr_mac))) { tmp_req.rtr_req.rtr_index = rt->rtr_req.rtr_index; tmp_req.rtr_req.rtr_mac_size = VR_ETHER_ALEN; tmp_req.rtr_req.rtr_mac = rt->rtr_req.rtr_mac; tmp_req.rtr_req.rtr_vrf_id = rt->rtr_req.rtr_vrf_id; if (!vr_bridge_lookup(tmp_req.rtr_req.rtr_vrf_id, &tmp_req)) return -ENOENT; rt->rtr_req.rtr_index = tmp_req.rtr_req.rtr_index; } if (!(rt->rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG)) { rt->rtr_req.rtr_label = 0xFFFFFF; } else { rt->rtr_req.rtr_label &= 0xFFFFFF; } ret = __mtrie_add(mtrie, rt, 1); vrouter_put_nexthop(rt->rtr_nh); return ret; }
unsigned int vr_bridge_input(struct vrouter *router, unsigned short vrf, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { struct vr_route_req rt; struct vr_nexthop *nh; struct vr_forwarding_md cmd; char bcast_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; char *mac; /* First mark the packet as L2 */ pkt->vp_type = VP_TYPE_L2; mac = (char *)pkt_data(pkt); rt.rtr_req.rtr_mac_size = VR_ETHER_ALEN; rt.rtr_req.rtr_mac =(int8_t *) mac; /* If multicast L2 packet, use broadcast composite nexthop */ if (IS_MAC_BMCAST(mac)) { rt.rtr_req.rtr_mac = (int8_t *)bcast_mac; pkt->vp_flags |= VP_FLAG_MULTICAST; } rt.rtr_req.rtr_vrf_id = vrf; nh = vr_bridge_lookup(vrf, &rt, pkt); if (nh) { /* * If there is a label attached to this bridge entry add the * label */ if (rt.rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG) { if (!fmd) { vr_init_forwarding_md(&cmd); fmd = &cmd; } fmd->fmd_label = rt.rtr_req.rtr_label; } return nh_output(vrf, pkt, nh, fmd); } vr_pfree(pkt, VP_DROP_INVALID_NH); return 0; }
/* * Delete a route from the table. * prefix is in network byte order. * returns 0 on failure; or non-zero if an entry was found. * * When deleting a route: * - Move all descendent bucket (not covered by more-specifics) with the * parent of this node. * - If any buckets contain the same next-hop result, the bucket can be * deleted. Memory should be freed after a delay in order to deal with * concurrency. */ static int mtrie_delete(struct vr_rtable * _unused, struct vr_route_req *rt) { int vrf_id = rt->rtr_req.rtr_vrf_id; struct ip_mtrie *rtable; struct vr_route_req lreq; rtable = vrfid_to_mtrie(vrf_id, rt->rtr_req.rtr_family); if (!rtable) return -ENOENT; rt->rtr_nh = vrouter_get_nexthop(rt->rtr_req.rtr_rid, rt->rtr_req.rtr_nh_id); if (!rt->rtr_nh) return -ENOENT; rt->rtr_req.rtr_index = VR_BE_INVALID_INDEX; if ((rt->rtr_req.rtr_mac_size == VR_ETHER_ALEN) && (!IS_MAC_ZERO(rt->rtr_req.rtr_mac))) { lreq.rtr_req.rtr_index = rt->rtr_req.rtr_index; lreq.rtr_req.rtr_mac_size = VR_ETHER_ALEN; lreq.rtr_req.rtr_mac = rt->rtr_req.rtr_mac; lreq.rtr_req.rtr_vrf_id = vrf_id; if (!vr_bridge_lookup(vrf_id, &lreq)) return -ENOENT; rt->rtr_req.rtr_index = lreq.rtr_req.rtr_index; } if (!(rt->rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG)) { rt->rtr_req.rtr_label = 0xFFFFFF; } else { rt->rtr_req.rtr_label &= 0xFFFFFF; } __mtrie_delete(rt, &rtable->root, 0, 0, 1); vrouter_put_nexthop(rt->rtr_nh); return 0; }
/*
 * Decide how to answer an ARP request: proxy with a MAC, flood, or drop.
 *
 * On MR_PROXY, the MAC to respond with is copied into 'dmac' — either a
 * stitched MAC from the bridge table or the receiving interface's own MAC.
 * 'rt' is the already-resolved route for the requested IP; its
 * rtr_label_flags, rtr_index and rtr_nh drive the decision.
 *
 * The decision depends on where the request came from (fabric vs. virtual
 * interface) and what the target route looks like (flood-marked, gateway,
 * vcp, stitched) — see the inline commentary below.
 */
mac_response_t
vr_get_proxy_mac(struct vr_packet *pkt, struct vr_forwarding_md *fmd,
        struct vr_route_req *rt, unsigned char *dmac)
{
    bool from_fabric, stitched, flood;
    bool to_gateway, no_proxy, to_vcp;
    unsigned char *resp_mac;
    struct vr_nexthop *nh = NULL;
    struct vr_interface *vif = pkt->vp_if;
    struct vr_vrf_stats *stats;

    from_fabric = stitched = flood = to_gateway = to_vcp = no_proxy = false;

    stats = vr_inet_vrf_stats(fmd->fmd_dvrf, pkt->vp_cpu);
    /* here we will not check for stats, but will check before use */

    /* classify the request source and the target route's attributes */
    if (vif->vif_type == VIF_TYPE_PHYSICAL)
        from_fabric = true;

    if (vif->vif_flags & VIF_FLAG_NO_ARP_PROXY)
        no_proxy = true;

    if (rt->rtr_req.rtr_label_flags & VR_RT_ARP_FLOOD_FLAG)
        flood = true;

    if (vr_gateway_nexthop(rt->rtr_nh))
        to_gateway = true;

    /*
     * the no_proxy flag is set for the vcp ports. From such ports
     * vrouter should proxy only for the gateway ip.
     */
    if (no_proxy && !to_gateway)
        return MR_DROP;

    if (from_fabric) {
        if (vr_nexthop_is_vcp(rt->rtr_nh)) {
            to_vcp = true;
        }
    }

    /* default response MAC: our own interface MAC (plain proxy) */
    resp_mac = vif->vif_mac;
    if (rt->rtr_req.rtr_index != VR_BE_INVALID_INDEX) {
        /* route carries a bridge index: try to stitch the real MAC */
        if ((nh = vr_bridge_lookup(fmd->fmd_dvrf, rt))) {
            resp_mac = rt->rtr_req.rtr_mac;
            stitched = true;
        }
    }

    /* If ECMP source, we force routing */
    if (fmd->fmd_ecmp_src_nh_index != -1) {
        resp_mac = vif->vif_mac;
        fmd->fmd_ecmp_src_nh_index = -1;
    }

    /*
     * situations that are handled here (from_fabric)
     *
     * . arp request from vm, but not proxied at the source because of lack
     *   of information at the source. only the compute that hosts the
     *   destination should respond, and that too only if the mac information
     *   is present (and hence the ENCAP check).
     *
     * . arp request from a baremetal arriving at a TSN, which if possesses
     *   the mac information for the destination vm, should proxy. If it does
     *   not hold the mac information, the request should be flooded
     *
     * . arp request from the uplink port of a vcp
     */
    if (from_fabric) {
        if (flood && !stitched) {
            if (stats)
                stats->vrf_arp_physical_flood++;
            return MR_FLOOD;
        }

        /*
         * arp requests to gateway coming from the fabric should be dropped
         * unless the request was for the TSN DNS service (which appears as
         * the gateway, with the current set of checks). We should not respond
         * for gateway ip if we are TSN and the request came from baremetal.
         * TSN does not have gateway route and hence the to_gateway will be
         * true only for the DNS ip.
         */
        if (to_gateway) {
            if (fmd->fmd_src != TOR_SOURCE) {
                return MR_DROP;
            }
        }

        /*
         * we should proxy if the vm is hosted by us, in which case nh will be
         * of ENCAP type. we should also proxy for a host in vcp port. In all
         * other cases, we should proxy only if
         *
         * i am a TSN(fmd->fmd_src),
         * i am the dns IP or
         * i have the mac information (nh - (mostly tunnel)) and
         * the originator is a bare metal (fmd->fmd_src)
         */
        if (to_vcp || to_gateway || (nh && ((nh->nh_type == NH_ENCAP) ||
                        (fmd->fmd_src == TOR_SOURCE)))) {
            if (stats)
                stats->vrf_arp_physical_stitch++;
        } else {
            if (stats)
                stats->vrf_arp_physical_flood++;
            return MR_FLOOD;
        }
    } else {
        if (!stitched && flood) {
            /*
             * if there is no stitching information, but flood flag is set
             * we should flood
             */
            if (stats)
                stats->vrf_arp_virtual_flood++;
            return MR_FLOOD;
        }

        if (stats) {
            if (stitched) {
                stats->vrf_arp_virtual_stitch++;
            } else {
                stats->vrf_arp_virtual_proxy++;
            }
        }
    }

    VR_MAC_COPY(dmac, resp_mac);

    return MR_PROXY;
}
unsigned int vr_bridge_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { struct vr_route_req rt; struct vr_forwarding_md cmd; struct vr_nexthop *nh; unsigned short pull_len, overlay_len = VROUTER_L2_OVERLAY_LEN; int reason; rt.rtr_req.rtr_label_flags = 0; rt.rtr_req.rtr_index = VR_BE_INVALID_INDEX; rt.rtr_req.rtr_mac_size = VR_ETHER_ALEN; rt.rtr_req.rtr_mac =(int8_t *) pkt_data(pkt); /* If multicast L2 packet, use broadcast composite nexthop */ if (IS_MAC_BMCAST(rt.rtr_req.rtr_mac)) rt.rtr_req.rtr_mac = (int8_t *)vr_bcast_mac; rt.rtr_req.rtr_vrf_id = fmd->fmd_dvrf; nh = vr_bridge_lookup(fmd->fmd_dvrf, &rt); if (!nh) { vr_pfree(pkt, VP_DROP_L2_NO_ROUTE); return 0; } if (nh->nh_type == NH_L2_RCV) overlay_len = VROUTER_OVERLAY_LEN; if (pkt->vp_type == VP_TYPE_IP || pkt->vp_type == VP_TYPE_IP6) { if (vif_is_virtual(pkt->vp_if) && vr_from_vm_mss_adj && vr_pkt_from_vm_tcp_mss_adj) { pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); if (!pkt_pull(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PULL); return 0; } if ((reason = vr_pkt_from_vm_tcp_mss_adj(pkt, overlay_len))) { vr_pfree(pkt, reason); return 0; } if (!pkt_push(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PUSH); return 0; } } } /* * If there is a label attached to this bridge entry add the * label */ if (rt.rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG) { if (!fmd) { vr_init_forwarding_md(&cmd); fmd = &cmd; } fmd->fmd_label = rt.rtr_req.rtr_label; } nh_output(pkt, nh, fmd); return 0; }