/* * handle unicast arp requests and neighbor refreshes. In many cases, * we wouldn't like the unicast arp requests from gateway (such as MX) * to reach the VMs and change the gateway mac to ip(6) binding, since * for vms the gateway is always agent. We would like such requests * to go only if the mode is l2 */ int vif_plug_mac_request(struct vr_interface *vif, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { int handled = 1; int nheader; struct vr_arp *sarp; if (pkt->vp_flags & VP_FLAG_MULTICAST) goto unhandled; nheader = pkt_network_header(pkt) - pkt_data(pkt); if (nheader < 0 || (pkt->vp_data + nheader > pkt->vp_end)) goto unhandled; if (pkt->vp_type == VP_TYPE_ARP) { if (pkt->vp_len < (nheader + sizeof(*sarp))) goto unhandled; sarp = (struct vr_arp *)(pkt_data(pkt) + nheader); if (ntohs(sarp->arp_op) != VR_ARP_OP_REQUEST) goto unhandled; pkt_pull(pkt, nheader); handled = vr_arp_input(pkt, fmd); if (!handled) { pkt_push(pkt, nheader); } return handled; } else if (pkt->vp_type == VP_TYPE_IP6) { if (pkt->vp_len < (nheader + sizeof(struct vr_ip6) + sizeof(struct vr_icmp) + VR_IP6_ADDRESS_LEN + sizeof(struct vr_neighbor_option) + VR_ETHER_ALEN)) goto unhandled; pkt_pull(pkt, nheader); handled = vr_neighbor_input(pkt, fmd); if (!handled) { pkt_push(pkt, nheader); } return handled; } unhandled: return !handled; }
unsigned int vr_mcast_forward(struct vrouter *router, unsigned short vrf, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { struct vr_route_req rt; struct vr_nexthop *nh; struct vr_ip *ip; pkt->vp_type = VP_TYPE_IP; ip = (struct vr_ip *)pkt_data(pkt); rt.rtr_req.rtr_vrf_id = vrf; rt.rtr_req.rtr_prefix_len = 32; if (IS_MCAST_LINK_LOCAL(ip->ip_daddr) || IS_BCAST_IP(ip->ip_daddr)) { rt.rtr_req.rtr_src = 0; rt.rtr_req.rtr_prefix = 0xFFFFFFFF; } else { rt.rtr_req.rtr_src = ip->ip_saddr; rt.rtr_req.rtr_prefix = ip->ip_daddr; } nh = mcast_lookup(vrf, &rt, pkt); if (!nh) { nh = ip4_default_nh; } return nh_output(vrf, pkt, nh, fmd); }
int vr_arp_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd) { int handled = 1; struct vr_arp sarp; /* If vlan tagged packet, we let the VM handle the ARP packets */ if ((pkt->vp_type != VP_TYPE_ARP) || (fmd->fmd_vlan != VLAN_ID_INVALID)) return !handled; if (pkt->vp_len < sizeof(struct vr_arp)) { vr_pfree(pkt, VP_DROP_INVALID_ARP); return handled; } memcpy(&sarp, pkt_data(pkt), sizeof(struct vr_arp)); switch (ntohs(sarp.arp_op)) { case VR_ARP_OP_REQUEST: return vr_handle_arp_request(&sarp, pkt, fmd); case VR_ARP_OP_REPLY: vr_handle_arp_reply(&sarp, pkt, fmd); break; default: vr_pfree(pkt, VP_DROP_INVALID_ARP); } return handled; }
struct vr_packet * pkt_copy(struct vr_packet *pkt, unsigned short off, unsigned short len) { struct vr_packet *pkt_c; unsigned short head_space; /* * one eth header for agent, and one more for packets from * tun interfaces */ head_space = (2 * sizeof(struct vr_eth)) + sizeof(struct agent_hdr); pkt_c = vr_palloc(head_space + len); if (!pkt_c) return pkt_c; pkt_c->vp_data += head_space; pkt_c->vp_tail += head_space; if (vr_pcopy(pkt_data(pkt_c), pkt, off, len) < 0) { vr_pfree(pkt_c, VP_DROP_MISC); return NULL; } pkt_pull_tail(pkt_c, len); pkt_c->vp_if = pkt->vp_if; pkt_c->vp_flags = pkt->vp_flags; pkt_c->vp_cpu = pkt->vp_cpu; pkt_c->vp_network_h = 0; return pkt_c; }
static void vr_arp_proxy(struct vr_arp *sarp, struct vr_packet *pkt, struct vr_forwarding_md *fmd, unsigned char *dmac) { struct vr_eth *eth; struct vr_arp *arp; struct vr_forwarding_md fmd_new; struct vr_interface *vif = pkt->vp_if; eth = (struct vr_eth *)pkt_push(pkt, sizeof(*eth)); if (!eth) { vr_pfree(pkt, VP_DROP_PUSH); return; } memcpy(eth->eth_dmac, sarp->arp_sha, VR_ETHER_ALEN); memcpy(eth->eth_smac, dmac, VR_ETHER_ALEN); eth->eth_proto = htons(VR_ETH_PROTO_ARP); arp = (struct vr_arp *)(pkt_data(pkt) + sizeof(*eth)); arp->arp_hw = htons(VR_ARP_HW_TYPE_ETHER); arp->arp_proto = htons(VR_ETH_PROTO_IP); arp->arp_hwlen = VR_ETHER_ALEN; arp->arp_protolen = VR_IP_ADDRESS_LEN; arp->arp_op = htons(VR_ARP_OP_REPLY); memcpy(arp->arp_sha, dmac, VR_ETHER_ALEN); memcpy(arp->arp_dha, sarp->arp_sha, VR_ETHER_ALEN); memcpy(&arp->arp_dpa, &sarp->arp_spa, sizeof(sarp->arp_spa)); memcpy(&arp->arp_spa, &sarp->arp_dpa, sizeof(sarp->arp_dpa)); vr_init_forwarding_md(&fmd_new); fmd_new.fmd_dvrf = fmd->fmd_dvrf; vr_pkt_type(pkt, 0, &fmd_new); /* * XXX: for vcp ports, there won't be bridge table entries. to avoid * doing vr_bridge_input, we check for the flag NO_ARP_PROXY and * and if set, directly send out on that interface */ if (vif_is_vhost(vif) || (vif->vif_flags & VIF_FLAG_NO_ARP_PROXY)) { vif->vif_tx(vif, pkt, fmd); } else { vr_bridge_input(vif->vif_router, pkt, &fmd_new); } return; }
/*
 * Strip the VLAN tag from the ethernet header at the packet's data
 * pointer. Packets whose ethertype is not VLAN are left untouched.
 *
 * Returns 0 on success (or when there was nothing to strip) and -1 when
 * the packet could not be pulled by VR_VLAN_HLEN.
 */
int
vr_untag_pkt(struct vr_packet *pkt)
{
    struct vr_eth *tagged = (struct vr_eth *)pkt_data(pkt);
    unsigned char *untagged;

    if (tagged->eth_proto == htons(VR_ETH_PROTO_VLAN)) {
        untagged = pkt_pull(pkt, VR_VLAN_HLEN);
        if (!untagged)
            return -1;

        /* slide both mac addresses forward over the tag; regions overlap */
        memmove(untagged, tagged, (2 * VR_ETHER_ALEN));
    }

    return 0;
}
unsigned int vr_bridge_input(struct vrouter *router, unsigned short vrf, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { struct vr_route_req rt; struct vr_nexthop *nh; struct vr_forwarding_md cmd; char bcast_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; char *mac; /* First mark the packet as L2 */ pkt->vp_type = VP_TYPE_L2; mac = (char *)pkt_data(pkt); rt.rtr_req.rtr_mac_size = VR_ETHER_ALEN; rt.rtr_req.rtr_mac =(int8_t *) mac; /* If multicast L2 packet, use broadcast composite nexthop */ if (IS_MAC_BMCAST(mac)) { rt.rtr_req.rtr_mac = (int8_t *)bcast_mac; pkt->vp_flags |= VP_FLAG_MULTICAST; } rt.rtr_req.rtr_vrf_id = vrf; nh = vr_bridge_lookup(vrf, &rt, pkt); if (nh) { /* * If there is a label attached to this bridge entry add the * label */ if (rt.rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG) { if (!fmd) { vr_init_forwarding_md(&cmd); fmd = &cmd; } fmd->fmd_label = rt.rtr_req.rtr_label; } return nh_output(vrf, pkt, nh, fmd); } vr_pfree(pkt, VP_DROP_INVALID_NH); return 0; }
/* * Function to add vlan tag to ethernet header. As it modifies vr_packet * structure and not skb, one is expected to invoke vr_pset_data() to * modify the data pointer of skb */ int vr_tag_pkt(struct vr_packet *pkt, unsigned short vlan_id) { struct vr_eth *new_eth, *eth; unsigned short *vlan_tag; eth = (struct vr_eth *)pkt_data(pkt); if (eth->eth_proto == htons(VR_ETH_PROTO_VLAN)) return 0; new_eth = (struct vr_eth *)pkt_push(pkt, VR_VLAN_HLEN); if (!new_eth) return -1; memmove(new_eth, eth, (2 * VR_ETHER_ALEN)); new_eth->eth_proto = htons(VR_ETH_PROTO_VLAN); vlan_tag = (unsigned short *)(new_eth + 1); *vlan_tag = htons(vlan_id); return 0; }
static int vhost_tx(struct vr_interface *vif, struct vr_packet *pkt) { int ret; struct vr_interface_stats *stats = vif_get_stats(vif, pkt->vp_cpu); stats->vis_obytes += pkt_len(pkt); stats->vis_opackets++; if (vif->vif_type == VIF_TYPE_XEN_LL_HOST) memcpy(pkt_data(pkt), vif->vif_mac, sizeof(vif->vif_mac)); ret = hif_ops->hif_rx(vif, pkt); if (ret < 0) { ret = 0; stats->vis_oerrors++; } return ret; }
unsigned int vr_arp_input(struct vrouter *router, unsigned short vrf, struct vr_packet *pkt) { struct vr_arp sarp; memcpy(&sarp, pkt_data(pkt), sizeof(struct vr_arp)); switch (ntohs(sarp.arp_op)) { case VR_ARP_OP_REQUEST: vr_handle_arp_request(router, vrf, &sarp, pkt); break; case VR_ARP_OP_REPLY: vr_handle_arp_reply(router, vrf, &sarp, pkt); break; default: vr_pfree(pkt, VP_DROP_INVALID_ARP); } return 0; }
/* * This funciton parses the ethernet packet and assigns the * pkt->vp_type, network protocol of the packet. The ethernet header can * start from an offset from vp_data */ int vr_pkt_type(struct vr_packet *pkt, unsigned short offset, struct vr_forwarding_md *fmd) { unsigned char *eth = pkt_data(pkt) + offset; unsigned short eth_proto; int pull_len, pkt_len = pkt_head_len(pkt) - offset; struct vr_vlan_hdr *vlan; pull_len = VR_ETHER_HLEN; if (pkt_len < pull_len) return -1; pkt->vp_flags &= ~(VP_FLAG_MULTICAST); /* L2 broadcast/multicast packets are multicast packets */ if (IS_MAC_BMCAST(eth)) pkt->vp_flags |= VP_FLAG_MULTICAST; eth_proto = ntohs(*(unsigned short *)(eth + VR_ETHER_PROTO_OFF)); while (eth_proto == VR_ETH_PROTO_VLAN) { if (pkt_len < (pull_len + sizeof(*vlan))) return -1; vlan = (struct vr_vlan_hdr *)(eth + pull_len); if (fmd && (fmd->fmd_vlan == VLAN_ID_INVALID)) fmd->fmd_vlan = vlan->vlan_tag & 0xFFF; eth_proto = ntohs(vlan->vlan_proto); pull_len += sizeof(*vlan); } pkt_set_network_header(pkt, pkt->vp_data + offset + pull_len); pkt_set_inner_network_header(pkt, pkt->vp_data + offset + pull_len); pkt->vp_type = vr_eth_proto_to_pkt_type(eth_proto); return 0; }
/*
 * Source MAC learning for a packet whose ethernet header is at the data
 * pointer.
 *
 * If the source MAC has no usable bridge entry, a new entry is added
 * that points at the nexthop of the broadcast MAC entry, and a copy of
 * the packet is trapped with AGENT_TRAP_MAC_LEARN. If an entry exists,
 * is not already flagged as moved, and its nexthop's source validation
 * rejects the packet, the entry is flagged VR_BE_MAC_MOVED_FLAG and the
 * packet is trapped with AGENT_TRAP_MAC_MOVE.
 *
 * Returns 0 in the normal case. Returns -ENOMEM (through the unsigned
 * return type, so callers see a non-zero value) when the entry could not
 * be added or the packet could not be copied for the trap; in the latter
 * case the original packet itself is handed to vr_trap().
 */
unsigned int
vr_bridge_learn(struct vrouter *router, struct vr_packet *pkt,
        struct vr_forwarding_md *fmd)
{
    int ret = 0, lock, valid_src;
    unsigned int trap_reason;
    bool trap = false;

    struct vr_eth *eth;
    struct vr_packet *pkt_c;
    struct vr_nexthop *nh = NULL;
    struct vr_bridge_entry *be;

    eth = (struct vr_eth *)pkt_data(pkt);
    if (!eth)
        return 0;

    /* broadcast/multicast source addresses are never learnt */
    if (IS_MAC_BMCAST(eth->eth_smac))
        return 0;

    be = bridge_lookup(eth->eth_smac, fmd);
    if (be) {
        nh = be->be_nh;
    }

    if (!nh) {
        /* unknown source: borrow the nexthop of the broadcast entry */
        be = bridge_lookup((uint8_t *)vr_bcast_mac, fmd);
        if (be) {
            nh = be->be_nh;
        }

        if (!nh)
            return 0;

        /* the add is done between bridge_table_lock() and _unlock() */
        lock = bridge_table_lock(pkt->vp_if, eth->eth_smac);
        if (lock < 0)
            return 0;

        be = bridge_add(0, fmd->fmd_dvrf, eth->eth_smac, nh->nh_id);
        bridge_table_unlock(pkt->vp_if, eth->eth_smac, lock);
        if (!be)
            return -ENOMEM;

        trap_reason = AGENT_TRAP_MAC_LEARN;
        trap = true;
    } else {
        /* known source: check for a move unless one is already flagged */
        if (!(be->be_flags & VR_BE_MAC_MOVED_FLAG) &&
                (nh->nh_validate_src)) {
            valid_src = nh->nh_validate_src(pkt, nh, fmd, NULL);
            if (valid_src != NH_SOURCE_VALID) {
                ret = vr_bridge_set_route_flags(be, VR_BE_MAC_MOVED_FLAG);
                if (!ret) {
                    /* trap the packet for mac move */
                    trap_reason = AGENT_TRAP_MAC_MOVE;
                    trap = true;
                }
                /* a failure to set the flag is not reported to the caller */
                ret = 0;
            }
        }
    }

    /* be is non-NULL on every path that reaches this point */
    __sync_fetch_and_add(&be->be_packets, 1);

    if (trap) {
        /* trap a copy so that the original can continue to be forwarded */
        pkt_c = pkt_cow(pkt, 0);
        if (!pkt_c) {
            /* no copy available: trap the original and report failure */
            pkt_c = pkt;
            ret = -ENOMEM;
        }

        vr_trap(pkt_c, fmd->fmd_dvrf, trap_reason,
                (void *)&be->be_hentry.hentry_index);
    }

    return ret;
}
/* * vr_input is called from linux(host) ingress path. we are not allowed to * sleep here. return value should indicate whether the router consumed the * packet or not. if the router did not consume, host will continue with * its packet processing with the same packet. if the router did consume, * host will not touch the packet again. a return of 0 will tell the handler * that router consumed it, while all other return values are passed as is. * maybe we need a return value to host return mapping, but at a later time ? */ unsigned int vr_input(unsigned short vrf, struct vr_interface *vif, struct vr_packet *pkt) { unsigned char *data = pkt_data(pkt); unsigned char *eth = data; unsigned char *dmac = ð[VR_ETHER_DMAC_OFF]; unsigned short eth_proto; struct vr_vlan_hdr *vlan; struct vrouter *router = vif->vif_router; struct vr_forwarding_md fmd; int reason; if (vif->vif_flags & VIF_FLAG_MIRROR_RX) { vr_init_forwarding_md(&fmd); fmd.fmd_dvrf = vif->vif_vrf; vr_mirror(vif->vif_router, vif->vif_mirror_id, pkt, &fmd); } /* * we will optimise for the most likely case i.e that of IPv4. 
need * to see what needs to happen for v6 when it comes */ data = pkt_pull(pkt, VR_ETHER_HLEN); if (!data) { vif_drop_pkt(vif, pkt, 1); return 0; } eth_proto = ntohs(*(unsigned short *)(eth + VR_ETHER_PROTO_OFF)); while (eth_proto == VR_ETH_PROTO_VLAN) { vlan = (struct vr_vlan_hdr *)data; eth_proto = ntohs(vlan->vlan_proto); data = pkt_pull(pkt, sizeof(*vlan)); if (!data) { vif_drop_pkt(vif, pkt, 1); return 0; } } vr_init_forwarding_md(&fmd); pkt_set_network_header(pkt, pkt->vp_data); pkt_set_inner_network_header(pkt, pkt->vp_data); if (eth_proto == VR_ETH_PROTO_IP) { if (vr_from_vm_mss_adj && vr_pkt_from_vm_tcp_mss_adj && (vif->vif_type == VIF_TYPE_VIRTUAL)) { if ((reason = vr_pkt_from_vm_tcp_mss_adj(pkt))) { vr_pfree(pkt, reason); return 0; } } return vr_flow_inet_input(router, vrf, pkt, eth_proto, &fmd); } else if (eth_proto == VR_ETH_PROTO_ARP) return vr_arp_input(router, vrf, pkt); /* rest of the stuff is for slow path and we should be ok doing this */ if (well_known_mac(dmac)) return vr_trap(pkt, vrf, AGENT_TRAP_L2_PROTOCOLS, NULL); return vr_default_input(pkt); }
static int vr_handle_arp_request(struct vrouter *router, unsigned short vrf, struct vr_arp *sarp, struct vr_packet *pkt) { struct vr_packet *cloned_pkt; struct vr_interface *vif = pkt->vp_if; unsigned short proto = htons(VR_ETH_PROTO_ARP); struct vr_eth *eth; struct vr_arp *arp; unsigned int dpa; bool should_proxy = false; /* * still @ l2 level, and hence we can use the mode of the interface * to figure out whether we need to xconnect or not. in the xconnect * mode, just pass it to the peer so that he can handle the arp requests */ if (vif_mode_xconnect(vif)) return vif_xconnect(vif, pkt); should_proxy = vr_should_proxy(vif, sarp->arp_dpa, sarp->arp_spa); /* * if vr should not proxy, all the other arp requests should go out on * the physical interface */ if (vif->vif_type == VIF_TYPE_HOST && !should_proxy) return vif_xconnect(vif, pkt); /* * grat arp from * * VMs - need to be dropped * Fabric - need to be xconnected and also sent to agent * Vhost - xconnected above */ if (vr_grat_arp(sarp)) { if (vif->vif_type == VIF_TYPE_VIRTUAL) { vr_pfree(pkt, VP_DROP_GARP_FROM_VM); return 0; } cloned_pkt = vr_pclone(pkt); if (cloned_pkt) { vr_preset(cloned_pkt); vif_xconnect(vif, cloned_pkt); } return vr_trap(pkt, vrf, AGENT_TRAP_ARP, NULL); } if (should_proxy) { pkt_reset(pkt); eth = (struct vr_eth *)pkt_data(pkt); memcpy(eth->eth_dmac, sarp->arp_sha, VR_ETHER_ALEN); memcpy(eth->eth_smac, vif->vif_mac, VR_ETHER_ALEN); memcpy(ð->eth_proto, &proto, sizeof(proto)); arp = (struct vr_arp *)pkt_pull_tail(pkt, VR_ETHER_HLEN); sarp->arp_op = htons(VR_ARP_OP_REPLY); memcpy(sarp->arp_sha, vif->vif_mac, VR_ETHER_ALEN); memcpy(sarp->arp_dha, eth->eth_dmac, VR_ETHER_ALEN); dpa = sarp->arp_dpa; memcpy(&sarp->arp_dpa, &sarp->arp_spa, sizeof(sarp->arp_dpa)); memcpy(&sarp->arp_spa, &dpa, sizeof(sarp->arp_spa)); memcpy(arp, sarp, sizeof(*sarp)); pkt_pull_tail(pkt, sizeof(*arp)); vif->vif_tx(vif, pkt); } else { /* requests for which vr doesn't have to do anything */ vr_pfree(pkt, 
VP_DROP_INVALID_ARP); } return 0; }
int main(int argc, char *argv[]) { nextopt_t nopt = nextopt_INIT(argc, argv, ":hVb:"); char opt; const char *basedir = NULL; char pathbuf[256]; tain_t now; size_t n; int fd_conn; progname = nextopt_progname(&nopt); while((opt = nextopt(&nopt))){ char optc[2] = {nopt.opt_got, '\0'}; switch(opt){ case 'h': usage(); die(0); break; case 'V': version(); die(0); break; case 'b': basedir = nopt.opt_arg; break; case ':': fatal_usage("missing argument for option -", optc); break; case '?': if(nopt.opt_got != '?'){ fatal_usage("invalid option -", optc); } /* else fallthrough: */ default : die_usage(); break; } } argc -= nopt.arg_ndx; argv += nopt.arg_ndx; if(!*argv){ fatal_usage("missing argument"); } if(!basedir) basedir = getenv("PERP_BASE"); if(!basedir) basedir = "."; if(chdir(basedir) != 0){ fatal_syserr("fail chdir() to ", basedir); } /* connect to control socket: */ n = cstr_vlen(basedir, "/", PERP_CONTROL, "/", PERPD_SOCKET); if(!(n < sizeof pathbuf)){ errno = ENAMETOOLONG; fatal_syserr("failure locating perpd control socket ", basedir, "/", PERP_CONTROL, "/", PERPD_SOCKET); } cstr_vcopy(pathbuf, basedir, "/", PERP_CONTROL, "/", PERPD_SOCKET); fd_conn = domsock_connect(pathbuf); if(fd_conn == -1){ if(errno == ECONNREFUSED){ fatal_syserr("perpd not running on control socket ", pathbuf); }else{ fatal_syserr("failure connecting to perpd control socket ", pathbuf); } } /* uptimes compared to now: */ tain_now(&now); /* loop through service directory arguments and display report: */ for(; *argv != NULL; ++argv){ pkt_t pkt = pkt_INIT(2, 'Q', 16); struct stat st; if(stat(*argv, &st) == -1){ eputs(*argv, ": error: service directory not found"); continue; } if(! 
S_ISDIR(st.st_mode)){ eputs(*argv, ": error: not a directory"); continue; } if(!(S_ISVTX & st.st_mode)){ vputs(*argv, ": not activated\n"); continue; } upak_pack(pkt_data(pkt), "LL", (uint64_t)st.st_dev, (uint64_t)st.st_ino); if(pkt_write(fd_conn, pkt, 0) == -1){ eputs_syserr("error: ", *argv, ": error writing query"); continue; } if(pkt_read(fd_conn, pkt, 0) == -1){ eputs_syserr("error: ", *argv, ": error reading response"); continue; } if(pkt[0] != 2){ eputs("error: ", *argv, ": unknown packet protocol in reply"); continue; } if(pkt[1] != 'S'){ if(pkt[1] == 'E'){ errno = (int)upak32_unpack(&pkt[3]); eputs_syserr("error: ", *argv, ": error reported in reply"); } else { eputs("error: ", *argv, ": unknown packet type in reply"); } continue; } report(*argv, pkt_data(pkt), &now); } vputs_flush(); die(0); }
static int vr_mcast_mpls_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { unsigned int ttl; unsigned int label; unsigned short drop_reason = 0; int i; int found; struct vr_nexthop *nh; struct vr_nexthop *dir_nh; struct vr_ip *ip; label = ntohl(*(unsigned int *)pkt_data(pkt)); ttl = label & 0xFF; label >>= VR_MPLS_LABEL_SHIFT; if (--ttl == 0) { drop_reason = VP_DROP_TTL_EXCEEDED; goto dropit; } nh = router->vr_ilm[label]; if (!nh || nh->nh_type != NH_COMPOSITE) { drop_reason = VP_DROP_INVALID_NH; goto dropit; } if (!pkt_pull(pkt, VR_MPLS_HDR_LEN)) { drop_reason = VP_DROP_PUSH; goto dropit; } ip = (struct vr_ip *)pkt_network_header(pkt); /* Ensure that the packet is received from one of the tree descendants */ for (i = 0, found = 0; i < nh->nh_component_cnt; i++) { dir_nh = nh->nh_component_nh[i].cnh; if (dir_nh->nh_type == NH_TUNNEL) { if (ip->ip_saddr == dir_nh->nh_gre_tun_dip) { found = 1; break; } } } if (found == 0) { drop_reason = VP_DROP_INVALID_MCAST_SOURCE; goto dropit; } /* Update the ttl to be used for the subsequent nh processing */ pkt->vp_ttl = ttl; /* If from valid descndant, start replicating */ nh_output(pkt->vp_if->vif_vrf, pkt, nh, fmd); return 0; dropit: vr_pfree(pkt, drop_reason); return 0; }
int vr_mpls_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { unsigned int label; unsigned short vrf; struct vr_nexthop *nh; unsigned char *data; struct vr_ip *ip; unsigned short drop_reason = 0; label = ntohl(*(unsigned int *)pkt_data(pkt)); label >>= VR_MPLS_LABEL_SHIFT; if (label >= router->vr_max_labels) { drop_reason = VP_DROP_INVALID_LABEL; goto dropit; } /* Set network header to inner ip header only if unicast */ if (vr_mpls_is_label_mcast(label) == true) { vr_mcast_mpls_input(router, pkt, fmd); return 0; } /* drop the TOStack label */ data = pkt_pull(pkt, VR_MPLS_HDR_LEN); if (!data) { drop_reason = VP_DROP_PULL; goto dropit; } /* this is the new network header and inner network header too*/ pkt_set_network_header(pkt, pkt->vp_data); pkt_set_inner_network_header(pkt, pkt->vp_data); pkt->vp_type = VP_TYPE_IP; nh = router->vr_ilm[label]; if (!nh) { drop_reason = VP_DROP_INVALID_NH; goto dropit; } /* * We are typically looking at interface nexthops, and hence we will * hit the vrf of the destination device. But, labels can also point * to composite nexthops (ECMP being case in point), in which case we * will take the vrf from the nexthop. When everything else fails, we * will forward the packet in the vrf in which it came i.e fabric */ if (nh->nh_vrf >= 0) vrf = nh->nh_vrf; else if (nh->nh_dev) vrf = nh->nh_dev->vif_vrf; else vrf = pkt->vp_if->vif_vrf; ip = (struct vr_ip *)pkt_data(pkt); if (ip->ip_csum == VR_DIAG_IP_CSUM) { pkt->vp_flags |= VP_FLAG_DIAG; } else if (vr_perfr) { pkt->vp_flags |= VP_FLAG_GRO; } nh_output(vrf, pkt, nh, fmd); return 0; dropit: vr_pfree(pkt, drop_reason); return 0; }
/* do_control()
**   send the two-byte control command cmd[] to each service directory
**   listed in argv (NULL terminated)
**
**   connects to the perpd control socket under basedir, then performs
**   one request/reply exchange per service; every failure is reported
**   on stderr and counted in errs, and processing continues with the
**   next service
*/
void
do_control(uchar_t cmd[], char *argv[]){
    char    sockpath[256];
    size_t  pathlen;
    int     sock;
    int     reply_errno;

    /* build the control socket path, refusing anything that won't fit: */
    pathlen = cstr_vlen(basedir, "/", PERP_CONTROL, "/", PERPD_SOCKET);
    if(pathlen >= sizeof sockpath){
        errno = ENAMETOOLONG;
        fatal_syserr("failure locating perpd control socket ",
                     basedir, "/", PERP_CONTROL, "/", PERPD_SOCKET);
    }
    cstr_vcopy(sockpath, basedir, "/", PERP_CONTROL, "/", PERPD_SOCKET);

    sock = domsock_connect(sockpath);
    if(sock == -1){
        if(errno == ECONNREFUSED){
            fatal_syserr("perpd not running on control socket ", sockpath);
        } else {
            fatal_syserr("failure connecting to perpd control socket ", sockpath);
        }
    }

    /* one control packet per service directory argument: */
    for(; *argv != NULL; ++argv){
        pkt_t        pkt = pkt_INIT(2, 'C', 18);
        struct stat  st;

        if(stat(*argv, &st) == -1){
            ++errs;
            eputs("error: ", *argv, ": service directory not found");
            continue;
        }

        if(!S_ISDIR(st.st_mode)){
            ++errs;
            eputs("error: ", *argv, ": not a directory");
            continue;
        }

        /* the sticky bit marks an activated service */
        if(!(S_ISVTX & st.st_mode)){
            ++errs;
            eputs("error: ", *argv, ": service directory not activated");
            continue;
        }

        /* payload: device and inode of the service, then the command: */
        upak_pack(pkt_data(pkt), "LLbb",
                  (uint64_t)st.st_dev, (uint64_t)st.st_ino, cmd[0], cmd[1]);

        if(pkt_write(sock, pkt, 0) == -1){
            ++errs;
            eputs_syserr("error: ", *argv, ": error writing request");
            continue;
        }

        if(pkt_read(sock, pkt, 0) == -1){
            ++errs;
            eputs_syserr("error: ", *argv, ": error reading response");
            continue;
        }

        if(pkt[0] != 2){
            ++errs;
            eputs("error: ", *argv, ": unknown packet protocol in reply");
            continue;
        }

        if(pkt[1] != 'E'){
            ++errs;
            eputs("error: ", *argv, ": unknown packet type in reply");
            continue;
        }

        /* an 'E' reply carrying errno 0 means success: */
        reply_errno = (int)upak32_unpack(&pkt[3]);
        if(reply_errno == 0){
            report(*argv, ": ok");
            continue;
        }

        ++errs;
        errno = reply_errno;
        eputs_syserr("error: ", *argv, ": error reported in reply");
    }

    return;
}
unsigned int vr_bridge_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { int reason, handled; l4_pkt_type_t l4_type = L4_TYPE_UNKNOWN; unsigned short pull_len, overlay_len = VROUTER_OVERLAY_LEN; int8_t *dmac; struct vr_bridge_entry *be; struct vr_nexthop *nh = NULL; struct vr_vrf_stats *stats; dmac = (int8_t *) pkt_data(pkt); if (pkt->vp_if->vif_flags & VIF_FLAG_MAC_LEARN) { if (vr_bridge_learn(router, pkt, fmd)) { return 0; } } pull_len = 0; if ((pkt->vp_type == VP_TYPE_IP) || (pkt->vp_type == VP_TYPE_IP6) || (pkt->vp_type == VP_TYPE_ARP)) { pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); if (pull_len && !pkt_pull(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PULL); return 0; } } if ((pkt->vp_type == VP_TYPE_IP) || (pkt->vp_type == VP_TYPE_IP6)) { if (fmd->fmd_dscp < 0) { if (pkt->vp_type == VP_TYPE_IP) { fmd->fmd_dscp = vr_inet_get_tos((struct vr_ip *)pkt_network_header(pkt)); } else if (pkt->vp_type == VP_TYPE_IP6) { fmd->fmd_dscp = vr_inet6_get_tos((struct vr_ip6 *)pkt_network_header(pkt)); } } } else { if (fmd->fmd_dotonep < 0) { fmd->fmd_dotonep = vr_vlan_get_tos(pkt_data(pkt)); } } /* Do the bridge lookup for the packets not meant for "me" */ if (!fmd->fmd_to_me) { /* * If DHCP packet coming from VM, Trap it to Agent before doing the bridge * lookup itself */ if (vif_is_virtual(pkt->vp_if)) { if (pkt->vp_type == VP_TYPE_IP) l4_type = vr_ip_well_known_packet(pkt); else if (pkt->vp_type == VP_TYPE_IP6) l4_type = vr_ip6_well_known_packet(pkt); if (l4_type == L4_TYPE_DHCP_REQUEST) { if (pkt->vp_if->vif_flags & VIF_FLAG_DHCP_ENABLED) { vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_L3_PROTOCOLS, NULL); return 0; } } /* * Handle the unicast ARP, coming from VM, not * destined to us. Broadcast ARP requests would be handled * in L2 multicast nexthop. Multicast ARP on fabric * interface also would be handled in L2 multicast nexthop. * Unicast ARP packets on fabric interface would be handled * in plug routines of interface. 
*/ if (!IS_MAC_BMCAST(dmac)) { handled = 0; if (pkt->vp_type == VP_TYPE_ARP) { handled = vr_arp_input(pkt, fmd, dmac); } else if (l4_type == L4_TYPE_NEIGHBOUR_SOLICITATION) { handled = vr_neighbor_input(pkt, fmd, dmac); } if (handled) return 0; } } be = bridge_lookup(dmac, fmd); if (be) nh = be->be_nh; if (!nh || nh->nh_type == NH_DISCARD) { /* If Flooding of unknown unicast not allowed, drop the packet */ if (!vr_unknown_uc_flood(pkt->vp_if, pkt->vp_nh) || IS_MAC_BMCAST(dmac)) { vr_pfree(pkt, VP_DROP_L2_NO_ROUTE); return 0; } be = bridge_lookup(vr_bcast_mac, fmd); nh = be->be_nh; if (!nh) { vr_pfree(pkt, VP_DROP_L2_NO_ROUTE); return 0; } stats = vr_inet_vrf_stats(fmd->fmd_dvrf, pkt->vp_cpu); if (stats) stats->vrf_uuc_floods++; /* Treat this unknown unicast packet as multicast */ pkt->vp_flags |= VP_FLAG_MULTICAST; } if (be) __sync_fetch_and_add(&be->be_packets, 1); if (nh->nh_type != NH_L2_RCV) overlay_len = VROUTER_L2_OVERLAY_LEN; } /* Adjust MSS for V4 and V6 packets */ if ((pkt->vp_type == VP_TYPE_IP) || (pkt->vp_type == VP_TYPE_IP6)) { if (vif_is_virtual(pkt->vp_if) && vr_from_vm_mss_adj && vr_pkt_from_vm_tcp_mss_adj) { if ((reason = vr_pkt_from_vm_tcp_mss_adj(pkt, overlay_len))) { vr_pfree(pkt, reason); return 0; } } if (fmd->fmd_to_me) { handled = vr_l3_input(pkt, fmd); if (!handled) { vr_pfree(pkt, VP_DROP_NOWHERE_TO_GO); } return 0; } } if (pull_len && !pkt_push(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PUSH); return 0; } nh_output(pkt, nh, fmd); return 0; }
unsigned int vr_bridge_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { struct vr_route_req rt; struct vr_forwarding_md cmd; struct vr_nexthop *nh; unsigned short pull_len, overlay_len = VROUTER_L2_OVERLAY_LEN; int reason; rt.rtr_req.rtr_label_flags = 0; rt.rtr_req.rtr_index = VR_BE_INVALID_INDEX; rt.rtr_req.rtr_mac_size = VR_ETHER_ALEN; rt.rtr_req.rtr_mac =(int8_t *) pkt_data(pkt); /* If multicast L2 packet, use broadcast composite nexthop */ if (IS_MAC_BMCAST(rt.rtr_req.rtr_mac)) rt.rtr_req.rtr_mac = (int8_t *)vr_bcast_mac; rt.rtr_req.rtr_vrf_id = fmd->fmd_dvrf; nh = vr_bridge_lookup(fmd->fmd_dvrf, &rt); if (!nh) { vr_pfree(pkt, VP_DROP_L2_NO_ROUTE); return 0; } if (nh->nh_type == NH_L2_RCV) overlay_len = VROUTER_OVERLAY_LEN; if (pkt->vp_type == VP_TYPE_IP || pkt->vp_type == VP_TYPE_IP6) { if (vif_is_virtual(pkt->vp_if) && vr_from_vm_mss_adj && vr_pkt_from_vm_tcp_mss_adj) { pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); if (!pkt_pull(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PULL); return 0; } if ((reason = vr_pkt_from_vm_tcp_mss_adj(pkt, overlay_len))) { vr_pfree(pkt, reason); return 0; } if (!pkt_push(pkt, pull_len)) { vr_pfree(pkt, VP_DROP_PUSH); return 0; } } } /* * If there is a label attached to this bridge entry add the * label */ if (rt.rtr_req.rtr_label_flags & VR_RT_LABEL_VALID_FLAG) { if (!fmd) { vr_init_forwarding_md(&cmd); fmd = &cmd; } fmd->fmd_label = rt.rtr_req.rtr_label; } nh_output(pkt, nh, fmd); return 0; }
/*
 * Apply the address/port translation described by flow entry 'fe' to the
 * IPv4 packet at the data pointer and forward it.
 *
 * The replacement addresses and ports are taken from the key of the
 * reverse flow entry (fe->fe_rflow). Without a reverse flow the packet
 * is freed with VP_DROP_FLOW_NAT_NO_RFLOW and 0 is returned. Otherwise
 * the return value is that of vr_flow_forward().
 *
 * For ICMP error messages the IP header embedded in the ICMP payload is
 * rewritten as well, with the directions swapped relative to the outer
 * header.
 */
static int
vr_flow_nat(unsigned short vrf, struct vr_flow_entry *fe,
        struct vr_packet *pkt, unsigned short proto,
        struct vr_forwarding_md *fmd)
{
    /* inc accumulates the checksum delta; ip_inc is its IP-header-only part */
    unsigned int ip_inc, inc = 0;
    unsigned short *t_sport, *t_dport;
    struct vrouter *router = pkt->vp_if->vif_router;
    struct vr_flow_entry *rfe;
    struct vr_ip *ip, *icmp_pl_ip;
    struct vr_icmp *icmph;
    bool hdr_update = false;

    if (fe->fe_rflow < 0)
        goto drop;

    rfe = vr_get_flow_entry(router, fe->fe_rflow);
    if (!rfe)
        goto drop;

    ip = (struct vr_ip *)pkt_data(pkt);

    if (ip->ip_proto == VR_IP_PROTO_ICMP) {
        icmph = (struct vr_icmp *)((unsigned char *)ip + (ip->ip_hl * 4));
        if (vr_icmp_error(icmph)) {
            /* the embedded IP header follows the ICMP header */
            icmp_pl_ip = (struct vr_ip *)(icmph + 1);
            if (fe->fe_flags & VR_FLOW_FLAG_SNAT) {
                icmp_pl_ip->ip_daddr = rfe->fe_key.key_dest_ip;
                hdr_update = true;
            }

            if (fe->fe_flags & VR_FLOW_FLAG_DNAT) {
                icmp_pl_ip->ip_saddr = rfe->fe_key.key_src_ip;
                hdr_update = true;
            }

            /* embedded header changed: recompute its checksum in full */
            if (hdr_update)
                icmp_pl_ip->ip_csum = vr_ip_csum(icmp_pl_ip);

            /* first two 16 bit words behind the embedded IP header */
            t_sport = (unsigned short *)((unsigned char *)icmp_pl_ip +
                    (icmp_pl_ip->ip_hl * 4));
            t_dport = t_sport + 1;

            if (fe->fe_flags & VR_FLOW_FLAG_SPAT)
                *t_dport = rfe->fe_key.key_dst_port;

            if (fe->fe_flags & VR_FLOW_FLAG_DPAT)
                *t_sport = rfe->fe_key.key_src_port;
        }
    }

    /* source NAT applies only while the source still is the flow's source */
    if ((fe->fe_flags & VR_FLOW_FLAG_SNAT) &&
            (ip->ip_saddr == fe->fe_key.key_src_ip)) {
        vr_incremental_diff(ip->ip_saddr, rfe->fe_key.key_dest_ip, &inc);
        ip->ip_saddr = rfe->fe_key.key_dest_ip;
    }

    if (fe->fe_flags & VR_FLOW_FLAG_DNAT) {
        vr_incremental_diff(ip->ip_daddr, rfe->fe_key.key_src_ip, &inc);
        ip->ip_daddr = rfe->fe_key.key_src_ip;
    }

    /* snapshot the delta before the port changes are added to it */
    ip_inc = inc;

    if (vr_ip_transport_header_valid(ip)) {
        t_sport = (unsigned short *)((unsigned char *)ip +
                (ip->ip_hl * 4));
        t_dport = t_sport + 1;

        if (fe->fe_flags & VR_FLOW_FLAG_SPAT) {
            vr_incremental_diff(*t_sport, rfe->fe_key.key_dst_port, &inc);
            *t_sport = rfe->fe_key.key_dst_port;
        }

        if (fe->fe_flags & VR_FLOW_FLAG_DPAT) {
            vr_incremental_diff(*t_dport, rfe->fe_key.key_src_port, &inc);
            *t_dport = rfe->fe_key.key_src_port;
        }
    }

    /* with diag support, packets carrying the diag checksum are left as is */
#ifdef VROUTER_CONFIG_DIAG
    if (ip->ip_csum != VR_DIAG_IP_CSUM)
        vr_ip_update_csum(pkt, ip_inc, inc);
#else
    vr_ip_update_csum(pkt, ip_inc, inc);
#endif

    /*
     * If the VRF is translated, let's choose a new nexthop
     */
    if ((fe->fe_flags & VR_FLOW_FLAG_VRFT) &&
            pkt->vp_nh && pkt->vp_nh->nh_vrf != vrf)
        pkt->vp_nh = NULL;

    return vr_flow_forward(vrf, pkt, proto, fmd);

drop:
    vr_pfree(pkt, VP_DROP_FLOW_NAT_NO_RFLOW);
    return 0;
}
int vr_mpls_input(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { int ttl, l2_offset = 0; unsigned int label; unsigned short drop_reason; struct vr_nexthop *nh; struct vr_ip *ip; struct vr_forwarding_md c_fmd; if (!fmd) { vr_init_forwarding_md(&c_fmd); fmd = &c_fmd; } label = ntohl(*(unsigned int *)pkt_data(pkt)); ttl = label & 0xFF; label >>= VR_MPLS_LABEL_SHIFT; if (label >= router->vr_max_labels) { drop_reason = VP_DROP_INVALID_LABEL; goto dropit; } if (--ttl <= 0) { drop_reason = VP_DROP_TTL_EXCEEDED; goto dropit; } ip = (struct vr_ip *)pkt_network_header(pkt); fmd->fmd_outer_src_ip = ip->ip_saddr; vr_forwarding_md_set_label(fmd, label, VR_LABEL_TYPE_MPLS); /* Store the TTL in packet. Will be used for multicast replication */ pkt->vp_ttl = ttl; /* drop the TOStack label */ if (!pkt_pull(pkt, VR_MPLS_HDR_LEN)) { drop_reason = VP_DROP_PULL; goto dropit; } nh = __vrouter_get_label(router, label); if (!nh) { drop_reason = VP_DROP_INVALID_LABEL; goto dropit; } /* * Mark it for GRO. 
Diag, L2 and multicast nexthops unmark if * required */ if (vr_perfr) pkt->vp_flags |= VP_FLAG_GRO; /* Reset the flags which get defined below */ pkt->vp_flags &= ~VP_FLAG_MULTICAST; fmd->fmd_vlan = VLAN_ID_INVALID; if (nh->nh_family == AF_INET) { ip = (struct vr_ip *)pkt_data(pkt); if (vr_ip_is_ip4(ip)) { pkt->vp_type = VP_TYPE_IP; } else if (vr_ip_is_ip6(ip)) { pkt->vp_type = VP_TYPE_IP6; } else { drop_reason = VP_DROP_INVALID_PROTOCOL; goto dropit; } pkt_set_network_header(pkt, pkt->vp_data); pkt_set_inner_network_header(pkt, pkt->vp_data); } else if (nh->nh_family == AF_BRIDGE) { if (nh->nh_type == NH_COMPOSITE) { if (label >= VR_MAX_UCAST_LABELS) l2_offset = VR_L2_MCAST_CTRL_DATA_LEN + VR_VXLAN_HDR_LEN; } if (vr_pkt_type(pkt, l2_offset, fmd) < 0) { drop_reason = VP_DROP_INVALID_PACKET; goto dropit; } } else { drop_reason = VP_DROP_INVALID_NH; goto dropit; } /* * We are typically looking at interface nexthops, and hence we will * hit the vrf of the destination device. But, labels can also point * to composite nexthops (ECMP being case in point), in which case we * will take the vrf from the nexthop. When everything else fails, we * will forward the packet in the vrf in which it came i.e fabric */ if (nh->nh_vrf >= 0) fmd->fmd_dvrf = nh->nh_vrf; else if (nh->nh_dev) fmd->fmd_dvrf = nh->nh_dev->vif_vrf; else fmd->fmd_dvrf = pkt->vp_if->vif_vrf; nh_output(pkt, nh, fmd); return 0; dropit: vr_pfree(pkt, drop_reason); return 0; }