/*
 * Request that the kernel clear IFF_UP on the instance's transmit
 * interface (vrrp->xmit_ifp) with an RTM_NEWLINK message.
 *
 * Returns 1 when netlink_talk() succeeds and -1 when it reports an error.
 */
int
netlink_link_down(vrrp_rt *vrrp)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} msg;

	memset(&msg, 0, sizeof (msg));

	msg.n.nlmsg_type = RTM_NEWLINK;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));

	msg.ifi.ifi_family = AF_UNSPEC;
	msg.ifi.ifi_index = IF_INDEX(vrrp->xmit_ifp);

	/* Select only the IFF_UP bit for modification, and clear it. */
	msg.ifi.ifi_change |= IFF_UP;
	msg.ifi.ifi_flags &= ~IFF_UP;

	return (netlink_talk(&nl_cmd, &msg.n) < 0) ? -1 : 1;
}
/*
 * Send an RTM_DELLINK request for the interface index stored in
 * vrrp->vmac_ifindex.
 *
 * Returns -1 when vrrp->ifp is not set or when netlink_talk() fails,
 * 1 otherwise.
 */
int
netlink_link_del_vmac(vrrp_rt *vrrp)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} msg;

	if (!vrrp->ifp)
		return -1;

	memset(&msg, 0, sizeof (msg));

	msg.n.nlmsg_type = RTM_DELLINK;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));

	msg.ifi.ifi_family = AF_INET;
	msg.ifi.ifi_index = vrrp->vmac_ifindex;

	return (netlink_talk(&nl_cmd, &msg.n) < 0) ? -1 : 1;
}
/* Link layer handling */

/*
 * Set the hardware address of vrrp->xmit_ifp to 00:00:5e:00:01:<vrid>
 * through an RTM_NEWLINK message carrying IFLA_ADDRESS.
 *
 * On success the same address is copied into vrrp->xmit_ifp->hw_addr and
 * 1 is returned; if netlink_talk() fails the cached address is left
 * untouched and -1 is returned.
 */
static int
netlink_link_setlladdr(vrrp_rt *vrrp)
{
	u_char vmac[ETH_ALEN] = {0x00, 0x00, 0x5e, 0x00, 0x01, vrrp->vrid};
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} msg;

	memset(&msg, 0, sizeof (msg));

	msg.n.nlmsg_type = RTM_NEWLINK;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));

	msg.ifi.ifi_family = AF_INET;
	msg.ifi.ifi_index = IF_INDEX(vrrp->xmit_ifp);

	addattr_l(&msg.n, sizeof(msg), IFLA_ADDRESS, vmac, ETH_ALEN);

	if (netlink_talk(&nl_cmd, &msg.n) < 0)
		return -1;

	/* Mirror the accepted address in the local interface record. */
	memcpy(vrrp->xmit_ifp->hw_addr, vmac, ETH_ALEN);
	return 1;
}
int kernel_interface_set_master(struct interface *master, struct interface *slave) { struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT); struct { struct nlmsghdr n; struct ifinfomsg ifa; char buf[NL_PKT_BUF_SIZE]; } req; memset(&req, 0, sizeof req); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_type = RTM_SETLINK; req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; req.ifa.ifi_index = slave->ifindex; addattr_l(&req.n, sizeof req, IFLA_MASTER, &master->ifindex, 4); addattr_l(&req.n, sizeof req, IFLA_LINK, &slave->ifindex, 4); return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, 0); }
/* Interface address modification. */ static int netlink_address(int cmd, int family, struct interface *ifp, struct connected *ifc) { int bytelen; struct prefix *p; struct { struct nlmsghdr n; struct ifaddrmsg ifa; char buf[NL_PKT_BUF_SIZE]; } req; struct zebra_ns *zns; if (vrf_is_backend_netns()) zns = zebra_ns_lookup((ns_id_t)ifp->vrf_id); else zns = zebra_ns_lookup(NS_DEFAULT); p = ifc->address; memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE); bytelen = (family == AF_INET ? 4 : 16); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_type = cmd; req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; req.ifa.ifa_family = family; req.ifa.ifa_index = ifp->ifindex; addattr_l(&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen); if (family == AF_INET) { if (CONNECTED_PEER(ifc)) { p = ifc->destination; addattr_l(&req.n, sizeof req, IFA_ADDRESS, &p->u.prefix, bytelen); } else if (cmd == RTM_NEWADDR && ifc->destination) { p = ifc->destination; addattr_l(&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix, bytelen); } } /* p is now either ifc->address or ifc->destination */ req.ifa.ifa_prefixlen = p->prefixlen; if (CHECK_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY)) SET_FLAG(req.ifa.ifa_flags, IFA_F_SECONDARY); if (ifc->label) addattr_l(&req.n, sizeof req, IFA_LABEL, ifc->label, strlen(ifc->label) + 1); return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, 0); }
/*
 * Create the VMAC link named vrrp->vmac_ifname on top of vrrp->ifp and
 * rebind the instance to it.
 *
 * An exclusive-create RTM_NEWLINK is sent with link kind `ll_kind`, the
 * base interface index as IFLA_LINK and the requested name as IFLA_IFNAME.
 * After the kernel accepts it, the interface list is refreshed, vrrp->ifp
 * is pointed at the new interface, bit 2 is set in vrrp->vmac, and the
 * link-layer address, UP state and mode are applied (their results are
 * not checked).
 *
 * Returns 1 on success; -1 when vrrp->ifp is unset, when the create
 * request fails, or when the new interface cannot be found by name.
 */
int
netlink_link_add_vmac(vrrp_rt *vrrp)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} req;
	char ifname[IFNAMSIZ];
	struct rtattr *info_nest;
	interface *vmac_if;

	if (!vrrp->ifp)
		return -1;

	memset(&req, 0, sizeof (req));

	/* Zero-fill first so the bounded copy is always NUL-terminated. */
	memset(ifname, 0, IFNAMSIZ);
	strncpy(ifname, vrrp->vmac_ifname, IFNAMSIZ - 1);

	req.n.nlmsg_type = RTM_NEWLINK;
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
	req.ifi.ifi_family = AF_INET;

	/* macvlan settings: IFLA_LINKINFO nest containing IFLA_INFO_KIND */
	info_nest = NLMSG_TAIL(&req.n);
	addattr_l(&req.n, sizeof(req), IFLA_LINKINFO, NULL, 0);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, (void *)ll_kind,
		  strlen(ll_kind));
	/* Close the nest by patching in its final length. */
	info_nest->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)info_nest;

	addattr_l(&req.n, sizeof(req), IFLA_LINK, &IF_INDEX(vrrp->ifp),
		  sizeof(uint32_t));
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, ifname, strlen(ifname));

	if (netlink_talk(&nl_cmd, &req.n) < 0)
		return -1;

	/*
	 * Update interface queue and vrrp instance interface binding.
	 * bring it UP !
	 */
	netlink_interface_lookup();
	vmac_if = if_get_by_ifname(ifname);
	if (!vmac_if)
		return -1;

	vrrp->ifp = vmac_if;
	vrrp->vmac |= 2;
	netlink_link_setlladdr(vrrp);
	netlink_link_up(vrrp);

	/*
	 * By default MACVLAN interface are in VEPA mode which filters
	 * out received packets whose MAC source address matches that
	 * of the MACVLAN interface. Setting MACVLAN interface in private
	 * mode will not filter based on source MAC address.
	 */
	netlink_link_setmode(vrrp);

	return 1;
}
/* Routing table change via netlink interface. */ int netlink_route (int cmd, int family, void *dest, int length, void *gate, int index, int zebra_flags, int table) { int ret; int bytelen; struct sockaddr_nl snl; struct { struct nlmsghdr n; struct rtmsg r; char buf[1024]; } req; memset (&req, 0, sizeof req); bytelen = (family == AF_INET ? 4 : 16); req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg)); req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; req.n.nlmsg_type = cmd; req.r.rtm_family = family; req.r.rtm_table = table; req.r.rtm_dst_len = length; if (cmd == RTM_NEWROUTE) { req.r.rtm_protocol = RTPROT_ZEBRA; req.r.rtm_scope = RT_SCOPE_UNIVERSE; if (zebra_flags & ZEBRA_FLAG_BLACKHOLE) req.r.rtm_type = RTN_BLACKHOLE; else req.r.rtm_type = RTN_UNICAST; } if (gate) addattr_l (&req.n, sizeof req, RTA_GATEWAY, gate, bytelen); if (dest) addattr_l (&req.n, sizeof req, RTA_DST, dest, bytelen); if (index > 0) addattr32 (&req.n, sizeof req, RTA_OIF, index); /* Destination netlink address. */ memset (&snl, 0, sizeof snl); snl.nl_family = AF_NETLINK; /* Talk to netlink socket. */ ret = netlink_talk (&req.n, &netlink); if (ret < 0) return -1; return 0; }
/* Interface address modification. */ static int netlink_address (int cmd, int family, struct interface *ifp, struct connected *ifc) { int bytelen; struct prefix *p; struct { struct nlmsghdr n; struct ifaddrmsg ifa; char buf[NL_PKT_BUF_SIZE]; } req; p = ifc->address; memset (&req, 0, sizeof req); bytelen = (family == AF_INET ? 4 : 16); req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifaddrmsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_type = cmd; req.ifa.ifa_family = family; req.ifa.ifa_index = ifp->ifindex; req.ifa.ifa_prefixlen = p->prefixlen; addattr_l (&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen); if (family == AF_INET && cmd == RTM_NEWADDR) { if (!CONNECTED_PEER(ifc) && ifc->destination) { p = ifc->destination; addattr_l (&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix, bytelen); } } if (CHECK_FLAG (ifc->flags, KROUTE_IFA_SECONDARY)) SET_FLAG (req.ifa.ifa_flags, IFA_F_SECONDARY); if (ifc->label) addattr_l (&req.n, sizeof req, IFA_LABEL, ifc->label, strlen (ifc->label) + 1); return netlink_talk (&req.n, &netlink_cmd); }
/* Fallback for headers that do not provide MACVLAN_MODE_VRRP. */
#ifndef MACVLAN_MODE_VRRP
#define MACVLAN_MODE_VRRP 16
#endif

/*
 * Set the macvlan mode of vrrp->xmit_ifp to MACVLAN_MODE_VRRP.
 *
 * The RTM_NEWLINK request nests the attributes as
 *   IFLA_LINKINFO { IFLA_INFO_KIND = ll_kind,
 *                   IFLA_INFO_DATA { IFLA_MACVLAN_MODE } }
 * and each nest length is patched in after its contents are appended.
 *
 * Returns 1 on success, -1 when netlink_talk() fails.
 */
static int
netlink_link_setmode(vrrp_rt *vrrp)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} msg;
	struct rtattr *info_nest;
	struct rtattr *data_nest;

	memset(&msg, 0, sizeof (msg));

	msg.n.nlmsg_type = RTM_NEWLINK;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));

	msg.ifi.ifi_family = AF_INET;
	msg.ifi.ifi_index = IF_INDEX(vrrp->xmit_ifp);

	/* Open IFLA_LINKINFO and name the link kind. */
	info_nest = NLMSG_TAIL(&msg.n);
	addattr_l(&msg.n, sizeof(msg), IFLA_LINKINFO, NULL, 0);
	addattr_l(&msg.n, sizeof(msg), IFLA_INFO_KIND, (void *)ll_kind,
		  strlen(ll_kind));

	/* Open IFLA_INFO_DATA and add the mode. */
	data_nest = NLMSG_TAIL(&msg.n);
	addattr_l(&msg.n, sizeof(msg), IFLA_INFO_DATA, NULL, 0);
	addattr32(&msg.n, sizeof(msg), IFLA_MACVLAN_MODE, MACVLAN_MODE_VRRP);

	/* Close the nests, innermost first. */
	data_nest->rta_len = (void *)NLMSG_TAIL(&msg.n) - (void *)data_nest;
	info_nest->rta_len = (void *)NLMSG_TAIL(&msg.n) - (void *)info_nest;

	return (netlink_talk(&nl_cmd, &msg.n) < 0) ? -1 : 1;
}
/*
 * Add/Delete IP rule to/from a specific IP/network
 *
 * iprule  rule description: address, mask, direction and table
 * cmd     non-zero sends RTM_NEWRULE, zero sends RTM_DELRULE
 *
 * Table ids below 256 go in the header (0 is mapped to RT_TABLE_MAIN);
 * larger ids are carried in an FRA_TABLE attribute.
 *
 * Returns 1 on success, -1 when netlink_talk() fails.
 */
int
netlink_rule(ip_rule_t *iprule, int cmd)
{
	struct {
		struct nlmsghdr n;
		struct rtmsg r;
		char buf[1024];
	} msg;

	memset(&msg, 0, sizeof (msg));

	msg.n.nlmsg_type = cmd ? RTM_NEWRULE : RTM_DELRULE;
	msg.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	msg.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));

	msg.r.rtm_family = IP_FAMILY(iprule->addr);

	if (iprule->table >= 256) {
		/* Does not fit the 8-bit header field. */
		msg.r.rtm_table = RT_TABLE_UNSPEC;
		addattr32(&msg.n, sizeof(msg), FRA_TABLE, iprule->table);
	} else if (iprule->table) {
		msg.r.rtm_table = iprule->table;
	} else {
		msg.r.rtm_table = RT_TABLE_MAIN;
	}

	msg.r.rtm_scope = RT_SCOPE_UNIVERSE;
	msg.r.rtm_flags = 0;
	msg.r.rtm_type = RTN_UNSPEC;
	if (cmd) {
		msg.r.rtm_protocol = RTPROT_BOOT;
		msg.r.rtm_type = RTN_UNICAST;
	}

	/* Set rule entry */
	if (iprule->dir == VRRP_RULE_FROM) {
		msg.r.rtm_src_len = iprule->mask;
		add_addr2req(&msg.n, sizeof(msg), FRA_SRC, iprule->addr);
	} else if (iprule->dir == VRRP_RULE_TO) {
		msg.r.rtm_dst_len = iprule->mask;
		add_addr2req(&msg.n, sizeof(msg), FRA_DST, iprule->addr);
	}

	return (netlink_talk(&nl_cmd, &msg.n) < 0) ? -1 : 1;
}
/*
 * Delete the VMAC interface whose index is stored in vrrp->vmac_ifindex.
 *
 * For AF_INET instances the base interface (looked up through
 * vrrp->ifp->base_ifindex) first has its parameters reset via
 * reset_interface_parameters(); a missing base interface is only logged.
 * The outcome of the RTM_DELLINK request is logged either way.
 *
 * Returns 1 on success, -1 when vrrp->ifp is unset or the request fails.
 */
int
netlink_link_del_vmac(vrrp_t *vrrp)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} msg;
	interface_t *base;

	if (!vrrp->ifp)
		return -1;

	/* Reset arp_ignore and arp_filter on the base interface if necessary */
	if (vrrp->family == AF_INET) {
		base = if_get_by_ifindex(vrrp->ifp->base_ifindex);
		if (!base)
			log_message(LOG_INFO, "Unable to find base interface for vrrp instance %s", vrrp->iname);
		else
			reset_interface_parameters(base);
	}

	memset(&msg, 0, sizeof (msg));

	msg.n.nlmsg_type = RTM_DELLINK;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));

	msg.ifi.ifi_family = AF_INET;
	msg.ifi.ifi_index = vrrp->vmac_ifindex;

	if (netlink_talk(&nl_cmd, &msg.n) < 0) {
		log_message(LOG_INFO, "vmac: Error removing VMAC interface %s for vrrp_instance %s!!!"
				    , vrrp->vmac_ifname, vrrp->iname);
		return -1;
	}

	log_message(LOG_INFO, "vmac: Success removing VMAC interface %s for vrrp_instance %s"
			    , vrrp->vmac_ifname, vrrp->iname);
	return 1;
}
static inline int netlink_set_interface_flags(int ifindex, const sysctl_opts_t *sys_opts) { int status = 0; struct { struct nlmsghdr n; struct ifinfomsg ifi; char buf[64]; } req; struct nlattr *start; struct nlattr *inet_start; struct nlattr *conf_start; const sysctl_opts_t *so; memset(&req, 0, sizeof (req)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_type = RTM_NEWLINK; req.ifi.ifi_family = AF_UNSPEC; req.ifi.ifi_index = ifindex; start = nest_start(&req.n, IFLA_AF_SPEC); inet_start = nest_start(&req.n, AF_INET); conf_start = nest_start(&req.n, IFLA_INET_CONF); for (so = sys_opts; so->param; so++) addattr32(&req.n, sizeof req, so->param, so->value); nest_end(NLMSG_TAIL(&req.n), conf_start); nest_end(NLMSG_TAIL(&req.n), inet_start); nest_end(NLMSG_TAIL(&req.n), start); if (netlink_talk(&nl_cmd, &req.n) < 0) status = 1; return status; }
/* Routing table change via netlink interface. */ int netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib, int family) { int bytelen; struct sockaddr_nl snl; struct nexthop *nexthop = NULL; int nexthop_num = 0; struct nlsock *nl; struct { struct nlmsghdr n; struct rtmsg r; char buf[1024]; } req; memset (&req, 0, sizeof req); bytelen = (family == AF_INET ? 4 : 16); req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg)); req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; req.n.nlmsg_type = cmd; req.r.rtm_family = family; req.r.rtm_table = rib->table; req.r.rtm_dst_len = p->prefixlen; if (cmd == RTM_NEWROUTE) { req.r.rtm_protocol = RTPROT_ZEBRA; req.r.rtm_scope = RT_SCOPE_UNIVERSE; if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_BLACKHOLE)) req.r.rtm_type = RTN_BLACKHOLE; else req.r.rtm_type = RTN_UNICAST; } addattr_l (&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen); /* Metric. */ addattr32 (&req.n, sizeof req, RTA_PRIORITY, rib->metric); /* Multipath case. */ if (rib->nexthop_active_num == 1 || MULTIPATH_NUM == 1) { for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next) { if ((cmd == RTM_NEWROUTE && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE)) || (cmd == RTM_DELROUTE && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))) { if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) { if (nexthop->rtype == NEXTHOP_TYPE_IPV4 || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX) addattr_l (&req.n, sizeof req, RTA_GATEWAY, &nexthop->rgate.ipv4, bytelen); #ifdef HAVE_IPV6 if (nexthop->rtype == NEXTHOP_TYPE_IPV6 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME) addattr_l (&req.n, sizeof req, RTA_GATEWAY, &nexthop->rgate.ipv6, bytelen); #endif /* HAVE_IPV6 */ if (nexthop->rtype == NEXTHOP_TYPE_IFINDEX || nexthop->rtype == NEXTHOP_TYPE_IFNAME || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME) addattr32 (&req.n, sizeof req, RTA_OIF, 
nexthop->rifindex); } else { if (nexthop->type == NEXTHOP_TYPE_IPV4 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) addattr_l (&req.n, sizeof req, RTA_GATEWAY, &nexthop->gate.ipv4, bytelen); #ifdef HAVE_IPV6 if (nexthop->type == NEXTHOP_TYPE_IPV6 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) addattr_l (&req.n, sizeof req, RTA_GATEWAY, &nexthop->gate.ipv6, bytelen); #endif /* HAVE_IPV6 */ if (nexthop->type == NEXTHOP_TYPE_IFINDEX || nexthop->type == NEXTHOP_TYPE_IFNAME || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME) addattr32 (&req.n, sizeof req, RTA_OIF, nexthop->ifindex); } if (cmd == RTM_NEWROUTE) SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); nexthop_num++; break; } } } else { char buf[1024]; struct rtattr *rta = (void *) buf; struct rtnexthop *rtnh; rta->rta_type = RTA_MULTIPATH; rta->rta_len = RTA_LENGTH(0); rtnh = RTA_DATA(rta); nexthop_num = 0; for (nexthop = rib->nexthop; nexthop && (MULTIPATH_NUM == 0 || nexthop_num < MULTIPATH_NUM); nexthop = nexthop->next) { if ((cmd == RTM_NEWROUTE && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE)) || (cmd == RTM_DELROUTE && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))) { nexthop_num++; rtnh->rtnh_len = sizeof (*rtnh); rtnh->rtnh_flags = 0; rtnh->rtnh_hops = 0; rta->rta_len += rtnh->rtnh_len; if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) { if (nexthop->rtype == NEXTHOP_TYPE_IPV4 || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX) { rta_addattr_l (rta, 4096, RTA_GATEWAY, &nexthop->rgate.ipv4, bytelen); rtnh->rtnh_len += sizeof (struct rtattr) + 4; } #ifdef HAVE_IPV6 if (nexthop->rtype == NEXTHOP_TYPE_IPV6 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX) rta_addattr_l (rta, 4096, RTA_GATEWAY, &nexthop->rgate.ipv6, bytelen); #endif /* HAVE_IPV6 */ /* ifindex */ if (nexthop->rtype == NEXTHOP_TYPE_IFINDEX || nexthop->rtype == 
NEXTHOP_TYPE_IFNAME || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME) rtnh->rtnh_ifindex = nexthop->rifindex; else rtnh->rtnh_ifindex = 0; } else { if (nexthop->type == NEXTHOP_TYPE_IPV4 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { rta_addattr_l (rta, 4096, RTA_GATEWAY, &nexthop->gate.ipv4, bytelen); rtnh->rtnh_len += sizeof (struct rtattr) + 4; } #ifdef HAVE_IPV6 if (nexthop->type == NEXTHOP_TYPE_IPV6 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) rta_addattr_l (rta, 4096, RTA_GATEWAY, &nexthop->gate.ipv6, bytelen); #endif /* HAVE_IPV6 */ /* ifindex */ if (nexthop->type == NEXTHOP_TYPE_IFINDEX || nexthop->type == NEXTHOP_TYPE_IFNAME || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) rtnh->rtnh_ifindex = nexthop->ifindex; else rtnh->rtnh_ifindex = 0; } rtnh = RTNH_NEXT(rtnh); if (cmd == RTM_NEWROUTE) SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); } } if (rta->rta_len > RTA_LENGTH (0)) addattr_l (&req.n, 1024, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta)); } /* If there is no useful nexthop then return. */ if (nexthop_num == 0) { if (IS_ZEBRA_DEBUG_KERNEL) zlog_info ("netlink_route_multipath(): No useful nexthop."); return 0; } /* Destination netlink address. */ memset (&snl, 0, sizeof snl); snl.nl_family = AF_NETLINK; if (family == AF_INET) nl = &netlink_cmd; else nl = &netlink; /* Talk to netlink socket. */ return netlink_talk (&req.n, nl); }
int kernel_route(int operation, const unsigned char *dest, unsigned short plen, const unsigned char *gate, int ifindex, unsigned int metric, const unsigned char *newgate, int newifindex, unsigned int newmetric) { union { char raw[1024]; struct nlmsghdr nh; } buf; struct rtmsg *rtm; struct rtattr *rta; int len = sizeof(buf.raw); int rc, ipv4; if(!nl_setup) { fprintf(stderr,"kernel_route: netlink not initialized.\n"); errno = EIO; return -1; } /* if the socket has been closed after an IO error, */ /* we try to re-open it. */ if(nl_command.sock < 0) { rc = netlink_socket(&nl_command, 0); if(rc < 0) { int olderrno = errno; perror("kernel_route: netlink_socket()"); errno = olderrno; return -1; } } /* Check that the protocol family is consistent. */ if(plen >= 96 && v4mapped(dest)) { if(!v4mapped(gate)) { errno = EINVAL; return -1; } } else { if(v4mapped(gate)) { errno = EINVAL; return -1; } } ipv4 = v4mapped(gate); if(operation == ROUTE_MODIFY) { if(newmetric == metric && memcmp(newgate, gate, 16) == 0 && newifindex == ifindex) return 0; /* It would be better to add the new route before removing the old one, to avoid losing packets. However, this causes problems with non-multipath kernels, which sometimes silently fail the request, causing "stuck" routes. Let's stick with the naive approach, and hope that the window is small enough to be negligible. */ kernel_route(ROUTE_FLUSH, dest, plen, gate, ifindex, metric, NULL, 0, 0); rc = kernel_route(ROUTE_ADD, dest, plen, newgate, newifindex, newmetric, NULL, 0, 0); if(rc < 0) { if(errno == EEXIST) rc = 1; /* Should we try to re-install the flushed route on failure? Error handling is hard. */ } return rc; } kdebugf("kernel_route: %s %s/%d metric %d dev %d nexthop %s\n", operation == ROUTE_ADD ? "add" : operation == ROUTE_FLUSH ? "flush" : "???", format_address(dest), plen, metric, ifindex, format_address(gate)); /* Unreachable default routes cause all sort of weird interactions; ignore them. 
*/ if(metric >= KERNEL_INFINITY && (plen == 0 || (ipv4 && plen == 96))) return 0; memset(buf.raw, 0, sizeof(buf.raw)); if(operation == ROUTE_ADD) { buf.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; buf.nh.nlmsg_type = RTM_NEWROUTE; } else { buf.nh.nlmsg_flags = NLM_F_REQUEST; buf.nh.nlmsg_type = RTM_DELROUTE; } rtm = NLMSG_DATA(&buf.nh); rtm->rtm_family = ipv4 ? AF_INET : AF_INET6; rtm->rtm_dst_len = ipv4 ? plen - 96 : plen; rtm->rtm_table = export_table; rtm->rtm_scope = RT_SCOPE_UNIVERSE; if(metric < KERNEL_INFINITY) rtm->rtm_type = RTN_UNICAST; else rtm->rtm_type = RTN_UNREACHABLE; rtm->rtm_protocol = RTPROT_BABEL; rtm->rtm_flags |= RTNH_F_ONLINK; rta = RTM_RTA(rtm); if(ipv4) { rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(struct in_addr)); rta->rta_type = RTA_DST; memcpy(RTA_DATA(rta), dest + 12, sizeof(struct in_addr)); } else { rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); rta->rta_type = RTA_DST; memcpy(RTA_DATA(rta), dest, sizeof(struct in6_addr)); } rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(int)); rta->rta_type = RTA_PRIORITY; if(metric < KERNEL_INFINITY) { *(int*)RTA_DATA(rta) = metric; rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(int)); rta->rta_type = RTA_OIF; *(int*)RTA_DATA(rta) = ifindex; if(ipv4) { rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(struct in_addr)); rta->rta_type = RTA_GATEWAY; memcpy(RTA_DATA(rta), gate + 12, sizeof(struct in_addr)); } else { rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); rta->rta_type = RTA_GATEWAY; memcpy(RTA_DATA(rta), gate, sizeof(struct in6_addr)); } } else { *(int*)RTA_DATA(rta) = -1; } buf.nh.nlmsg_len = (char*)rta + rta->rta_len - buf.raw; return netlink_talk(&buf.nh); }
/* Install or uninstall specified rule for a specific interface. * Form netlink message and ship it. Currently, notify status after * waiting for netlink status. */ static int netlink_rule_update(int cmd, struct zebra_pbr_rule *rule) { int family; int bytelen; struct { struct nlmsghdr n; struct fib_rule_hdr frh; char buf[NL_PKT_BUF_SIZE]; } req; struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT); struct sockaddr_nl snl; char buf1[PREFIX_STRLEN]; char buf2[PREFIX_STRLEN]; memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE); family = PREFIX_FAMILY(&rule->rule.filter.src_ip); bytelen = (family == AF_INET ? 4 : 16); req.n.nlmsg_type = cmd; req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; req.frh.family = family; req.frh.action = FR_ACT_TO_TBL; /* rule's pref # */ addattr32(&req.n, sizeof(req), FRA_PRIORITY, rule->rule.priority); /* interface on which applied */ if (rule->ifp) addattr_l(&req.n, sizeof(req), FRA_IFNAME, rule->ifp->name, strlen(rule->ifp->name) + 1); /* source IP, if specified */ if (IS_RULE_FILTERING_ON_SRC_IP(rule)) { req.frh.src_len = rule->rule.filter.src_ip.prefixlen; addattr_l(&req.n, sizeof(req), FRA_SRC, &rule->rule.filter.src_ip.u.prefix, bytelen); } /* destination IP, if specified */ if (IS_RULE_FILTERING_ON_DST_IP(rule)) { req.frh.dst_len = rule->rule.filter.dst_ip.prefixlen; addattr_l(&req.n, sizeof(req), FRA_DST, &rule->rule.filter.dst_ip.u.prefix, bytelen); } /* fwmark, if specified */ if (IS_RULE_FILTERING_ON_FWMARK(rule)) { addattr32(&req.n, sizeof(req), FRA_FWMARK, rule->rule.filter.fwmark); } /* Route table to use to forward, if filter criteria matches. 
*/ if (rule->rule.action.table < 256) req.frh.table = rule->rule.action.table; else { req.frh.table = RT_TABLE_UNSPEC; addattr32(&req.n, sizeof(req), FRA_TABLE, rule->rule.action.table); } if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug( "Tx %s family %s IF %s(%u) Pref %u Fwmark %u Src %s Dst %s Table %u", nl_msg_type_to_str(cmd), nl_family_to_str(family), rule->ifp ? rule->ifp->name : "Unknown", rule->ifp ? rule->ifp->ifindex : 0, rule->rule.priority, rule->rule.filter.fwmark, prefix2str(&rule->rule.filter.src_ip, buf1, sizeof(buf1)), prefix2str(&rule->rule.filter.dst_ip, buf2, sizeof(buf2)), rule->rule.action.table); /* Ship off the message. * Note: Currently, netlink_talk() is a blocking call which returns * back the status. */ memset(&snl, 0, sizeof(snl)); snl.nl_family = AF_NETLINK; return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, 0); }
/*
 * Create (or, on reload, re-adopt) the VMAC link named vrrp->vmac_ifname
 * on top of vrrp->ifp and make it the instance's transmit interface.
 *
 * If `reload` is set and an interface with that name already exists, it
 * is adopted as-is.  Otherwise an exclusive-create RTM_NEWLINK is sent
 * with link kind `ll_kind`, the interface list is refreshed, and the new
 * interface is configured (link-layer address, vyatta_if_setup(), UP,
 * mode); the results of those configuration calls are not checked.
 *
 * In both cases vrrp->xmit_ifp and vrrp->vmac_ifindex are updated and
 * bit 2 is set in vrrp->vmac.
 *
 * Returns 1 on success; -1 when vrrp->ifp is unset, when the create
 * request fails, or when the new interface cannot be found by name.
 */
int
netlink_link_add_vmac(vrrp_rt *vrrp)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} req;
	char ifname[IFNAMSIZ];
	struct rtattr *info_nest;
	interface *vmac_if;

	if (!vrrp->ifp)
		return -1;

	memset(&req, 0, sizeof (req));

	/* Zero-fill first so the bounded copy is always NUL-terminated. */
	memset(ifname, 0, IFNAMSIZ);
	strncpy(ifname, vrrp->vmac_ifname, IFNAMSIZ - 1);

	/*
	 * Check to see if this vmac interface was created
	 * by a previous instance.
	 */
	if (reload && (vmac_if = if_get_by_ifname(ifname))) {
		vrrp->xmit_ifp = vmac_if;
		/* Save ifindex for use on delete */
		vrrp->vmac_ifindex = IF_INDEX(vrrp->xmit_ifp);
		vrrp->vmac |= 2;
		return 1;
	}

	req.n.nlmsg_type = RTM_NEWLINK;
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
	req.ifi.ifi_family = AF_INET;

	/* macvlan settings: IFLA_LINKINFO nest containing IFLA_INFO_KIND */
	info_nest = NLMSG_TAIL(&req.n);
	addattr_l(&req.n, sizeof(req), IFLA_LINKINFO, NULL, 0);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, (void *)ll_kind,
		  strlen(ll_kind));
	/* Close the nest by patching in its final length. */
	info_nest->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)info_nest;

	addattr_l(&req.n, sizeof(req), IFLA_LINK, &IF_INDEX(vrrp->ifp),
		  sizeof(uint32_t));
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, ifname, strlen(ifname));

	if (netlink_talk(&nl_cmd, &req.n) < 0)
		return -1;

	/*
	 * Update interface queue and vrrp instance interface binding.
	 * bring it UP !
	 */
	netlink_interface_lookup();
	vmac_if = if_get_by_ifname(ifname);
	if (!vmac_if)
		return -1;

	vrrp->xmit_ifp = vmac_if;
	vrrp->vmac_ifindex = IF_INDEX(vrrp->xmit_ifp); /* For use on delete */
	vrrp->vmac |= 2;

	netlink_link_setlladdr(vrrp);
	vyatta_if_setup(ifname);
	netlink_link_up(vrrp);
	netlink_link_setmode(vrrp);

	return 1;
}
/*
 * Add/Delete IP route to/from a specific interface
 *
 * iproute  route description (dst/dmask, gw, optional gw2, src, index,
 *          metric, table, scope, blackhole flag)
 * cmd      non-zero sends RTM_NEWROUTE, zero sends RTM_DELROUTE
 *
 * The address family comes from dst, else from src, else AF_INET.  Table
 * ids below 256 go in the header (0 is mapped to RT_TABLE_MAIN); larger
 * ids are carried in RTA_TABLE.  When both gw and gw2 are set, the two
 * gateways are sent as an RTA_MULTIPATH attribute; otherwise gw is sent
 * as RTA_GATEWAY and index as RTA_OIF.
 *
 * Returns 1 on success, -1 when netlink_talk() fails.
 */
static int
netlink_route(ip_route_t *iproute, int cmd)
{
	struct {
		struct nlmsghdr n;
		struct rtmsg r;
		char buf[1024];
	} req;
	char buf[1024];
	struct rtattr *rta = (void*)buf;
	struct rtnexthop *rtnh;

	memset(&req, 0, sizeof (req));

	req.n.nlmsg_type = cmd ? RTM_NEWROUTE : RTM_DELROUTE;
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));

	if (iproute->dst)
		req.r.rtm_family = IP_FAMILY(iproute->dst);
	else if (iproute->src)
		req.r.rtm_family = IP_FAMILY(iproute->src);
	else
		req.r.rtm_family = AF_INET;

	if (iproute->table >= 256) {
		/* Does not fit the 8-bit header field. */
		req.r.rtm_table = RT_TABLE_UNSPEC;
		addattr32(&req.n, sizeof(req), RTA_TABLE, iproute->table);
	} else if (iproute->table) {
		req.r.rtm_table = iproute->table;
	} else {
		req.r.rtm_table = RT_TABLE_MAIN;
	}

	req.r.rtm_scope = RT_SCOPE_NOWHERE;
	if (cmd == IPROUTE_ADD) {
		req.r.rtm_protocol = RTPROT_BOOT;
		req.r.rtm_scope = iproute->scope;
		req.r.rtm_type = RTN_UNICAST;
	}
	if (iproute->blackhole)
		req.r.rtm_type = RTN_BLACKHOLE;

	/* Set routing entry */
	if (iproute->dst) {
		req.r.rtm_dst_len = iproute->dmask;
		add_addr2req(&req.n, sizeof(req), RTA_DST, iproute->dst);
	}

	if (!iproute->blackhole && !iproute->gw2)
		add_addr2req(&req.n, sizeof(req), RTA_GATEWAY, iproute->gw);

	if (iproute->gw && iproute->gw2) {
		/* Build the multipath payload in buf, then copy it in. */
		rta->rta_type = RTA_MULTIPATH;
		rta->rta_len = RTA_LENGTH(0);
		rtnh = RTA_DATA(rta);

		/* Appends one nexthop record with gateway x at rtnh. */
#define MULTIPATH_ADD_GW(x) \
	memset(rtnh, 0, sizeof(*rtnh)); \
	rtnh->rtnh_len = sizeof(*rtnh); \
	if (iproute->index) rtnh->rtnh_ifindex = iproute->index; \
	rta->rta_len += rtnh->rtnh_len; \
	add_addr2rta(rta, 1024, RTA_GATEWAY, x); \
	rtnh->rtnh_len += sizeof(struct rtattr) + IP_SIZE(x); \
	rtnh = RTNH_NEXT(rtnh);

		MULTIPATH_ADD_GW(iproute->gw);
		MULTIPATH_ADD_GW(iproute->gw2);

		addattr_l(&req.n, sizeof(req), RTA_MULTIPATH,
			  RTA_DATA(rta), RTA_PAYLOAD(rta));
	}

	if (iproute->index && !iproute->gw2)
		addattr32(&req.n, sizeof(req), RTA_OIF, iproute->index);
	if (iproute->src)
		add_addr2req(&req.n, sizeof(req), RTA_PREFSRC, iproute->src);
	if (iproute->metric)
		addattr32(&req.n, sizeof(req), RTA_PRIORITY, iproute->metric);

	/* This returns ESRCH if the address of via address doesn't exist */
	/* ENETDOWN if dev p33p1.40 for example is down */
	return (netlink_talk(&nl_cmd, &req.n) < 0) ? -1 : 1;
}
/*
 * Add/Delete IP address to a specific interface_t
 *
 * ipaddress  address description; its embedded ifaddrmsg is copied into
 *            the request unchanged
 * cmd        IPADDRESS_DEL sends RTM_DELADDR, anything else RTM_NEWADDR
 *
 * Returns 1 on success, -1 when netlink_talk() fails.
 */
int
netlink_ipaddress(ip_address_t *ipaddress, int cmd)
{
	struct {
		struct nlmsghdr n;
		struct ifaddrmsg ifa;
		char buf[256];
	} req;
	struct ifa_cacheinfo cinfo;
	const int adding = (cmd == IPADDRESS_ADD);

	memset(&req, 0, sizeof (req));

	req.n.nlmsg_type = (cmd == IPADDRESS_DEL) ? RTM_DELADDR : RTM_NEWADDR;
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifaddrmsg));
	req.ifa = ipaddress->ifa;

	if (!IP_IS6(ipaddress)) {
		addattr_l(&req.n, sizeof(req), IFA_LOCAL,
			  &ipaddress->u.sin.sin_addr, sizeof(ipaddress->u.sin.sin_addr));

		if (!adding) {
			/* IPADDRESS_DEL */
			addattr_l(&req.n, sizeof(req), IFA_ADDRESS,
				  &ipaddress->u.sin.sin_addr, sizeof(ipaddress->u.sin.sin_addr));
		} else if (ipaddress->u.sin.sin_brd.s_addr) {
			addattr_l(&req.n, sizeof(req), IFA_BROADCAST,
				  &ipaddress->u.sin.sin_brd, sizeof(ipaddress->u.sin.sin_brd));
		}
	} else {
		if (adding) {
			/* Mark IPv6 address as deprecated (rfc3484) in order to prevent
			 * using VRRP VIP as source address in healthchecking use cases.
			 */
			if (ipaddress->ifa.ifa_prefixlen == 128) {
				memset(&cinfo, 0, sizeof(cinfo));
				cinfo.ifa_prefered = 0;
				cinfo.ifa_valid = INFINITY_LIFE_TIME;

				log_message(LOG_INFO, "%s has a prefix length of 128, setting "
						      "preferred_lft to 0", ipaddresstos(NULL, ipaddress));
				addattr_l(&req.n, sizeof(req), IFA_CACHEINFO, &cinfo,
					  sizeof(cinfo));
			}

			/* Disable, per VIP, Duplicate Address Detection algorithm (DAD).
			 * Using the nodad flag has the following benefits:
			 *
			 * (1) The address becomes immediately usable after they're
			 *     configured.
			 * (2) In the case of a temporary layer-2 / split-brain problem
			 *     we can avoid that the active VIP transitions into the
			 *     dadfailed phase and stays there forever - leaving us
			 *     without service. HA/VRRP setups have their own "DAD"-like
			 *     functionality, so it's not really needed from the IPv6 stack.
			 */
#ifdef IFA_F_NODAD	/* Since Linux 2.6.19 */
			req.ifa.ifa_flags |= IFA_F_NODAD;
#endif
		}

		addattr_l(&req.n, sizeof(req), IFA_LOCAL,
			  &ipaddress->u.sin6_addr, sizeof(ipaddress->u.sin6_addr));
	}

	/* A label is only attached when adding. */
	if (adding && ipaddress->label)
		addattr_l(&req.n, sizeof (req), IFA_LABEL,
			  ipaddress->label, strlen(ipaddress->label) + 1);

	return (netlink_talk(&nl_cmd, &req.n) < 0) ? -1 : 1;
}
static int netlink_receive(struct netlink_fd *fd, struct nlmsghdr *reply) { struct sockaddr_nl nladdr; struct iovec iov; struct msghdr msg = { .msg_name = &nladdr, .msg_namelen = sizeof(nladdr), .msg_iov = &iov, .msg_iovlen = 1, }; int got_reply = FALSE, len; char buf[16*1024]; iov.iov_base = buf; while (!got_reply) { int status; struct nlmsghdr *h; iov.iov_len = sizeof(buf); status = recvmsg(fd->fd, &msg, MSG_DONTWAIT); if (status < 0) { if (errno == EINTR) continue; if (errno == EAGAIN) return reply == NULL; fprintf(stderr, "Netlink overrun\n"); continue; } if (status == 0) { fprintf(stderr, "Netlink returned EOF\n"); return FALSE; } h = (struct nlmsghdr *) buf; while (NLMSG_OK(h, status)) { if (reply != NULL && h->nlmsg_seq == reply->nlmsg_seq) { len = h->nlmsg_len; if (len > reply->nlmsg_len) { fprintf(stderr, "Netlink message " "truncated\n"); len = reply->nlmsg_len; } memcpy(reply, h, len); got_reply = TRUE; } else if (h->nlmsg_type != NLMSG_DONE) { fprintf(stderr, "Unknown NLmsg: 0x%08x, len %d\n", h->nlmsg_type, h->nlmsg_len); } h = NLMSG_NEXT(h, status); } } return TRUE; } static int netlink_send(struct netlink_fd *fd, struct nlmsghdr *req) { struct sockaddr_nl nladdr; struct iovec iov = { .iov_base = (void*) req, .iov_len = req->nlmsg_len }; struct msghdr msg = { .msg_name = &nladdr, .msg_namelen = sizeof(nladdr), .msg_iov = &iov, .msg_iovlen = 1, }; int status; memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; req->nlmsg_seq = ++fd->seq; status = sendmsg(fd->fd, &msg, 0); if (status < 0) { fprintf(stderr, "Cannot talk to rtnetlink\n"); return FALSE; } return TRUE; } static int netlink_talk(struct nlmsghdr *req, size_t replysize, struct nlmsghdr *reply) { struct netlink_fd fd; int ret = FALSE; if (!netlink_open(&fd)) return FALSE; if (reply == NULL) req->nlmsg_flags |= NLM_F_ACK; if (!netlink_send(&fd, req)) goto out; if (reply != NULL) { reply->nlmsg_len = replysize; ret = netlink_receive(&fd, reply); } else { ret = TRUE; } out: 
netlink_close(&fd); return ret; } int netlink_route_get(struct sockaddr *dst, u_int16_t *mtu, char *ifname) { struct { struct nlmsghdr n; union { struct rtmsg r; struct ifinfomsg i; }; char buf[1024]; } req; struct rtmsg *r = NLMSG_DATA(&req.n); struct rtattr *rta[RTA_MAX+1]; struct rtattr *rtax[RTAX_MAX+1]; struct rtattr *ifla[IFLA_MAX+1]; int index; memset(&req, 0, sizeof(req)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_type = RTM_GETROUTE; req.r.rtm_family = dst->sa_family; netlink_add_rtaddr_l(&req.n, sizeof(req), RTA_DST, dst); req.r.rtm_dst_len = 32; if (!netlink_talk(&req.n, sizeof(req), &req.n)) return FALSE; netlink_parse_rtattr(rta, RTA_MAX, RTM_RTA(r), RTM_PAYLOAD(&req.n)); if (mtu != NULL) { if (rta[RTA_METRICS] == NULL) return FALSE; netlink_parse_rtattr(rtax, RTAX_MAX, RTA_DATA(rta[RTA_METRICS]), RTA_PAYLOAD(rta[RTA_METRICS])); if (rtax[RTAX_MTU] == NULL) return FALSE; *mtu = *(int*) RTA_DATA(rtax[RTAX_MTU]); } if (ifname != NULL) { if (rta[RTA_OIF] == NULL) return FALSE; index = *(int*) RTA_DATA(rta[RTA_OIF]); memset(&req, 0, sizeof(req)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); req.n.nlmsg_flags = NLM_F_REQUEST; req.n.nlmsg_type = RTM_GETLINK; req.i.ifi_index = index; if (!netlink_talk(&req.n, sizeof(req), &req.n)) return FALSE; netlink_parse_rtattr(ifla, IFLA_MAX, IFLA_RTA(r), IFLA_PAYLOAD(&req.n)); if (ifla[IFLA_IFNAME] == NULL) return FALSE; memcpy(ifname, RTA_DATA(ifla[IFLA_IFNAME]), RTA_PAYLOAD(ifla[IFLA_IFNAME])); } return TRUE; }
/*
 * Create the macvlan ("VMAC") interface for a VRRP instance, rebind the
 * instance to it and bring it up.
 *
 * The virtual MAC is written into ll_addr: byte 4 is 0x02 for AF_INET6 and
 * 0x01 otherwise, and the last byte is the VRID. ll_addr and macvlan_ll_kind
 * are not declared in this function (presumably file-scope objects).
 *
 * Returns 1 on success. Returns -1 if the instance has no interface, the
 * VMAC is already flagged up, the VRID is 0, a netlink delete/create request
 * fails, or the new interface cannot be found after creation.
 */
int
netlink_link_add_vmac(vrrp_t *vrrp)
{
	struct rtattr *linkinfo;
	struct rtattr *data;
	unsigned int base_ifindex;
	interface_t *ifp;
	interface_t *base_ifp;
	char ifname[IFNAMSIZ];
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[256];
	} req;

	if (!vrrp->ifp || __test_bit(VRRP_VMAC_UP_BIT, &vrrp->vmac_flags) || !vrrp->vrid)
		return -1;

	if (vrrp->family == AF_INET6)
		ll_addr[4] = 0x02;
	else
		ll_addr[4] = 0x01;

	ll_addr[ETH_ALEN-1] = vrrp->vrid;

	memset(&req, 0, sizeof (req));
	/* Zero-fill first so the bounded copy below is always NUL-terminated */
	memset(ifname, 0, IFNAMSIZ);
	strncpy(ifname, vrrp->vmac_ifname, IFNAMSIZ - 1);

	/*
	 * Check to see if this vmac interface was created
	 * by a previous instance.
	 * NOTE(review): when the existing interface already has the expected
	 * MAC, execution still falls through to the exclusive create below -
	 * confirm that this is intended.
	 */
	if ((ifp = if_get_by_ifname(ifname))) {
		/* Check to see whether this interface has wrong mac ? */
		if (memcmp((const void *) ifp->hw_addr, (const void *) ll_addr, ETH_ALEN) != 0) {
			/* We have found a VIF but the vmac do not match */
			log_message(LOG_INFO, "vmac: Removing old VMAC interface %s due to conflicting "
					      "interface MAC for vrrp_instance %s!!!"
					    , vrrp->vmac_ifname, vrrp->iname);

			/* Request that NETLINK remove the VIF interface first */
			memset(&req, 0, sizeof (req));
			req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
			req.n.nlmsg_flags = NLM_F_REQUEST;
			req.n.nlmsg_type = RTM_DELLINK;
			req.ifi.ifi_family = AF_INET;
			req.ifi.ifi_index = IF_INDEX(ifp);

			if (netlink_talk(&nl_cmd, &req.n) < 0) {
				log_message(LOG_INFO, "vmac: Error removing VMAC interface %s for "
						      "vrrp_instance %s!!!"
						    , vrrp->vmac_ifname, vrrp->iname);
				return -1;
			}

			/* Interface successfully removed, now recreate */
		}
	}

	/*
	 * Request that NETLINK create the VIF interface.
	 * Start from a clean request: after the delete path above req still
	 * holds the removed interface's ifi_index, which must not leak into
	 * the create request.
	 */
	memset(&req, 0, sizeof (req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.n.nlmsg_type = RTM_NEWLINK;
	req.ifi.ifi_family = AF_INET;

	/* macvlan settings */
	linkinfo = NLMSG_TAIL(&req.n);
	addattr_l(&req.n, sizeof(req), IFLA_LINKINFO, NULL, 0);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, (void *)macvlan_ll_kind,
		  strlen(macvlan_ll_kind));
	data = NLMSG_TAIL(&req.n);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_DATA, NULL, 0);

	/*
	 * In private mode, macvlan will receive frames with same MAC addr
	 * as configured on the interface.
	 */
	addattr32(&req.n, sizeof(req), IFLA_MACVLAN_MODE,
		  MACVLAN_MODE_PRIVATE);
	/* Close the nested attributes by patching in their final lengths */
	data->rta_len = (char *)NLMSG_TAIL(&req.n) - (char *)data;
	linkinfo->rta_len = (char *)NLMSG_TAIL(&req.n) - (char *)linkinfo;
	addattr_l(&req.n, sizeof(req), IFLA_LINK, &IF_INDEX(vrrp->ifp), sizeof(uint32_t));
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, ifname, strlen(ifname));
	addattr_l(&req.n, sizeof(req), IFLA_ADDRESS, ll_addr, ETH_ALEN);

	if (netlink_talk(&nl_cmd, &req.n) < 0) {
		log_message(LOG_INFO, "vmac: Error creating VMAC interface %s for vrrp_instance %s!!!"
				    , ifname, vrrp->iname);
		return -1;
	}

	log_message(LOG_INFO, "vmac: Success creating VMAC interface %s for vrrp_instance %s"
			    , ifname, vrrp->iname);

	/*
	 * Update interface queue and vrrp instance interface binding.
	 */
	netlink_interface_lookup();
	ifp = if_get_by_ifname(ifname);
	if (!ifp)
		return -1;
	base_ifp = vrrp->ifp;
	base_ifindex = vrrp->ifp->ifindex;
	ifp->flags = vrrp->ifp->flags; /* Copy base interface flags */
	vrrp->ifp = ifp;
	vrrp->ifp->base_ifindex = base_ifindex;
	vrrp->ifp->vmac = 1;
	vrrp->vmac_ifindex = IF_INDEX(vrrp->ifp); /* For use on delete */

	if (vrrp->family == AF_INET) {
		/* Set the necessary kernel parameters to make macvlans work for us */
		set_interface_parameters(ifp, base_ifp);

		/* We don't want IPv6 running on the interface unless we have some IPv6
		 * eVIPs, so disable it if not needed */
		if (!vrrp->evip_add_ipv6)
			link_disable_ipv6(ifp);
	}
	if (vrrp->family == AF_INET6 || vrrp->evip_add_ipv6) {
		// We don't want a link-local address auto assigned - see RFC5798 paragraph 7.4.
		// If we have a sufficiently recent kernel, we can stop a link local address
		// based on the MAC address being automatically assigned. If not, then we have
		// to delete the generated address after bringing the interface up (see below).
#ifdef IFLA_INET6_ADDR_GEN_MODE		/* Since Linux 3.17 */
		memset(&req, 0, sizeof (req));
		req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
		req.n.nlmsg_flags = NLM_F_REQUEST ;
		req.n.nlmsg_type = RTM_NEWLINK;
		req.ifi.ifi_family = AF_UNSPEC;
		req.ifi.ifi_index = vrrp->vmac_ifindex;

		u_char val = IN6_ADDR_GEN_MODE_NONE;
		struct rtattr* spec;

		/* IFLA_AF_SPEC { AF_INET6 { IFLA_INET6_ADDR_GEN_MODE = val } } */
		spec = NLMSG_TAIL(&req.n);
		addattr_l(&req.n, sizeof(req), IFLA_AF_SPEC, NULL,0);
		data = NLMSG_TAIL(&req.n);
		addattr_l(&req.n, sizeof(req), AF_INET6, NULL,0);
		addattr_l(&req.n, sizeof(req), IFLA_INET6_ADDR_GEN_MODE, &val, sizeof(val));
		data->rta_len = (char *)NLMSG_TAIL(&req.n) - (char *)data;
		spec->rta_len = (char *)NLMSG_TAIL(&req.n) - (char *)spec;

		/* Failure is only logged; interface setup continues */
		if (netlink_talk(&nl_cmd, &req.n) < 0)
			log_message(LOG_INFO, "vmac: Error setting ADDR_GEN_MODE to NONE");
#endif

		if (vrrp->family == AF_INET6) {
			/* Add link-local address. If a source address has been specified, use it,
			 * else use link-local address from underlying interface to vmac if there is one,
			 * otherwise construct a link-local address based on underlying interface's
			 * MAC address.
			 * This is so that VRRP advertisements will be sent from a non-VIP address, but
			 * using the VRRP MAC address */
			ip_address_t ipaddress;
			memset(&ipaddress, 0, sizeof(ipaddress));

			ipaddress.ifp = ifp;
			if (vrrp->saddr.ss_family == AF_INET6)
				ipaddress.u.sin6_addr = ((struct sockaddr_in6*)&vrrp->saddr)->sin6_addr;
			else if (base_ifp->sin6_addr.s6_addr32[0])
				ipaddress.u.sin6_addr = base_ifp->sin6_addr;
			else
				make_link_local_address(&ipaddress.u.sin6_addr, base_ifp->hw_addr);
			ipaddress.ifa.ifa_family = AF_INET6;
			ipaddress.ifa.ifa_prefixlen = 64;
			ipaddress.ifa.ifa_index = vrrp->vmac_ifindex;

			if (netlink_ipaddress(&ipaddress, IPADDRESS_ADD) != 1)
				log_message(LOG_INFO, "Adding link-local address to vmac failed");

			/* Save the address as source for vrrp packets */
			if (vrrp->saddr.ss_family == AF_UNSPEC)
				inet_ip6tosockaddr(&ipaddress.u.sin6_addr, &vrrp->saddr);
			inet_ip6scopeid(vrrp->vmac_ifindex, &vrrp->saddr);
		}
	}

	/* bring it UP ! */
	__set_bit(VRRP_VMAC_UP_BIT, &vrrp->vmac_flags);
	netlink_link_up(vrrp);

#ifndef IFLA_INET6_ADDR_GEN_MODE
	if (vrrp->family == AF_INET6 || vrrp->evip_add_ipv6) {
		/* Delete the automatically created link-local address based on the
		 * MAC address if we weren't able to configure the interface not to
		 * create the address (see above).
		 * This isn't ideal, since the invalid address will exist momentarily,
		 * but is there any better way to do it? probably not otherwise
		 * ADDR_GEN_MODE wouldn't have been added to the kernel. */
		ip_address_t ipaddress;
		memset(&ipaddress, 0, sizeof(ipaddress));

		ipaddress.u.sin6_addr = base_ifp->sin6_addr;
		make_link_local_address(&ipaddress.u.sin6_addr, ll_addr);
		ipaddress.ifa.ifa_family = AF_INET6;
		ipaddress.ifa.ifa_prefixlen = 64;
		ipaddress.ifa.ifa_index = vrrp->vmac_ifindex;

		if (netlink_ipaddress(&ipaddress, IPADDRESS_DEL) != 1)
			log_message(LOG_INFO, "Deleting auto link-local address from vmac failed");
	}
#endif

	return 1;
}
/*
 * Add/Delete IP route to/from a specific interface.
 *
 * iproute: route description; the bits in iproute->mask select which of the
 *          optional fields are encoded into the request.
 * cmd:     IPROUTE_DEL deletes the route, IPROUTE_REPLACE creates or replaces
 *          it, any other value creates it.
 *
 * Route metrics (RTAX_*) are first collected in a separate rtattr buffer and
 * appended as one RTA_METRICS attribute only if at least one was set.
 *
 * Returns 1 on success and -1 if the netlink request fails. When
 * _HAVE_RTA_EXPIRES_ is defined, a failed delete of a route that has an
 * expiry set still returns 1.
 */
static int
netlink_route(ip_route_t *iproute, int cmd)
{
	int status = 1;
	struct {
		struct nlmsghdr n;
		struct rtmsg r;
		char buf[RTM_SIZE];
	} req;
	char buf[RTA_SIZE];
	struct rtattr *rta = (void*)buf;

	memset(&req, 0, sizeof (req));

	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	if (cmd == IPROUTE_DEL) {
		req.n.nlmsg_flags = NLM_F_REQUEST;
		req.n.nlmsg_type = RTM_DELROUTE;
	}
	else {
		req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
		if (cmd == IPROUTE_REPLACE)
			req.n.nlmsg_flags |= NLM_F_REPLACE;
		req.n.nlmsg_type = RTM_NEWROUTE;
	}

	/* Empty RTA_METRICS container; rta_len grows as metrics are added */
	rta->rta_type = RTA_METRICS;
	rta->rta_len = RTA_LENGTH(0);

	req.r.rtm_family = iproute->family;
	/* rtm_table is 8 bits wide; larger table ids go in RTA_TABLE */
	if (iproute->table < 256)
		req.r.rtm_table = iproute->table;
	else {
		req.r.rtm_table = RT_TABLE_UNSPEC;
		addattr32(&req.n, sizeof(req), RTA_TABLE, iproute->table);
	}

	if (cmd == IPROUTE_DEL) {
		req.r.rtm_scope = RT_SCOPE_NOWHERE;
		if (iproute->mask & IPROUTE_BIT_TYPE)
			req.r.rtm_type = iproute->type;
	}
	else {
		req.r.rtm_protocol = RTPROT_BOOT;
		req.r.rtm_scope = RT_SCOPE_UNIVERSE;
		req.r.rtm_type = iproute->type;
	}

	/* Explicitly configured protocol/scope override the defaults above */
	if (iproute->mask & IPROUTE_BIT_PROTOCOL)
		req.r.rtm_protocol = iproute->protocol;

	if (iproute->mask & IPROUTE_BIT_SCOPE)
		req.r.rtm_scope = iproute->scope;

	if (iproute->dst) {
		req.r.rtm_dst_len = iproute->dst->ifa.ifa_prefixlen;
		add_addr2req(&req.n, sizeof(req), RTA_DST, iproute->dst);
	}

	if (iproute->src) {
		req.r.rtm_src_len = iproute->src->ifa.ifa_prefixlen;
		add_addr2req(&req.n, sizeof(req), RTA_SRC, iproute->src);
	}

	if (iproute->pref_src)
		add_addr2req(&req.n, sizeof(req), RTA_PREFSRC, iproute->pref_src);

//#ifdef _HAVE_RTA_NEWDST_
//	if (iproute->as_to)
//		add_addr2req(&req.n, sizeof(req), RTA_NEWDST, iproute->as_to);
//#endif

	if (iproute->via) {
		/* Same family as the route: plain gateway; otherwise RTA_VIA */
		if (iproute->via->ifa.ifa_family == iproute->family)
			add_addr2req(&req.n, sizeof(req), RTA_GATEWAY, iproute->via);
#ifdef _HAVE_RTA_VIA_
		else
			add_addr_fam2req(&req.n, sizeof(req), RTA_VIA, iproute->via);
#endif
	}

#ifdef _HAVE_RTA_ENCAP_
	if (iproute->encap.type != LWTUNNEL_ENCAP_NONE) {
		char encap_buf[ENCAP_RTA_SIZE];
		struct rtattr *encap_rta = (void *)encap_buf;

		encap_rta->rta_type = RTA_ENCAP;
		encap_rta->rta_len = RTA_LENGTH(0);
		add_encap(encap_rta, sizeof(encap_buf), &iproute->encap);

		/* The limit is the size of the request being appended to, not
		 * of the scratch buffer the encap data was built in */
		if (encap_rta->rta_len > RTA_LENGTH(0))
			addraw_l(&req.n, sizeof(req), RTA_DATA(encap_rta), RTA_PAYLOAD(encap_rta));
	}
#endif

	if (iproute->mask & IPROUTE_BIT_DSFIELD)
		req.r.rtm_tos = iproute->tos;

	if (iproute->oif)
		addattr32(&req.n, sizeof(req), RTA_OIF, iproute->oif->ifindex);

	if (iproute->mask & IPROUTE_BIT_METRIC)
		addattr32(&req.n, sizeof(req), RTA_PRIORITY, iproute->metric);

	req.r.rtm_flags = iproute->flags;

	if (iproute->realms)
		addattr32(&req.n, sizeof(req), RTA_FLOW, iproute->realms);

#ifdef _HAVE_RTA_EXPIRES_
	if (iproute->mask & IPROUTE_BIT_EXPIRES)
		addattr32(&req.n, sizeof(req), RTA_EXPIRES, iproute->expires);
#endif

	/* From here on, metrics are accumulated in the side buffer "rta" */
#ifdef RTAX_CC_ALGO
	if (iproute->congctl)
		rta_addattr_l(rta, sizeof(buf), RTAX_CC_ALGO, iproute->congctl, strlen(iproute->congctl));
#endif

	if (iproute->mask & IPROUTE_BIT_RTT)
		rta_addattr32(rta, sizeof(buf), RTAX_RTT, iproute->rtt);

	if (iproute->mask & IPROUTE_BIT_RTTVAR)
		rta_addattr32(rta, sizeof(buf), RTAX_RTTVAR, iproute->rttvar);

	if (iproute->mask & IPROUTE_BIT_RTO_MIN)
		rta_addattr32(rta, sizeof(buf), RTAX_RTO_MIN, iproute->rto_min);

#ifdef RTAX_FEATURES
	if (iproute->features)
		rta_addattr32(rta, sizeof(buf), RTAX_FEATURES, iproute->features);
#endif

	if (iproute->mask & IPROUTE_BIT_MTU)
		rta_addattr32(rta, sizeof(buf), RTAX_MTU, iproute->mtu);

	if (iproute->mask & IPROUTE_BIT_WINDOW)
		rta_addattr32(rta, sizeof(buf), RTAX_WINDOW, iproute->window);

	if (iproute->mask & IPROUTE_BIT_SSTHRESH)
		rta_addattr32(rta, sizeof(buf), RTAX_SSTHRESH, iproute->ssthresh);

	if (iproute->mask & IPROUTE_BIT_CWND)
		rta_addattr32(rta, sizeof(buf), RTAX_CWND, iproute->cwnd);

	if (iproute->mask & IPROUTE_BIT_ADVMSS)
		rta_addattr32(rta, sizeof(buf), RTAX_ADVMSS, iproute->advmss);

	if (iproute->mask & IPROUTE_BIT_REORDERING)
		rta_addattr32(rta, sizeof(buf), RTAX_REORDERING, iproute->reordering);

	if (iproute->mask & IPROUTE_BIT_HOPLIMIT)
		rta_addattr32(rta, sizeof(buf), RTAX_HOPLIMIT, iproute->hoplimit);

	if (iproute->mask & IPROUTE_BIT_INITCWND)
		rta_addattr32(rta, sizeof(buf), RTAX_INITCWND, iproute->initcwnd);

#ifdef RTAX_INITRWND
	if (iproute->mask & IPROUTE_BIT_INITRWND)
		rta_addattr32(rta, sizeof(buf), RTAX_INITRWND, iproute->initrwnd);
#endif

#ifdef RTAX_QUICKACK
	if (iproute->mask & IPROUTE_BIT_QUICKACK)
		rta_addattr32(rta, sizeof(buf), RTAX_QUICKACK, iproute->quickack);
#endif

#ifdef _HAVE_RTA_PREF_
	if (iproute->mask & IPROUTE_BIT_PREF)
		addattr8(&req.n, sizeof(req), RTA_PREF, iproute->pref);
#endif

	/* Only emit RTA_METRICS (and the lock mask) if some metric was set */
	if (rta->rta_len > RTA_LENGTH(0)) {
		if (iproute->lock)
			rta_addattr32(rta, sizeof(buf), RTAX_LOCK, iproute->lock);
		addattr_l(&req.n, sizeof(req), RTA_METRICS, RTA_DATA(rta), RTA_PAYLOAD(rta));
	}

	if (!LIST_ISEMPTY(iproute->nhs))
		add_nexthops(iproute, &req.n, &req.r);

#ifdef DEBUG_NETLINK_MSG
	/* Hex dump of the netlink header and of the rtmsg with its attributes */
	size_t i, j;
	uint8_t *p;
	char lbuf[3072];
	char *op = lbuf;

	log_message(LOG_INFO, "rtmsg buffer used %lu, rtattr buffer used %d", req.n.nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg)), rta->rta_len);

	op += snprintf(op, sizeof(lbuf) - (op - lbuf), "nlmsghdr %p(%u):", &req.n, req.n.nlmsg_len);
	for (i = 0, p = (uint8_t*)&req.n; i < sizeof(struct nlmsghdr); i++)
		op += snprintf(op, sizeof(lbuf) - (op - lbuf), " %2.2hhx", *(p++));
	log_message(LOG_INFO, "%s\n", lbuf);

	op = lbuf;
	op += snprintf(op, sizeof(lbuf) - (op - lbuf), "rtmsg %p(%lu):", &req.r, req.n.nlmsg_len - sizeof(struct nlmsghdr));
	for (i = 0, p = (uint8_t*)&req.r; i < req.n.nlmsg_len - sizeof(struct nlmsghdr); i++)
		op += snprintf(op, sizeof(lbuf) - (op - lbuf), " %2.2hhx", *(p++));

	/* Emit the dump in chunks of at most MAX_LOG_MSG characters */
	for (j = 0; lbuf + j < op; j+= MAX_LOG_MSG)
		log_message(LOG_INFO, "%.*s\n", MAX_LOG_MSG, lbuf+j);
#endif

	/* This returns ESRCH if the address of via address doesn't exist */
	/* ENETDOWN if dev p33p1.40 for example is down */
	if (netlink_talk(&nl_cmd, &req.n) < 0) {
#ifdef _HAVE_RTA_EXPIRES_
		/* If an expiry was set on the route, it may have disappeared already */
		if (cmd != IPROUTE_DEL || !(iproute->mask & IPROUTE_BIT_EXPIRES))
#endif
			status = -1;
	}

	return status;
}
/*
 * Add/Delete IP rule to/from a specific IP/network.
 *
 * iprule: rule description; the bits in iprule->mask select which of the
 *         optional attributes are encoded into the request.
 * cmd:    IPRULE_DEL sends RTM_DELRULE; any other value sends RTM_NEWRULE
 *         with NLM_F_CREATE | NLM_F_EXCL.
 *
 * Returns 1 on success, -1 if the netlink request fails.
 */
static int
netlink_rule(ip_rule_t *iprule, int cmd)
{
	int status = 1;
	struct {
		struct nlmsghdr n;
		struct fib_rule_hdr frh;
		char buf[1024];
	} req;

	memset(&req, 0, sizeof (req));

	/* The fixed part of the message is the fib_rule_hdr declared above */
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
	req.n.nlmsg_flags = NLM_F_REQUEST;
	if (cmd != IPRULE_DEL) {
		req.n.nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
		req.n.nlmsg_type = RTM_NEWRULE;
	}
	else
		req.n.nlmsg_type = RTM_DELRULE;

	req.frh.table = RT_TABLE_UNSPEC;
	req.frh.flags = 0;
	req.frh.tos = iprule->tos;	// Hex value - 0xnn <= 255, or name from rt_dsfield
	req.frh.family = iprule->family;
	req.frh.action = iprule->action;

	if (iprule->action == FR_ACT_TO_TBL
#if HAVE_DECL_FRA_L3MDEV
	    && !iprule->l3mdev
#endif
				   ) {
		/* The header's table field is 8 bits; larger ids go in FRA_TABLE */
		if (iprule->table < 256)	// "Table" or "lookup"
			req.frh.table = iprule->table ? iprule->table & 0xff : RT_TABLE_MAIN;
		else {
			req.frh.table = RT_TABLE_UNSPEC;
			addattr32(&req.n, sizeof(req), FRA_TABLE, iprule->table);
		}
	}

	if (iprule->invert)
		req.frh.flags |= FIB_RULE_INVERT;	// "not"

	/* Set rule entry */
	if (iprule->from_addr) {	// can be "default"/"any"/"all" - and to addr => bytelen == bitlen == 0
		add_addr2req(&req.n, sizeof(req), FRA_SRC, iprule->from_addr);
		req.frh.src_len = iprule->from_addr->ifa.ifa_prefixlen;
	}
	if (iprule->to_addr) {
		add_addr2req(&req.n, sizeof(req), FRA_DST, iprule->to_addr);
		req.frh.dst_len = iprule->to_addr->ifa.ifa_prefixlen;
	}

	if (iprule->mask & IPRULE_BIT_PRIORITY)	// "priority/order/preference"
		addattr32(&req.n, sizeof(req), FRA_PRIORITY, iprule->priority);

	if (iprule->mask & IPRULE_BIT_FWMARK)	// "fwmark"
		addattr32(&req.n, sizeof(req), FRA_FWMARK, iprule->fwmark);

	if (iprule->mask & IPRULE_BIT_FWMASK)	// "fwmark number followed by /nn"
		addattr32(&req.n, sizeof(req), FRA_FWMASK, iprule->fwmask);

	if (iprule->realms)	// "realms u16[/u16] using rt_realms. after / is 16 msb (src), pre slash is 16 lsb (dest)"
		addattr32(&req.n, sizeof(req), FRA_FLOW, iprule->realms);

#if HAVE_DECL_FRA_SUPPRESS_PREFIXLEN
	if (iprule->suppress_prefix_len != -1)	// "suppress_prefixlength" - only valid if table != 0
		addattr32(&req.n, sizeof(req), FRA_SUPPRESS_PREFIXLEN, iprule->suppress_prefix_len);
#endif

#if HAVE_DECL_FRA_SUPPRESS_IFGROUP
	if (iprule->mask & IPRULE_BIT_SUP_GROUP)	// "suppress_ifgroup" or "sup_group" int32 - only valid if table !=0
		addattr32(&req.n, sizeof(req), FRA_SUPPRESS_IFGROUP, iprule->suppress_group);
#endif

	/* The attribute payload is the NUL-terminated interface name itself */
	if (iprule->iif)	// "dev/iif"
		addattr_l(&req.n, sizeof(req), FRA_IFNAME, iprule->iif->ifname, strlen(iprule->iif->ifname)+1);

#if HAVE_DECL_FRA_OIFNAME
	if (iprule->oif)	// "oif"
		addattr_l(&req.n, sizeof(req), FRA_OIFNAME, iprule->oif->ifname, strlen(iprule->oif->ifname)+1);
#endif

#if HAVE_DECL_FRA_TUN_ID
	if (iprule->tunnel_id)
		addattr64(&req.n, sizeof(req), FRA_TUN_ID, htobe64(iprule->tunnel_id));
#endif

#if HAVE_DECL_FRA_UID_RANGE
	if (iprule->mask & IPRULE_BIT_UID_RANGE)
		addattr_l(&req.n, sizeof(req), FRA_UID_RANGE, &iprule->uid_range, sizeof(iprule->uid_range));
#endif

#if HAVE_DECL_FRA_L3MDEV
	if (iprule->l3mdev)
		addattr8(&req.n, sizeof(req), FRA_L3MDEV, 1);
#endif

#if HAVE_DECL_FRA_PROTOCOL
	if (iprule->mask & IPRULE_BIT_PROTOCOL)
		addattr8(&req.n, sizeof(req), FRA_PROTOCOL, iprule->protocol);
#endif

#if HAVE_DECL_FRA_IP_PROTO
	if (iprule->mask & IPRULE_BIT_IP_PROTO)
		addattr8(&req.n, sizeof(req), FRA_IP_PROTO, iprule->ip_proto);
#endif

#if HAVE_DECL_FRA_SPORT_RANGE
	if (iprule->mask & IPRULE_BIT_SPORT_RANGE)
		addattr_l(&req.n, sizeof(req), FRA_SPORT_RANGE, &iprule->src_port, sizeof(iprule->src_port));
#endif

#if HAVE_DECL_FRA_DPORT_RANGE
	if (iprule->mask & IPRULE_BIT_DPORT_RANGE)
		addattr_l(&req.n, sizeof(req), FRA_DPORT_RANGE, &iprule->dst_port, sizeof(iprule->dst_port));
#endif

	if (iprule->action == FR_ACT_GOTO)	// "goto"
		addattr32(&req.n, sizeof(req), FRA_GOTO, iprule->goto_target);

	if (netlink_talk(&nl_cmd, &req.n) < 0)
		status = -1;

	return status;
}