/**
 * Report the prefix length of a route's destination address.
 *
 * @arg rtnlroute	route whose destination is inspected
 *
 * @return the value nl_addr_get_prefixlen() yields for the address
 *         returned by rtnl_route_get_dst()
 */
int rtnl_route_get_dst_len(struct rtnl_route *rtnlroute)
{
	return nl_addr_get_prefixlen(rtnl_route_get_dst(rtnlroute));
}
static void foreach_route_cb (struct nl_object *object, void *user_data) { ForeachRouteInfo *info = user_data; struct rtnl_route *route = (struct rtnl_route *) object; struct nl_addr *dst; if (info->out_route) return; if (nm_logging_level_enabled (LOGL_DEBUG)) dump_route (route); if ( info->ifindex > 0 && rtnl_route_get_oif (route) != info->ifindex) return; if ( info->scope != RT_SCOPE_UNIVERSE && rtnl_route_get_scope (route) != info->scope) return; if ( info->family != AF_UNSPEC && rtnl_route_get_family (route) != info->family) return; dst = rtnl_route_get_dst (route); /* Check for IPv6 LL and MC routes that might need to be ignored */ if ( (info->family == AF_INET6 || info->family == AF_UNSPEC) && (rtnl_route_get_family (route) == AF_INET6)) { struct in6_addr *addr = NULL; if (dst) addr = nl_addr_get_binary_addr (dst); if (addr) { if ( IN6_IS_ADDR_LINKLOCAL (addr) || IN6_IS_ADDR_MC_LINKLOCAL (addr) || (IN6_IS_ADDR_MULTICAST (addr) && (nl_addr_get_prefixlen (dst) == 8))) return; } } info->out_route = info->callback (route, dst, info->iface, info->user_data); if (info->out_route) { /* Ref the route so it sticks around after the cache is cleared */ rtnl_route_get (info->out_route); } }
/*
 * Emit one debug log line describing a route: its output interface index,
 * address family, and destination address/prefix length. The destination
 * is printed as "<unknown>" when no IPv4 or IPv6 text could be produced.
 */
static void
dump_route (struct rtnl_route *route)
{
	char text6[INET6_ADDRSTRLEN] = { 0 };
	char text4[INET_ADDRSTRLEN] = { 0 };
	const char *family_name;
	const char *shown;
	guint32 domain;
	int plen = 0;
	int family = rtnl_route_get_family (route);
	struct nl_addr *dst = rtnl_route_get_dst (route);

	if (dst) {
		/* Render the destination according to the address's own family. */
		switch (nl_addr_get_family (dst)) {
		case AF_INET: {
			struct in_addr *a4 = nl_addr_get_binary_addr (dst);

			if (a4)
				inet_ntop (AF_INET, a4, &text4[0], sizeof (text4));
			break;
		}
		case AF_INET6: {
			struct in6_addr *a6 = nl_addr_get_binary_addr (dst);

			if (a6)
				inet_ntop (AF_INET6, a6, &text6[0], sizeof (text6));
			break;
		}
		default:
			break;
		}
		plen = nl_addr_get_prefixlen (dst);
	}

	/* The log domain and label follow the route's family, not the address's. */
	switch (family) {
	case AF_INET:
		family_name = "INET";
		domain = LOGD_IP4;
		break;
	case AF_INET6:
		family_name = "INET6";
		domain = LOGD_IP6;
		break;
	default:
		family_name = "UNSPEC";
		domain = LOGD_IP4 | LOGD_IP6;
		break;
	}

	/* Prefer the IPv4 text, then the IPv6 text, then a placeholder. */
	if (text4[0] != '\0')
		shown = text4;
	else if (text6[0] != '\0')
		shown = text6;
	else
		shown = "<unknown>";

	nm_log_dbg (domain, " route idx %d family %s (%d) addr %s/%d",
	            rtnl_route_get_oif (route),
	            family_name, family,
	            shown,
	            plen);
}
static int build_rule_msg(struct rtnl_rule *tmpl, int cmd, int flags, struct nl_msg **result) { struct nl_msg *msg; struct fib_rule_hdr frh = { .family = tmpl->r_family, .table = tmpl->r_table, .action = tmpl->r_action, .flags = tmpl->r_flags, .tos = tmpl->r_dsfield, }; if (!(tmpl->ce_mask & RULE_ATTR_FAMILY)) return -NLE_MISSING_ATTR; msg = nlmsg_alloc_simple(cmd, flags); if (!msg) return -NLE_NOMEM; if (nlmsg_append(msg, &frh, sizeof(frh), NLMSG_ALIGNTO) < 0) goto nla_put_failure; if (tmpl->ce_mask & RULE_ATTR_SRC) { frh.src_len = nl_addr_get_prefixlen(tmpl->r_src); NLA_PUT_ADDR(msg, FRA_SRC, tmpl->r_src); } if (tmpl->ce_mask & RULE_ATTR_DST) { frh.dst_len = nl_addr_get_prefixlen(tmpl->r_dst); NLA_PUT_ADDR(msg, FRA_DST, tmpl->r_dst); } if (tmpl->ce_mask & RULE_ATTR_IIFNAME) NLA_PUT_STRING(msg, FRA_IIFNAME, tmpl->r_iifname); if (tmpl->ce_mask & RULE_ATTR_OIFNAME) NLA_PUT_STRING(msg, FRA_OIFNAME, tmpl->r_oifname); if (tmpl->ce_mask & RULE_ATTR_PRIO) NLA_PUT_U32(msg, FRA_PRIORITY, tmpl->r_prio); if (tmpl->ce_mask & RULE_ATTR_MARK) NLA_PUT_U32(msg, FRA_FWMARK, tmpl->r_mark); if (tmpl->ce_mask & RULE_ATTR_MASK) NLA_PUT_U32(msg, FRA_FWMASK, tmpl->r_mask); if (tmpl->ce_mask & RULE_ATTR_GOTO) NLA_PUT_U32(msg, FRA_GOTO, tmpl->r_goto); if (tmpl->ce_mask & RULE_ATTR_FLOW) NLA_PUT_U32(msg, FRA_FLOW, tmpl->r_flow); *result = msg; return 0; nla_put_failure: nlmsg_free(msg); return -NLE_MSGSIZE; } /** * Build netlink request message to add a new rule * @arg tmpl template with data of new rule * @arg flags additional netlink message flags * * Builds a new netlink message requesting a addition of a new * rule. The netlink message header isn't fully equipped with * all relevant fields and must thus be sent out via nl_send_auto_complete() * or supplemented as needed. \a tmpl must contain the attributes of the new * address set via \c rtnl_rule_set_* functions. 
* * @return The netlink message */ int rtnl_rule_build_add_request(struct rtnl_rule *tmpl, int flags, struct nl_msg **result) { return build_rule_msg(tmpl, RTM_NEWRULE, NLM_F_CREATE | flags, result); } /** * Add a new rule * @arg sk Netlink socket. * @arg tmpl template with requested changes * @arg flags additional netlink message flags * * Builds a netlink message by calling rtnl_rule_build_add_request(), * sends the request to the kernel and waits for the next ACK to be * received and thus blocks until the request has been fullfilled. * * @return 0 on sucess or a negative error if an error occured. */ int rtnl_rule_add(struct nl_sock *sk, struct rtnl_rule *tmpl, int flags) { struct nl_msg *msg; int err; if ((err = rtnl_rule_build_add_request(tmpl, flags, &msg)) < 0) return err; err = nl_send_auto_complete(sk, msg); nlmsg_free(msg); if (err < 0) return err; return wait_for_ack(sk); } /** @} */ /** * @name Rule Deletion * @{ */ /** * Build a netlink request message to delete a rule * @arg rule rule to delete * @arg flags additional netlink message flags * * Builds a new netlink message requesting a deletion of a rule. * The netlink message header isn't fully equipped with all relevant * fields and must thus be sent out via nl_send_auto_complete() * or supplemented as needed. \a rule must point to an existing * address. * * @return The netlink message */ int rtnl_rule_build_delete_request(struct rtnl_rule *rule, int flags, struct nl_msg **result) { return build_rule_msg(rule, RTM_DELRULE, flags, result); } /** * Delete a rule * @arg sk Netlink socket. * @arg rule rule to delete * @arg flags additional netlink message flags * * Builds a netlink message by calling rtnl_rule_build_delete_request(), * sends the request to the kernel and waits for the next ACK to be * received and thus blocks until the request has been fullfilled. * * @return 0 on sucess or a negative error if an error occured. 
*/ int rtnl_rule_delete(struct nl_sock *sk, struct rtnl_rule *rule, int flags) { struct nl_msg *msg; int err; if ((err = rtnl_rule_build_delete_request(rule, flags, &msg)) < 0) return err; err = nl_send_auto_complete(sk, msg); nlmsg_free(msg); if (err < 0) return err; return wait_for_ack(sk); } /** @} */ /** * @name Attribute Modification * @{ */ void rtnl_rule_set_family(struct rtnl_rule *rule, int family) { rule->r_family = family; rule->ce_mask |= RULE_ATTR_FAMILY; }
int main(int argc, char *argv[]) { char *unikernel; enum { QEMU, KVM, UKVM, UNIX } hypervisor; if (argc < 3) { fprintf(stderr, "usage: runner HYPERVISOR UNIKERNEL [ ARGS... ]\n"); fprintf(stderr, "HYPERVISOR: qemu | kvm | ukvm | unix\n"); return 1; } if (strcmp(argv[1], "qemu") == 0) hypervisor = QEMU; else if (strcmp(argv[1], "kvm") == 0) hypervisor = KVM; else if (strcmp(argv[1], "ukvm") == 0) hypervisor = UKVM; else if (strcmp(argv[1], "unix") == 0) hypervisor = UNIX; else { warnx("error: Invalid hypervisor: %s", argv[1]); return 1; } unikernel = argv[2]; /* * Remaining arguments are to be passed on to the unikernel. */ argv += 3; argc -= 3; /* * Check we have CAP_NET_ADMIN. */ if (capng_get_caps_process() != 0) { warnx("error: capng_get_caps_process() failed"); return 1; } if (!capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_ADMIN)) { warnx("error: CAP_NET_ADMIN is required"); return 1; } /* * Connect to netlink, load link cache from kernel. */ struct nl_sock *sk; struct nl_cache *link_cache; int err; sk = nl_socket_alloc(); assert(sk); err = nl_connect(sk, NETLINK_ROUTE); if (err < 0) { warnx("nl_connect() failed: %s", nl_geterror(err)); return 1; } err = rtnl_link_alloc_cache(sk, AF_UNSPEC, &link_cache); if (err < 0) { warnx("rtnl_link_alloc_cache() failed: %s", nl_geterror(err)); return 1; } /* * Retrieve container network configuration -- IP address and * default gateway. */ struct rtnl_link *l_veth; l_veth = rtnl_link_get_by_name(link_cache, VETH_LINK_NAME); if (l_veth == NULL) { warnx("error: Could not get link information for %s", VETH_LINK_NAME); return 1; } struct nl_addr *veth_addr; err = get_link_inet_addr(sk, l_veth, &veth_addr); if (err) { warnx("error: Unable to determine IP address of %s", VETH_LINK_NAME); return 1; } struct nl_addr *gw_addr; err = get_default_gw_inet_addr(sk, &gw_addr); if (err) { warnx("error: get_deGfault_gw_inet_addr() failed"); return 1; } if (gw_addr == NULL) { warnx("error: No default gateway found. 
This is currently " "not supported"); return 1; } /* * Create bridge and tap interface, enslave veth and tap interfaces to * bridge. */ err = create_bridge_link(sk, BRIDGE_LINK_NAME); if (err < 0) { warnx("create_bridge_link(%s) failed: %s", BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } int tap_fd; if (hypervisor == UKVM) err = create_tap_link(TAP_LINK_NAME, &tap_fd); else err = create_tap_link(TAP_LINK_NAME, NULL); if (err != 0) { warnx("create_tap_link(%s) failed: %s", TAP_LINK_NAME, strerror(err)); return 1; } /* Refill link cache with newly-created interfaces */ nl_cache_refill(sk, link_cache); struct rtnl_link *l_bridge; l_bridge = rtnl_link_get_by_name(link_cache, BRIDGE_LINK_NAME); if (l_bridge == NULL) { warnx("error: Could not get link information for %s", BRIDGE_LINK_NAME); return 1; } struct rtnl_link *l_tap; l_tap = rtnl_link_get_by_name(link_cache, TAP_LINK_NAME); if (l_tap == NULL) { warnx("error: Could not get link information for %s", TAP_LINK_NAME); return 1; } err = rtnl_link_enslave(sk, l_bridge, l_veth); if (err < 0) { warnx("error: Unable to enslave %s to %s: %s", VETH_LINK_NAME, BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } err = rtnl_link_enslave(sk, l_bridge, l_tap); if (err < 0) { warnx("error: Unable to enslave %s to %s: %s", TAP_LINK_NAME, BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } /* * Flush all IPv4 addresses from the veth interface. This is now safe * as we are good to commit and have retrieved the existing configuration. */ struct rtnl_addr *flush_addr; flush_addr = rtnl_addr_alloc(); assert(flush_addr); rtnl_addr_set_ifindex(flush_addr, rtnl_link_get_ifindex(l_veth)); rtnl_addr_set_family(flush_addr, AF_INET); rtnl_addr_set_local(flush_addr, veth_addr); err = rtnl_addr_delete(sk, flush_addr, 0); if (err < 0) { warnx("error: Could not flush addresses on %s: %s", VETH_LINK_NAME, nl_geterror(err)); return 1; } rtnl_addr_put(flush_addr); /* * Bring up the tap and bridge interfaces. 
*/ struct rtnl_link *l_up; l_up = rtnl_link_alloc(); assert(l_up); /* You'd think set_operstate was the thing to do here. It's not. */ rtnl_link_set_flags(l_up, IFF_UP); err = rtnl_link_change(sk, l_tap, l_up, 0); if (err < 0) { warnx("error: rtnl_link_change(%s, UP) failed: %s", TAP_LINK_NAME, nl_geterror(err)); return 1; } err = rtnl_link_change(sk, l_bridge, l_up, 0); if (err < 0) { warnx("error: rtnl_link_change(%s, UP) failed: %s", BRIDGE_LINK_NAME, nl_geterror(err)); return 1; } rtnl_link_put(l_up); /* * Collect network configuration data. */ char ip[AF_INET_BUFSIZE]; if (inet_ntop(AF_INET, nl_addr_get_binary_addr(veth_addr), ip, sizeof ip) == NULL) { perror("inet_ntop()"); return 1; } char uarg_ip[AF_INET_BUFSIZE]; unsigned int prefixlen = nl_addr_get_prefixlen(veth_addr); snprintf(uarg_ip, sizeof uarg_ip, "%s/%u", ip, prefixlen); char uarg_gw[AF_INET_BUFSIZE]; if (inet_ntop(AF_INET, nl_addr_get_binary_addr(gw_addr), uarg_gw, sizeof uarg_gw) == NULL) { perror("inet_ntop()"); return 1; } /* * Build unikernel and hypervisor arguments. */ ptrvec* uargpv = pvnew(); char *uarg_buf; /* * QEMU/KVM: * /usr/bin/qemu-system-x86_64 <qemu args> -kernel <unikernel> -append "<unikernel args>" */ if (hypervisor == QEMU || hypervisor == KVM) { pvadd(uargpv, "/usr/bin/qemu-system-x86_64"); pvadd(uargpv, "-nodefaults"); pvadd(uargpv, "-no-acpi"); pvadd(uargpv, "-display"); pvadd(uargpv, "none"); pvadd(uargpv, "-serial"); pvadd(uargpv, "stdio"); pvadd(uargpv, "-m"); pvadd(uargpv, "512"); if (hypervisor == KVM) { pvadd(uargpv, "-enable-kvm"); pvadd(uargpv, "-cpu"); pvadd(uargpv, "host"); } else { /* * Required for AESNI use in Mirage. 
*/ pvadd(uargpv, "-cpu"); pvadd(uargpv, "Westmere"); } pvadd(uargpv, "-device"); char *guest_mac = generate_mac(); assert(guest_mac); err = asprintf(&uarg_buf, "virtio-net-pci,netdev=n0,mac=%s", guest_mac); assert(err != -1); pvadd(uargpv, uarg_buf); pvadd(uargpv, "-netdev"); err = asprintf(&uarg_buf, "tap,id=n0,ifname=%s,script=no,downscript=no", TAP_LINK_NAME); assert(err != -1); pvadd(uargpv, uarg_buf); pvadd(uargpv, "-kernel"); pvadd(uargpv, unikernel); pvadd(uargpv, "-append"); /* * TODO: Replace any occurences of ',' with ',,' in -append, because * QEMU arguments are insane. */ char cmdline[1024]; char *cmdline_p = cmdline; size_t cmdline_free = sizeof cmdline; for (; *argv; argc--, argv++) { size_t alen = snprintf(cmdline_p, cmdline_free, "%s%s", *argv, (argc > 1) ? " " : ""); if (alen >= cmdline_free) { warnx("error: Command line too long"); return 1; } cmdline_free -= alen; cmdline_p += alen; } size_t alen = snprintf(cmdline_p, cmdline_free, "--ipv4=%s --ipv4-gateway=%s", uarg_ip, uarg_gw); if (alen >= cmdline_free) { warnx("error: Command line too long"); return 1; } pvadd(uargpv, cmdline); } /* * UKVM: * /unikernel/ukvm <ukvm args> <unikernel> -- <unikernel args> */ else if (hypervisor == UKVM) { pvadd(uargpv, "/unikernel/ukvm"); err = asprintf(&uarg_buf, "--net=@%d", tap_fd); assert(err != -1); pvadd(uargpv, uarg_buf); pvadd(uargpv, "--"); pvadd(uargpv, unikernel); for (; *argv; argc--, argv++) { pvadd(uargpv, *argv); } err = asprintf(&uarg_buf, "--ipv4=%s", uarg_ip); assert(err != -1); pvadd(uargpv, uarg_buf); err = asprintf(&uarg_buf, "--ipv4-gateway=%s", uarg_gw); assert(err != -1); pvadd(uargpv, uarg_buf); } /* * UNIX: * <unikernel> <unikernel args> */ else if (hypervisor == UNIX) { pvadd(uargpv, unikernel); err = asprintf(&uarg_buf, "--interface=%s", TAP_LINK_NAME); assert(err != -1); pvadd(uargpv, uarg_buf); for (; *argv; argc--, argv++) { pvadd(uargpv, *argv); } err = asprintf(&uarg_buf, "--ipv4=%s", uarg_ip); assert(err != -1); pvadd(uargpv, 
uarg_buf); err = asprintf(&uarg_buf, "--ipv4-gateway=%s", uarg_gw); assert(err != -1); pvadd(uargpv, uarg_buf); } char **uargv = (char **)pvfinal(uargpv); /* * Done with netlink, free all resources and close socket. */ rtnl_link_put(l_veth); rtnl_link_put(l_bridge); rtnl_link_put(l_tap); nl_addr_put(veth_addr); nl_addr_put(gw_addr); nl_cache_free(link_cache); nl_close(sk); nl_socket_free(sk); /* * Drop all capabilities except CAP_NET_BIND_SERVICE. */ capng_clear(CAPNG_SELECT_BOTH); capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED | CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_BOTH) != 0) { warnx("error: Could not drop capabilities"); return 1; } /* * Run the unikernel. */ err = execv(uargv[0], uargv); warn("error: execv() of %s failed", uargv[0]); return 1; }
void NetlinkManager::netlinkRouteUpdated( struct nl_cache* /*cache*/, struct nl_object* obj, int nlOperation, void* data) { NetlinkManager* nlm = static_cast<NetlinkManager*>(data); std::string operation = nlm->nlOperationToStr(nlOperation); if (operation == "unknown") { VLOG(1) << "Ignoring an unknown route update"; return; } VLOG(2) << "Received a " << operation << " netlink route update message"; struct rtnl_route* route = (struct rtnl_route*)obj; if (rtnl_route_get_type(route) != RTN_UNICAST) { VLOG(1) << "Ignoring non-unicast route update"; return; } struct nl_addr* nlDst = rtnl_route_get_dst(route); const uint8_t ipLen = nl_addr_get_prefixlen(nlDst); char strDst[ipLen]; nl_addr2str(nlDst, strDst, ipLen); int numNexthops = rtnl_route_get_nnexthops(route); if (!numNexthops) { VLOG(0) << "Could not find next hop for route update for " << strDst; struct nl_dump_params dumpParams = initDumpParams(); nl_object_dump((nl_object*)route, &dumpParams); return; } std::vector<BinaryAddress> nexthops; { std::lock_guard<std::mutex> lock(nlm->interfacesMutex_); nexthops = getNextHops(route, ipLen, nlm->monitoredInterfaces_); } if (nexthops.empty()) { VLOG(1) << operation << " Route update for " << strDst << " has no valid nexthop"; return; } if (FLAGS_debug) { VLOG(1) << "Got " << operation << " route update for " << strDst; return; } switch (nlOperation) { case NL_ACT_NEW: { nlm->addRouteViaFbossThrift(nlDst, nexthops); break; } case NL_ACT_DEL: { nlm->deleteRouteViaFbossThrift(nlDst); break; } case NL_ACT_CHANGE: { VLOG(2) << "Not updating state due to unimplemented" << "NL_ACT_CHANGE netlink operation"; break; } default: { VLOG(0) << "Not updating state due to unknown netlink operation " << std::to_string(nlOperation); break; /* NL_ACT_??? */ } } return; }
// True when an address is set and its prefix length equals the address
// length in bits (nl_addr_get_len() * 8), i.e. the prefix covers the
// entire address.
bool TNlAddr::IsHost() const {
    if (!Addr)
        return false;

    return nl_addr_get_len(Addr) * 8 == nl_addr_get_prefixlen(Addr);
}
CAMLprim value ocaml_get_routing_table(value unit) { CAMLparam1(unit); CAMLlocal3( ret, tmp, entry ); struct nl_sock *fd; struct nl_cache *res, *links; struct rtnl_route *it; uint32 i_ip, netmask = 0, mask_len, gw; int i; struct nl_addr *ip; char device_name[IFNAMSIZ]; struct rtnl_nexthop *to; fd = nl_socket_alloc(); if (!fd) { fprintf(stderr, "error nl_socket_alloc\n"); exit(1); } if(nl_connect(fd, NETLINK_ROUTE) < 0) { fprintf(stderr, "error nl_connect\n"); exit(1); } ret = Val_emptylist; if(rtnl_route_alloc_cache(fd, AF_UNSPEC, 0, &res) < 0) { fprintf(stderr, "error rtnl_route_alloc_cache"); exit(1); } if(rtnl_link_alloc_cache (fd, AF_UNSPEC, &links) < 0) { fprintf(stderr, "error rtnl_link_alloc_cache"); exit(1); } it = (struct rtnl_route *)nl_cache_get_first(res); for(; it != NULL; it = (struct rtnl_route *) nl_cache_get_next((struct nl_object *)it) ) { if(rtnl_route_get_family (it) == AF_INET) { ip = rtnl_route_get_dst(it); i_ip = ntohl(*(int *)nl_addr_get_binary_addr(ip)); mask_len = nl_addr_get_prefixlen(ip); for(i = 0; i < 32; i++) netmask = (netmask << 1) + (i< mask_len?1:0); to = rtnl_route_nexthop_n(it, 0); rtnl_link_i2name(links, rtnl_route_nh_get_ifindex(to), device_name, IFNAMSIZ); if ( rtnl_route_nh_get_gateway (to) != NULL) gw = ntohl(*(int *)nl_addr_get_binary_addr( rtnl_route_nh_get_gateway (to))); else gw = 0; /*printf("src ip:%x mask:%x gw:%x dev:%s\n", i_ip, netmask, */ /*gw, device_name);*/ entry = caml_alloc(7,0); Store_field(entry, 0, Val_int(i_ip & 0xFFFF)); Store_field(entry, 1, Val_int(i_ip >> 16)); Store_field(entry, 2, Val_int(netmask & 0xFFFF)); Store_field(entry, 3, Val_int(netmask >> 16)); Store_field(entry, 4, Val_int(gw & 0xFFFF)); Store_field(entry, 5, Val_int(gw >> 16)); Store_field(entry, 6, caml_copy_string(device_name)); // store in list tmp = caml_alloc(2, 0); Store_field( tmp, 0, entry); // head Store_field( tmp, 1, ret); // tail ret = tmp; } }