int handle_rtm_newroute(const struct nlmsghdr *nl){ const struct rtmsg *rt = NLMSG_DATA(nl); struct rtattr *ra; void *as,*ad,*ag; int rlen,oif; route *r,**prev; size_t flen; oif = -1; if((r = create_route()) == NULL){ return -1; } switch( (r->family = rt->rtm_family) ){ case AF_INET:{ flen = sizeof(uint32_t); as = &((struct sockaddr_in *)&r->sss)->sin_addr; ad = &((struct sockaddr_in *)&r->ssd)->sin_addr; ag = &((struct sockaddr_in *)&r->ssg)->sin_addr; break;}case AF_INET6:{ flen = sizeof(uint32_t) * 4; as = &((struct sockaddr_in6 *)&r->sss)->sin6_addr; ad = &((struct sockaddr_in6 *)&r->ssd)->sin6_addr; ag = &((struct sockaddr_in6 *)&r->ssg)->sin6_addr; break;}case AF_BRIDGE:{ // FIXME wtf is a bridge route diagnostic("got a bridge route hrmmm FIXME"); return -1; // FIXME break;}default:{ flen = 0; break;} } r->maskbits = rt->rtm_dst_len; if(flen == 0 || flen > sizeof(r->sss.__ss_padding)){ diagnostic("Unknown route family %u",rt->rtm_family); return -1; } rlen = nl->nlmsg_len - NLMSG_LENGTH(sizeof(*rt)); ra = (struct rtattr *)((char *)(NLMSG_DATA(nl)) + sizeof(*rt)); memset(&r->ssg,0,sizeof(r->ssg)); memset(&r->ssd,0,sizeof(r->ssd)); memset(&r->sss,0,sizeof(r->sss)); while(RTA_OK(ra,rlen)){ switch(ra->rta_type){ case RTA_DST:{ if(RTA_PAYLOAD(ra) != flen){ diagnostic("Expected %zu dst bytes, got %zu", flen,RTA_PAYLOAD(ra)); break; } if(r->ssd.ss_family){ diagnostic("Got two destinations for route"); break; } memcpy(ad,RTA_DATA(ra),flen); r->ssd.ss_family = r->family; break;}case RTA_PREFSRC: case RTA_SRC:{ // FIXME do we not want to prefer PREFSRC? 
if(RTA_PAYLOAD(ra) != flen){ diagnostic("Expected %zu src bytes, got %zu", flen,RTA_PAYLOAD(ra)); break; } if(r->sss.ss_family){ diagnostic("Got two sources for route"); break; } memcpy(as,RTA_DATA(ra),flen); r->sss.ss_family = r->family; break;}case RTA_IIF:{ if(RTA_PAYLOAD(ra) != sizeof(int)){ diagnostic("Expected %zu iiface bytes, got %zu", sizeof(int),RTA_PAYLOAD(ra)); break; } // we don't use RTA_OIF: iif = *(int *)RTA_DATA(ra); break;}case RTA_OIF:{ if(RTA_PAYLOAD(ra) != sizeof(int)){ diagnostic("Expected %zu oiface bytes, got %zu", sizeof(int),RTA_PAYLOAD(ra)); break; } oif = *(int *)RTA_DATA(ra); break;}case RTA_GATEWAY:{ if(RTA_PAYLOAD(ra) != flen){ diagnostic("Expected %zu gw bytes, got %zu", flen,RTA_PAYLOAD(ra)); break; } if(r->ssg.ss_family){ diagnostic("Got two gateways for route"); break; } // We get 0.0.0.0 as the gateway when there's no 'via' if(memcmp(ag,RTA_DATA(ra),flen)){ memcpy(ag,RTA_DATA(ra),flen); r->ssg.ss_family = r->family; } break;}case RTA_PRIORITY:{ break;}case RTA_METRICS:{ break;}case RTA_MULTIPATH:{ // break;}case RTA_PROTOINFO:{ // unused break;}case RTA_FLOW:{ break;}case RTA_CACHEINFO:{ // break;}case RTA_SESSION:{ // unused // break;}case RTA_MP_ALGO:{ // unused break;}case RTA_TABLE:{ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) break;}case RTA_MARK:{ #endif break;}case RTA_MFC_STATS:{ break;}case RTA_VIA:{ break;}case RTA_NEWDST:{ break;}case RTA_PREF:{ break;}case RTA_ENCAP_TYPE:{ break;}case RTA_ENCAP:{ break;}case RTA_EXPIRES:{ break;}case RTA_PAD:{ break;}default:{ diagnostic("Unknown rtatype %u",ra->rta_type); break;}} ra = RTA_NEXT(ra,rlen); } if(rlen){ diagnostic("%d excess bytes on newlink message",rlen); } if((r->iface = iface_by_idx(oif)) == NULL){ diagnostic("Unknown output interface %d on %s",oif,r->iface->name); goto err; } { char str[INET6_ADDRSTRLEN],via[INET6_ADDRSTRLEN]; inet_ntop(rt->rtm_family,ad,str,sizeof(str)); inet_ntop(rt->rtm_family,ag,via,sizeof(via)); diagnostic("[%s] new route to %s/%u 
%ls%ls%s", r->iface->name,str,r->maskbits, rt->rtm_type == RTN_LOCAL ? L"(local)" : rt->rtm_type == RTN_BROADCAST ? L"(broadcast)" : rt->rtm_type == RTN_UNREACHABLE ? L"(unreachable)" : rt->rtm_type == RTN_ANYCAST ? L"(anycast)" : rt->rtm_type == RTN_UNICAST ? L"(unicast)" : rt->rtm_type == RTN_MULTICAST ? L"(multicast)" : rt->rtm_type == RTN_BLACKHOLE ? L"(blackhole)" : rt->rtm_type == RTN_MULTICAST ? L"(multicast)" : L"", r->ssg.ss_family ? L" via " : L"", r->ssg.ss_family ? via : ""); } // We're not interest in blackholes, unreachables, prohibits, NATs yet if(rt->rtm_type != RTN_UNICAST && rt->rtm_type != RTN_LOCAL && rt->rtm_type != RTN_BROADCAST && rt->rtm_type != RTN_ANYCAST && rt->rtm_type != RTN_MULTICAST){ free_route(r); return 0; } assert(r->iface); if(!r->sss.ss_family){ struct routepath rp; if(get_router(r->sss.ss_family,ad,&rp) == 0){ if(r->sss.ss_family == AF_INET){ memcpy(as,rp.src,4); }else if(r->sss.ss_family == AF_INET6){ memcpy(as,rp.src,16); }else{ assert(0); } }else{ // FIXME vicious hackery! if(r->family == AF_INET6){ memcpy(as,r->iface->ip6defsrc,flen); r->sss.ss_family = AF_INET6; } } } if(r->family == AF_INET){ lock_interface(r->iface); if(add_route4(r->iface,ad,r->ssg.ss_family ? ag : NULL, r->sss.ss_family ? 
as : NULL, r->maskbits)){ unlock_interface(r->iface); diagnostic("Couldn't add route to %s",r->iface->name); goto err; } if(r->ssg.ss_family){ send_arp_req(r->iface,r->iface->bcast,ag,as); } unlock_interface(r->iface); pthread_mutex_lock(&route_lock); prev = &ip_table4; // Order most-specific (largest maskbits) to least-specific (0 maskbits) while(*prev){ if(r->maskbits > (*prev)->maskbits){ break; } prev = &(*prev)->next; } r->next = *prev; *prev = r; if(r->sss.ss_family){ while( *(prev = &(*prev)->next) ){ assert((*prev)->maskbits < r->maskbits); if(!((*prev)->sss.ss_family)){ memcpy(&(*prev)->sss,&r->sss,sizeof(r->sss)); } } } pthread_mutex_unlock(&route_lock); }else if(r->family == AF_INET6){ lock_interface(r->iface); if(add_route6(r->iface,ad,r->ssg.ss_family ? ag : NULL,r->sss.ss_family ? as : NULL,r->maskbits)){ unlock_interface(r->iface); diagnostic("Couldn't add route to %s",r->iface->name); goto err; } unlock_interface(r->iface); pthread_mutex_lock(&route_lock); prev = &ip_table6; // Order most-specific (largest maskbits) to least-specific (0 maskbits) while(*prev){ if(r->maskbits > (*prev)->maskbits){ break; } prev = &(*prev)->next; } r->next = *prev; *prev = r; // FIXME set less-specific sources pthread_mutex_unlock(&route_lock); } return 0; err: free_route(r); return -1; }
/*
 * Apply one route report for a single origin/mask to the routing table,
 * creating the table entry first when the origin is new and reachable.
 *
 * 'src' is the address of the neighboring router that sent the report, or
 * zero when the "report" reflects a status change on one of our own
 * interfaces. 'vifi' is the virtual interface the report arrived on.
 */
void
update_route(u_int32_t origin, u_int32_t mask, u_int metric, u_int32_t src, vifi_t vifi)
{
    struct rtentry *entry;
    u_int cost;
    int take_new_gateway;

    /* Reject reports from neighbors whose metric is outside the legal range. */
    if (src != 0 && (metric < 1 || metric >= 2*UNREACHABLE)) {
	logit(LOG_WARNING, 0, "%s reports out-of-range metric %u for origin %s",
	      inet_fmt(src, s1), metric, inet_fmts(origin, mask, s2));
	return;
    }

    /*
     * The adjusted cost adds the metric of the vif the report came in on;
     * anything beyond UNREACHABLE (unreachable or poisoned) collapses to
     * UNREACHABLE.
     */
    cost = metric + uvifs[vifi].uv_metric;
    if (cost > UNREACHABLE)
	cost = UNREACHABLE;

    /* find_route() and create_route() leave the global 'rtp' on the entry. */
    if (!find_route(origin, mask)) {
	/* Unknown origin: never create an entry for an unreachable one ... */
	if (cost == UNREACHABLE)
	    return;

	/* ... nor for an origin/mask pair a neighbor got wrong. */
	if (src != 0 && !inet_valid_subnet(origin, mask)) {
	    logit(LOG_WARNING, 0, "%s reports an invalid origin (%s) and/or mask (%08x)",
		  inet_fmt(src, s1), inet_fmt(origin, s2), ntohl(mask));
	    return;
	}

	create_route(origin, mask);

	/*
	 * Sources now covered by the new route lose their cache entries so
	 * that the kernel asks about them again.
	 */
	steal_sources(rtp);

	/* Placeholder; the real metric is filled in just below. */
	rtp->rt_metric = UNREACHABLE;
    }

    entry = rtp;

    /*
     * Case 1: the entry is new or was unreachable.  Adopt the reported
     * route if, and only if, the report claims reachability.
     */
    if (entry->rt_metric == UNREACHABLE) {
	if (cost != UNREACHABLE) {
	    entry->rt_parent = vifi;
	    init_children_and_leaves(entry, vifi);

	    entry->rt_gateway = src;
	    entry->rt_timer   = 0;
	    entry->rt_metric  = cost;
	    entry->rt_flags  |= RTF_CHANGED;
	    routes_changed = TRUE;
	    update_table_entry(entry);
	}
	return;
    }

    /*
     * Case 2: the report comes from the gateway we already use (both zero
     * for a directly-connected origin).  Refresh the timer and pick up any
     * change of metric.
     */
    if (src == entry->rt_gateway) {
	entry->rt_timer = 0;
	if (cost != entry->rt_metric) {
	    if (cost == UNREACHABLE) {
		del_table_entry(entry, 0, DEL_ALL_ROUTES);
		entry->rt_timer = ROUTE_EXPIRE_TIME;
	    } else if (cost < entry->rt_metric &&
		       init_children_and_leaves(entry, vifi)) {
		update_table_entry(entry);
	    }
	    entry->rt_metric = cost;
	    entry->rt_flags |= RTF_CHANGED;
	    routes_changed = TRUE;
	}
	return;
    }

    /*
     * Case 3: decide whether to switch gateways.  We always switch when the
     * report is from one of our own interfaces.  For an origin that is not
     * directly connected we switch when the new cost is lower, or when it
     * ties and either the reporter has the lower IP address or the current
     * gateway has been silent for ROUTE_SWITCH_TIME.
     */
    take_new_gateway = (src == 0);
    if (!take_new_gateway && entry->rt_gateway != 0) {
	if (cost < entry->rt_metric) {
	    take_new_gateway = 1;
	} else if (cost == entry->rt_metric) {
	    take_new_gateway = (ntohl(src) < ntohl(entry->rt_gateway) ||
				entry->rt_timer >= ROUTE_SWITCH_TIME);
	}
    }

    if (take_new_gateway) {
	if (entry->rt_parent != vifi || cost < entry->rt_metric) {
	    /* XXX Why do we do this if we are just changing the metric? */
	    entry->rt_parent = vifi;
	    if (init_children_and_leaves(entry, vifi))
		update_table_entry(entry);
	}
	entry->rt_gateway = src;
	entry->rt_timer   = 0;
	entry->rt_metric  = cost;
	entry->rt_flags  |= RTF_CHANGED;
	routes_changed = TRUE;
	return;
    }

    /* Reports arriving on the parent vif need no further bookkeeping. */
    if (vifi == entry->rt_parent)
	return;

    /*
     * Case 4: the report came in on a non-parent vif; maintain the
     * child / leaf / dominant / subordinate state for that vif.
     */
    if (VIFM_ISSET(vifi, entry->rt_children)) {
	/* The vif is currently one of our children for this route. */
	int neighbor_wins = (metric < entry->rt_metric ||
			     (metric == entry->rt_metric &&
			      ntohl(src) < ntohl(uvifs[vifi].uv_lcl_addr)));

	if (neighbor_wins) {
	    /*
	     * The neighbor is closer to the origin (or ties with a lower
	     * address): it becomes dominant and we give up the vif.
	     */
	    VIFM_CLR(vifi, entry->rt_children);
	    VIFM_CLR(vifi, entry->rt_leaves);
	    entry->rt_dominants   [vifi] = src;
	    entry->rt_subordinates[vifi] = 0;
	    entry->rt_leaf_timers [vifi] = 0;
	    update_table_entry(entry);
	    return;
	}

	if (metric > UNREACHABLE) {	/* "poisoned reverse" */
	    /*
	     * The neighbor routes to the origin through this vif.  Record
	     * it as subordinate unless we already have one, and drop the
	     * leaf flag.
	     */
	    if (entry->rt_subordinates[vifi] == 0) {
		VIFM_CLR(vifi, entry->rt_leaves);
		entry->rt_subordinates[vifi] = src;
		entry->rt_leaf_timers [vifi] = 0;
		update_table_entry(entry);
	    }
	    return;
	}

	if (src == entry->rt_subordinates[vifi]) {
	    /*
	     * Our recorded subordinate stopped depending on this vif.
	     * Forget it; with at most one neighbor the vif is a leaf right
	     * away, otherwise wait LEAF_CONFIRMATION_TIME to hear from the
	     * other neighbors.
	     */
	    entry->rt_subordinates[vifi] = 0;
	    if (uvifs[vifi].uv_neighbors == NULL ||
		uvifs[vifi].uv_neighbors->al_next == NULL) {
		VIFM_SET(vifi, entry->rt_leaves);
		update_table_entry(entry);
	    } else {
		entry->rt_leaf_timers[vifi] = LEAF_CONFIRMATION_TIME;
		entry->rt_flags |= RTF_LEAF_TIMING;
	    }
	}
	return;
    }

    /* The vif is not our child: see whether its dominant router lost out. */
    if (src == entry->rt_dominants[vifi] &&
	(metric > entry->rt_metric ||
	 (metric == entry->rt_metric &&
	  ntohl(src) > ntohl(uvifs[vifi].uv_lcl_addr)))) {
	/*
	 * The dominant router no longer beats us (higher metric, or a tie
	 * with a higher address), so the vif becomes our child.
	 */
	VIFM_SET(vifi, entry->rt_children);
	entry->rt_dominants[vifi] = 0;

	if (metric > UNREACHABLE) {
	    entry->rt_subordinates[vifi] = src;
	} else if (uvifs[vifi].uv_neighbors == NULL ||
		   uvifs[vifi].uv_neighbors->al_next == NULL) {
	    VIFM_SET(vifi, entry->rt_leaves);
	} else {
	    entry->rt_leaf_timers[vifi] = LEAF_CONFIRMATION_TIME;
	    entry->rt_flags |= RTF_LEAF_TIMING;
	}
	update_table_entry(entry);
    }
}