/*
 * Find the route in rt's multipath chain that matches the given gateway
 * and priority.
 *
 * The search starts at the node rn_mpath_prio() returns for "prio".  If
 * that node does not carry the requested priority (and prio is not
 * RTP_ANY) there is no match.  Without a gateway the starting node is
 * returned only when rn_mpath_next() reports no further path; with a
 * gateway the chain is walked until an entry with an identical
 * rt_gateway sockaddr is found.
 *
 * Returns the matching rtentry, or NULL when nothing matches.
 */
struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate, u_int8_t prio)
{
	struct radix_node *node;
	struct rtentry *cand;

	node = rn_mpath_prio((struct radix_node *)rt, prio);
	rt = (struct rtentry *)node;

	/* the node handed back must really carry the requested priority */
	if (prio != RTP_ANY && rt->rt_priority != prio)
		return NULL;

	/* no gateway given: only an entry without further paths qualifies */
	if (gate == NULL) {
		if (rn_mpath_next(node) == NULL)
			return rt;
		return NULL;
	}

	/* gateway given: first entry with a byte-identical gateway wins */
	for (;;) {
		cand = (struct rtentry *)node;
		if (cand->rt_gateway->sa_len == gate->sa_len &&
		    memcmp(cand->rt_gateway, gate, gate->sa_len) == 0)
			return cand;

		node = rn_mpath_next(node);
		if (node == NULL)
			return NULL;
	}
}
/*
 * Look up the route in rt's multipath chain matching "gate" and "prio".
 *
 * Entries whose priority (masked with RTP_MASK) differs from the
 * requested one are skipped unless prio is RTP_ANY.  For the first
 * entry with an acceptable priority:
 *   - with no gateway, that entry is returned if rn_mpath_next(.., 0)
 *     reports no further path, otherwise NULL is returned;
 *   - with a gateway, entries lacking rt_gateway are skipped and the
 *     first entry with an identical gateway sockaddr is returned.
 *
 * Returns NULL when the chain is exhausted without a match.
 */
struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate, u_int8_t prio)
{
	struct radix_node *node = (struct radix_node *)rt;
	struct rtentry *cand;

	for (;;) {
		cand = (struct rtentry *)node;

		if (prio == RTP_ANY ||
		    (cand->rt_priority & RTP_MASK) == (prio & RTP_MASK)) {
			/*
			 * Without a gateway the decision is made on the
			 * first entry of the right priority.
			 */
			if (gate == NULL)
				return (rn_mpath_next(node, 0) == NULL) ?
				    cand : NULL;

			if (cand->rt_gateway != NULL &&
			    cand->rt_gateway->sa_len == gate->sa_len &&
			    memcmp(cand->rt_gateway, gate, gate->sa_len) == 0)
				return cand;
		}

		node = rn_mpath_next(node, 1);
		if (node == NULL)
			return NULL;
	}
}
/*
 * Find the entry in rt's multipath chain whose gateway matches "gate".
 *
 * Returns NULL right away when no gateway is supplied or the first
 * entry has no rt_gateway.  Otherwise every entry reachable through
 * rn_mpath_next() is examined and the first match is returned; NULL is
 * returned when the chain ends without a match.
 */
struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
{
	struct radix_node *rn;

	if (!gate || !rt->rt_gateway)
		return NULL;

	/* beyond here, we use rn as the master copy */
	rn = (struct radix_node *)rt;
	do {
		rt = (struct rtentry *)rn;

		/*
		 * We are removing an address alias that has the same
		 * prefix as another address.  The interface address has
		 * to be compared because rt_gateway is a special
		 * sockaddr_dl structure.
		 *
		 * The lengths are checked first so memcmp() is never asked
		 * to read gate->sa_len bytes out of a shorter interface
		 * address.
		 */
		if (rt->rt_gateway->sa_family == AF_LINK) {
			if (rt->rt_ifa->ifa_addr->sa_len == gate->sa_len &&
			    !memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
				break;
		}

		/*
		 * Check for other options:
		 * 1) Routes with 'real' IPv4/IPv6 gateway
		 * 2) Loopback host routes (another AF_LINK/sockaddr_dl check)
		 */
		if (rt->rt_gateway->sa_len == gate->sa_len &&
		    !memcmp(rt->rt_gateway, gate, gate->sa_len))
			break;
	} while ((rn = rn_mpath_next(rn)) != NULL);

	/* rn is the matching node, or NULL if the chain was exhausted */
	return (struct rtentry *)rn;
}
/*
 * Gateway selection by Hash-Threshold (RFC 2992).
 *
 * rt is the referenced first entry of a multipath chain and srcaddrp is
 * handed to rn_mpath_hash() together with the route key.  The 16-bit
 * hash is mapped onto the active paths by repeatedly stepping to the
 * next active entry and subtracting one threshold.
 *
 * Returns the selected entry.  When a different entry is selected the
 * reference on the original is dropped with rtfree() and rt_refcnt of
 * the selected entry is bumped.  When selection runs off the end of the
 * chain, or picks the entry we already hold, rt is returned untouched.
 */
struct rtentry *
rn_mpath_select(struct rtentry *rt, uint32_t *srcaddrp)
{
	struct radix_node *rn;
	int hash, npaths, threshold;

	rn = (struct radix_node *)rt;
	npaths = rn_mpath_active_count(rn);
	hash = rn_mpath_hash(rt_key(rt), srcaddrp) & 0xffff;
	threshold = 1 + (0xffff / npaths);
	while (hash > threshold && rn) {
		/* stay within the multipath routes */
		rn = rn_mpath_next(rn, RMP_MODE_ACTIVE);
		hash -= threshold;
	}

	/*
	 * If gw selection fails, use the first match (default).  Only swap
	 * references when a different entry was chosen: releasing rt and
	 * then touching the very same object again is at best a no-op and
	 * at worst a use after free.
	 */
	if (rn != NULL && rn != (struct radix_node *)rt) {
		rtfree(rt);
		rt = (struct rtentry *)rn;
		rt->rt_refcnt++;
	}
	return (rt);
}
/*
 * rtentry-typed wrapper around rn_mpath_next(): returns whatever
 * rn_mpath_next() yields for rt in RMP_MODE_ACTIVE, cast back to a
 * struct rtentry pointer.
 */
struct rtentry *
rt_mpath_next(struct rtentry *rt)
{
	struct radix_node *successor;

	successor = rn_mpath_next((struct radix_node *)rt, RMP_MODE_ACTIVE);
	return ((struct rtentry *)successor);
}
/*
 * Fill in ro->ro_rt for ro->ro_dst in FIB "fibnum", spreading traffic
 * over multipath routes according to "hash" and the per-route weights.
 *
 * A cached route that is still up (RTF_UP, interface present and link
 * up) is kept as is.  Otherwise the route is looked up with
 * rtalloc1_fib(); if it has multipath siblings, one of them is chosen
 * by reducing hash + hashjitter modulo the value of rn_mpath_count()
 * and walking the dupedkey chain.  On return ro->ro_rt is unlocked, or
 * NULL when no route exists or selection failed.
 */
void
rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
{
	struct radix_node *rn;
	u_int32_t n;
	struct rtentry *rt;
	int64_t weight;

	/*
	 * XXX we don't attempt to lookup cached route again; what should
	 * be done for sendto(3) case?
	 */
	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
	    && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
		return;
	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);

	/* if the route does not exist or it is not multipath, don't care */
	if (ro->ro_rt == NULL)
		return;
	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
		RT_UNLOCK(ro->ro_rt);
		return;
	}

	/* beyond here, we use rn as the master copy */
	rn = (struct radix_node *)ro->ro_rt;
	n = rn_mpath_count(rn);

	/*
	 * A zero total would make the modulo below divide by zero; treat
	 * it like any other failed selection.
	 * NOTE(review): this mirrors the existing failure path below, which
	 * only unlocks the route -- confirm whether the reference obtained
	 * from rtalloc1_fib() should be released here as well.
	 */
	if (n == 0) {
		RT_UNLOCK(ro->ro_rt);
		ro->ro_rt = NULL;
		return;
	}

	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
	hash += hashjitter;
	hash %= n;
	weight = abs((int32_t)hash);
	rt = ro->ro_rt;
	/*
	 * rn is tested before rt is dereferenced, and the weight is only
	 * adjusted while there still is an entry, so running off the end
	 * of the chain ends the loop instead of dereferencing NULL.
	 */
	while (rn != NULL && weight >= rt->rt_weight) {
		/* stay within the multipath routes */
		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
			break;
		rn = rn->rn_dupedkey;
		rt = (struct rtentry *)rn;
		/*
		 * NOTE(review): as before, the weight of the entry just
		 * stepped onto is subtracted, not that of the entry left
		 * behind -- confirm this is the intended distribution.
		 */
		if (rt != NULL)
			weight -= rt->rt_weight;
	}
	/* XXX try filling rt_gwroute and avoid unreachable gw */

	/* gw selection has failed - there must be only zero weight routes */
	if (!rn) {
		RT_UNLOCK(ro->ro_rt);
		ro->ro_rt = NULL;
		return;
	}
	if (ro->ro_rt != rt) {
		/* move lock and reference from the first entry to the pick */
		RTFREE_LOCKED(ro->ro_rt);
		ro->ro_rt = (struct rtentry *)rn;
		RT_LOCK(ro->ro_rt);
		RT_ADDREF(ro->ro_rt);
	}
	RT_UNLOCK(ro->ro_rt);
}
/*
 * Pick a route for "hash" starting at rte.  When rn_mpath_next() finds
 * no sibling, rte itself is returned; otherwise the choice is delegated
 * to rt_mpath_selectrte().
 */
struct rtentry *
rt_mpath_select(struct rtentry *rte, uint32_t hash)
{
	struct radix_node *sibling;

	sibling = rn_mpath_next((struct radix_node *)rte);
	if (sibling != NULL)
		return (rt_mpath_selectrte(rte, hash));

	return (rte);
}
/*
 * Allocate a route for ro->ro_dst, potentially using multipath to
 * select the peer.
 *
 * A cached entry that still has an interface and RTF_UP set is kept.
 * Otherwise the route is looked up with rtalloc1(); if it carries
 * RTF_MPATH and multipath is enabled for the destination's address
 * family, one of the paths is chosen by Hash-Threshold (RFC 2992) using
 * rn_mpath_hash(ro, srcaddrp).
 */
void
rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp)
{
#if defined(INET) || defined(INET6)
	struct radix_node *node;
	int bucket, total, step;
#endif
	int mpath_enabled = 0;

	/*
	 * Keep a cached entry while it is still valid; re-selecting would
	 * increase the risk of disrupting local flows.
	 */
	if (ro->ro_rt != NULL && ro->ro_rt->rt_ifp != NULL &&
	    (ro->ro_rt->rt_flags & RTF_UP) != 0)
		return;

	ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);

	/* nothing more to do for a missing or non-multipath route */
	if (ro->ro_rt == NULL)
		return;
	if ((ro->ro_rt->rt_flags & RTF_MPATH) == 0)
		return;

	/* is multipath routing enabled for the protocol in question? */
#ifdef INET
	if (ipmultipath && ro->ro_dst.sa_family == AF_INET)
		mpath_enabled = 1;
#endif
#ifdef INET6
	if (!mpath_enabled &&
	    ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
		mpath_enabled = 1;
#endif
	if (!mpath_enabled)
		return;

#if defined(INET) || defined(INET6)
	/* gw selection by Hash-Threshold (RFC 2992) */
	node = (struct radix_node *)ro->ro_rt;
	total = rn_mpath_count(node);
	bucket = rn_mpath_hash(ro, srcaddrp) & 0xffff;
	step = 1 + (0xffff / total);
	for (; bucket > step && node != NULL; bucket -= step) {
		/* stay within the multipath routes */
		if (rn_mpath_next(node, 0) == NULL)
			break;
		node = node->rn_dupedkey;
	}

	/* XXX try filling rt_gwroute and avoid unreachable gw */

	/* if gw selection fails, use the first match (default) */
	if (node == NULL)
		return;

	rtfree(ro->ro_rt);
	ro->ro_rt = (struct rtentry *)node;
	ro->ro_rt->rt_refcnt++;
#endif
}
/*
 * Count the paths in the multipath chain starting at rn: rn itself plus
 * every node rn_mpath_next(.., 0) yields after it.
 */
int
rn_mpath_count(struct radix_node *rn)
{
	int npaths;

	for (npaths = 1; (rn = rn_mpath_next(rn, 0)) != NULL; npaths++)
		continue;

	return npaths;
}
/*
 * Count the paths in the chain starting at rn as seen in
 * RMP_MODE_ACTIVE: rn itself plus every node rn_mpath_next() yields
 * after it in that mode.
 */
int
rn_mpath_active_count(struct radix_node *rn)
{
	int npaths;

	for (npaths = 1;
	    (rn = rn_mpath_next(rn, RMP_MODE_ACTIVE)) != NULL;
	    npaths++)
		continue;

	return npaths;
}
/*
 * Bring RTF_MPATH up to date for the part of the rn_dupedkey chain that
 * matches prio.
 *
 * The first route of that priority is located with
 * rt_mpath_matchgate().  If it has a successor in RMP_MODE_BYPRIO,
 * RTF_MPATH is set on it and on every successor; otherwise the flag is
 * cleared on it.  Nothing happens when rn is NULL or no route is found.
 */
void
rn_mpath_adj_mpflag(struct radix_node *rn, u_int8_t prio)
{
	struct rtentry *first;
	struct radix_node *node;
	u_int8_t masked;

	if (rn == NULL)
		return;

	masked = prio & RTP_MASK;
	first = rt_mpath_matchgate((struct rtentry *)rn, NULL, masked);
	if (first == NULL)
		return;

	node = (struct radix_node *)first;
	if (rn_mpath_next(node, RMP_MODE_BYPRIO) == NULL) {
		/* a lone route of this priority is not multipath */
		first->rt_flags &= ~RTF_MPATH;
		return;
	}

	for (; node != NULL; node = rn_mpath_next(node, RMP_MODE_BYPRIO))
		((struct rtentry *)node)->rt_flags |= RTF_MPATH;
}
/*
 * Walk the multipath chain that starts at headrt and unlink "rt" from
 * it.  The unlinked node has its rn_dupedkey cleared and RNF_ACTIVE
 * removed from both of its radix nodes.  The caller frees "rt".
 *
 * Returns 1 when rt was found and unlinked, 0 otherwise (including when
 * either argument is NULL).  The head itself is never unlinked.
 */
int
rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
{
	struct radix_node *prev, *cur, *victim;

	if (headrt == NULL || rt == NULL)
		return (0);

	victim = (struct radix_node *)rt;
	prev = (struct radix_node *)headrt;
	for (cur = rn_mpath_next(prev); cur != NULL;
	    prev = cur, cur = rn_mpath_next(cur)) {
		if (cur != victim)
			continue;

		/* splice the node out and mark both halves inactive */
		prev->rn_dupedkey = cur->rn_dupedkey;
		cur->rn_dupedkey = NULL;
		cur->rn_flags &= ~RNF_ACTIVE;
		cur[1].rn_flags &= ~RNF_ACTIVE;
		return (1);
	}

	return (0);
}
/*
 * Sum rt_weight over the multipath chain starting at rn, following
 * rn_mpath_next().  Returns 0 for a NULL rn.
 */
uint32_t
rn_mpath_count(struct radix_node *rn)
{
	uint32_t total = 0;

	for (; rn != NULL; rn = rn_mpath_next(rn))
		total += ((struct rtentry *)rn)->rt_weight;

	return (total);
}
/*
 * Find the entry in rt's multipath chain whose gateway equals "gate".
 *
 * When rn_mpath_next() reports no further path for rt, rt is returned
 * regardless of the gateway.  Otherwise a gateway is required (NULL
 * yields NULL) and the first entry with an identical rt_gateway
 * sockaddr is returned, or NULL if none matches.
 */
struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
{
	struct radix_node *node = (struct radix_node *)rt;
	struct rtentry *cand;

	if (rn_mpath_next(node) == NULL)
		return rt;
	if (gate == NULL)
		return NULL;

	/* from here on the radix node is the cursor */
	for (;;) {
		cand = (struct rtentry *)node;
		if (cand->rt_gateway->sa_len == gate->sa_len &&
		    memcmp(cand->rt_gateway, gate, gate->sa_len) == 0)
			return cand;

		node = rn_mpath_next(node);
		if (node == NULL)
			return NULL;
	}
}
void rtalloc_mpath(struct route *ro, int hash) { struct radix_node *rn0, *rn; int n; /* * XXX we don't attempt to lookup cached route again; what should * be done for sendto(3) case? */ if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) return; /* XXX */ #ifdef __FreeBSD__ ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL); #else ro->ro_rt = rtalloc1(&ro->ro_dst, 1); #endif /* if the route does not exist or it is not multipath, don't care */ if (!ro->ro_rt || !rn_mpath_next((struct radix_node *)ro->ro_rt)) return; /* beyond here, we use rn as the master copy */ rn0 = rn = (struct radix_node *)ro->ro_rt; n = rn_mpath_count(rn0); /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ hash += hashjitter; hash %= n; while (hash-- > 0 && rn) { /* stay within the multipath routes */ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) break; rn = rn->rn_dupedkey; } /* XXX try filling rt_gwroute and avoid unreachable gw */ /* if gw selection fails, use the first match (default) */ if (!rn) return; rtfree(ro->ro_rt); ro->ro_rt = (struct rtentry *)rn; ro->ro_rt->rt_refcnt++; }
/*
 * Fill in ro->ro_rt for ro->ro_dst in FIB "fibnum", letting
 * rt_mpath_selectrte() choose among multipath routes based on "hash".
 *
 * A cached route that is still up (RTF_UP, interface present and link
 * up) is kept.  On return ro->ro_rt is unlocked, or NULL when no route
 * exists or the selection returned nothing.
 */
void
rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
{
	struct rtentry *chosen;

	/*
	 * XXX we don't attempt to lookup cached route again; what should
	 * be done for sendto(3) case?
	 */
	if (ro->ro_rt != NULL && ro->ro_rt->rt_ifp != NULL &&
	    (ro->ro_rt->rt_flags & RTF_UP) != 0 &&
	    RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
		return;

	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);

	/* a missing route leaves nothing to do */
	if (ro->ro_rt == NULL)
		return;

	/* a single-path route is used as is */
	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
		RT_UNLOCK(ro->ro_rt);
		return;
	}

	chosen = rt_mpath_selectrte(ro->ro_rt, hash);
	/* XXX try filling rt_gwroute and avoid unreachable gw */

	/* gw selection has failed - there must be only zero weight routes */
	if (chosen == NULL) {
		RT_UNLOCK(ro->ro_rt);
		ro->ro_rt = NULL;
		return;
	}

	/* move lock and reference over when another entry was picked */
	if (chosen != ro->ro_rt) {
		RTFREE_LOCKED(ro->ro_rt);
		ro->ro_rt = chosen;
		RT_LOCK(ro->ro_rt);
		RT_ADDREF(ro->ro_rt);
	}
	RT_UNLOCK(ro->ro_rt);
}
/*
 * Return the best matching route based on gateway and prio.
 *
 * With both given this acts as a lookup for the actual rtentry.  With a
 * NULL gate the first node whose priority (masked with RTP_MASK)
 * matches prio is returned; RTP_ANY accepts any priority.  Returns NULL
 * when the chain, walked in RMP_MODE_ALL, holds no match.
 */
struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate, u_int8_t prio)
{
	struct radix_node *node = (struct radix_node *)rt;
	struct rtentry *cand;

	for (;;) {
		cand = (struct rtentry *)node;

		/* only entries of the requested priority are considered */
		if (prio == RTP_ANY ||
		    (cand->rt_priority & RTP_MASK) == (prio & RTP_MASK)) {
			/* without a gateway the priority match is enough */
			if (gate == NULL)
				return cand;

			if (cand->rt_gateway->sa_len == gate->sa_len &&
			    memcmp(cand->rt_gateway, gate, gate->sa_len) == 0)
				return cand;
		}

		node = rn_mpath_next(node, RMP_MODE_ALL);
		if (node == NULL)
			return NULL;
	}
}
/*
 * Return the first route in rn's chain whose rt_priority equals prio.
 *
 * When no entry matches exactly, the last entry seen with a smaller
 * priority value is returned, or the original rn if there is none.  For
 * RTP_ANY the original rn is returned without looking at the chain.
 */
struct radix_node *
rn_mpath_prio(struct radix_node *rn, u_int8_t prio)
{
	struct radix_node *best = rn;
	struct rtentry *cur;

	if (prio == RTP_ANY)
		return (best);

	for (;;) {
		cur = (struct rtentry *)rn;

		/* perfect match */
		if (cur->rt_priority == prio)
			return (rn);

		/* list is sorted: remember the last more preferred entry */
		if (cur->rt_priority < prio)
			best = rn;

		rn = rn_mpath_next(rn, RMP_MODE_ALL);
		if (rn == NULL)
			return (best);
	}
}
/* * check if we have the same key/mask/gateway on the table already. * Assume @rt rt_key host bits are cleared according to @netmask */ int rt_mpath_conflict(struct rib_head *rnh, struct rtentry *rt, struct sockaddr *netmask) { struct radix_node *rn, *rn1; struct rtentry *rt1; rn = (struct radix_node *)rt; rn1 = rnh->rnh_lookup(rt_key(rt), netmask, &rnh->head); if (!rn1 || rn1->rn_flags & RNF_ROOT) return (0); /* key/mask are the same. compare gateway for all multipaths */ do { rt1 = (struct rtentry *)rn1; /* sanity: no use in comparing the same thing */ if (rn1 == rn) continue; if (rt1->rt_gateway->sa_family == AF_LINK) { if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len || bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr, rt1->rt_ifa->ifa_addr->sa_len)) continue; } else { if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len || bcmp(rt1->rt_gateway, rt->rt_gateway, rt1->rt_gateway->sa_len)) continue; } /* all key/mask/gateway are the same. conflicting entry. */ return (EEXIST); } while ((rn1 = rn_mpath_next(rn1)) != NULL); return (0); }
/*
 * Carry out a routing table request on the table selected by the
 * destination's address family and "tableid".
 *
 *   req      RTM_DELETE, RTM_RESOLVE or RTM_ADD; any other value falls
 *            through the switch and returns 0 without doing anything.
 *   info     addresses (rti_info[]), flags and interface address that
 *            describe the route; fields of it are overwritten here.
 *   prio     routing priority; for new routes 0 selects the interface
 *            priority plus RTP_STATIC.
 *   ret_nrt  RTM_DELETE: if non-NULL, receives the removed route.
 *            RTM_RESOLVE: on entry points to the cloning parent.
 *            RTM_RESOLVE/RTM_ADD: if non-NULL, receives the new route
 *            with rt_refcnt bumped.
 *   tableid  routing table to operate on.
 *
 * Returns 0 on success or one of EAFNOSUPPORT, ESRCH, EINVAL, ENOBUFS,
 * EEXIST, ENOMEM or the error from rt_getifa().  The body runs between
 * splsoftnet() and splx().
 */
int
rtrequest1(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	int s = splsoftnet();
	int error = 0;
	struct rtentry *rt, *crt;
	struct radix_node *rn;
	struct radix_node_head *rnh;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	struct sockaddr_rtlabel *sa_rl, sa_rl2;
#ifdef MPLS
	struct sockaddr_mpls *sa_mpls;
#endif
/* record the error and leave through the common exit at "bad" */
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, tableid)) == NULL)
		senderr(EAFNOSUPPORT);
	/* a host route is looked up and stored without a netmask */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;
#ifndef SMALL_KERNEL
		/*
		 * if we got multipath routes, we require users to specify
		 * a matching RTAX_GATEWAY.
		 */
		if (rn_mpath_capable(rnh)) {
			rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY], prio);
			rn = (struct radix_node *)rt;
			if (!rt)
				senderr(ESRCH);
		}
#endif
		if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh, rn)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;

		/* clean up any cloned children */
		if ((rt->rt_flags & RTF_CLONING) != 0)
			rtflushclone(rnh, rt);

		/* a node just removed must be neither active nor a root */
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");

		/* release the gateway route, then point rt back at our entry */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute;
			RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		/* drop the reference held on the cloning parent */
		if (rt->rt_parent) {
			rt->rt_parent->rt_refcnt--;
			rt->rt_parent = NULL;
		}

#ifndef SMALL_KERNEL
		/* if only one path is left for this dst/mask, clear RTF_MPATH */
		if (rn_mpath_capable(rnh)) {
			if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) != NULL && rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
		}
#endif

		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		rttrash++;

		/* hand the route to the caller, or free it if unreferenced */
		if (ret_nrt)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			rt->rt_refcnt++;
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/* *ret_nrt must be a cloning route; it becomes the parent */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			senderr(EINVAL);
		/* derive the description of the clone from its parent */
		ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
		info->rti_flags |= RTF_CLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		info->rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		goto makeroute;

	case RTM_ADD:
		if (info->rti_ifa == 0 && (error = rt_getifa(info, tableid)))
			senderr(error);
		ifa = info->rti_ifa;
makeroute:
		/* shared by RTM_ADD and RTM_RESOLVE: build and insert the entry */
		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL)
			senderr(ENOBUFS);

		rt->rt_flags = info->rti_flags;

		if (prio == 0)
			prio = ifa->ifa_ifp->if_priority + RTP_STATIC;
		rt->rt_priority = prio;	/* init routing priority */
		/* the route is up only if the interface is up and its link is up or unknown */
		if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) || ifa->ifa_ifp->if_link_state == LINK_STATE_UNKNOWN) && ifa->ifa_ifp->if_flags & IFF_UP)
			rt->rt_flags |= RTF_UP;
		else {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}
		LIST_INIT(&rt->rt_timer);
		if (rt_setgate(rt, info->rti_info[RTAX_DST], info->rti_info[RTAX_GATEWAY], tableid)) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}
		/* store the destination as the key, masked if a netmask is given */
		ndst = rt_key(rt);
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(info->rti_info[RTAX_DST], ndst, info->rti_info[RTAX_NETMASK]);
		} else
			Bcopy(info->rti_info[RTAX_DST], ndst, info->rti_info[RTAX_DST]->sa_len);
#ifndef SMALL_KERNEL
		/* do not permit exactly the same dst/mask/gw pair */
		if (rn_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, info->rti_info[RTAX_NETMASK], info->rti_flags & RTF_MPATH)) {
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}
#endif

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *) info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_info[RTAX_SRC] != NULL || info->rti_info[RTAX_DST]->sa_family == AF_MPLS) {
			struct rt_mpls *rt_mpls;

			sa_mpls = (struct sockaddr_mpls *) info->rti_info[RTAX_SRC];

			rt->rt_llinfo = (caddr_t)malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);

			if (rt->rt_llinfo == NULL) {
				if (rt->rt_gwroute)
					rtfree(rt->rt_gwroute);
				Free(rt_key(rt));
				pool_put(&rtentry_pool, rt);
				senderr(ENOMEM);
			}

			rt_mpls = (struct rt_mpls *)rt->rt_llinfo;

			if (sa_mpls != NULL)
				rt_mpls->mpls_label = sa_mpls->smpls_label;

			rt_mpls->mpls_operation = info->rti_mpls;

			/* XXX: set experimental bits */

			rt->rt_flags |= RTF_MPLS;
		}
#endif

		/* the new route holds a reference on its interface address */
		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_priority = (*ret_nrt)->rt_priority;
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
			rt->rt_parent->rt_refcnt++;
		}
		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes, rt->rt_priority);
		/* insertion failed: retry once if a cloned route is in the way */
		if (rn == NULL && (crt = rtalloc1(ndst, 0, tableid)) != NULL) {
			/* overwrite cloned route */
			if ((crt->rt_flags & RTF_CLONED) != 0) {
				rtdeletemsg(crt, tableid);
				rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes, rt->rt_priority);
			}
			RTFREE(crt);
		}
		/*
		 * Still not inserted: undo the references taken above.
		 * NOTE(review): rt_llinfo allocated in the MPLS block is not
		 * released on this path -- confirm whether that is a leak.
		 */
		if (rn == 0) {
			IFAFREE(ifa);
			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
				rtfree(rt->rt_parent);
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}

#ifndef SMALL_KERNEL
		/* set or clear RTF_MPATH on the node rn_mpath_prio() returns for prio */
		if (rn_mpath_capable(rnh) && (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) != NULL && (rn = rn_mpath_prio(rn, prio)) != NULL) {
			if (rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
			else
				((struct rtentry *)rn)->rt_flags |= RTF_MPATH;
		}
#endif

		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		if ((rt->rt_flags & RTF_CLONING) != 0) {
			/* clean up any cloned children */
			rtflushclone(rnh, rt);
		}

		if_group_routechange(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK]);
		break;
	}
bad:
	/* common exit: restore the interrupt priority level */
	splx(s);
	return (error);
}
/*
 * check if we have the same key/mask/gateway on the table already.
 *
 *   rnh      radix head to search.
 *   rt       candidate route; its key, gateway and priority are compared.
 *   netmask  netmask of the candidate, or NULL.
 *   mpathok  when zero, an existing route with the same key/mask and the
 *            same priority is a conflict regardless of its gateway.
 *
 * Returns EEXIST for a conflicting entry and 0 otherwise.
 */
int
rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
    struct sockaddr *netmask, int mpathok)
{
	struct radix_node *rn, *rn1;
	struct rtentry *rt1;
	char *p, *q, *eq;
	int same, l, skip;

	rn = (struct radix_node *)rt;

	rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
	if (!rn1 || rn1->rn_flags & RNF_ROOT)
		return 0;

	/*
	 * unlike other functions we have in this file, we have to check
	 * all key/mask/gateway as rnh_lookup can match less specific entry.
	 */
	rt1 = (struct rtentry *)rn1;

	/* compare key. */
	if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
	    bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
		goto different;

	/* key was the same.  compare netmask.  hairy... */
	if (rt_mask(rt1) && netmask) {
		/* bytes before offset "skip" are excluded from the comparison */
		skip = rnh->rnh_treetop->rn_off;
		if (rt_mask(rt1)->sa_len > netmask->sa_len) {
			/*
			 * as rt_mask(rt1) is made optimal by radix.c,
			 * there must be some 1-bits on rt_mask(rt1)
			 * after netmask->sa_len.  therefore, in
			 * this case, the entries are different.
			 */
			if (rt_mask(rt1)->sa_len > skip)
				goto different;
			else {
				/* no bits to compare, i.e. same*/
				goto maskmatched;
			}
		}

		l = rt_mask(rt1)->sa_len;
		if (skip > l) {
			/* no bits to compare, i.e. same */
			goto maskmatched;
		}
		p = (char *)rt_mask(rt1);
		q = (char *)netmask;
		if (bcmp(p + skip, q + skip, l - skip))
			goto different;
		/*
		 * need to go through all the bit, as netmask is not
		 * optimal and can contain trailing 0s
		 */
		eq = (char *)netmask + netmask->sa_len;
		q += l;
		same = 1;
		/* any non-zero byte of netmask past rt1's mask length is a difference */
		while (eq > q)
			if (*q++) {
				same = 0;
				break;
			}
		if (!same)
			goto different;
	} else if (!rt_mask(rt1) && !netmask)
		; /* no mask to compare, i.e. same */
	else {
		/* one has mask and the other does not, different */
		goto different;
	}

 maskmatched:
	/* multipath not allowed: same key/mask/priority already conflicts */
	if (!mpathok && rt1->rt_priority == rt->rt_priority)
		return EEXIST;

	/* continue from the node rn_mpath_prio() returns for our priority */
	rn1 = rn_mpath_prio((struct radix_node *)rt1, rt->rt_priority);
	/* key/mask were the same.  compare gateway for all multipaths */
	do {
		rt1 = (struct rtentry *)rn1;

		/* sanity: no use in comparing the same thing */
		if (rn1 == rn)
			continue;

		if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
		    bcmp(rt1->rt_gateway, rt->rt_gateway,
		    rt1->rt_gateway->sa_len))
			continue;

		/* check the route priority */
		if (rt1->rt_priority != rt->rt_priority)
			continue;

		/* all key/mask/gateway are the same.  conflicting entry. */
		return EEXIST;
	} while ((rn1 = rn_mpath_next(rn1, 0)) != NULL);

 different:
	return 0;
}
/*
 * Carry out a routing table request on the table selected by the
 * destination's address family and "tableid".
 *
 *   req      RTM_DELETE, RTM_RESOLVE or RTM_ADD; any other value falls
 *            through the switch and returns 0 without doing anything.
 *   info     addresses (rti_info[]), flags and interface address that
 *            describe the route; fields of it are overwritten here.
 *   ret_nrt  RTM_DELETE: if non-NULL, receives the removed route.
 *            RTM_RESOLVE: on entry points to the cloning parent.
 *            RTM_RESOLVE/RTM_ADD: if non-NULL, receives the new route
 *            with rt_refcnt bumped.
 *   tableid  routing table to operate on.
 *
 * Returns 0 on success or one of EAFNOSUPPORT, ESRCH, EINVAL, ENOBUFS,
 * EEXIST or the error from rt_getifa().  The body runs between
 * splsoftnet() and splx().
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
    u_int tableid)
{
	int s = splsoftnet();
	int error = 0;
	struct rtentry *rt, *crt;
	struct radix_node *rn;
	struct radix_node_head *rnh;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	struct sockaddr_rtlabel *sa_rl;
/* record the error and leave through the common exit at "bad" */
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, tableid)) == NULL)
		senderr(EAFNOSUPPORT);
	/* a host route is looked up and stored without a netmask */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;
#ifndef SMALL_KERNEL
		/*
		 * if we got multipath routes, we require users to specify
		 * a matching RTAX_GATEWAY.
		 */
		if (rn_mpath_capable(rnh)) {
			rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
			rn = (struct radix_node *)rt;
			if (!rt)
				senderr(ESRCH);
		}
#endif
		if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh, rn)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;

		/* clean up any cloned children */
		if ((rt->rt_flags & RTF_CLONING) != 0)
			rtflushclone(rnh, rt);

		/* a node just removed must be neither active nor a root */
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");

		/* release the gateway route, then point rt back at our entry */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute;
			RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		/* drop the reference held on the cloning parent */
		if (rt->rt_parent) {
			rt->rt_parent->rt_refcnt--;
			rt->rt_parent = NULL;
		}

#ifndef SMALL_KERNEL
		/* if only one path is left for this dst/mask, clear RTF_MPATH */
		if (rn_mpath_capable(rnh)) {
			if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) != NULL && rn_mpath_next(rn) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
		}
#endif

		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		rttrash++;

		/* hand the route to the caller, or free it if unreferenced */
		if (ret_nrt)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			rt->rt_refcnt++;
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/* *ret_nrt must be a cloning route; it becomes the parent */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			senderr(EINVAL);
		/* derive the description of the clone from its parent */
		ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
		info->rti_flags |= RTF_CLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		goto makeroute;

	case RTM_ADD:
		if (info->rti_ifa == 0 && (error = rt_getifa(info)))
			senderr(error);
		ifa = info->rti_ifa;
makeroute:
		/* shared by RTM_ADD and RTM_RESOLVE: build and insert the entry */
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | info->rti_flags;
		LIST_INIT(&rt->rt_timer);
		if (rt_setgate(rt, info->rti_info[RTAX_DST], info->rti_info[RTAX_GATEWAY], tableid)) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}
		/* store the destination as the key, masked if a netmask is given */
		ndst = rt_key(rt);
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(info->rti_info[RTAX_DST], ndst, info->rti_info[RTAX_NETMASK]);
		} else
			Bcopy(info->rti_info[RTAX_DST], ndst, info->rti_info[RTAX_DST]->sa_len);
#ifndef SMALL_KERNEL
		/* do not permit exactly the same dst/mask/gw pair */
		if (rn_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, info->rti_info[RTAX_NETMASK], info->rti_flags & RTF_MPATH)) {
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}
#endif

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *) info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

		/* the new route holds a reference on its interface address */
		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
			rt->rt_parent->rt_refcnt++;
		}
		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes);
		/* insertion failed: retry once if a cloned route is in the way */
		if (rn == NULL && (crt = rtalloc1(ndst, 0, tableid)) != NULL) {
			/* overwrite cloned route */
			if ((crt->rt_flags & RTF_CLONED) != 0) {
				rtdeletemsg(crt, tableid);
				rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes);
			}
			RTFREE(crt);
		}
		/* still not inserted: undo the references taken above */
		if (rn == 0) {
			IFAFREE(ifa);
			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
				rtfree(rt->rt_parent);
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}

#ifndef SMALL_KERNEL
		/* set or clear RTF_MPATH on the looked-up entry for this dst/mask */
		if (rn_mpath_capable(rnh) && (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) != NULL) {
			if (rn_mpath_next(rn) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
			else
				((struct rtentry *)rn)->rt_flags |= RTF_MPATH;
		}
#endif

		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		if ((rt->rt_flags & RTF_CLONING) != 0) {
			/* clean up any cloned children */
			rtflushclone(rnh, rt);
		}

		if_group_routechange(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK]);
		break;
	}
bad:
	/* common exit: restore the interrupt priority level */
	splx(s);
	return (error);
}
/*
 * Change the priority of the route behind rn to newprio and, where
 * needed, move rn to the matching position in its rn_dupedkey chain.
 *
 * rt_priority is always updated.  The node is left in place when the
 * priority did not change, when the route is alone, or when it is the
 * last one and the priority value grows.  Otherwise rn is unlinked and
 * re-inserted, and rn_mklist is adjusted if the leaf it refers to
 * changed.
 */
void
rn_mpath_reprio(struct radix_node *rn, int newprio)
{
	struct radix_node *prev = rn->rn_p;
	struct radix_node *next = rn->rn_dupedkey;
	struct radix_node *t, *tt, *saved_tt, *head;
	struct rtentry *rt = (struct rtentry *)rn;
	int mid, oldprio, prioinv = 0;

	oldprio = rt->rt_priority;
	rt->rt_priority = newprio;

	/* same prio, no change needed */
	if (oldprio == newprio)
		return;
	if (rn_mpath_next(rn, 1) == NULL) {
		/* no need to move node, route is alone */
		if (prev->rn_mask != rn->rn_mask)
			return;
		/* ... or route is last and prio gets bigger */
		if (oldprio < newprio)
			return;
	}

	/* remove node from dupedkey list and reinsert at correct place */
	if (prev->rn_dupedkey == rn) {
		/* rn has a predecessor in the chain: bypass rn */
		prev->rn_dupedkey = next;
		if (next)
			next->rn_p = prev;
		else
			next = prev;
	} else {
		/* rn is the first of the chain: its parent must point at next */
		if (next == NULL)
			panic("next == NULL");
		next->rn_p = prev;
		if (prev->rn_l == rn)
			prev->rn_l = next;
		else
			prev->rn_r = next;
	}

	/* re-insert rn at the right spot, so first rewind to the head */
	for (tt = next; tt->rn_p->rn_dupedkey == tt; tt = tt->rn_p)
		;
	saved_tt = tt;

	/*
	 * Stolen from radix.c rn_addroute().
	 * This is nasty code with a certain amount of magic and dragons.
	 * t is the element where the re-priorized rn is inserted -- before
	 * or after depending on prioinv. saved_tt points to the head of the
	 * dupedkey chain and tt is a bit of a helper
	 *
	 * First we skip with tt to the start of the mpath group then we
	 * search the right spot to enter our node.
	 */
	for (; tt; tt = tt->rn_dupedkey)
		if (rn->rn_mask == tt->rn_mask)
			break;
	head = tt; /* store current head entry for rn_mklist check */

	tt = rn_mpath_prio(tt, newprio);
	if (((struct rtentry *)tt)->rt_priority != newprio) {
		/* no entry with newprio: insert before a bigger one, else after */
		if (((struct rtentry *)tt)->rt_priority > newprio)
			prioinv = 1;
		t = tt;
	} else {
		/* entries with newprio exist: walk about half of them */
		mid = rn_mpath_count(tt) / 2;
		do {
			t = tt;
			tt = rn_mpath_next(tt, 0);
		} while (tt && --mid > 0);
	}

	/* insert rn before or after t depending on prioinv, tt and saved_tt */
	if (tt == saved_tt && prioinv) {
		/* link in at head of list */
		rn->rn_dupedkey = tt;
		rn->rn_p = tt->rn_p;
		tt->rn_p = rn;
		if (rn->rn_p->rn_l == tt)
			rn->rn_p->rn_l = rn;
		else
			rn->rn_p->rn_r = rn;
	} else if (prioinv == 1) {
		/* link in before t, somewhere inside the chain */
		rn->rn_dupedkey = t;
		t->rn_p->rn_dupedkey = rn;
		rn->rn_p = t->rn_p;
		t->rn_p = rn;
	} else {
		/* link in after t */
		rn->rn_dupedkey = t->rn_dupedkey;
		t->rn_dupedkey = rn;
		rn->rn_p = t;
		if (rn->rn_dupedkey)
			rn->rn_dupedkey->rn_p = rn;
	}
#ifdef RN_DEBUG
	/* readd at head of creation list */
	for (t = rn_clist; t && t->rn_ybro != rn; t = t->rn_ybro)
		;
	if (t)
		t->rn_ybro = rn->rn_ybro;
	rn->rn_ybro = rn_clist;
	rn_clist = rn;
#endif

	if (rn->rn_mklist && rn->rn_flags & RNF_NORMAL) {
		/* the rn_mklist needs to be fixed if the best route changed */
		if (rn->rn_mklist->rm_leaf != rn) {
			if (rn->rn_mklist->rm_leaf->rn_p == rn)
				/* changed route is now best */
				rn->rn_mklist->rm_leaf = rn;
		} else {
			if (rn->rn_dupedkey != head)
				/* rn moved behind head, so head is new head */
				rn->rn_mklist->rm_leaf = head;
		}
	}
}