/*
 * Radix-tree "close" callback, invoked when the last reference to a
 * cloned IPv6 host route is dropped.  Mark the route as belonging to
 * us (RTPRF_OURS) and arm an expiration time so in6_rtqkill() can
 * reap it later — or delete it immediately when route caching is
 * disabled or RTF_DELCLONE is set.
 *
 * Called with rnh_lock held and rt_lock held on the route.
 */
static void
in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
{
	struct rtentry *rt = (struct rtentry *)rn;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (!(rt->rt_flags & RTF_UP))
		return;		/* prophylactic measures */

	/* Only host routes without link-layer info are candidates */
	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
		return;

	/* Already marked for timed expiration; nothing more to do */
	if (rt->rt_flags & RTPRF_OURS)
		return;

	/* Only cloned or dynamic (e.g. redirect-installed) routes qualify */
	if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
		return;

	/*
	 * Delete the route immediately if RTF_DELCLONE is set or
	 * if route caching is disabled (rtq_reallyold set to 0).
	 * Otherwise, let it expire and be deleted by in6_rtqkill().
	 */
	if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
		/*
		 * Delete the route from the radix tree but since we are
		 * called when the route's reference count is 0, don't
		 * deallocate it until we return from this routine by
		 * telling rtrequest that we're interested in it.
		 * Safe to drop rt_lock and use rt_key, rt_gateway,
		 * since holding rnh_lock here prevents another thread
		 * from calling rt_setgate() on this route.
		 */
		RT_UNLOCK(rt);
		if (rtrequest_locked(RTM_DELETE, rt_key(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) {
			/* Now let the caller free it */
			RT_LOCK(rt);
			RT_REMREF_LOCKED(rt);
		} else {
			RT_LOCK(rt);
		}
	} else {
		struct timeval timenow;

		getmicrotime(&timenow);
		/* Arm the reaper: in6_rtqkill() deletes once this passes */
		rt->rt_flags |= RTPRF_OURS;
		rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold;
	}
}
/*
 * Per-route callback used when an interface address goes away: delete
 * every route whose rt_ifa matches the dying address (all matching
 * routes when ap->del is set, otherwise all but RTF_STATIC ones).
 *
 * Entered with the route unlocked; returns with it unlocked or freed.
 */
static int
in_ifadownkill(struct rtentry *rt, void *xap)
{
	struct in_ifadown_arg *ap = xap;

	RT_LOCK(rt);
	if (rt->rt_ifa == ap->ifa &&
	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
		/*
		 * Acquire a reference so that it can later be freed
		 * as the refcount would be 0 here in case of at least
		 * ap->del.
		 */
		RT_ADDREF(rt);
		/*
		 * Disconnect it from the tree and permit protocols
		 * to cleanup.
		 */
		rt_expunge(ap->rnh, rt);
		/*
		 * At this point it is an rttrash node, and in case
		 * the above is the only reference we must free it.
		 * If we do not, no one will have a pointer and the
		 * rtentry will be leaked forever.
		 * In case someone else holds a reference, we are
		 * fine as we only decrement the refcount. In that
		 * case if the other entity calls RT_REMREF, we
		 * will still be leaking but at least we tried.
		 */
		RTFREE_LOCKED(rt);
		return (0);
	}
	RT_UNLOCK(rt);
	return 0;
}
static int in6_mtuexpire(struct radix_node *rn, void *rock) { struct rtentry *rt = (struct rtentry *)rn; struct mtuex_arg *ap = rock; struct timeval timenow; getmicrotime(&timenow); /* sanity */ if (!rt) panic("rt == NULL in in6_mtuexpire"); RT_LOCK(rt); if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) { if (rt->rt_rmx.rmx_expire <= timenow.tv_sec) { rt->rt_flags |= RTF_PROBEMTU; } else { ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire); } } RT_UNLOCK(rt); return 0; }
/*
 * Multipath route selection.  If the cached route in 'ro' is usable,
 * keep it; otherwise look the destination up in the given FIB and,
 * when the result heads a multipath (dupedkey) chain, pick one of the
 * equal-mask siblings by weighted Modulo-N hashing (RFC 2991).
 * On return, ro->ro_rt is either NULL or a referenced, unlocked route.
 *
 * Fix: the sibling-walk loop formerly evaluated 'weight >= rt->rt_weight'
 * BEFORE testing 'rn' for NULL; once the walk stepped past the last
 * dupedkey entry (rn == rt == NULL), re-evaluating the condition
 * dereferenced a NULL rtentry.  The NULL test is now short-circuited
 * first.
 */
void
rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
{
	struct radix_node *rn0, *rn;
	u_int32_t n;
	struct rtentry *rt;
	int64_t weight;

	/*
	 * XXX we don't attempt to lookup cached route again; what should
	 * be done for sendto(3) case?
	 */
	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
	    && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
		return;
	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);

	/* if the route does not exist or it is not multipath, don't care */
	if (ro->ro_rt == NULL)
		return;
	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
		RT_UNLOCK(ro->ro_rt);
		return;
	}

	/* beyond here, we use rn as the master copy */
	rn0 = rn = (struct radix_node *)ro->ro_rt;
	n = rn_mpath_count(rn0);

	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
	hash += hashjitter;
	hash %= n;
	/*
	 * Walk the weighted sibling list; check rn before dereferencing
	 * rt, since rt mirrors rn and both may be NULL after stepping
	 * past the end of the chain.
	 */
	for (weight = abs((int32_t)hash), rt = ro->ro_rt;
	    rn != NULL && weight >= rt->rt_weight;
	    weight -= rt->rt_weight) {
		/* stay within the multipath routes */
		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
			break;
		rn = rn->rn_dupedkey;
		rt = (struct rtentry *)rn;
	}
	/* XXX try filling rt_gwroute and avoid unreachable gw */

	/* gw selection has failed - there must be only zero weight routes */
	if (!rn) {
		RT_UNLOCK(ro->ro_rt);
		ro->ro_rt = NULL;
		return;
	}
	if (ro->ro_rt != rt) {
		/* Transfer the reference to the selected sibling */
		RTFREE_LOCKED(ro->ro_rt);
		ro->ro_rt = (struct rtentry *)rn;
		RT_LOCK(ro->ro_rt);
		RT_ADDREF(ro->ro_rt);
	}
	RT_UNLOCK(ro->ro_rt);
}
/*
 * Get rid of old routes.  When draining, this deletes everything, even when
 * the timeout is not expired yet.  This also applies if the route is dynamic
 * and there are sufficiently large number of such routes (more than a half of
 * maximum).  When updating, this makes sure that nothing has a timeout longer
 * than the current value of rtq_reallyold.
 *
 * Radix-tree walker callback; called with rnh_lock held, route unlocked.
 * Only routes previously tagged RTPRF_OURS by in6_clsroute() are touched.
 */
static int
in6_rtqkill(struct radix_node *rn, void *rock)
{
	struct rtqk_arg *ap = rock;
	struct rtentry *rt = (struct rtentry *)rn;
	int err;
	struct timeval timenow;

	getmicrotime(&timenow);
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (rt->rt_flags & RTPRF_OURS) {
		ap->found++;
		/*
		 * Kill the route if we're draining, if its timer has
		 * expired, or if it is dynamic and the dynamic-route
		 * population exceeds half the configured maximum.
		 */
		if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec ||
		    ((rt->rt_flags & RTF_DYNAMIC) != 0 &&
		    ip6_maxdynroutes >= 0 &&
		    in6dynroutes > ip6_maxdynroutes / 2)) {
			/* RTPRF_OURS is only set at refcount 0 drop */
			if (rt->rt_refcnt > 0)
				panic("rtqkill route really not free");
			/*
			 * Delete this route since we're done with it;
			 * the route may be freed afterwards, so we
			 * can no longer refer to 'rt' upon returning
			 * from rtrequest().  Safe to drop rt_lock and
			 * use rt_key, rt_gateway, since holding rnh_lock
			 * here prevents another thread from calling
			 * rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			err = rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
			if (err) {
				log(LOG_WARNING, "in6_rtqkill: error %d", err);
			} else {
				ap->killed++;
			}
		} else {
			/* Not yet due: clamp the timeout if updating */
			if (ap->updating &&
			    (rt->rt_rmx.rmx_expire - timenow.tv_sec >
			    rtq_reallyold)) {
				rt->rt_rmx.rmx_expire = timenow.tv_sec +
				    rtq_reallyold;
			}
			/* Track earliest remaining expiry for rescheduling */
			ap->nextstop = lmin(ap->nextstop,
			    rt->rt_rmx.rmx_expire);
			RT_UNLOCK(rt);
		}
	} else {
		RT_UNLOCK(rt);
	}

	return 0;
}
/* * This code is the inverse of in_clsroute: on first reference, if we * were managing the route, stop doing so and set the expiration timer * back off again. */ static struct radix_node * in_matroute(void *v_arg, struct radix_node_head *head) { struct radix_node *rn = rn_match(v_arg, head); struct rtentry *rt = (struct rtentry *)rn; if (rt) { RT_LOCK(rt); if (rt->rt_flags & RTPRF_OURS) { rt->rt_flags &= ~RTPRF_OURS; rt->rt_rmx.rmx_expire = 0; } RT_UNLOCK(rt); } return rn; }
void in_arpdrain(void *ignored_arg) { #pragma unused (ignored_arg) struct llinfo_arp *la, *ola; struct timeval timenow; lck_mtx_lock(rnh_lock); la = llinfo_arp.lh_first; getmicrotime(&timenow); while ((ola = la) != 0) { struct rtentry *rt = la->la_rt; la = la->la_le.le_next; RT_LOCK(rt); if (rt->rt_expire && rt->rt_expire <= timenow.tv_sec) arptfree(ola); /* timer has expired, clear */ else RT_UNLOCK(rt); } lck_mtx_unlock(rnh_lock); }
/*
 * Multipath-aware route allocation: reuse the cached route when it is
 * up, otherwise perform a FIB lookup and, for multipath chains, let
 * rt_mpath_selectrte() pick a sibling by hash.  On return, ro->ro_rt
 * is either NULL or a referenced, unlocked route.
 */
void
rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
{
	struct rtentry *selected;

	/*
	 * XXX we don't attempt to lookup cached route again; what should
	 * be done for sendto(3) case?
	 */
	if (ro->ro_rt != NULL && ro->ro_rt->rt_ifp != NULL &&
	    (ro->ro_rt->rt_flags & RTF_UP) != 0 &&
	    RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
		return;

	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);

	/* if the route does not exist or it is not multipath, don't care */
	if (ro->ro_rt == NULL)
		return;
	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
		RT_UNLOCK(ro->ro_rt);
		return;
	}

	/* Hash-based gateway selection among the multipath siblings */
	selected = rt_mpath_selectrte(ro->ro_rt, hash);

	/* XXX try filling rt_gwroute and avoid unreachable gw */

	/* gw selection has failed - there must be only zero weight routes */
	if (selected == NULL) {
		RT_UNLOCK(ro->ro_rt);
		ro->ro_rt = NULL;
		return;
	}
	if (ro->ro_rt != selected) {
		/* Transfer the reference over to the chosen sibling */
		RTFREE_LOCKED(ro->ro_rt);
		ro->ro_rt = selected;
		RT_LOCK(ro->ro_rt);
		RT_ADDREF(ro->ro_rt);
	}
	RT_UNLOCK(ro->ro_rt);
}
/*
 * Radix-tree walker callback for interface-address teardown: expunge
 * every route bound to the dying ifaddr (all of them when args->del is
 * set, otherwise all but RTF_STATIC routes).
 */
static int
in_ifadownkill(struct radix_node *rn, void *xap)
{
	struct in_ifadown_arg *args = xap;
	struct rtentry *route = (struct rtentry *)rn;
	int victim;

	RT_LOCK(route);
	victim = (route->rt_ifa == args->ifa) &&
	    (args->del || !(route->rt_flags & RTF_STATIC));
	if (victim) {
		/*
		 * We need to disable the automatic prune that happens
		 * in this case in rtrequest() because it will blow
		 * away the pointers that rn_walktree() needs in order
		 * to continue our descent.  We will end up deleting all
		 * the routes that rtrequest() would have in any case,
		 * so that behavior is not needed there.
		 */
		route->rt_flags &= ~RTF_CLONING;
		rtexpunge(route);
	}
	RT_UNLOCK(route);
	return 0;
}
/*
 * Parallel to llc_rtrequest.
 *
 * Route-table hook for ARP: invoked by the routing code on RTM_ADD,
 * RTM_RESOLVE and RTM_DELETE for routes on ARP-capable interfaces.
 * Sets up or tears down the per-route llinfo_arp state and the
 * AF_LINK gateway sockaddr.  Called with rnh_lock held and rt_lock
 * held on the route; temporarily drops rt_lock around DLIL calls.
 */
static void
arp_rtrequest(
	int req,
	struct rtentry *rt,
	__unused struct sockaddr *sa)
{
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_arp *la = rt->rt_llinfo;
	static struct sockaddr_dl null_sdl =
	    {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}};
	struct timeval timenow;

	if (!arpinit_done) {
		panic("%s: ARP has not been initialized", __func__);
		/* NOTREACHED */
	}
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/* Gateway routes are resolved via their gateway, not ARP */
	if (rt->rt_flags & RTF_GATEWAY)
		return;
	getmicrotime(&timenow);
	switch (req) {
	case RTM_ADD:
		/*
		 * XXX: If this is a manually added route to interface
		 * such as older version of routed or gated might provide,
		 * restore cloning bit.
		 */
		if ((rt->rt_flags & RTF_HOST) == 0 &&
		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
			rt->rt_flags |= RTF_CLONING;
		if (rt->rt_flags & RTF_CLONING) {
			/*
			 * Case 1: This route should come from a route to iface.
			 */
			if (rt_setgate(rt, rt_key(rt),
			    (struct sockaddr *)&null_sdl) == 0) {
				gate = rt->rt_gateway;
				SDL(gate)->sdl_type = rt->rt_ifp->if_type;
				SDL(gate)->sdl_index = rt->rt_ifp->if_index;
				/*
				 * In case we're called before 1.0 sec.
				 * has elapsed.
				 */
				rt->rt_expire = MAX(timenow.tv_sec, 1);
			}
			break;
		}
		/* Announce a new entry if requested. */
		if (rt->rt_flags & RTF_ANNOUNCE) {
			/* Drop rt_lock across the DLIL transmit path */
			RT_UNLOCK(rt);
			dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
			    SDL(gate), rt_key(rt), NULL, rt_key(rt));
			RT_LOCK(rt);
		}
		/*FALLTHROUGH*/
	case RTM_RESOLVE:
		if (gate->sa_family != AF_LINK ||
		    gate->sa_len < sizeof(null_sdl)) {
			if (log_arp_warnings)
				log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
			break;
		}
		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
		if (la != 0)
			break; /* This happens on a route change */
		/*
		 * Case 2: This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		rt->rt_llinfo = la = arp_llinfo_alloc();
		if (la == NULL) {
			if (log_arp_warnings)
				log(LOG_DEBUG, "%s: malloc failed\n", __func__);
			break;
		}
		rt->rt_llinfo_free = arp_llinfo_free;
		arp_inuse++, arp_allocated++;
		Bzero(la, sizeof(*la));
		la->la_rt = rt;
		rt->rt_flags |= RTF_LLINFO;
		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);

		/*
		 * This keeps the multicast addresses from showing up
		 * in `arp -a' listings as unresolved.  It's not actually
		 * functional.  Then the same for broadcast.
		 */
		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
			RT_UNLOCK(rt);
			dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
			    sizeof(struct sockaddr_dl));
			RT_LOCK(rt);
			/* Multicast mappings never expire */
			rt->rt_expire = 0;
		} else if (in_broadcast(SIN(rt_key(rt))->sin_addr,
		    rt->rt_ifp)) {
			struct sockaddr_dl *gate_ll = SDL(gate);
			size_t broadcast_len;

			/* Fill in the interface's link-layer broadcast addr */
			ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
			    LLADDR(gate_ll), sizeof(gate_ll->sdl_data),
			    &broadcast_len);
			gate_ll->sdl_alen = broadcast_len;
			gate_ll->sdl_family = AF_LINK;
			gate_ll->sdl_len = sizeof(struct sockaddr_dl);
			/* In case we're called before 1.0 sec. has elapsed */
			rt->rt_expire = MAX(timenow.tv_sec, 1);
		}

		if (SIN(rt_key(rt))->sin_addr.s_addr ==
		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
			/*
			 * This test used to be
			 *	if (loif.if_flags & IFF_UP)
			 * It allowed local traffic to be forced
			 * through the hardware by configuring the loopback
			 * down.  However, it causes problems during network
			 * configuration for boards that can't receive packets
			 * they send.  It is now necessary to clear
			 * "useloopback" and remove the route to force
			 * traffic out to the hardware.
			 */
			rt->rt_expire = 0;
			/*
			 * NOTE(review): hard-codes a 6-byte (Ethernet-style)
			 * link-layer address length — confirm behavior for
			 * non-Ethernet interface types.
			 */
			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
			    SDL(gate)->sdl_alen = 6);
			if (useloopback) {
#if IFNET_ROUTE_REFCNT
				/* Adjust route ref count for the interfaces */
				if (rt->rt_if_ref_fn != NULL &&
				    rt->rt_ifp != lo_ifp) {
					rt->rt_if_ref_fn(lo_ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
#endif /* IFNET_ROUTE_REFCNT */
				rt->rt_ifp = lo_ifp;
			}
		}
		break;

	case RTM_DELETE:
		if (la == 0)
			break;
		arp_inuse--;
		/*
		 * Unchain it but defer the actual freeing until the route
		 * itself is to be freed.  rt->rt_llinfo still points to
		 * llinfo_arp, and likewise, la->la_rt still points to this
		 * route entry, except that RTF_LLINFO is now cleared.
		 */
		LIST_REMOVE(la, la_le);
		la->la_le.le_next = NULL;
		la->la_le.le_prev = NULL;
		rt->rt_flags &= ~RTF_LLINFO;
		/* Drop any packet queued awaiting this resolution */
		if (la->la_hold != NULL)
			m_freem(la->la_hold);
		la->la_hold = NULL;
	}
}
/* * Fragment input * NOTE: this function is called with the inet6_domain_mutex held from ip6_input. * inet6_domain_mutex is protecting he frag6 queue manipulation. */ int frag6_input(struct mbuf **mp, int *offp, int proto) { #pragma unused(proto) struct mbuf *m = *mp, *t; struct ip6_hdr *ip6; struct ip6_frag *ip6f; struct ip6q *q6; struct ip6asfrag *af6, *ip6af, *af6dwn; int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ struct ifnet *dstifp; struct ifaddr *ifa = NULL; u_int8_t ecn, ecn0; #ifdef IN6_IFSTAT_STRICT struct route_in6 ro; struct sockaddr_in6 *dst; #endif /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), return IPPROTO_DONE); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); #else IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); if (ip6f == NULL) return IPPROTO_DONE; #endif dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. 
*/ bzero(&ro, sizeof (ro)); dst = (struct sockaddr_in6 *)&ro.ro_dst; dst->sin6_family = AF_INET6; dst->sin6_len = sizeof (struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; rtalloc((struct route *)&ro); if (ro.ro_rt != NULL) { RT_LOCK(ro.ro_rt); if ((ifa = ro.ro_rt->rt_ifa) != NULL) { IFA_ADDREF(ifa); dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; } RT_UNLOCK(ro.ro_rt); rtfree(ro.ro_rt); ro.ro_rt = NULL; } #else /* we are violating the spec, this is not the destination interface */ if ((m->m_flags & M_PKTHDR) != 0) dstifp = m->m_pkthdr.rcvif; #endif /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); if (ifa != NULL) IFA_REMREF(ifa); return IPPROTO_DONE; } /* * check whether fragment packet's fragment length is * multiple of 8 octets. * sizeof(struct ip6_frag) == 8 * sizeof(struct ip6_hdr) = 40 */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); if (ifa != NULL) IFA_REMREF(ifa); return IPPROTO_DONE; } ip6stat.ip6s_fragments++; in6_ifstat_inc(dstifp, ifs6_reass_reqd); /* offset now points to data portion */ offset += sizeof(struct ip6_frag); frag6_doing_reass = 1; /* * Enforce upper bound on number of fragments. * If maxfrag is 0, never accept fragments. * If maxfrag is -1, accept all fragments without limitation. */ if (ip6_maxfrags < 0) ; else if (frag6_nfrags >= (u_int)ip6_maxfrags) goto dropfrag; for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)) break; if (q6 == &ip6q) { /* * the first fragment to arrive, create a reassembly queue. 
*/ first_frag = 1; /* * Enforce upper bound on number of fragmented packets * for which we attempt reassembly; * If maxfrag is 0, never accept fragments. * If maxfrag is -1, accept all fragments without limitation. */ if (ip6_maxfragpackets < 0) ; else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) goto dropfrag; frag6_nfragpackets++; q6 = (struct ip6q *)_MALLOC(sizeof(struct ip6q), M_FTABLE, M_DONTWAIT); if (q6 == NULL) goto dropfrag; bzero(q6, sizeof(*q6)); frag6_insque(q6, &ip6q); /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; #ifdef notyet q6->ip6q_nxtp = (u_char *)nxtp; #endif q6->ip6q_ident = ip6f->ip6f_ident; q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ q6->ip6q_nfrag = 0; }
/*
 * Accept service requests received via socket(4) in AF_ROUTE.
 *
 * o RTAX_DST holds key x for fec = < x, nh > where nh -> ifp
 *
 * o RTAX_GATEWAY holds the MPLS label binding as a sockaddr_ftn{},
 *   a < seg_i , seg_j > tuple where seg_i denotes the key for the
 *   ilm generated by the nhlfe, and seg_j denotes (typically) the
 *   upstream label for downstream transmission by interface ifp at
 *   the link layer.
 *
 * o The flags contained in the rt_addrinfo{} Service Primitive (spi)
 *   encode the linked MPLS label binding operation:
 *
 *   RTF_{POP|PUSH|SWAP} - self-explanatory.
 *
 *   RTF_MPE, denotes initial label of Label Switch Path.
 *
 *   RTF_STK, denotes label stacking, but not yet fully
 *   implemented.
 */
int
mpls_rt_output_fib(struct rt_msghdr *rtm, struct rt_addrinfo *rti,
    struct rtentry **rt, u_int fibnum)
{
	struct rtentry *fec = NULL;
	struct ifnet *ifp = NULL;
	struct mpls_aliasreq ifra;
	int error = 0, cmd = 0;

#ifdef MPLS_DEBUG
	(void)printf("%s\n", __func__);
#endif /* MPLS_DEBUG */

	/* Validate sizes/family of the caller-supplied sockaddrs first */
	if (rti_dst(rti)->sa_len > sizeof(ifra.ifra_x)) {
		log(LOG_INFO, "%s: destination x in fec invalid\n", __func__);
		error = EMSGSIZE;
		goto out;
	}

	if (rti_gateway(rti)->sa_family != AF_MPLS) {
		log(LOG_INFO, "%s: segment invalid\n", __func__);
		error = EINVAL;
		goto out;
	}

	if (rti_gateway(rti)->sa_len > sizeof(ifra.ifra_seg)) {
		log(LOG_INFO, "%s: segment invalid\n", __func__);
		error = EMSGSIZE;
		goto out;
	}

	/*
	 * Fetch interface by Forward Equivalence Class (fec).
	 * The fec must be up and fully formed (gateway, ifp, ifa).
	 */
	fec = rtalloc1_fib(rti_dst(rti), 0, 0UL, fibnum);
	if ((fec == NULL) ||
	    (fec->rt_gateway == NULL) ||
	    ((ifp = fec->rt_ifp) == NULL) ||
	    (fec->rt_ifa == NULL) ||
	    ((fec->rt_flags & RTF_UP) == 0)) {
		error = ESRCH;
		goto out;
	}

	bzero(&ifra, sizeof(ifra));
	bcopy(rti_dst(rti), &ifra.ifra_x, rti_dst(rti)->sa_len);

	switch ((int)rtm->rtm_type) {
	case RTM_ADD:
		/*
		 * Apply MPLS label binding on Forward Equivalence Class (fec).
		 */
		cmd = SIOCAIFADDR;
		/* FALLTHROUGH */
	case RTM_DELETE:
		/*
		 * Delete MPLS label binding on fec (unless we fell
		 * through from RTM_ADD, in which case cmd is set).
		 */
		cmd = (cmd == 0) ? SIOCDIFADDR : cmd;
		/*
		 * Perform MPLS control operations on interface-layer.
		 * Drop fec's lock across mpls_control(), which may sleep.
		 */
		bcopy(rti_gateway(rti), &ifra.ifra_seg,
		    rti_gateway(rti)->sa_len);
		ifra.ifra_flags = rti_flags(rti);
		RT_UNLOCK(fec);
		error = mpls_control(NULL, cmd, (caddr_t)&ifra, ifp, NULL);
		RT_LOCK(fec);
		break;
	case RTM_GET:
		/*
		 * XXX: looks ugly... I'll delegate this operation
		 * XXX: back to rt_output, but I'm not yet sure, if
		 * XXX: I'll should do that...
		 */
		ifra.ifra_seg.sftn_len = SMPLS_LEN;
		ifra.ifra_seg.sftn_family = AF_MPLS;
		((struct sockaddr_mpls *)&ifra.ifra_seg)->smpls_label =
		    satosmpls_label(rti_gateway(rti));
		/*
		 * Fetch Incoming Label Map (ilm) by MPLS label binding on fec.
		 * NOTE(review): ifra was bzero'd and ifra_flags is not set on
		 * this path, so (ifra_flags & RTF_MPE) is always 0 here —
		 * confirm whether rti_flags(rti) was meant to be consulted.
		 */
		*rt = ((ifra.ifra_flags & RTF_MPE) == 0) ?
		    rtalloc1_fib((struct sockaddr *)&ifra.ifra_seg, 0, 0UL,
		    ifp->if_fib) : NULL;
		if (*rt != NULL) {
			/*
			 * Update the Service Data Unit (sdu) used by
			 * socket(2) on route(4).
			 */
			bcopy(rt_key(*rt), rti_dst(rti), rt_key(*rt)->sa_len);
			bcopy((*rt)->rt_gateway, rti_gateway(rti),
			    (*rt)->rt_gateway->sa_len);
		} else
			error = EADDRNOTAVAIL;
		break;
	default:
		log(LOG_INFO, "%s: command invalid\n", __func__);
		error = EOPNOTSUPP;
		break;
	}
out:
	if (fec != NULL)
		RTFREE_LOCKED(fec);

	return (error);
}
/*
 * Resolve the route to use for transmitting to net_dest: either the
 * caller-supplied hint itself, or — when the hint is a gateway route —
 * the route to that gateway.  On success (*out_route non-NULL), the
 * returned route is locked and holds an extra reference that the
 * caller must release.
 */
__private_extern__ errno_t
arp_route_to_gateway_route(const struct sockaddr *net_dest, route_t hint0,
    route_t *out_route)
{
	struct timeval timenow;
	route_t rt = hint0, hint = hint0;
	errno_t error = 0;

	*out_route = NULL;

	/*
	 * Next hop determination.  Because we may involve the gateway route
	 * in addition to the original route, locking is rather complicated.
	 * The general concept is that regardless of whether the route points
	 * to the original route or to the gateway route, this routine takes
	 * an extra reference on such a route.  This extra reference will be
	 * released at the end.
	 *
	 * Care must be taken to ensure that the "hint0" route never gets freed
	 * via rtfree(), since the caller may have stored it inside a struct
	 * route with a reference held for that placeholder.
	 */
	if (rt != NULL) {
		unsigned int ifindex;

		RT_LOCK_SPIN(rt);
		ifindex = rt->rt_ifp->if_index;
		RT_ADDREF_LOCKED(rt);
		if (!(rt->rt_flags & RTF_UP)) {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			/* route is down, find a new one */
			hint = rt = rtalloc1_scoped((struct sockaddr *)
			    (size_t)net_dest, 1, 0, ifindex);
			if (hint != NULL) {
				RT_LOCK_SPIN(rt);
				ifindex = rt->rt_ifp->if_index;
			} else {
				senderr(EHOSTUNREACH);
			}
		}

		/*
		 * We have a reference to "rt" by now; it will either
		 * be released or freed at the end of this routine.
		 */
		RT_LOCK_ASSERT_HELD(rt);
		if (rt->rt_flags & RTF_GATEWAY) {
			struct rtentry *gwrt = rt->rt_gwroute;
			struct sockaddr_in gw;

			/* If there's no gateway rt, look it up */
			if (gwrt == NULL) {
				/* Copy the gateway addr before dropping lock */
				gw = *((struct sockaddr_in *)rt->rt_gateway);
				RT_UNLOCK(rt);
				goto lookup;
			}
			/* Become a regular mutex */
			RT_CONVERT_LOCK(rt);

			/*
			 * Take gwrt's lock while holding route's lock;
			 * this is okay since gwrt never points back
			 * to "rt", so no lock ordering issues.
			 */
			RT_LOCK_SPIN(gwrt);
			if (!(gwrt->rt_flags & RTF_UP)) {
				struct rtentry *ogwrt;

				/* Stale gateway route: detach and replace */
				rt->rt_gwroute = NULL;
				RT_UNLOCK(gwrt);
				gw = *((struct sockaddr_in *)rt->rt_gateway);
				RT_UNLOCK(rt);
				rtfree(gwrt);
lookup:
				gwrt = rtalloc1_scoped(
				    (struct sockaddr *)&gw, 1, 0, ifindex);

				RT_LOCK(rt);
				/*
				 * Bail out if the route is down, no route
				 * to gateway, circular route, or if the
				 * gateway portion of "rt" has changed.
				 */
				if (!(rt->rt_flags & RTF_UP) ||
				    gwrt == NULL || gwrt == rt ||
				    !equal(SA(&gw), rt->rt_gateway)) {
					if (gwrt == rt) {
						RT_REMREF_LOCKED(gwrt);
						gwrt = NULL;
					}
					RT_UNLOCK(rt);
					if (gwrt != NULL)
						rtfree(gwrt);
					senderr(EHOSTUNREACH);
				}

				/* Remove any existing gwrt */
				ogwrt = rt->rt_gwroute;
				if ((rt->rt_gwroute = gwrt) != NULL)
					RT_ADDREF(gwrt);

				/* Clean up "rt" now while we can */
				if (rt == hint0) {
					RT_REMREF_LOCKED(rt);
					RT_UNLOCK(rt);
				} else {
					RT_UNLOCK(rt);
					rtfree(rt);
				}
				rt = gwrt;
				/* Now free the replaced gwrt */
				if (ogwrt != NULL)
					rtfree(ogwrt);
				/* If still no route to gateway, bail out */
				if (rt == NULL)
					senderr(EHOSTUNREACH);
			} else {
				/* Gateway route is up: take a ref and use it */
				RT_ADDREF_LOCKED(gwrt);
				RT_UNLOCK(gwrt);
				/* Clean up "rt" now while we can */
				if (rt == hint0) {
					RT_REMREF_LOCKED(rt);
					RT_UNLOCK(rt);
				} else {
					RT_UNLOCK(rt);
					rtfree(rt);
				}
				rt = gwrt;
			}

			/* rt == gwrt; if it is now down, give up */
			RT_LOCK_SPIN(rt);
			if (!(rt->rt_flags & RTF_UP)) {
				RT_UNLOCK(rt);
				senderr(EHOSTUNREACH);
			}
		}

		/* Honor RTF_REJECT while its expiration has not passed */
		if (rt->rt_flags & RTF_REJECT) {
			getmicrotime(&timenow);
			if (rt->rt_rmx.rmx_expire == 0 ||
			    timenow.tv_sec < rt->rt_rmx.rmx_expire) {
				RT_UNLOCK(rt);
				senderr(rt == hint ? EHOSTDOWN :
				    EHOSTUNREACH);
			}
		}

		/* Become a regular mutex */
		RT_CONVERT_LOCK(rt);

		/* Caller is responsible for cleaning up "rt" */
		*out_route = rt;
	}
	return (0);

bad:
	/* Clean up route (either it is "rt" or "gwrt") */
	if (rt != NULL) {
		RT_LOCK_SPIN(rt);
		if (rt == hint0) {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
		} else {
			RT_UNLOCK(rt);
			rtfree(rt);
		}
	}
	return (error);
}
/*
 * arp_lookup_route will lookup the route for a given address.
 *
 * The address must be for a host on a local network on this interface.
 * If the returned route is non-NULL, the route is locked and the caller
 * is responsible for unlocking it and releasing its reference.
 *
 * 'create' is passed through to the route lookup (clone-on-miss);
 * 'proxy' selects the SIN_PROXY variant of the key.  Returns 0 on
 * success, or ENETUNREACH / ENOMEM / EPROTONOSUPPORT on failure.
 */
static errno_t
arp_lookup_route(const struct in_addr *addr, int create, int proxy,
    route_t *route, unsigned int ifscope)
{
	struct sockaddr_inarp sin =
	    {sizeof(sin), AF_INET, 0, {0}, {0}, 0, 0};
	const char *why = NULL;
	errno_t error = 0;
	route_t rt;

	*route = NULL;

	sin.sin_addr.s_addr = addr->s_addr;
	sin.sin_other = proxy ? SIN_PROXY : 0;

	rt = rtalloc1_scoped((struct sockaddr*)&sin, create, 0, ifscope);
	if (rt == NULL)
		return (ENETUNREACH);

	RT_LOCK(rt);

	/* The route must be a local, ARP-managed (llinfo) host route */
	if (rt->rt_flags & RTF_GATEWAY) {
		why = "host is not on local network";
		error = ENETUNREACH;
	} else if (!(rt->rt_flags & RTF_LLINFO)) {
		why = "could not allocate llinfo";
		error = ENOMEM;
	} else if (rt->rt_gateway->sa_family != AF_LINK) {
		why = "gateway route is not ours";
		error = EPROTONOSUPPORT;
	}

	if (error != 0) {
		if (create && log_arp_warnings) {
			char tmp[MAX_IPv4_STR_LEN];
			log(LOG_DEBUG, "arplookup link#%d %s failed: %s\n",
			    ifscope, inet_ntop(AF_INET, addr, tmp,
			    sizeof (tmp)), why);
		}

		/*
		 * If there are no references to this route, and it is
		 * a cloned route, and not static, and ARP had created
		 * the route, then purge it from the routing table as
		 * it is probably bogus.
		 */
		if (rt->rt_refcnt == 1 &&
		    (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) ==
		    RTF_WASCLONED) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
			    rt_mask(rt), rt->rt_flags, 0);
			rtfree(rt);
		} else {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
		}
		return (error);
	}

	/*
	 * Caller releases reference and does RT_UNLOCK(rt).
	 */
	*route = rt;
	return (0);
}
/*
 * Forward an IPv6 packet received on one interface out another.
 *
 * m             - the packet; consumed on all paths except the proxy case,
 *                 where it is handed back to the caller for local input.
 * ip6forward_rt - caller-supplied route cache for the destination.
 * srcrt         - non-zero if the packet was source-routed (suppresses
 *                 redirects and proxy handling).
 *
 * Returns the mbuf when the packet should instead be processed locally
 * (prefix-proxy case), otherwise NULL.  Takes an extra reference on the
 * cached route for the duration of the function; most exit paths release
 * it explicitly with RT_REMREF/RT_REMREF_LOCKED.
 */
struct mbuf *
ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
    int srcrt)
{
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct sockaddr_in6 *dst;
	struct rtentry *rt;
	int error, type = 0, code = 0;
	boolean_t proxy = FALSE;
	struct mbuf *mcopy = NULL;	/* truncated copy kept for ICMPv6 errors */
	struct ifnet *ifp, *origifp;	/* maybe unnecessary */
	u_int32_t inzone, outzone;
	struct in6_addr src_in6, dst_in6;
#if IPSEC
	struct secpolicy *sp = NULL;
#endif
	struct timeval timenow;
	unsigned int ifscope = IFSCOPE_NONE;
#if PF
	struct pf_mtag *pf_mtag;
#endif /* PF */

	getmicrotime(&timenow);
#if PF
	pf_mtag = pf_find_mtag(m);
	if (pf_mtag != NULL && pf_mtag->pftag_rtableid != IFSCOPE_NONE)
		ifscope = pf_mtag->pftag_rtableid;

	/*
	 * If the caller provides a route which is on a different interface
	 * than the one specified for scoped forwarding, discard the route
	 * and do a lookup below.
	 */
	if (ifscope != IFSCOPE_NONE && (rt = ip6forward_rt->ro_rt) != NULL) {
		RT_LOCK(rt);
		if (rt->rt_ifp->if_index != ifscope) {
			RT_UNLOCK(rt);
			rtfree(rt);
			rt = ip6forward_rt->ro_rt = NULL;
		} else {
			RT_UNLOCK(rt);
		}
	}
#endif /* PF */

#if IPSEC
	/*
	 * Check AH/ESP integrity.
	 */
	/*
	 * Don't increment ip6s_cantforward because this is the check
	 * before forwarding packet actually.
	 */
	if (ipsec_bypass == 0) {
		if (ipsec6_in_reject(m, NULL)) {
			IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
			m_freem(m);
			return (NULL);
		}
	}
#endif /*IPSEC*/

	/*
	 * Do not forward packets to multicast destination (should be handled
	 * by ip6_mforward().
	 * Do not forward packets with unspecified source.  It was discussed
	 * in July 2000, on ipngwg mailing list.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 ||
	    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
		ip6stat.ip6s_cantforward++;
		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
		if (ip6_log_time + ip6_log_interval < timenow.tv_sec) {
			ip6_log_time = timenow.tv_sec;
			log(LOG_DEBUG,
			    "cannot forward "
			    "from %s to %s nxt %d received on %s\n",
			    ip6_sprintf(&ip6->ip6_src),
			    ip6_sprintf(&ip6->ip6_dst),
			    ip6->ip6_nxt,
			    if_name(m->m_pkthdr.rcvif));
		}
		m_freem(m);
		return (NULL);
	}

	/* Hop limit exhausted: report back to the source per RFC 4443 */
	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
		icmp6_error(m, ICMP6_TIME_EXCEEDED,
		    ICMP6_TIME_EXCEED_TRANSIT, 0);
		return (NULL);
	}

	/*
	 * See if the destination is a proxied address, and if so pretend
	 * that it's for us.  This is mostly to handle NUD probes against
	 * the proxied addresses.  We filter for ICMPv6 here and will let
	 * icmp6_input handle the rest.
	 */
	if (!srcrt && nd6_prproxy) {
		VERIFY(!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst));
		proxy = nd6_prproxy_isours(m, ip6, ip6forward_rt, ifscope);

		/*
		 * Don't update hop limit while proxying; RFC 4389 4.1.
		 * Also skip IPsec forwarding path processing as this
		 * packet is not to be forwarded.
		 */
		if (proxy)
			goto skip_ipsec;
	}

	ip6->ip6_hlim -= IPV6_HLIMDEC;

	/*
	 * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU -
	 * size of IPv6 + ICMPv6 headers) bytes of the packet in case
	 * we need to generate an ICMP6 message to the src.
	 * Thanks to M_EXT, in most cases copy will not occur.
	 *
	 * It is important to save it before IPsec processing as IPsec
	 * processing may modify the mbuf.
	 */
	mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN));

#if IPSEC
	if (ipsec_bypass != 0)
		goto skip_ipsec;
	/* get a security policy for this packet */
	sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, IP_FORWARDING,
	    &error);
	if (sp == NULL) {
		IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
		ip6stat.ip6s_cantforward++;
		if (mcopy) {
#if 0
			/* XXX: what icmp ? */
#else
			m_freem(mcopy);
#endif
		}
		m_freem(m);
		return (NULL);
	}

	error = 0;

	/* check policy */
	switch (sp->policy) {
	case IPSEC_POLICY_DISCARD:
	case IPSEC_POLICY_GENERATE:
		/*
		 * This packet is just discarded.
		 */
		IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
		ip6stat.ip6s_cantforward++;
		key_freesp(sp, KEY_SADB_UNLOCKED);
		if (mcopy) {
#if 0
			/* XXX: what icmp ? */
#else
			m_freem(mcopy);
#endif
		}
		m_freem(m);
		return (NULL);

	case IPSEC_POLICY_BYPASS:
	case IPSEC_POLICY_NONE:
		/* no need to do IPsec. */
		key_freesp(sp, KEY_SADB_UNLOCKED);
		goto skip_ipsec;

	case IPSEC_POLICY_IPSEC:
		if (sp->req == NULL) {
			/* XXX should be panic ? */
			printf("ip6_forward: No IPsec request specified.\n");
			ip6stat.ip6s_cantforward++;
			key_freesp(sp, KEY_SADB_UNLOCKED);
			if (mcopy) {
#if 0
				/* XXX: what icmp ? */
#else
				m_freem(mcopy);
#endif
			}
			m_freem(m);
			return (NULL);
		}
		/* do IPsec */
		break;

	case IPSEC_POLICY_ENTRUST:
	default:
		/* should be panic ?? */
		printf("ip6_forward: Invalid policy found. %d\n", sp->policy);
		key_freesp(sp, KEY_SADB_UNLOCKED);
		goto skip_ipsec;
	}

    {
	struct ipsec_output_state state;

	/*
	 * All the extension headers will become inaccessible
	 * (since they can be encrypted).
	 * Don't panic, we need no more updates to extension headers
	 * on inner IPv6 packet (since they are now encapsulated).
	 *
	 * IPv6 [ESP|AH] IPv6 [extension headers] payload
	 */
	bzero(&state, sizeof(state));
	state.m = m;
	state.dst = NULL;	/* update at ipsec6_output_tunnel() */

	error = ipsec6_output_tunnel(&state, sp, 0);
	key_freesp(sp, KEY_SADB_UNLOCKED);
	if (state.tunneled == 4)
		return (NULL);	/* packet is gone - sent over IPv4 */

	m = state.m;
	if (state.ro.ro_rt) {
		rtfree(state.ro.ro_rt);
		state.ro.ro_rt = NULL;
	}
	if (error) {
		/* mbuf is already reclaimed in ipsec6_output_tunnel. */
		switch (error) {
		case EHOSTUNREACH:
		case ENETUNREACH:
		case EMSGSIZE:
		case ENOBUFS:
		case ENOMEM:
			break;
		default:
			printf("ip6_output (ipsec): error code %d\n", error);
			/* fall through */
		case ENOENT:
			/* don't show these error codes to the user */
			break;
		}
		ip6stat.ip6s_cantforward++;
		if (mcopy) {
#if 0
			/* XXX: what icmp ? */
#else
			m_freem(mcopy);
#endif
		}
		m_freem(m);
		return (NULL);
	}
    }
skip_ipsec:
#endif /* IPSEC */

	dst = (struct sockaddr_in6 *)&ip6forward_rt->ro_dst;
	if ((rt = ip6forward_rt->ro_rt) != NULL) {
		RT_LOCK(rt);
		/* Take an extra ref for ourselves */
		RT_ADDREF_LOCKED(rt);
	}

	if (!srcrt) {
		/*
		 * ip6forward_rt->ro_dst.sin6_addr is equal to ip6->ip6_dst
		 */
		if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
		    rt->generation_id != route_generation) {
			if (rt != NULL) {
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				RT_UNLOCK(rt);
				rtfree(rt);
				ip6forward_rt->ro_rt = NULL;
			}
			/* this probably fails but give it a try again */
			rtalloc_scoped_ign((struct route *)ip6forward_rt,
			    RTF_PRCLONING, ifscope);
			if ((rt = ip6forward_rt->ro_rt) != NULL) {
				RT_LOCK(rt);
				/* Take an extra ref for ourselves */
				RT_ADDREF_LOCKED(rt);
			}
		}

		if (rt == NULL) {
			ip6stat.ip6s_noroute++;
			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
			if (mcopy)
				icmp6_error(mcopy, ICMP6_DST_UNREACH,
				    ICMP6_DST_UNREACH_NOROUTE, 0);
			m_freem(m);
			return (NULL);
		}
		RT_LOCK_ASSERT_HELD(rt);
	} else if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
	    !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr) ||
	    rt->generation_id != route_generation) {
		/* Cached route is stale or for a different destination */
		if (rt != NULL) {
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			rtfree(rt);
			ip6forward_rt->ro_rt = NULL;
		}
		bzero(dst, sizeof(*dst));
		dst->sin6_len = sizeof(struct sockaddr_in6);
		dst->sin6_family = AF_INET6;
		dst->sin6_addr = ip6->ip6_dst;

		rtalloc_scoped_ign((struct route *)ip6forward_rt,
		    RTF_PRCLONING, ifscope);
		if ((rt = ip6forward_rt->ro_rt) == NULL) {
			ip6stat.ip6s_noroute++;
			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
			if (mcopy)
				icmp6_error(mcopy, ICMP6_DST_UNREACH,
				    ICMP6_DST_UNREACH_NOROUTE, 0);
			m_freem(m);
			return (NULL);
		}
		RT_LOCK(rt);
		/* Take an extra ref for ourselves */
		RT_ADDREF_LOCKED(rt);
	}

	/*
	 * Source scope check: if a packet can't be delivered to its
	 * destination for the reason that the destination is beyond the scope
	 * of the source address, discard the packet and return an icmp6
	 * destination unreachable error with Code 2 (beyond scope of source
	 * address) unless we are proxying (source address is link local
	 * for NUDs.)  We use a local copy of ip6_src, since in6_setscope()
	 * will possibly modify its first argument.
	 * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1]
	 */
	src_in6 = ip6->ip6_src;
	if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) {
		/* XXX: this should not happen */
		ip6stat.ip6s_cantforward++;
		ip6stat.ip6s_badscope++;
		/*
		 * NOTE(review): unlike the other error paths in this
		 * function, this return appears to leave rt locked with
		 * our extra reference held, and mcopy unreleased —
		 * confirm whether these are intentional (the path is
		 * believed unreachable per the XXX above) or leaks.
		 */
		m_freem(m);
		return (NULL);
	}
	if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) {
		ip6stat.ip6s_cantforward++;
		ip6stat.ip6s_badscope++;
		/* NOTE(review): same apparent rt lock/ref and mcopy leak as above */
		m_freem(m);
		return (NULL);
	}
	if (inzone != outzone && !proxy) {
		ip6stat.ip6s_cantforward++;
		ip6stat.ip6s_badscope++;
		in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);

		if (ip6_log_time + ip6_log_interval < timenow.tv_sec) {
			ip6_log_time = timenow.tv_sec;
			log(LOG_DEBUG,
			    "cannot forward "
			    "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
			    ip6_sprintf(&ip6->ip6_src),
			    ip6_sprintf(&ip6->ip6_dst),
			    ip6->ip6_nxt,
			    if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp));
		}
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		if (mcopy) {
			icmp6_error(mcopy, ICMP6_DST_UNREACH,
				    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
		}
		m_freem(m);
		return (NULL);
	}

	/*
	 * Destination scope check: if a packet is going to break the scope
	 * zone of packet's destination address, discard it.  This case should
	 * usually be prevented by appropriately-configured routing table, but
	 * we need an explicit check because we may mistakenly forward the
	 * packet to a different zone by (e.g.) a default route.
	 */
	dst_in6 = ip6->ip6_dst;
	if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 ||
	    in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 ||
	    inzone != outzone) {
		ip6stat.ip6s_cantforward++;
		ip6stat.ip6s_badscope++;
		/* NOTE(review): same apparent rt lock/ref and mcopy leak as above */
		m_freem(m);
		return (NULL);
	}

	/* Packet exceeds outgoing link MTU: send Packet Too Big (no IPv6 frag in transit) */
	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
		in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
		if (mcopy) {
			uint32_t mtu;
#if IPSEC
			struct secpolicy *sp2;
			int ipsecerror;
			size_t ipsechdrsiz;
#endif

			mtu = rt->rt_ifp->if_mtu;
#if IPSEC
			/*
			 * When we do IPsec tunnel ingress, we need to play
			 * with the link value (decrement IPsec header size
			 * from mtu value).  The code is much simpler than v4
			 * case, as we have the outgoing interface for
			 * encapsulated packet as "rt->rt_ifp".
			 */
			sp2 = ipsec6_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
			    IP_FORWARDING, &ipsecerror);
			if (sp2) {
				ipsechdrsiz = ipsec6_hdrsiz(mcopy,
				    IPSEC_DIR_OUTBOUND, NULL);
				if (ipsechdrsiz < mtu)
					mtu -= ipsechdrsiz;
				key_freesp(sp2, KEY_SADB_UNLOCKED);
			}
			/*
			 * if mtu becomes less than minimum MTU,
			 * tell minimum MTU (and I'll need to fragment it).
			 */
			if (mtu < IPV6_MMTU)
				mtu = IPV6_MMTU;
#endif
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
		} else {
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
		}
		m_freem(m);
		return (NULL);
	}

	if (rt->rt_flags & RTF_GATEWAY)
		dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway;

	/*
	 * If we are to forward the packet using the same interface
	 * as one we got the packet from, perhaps we should send a redirect
	 * to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a route
	 * modified by a redirect.
	 */
	if (!proxy && ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif &&
	    !srcrt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
		if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
			/*
			 * If the incoming interface is equal to the outgoing
			 * one, and the link attached to the interface is
			 * point-to-point, then it will be highly probable
			 * that a routing loop occurs.  Thus, we immediately
			 * drop the packet and send an ICMPv6 error message.
			 *
			 * type/code is based on suggestion by Rich Draves.
			 * not sure if it is the best pick.
			 */
			RT_REMREF_LOCKED(rt);	/* Release extra ref */
			RT_UNLOCK(rt);
			icmp6_error(mcopy, ICMP6_DST_UNREACH,
			    ICMP6_DST_UNREACH_ADDR, 0);
			m_freem(m);
			return (NULL);
		}
		type = ND_REDIRECT;
	}

#if IPFW2
	/*
	 * Check with the firewall...
	 */
	if (ip6_fw_enable && ip6_fw_chk_ptr) {
		u_short port = 0;
		ifp = rt->rt_ifp;
		/* Drop the lock but retain the extra ref */
		RT_UNLOCK(rt);
		/* If ipfw says divert, we have to just drop packet */
		if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
			m_freem(m);
			goto freecopy;
		}
		if (!m) {
			goto freecopy;
		}
		/* We still have the extra ref on rt */
		RT_LOCK(rt);
	}
#endif

	/*
	 * Fake scoped addresses. Note that even link-local source or
	 * destinaion can appear, if the originating node just sends the
	 * packet to us (without address resolution for the destination).
	 * Since both icmp6_error and icmp6_redirect_output fill the embedded
	 * link identifiers, we can do this stuff after making a copy for
	 * returning an error.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
		/*
		 * See corresponding comments in ip6_output.
		 * XXX: but is it possible that ip6_forward() sends a packet
		 *      to a loopback interface? I don't think so, and thus
		 *      I bark here. ([email protected])
		 * XXX: it is common to route invalid packets to loopback.
		 *      also, the codepath will be visited on use of ::1 in
		 *      rthdr. (itojun)
		 */
#if 1
		if (0)
#else
		if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0)
#endif
		{
			printf("ip6_forward: outgoing interface is loopback. "
			    "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
			    ip6_sprintf(&ip6->ip6_src),
			    ip6_sprintf(&ip6->ip6_dst),
			    ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif),
			    if_name(rt->rt_ifp));
		}

		/* we can just use rcvif in forwarding. */
		origifp = m->m_pkthdr.rcvif;
	} else
		origifp = rt->rt_ifp;
	/*
	 * clear embedded scope identifiers if necessary.
	 * in6_clearscope will touch the addresses only when necessary.
	 */
	in6_clearscope(&ip6->ip6_src);
	in6_clearscope(&ip6->ip6_dst);

	ifp = rt->rt_ifp;
	/* Drop the lock but retain the extra ref */
	RT_UNLOCK(rt);

	/*
	 * If this is to be processed locally, let ip6_input have it.
	 */
	if (proxy) {
		VERIFY(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST);
		/* Release extra ref */
		RT_REMREF(rt);
		if (mcopy != NULL)
			m_freem(mcopy);
		return (m);
	}

#if PF
	/* Invoke outbound packet filter */
	error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);

	if (error != 0 || m == NULL) {
		if (m != NULL) {
			panic("%s: unexpected packet %p\n", __func__, m);
			/* NOTREACHED */
		}
		/* Already freed by callee */
		goto senderr;
	}
	/* PF may have modified the packet; re-fetch the header pointer */
	ip6 = mtod(m, struct ip6_hdr *);
#endif /* PF */

	error = nd6_output(ifp, origifp, m, dst, rt, NULL);
	if (error) {
		in6_ifstat_inc(ifp, ifs6_out_discard);
		ip6stat.ip6s_cantforward++;
	} else {
		ip6stat.ip6s_forward++;
		in6_ifstat_inc(ifp, ifs6_out_forward);
		if (type)
			ip6stat.ip6s_redirectsent++;
		else {
			if (mcopy) {
				goto freecopy;
			}
		}
	}
#if PF
senderr:
#endif /* PF */
	if (mcopy == NULL) {
		/* Release extra ref */
		RT_REMREF(rt);
		return (NULL);
	}
	/* Map the send error to an ICMPv6 error for the saved copy */
	switch (error) {
	case 0:
#if 1
		if (type == ND_REDIRECT) {
			icmp6_redirect_output(mcopy, rt);
			/* Release extra ref */
			RT_REMREF(rt);
			return (NULL);
		}
#endif
		goto freecopy;

	case EMSGSIZE:
		/* xxx MTU is constant in PPP? */
		goto freecopy;

	case ENOBUFS:
		/* Tell source to slow down like source quench in IP? */
		goto freecopy;

	case ENETUNREACH:	/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP6_DST_UNREACH;
		code = ICMP6_DST_UNREACH_ADDR;
		break;
	}
	icmp6_error(mcopy, type, code, 0);
	/* Release extra ref */
	RT_REMREF(rt);
	return (NULL);

freecopy:
	m_freem(mcopy);
	/* Release extra ref */
	RT_REMREF(rt);
	return (NULL);
}
/*
 * Given a source IPv6 address (and route, if available), determine the best
 * interface to send the packet from.  Checking for (and updating) the
 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
 * without any locks, based on the assumption that in the event this is
 * called from ip6_output(), the output operation is single-threaded per-pcb,
 * i.e. for any given pcb there can only be one thread performing output at
 * the IPv6 layer.
 *
 * This routine is analogous to in_selectsrcif() for IPv4.
 *
 * clone - meaningful only for bsdi and freebsd
 */
static int
selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
    struct ip6_pktopts *opts, struct ip6_moptions *mopts,
    struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt,
    int clone, int norouteok, unsigned int ifscope, unsigned int nocell)
{
	int error = 0;
	struct ifnet *ifp = NULL;
	struct route_in6 *route = NULL;
	struct sockaddr_in6 *sin6_next;
	struct in6_pktinfo *pi = NULL;
	struct in6_addr *dst = &dstsock->sin6_addr;
	struct ifaddr *ifa = NULL;
	char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
	boolean_t select_srcif;

#if 0
	char ip6buf[INET6_ADDRSTRLEN];
	if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
	    dstsock->sin6_addr.s6_addr32[1] == 0 &&
	    !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
		printf("in6_selectroute: strange destination %s\n",
		    ip6_sprintf(ip6buf, &dstsock->sin6_addr));
	} else {
		printf("in6_selectroute: destination = %s%%%d\n",
		    ip6_sprintf(ip6buf, &dstsock->sin6_addr),
		    dstsock->sin6_scope_id); /* for debug */
	}
#endif

	if (retifp != NULL)
		*retifp = NULL;

	if (retrt != NULL)
		*retrt = NULL;

	if (ip6_select_srcif_debug) {
		struct in6_addr src;
		src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any;
		(void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src));
		(void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst));
	}

	/*
	 * If the destination address is UNSPECIFIED addr, bail out.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(dst)) {
		error = EHOSTUNREACH;
		goto done;
	}

	/*
	 * Perform source interface selection only if Scoped Routing
	 * is enabled and a source address that isn't unspecified.
	 */
	select_srcif = (ip6_doscopedroute && srcsock != NULL &&
	    !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));

	/*
	 * If Scoped Routing is disabled, ignore the given ifscope.
	 * Otherwise even if source selection won't be performed,
	 * we still obey IPV6_BOUND_IF.
	 */
	if (!ip6_doscopedroute && ifscope != IFSCOPE_NONE)
		ifscope = IFSCOPE_NONE;

	/* If the caller specified the outgoing interface explicitly, use it */
	if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL &&
	    pi->ipi6_ifindex != 0) {
		/*
		 * If IPV6_PKTINFO takes precedence over IPV6_BOUND_IF.
		 */
		ifscope = pi->ipi6_ifindex;
		ifnet_head_lock_shared();
		/* ifp may be NULL if detached or out of range */
		ifp = (ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL;
		ifnet_head_done();
		if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
			/*
			 * We do not have to check or get the route for
			 * multicast.  If the caller didn't ask/care for
			 * the route and we have no interface to use,
			 * it's an error.
			 */
			if (ifp == NULL)
				error = EHOSTUNREACH;
			goto done;
		} else {
			goto getsrcif;
		}
	}

	/*
	 * If the destination address is a multicast address and the outgoing
	 * interface for the address is specified by the caller, use it.
	 */
	if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
		IM6O_LOCK(mopts);
		if ((ifp = mopts->im6o_multicast_ifp) != NULL) {
			IM6O_UNLOCK(mopts);
			goto done; /* we do not need a route for multicast. */
		}
		IM6O_UNLOCK(mopts);
	}

getsrcif:
	/*
	 * If the outgoing interface was not set via IPV6_BOUND_IF or
	 * IPV6_PKTINFO, use the scope ID in the destination address.
	 */
	if (ip6_doscopedroute && ifscope == IFSCOPE_NONE)
		ifscope = dstsock->sin6_scope_id;

	/*
	 * Perform source interface selection; the source IPv6 address
	 * must belong to one of the addresses of the interface used
	 * by the route.  For performance reasons, do this only if
	 * there is no route, or if the routing table has changed,
	 * or if we haven't done source interface selection on this
	 * route (for this PCB instance) before.
	 */
	if (!select_srcif || (ro != NULL && ro->ro_rt != NULL &&
	    (ro->ro_rt->rt_flags & RTF_UP) &&
	    ro->ro_rt->generation_id == route_generation &&
	    (ro->ro_flags & ROF_SRCIF_SELECTED))) {
		if (ro != NULL && ro->ro_rt != NULL) {
			ifa = ro->ro_rt->rt_ifa;
			IFA_ADDREF(ifa);
		}
		goto getroute;
	}

	/*
	 * Given the source IPv6 address, find a suitable source interface
	 * to use for transmission; if a scope ID has been specified,
	 * optimize the search by looking at the addresses only for that
	 * interface.  This is still suboptimal, however, as we need to
	 * traverse the per-interface list.
	 */
	if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) {
		unsigned int scope = ifscope;
		struct ifnet *rt_ifp;

		/*
		 * NOTE(review): if this branch is entered because
		 * ifscope != IFSCOPE_NONE while ro == NULL, the
		 * dereference below would fault — presumably callers
		 * always pass a non-NULL ro when a scope is set;
		 * verify against the callers.
		 */
		rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;

		/*
		 * If no scope is specified and the route is stale (pointing
		 * to a defunct interface) use the current primary interface;
		 * this happens when switching between interfaces configured
		 * with the same IPv6 address.  Otherwise pick up the scope
		 * information from the route; the ULP may have looked up a
		 * correct route and we just need to verify it here and mark
		 * it with the ROF_SRCIF_SELECTED flag below.
		 */
		if (scope == IFSCOPE_NONE) {
			/* NOTE(review): assumes rt_ifp != NULL here (i.e. ro->ro_rt set) — confirm */
			scope = rt_ifp->if_index;
			if (scope != get_primary_ifscope(AF_INET6) &&
			    ro->ro_rt->generation_id != route_generation)
				scope = get_primary_ifscope(AF_INET6);
		}

		ifa = (struct ifaddr *)
		    ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);

		if (ip6_select_srcif_debug && ifa != NULL) {
			if (ro->ro_rt != NULL) {
				printf("%s->%s ifscope %d->%d ifa_if %s "
				    "ro_if %s\n", s_src, s_dst, ifscope,
				    scope, if_name(ifa->ifa_ifp),
				    if_name(rt_ifp));
			} else {
				printf("%s->%s ifscope %d->%d ifa_if %s\n",
				    s_src, s_dst, ifscope, scope,
				    if_name(ifa->ifa_ifp));
			}
		}
	}

	/*
	 * Slow path; search for an interface having the corresponding source
	 * IPv6 address if the scope was not specified by the caller, and:
	 *
	 * 1) There currently isn't any route, or,
	 * 2) The interface used by the route does not own that source
	 *    IPv6 address; in this case, the route will get blown away
	 *    and we'll do a more specific scoped search using the newly
	 *    found interface.
	 */
	if (ifa == NULL && ifscope == IFSCOPE_NONE) {
		ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);

		if (ip6_select_srcif_debug && ifa != NULL) {
			printf("%s->%s ifscope %d ifa_if %s\n",
			    s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
		}
	}

getroute:
	if (ifa != NULL)
		ifscope = ifa->ifa_ifp->if_index;

	/*
	 * If the next hop address for the packet is specified by the caller,
	 * use it as the gateway.
	 */
	if (opts != NULL && opts->ip6po_nexthop != NULL) {
		struct route_in6 *ron;

		sin6_next = satosin6(opts->ip6po_nexthop);

		/* at this moment, we only support AF_INET6 next hops */
		if (sin6_next->sin6_family != AF_INET6) {
			error = EAFNOSUPPORT; /* or should we proceed? */
			goto done;
		}

		/*
		 * If the next hop is an IPv6 address, then the node identified
		 * by that address must be a neighbor of the sending host.
		 */
		ron = &opts->ip6po_nextroute;
		if (ron->ro_rt != NULL)
			RT_LOCK(ron->ro_rt);
		/* Invalidate the cached next-hop route if it is stale or mismatched */
		if ((ron->ro_rt != NULL &&
		    ((ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
		    (RTF_UP | RTF_LLINFO) ||
		    ron->ro_rt->generation_id != route_generation ||
		    (select_srcif && (ifa == NULL ||
		    ifa->ifa_ifp != ron->ro_rt->rt_ifp)))) ||
		    !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
		    &sin6_next->sin6_addr)) {
			if (ron->ro_rt != NULL) {
				RT_UNLOCK(ron->ro_rt);
				rtfree(ron->ro_rt);
				ron->ro_rt = NULL;
			}
			*satosin6(&ron->ro_dst) = *sin6_next;
		}
		if (ron->ro_rt == NULL) {
			rtalloc_scoped((struct route *)ron, ifscope);
			if (ron->ro_rt != NULL)
				RT_LOCK(ron->ro_rt);
			/* Next hop must resolve to a neighbor (llinfo) entry */
			if (ron->ro_rt == NULL ||
			    !(ron->ro_rt->rt_flags & RTF_LLINFO) ||
			    !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))->
			    sin6_addr, &sin6_next->sin6_addr)) {
				if (ron->ro_rt != NULL) {
					RT_UNLOCK(ron->ro_rt);
					rtfree(ron->ro_rt);
					ron->ro_rt = NULL;
				}
				error = EHOSTUNREACH;
				goto done;
			}
		}
		route = ron;
		ifp = ron->ro_rt->rt_ifp;

		/*
		 * When cloning is required, try to allocate a route to the
		 * destination so that the caller can store path MTU
		 * information.
		 */
		if (!clone) {
			if (select_srcif) {
				/* Keep the route locked */
				goto validateroute;
			}
			RT_UNLOCK(ron->ro_rt);
			goto done;
		}
		RT_UNLOCK(ron->ro_rt);
	}

	/*
	 * Use a cached route if it exists and is valid, else try to allocate
	 * a new one.  Note that we should check the address family of the
	 * cached destination, in case of sharing the cache with IPv4.
	 */
	if (ro == NULL)
		goto done;
	if (ro->ro_rt != NULL)
		RT_LOCK(ro->ro_rt);
	if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
	    satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
	    ro->ro_rt->generation_id != route_generation ||
	    !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
	    (select_srcif && (ifa == NULL ||
	    ifa->ifa_ifp != ro->ro_rt->rt_ifp)))) {
		RT_UNLOCK(ro->ro_rt);
		rtfree(ro->ro_rt);
		ro->ro_rt = NULL;
	}
	if (ro->ro_rt == NULL) {
		struct sockaddr_in6 *sa6;

		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
		sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
		sa6->sin6_family = AF_INET6;
		sa6->sin6_len = sizeof(struct sockaddr_in6);
		sa6->sin6_addr = *dst;
		if (IN6_IS_ADDR_MULTICAST(dst)) {
			ro->ro_rt = rtalloc1_scoped(
			    &((struct route *)ro)->ro_dst, 0, 0, ifscope);
		} else {
			rtalloc_scoped((struct route *)ro, ifscope);
		}
		if (ro->ro_rt != NULL)
			RT_LOCK(ro->ro_rt);
	}

	/*
	 * Do not care about the result if we have the nexthop
	 * explicitly specified (in case we're asked to clone.)
	 */
	if (opts != NULL && opts->ip6po_nexthop != NULL) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		goto done;
	}

	if (ro->ro_rt != NULL) {
		RT_LOCK_ASSERT_HELD(ro->ro_rt);
		ifp = ro->ro_rt->rt_ifp;
	} else {
		error = EHOSTUNREACH;
	}
	route = ro;

validateroute:
	if (select_srcif) {
		boolean_t has_route = (route != NULL && route->ro_rt != NULL);

		if (has_route)
			RT_LOCK_ASSERT_HELD(route->ro_rt);
		/*
		 * If there is a non-loopback route with the wrong interface,
		 * or if there is no interface configured with such an address,
		 * blow it away.  Except for local/loopback, we look for one
		 * with a matching interface scope/index.
		 */
		if (has_route && (ifa == NULL ||
		    (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
		    !(route->ro_rt->rt_flags & RTF_UP))) {
			if (ip6_select_srcif_debug) {
				if (ifa != NULL) {
					printf("%s->%s ifscope %d ro_if %s "
					    "!= ifa_if %s (cached route "
					    "cleared)\n", s_src, s_dst,
					    ifscope, if_name(ifp),
					    if_name(ifa->ifa_ifp));
				} else {
					printf("%s->%s ifscope %d ro_if %s "
					    "(no ifa_if found)\n",
					    s_src, s_dst, ifscope,
					    if_name(ifp));
				}
			}
			RT_UNLOCK(route->ro_rt);
			rtfree(route->ro_rt);
			route->ro_rt = NULL;
			route->ro_flags &= ~ROF_SRCIF_SELECTED;
			error = EHOSTUNREACH;
			/* Undo the settings done above */
			route = NULL;
			ifp = NULL;
		} else if (has_route) {
			/* Route validated for this source; remember it */
			route->ro_flags |= ROF_SRCIF_SELECTED;
			route->ro_rt->generation_id = route_generation;
			RT_UNLOCK(route->ro_rt);
		}
	} else {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		if (ifp != NULL && opts != NULL &&
		    opts->ip6po_pktinfo != NULL &&
		    opts->ip6po_pktinfo->ipi6_ifindex != 0) {
			/*
			 * Check if the outgoing interface conflicts with the
			 * interface specified by ipi6_ifindex (if specified).
			 * Note that loopback interface is always okay.
			 * (this may happen when we are sending a packet to
			 * one of our own addresses.)
			 */
			if (!(ifp->if_flags & IFF_LOOPBACK) &&
			    ifp->if_index !=
			    opts->ip6po_pktinfo->ipi6_ifindex) {
				error = EHOSTUNREACH;
				goto done;
			}
		}
	}

done:
	/* Enforce the "no cellular" restriction, if requested by the caller */
	if (nocell && error == 0) {
		if ((ifp != NULL && ifp->if_type == IFT_CELLULAR) ||
		    (route != NULL && route->ro_rt != NULL &&
		    route->ro_rt->rt_ifp->if_type == IFT_CELLULAR)) {
			if (route != NULL && route->ro_rt != NULL) {
				rtfree(route->ro_rt);
				route->ro_rt = NULL;
				route->ro_flags &= ~ROF_SRCIF_SELECTED;
				route = NULL;
			}
			ifp = NULL;
			error = EHOSTUNREACH;
		}
	}

	if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) {
		/*
		 * This can happen if the caller did not pass a cached route
		 * nor any other hints.  We treat this case an error.
		 */
		error = EHOSTUNREACH;
	}
	if (error == EHOSTUNREACH)
		ip6stat.ip6s_noroute++;

	if (error == 0) {
		if (retifp != NULL) {
			if (ifp != NULL)
				ifnet_reference(ifp);	/* for caller */
			*retifp = ifp;
		}
		if (retrt != NULL && route != NULL)
			*retrt = route->ro_rt;	/* ro_rt may be NULL */
	} else if (select_srcif && ip6_select_srcif_debug) {
		printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
		    s_src, s_dst, ifscope,
		    (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
		    (ifp != NULL) ? if_name(ifp) : "NONE", error);
	}

	if (ifa != NULL)
		IFA_REMREF(ifa);

	return (error);
}
/*
 * Do what we need to do when inserting a route.
 *
 * Radix-tree "add" hook for the IPv6 routing table.  Called with rnh_lock
 * and the route entry lock held.  Returns the inserted radix node, or NULL
 * when insertion is rejected (dynamic-route limit) or fails.
 */
static struct radix_node *
in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node *treenodes)
{
	struct rtentry *rt = (struct rtentry *)treenodes;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
	struct radix_node *ret;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/*
	 * If this is a dynamic route (which is created via Redirect) and
	 * we already have the maximum acceptable number of such route entries,
	 * reject creating a new one.  We could initiate garbage collection to
	 * make available space right now, but the benefit would probably not
	 * be worth the cleaning overhead; we only have to endure a slightly
	 * suboptimal path even without the redirected route.
	 */
	if ((rt->rt_flags & RTF_DYNAMIC) != 0 &&
	    ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes)
		return (NULL);

	/*
	 * For IPv6, all unicast non-host routes are automatically cloning.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
		rt->rt_flags |= RTF_MULTICAST;

	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
		rt->rt_flags |= RTF_PRCLONING;
	}

	/*
	 * A little bit of help for both IPv6 output and input:
	 *   For local addresses, we make sure that RTF_LOCAL is set,
	 *   with the thought that this might one day be used to speed up
	 *   ip_input().
	 *
	 * We also mark routes to multicast addresses as such, because
	 * it's easy to do and might be useful (but this is much more
	 * dubious since it's so easy to inspect the address).  (This
	 * is done above.)
	 *
	 * XXX
	 * should elaborate the code.
	 */
	if (rt->rt_flags & RTF_HOST) {
		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
		    ->sin6_addr, &sin6->sin6_addr)) {
			rt->rt_flags |= RTF_LOCAL;
		}
	}

	/* Default the route MTU to the interface MTU unless locked by the user */
	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
	    rt->rt_ifp)
		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;

	ret = rn_addroute(v_arg, n_arg, head, treenodes);
	if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a host route, but can't.
		 * Find out if it is because of an
		 * ARP entry and delete it if so.
		 */
		rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0,
		    RTF_CLONING | RTF_PRCLONING);
		if (rt2) {
			RT_LOCK(rt2);
			if ((rt2->rt_flags & RTF_LLINFO) &&
			    (rt2->rt_flags & RTF_HOST) &&
			    rt2->rt_gateway != NULL &&
			    rt2->rt_gateway->sa_family == AF_LINK) {
				/*
				 * Safe to drop rt_lock and use rt_key,
				 * rt_gateway, since holding rnh_lock here
				 * prevents another thread from calling
				 * rt_setgate() on this route.
				 */
				RT_UNLOCK(rt2);
				(void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
				    rt2->rt_gateway, rt_mask(rt2),
				    rt2->rt_flags, 0);
				/* Retry the insertion now that the stale entry is gone */
				ret = rn_addroute(v_arg, n_arg, head,
				    treenodes);
			} else {
				RT_UNLOCK(rt2);
			}
			rtfree_locked(rt2);
		}
	} else if (ret == NULL && (rt->rt_flags & RTF_CLONING)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a net route, but can't.
		 * The following case should be allowed, so we'll make a
		 * special check for this:
		 *	Two IPv6 addresses with the same prefix is assigned
		 *	to a single interface.
		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
		 *	In this case, (*1) and (*2) want to add the same
		 *	net route entry, 3ffe:0501:: -> if0.
		 *	This case should not raise an error.
		 */
		rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0,
		    RTF_CLONING | RTF_PRCLONING);
		if (rt2) {
			RT_LOCK(rt2);
			/* Treat an identical existing cloning net route as success */
			if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
			    == RTF_CLONING &&
			    rt2->rt_gateway &&
			    rt2->rt_gateway->sa_family == AF_LINK &&
			    rt2->rt_ifp == rt->rt_ifp) {
				ret = rt2->rt_nodes;
			}
			RT_UNLOCK(rt2);
			rtfree_locked(rt2);
		}
	}

	/* Successful insertion of a redirect-created route: bump the counter */
	if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC) != 0)
		in6dynroutes++;

	return ret;
}