static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, struct in6_addr *saddr) { int err; struct rt6_info *rt; /* * Clone the route. */ rt = ip6_rt_copy(ort); if (rt) { ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); if (!(rt->rt6i_flags&RTF_GATEWAY)) ipv6_addr_copy(&rt->rt6i_gateway, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->u.dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { ipv6_addr_copy(&rt->rt6i_src.addr, saddr); rt->rt6i_src.plen = 128; } #endif rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); dst_hold(&rt->u.dst); err = rt6_ins(rt, NULL); if (err == 0) return rt; rt->u.dst.error = err; return rt; } dst_hold(&ip6_null_entry.u.dst); return &ip6_null_entry; }
static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) { struct dst_entry *dst; rcu_read_lock(); dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); if (dst) dst_hold(dst); rcu_read_unlock(); return dst; }
/** * skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was not taken on dst * skb_dst_drop() should not dst_release() this dst */ void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ if (unlikely(dst->flags & DST_NOCACHE)) { dst_hold(dst); skb_dst_set(skb, dst); } else { skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } }
static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) { struct dn_route *rth, **rthp; unsigned long now = jiffies; rthp = &dn_rt_hash_table[hash].chain; spin_lock_bh(&dn_rt_hash_table[hash].lock); while((rth = *rthp) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ *rthp = rth->u.rt_next; rcu_assign_pointer(rth->u.rt_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); rth->u.dst.__use++; dst_hold(&rth->u.dst); rth->u.dst.lastuse = now; spin_unlock_bh(&dn_rt_hash_table[hash].lock); dnrt_drop(rt); *rp = rth; return 0; } rthp = &rth->u.rt_next; } rcu_assign_pointer(rt->u.rt_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); dst_hold(&rt->u.dst); rt->u.dst.__use++; rt->u.dst.lastuse = now; spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; }
static inline struct dst_entry * __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) { struct dst_entry *dst = dest->dst_cache; if (!dst) return NULL; if ((dst->obsolete || rtos != dest->dst_rtos) && dst->ops->check(dst, cookie) == NULL) { dest->dst_cache = NULL; return NULL; } dst_hold(dst); return dst; }
static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); unsigned int len = skb->len; int ret; skb_dst_drop(skb); dst_hold((struct dst_entry *)repr->dst); skb_dst_set(skb, (struct dst_entry *)repr->dst); skb->dev = repr->dst->u.port_info.lower_dev; ret = dev_queue_xmit(skb); nfp_repr_inc_tx_stats(netdev, len, ret); return NETDEV_TX_OK; }
a_status_t __adf_net_indicate_packet(adf_net_handle_t hdl, struct sk_buff *skb, uint32_t len) { struct net_device *netdev = hdl_to_netdev(hdl); __adf_softc_t *sc = hdl_to_softc(hdl); /** * For pseudo devices IP checksum has to computed */ if(adf_os_unlikely(skb->ip_summed == CHECKSUM_UNNECESSARY)) __adf_net_ip_cksum(skb); /** * also pulls the ether header */ skb->protocol = eth_type_trans(skb, netdev); skb->dev = netdev; netdev->last_rx = jiffies; #ifdef LIMIT_MTU_SIZE if (skb->len >= LIMITED_MTU) { skb->h.raw = skb->nh.raw = skb->data; skb->dst = (struct dst_entry *)&__fake_rtable; skb->pkt_type = PACKET_HOST; dst_hold(skb->dst); #if 0 printk("addrs : sa : %x : da:%x\n", skb->nh.iph->saddr, skb->nh.iph->daddr); printk("head : %p tail : %p iph %p %p\n", skb->head, skb->tail, skb->nh.iph, skb->mac.raw); #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(LIMITED_MTU - 4 )); dev_kfree_skb_any(skb); return A_STATUS_OK; } #endif if(sc->vlgrp) __vlan_hwaccel_put_tag(skb, sc->vid); if (in_irq()) netif_rx(skb); else netif_receive_skb(skb); return A_STATUS_OK; }
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) { struct dst_entry *dst; rcu_read_lock(); dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); if (dst) { if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { rcu_read_unlock(); tunnel_dst_reset(t); return NULL; } dst_hold(dst); } rcu_read_unlock(); return (struct rtable *)dst; }
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; struct fib6_node *fn; fn = &ip6_routing_table; write_lock_bh(&rt6_lock); for (rt = fn->leaf; rt; rt=rt->u.next) { if (dev == rt->rt6i_dev && ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0) break; } if (rt) dst_hold(&rt->u.dst); write_unlock_bh(&rt6_lock); return rt; }
static struct rtable *vrf_get_rtable(const struct net_device *dev, const struct flowi4 *fl4) { struct rtable *rth = NULL; if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); rcu_read_lock(); rth = rcu_dereference(vrf->rth); if (likely(rth)) dst_hold(&rth->dst); rcu_read_unlock(); } return rth; }
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, int oif, int strict) { struct fib6_node *fn; struct rt6_info *rt; read_lock_bh(&rt6_lock); fn = fib6_lookup(&ip6_routing_table, daddr, saddr); rt = rt6_device_match(fn->leaf, oif, strict); dst_hold(&rt->u.dst); rt->u.dst.__use++; read_unlock_bh(&rt6_lock); rt->u.dst.lastuse = jiffies; if (rt->u.dst.error == 0) return rt; dst_release(&rt->u.dst); return NULL; }
/* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; skb->dst = (struct dst_entry *)&__fake_rtable; dst_hold(skb->dst); skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; }
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, int oif, int flags) { struct fib6_node *fn; struct rt6_info *rt; read_lock_bh(&rt6_lock); fn = fib6_lookup(&ip6_routing_table, daddr, saddr); rt = rt6_device_match(fn->leaf, oif, !!(flags & RT6_LOOKUP_FLAG_STRICT)); dst_hold(&rt->u.dst); rt->u.dst.__use++; read_unlock_bh(&rt6_lock); if (!(flags & RT6_LOOKUP_FLAG_NOUSE)) rt->u.dst.lastuse = jiffies; if (rt->u.dst.error == 0) return rt; dst_release(&rt->u.dst); return NULL; }
void ipv6_ac_destroy_dev(struct inet6_dev *idev) { struct ifacaddr6 *aca; write_lock_bh(&idev->lock); while ((aca = idev->ac_list) != NULL) { idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); dst_hold(&aca->aca_rt->dst); ip6_del_rt(aca->aca_rt); aca_put(aca); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); }
static struct dst_entry *rxe_find_route(struct net_device *ndev, struct rxe_qp *qp, struct rxe_av *av) { struct dst_entry *dst = NULL; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); if (!dst || !dst_check(dst, qp->dst_cookie)) { if (dst) dst_release(dst); if (av->network_type == RDMA_NETWORK_IPV4) { struct in_addr *saddr; struct in_addr *daddr; saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; dst = rxe_find_route4(ndev, saddr, daddr); } else if (av->network_type == RDMA_NETWORK_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; dst = rxe_find_route6(ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); #endif } if (dst && (qp_type(qp) == IB_QPT_RC)) { dst_hold(dst); sk_dst_set(qp->sk->sk, dst); } } return dst; }
static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, const struct flowi6 *fl6) { struct dst_entry *dst = NULL; if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); struct rt6_info *rt; rcu_read_lock(); rt = rcu_dereference(vrf->rt6); if (likely(rt)) { dst = &rt->dst; dst_hold(dst); } rcu_read_unlock(); } return dst; }
/* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; skb->dst = (struct dst_entry *)&__fake_rtable; dst_hold(skb->dst); skb->dev = nf_bridge->physindev; if (skb->protocol == htons(ETH_P_8021Q)) { skb_push(skb, VLAN_HLEN); skb->nh.raw -= VLAN_HLEN; } NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; }
int ip_xfrm_me_harder(struct sk_buff **pskb) { struct flowi fl; unsigned int hh_len; struct dst_entry *dst; if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) return 0; if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) return -1; dst = (*pskb)->dst; if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) return -1; dst_release((*pskb)->dst); (*pskb)->dst = dst; /* Change in oif may mean change in hh_len. */ hh_len = (*pskb)->dst->dev->hard_header_len; if (skb_headroom(*pskb) < hh_len) { struct sk_buff *nskb; nskb = skb_realloc_headroom(*pskb, hh_len); if (!nskb) return -1; if ((*pskb)->sk) skb_set_owner_w(nskb, (*pskb)->sk); kfree_skb(*pskb); *pskb = nskb; } return 0; }
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto out; } tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = np->daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sock_i_uid(sk); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; WARN_ON(req->sk != NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); tcp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
/* * Handle redirects */ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, struct neighbour *neigh, int on_link) { struct rt6_info *rt, *nrt; /* Locate old route to this destination. */ rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1); if (rt == NULL) return; if (neigh->dev != rt->rt6i_dev) goto out; /* Redirect received -> path was valid. Look, redirects are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK */ dst_confirm(&rt->u.dst); /* Duplicate redirect: silently ignore. */ if (neigh == rt->u.dst.neighbour) goto out; /* Current route is on-link; redirect is always invalid. Seems, previous statement is not true. It could be node, which looks for us as on-link (f.e. proxy ndisc) But then router serving it might decide, that we should know truth 8)8) --ANK (980726). */ if (!(rt->rt6i_flags&RTF_GATEWAY)) goto out; /* * RFC 1970 specifies that redirects should only be * accepted if they come from the nexthop to the target. * Due to the way default routers are chosen, this notion * is a bit fuzzy and one might need to check all default * routers. */ if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) { if (rt->rt6i_flags & RTF_DEFAULT) { struct rt6_info *rt1; read_lock(&rt6_lock); for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) { if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) { dst_hold(&rt1->u.dst); dst_release(&rt->u.dst); read_unlock(&rt6_lock); rt = rt1; goto source_ok; } } read_unlock(&rt6_lock); } if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); goto out; } source_ok: /* * We have finally decided to accept it. */ nrt = ip6_rt_copy(rt); if (nrt == NULL) goto out; nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; nrt->u.dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev); nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss); if (rt->u.dst.advmss > 65535-20) rt->u.dst.advmss = 65535; nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev); if (rt6_ins(nrt, NULL)) goto out; if (rt->rt6i_flags&RTF_CACHE) { ip6_del_rt(rt, NULL); return; } out: dst_release(&rt->u.dst); return; }
static int __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, struct flowi *fl, struct dst_entry **dst_p) { struct dst_entry *dst, *dst_prev; struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; struct flowi fl_tunnel = { .nl_u = { .ip4_u = { .saddr = fl->fl4_src, .daddr = fl->fl4_dst, .tos = fl->fl4_tos } } }; int i; int err; int header_len = 0; int trailer_len = 0; dst = dst_prev = NULL; dst_hold(&rt->u.dst); for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops); struct xfrm_dst *xdst; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; dst_release(&rt->u.dst); goto error; } if (!dst) dst = dst1; else { dst_prev->child = dst1; dst1->flags |= DST_NOHASH; dst_clone(dst1); } xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { unsigned short encap_family = xfrm[i]->props.family; switch(encap_family) { case AF_INET: fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; break; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) case AF_INET6: ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6); ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6); break; #endif default: BUG_ON(1); } err = xfrm_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel, encap_family); if (err) goto error; } else dst_hold(&rt->u.dst); } dst_prev->child = &rt->u.dst; dst->path = &rt->u.dst; *dst_p = dst; dst = dst_prev; dst_prev = *dst_p; i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; if (rt->u.dst.dev) dev_hold(rt->u.dst.dev); dst_prev->obsolete = -1; dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; dst_prev->nfheader_len = 0; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; /* XXX: When IPv6 module can be unloaded, we should manage reference * to xfrm6_output in afinfo->output. Miyazawa * */ afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); if (!afinfo) { dst = *dst_p; err = -EAFNOSUPPORT; goto error; } dst_prev->output = afinfo->output; xfrm_state_put_afinfo(afinfo); if (dst_prev->xfrm->props.family == AF_INET && rt->peer) atomic_inc(&rt->peer->refcnt); x->u.rt.peer = rt->peer; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); x->u.rt.rt_type = rt->rt_type; x->u.rt.rt_src = rt0->rt_src; x->u.rt.rt_dst = rt0->rt_dst; x->u.rt.rt_gateway = rt->rt_gateway; x->u.rt.rt_spec_dst = rt0->rt_spec_dst; x->u.rt.idev = rt0->idev; in_dev_hold(rt0->idev); header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } xfrm_init_pmtu(dst); return 0; error: if (dst) dst_free(dst); return err; }
void ip6_route_input(struct sk_buff *skb) { struct fib6_node *fn; struct rt6_info *rt; int strict; int attempts = 3; strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); relookup: read_lock_bh(&rt6_lock); fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); restart: rt = fn->leaf; if ((rt->rt6i_flags & RTF_CACHE)) { if (ip6_rt_policy == 0) { rt = rt6_device_match(rt, skb->dev->ifindex, strict); BACKTRACK(); dst_hold(&rt->u.dst); goto out; } #ifdef CONFIG_RT6_POLICY if ((rt->rt6i_flags & RTF_FLOW)) { struct rt6_info *sprt; for (sprt = rt; sprt; sprt = sprt->u.next) { if (rt6_flow_match_in(sprt, skb)) { rt = sprt; dst_hold(&rt->u.dst); goto out; } } } #endif } rt = rt6_device_match(rt, skb->dev->ifindex, 0); BACKTRACK(); if (ip6_rt_policy == 0) { if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { read_unlock_bh(&rt6_lock); rt = rt6_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); if (rt->u.dst.error != -EEXIST || --attempts <= 0) goto out2; /* Race condition! In the gap, when rt6_lock was released someone could insert this route. Relookup. */ goto relookup; } dst_hold(&rt->u.dst); } else { #ifdef CONFIG_RT6_POLICY rt = rt6_flow_lookup_in(rt, skb); #else /* NEVER REACHED */ #endif } out: read_unlock_bh(&rt6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; skb->dst = (struct dst_entry *) rt; }
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) { struct fib6_node *fn; struct rt6_info *rt; int strict; int attempts = 3; strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); relookup: read_lock_bh(&rt6_lock); fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr); restart: rt = fn->leaf; if ((rt->rt6i_flags & RTF_CACHE)) { if (ip6_rt_policy == 0) { rt = rt6_device_match(rt, fl->oif, strict); BACKTRACK(); dst_hold(&rt->u.dst); goto out; } #ifdef CONFIG_RT6_POLICY if ((rt->rt6i_flags & RTF_FLOW)) { struct rt6_info *sprt; for (sprt = rt; sprt; sprt = sprt->u.next) { if (rt6_flow_match_out(sprt, sk)) { rt = sprt; dst_hold(&rt->u.dst); goto out; } } } #endif } if (rt->rt6i_flags & RTF_DEFAULT) { if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) rt = rt6_best_dflt(rt, fl->oif); } else { rt = rt6_device_match(rt, fl->oif, strict); BACKTRACK(); } if (ip6_rt_policy == 0) { if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { read_unlock_bh(&rt6_lock); rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr); if (rt->u.dst.error != -EEXIST || --attempts <= 0) goto out2; /* Race condition! In the gap, when rt6_lock was released someone could insert this route. Relookup. */ goto relookup; } dst_hold(&rt->u.dst); } else { #ifdef CONFIG_RT6_POLICY rt = rt6_flow_lookup_out(rt, sk, fl); #else /* NEVER REACHED */ #endif } out: read_unlock_bh(&rt6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; return &rt->u.dst; }
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen; int exthdrlen; int hh_len; int mtu; int copy; int err; int offset = 0; int csummode = CHECKSUM_NONE; __u8 tx_flags = 0; if (flags&MSG_PROBE) return 0; cork = &inet->cork.base; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking */ if (opt) { if (WARN_ON(np->cork.opt)) return -EINVAL; np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); if (unlikely(np->cork.opt == NULL)) return -ENOBUFS; np->cork.opt->tot_len = opt->tot_len; np->cork.opt->opt_flen = opt->opt_flen; np->cork.opt->opt_nflen = opt->opt_nflen; np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); if (opt->dst0opt && !np->cork.opt->dst0opt) return -ENOBUFS; np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); if (opt->dst1opt && !np->cork.opt->dst1opt) return -ENOBUFS; np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); if (opt->hopopt && !np->cork.opt->hopopt) return -ENOBUFS; np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); if (opt->srcrt && !np->cork.opt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } dst_hold(&rt->dst); cork->dst = &rt->dst; inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } cork->fragsize = mtu; if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; mtu = cork->fragsize; } hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); return -EMSGSIZE; } } /* For UDP, check if TX timestamp is enabled */ if (sk->sk_type == SOCK_DGRAM) { err = sock_tx_timestamp(sk, &tx_flags); if (err) goto error; } /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. * Otherwise, we need to reserve fragment header and * fragment alignment (= 8-15 octects, in total). * * Note that we may need to "move" the data from the tail of * of the buffer to the new fragment when we split * the message. * * FIXME: It may be fragmented into multiple chunks * at once if non-fragmentable extension headers * are too large. * --yoshfuji */ cork->length += length; if (length > mtu) { int proto = sk->sk_protocol; if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); return -EMSGSIZE; } if (proto == IPPROTO_UDP && (rt->dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) goto error; return 0; } } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; alloc_new_skb: /* There's no room in the current skb */ if (skb) fraggap = skb->len - maxfraglen; else fraggap = 0; /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt); skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; if (datalen != length + fraggap) { /* * this is not the last fragment, the trailer * space is regarded as data space. */ datalen += rt->dst.trailer_len; } alloclen += rt->dst.trailer_len; fraglen = datalen + fragheaderlen; /* * We just reserve space for fragment header. * Note: this may be overallocation if the message * (without MSG_MORE) fits into the MTU. */ alloclen += sizeof(struct frag_hdr); if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; else { /* Only the initial fragment * is time stamped. */ tx_flags = 0; } } if (skb == NULL) goto error; /* * Fill in the control structures */ skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation */ skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); if (sk->sk_type == SOCK_DGRAM) skb_shinfo(skb)->tx_flags = tx_flags; /* * Find where to start putting bytes */ data = skb_put(skb, fraglen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { err = -EINVAL; kfree_skb(skb); goto error; } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; /* * Put the packet on the pending queue */ __skb_queue_tail(&sk->sk_write_queue, skb); continue; } if (copy > length) copy = length; if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; } } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; struct page *page = sk->sk_sndmsg_page; int off = sk->sk_sndmsg_off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; if (page != frag->page) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if(i < MAX_SKB_FRAGS) { if (copy > PAGE_SIZE) copy = PAGE_SIZE; page = alloc_pages(sk->sk_allocation, 0); if (page == NULL) { err = -ENOMEM; goto error; } sk->sk_sndmsg_page = page; sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; } else { err = -EMSGSIZE; goto error; } if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; skb->truesize += copy; atomic_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; } return 0; error: cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; }
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; unsigned int maxfraglen, fragheaderlen; int exthdrlen; int hh_len; int mtu; int copy; int err; int offset = 0; int csummode = CHECKSUM_NONE; if (flags&MSG_PROBE) return 0; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking */ if (opt) { if (np->cork.opt == NULL) { np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); if (unlikely(np->cork.opt == NULL)) return -ENOBUFS; } else if (np->cork.opt->tot_len < opt->tot_len) { printk(KERN_DEBUG "ip6_append_data: invalid option length\n"); return -EINVAL; } memcpy(np->cork.opt, opt, opt->tot_len); inet->cork.flags |= IPCORK_OPT; /* need source address above miyazawa*/ } dst_hold(&rt->u.dst); np->cork.rt = rt; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; } else { rt = np->cork.rt; fl = &inet->cork.fl; if (inet->cork.flags & IPCORK_OPT) opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; mtu = inet->cork.fragsize; } hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); return -EMSGSIZE; } } /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. * Otherwise, we need to reserve fragment header and * fragment alignment (= 8-15 octects, in total). * * Note that we may need to "move" the data from the tail of * of the buffer to the new fragment when we split * the message. * * FIXME: It may be fragmented into multiple chunks * at once if non-fragmentable extension headers * are too large. * --yoshfuji */ inet->cork.length += length; if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; /* There's no room in the current skb */ if (skb_prev) fraggap = skb_prev->len - maxfraglen; else fraggap = 0; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && !(rt->u.dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; /* * The last fragment gets additional space at tail. * Note: we overallocate on fragments with MSG_MODE * because we have no idea if we're the last one. */ if (datalen == length + fraggap) alloclen += rt->u.dst.trailer_len; /* * We just reserve space for fragment header. * Note: this may be overallocation if the message * (without MSG_MORE) fits into the MTU. */ alloclen += sizeof(struct frag_hdr); if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; } if (skb == NULL) goto error; /* * Fill in the control structures */ skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation */ skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); /* * Find where to start putting bytes */ data = skb_put(skb, fraglen); skb->nh.raw = data + exthdrlen; data += fragheaderlen; skb->h.raw = data + exthdrlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; skb_trim(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { err = -EINVAL; kfree_skb(skb); goto error; } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; /* * Put the packet on the pending queue */ __skb_queue_tail(&sk->sk_write_queue, skb); continue; } if (copy > length) copy = length; if (!(rt->u.dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; } } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; struct page *page = sk->sk_sndmsg_page; int off = sk->sk_sndmsg_off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; if (page != frag->page) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if(i < MAX_SKB_FRAGS) { if (copy > PAGE_SIZE) copy = PAGE_SIZE; page = alloc_pages(sk->sk_allocation, 0); if (page == NULL) { err = -ENOMEM; goto error; } sk->sk_sndmsg_page = page; sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; skb->truesize += PAGE_SIZE; atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); } else { err = -EMSGSIZE; goto error; } if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; } offset += copy; length -= copy; } return 0; error: inet->cork.length -= length; IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); return err; }
static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr, struct fl_acc_args *args) { struct flow_rule *frule; struct rt6_info *nrt = NULL; struct pol_chain *pol; for (pol = rt6_pol_list; pol; pol = pol->next) { struct fib6_node *fn; struct rt6_info *sprt; fn = fib6_lookup(pol->rules, daddr, saddr); do { for (sprt = fn->leaf; sprt; sprt=sprt->u.next) { int res; frule = sprt->rt6i_flowr; #if RT6_DEBUG >= 2 if (frule == NULL) { printk(KERN_DEBUG "NULL flowr\n"); goto error; } #endif res = frule->ops->accept(rt, sprt, args, &nrt); switch (res) { case FLOWR_SELECT: goto found; case FLOWR_CLEAR: goto next_policy; case FLOWR_NODECISION: break; default: goto error; }; } fn = fn->parent; } while ((fn->fn_flags & RTN_TL_ROOT) == 0); next_policy: } error: dst_hold(&ip6_null_entry.u.dst); return &ip6_null_entry; found: if (nrt == NULL) goto error; nrt->rt6i_flags |= RTF_CACHE; dst_hold(&nrt->u.dst); err = rt6_ins(nrt, NULL); if (err) nrt->u.dst.error = err; return nrt; } #endif static int fib6_ifdown(struct rt6_info *rt, void *arg) { if (((void*)rt->rt6i_dev == arg || arg == NULL) && rt != &ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } return 0; } void rt6_ifdown(struct net_device *dev) { write_lock_bh(&rt6_lock); fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); write_unlock_bh(&rt6_lock); }
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct inet6_cork *v6_cork, int hlimit, int tclass, struct ipv6_txoptions *opt, struct rt6_info *rt, struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu; /* * setup for corking */ if (opt) { if (WARN_ON(v6_cork->opt)) return -EINVAL; v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); if (unlikely(!v6_cork->opt)) return -ENOBUFS; v6_cork->opt->tot_len = opt->tot_len; v6_cork->opt->opt_flen = opt->opt_flen; v6_cork->opt->opt_nflen = opt->opt_nflen; v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); if (opt->dst0opt && !v6_cork->opt->dst0opt) return -ENOBUFS; v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); if (opt->dst1opt && !v6_cork->opt->dst1opt) return -ENOBUFS; v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); if (opt->hopopt && !v6_cork->opt->hopopt) return -ENOBUFS; v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); if (opt->srcrt && !v6_cork->opt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } dst_hold(&rt->dst); cork->base.dst = &rt->dst; cork->fl.u.ip6 = *fl6; v6_cork->hop_limit = hlimit; v6_cork->tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } cork->base.fragsize = mtu; if (dst_allfrag(rt->dst.path)) cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; return 0; }
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ if (unlikely(!skb->ignore_df && skb->len > mtu)) goto fail_toobig; if (IP6CB(skb)->frag_max_size) { if (IP6CB(skb)->frag_max_size > mtu) goto fail_toobig; /* don't send fragments larger than what we received */ mtu = IP6CB(skb)->frag_max_size; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; } if (np && np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) goto fail; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || skb_cloned(skb) || skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } skb->truesize -= frag->truesize; } err = 0; offset = 0; /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); __skb_pull(skb, hlen); fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); fh->identification = frag_id; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (frag) { frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), tmp_hdr, hlen); offset += skb->len - hlen - sizeof(struct frag_hdr); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(offset); if (frag->next) fh->frag_off |= htons(IP6_MF); fh->identification = frag_id; ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); ip6_copy_metadata(frag, skb); } err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; skb = frag; frag = skb->next; skb->next = NULL; } kfree(tmp_hdr); if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); ip6_rt_put(rt); return 0; } kfree_skb_list(frag); IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); ip6_rt_put(rt); return err; slow_path_clean: skb_walk_frags(skb, frag2) { if (frag2 == frag) break; frag2->sk = NULL; frag2->destructor = NULL; skb->truesize += frag2->truesize; } } slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ /* * Fragment the datagram. */ *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; } /* Allocate buffer */ frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + hroom + troom, GFP_ATOMIC); if (!frag) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } /* * Set up data on packet */ ip6_copy_metadata(frag, skb); skb_reserve(frag, hroom); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); frag->transport_header = (frag->network_header + hlen + sizeof(struct frag_hdr)); /* * Charge the memory for the fragment to any owner * it might possess */ if (skb->sk) skb_set_owner_w(frag, skb->sk); /* * Copy the packet header into the new buffer. */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); /* * Build fragment header. */ fh->nexthdr = nexthdr; fh->reserved = 0; fh->identification = frag_id; /* * Copy a block of the IP datagram. */ BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag), len)); left -= len; fh->frag_off = htons(offset); if (left > 0) fh->frag_off |= htons(IP6_MF); ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); ptr += len; offset += len; /* * Put this fragment into the sending queue. */ err = output(net, sk, frag); if (err) goto fail; IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGCREATES); } IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); consume_skb(skb); return err; fail_toobig: if (skb->sk && dst_allfrag(skb_dst(skb))) sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; fail: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; }
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ if (!skb->local_df && skb->len > mtu) { skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (np && np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } mtu -= hlen + sizeof(struct frag_hdr); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || skb_cloned(skb)) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } skb->truesize -= frag->truesize; } err = 0; offset = 0; frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } __skb_pull(skb, hlen); fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); ipv6_select_ident(fh, &rt->rt6i_dst.addr); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); frag_id = fh->identification; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (frag) { frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), tmp_hdr, hlen); offset += skb->len - hlen - sizeof(struct frag_hdr); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(offset); if (frag->next != NULL) fh->frag_off |= htons(IP6_MF); fh->identification = frag_id; ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); ip6_copy_metadata(frag, skb); } err = output(skb); if(!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; skb = frag; frag = skb->next; skb->next = NULL; } kfree(tmp_hdr); if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); dst_release(&rt->dst); return 0; } while (frag) { skb = frag->next; kfree_skb(frag); frag = skb; } IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); dst_release(&rt->dst); return err; slow_path_clean: skb_walk_frags(skb, frag2) { if (frag2 == frag) break; frag2->sk = NULL; frag2->destructor = NULL; skb->truesize += frag2->truesize; } } slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ /* * Fragment the datagram. */ *prevhdr = NEXTHDR_FRAGMENT; /* * Keep copying data until we run out. */ while(left > 0) { len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; } /* * Allocate buffer. */ if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } /* * Set up data on packet */ ip6_copy_metadata(frag, skb); skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); frag->transport_header = (frag->network_header + hlen + sizeof(struct frag_hdr)); /* * Charge the memory for the fragment to any owner * it might possess */ if (skb->sk) skb_set_owner_w(frag, skb->sk); /* * Copy the packet header into the new buffer. */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); /* * Build fragment header. */ fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { ipv6_select_ident(fh, &rt->rt6i_dst.addr); frag_id = fh->identification; } else fh->identification = frag_id; /* * Copy a block of the IP datagram. */ if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) BUG(); left -= len; fh->frag_off = htons(offset); if (left > 0) fh->frag_off |= htons(IP6_MF); ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); ptr += len; offset += len; /* * Put this fragment into the sending queue. */ err = output(frag); if (err) goto fail; IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGCREATES); } IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); kfree_skb(skb); return err; fail: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; }
static int __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, struct flowi *fl, struct dst_entry **dst_p) { struct dst_entry *dst, *dst_prev; struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; struct in6_addr *remote = &fl->fl6_dst; struct in6_addr *local = &fl->fl6_src; int i; int err = 0; int header_len = 0; int trailer_len = 0; dst = dst_prev = NULL; for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops); if (unlikely(dst1 == NULL)) { err = -ENOBUFS; goto error; } if (!dst) dst = dst1; else { dst_prev->child = dst1; dst1->flags |= DST_NOHASH; dst_clone(dst1); } dst_prev = dst1; if (xfrm[i]->props.mode) { remote = (struct in6_addr*)&xfrm[i]->id.daddr; local = (struct in6_addr*)&xfrm[i]->props.saddr; } header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; } if (ipv6_addr_cmp(remote, &fl->fl6_dst)) { struct flowi fl_tunnel; memset(&fl_tunnel, 0, sizeof(fl_tunnel)); ipv6_addr_copy(&fl_tunnel.fl6_dst, remote); ipv6_addr_copy(&fl_tunnel.fl6_src, local); err = xfrm_dst_lookup((struct xfrm_dst **) &rt, &fl_tunnel, AF_INET6); if (err) goto error; } else { dst_hold(&rt->u.dst); } dst_prev->child = &rt->u.dst; i = 0; for (dst_prev = dst; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; if (rt->u.dst.dev) dev_hold(rt->u.dst.dev); dst_prev->obsolete = -1; dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics)); dst_prev->path = &rt->u.dst; /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; dst_prev->output = dst_prev->xfrm->type->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); x->u.rt6.rt6i_metric = rt0->rt6i_metric; x->u.rt6.rt6i_node = rt0->rt6i_node; x->u.rt6.rt6i_gateway = rt0->rt6i_gateway; memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); x->u.rt6.rt6i_dst = rt0->rt6i_dst; x->u.rt6.rt6i_src = rt0->rt6i_src; header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } *dst_p = dst; return 0; error: if (dst) dst_free(dst); return err; }