/*
 * arp_filter - check that the route for an ARP exchange uses @dev.
 * @sip: first address handed to the route lookup
 * @tip: second address handed to the route lookup
 * @dev: device the ARP packet is associated with
 *
 * Performs an output route lookup in @dev's network namespace and
 * compares the resulting output device with @dev.
 *
 * Returns 1 when the lookup fails or the route leaves through a
 * different device (LINUX_MIB_ARPFILTER is bumped in the latter case),
 * 0 when the route uses @dev.
 */
static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *route = ip_route_output(net, sip, tip, 0, 0);
	int mismatch;

	if (IS_ERR(route))
		return 1;

	mismatch = (route->dst.dev != dev);
	if (mismatch)
		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);

	/* Drop the reference taken by the lookup. */
	ip_rt_put(route);
	return mismatch;
}
/*
 * ovs_tunnel_get_egress_info - fill in the egress tunnel info of an upcall.
 * @upcall:  upcall whose egress_tun_info / egress_tun_opts are written
 * @net:     namespace used for the route lookup
 * @skb:     packet carrying the tunnel info and mark
 * @ipproto: IP protocol handed to the route lookup
 * @tp_src:  transport source port stored in the egress key
 * @tp_dst:  transport destination port stored in the egress key
 *
 * Returns 0 on success, -EINVAL when @skb has no tunnel info or the
 * info is not AF_INET, or the error encoded in the failed route lookup.
 */
int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, struct net *net,
			       struct sk_buff *skb, u8 ipproto,
			       __be16 tp_src, __be16 tp_dst)
{
	struct ip_tunnel_info *egress = upcall->egress_tun_info;
	struct ip_tunnel_info *src_info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *route;
	struct flowi4 flow;

	if (unlikely(!src_info) || ip_tunnel_info_af(src_info) != AF_INET)
		return -EINVAL;

	key = &src_info->key;

	/* Route lookup to get source IP address.
	 * The process may need to be changed if the corresponding process
	 * in vports ops changed.
	 */
	route = ovs_tunnel_route_lookup(net, key, skb->mark, &flow, ipproto);
	if (IS_ERR(route))
		return PTR_ERR(route);

	/* Only the flow filled in by the lookup is needed, not the route. */
	ip_rt_put(route);

	/* Generate egress_tun_info based on tun_info,
	 * saddr, tp_src and tp_dst
	 */
	ip_tunnel_key_init(&egress->key,
			   flow.saddr, key->u.ipv4.dst,
			   key->tos,
			   key->ttl,
			   tp_src, tp_dst,
			   key->tun_id,
			   key->tun_flags);
	egress->options_len = src_info->options_len;
	egress->mode = src_info->mode;
	upcall->egress_tun_opts = ip_tunnel_info_opts(src_info);

	return 0;
}
static int ip_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *iph; int hlen = LL_MAX_HEADER; int mtu = ETH_DATA_LEN; int t_hlen = tunnel->hlen + sizeof(struct iphdr); iph = &tunnel->parms.iph; /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { struct flowi4 fl4; struct rtable *rt; init_tunnel_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), tunnel->parms.link); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { tdev = rt->dst.dev; tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len + tdev->needed_headroom; mtu = tdev->mtu; } dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); if (mtu < 68) mtu = 68; return mtu; }
/*
 * Before 2.6.11 we did implicit source NAT if required.  Warn about change.
 *
 * Looks up the output route toward @dstip; if the route's source address
 * differs from @srcip, prints a one-time warning (guarded by a static
 * flag).  Does nothing when the route lookup fails.
 */
static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
{
	static int already_warned = 0;
	struct flowi key = { .nl_u = { .ip4_u = { .daddr = dstip } } };
	struct rtable *route;

	if (ip_route_output_key(&route, &key) == 0) {
		if (!already_warned && route->rt_src != srcip) {
			printk("NAT: no longer support implicit source local NAT\n");
			printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
			       NIPQUAD(srcip), NIPQUAD(dstip));
			already_warned = 1;
		}
		ip_rt_put(route);
	}
}
/*
 * arp_filter - check that the route for an ARP exchange uses @dev.
 * @sip: address used as the flow's destination
 * @tip: address used as the flow's source
 * @dev: device the ARP packet is associated with
 *
 * Returns 1 when the route lookup fails or the route's output device is
 * not @dev (LINUX_MIB_ARPFILTER is bumped in the latter case), else 0.
 */
static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
{
	struct flowi key = { .nl_u = { .ip4_u = { .daddr = sip,
						  .saddr = tip } } };
	struct rtable *route;
	int wrong_dev;

	if (ip_route_output_key(&route, &key) < 0)
		return 1;

	wrong_dev = (route->u.dst.dev != dev);
	if (wrong_dev)
		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);

	/* Drop the reference taken by the lookup. */
	ip_rt_put(route);
	return wrong_dev;
}
static int ipip6_tunnel_init(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel; struct iphdr *iph; tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); if (iph->daddr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, .tos = RT_TOS(iph->tos) } }, .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; struct rtable *rt; if (!ip_route_output_key(&rt, &fl)) { tdev = rt->u.dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); dev->mtu = tdev->mtu - sizeof(struct iphdr); if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } dev->iflink = tunnel->parms.link; return 0; }
/*
 * clip_setentry - bind a CLIP VCC to the ATMARP entry for @ip.
 * @vcc: ATM VCC; must have clip_push as its push handler
 * @ip:  IPv4 address to bind to, or 0 to detach the VCC from its entry
 *
 * Returns -EBADF for a non-CLIP VCC, the route lookup error, -ENOMEM if
 * no neighbour entry could be obtained, 0 on detach, or otherwise the
 * result of neigh_update().
 */
static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
{
	struct clip_vcc *clip_vcc;
	struct atmarp_entry *entry;
	struct neighbour *neigh;
	struct rtable *rt;
	int error;

	if (vcc->push != clip_push) {
		printk(KERN_WARNING "clip_setentry: non-CLIP VCC\n");
		return -EBADF;
	}
	clip_vcc = CLIP_VCC(vcc);

	/* A zero address means "remove the binding". */
	if (!ip) {
		if (clip_vcc->entry) {
			DPRINTK("setentry: remove\n");
			unlink_clip_vcc(clip_vcc);
		} else {
			printk(KERN_ERR "hiding hidden ATMARP entry\n");
		}
		return 0;
	}

	{
		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1} } };

		error = ip_route_output_key(&rt, &fl);
	}
	if (error)
		return error;

	/* The route is only needed to learn the output device. */
	neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
	ip_rt_put(rt);
	if (!neigh)
		return -ENOMEM;

	entry = NEIGH2ENTRY(neigh);
	if (entry != clip_vcc->entry) {
		if (clip_vcc->entry) {
			DPRINTK("setentry: update\n");
			unlink_clip_vcc(clip_vcc);
		} else {
			DPRINTK("setentry: add\n");
		}
		link_vcc(clip_vcc, entry);
	}

	error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
	neigh_release(neigh);
	return error;
}
int arp_req_delete(struct arpreq *r, struct net_device * dev) { int err; u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) { u32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (mask == 0xFFFFFFFF) return pneigh_delete(&arp_tbl, &ip, dev); if (mask == 0) { if (dev == NULL) { ipv4_devconf.proxy_arp = 0; return 0; } if (__in_dev_get(dev)) { __in_dev_get(dev)->cnf.proxy_arp = 0; return 0; } return -ENXIO; } return -EINVAL; } if (dev == NULL) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } }; struct rtable * rt; if ((err = ip_route_output_key(&rt, &fl)) != 0) return err; dev = rt->u.dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; } err = -ENXIO; neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { if (neigh->nud_state&~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, 1, 0); neigh_release(neigh); } return err; }
/*
 * clip_setentry - bind a CLIP VCC to the ATMARP entry for @ip.
 * @vcc: ATM VCC; must have clip_push as its push handler
 * @ip:  IPv4 address to bind to, or 0 to detach the VCC from its entry
 *
 * Returns -EBADF for a non-CLIP VCC, the route lookup error, -ENOMEM if
 * no neighbour entry could be obtained, 0 on detach, or otherwise the
 * result of neigh_update().
 */
static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
{
	struct clip_vcc *clip_vcc;
	struct atmarp_entry *entry;
	struct neighbour *neigh;
	struct rtable *route;
	int error;

	if (vcc->push != clip_push) {
		pr_warning("non-CLIP VCC\n");
		return -EBADF;
	}
	clip_vcc = CLIP_VCC(vcc);

	/* A zero address means "remove the binding". */
	if (!ip) {
		if (clip_vcc->entry) {
			pr_debug("remove\n");
			unlink_clip_vcc(clip_vcc);
		} else {
			pr_err("hiding hidden ATMARP entry\n");
		}
		return 0;
	}

	route = ip_route_output(&init_net, ip, 0, 1, 0);
	if (IS_ERR(route))
		return PTR_ERR(route);

	/* The route is only needed to learn the output device. */
	neigh = __neigh_lookup(&arp_tbl, &ip, route->dst.dev, 1);
	ip_rt_put(route);
	if (!neigh)
		return -ENOMEM;

	entry = neighbour_priv(neigh);
	if (entry != clip_vcc->entry) {
		if (clip_vcc->entry) {
			pr_debug("update\n");
			unlink_clip_vcc(clip_vcc);
		} else {
			pr_debug("add\n");
		}
		link_vcc(clip_vcc, entry);
	}

	error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
	neigh_release(neigh);
	return error;
}
/*
 * ovs_tunnel_get_egress_info - derive egress tunnel info from @tun_info.
 * @egress_tun_info: output, initialised on success
 * @net:             namespace used for the route lookup
 * @tun_info:        tunnel info to start from; NULL yields -EINVAL
 * @ipproto:         IP protocol handed to the route lookup
 * @skb_mark:        mark handed to the route lookup
 * @tp_src:          transport source port stored in the result
 * @tp_dst:          transport destination port stored in the result
 *
 * Returns 0 on success, -EINVAL without @tun_info, or the error encoded
 * in the failed route lookup.
 */
int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
			       struct net *net,
			       const struct ovs_tunnel_info *tun_info,
			       u8 ipproto, u32 skb_mark,
			       __be16 tp_src, __be16 tp_dst)
{
	const struct ovs_key_ipv4_tunnel *key;
	struct rtable *route;
	struct flowi4 flow;

	if (unlikely(!tun_info))
		return -EINVAL;

	key = &tun_info->tunnel;

	/* Route lookup to get source IP address.
	 * The process may need to be changed if the corresponding process
	 * in vports ops changed.
	 */
	route = ovs_tunnel_route_lookup(net, key, skb_mark, &flow, ipproto);
	if (IS_ERR(route))
		return PTR_ERR(route);

	/* Only the flow filled in by the lookup is needed, not the route. */
	ip_rt_put(route);

	/* Generate egress_tun_info based on tun_info,
	 * saddr, tp_src and tp_dst
	 */
	__ovs_flow_tun_info_init(egress_tun_info,
				 flow.saddr, key->ipv4_dst,
				 key->ipv4_tos,
				 key->ipv4_ttl,
				 tp_src, tp_dst,
				 key->tun_id,
				 key->tun_flags,
				 tun_info->options,
				 tun_info->options_len);

	return 0;
}
static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device *dev) { __be32 ip; if (r->arp_flags & ATF_PUBL) return arp_req_delete_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; } return arp_invalidate(dev, ip); }
/* This will route a SYN-ACK, i.e., the response to a request to open a new connection. */ struct dst_entry *serval_ipv4_req_route(struct sock *sk, struct request_sock *rsk, int protocol, u32 saddr, u32 daddr) { struct rtable *rt; struct ip_options *opt = NULL; /* inet_rsk(req)->opt; */ struct flowi fl; serval_flow_init_output(&fl, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), 0, protocol, #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)) inet_sk_flowi_flags(sk), #else 0, #endif daddr, saddr, 0, 0); serval_security_req_classify_flow(rsk, &fl); rt = serval_ip_route_output_flow(sock_net(sk), &fl, sk, 0); if (!rt) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; return route_dst(rt); route_err: ip_rt_put(rt); no_route: #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); #else IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); #endif return NULL; }
static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) { struct rtable *rt; struct device *dev = NULL; if (imr->imr_address.s_addr) { dev = ip_dev_find(imr->imr_address.s_addr); if (!dev) return NULL; } if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) { dev = rt->u.dst.dev; ip_rt_put(rt); } if (dev) { imr->imr_ifindex = dev->ifindex; return dev->ip_ptr; } return NULL; }
static int ipgre_open(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr)) { struct flowi fl = { .oif = t->parms.link, .nl_u = { .ip4_u = { .daddr = t->parms.iph.daddr, .saddr = t->parms.iph.saddr, .tos = RT_TOS(t->parms.iph.tos) } }, .proto = IPPROTO_GRE }; struct rtable *rt; if (ip_route_output_key(dev_net(dev), &rt, &fl)) return -EADDRNOTAVAIL; dev = rt->u.dst.dev; ip_rt_put(rt); if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); }
static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4, struct net_device *vrf_dev) { struct rtable *rt; int err = 1; rt = ip_route_output_flow(dev_net(vrf_dev), fl4, NULL); if (IS_ERR(rt)) goto out; /* TO-DO: what about broadcast ? */ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); goto out; } skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); err = 0; out: return err; }
static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device * dev) { int err; __be32 ip; struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) return arp_req_delete_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } }; struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; dev = rt->u.dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; } err = -ENXIO; neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { if (neigh->nud_state&~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN); neigh_release(neigh); } return err; }
/* * ICMP packet transmitter * called by the ip_vs_in_icmp */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset) { struct rtable *rt; /* Route to the other host */ int mtu; int rc; int local; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc_unchecked(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(ip_hdr(skb)->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? 
*/ if (local && ipv4_is_loopback(rt->rt_dst) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; ip_vs_nat_icmp(skb, pp, cp, 0); if (!local) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); rc = NF_STOLEN; goto out; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); rc = NF_STOLEN; out: LeaveFunction(10); return rc; tx_error_put: ip_rt_put(rt); goto tx_error; }
/*
 * dccp_v4_connect - start an active DCCP connection over IPv4.
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as struct sockaddr_in
 * @addr_len: length of @uaddr
 *
 * Validates the address, routes toward it (or toward the first hop of a
 * source route option), records addresses and ports on the socket, moves
 * it to DCCP_REQUESTING, hashes it, re-routes with the final ports and
 * finally calls dccp_connect().
 *
 * Returns 0 on success or a negative errno: -EINVAL (short address, or
 * zero destination with a source route), -EAFNOSUPPORT, -ENETUNREACH
 * (multicast/broadcast route), or the error of the routing, hashing or
 * dccp_connect() step.
 */
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	dp->dccps_role = DCCP_ROLE_CLIENT;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;

	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	/* With a source route option, route toward its first hop instead. */
	if (inet_opt != NULL && inet_opt->opt.srr) {
		if (daddr == 0)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_DCCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	/* Without a source route, use the destination the lookup settled on. */
	if (inet_opt == NULL || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	/* Unbound socket: adopt the source address chosen by routing. */
	if (inet->inet_saddr == 0)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);
	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	/*
	 * Socket identity is still unknown (sport may be zero).
	 * However we set state to DCCP_REQUESTING and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	dccp_set_state(sk, DCCP_REQUESTING);
	err = inet_hash_connect(&dccp_death_row, sk);
	if (err != 0)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		/* Make sure the failure path does not put an error pointer. */
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk_setup_caps(sk, &rt->dst);

	dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
						    inet->inet_daddr,
						    inet->inet_sport,
						    inet->inet_dport);
	inet->inet_id = dp->dccps_iss ^ jiffies;

	err = dccp_connect(sk);
	/*
	 * The route was handed to sk_setup_caps() above, so the failure
	 * path must not put it again.
	 * NOTE(review): presumably the socket now owns the reference - confirm.
	 */
	rt = NULL;
	if (err != 0)
		goto failure;
out:
	return err;
failure:
	/*
	 * This unhashes the socket and releases the local port, if necessary.
	 */
	dccp_set_state(sk, DCCP_CLOSED);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	goto out;
}
static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = skb->nh.ipv6h; u8 tos = tunnel->parms.iph.tos; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ u32 dst = tiph->daddr; int mtu; struct in6_addr *addr6; int addr_type; if (tunnel->recursion++) { tunnel->stat.collisions++; goto tx_error; } if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; if (!dst) dst = try_6to4(&iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; if (skb->dst) neigh = skb->dst->neighbour; if (neigh == NULL) { if (net_ratelimit()) printk(KERN_DEBUG "sit: nexthop == NULL\n"); goto tx_error; } addr6 = (struct in6_addr*)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { addr6 = &skb->nh.ipv6h->daddr; addr_type = ipv6_addr_type(addr6); } if ((addr_type & IPV6_ADDR_COMPATv4) == 0) goto tx_error_icmp; dst = addr6->s6_addr32[3]; } { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, .tos = RT_TOS(tos) } }, .oif = tunnel->parms.link }; if (ip_route_output_key(&rt, &fl)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); tunnel->stat.collisions++; goto tx_error; } if (tiph->frag_off) mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr); else mtu = skb->dst ? 
dst_pmtu(skb->dst) : dev->mtu; if (mtu < 68) { tunnel->stat.collisions++; ip_rt_put(rt); goto tx_error; } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (tunnel->parms.iph.daddr && skb->dst) skb->dst->ops->update_pmtu(skb->dst, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); ip_rt_put(rt); goto tx_error; } if (tunnel->err_count > 0) { if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } skb->h.raw = skb->nh.raw; /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); tunnel->recursion--; return 0; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; iph6 = skb->nh.ipv6h; } skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); dst_release(skb->dst); skb->dst = &rt->u.dst; /* * Push down and install the IPIP header. */ iph = skb->nh.iph; iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; if (mtu > IPV6_MIN_MTU) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; iph->protocol = IPPROTO_IPV6; iph->tos = INET_ECN_encapsulate(tos, ip6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; #ifdef CONFIG_NETFILTER nf_conntrack_put(skb->nfct); skb->nfct = NULL; #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 0; #endif #endif IPTUNNEL_XMIT(); tunnel->recursion--; return 0; tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; dev_kfree_skb(skb); tunnel->recursion--; return 0; }
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; __be16 df; struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ if (skb_dst(skb) == NULL) { dev->stats.tx_fifo_errors++; goto tx_error; } if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); dst = rt_nexthop(rt, inner_iph->daddr); } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { const struct in6_addr *addr6; struct neighbour *neigh; bool do_tx_error_icmp; int addr_type; neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); if (neigh == NULL) goto tx_error; addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { addr6 = &ipv6_hdr(skb)->daddr; addr_type = ipv6_addr_type(addr6); } if ((addr_type & IPV6_ADDR_COMPATv4) == 0) do_tx_error_icmp = true; else { do_tx_error_icmp = false; dst = addr6->s6_addr32[3]; } neigh_release(neigh); if (do_tx_error_icmp) goto tx_error_icmp; } #endif else goto tx_error; } tos = tnl_params->tos; if (tos & 0x1) { tos &= ~0x1; if (skb->protocol == htons(ETH_P_IP)) tos = inner_iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } rt = ip_route_output_tunnel(tunnel->net, &fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); if (IS_ERR(rt)) { dev->stats.tx_carrier_errors++; goto tx_error; } if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { ip_rt_put(rt); goto tx_error; } if (tunnel->net != dev_net(dev)) 
skb_scrub_packet(skb); if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); ttl = tnl_params->ttl; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) ttl = inner_iph->ttl; #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; #endif else ttl = ip4_dst_hoplimit(&rt->dst); } df = tnl_params->frag_off; if (skb->protocol == htons(ETH_P_IP)) df |= (inner_iph->frag_off&htons(IP_DF)); max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len; if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { dev->stats.tx_dropped++; dev_kfree_skb(skb); return; } err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return; #if IS_ENABLED(CONFIG_IPV6) tx_error_icmp: dst_link_failure(skb); #endif tx_error: dev->stats.tx_errors++; dev_kfree_skb(skb); }
static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev) { struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; u16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ u32 dst = tiph->daddr; int mtu; if (tunnel->recursion++) { tunnel->stat.collisions++; goto tx_error; } if (skb->protocol != __constant_htons(ETH_P_IP)) goto tx_error; if (tos&1) tos = old_iph->tos; if (!dst) { /* NBMA tunnel */ if ((rt = (struct rtable*)skb->dst) == NULL) { tunnel->stat.tx_fifo_errors++; goto tx_error; } if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); tunnel->stat.collisions++; goto tx_error; } mtu = rt->u.dst.pmtu - sizeof(struct iphdr); if (mtu < 68) { tunnel->stat.collisions++; ip_rt_put(rt); goto tx_error; } if (skb->dst && mtu < skb->dst->pmtu) skb->dst->pmtu = mtu; df |= (old_iph->frag_off&__constant_htons(IP_DF)); if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); goto tx_error; } if (tunnel->err_count > 0) { if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } skb->h.raw = skb->nh.raw; /* * Okay, now see if we can stuff it in the buffer as-is. 
*/ max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); tunnel->recursion--; return 0; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; } skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); dst_release(skb->dst); skb->dst = &rt->u.dst; /* * Push down and install the IPIP header. */ iph = skb->nh.iph; iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; iph->frag_off = df; iph->protocol = IPPROTO_IPIP; iph->tos = tos; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; if ((iph->ttl = tiph->ttl) == 0) iph->ttl = old_iph->ttl; iph->tot_len = htons(skb->len); iph->id = htons(ip_id_count++); ip_send_check(iph); stats->tx_bytes += skb->len; stats->tx_packets++; ip_send(skb); tunnel->recursion--; return 0; tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; dev_kfree_skb(skb); tunnel->recursion--; return 0; }
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 *
 *   @skb: packet to encapsulate
 *   @cp:  connection supplying the destination (cp->dest, cp->daddr)
 *   @pp:  protocol handler; not referenced in this function body
 *
 *   Every return statement visible in this body returns NF_STOLEN; on
 *   error the skb is freed and, once a route has been obtained, the
 *   route is released.
 */
int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df = old_iph->frag_off;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
						   IP_VS_RT_MODE_NON_LOCAL)))
		goto tx_error_icmp;
	/*
	 * NOTE(review): rt is released here yet dereferenced below, which is
	 * only safe if IP_VS_XMIT() leaves the function when its last
	 * argument is non-zero - confirm against the macro definition.
	 */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	/* Room left for the inner packet once the outer header is added. */
	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
	if (mtu < 68) {
		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
		goto tx_error_put;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	df |= (old_iph->frag_off & htons(IP_DF));

	/* DF set and the packet does not fit: tell the sender to fragment. */
	if ((old_iph->frag_off & htons(IP_DF) &&
	    mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		/* The old skb is gone; re-read the inner header pointer. */
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	/* Error after a successful route lookup: release the route first. */
	ip_rt_put(rt);
	goto tx_error;
}
static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = (struct sock *) chan->private; struct pppox_sock *po = pppox_sk(sk); struct pptp_opt *opt=&po->proto.pptp; struct pptp_gre_header *hdr; unsigned int header_len=sizeof(*hdr); int len=skb?skb->len:0; int err=0; int window; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ INC_TX_PACKETS; spin_lock_bh(&opt->xmit_lock); window=WRAPPED(opt->ack_recv,opt->seq_sent)?(__u32)0xffffffff-opt->seq_sent+opt->ack_recv:opt->seq_sent-opt->ack_recv; if (!skb){ if (opt->ack_sent == opt->seq_recv) goto exit; }else if (window>opt->window){ __set_bit(PPTP_FLAG_PAUSE,(unsigned long*)&opt->flags); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) mod_timer(&opt->ack_timeout_timer,opt->stat->rtt/100*HZ/10000); #else schedule_delayed_work(&opt->ack_timeout_work,opt->stat->rtt/100*HZ/10000); #endif goto exit; } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) { struct rt_key key = { .dst=opt->dst_addr.sin_addr.s_addr, .src=opt->src_addr.sin_addr.s_addr, .tos=RT_TOS(0), }; if ((err=ip_route_output_key(&rt, &key))) { goto tx_error; } } #else { struct flowi fl = { .oif = 0, .nl_u = { .ip4_u = { .daddr = opt->dst_addr.sin_addr.s_addr, .saddr = opt->src_addr.sin_addr.s_addr, .tos = RT_TOS(0) } }, .proto = IPPROTO_GRE }; if ((err=ip_route_output_key(&rt, &fl))) { goto tx_error; } } #endif tdev = rt->u.dst.dev; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) max_headroom = ((tdev->hard_header_len+15)&~15) + sizeof(*iph)+sizeof(*hdr)+2; #else max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph)+sizeof(*hdr)+2; #endif if (!skb){ skb=dev_alloc_skb(max_headroom); if (!skb) { ip_rt_put(rt); goto tx_error; } skb_reserve(skb,max_headroom-skb_headroom(skb)); }else if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = 
skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); goto tx_error; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); kfree_skb(skb); skb = new_skb; } if (skb->len){ int islcp; unsigned char *data=skb->data; islcp=((data[0] << 8) + data[1])== PPP_LCP && 1 <= data[2] && data[2] <= 7; /* compress protocol field */ if ((opt->ppp_flags & SC_COMP_PROT) && data[0]==0 && !islcp) skb_pull(skb,1); /* * Put in the address/control bytes if necessary */ if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) { data=skb_push(skb,2); data[0]=0xff; data[1]=0x03; } } len=skb->len; if (len==0) header_len-=sizeof(hdr->seq); if (opt->ack_sent == opt->seq_recv) header_len-=sizeof(hdr->ack); // Push down and install GRE header skb_push(skb,header_len); hdr=(struct pptp_gre_header *)(skb->data); hdr->flags = PPTP_GRE_FLAG_K; hdr->ver = PPTP_GRE_VER; hdr->protocol = htons(PPTP_GRE_PROTO); hdr->call_id = htons(opt->dst_addr.call_id); if (!len){ hdr->payload_len = 0; hdr->ver |= PPTP_GRE_FLAG_A; /* ack is in odd place because S == 0 */ hdr->seq = htonl(opt->seq_recv); opt->ack_sent = opt->seq_recv; opt->stat->tx_acks++; }else { hdr->flags |= PPTP_GRE_FLAG_S; hdr->seq = htonl(opt->seq_sent++); if (log_level>=3 && opt->seq_sent<=log_packets) printk(KERN_INFO"PPTP[%i]: send packet: seq=%i",opt->src_addr.call_id,opt->seq_sent); if (opt->ack_sent != opt->seq_recv) { /* send ack with this message */ hdr->ver |= PPTP_GRE_FLAG_A; hdr->ack = htonl(opt->seq_recv); opt->ack_sent = opt->seq_recv; if (log_level>=3 && opt->seq_sent<=log_packets) printk(" ack=%i",opt->seq_recv); } hdr->payload_len = htons(len); if (log_level>=3 && opt->seq_sent<=log_packets) printk("\n"); } /* * Push down and install the IP header. 
*/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) skb->transport_header = skb->network_header; skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); #else skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(*iph)); #endif memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) iph = ip_hdr(skb); #else iph = skb->nh.iph; #endif iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = 0;//df; iph->protocol = IPPROTO_GRE; iph->tos = 0; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) iph->ttl = sysctl_ip_default_ttl; #else iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); #endif iph->tot_len = htons(skb->len); dst_release(skb->dst); skb->dst = &rt->u.dst; nf_reset(skb); skb->ip_summed = CHECKSUM_NONE; ip_select_ident(iph, &rt->u.dst, NULL); ip_send_check(iph); err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); wake_up(&opt->wait); if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) { opt->stat->tx_sent++; if (!opt->stat->pt_seq){ opt->stat->pt_seq = opt->seq_sent; do_gettimeofday(&opt->stat->pt_time); } }else{ INC_TX_ERRORS; opt->stat->tx_failed++; } spin_unlock_bh(&opt->xmit_lock); return 1; tx_error: INC_TX_ERRORS; opt->stat->tx_failed++; if (!len) kfree_skb(skb); spin_unlock_bh(&opt->xmit_lock); return 1; exit: spin_unlock_bh(&opt->xmit_lock); return 0; }
/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address.
 * This is also true when SNAT takes place (for the reply direction).
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let's first consider the case that ip_route_input() succeeds:
 *
 * If the output device equals the logical bridge device the packet
 * came in on, we can consider this bridging. The corresponding MAC
 * address will be obtained in br_nf_pre_routing_finish_bridge.
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
 *
 * Let's now consider the case that ip_route_input() fails:
 *
 * This can be because the destination address is martian, in which case
 * the packet will be dropped.
 * If IP forwarding is disabled, ip_route_input() will fail, while
 * ip_route_output_key() can return success. The source
 * address for ip_route_output_key() is set to zero, so ip_route_output_key()
 * thinks we're handling a locally generated packet and won't care
 * if IP forwarding is enabled. If the output device equals the logical bridge
 * device, we proceed as if ip_route_input() succeeded. If it differs from the
 * logical bridge port or if ip_route_output_key() fails we drop the packet.
 *
 * Every path through this function returns 0; the skb is either freed
 * here or passed on through br_nf_hook_thresh().
 */
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct iphdr *iph = ip_hdr(skb);
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	struct rtable *rt;
	int err;

	/* Carry the fragment size recorded in the IP control block over. */
	nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;

	/* Restore the packet type that was stashed in the bridge info. */
	if (nf_bridge->pkt_otherhost) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->pkt_otherhost = false;
	}
	nf_bridge->in_prerouting = 0;
	if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
			struct in_device *in_dev = __in_dev_get_rcu(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;

			/* Retry as an output lookup with a zero source address. */
			rt = ip_route_output(net, iph->daddr, 0,
					     RT_TOS(iph->tos), 0);
			if (!IS_ERR(rt)) {
				/* - Bridged-and-DNAT'ed traffic doesn't
				 *   require ip_forwarding. */
				if (rt->dst.dev == dev) {
					skb_dst_set(skb, &rt->dst);
					goto bridged_dnat;
				}
				/* Route leaves through another device: drop. */
				ip_rt_put(rt);
			}
free_skb:
			kfree_skb(skb);
			return 0;
		} else {
			if (skb_dst(skb)->dev == dev) {
				/* Also entered by goto from the fallback
				 * output-route lookup above. */
bridged_dnat:
				skb->dev = nf_bridge->physindev;
				nf_bridge_update_protocol(skb);
				nf_bridge_push_encap_header(skb);
				br_nf_hook_thresh(NF_BR_PRE_ROUTING,
						  net, sk, skb, skb->dev,
						  NULL,
						  br_nf_pre_routing_finish_bridge);
				return 0;
			}
			/* Routed case: address the frame to the bridge
			 * device itself and mark it as for this host. */
			ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		/* Destination address unchanged: attach the route returned
		 * by bridge_parent_rtable() without taking a reference. */
		rt = bridge_parent_rtable(nf_bridge->physindev);
		if (!rt) {
			kfree_skb(skb);
			return 0;
		}
		skb_dst_set_noref(skb, &rt->dst);
	}

	/* Common tail: point the skb back at the physical input device,
	 * restore the encapsulation header and continue the bridge
	 * PRE_ROUTING hook with br_handle_frame_finish. */
	skb->dev = nf_bridge->physindev;
	nf_bridge_update_protocol(skb);
	nf_bridge_push_encap_header(skb);
	br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
			  br_handle_frame_finish);
	return 0;
}
static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; int mtu; if (tunnel->recursion++) { tunnel->stat.collisions++; goto tx_error; } if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; if (extract_ipv4_endpoint(&iph6->daddr, &dst) < 0) goto tx_error_icmp; { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, .tos = RT_TOS(tos) } }, .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(&rt, &fl)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); tunnel->stat.collisions++; goto tx_error; } if (tiph->frag_off) mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); else mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { tunnel->stat.collisions++; ip_rt_put(rt); goto tx_error; } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (tunnel->parms.iph.daddr && skb->dst) skb->dst->ops->update_pmtu(skb->dst, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); ip_rt_put(rt); goto tx_error; } if (tunnel->err_count > 0) { if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } /* * Okay, now see if we can stuff it in the buffer as-is. 
*/ max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); tunnel->recursion--; return 0; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; iph6 = ipv6_hdr(skb); } skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; dst_release(skb->dst); skb->dst = &rt->u.dst; /* * Push down and install the IPIP header. */ iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; if (mtu > IPV6_MIN_MTU) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; iph->protocol = IPPROTO_IPV6; iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; nf_reset(skb); IPTUNNEL_XMIT(); tunnel->recursion--; return 0; tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; dev_kfree_skb(skb); tunnel->recursion--; return 0; }
/*
 * Reroute packet to local IPv4 stack after DNAT
 *
 * Looks the (already rewritten) header addresses up again: as an input
 * route when the skb currently carries an input route, as an output route
 * otherwise. Returns 1 when the skb ends up with the new route, 0 when the
 * lookup fails or, for the output case, the new route is not local.
 */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->dst.dev;
	struct net *net = dev_net(dev);
	struct iphdr *iph = ip_hdr(skb);

	if (rt_is_input_route(rt)) {
		/* Remember the old dst reference so that it can be dropped
		 * once ip_route_input() has installed the new one. */
		unsigned long orefdst = skb->_skb_refdst;

		if (ip_route_input(skb, iph->daddr, iph->saddr,
				   iph->tos, skb->dev))
			return 0;
		refdst_drop(orefdst);
	} else {
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_mark = skb->mark,
		};

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt))
			return 0;
		/* Only a local route is acceptable here. */
		if (!(rt->rt_flags & RTCF_LOCAL)) {
			ip_rt_put(rt);
			return 0;
		}
		/* Drop old route. */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	}
	return 1;
}

#ifdef CONFIG_IP_VS_IPV6

/* Non-zero when the route's device exists and has IFF_LOOPBACK set. */
static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
	return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
}

/*
 * Look up an IPv6 output route to @daddr.
 *
 * When @ret_saddr is NULL the plain route is returned. Otherwise a source
 * address is selected if the lookup left it unspecified, the route is
 * optionally passed through xfrm_lookup() (@do_xfrm), and the source
 * address is copied to @ret_saddr.
 *
 * Returns the dst entry, or NULL on any failure.
 */
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			/* Make the dst_release() below see NULL, not the
			 * error pointer. */
			dst = NULL;
			goto out_err;
		}
	}
	ipv6_addr_copy(ret_saddr, &fl6.saddr);
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}

/*
 * Get route to destination or remote server
 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
 *	    &4=Allow redirect from remote daddr to local
 *
 * With @dest set, the route cached in @dest is used (and filled in on a
 * miss) under dest->dst_lock; otherwise a fresh lookup for @daddr is done.
 * Returns the route, or NULL when the lookup fails or the route is
 * rejected by one of the checks below (the route is released first).
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
		if (!rt) {
			u32 cookie;

			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest->dst_saddr,
						      do_xfrm);
			if (!dst) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			rt = (struct rt6_info *) dst;
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			/* The cache gets its own reference via dst_clone(). */
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest->dst_saddr,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
		spin_unlock(&dest->dst_lock);
	} else {
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			return NULL;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	/* Is this kind of destination (local / non-local) allowed? */
	if (!((local ? 1 : 2) & rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
			     local ? "local":"non-local", daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	/* Local destination without the redirect flag: only accepted when
	 * the packet's current route is local as well. */
	if (local && !(rt_mode & 4) &&
	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
	      __ip_vs_is_local_route6(ort))) {
		IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
			     "requires NAT method, dest: %pI6\n",
			     &ipv6_hdr(skb)->daddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	/* Never forward loopback-sourced traffic to a non-local address. */
	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
				    IPV6_ADDR_LOOPBACK)) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
			     "to non-local address, dest: %pI6\n",
			     &ipv6_hdr(skb)->saddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}

	return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 *
 *	The cached pointer is cleared first and the old entry released
 *	afterwards.
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = NULL;
	dst_release(old_dst);
}

/*
 * Prepare a tunnelled skb for transmission. Evaluates to NF_ACCEPT, or to
 * the result of ip_vs_confirm_conntrack() for connections flagged
 * IP_VS_CONN_F_NFCT. The caller acts on the result.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb, cp);	\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})

/*
 * Final step of the NAT transmitters.
 * NOTE: when "local" is true this macro executes "return NF_ACCEPT" in the
 * calling function; otherwise it passes the skb to the LOCAL_OUT hook.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	else						\
		ip_vs_update_conntrack(skb, cp, 1);	\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)

/*
 * Final step of the non-NAT transmitters; same as IP_VS_XMIT_NAT but
 * without the conntrack update.
 * NOTE: when "local" is true this macro executes "return NF_ACCEPT" in the
 * calling function.
 */
#define IP_VS_XMIT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp)
{
	/* we do not touch skb and do not need pskb ptr */
	/* local == 1, so the macro itself returns NF_ACCEPT from here */
	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
/* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP */ int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rtable *rt; /* Route to the other host */ int mtu; struct iphdr *iph = ip_hdr(skb); int local; EnterFunction(10); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? 
*/ if (local && ipv4_is_loopback(rt->rt_dst) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error_put; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); if (!local) { /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_put: ip_rt_put(rt); goto tx_error; }
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; __be32 daddr, nexthop; int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt != NULL && inet->opt->srr) { if (daddr == 0) return -EINVAL; nexthop = inet->opt->faddr; } tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; if (inet->inet_saddr == 0) inet->inet_saddr = rt->rt_src; inet->inet_rcv_saddr = inet->inet_saddr; inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket * lock select source port, enter ourselves into the hash tables and * complete initialization after this. */ dccp_set_state(sk, DCCP_REQUESTING); err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, inet->inet_dport, sk); if (err != 0) goto failure; /* OK, now commit destination to socket. 
*/ sk_setup_caps(sk, &rt->u.dst); dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, inet->inet_daddr, inet->inet_sport, inet->inet_dport); inet->inet_id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; if (err != 0) goto failure; out: return err; failure: /* * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; goto out; }
/*
 * Get route to destination or remote server
 *
 * @skb:     packet being forwarded
 * @dest:    real server whose cached route should be used, or NULL
 * @daddr:   address to route to when @dest is NULL
 * @rtos:    TOS value for the lookup
 * @rt_mode: IP_VS_RT_MODE_* flags naming the acceptable kinds of route
 *
 * With @dest set, the route cached in @dest is used; on a cache miss a new
 * route is looked up and stored (with its own reference) while holding
 * dest->dst_lock. Returns the route, or NULL when the lookup fails or the
 * route is rejected by the mode/loopback checks (it is released first).
 */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, u32 rtos, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rtable *rt;		/* route to the other host */
	struct rtable *orig_rt;		/* route the skb arrived with */
	int local;
	int wanted_mode;

	if (!dest) {
		rt = ip_route_output(net, daddr, 0, rtos, 0);
		if (IS_ERR(rt)) {
			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
				     &daddr);
			return NULL;
		}
	} else {
		spin_lock(&dest->dst_lock);
		rt = (struct rtable *) __ip_vs_dst_check(dest, rtos);
		if (!rt) {
			rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0);
			if (IS_ERR(rt)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			/* The cache keeps a reference of its own. */
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	}

	local = rt->rt_flags & RTCF_LOCAL;

	/* Is this kind of destination (local / non-local) allowed at all? */
	wanted_mode = local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL;
	if (!(wanted_mode & rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     local ? "local":"non-local", &rt->rt_dst);
		goto reject;
	}

	/*
	 * A local destination without the redirect flag is only accepted
	 * when the packet's current route is local as well.
	 */
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR)) {
		orig_rt = skb_rtable(skb);
		if (!orig_rt || !(orig_rt->rt_flags & RTCF_LOCAL)) {
			IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
				     "requires NAT method, dest: %pI4\n",
				     &ip_hdr(skb)->daddr, &rt->rt_dst);
			goto reject;
		}
	}

	/* Loopback-sourced traffic must not go to a non-local address. */
	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
			     "to non-local address, dest: %pI4\n",
			     &ip_hdr(skb)->saddr, &rt->rt_dst);
		goto reject;
	}

	return rt;

reject:
	ip_rt_put(rt);
	return NULL;
}
/*
 * ipip_err - ICMP error handler of the IPIP tunnel
 * @skb: the received ICMP packet (type/code are read from skb->h.icmph)
 * @dp:  start of the IP header quoted inside the ICMP message
 * @len: number of bytes available at @dp
 *
 * Two implementations are selected at compile time. Unless
 * I_WISH_WORLD_WERE_PERFECT is defined, the handler only records the error
 * in the matching tunnel (err_count / err_time). The alternative branch
 * tries to relay the error to the sender of the encapsulated packet with
 * icmp_send().
 */
void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)dp;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;

	/* Need at least a full quoted IP header to find the tunnel. */
	if (len < sizeof(struct iphdr))
		return;

	/* Filter out the ICMP types/codes that are not recorded. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Find the tunnel by the addresses of the quoted header. */
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		return;
	/* TTL 0 in the tunnel parameters: TIME_EXCEEDED is not recorded. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	/* Count errors that arrive within IPTUNNEL_ERR_TIMEO of the previous
	   one; otherwise restart the count at 1. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
	return;
#else
	/* Relaying variant; only built when I_WISH_WORLD_WERE_PERFECT is
	   defined. */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct rtable *rt;

	/* The quoted outer header must be followed by a full inner header. */
	if (len < hlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + hlen);

	/* Translate the received error into the one to relay
	   (rel_type / rel_code / rel_info). */
	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
			if (rel_info < hlen+68)
				return;
			rel_info -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (rel_info > ntohs(eiph->tot_len))
				return;
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb2->nh.raw = skb2->data;

	/* Try to guess incoming interface */
	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	/* NOTE(review): both branches below test for ARPHRD_IPGRE although
	   this is the IPIP handler -- confirm this is intended. */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (rel_info > skb2->dst->pmtu) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->pmtu = rel_info;
		rel_info = htonl(rel_info);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
		/* A fixed tunnel TTL turns TIME_EXCEEDED into HOST_UNREACH. */
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return;
#endif
}