int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); if (net->ipv6.devconf_all->forwarding == 0) goto error; if (skb_warn_if_lro(skb)) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } skb_forward_csum(skb); /* * We DO NOT make any processing on * RA packets, pushing them to user level AS IS * without ane WARRANTY that application will be able * to interpret them. The reason is that we * cannot make anything clever here. * * We are not end-node, so that if packet contains * AH/ESP, we cannot make anything. * Defragmentation also would be mistake, RA packets * cannot be fragmented, because there is no warranty * that different fragments will go along one path. --ANK */ if (opt->ra) { u8 *ptr = skb_network_header(skb) + opt->ra; if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) return 0; } /* * check and decrement ttl */ if (hdr->hop_limit <= 1) { /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; } /* XXX: idev->cnf.proxy_ndp? */ if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; /* * incoming and outgoing devices are the same * send a redirect. */ rt = (struct rt6_info *) dst; if ((rt->rt6i_flags & RTF_GATEWAY)) target = (struct in6_addr*)&n->primary_key; else target = &hdr->daddr; /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ if (xrlim_allow(dst, 1*HZ)) ndisc_send_redirect(skb, n, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); /* This check is security critical. */ if (addrtype == IPV6_ADDR_ANY || addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); goto error; } } if (skb->len > dst_mtu(dst) && !skb_is_gso(skb)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } hdr = ipv6_hdr(skb); /* Mangling hops number delayed to point after skb COW */ hdr->hop_limit--; IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; }
static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; struct ipv6_txoptions *opt; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); newnp->rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; if (dst == NULL) { struct in6_addr *final_p, final; struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = ireq6->rmt_addr; final_p = fl6_update_dst(&fl6, opt, &final); fl6.saddr = ireq6->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->daddr = ireq6->rmt_addr; newnp->saddr = ireq6->loc_addr; newnp->rcv_saddr = ireq6->loc_addr; newsk->sk_bound_dev_if = ireq6->iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (ireq6->pktopts != NULL) { newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); kfree_skb(ireq6->pktopts); ireq6->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (opt) inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { sock_put(newsk); goto out; } __inet6_hash(newsk, NULL); return newsk; out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt != NULL && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); return NULL; }
static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; int mtu; struct in6_addr *addr6; int addr_type; if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; if (skb_dst(skb)) neigh = skb_dst(skb)->neighbour; if (neigh == NULL) { if (net_ratelimit()) printk(KERN_DEBUG "sit: nexthop == NULL\n"); goto tx_error; } addr6 = (struct in6_addr*)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if ((addr_type & IPV6_ADDR_UNICAST) && ipv6_addr_is_isatap(addr6)) dst = addr6->s6_addr32[3]; else goto tx_error; } if (!dst) dst = try_6to4(&iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; if (skb_dst(skb)) neigh = skb_dst(skb)->neighbour; if (neigh == NULL) { if (net_ratelimit()) printk(KERN_DEBUG "sit: nexthop == NULL\n"); goto tx_error; } addr6 = (struct in6_addr*)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { addr6 = &ipv6_hdr(skb)->daddr; addr_type = ipv6_addr_type(addr6); } if ((addr_type & IPV6_ADDR_COMPATv4) == 0) goto tx_error_icmp; dst = addr6->s6_addr32[3]; } { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, .tos = RT_TOS(tos) } }, .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { stats->tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); stats->tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); stats->collisions++; goto tx_error; } if (tiph->frag_off) mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (mtu < 68) { stats->collisions++; ip_rt_put(rt); goto tx_error; } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); ip_rt_put(rt); goto tx_error; } if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; iph6 = ipv6_hdr(skb); } skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; skb_dst_drop(skb); skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. */ iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; if (mtu > IPV6_MIN_MTU) iph->frag_off = tiph->frag_off; else iph->frag_off = 0; iph->protocol = IPPROTO_IPV6; iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; nf_reset(skb); IPTUNNEL_XMIT(); return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; dev_kfree_skb(skb); return NETDEV_TX_OK; }
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; if (opt) { unsigned int head_room; /* First: exthdrs may take lots of space (~8K for now) MAX_HEADER is not enough. */ head_room = opt->opt_nflen + opt->opt_flen; seg_len += head_room; head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; } consume_skb(skb); skb = skb2; skb_set_owner_w(skb, sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); /* * Fill in the IPv6 header */ if (np) hlimit = np->hop_limit; if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, fl6->flowlabel); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; hdr->saddr = fl6->saddr; hdr->daddr = *first_hop; skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } skb->dev = dst->dev; ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; }
static unsigned int echo_tg6(struct sk_buff **poldskb, const struct xt_action_param *par) { const struct sk_buff *oldskb = *poldskb; const struct udphdr *oldudp; const struct ipv6hdr *oldip; struct udphdr *newudp, oldudp_buf; struct ipv6hdr *newip; struct sk_buff *newskb; unsigned int data_len; void *payload; struct flowi6 fl; struct dst_entry *dst = NULL; struct net *net = dev_net((par->in != NULL) ? par->in : par->out); /* This allows us to do the copy operation in fewer lines of code. */ if (skb_linearize(*poldskb) < 0) return NF_DROP; oldip = ipv6_hdr(oldskb); oldudp = skb_header_pointer(oldskb, par->thoff, sizeof(*oldudp), &oldudp_buf); if (oldudp == NULL) return NF_DROP; if (ntohs(oldudp->len) <= sizeof(*oldudp)) return NF_DROP; newskb = alloc_skb(LL_MAX_HEADER + sizeof(*newip) + ntohs(oldudp->len), GFP_ATOMIC); if (newskb == NULL) return NF_DROP; skb_reserve(newskb, LL_MAX_HEADER); newskb->protocol = oldskb->protocol; skb_reset_network_header(newskb); newip = (void *)skb_put(newskb, sizeof(*newip)); newip->version = oldip->version; newip->priority = oldip->priority; memcpy(newip->flow_lbl, oldip->flow_lbl, sizeof(newip->flow_lbl)); newip->nexthdr = par->target->proto; newip->saddr = oldip->daddr; newip->daddr = oldip->saddr; skb_reset_transport_header(newskb); newudp = (void *)skb_put(newskb, sizeof(*newudp)); newudp->source = oldudp->dest; newudp->dest = oldudp->source; newudp->len = oldudp->len; data_len = htons(oldudp->len) - sizeof(*oldudp); payload = skb_header_pointer(oldskb, par->thoff + sizeof(*oldudp), data_len, NULL); memcpy(skb_put(newskb, data_len), payload, data_len); #if 0 /* * Since no fields are modified (we just swapped things around), * this works too in our specific echo case. */ newudp->check = oldudp->check; #else newudp->check = 0; newudp->check = csum_ipv6_magic(&newip->saddr, &newip->daddr, ntohs(newudp->len), IPPROTO_UDP, csum_partial(newudp, ntohs(newudp->len), 0)); #endif memset(&fl, 0, sizeof(fl)); fl.flowi6_proto = newip->nexthdr; memcpy(&fl.saddr, &newip->saddr, sizeof(fl.saddr)); memcpy(&fl.daddr, &newip->daddr, sizeof(fl.daddr)); fl.fl6_sport = newudp->source; fl.fl6_dport = newudp->dest; security_skb_classify_flow((struct sk_buff *)oldskb, flowi6_to_flowi(&fl)); dst = ip6_route_output(net, NULL, &fl); if (dst == NULL || dst->error != 0) { dst_release(dst); goto free_nskb; } skb_dst_set(newskb, dst); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38) newip->hop_limit = ip6_dst_hoplimit(skb_dst(newskb)); #else newip->hop_limit = dst_metric(skb_dst(newskb), RTAX_HOPLIMIT); #endif newskb->ip_summed = CHECKSUM_NONE; /* "Never happens" (?) */ if (newskb->len > dst_mtu(skb_dst(newskb))) goto free_nskb; nf_ct_attach(newskb, *poldskb); ip6_local_out(newskb); return NF_DROP; free_nskb: kfree_skb(newskb); return NF_DROP; }
int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct ipv6_pinfo *np = inet6_sk(sk); int len; int val; if (level == SOL_IP && sk->sk_type != SOCK_RAW) return udp_prot.getsockopt(sk, level, optname, optval, optlen); if(level!=SOL_IPV6) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; break; case MCAST_MSFILTER: { struct group_filter gsf; int err; if (len < GROUP_FILTER_SIZE(0)) return -EINVAL; if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) return -EFAULT; lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, (struct group_filter __user *)optval, optlen); release_sock(sk); return err; } case IPV6_PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; msg.msg_control = optval; msg.msg_controllen = len; msg.msg_flags = 0; lock_sock(sk); skb = np->pktoptions; if (skb) atomic_inc(&skb->users); release_sock(sk); if (skb) { int err = datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); if (err) return err; } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif; ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } } len -= msg.msg_controllen; return put_user(len, optlen); } case IPV6_MTU: { struct dst_entry *dst; val = 0; lock_sock(sk); dst = sk_dst_get(sk); if (dst) { val = dst_mtu(dst); dst_release(dst); } release_sock(sk); if (!val) return -ENOTCONN; break; } case IPV6_V6ONLY: val = np->ipv6only; break; case IPV6_PKTINFO: val = np->rxopt.bits.rxinfo; break; case IPV6_HOPLIMIT: val = np->rxopt.bits.rxhlim; break; case IPV6_RTHDR: val = np->rxopt.bits.srcrt; break; case IPV6_HOPOPTS: val = np->rxopt.bits.hopopts; break; case IPV6_DSTOPTS: val = np->rxopt.bits.dstopts; break; case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; case IPV6_UNICAST_HOPS: val = np->hop_limit; break; case IPV6_MULTICAST_HOPS: val = np->mcast_hops; break; case IPV6_MULTICAST_LOOP: val = np->mc_loop; break; case IPV6_MULTICAST_IF: val = np->mcast_oif; break; case IPV6_MTU_DISCOVER: val = np->pmtudisc; break; case IPV6_RECVERR: val = np->recverr; break; case IPV6_FLOWINFO_SEND: val = np->sndflow; break; default: #ifdef CONFIG_NETFILTER lock_sock(sk); val = nf_getsockopt(sk, PF_INET6, optname, optval, &len); release_sock(sk); if (val >= 0) val = put_user(len, optlen); return val; #else return -EINVAL; #endif } len = min_t(unsigned int, sizeof(int), len); if(put_user(len, optlen)) return -EFAULT; if(copy_to_user(optval,&val,len)) return -EFAULT; return 0; }
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; /* icmp should have updated the destination cache entry */ dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; goto out; } if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ BUG_TRAP(req->sk == NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct ipv6_pinfo *np = inet6_sk(sk); int len; int val; if (ip6_mroute_opt(optname)) return ip6_mroute_getsockopt(sk, optname, optval, optlen); if (get_user(len, optlen)) return -EFAULT; switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; break; case MCAST_MSFILTER: { struct group_filter gsf; int err; if (len < GROUP_FILTER_SIZE(0)) return -EINVAL; if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, (struct group_filter __user *)optval, optlen); release_sock(sk); return err; } case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; msg.msg_control = optval; msg.msg_controllen = len; msg.msg_flags = 0; lock_sock(sk); skb = np->pktoptions; if (skb) atomic_inc(&skb->users); release_sock(sk); if (skb) { int err = datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); if (err) return err; } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } } len -= msg.msg_controllen; return put_user(len, optlen); } case IPV6_MTU: { struct dst_entry *dst; val = 0; rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) val = dst_mtu(dst); rcu_read_unlock(); if (!val) return -ENOTCONN; break; } case IPV6_V6ONLY: val = np->ipv6only; break; case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; case IPV6_2292PKTINFO: val = np->rxopt.bits.rxoinfo; break; case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; case IPV6_2292HOPLIMIT: val = np->rxopt.bits.rxohlim; break; case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; case IPV6_2292RTHDR: val = np->rxopt.bits.osrcrt; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: { lock_sock(sk); len = ipv6_getsockopt_sticky(sk, np->opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) return len; return put_user(len, optlen); } case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; case IPV6_2292HOPOPTS: val = np->rxopt.bits.ohopopts; break; case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; case IPV6_2292DSTOPTS: val = np->rxopt.bits.odstopts; break; case IPV6_TCLASS: val = np->tclass; break; case IPV6_RECVTCLASS: val = np->rxopt.bits.rxtclass; break; case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; case IPV6_RECVPATHMTU: val = np->rxopt.bits.rxpmtu; break; case IPV6_PATHMTU: { struct dst_entry *dst; struct ip6_mtuinfo mtuinfo; if (len < sizeof(mtuinfo)) return -EINVAL; len = sizeof(mtuinfo); memset(&mtuinfo, 0, sizeof(mtuinfo)); rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) mtuinfo.ip6m_mtu = dst_mtu(dst); rcu_read_unlock(); if (!mtuinfo.ip6m_mtu) return -ENOTCONN; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &mtuinfo, len)) return -EFAULT; return 0; break; } case IPV6_TRANSPARENT: val = inet_sk(sk)->transparent; break; case IPV6_RECVORIGDSTADDR: val = np->rxopt.bits.rxorigdstaddr; break; case IPV6_UNICAST_HOPS: case IPV6_MULTICAST_HOPS: { struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) val = np->hop_limit; else val = np->mcast_hops; if (val < 0) { rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) val = ip6_dst_hoplimit(dst); rcu_read_unlock(); } if (val < 0) val = sock_net(sk)->ipv6.devconf_all->hop_limit; break; } case IPV6_MULTICAST_LOOP: val = np->mc_loop; break; case IPV6_MULTICAST_IF: val = np->mcast_oif; break; case IPV6_MTU_DISCOVER: val = np->pmtudisc; break; case IPV6_RECVERR: val = np->recverr; break; case IPV6_FLOWINFO_SEND: val = np->sndflow; break; case IPV6_ADDR_PREFERENCES: val = 0; if (np->srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? */ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } if (np->srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; case IPV6_MINHOPCOUNT: val = np->min_hopcount; break; case IPV6_DONTFRAG: val = np->dontfrag; break; default: return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); if(put_user(len, optlen)) return -EFAULT; if(copy_to_user(optval,&val,len)) return -EFAULT; return 0; }
static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct flowi *fl, int encap_limit, __u32 *pmtu) { struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; struct dst_entry *dst; struct net_device *tdev; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; int pkt_len; if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { dst = ip6_route_output(net, NULL, fl); if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } tdev = dst->dev; if (tdev == dev) { stats->collisions++; if (net_ratelimit()) printk(KERN_WARNING "%s: Local routing loop detected!\n", t->parms.name); goto tx_err_dst_release; } mtu = dst_mtu(dst) - sizeof (*ipv6h); if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom += LL_RESERVED_SPACE(tdev); if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) goto tx_err_dst_release; if (skb->sk) skb_set_owner_w(new_skb, skb->sk); kfree_skb(skb); skb = new_skb; } skb_dst_drop(skb); skb_dst_set(skb, dst_clone(dst)); skb->transport_header = skb->network_header; proto = fl->proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); nf_reset(skb); pkt_len = skb->len; err = ip6_local_out(skb); if (net_xmit_eval(err) == 0) { stats->tx_bytes += pkt_len; stats->tx_packets++; } else { stats->tx_errors++; stats->tx_aborted_errors++; } ip6_tnl_dst_store(t, dst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: dst_release(dst); return err; }
static int ax8netfilter_do_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct inet_sock *inet = inet_sk(sk); int val; int len; if (level != SOL_IP) return -EOPNOTSUPP; if (ip_mroute_opt(optname)) return ip_mroute_getsockopt(sk, optname, optval, optlen); if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; lock_sock(sk); switch (optname) { case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; struct ip_options * opt = (struct ip_options *)optbuf; opt->optlen = 0; if (inet->opt) memcpy(optbuf, inet->opt, sizeof(struct ip_options)+ inet->opt->optlen); release_sock(sk); if (opt->optlen == 0) return put_user(0, optlen); ax8netfilter_ip_options_undo(opt); len = min_t(unsigned int, len, opt->optlen); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, opt->__data, len)) return -EFAULT; return 0; } case IP_PKTINFO: val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; break; case IP_RECVTTL: val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; break; case IP_RECVTOS: val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; break; case IP_RECVOPTS: val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; break; case IP_RETOPTS: val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; break; case IP_PASSSEC: val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; break; case IP_RECVORIGDSTADDR: val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; break; case IP_TOS: val = inet->tos; break; case IP_TTL: val = (inet->uc_ttl == -1 ? *ax8netfilter_sysctl_ip_default_ttl : inet->uc_ttl); break; case IP_HDRINCL: val = inet->hdrincl; break; case IP_MTU_DISCOVER: val = inet->pmtudisc; break; case IP_MTU: { struct dst_entry *dst; val = 0; dst = sk_dst_get(sk); if (dst) { val = dst_mtu(dst); dst_release(dst); } if (!val) { release_sock(sk); return -ENOTCONN; } break; } case IP_RECVERR: val = inet->recverr; break; case IP_MULTICAST_TTL: val = inet->mc_ttl; break; case IP_MULTICAST_LOOP: val = inet->mc_loop; break; case IP_MULTICAST_IF: { struct in_addr addr; len = min_t(unsigned int, len, sizeof(struct in_addr)); addr.s_addr = inet->mc_addr; release_sock(sk); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &addr, len)) return -EFAULT; return 0; } case IP_MSFILTER: { struct ip_msfilter msf; int err; if (len < IP_MSFILTER_SIZE(0)) { release_sock(sk); return -EINVAL; } if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { release_sock(sk); return -EFAULT; } err = ax8netfilter_ip_mc_msfget(sk, &msf, (struct ip_msfilter __user *)optval, optlen); release_sock(sk); return err; } case MCAST_MSFILTER: { struct group_filter gsf; int err; if (len < GROUP_FILTER_SIZE(0)) { release_sock(sk); return -EINVAL; } if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { release_sock(sk); return -EFAULT; } err = ax8netfilter_ip_mc_gsfget(sk, &gsf, (struct group_filter __user *)optval, optlen); release_sock(sk); return err; } case IP_PKTOPTIONS: { struct msghdr msg; release_sock(sk); if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; msg.msg_control = optval; msg.msg_controllen = len; msg.msg_flags = 0; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; info.ipi_addr.s_addr = inet->rcv_saddr; info.ipi_spec_dst.s_addr = inet->rcv_saddr; info.ipi_ifindex = inet->mc_index; put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } if (inet->cmsg_flags & IP_CMSG_TTL) { int hlim = inet->mc_ttl; put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); } len -= msg.msg_controllen; return put_user(len, optlen); } case IP_FREEBIND: val = inet->freebind; break; case IP_TRANSPARENT: val = inet->transparent; break; default: release_sock(sk); return -ENOPROTOOPT; } release_sock(sk); if (len < sizeof(int) && len > 0 && val>=0 && val<=255) { unsigned char ucval = (unsigned char)val; len = 1; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &ucval, 1)) return -EFAULT; } else { len = min_t(unsigned int, sizeof(int), len); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; } return 0; }
static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; u8 rel_type = type; u8 rel_code = code; __u32 rel_info = ntohl(info); int err; struct sk_buff *skb2; struct iphdr *eiph; struct flowi fl; struct rtable *rt; err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); if (err < 0) return err; if (rel_msg == 0) return 0; switch (rel_type) { case ICMPV6_DEST_UNREACH: if (rel_code != ICMPV6_ADDR_UNREACH) return 0; rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; break; case ICMPV6_PKT_TOOBIG: if (rel_code != 0) return 0; rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; default: return 0; } if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) return 0; skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) return 0; skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); /* Try to guess incoming interface */ memset(&fl, 0, sizeof(fl)); fl.fl4_dst = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_IPIP; if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) goto out; skb2->dev = rt->u.dst.dev; /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; fl.fl4_dst = eiph->daddr; fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || rt->u.dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; } skb_dst_set(skb2, (struct dst_entry *)rt); } else { ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) goto out; } /* change mtu on this route */ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { if (rel_info > dst_mtu(skb_dst(skb2))) goto out; skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); out: kfree_skb(skb2); return 0; }
static int tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_action_param *par, unsigned int in_mtu, unsigned int tcphoff, unsigned int minlen) { const struct xt_tcpmss_info *info = par->targinfo; struct tcphdr *tcph; int len, tcp_hdrlen; unsigned int i; __be16 oldval; u16 newmss; u8 *opt; /* This is a fragment, no TCP header is available */ if (par->fragoff != 0) return 0; if (!skb_make_writable(skb, skb->len)) return -1; len = skb->len - tcphoff; if (len < (int)sizeof(struct tcphdr)) return -1; tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); tcp_hdrlen = tcph->doff * 4; if (len < tcp_hdrlen) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); return -1; } if (in_mtu <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", in_mtu); return -1; } newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; } else newmss = info->mss; opt = (u_int8_t *)tcph; for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) { if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; /* Never increase MSS, even when setting it, as * doing so results in problems for hosts that rely * on MSS being set correctly. */ if (oldmss <= newmss) return 0; opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = newmss & 0x00ff; inet_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), 0); return 0; } }
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; /* icmp should have updated the destination cache entry */ dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = np->daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (req == NULL) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ WARN_ON(req->sk != NULL); if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
/* * IP Tunneling transmitter * * This function encapsulates the packet in a new IP packet, its * destination will be set to cp->daddr. Most code of this function * is taken from ipip.c. * * It is used in VS/TUN cluster. The load balancer selects a real * server from a cluster based on a scheduling algorithm, * encapsulates the request packet and forwards it to the selected * server. For example, all real servers are configured with * "ifconfig tunl0 <Virtual IP Address> up". When the server receives * the encapsulated packet, it will decapsulate the packet, processe * the request and return the response packets directly to the client * without passing the load balancer. This can greatly increase the * scalability of virtual server. * * Used for ANY protocol */ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; __be16 df = old_iph->frag_off; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; int ret; EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); } tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error_put; } if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); df |= (old_iph->frag_off & htons(IP_DF)); if ((old_iph->frag_off & htons(IP_DF) && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; } kfree_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); } skb->transport_header = skb->network_header; /* fix old IP header checksum */ ip_send_check(old_iph); skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. */ iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; iph->frag_off = df; iph->protocol = IPPROTO_IPIP; iph->tos = tos; iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; ret = IP_VS_XMIT_TUNNEL(skb, cp); if (ret == NF_ACCEPT) ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_put: ip_rt_put(rt); goto tx_error; }
/* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; const struct iphdr *oiph; struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _otcph, *tcph; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) return; oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), sizeof(_otcph), &_otcph); if (oth == NULL) return; /* No RST for RST. */ if (oth->rst) return; if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) return; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) return; oiph = ip_hdr(oldskb); nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + LL_MAX_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, LL_MAX_HEADER); skb_reset_network_header(nskb); niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); niph->version = 4; niph->ihl = sizeof(struct iphdr) / 4; niph->tos = 0; niph->id = 0; niph->frag_off = htons(IP_DF); niph->protocol = IPPROTO_TCP; niph->check = 0; niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); memset(tcph, 0, sizeof(*tcph)); tcph->source = oth->dest; tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; if (oth->ack) tcph->seq = oth->ack_seq; else { tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - ip_hdrlen(oldskb) - (oth->doff << 2)); tcph->ack = 1; } tcph->rst = 1; tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); nskb->protocol = htons(ETH_P_IP); if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; nf_ct_attach(nskb, oldskb); ip_local_out(nskb); return; free_nskb: kfree_skb(nskb); }
int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct ipv6hdr *old_iph = ipv6_hdr(skb); struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; int ret; EnterFunction(10); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, &saddr, 1, (IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL)))) goto tx_error_icmp; if (__ip_vs_is_local_route6(rt)) { dst_release(&rt->dst); IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); } tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); goto tx_error_put; } if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); skb->dev = net->loopback_dev; } icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { dst_release(&rt->dst); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; } kfree_skb(skb); skb = new_skb; old_iph = ipv6_hdr(skb); } skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. */ iph = ipv6_hdr(skb); iph->version = 6; iph->nexthdr = IPPROTO_IPV6; iph->payload_len = old_iph->payload_len; be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); ipv6_addr_copy(&iph->daddr, &cp->daddr.in6); ipv6_addr_copy(&iph->saddr, &saddr); iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; ret = IP_VS_XMIT_TUNNEL(skb, cp); if (ret == NF_ACCEPT) ip6_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_put: dst_release(&rt->dst); goto tx_error; }
//----------------------------------------------------------------------------- static int _gtpusp_ksocket_process_gtp(const unsigned char * const rx_buf_pP, const int lenP, unsigned char* tx_buf_pP) //----------------------------------------------------------------------------- { gtpv1u_msg_t gtpv1u_msg; uint8_t msg_type; struct iphdr *iph_p = NULL; struct iphdr *new_iph_p= NULL; struct sk_buff *skb_p = NULL; const unsigned char * rx_buf_p = rx_buf_pP; int err = 0; struct rtable *rt = NULL; struct flowi fl = { .u = { .ip4 = { .daddr = 0, .flowi4_tos = 0, .flowi4_scope = RT_SCOPE_UNIVERSE, } } }; msg_type = rx_buf_pP[1]; switch(msg_type) { case GTP_ECHO_REQ: PR_INFO(MODULE_NAME": TODO GTP ECHO_REQ, SEND TO GTPV1U TASK USER SPACE\n"); //TODO; return 0; break; case GTP_ERROR_INDICATION: PR_INFO(MODULE_NAME":TODO GTP ERROR INDICATION, SEND TO GTPV1U TASK USER SPACE\n"); //TODO; return 0; break; case GTP_ECHO_RSP: PR_INFO(MODULE_NAME":GTP ECHO_RSP, SEND TO GTPV1U TASK USER SPACE\n"); return 0; break; case GTP_GPDU: { gtpv1u_msg.version = ((*rx_buf_p) & 0xE0) >> 5; gtpv1u_msg.protocol_type = ((*rx_buf_p) & 0x10) >> 4; gtpv1u_msg.ext_hdr_flag = ((*rx_buf_p) & 0x04) >> 2; gtpv1u_msg.seq_num_flag = ((*rx_buf_p) & 0x02) >> 1; gtpv1u_msg.npdu_num_flag = ((*rx_buf_p) & 0x01); rx_buf_p++; gtpv1u_msg.msg_type = *(rx_buf_p); rx_buf_p++; rx_buf_p += 2; gtpv1u_msg.teid = ntohl(*((u_int32_t *)rx_buf_p)); rx_buf_p += 4; if(gtpv1u_msg.ext_hdr_flag || gtpv1u_msg.seq_num_flag || gtpv1u_msg.npdu_num_flag) { gtpv1u_msg.seq_num = ntohs(*(((u_int16_t *)rx_buf_p))); rx_buf_p += 2; gtpv1u_msg.npdu_num = *(rx_buf_p++); gtpv1u_msg.next_ext_hdr_type = *(rx_buf_p++); } gtpv1u_msg.msg_buf_offset = (u_int32_t)(rx_buf_p - rx_buf_pP); gtpv1u_msg.msg_buf_len = lenP - gtpv1u_msg.msg_buf_offset; gtpv1u_msg.msg_len = lenP; iph_p = (struct iphdr*)(&rx_buf_pP[gtpv1u_msg.msg_buf_offset]); fl.u.ip4.daddr = iph_p->daddr; fl.u.ip4.flowi4_tos = RT_TOS(iph_p->tos); rt = ip_route_output_key(&init_net, &fl.u.ip4); if (rt == NULL) { PR_INFO("GTPURH: Failed to route packet to dst 0x%x. Error: (%d)\n", fl.u.ip4.daddr, err); return NF_DROP; } if (rt->dst.dev == NULL) { pr_info("GTPURH: dst dev NULL\n"); return 0; } skb_p = alloc_skb(LL_MAX_HEADER + ntohs(iph_p->tot_len), GFP_ATOMIC); if (skb_p == NULL) { return 0; } skb_p->priority = rt_tos2priority(iph_p->tos); skb_p->pkt_type = PACKET_OTHERHOST; skb_dst_set(skb_p, dst_clone(&rt->dst)); skb_p->dev = skb_dst(skb_p)->dev; skb_reserve(skb_p, LL_MAX_HEADER + ntohs(iph_p->tot_len)); skb_p->protocol = htons(ETH_P_IP); new_iph_p = (void *)skb_push(skb_p, ntohs(iph_p->tot_len) - (iph_p->ihl << 2)); skb_reset_transport_header(skb_p); new_iph_p = (void *)skb_push(skb_p, iph_p->ihl << 2); memcpy(new_iph_p, iph_p, ntohs(iph_p->tot_len)); skb_reset_network_header(skb_p); skb_reset_inner_network_header(skb_p); skb_reset_inner_transport_header(skb_p); skb_p->mark = gtpv1u_msg.teid; new_iph_p->ttl = ip4_dst_hoplimit(skb_dst(skb_p)); skb_p->ip_summed = CHECKSUM_NONE; if (skb_p->len > dst_mtu(skb_dst(skb_p))) { PR_INFO("GTPURH: bad length\n"); goto free_skb; } ip_local_out(skb_p); return 0; free_skb: pr_info("GTPURH: Dropped skb\n"); kfree_skb(skb_p); return 0; } break; default: PR_INFO(MODULE_NAME":ERROR GTPU msg type %u\n", msg_type); return 0; } }
static unsigned int echo_tg4(struct sk_buff **poldskb, const struct xt_action_param *par) { const struct sk_buff *oldskb = *poldskb; const struct udphdr *oldudp; const struct iphdr *oldip; struct udphdr *newudp, oldudp_buf; struct iphdr *newip; struct sk_buff *newskb; unsigned int data_len; void *payload; /* This allows us to do the copy operation in fewer lines of code. */ if (skb_linearize(*poldskb) < 0) return NF_DROP; oldip = ip_hdr(oldskb); oldudp = skb_header_pointer(oldskb, par->thoff, sizeof(*oldudp), &oldudp_buf); if (oldudp == NULL) return NF_DROP; if (ntohs(oldudp->len) <= sizeof(*oldudp)) return NF_DROP; newskb = alloc_skb(LL_MAX_HEADER + sizeof(*newip) + ntohs(oldudp->len), GFP_ATOMIC); if (newskb == NULL) return NF_DROP; skb_reserve(newskb, LL_MAX_HEADER); newskb->protocol = oldskb->protocol; skb_reset_network_header(newskb); newip = (void *)skb_put(newskb, sizeof(*newip)); newip->version = oldip->version; newip->ihl = sizeof(*newip) / 4; newip->tos = oldip->tos; newip->id = 0; newip->frag_off = htons(IP_DF); newip->protocol = oldip->protocol; newip->check = 0; newip->saddr = oldip->daddr; newip->daddr = oldip->saddr; skb_reset_transport_header(newskb); newudp = (void *)skb_put(newskb, sizeof(*newudp)); newudp->source = oldudp->dest; newudp->dest = oldudp->source; newudp->len = oldudp->len; data_len = htons(oldudp->len) - sizeof(*oldudp); payload = skb_header_pointer(oldskb, par->thoff + sizeof(*oldudp), data_len, NULL); memcpy(skb_put(newskb, data_len), payload, data_len); #if 0 /* * Since no fields are modified (we just swapped things around), * this works too in our specific echo case. */ newudp->check = oldudp->check; #else newudp->check = 0; newudp->check = csum_tcpudp_magic(newip->saddr, newip->daddr, ntohs(newudp->len), IPPROTO_UDP, csum_partial(newudp, ntohs(newudp->len), 0)); #endif /* ip_route_me_harder expects the skb's dst to be set */ skb_dst_set(newskb, dst_clone(skb_dst(oldskb))); if (ip_route_me_harder(&newskb, RTN_UNSPEC) != 0) goto free_nskb; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38) newip->ttl = ip4_dst_hoplimit(skb_dst(newskb)); #else newip->ttl = dst_metric(skb_dst(newskb), RTAX_HOPLIMIT); #endif newskb->ip_summed = CHECKSUM_NONE; /* "Never happens" (?) */ if (newskb->len > dst_mtu(skb_dst(newskb))) goto free_nskb; nf_ct_attach(newskb, *poldskb); ip_local_out(newskb); return NF_DROP; free_nskb: kfree_skb(newskb); return NF_DROP; }
int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rt6_info *rt; /* Route to the other host */ int mtu; EnterFunction(10); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 0, (IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL)))) goto tx_error_icmp; if (__ip_vs_is_local_route6(rt)) { dst_release(&rt->dst); IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); } /* MTU checking */ mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); skb->dev = net->loopback_dev; } icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } /* * Call ip_send_check because we are not sure it is called * after ip_defrag. Is copy-on-write needed? */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; }
int ip_forward(struct sk_buff *skb) { struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); if (skb_warn_if_lro(skb)) goto drop; if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; if (skb->pkt_type != PACKET_HOST) goto drop; skb_forward_csum(skb); /* * According to the RFC, we must first decrease the TTL field. If * that reaches zero, we must reply an ICMP control message telling * that the packet's lifetime expired. */ if (ip_hdr(skb)->ttl <= 1) goto too_many_hops; if (!xfrm4_route_forward(skb)) goto drop; rt = skb->rtable; if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); goto drop; } /* We are about to mangle packet. Copy it! */ #if 0 if( (LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len) > skb_headroom(skb)) { printk("=========%s(%d),dst.dev(%s),reserved(%d),header_len(%d),hard_header_len(%d),needed_headroom(%d)\n",__FUNCTION__,__LINE__,rt->u.dst.dev->name,LL_RESERVED_SPACE(rt->u.dst.dev), rt->u.dst.header_len,rt->u.dst.dev->hard_header_len,rt->u.dst.dev->needed_headroom); printk("======%s(%d),skb->headroom(%d)\n",__FUNCTION__,__LINE__,skb_headroom(skb)); } #endif if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) goto drop; iph = ip_hdr(skb); /* Decrease ttl after skb cow done */ ip_decrease_ttl(iph); /* * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev, ip_forward_finish); sr_failed: /* * Strict routing permits no gatewaying */ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); goto drop; too_many_hops: /* Tell the sender its packet died... */ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); return NET_RX_DROP; }
/* * ICMP packet transmitter * called by the ip_vs_in_icmp */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum) { struct rtable *rt; /* Route to the other host */ int mtu; int rc; int local; int rt_mode; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ /* LOCALNODE from FORWARD hook is not supported */ rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(ip_hdr(skb)->tos), rt_mode, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(cp->daddr.ip) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; ip_vs_nat_icmp(skb, pp, cp, 0); if (!local) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); rc = NF_STOLEN; goto out; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); rc = NF_STOLEN; out: LeaveFunction(10); return rc; tx_error_put: ip_rt_put(rt); goto tx_error; }
/* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; __be16 tmp_port; __be32 tmp_addr; int needs_ack; unsigned int addr_type; /* IP header checks: fragment. */ if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) return; oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4, sizeof(_otcph), &_otcph); if (oth == NULL) return; /* No RST for RST. */ if (oth->rst) return; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP)) return; /* We need a linear, writeable skb. We also need to expand headroom in case hh_len of incoming interface < hh_len of outgoing interface */ nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb), GFP_ATOMIC); if (!nskb) return; /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); nskb->mark = 0; skb_init_secmark(nskb); tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); /* Swap source and dest */ tmp_addr = nskb->nh.iph->saddr; nskb->nh.iph->saddr = nskb->nh.iph->daddr; nskb->nh.iph->daddr = tmp_addr; tmp_port = tcph->source; tcph->source = tcph->dest; tcph->dest = tmp_port; /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr)); nskb->nh.iph->tot_len = htons(nskb->len); if (tcph->ack) { needs_ack = 0; tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - oldskb->nh.iph->ihl*4 - (oth->doff<<2)); tcph->seq = 0; } /* Reset flags */ ((u_int8_t *)tcph)[13] = 0; tcph->rst = 1; tcph->ack = needs_ack; tcph->window = 0; tcph->urg_ptr = 0; /* Adjust TCP checksum */ tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, nskb->nh.iph->daddr, csum_partial((char *)tcph, sizeof(struct tcphdr), 0)); /* Set DF, id = 0 */ nskb->nh.iph->frag_off = htons(IP_DF); nskb->nh.iph->id = 0; addr_type = RTN_UNSPEC; if (hook != NF_IP_FORWARD #ifdef CONFIG_BRIDGE_NETFILTER || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) #endif ) addr_type = RTN_LOCAL; if (ip_route_me_harder(&nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; /* Adjust IP TTL */ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); /* Adjust IP checksum */ nskb->nh.iph->check = 0; nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, nskb->nh.iph->ihl); /* "Never happens" */ if (nskb->len > dst_mtu(nskb->dst)) goto free_nskb; nf_ct_attach(nskb, oldskb); NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev, dst_output); return; free_nskb: kfree_skb(nskb); }
int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum) { struct rt6_info *rt; /* Route to the other host */ int mtu; int rc; int local; int rt_mode; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ /* LOCALNODE from FORWARD hook is not supported */ rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 0, rt_mode))) goto tx_error_icmp; local = __ip_vs_is_local_route6(rt); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); skb->dev = net->loopback_dev; } icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; ip_vs_nat_icmp_v6(skb, pp, cp, 0); if (!local || !skb->dev) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { /* destined to loopback, do we need to change route? */ dst_release(&rt->dst); } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); rc = NF_STOLEN; goto out; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); rc = NF_STOLEN; out: LeaveFunction(10); return rc; tx_error_put: dst_release(&rt->dst); goto tx_error; }
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; int copy; int err; int offset = 0; __u8 tx_flags = 0; if (flags&MSG_PROBE) return 0; cork = &inet->cork.base; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking */ if (opt) { if (WARN_ON(np->cork.opt)) return -EINVAL; np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); if (unlikely(np->cork.opt == NULL)) return -ENOBUFS; np->cork.opt->tot_len = opt->tot_len; np->cork.opt->opt_flen = opt->opt_flen; np->cork.opt->opt_nflen = opt->opt_nflen; np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); if (opt->dst0opt && !np->cork.opt->dst0opt) return -ENOBUFS; np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); if (opt->dst1opt && !np->cork.opt->dst1opt) return -ENOBUFS; np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); if (opt->hopopt && !np->cork.opt->hopopt) return -ENOBUFS; np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); if (opt->srcrt && !np->cork.opt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } dst_hold(&rt->dst); cork->dst = &rt->dst; inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } cork->fragsize = mtu; if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; exthdrlen = (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; mtu = cork->fragsize; } hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); return -EMSGSIZE; } } /* For UDP, check if TX timestamp is enabled */ if (sk->sk_type == SOCK_DGRAM) sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. * Otherwise, we need to reserve fragment header and * fragment alignment (= 8-15 octects, in total). * * Note that we may need to "move" the data from the tail of * of the buffer to the new fragment when we split * the message. * * FIXME: It may be fragmented into multiple chunks * at once if non-fragmentable extension headers * are too large. * --yoshfuji */ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_RAW)) { ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); return -EMSGSIZE; } skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) goto error; return 0; } if (!skb) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; alloc_new_skb: /* There's no room in the current skb */ if (skb) fraggap = skb->len - maxfraglen; else fraggap = 0; /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, np->pmtudisc == IPV6_PMTUDISC_PROBE); skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; alloclen += dst_exthdrlen; if (datalen != length + fraggap) { /* * this is not the last fragment, the trailer * space is regarded as data space. */ datalen += rt->dst.trailer_len; } alloclen += rt->dst.trailer_len; fraglen = datalen + fragheaderlen; /* * We just reserve space for fragment header. * Note: this may be overallocation if the message * (without MSG_MORE) fits into the MTU. */ alloclen += sizeof(struct frag_hdr); if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; else { /* Only the initial fragment * is time stamped. */ tx_flags = 0; } } if (skb == NULL) goto error; /* * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + dst_exthdrlen); if (sk->sk_type == SOCK_DGRAM) skb_shinfo(skb)->tx_flags = tx_flags; /* * Find where to start putting bytes */ data = skb_put(skb, fraglen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { err = -EINVAL; kfree_skb(skb); goto error; } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; /* * Put the packet on the pending queue */ __skb_queue_tail(&sk->sk_write_queue, skb); continue; } if (copy > length) copy = length; if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; } } else { int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) goto error; if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { err = -EMSGSIZE; if (i == MAX_SKB_FRAGS) goto error; __skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, 0); skb_shinfo(skb)->nr_frags = ++i; get_page(pfrag->page); } copy = min_t(int, copy, pfrag->size - pfrag->offset); if (getfrag(from, page_address(pfrag->page) + pfrag->offset, offset, copy, skb->len, skb) < 0) goto error_efault; pfrag->offset += copy; skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; atomic_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; } return 0; error_efault: err = -EFAULT; error: cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; }
/* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP */ int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rtable *rt; /* Route to the other host */ int mtu; struct iphdr *iph = ip_hdr(skb); int local; EnterFunction(10); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(cp->daddr.ip) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error_put; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); if (!local) { /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_put: ip_rt_put(rt); goto tx_error; }
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6_txoptions *opt, int ipfragok) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; int hlimit = -1; int tclass = 0; u32 mtu; if (opt) { unsigned int head_room; /* First: exthdrs may take lots of space (~8K for now) MAX_HEADER is not enough. */ head_room = opt->opt_nflen + opt->opt_flen; seg_len += head_room; head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; } kfree_skb(skb); skb = skb2; if (sk) skb_set_owner_w(skb, sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); /* Allow local fragmentation. */ if (ipfragok) skb->local_df = 1; /* * Fill in the IPv6 header */ if (np) { tclass = np->tclass; hlimit = np->hop_limit; } if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, first_hop); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } if (net_ratelimit()) printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; }
int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rt6_info *rt; /* Route to the other host */ int mtu; int local; EnterFunction(10); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; p = skb_header_pointer(skb, sizeof(struct ipv6hdr), sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 0, (IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR)))) goto tx_error_icmp; local = __ip_vs_is_local_route6(rt); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); skb->dev = net->loopback_dev; } icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6); if (!local || !skb->dev) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { /* destined to loopback, do we need to change route? */ dst_release(&rt->dst); } IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: LeaveFunction(10); kfree_skb(skb); return NF_STOLEN; tx_error_put: dst_release(&rt->dst); goto tx_error; }
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; unsigned int maxfraglen, fragheaderlen; int exthdrlen; int hh_len; int mtu; int copy; int err; int offset = 0; int csummode = CHECKSUM_NONE; if (flags&MSG_PROBE) return 0; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking */ if (opt) { if (WARN_ON(np->cork.opt)) return -EINVAL; np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); if (unlikely(np->cork.opt == NULL)) return -ENOBUFS; np->cork.opt->tot_len = opt->tot_len; np->cork.opt->opt_flen = opt->opt_flen; np->cork.opt->opt_nflen = opt->opt_nflen; np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); if (opt->dst0opt && !np->cork.opt->dst0opt) return -ENOBUFS; np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); if (opt->dst1opt && !np->cork.opt->dst1opt) return -ENOBUFS; np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); if (opt->hopopt && !np->cork.opt->hopopt) return -ENOBUFS; np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); if (opt->srcrt && !np->cork.opt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } dst_hold(&rt->u.dst); inet->cork.dst = &rt->u.dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } inet->cork.fragsize = mtu; if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; } else { rt = (struct rt6_info *)inet->cork.dst; fl = &inet->cork.fl; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; mtu = inet->cork.fragsize; } hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); return -EMSGSIZE; } } /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. * Otherwise, we need to reserve fragment header and * fragment alignment (= 8-15 octects, in total). * * Note that we may need to "move" the data from the tail of * of the buffer to the new fragment when we split * the message. * * FIXME: It may be fragmented into multiple chunks * at once if non-fragmentable extension headers * are too large. * --yoshfuji */ inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && (rt->u.dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); if (err) goto error; return 0; } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; /* There's no room in the current skb */ if (skb_prev) fraggap = skb_prev->len - maxfraglen; else fraggap = 0; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && !(rt->u.dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; /* * The last fragment gets additional space at tail. * Note: we overallocate on fragments with MSG_MODE * because we have no idea if we're the last one. */ if (datalen == length + fraggap) alloclen += rt->u.dst.trailer_len; /* * We just reserve space for fragment header. * Note: this may be overallocation if the message * (without MSG_MORE) fits into the MTU. */ alloclen += sizeof(struct frag_hdr); if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; } if (skb == NULL) goto error; /* * Fill in the control structures */ skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation */ skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); /* * Find where to start putting bytes */ data = skb_put(skb, fraglen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { err = -EINVAL; kfree_skb(skb); goto error; } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; /* * Put the packet on the pending queue */ __skb_queue_tail(&sk->sk_write_queue, skb); continue; } if (copy > length) copy = length; if (!(rt->u.dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; } } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; struct page *page = sk->sk_sndmsg_page; int off = sk->sk_sndmsg_off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; if (page != frag->page) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if(i < MAX_SKB_FRAGS) { if (copy > PAGE_SIZE) copy = PAGE_SIZE; page = alloc_pages(sk->sk_allocation, 0); if (page == NULL) { err = -ENOMEM; goto error; } sk->sk_sndmsg_page = page; sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; } else { err = -EMSGSIZE; goto error; } if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; skb->truesize += copy; atomic_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; } return 0; error: inet->cork.length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; }
/* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; struct rtable *rt; u_int16_t tmp_port; u_int32_t tmp_addr; unsigned int tcplen; int needs_ack; int hh_len; /* IP header checks: fragment. */ if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) return; oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4, sizeof(_otcph), &_otcph); if (oth == NULL) return; /* No RST for RST. */ if (oth->rst) return; /* Check checksum */ tcplen = oldskb->len - iph->ihl * 4; if (((hook != NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_HW) || (hook == NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_UNNECESSARY)) && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, oldskb->ip_summed == CHECKSUM_HW ? oldskb->csum : skb_checksum(oldskb, iph->ihl * 4, tcplen, 0))) return; if ((rt = route_reverse(oldskb, oth, hook)) == NULL) return; hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); /* We need a linear, writeable skb. We also need to expand headroom in case hh_len of incoming interface < hh_len of outgoing interface */ nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb), GFP_ATOMIC); if (!nskb) { dst_release(&rt->u.dst); return; } dst_release(nskb->dst); nskb->dst = &rt->u.dst; /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); nskb->nfmark = 0; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(nskb->nf_bridge); nskb->nf_bridge = NULL; #endif tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); /* Swap source and dest */ tmp_addr = nskb->nh.iph->saddr; nskb->nh.iph->saddr = nskb->nh.iph->daddr; nskb->nh.iph->daddr = tmp_addr; tmp_port = tcph->source; tcph->source = tcph->dest; tcph->dest = tmp_port; /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr)); nskb->nh.iph->tot_len = htons(nskb->len); if (tcph->ack) { needs_ack = 0; tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - oldskb->nh.iph->ihl*4 - (oth->doff<<2)); tcph->seq = 0; } /* Reset flags */ ((u_int8_t *)tcph)[13] = 0; tcph->rst = 1; tcph->ack = needs_ack; tcph->window = 0; tcph->urg_ptr = 0; /* Adjust TCP checksum */ tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, nskb->nh.iph->daddr, csum_partial((char *)tcph, sizeof(struct tcphdr), 0)); /* Adjust IP TTL, DF */ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); /* Set DF, id = 0 */ nskb->nh.iph->frag_off = htons(IP_DF); nskb->nh.iph->id = 0; /* Adjust IP checksum */ nskb->nh.iph->check = 0; nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, nskb->nh.iph->ihl); /* "Never happens" */ if (nskb->len > dst_mtu(nskb->dst)) goto free_nskb; nf_ct_attach(nskb, oldskb); NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev, dst_output); return; free_nskb: kfree_skb(nskb); }