static void ip_expire(unsigned long arg) { struct ipq *qp; qp = (struct ipq *)arg; /* * Send an ICMP "Fragment Reassembly Timeout" message. */ ip_statistics.IpReasmTimeout++; ip_statistics.IpReasmFails++; /* This if is always true... shrug */ if(qp->fragments!=NULL) icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0, qp->dev); /* * Nuke the fragment queue. */ ip_free(qp); }
static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct iphdr *iph = ip_hdr(skb); int encap_limit = -1; struct flowi fl; __u8 dsfield; __u32 mtu; int err; if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || !ip6_tnl_xmit_ctl(t)) return -1; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; memcpy(&fl, &t->fl, sizeof (fl)); fl.proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -1; } return 0; }
static bool ipv6_validate_packet_len(struct sk_buff *skb_in, struct sk_buff *skb_out) { struct ipv6hdr *ip6_hdr = ipv6_hdr(skb_out); struct hdr_iterator iterator = HDR_ITERATOR_INIT(ip6_hdr); unsigned int ipv6_mtu; unsigned int ipv4_mtu; if (skb_out->len <= skb_out->dev->mtu) return true; hdr_iterator_last(&iterator); if (iterator.hdr_type == IPPROTO_ICMPV6) { struct icmp6hdr *icmpv6_hdr = icmp6_hdr(skb_out); if (is_icmp6_error(icmpv6_hdr->icmp6_type)) { int new_packet_len = skb_out->dev->mtu; int l3_payload_len = new_packet_len - (iterator.data - (void *) ip6_hdr); skb_trim(skb_out, new_packet_len); ip6_hdr->payload_len = cpu_to_be16(l3_payload_len); icmpv6_hdr->icmp6_cksum = 0; icmpv6_hdr->icmp6_cksum = csum_ipv6_magic(&ip6_hdr->saddr, &ip6_hdr->daddr, l3_payload_len, IPPROTO_ICMPV6, csum_partial(icmpv6_hdr, l3_payload_len, 0)); return true; } } ipv6_mtu = skb_out->dev->mtu; ipv4_mtu = skb_in->dev->mtu; log_debug("Packet is too large for the outgoing MTU and IPv6 routers don't do fragmentation. " "Dropping..."); icmp_send(skb_in, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, cpu_to_be32(min_uint(ipv6_mtu, ipv4_mtu + 20))); return false; }
static int icmp_hnd_echo_request(const struct icmphdr *icmph, const struct icmpbody_echo *echo_req, struct sk_buff *skb) { size_t len; struct icmpbody_echo *echo_rep; if (icmph->code != 0) { skb_free(skb); return 0; /* error: bad code */ } len = ip_data_length(ip_hdr(skb)); if (sizeof *icmph + sizeof *echo_req > len) { skb_free(skb); return 0; /* error: invalid length */ } len -= sizeof *icmph + sizeof *echo_req; echo_rep = &icmp_hdr(skb)->body[0].echo; return icmp_send(ICMP_ECHO_REPLY, 0, echo_rep, sizeof *echo_rep + len, skb); }
/** * \ingroup tcpcmd * \b PING-Befehl Sende "Ping" an angegebene Adresse */ int16_t command_ping (char *outbuffer) { uint32_t var = 0; if (getLong(&var)) { unsigned long dest_ip=0; for (uint8_t i=0; i<4; ++i) { dest_ip += (var<<(8*i)); if (!getLong(&var)) break; } //ARP Request senden arp_request (dest_ip); //ICMP-Nachricht Type=8 Code=0: Echo-Anfrage //TODO: Sequenznummer, Identifier icmp_send(dest_ip,0x08,0x00,1,1); } return 0; }
int xfrm4_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; struct dst_entry *dst; struct iphdr *iph = skb->nh.iph; if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; if (!(iph->frag_off & htons(IP_DF))) goto out; dst = skb->dst; mtu = dst_pmtu(dst) - dst->header_len - dst->trailer_len; if (skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ret = -EMSGSIZE; } out: return ret; }
static int ipip6_rcv(struct sk_buff *skb) { struct iphdr *iph; struct ip_tunnel *tunnel; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; iph = skb->nh.iph; read_lock(&ipip6_lock); if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { secpath_reset(skb); skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; tunnel->stat.rx_packets++; tunnel->stat.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; nf_reset(skb); ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); read_unlock(&ipip6_lock); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); kfree_skb(skb); read_unlock(&ipip6_lock); out: return 0; }
static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { /* Looped back packet, drop it! */ if (rt_is_output_route(skb_rtable(skb))) goto drop; } #endif if (parse_gre_header(skb, &tpi, &csum_err) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) return 0; icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; }
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { int ret; struct xfrm4_protocol *handler; struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); if (!head) goto out; for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); kfree_skb(skb); return 0; }
/* * Pass or drop the packet. * Called by ip_vs_in, when the virtual service is available but * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { __be16 _ports[2], *pptr; struct iphdr *iph = ip_hdr(skb); pptr = skb_header_pointer(skb, iph->ihl*4, sizeof(_ports), _ports); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; } /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is RTN_UNICAST (and not local), then create a cache_bypass connection entry */ if (sysctl_ip_vs_cache_bypass && svc->fwmark && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { int ret, cs; struct ip_vs_conn *cp; ip_vs_service_put(svc); /* create a new connection entry */ IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); cp = ip_vs_conn_new(iph->protocol, iph->saddr, pptr[0], iph->daddr, pptr[1], 0, 0, IP_VS_CONN_F_BYPASS, NULL); if (cp == NULL) return NF_DROP; /* statistics */ ip_vs_in_stats(cp, skb); /* set state */ cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); /* transmit the first SYN packet */ ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; } /* * When the virtual ftp service is presented, packets destined * for other services on the VIP may get here (except services * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { ip_vs_service_put(svc); return NF_ACCEPT; } ip_vs_service_put(svc); /* * Notify the client that the destination is unreachable, and * release the socket buffer. * Since it is in IP layer, the TCP socket is not actually * created, the TCP RST packet cannot be sent, instead that * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ */ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; }
static unsigned int fw_in(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { int ret = FW_BLOCK; u_int16_t redirpt; /* Assume worse case: any hook could change packet */ (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; if ((*pskb)->ip_summed == CHECKSUM_HW) if (skb_checksum_help(*pskb, (out == NULL))) return NF_DROP; switch (hooknum) { case NF_IP_PRE_ROUTING: if (fwops->fw_acct_in) fwops->fw_acct_in(fwops, PF_INET, (struct net_device *)in, &redirpt, pskb); if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { *pskb = ip_ct_gather_frags(*pskb); if (!*pskb) return NF_STOLEN; } ret = fwops->fw_input(fwops, PF_INET, (struct net_device *)in, &redirpt, pskb); break; case NF_IP_FORWARD: /* Connection will only be set if it was demasqueraded: if so, skip forward chain. */ if ((*pskb)->nfct) ret = FW_ACCEPT; else ret = fwops->fw_forward(fwops, PF_INET, (struct net_device *)out, &redirpt, pskb); break; case NF_IP_POST_ROUTING: ret = fwops->fw_output(fwops, PF_INET, (struct net_device *)out, &redirpt, pskb); if (ret == FW_ACCEPT || ret == FW_SKIP) { if (fwops->fw_acct_out) fwops->fw_acct_out(fwops, PF_INET, (struct net_device *)out, &redirpt, pskb); /* ip_conntrack_confirm return NF_DROP or NF_ACCEPT */ if (ip_conntrack_confirm(*pskb) == NF_DROP) ret = FW_BLOCK; } break; } switch (ret) { case FW_REJECT: { /* Alexey says: * * Generally, routing is THE FIRST thing to make, when * packet enters IP stack. Before packet is routed you * cannot call any service routines from IP stack. */ struct iphdr *iph = (*pskb)->nh.iph; if ((*pskb)->dst != NULL || ip_route_input(*pskb, iph->daddr, iph->saddr, iph->tos, (struct net_device *)in) == 0) icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; } case FW_ACCEPT: case FW_SKIP: if (hooknum == NF_IP_PRE_ROUTING) { check_for_demasq(pskb); check_for_redirect(*pskb); } else if (hooknum == NF_IP_POST_ROUTING) { check_for_unredirect(*pskb); /* Handle ICMP errors from client here */ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP && (*pskb)->nfct) check_for_masq_error(pskb); } return NF_ACCEPT; case FW_MASQUERADE: if (hooknum == NF_IP_FORWARD) { #ifdef CONFIG_IP_VS /* check if it is for ip_vs */ if (check_for_ip_vs_out(pskb, okfn) == NF_STOLEN) return NF_STOLEN; #endif return do_masquerade(pskb, out); } else return NF_ACCEPT; case FW_REDIRECT: if (hooknum == NF_IP_PRE_ROUTING) return do_redirect(*pskb, in, redirpt); else return NF_ACCEPT; default: /* FW_BLOCK */ return NF_DROP; } }
int icmp_error(struct iphdr * __ip, int __type, int __code, struct ifnet * __if) { // uint8_t icmp_buf[sizeof(struct icmp) + IP_MAXOPTLEN + 8]; struct icmp * icp; int len; /* TODO: if (type != ICMP_REDIRECT) icmpstat.icps_error++; */ /* Don't send error if not the first fragment Ref.: TCP/IP Illustrated Volume 2, pg. 325 */ icp = (struct icmp *)__ip->opt; if (__ip->ip_off & ~(IP_MF | IP_DF)) { DCC_LOG1(LOG_WARNING, "not first fragment!; ip_off = %04x", __ip->ip_off); return -1; } len = MIN((__ip->hlen << 2) + 8 , ntohs(__ip->tot_len)); memcpy((void *)&(icp->icmp_ip), __ip, len); /* if ((ip->proto == IPPROTO_ICMP) && (type != ICMP_REDIRECT) && (totlen >= iplen + ICMP_MINLEN) ) { TODO: icmpstat.icps_oldicmp++; return -1; } */ /* Don't send error in response to a multicast or broadcast packet */ if (in_broadcast(__ip->daddr, __if) || IN_MULTICAST(__ip->daddr)) { DCC_LOG(LOG_INFO, "broadcast/multicast!"); return -1; } if ((unsigned int)__type > ICMP_MAXTYPE) { DCC_LOG(LOG_PANIC, "icmp_error"); return -1; } /* TODO: icmpstat.icps_outhist[type]++; */ icp->icmp_type = __type; icp->icmp_void = 0; if (__type == ICMP_PARAMETERPROB) { icp->icmp_pptr = __code; __code = 0; } else { if ((__type == ICMP_DEST_UNREACH) && (__code == ICMP_FRAG_NEEDED) && (__if)) { icp->icmp_nextmtu = htons(__if->if_mtu); } } icp->icmp_code = __code; return icmp_send(__ip, icp, len); }
/* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct ipq *qp; struct net *net; qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. */ if (clone) { spin_unlock(&qp->q.lock); icmp_send(clone, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); consume_skb(clone); goto out_rcu_unlock; } } out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); ipq_put(qp); }
/* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP */ int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rtable *rt; /* Route to the other host */ int mtu; struct iphdr *iph = ip_hdr(skb); int local; EnterFunction(10); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(rt->rt_dst) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error_put; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); if (!local) { /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_put: ip_rt_put(rt); goto tx_error; }
int ip_forward(struct sk_buff *skb) { struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; if (skb->pkt_type != PACKET_HOST) goto drop; skb->ip_summed = CHECKSUM_NONE; /* * According to the RFC, we must first decrease the TTL field. If * that reaches zero, we must reply an ICMP control message telling * that the packet's lifetime expired. */ iph = skb->nh.iph; if (iph->ttl <= 1) goto too_many_hops; if (!xfrm4_route_forward(skb)) goto drop; iph = skb->nh.iph; rt = (struct rtable*)skb->dst; if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; /* We are about to mangle packet. Copy it! */ if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) goto drop; iph = skb->nh.iph; /* Decrease ttl after skb cow done */ ip_decrease_ttl(iph); /* * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev, ip_forward_finish); sr_failed: /* * Strict routing permits no gatewaying */ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); goto drop; too_many_hops: /* Tell the sender its packet died... */ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); return NET_RX_DROP; }
void ipip_err(struct sk_buff *skb, unsigned char *dp, int len) { #ifndef I_WISH_WORLD_WERE_PERFECT /* It is not :-( All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ struct iphdr *iph = (struct iphdr*)dp; int type = skb->h.icmph->type; int code = skb->h.icmph->code; struct ip_tunnel *t; if (len < sizeof(struct iphdr)) return; switch (type) { default: case ICMP_PARAMETERPROB: return; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ return; case ICMP_FRAG_NEEDED: /* Soft state for pmtu is maintained by IP core. */ return; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, I believe they are just ether pollution. --ANK */ break; } break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return; break; } t = ipip_tunnel_lookup(iph->daddr, iph->saddr); if (t == NULL || t->parms.iph.daddr == 0) return; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) return; if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) t->err_count++; else t->err_count = 1; t->err_time = jiffies; return; #else struct iphdr *iph = (struct iphdr*)dp; int hlen = iph->ihl<<2; struct iphdr *eiph; int type = skb->h.icmph->type; int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; int rel_info = 0; struct sk_buff *skb2; struct rtable *rt; if (len < hlen + sizeof(struct iphdr)) return; eiph = (struct iphdr*)(dp + hlen); switch (type) { default: return; case ICMP_PARAMETERPROB: if (skb->h.icmph->un.gateway < hlen) return; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; rel_info = skb->h.icmph->un.gateway - hlen; break; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ return; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ rel_info = ntohs(skb->h.icmph->un.frag.mtu); if (rel_info < hlen+68) return; rel_info -= hlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ if (rel_info > ntohs(eiph->tot_len)) return; break; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, I believe, it is just ether pollution. --ANK */ rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; break; } break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return; break; } /* Prepare fake skb to feed it to icmp_send */ skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) return; dst_release(skb2->dst); skb2->dst = NULL; skb_pull(skb2, skb->data - (u8*)eiph); skb2->nh.raw = skb2->data; /* Try to guess incoming interface */ if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { kfree_skb(skb2); return; } skb2->dev = rt->u.dst.dev; /* route "incoming" packet */ if (rt->rt_flags&RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || rt->u.dst.dev->type != ARPHRD_IPGRE) { ip_rt_put(rt); kfree_skb(skb2); return; } } else { ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || skb2->dst->dev->type != ARPHRD_IPGRE) { kfree_skb(skb2); return; } } /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { if (rel_info > skb2->dst->pmtu) { kfree_skb(skb2); return; } skb2->dst->pmtu = rel_info; rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; if (t->parms.iph.ttl) { rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; } } icmp_send(skb2, rel_type, rel_code, rel_info); kfree_skb(skb2); return; #endif }
static inline void send_unreach(struct sk_buff *skb_in, int code) { icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); }
static int ipgre_rcv(struct sk_buff *skb) { struct iphdr *iph; u8 *h; __be16 flags; __sum16 csum = 0; __be32 key = 0; u32 seqno = 0; struct ip_tunnel *tunnel; int offset = 4; __be16 gre_proto; unsigned int len; if (!pskb_may_pull(skb, 16)) goto drop_nolock; iph = ip_hdr(skb); h = skb->data; flags = *(__be16*)h; if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { /* - Version must be 0. - We do not support routing headers. */ if (flags&(GRE_VERSION|GRE_ROUTING)) goto drop_nolock; if (flags&GRE_CSUM) { switch (skb->ip_summed) { case CHECKSUM_COMPLETE: csum = csum_fold(skb->csum); if (!csum) break; /* fall through */ case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); skb->ip_summed = CHECKSUM_COMPLETE; } offset += 4; } if (flags&GRE_KEY) { key = *(__be32*)(h + offset); offset += 4; } if (flags&GRE_SEQ) { seqno = ntohl(*(__be32*)(h + offset)); offset += 4; } } gre_proto = *(__be16 *)(h + 2); read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, key, gre_proto))) { struct net_device_stats *stats = &tunnel->dev->stats; secpath_reset(skb); skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. * - Change protocol to IP * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { skb->protocol = htons(ETH_P_IP); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } skb->mac_header = skb->network_header; __pskb_pull(skb, offset); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); skb->pkt_type = PACKET_HOST; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ if (skb_rtable(skb)->fl.iif == 0) goto drop; stats->multicast++; skb->pkt_type = PACKET_BROADCAST; } #endif if (((flags&GRE_CSUM) && csum) || (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { stats->rx_crc_errors++; stats->rx_errors++; goto drop; } if (tunnel->parms.i_flags&GRE_SEQ) { if (!(flags&GRE_SEQ) || (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { stats->rx_fifo_errors++; stats->rx_errors++; goto drop; } tunnel->i_seqno = seqno + 1; } len = skb->len; /* Warning: All skb pointers will be invalidated! */ if (tunnel->dev->type == ARPHRD_ETHER) { if (!pskb_may_pull(skb, ETH_HLEN)) { stats->rx_length_errors++; stats->rx_errors++; goto drop; } iph = ip_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } stats->rx_packets++; stats->rx_bytes += len; skb->dev = tunnel->dev; skb_dst_drop(skb); nf_reset(skb); skb_reset_network_header(skb); ipgre_ecn_decapsulate(iph, skb); netif_rx(skb); read_unlock(&ipgre_lock); return(0); } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: read_unlock(&ipgre_lock); drop_nolock: kfree_skb(skb); return(0); }
int ip_forward(struct sk_buff *skb) { struct iphdr *iph; struct rtable *rt; struct ip_options * opt = &(IPCB(skb)->opt); if (skb_warn_if_lro(skb)) goto drop; if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; if (skb->pkt_type != PACKET_HOST) goto drop; skb_forward_csum(skb); if (ip_hdr(skb)->ttl <= 1) goto too_many_hops; if (!xfrm4_route_forward(skb)) goto drop; rt = skb_rtable(skb); if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); goto drop; } if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) goto drop; iph = ip_hdr(skb); ip_decrease_ttl(iph); if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev, ip_forward_finish); sr_failed: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); goto drop; too_many_hops: IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); return NET_RX_DROP; }
int ip_forward(struct sk_buff *skb) { struct net_device *dev2; /* Output device */ struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); unsigned short mtu; if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; if (skb->pkt_type != PACKET_HOST) goto drop; skb->ip_summed = CHECKSUM_NONE; /* * According to the RFC, we must first decrease the TTL field. If * that reaches zero, we must reply an ICMP control message telling * that the packet's lifetime expired. */ iph = skb->nh.iph; rt = (struct rtable*)skb->dst; if (iph->ttl <= 1) goto too_many_hops; if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; /* * Having picked a route we can now send the frame out * after asking the firewall permission to do so. */ skb->priority = rt_tos2priority(iph->tos); dev2 = rt->u.dst.dev; mtu = rt->u.dst.pmtu; /* * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr) ip_rt_send_redirect(skb); /* We are about to mangle packet. Copy it! */ if (skb_cow(skb, dev2->hard_header_len)) goto drop; iph = skb->nh.iph; /* Decrease ttl after skb cow done */ ip_decrease_ttl(iph); /* * We now may allocate a new buffer, and copy the datagram into it. * If the indicated interface is up and running, kick it. */ if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF)) goto frag_needed; #ifdef CONFIG_IP_ROUTE_NAT if (rt->rt_flags & RTCF_NAT) { if (ip_do_nat(skb)) { kfree_skb(skb); return NET_RX_BAD; } } #endif return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev2, ip_forward_finish); frag_needed: IP_INC_STATS_BH(IpFragFails); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto drop; sr_failed: /* * Strict routing permits no gatewaying */ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); goto drop; too_many_hops: /* Tell the sender its packet died... */ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); return NET_RX_DROP; }
int ip_forward(struct sk_buff *skb) { struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); if (skb_warn_if_lro(skb)) goto drop; if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; if (skb->pkt_type != PACKET_HOST) goto drop; skb_forward_csum(skb); /* * According to the RFC, we must first decrease the TTL field. If * that reaches zero, we must reply an ICMP control message telling * that the packet's lifetime expired. */ if (ip_hdr(skb)->ttl <= 1) goto too_many_hops; if (!xfrm4_route_forward(skb)) goto drop; rt = skb_rtable(skb); if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); goto drop; } /* We are about to mangle packet. Copy it! */ if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) goto drop; iph = ip_hdr(skb); /* Decrease ttl after skb cow done */ ip_decrease_ttl(iph); /* * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev, ip_forward_finish); sr_failed: /* * Strict routing permits no gatewaying */ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); goto drop; too_many_hops: /* Tell the sender its packet died... */ IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); return NET_RX_DROP; }
/* * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT. * Check if outgoing packet belongs to the established ip_vs_conn, * rewrite addresses of the packet and send it on its way... */ static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; int ihl; EnterFunction(11); if (skb->ipvs_property) return NF_ACCEPT; iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_out_icmp(pskb, &related); if (related) return verdict; skb = *pskb; iph = ip_hdr(skb); } pp = ip_vs_proto_get(iph->protocol); if (unlikely(!pp)) return NF_ACCEPT; /* reassemble IP fragments */ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); if (!skb) return NF_STOLEN; iph = ip_hdr(skb); *pskb = skb; } ihl = iph->ihl << 2; /* * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(skb, pp, iph, ihl, 0); if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, ihl, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(iph->protocol, iph->saddr, pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST * packet or not TCP packet. */ if (iph->protocol != IPPROTO_TCP || !is_tcp_reset(skb)) { icmp_send(skb,ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; } } } IP_VS_DBG_PKT(12, pp, skb, 0, "packet continues traversal as normal"); return NF_ACCEPT; } IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); if (!ip_vs_make_skb_writable(pskb, ihl)) goto drop; /* mangle the packet */ if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) goto drop; skb = *pskb; ip_hdr(skb)->saddr = cp->vaddr; ip_send_check(ip_hdr(skb)); /* For policy routing, packets originating from this * machine itself may be routed differently to packets * passing through. We want this packet to be routed as * if it came from this machine itself. So re-compute * the routing information. */ if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) goto drop; skb = *pskb; IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_conn_put(cp); skb->ipvs_property = 1; LeaveFunction(11); return NF_ACCEPT; drop: ip_vs_conn_put(cp); kfree_skb(*pskb); return NF_STOLEN; }
int ip_options_compile(struct net *net, struct ip_options * opt, struct sk_buff * skb) { int l; unsigned char * iph; unsigned char * optptr; int optlen; unsigned char * pp_ptr = NULL; struct rtable *rt = NULL; if (skb != NULL) { rt = skb_rtable(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else optptr = opt->__data; iph = optptr - sizeof(struct iphdr); for (l = opt->optlen; l > 0; ) { switch (*optptr) { case IPOPT_END: for (optptr++, l--; l>0; optptr++, l--) { if (*optptr != IPOPT_END) { *optptr = IPOPT_END; opt->is_changed = 1; } } goto eol; case IPOPT_NOOP: l--; optptr++; continue; } optlen = optptr[1]; if (optlen<2 || optlen>l) { pp_ptr = optptr; goto error; } switch (*optptr) { case IPOPT_SSRR: case IPOPT_LSRR: if (optlen < 3) { pp_ptr = optptr + 1; goto error; } if (optptr[2] < 4) { pp_ptr = optptr + 2; goto error; } /* NB: cf RFC-1812 5.2.4.1 */ if (opt->srr) { pp_ptr = optptr; goto error; } if (!skb) { if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) { pp_ptr = optptr + 1; goto error; } memcpy(&opt->faddr, &optptr[3], 4); if (optlen > 7) memmove(&optptr[3], &optptr[7], optlen-7); } opt->is_strictroute = (optptr[0] == IPOPT_SSRR); opt->srr = optptr - iph; break; case IPOPT_RR: if (opt->rr) { pp_ptr = optptr; goto error; } if (optlen < 3) { pp_ptr = optptr + 1; goto error; } if (optptr[2] < 4) { pp_ptr = optptr + 2; goto error; } if (optptr[2] <= optlen) { if (optptr[2]+3 > optlen) { pp_ptr = optptr + 2; goto error; } if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); opt->is_changed = 1; } optptr[2] += 4; opt->rr_needaddr = 1; } opt->rr = optptr - iph; break; case IPOPT_TIMESTAMP: if (opt->ts) { pp_ptr = optptr; goto error; } if (optlen < 4) { pp_ptr = optptr + 1; goto error; } if (optptr[2] < 5) { pp_ptr = optptr + 2; goto error; } if (optptr[2] <= optlen) { __be32 *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) timeptr = (__be32*)&optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; case IPOPT_TS_TSANDADDR: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; } opt->ts = optptr - iph; if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); timeptr = (__be32*)&optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; optptr[2] += 8; break; case IPOPT_TS_PRESPEC: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; } opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) timeptr = (__be32*)&optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; break; default: if (!skb && !capable(CAP_NET_RAW)) { pp_ptr = optptr + 3; goto error; } break; } if (timeptr) { struct timespec tv; __be32 midtime; getnstimeofday(&tv); midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); memcpy(timeptr, &midtime, sizeof(__be32)); opt->is_changed = 1; } } else { unsigned overflow = optptr[3]>>4; if (overflow == 15) { pp_ptr = optptr + 3; goto error; } opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } break; case IPOPT_RA: if (optlen < 4) { pp_ptr = optptr + 1; goto error; } if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; case IPOPT_CIPSO: if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) { pp_ptr = optptr; goto error; } opt->cipso = optptr - iph; if (cipso_v4_validate(skb, &optptr)) { pp_ptr = optptr; goto error; } break; case IPOPT_SEC: case IPOPT_SID: default: if (!skb && !capable(CAP_NET_RAW)) { pp_ptr = optptr; goto error; } break; } l -= optlen; optptr += optlen; } eol: if (!pp_ptr) return 0; error: if (skb) { icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); } return -EINVAL; }
/* Get route to destination or remote server */ static int __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ struct rtable *ort; /* Original route */ struct iphdr *iph; __be16 df; int mtu; int local, noref = 1; if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) rt = (struct rtable *) dest_dst->dst_cache; else { dest_dst = ip_vs_dest_dst_alloc(); spin_lock_bh(&dest->dst_lock); if (!dest_dst) { __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock_bh(&dest->dst_lock); goto err_unreach; } rt = do_output_route4(net, dest->addr.ip, rt_mode, &dest_dst->dst_saddr.ip); if (!rt) { __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock_bh(&dest->dst_lock); ip_vs_dest_dst_free(dest_dst); goto err_unreach; } __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest_dst->dst_saddr.ip, atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { __be32 saddr = htonl(INADDR_ANY); noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; rt = do_output_route4(net, daddr, rt_mode, &saddr); if (!rt) goto err_unreach; if (ret_saddr) *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", (rt->rt_flags & RTCF_LOCAL) ? "local":"non-local", &daddr); goto err_put; } iph = ip_hdr(skb); if (likely(!local)) { if (unlikely(ipv4_is_loopback(iph->saddr))) { IP_VS_DBG_RL("Stopping traffic from loopback address " "%pI4 to non-local address, dest: %pI4\n", &iph->saddr, &daddr); goto err_put; } } else { ort = skb_rtable(skb); if (!(rt_mode & IP_VS_RT_MODE_RDR) && !(ort->rt_flags & RTCF_LOCAL)) { IP_VS_DBG_RL("Redirect from non-local address %pI4 to " "local requires NAT method, dest: %pI4\n", &iph->daddr, &daddr); goto err_put; } /* skb to local stack, preserve old route */ if (!noref) ip_rt_put(rt); return local; } if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { mtu = dst_mtu(&rt->dst); df = iph->frag_off & htons(IP_DF); } else { struct sock *sk = skb->sk; mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; } ort = skb_rtable(skb); if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); /* MTU check allowed? */ df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0; } /* MTU checking */ if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr); goto err_put; } skb_dst_drop(skb); if (noref) { if (!local) skb_dst_set_noref_force(skb, &rt->dst); else skb_dst_set(skb, dst_clone(&rt->dst)); } else skb_dst_set(skb, &rt->dst); return local; err_put: if (!noref) ip_rt_put(rt); return -1; err_unreach: dst_link_failure(skb); return -1; }
static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev) { struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; u16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ u32 dst = tiph->daddr; int mtu; if (tunnel->recursion++) { tunnel->stat.collisions++; goto tx_error; } if (skb->protocol != __constant_htons(ETH_P_IP)) goto tx_error; if (tos&1) tos = old_iph->tos; if (!dst) { /* NBMA tunnel */ if ((rt = (struct rtable*)skb->dst) == NULL) { tunnel->stat.tx_fifo_errors++; goto tx_error; } if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); tunnel->stat.collisions++; goto tx_error; } mtu = rt->u.dst.pmtu - sizeof(struct iphdr); if (mtu < 68) { tunnel->stat.collisions++; ip_rt_put(rt); goto tx_error; } if (skb->dst && mtu < skb->dst->pmtu) skb->dst->pmtu = mtu; df |= (old_iph->frag_off&__constant_htons(IP_DF)); if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); goto tx_error; } if (tunnel->err_count > 0) { if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } skb->h.raw = skb->nh.raw; /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); tunnel->recursion--; return 0; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; } skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); dst_release(skb->dst); skb->dst = &rt->u.dst; /* * Push down and install the IPIP header. */ iph = skb->nh.iph; iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; iph->frag_off = df; iph->protocol = IPPROTO_IPIP; iph->tos = tos; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; if ((iph->ttl = tiph->ttl) == 0) iph->ttl = old_iph->ttl; iph->tot_len = htons(skb->len); iph->id = htons(ip_id_count++); ip_send_check(iph); stats->tx_bytes += skb->len; stats->tx_packets++; ip_send(skb); tunnel->recursion--; return 0; tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; dev_kfree_skb(skb); tunnel->recursion--; return 0; }
void * arp_queue_caretaker(void * router){ struct sr_instance * sr = router; while(TRUE) { // walk the packet queue, send and clear any that are sendable rmutex_lock(&sr->arp_queue_lock); for(int i = 0; i < OUTSTANDING_ARP_LIMIT; i++) { if (sr->pq[i].payload) { addr_mac_t result; if (arp_table_get_entry(sr,sr->pq[i].ip,&result)) { Debug("found destination in arp cache, sending packet\n"); memcpy(sr->pq[i].payload,&result,ETHER_ADDR_LEN); sr_send_packet(sr,sr->pq[i].payload,sr->pq[i].payload_len,sr->pq[i].iface); free(sr->pq[i].payload); sr->pq[i].payload = NULL; continue; } // last chance failed, drop this packet else if (sr->pq[i].expiry < time(NULL)) { // grab the pointer so we can send the icmp error, // but free up the outgoing IP packet cache entry uint8_t * buf = sr->pq[i].payload; sr->pq[i].payload = NULL; Debug("sending icmp host unreach on arp timeout\n"); // send ICMP HOST UNREACH back to sender to inform // them that we cannot route this packet uint32_t dst = ((struct ip *)(buf + sizeof(struct sr_ethernet_hdr)))->ip_src.s_addr; icmp_send(sr,dst,0,buf + sizeof(struct sr_ethernet_hdr),sr->pq[i].payload_len - sizeof(struct sr_ethernet_hdr),ICMP_TYPE_DEST_UNREACH,ICMP_CODE_HOST_UNREACH,0,0); free(buf); } } } // walk the pending arp request queue, sending or deleting entries // as necessary for(int i = 0; i < OUTSTANDING_ARP_LIMIT; i++) { // clear entries that responses have been found for addr_mac_t ret; if (sr->aq[i].ip && arp_table_get_entry(sr,sr->aq[i].ip,&ret)) { sr->aq[i].ip = 0; continue; } if (sr->aq[i].ip != 0 && sr->aq[i].expiry < time(NULL)) { if(sr->aq[i].req_remaining-- < 1) { // done trying this one, fail sr->aq[i].ip = 0; continue; } // reset the time remaining sr->aq[i].expiry = time(NULL) + ARP_TIMEOUT; // send an ARP request for this packet int len = sizeof(struct sr_ethernet_hdr) + sizeof(struct sr_arphdr); uint8_t * buf = malloc(len); memset(buf,0,len); Debug("crafting arp packet from %s\n",sr->aq[i].iface); addr_mac_t saddr = sr_get_interface(sr,sr->aq[i].iface)->addr; uint32_t src_ip = sr_get_interface(sr,sr->aq[i].iface)->ip; struct sr_ethernet_hdr * eh = (struct sr_ethernet_hdr *)buf; struct sr_arphdr * ah = (struct sr_arphdr *) (buf + sizeof(struct sr_ethernet_hdr)); // set ethernet fields eh->ether_shost = saddr; memset(&eh->ether_dhost,0xFF,ETHER_ADDR_LEN); eh->ether_type = htons(ETHERTYPE_ARP); // set arp fields ah->ar_hrd = htons(ARPHDR_ETHER); ah->ar_pro = htons(ETHERTYPE_IP); ah->ar_hln = 6; ah->ar_pln = 4; ah->ar_op = htons(ARP_REQUEST); ah->ar_sha = saddr; ah->ar_sip = src_ip; ah->ar_tip = sr->aq[i].ip; memset(&ah->ar_tha,0,ETHER_ADDR_LEN); Debug("sr_send_packet sending arp request out interface %s with length %d\n",sr->aq[i].iface,len); sr_send_packet(sr,buf,len,sr->aq[i].iface); free(buf); } } rmutex_unlock(&sr->arp_queue_lock); sleep(1); } return NULL; }
/* * ICMP packet transmitter * called by the ip_vs_in_icmp */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset) { struct rtable *rt; /* Route to the other host */ int mtu; int rc; int local; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc_unchecked(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(ip_hdr(skb)->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(rt->rt_dst) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; ip_vs_nat_icmp(skb, pp, cp, 0); if (!local) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); rc = NF_STOLEN; goto out; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); rc = NF_STOLEN; out: LeaveFunction(10); return rc; tx_error_put: ip_rt_put(rt); goto tx_error; }
static unsigned int ipt_route_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const void *targinfo, void *userinfo) { const struct ipt_route_target_info *route_info = targinfo; struct sk_buff *skb = *pskb; unsigned int res; if (skb->nfct == &route_tee_track.ct_general) { /* Loopback - a packet we already routed, is to be * routed another time. Avoid that, now. */ if (net_ratelimit()) DEBUGP(KERN_DEBUG "ipt_ROUTE: loopback - DROP!\n"); return NF_DROP; } /* If we are at PREROUTING or INPUT hook * the TTL isn't decreased by the IP stack */ if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_IN) { struct iphdr *iph = skb->nh.iph; if (iph->ttl <= 1) { struct rtable *rt; struct flowi fl = { .oif = 0, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, .tos = RT_TOS(iph->tos), .scope = ((iph->tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE) } } }; if (ip_route_output_key(&rt, &fl)) { return NF_DROP; } if (skb->dev == rt->u.dst.dev) { /* Drop old route. */ dst_release(skb->dst); skb->dst = &rt->u.dst; /* this will traverse normal stack, and * thus call conntrack on the icmp packet */ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); } return NF_DROP; } /* * If we are at INPUT the checksum must be recalculated since * the length could change as the result of a defragmentation. */ if(hooknum == NF_IP_LOCAL_IN) { iph->ttl = iph->ttl - 1; iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } else { ip_decrease_ttl(iph); } } if ((route_info->flags & IPT_ROUTE_TEE)) { /* * Copy the *pskb, and route the copy. Will later return * IPT_CONTINUE for the original skb, which should continue * on its way as if nothing happened. The copy should be * independantly delivered to the ROUTE --gw. */ skb = skb_copy(*pskb, GFP_ATOMIC); if (!skb) { if (net_ratelimit()) DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n"); return IPT_CONTINUE; } } /* Tell conntrack to forget this packet since it may get confused * when a packet is leaving with dst address == our address. * Good idea ? Dunno. Need advice. * * NEW: mark the skb with our &route_tee_track, so we avoid looping * on any already routed packet. */ if (!(route_info->flags & IPT_ROUTE_CONTINUE)) { nf_conntrack_put(skb->nfct); skb->nfct = &route_tee_track.ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); skb->nfcache = 0; #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 0; #endif } if (route_info->oif[0] != '\0') { res = route_oif(route_info, skb); } else if (route_info->iif[0] != '\0') { res = route_iif(route_info, skb); } else if (route_info->gw) { res = route_gw(route_info, skb); } else { if (net_ratelimit()) DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n"); res = IPT_CONTINUE; } if ((route_info->flags & IPT_ROUTE_TEE)) res = IPT_CONTINUE; return res; }
/* * IP Tunneling transmitter * * This function encapsulates the packet in a new IP packet, its * destination will be set to cp->daddr. Most code of this function * is taken from ipip.c. * * It is used in VS/TUN cluster. The load balancer selects a real * server from a cluster based on a scheduling algorithm, * encapsulates the request packet and forwards it to the selected * server. For example, all real servers are configured with * "ifconfig tunl0 <Virtual IP Address> up". When the server receives * the encapsulated packet, it will decapsulate the packet, processe * the request and return the response packets directly to the client * without passing the load balancer. This can greatly increase the * scalability of virtual server. * * Used for ANY protocol */ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; __be16 df = old_iph->frag_off; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; int ret; EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); } tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error_put; } if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); df |= (old_iph->frag_off & htons(IP_DF)); if ((old_iph->frag_off & htons(IP_DF) && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; } kfree_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); } skb->transport_header = skb->network_header; /* fix old IP header checksum */ ip_send_check(old_iph); skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. */ iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; iph->frag_off = df; iph->protocol = IPPROTO_IPIP; iph->tos = tos; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->ttl = old_iph->ttl; ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; ret = IP_VS_XMIT_TUNNEL(skb, cp); if (ret == NF_ACCEPT) ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; tx_error_put: ip_rt_put(rt); goto tx_error; }
static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); __skb_pull(skb, ip_hdrlen(skb)); /* Point into the IP datagram, just past the header. */ skb_reset_transport_header(skb); rcu_read_lock(); { int protocol = ip_hdr(skb)->protocol; int hash, raw; const struct net_protocol *ipprot; resubmit: raw = raw_local_deliver(skb, protocol); hash = protocol & (MAX_INET_PROTOS - 1); ipprot = rcu_dereference(inet_protos[hash]); if (ipprot != NULL) { int ret; if (!net_eq(net, &init_net) && !ipprot->netns_ok) { if (net_ratelimit()) printk("%s: proto %d isn't netns-ready\n", __func__, protocol); kfree_skb(skb); goto out; } if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); goto out; } nf_reset(skb); } ret = ipprot->handler(skb); if (ret < 0) { protocol = -ret; goto resubmit; } IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } } else IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } } out: rcu_read_unlock(); return 0; }