/*
 *	Main IPv4 receive routine.
 *
 *	skb	received buffer; skb->h.iph is taken as the IP header.
 *	dev	device the frame arrived on (may be replaced by an alias
 *		device under CONFIG_NET_ALIAS).
 *	pt	packet type entry; only passed on to ipv6_rcv() here.
 *
 *	Validates the header, runs optional accounting / firewall /
 *	defragmentation hooks, and then either delivers the datagram locally
 *	(raw sockets first, then the registered inet_protocol handlers) or
 *	hands it to ip_forward().
 *
 *	Returns 0 on most paths, -EINVAL when a source-routed packet is
 *	rejected, -1 when a strict-source-route packet would have to be
 *	forwarded, or whatever ipv6_rcv() returns for version 6 frames.
 */
int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	unsigned char flag = 0;		/* set once any handler (or mrouter) took the packet */
	struct inet_protocol *ipprot;
	int brd=IS_MYADDR;		/* address class of the destination; see ip_chk_addr() calls */
	struct options * opt = NULL;	/* parsed IP options, only set when ihl > 5 */
	int is_frag=0;			/* IPFWD_FRAGMENT / IPFWD_LASTFRAG bits */
	__u32 daddr;			/* destination reported upwards; may be rewritten by source routing */

#ifdef CONFIG_FIREWALL
	int fwres;
	__u16 rport;
#endif
#ifdef CONFIG_IP_MROUTE
	int mroute_pkt=0;
#endif

#ifdef CONFIG_NET_IPV6
	/*
	 *	Intercept IPv6 frames. We dump ST-II and invalid types just below..
	 */
	if(iph->version == 6)
		return ipv6_rcv(skb,dev,pt);
#endif

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
	 *	RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 *	(5.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
		|| skb->len < ntohs(iph->tot_len))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 *	Note this now means skb->len holds ntohs(iph->tot_len).
	 */

	skb_trim(skb,ntohs(iph->tot_len));

	/*
	 *	After trimming, the total length must still cover the header
	 *	length claimed by ihl (ihl counts 32-bit words, hence <<2).
	 */
	if(skb->len < (iph->ihl<<2))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

	/*
	 *	Account for the packet (even if the packet is
	 *	not accepted by the firewall!). We do this after
	 *	the sanity checks and the additional ihl check
	 *	so we dont account garbage as we might do before.
	 */

#ifdef CONFIG_IP_ACCT
	ip_fw_chk(iph,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN);
#endif

	/*
	 *	Try to select closest <src,dst> alias device, if any.
	 *	net_alias_dev_rx32 returns main device if it
	 *	fails to found other.
	 *	If successful, also incr. alias rx count.
	 *
	 *	Only makes sense for unicasts - Thanks ANK.
	 */

#ifdef CONFIG_NET_ALIAS
	if (skb->pkt_type == PACKET_HOST && iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev))
	{
		skb->dev = dev = net_alias_dev_rx32(skb->dev, AF_INET, iph->saddr, iph->daddr);
	}
#endif

	/*
	 *	Header longer than 20 bytes: parse the IP options.
	 */
	if (iph->ihl > 5)
	{
		skb->ip_summed = 0;
		/*
		 *	NOTE(review): this path returns without freeing skb;
		 *	presumably ip_options_compile() disposes of the buffer
		 *	itself on failure -- confirm against its definition.
		 */
		if (ip_options_compile(NULL, skb))
			return(0);
		opt = (struct options*)skb->proto_priv;
#ifdef CONFIG_IP_NOSR
		/* Built with source routing disabled: drop any packet carrying an SRR option. */
		if (opt->srr)
		{
			kfree_skb(skb, FREE_READ);
			return -EINVAL;
		}
#endif
	}

	/* Transparent proxying forces the always-defragment behaviour on. */
#if defined(CONFIG_IP_TRANSPARENT_PROXY) && !defined(CONFIG_IP_ALWAYS_DEFRAG)
#define CONFIG_IP_ALWAYS_DEFRAG 1
#endif
#ifdef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 * Defragment all incoming traffic before even looking at it.
	 * If you have forwarding enabled, this makes the system a
	 * defragmenting router.  Not a common thing.
	 * You probably DON'T want to enable this unless you have to.
	 * You NEED to use this if you want to use transparent proxying,
	 * otherwise, we can't vouch for your sanity.
	 */

	/*
	 *	See if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		if (iph->frag_off & htons(IP_MF))
			is_frag|=IPFWD_FRAGMENT;
		/*
		 *	Last fragment ?
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=IPFWD_LASTFRAG;

		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
			is_frag = 0;
			/*
			 * When the reassembled packet gets forwarded, the ip
			 * header checksum should be correct.
			 * For better performance, this should actually only
			 * be done in that particular case, i.e. set a flag
			 * here and calculate the checksum in ip_forward.
			 */
			ip_send_check(iph);
		}
	}

#endif
	/*
	 *	See if the firewall wants to dispose of the packet.
	 */

#ifdef CONFIG_FIREWALL

	if ((fwres=call_in_firewall(PF_INET, skb->dev, iph, &rport))<FW_ACCEPT)
	{
		/* Only an explicit REJECT verdict generates an ICMP error; other verdicts below FW_ACCEPT drop silently. */
		if(fwres==FW_REJECT)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

	/*
	 *	Record the redirect port chosen by the firewall, or 0.
	 *	Note the dangling "else" below: without transparent proxy
	 *	support the assignment of 0 is unconditional.
	 */
#ifdef CONFIG_IP_TRANSPARENT_PROXY
	if (fwres==FW_REDIRECT)
		skb->redirport = rport;
	else
#endif
		skb->redirport = 0;
#endif

#ifndef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 *	Remember if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		if (iph->frag_off & htons(IP_MF))
			is_frag|=IPFWD_FRAGMENT;
		/*
		 *	Last fragment ?
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=IPFWD_LASTFRAG;
	}

#endif
	/*
	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */
	daddr = iph->daddr;
#ifdef CONFIG_IP_TRANSPARENT_PROXY
	/*
	 *	ip_chksock adds still more overhead for forwarded traffic...
	 */
	if ( iph->daddr == skb->dev->pa_addr || skb->redirport || (brd = ip_chk_addr(iph->daddr)) != 0 || ip_chksock(skb))
#else
	if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
#endif
	{
		/*
		 *	Local delivery path. First walk any source route: while
		 *	the next listed hop is still one of our own addresses we
		 *	keep consuming hops (updating daddr); the first hop that
		 *	is not ours makes us forward the packet there instead.
		 */
		if (opt && opt->srr)
		{
			int srrspace, srrptr;
			__u32 nexthop;
			unsigned char * optptr = ((unsigned char *)iph) + opt->srr;

			/* Source routes are only honoured for unicast frames addressed to us. */
			if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST)
			{
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			/* optptr[1] is read as the option length, optptr[2] as the (1-based) pointer. */
			for ( srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4 )
			{
				int brd2;
				/* Not enough room left for a full 4-byte address: malformed option. */
				if (srrptr + 3 > srrspace)
				{
					icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
						  skb->dev);
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				memcpy(&nexthop, &optptr[srrptr-1], 4);
				/* 0 from ip_chk_addr(): hop is not one of our addresses, so forward to it. */
				if ((brd2 = ip_chk_addr(nexthop)) == 0)
					break;
				if (brd2 != IS_MYADDR)
				{

					/*
					 *	ANK: should we implement weak tunneling of multicasts?
					 *	Are they obsolete? DVMRP specs (RFC-1075) is old enough...
					 *	[They are obsolete]
					 */
					kfree_skb(skb, FREE_WRITE);
					return -EINVAL;
				}
				memcpy(&daddr, &optptr[srrptr-1], 4);
			}
			/* Loop left via break: nexthop holds the first non-local hop. */
			if (srrptr <= srrspace)
			{
				opt->srr_is_hit = 1;
				opt->is_changed = 1;
				if (sysctl_ip_forward) {
					/* A non-zero return from ip_forward() leaves the skb for us to free. */
					if (ip_forward(skb, dev, is_frag, nexthop))
						kfree_skb(skb, FREE_WRITE);
				} else {
					ip_statistics.IpInAddrErrors++;
					kfree_skb(skb, FREE_WRITE);
				}
				return 0;
			}
		}

#ifdef CONFIG_IP_MULTICAST
		if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
		{
			/*
			 *	Check it is for one of our groups
			 */
			struct ip_mc_list *ip_mc=dev->ip_mc_list;
			do
			{
				/* End of the device's group list without a match: not for us. */
				if(ip_mc==NULL)
				{
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				if(ip_mc->multiaddr==iph->daddr)
					break;
				ip_mc=ip_mc->next;
			}
			while(1);
		}
#endif

#ifndef CONFIG_IP_ALWAYS_DEFRAG
		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
		}

#endif

#ifdef CONFIG_IP_MASQUERADE
		/*
		 *	Do we need to de-masquerade this packet?
		 */
		{
			int ret = ip_fw_demasquerade(&skb,dev);
			if (ret < 0) {
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			if (ret)
			{
				/* Deliberately shadows the outer iph: skb may have been replaced via &skb. */
				struct iphdr *iph=skb->h.iph;
				if (ip_forward(skb, dev, IPFWD_MASQUERADED, iph->daddr))
					kfree_skb(skb, FREE_WRITE);
				return 0;
			}
		}
#endif

		/*
		 *	Point into the IP datagram, just past the header.
		 */

		skb->ip_hdr = iph;
		skb->h.raw += iph->ihl*4;

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Check the state on multicast routing (multicast and not 224.0.0.z)
		 */

		if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
			mroute_pkt=1;

#endif
		/*
		 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
		 *
		 *	RFC 1122: SHOULD pass TOS value up to the transport layer.
		 */

		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
		hash = iph->protocol & (MAX_INET_PROTOS - 1);

		/*
		 *	If there maybe a raw socket we must check - if not we don't care less
		 */

		if((raw_sk = raw_v4_htable[hash]) != NULL) {
			struct sock *sknext = NULL;
			struct sk_buff *skb1;

			raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr);
			if(raw_sk) {	/* Any raw sockets */
				do {
					/* Find the next */
					sknext = raw_v4_lookup(raw_sk->next, iph->protocol,
							       iph->saddr, iph->daddr);
					if(sknext)
						skb1 = skb_clone(skb, GFP_ATOMIC);
					else
						break;	/* One pending raw socket left */
					/* A failed clone just means this raw socket misses the packet. */
					if(skb1)
						raw_rcv(raw_sk, skb1, dev, iph->saddr,daddr);
					raw_sk = sknext;
				} while(raw_sk!=NULL);

				/*
				 *	Here either raw_sk is the last raw socket, or NULL if none
				 */

				/*
				 *	We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy
				 */
			}
		}

		/*
		 *	skb->h.raw now points at the protocol beyond the IP header.
		 */

		for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
		{
			struct sk_buff *skb2;

			/*
			 *	NOTE(review): iph points into skb's data. Once skb
			 *	itself (not a clone) has been handed to a handler
			 *	below, later iterations still read iph->protocol --
			 *	confirm the handler cannot have released it.
			 */
			if (ipprot->protocol != iph->protocol)
				continue;
		       /*
			* 	See if we need to make a copy of it.  This will
			* 	only be set if more than one protocol wants it.
			* 	and then not for the last one. If there is a pending
			*	raw delivery wait for that
			*/

#ifdef CONFIG_IP_MROUTE
			if (ipprot->copy || raw_sk || mroute_pkt)
#else
			if (ipprot->copy || raw_sk)
#endif
			{
				skb2 = skb_clone(skb, GFP_ATOMIC);
				if(skb2==NULL)
					continue;
			}
			else
			{
				skb2 = skb;
			}
			flag = 1;

		       /*
			*	Pass on the datagram to each protocol that wants it,
			*	based on the datagram protocol.  We should really
			*	check the protocol handler's return values here...
			*/

			ipprot->handler(skb2, dev, opt, daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);
		}

		/*
		 *	All protocols checked.
		 *	If this packet was a broadcast, we may *not* reply to it, since that
		 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
		 *	ICMP reply messages get queued up for transmission...)
		 */

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Forward the last copy to the multicast router. If
		 *	there is a pending raw delivery however make a copy
		 *	and forward that.
		 */

		if(mroute_pkt)
		{
			flag=1;
			if(raw_sk==NULL)
				ipmr_forward(skb, is_frag);
			else
			{
				struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
				if(skb2)
				{
					skb2->free=1;
					ipmr_forward(skb2, is_frag);
				}
			}
		}
#endif

		if(raw_sk!=NULL)	/* Shift to last raw user */
			raw_rcv(raw_sk, skb, dev, iph->saddr, daddr);
		else if (!flag)		/* Free and report errors */
		{
			/* Nobody wanted it: report "protocol unreachable", but never in reply to broadcast/multicast. */
			if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
				icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
			kfree_skb(skb, FREE_WRITE);
		}

		return(0);
	}

	/*
	 *	Do any unicast IP forwarding required.
	 */

	/*
	 *	Don't forward multicast or broadcast frames.
	 */

	if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
	{
		kfree_skb(skb,FREE_WRITE);
		return 0;
	}

	/*
	 *	The packet is for another target. Forward the frame
	 */

	if (sysctl_ip_forward) {
		/* A strict source route that does not name us as the next hop is an error. */
		if (opt && opt->is_strictroute) {
			icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
			kfree_skb(skb, FREE_WRITE);
			return -1;
		}
		if (ip_forward(skb, dev, is_frag, iph->daddr))
			kfree_skb(skb, FREE_WRITE);
	} else {
/*	printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
		kfree_skb(skb, FREE_WRITE);
	}
	return(0);
}
static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) { struct iphdr *iph; int hash; struct inet_protocol *ipprot; unsigned char *dp; struct sock *raw_sk; /* * Incomplete header ? * Only checks for the IP header, there should be an * additional check for longer headers in upper levels. */ if(len<sizeof(struct iphdr)) { icmp_statistics.IcmpInErrors++; return; } iph = (struct iphdr *) (icmph + 1); dp = (unsigned char*)iph; if(icmph->type==ICMP_TIME_EXCEEDED) { /* ABD */ play_that_funky_music (icmph); } if(icmph->type==ICMP_DEST_UNREACH) { switch(icmph->code & 15) { case ICMP_NET_UNREACH: break; case ICMP_HOST_UNREACH: break; case ICMP_PORT_UNREACH: play_that_funky_music (icmph); /* ABD */ break; case ICMP_PROT_UNREACH: break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { if (sysctl_ip_always_defrag == 0 && net_ratelimit()) printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n", in_ntoa(iph->daddr)); } else { unsigned short new_mtu; new_mtu = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); if (!new_mtu) return; icmph->un.frag.mtu = htons(new_mtu); } break; case ICMP_SR_FAILED: if (sysctl_ip_always_defrag == 0 && net_ratelimit()) printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr)); break; default: break; } if (icmph->code>NR_ICMP_UNREACH) return; } /* * Throw it at our lower layers * * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed header. * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the transport layer. * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to transport layer. */ /* * Check the other end isnt violating RFC 1122. Some routers send * bogus responses to broadcast frames. If you see this message * first check your netmask matches at both ends, if it does then * get the other vendor to fix their kit. 
*/ if (!sysctl_icmp_ignore_bogus_error_responses) { if (inet_addr_type(iph->daddr) == RTN_BROADCAST) { if (sysctl_ip_always_defrag == 0 && net_ratelimit()) printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n", in_ntoa(skb->nh.iph->saddr)); return; } } /* * Deliver ICMP message to raw sockets. Pretty useless feature? */ /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ hash = iph->protocol & (MAX_INET_PROTOS - 1); if ((raw_sk = raw_v4_htable[hash]) != NULL) { while ((raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex)) != NULL) { raw_err(raw_sk, skb); raw_sk = raw_sk->next; } } /* * This can't change while we are doing it. */ ipprot = (struct inet_protocol *) inet_protos[hash]; while(ipprot != NULL) { struct inet_protocol *nextip; nextip = (struct inet_protocol *) ipprot->next; /* * Pass it off to everyone who wants it. */ /* RFC1122: OK. Passes appropriate ICMP errors to the */ /* appropriate protocol layer (MUST), as per 3.2.2. */ if (iph->protocol == ipprot->protocol && ipprot->err_handler) ipprot->err_handler(skb, dp, len); ipprot = nextip; } }
/* * Deliver IP Packets to the higher protocol layers. */ int ip_local_deliver(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; struct inet_protocol *ipprot; struct sock *raw_sk=NULL; unsigned char hash; int flag = 0; #ifndef CONFIG_IP_ALWAYS_DEFRAG /* * Reassemble IP fragments. */ if (iph->frag_off & htons(IP_MF|IP_OFFSET)) { skb = ip_defrag(skb); if (!skb) return 0; iph = skb->nh.iph; } #endif #ifdef CONFIG_IP_MASQUERADE /* * Do we need to de-masquerade this packet? */ { int ret = ip_fw_demasquerade(&skb); if (ret < 0) { kfree_skb(skb); return 0; } if (ret) { iph=skb->nh.iph; IPCB(skb)->flags |= IPSKB_MASQUERADED; dst_release(skb->dst); skb->dst = NULL; if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) { kfree_skb(skb); return 0; } return skb->dst->input(skb); } } #endif /* * Point into the IP datagram, just past the header. */ skb->h.raw = skb->nh.raw + iph->ihl*4; /* * Deliver to raw sockets. This is fun as to avoid copies we want to make no * surplus copies. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ hash = iph->protocol & (MAX_INET_PROTOS - 1); /* * If there maybe a raw socket we must check - if not we don't care less */ if((raw_sk = raw_v4_htable[hash]) != NULL) { struct sock *sknext = NULL; struct sk_buff *skb1; raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); if(raw_sk) { /* Any raw sockets */ do { /* Find the next */ sknext = raw_v4_lookup(raw_sk->next, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); if (iph->protocol != IPPROTO_ICMP || !icmp_filter(raw_sk, skb)) { if (sknext == NULL) break; skb1 = skb_clone(skb, GFP_ATOMIC); if(skb1) { raw_rcv(raw_sk, skb1); } } raw_sk = sknext; } while(raw_sk!=NULL); /* Here either raw_sk is the last raw socket, or NULL if * none. 
We deliver to the last raw socket AFTER the * protocol checks as it avoids a surplus copy. */ } } /* * skb->h.raw now points at the protocol beyond the IP header. */ for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) { struct sk_buff *skb2; if (ipprot->protocol != iph->protocol) continue; /* * See if we need to make a copy of it. This will * only be set if more than one protocol wants it. * and then not for the last one. If there is a pending * raw delivery wait for that */ if (ipprot->copy || raw_sk) { skb2 = skb_clone(skb, GFP_ATOMIC); if(skb2==NULL) continue; } else { skb2 = skb; } flag = 1; /* * Pass on the datagram to each protocol that wants it, * based on the datagram protocol. We should really * check the protocol handler's return values here... */ ipprot->handler(skb2, ntohs(iph->tot_len) - (iph->ihl * 4)); } /* * All protocols checked. * If this packet was a broadcast, we may *not* reply to it, since that * causes (proven, grin) ARP storms and a leakage of memory (i.e. all * ICMP reply messages get queued up for transmission...) */ if(raw_sk!=NULL) /* Shift to last raw user */ { raw_rcv(raw_sk, skb); } else if (!flag) /* Free and report errors */ { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); kfree_skb(skb); } return(0); }