/* * Process Router Attention IP option */ int ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = skb->nh.iph->protocol; struct sock *last = NULL; for (ra = ip_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; if (sk && sk->num == protocol) { if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { skb = ip_defrag(skb); if (skb == NULL) return 1; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) raw_rcv(last, skb2); } last = sk; } } if (last) { raw_rcv(last, skb); return 1; } return 0; }
/* IP input processing comes here for RAW socket delivery. * Caller owns SKB, so we must make clones. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; int delivered = 0; struct net *net; read_lock(&raw_v4_hashinfo.lock); head = &raw_v4_hashinfo.ht[hash]; if (hlist_empty(head)) goto out; net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); while (sk) { delivered = 1; if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) raw_rcv(sk, clone); } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); } out: read_unlock(&raw_v4_hashinfo.lock); return delivered; }
/* IP input processing comes here for RAW socket delivery. * Caller owns SKB, so we must make clones. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; read_lock(&raw_v4_lock); head = &raw_v4_htable[hash]; if (hlist_empty(head)) goto out; sk = __raw_v4_lookup(__sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); while (sk) { if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) raw_rcv(sk, clone); } sk = __raw_v4_lookup(sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); } out: read_unlock(&raw_v4_lock); }
/* IP input processing comes here for RAW socket delivery. * This is fun as to avoid copies we want to make no surplus * copies. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ struct sock *raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; read_lock(&raw_v4_lock); if ((sk = raw_v4_htable[hash]) == NULL) goto out; sk = __raw_v4_lookup(sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); while (sk) { struct sock *sknext = __raw_v4_lookup(sk->next, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone; if (!sknext) break; clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) raw_rcv(sk, clone); } sk = sknext; } out: if (sk) sock_hold(sk); read_unlock(&raw_v4_lock); return sk; }
/* IP input processing comes here for RAW socket delivery. * Caller owns SKB, so we must make clones. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; int delivered = 0; struct net *net; read_lock(&raw_v4_hashinfo.lock); head = &raw_v4_hashinfo.ht[hash]; if (hlist_empty(head)) goto out; //head就是指向一个链表...该链表的结点都是protocol相同的struct sock... //然后遍历该链表来确定是否有哪个struct sock来处理该数据包啦.. net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); while (sk) { //来到这里代表至少说有一个struct sock可以处理该数据包....因此设置返回值为1... delivered = 1; if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) raw_rcv(sk, clone); } //可能存在多个struct sock同时处理一个数据包啦...主要还是看匹配规则(lookup).. //这里看到只是匹配协议/源地址/目的地址. sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); } out: read_unlock(&raw_v4_hashinfo.lock); return delivered; }
int ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = ip_hdr(skb)->protocol; struct sock *last = NULL; struct net_device *dev = skb->dev; read_lock(&ip_ra_lock); for (ra = ip_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report * the packet if it came from that interface. */ if (sk && inet_sk(sk)->num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && sock_net(sk) == dev_net(dev)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); return 1; } } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) raw_rcv(last, skb2); } last = sk; } } if (last) { raw_rcv(last, skb); read_unlock(&ip_ra_lock); return 1; } read_unlock(&ip_ra_lock); return 0; }
/* * Process Router Attention IP option */ int ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = skb->nh.iph->protocol; struct sock *last = NULL; read_lock(&ip_ra_lock); for (ra = ip_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report * the packet if it came from that interface. */ if (sk && sk->num == protocol && ((sk->bound_dev_if == 0) || (sk->bound_dev_if == skb->dev->ifindex))) { if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { skb = ip_defrag(skb); if (skb == NULL) { read_unlock(&ip_ra_lock); return 1; } } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) raw_rcv(last, skb2); } last = sk; } } if (last) { raw_rcv(last, skb); read_unlock(&ip_ra_lock); return 1; } read_unlock(&ip_ra_lock); return 0; }
/* * Process Router Attention IP option (RFC 2113) */ bool ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = ip_hdr(skb)->protocol; struct sock *last = NULL; struct net_device *dev = skb->dev; struct net *net = dev_net(dev); for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report * the packet if it came from that interface. */ if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), net)) { if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) return true; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) raw_rcv(last, skb2); } last = sk; } } if (last) { raw_rcv(last, skb); return true; } return false; }
static int ip_rcv(struct sk_buff *skb, struct net_device *dev) { net_device_stats_t *stats = &dev->stats; const struct net_proto *nproto; iphdr_t *iph = ip_hdr(skb); __u16 old_check; size_t ip_len; int optlen; sk_buff_t *complete_skb; /** * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. * Is the datagram acceptable? * 1. Length at least the size of an ip header * 2. Version of 4 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] * 4. Doesn't have a bogus length */ if (skb->len < dev->hdr_len + IP_MIN_HEADER_SIZE || IP_HEADER_SIZE(iph) < IP_MIN_HEADER_SIZE || skb->len < dev->hdr_len + IP_HEADER_SIZE(iph)) { DBG(printk("ip_rcv: invalid IPv4 header length\n")); stats->rx_length_errors++; skb_free(skb); return 0; /* error: invalid header length */ } if (iph->version != 4) { DBG(printk("ip_rcv: invalid IPv4 version\n")); stats->rx_err++; skb_free(skb); return 0; /* error: not ipv4 */ } old_check = iph->check; ip_set_check_field(iph); if (old_check != iph->check) { DBG(printk("ip_rcv: invalid checksum %hx(%hx)\n", ntohs(old_check), ntohs(iph->check))); stats->rx_crc_errors++; skb_free(skb); return 0; /* error: invalid crc */ } ip_len = ntohs(iph->tot_len); if (ip_len < IP_HEADER_SIZE(iph) || skb->len < dev->hdr_len + ip_len) { DBG(printk("ip_rcv: invalid IPv4 length\n")); stats->rx_length_errors++; skb_free(skb); return 0; /* error: invalid length */ } /* Setup transport layer (L4) header */ skb->h.raw = skb->nh.raw + IP_HEADER_SIZE(iph); /* Validating */ if (0 != nf_test_skb(NF_CHAIN_INPUT, NF_TARGET_ACCEPT, skb)) { DBG(printk("ip_rcv: dropped by input netfilter\n")); stats->rx_dropped++; skb_free(skb); return 0; /* error: dropped */ } /* Forwarding */ assert(skb->dev); assert(inetdev_get_by_dev(skb->dev)); if (inetdev_get_by_dev(skb->dev)->ifa_address != 0) { /** * FIXME * This check needed for BOOTP protocol * disable forwarding if interface is not set yet */ /** * Check the destination address, and if it 
doesn't match * any of own addresses, retransmit packet according to the routing table. */ if (!ip_is_local(iph->daddr, IP_LOCAL_BROADCAST)) { if (0 != nf_test_skb(NF_CHAIN_FORWARD, NF_TARGET_ACCEPT, skb)) { DBG(printk("ip_rcv: dropped by forward netfilter\n")); stats->rx_dropped++; skb_free(skb); return 0; /* error: dropped */ } return ip_forward(skb); } } memset(skb->cb, 0, sizeof(skb->cb)); optlen = IP_HEADER_SIZE(iph) - IP_MIN_HEADER_SIZE; if (optlen > 0) { /* NOTE : maybe it'd be better to copy skb here, * 'cause options may cause modifications * but smart people who wrote linux kernel * say that this is extremely rarely needed */ ip_options_t *opts = (ip_options_t*)(skb->cb); memset(skb->cb, 0, sizeof(skb->cb)); opts->optlen = optlen; if (ip_options_compile(skb, opts)) { DBG(printk("ip_rcv: invalid options\n")); stats->rx_err++; skb_free(skb); return 0; /* error: bad ops */ } if (ip_options_handle_srr(skb)) { DBG(printk("ip_rcv: can't handle options\n")); stats->tx_err++; skb_free(skb); return 0; /* error: can't handle ops */ } } /* It's very useful for us to have complete packet even for forwarding * (we may apply any filter, we may perform NAT etc), * but it'll break routing if different parts of a fragmented * packet will use different routes. So they can't be assembled. * See RFC 1812 for details */ if (ntohs(skb->nh.iph->frag_off) & (IP_MF | IP_OFFSET)) { if ((complete_skb = ip_defrag(skb)) == NULL) { if (skb == NULL) { return 0; /* error: */ } return 0; } else { skb = complete_skb; iph = ip_hdr(complete_skb); } } /* When a packet is received, it is passed to any raw sockets * which have been bound to its protocol or to socket with concrete protocol */ raw_rcv(skb); nproto = net_proto_lookup(ETH_P_IP, iph->proto); if (nproto != NULL) { return nproto->handle(skb); } DBG(printk("ip_rcv: unknown protocol\n")); skb_free(skb); return 0; /* error: nobody wants this packet */ }
static inline int ip_local_deliver_finish(struct sk_buff *skb) { int ihl = skb->nh.iph->ihl*4; #ifdef CONFIG_NETFILTER_DEBUG nf_debug_ip_local_deliver(skb); #endif /*CONFIG_NETFILTER_DEBUG*/ __skb_pull(skb, ihl); #ifdef CONFIG_NETFILTER /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif /*CONFIG_NETFILTER*/ /* Point into the IP datagram, just past the header. */ skb->h.raw = skb->data; { /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ int protocol = skb->nh.iph->protocol; int hash = protocol & (MAX_INET_PROTOS - 1); struct sock *raw_sk = raw_v4_htable[hash]; struct inet_protocol *ipprot; int flag; /* If there maybe a raw socket we must check - if not we * don't care less */ if(raw_sk != NULL) raw_sk = raw_v4_input(skb, skb->nh.iph, hash); ipprot = (struct inet_protocol *) inet_protos[hash]; flag = 0; if(ipprot != NULL) { if(raw_sk == NULL && ipprot->next == NULL && ipprot->protocol == protocol) { int ret; /* Fast path... */ ret = ipprot->handler(skb); return ret; } else { flag = ip_run_ipprot(skb, skb->nh.iph, ipprot, (raw_sk != NULL)); } } /* All protocols checked. * If this packet was a broadcast, we may *not* reply to it, since that * causes (proven, grin) ARP storms and a leakage of memory (i.e. all * ICMP reply messages get queued up for transmission...) */ if(raw_sk != NULL) { /* Shift to last raw user */ raw_rcv(raw_sk, skb); sock_put(raw_sk); } else if (!flag) { /* Free and report errors */ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); kfree_skb(skb); } } return 0; }
/*
 *	Main IP receive routine.
 *
 *	Validates the IP header, compiles options, optionally defragments
 *	and consults the firewall, then either delivers the packet locally
 *	(raw sockets first, then the inet_protos handlers for its protocol)
 *	or forwards it.
 *
 *	skb:	received buffer; skb->h.iph is taken as the IP header
 *	dev:	receiving device (may be replaced by an alias device when
 *		CONFIG_NET_ALIAS is set)
 *	pt:	packet type; only passed on to ipv6_rcv() here
 *
 *	Returns 0 on most paths, -EINVAL on two source-route rejection
 *	paths, and -1 when a strict-route packet would have to be forwarded.
 */
int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	unsigned char flag = 0;
	struct inet_protocol *ipprot;
	int brd=IS_MYADDR;
	struct options * opt = NULL;
	int is_frag=0;
	__u32 daddr;

#ifdef CONFIG_FIREWALL
	int fwres;
	__u16 rport;
#endif
#ifdef CONFIG_IP_MROUTE
	int mroute_pkt=0;
#endif

#ifdef CONFIG_NET_IPV6
	/*
	 *	Intercept IPv6 frames. We dump ST-II and invalid types just below..
	 */

	if(iph->version == 6)
		return ipv6_rcv(skb,dev,pt);
#endif

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
	 *	RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 *	(5.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
		|| skb->len < ntohs(iph->tot_len))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 *	Note this now means skb->len holds ntohs(iph->tot_len).
	 */

	skb_trim(skb,ntohs(iph->tot_len));

	/* The trimmed length must still cover the full header (ihl 32-bit words). */
	if(skb->len < (iph->ihl<<2))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

	/*
	 *	Account for the packet (even if the packet is
	 *	not accepted by the firewall!). We do this after
	 *	the sanity checks and the additional ihl check
	 *	so we don't account garbage as we might do before.
	 */

#ifdef CONFIG_IP_ACCT
	ip_fw_chk(iph,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN);
#endif

	/*
	 *	Try to select closest <src,dst> alias device, if any.
	 *	net_alias_dev_rx32 returns main device if it
	 *	fails to find another.
	 *	If successful, also incr. alias rx count.
	 *
	 *	Only makes sense for unicasts - Thanks ANK.
	 */

#ifdef CONFIG_NET_ALIAS
	if (skb->pkt_type == PACKET_HOST && iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev)) {
		skb->dev = dev = net_alias_dev_rx32(skb->dev, AF_INET, iph->saddr, iph->daddr);
	}
#endif

	/* ihl > 5 means the header carries options. */
	if (iph->ihl > 5)
	{
		skb->ip_summed = 0;
		/*
		 * NOTE(review): skb is not freed here on failure; presumably
		 * ip_options_compile() disposes of it when called with a NULL
		 * options pointer -- confirm.
		 */
		if (ip_options_compile(NULL, skb))
			return(0);
		opt = (struct options*)skb->proto_priv;
#ifdef CONFIG_IP_NOSR
		/* Source-routed packets are rejected outright in this configuration. */
		if (opt->srr)
		{
			kfree_skb(skb, FREE_READ);
			return -EINVAL;
		}
#endif
	}

	/* Transparent proxying forces CONFIG_IP_ALWAYS_DEFRAG on. */
#if defined(CONFIG_IP_TRANSPARENT_PROXY) && !defined(CONFIG_IP_ALWAYS_DEFRAG)
#define CONFIG_IP_ALWAYS_DEFRAG 1
#endif
#ifdef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 * Defragment all incoming traffic before even looking at it.
	 * If you have forwarding enabled, this makes the system a
	 * defragmenting router.  Not a common thing.
	 * You probably DON'T want to enable this unless you have to.
	 * You NEED to use this if you want to use transparent proxying,
	 * otherwise, we can't vouch for your sanity.
	 */

	/*
	 *	See if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		/* More-fragments bit set. */
		if (iph->frag_off & htons(IP_MF))
			is_frag|=IPFWD_FRAGMENT;
		/*
		 *	Non-zero fragment offset (flagged as IPFWD_LASTFRAG).
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=IPFWD_LASTFRAG;

		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
			is_frag = 0;
			/*
			 * When the reassembled packet gets forwarded, the ip
			 * header checksum should be correct.
			 * For better performance, this should actually only
			 * be done in that particular case, i.e. set a flag
			 * here and calculate the checksum in ip_forward.
			 */
			ip_send_check(iph);
		}
	}

#endif
	/*
	 *	See if the firewall wants to dispose of the packet.
	 */

#ifdef	CONFIG_FIREWALL

	if ((fwres=call_in_firewall(PF_INET, skb->dev, iph, &rport))<FW_ACCEPT)
	{
		/* Only an explicit reject is answered with an ICMP error. */
		if(fwres==FW_REJECT)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

#ifdef	CONFIG_IP_TRANSPARENT_PROXY
	if (fwres==FW_REDIRECT)
		skb->redirport = rport;
	else
#endif
		skb->redirport = 0;
#endif

#ifndef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 *	Remember if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		/* More-fragments bit set. */
		if (iph->frag_off & htons(IP_MF))
			is_frag|=IPFWD_FRAGMENT;
		/*
		 *	Non-zero fragment offset (flagged as IPFWD_LASTFRAG).
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=IPFWD_LASTFRAG;
	}

#endif
	/*
	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */
	daddr = iph->daddr;
#ifdef CONFIG_IP_TRANSPARENT_PROXY
	/*
	 *	ip_chksock adds still more overhead for forwarded traffic...
	 */
	if ( iph->daddr == skb->dev->pa_addr || skb->redirport || (brd = ip_chk_addr(iph->daddr)) != 0 || ip_chksock(skb))
#else
	if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
#endif
	{
		/*
		 * Source routing: walk the remaining route entries.  Entries
		 * that are our own address are skipped (daddr follows them);
		 * the first entry that is not local becomes the next hop.
		 */
		if (opt && opt->srr)
		{
			int srrspace, srrptr;
			__u32 nexthop;
			unsigned char * optptr = ((unsigned char *)iph) + opt->srr;

			if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST)
			{
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			/* optptr[1] is read as the option length, optptr[2] as its pointer. */
			for ( srrptr=optptr[2], srrspace = optptr[1];
			      srrptr <= srrspace;
			      srrptr += 4
			     )
			{
				int brd2;
				/* A truncated address entry is a parameter problem. */
				if (srrptr + 3 > srrspace)
				{
					icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
						  skb->dev);
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				memcpy(&nexthop, &optptr[srrptr-1], 4);
				if ((brd2 = ip_chk_addr(nexthop)) == 0)
					break;
				if (brd2 != IS_MYADDR)
				{

					/*
					 *	ANK: should we implement weak tunneling of multicasts?
					 *	Are they obsolete? DVMRP specs (RFC-1075) is old enough...
					 *	[They are obsolete]
					 */
					kfree_skb(skb, FREE_WRITE);
					return -EINVAL;
				}
				memcpy(&daddr, &optptr[srrptr-1], 4);
			}
			/* The loop ended via break: nexthop holds a non-local hop. */
			if (srrptr <= srrspace)
			{
				opt->srr_is_hit = 1;
				opt->is_changed = 1;
				if (sysctl_ip_forward) {
					/* A non-zero return from ip_forward leaves the skb for us to free. */
					if (ip_forward(skb, dev, is_frag, nexthop))
						kfree_skb(skb, FREE_WRITE);
				} else {
					ip_statistics.IpInAddrErrors++;
					kfree_skb(skb, FREE_WRITE);
				}
				return 0;
			}
		}

#ifdef CONFIG_IP_MULTICAST
		if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
		{
			/*
			 *	Check it is for one of our groups
			 */
			struct ip_mc_list *ip_mc=dev->ip_mc_list;
			do
			{
				/* End of the device's group list: not for us, drop it. */
				if(ip_mc==NULL)
				{
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				if(ip_mc->multiaddr==iph->daddr)
					break;
				ip_mc=ip_mc->next;
			}
			while(1);
		}
#endif

#ifndef CONFIG_IP_ALWAYS_DEFRAG
		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
		}

#endif

#ifdef CONFIG_IP_MASQUERADE
		/*
		 *	Do we need to de-masquerade this packet?
		 */
		{
			int ret = ip_fw_demasquerade(&skb,dev);
			if (ret < 0) {
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			/* Non-zero: forward the packet using its (re-read) header's daddr. */
			if (ret)
			{
				struct iphdr *iph=skb->h.iph;
				if (ip_forward(skb, dev, IPFWD_MASQUERADED, iph->daddr))
					kfree_skb(skb, FREE_WRITE);
				return 0;
			}
		}
#endif

		/*
		 *	Point into the IP datagram, just past the header.
		 */

		skb->ip_hdr = iph;
		skb->h.raw += iph->ihl*4;

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Check the state on multicast routing (multicast and not 224.0.0.z)
		 */

		if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
			mroute_pkt=1;

#endif
		/*
		 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
		 *
		 *	RFC 1122: SHOULD pass TOS value up to the transport layer.
		 */

		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
		hash = iph->protocol & (MAX_INET_PROTOS - 1);

		/*
		 *	If there maybe a raw socket we must check - if not we don't care less
		 */

		if((raw_sk = raw_v4_htable[hash]) != NULL) {
			struct sock *sknext = NULL;
			struct sk_buff *skb1;

			raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr);
			if(raw_sk) {	/* Any raw sockets */
				do {
					/* Find the next */
					sknext = raw_v4_lookup(raw_sk->next, iph->protocol,
							       iph->saddr, iph->daddr);
					if(sknext)
						skb1 = skb_clone(skb, GFP_ATOMIC);
					else
						break;	/* One pending raw socket left */
					if(skb1)
						raw_rcv(raw_sk, skb1, dev, iph->saddr,daddr);
					raw_sk = sknext;
				} while(raw_sk!=NULL);

				/*
				 *	Here either raw_sk is the last raw socket, or NULL if none
				 */

				/*
				 *	We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy
				 */
			}
		}

		/*
		 *	skb->h.raw now points at the protocol beyond the IP header.
		 */

		for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
		{
			struct sk_buff *skb2;

			/* Several protocols can share a hash bucket. */
			if (ipprot->protocol != iph->protocol)
				continue;
		       /*
			*	See if we need to make a copy of it.  This will
			*	only be set if more than one protocol wants it.
			*	and then not for the last one. If there is a pending
			*	raw delivery wait for that
			*/

#ifdef CONFIG_IP_MROUTE
			if (ipprot->copy || raw_sk || mroute_pkt)
#else
			if (ipprot->copy || raw_sk)
#endif
			{
				skb2 = skb_clone(skb, GFP_ATOMIC);
				/* Clone failed: this handler is skipped. */
				if(skb2==NULL)
					continue;
			}
			else
			{
				skb2 = skb;
			}
			flag = 1;

		       /*
			*	Pass on the datagram to each protocol that wants it,
			*	based on the datagram protocol.  We should really
			*	check the protocol handler's return values here...
			*/

			ipprot->handler(skb2, dev, opt, daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);
		}

		/*
		 *	All protocols checked.
		 *	If this packet was a broadcast, we may *not* reply to it, since that
		 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
		 *	ICMP reply messages get queued up for transmission...)
		 */

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Forward the last copy to the multicast router. If
		 *	there is a pending raw delivery however make a copy
		 *	and forward that.
		 */

		if(mroute_pkt)
		{
			flag=1;
			if(raw_sk==NULL)
				ipmr_forward(skb, is_frag);
			else
			{
				struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
				if(skb2)
				{
					skb2->free=1;
					ipmr_forward(skb2, is_frag);
				}
			}
		}
#endif

		if(raw_sk!=NULL)	/* Shift to last raw user */
			raw_rcv(raw_sk, skb, dev, iph->saddr, daddr);
		else if (!flag)		/* Free and report errors */
		{
			/* No protocol-unreachable reply for broadcast or multicast packets. */
			if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
				icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
			kfree_skb(skb, FREE_WRITE);
		}

		return(0);
	}

	/*
	 *	Do any unicast IP forwarding required.
	 */

	/*
	 *	Don't forward multicast or broadcast frames.
	 */

	if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
	{
		kfree_skb(skb,FREE_WRITE);
		return 0;
	}

	/*
	 *	The packet is for another target. Forward the frame
	 */

	if (sysctl_ip_forward) {
		/* A strict source route cannot be satisfied by forwarding from here. */
		if (opt && opt->is_strictroute) {
			icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
			kfree_skb(skb, FREE_WRITE);
			return -1;
		}
		if (ip_forward(skb, dev, is_frag, iph->daddr))
			kfree_skb(skb, FREE_WRITE);
	} else {
/*	printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
		kfree_skb(skb, FREE_WRITE);
	}
	return(0);
}
/*
 * 	Deliver IP Packets to the higher protocol layers.
 *
 *	Reassembles fragments (unless CONFIG_IP_ALWAYS_DEFRAG is set),
 *	optionally de-masquerades, then hands the packet to matching raw
 *	sockets and to the inet_protos handlers registered for its protocol.
 *	When nobody takes it, a protocol-unreachable ICMP error is sent and
 *	the skb is freed.
 *
 *	Returns 0, except on the de-masquerade re-route path where the
 *	return value of skb->dst->input() is passed through.
 */
int ip_local_deliver(struct sk_buff *skb)
{
	struct iphdr *iph = skb->nh.iph;
	struct inet_protocol *ipprot;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	int flag = 0;

#ifndef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 *	Reassemble IP fragments.
	 */

	if (iph->frag_off & htons(IP_MF|IP_OFFSET)) {
		skb = ip_defrag(skb);
		/* NULL: no complete datagram to deliver yet. */
		if (!skb)
			return 0;
		iph = skb->nh.iph;
	}
#endif

#ifdef CONFIG_IP_MASQUERADE
	/*
	 * Do we need to de-masquerade this packet?
	 */
	{
		int ret = ip_fw_demasquerade(&skb);
		if (ret < 0) {
			kfree_skb(skb);
			return 0;
		}

		if (ret) {
			/*
			 * Non-zero: re-read the header, drop the old route
			 * and route the packet again as fresh input.
			 */
			iph=skb->nh.iph;
			IPCB(skb)->flags |= IPSKB_MASQUERADED;
			dst_release(skb->dst);
			skb->dst = NULL;
			if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) {
				kfree_skb(skb);
				return 0;
			}
			return skb->dst->input(skb);
		}
	}
#endif

        /*
	 *	Point into the IP datagram, just past the header.
	 */

        skb->h.raw = skb->nh.raw + iph->ihl*4;

	/*
	 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no
	 *	surplus copies.
	 *
	 *	RFC 1122: SHOULD pass TOS value up to the transport layer.
	 *	-> It does. And not only TOS, but all IP header.
	 */

	/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
	hash = iph->protocol & (MAX_INET_PROTOS - 1);

	/*
	 *	If there maybe a raw socket we must check - if not we don't care less
	 */

	if((raw_sk = raw_v4_htable[hash]) != NULL) {
		struct sock *sknext = NULL;
		struct sk_buff *skb1;
		raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex);
		if(raw_sk) {	/* Any raw sockets */
			do {
				/* Find the next */
				sknext = raw_v4_lookup(raw_sk->next, iph->protocol,
						       iph->saddr, iph->daddr, skb->dev->ifindex);
				/* ICMP packets rejected by the socket's filter are skipped. */
				if (iph->protocol != IPPROTO_ICMP || !icmp_filter(raw_sk, skb)) {
					/* Last accepting socket: leave it pending in raw_sk. */
					if (sknext == NULL)
						break;
					skb1 = skb_clone(skb, GFP_ATOMIC);
					if(skb1)
					{
						raw_rcv(raw_sk, skb1);
					}
				}
				raw_sk = sknext;
			} while(raw_sk!=NULL);

			/*	Here either raw_sk is the last raw socket, or NULL if
			 *	none.  We deliver to the last raw socket AFTER the
			 *	protocol checks as it avoids a surplus copy.
			 */
		}
	}

	/*
	 *	skb->h.raw now points at the protocol beyond the IP header.
	 */

	for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
	{
		struct sk_buff *skb2;

		/* Several protocols can share a hash bucket. */
		if (ipprot->protocol != iph->protocol)
			continue;
		/*
		 * 	See if we need to make a copy of it.  This will
		 * 	only be set if more than one protocol wants it.
		 * 	and then not for the last one. If there is a pending
		 *	raw delivery wait for that
		 */

		if (ipprot->copy || raw_sk)
		{
			skb2 = skb_clone(skb, GFP_ATOMIC);
			/* Clone failed: this handler is skipped. */
			if(skb2==NULL)
				continue;
		}
		else
		{
			skb2 = skb;
		}
		flag = 1;

		/*
		 *	Pass on the datagram to each protocol that wants it,
		 *	based on the datagram protocol.  We should really
		 *	check the protocol handler's return values here...
		 */

		ipprot->handler(skb2, ntohs(iph->tot_len) - (iph->ihl * 4));
	}

	/*
	 *	All protocols checked.
	 *	If this packet was a broadcast, we may *not* reply to it, since that
	 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
	 *	ICMP reply messages get queued up for transmission...)
	 */

	if(raw_sk!=NULL)	/* Shift to last raw user */
	{
		raw_rcv(raw_sk, skb);

	}
	else if (!flag)		/* Free and report errors */
	{
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
		kfree_skb(skb);
	}

	return(0);
}