/*
 *	ip_rcv - receive handler for an incoming IP frame.
 *
 *	skb:	buffer holding the frame; the IP header is taken from skb->h.iph.
 *	dev:	device the frame is attributed to (may be replaced by an alias
 *		device when CONFIG_NET_ALIAS is set).
 *	pt:	packet type entry; only passed on to ipv6_rcv() here.
 *
 *	Processing order, as written below:
 *	  1. header sanity checks and trimming to iph->tot_len
 *	  2. optional accounting, alias selection and IP option parsing
 *	  3. optional early defragmentation (CONFIG_IP_ALWAYS_DEFRAG)
 *	  4. optional input firewall
 *	  5. if the destination is local (or broadcast/multicast/redirected):
 *	     source-route handling, multicast group filter, defragmentation,
 *	     de-masquerading, then delivery to raw sockets and to the
 *	     protocol handlers in inet_protos[]
 *	  6. otherwise unicast forwarding through ip_forward()
 *
 *	Returns 0 on most paths, -EINVAL when a source route is rejected,
 *	-1 when a strict-route packet would have to be forwarded, or the
 *	result of ipv6_rcv() for version 6 frames (CONFIG_NET_IPV6).
 *
 *	Most exits visible here either release the buffer with kfree_skb()
 *	or hand it to another routine (protocol handler, raw_rcv, ip_forward,
 *	ipmr_forward, ip_defrag).
 */
int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	unsigned char flag = 0;		/* set once any handler took the packet */
	struct inet_protocol *ipprot;
	int brd=IS_MYADDR;		/* address class of the destination */
	struct options * opt = NULL;	/* parsed IP options, if ihl > 5 */
	int is_frag=0;			/* IPFWD_FRAGMENT / IPFWD_LASTFRAG bits */
	__u32 daddr;			/* destination passed to upper layers */
#ifdef CONFIG_FIREWALL
	int fwres;
	__u16 rport;
#endif
#ifdef CONFIG_IP_MROUTE
	int mroute_pkt=0;
#endif

#ifdef CONFIG_NET_IPV6
	/*
	 *	Intercept IPv6 frames. We dump ST-II and invalid types just below..
	 */
	if(iph->version == 6)
		return ipv6_rcv(skb,dev,pt);
#endif

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
	 *	RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 *	(5.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 *
	 *	NOTE(review): the checksum covers iph->ihl words, but ihl is only
	 *	compared against the buffer length after the trim below - confirm
	 *	that reading ihl*4 bytes here is always within the buffer.
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 ||
	    ip_fast_csum((unsigned char *)iph, iph->ihl) !=0 ||
	    skb->len < ntohs(iph->tot_len))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 *	Note this now means skb->len holds ntohs(iph->tot_len).
	 */

	skb_trim(skb,ntohs(iph->tot_len));

	/*
	 *	The declared header length must fit inside the trimmed frame.
	 */
	if(skb->len < (iph->ihl<<2))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

	/*
	 *	Account for the packet (even if the packet is
	 *	not accepted by the firewall!). We do this after
	 *	the sanity checks and the additional ihl check
	 *	so we don't account garbage as we might do before.
	 */

#ifdef CONFIG_IP_ACCT
	ip_fw_chk(iph,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN);
#endif

	/*
	 *	Try to select closest <src,dst> alias device, if any.
	 *	net_alias_dev_rx32 returns main device if it
	 *	fails to find another.
	 *	If successful, also incr. alias rx count.
	 *
	 *	Only makes sense for unicasts - Thanks ANK.
	 */

#ifdef CONFIG_NET_ALIAS
	if (skb->pkt_type == PACKET_HOST && iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev))
	{
		skb->dev = dev = net_alias_dev_rx32(skb->dev, AF_INET, iph->saddr, iph->daddr);
	}
#endif

	/*
	 *	A header longer than 5 words carries IP options: parse them.
	 */
	if (iph->ihl > 5)
	{
		skb->ip_summed = 0;
		/*
		 *	NOTE(review): skb is not released here on failure;
		 *	presumably ip_options_compile() does that itself - confirm.
		 */
		if (ip_options_compile(NULL, skb))
			return(0);
		opt = (struct options*)skb->proto_priv;
#ifdef CONFIG_IP_NOSR
		/*
		 *	Source routed frames are dropped outright in this configuration.
		 */
		if (opt->srr)
		{
			kfree_skb(skb, FREE_READ);
			return -EINVAL;
		}
#endif
	}

	/*
	 *	Transparent proxying forces CONFIG_IP_ALWAYS_DEFRAG on.
	 */
#if defined(CONFIG_IP_TRANSPARENT_PROXY) && !defined(CONFIG_IP_ALWAYS_DEFRAG)
#define CONFIG_IP_ALWAYS_DEFRAG 1
#endif
#ifdef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 * Defragment all incoming traffic before even looking at it.
	 * If you have forwarding enabled, this makes the system a
	 * defragmenting router.  Not a common thing.
	 * You probably DON'T want to enable this unless you have to.
	 * You NEED to use this if you want to use transparent proxying,
	 * otherwise, we can't vouch for your sanity.
	 */

	/*
	 *	See if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		/*
		 *	More-fragments bit set.
		 */
		if (iph->frag_off & htons(IP_MF))
			is_frag|=IPFWD_FRAGMENT;
		/*
		 *	Last fragment ? (non-zero fragment offset)
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=IPFWD_LASTFRAG;

		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
			is_frag = 0;
			/*
			 * When the reassembled packet gets forwarded, the ip
			 * header checksum should be correct.
			 * For better performance, this should actually only
			 * be done in that particular case, i.e. set a flag
			 * here and calculate the checksum in ip_forward.
			 */
			ip_send_check(iph);
		}
	}

#endif
	/*
	 *	See if the firewall wants to dispose of the packet.
	 */

#ifdef CONFIG_FIREWALL

	if ((fwres=call_in_firewall(PF_INET, skb->dev, iph, &rport))<FW_ACCEPT)
	{
		/*
		 *	Only an explicit reject is answered with an ICMP error;
		 *	any other verdict below FW_ACCEPT drops silently.
		 */
		if(fwres==FW_REJECT)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

	/*
	 *	Record the redirect port chosen by the firewall, or 0 for none.
	 *	Without transparent proxy support it is always 0.
	 */
#ifdef CONFIG_IP_TRANSPARENT_PROXY
	if (fwres==FW_REDIRECT)
		skb->redirport = rport;
	else
#endif
		skb->redirport = 0;
#endif

#ifndef CONFIG_IP_ALWAYS_DEFRAG
	/*
	 *	Remember if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		if (iph->frag_off & htons(IP_MF))
			is_frag|=IPFWD_FRAGMENT;
		/*
		 *	Last fragment ?
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=IPFWD_LASTFRAG;
	}

#endif
	/*
	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */
	daddr = iph->daddr;
#ifdef CONFIG_IP_TRANSPARENT_PROXY
	/*
	 *	ip_chksock adds still more overhead for forwarded traffic...
	 */
	if ( iph->daddr == skb->dev->pa_addr || skb->redirport || (brd = ip_chk_addr(iph->daddr)) != 0 || ip_chksock(skb))
#else
	if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
#endif
	{
		/*
		 *	Source routing: walk the remaining hops of the route.
		 *	Hops that are our own address are consumed (daddr follows
		 *	them); the first hop that is not one of our addresses
		 *	becomes the next hop to forward to.
		 */
		if (opt && opt->srr)
		{
			int srrspace, srrptr;
			__u32 nexthop;
			unsigned char * optptr = ((unsigned char *)iph) + opt->srr;

			/*
			 *	Source routes are only honoured for unicast frames
			 *	addressed to this host.
			 */
			if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST)
			{
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			/*
			 *	optptr[1] is the option length, optptr[2] the
			 *	pointer to the next address; each entry is 4 bytes.
			 */
			for ( srrptr=optptr[2], srrspace = optptr[1];
			      srrptr <= srrspace;
			      srrptr += 4
			     )
			{
				int brd2;
				/*
				 *	A partial address at the end of the option
				 *	is a parameter problem.
				 */
				if (srrptr + 3 > srrspace)
				{
					icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
						  skb->dev);
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				memcpy(&nexthop, &optptr[srrptr-1], 4);
				/*
				 *	Not a local address: this is where we forward to.
				 */
				if ((brd2 = ip_chk_addr(nexthop)) == 0)
					break;
				if (brd2 != IS_MYADDR)
				{

					/*
					 *	ANK: should we implement weak tunneling of multicasts?
					 *	Are they obsolete? DVMRP specs (RFC-1075) is old enough...
					 *	[They are obsolete]
					 */
					kfree_skb(skb, FREE_WRITE);
					return -EINVAL;
				}
				memcpy(&daddr, &optptr[srrptr-1], 4);
			}
			/*
			 *	The loop stopped early on a non-local hop, so the
			 *	packet has to travel on (nexthop was set just
			 *	before the break).
			 */
			if (srrptr <= srrspace)
			{
				opt->srr_is_hit = 1;
				opt->is_changed = 1;
				if (sysctl_ip_forward) {
					if (ip_forward(skb, dev, is_frag, nexthop))
						kfree_skb(skb, FREE_WRITE);
				} else {
					ip_statistics.IpInAddrErrors++;
					kfree_skb(skb, FREE_WRITE);
				}
				return 0;
			}
		}

#ifdef CONFIG_IP_MULTICAST
		if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
		{
			/*
			 *	Check it is for one of our groups.
			 *	Reaching the end of the device's list without a
			 *	match drops the frame.
			 */
			struct ip_mc_list *ip_mc=dev->ip_mc_list;
			do
			{
				if(ip_mc==NULL)
				{
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				if(ip_mc->multiaddr==iph->daddr)
					break;
				ip_mc=ip_mc->next;
			}
			while(1);
		}
#endif

#ifndef CONFIG_IP_ALWAYS_DEFRAG
		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
		}

#endif

#ifdef CONFIG_IP_MASQUERADE
		/*
		 *	Do we need to de-masquerade this packet?
		 *	A negative result drops the packet, a positive one
		 *	sends it on through ip_forward(), zero means normal
		 *	local delivery continues.
		 */
		{
			int ret = ip_fw_demasquerade(&skb,dev);
			if (ret < 0) {
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			if (ret)
			{
				/* skb may have been replaced, so re-read the header */
				struct iphdr *iph=skb->h.iph;
				if (ip_forward(skb, dev, IPFWD_MASQUERADED, iph->daddr))
					kfree_skb(skb, FREE_WRITE);
				return 0;
			}
		}
#endif

		/*
		 *	Point into the IP datagram, just past the header.
		 */

		skb->ip_hdr = iph;
		skb->h.raw += iph->ihl*4;

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Check the state on multicast routing (multicast and not 224.0.0.z)
		 */

		if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
			mroute_pkt=1;

#endif
		/*
		 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
		 *
		 *	RFC 1122: SHOULD pass TOS value up to the transport layer.
		 */

		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
		hash = iph->protocol & (MAX_INET_PROTOS - 1);

		/*
		 *	If there maybe a raw socket we must check - if not we don't care less
		 */

		if((raw_sk = raw_v4_htable[hash]) != NULL)
		{
			struct sock *sknext = NULL;
			struct sk_buff *skb1;
			raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr);
			if(raw_sk)	/* Any raw sockets */
			{
				/*
				 *	Every matching socket except the last one
				 *	gets a clone; a failed clone just skips
				 *	that socket.
				 */
				do
				{
					/* Find the next */
					sknext = raw_v4_lookup(raw_sk->next, iph->protocol, iph->saddr, iph->daddr);
					if(sknext)
						skb1 = skb_clone(skb, GFP_ATOMIC);
					else
						break;	/* One pending raw socket left */
					if(skb1)
						raw_rcv(raw_sk, skb1, dev, iph->saddr,daddr);
					raw_sk = sknext;
				}
				while(raw_sk!=NULL);

				/*
				 *	Here either raw_sk is the last raw socket, or NULL if none
				 */

				/*
				 *	We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy
				 */
			}
		}

		/*
		 *	skb->h.raw now points at the protocol beyond the IP header.
		 */

		for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
		{
			struct sk_buff *skb2;

			if (ipprot->protocol != iph->protocol)
				continue;
			/*
			 * 	See if we need to make a copy of it.  This will
			 * 	only be set if more than one protocol wants it.
			 * 	and then not for the last one. If there is a pending
			 *	raw delivery wait for that
			 */

#ifdef CONFIG_IP_MROUTE
			if (ipprot->copy || raw_sk || mroute_pkt)
#else
			if (ipprot->copy || raw_sk)
#endif
			{
				skb2 = skb_clone(skb, GFP_ATOMIC);
				if(skb2==NULL)
					continue;
			}
			else
			{
				skb2 = skb;
			}
			flag = 1;

			/*
			 *	Pass on the datagram to each protocol that wants it,
			 *	based on the datagram protocol.  We should really
			 *	check the protocol handler's return values here...
			 *
			 *	NOTE(review): when skb2 == skb the handler receives
			 *	the original buffer, yet iph is still read by the
			 *	loop and the code after it - confirm the header
			 *	stays valid.
			 */

			ipprot->handler(skb2, dev, opt, daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);
		}

		/*
		 *	All protocols checked.
		 *	If this packet was a broadcast, we may *not* reply to it, since that
		 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
		 *	ICMP reply messages get queued up for transmission...)
		 */

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Forward the last copy to the multicast router. If
		 *	there is a pending raw delivery however make a copy
		 *	and forward that.
		 */

		if(mroute_pkt)
		{
			flag=1;
			if(raw_sk==NULL)
				ipmr_forward(skb, is_frag);
			else
			{
				struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
				if(skb2)
				{
					skb2->free=1;
					ipmr_forward(skb2, is_frag);
				}
			}
		}
#endif

		if(raw_sk!=NULL)	/* Shift to last raw user */
			raw_rcv(raw_sk, skb, dev, iph->saddr, daddr);
		else if (!flag)		/* Free and report errors */
		{
			/* nobody wanted it: protocol unreachable, unicast only */
			if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
				icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
			kfree_skb(skb, FREE_WRITE);
		}

		return(0);
	}

	/*
	 *	Do any unicast IP forwarding required.
	 */

	/*
	 *	Don't forward multicast or broadcast frames.
	 */

	if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
	{
		kfree_skb(skb,FREE_WRITE);
		return 0;
	}

	/*
	 *	The packet is for another target. Forward the frame
	 */

	if (sysctl_ip_forward) {
		/*
		 *	A strict source route that does not name us as the
		 *	next hop is reported as a parameter problem.
		 */
		if (opt && opt->is_strictroute)
		{
			icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
			kfree_skb(skb, FREE_WRITE);
			return -1;
		}
		/* a non-zero result from ip_forward() leaves the skb for us to free */
		if (ip_forward(skb, dev, is_frag, iph->daddr))
			kfree_skb(skb, FREE_WRITE);
	} else {
/*	printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
		kfree_skb(skb, FREE_WRITE);
	}
	return(0);
}
/*
 *	ip_fw_ctl - carry out a firewall control request.
 *
 *	stage:	request code; the IP_FW_COMMAND bits select the operation and
 *		the IP_FW_TYPE bits (shifted by IP_FW_SHIFT) select which entry
 *		of chains[] / policies[] it applies to.
 *	m:	request payload; its expected layout depends on the command
 *		(an int for IP_FW_POLICY, struct ip_fwpkt for IP_FW_CHECK,
 *		struct ip_fw_masq for IP_FW_MASQ_TIMEOUTS, and whatever
 *		check_ipfw_struct() accepts for insert/append/delete).
 *		Not used by IP_FW_FLUSH and IP_FW_ZERO.
 *	len:	size of the payload in bytes.
 *
 *	Returns 0 on success, otherwise a POSITIVE errno value (EINVAL for a
 *	malformed or unknown request).  IP_FW_CHECK reports the firewall
 *	verdict through the return value: 0 accept, ECONNABORTED redirect,
 *	ECONNRESET masquerade, ECONNREFUSED reject, ETIMEDOUT anything else.
 *	Insert/append/delete return whatever the chain helper returns.
 */
int ip_fw_ctl(int stage, void *m, int len)
{
	int cmd, fwtype;

	cmd = stage & IP_FW_COMMAND;
	fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT;

	if ( cmd == IP_FW_FLUSH )
	{
		free_fw_chain(chains[fwtype]);
		return(0);
	}

	if ( cmd == IP_FW_ZERO )
	{
		zero_fw_chain(*chains[fwtype]);
		return(0);
	}

	if ( cmd == IP_FW_POLICY )
	{
		/*
		 *	The payload is read as an int, so insist that one is
		 *	really there, like the other commands that take data.
		 */
		if ( m == NULL || len < (int) sizeof(int) )
			return(EINVAL);
		*policies[fwtype] = *(int *)m;
		return 0;
	}

	if ( cmd == IP_FW_CHECK )
	{
		struct device *viadev;
		struct ip_fwpkt *ipfwp;
		struct iphdr *ip;

		if ( len != sizeof(struct ip_fwpkt) )
		{
#ifdef DEBUG_IP_FIREWALL
			/* sizeof yields size_t; cast so it matches %d */
			printk("ip_fw_ctl: length=%d, expected %d\n",
				len, (int) sizeof(struct ip_fwpkt));
#endif
			return( EINVAL );
		}

		ipfwp = (struct ip_fwpkt *)m;
		ip = &(ipfwp->fwp_iph);

		/*
		 *	The test packet must name an existing device with the
		 *	stated address and carry an option-less IP header.
		 */
		if ( !(viadev = dev_get(ipfwp->fwp_vianame)) ) {
#ifdef DEBUG_IP_FIREWALL
			printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame);
#endif
			return(EINVAL);
		} else if ( viadev->pa_addr != ipfwp->fwp_via.s_addr ) {
#ifdef DEBUG_IP_FIREWALL
			printk("ip_fw_ctl: device \"%s\" has another IP address\n",
				ipfwp->fwp_vianame);
#endif
			return(EINVAL);
		} else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
#ifdef DEBUG_IP_FIREWALL
			printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl,
				(int) (sizeof(struct iphdr)/sizeof(int)));
#endif
			return(EINVAL);
		}

		/*
		 *	Run the packet through the selected chain and map the
		 *	verdict onto an errno value.
		 */
		switch (ip_fw_chk(ip, viadev, NULL, *chains[fwtype],
			*policies[fwtype], IP_FW_MODE_CHK))
		{
			case FW_ACCEPT:
				return(0);
			case FW_REDIRECT:
				return(ECONNABORTED);
			case FW_MASQUERADE:
				return(ECONNRESET);
			case FW_REJECT:
				return(ECONNREFUSED);
			default: /* FW_BLOCK */
				return(ETIMEDOUT);
		}
	}

	if ( cmd == IP_FW_MASQ_TIMEOUTS )
	{
#ifdef CONFIG_IP_MASQUERADE
		struct ip_fw_masq *masq;

		if ( len != sizeof(struct ip_fw_masq) )
		{
#ifdef DEBUG_IP_FIREWALL
			printk("ip_fw_ctl (masq): length %d, expected %d\n",
				len, (int) sizeof(struct ip_fw_masq));
#endif
			return( EINVAL );
		}

		masq = (struct ip_fw_masq *) m;

		/*
		 *	A zero field leaves the corresponding timeout unchanged.
		 */
		if (masq->tcp_timeout)
		{
			ip_masq_expire->tcp_timeout = masq->tcp_timeout;
		}

		if (masq->tcp_fin_timeout)
		{
			ip_masq_expire->tcp_fin_timeout = masq->tcp_fin_timeout;
		}

		if (masq->udp_timeout)
		{
			ip_masq_expire->udp_timeout = masq->udp_timeout;
		}

		return 0;
#else
		return( EINVAL );
#endif
	}

	/*
	 *	Here we really working hard-adding new elements
	 *	to blocking/forwarding chains or deleting 'em
	 */

	if ( cmd == IP_FW_INSERT || cmd == IP_FW_APPEND || cmd == IP_FW_DELETE )
	{
		struct ip_fw *frwl;

		frwl=check_ipfw_struct(m,len);
		if (frwl==NULL)
			return (EINVAL);

		/* fwtype was already decoded from stage at function entry */
		switch (cmd)
		{
			case IP_FW_INSERT:
				return(insert_in_chain(chains[fwtype],frwl,len));
			case IP_FW_APPEND:
				return(append_to_chain(chains[fwtype],frwl,len));
			case IP_FW_DELETE:
				return(del_from_chain(chains[fwtype],frwl));
			default:
			/*
			 *	Should be panic but... (Why are BSD people panic obsessed ??)
			 *	Cannot be reached given the enclosing test on cmd.
			 */
#ifdef DEBUG_IP_FIREWALL
				printk("ip_fw_ctl:  unknown request %d\n",stage);
#endif
				return(EINVAL);
		}
	}

#ifdef DEBUG_IP_FIREWALL
	printk("ip_fw_ctl:  unknown request %d\n",stage);
#endif
	return(EINVAL);
}
void ip_queue_xmit(struct sock *sk, struct device *dev, struct sk_buff *skb, int free) { unsigned int tot_len; struct iphdr *iph; IS_SKB(skb); /* * Do some book-keeping in the packet for later */ skb->sk = sk; skb->dev = dev; skb->when = jiffies; /* * Find the IP header and set the length. This is bad * but once we get the skb data handling code in the * hardware will push its header sensibly and we will * set skb->ip_hdr to avoid this mess and the fixed * header length problem */ iph = skb->ip_hdr; tot_len = skb->len - (((unsigned char *)iph) - skb->data); iph->tot_len = htons(tot_len); switch (free) { /* No reassigning numbers to fragments... */ default: free = 1; break; case 0: add_to_send_queue(sk, skb); /* fall through */ case 1: iph->id = htons(ip_id_count++); } skb->free = free; /* Sanity check */ if (dev == NULL) goto no_device; #ifdef CONFIG_FIREWALL if (call_out_firewall(PF_INET, skb->dev, iph, NULL) < FW_ACCEPT) goto out; #endif /* * Do we need to fragment. Again this is inefficient. * We need to somehow lock the original buffer and use * bits of it. */ if (tot_len > dev->mtu) goto fragment; /* * Add an IP checksum */ ip_send_check(iph); /* * More debugging. You cannot queue a packet already on a list * Spot this and moan loudly. */ if (skb->next != NULL) { NETDEBUG(printk("ip_queue_xmit: next != NULL\n")); skb_unlink(skb); } /* * If the indicated interface is up and running, send the packet. 
*/ ip_statistics.IpOutRequests++; #ifdef CONFIG_IP_ACCT ip_fw_chk(iph,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT); #endif #ifdef CONFIG_IP_MULTICAST /* * Multicasts are looped back for other local users */ if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK)) { if(sk==NULL || sk->ip_mc_loop) { if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI)) { ip_loopback(dev,skb); } else { struct ip_mc_list *imc=dev->ip_mc_list; while(imc!=NULL) { if(imc->multiaddr==iph->daddr) { ip_loopback(dev,skb); break; } imc=imc->next; } } } /* Multicasts with ttl 0 must not go beyond the host */ if (iph->ttl==0) goto out; } #endif if ((dev->flags & IFF_BROADCAST) && !(dev->flags & IFF_LOOPBACK) && (iph->daddr==dev->pa_brdaddr || iph->daddr==0xFFFFFFFF)) ip_loopback(dev,skb); if (dev->flags & IFF_UP) { /* * If we have an owner use its priority setting, * otherwise use NORMAL */ int priority = SOPRI_NORMAL; if (sk) priority = sk->priority; dev_queue_xmit(skb, dev, priority); return; } if(sk) sk->err = ENETDOWN; ip_statistics.IpOutDiscards++; out: if (free) kfree_skb(skb, FREE_WRITE); return; no_device: NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n")); goto out; fragment: ip_fragment(sk,skb,dev,0); goto out; }