static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct sock *sk=icmp_socket->sk; struct ipcm_cookie ipc; struct rtable *rt = (struct rtable*)skb->dst; u32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; icmp_param->icmph.checksum=0; icmp_param->csum=0; icmp_out_count(icmp_param->icmph.type); sk->ip_tos = skb->nh.iph->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) daddr = icmp_param->replyopts.faddr; if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) return; ip_build_xmit(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+sizeof(struct icmphdr), &ipc, rt, MSG_DONTWAIT); ip_rt_put(rt); }
/*
 *	Hand a prepared ICMP message to ip_build_xmit().
 *
 *	icmp_param	message description; its checksum fields are reset here
 *	saddr, daddr	source and destination address passed to ip_build_xmit()
 *	tos		type of service stored on the ICMP socket before sending
 */
static void icmp_build_xmit(struct icmp_bxm *icmp_param, __u32 saddr, __u32 daddr, __u8 tos)
{
	struct sock *icmp_sk = icmp_socket.data;
	int total_len;

	/* Start from a clean checksum; it is filled in while building. */
	icmp_param->csum = 0;
	icmp_param->icmph.checksum = 0;

	icmp_out_count(icmp_param->icmph.type);
	icmp_sk->ip_tos = tos;

	total_len = icmp_param->data_len + sizeof(struct icmphdr);
	ip_build_xmit(icmp_sk, icmp_glue_bits, icmp_param, total_len,
		      daddr, saddr, &icmp_param->replyopts, 0, IPPROTO_ICMP, 1);
}
static int raw_sendto(struct sock *sk, const unsigned char *from, int len, int noblock, unsigned flags, struct sockaddr_in *usin, int addr_len) { int err; struct sockaddr_in sin; /* * Check the flags. Only MSG_DONTROUTE is permitted. */ if (flags & MSG_OOB) /* Mirror BSD error message compatibility */ return -EOPNOTSUPP; if (flags & ~MSG_DONTROUTE) return(-EINVAL); /* * Get and verify the address. */ if (usin) { if (addr_len < sizeof(sin)) return(-EINVAL); memcpy(&sin, usin, sizeof(sin)); if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL); } else { if (sk->state != TCP_ESTABLISHED) return(-EINVAL); sin.sin_family = AF_INET; sin.sin_port = sk->num; sin.sin_addr.s_addr = sk->daddr; } if (sin.sin_port == 0) sin.sin_port = sk->num; if (sin.sin_addr.s_addr == INADDR_ANY) sin.sin_addr.s_addr = ip_my_addr(); /* * BSD raw sockets forget to check SO_BROADCAST .... */ if (!sk->bsdism && sk->broadcast == 0 && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST) return -EACCES; if(sk->ip_hdrincl) { if(len>65535) return -EMSGSIZE; err=ip_build_xmit(sk, raw_getrawfrag, from, len, sin.sin_addr.s_addr, 0, sk->opt, flags, sin.sin_port, noblock); } else { if(len>65535-sizeof(struct iphdr)) return -EMSGSIZE; err=ip_build_xmit(sk, raw_getfrag, from, len, sin.sin_addr.s_addr, 0, sk->opt, flags, sin.sin_port, noblock); } return err<0?err:len; }
/* Shared by v4/v6 tcp. */ unsigned short udp_good_socknum(void) { int result; static int start = 0; int i, best, best_size_so_far; SOCKHASH_LOCK(); /* Select initial not-so-random "best" */ best = PROT_SOCK + 1 + (start & 1023); best_size_so_far = 32767; /* "big" num */ result = best; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { struct sock *sk; int size; sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; /* No clashes - take it */ if (!sk) goto out; /* Is this one better than our best so far? */ size = 0; do { if(++size >= best_size_so_far) goto next; } while((sk = sk->next) != NULL); best_size_so_far = size; best = result; next: } while (udp_lport_inuse(best)) best += UDP_HTABLE_SIZE; result = best; out: start = result; SOCKHASH_UNLOCK(); return result; } static void udp_v4_hash(struct sock *sk) { struct sock **skp; int num = sk->num; num &= (UDP_HTABLE_SIZE - 1); skp = &udp_hash[num]; SOCKHASH_LOCK(); sk->next = *skp; *skp = sk; sk->hashent = num; SOCKHASH_UNLOCK(); } static void udp_v4_unhash(struct sock *sk) { struct sock **skp; int num = sk->num; num &= (UDP_HTABLE_SIZE - 1); skp = &udp_hash[num]; SOCKHASH_LOCK(); while(*skp != NULL) { if(*skp == sk) { *skp = sk->next; break; } skp = &((*skp)->next); } SOCKHASH_UNLOCK(); } static void udp_v4_rehash(struct sock *sk) { struct sock **skp; int num = sk->num; int oldnum = sk->hashent; num &= (UDP_HTABLE_SIZE - 1); skp = &udp_hash[oldnum]; SOCKHASH_LOCK(); while(*skp != NULL) { if(*skp == sk) { *skp = sk->next; break; } skp = &((*skp)->next); } sk->next = udp_hash[num]; udp_hash[num] = sk; sk->hashent = num; SOCKHASH_UNLOCK(); } /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. 
-DaveM */ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, struct device *dev) { struct sock *sk, *result = NULL; unsigned short hnum = ntohs(dport); int badness = -1; for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) { int score = 0; if(sk->rcv_saddr) { if(sk->rcv_saddr != daddr) continue; score++; } if(sk->daddr) { if(sk->daddr != saddr) continue; score++; } if(sk->dummy_th.dest) { if(sk->dummy_th.dest != sport) continue; score++; } /* If this socket is bound to a particular interface, * did the packet come in on it? */ if (sk->bound_device) { if (dev == sk->bound_device) score++; else continue; /* mismatch--not this sock */ } if(score == 4) { result = sk; break; } else if(score > badness) { result = sk; badness = score; } } } return result; } #ifdef CONFIG_IP_TRANSPARENT_PROXY struct sock *udp_v4_proxy_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 rport, struct device *dev) { struct sock *hh[3], *sk, *result = NULL; int i; int badness = -1; unsigned short hnum = ntohs(dport); unsigned short hpnum = ntohs(rport); SOCKHASH_LOCK(); hh[0] = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; hh[1] = udp_hash[hpnum & (UDP_HTABLE_SIZE - 1)]; for (i = 0; i < 2; i++) { for(sk = hh[i]; sk != NULL; sk = sk->next) { if(sk->num == hnum || sk->num == hpnum) { int score = 0; if(sk->dead && (sk->state == TCP_CLOSE)) continue; if(sk->rcv_saddr) { if((sk->num != hpnum || sk->rcv_saddr != paddr) && (sk->num != hnum || sk->rcv_saddr != daddr)) continue; score++; } if(sk->daddr) { if(sk->daddr != saddr) continue; score++; } if(sk->dummy_th.dest) { if(sk->dummy_th.dest != sport) continue; score++; } /* If this socket is bound to a particular interface, * did the packet come in on it? 
*/ if(sk->bound_device) { if (sk->bound_device != dev) continue; score++; } if(score == 4 && sk->num == hnum) { result = sk; break; } else if(score > badness && (sk->num == hpnum || sk->rcv_saddr)) { result = sk; badness = score; } } } } SOCKHASH_UNLOCK(); return result; } #endif static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned short num, unsigned long raddr, unsigned short rnum, unsigned long laddr, struct device *dev) { struct sock *s = sk; unsigned short hnum = ntohs(num); for(; s; s = s->next) { if ((s->num != hnum) || (s->dead && (s->state == TCP_CLOSE)) || (s->daddr && s->daddr!=raddr) || (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) || ((s->bound_device) && (s->bound_device!=dev)) || (s->rcv_saddr && s->rcv_saddr != laddr)) continue; break; } return s; } #define min(a,b) ((a)<(b)?(a):(b)) /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should * be closed and the error returned to the user. If err > 0 * it's just the icmp type << 8 | icmp code. * Header points to the ip header of the error packet. We move * on past this. Then (as it used to claim before adjustment) * header points to the first 8 bytes of the udp header. We need * to find the appropriate port. */ void udp_err(int type, int code, unsigned char *header, __u32 daddr, __u32 saddr, struct inet_protocol *protocol, int len) { struct udphdr *uh; struct sock *sk; /* * Find the 8 bytes of post IP header ICMP included for us */ if(len<sizeof(struct udphdr)) return; uh = (struct udphdr *)header; sk = udp_v4_lookup(daddr, uh->dest, saddr, uh->source, NULL); if (sk == NULL) return; /* No socket for error */ if (type == ICMP_SOURCE_QUENCH) { /* Slow down! */ if (sk->cong_window > 1) sk->cong_window = sk->cong_window/2; return; } if (type == ICMP_PARAMETERPROB) { sk->err = EPROTO; sk->error_report(sk); return; } /* * Various people wanted BSD UDP semantics. 
Well they've come * back out because they slow down response to stuff like dead * or unreachable name servers and they screw term users something * chronic. Oh and it violates RFC1122. So basically fix your * client code people. */ /* RFC1122: OK. Passes ICMP errors back to application, as per */ /* 4.1.3.3. */ /* After the comment above, that should be no surprise. */ if(code<=NR_ICMP_UNREACH && icmp_err_convert[code].fatal) { /* * 4.x BSD compatibility item. Break RFC1122 to * get BSD socket semantics. */ if(sk->bsdism && sk->state!=TCP_ESTABLISHED) return; sk->err = icmp_err_convert[code].errno; sk->error_report(sk); } } static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base) { return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); } struct udpfakehdr { struct udphdr uh; __u32 daddr; __u32 other; const char *from; __u32 wcheck; }; /* * Copy and checksum a UDP packet from user space into a buffer. We still have to do the planning to * get ip_build_xmit to spot direct transfer to network card and provide an additional callback mode * for direct user->board I/O transfers. That one will be fun. 
*/ static void udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; const char *src; char *dst; unsigned int len; if (offset) { len = fraglen; src = ufh->from+(offset-sizeof(struct udphdr)); dst = to; } else { len = fraglen-sizeof(struct udphdr); src = ufh->from; dst = to+sizeof(struct udphdr); } ufh->wcheck = csum_partial_copy_fromuser(src, dst, len, ufh->wcheck); if (offset == 0) { ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr), ufh->wcheck); ufh->uh.check = csum_tcpudp_magic(saddr, ufh->daddr, ntohs(ufh->uh.len), IPPROTO_UDP, ufh->wcheck); if (ufh->uh.check == 0) ufh->uh.check = -1; memcpy(to, ufh, sizeof(struct udphdr)); } } /* * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing * that we use two routines for this for speed. Probably we ought to have a CONFIG_FAST_NET * set for >10Mb/second boards to activate this sort of coding. Timing needed to verify if * this is a valid decision. */ static void udp_getfrag_nosum(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; const char *src; char *dst; unsigned int len; if (offset) { len = fraglen; src = ufh->from+(offset-sizeof(struct udphdr)); dst = to; } else { len = fraglen-sizeof(struct udphdr); src = ufh->from; dst = to+sizeof(struct udphdr); } memcpy_fromfs(dst,src,len); if (offset == 0) memcpy(to, ufh, sizeof(struct udphdr)); } /* * Send UDP frames. 
*/ static int udp_send(struct sock *sk, struct sockaddr_in *sin, const unsigned char *from, int len, int rt, __u32 saddr, int noblock) { int ulen = len + sizeof(struct udphdr); int a; struct udpfakehdr ufh; if(ulen>65535-sizeof(struct iphdr)) return -EMSGSIZE; ufh.uh.source = sk->dummy_th.source; ufh.uh.dest = sin->sin_port; ufh.uh.len = htons(ulen); ufh.uh.check = 0; ufh.daddr = sin->sin_addr.s_addr; ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256; ufh.from = from; ufh.wcheck = 0; #ifdef CONFIG_IP_TRANSPARENT_PROXY if (rt&MSG_PROXY) { /* * We map the first 8 bytes of a second sockaddr_in * into the last 8 (unused) bytes of a sockaddr_in. * This _is_ ugly, but it's the only way to do it * easily, without adding system calls. */ struct sockaddr_in *sinfrom = (struct sockaddr_in *) sin->sin_zero; if (!suser()) return(-EPERM); if (sinfrom->sin_family && sinfrom->sin_family != AF_INET) return(-EINVAL); if (sinfrom->sin_port == 0) return(-EINVAL); saddr = sinfrom->sin_addr.s_addr; ufh.uh.source = sinfrom->sin_port; } #endif /* RFC1122: OK. Provides the checksumming facility (MUST) as per */ /* 4.1.3.4. It's configurable by the application via setsockopt() */ /* (MAY) and it defaults to on (MUST). Almost makes up for the */ /* violation above. -- MS */ if(sk->no_check) a = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen, sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock); else a = ip_build_xmit(sk, udp_getfrag, &ufh, ulen, sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock); if(a<0) return a; udp_statistics.UdpOutDatagrams++; return len; }
void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info) { struct iphdr *iph; struct icmphdr *icmph; int room; struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable*)skb_in->dst; struct ipcm_cookie ipc; u32 saddr; u8 tos; /* * Find the original header */ iph = skb_in->nh.iph; /* * No replies to physical multicast/broadcast */ if (skb_in->pkt_type!=PACKET_HOST) return; /* * Now check at the protocol level */ if (!rt) { if (sysctl_ip_always_defrag == 0 && net_ratelimit()) printk(KERN_DEBUG "icmp_send: destinationless packet\n"); return; } if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) return; /* * Only reply to fragment 0. We byte re-order the constant * mask for efficiency. */ if (iph->frag_off&htons(IP_OFFSET)) return; /* * If we send an ICMP error to an ICMP error a mess would result.. */ if (icmp_pointers[type].error) { /* * We are an error, check if we are replying to an ICMP error */ if (iph->protocol==IPPROTO_ICMP) { icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); /* * Assume any unknown ICMP type is an error. This isn't * specified by the RFC, but think about it.. */ if (icmph->type>NR_ICMP_TYPES || icmp_pointers[icmph->type].error) return; } } /* * Construct source address and options. */ #ifdef CONFIG_IP_ROUTE_NAT /* * Restore original addresses if packet has been translated. */ if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) { iph->daddr = rt->key.dst; iph->saddr = rt->key.src; } #endif #ifdef CONFIG_IP_MASQUERADE if (type==ICMP_DEST_UNREACH && IPCB(skb_in)->flags&IPSKB_MASQUERADED) { ip_fw_unmasq_icmp(skb_in); } #endif saddr = iph->daddr; if (!(rt->rt_flags & RTCF_LOCAL)) saddr = 0; tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; /* XXX: use a more aggressive expire for routes created by * this call (not longer than the rate limit timeout). * It could be also worthwhile to not put them into ipv4 * fast routing cache at first. 
Otherwise an attacker can * grow the routing table. */ if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) return; if (ip_options_echo(&icmp_param.replyopts, skb_in)) goto ende; /* * Prepare data for ICMP header. */ icmp_param.icmph.type=type; icmp_param.icmph.code=code; icmp_param.icmph.un.gateway = info; icmp_param.icmph.checksum=0; icmp_param.csum=0; icmp_param.data_ptr=iph; icmp_out_count(icmp_param.icmph.type); icmp_socket->sk->ip_tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; if (icmp_param.replyopts.srr) { ip_rt_put(rt); if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) return; } if (!icmpv4_xrlim_allow(rt, type, code)) goto ende; /* RFC says return as much as we can without exceeding 576 bytes. */ room = rt->u.dst.pmtu; if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; room -= sizeof(struct icmphdr); icmp_param.data_len=(iph->ihl<<2)+skb_in->len; if (icmp_param.data_len > room) icmp_param.data_len = room; ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, icmp_param.data_len+sizeof(struct icmphdr), &ipc, rt, MSG_DONTWAIT); ende: ip_rt_put(rt); }
static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len) { struct ipcm_cookie ipc; struct rawfakehdr rfh; struct rtable *rt = NULL; int free = 0; u32 daddr; u8 tos; int err; /* This check is ONLY to check for arithmetic overflow on integer(!) len. Not more! Real check will be made in ip_build_xmit --ANK BTW socket.c -> af_*.c -> ... make multiple invalid conversions size_t -> int. We MUST repair it f.e. by replacing all of them with size_t and revise all the places sort of len += sizeof(struct iphdr) If len was ULONG_MAX-10 it would be cathastrophe --ANK */ err = -EMSGSIZE; if (len < 0 || len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { static int complained; if (!complained++) printk(KERN_INFO "%s forgot to set AF_INET in " "raw sendmsg. Fix it!\n", current->comm); err = -EINVAL; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->state != TCP_ESTABLISHED) goto out; daddr = sk->daddr; } ipc.addr = sk->saddr; ipc.opt = NULL; ipc.oif = sk->bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(msg, &ipc); if (err) goto out; if (ipc.opt) free = 1; } rfh.saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) ipc.opt = sk->protinfo.af_inet.opt; if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. 
*/ if (sk->protinfo.af_inet.hdrincl) goto done; if (ipc.opt->srr) { if (!daddr) goto done; daddr = ipc.opt->faddr; } } tos = RT_TOS(sk->protinfo.af_inet.tos) | sk->localroute; if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (MULTICAST(daddr)) { if (!ipc.oif) ipc.oif = sk->protinfo.af_inet.mc_index; if (!rfh.saddr) rfh.saddr = sk->protinfo.af_inet.mc_addr; } err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif); if (err) goto done; err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sk->broadcast) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: rfh.iov = msg->msg_iov; rfh.saddr = rt->rt_src; rfh.dst = &rt->u.dst; if (!ipc.addr) ipc.addr = rt->rt_dst; err = ip_build_xmit(sk, sk->protinfo.af_inet.hdrincl ? raw_getrawfrag : raw_getfrag, &rfh, len, &ipc, rt, msg->msg_flags); done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: return err < 0 ? err : len; do_confirm: dst_confirm(&rt->u.dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; }