/*
 *	Multicasts and broadcasts go to each listener.
 *
 *	Walks the hash chain selected by the destination port and hands the
 *	datagram to every socket accepted by udp_v4_mcast_next().  Every
 *	receiver except the last one gets a clone; the last one gets the
 *	original skb.  If nobody matched, the skb is freed here.
 *	Always returns 0.
 */
static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
				u32 saddr, u32 daddr)
{
	struct sock *cur;
	int delivered = 0;

	SOCKHASH_LOCK();

	cur = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
	cur = udp_v4_mcast_next(cur, uh->dest, saddr, uh->source, daddr, skb->dev);

	while (cur != NULL) {
		struct sock *following;
		struct sk_buff *copy;

		/* Find the next receiver first, so we know whether the
		 * original skb can be given away to this one.
		 */
		following = udp_v4_mcast_next(cur->next, uh->dest, saddr,
					      uh->source, daddr, skb->dev);

		if (following)
			copy = skb_clone(skb, GFP_ATOMIC);
		else
			copy = skb;

		/* A failed clone just means this listener misses out. */
		if (copy)
			udp_deliver(cur, copy);

		delivered = 1;
		cur = following;
	}

	SOCKHASH_UNLOCK();

	if (!delivered)
		kfree_skb(skb, FREE_READ);
	return 0;
}
/*
 *	Check whether binding sk to local port snum would clash with a socket
 *	already in the UDP hash.  Returns 1 on conflict, 0 if the bind is fine.
 */
static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
{
	struct sock *other;
	int conflict = 0, want_reuse = sk->reuse;

	SOCKHASH_LOCK();
	other = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
	while (other != NULL) {
		/* Only other sockets on the very same port are interesting,
		 * and two sockets can share a port if they are bound to
		 * different interfaces.
		 */
		if (other->num == snum && other != sk &&
		    sk->bound_device == other->bound_device) {
			int other_reuse = other->reuse;

			/* The addresses overlap when either side is a
			 * wildcard or both are the same address; in that
			 * case both sockets must have asked for reuse.
			 */
			if (!other->rcv_saddr || !sk->rcv_saddr ||
			    other->rcv_saddr == sk->rcv_saddr) {
				if (!other_reuse || !want_reuse) {
					conflict = 1;
					break;
				}
			}
		}
		other = other->next;
	}
	SOCKHASH_UNLOCK();

	return conflict;
}
/*
 * get__netinfo formats one fixed-width, 128 byte record per socket (and,
 * for format 0, per pending open request of a listening socket) into
 * buffer and returns the number of bytes the caller may consume starting
 * at *start.
 *
 * KNOWN BUGS
 *  As in get_unix_netinfo, the buffer might be too small. If this
 *  happens, get__netinfo returns only part of the available infos.
 *
 *  Assumes that the buffer length is a multiple of 128 - if not it will
 *  write past the end.
 */
static int get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
{
	struct sock *sp;
	struct open_request *req;
	char line[129];
	off_t pos = 0;
	off_t skip;
	int len = 0;
	int slot = 0;

	/* Header record, only when the reader starts inside it. */
	if (offset < 128)
		len += sprintf(buffer, "%-127s\n",
			       " sl local_address rem_address st tx_queue "
			       "rx_queue tr tm->when retrnsmt uid timeout inode");
	pos = 128;

	SOCKHASH_LOCK();
	for (sp = pro->sklist_next; sp != (struct sock *)pro;
	     sp = sp->sklist_next, slot++) {
		if (format == 0 && sp->state == TCP_LISTEN) {
			/* Embryonic connections hanging off a listener get
			 * a record each; ones that already own a sock are
			 * skipped.
			 */
			for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req != NULL;
			     slot++, req = req->dl_next) {
				if (req->sk)
					continue;
				pos += 128;
				if (pos >= offset) {
					get__openreq(sp, req, line, slot);
					len += sprintf(buffer + len, "%-127s\n", line);
					if (len >= length)
						goto out;
				}
			}
		}

		pos += 128;
		if (pos < offset)
			continue;	/* record lies before the requested window */

		get__sock(sp, line, slot, format);
		len += sprintf(buffer + len, "%-127s\n", line);
		if (len >= length)
			break;
	}
out:
	SOCKHASH_UNLOCK();

	/* Translate what was produced into the window the caller asked for. */
	skip = len - (pos - offset);
	*start = buffer + skip;
	len -= skip;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
/*
 *	Unlink sk from its hash chain.  A socket whose pprev is NULL is
 *	treated as not hashed and left alone.
 */
static void raw_v6_unhash(struct sock *sk)
{
	SOCKHASH_LOCK();
	if (sk->pprev != NULL) {
		struct sock *succ = sk->next;

		/* Point the successor's back link, and whatever pointed
		 * at us, past this socket.
		 */
		if (succ != NULL)
			succ->pprev = sk->pprev;
		*sk->pprev = succ;
		sk->pprev = NULL;
	}
	SOCKHASH_UNLOCK();
}
/*
 *	Insert sk at the head of the raw_v6_htable chain selected by sk->num.
 */
static void raw_v6_hash(struct sock *sk)
{
	struct sock **bucket = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)];
	struct sock *old_head;

	SOCKHASH_LOCK();
	old_head = *bucket;
	sk->next = old_head;
	/* The previous head now hangs off our next pointer. */
	if (old_head != NULL)
		old_head->pprev = &sk->next;
	*bucket = sk;
	sk->pprev = bucket;
	SOCKHASH_UNLOCK();
}
static void aarp_expire_timeout(unsigned long unused) { int ct=0; SOCKHASH_LOCK(); for(ct=0;ct<AARP_HASH_SIZE;ct++) { aarp_expire_timer(&resolved[ct]); aarp_kick(&unresolved[ct]); aarp_expire_timer(&unresolved[ct]); aarp_expire_timer(&proxies[ct]); } SOCKHASH_UNLOCK(); mod_timer(&aarp_timer, jiffies + (unresolved_count ? sysctl_aarp_tick_time: sysctl_aarp_expiry_time)); }
/* Shared by v4/v6 tcp. */ unsigned short udp_good_socknum(void) { int result; static int start = 0; int i, best, best_size_so_far; SOCKHASH_LOCK(); /* Select initial not-so-random "best" */ best = PROT_SOCK + 1 + (start & 1023); best_size_so_far = 32767; /* "big" num */ result = best; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { struct sock *sk; int size; sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; /* No clashes - take it */ if (!sk) goto out; /* Is this one better than our best so far? */ size = 0; do { if(++size >= best_size_so_far) goto next; } while((sk = sk->next) != NULL); best_size_so_far = size; best = result; next: } while (udp_lport_inuse(best)) best += UDP_HTABLE_SIZE; result = best; out: start = result; SOCKHASH_UNLOCK(); return result; } static void udp_v4_hash(struct sock *sk) { struct sock **skp; int num = sk->num; num &= (UDP_HTABLE_SIZE - 1); skp = &udp_hash[num]; SOCKHASH_LOCK(); sk->next = *skp; *skp = sk; sk->hashent = num; SOCKHASH_UNLOCK(); } static void udp_v4_unhash(struct sock *sk) { struct sock **skp; int num = sk->num; num &= (UDP_HTABLE_SIZE - 1); skp = &udp_hash[num]; SOCKHASH_LOCK(); while(*skp != NULL) { if(*skp == sk) { *skp = sk->next; break; } skp = &((*skp)->next); } SOCKHASH_UNLOCK(); } static void udp_v4_rehash(struct sock *sk) { struct sock **skp; int num = sk->num; int oldnum = sk->hashent; num &= (UDP_HTABLE_SIZE - 1); skp = &udp_hash[oldnum]; SOCKHASH_LOCK(); while(*skp != NULL) { if(*skp == sk) { *skp = sk->next; break; } skp = &((*skp)->next); } sk->next = udp_hash[num]; udp_hash[num] = sk; sk->hashent = num; SOCKHASH_UNLOCK(); } /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. 
-DaveM */ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, struct device *dev) { struct sock *sk, *result = NULL; unsigned short hnum = ntohs(dport); int badness = -1; for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) { int score = 0; if(sk->rcv_saddr) { if(sk->rcv_saddr != daddr) continue; score++; } if(sk->daddr) { if(sk->daddr != saddr) continue; score++; } if(sk->dummy_th.dest) { if(sk->dummy_th.dest != sport) continue; score++; } /* If this socket is bound to a particular interface, * did the packet come in on it? */ if (sk->bound_device) { if (dev == sk->bound_device) score++; else continue; /* mismatch--not this sock */ } if(score == 4) { result = sk; break; } else if(score > badness) { result = sk; badness = score; } } } return result; } #ifdef CONFIG_IP_TRANSPARENT_PROXY struct sock *udp_v4_proxy_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 rport, struct device *dev) { struct sock *hh[3], *sk, *result = NULL; int i; int badness = -1; unsigned short hnum = ntohs(dport); unsigned short hpnum = ntohs(rport); SOCKHASH_LOCK(); hh[0] = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; hh[1] = udp_hash[hpnum & (UDP_HTABLE_SIZE - 1)]; for (i = 0; i < 2; i++) { for(sk = hh[i]; sk != NULL; sk = sk->next) { if(sk->num == hnum || sk->num == hpnum) { int score = 0; if(sk->dead && (sk->state == TCP_CLOSE)) continue; if(sk->rcv_saddr) { if((sk->num != hpnum || sk->rcv_saddr != paddr) && (sk->num != hnum || sk->rcv_saddr != daddr)) continue; score++; } if(sk->daddr) { if(sk->daddr != saddr) continue; score++; } if(sk->dummy_th.dest) { if(sk->dummy_th.dest != sport) continue; score++; } /* If this socket is bound to a particular interface, * did the packet come in on it? 
*/ if(sk->bound_device) { if (sk->bound_device != dev) continue; score++; } if(score == 4 && sk->num == hnum) { result = sk; break; } else if(score > badness && (sk->num == hpnum || sk->rcv_saddr)) { result = sk; badness = score; } } } } SOCKHASH_UNLOCK(); return result; } #endif static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned short num, unsigned long raddr, unsigned short rnum, unsigned long laddr, struct device *dev) { struct sock *s = sk; unsigned short hnum = ntohs(num); for(; s; s = s->next) { if ((s->num != hnum) || (s->dead && (s->state == TCP_CLOSE)) || (s->daddr && s->daddr!=raddr) || (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) || ((s->bound_device) && (s->bound_device!=dev)) || (s->rcv_saddr && s->rcv_saddr != laddr)) continue; break; } return s; } #define min(a,b) ((a)<(b)?(a):(b)) /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should * be closed and the error returned to the user. If err > 0 * it's just the icmp type << 8 | icmp code. * Header points to the ip header of the error packet. We move * on past this. Then (as it used to claim before adjustment) * header points to the first 8 bytes of the udp header. We need * to find the appropriate port. */ void udp_err(int type, int code, unsigned char *header, __u32 daddr, __u32 saddr, struct inet_protocol *protocol, int len) { struct udphdr *uh; struct sock *sk; /* * Find the 8 bytes of post IP header ICMP included for us */ if(len<sizeof(struct udphdr)) return; uh = (struct udphdr *)header; sk = udp_v4_lookup(daddr, uh->dest, saddr, uh->source, NULL); if (sk == NULL) return; /* No socket for error */ if (type == ICMP_SOURCE_QUENCH) { /* Slow down! */ if (sk->cong_window > 1) sk->cong_window = sk->cong_window/2; return; } if (type == ICMP_PARAMETERPROB) { sk->err = EPROTO; sk->error_report(sk); return; } /* * Various people wanted BSD UDP semantics. 
Well they've come * back out because they slow down response to stuff like dead * or unreachable name servers and they screw term users something * chronic. Oh and it violates RFC1122. So basically fix your * client code people. */ /* RFC1122: OK. Passes ICMP errors back to application, as per */ /* 4.1.3.3. */ /* After the comment above, that should be no surprise. */ if(code<=NR_ICMP_UNREACH && icmp_err_convert[code].fatal) { /* * 4.x BSD compatibility item. Break RFC1122 to * get BSD socket semantics. */ if(sk->bsdism && sk->state!=TCP_ESTABLISHED) return; sk->err = icmp_err_convert[code].errno; sk->error_report(sk); } } static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base) { return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); } struct udpfakehdr { struct udphdr uh; __u32 daddr; __u32 other; const char *from; __u32 wcheck; }; /* * Copy and checksum a UDP packet from user space into a buffer. We still have to do the planning to * get ip_build_xmit to spot direct transfer to network card and provide an additional callback mode * for direct user->board I/O transfers. That one will be fun. 
*/ static void udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; const char *src; char *dst; unsigned int len; if (offset) { len = fraglen; src = ufh->from+(offset-sizeof(struct udphdr)); dst = to; } else { len = fraglen-sizeof(struct udphdr); src = ufh->from; dst = to+sizeof(struct udphdr); } ufh->wcheck = csum_partial_copy_fromuser(src, dst, len, ufh->wcheck); if (offset == 0) { ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr), ufh->wcheck); ufh->uh.check = csum_tcpudp_magic(saddr, ufh->daddr, ntohs(ufh->uh.len), IPPROTO_UDP, ufh->wcheck); if (ufh->uh.check == 0) ufh->uh.check = -1; memcpy(to, ufh, sizeof(struct udphdr)); } } /* * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing * that we use two routines for this for speed. Probably we ought to have a CONFIG_FAST_NET * set for >10Mb/second boards to activate this sort of coding. Timing needed to verify if * this is a valid decision. */ static void udp_getfrag_nosum(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; const char *src; char *dst; unsigned int len; if (offset) { len = fraglen; src = ufh->from+(offset-sizeof(struct udphdr)); dst = to; } else { len = fraglen-sizeof(struct udphdr); src = ufh->from; dst = to+sizeof(struct udphdr); } memcpy_fromfs(dst,src,len); if (offset == 0) memcpy(to, ufh, sizeof(struct udphdr)); } /* * Send UDP frames. 
*/ static int udp_send(struct sock *sk, struct sockaddr_in *sin, const unsigned char *from, int len, int rt, __u32 saddr, int noblock) { int ulen = len + sizeof(struct udphdr); int a; struct udpfakehdr ufh; if(ulen>65535-sizeof(struct iphdr)) return -EMSGSIZE; ufh.uh.source = sk->dummy_th.source; ufh.uh.dest = sin->sin_port; ufh.uh.len = htons(ulen); ufh.uh.check = 0; ufh.daddr = sin->sin_addr.s_addr; ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256; ufh.from = from; ufh.wcheck = 0; #ifdef CONFIG_IP_TRANSPARENT_PROXY if (rt&MSG_PROXY) { /* * We map the first 8 bytes of a second sockaddr_in * into the last 8 (unused) bytes of a sockaddr_in. * This _is_ ugly, but it's the only way to do it * easily, without adding system calls. */ struct sockaddr_in *sinfrom = (struct sockaddr_in *) sin->sin_zero; if (!suser()) return(-EPERM); if (sinfrom->sin_family && sinfrom->sin_family != AF_INET) return(-EINVAL); if (sinfrom->sin_port == 0) return(-EINVAL); saddr = sinfrom->sin_addr.s_addr; ufh.uh.source = sinfrom->sin_port; } #endif /* RFC1122: OK. Provides the checksumming facility (MUST) as per */ /* 4.1.3.4. It's configurable by the application via setsockopt() */ /* (MAY) and it defaults to on (MUST). Almost makes up for the */ /* violation above. -- MS */ if(sk->no_check) a = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen, sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock); else a = ip_build_xmit(sk, udp_getfrag, &ufh, ulen, sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock); if(a<0) return a; udp_statistics.UdpOutDatagrams++; return len; }
/* This is the main implementation workhorse of all these routines.
 *
 * Formats one fixed-width, 149 byte record per socket on pro's socket
 * list into buffer and returns how many bytes the caller may consume
 * starting at *start.  With format 0, TIME_WAIT entries are read through
 * struct tcp_tw_bucket and report the time left until their death row
 * slot comes round.
 *
 * The pending timers of ordinary sockets are taken off with del_timer()
 * while their expiry is sampled and put back afterwards.
 *
 * As with the fixed-record scheme in general, the buffer length is
 * assumed to be a multiple of the record size.
 */
static int get__netinfo6(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
{
	struct sock *sp;
	struct tcp_opt *tp;
	int timer_active, timer_active1, timer_active2;
	unsigned long timer_expires;
	struct in6_addr *dest, *src;
	unsigned short destp, srcp;
	int len = 0, i = 0;
	off_t pos = 0;
	off_t begin;
	char tmpbuf[150];

	if(offset < 149)
		len += sprintf(buffer, "%-148s\n",
			       " sl "						/* 6 */
			       "local_address "		/* 38 */
			       "remote_address "		/* 38 */
			       "st tx_queue rx_queue tr tm->when retrnsmt"	/* 41 */
			       " uid timeout inode");			/* 21 */
										/*----*/
										/*144 */

	pos = 149;
	SOCKHASH_LOCK();
	sp = pro->sklist_next;
	while(sp != (struct sock *)pro) {
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
		int tw_bucket = 0;

		pos += 149;
		if(pos < offset)
			goto next;
		tp = &(sp->tp_pinfo.af_tcp);
		if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
			tw_bucket = 1;
			dest  = &tw->v6_daddr;
			src   = &tw->v6_rcv_saddr;
		} else {
			dest  = &sp->net_pinfo.af_inet6.daddr;
			src   = &sp->net_pinfo.af_inet6.rcv_saddr;
		}
		destp = ntohs(sp->dport);
		srcp  = ntohs(sp->sport);
		if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
			extern int tcp_tw_death_row_slot;
			int slot_dist;

			timer_active1	= timer_active2 = 0;
			timer_active	= 3;
			/* Distance, in slots, between this bucket's slot
			 * and the current death row slot.
			 */
			slot_dist	= tw->death_slot;
			if(slot_dist > tcp_tw_death_row_slot)
				slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
			else
				slot_dist = tcp_tw_death_row_slot - slot_dist;
			timer_expires	= jiffies + (slot_dist * TCP_TWKILL_PERIOD);
		} else {
			timer_active1 = del_timer(&tp->retransmit_timer);
			timer_active2 = del_timer(&sp->timer);
			if(!timer_active1) tp->retransmit_timer.expires = 0;
			if(!timer_active2) sp->timer.expires = 0;
			timer_active = 0;
			/* "No timer yet": must be the largest unsigned long,
			 * (unsigned) -1 is only 32 bits wide on LP64.
			 */
			timer_expires = ~0UL;
		}
		/* Report whichever pending timer fires first. */
		if(timer_active1 && tp->retransmit_timer.expires < timer_expires) {
			timer_active = timer_active1;
			timer_expires = tp->retransmit_timer.expires;
		}
		if(timer_active2 && sp->timer.expires < timer_expires) {
			timer_active = timer_active2;
			timer_expires = sp->timer.expires;
		}
		if(timer_active == 0)
			timer_expires = jiffies;
		sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
			"%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
			i,
			src->s6_addr32[0], src->s6_addr32[1],
			src->s6_addr32[2], src->s6_addr32[3], srcp,
			dest->s6_addr32[0], dest->s6_addr32[1],
			dest->s6_addr32[2], dest->s6_addr32[3], destp,
			sp->state,
			(tw_bucket ?
			 0 :
			 (format == 0) ?
			 tp->write_seq-tp->snd_una :
			 atomic_read(&sp->wmem_alloc)),
			(tw_bucket ?
			 0 :
			 (format == 0) ?
			 tp->rcv_nxt-tp->copied_seq :
			 atomic_read(&sp->rmem_alloc)),
			timer_active, timer_expires-jiffies,
			(tw_bucket ? 0 : tp->retransmits),
			((!tw_bucket && sp->socket) ?
			 sp->socket->inode->i_uid : 0),
			(!tw_bucket && timer_active) ? sp->timeout : 0,
			((!tw_bucket && sp->socket) ?
			 sp->socket->inode->i_ino : 0));

		/* Put back the timers that were pending. */
		if(timer_active1) add_timer(&tp->retransmit_timer);
		if(timer_active2) add_timer(&sp->timer);
		len += sprintf(buffer+len, "%-148s\n", tmpbuf);
		if(len >= length)
			break;
	next:
		sp = sp->sklist_next;
		i++;
	}
	SOCKHASH_UNLOCK();

	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if(len > length)
		len = length;
	/* A read starting beyond the last record must yield 0 bytes, not a
	 * negative count (same clamp as get__netinfo).
	 */
	if(len < 0)
		len = 0;
	return len;
}