/*
 * Handle a UDP datagram taken from the guest side of the NAT.
 *
 * State on entry:
 *   m->m_data   points at the IP packet header
 *   m->m_len    length of the IP packet
 *   ip->ip_len  length of the data (IPDU)
 *
 * pData   NAT instance state.
 * m       mbuf holding the packet.  Nothing is returned to the caller: on
 *         every path the mbuf is either freed here, handed to a helper
 *         (hostresolver, udp_output2, icmp_error) or parked in so->so_m.
 * iphlen  length of the IP header, including any IP options.
 *
 * Order of processing: strip IP options, validate the UDP length and
 * checksum, then dispatch to the built-in services (BOOTP/DHCP, host
 * resolver DNS, TFTP, DNS proxy).  Anything else is sent out through a
 * host socket, which is looked up in the udb list or created on demand.
 */
void udp_input(PNATState pData, register struct mbuf *m, int iphlen)
{
    register struct ip *ip;
    register struct udphdr *uh;
    int len;
    struct ip save_ip;
    struct socket *so;
    int ret;
    int ttl, tos;

    LogFlowFunc(("ENTER: m = %p, iphlen = %d\n", m, iphlen));
    ip = mtod(m, struct ip *);
    /* %RTnaipv4 takes the 32-bit address value, not the struct. */
    Log2(("%RTnaipv4 iphlen = %d\n", ip->ip_dst.s_addr, iphlen));

    udpstat.udps_ipackets++;

    /*
     * Strip IP options, if any; should skip this,
     * make available to user, and use on returned packets,
     * but we don't yet have a way to check the checksum
     * with options still present.
     */
    if (iphlen > sizeof(struct ip))
    {
        ip_stripoptions(m, (struct mbuf *)0);
        iphlen = sizeof(struct ip);
    }

    /*
     * Get IP and UDP header together in first mbuf.
     */
    ip = mtod(m, struct ip *);
    uh = (struct udphdr *)((caddr_t)ip + iphlen);

    /*
     * Make mbuf data length reflect UDP length.
     * If not enough data to reflect UDP length, drop.
     */
    len = RT_N2H_U16((u_int16_t)uh->uh_ulen);
    Assert(ip->ip_len + iphlen == (ssize_t)m_length(m, NULL));

    if (ip->ip_len != len)
    {
        if (len > ip->ip_len)
        {
            udpstat.udps_badlen++;
            Log3(("NAT: IP(id: %hd) has bad size\n", ip->ip_id));
            goto bad_free_mbuf;
        }
        /* len < ip_len here, so the count passed to m_adj is negative. */
        m_adj(m, len - ip->ip_len);
        ip->ip_len = len;
    }

    /*
     * Save a copy of the IP header in case we want restore it
     * for sending an ICMP error message in response.
     */
    save_ip = *ip;
    save_ip.ip_len+= iphlen;         /* tcp_input subtracts this */

    /*
     * Checksum extended UDP header and data.
     * uh_sum itself is only read, never overwritten, so it is kept
     * for a possible ICMP reply.
     */
    if (udpcksum && uh->uh_sum)
    {
        memset(((struct ipovly *)ip)->ih_x1, 0, 9);
        ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
        if (cksum(m, len + iphlen))
        {
            udpstat.udps_badsum++;
            Log3(("NAT: IP(id: %hd) has bad (udp) cksum\n", ip->ip_id));
            goto bad_free_mbuf;
        }
    }

    /*
     *  handle DHCP/BOOTP
     */
    if (uh->uh_dport == RT_H2N_U16_C(BOOTP_SERVER))
    {
        bootp_input(pData, m);
        goto done_free_mbuf;
    }

    LogFunc(("uh src: %RTnaipv4:%d, dst: %RTnaipv4:%d\n",
             ip->ip_src.s_addr, RT_N2H_U16(uh->uh_sport),
             ip->ip_dst.s_addr, RT_N2H_U16(uh->uh_dport)));

    /*
     * handle DNS host resolver without creating a socket
     */
    if (   pData->fUseHostResolver
        && uh->uh_dport == RT_H2N_U16_C(53)
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS))
    {
        struct sockaddr_in dst, src;

        /* The reply travels the other way, so source and destination swap. */
        src.sin_addr.s_addr = ip->ip_dst.s_addr;
        src.sin_port = uh->uh_dport;
        dst.sin_addr.s_addr = ip->ip_src.s_addr;
        dst.sin_port = uh->uh_sport;

        m_adj(m, sizeof(struct udpiphdr));

        m = hostresolver(pData, m, ip->ip_src.s_addr, uh->uh_sport);
        if (m == NULL)
            goto done_free_mbuf;    /* nothing to send; the label tolerates m == NULL */

        slirpMbufTagService(pData, m, CTL_DNS);

        udp_output2(pData, NULL, m, &src, &dst, IPTOS_LOWDELAY);
        LogFlowFuncLeave();
        return;
    }

    /*
     *  handle TFTP
     */
    if (   uh->uh_dport == RT_H2N_U16_C(TFTP_SERVER)
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_TFTP))
    {
        if (pData->pvTftpSessions)
            slirpTftpInput(pData, m);
        goto done_free_mbuf;
    }

    /*
     * XXX: DNS proxy currently relies on the fact that each socket
     * only serves one request.
     */
    if (   pData->fUseDnsProxy
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS)
        && (uh->uh_dport == RT_H2N_U16_C(53)))
    {
        so = NULL;
        goto new_socket;
    }

    /*
     * Locate pcb for datagram.
     * The most recently used socket is tried first; on a mismatch the
     * whole udb list is searched.
     */
    so = udp_last_so;
    if (   so->so_lport != uh->uh_sport
        || so->so_laddr.s_addr != ip->ip_src.s_addr)
    {
        struct socket *tmp;

        for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next)
        {
            if (   tmp->so_lport        == uh->uh_sport
                && tmp->so_laddr.s_addr == ip->ip_src.s_addr)
            {
                so = tmp;
                break;
            }
        }
        if (tmp == &udb)
            so = NULL;
        else
        {
            udpstat.udpps_pcbcachemiss++;
            udp_last_so = so;
        }
    }

  new_socket:
    if (so == NULL)
    {
        /*
         * If there's no socket for this packet,
         * create one
         */
        if ((so = socreate()) == NULL)
        {
            Log2(("NAT: IP(id: %hd) failed to create socket\n", ip->ip_id));
            goto bad_free_mbuf;
        }

        /*
         * Setup fields
         */
        so->so_laddr = ip->ip_src;
        so->so_lport = uh->uh_sport;
        so->so_iptos = ip->ip_tos;

        if (udp_attach(pData, so) <= 0)
        {
            Log2(("NAT: IP(id: %hd) udp_attach errno = %d (%s)\n",
                  ip->ip_id, errno, strerror(errno)));
            sofree(pData, so);
            goto bad_free_mbuf;
        }

        /* udp_last_so = so; */
        /*
         * XXXXX Here, check if it's in udpexec_list,
         * and if it is, do the fork_exec() etc.
         */
    }

    so->so_faddr = ip->ip_dst;   /* XXX */
    so->so_fport = uh->uh_dport; /* XXX */
    Assert(so->so_type == IPPROTO_UDP);

    /*
     * DNS proxy
     */
    if (   pData->fUseDnsProxy
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS)
        && (uh->uh_dport == RT_H2N_U16_C(53)))
    {
        dnsproxy_query(pData, so, m, iphlen);
        goto done_free_mbuf;
    }

    /* Step over the IP and UDP headers so the mbuf starts at the payload. */
    iphlen += sizeof(struct udphdr);
    m->m_len -= iphlen;
    m->m_data += iphlen;

    /* Each option is set only when it differs from the value cached on the socket. */
    ttl = ip->ip_ttl = save_ip.ip_ttl;
    if (ttl != so->so_sottl)
    {
        ret = setsockopt(so->s, IPPROTO_IP, IP_TTL,
                         (char *)&ttl, sizeof(ttl));
        if (RT_LIKELY(ret == 0))
            so->so_sottl = ttl;
    }

    tos = save_ip.ip_tos;
    if (tos != so->so_sotos)
    {
        ret = setsockopt(so->s, IPPROTO_IP, IP_TOS,
                         (char *)&tos, sizeof(tos));
        if (RT_LIKELY(ret == 0))
            so->so_sotos = tos;
    }

    {
        /*
         * Different OSes have different socket options for DF.  We
         * can't use IP_HDRINCL here as it's only valid for SOCK_RAW.
         */
#     define USE_DF_OPTION(_Optname) \
        const int dfopt = _Optname
#if   defined(IP_MTU_DISCOVER)
        USE_DF_OPTION(IP_MTU_DISCOVER);
#elif defined(IP_DONTFRAG)      /* Solaris 11+, FreeBSD */
        USE_DF_OPTION(IP_DONTFRAG);
#elif defined(IP_DONTFRAGMENT)  /* Windows */
        USE_DF_OPTION(IP_DONTFRAGMENT);
#else
        USE_DF_OPTION(0);
#endif
        if (dfopt)
        {
            int df = (save_ip.ip_off & IP_DF) != 0;
#if defined(IP_MTU_DISCOVER)
            df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
#endif
            if (df != so->so_sodf)
            {
                ret = setsockopt(so->s, IPPROTO_IP, dfopt,
                                 (char *)&df, sizeof(df));
                if (RT_LIKELY(ret == 0))
                    so->so_sodf = df;
            }
        }
    }

    if (sosendto(pData, so, m) == -1)
    {
        /* Capture errno now: the logging and strerror() calls below may change it. */
        const int send_errno = errno;

        if (   !soIgnorableErrorCode(send_errno)
            && send_errno != ENOTCONN)
        {
            /* Put the headers back so the ICMP error can quote the original packet. */
            m->m_len += iphlen;
            m->m_data -= iphlen;
            *ip = save_ip;
            Log2(("NAT: UDP tx errno = %d (%s) on sent to %RTnaipv4\n",
                  send_errno, strerror(send_errno), ip->ip_dst.s_addr));
            icmp_error(pData, m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(send_errno));
            so->so_m = NULL;
            LogFlowFuncLeave();
            return;
        }
    }

    if (so->so_m)
        m_freem(pData, so->so_m);   /* used for ICMP if error on sorecvfrom */

    /* restore the orig mbuf packet */
    m->m_len += iphlen;
    m->m_data -= iphlen;
    *ip = save_ip;
    so->so_m = m;         /* ICMP backup */
    LogFlowFuncLeave();
    return;

bad_free_mbuf:
    Log2(("NAT: UDP(id: %hd) datagram to %RTnaipv4 with size(%d) claimed as bad\n",
          ip->ip_id, ip->ip_dst.s_addr, ip->ip_len));

done_free_mbuf:
    /*
     * Some services like bootp (built-in), dns (built-in) and dhcp don't need
     * sockets and create new mbufs to send them to the guest, so we free their
     * incoming buffers here.
     */
    if (m != NULL)
        m_freem(pData, m);
    LogFlowFuncLeave();
    return;
}
/* * Read from so's socket into sb_snd, updating all relevant sbuf fields * NOTE: This will only be called if it is select()ed for reading, so * a read() of 0 (or less) means it's disconnected */ int soread(PNATState pData, struct socket *so) { int n, nn, lss, total; struct sbuf *sb = &so->so_snd; u_int len = sb->sb_datalen - sb->sb_cc; struct iovec iov[2]; int mss = so->so_tcpcb->t_maxseg; int sockerr; STAM_PROFILE_START(&pData->StatIOread, a); STAM_COUNTER_RESET(&pData->StatIORead_in_1); STAM_COUNTER_RESET(&pData->StatIORead_in_2); QSOCKET_LOCK(tcb); SOCKET_LOCK(so); QSOCKET_UNLOCK(tcb); LogFlow(("soread: so = %R[natsock]\n", so)); Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); /* * No need to check if there's enough room to read. * soread wouldn't have been called if there weren't */ len = sb->sb_datalen - sb->sb_cc; iov[0].iov_base = sb->sb_wptr; iov[1].iov_base = 0; iov[1].iov_len = 0; if (sb->sb_wptr < sb->sb_rptr) { iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; if (iov[0].iov_len > mss) iov[0].iov_len -= iov[0].iov_len%mss; n = 1; } else { iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; len -= iov[0].iov_len; if (len) { iov[1].iov_base = sb->sb_data; iov[1].iov_len = sb->sb_rptr - sb->sb_data; if (iov[1].iov_len > len) iov[1].iov_len = len; total = iov[0].iov_len + iov[1].iov_len; if (total > mss) { lss = total % mss; if (iov[1].iov_len > lss) { iov[1].iov_len -= lss; n = 2; } else { lss -= iov[1].iov_len; iov[0].iov_len -= lss; n = 1; } } else n = 2; } else { if (iov[0].iov_len > mss) iov[0].iov_len -= iov[0].iov_len%mss; n = 1; } } #ifdef HAVE_READV nn = readv(so->s, (struct iovec *)iov, n); #else nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? 
MSG_OOB:0)); #endif if (nn < 0) sockerr = errno; /* save it, as it may be clobbered by logging */ else sockerr = 0; Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); if (nn <= 0) { #ifdef RT_OS_WINDOWS /* * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE) * instead of just returning EOF indication. */ if (nn < 0 && sockerr == ESHUTDOWN) { nn = 0; sockerr = 0; } #endif if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */ { /* * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that * _could_ mean that the connection is closed. But we will receive an * FD_CLOSE event later if the connection was _really_ closed. With * www.youtube.com I see this very often. Closing the socket too early * would be dangerous. */ int status; unsigned long pending = 0; status = ioctlsocket(so->s, FIONREAD, &pending); if (status < 0) Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno)); if (pending != 0) { SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return 0; } } if ( nn < 0 && soIgnorableErrorCode(sockerr)) { SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return 0; } else { int fUninitializedTemplate = 0; int shuterr; fUninitializedTemplate = RT_BOOL(( sototcpcb(so) && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY))); /* nn == 0 means peer has performed an orderly shutdown */ Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr))); shuterr = sofcantrcvmore(so); if (!sockerr && !shuterr && !fUninitializedTemplate) tcp_sockclosed(pData, sototcpcb(so)); else { LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so)); tcp_drop(pData, sototcpcb(so), sockerr); } SOCKET_UNLOCK(so); 
STAM_PROFILE_STOP(&pData->StatIOread, a); return -1; } } STAM_STATS( if (n == 1) { STAM_COUNTER_INC(&pData->StatIORead_in_1); STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn); } else { STAM_COUNTER_INC(&pData->StatIORead_in_2); STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn); } );
/*
 * Handle a UDP datagram taken from the guest side of the NAT.
 *
 * State on entry:
 *   m->m_data   points at the IP packet header
 *   m->m_len    length of the IP packet
 *   ip->ip_len  length of the data (IPDU)
 *
 * pData   NAT instance state.
 * m       mbuf holding the packet.  Nothing is returned to the caller: on
 *         every path the mbuf is either freed here, handed to a helper
 *         (udp_output2, icmp_error) or parked in so->so_m.
 * iphlen  length of the IP header, including any IP options.
 *
 * Order of processing: strip IP options, validate the UDP length and
 * checksum, then dispatch to the built-in services (BOOTP/DHCP, host
 * resolver DNS, TFTP, DNS proxy).  Anything else is sent out through a
 * host socket, which is looked up in the udb list or created on demand.
 */
void udp_input(PNATState pData, register struct mbuf *m, int iphlen)
{
    register struct ip *ip;
    register struct udphdr *uh;
    int len;
    struct ip save_ip;
    struct socket *so;
    int ret;
    int ttl;

    LogFlowFunc(("ENTER: m = %p, iphlen = %d\n", m, iphlen));
    ip = mtod(m, struct ip *);
    /* %RTnaipv4 takes the 32-bit address value, not the struct. */
    Log2(("%RTnaipv4 iphlen = %d\n", ip->ip_dst.s_addr, iphlen));

    udpstat.udps_ipackets++;

    /*
     * Strip IP options, if any; should skip this,
     * make available to user, and use on returned packets,
     * but we don't yet have a way to check the checksum
     * with options still present.
     */
    if (iphlen > sizeof(struct ip))
    {
        ip_stripoptions(m, (struct mbuf *)0);
        iphlen = sizeof(struct ip);
    }

    /*
     * Get IP and UDP header together in first mbuf.
     */
    ip = mtod(m, struct ip *);
    uh = (struct udphdr *)((caddr_t)ip + iphlen);

    /*
     * Make mbuf data length reflect UDP length.
     * If not enough data to reflect UDP length, drop.
     *
     * No assertion on ip_len == len: a mismatch comes from the packet
     * contents and is handled just below.
     */
    len = RT_N2H_U16((u_int16_t)uh->uh_ulen);
    Assert((ip->ip_len + iphlen == m_length(m, NULL)));

    if (ip->ip_len != len)
    {
        if (len > ip->ip_len)
        {
            udpstat.udps_badlen++;
            Log3(("NAT: IP(id: %hd) has bad size\n", ip->ip_id));
            /*
             * Drop the packet.  Falling through would hand m_adj() a
             * positive count and raise ip_len above the IP payload length.
             */
            goto bad_free_mbuf;
        }
        /* len < ip_len here, so the count passed to m_adj is negative. */
        m_adj(m, len - ip->ip_len);
        ip->ip_len = len;
    }

    /*
     * Save a copy of the IP header in case we want restore it
     * for sending an ICMP error message in response.
     */
    save_ip = *ip;
    save_ip.ip_len+= iphlen;         /* tcp_input subtracts this */

    /*
     * Checksum extended UDP header and data.
     * uh_sum itself is only read, never overwritten, so it is kept
     * for a possible ICMP reply.
     */
    if (udpcksum && uh->uh_sum)
    {
        memset(((struct ipovly *)ip)->ih_x1, 0, 9);
        ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
        if (cksum(m, len + iphlen))
        {
            udpstat.udps_badsum++;
            Log3(("NAT: IP(id: %hd) has bad (udp) cksum\n", ip->ip_id));
            goto bad_free_mbuf;
        }
    }

    /*
     *  handle DHCP/BOOTP
     */
    if (uh->uh_dport == RT_H2N_U16_C(BOOTP_SERVER))
    {
        bootp_input(pData, m);
        goto done_free_mbuf;
    }

    /*
     * Ports are stored in network order; convert runtime values with
     * RT_N2H_U16 (the _C variant is reserved for constants).
     */
    LogFunc(("uh src: %RTnaipv4:%d, dst: %RTnaipv4:%d\n",
             ip->ip_src.s_addr, RT_N2H_U16(uh->uh_sport),
             ip->ip_dst.s_addr, RT_N2H_U16(uh->uh_dport)));

    /*
     * handle DNS host resolver without creating a socket
     */
    if (   pData->fUseHostResolver
        && uh->uh_dport == RT_H2N_U16_C(53)
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS))
    {
        struct sockaddr_in dst, src;

        /* The reply travels the other way, so source and destination swap. */
        src.sin_addr.s_addr = ip->ip_dst.s_addr;
        src.sin_port = uh->uh_dport;
        dst.sin_addr.s_addr = ip->ip_src.s_addr;
        dst.sin_port = uh->uh_sport;

        slirpMbufTagService(pData, m, CTL_DNS);
        /* udp_output2() expects a pointer to the body of UDP packet. */
        m->m_data += sizeof(struct udpiphdr);
        m->m_len -= sizeof(struct udpiphdr);
        udp_output2(pData, NULL, m, &src, &dst, IPTOS_LOWDELAY);
        LogFlowFuncLeave();
        return;
    }

    /*
     *  handle TFTP
     */
    if (   uh->uh_dport == RT_H2N_U16_C(TFTP_SERVER)
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_TFTP))
    {
        if (pData->pvTftpSessions)
            slirpTftpInput(pData, m);
        goto done_free_mbuf;
    }

    /*
     * Locate pcb for datagram.
     * The most recently used socket is tried first; on a mismatch the
     * whole udb list is searched.
     */
    so = udp_last_so;
    if (   so->so_lport != uh->uh_sport
        || so->so_laddr.s_addr != ip->ip_src.s_addr)
    {
        struct socket *tmp;

        for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next)
        {
            if (   tmp->so_lport        == uh->uh_sport
                && tmp->so_laddr.s_addr == ip->ip_src.s_addr)
            {
                so = tmp;
                break;
            }
        }
        if (tmp == &udb)
            so = NULL;
        else
        {
            udpstat.udpps_pcbcachemiss++;
            udp_last_so = so;
        }
    }

    if (so == NULL)
    {
        /*
         * If there's no socket for this packet,
         * create one
         */
        if ((so = socreate()) == NULL)
        {
            Log2(("NAT: IP(id: %hd) failed to create socket\n", ip->ip_id));
            goto bad_free_mbuf;
        }
        if (udp_attach(pData, so) <= 0)
        {
            Log2(("NAT: IP(id: %hd) udp_attach errno = %d (%s)\n",
                  ip->ip_id, errno, strerror(errno)));
            sofree(pData, so);
            goto bad_free_mbuf;
        }

        /*
         * Setup fields
         */
        /* udp_last_so = so; */
        so->so_laddr = ip->ip_src;
        so->so_lport = uh->uh_sport;
        so->so_iptos = ip->ip_tos;

        /*
         * XXXXX Here, check if it's in udpexec_list,
         * and if it is, do the fork_exec() etc.
         */
    }

    so->so_faddr = ip->ip_dst;   /* XXX */
    so->so_fport = uh->uh_dport; /* XXX */
    Assert(so->so_type == IPPROTO_UDP);

    /*
     * DNS proxy
     */
    if (   pData->fUseDnsProxy
        && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS)
        && (uh->uh_dport == RT_H2N_U16_C(53)))
    {
        dnsproxy_query(pData, so, m, iphlen);
        goto done_free_mbuf;
    }

    /* Step over the IP and UDP headers so the mbuf starts at the payload. */
    iphlen += sizeof(struct udphdr);
    m->m_len -= iphlen;
    m->m_data += iphlen;

    ttl = ip->ip_ttl = save_ip.ip_ttl;
    ret = setsockopt(so->s, IPPROTO_IP, IP_TTL, (const char*)&ttl, sizeof(ttl));
    if (ret < 0)
        LogRel(("NAT: Error (%s) occurred while setting TTL(%d) attribute "
                "of IP packet to socket %R[natsock]\n", strerror(errno), ip->ip_ttl, so));

    if (sosendto(pData, so, m) == -1)
    {
        /* Capture errno now: the logging and strerror() calls below may change it. */
        const int send_errno = errno;

        if (   !soIgnorableErrorCode(send_errno)
            && send_errno != ENOTCONN)
        {
            /* Put the headers back so the ICMP error can quote the original packet. */
            m->m_len += iphlen;
            m->m_data -= iphlen;
            *ip = save_ip;
            Log2(("NAT: UDP tx errno = %d (%s) on sent to %RTnaipv4\n",
                  send_errno, strerror(send_errno), ip->ip_dst.s_addr));
            /*
             * ICMP_SOURCEQUENCH was tried here for EAGAIN-like errors, to tell
             * the guest that NAT resources are exhausted so it would reduce
             * traffic.  It had no effect, so only "unreachable" is reported.
             */
            icmp_error(pData, m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(send_errno));
            so->so_m = NULL;
            LogFlowFuncLeave();
            return;
        }
    }

    if (so->so_m)
        m_freem(pData, so->so_m);   /* used for ICMP if error on sorecvfrom */

    /* restore the orig mbuf packet */
    m->m_len += iphlen;
    m->m_data -= iphlen;
    *ip = save_ip;
    so->so_m = m;         /* ICMP backup */
    LogFlowFuncLeave();
    return;

bad_free_mbuf:
    Log2(("NAT: UDP(id: %hd) datagram to %RTnaipv4 with size(%d) claimed as bad\n",
          ip->ip_id, ip->ip_dst.s_addr, ip->ip_len));

done_free_mbuf:
    /*
     * Some services like bootp (built-in), dns (built-in) and dhcp don't need
     * sockets and create new mbufs to send them to the guest, so we free their
     * incoming buffers here.
     */
    m_freem(pData, m);
    LogFlowFuncLeave();
    return;
}