/*
 * Queue a buffer on a control socket and push it towards the guest.
 *
 * The socket is obtained from slirp_find_ctl_socket(addr_low_byte,
 * guest_port).  If none is returned the data is silently discarded.
 * Otherwise the bytes are handed to soreadbuf() and, when that reports a
 * positive count, tcp_output() is invoked for the socket's tcpcb.
 */
void slirp_socket_recv(int addr_low_byte, int guest_port, const uint8_t *buf,
                       int size)
{
    struct socket *ctl_so;
    int queued;

    ctl_so = slirp_find_ctl_socket(addr_low_byte, guest_port);
    if (!ctl_so) {
        return;
    }

    queued = soreadbuf(ctl_so, (const char *)buf, size);
    if (queued <= 0) {
        /* Nothing was buffered (or soreadbuf failed): no output to start. */
        return;
    }

    tcp_output(sototcpcb(ctl_so));
}
/*
 * Unlink a socket from its queue (if it is on one), release the buffers it
 * owns and free the socket itself.  A "last socket" cache entry that still
 * refers to this socket is reset to the corresponding list head.
 */
void sofree(PNATState pData, struct socket *so)
{
    LogFlowFunc(("ENTER:%R[natsock]\n", so));

    /*
     * A socket that is flagged as being under polling must not be removed
     * here; it is only marked for removal and left in place.
     */
    if (so->fUnderPolling)
    {
        so->fShouldBeRemoved = 1;
        LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so));
        return;
    }

    /* The socket must no longer have a tcpcb attached when it is freed. */
    Assert(!sototcpcb(so));

    /* UDP checks: no timeout callback may still be pending. */
    Assert(!so->so_timeout);
    Assert(!so->so_timeout_arg);

    /* Drop the cached pointer that refers to this socket, if any. */
    if (tcp_last_so == so)
        tcp_last_so = &tcb;
    else if (udp_last_so == so)
        udp_last_so = &udb;

    /* The mbuf may already have been freed by somebody else. */
    if (so->so_m != NULL)
    {
        m_freem(pData, so->so_m);
        so->so_m = NULL;
    }

    if (so->so_ohdr != NULL)
    {
        RTMemFree(so->so_ohdr);
        so->so_ohdr = NULL;
    }

    /* remque() crashes when the socket is not on a queue, so test the links. */
    if (so->so_next != NULL && so->so_prev != NULL)
    {
        remque(pData, so);
        NSOCK_DEC();
    }

    RTMemFree(so);

    LogFlowFuncLeave();
}
/* * Tcp protocol timeout routine called every 500 ms. * Updates the timers in all active tcb's and * causes finite state machine actions if timers expire. */ void tcp_slowtimo() { register struct socket *ip, *ipnxt; register struct tcpcb *tp; register int i; DEBUG_CALL("tcp_slowtimo"); tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl; /* * Search through tcb's and update active timers. */ ip = tcb.so_next; if (ip == 0) return; for (; ip != &tcb; ip = ipnxt) { ipnxt = ip->so_next; tp = sototcpcb(ip); if (tp == 0) continue; for (i = 0; i < TCPT_NTIMERS; i++) { if (tp->t_timer[i] && --tp->t_timer[i] == 0) { tcp_timers(tp,i); if (ipnxt->so_prev != ip) goto tpgone; } } tp->t_idle++; if (tp->t_rtt) tp->t_rtt++; tpgone: ; } tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ #ifdef TCP_COMPAT_42 if ((int)tcp_iss < 0) tcp_iss = 0; /* XXX */ #endif tcp_now++; /* for timestamps */ }
int soreadbuf(struct socket *so, const char *buf, int size) { int n, nn, copy = size; struct sbuf *sb = &so->so_snd; struct iovec iov[2]; DEBUG_CALL("soreadbuf"); DEBUG_ARG("so = %lx", (long )so); /* * No need to check if there's enough room to read. * soread wouldn't have been called if there weren't */ if (sopreprbuf(so, iov, &n) < size) goto err; nn = MIN(iov[0].iov_len, copy); memcpy(iov[0].iov_base, buf, nn); copy -= nn; buf += nn; if (copy == 0) goto done; memcpy(iov[1].iov_base, buf, copy); done: /* Update fields */ sb->sb_cc += size; sb->sb_wptr += size; if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) sb->sb_wptr -= sb->sb_datalen; return size; err: sofcantrcvmore(so); tcp_sockclosed(sototcpcb(so)); fprintf(stderr, "soreadbuf buffer to small"); return -1; }
/* * Get urgent data * * When the socket is created, we set it SO_OOBINLINE, * so when OOB data arrives, we soread() it and everything * in the send buffer is sent as urgent data */ void sorecvoob(struct socket *so) { struct tcpcb *tp = sototcpcb(so); DEBUG_CALL("sorecvoob"); DEBUG_ARG("so = %p", so); /* * We take a guess at how much urgent data has arrived. * In most situations, when urgent data arrives, the next * read() should get all the urgent data. This guess will * be wrong however if more data arrives just after the * urgent data, or the read() doesn't return all the * urgent data. */ soread(so); tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_force = 1; tcp_output(tp); tp->t_force = 0; }
/*
 * Pull the out-of-band byte out of a segment so that it does not appear in
 * the regular data.
 *
 * so - socket whose tcpcb receives the urgent byte
 * ti - TCP/IP header of the segment; ti_urp locates the urgent byte
 * m  - first mbuf holding the segment data
 *
 * The byte at offset (ti_urp - 1) is stored in tp->t_iobc, TCPOOB_HAVEDATA
 * is set, the rest of that mbuf is shifted down by one and its length is
 * reduced by one.  If the offset lies beyond the available mbufs, panic()
 * is called.
 */
void
tcp_pulloutofband(struct socket *so, struct tcpiphdr *ti, struct mbuf *m)
{
    int cnt = ti->ti_urp - 1;

    while (cnt >= 0) {
        if (m->m_len > cnt) {
            char *cp = mtod(m, caddr_t) + cnt;
            struct tcpcb *tp = sototcpcb(so);

            tp->t_iobc = *cp;
            tp->t_oobflags |= TCPOOB_HAVEDATA;
            /*
             * Close the gap left by the urgent byte.  Source and
             * destination overlap, so this has to be memmove(); the
             * destination is the position of the removed byte.
             */
            memmove(cp, cp + 1, (unsigned)(m->m_len - cnt - 1));
            m->m_len--;
            return;
        }

        cnt -= m->m_len;
        m = m->m_next; /* XXX WRONG! Fix it! */
        if (m == 0)
            break;
    }

    panic("tcp_pulloutofband");
}
/*
 * TCP slow timeout routine for one Slirp instance (the original comment
 * states it is called every 500 ms).
 *
 * Decrements the running timers of every connection on slirp->tcb, calls
 * tcp_timers() for those that expire, then advances the instance's initial
 * send sequence number and timestamp clock.
 */
void
tcp_slowtimo(Slirp *slirp)
{
    struct socket *cur, *following;
    struct tcpcb *cb;
    int timer;

    DEBUG_CALL("tcp_slowtimo");

    /* Without a socket list nothing is updated, not even tcp_iss/tcp_now. */
    cur = slirp->tcb.so_next;
    if (cur == NULL) {
        return;
    }

    while (cur != &slirp->tcb) {
        int unlinked = 0;

        /* Fetch the successor up front: tcp_timers() may unlink `cur`. */
        following = cur->so_next;
        cb = sototcpcb(cur);

        if (cb != NULL) {
            for (timer = 0; timer < TCPT_NTIMERS; timer++) {
                if (cb->t_timer[timer] == 0) {
                    continue;           /* timer not running */
                }
                if (--cb->t_timer[timer] != 0) {
                    continue;           /* still counting down */
                }

                tcp_timers(cb, timer);

                /* Successor no longer points back at us: socket is gone. */
                if (following->so_prev != cur) {
                    unlinked = 1;
                    break;
                }
            }

            if (!unlinked) {
                cb->t_idle++;
                if (cb->t_rtt) {
                    cb->t_rtt++;
                }
            }
        }

        cur = following;
    }

    slirp->tcp_iss += TCP_ISSINCR/PR_SLOWHZ;    /* increment iss */
    slirp->tcp_now++;                           /* for timestamps */
}
/*
 * Remove the out-of-band byte from a segment so that it does not show up
 * in the user's data queue.  The byte still counts towards the segment
 * length for sequencing purposes.
 *
 * The byte at offset (ti_urp - 1) is saved in tp->t_iobc, TCPOOB_HAVEDATA
 * is set, and the remainder of that mbuf is moved down by one.  If the
 * offset is not covered by the mbuf chain, only a debug message is emitted.
 */
void
tcp_pulloutofband(struct usn_socket *so, struct tcpiphdr *ti, usn_mbuf_t *m)
{
    int off = ti->ti_urp - 1;

    while (off >= 0) {
        char *oob;
        struct tcpcb *cb;

        if (!(m->mlen > off)) {
            /* Urgent byte is not in this mbuf: skip to the next one. */
            off -= m->mlen;
            m = m->next;
            if (m == 0)
                break;
            continue;
        }

        oob = mtod(m, caddr_t) + off;
        cb = sototcpcb(so);

        cb->t_iobc = *oob;
        cb->t_oobflags |= TCPOOB_HAVEDATA;
        /* Shift the tail of the mbuf over the removed byte. */
        bcopy(oob+1, oob, (unsigned)(m->mlen - off - 1));
        m->mlen--;
        return;
    }

    DEBUG("panic: tcp_pulloutofband");
}
/*
 * Process the result of a select() call for all slirp sockets.
 *
 * readfds/writefds/xfds are the descriptor sets to test with FD_ISSET().
 * They are published through the global_*fds pointers for the duration of
 * the call and cleared again before returning.
 *
 * When link_up is set the function first runs the fast/slow protocol
 * timers that are due, then services every TCP socket on the tcb list and
 * every UDP socket on the udb list, and finally starts interface output if
 * packets are queued.
 */
void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds)
{
    struct socket *so, *so_next;
    int ret;

    global_readfds = readfds;
    global_writefds = writefds;
    global_xfds = xfds;

    /* Update time */
    updtime();

    /*
     * See if anything has timed out
     */
    if (link_up) {
        /* Fast timer is one-shot: it is disarmed (set to 0) once it fired. */
        if (time_fasttimo && ((curtime - time_fasttimo) >= FAST_TIMO)) {
            tcp_fasttimo();
            time_fasttimo = 0;
        }
        /* Slow timer is periodic: remember when it last ran. */
        if (do_slowtimo && ((curtime - last_slowtimo) >= SLOW_TIMO)) {
            ip_slowtimo();
            tcp_slowtimo();
            last_slowtimo = curtime;
        }
    }

    /*
     * Check sockets
     */
    if (link_up) {
        /*
         * Check TCP sockets
         */
        for (so = tcb.so_next; so != &tcb; so = so_next) {
            /* Saved before the handlers run, which are handed `so`. */
            so_next = so->so_next;

            /*
             * FD_ISSET is meaningless on these sockets
             * (and they can crash the program)
             */
            if (so->so_state & SS_NOFDREF || so->s == -1)
                continue;

            /*
             * Check for URG data
             * This will soread as well, so no need to
             * test for readfds below if this succeeds
             */
            if (FD_ISSET(so->s, xfds))
                sorecvoob(so);
            /*
             * Check sockets for reading
             */
            else if (FD_ISSET(so->s, readfds)) {
                /*
                 * Check for incoming connections
                 */
                if (so->so_state & SS_FACCEPTCONN) {
                    tcp_connect(so);
                    continue;
                } /* else */
                ret = soread(so);

                /* Output it if we read something */
                if (ret > 0)
                    tcp_output(sototcpcb(so));
            }

            /*
             * Check sockets for writing
             */
            if (FD_ISSET(so->s, writefds)) {
                /*
                 * Check for non-blocking, still-connecting sockets
                 */
                if (so->so_state & SS_ISFCONNECTING) {
                    /* Connected */
                    so->so_state &= ~SS_ISFCONNECTING;

                    /* Zero-length send used purely as a connection probe. */
                    ret = send(so->s, &ret, 0, 0);
                    if (ret < 0) {
                        /* XXXXX Must fix, zero bytes is a NOP */
                        if (errno == EAGAIN || errno == EWOULDBLOCK ||
                            errno == EINPROGRESS || errno == ENOTCONN)
                            continue;

                        /* else failed */
                        so->so_state = SS_NOFDREF;
                    }
                    /* else so->so_state &= ~SS_ISFCONNECTING; */

                    /*
                     * Continue tcp_input
                     */
                    tcp_input((struct mbuf *)NULL, sizeof(struct ip), so);
                    /* continue; */
                } else
                    ret = sowrite(so);
                /*
                 * XXXXX If we wrote something (a lot), there
                 * could be a need for a window update.
                 * In the worst case, the remote will send
                 * a window probe to get things going again
                 */
            }

            /*
             * Probe a still-connecting, non-blocking socket
             * to check if it's still alive
             */
#ifdef PROBE_CONN
            if (so->so_state & SS_ISFCONNECTING) {
                ret = recv(so->s, (char *)&ret, 0,0);

                if (ret < 0) {
                    /* XXX */
                    if (errno == EAGAIN || errno == EWOULDBLOCK ||
                        errno == EINPROGRESS || errno == ENOTCONN)
                        continue; /* Still connecting, continue */

                    /* else failed */
                    so->so_state = SS_NOFDREF;

                    /* tcp_input will take care of it */
                } else {
                    ret = send(so->s, &ret, 0,0);
                    if (ret < 0) {
                        /* XXX */
                        if (errno == EAGAIN || errno == EWOULDBLOCK ||
                            errno == EINPROGRESS || errno == ENOTCONN)
                            continue;
                        /* else failed */
                        so->so_state = SS_NOFDREF;
                    } else
                        so->so_state &= ~SS_ISFCONNECTING;

                }
                tcp_input((struct mbuf *)NULL, sizeof(struct ip),so);
            } /* SS_ISFCONNECTING */
#endif
        }

        /*
         * Now UDP sockets.
         * Incoming packets are sent straight away, they're not buffered.
         * Incoming UDP data isn't buffered either.
         */
        for (so = udb.so_next; so != &udb; so = so_next) {
            so_next = so->so_next;

            if (so->s != -1 && FD_ISSET(so->s, readfds)) {
                sorecvfrom(so);
            }
        }
    }

    /*
     * See if we can start outputting
     */
    if (if_queued && link_up)
        if_start();

    /* clear global file descriptor sets.
     * these reside on the stack in vl.c
     * so they're unusable if we're not in
     * slirp_select_fill or slirp_select_poll.
     */
    global_readfds = NULL;
    global_writefds = NULL;
    global_xfds = NULL;
}
/* * Read from so's socket into sb_snd, updating all relevant sbuf fields * NOTE: This will only be called if it is select()ed for reading, so * a read() of 0 (or less) means it's disconnected */ int soread(PNATState pData, struct socket *so) { int n, nn, lss, total; struct sbuf *sb = &so->so_snd; u_int len = sb->sb_datalen - sb->sb_cc; struct iovec iov[2]; int mss = so->so_tcpcb->t_maxseg; int sockerr; STAM_PROFILE_START(&pData->StatIOread, a); STAM_COUNTER_RESET(&pData->StatIORead_in_1); STAM_COUNTER_RESET(&pData->StatIORead_in_2); QSOCKET_LOCK(tcb); SOCKET_LOCK(so); QSOCKET_UNLOCK(tcb); LogFlow(("soread: so = %R[natsock]\n", so)); Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); /* * No need to check if there's enough room to read. * soread wouldn't have been called if there weren't */ len = sb->sb_datalen - sb->sb_cc; iov[0].iov_base = sb->sb_wptr; iov[1].iov_base = 0; iov[1].iov_len = 0; if (sb->sb_wptr < sb->sb_rptr) { iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; if (iov[0].iov_len > mss) iov[0].iov_len -= iov[0].iov_len%mss; n = 1; } else { iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; len -= iov[0].iov_len; if (len) { iov[1].iov_base = sb->sb_data; iov[1].iov_len = sb->sb_rptr - sb->sb_data; if (iov[1].iov_len > len) iov[1].iov_len = len; total = iov[0].iov_len + iov[1].iov_len; if (total > mss) { lss = total % mss; if (iov[1].iov_len > lss) { iov[1].iov_len -= lss; n = 2; } else { lss -= iov[1].iov_len; iov[0].iov_len -= lss; n = 1; } } else n = 2; } else { if (iov[0].iov_len > mss) iov[0].iov_len -= iov[0].iov_len%mss; n = 1; } } #ifdef HAVE_READV nn = readv(so->s, (struct iovec *)iov, n); #else nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? 
MSG_OOB:0)); #endif if (nn < 0) sockerr = errno; /* save it, as it may be clobbered by logging */ else sockerr = 0; Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); if (nn <= 0) { #ifdef RT_OS_WINDOWS /* * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE) * instead of just returning EOF indication. */ if (nn < 0 && sockerr == ESHUTDOWN) { nn = 0; sockerr = 0; } #endif if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */ { /* * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that * _could_ mean that the connection is closed. But we will receive an * FD_CLOSE event later if the connection was _really_ closed. With * www.youtube.com I see this very often. Closing the socket too early * would be dangerous. */ int status; unsigned long pending = 0; status = ioctlsocket(so->s, FIONREAD, &pending); if (status < 0) Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno)); if (pending != 0) { SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return 0; } } if ( nn < 0 && soIgnorableErrorCode(sockerr)) { SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return 0; } else { int fUninitializedTemplate = 0; int shuterr; fUninitializedTemplate = RT_BOOL(( sototcpcb(so) && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY))); /* nn == 0 means peer has performed an orderly shutdown */ Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr))); shuterr = sofcantrcvmore(so); if (!sockerr && !shuterr && !fUninitializedTemplate) tcp_sockclosed(pData, sototcpcb(so)); else { LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so)); tcp_drop(pData, sototcpcb(so), sockerr); } SOCKET_UNLOCK(so); 
STAM_PROFILE_STOP(&pData->StatIOread, a); return -1; } } STAM_STATS( if (n == 1) { STAM_COUNTER_INC(&pData->StatIORead_in_1); STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn); } else { STAM_COUNTER_INC(&pData->StatIORead_in_2); STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn); } );
/*
 * Set, in `bitfield`, the bit of every local port that is used by a PCB on
 * pcbinfo's list and passes the filters below.
 *
 * ifindex  - when non-zero, non-wildcard PCBs must either have no last
 *            output interface recorded or have one with this index
 * protocol - PF_UNSPEC, PF_INET or PF_INET6; restricts the IP version
 * flags    - INPCB_GET_PORTS_USED_* bits that relax or tighten the filters
 * bitfield - bit string indexed by host-order port number
 * pcbinfo  - PCB list to scan; its ipi_lock is held shared during the scan
 */
__private_extern__ void
inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
    bitstr_t *bitfield, struct inpcbinfo *pcbinfo)
{
    const bool wildcardok = (flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0;
    const bool nowakeok = (flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0;
    const bool recvanyifonly =
        (flags & INPCB_GET_PORTS_USED_RECVANYIFONLY) != 0;
    const bool extbgidleok =
        (flags & INPCB_GET_PORTS_USED_EXTBGIDLEONLY) != 0;
    const bool activeonly = (flags & INPCB_GET_PORTS_USED_ACTIVEONLY) != 0;
    struct inpcb *inp;
    inp_gen_t gencnt;

    lck_rw_lock_shared(pcbinfo->ipi_lock);
    gencnt = pcbinfo->ipi_gencnt;

    for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp != NULL;
        inp = LIST_NEXT(inp, inp_list)) {
        struct socket *so;
        bool iswildcard;
        uint16_t port;

        /* Skip PCBs newer than our snapshot, dead ones and dying ones. */
        if (inp->inp_gencnt > gencnt)
            continue;
        if (inp->inp_state == INPCB_STATE_DEAD)
            continue;
        if (inp->inp_wantcnt == WNT_STOPUSING)
            continue;

        /* Skip PCBs without a usable socket. */
        so = inp->inp_socket;
        if (so == NULL)
            continue;
        if (so->so_state & (SS_DEFUNCT | SS_ISDISCONNECTED))
            continue;

        /* Protocol family filter. */
        if (protocol != PF_UNSPEC) {
            bool v4match = (protocol == PF_INET &&
                (inp->inp_vflag & INP_IPV4));
            bool v6match = (protocol == PF_INET6 &&
                (inp->inp_vflag & INP_IPV6));

            if (!v4match && !v6match)
                continue;
        }

        /* Wildcard local address filter. */
        iswildcard = (((inp->inp_vflag & INP_IPV4) &&
            inp->inp_laddr.s_addr == INADDR_ANY) ||
            ((inp->inp_vflag & INP_IPV6) &&
            IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)));
        if (iswildcard && !wildcardok)
            continue;

        /* Socket option / flag filters selected by the caller. */
        if (!nowakeok && (so->so_options & SO_NOWAKEFROMSLEEP))
            continue;
        if (recvanyifonly && !(inp->inp_flags & INP_RECV_ANYIF))
            continue;
        if (extbgidleok && !(so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED))
            continue;

        /* Interface filter; never applied to wildcard-bound PCBs. */
        if (!iswildcard && ifindex != 0 &&
            inp->inp_last_outifp != NULL &&
            ifindex != inp->inp_last_outifp->if_index)
            continue;

        /* UDP sockets that cannot receive any more are skipped. */
        if (SOCK_PROTO(so) == IPPROTO_UDP &&
            (so->so_state & SS_CANTRCVMORE))
            continue;

        if (SOCK_PROTO(so) == IPPROTO_TCP) {
            struct tcpcb *tp = sototcpcb(so);

            /*
             * Workaround race where inp_ppcb is NULL during
             * socket initialization
             */
            if (tp == NULL)
                continue;

            switch (tp->t_state) {
            case TCPS_CLOSED:
            case TCPS_TIME_WAIT:
                /* Never reported. */
                continue;
                /* NOT REACHED */

            case TCPS_CLOSE_WAIT:
            case TCPS_CLOSING:
            case TCPS_LAST_ACK:
            case TCPS_FIN_WAIT_2:
                /*
                 * In the closing states, the connection
                 * is not idle when there is outgoing
                 * data having to be acknowledged
                 */
                if (activeonly && so->so_snd.sb_cc == 0)
                    continue;
                break;

            case TCPS_LISTEN:
            case TCPS_SYN_SENT:
            case TCPS_SYN_RECEIVED:
            case TCPS_ESTABLISHED:
            case TCPS_FIN_WAIT_1:
                /*
                 * Note: FIN_WAIT_1 is an active state
                 * because we need our FIN to be
                 * acknowledged
                 */
                break;
            }
        }

        /*
         * Final safeguard to exclude unspecified local port
         */
        port = ntohs(inp->inp_lport);
        if (port == 0)
            continue;

        bit_set(bitfield, port);
    }

    lck_rw_done(pcbinfo->ipi_lock);
}
/*
 * Process a TCP user request (PRU_*) for a socket.
 *
 * so  - socket the request applies to; the request code is read from
 *       so->so_req
 * m   - data mbuf for requests that carry data (PRU_SEND, PRU_SENDOOB,
 *       PRU_RCVOOB)
 * nam - address mbuf for bind/connect/accept/sockaddr/peeraddr requests;
 *       for PRU_RCVOOB and PRU_SLOWTIMO it is decoded with MBUF2LONG()
 *       instead
 *
 * Returns 0 on success or an errno-style error code.  A socket without an
 * inpcb is only acceptable for PRU_ATTACH; everything else yields EINVAL.
 */
int
tcp_usrreq(struct socket * so, 
   struct mbuf *  m,
   struct mbuf *  nam)
{
   struct inpcb * inp;
   struct tcpcb * tp;
   int   error =  0;
   int   req;

#ifdef DO_TCPTRACE
   /* NOTE(review): never assigned in this function before tcp_trace() reads it — confirm. */
   int   ostate;
#endif

   req = so->so_req;    /* get request from socket struct */
   inp = sotoinpcb(so);
   /*
    * When a TCP is attached to a socket, then there will be
    * a (struct inpcb) pointed at by the socket, and this
    * structure will point at a subsidiary (struct tcpcb).
    */
   if (inp == 0 && req != PRU_ATTACH)
   {
      return (EINVAL);
   }

   if (inp)
      tp = intotcpcb(inp);
   else  /* inp and tp not set, make sure this is OK: */
   { 
      if (req == PRU_ATTACH)
         tp = NULL;  /* stifle compiler warnings about using unassigned tp*/
      else
      {
         dtrap(); /* programming error? */
         return EINVAL;
      }
   }

   switch (req) 
   {
   /*
    * TCP attaches to socket via PRU_ATTACH, reserving space,
    * and an internet control block.
    */
   case PRU_ATTACH:
      if (inp) 
      {
         error = EISCONN;
         break;
      }
      error = tcp_attach(so);
      if (error)
         break;
      /* Linger requested with a zero time: substitute the TCP default. */
      if ((so->so_options & SO_LINGER) && so->so_linger == 0)
         so->so_linger = TCP_LINGERTIME;
#ifdef   DO_TCPTRACE
      SETTP(tp, sototcpcb(so));
#endif
      break;

   /*
    * PRU_DETACH detaches the TCP protocol from the socket.
    * If the protocol state is non-embryonic, then can't
    * do this directly: have to initiate a PRU_DISCONNECT,
    * which may finish later; embryonic TCB's can just
    * be discarded here.
    */
   case PRU_DETACH:
      if (tp->t_state > TCPS_LISTEN)
         SETTP(tp, tcp_disconnect(tp));
      else
         SETTP(tp, tcp_close(tp));
      break;

   /*
    * Give the socket an address.
    */
   case PRU_BIND:

      /* bind is quite different for IPv4 and v6, so we use two 
       * separate pcbbind routines. so_domain was checked for 
       * validity way up in t_bind()
       */
#ifdef IP_V4
      if(inp->inp_socket->so_domain == AF_INET)
      {
         error = in_pcbbind(inp, nam);
         break;
      }
#endif /* IP_V4 */
#ifdef IP_V6
      if(inp->inp_socket->so_domain == AF_INET6)
      {
         error = ip6_pcbbind(inp, nam);
         break;
      }
#endif /* IP_V6 */
      dtrap();    /* not v4 or v6? */
      error = EINVAL;
      break;

   /*
    * Prepare to accept connections.
    */
   case PRU_LISTEN:
      /* No local port yet: bind with a null address mbuf first. */
      if (inp->inp_lport == 0)
         error = in_pcbbind(inp, (struct mbuf *)0);
      if (error == 0)
         tp->t_state = TCPS_LISTEN;
      break;

   /*
    * Initiate connection to peer.
    * Create a template for use in transmissions on this connection.
    * Enter SYN_SENT state, and mark socket as connecting.
    * Start keep-alive timer, and seed output sequence space.
    * Send initial segment on connection.
    */
   case PRU_CONNECT:
      if (inp->inp_lport == 0) 
      {

#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      error = in_pcbbind(inp, (struct mbuf *)0);
#else    /* dual mode */
      if(so->so_domain == AF_INET)
         error = in_pcbbind(inp, (struct mbuf *)0);
      else
         error = ip6_pcbbind(inp, (struct mbuf *)0);
#endif   /* end dual mode code */
#else    /* no v4, v6 only */
      error = ip6_pcbbind(inp, (struct mbuf *)0);
#endif   /* end v6 only */

         if (error)
            break;
      }

#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      error = in_pcbconnect(inp, nam);
#else    /* dual mode */
      if(so->so_domain == AF_INET)
         error = in_pcbconnect(inp, nam);
      else
         error = ip6_pcbconnect(inp, nam);
#endif   /* end dual mode code */
#else    /* no v4, v6 only */
      error = ip6_pcbconnect(inp, nam);
#endif   /* end v6 only */

      if (error)
         break;
      tp->t_template = tcp_template(tp);
      if (tp->t_template == 0) 
      {
         /* No template available: undo the connect and report ENOBUFS. */

#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
         in_pcbdisconnect(inp);
#else    /* dual mode */
         if(so->so_domain == AF_INET)
            in_pcbdisconnect(inp);
         else
            ip6_pcbdisconnect(inp);
#endif   /* end dual mode code */
#else    /* no v4, v6 only */
         ip6_pcbdisconnect(inp);
#endif   /* end v6 only */

         error = ENOBUFS;
         break;
      }

      soisconnecting(so);
      tcpstat.tcps_connattempt++;
      tp->t_state = TCPS_SYN_SENT;
      tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
      /* Take the current global iss and advance it for the next connection. */
      tp->iss = tcp_iss; 
      tcp_iss += (tcp_seq)(TCP_ISSINCR/2);
      tcp_sendseqinit(tp);
      error = tcp_output(tp);
      if (!error)
         TCP_MIB_INC(tcpActiveOpens);     /* keep MIB stats */
      break;

   /*
    * Create a TCP connection between two sockets.
    */
   case PRU_CONNECT2:
      error = EOPNOTSUPP;
      break;

   /*
    * Initiate disconnect from peer.
    * If connection never passed embryonic stage, just drop;
    * else if don't need to let data drain, then can just drop anyways,
    * else have to begin TCP shutdown process: mark socket disconnecting,
    * drain unread data, state switch to reflect user close, and
    * send segment (e.g. FIN) to peer.  Socket will be really disconnected
    * when peer sends FIN and acks ours.
    *
    * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
    */
   case PRU_DISCONNECT:
      SETTP(tp, tcp_disconnect(tp));
      break;

   /*
    * Accept a connection.  Essentially all the work is
    * done at higher levels; just return the address
    * of the peer, storing through addr.
    */
   case PRU_ACCEPT: 
   {
         /* Both views alias the same nam buffer; only one is filled in. */
         struct sockaddr_in * sin   =  mtod(nam,   struct sockaddr_in *);
#ifdef IP_V6
         struct sockaddr_in6 * sin6 = mtod(nam,   struct sockaddr_in6 *);
#endif

#ifdef IP_V6
         if (so->so_domain == AF_INET6)
         {
            nam->m_len = sizeof (struct sockaddr_in6);
            sin6->sin6_port = inp->inp_fport;
            sin6->sin6_family = AF_INET6;
            IP6CPY(&sin6->sin6_addr, &inp->ip6_faddr);
         }
#endif

#ifdef IP_V4
         if (so->so_domain == AF_INET)
         {
            nam->m_len = sizeof (struct sockaddr_in);
            sin->sin_family = AF_INET;
            sin->sin_port = inp->inp_fport;
            sin->sin_addr = inp->inp_faddr;
         }
#endif
         /* Neither domain matched: report it, but still fall through. */
         if ( !(so->so_domain == AF_INET) &&
              !(so->so_domain == AF_INET6)
            )
         {
            dprintf("*** PRU_ACCEPT bad domain = %d\n", so->so_domain);
            dtrap();
         } 
         TCP_MIB_INC(tcpPassiveOpens);    /* keep MIB stats */
         break;
   }

   /*
    * Mark the connection as being incapable of further output.
    */
   case PRU_SHUTDOWN:
      socantsendmore(so);
      /* tcp_usrclosed() may return NULL; only then is output skipped. */
      tp = tcp_usrclosed(tp);
      if (tp)
         error = tcp_output(tp);
      break;

   /*
    * After a receive, possibly send window update to peer.
    */
   case PRU_RCVD:
      (void) tcp_output(tp);
      break;

   /*
    * Do a send by putting data in output queue and updating urgent
    * marker if URG set.  Possibly send more data.
    */
   case PRU_SEND:
      if (so->so_pcb == NULL)
      {                    /* Return EPIPE error if socket is not connected */
         error = EPIPE;
         break;
      }
      sbappend(&so->so_snd, m);
      error = tcp_output(tp);
      if (error == ENOBUFS)
         sbdropend(&so->so_snd,m);  /* Remove data from socket buffer */
      break;

   /*
    * Abort the TCP.
    */
   case PRU_ABORT:
      SETTP(tp, tcp_drop(tp, ECONNABORTED));
      break;

   case PRU_SENSE:
      /*      ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; */
      dtrap();    /* does this ever happen? */
      return (0);

   case PRU_RCVOOB:
      /*
       * EINVAL when there is no urgent mark, when urgent data is delivered
       * inline (SO_OOBINLINE builds only), or when TCPOOB_HADDATA is set.
       */
      if ((so->so_oobmark == 0 &&
          (so->so_state & SS_RCVATMARK) == 0) ||
#ifdef SO_OOBINLINE
       so->so_options & SO_OOBINLINE ||
#endif
       tp->t_oobflags & TCPOOB_HADDATA) 
       {
         error = EINVAL;
         break;
      }
      if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) 
      {
         error = EWOULDBLOCK;
         break;
      }
      /* Hand back the single saved urgent byte. */
      m->m_len = 1;
      *mtod(m, char *) = tp->t_iobc;
      /* Unless peeking, flip HAVEDATA off and HADDATA on. */
      if ((MBUF2LONG(nam) & MSG_PEEK) == 0)
         tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
      break;

   case PRU_SENDOOB:
      if (so->so_pcb == NULL)
      {                    /* Return EPIPE error if socket is not connected */
         error = EPIPE;
         break;
      }
      if (sbspace(&so->so_snd) == 0) 
      {
         m_freem(m);
         error = ENOBUFS;
         break;
      }
      /*
       * According to RFC961 (Assigned Protocols),
       * the urgent pointer points to the last octet
       * of urgent data.  We continue, however,
       * to consider it to indicate the first octet
       * of data past the urgent section.
       * Otherwise, snd_up should be one lower.
       */
      sbappend(&so->so_snd, m);
      tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
      /* t_force is set only for the duration of this tcp_output() call. */
      tp->t_force = 1;
      error = tcp_output(tp);
      if (error == ENOBUFS)
         sbdropend(&so->so_snd,m);  /* Remove data from socket buffer */
      tp->t_force = 0;
      break;

   case PRU_SOCKADDR:

   /* sockaddr and peeraddr have to switch based on IP type */
#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      in_setsockaddr(inp, nam);
#else /* dual mode */
      if(so->so_domain == AF_INET6)
         ip6_setsockaddr(inp, nam);
      else
         in_setsockaddr(inp, nam);
#endif   /* dual mode */
#else    /* IP_V6 */
         ip6_setsockaddr(inp, nam);
#endif
      break;

   case PRU_PEERADDR:
#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      in_setpeeraddr(inp, nam);
#else /* dual mode */
      if(so->so_domain == AF_INET6)
         ip6_setpeeraddr(inp, nam);
      else
         in_setpeeraddr(inp, nam);
#endif   /* dual mode */
#else    /* IP_V6 */
         ip6_setpeeraddr(inp, nam);
#endif
      break;

   case PRU_SLOWTIMO:
      /* nam carries the timer index here, not an address. */
      SETTP(tp, tcp_timers(tp, (int)MBUF2LONG(nam)));
#ifdef DO_TCPTRACE
      req |= (long)nam << 8;        /* for debug's sake */
#endif
      break;

      default:
      panic("tcp_usrreq");
   }
#ifdef DO_TCPTRACE
   if (tp && (so->so_options & SO_DEBUG))
      tcp_trace("usrreq: state: %d, tcpcb: %x, req: %d",
    ostate, tp, req);
#endif
   return (error);
}
/*
 * Read from so's socket into sb_snd, updating all relevant sbuf fields
 * NOTE: This will only be called if it is select()ed for reading, so
 * a read() of 0 (or less) means it's disconnected
 *
 * Returns the number of bytes stored in the buffer, 0 when the read failed
 * with EINTR or EAGAIN, and -1 after the socket has been marked as unable
 * to receive more and tcp_sockclosed() has been called.
 */
int
soread(struct socket *so)
{
	int n, nn, lss, total;
	struct sbuf *sb = &so->so_snd;
	int len = sb->sb_datalen - sb->sb_cc;
	struct iovec iov[2];
	int mss = so->so_tcpcb->t_maxseg;

	DEBUG_CALL("soread");
	DEBUG_ARG("so = %lx", (long )so);

	/*
	 * No need to check if there's enough room to read.
	 * soread wouldn't have been called if there weren't
	 */

	/* Free space in the buffer (recomputed; same value as the initializer). */
	len = sb->sb_datalen - sb->sb_cc;

	iov[0].iov_base = sb->sb_wptr;
	if (sb->sb_wptr < sb->sb_rptr) {
		/* Free space is one contiguous run between wptr and rptr. */
		iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len)
		   iov[0].iov_len = len;
		/* Trim the request down to a whole multiple of mss. */
		if (iov[0].iov_len > mss)
		   iov[0].iov_len -= iov[0].iov_len%mss;
		n = 1;
	} else {
		/* First run goes from wptr to the end of the buffer. */
		iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
		/* Should never succeed, but... */
		if (iov[0].iov_len > len) iov[0].iov_len = len;
		len -= iov[0].iov_len;
		if (len) {
			/* Second run wraps around: start of buffer up to rptr. */
			iov[1].iov_base = sb->sb_data;
			iov[1].iov_len = sb->sb_rptr - sb->sb_data;
			if(iov[1].iov_len > len)
			   iov[1].iov_len = len;
			total = iov[0].iov_len + iov[1].iov_len;
			if (total > mss) {
				/*
				 * Drop the excess over a multiple of mss, taking it
				 * from the second run first and from the first run
				 * if the second one is too short.
				 */
				lss = total%mss;
				if (iov[1].iov_len > lss) {
					iov[1].iov_len -= lss;
					n = 2;
				} else {
					lss -= iov[1].iov_len;
					iov[0].iov_len -= lss;
					n = 1;
				}
			} else
				n = 2;
		} else {
			if (iov[0].iov_len > mss)
			   iov[0].iov_len -= iov[0].iov_len%mss;
			n = 1;
		}
	}

#ifdef HAVE_READV
	nn = readv(so->s, (struct iovec *)iov, n);
	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#else
	/* Without readv() only the first run is read here; see below. */
	nn = recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
#endif
	if (nn <= 0) {
		if (nn < 0 && (errno == EINTR || errno == EAGAIN))
			return 0;
		else {
			DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
			sofcantrcvmore(so);
			tcp_sockclosed(sototcpcb(so));
			return -1;
		}
	}

#ifndef HAVE_READV
	/*
	 * If there was no error, try and read the second time round
	 * We read again if n = 2 (ie, there's another part of the buffer)
	 * and we read as much as we could in the first read
	 * We don't test for <= 0 this time, because there legitimately
	 * might not be any more data (since the socket is non-blocking),
	 * a close will be detected on next iteration.
	 * A return of -1 won't (shouldn't) happen, since it didn't happen above
	 */
	if (n == 2 && nn == iov[0].iov_len) {
            int ret;
            ret = recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
            if (ret > 0)
                nn += ret;
        }

	DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
#endif

	/* Update fields */
	sb->sb_cc += nn;
	sb->sb_wptr += nn;
	/* Wrap the write pointer when it runs off the end of the buffer. */
	if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
		sb->sb_wptr -= sb->sb_datalen;
	return nn;
}
/*
 * Close every TCP connection on the tcb list.
 *
 * The first socket on the list is closed over and over until the list
 * head points back at itself; the loop therefore relies on tcp_close()
 * taking the socket off the list.
 */
void tcp_cleanup(void)
{
    for (;;) {
        struct socket *first = tcb.so_next;

        if (first == &tcb) {
            break;
        }
        tcp_close(sototcpcb(first));
    }
}
void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds) { struct socket *so, *so_next; int ret; global_readfds = readfds; global_writefds = writefds; global_xfds = xfds; updtime(); if (link_up) { if (time_fasttimo && ((curtime - time_fasttimo) >= 2)) { tcp_fasttimo(); time_fasttimo = 0; } if (do_slowtimo && ((curtime - last_slowtimo) >= 499)) { ip_slowtimo(); tcp_slowtimo(); last_slowtimo = curtime; } } if (link_up) { for (so = tcb.so_next; so != &tcb; so = so_next) { so_next = so->so_next; if (so->so_state & SS_NOFDREF || so->s == -1) continue; if ((so->so_state & SS_PROXIFIED) != 0) continue; if (FD_ISSET(so->s, xfds)) sorecvoob(so); else if (FD_ISSET(so->s, readfds)) { if (so->so_state & SS_FACCEPTCONN) { tcp_connect(so); continue; } ret = soread(so); if (ret > 0) tcp_output(sototcpcb(so)); } if (FD_ISSET(so->s, writefds)) { if (so->so_state & SS_ISFCONNECTING) { so->so_state &= ~SS_ISFCONNECTING; ret = socket_send(so->s, (const void *)&ret, 0); if (ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINPROGRESS || errno == ENOTCONN) continue; so->so_state = SS_NOFDREF; } tcp_input((struct mbuf *)NULL, sizeof(struct ip), so); } else ret = sowrite(so); } #ifdef PROBE_CONN if (so->so_state & SS_ISFCONNECTING) { ret = socket_recv(so->s, (char *)&ret, 0); if (ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINPROGRESS || errno == ENOTCONN) continue; so->so_state = SS_NOFDREF; } else { ret = socket_send(so->s, &ret, 0); if (ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINPROGRESS || errno == ENOTCONN) continue; so->so_state = SS_NOFDREF; } else so->so_state &= ~SS_ISFCONNECTING; } tcp_input((struct mbuf *)NULL, sizeof(struct ip),so); } #endif } for (so = udb.so_next; so != &udb; so = so_next) { so_next = so->so_next; if ((so->so_state & SS_PROXIFIED) != 0) continue; if (so->s != -1 && FD_ISSET(so->s, readfds)) { sorecvfrom(so); } } } proxy_manager_poll(readfds, writefds, xfds); if (if_queued && 
link_up) if_start(); global_readfds = NULL; global_writefds = NULL; global_xfds = NULL; }
/*
 * TCP input routine, follows pages 65-76 of the
 * protocol specification dated September, 1981 very closely.
 *
 * Parameters:
 *   m       received segment (IP header followed by TCP header and data),
 *           or NULL to resume a connection attempt that was parked earlier.
 *   iphlen  length of the IP header in m; IP options are stripped when it
 *           exceeds sizeof(struct ip).
 *   inso    only read when m == NULL: the socket whose saved mbuf (so_m)
 *           and saved header (so_ti) are picked up again at cont_conn.
 *
 * Exit labels:
 *   dropafterack   free m and send an ACK (unless the segment carried RST)
 *   dropwithreset  answer with a RST built by tcp_respond()
 *   drop           free m and return
 *
 * NOTE(review): on the m == NULL path the locals off, ip and save_ip are
 * never assigned; the only visible readers are in the tcp_fconnect()
 * failure branch, which is not reached from cont_conn.
 */
void tcp_input(struct mbuf *m, int iphlen, struct socket *inso)
{
    struct ip save_ip, *ip;
    register struct tcpiphdr *ti;
    caddr_t optp = NULL;
    int optlen = 0;
    int len, tlen, off;
    register struct tcpcb *tp = NULL;
    register int tiflags;
    struct socket *so = NULL;
    int todrop, acked, ourfinisacked, needoutput = 0;
    int iss = 0;
    u_long tiwin;
    int ret;
    struct ex_list *ex_ptr;
    Slirp *slirp;

    DEBUG_CALL("tcp_input");
    DEBUG_ARGS((dfd, " m = %8lx iphlen = %2d inso = %lx\n", (long )m, iphlen, (long )inso ));

    /*
     * If called with m == 0, then we're continuing the connect
     */
    if (m == NULL) {
        so = inso;
        slirp = so->slirp;

        /* Re-set a few variables */
        tp = sototcpcb(so);
        m = so->so_m;
        so->so_m = NULL;
        ti = so->so_ti;
        tiwin = ti->ti_win;
        tiflags = ti->ti_flags;

        goto cont_conn;
    }
    slirp = m->slirp;

    /*
     * Get IP and TCP header together in first mbuf.
     * Note: IP leaves IP header in first mbuf.
     */
    ti = mtod(m, struct tcpiphdr *);
    if (iphlen > sizeof(struct ip )) {
        ip_stripoptions(m, (struct mbuf *)0);
        iphlen=sizeof(struct ip );
    }
    /* XXX Check if too short */

    /*
     * Save a copy of the IP header in case we want restore it
     * for sending an ICMP error message in response.
     */
    ip=mtod(m, struct ip *);
    save_ip = *ip;
    save_ip.ip_len+= iphlen;

    /*
     * Checksum extended TCP header and data.
     * The overlay fields of the header are zeroed first so that the
     * checksum runs over a pseudo-header followed by the TCP segment.
     */
    tlen = ((struct ip *)ti)->ip_len;
    tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;
    memset(&ti->ti_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr));
    ti->ti_x1 = 0;
    ti->ti_len = htons((uint16_t)tlen);
    len = sizeof(struct ip ) + tlen;
    if(cksum(m, len)) {
        goto drop;
    }

    /*
     * Check that TCP offset makes sense,
     * pull out TCP options and adjust length.		XXX
     */
    off = ti->ti_off << 2;
    if (off < sizeof (struct tcphdr) || off > tlen) {
        goto drop;
    }
    tlen -= off;
    ti->ti_len = tlen;
    if (off > sizeof (struct tcphdr)) {
        optlen = off - sizeof (struct tcphdr);
        optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
    }
    tiflags = ti->ti_flags;

    /*
     * Convert TCP protocol specific fields to host format.
     */
    NTOHL(ti->ti_seq);
    NTOHL(ti->ti_ack);
    NTOHS(ti->ti_win);
    NTOHS(ti->ti_urp);

    /*
     * Drop TCP, IP headers and TCP options.
     */
    m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
    m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);

    /*
     * Locate pcb for segment.
     * The most recently matched socket is tried first; solookup() is only
     * called when that one-entry cache misses.
     */
findso:
    so = slirp->tcp_last_so;
    if (so->so_fport != ti->ti_dport ||
        so->so_lport != ti->ti_sport ||
        so->so_laddr.s_addr != ti->ti_src.s_addr ||
        so->so_faddr.s_addr != ti->ti_dst.s_addr) {
        so = solookup(&slirp->tcb, ti->ti_src, ti->ti_sport,
                      ti->ti_dst, ti->ti_dport);
        if (so)
            slirp->tcp_last_so = so;
    }

    /*
     * If the state is CLOSED (i.e., TCB does not exist) then
     * all data in the incoming segment is discarded.
     * If the TCB exists but is in CLOSED state, it is embryonic,
     * but should either do a listen or a connect soon.
     *
     * state == CLOSED means we've done socreate() but haven't
     * attached it to a protocol yet...
     *
     * XXX If a TCB does not exist, and the TH_SYN flag is
     * the only flag set, then create a session, mark it
     * as if it was LISTENING, and continue...
     */
    if (so == NULL) {
        if (slirp->restricted) {
            /* Any hostfwds will have an existing socket, so we only get here
             * for non-hostfwd connections. These should be dropped, unless it
             * happens to be a guestfwd.
             */
            for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
                if (ex_ptr->ex_fport == ti->ti_dport &&
                    ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
                    break;
                }
            }
            if (!ex_ptr) {
                goto dropwithreset;
            }
        }

        if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
            goto dropwithreset;

        if ((so = socreate(slirp)) == NULL)
            goto dropwithreset;
        if (tcp_attach(so) < 0) {
            free(so); /* Not sofree (if it failed, it's not insqued) */
            goto dropwithreset;
        }

        sbreserve(&so->so_snd, TCP_SNDSPACE);
        sbreserve(&so->so_rcv, TCP_RCVSPACE);

        /* "Local" is the segment's source, "foreign" its destination. */
        so->so_laddr = ti->ti_src;
        so->so_lport = ti->ti_sport;
        so->so_faddr = ti->ti_dst;
        so->so_fport = ti->ti_dport;

        if ((so->so_iptos = tcp_tos(so)) == 0)
            so->so_iptos = ((struct ip *)ti)->ip_tos;

        tp = sototcpcb(so);
        tp->t_state = TCPS_LISTEN;
    }

    /*
     * If this is a still-connecting socket, this probably
     * a retransmit of the SYN.  Whether it's a retransmit SYN
     * or something else, we nuke it.
     */
    if (so->so_state & SS_ISFCONNECTING)
        goto drop;

    tp = sototcpcb(so);

    /* XXX Should never fail */
    if (tp == NULL)
        goto dropwithreset;
    if (tp->t_state == TCPS_CLOSED)
        goto drop;

    tiwin = ti->ti_win;

    /*
     * Segment received on connection.
     * Reset idle time and keep-alive timer.
     */
    tp->t_idle = 0;
    if (SO_OPTIONS)
        tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
    else
        tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;

    /*
     * Process options if not in LISTEN state,
     * else do it below (after getting remote address).
     */
    if (optp && tp->t_state != TCPS_LISTEN)
        tcp_dooptions(tp, (u_char *)optp, optlen, ti);

    /*
     * Header prediction: check for the two common cases
     * of a uni-directional data xfer.  If the packet has
     * no control flags, is in-sequence, the window didn't
     * change and we're not retransmitting, it's a
     * candidate.  If the length is zero and the ack moved
     * forward, we're the sender side of the xfer.  Just
     * free the data acked & wake any higher level process
     * that was blocked waiting for space.  If the length
     * is non-zero and the ack didn't move, we're the
     * receiver side.  If we're getting packets in-order
     * (the reassembly queue is empty), add the data to
     * the socket buffer and note that we need a delayed ack.
     *
     * XXX Some of these tests are not needed
     * eg: the tiwin == tp->snd_wnd prevents many more
     * predictions.. with no *real* advantage..
     */
    if (tp->t_state == TCPS_ESTABLISHED &&
        (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
        ti->ti_seq == tp->rcv_nxt &&
        tiwin && tiwin == tp->snd_wnd &&
        tp->snd_nxt == tp->snd_max) {
        if (ti->ti_len == 0) {
            if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
                SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
                tp->snd_cwnd >= tp->snd_wnd) {
                /*
                 * this is a pure ack for outstanding data.
                 */
                if (tp->t_rtt &&
                    SEQ_GT(ti->ti_ack, tp->t_rtseq))
                    tcp_xmit_timer(tp, tp->t_rtt);
                acked = ti->ti_ack - tp->snd_una;
                sbdrop(&so->so_snd, acked);
                tp->snd_una = ti->ti_ack;
                m_free(m);

                /*
                 * If all outstanding data are acked, stop
                 * retransmit timer, otherwise restart timer
                 * using current (possibly backed-off) value.
                 * If process is waiting for space,
                 * wakeup/selwakeup/signal.  If data
                 * are ready to send, let tcp_output
                 * decide between more output or persist.
                 */
                if (tp->snd_una == tp->snd_max)
                    tp->t_timer[TCPT_REXMT] = 0;
                else if (tp->t_timer[TCPT_PERSIST] == 0)
                    tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;

                /*
                 * This is called because sowwakeup might have
                 * put data into so_snd.  Since we don't so sowwakeup,
                 * we don't need this.. XXX???
                 */
                if (so->so_snd.sb_cc)
                    (void) tcp_output(tp);

                return;
            }
        } else if (ti->ti_ack == tp->snd_una &&
                   tcpfrag_list_empty(tp) &&
                   ti->ti_len <= sbspace(&so->so_rcv)) {
            /*
             * this is a pure, in-sequence data packet
             * with nothing on the reassembly queue and
             * we have enough buffer space to take it.
             */
            tp->rcv_nxt += ti->ti_len;
            /*
             * Add data to socket buffer.
             * With emulation enabled, the data is only appended when
             * tcp_emu() returns nonzero.
             */
            if (so->so_emu) {
                if (tcp_emu(so,m))
                    sbappend(so, m);
            } else
                sbappend(so, m);

            /*
             * If this is a short packet, then ACK now - with Nagle
             * congestion avoidance sender won't send more until
             * he gets an ACK.
             *
             * It is better to not delay acks at all to maximize
             * TCP throughput.  See RFC 2581.
             */
            tp->t_flags |= TF_ACKNOW;
            tcp_output(tp);
            return;
        }
    } /* header prediction */

    /*
     * Calculate amount of space in receive window,
     * and then do TCP input processing.
     * Receive window is amount of space in rcv queue,
     * but not less than advertised window.
     */
    {
        int win;
        win = sbspace(&so->so_rcv);
        if (win < 0)
            win = 0;
        tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
    }

    switch (tp->t_state) {

    /*
     * If the state is LISTEN then ignore segment if it contains an RST.
     * If the segment contains an ACK then it is bad and send a RST.
     * If it does not contain a SYN then it is not interesting; drop it.
     * Don't bother responding if the destination was a broadcast.
     * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
     * tp->iss, and send a segment:
     *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
     * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
     * Fill in remote peer address fields if not previously specified.
     * Enter SYN_RECEIVED state, and process any other fields of this
     * segment in this state.
     */
    case TCPS_LISTEN: {

        if (tiflags & TH_RST)
            goto drop;
        if (tiflags & TH_ACK)
            goto dropwithreset;
        if ((tiflags & TH_SYN) == 0)
            goto drop;

        /*
         * This has way too many gotos...
         * But a bit of spaghetti code never hurt anybody :)
         */

        /*
         * If this is destined for the control address, then flag to
         * tcp_ctl once connected, otherwise connect
         */
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
            slirp->vnetwork_addr.s_addr) {
            if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr &&
                so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) {
                /* May be an add exec */
                for (ex_ptr = slirp->exec_list; ex_ptr;
                     ex_ptr = ex_ptr->ex_next) {
                    if(ex_ptr->ex_fport == so->so_fport &&
                       so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) {
                        so->so_state |= SS_CTL;
                        break;
                    }
                }
                if (so->so_state & SS_CTL) {
                    goto cont_input;
                }
            }
            /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
        }

        if (so->so_emu & EMU_NOCONNECT) {
            so->so_emu &= ~EMU_NOCONNECT;
            goto cont_input;
        }

        /*
         * Start the outgoing connect.  "In progress" / "would block"
         * results are not failures: they take the else branch below.
         */
        if ((tcp_fconnect(so) == -1) &&
#if defined(_WIN32)
            socket_error() != WSAEWOULDBLOCK
#else
            (errno != EINPROGRESS) && (errno != EWOULDBLOCK)
#endif
        ) {
            u_char code=ICMP_UNREACH_NET;
            DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n",
                        errno,strerror(errno)));
            /*
             * NOTE(review): errno is read again after DEBUG_MISC, which
             * may itself modify it -- confirm or save it beforehand.
             */
            if(errno == ECONNREFUSED) {
                /* ACK the SYN, send RST to refuse the connection */
                tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
                            TH_RST|TH_ACK);
            } else {
                if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
                HTONL(ti->ti_seq);             /* restore tcp header */
                HTONL(ti->ti_ack);
                HTONS(ti->ti_win);
                HTONS(ti->ti_urp);
                m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                m->m_len  += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                *ip=save_ip;
                icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
            }
            tcp_close(tp);
            /*
             * NOTE(review): the comment at dropwithreset states that
             * tcp_respond() reuses m; here m is additionally passed to
             * m_free() -- confirm the mbuf cannot be released twice.
             */
            m_free(m);
        } else {
            /*
             * Haven't connected yet, save the current mbuf
             * and ti, and return
             * XXX Some OS's don't tell us whether the connect()
             * succeeded or not.  So we must time it out.
             */
            so->so_m = m;
            so->so_ti = ti;
            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
            tp->t_state = TCPS_SYN_RECEIVED;
            tcp_template(tp);
        }
        return;

    cont_conn:
        /* m==NULL
         * Check if the connect succeeded
         */
        if (so->so_state & SS_NOFDREF) {
            tp = tcp_close(tp);
            goto dropwithreset;
        }
    cont_input:
        tcp_template(tp);

        /* optp is still NULL when arriving here through cont_conn. */
        if (optp)
            tcp_dooptions(tp, (u_char *)optp, optlen, ti);

        if (iss)
            tp->iss = iss;
        else
            tp->iss = slirp->tcp_iss;
        slirp->tcp_iss += TCP_ISSINCR/2;
        tp->irs = ti->ti_seq;
        tcp_sendseqinit(tp);
        tcp_rcvseqinit(tp);
        tp->t_flags |= TF_ACKNOW;
        tp->t_state = TCPS_SYN_RECEIVED;
        tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
        goto trimthenstep6;
    } /* case TCPS_LISTEN */

    /*
     * If the state is SYN_SENT:
     *	if seg contains an ACK, but not for our SYN, drop the input.
     *	if seg contains a RST, then drop the connection.
     *	if seg does not contain SYN, then drop it.
     * Otherwise this is an acceptable SYN segment
     *	initialize tp->rcv_nxt and tp->irs
     *	if seg contains ack then advance tp->snd_una
     *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
     *	arrange for segment to be acked (eventually)
     *	continue processing rest of data/controls, beginning with URG
     */
    case TCPS_SYN_SENT:
        if ((tiflags & TH_ACK) &&
            (SEQ_LEQ(ti->ti_ack, tp->iss) ||
             SEQ_GT(ti->ti_ack, tp->snd_max)))
            goto dropwithreset;

        if (tiflags & TH_RST) {
            if (tiflags & TH_ACK) {
                tcp_drop(tp, 0); /* XXX Check t_softerror! */
            }
            goto drop;
        }

        if ((tiflags & TH_SYN) == 0)
            goto drop;
        if (tiflags & TH_ACK) {
            tp->snd_una = ti->ti_ack;
            if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                tp->snd_nxt = tp->snd_una;
        }

        tp->t_timer[TCPT_REXMT] = 0;
        tp->irs = ti->ti_seq;
        tcp_rcvseqinit(tp);
        tp->t_flags |= TF_ACKNOW;
        if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
            soisfconnected(so);
            tp->t_state = TCPS_ESTABLISHED;

            (void) tcp_reass(tp, (struct tcpiphdr *)0,
                             (struct mbuf *)0);
            /*
             * if we didn't have to retransmit the SYN,
             * use its rtt as our initial srtt & rtt var.
             */
            if (tp->t_rtt)
                tcp_xmit_timer(tp, tp->t_rtt);
        } else
            tp->t_state = TCPS_SYN_RECEIVED;

    trimthenstep6:
        /*
         * Advance ti->ti_seq to correspond to first data byte.
         * If data, trim to stay within window,
         * dropping FIN if necessary.
         */
        ti->ti_seq++;
        if (ti->ti_len > tp->rcv_wnd) {
            todrop = ti->ti_len - tp->rcv_wnd;
            m_adj(m, -todrop);
            ti->ti_len = tp->rcv_wnd;
            tiflags &= ~TH_FIN;
        }
        tp->snd_wl1 = ti->ti_seq - 1;
        tp->rcv_up = ti->ti_seq;
        goto step6;
    } /* switch tp->t_state */

    /*
     * States other than LISTEN or SYN_SENT.
     * Check that at least some bytes of segment are within
     * receive window.  If segment begins before rcv_nxt,
     * drop leading data (and SYN); if nothing left, just ack.
     */
    todrop = tp->rcv_nxt - ti->ti_seq;
    if (todrop > 0) {
        if (tiflags & TH_SYN) {
            tiflags &= ~TH_SYN;
            ti->ti_seq++;
            if (ti->ti_urp > 1)
                ti->ti_urp--;
            else
                tiflags &= ~TH_URG;
            todrop--;
        }
        /*
         * Following if statement from Stevens, vol. 2, p. 960.
         */
        if (todrop > ti->ti_len
            || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
            /*
             * Any valid FIN must be to the left of the window.
             * At this point the FIN must be a duplicate or out
             * of sequence; drop it.
             */
            tiflags &= ~TH_FIN;

            /*
             * Send an ACK to resynchronize and drop any data.
             * But keep on processing for RST or ACK.
             */
            tp->t_flags |= TF_ACKNOW;
            todrop = ti->ti_len;
        }
        m_adj(m, todrop);
        ti->ti_seq += todrop;
        ti->ti_len -= todrop;
        if (ti->ti_urp > todrop)
            ti->ti_urp -= todrop;
        else {
            tiflags &= ~TH_URG;
            ti->ti_urp = 0;
        }
    }

    /*
     * If new data are received on a connection after the
     * user processes are gone, then RST the other end.
     */
    if ((so->so_state & SS_NOFDREF) &&
        tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
        tp = tcp_close(tp);
        goto dropwithreset;
    }

    /*
     * If segment ends after window, drop trailing data
     * (and PUSH and FIN); if nothing left, just ACK.
     */
    todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
    if (todrop > 0) {
        if (todrop >= ti->ti_len) {
            /*
             * If a new connection request is received
             * while in TIME_WAIT, drop the old connection
             * and start over if the sequence numbers
             * are above the previous ones.
             */
            if (tiflags & TH_SYN &&
                tp->t_state == TCPS_TIME_WAIT &&
                SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
                iss = tp->rcv_nxt + TCP_ISSINCR;
                tp = tcp_close(tp);
                goto findso;
            }
            /*
             * If window is closed can only take segments at
             * window edge, and have to drop data and PUSH from
             * incoming segments.  Continue processing, but
             * remember to ack.  Otherwise, drop segment
             * and ack.
             */
            if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
                tp->t_flags |= TF_ACKNOW;
            } else {
                goto dropafterack;
            }
        }
        m_adj(m, -todrop);
        ti->ti_len -= todrop;
        tiflags &= ~(TH_PUSH|TH_FIN);
    }

    /*
     * If the RST bit is set examine the state:
     *    SYN_RECEIVED STATE:
     *	If passive open, return to LISTEN state.
     *	If active open, inform user that connection was refused.
     *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
     *	Inform user that connection was reset, and close tcb.
     *    CLOSING, LAST_ACK, TIME_WAIT STATES
     *	Close the tcb.
     */
    if (tiflags&TH_RST) switch (tp->t_state) {

    case TCPS_SYN_RECEIVED:
    case TCPS_ESTABLISHED:
    case TCPS_FIN_WAIT_1:
    case TCPS_FIN_WAIT_2:
    case TCPS_CLOSE_WAIT:
        tp->t_state = TCPS_CLOSED;
        tcp_close(tp);
        goto drop;

    case TCPS_CLOSING:
    case TCPS_LAST_ACK:
    case TCPS_TIME_WAIT:
        tcp_close(tp);
        goto drop;
    }

    /*
     * If a SYN is in the window, then this is an
     * error and we send an RST and drop the connection.
     */
    if (tiflags & TH_SYN) {
        tp = tcp_drop(tp,0);
        goto dropwithreset;
    }

    /*
     * If the ACK bit is off we drop the segment and return.
     */
    if ((tiflags & TH_ACK) == 0) goto drop;

    /*
     * Ack processing.
     */
    switch (tp->t_state) {
    /*
     * In SYN_RECEIVED state if the ack ACKs our SYN then enter
     * ESTABLISHED state and continue processing, otherwise
     * send an RST.  una<=ack<=max
     */
    case TCPS_SYN_RECEIVED:

        if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
            SEQ_GT(ti->ti_ack, tp->snd_max))
            goto dropwithreset;
        tp->t_state = TCPS_ESTABLISHED;
        /*
         * The sent SYN is ack'ed with our sequence number +1
         * The first data byte already in the buffer will get
         * lost if no correction is made.  This is only needed for
         * SS_CTL since the buffer is empty otherwise.
         * tp->snd_una++; or:
         */
        tp->snd_una=ti->ti_ack;
        if (so->so_state & SS_CTL) {
            /* So tcp_ctl reports the right state */
            ret = tcp_ctl(so);
            if (ret == 1) {
                soisfconnected(so);
                so->so_state &= ~SS_CTL;   /* success XXX */
            } else if (ret == 2) {
                so->so_state &= SS_PERSISTENT_MASK;
                so->so_state |= SS_NOFDREF; /* CTL_CMD */
            } else {
                needoutput = 1;
                tp->t_state = TCPS_FIN_WAIT_1;
            }
        } else {
            soisfconnected(so);
        }

        (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
        tp->snd_wl1 = ti->ti_seq - 1;
        /* Avoid ack processing; snd_una==ti_ack  =>  dup ack */
        goto synrx_to_est;
        /* fall into ... */

    /*
     * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
     * ACKs.  If the ack is in the range
     *	tp->snd_una < ti->ti_ack <= tp->snd_max
     * then advance tp->snd_una to ti->ti_ack and drop
     * data from the retransmission queue.  If this ACK reflects
     * more up to date window information we update our window information.
     */
    case TCPS_ESTABLISHED:
    case TCPS_FIN_WAIT_1:
    case TCPS_FIN_WAIT_2:
    case TCPS_CLOSE_WAIT:
    case TCPS_CLOSING:
    case TCPS_LAST_ACK:
    case TCPS_TIME_WAIT:

        if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
            if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
                DEBUG_MISC((dfd, " dup ack m = %lx so = %lx\n",
                            (long )m, (long )so));
                /*
                 * If we have outstanding data (other than
                 * a window probe), this is a completely
                 * duplicate ack (ie, window info didn't
                 * change), the ack is the biggest we've
                 * seen and we've seen exactly our rexmt
                 * threshold of them, assume a packet
                 * has been dropped and retransmit it.
                 * Kludge snd_nxt & the congestion
                 * window so we send only this one
                 * packet.
                 *
                 * We know we're losing at the current
                 * window size so do congestion avoidance
                 * (set ssthresh to half the current window
                 * and pull our congestion window back to
                 * the new ssthresh).
                 *
                 * Dup acks mean that packets have left the
                 * network (they're now cached at the receiver)
                 * so bump cwnd by the amount in the receiver
                 * to keep a constant cwnd packets in the
                 * network.
                 */
                if (tp->t_timer[TCPT_REXMT] == 0 ||
                    ti->ti_ack != tp->snd_una)
                    tp->t_dupacks = 0;
                else if (++tp->t_dupacks == TCPREXMTTHRESH) {
                    tcp_seq onxt = tp->snd_nxt;
                    u_int win =
                        min(tp->snd_wnd, tp->snd_cwnd) / 2 /
                        tp->t_maxseg;

                    if (win < 2)
                        win = 2;
                    tp->snd_ssthresh = win * tp->t_maxseg;
                    tp->t_timer[TCPT_REXMT] = 0;
                    tp->t_rtt = 0;
                    tp->snd_nxt = ti->ti_ack;
                    tp->snd_cwnd = tp->t_maxseg;
                    (void) tcp_output(tp);
                    tp->snd_cwnd = tp->snd_ssthresh +
                        tp->t_maxseg * tp->t_dupacks;
                    if (SEQ_GT(onxt, tp->snd_nxt))
                        tp->snd_nxt = onxt;
                    goto drop;
                } else if (tp->t_dupacks > TCPREXMTTHRESH) {
                    tp->snd_cwnd += tp->t_maxseg;
                    (void) tcp_output(tp);
                    goto drop;
                }
            } else
                tp->t_dupacks = 0;
            break;
        }
    synrx_to_est:
        /*
         * If the congestion window was inflated to account
         * for the other side's cached packets, retract it.
         */
        if (tp->t_dupacks > TCPREXMTTHRESH &&
            tp->snd_cwnd > tp->snd_ssthresh)
            tp->snd_cwnd = tp->snd_ssthresh;
        tp->t_dupacks = 0;
        if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
            goto dropafterack;
        }
        acked = ti->ti_ack - tp->snd_una;

        /*
         * If transmit timer is running and timed sequence
         * number was acked, update smoothed round trip time.
         * Since we now have an rtt measurement, cancel the
         * timer backoff (cf., Phil Karn's retransmit alg.).
         * Recompute the initial retransmit timer.
         */
        if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
            tcp_xmit_timer(tp,tp->t_rtt);

        /*
         * If all outstanding data is acked, stop retransmit
         * timer and remember to restart (more output or persist).
         * If there is more data to be acked, restart retransmit
         * timer, using current (possibly backed-off) value.
         */
        if (ti->ti_ack == tp->snd_max) {
            tp->t_timer[TCPT_REXMT] = 0;
            needoutput = 1;
        } else if (tp->t_timer[TCPT_PERSIST] == 0)
            tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
        /*
         * When new data is acked, open the congestion window.
         * If the window gives us less than ssthresh packets
         * in flight, open exponentially (maxseg per packet).
         * Otherwise open linearly: maxseg per window
         * (maxseg^2 / cwnd per packet).
         */
        {
            register u_int cw = tp->snd_cwnd;
            register u_int incr = tp->t_maxseg;

            if (cw > tp->snd_ssthresh)
                incr = incr * incr / cw;
            tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
        }
        /*
         * More acked than is buffered: everything buffered is dropped
         * and the surplus is taken to mean our FIN was acknowledged.
         */
        if (acked > so->so_snd.sb_cc) {
            tp->snd_wnd -= so->so_snd.sb_cc;
            sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
            ourfinisacked = 1;
        } else {
            sbdrop(&so->so_snd, acked);
            tp->snd_wnd -= acked;
            ourfinisacked = 0;
        }
        tp->snd_una = ti->ti_ack;
        if (SEQ_LT(tp->snd_nxt, tp->snd_una))
            tp->snd_nxt = tp->snd_una;

        switch (tp->t_state) {

        /*
         * In FIN_WAIT_1 STATE in addition to the processing
         * for the ESTABLISHED state if our FIN is now acknowledged
         * then enter FIN_WAIT_2.
         */
        case TCPS_FIN_WAIT_1:
            if (ourfinisacked) {
                /*
                 * If we can't receive any more
                 * data, then closing user can proceed.
                 * Starting the timer is contrary to the
                 * specification, but if we don't get a FIN
                 * we'll hang forever.
                 */
                if (so->so_state & SS_FCANTRCVMORE) {
                    tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
                }
                tp->t_state = TCPS_FIN_WAIT_2;
            }
            break;

        /*
         * In CLOSING STATE in addition to the processing for
         * the ESTABLISHED state if the ACK acknowledges our FIN
         * then enter the TIME-WAIT state, otherwise ignore
         * the segment.
         */
        case TCPS_CLOSING:
            if (ourfinisacked) {
                tp->t_state = TCPS_TIME_WAIT;
                tcp_canceltimers(tp);
                tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
            }
            break;

        /*
         * In LAST_ACK, we may still be waiting for data to drain
         * and/or to be acked, as well as for the ack of our FIN.
         * If our FIN is now acknowledged, delete the TCB,
         * enter the closed state and return.
         */
        case TCPS_LAST_ACK:
            if (ourfinisacked) {
                tcp_close(tp);
                goto drop;
            }
            break;

        /*
         * In TIME_WAIT state the only thing that should arrive
         * is a retransmission of the remote FIN.  Acknowledge
         * it and restart the finack timer.
         */
        case TCPS_TIME_WAIT:
            tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
            goto dropafterack;
        }
    } /* switch(tp->t_state) */

step6:
    /*
     * Update window information.
     * Don't look at window if no ACK: TAC's send garbage on first SYN.
     */
    if ((tiflags & TH_ACK) &&
        (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
         (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
          (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
        tp->snd_wnd = tiwin;
        tp->snd_wl1 = ti->ti_seq;
        tp->snd_wl2 = ti->ti_ack;
        if (tp->snd_wnd > tp->max_sndwnd)
            tp->max_sndwnd = tp->snd_wnd;
        needoutput = 1;
    }

    /*
     * Process segments with URG.
     */
    if ((tiflags & TH_URG) && ti->ti_urp &&
        TCPS_HAVERCVDFIN(tp->t_state) == 0) {
        /*
         * This is a kludge, but if we receive and accept
         * random urgent pointers, we'll crash in
         * soreceive.  It's hard to imagine someone
         * actually wanting to send this much urgent data.
         */
        if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
            ti->ti_urp = 0;
            tiflags &= ~TH_URG;
            goto dodata;
        }
        /*
         * If this segment advances the known urgent pointer,
         * then mark the data stream.  This should not happen
         * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
         * a FIN has been received from the remote side.
         * In these states we ignore the URG.
         *
         * According to RFC961 (Assigned Protocols),
         * the urgent pointer points to the last octet
         * of urgent data.  We continue, however,
         * to consider it to indicate the first octet
         * of data past the urgent section as the original
         * spec states (in one of two places).
         */
        if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
            tp->rcv_up = ti->ti_seq + ti->ti_urp;
            so->so_urgc =  so->so_rcv.sb_cc +
                (tp->rcv_up - tp->rcv_nxt); /* -1; */
            /* Same value as assigned two statements above. */
            tp->rcv_up = ti->ti_seq + ti->ti_urp;
        }
    } else
        /*
         * If no out of band data is expected,
         * pull receive urgent pointer along
         * with the receive window.
         */
        if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
            tp->rcv_up = tp->rcv_nxt;
dodata:

    /*
     * If this is a small packet, then ACK now - with Nagle
     * congestion avoidance sender won't send more until
     * he gets an ACK.
     * The test below only matches segments of at most 5 bytes whose
     * first byte is 27.
     */
    if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
        ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
        tp->t_flags |= TF_ACKNOW;
    }

    /*
     * Process the segment text, merging it into the TCP sequencing queue,
     * and arranging for acknowledgment of receipt if necessary.
     * This process logically involves adjusting tp->rcv_wnd as data
     * is presented to the user (this happens in tcp_usrreq.c,
     * case PRU_RCVD).  If a FIN has already been received on this
     * connection then we just ignore the text.
     */
    if ((ti->ti_len || (tiflags&TH_FIN)) &&
        TCPS_HAVERCVDFIN(tp->t_state) == 0) {
        TCP_REASS(tp, ti, m, so, tiflags);
    } else {
        m_free(m);
        tiflags &= ~TH_FIN;
    }

    /*
     * If FIN is received ACK the FIN and let the user know
     * that the connection is closing.
     */
    if (tiflags & TH_FIN) {
        if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
            /*
             * If we receive a FIN we can't send more data,
             * set it SS_FDRAIN
             * Shutdown the socket if there is no rx data in the
             * buffer.
             * soread() is called on completion of shutdown() and
             * will go to TCPS_LAST_ACK, and use tcp_output()
             * to send the FIN.
             */
            sofwdrain(so);

            tp->t_flags |= TF_ACKNOW;
            tp->rcv_nxt++;
        }
        switch (tp->t_state) {

        /*
         * In SYN_RECEIVED and ESTABLISHED STATES
         * enter the CLOSE_WAIT state.
         */
        case TCPS_SYN_RECEIVED:
        case TCPS_ESTABLISHED:
            if(so->so_emu == EMU_CTL)        /* no shutdown on socket */
                tp->t_state = TCPS_LAST_ACK;
            else
                tp->t_state = TCPS_CLOSE_WAIT;
            break;

        /*
         * If still in FIN_WAIT_1 STATE FIN has not been acked so
         * enter the CLOSING state.
         */
        case TCPS_FIN_WAIT_1:
            tp->t_state = TCPS_CLOSING;
            break;

        /*
         * In FIN_WAIT_2 state enter the TIME_WAIT state,
         * starting the time-wait timer, turning off the other
         * standard timers.
         */
        case TCPS_FIN_WAIT_2:
            tp->t_state = TCPS_TIME_WAIT;
            tcp_canceltimers(tp);
            tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
            break;

        /*
         * In TIME_WAIT state restart the 2 MSL time_wait timer.
         */
        case TCPS_TIME_WAIT:
            tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
            break;
        }
    }

    /*
     * Return any desired output.
     */
    if (needoutput || (tp->t_flags & TF_ACKNOW)) {
        (void) tcp_output(tp);
    }
    return;

dropafterack:
    /*
     * Generate an ACK dropping incoming segment if it occupies
     * sequence space, where the ACK reflects our state.
     */
    if (tiflags & TH_RST)
        goto drop;
    m_free(m);
    tp->t_flags |= TF_ACKNOW;
    (void) tcp_output(tp);
    return;

dropwithreset:
    /*
     * reuses m if m!=NULL, m_free() unnecessary
     * NOTE(review): several callers reach this label right after
     * "tp = tcp_close(tp)", so tp is whatever tcp_close() returned --
     * confirm tcp_respond() copes with that value.
     */
    if (tiflags & TH_ACK)
        tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
    else {
        if (tiflags & TH_SYN) ti->ti_len++;
        tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
                    TH_RST|TH_ACK);
    }

    return;

drop:
    /*
     * Drop space held by incoming segment and return.
     */
    m_free(m);
}
/*
 * Close every TCP control block still linked into this instance's tcb list.
 *
 * The list is circular with slirp->tcb as its sentinel; the first entry is
 * closed repeatedly until the sentinel points back at itself.  This relies
 * on tcp_close() unlinking the socket it is given (presumably it does --
 * verify against tcp_close()).
 */
void tcp_cleanup(Slirp *slirp)
{
    struct socket *const sentinel = &slirp->tcb;

    while (sentinel->so_next != sentinel) {
        struct socket *first = sentinel->so_next;

        tcp_close(sototcpcb(first));
    }
}
/* * Write data from so_rcv to so's socket, * updating all sbuf field as necessary */ int sowrite(struct socket *so) { int n,nn; struct sbuf *sb = &so->so_rcv; int len = sb->sb_cc; struct iovec iov[2]; DEBUG_CALL("sowrite"); DEBUG_ARG("so = %lx", (long)so); if (so->so_urgc) { sosendoob(so); if (sb->sb_cc == 0) return 0; } /* * No need to check if there's something to write, * sowrite wouldn't have been called otherwise */ iov[0].iov_base = sb->sb_rptr; iov[1].iov_base = NULL; iov[1].iov_len = 0; if (sb->sb_rptr < sb->sb_wptr) { iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; n = 1; } else { iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; if (iov[0].iov_len > len) iov[0].iov_len = len; len -= iov[0].iov_len; if (len) { iov[1].iov_base = sb->sb_data; iov[1].iov_len = sb->sb_wptr - sb->sb_data; if (iov[1].iov_len > len) iov[1].iov_len = len; n = 2; } else n = 1; } /* Check if there's urgent data to send, and if so, send it */ #ifdef HAVE_READV nn = writev(so->s, (const struct iovec *)iov, n); DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn)); #else nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0); #endif /* This should never happen, but people tell me it does *shrug* */ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) return 0; if (nn <= 0) { DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n", so->so_state, errno)); sofcantsendmore(so); tcp_sockclosed(sototcpcb(so)); return -1; } #ifndef HAVE_READV if (n == 2 && nn == iov[0].iov_len) { int ret; ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0); if (ret > 0) nn += ret; } DEBUG_MISC((dfd, " ... 
wrote nn = %d bytes\n", nn)); #endif /* Update sbuf */ sb->sb_cc -= nn; sb->sb_rptr += nn; if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) sb->sb_rptr -= sb->sb_datalen; /* * If in DRAIN mode, and there's no more data, set * it CANTSENDMORE */ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) sofcantsendmore(so); return nn; }
/* * Read from so's socket into sb_snd, updating all relevant sbuf fields * NOTE: This will only be called if it is select()ed for reading, so * a read() of 0 (or less) means it's disconnected */ int soread(struct socket *so) { int n, nn; struct sbuf *sb = &so->so_snd; struct iovec iov[2]; DEBUG_CALL("soread"); DEBUG_ARG("so = %p", so); /* * No need to check if there's enough room to read. * soread wouldn't have been called if there weren't */ sopreprbuf(so, iov, &n); #ifdef HAVE_READV nn = readv(so->s, (struct iovec *)iov, n); DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn)); #else nn = qemu_recv(so->s, iov[0].iov_base, iov[0].iov_len,0); #endif if (nn <= 0) { if (nn < 0 && (errno == EINTR || errno == EAGAIN)) return 0; else { int err; socklen_t slen = sizeof err; err = errno; if (nn == 0) { getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &slen); } DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno))); sofcantrcvmore(so); if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || err == EPIPE) { tcp_drop(sototcpcb(so), err); } else { tcp_sockclosed(sototcpcb(so)); } return -1; } } #ifndef HAVE_READV /* * If there was no error, try and read the second time round * We read again if n = 2 (ie, there's another part of the buffer) * and we read as much as we could in the first read * We don't test for <= 0 this time, because there legitimately * might not be any more data (since the socket is non-blocking), * a close will be detected on next iteration. * A return of -1 won't (shouldn't) happen, since it didn't happen above */ if (n == 2 && nn == iov[0].iov_len) { int ret; ret = qemu_recv(so->s, iov[1].iov_base, iov[1].iov_len,0); if (ret > 0) nn += ret; } DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn)); #endif /* Update fields */ sb->sb_cc += nn; sb->sb_wptr += nn; if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) sb->sb_wptr -= sb->sb_datalen; return nn; }