void tcp_timer_rexmt(void * xtp) { struct tcpcb *tp = xtp; CURVNET_SET(tp->t_vnet); int rexmt; int headlocked; struct inpcb *inp; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); if (callout_pending(&tp->t_timers->tt_rexmt) || !callout_active(&tp->t_timers->tt_rexmt)) { INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_rexmt); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } tcp_free_sackholes(tp); /* * Retransmission timer went off. Message has not * been acked within retransmit interval. Back off * to a longer retransmit interval and retransmit one segment. */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); in_pcbref(inp); INP_INFO_RUNLOCK(&V_tcbinfo); INP_WUNLOCK(inp); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } tp = tcp_drop(tp, tp->t_softerror ? tp->t_softerror : ETIMEDOUT); headlocked = 1; goto out; } INP_INFO_RUNLOCK(&V_tcbinfo); headlocked = 0; if (tp->t_state == TCPS_SYN_SENT) { /* * If the SYN was retransmitted, indicate CWND to be * limited to 1 segment in cc_conn_init(). */ tp->snd_cwnd = 1; } else if (tp->t_rxtshift == 1) { /* * first retransmit; record ssthresh and cwnd so they can * be recovered if this turns out to be a "bad" retransmit. * A retransmit is considered "bad" if an ACK for this * segment is received within RTT/2 interval; the assumption * here is that the ACK was already in flight. See * "On Estimating End-to-End Network Path Properties" by * Allman and Paxson for more details. */ tp->snd_cwnd_prev = tp->snd_cwnd; tp->snd_ssthresh_prev = tp->snd_ssthresh; tp->snd_recover_prev = tp->snd_recover; if (IN_FASTRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASFRECOVERY; else tp->t_flags &= ~TF_WASFRECOVERY; if (IN_CONGRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASCRECOVERY; else tp->t_flags &= ~TF_WASCRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); tp->t_flags |= TF_PREVVALID; } else
void tcp_timer_persist(void *xtp) { struct tcpcb *tp = xtp; struct inpcb *inp; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif INP_INFO_WLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); if (callout_pending(&tp->t_timers->tt_persist) || !callout_active(&tp->t_timers->tt_persist)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_persist); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } /* * Persistance timer into zero window. * Force a byte to be output, if possible. */ TCPSTAT_INC(tcps_persisttimeo); /* * Hack: if the peer is dead/unreachable, we do not * time out if the window is closed. After a full * backoff, drop the connection if the idle time * (no responses to probes) reaches the maximum * backoff that we would use if retransmitting. */ if (tp->t_rxtshift == TCP_MAXRXTSHIFT && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); tp = tcp_drop(tp, ETIMEDOUT); goto out; } /* * If the user has closed the socket then drop a persisting * connection after a much reduced timeout. */ if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { TCPSTAT_INC(tcps_persistdrop); tp = tcp_drop(tp, ETIMEDOUT); goto out; } tcp_setpersist(tp); tp->t_flags |= TF_FORCEDATA; (void) tcp_output(tp); tp->t_flags &= ~TF_FORCEDATA; out: #ifdef TCPDEBUG if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif if (tp != NULL) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); }
/* * Read from so's socket into sb_snd, updating all relevant sbuf fields * NOTE: This will only be called if it is select()ed for reading, so * a read() of 0 (or less) means it's disconnected */ int soread(PNATState pData, struct socket *so) { int n, nn, lss, total; struct sbuf *sb = &so->so_snd; size_t len = sb->sb_datalen - sb->sb_cc; struct iovec iov[2]; int mss = so->so_tcpcb->t_maxseg; STAM_PROFILE_START(&pData->StatIOread, a); STAM_COUNTER_RESET(&pData->StatIORead_in_1); STAM_COUNTER_RESET(&pData->StatIORead_in_2); QSOCKET_LOCK(tcb); SOCKET_LOCK(so); QSOCKET_UNLOCK(tcb); LogFlow(("soread: so = %R[natsock]\n", so)); Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); /* * No need to check if there's enough room to read. * soread wouldn't have been called if there weren't */ len = sb->sb_datalen - sb->sb_cc; iov[0].iov_base = sb->sb_wptr; iov[1].iov_base = 0; iov[1].iov_len = 0; if (sb->sb_wptr < sb->sb_rptr) { iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; if (iov[0].iov_len > mss) iov[0].iov_len -= iov[0].iov_len%mss; n = 1; } else { iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; /* Should never succeed, but... */ if (iov[0].iov_len > len) iov[0].iov_len = len; len -= iov[0].iov_len; if (len) { iov[1].iov_base = sb->sb_data; iov[1].iov_len = sb->sb_rptr - sb->sb_data; if (iov[1].iov_len > len) iov[1].iov_len = len; total = iov[0].iov_len + iov[1].iov_len; if (total > mss) { lss = total % mss; if (iov[1].iov_len > lss) { iov[1].iov_len -= lss; n = 2; } else { lss -= iov[1].iov_len; iov[0].iov_len -= lss; n = 1; } } else n = 2; } else { if (iov[0].iov_len > mss) iov[0].iov_len -= iov[0].iov_len%mss; n = 1; } } #ifdef HAVE_READV nn = readv(so->s, (struct iovec *)iov, n); #else nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0)); #endif Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn)); Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb)); if (nn <= 0) { /* * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that * _could_ mean that the connection is closed. But we will receive an * FD_CLOSE event later if the connection was _really_ closed. With * www.youtube.com I see this very often. Closing the socket too early * would be dangerous. */ int status; unsigned long pending = 0; status = ioctlsocket(so->s, FIONREAD, &pending); if (status < 0) Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno)); if (nn == 0 && (pending != 0)) { SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return 0; } if ( nn < 0 && soIgnorableErrorCode(errno)) { SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return 0; } else { int fUninitiolizedTemplate = 0; fUninitiolizedTemplate = RT_BOOL(( sototcpcb(so) && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY))); /* nn == 0 means peer has performed an orderly shutdown */ Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, errno, strerror(errno))); sofcantrcvmore(so); if (!fUninitiolizedTemplate) tcp_sockclosed(pData, sototcpcb(so)); else tcp_drop(pData, sototcpcb(so), errno); SOCKET_UNLOCK(so); STAM_PROFILE_STOP(&pData->StatIOread, a); return -1; } } STAM_STATS( if (n == 1) { STAM_COUNTER_INC(&pData->StatIORead_in_1); STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn); } else { STAM_COUNTER_INC(&pData->StatIORead_in_2); STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn); } );
void tcp_timer_keep(void *xtp) { struct tcpcb *tp = xtp; struct tcptemp *t_template; struct inpcb *inp; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif INP_INFO_WLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* * XXXRW: While this assert is in fact correct, bugs in the tcpcb * tear-down mean we need it as a work-around for races between * timers and tcp_discardcb(). * * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); */ if (inp == NULL) { tcp_timer_race++; INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } INP_WLOCK(inp); if (callout_pending(&tp->t_timers->tt_keep) || !callout_active(&tp->t_timers->tt_keep)) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_keep); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } /* * Keep-alive timer went off; send something * or drop connection if idle for too long. */ TCPSTAT_INC(tcps_keeptimeo); if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) goto dropit; /* * Send a packet designed to force a response * if the peer is up and reachable: * either an ACK if the connection is still alive, * or an RST if the peer has closed the connection * due to timeout or reboot. * Using sequence number tp->snd_una-1 * causes the transmitted zero-length segment * to lie outside the receive window; * by the protocol spec, this requires the * correspondent TCP to respond. */ TCPSTAT_INC(tcps_keepprobe); t_template = tcpip_maketemplate(inp); if (t_template) { tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0); free(t_template, M_TEMP); } callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), tcp_timer_keep, tp, INP_CPU(inp)); } else callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), tcp_timer_keep, tp, INP_CPU(inp)); #ifdef TCPDEBUG if (inp->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; dropit: TCPSTAT_INC(tcps_keepdrops); tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); }
/* Caller should be in critical section */ static struct tcpcb * tcp_timer_keep_handler(struct tcpcb *tp) { struct tcptemp *t_template; #ifdef TCPDEBUG int ostate = tp->t_state; #endif /* * Keep-alive timer went off; send something * or drop connection if idle for too long. */ tcpstat.tcps_keeptimeo++; if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((always_keepalive || (tp->t_flags & TF_KEEPALIVE) || (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE)) && tp->t_state <= TCPS_CLOSING) { if ((ticks - tp->t_rcvtime) >= tp->t_keepidle + tp->t_maxidle) goto dropit; /* * Send a packet designed to force a response * if the peer is up and reachable: * either an ACK if the connection is still alive, * or an RST if the peer has closed the connection * due to timeout or reboot. * Using sequence number tp->snd_una-1 * causes the transmitted zero-length segment * to lie outside the receive window; * by the protocol spec, this requires the * correspondent TCP to respond. */ tcpstat.tcps_keepprobe++; t_template = tcp_maketemplate(tp); if (t_template) { tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, NULL, tp->rcv_nxt, tp->snd_una - 1, 0); tcp_freetemplate(t_template); } tcp_callout_reset(tp, tp->tt_keep, tp->t_keepintvl, tcp_timer_keep); } else { tcp_callout_reset(tp, tp->tt_keep, tp->t_keepidle, tcp_timer_keep); } #ifdef TCPDEBUG if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif return tp; dropit: tcpstat.tcps_keepdrops++; tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif return tp; }