Example #1
0
/*
 * Retransmission timer callout for a TCP connection.
 *
 * arg is the struct tcpcb the timer belongs to.  The handler takes
 * softnet_lock and bails out immediately if the tcpcb is flagged
 * TF_DEAD or the TCPT_REXMT callout has not actually expired.
 *
 * If a path-MTU-discovery event is pending (TF_PMTUD_PEND) and the
 * recorded sequence number lies in [snd_una, snd_una + t_ourmss),
 * the deferred processing is performed instead of a backoff: a fake
 * ICMP message is handed to icmp_mtudisc() and every connection to
 * the same peer is notified through tcp_mtudisc().
 *
 * Otherwise the SACK scoreboard is cleared, the backoff shift is
 * incremented (the connection is dropped once it exceeds
 * TCP_MAXRXTSHIFT), the timer is re-armed with the backed-off value,
 * the send state is rewound to snd_una, the congestion-control
 * slow_retransmit hook is run and tcp_output() is called.
 *
 * Every return path releases the locks it acquired.
 */
void
tcp_timer_rexmt(void *arg)
{
	struct tcpcb *tp = arg;
	uint32_t rto;
#ifdef TCP_DEBUG
	struct socket *so = NULL;
	short ostate;
#endif

	mutex_enter(softnet_lock);
	if ((tp->t_flags & TF_DEAD) != 0) {
		mutex_exit(softnet_lock);
		return;
	}
	if (!callout_expired(&tp->t_timer[TCPT_REXMT])) {
		mutex_exit(softnet_lock);
		return;
	}

	KERNEL_LOCK(1, NULL);
	if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
	    SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_ourmss))) {
		extern struct sockaddr_in icmpsrc;
		struct icmp icmp;

		tp->t_flags &= ~TF_PMTUD_PEND;

		/* XXX create fake icmp message with relevant entries */
		icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
		icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
		icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
		icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
		icmp_mtudisc(&icmp, icmpsrc.sin_addr);

		/*
		 * Notify all connections to the same peer about
		 * new mss and trigger retransmit.
		 */
		in_pcbnotifyall(&tcbtable, icmpsrc.sin_addr, EMSGSIZE,
		    tcp_mtudisc);
		KERNEL_UNLOCK_ONE(NULL);
		mutex_exit(softnet_lock);
		return;
	}
#ifdef TCP_DEBUG
	/*
	 * Capture the socket and the pre-timeout state now: tp may be
	 * replaced by tcp_drop() below before the trace is emitted.
	 */
#ifdef INET
	if (tp->t_inpcb)
		so = tp->t_inpcb->inp_socket;
#endif
#ifdef INET6
	if (tp->t_in6pcb)
		so = tp->t_in6pcb->in6p_socket;
#endif
	ostate = tp->t_state;
#endif /* TCP_DEBUG */

	/*
	 * Clear the SACK scoreboard, reset FACK estimate.
	 */
	tcp_free_sackholes(tp);
	tp->snd_fack = tp->snd_una;

	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */

	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		TCP_STATINC(TCP_STAT_TIMEOUTDROP);
		tp = tcp_drop(tp, tp->t_softerror ?
		    tp->t_softerror : ETIMEDOUT);
		goto out;
	}
	TCP_STATINC(TCP_STAT_REXMTTIMEO);
	rto = TCP_REXMTVAL(tp);
	if (rto < tp->t_rttmin)
		rto = tp->t_rttmin;
	TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift],
	    tp->t_rttmin, TCPTV_REXMTMAX);
	TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);

	/*
	 * If we are losing and we are trying path MTU discovery,
	 * try turning it off.  This will avoid black holes in
	 * the network which suppress or fail to send "packet
	 * too big" ICMP messages.  We should ideally do
	 * lots more sophisticated searching to find the right
	 * value here...
	 */
	if (tp->t_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
		TCP_STATINC(TCP_STAT_PMTUBLACKHOLE);

#ifdef INET
		/* try turning PMTUD off */
		if (tp->t_inpcb)
			tp->t_mtudisc = 0;
#endif
#ifdef INET6
		/* try using IPv6 minimum MTU */
		if (tp->t_in6pcb)
			tp->t_mtudisc = 0;
#endif

		/* XXX: more sophisticated Black hole recovery code? */
	}

	/*
	 * If losing, let the lower level know and try for
	 * a better route.  Also, if we backed off this far,
	 * our srtt estimate is probably bogus.  Clobber it
	 * so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET
		if (tp->t_inpcb)
			in_losing(tp->t_inpcb);
#endif
#ifdef INET6
		if (tp->t_in6pcb)
			in6_losing(tp->t_in6pcb);
#endif
		/*
		 * This operation is not described in RFC2988.  The
		 * point is to keep srtt+4*rttvar constant, so we
		 * should shift right 2 bits to divide by 4, and then
		 * shift right one bit because the storage
		 * representation of rttvar is 1/16s vs 1/32s for
		 * srtt.
		 */
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_high = tp->snd_max;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Remember if we are retransmitting a SYN, because if
	 * we are, the initial congestion window must be set
	 * to 1 segment.
	 */
	if (tp->t_state == TCPS_SYN_SENT)
		tp->t_flags |= TF_SYN_REXMT;

	/*
	 * Adjust congestion control parameters.
	 */
	tp->t_congctl->slow_retransmit(tp);

	(void) tcp_output(tp);

 out:
#ifdef TCP_DEBUG
	/*
	 * tp is NULL if tcp_drop() returned no tcpcb, and so is still
	 * NULL if no PCB was attached above; check both before
	 * looking at the socket options.
	 */
	if (tp != NULL && so != NULL && (so->so_options & SO_DEBUG) != 0)
		tcp_trace(TA_USER, ostate, tp, NULL,
		    PRU_SLOWTIMO | (TCPT_REXMT << 8));
#endif
	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
}
//-------------------------------------------------------------------------//
// TCP timer processing.
//
// Dispatches on `timer` (TCPT_2MSL, TCPT_REXMT, TCPT_PERSIST, TCPT_KEEP)
// for the connection control block `tp`.
//
//   node     - node owning the connection; its transportData.tcp holds the
//              TCP layer settings (tcpUseKeepAliveProbes is read here).
//   tp       - connection control block; reassigned from the result of
//              tcp_close()/tcp_drop() when the connection is torn down.
//   timer    - index of the timer that expired.
//   tcp_now  - current TCP time, passed through to tcp_drop()/tcp_output().
//   tcp_stat - statistics block, passed through to the helpers.
//-------------------------------------------------------------------------//
static
struct tcpcb * tcp_timers(
    Node *node,
    struct tcpcb *tp,
    int timer,
    UInt32 tcp_now,
    struct tcpstat *tcp_stat)
{
    int rexmt;
    TransportDataTcp *tcpLayer = (TransportDataTcp *)
                                 node->transportData.tcp;

    switch (timer) {

    //
    // 2 MSL timeout in shutdown went off.  If we're closed but
    // still waiting for peer to close and connection has been idle
    // too long, or if 2MSL time is up from TIME_WAIT, delete connection
    // control block.  Otherwise, check again in a bit.
    //
    case TCPT_2MSL:
        if (tp->t_state != TCPS_TIME_WAIT &&
            tp->t_idle <= TCPTV_MAXIDLE)
            tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL;
        else {
            // printf("TCP: Connection closed by timer\n");
            tp = tcp_close(node, tp, tcp_stat);
        }
        break;

    //
    // Retransmission timer went off.  Message has not
    // been acked within retransmit interval.  Back off
    // to a longer retransmit interval and retransmit one segment.
    //
    case TCPT_REXMT:
        if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
            tp->t_rxtshift = TCP_MAXRXTSHIFT;
            //if (tcp_stat)
                //tcp_stat->tcps_timeoutdrop++;
            printf("TCP: Retransmission timer went off\n");
            tp = tcp_drop(node, tp, tcp_now, tcp_stat);
            break;
        }
        //if (tcp_stat)
            //tcp_stat->tcps_rexmttimeo++;
        rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];

        TCPT_RANGESET(tp->t_rxtcur, rexmt,
                      tp->t_rttmin, TCPTV_REXMTMAX);
        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;

        //
        // If we backed off this far,
        // our srtt estimate is probably bogus.  Clobber it
        // so we'll take the next rtt measurement as our srtt;
        // move the current srtt into rttvar to keep the current
        // retransmit times until then.
        //
        if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {

            tp->t_rttvar +=
                (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));

            tp->t_srtt = 0;
        }
        tp->snd_nxt = tp->snd_una;

        if (TCP_VARIANT_IS_SACK(tp) && tp->isSackFastRextOn) {
            TransportTcpSackRextTimeoutInit(tp);
            TransportTcpTrace(node, 0, 0, "Faxt: timeout");
        }

        // Force a segment to be sent.
        tp->t_flags |= TF_ACKNOW;

        // If timing a segment in this window, stop the timer.
        // The retransmitted segment shouldn't be timed.
        tp->t_rtt = 0;

        //
        // Close the congestion window down to one segment
        // (we'll open it by one segment for each ack we get).
        // Since we probably have a window's worth of unacked
        // data accumulated, this "slow start" keeps us from
        // dumping all that data as back-to-back packets (which
        // might overwhelm an intermediate gateway).
        //
        // There are two phases to the opening: Initially we
        // open by one mss on each ack.  This makes the window
        // size increase exponentially with time.  If the
        // window is larger than the path can handle, this
        // exponential growth results in dropped packet(s)
        // almost immediately.  To get more time between
        // drops but still "push" the network to take advantage
        // of improving conditions, we switch from exponential
        // to linear window opening at some threshold size.
        // For a threshold, we use half the current window
        // size, truncated to a multiple of the mss.
        //
        // (the minimum cwnd that will give us exponential
        // growth is 2 mss.  We don't allow the threshold
        // to go below this.)
        //
        {
            unsigned int win;
            win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
            if (win < 2)
                win = 2;
            tp->snd_cwnd = tp->t_maxseg;

            tp->snd_ssthresh = win * tp->t_maxseg;
            tp->t_partialacks = -1;
            tp->t_dupacks = 0;
        }
        tp->t_ecnFlags |= TF_CWND_REDUCED;
        TransportTcpTrace(node, 0, 0, "Rext: timeout");

        //
        // To eliminate the problem of multiple Fast Retransmits we use the
        // variable "send_high", whose initial value is the initial send
        // sequence number. After each retransmit timeout, the highest
        // sequence number transmitted so far is recorded in "send_high".
        //
        if (TCP_VARIANT_IS_NEWRENO(tp)) {
            tp->send_high = tp->snd_max;
        }

        tcp_output(node, tp, tcp_now, tcp_stat);
        break;

    //
    // Persistence timer into zero window.
    // Force a byte to be output, if possible.
    //
    case TCPT_PERSIST:
        //if (tcp_stat)
            //tcp_stat->tcps_persisttimeo++;
        //
        // Hack: if the peer is dead/unreachable, we do not
        // time out if the window is closed.  After a full
        // backoff, drop the connection if the idle time
        // (no responses to probes) reaches the maximum
        // backoff that we would use if retransmitting.
        //
        if (tp->t_rxtshift == TCP_MAXRXTSHIFT) {
            UInt32 maxidle = TCP_REXMTVAL(tp);
            if (maxidle < tp->t_rttmin)
                maxidle = tp->t_rttmin;
            maxidle *= tcp_totbackoff;
            if (tp->t_idle >= TCPTV_KEEP_IDLE ||
                tp->t_idle >= maxidle) {
                //if (tcp_stat)
                    //tcp_stat->tcps_persistdrop++;
                printf("TCP: Idle timer went off\n");
                tp = tcp_drop(node, tp, tcp_now, tcp_stat);
                break;
            }
        }
        tcp_setpersist(tp);
        tp->t_force = 1;
        tcp_output(node, tp, tcp_now, tcp_stat);
        tp->t_force = 0;
        break;

    //
    // Keep-alive timer went off; send something
    // or drop connection if idle for too long.
    //
    case TCPT_KEEP:
        //if (tcp_stat)
            //tcp_stat->tcps_keeptimeo++;
        //
        // The braces matter: without them only the printf is guarded and
        // the goto runs on every keep-alive expiry, dropping connections
        // that are established and healthy.
        //
        if (tp->t_state < TCPS_ESTABLISHED) {
            printf("TCP: Keep-alive timer went off before established\n");
            goto dropit;
        }
        if (tcpLayer->tcpUseKeepAliveProbes && tp->t_state <= TCPS_CLOSING) {

            //
            // If the connection has been idle for more than the sum of
            // TCPTV_KEEP_IDLE (set to 2 hours) and TCPTV_MAXIDLE
            // (set to the total time taken to send all the probes),
            // it's time to drop the connection.
            //
            if (tp->t_idle >= TCPTV_KEEP_IDLE + TCPTV_MAXIDLE) {
                printf("TCP: Keep-alive timer went off\n");
                goto dropit;
            }

            //
            // Send a packet designed to force a response
            // if the peer is up and reachable:
            // either an ACK if the connection is still alive,
            // or an RST if the peer has closed the connection
            // due to timeout or reboot.
            // Using sequence number tp->snd_una-1
            // causes the transmitted zero-length segment
            // to lie outside the receive window;
            // by the protocol spec, this requires the
            // correspondent TCP to respond.
            //
            //if (tcp_stat)
                //tcp_stat->tcps_keepprobe++;
            tcp_respond(node, tp, tp->t_template,
                        0, tp->rcv_nxt, tp->snd_una - 1,
                        0, tcp_stat);

            tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
        } else {
            //
            // If the tcpUseKeepAliveProbes is FALSE
            // or the connection state is greater than TCPS_CLOSING,
            // reset the keepalive timer to TCPTV_KEEP_IDLE.
            //
            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
        }
        break;
    dropit:
        //if (tcp_stat) {
            //
            // Note that this counter counts connection drops due to
            // failure in connection establishment and the keepalive
            // timer timeouts
            //
            //tcp_stat->tcps_keepdrops++;
        //}
        // printf("TCP: Unknown timer went off\n");
        tp = tcp_drop(node, tp, tcp_now, tcp_stat);
        break;
    }
Example #3
0
/*
 * TCP timer processing.
 */
static struct tcpcb *
tcp_timers(register struct tcpcb *tp, int timer)
{
	register int rexmt;

	DEBUG_CALL("tcp_timers");

	switch (timer) {

	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 */
	case TCPT_2MSL:
		if (tp->t_state != TCPS_TIME_WAIT &&
		    tp->t_idle <= TCP_MAXIDLE)
			tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL;
		else
			tp = tcp_close(tp);
		break;

	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	case TCPT_REXMT:

		/*
		 * XXXXX If a packet has timed out, then remove all the queued
		 * packets for that session.
		 */

		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
			/*
			 * This is a hack to suit our terminal server here at the uni of canberra
			 * since they have trouble with zeroes... It usually lets them through
			 * unharmed, but under some conditions, it'll eat the zeros.  If we
			 * keep retransmitting it, it'll keep eating the zeroes, so we keep
			 * retransmitting, and eventually the connection dies...
			 * (this only happens on incoming data)
			 *
			 * So, if we were gonna drop the connection from too many retransmits,
			 * don't... instead halve the t_maxseg, which might break up the NULLs and
			 * let them through
			 *
			 * *sigh*
			 */

			tp->t_maxseg >>= 1;
			if (tp->t_maxseg < 32) {
				/*
				 * We tried our best, now the connection must die!
				 */
				tp->t_rxtshift = TCP_MAXRXTSHIFT;
				tp = tcp_drop(tp, tp->t_softerror);
				/* tp->t_softerror : ETIMEDOUT); */ /* XXX */
				return (tp); /* XXX */
			}

			/*
			 * Set rxtshift to 6, which is still at the maximum
			 * backoff time
			 */
			tp->t_rxtshift = 6;
		}
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
		TCPT_RANGESET(tp->t_rxtcur, rexmt,
		    (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
		/*
		 * If losing, let the lower level know and try for
		 * a better route.  Also, if we backed off this far,
		 * our srtt estimate is probably bogus.  Clobber it
		 * so we'll take the next rtt measurement as our srtt;
		 * move the current srtt into rttvar to keep the current
		 * retransmit times until then.
		 */
		if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
			tp->t_srtt = 0;
		}
Example #4
0
/*
 * TCP timer processing.
 */
struct tcpcb*
tcp_timers(struct tcpcb *tp, int timer)
{
	int rexmt;

	switch (timer) {

	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
    * If TIME_WAIT is not set, this is FIN_WAIT_2 timer.
	 */
	case TCPT_2MSL:
		if (tp->t_state != TCPS_TIME_WAIT &&
		    tp->t_idle <= g_tcp_maxidle)
			tp->t_timer[TCPT_2MSL] = g_tcp_keepintvl;
		else
			tp = tcp_close(tp);
		break;

	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	case TCPT_REXMT:
		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
			tp->t_rxtshift = TCP_MAXRXTSHIFT;
			g_tcpstat.tcps_timeoutdrop++;
			tp = tcp_drop(tp, tp->t_softerror ?
			    tp->t_softerror : ETIMEDOUT);
			break;
		}
		g_tcpstat.tcps_rexmttimeo++;
		rexmt = TCP_REXMTVAL(tp) * g_tcp_backoff[tp->t_rxtshift];
		TCPT_RANGESET(tp->t_rxtcur, rexmt,
		    tp->t_rttmin, TCPTV_REXMTMAX);
		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
		/*
		 * If losing, let the lower level know and try for
		 * a better route.  Also, if we backed off this far,
		 * our srtt estimate is probably bogus.  Clobber it
		 * so we'll take the next rtt measurement as our srtt;
		 * move the current srtt into rttvar to keep the current
		 * retransmit times until then.
		 */
		if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
			in_losing(tp->t_inpcb);
			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
			tp->t_srtt = 0;
		}
		tp->snd_nxt = tp->snd_una;
		/*
		 * If timing a segment in this window, stop the timer.
		 */
		tp->t_rtt = 0;
		/*
		 * Close the congestion window down to one segment
		 * (we'll open it by one segment for each ack we get).
		 * Since we probably have a window's worth of unacked
		 * data accumulated, this "slow start" keeps us from
		 * dumping all that data as back-to-back packets (which
		 * might overwhelm an intermediate gateway).
		 *
		 * There are two phases to the opening: Initially we
		 * open by one mss on each ack.  This makes the window
		 * size increase exponentially with time.  If the
		 * window is larger than the path can handle, this
		 * exponential growth results in dropped packet(s)
		 * almost immediately.  To get more time between 
		 * drops but still "push" the network to take advantage
		 * of improving conditions, we switch from exponential
		 * to linear window opening at some threshhold size.
		 * For a threshhold, we use half the current window
		 * size, truncated to a multiple of the mss.
		 *
		 * (the minimum cwnd that will give us exponential
		 * growth is 2 mss.  We don't allow the threshhold
		 * to go below this.)
		 */
		{
		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
		}
		(void) tcp_output(tp);
		break;

	/*
	 * Persistance timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	case TCPT_PERSIST:
		g_tcpstat.tcps_persisttimeo++;
		/*
		 * Hack: if the peer is dead/unreachable, we do not
		 * time out if the window is closed.  After a full
		 * backoff, drop the connection if the idle time
		 * (no responses to probes) reaches the maximum
		 * backoff that we would use if retransmitting.
		 */
		if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
		    (tp->t_idle >= g_tcp_maxpersistidle ||
		    tp->t_idle >= TCP_REXMTVAL(tp) * g_tcp_totbackoff)) {
			g_tcpstat.tcps_persistdrop++;
			tp = tcp_drop(tp, ETIMEDOUT);
			break;
		}
		tcp_setpersist(tp);
		tp->t_force = 1;
		(void) tcp_output(tp);
		tp->t_force = 0;
		break;

	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	case TCPT_KEEP:
		g_tcpstat.tcps_keeptimeo++;
		if (tp->t_state < TCPS_ESTABLISHED) // connection-establishment timer.
			goto dropit;
		if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
		    tp->t_state <= TCPS_CLOSE_WAIT) { // keepalive timer.
		    	if (tp->t_idle >= g_tcp_keepidle + g_tcp_maxidle)
				   goto dropit;
			/*
			 * Send a packet designed to force a response
			 * if the peer is up and reachable:
			 * either an ACK if the connection is still alive,
			 * or an RST if the peer has closed the connection
			 * due to timeout or reboot.
			 * Using sequence number tp->snd_una-1
			 * causes the transmitted zero-length segment
			 * to lie outside the receive window;
			 * by the protocol spec, this requires the
			 * correspondent TCP to respond.
			 */
			g_tcpstat.tcps_keepprobe++;
			tcp_respond(tp, tp->t_template, (usn_mbuf_t *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			tp->t_timer[TCPT_KEEP] = g_tcp_keepintvl;
		} else
			tp->t_timer[TCPT_KEEP] = g_tcp_keepidle;
		break;
	dropit:
		g_tcpstat.tcps_keepdrops++;
		tp = tcp_drop(tp, ETIMEDOUT);
		break;
	}