Example #1
void
tcp_timer_delack(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);

	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    callout_pending(&tp->t_timers->tt_delack) ||
	    !callout_active(&tp->t_timers->tt_delack)) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_delack);

	tp->t_flags |= TF_ACKNOW;
	TCPSTAT_INC(tcps_delack);
	(void) tcp_output(tp);
	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
}
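
Every timer handler in these examples opens with the same guard: lock the inpcb, return early if the callout was rescheduled (pending) or stopped (not active), and only then deactivate it and perform the timer action. A minimal standalone sketch of that ordering, using plain flags in place of the kernel's struct callout (all names here are illustrative, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical model of a callout's state, not the kernel's struct callout. */
struct model_callout {
	bool pending;	/* rescheduled after this handler was queued */
	bool active;	/* still logically armed */
};

/* Returns true if the handler should proceed with its timer action. */
static bool
model_timer_guard(struct model_callout *c)
{
	/* A pending reschedule or a stopped callout means a stale run. */
	if (c->pending || !c->active)
		return false;
	c->active = false;	/* analogous to callout_deactivate() */
	return true;
}

int
main(void)
{
	struct model_callout c = { .pending = false, .active = true };

	printf("first run fires: %d\n", model_timer_guard(&c));	/* 1 */
	printf("second run fires: %d\n", model_timer_guard(&c));	/* 0 */
	return 0;
}
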
Example #2
void
tcp_timer_delack(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);

	inp = tp->t_inpcb;
	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_delack) ||
	    !callout_active(&tp->t_timers->tt_delack)) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_delack);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	tp->t_flags |= TF_ACKNOW;
	TCPSTAT_INC(tcps_delack);
	(void) tp->t_fb->tfb_tcp_output(tp);
	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
}
Example #3
void
tcp_twclose(struct tcptw *tw, int reuse)
{
    struct socket *so;
    struct inpcb *inp;

    /*
     * At this point, we are in one of two situations:
     *
     * (1) We have no socket, just an inpcb<->twtcp pair.  We can free
     *     all state.
     *
     * (2) We have a socket -- if we own a reference, release it and
     *     notify the socket layer.
     */
    inp = tw->tw_inpcb;
//ScenSim-Port//    KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
//ScenSim-Port//    KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
//ScenSim-Port//    INP_INFO_WLOCK_ASSERT(&V_tcbinfo);  /* tcp_tw_2msl_stop(). */
//ScenSim-Port//    INP_WLOCK_ASSERT(inp);

    tw->tw_inpcb = NULL;
    tcp_tw_2msl_stop(tw);
    inp->inp_ppcb = NULL;
    in_pcbdrop(inp);

    so = inp->inp_socket;
    if (so != NULL) {
        /*
         * If there's a socket, handle two cases: first, we own a
         * strong reference, which we will now release, or we don't
         * in which case another reference exists (XXXRW: think
         * about this more), and we don't need to take action.
         */
        if (inp->inp_flags & INP_SOCKREF) {
            inp->inp_flags &= ~INP_SOCKREF;
//ScenSim-Port//            INP_WUNLOCK(inp);
//ScenSim-Port//            ACCEPT_LOCK();
//ScenSim-Port//            SOCK_LOCK(so);
//ScenSim-Port//            KASSERT(so->so_state & SS_PROTOREF,
//ScenSim-Port//                ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
            so->so_state &= ~SS_PROTOREF;
            sofree(so);
        } else {
            /*
             * If we don't own the only reference, the socket and
             * inpcb need to be left around to be handled by
             * tcp_usr_detach() later.
             */
//ScenSim-Port//            INP_WUNLOCK(inp);
        }
    } else
        in_pcbfree(inp);
    TCPSTAT_INC(tcps_closed);
//ScenSim-Port//    crfree(tw->tw_cred);
//ScenSim-Port//    tw->tw_cred = NULL;
    if (reuse)
        return;
    uma_zfree(V_tcptw_zone, tw);
}
Example #4
/*
 * Common subroutine to open a TCP connection to the remote host specified
 * by the struct sockaddr_in pointed to by nam.  Call in_pcbbind to assign
 * a local port number if needed.  Call in_pcbconnect_setup to do the
 * routing and to choose a local host address (interface).  If there is an
 * existing incarnation of the same connection in TIME-WAIT state and if
 * the remote host was sending CC options and if the connection duration
 * was < MSL, then truncate the previous TIME-WAIT state and proceed.
 * Initialize connection parameters and enter SYN-SENT state.
 */
static int
tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct in_addr laddr;
	u_short lport;
	int error;

	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
	INP_WLOCK_ASSERT(inp);

	if (inp->inp_lport == 0) {
		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
		if (error)
			return error;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	laddr = inp->inp_laddr;
	lport = inp->inp_lport;
	error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
	    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
	if (error && oinp == NULL)
		return error;
	if (oinp)
		return EADDRINUSE;
	inp->inp_laddr = laddr;
	in_pcbrehash(inp);

	/*
	 * Compute window scaling to request:
	 * Scale to fit into sweet spot.  See tcp_syncache.c.
	 * XXX: This should move to tcp_output().
	 */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tp->t_state = TCPS_SYN_SENT;
	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
	tp->iss = tcp_new_isn(tp);
	tp->t_bw_rtseq = tp->iss;
	tcp_sendseqinit(tp);

	return 0;
}
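
The window-scaling loop in tcp_connect() picks the smallest shift that lets a TCP_MAXWIN-sized window cover the socket-buffer limit sb_max. A standalone sketch of the same arithmetic, with FreeBSD's values for the two constants inlined and an example sb_max (the function name is illustrative):

#include <stdio.h>

#define TCP_MAXWIN		65535	/* largest unscaled TCP window */
#define TCP_MAX_WINSHIFT	14	/* RFC 1323 limit on the shift */

/* Smallest shift s such that TCP_MAXWIN << s covers sb_max. */
static int
request_r_scale(unsigned long sb_max)
{
	int scale = 0;

	while (scale < TCP_MAX_WINSHIFT &&
	    ((unsigned long)TCP_MAXWIN << scale) < sb_max)
		scale++;
	return scale;
}

int
main(void)
{
	/* A 2 MB limit yields a shift of 6: 65535 << 5 falls just short. */
	printf("scale for 2MB: %d\n", request_r_scale(2UL * 1024 * 1024));
	return 0;
}
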
Example #5
/*
 * Allocate struct sackhole.
 */
static struct sackhole *
tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
{
    struct sackhole *hole;

    if (tp->snd_numholes >= V_tcp_sack_maxholes ||
        V_tcp_sack_globalholes >= V_tcp_sack_globalmaxholes) {
        TCPSTAT_INC(tcps_sack_sboverflow);
        return NULL;
    }

    hole = (struct sackhole *)uma_zalloc(V_sack_hole_zone, M_NOWAIT);
    if (hole == NULL)
        return NULL;

    hole->start = start;
    hole->end = end;
    hole->rxmit = start;

    tp->snd_numholes++;
    atomic_add_int(&V_tcp_sack_globalholes, 1);

    return hole;
}
Example #6
void
tcp_timer_rexmt(void * xtp)
{
	struct tcpcb *tp = xtp;
	CURVNET_SET(tp->t_vnet);
	int rexmt;
	struct inpcb *inp;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	inp = tp->t_inpcb;
	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_rexmt) ||
	    !callout_active(&tp->t_timers->tt_rexmt)) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_rexmt);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
	tcp_free_sackholes(tp);
	if (tp->t_fb->tfb_tcp_rexmit_tmr) {
		/* The stack has a timer action too. */
		(*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
	}
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		TCPSTAT_INC(tcps_timeoutdrop);
		if (tcp_inpinfo_lock_add(inp)) {
			tcp_inpinfo_lock_del(inp, tp);
			goto out;
		}
		tp = tcp_drop(tp, tp->t_softerror ?
			      tp->t_softerror : ETIMEDOUT);
		tcp_inpinfo_lock_del(inp, tp);
		goto out;
	}
	if (tp->t_state == TCPS_SYN_SENT) {
		/*
		 * If the SYN was retransmitted, indicate CWND to be
		 * limited to 1 segment in cc_conn_init().
		 */
		tp->snd_cwnd = 1;
	} else if (tp->t_rxtshift == 1) {
		/*
		 * first retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		if (IN_CONGRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASCRECOVERY;
		else
			tp->t_flags &= ~TF_WASCRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		tp->t_flags |= TF_PREVVALID;
	} else
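
The bad-retransmit window recorded above is ticks + (t_srtt >> (TCP_RTT_SHIFT + 1)): t_srtt is stored scaled by 2^TCP_RTT_SHIFT, so shifting by one extra bit descales it and halves it in a single operation, giving the RTT/2 interval the comment describes. A standalone check of that arithmetic (TCP_RTT_SHIFT is 5 in FreeBSD's tcp_var.h):

#include <stdio.h>

#define TCP_RTT_SHIFT	5	/* t_srtt is stored as srtt << 5 */

int
main(void)
{
	int srtt_ticks = 8;				/* true smoothed RTT */
	int t_srtt = srtt_ticks << TCP_RTT_SHIFT;	/* scaled, as stored */

	/* One extra bit of shift divides the descaled value by two. */
	printf("RTT/2 = %d ticks\n", t_srtt >> (TCP_RTT_SHIFT + 1));	/* 4 */
	return 0;
}
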
Example #7
void
tcp_timer_persist(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	inp = tp->t_inpcb;
	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_persist) ||
	    !callout_active(&tp->t_timers->tt_persist)) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_persist);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	TCPSTAT_INC(tcps_persisttimeo);
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		TCPSTAT_INC(tcps_persistdrop);
		if (tcp_inpinfo_lock_add(inp)) {
			tcp_inpinfo_lock_del(inp, tp);
			goto out;
		}
		tp = tcp_drop(tp, ETIMEDOUT);
		tcp_inpinfo_lock_del(inp, tp);
		goto out;
	}
	/*
	 * If the user has closed the socket then drop a persisting
	 * connection after a much reduced timeout.
	 */
	if (tp->t_state > TCPS_CLOSE_WAIT &&
	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
		TCPSTAT_INC(tcps_persistdrop);
		if (tcp_inpinfo_lock_add(inp)) {
			tcp_inpinfo_lock_del(inp, tp);
			goto out;
		}
		tp = tcp_drop(tp, ETIMEDOUT);
		tcp_inpinfo_lock_del(inp, tp);
		goto out;
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tp->t_fb->tfb_tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
	INP_WUNLOCK(inp);
out:
	CURVNET_RESTORE();
}
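
tcp_timer_persist() drops a probed connection on two independent idle thresholds: after the full retransmit backoff when probes go unanswered for long enough, and on a much shorter leash once the user has closed the socket. A standalone sketch of that decision, with illustrative tick values rather than the kernel defaults:

#include <stdbool.h>

#define TCP_MAXRXTSHIFT	12	/* full backoff reached */

/* Illustrative thresholds, in ticks; not the kernel defaults. */
static int tcp_maxpersistidle = 300 * 100;	/* ~5 minutes at hz=100 */
static int persmax = 60 * 100;			/* reduced post-close timeout */

static bool
persist_should_drop(int rxtshift, int idle_ticks, int rexmt_total,
    bool user_closed)
{
	/* Full backoff and the peer has been silent too long. */
	if (rxtshift == TCP_MAXRXTSHIFT &&
	    (idle_ticks >= tcp_maxpersistidle || idle_ticks >= rexmt_total))
		return true;
	/* Much shorter leash once the local user has closed the socket. */
	if (user_closed && idle_ticks >= persmax)
		return true;
	return false;
}

int
main(void)
{
	/* Full backoff and ~6 minutes of silence: drop. */
	return (persist_should_drop(TCP_MAXRXTSHIFT, 360 * 100, 500 * 100,
	    false) != true);
}
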
Example #8
void
tcp_timer_keep(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct tcptemp *t_template;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	inp = tp->t_inpcb;
	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_keep) ||
	    !callout_active(&tp->t_timers->tt_keep)) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_keep);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
		("%s: tp %p tcpcb can't be stopped here", __func__, tp));

	/*
	 * Because we don't regularly reset the keepalive callout in
	 * the ESTABLISHED state, it may be that we don't actually need
	 * to send a keepalive yet. If that occurs, schedule another
	 * call for the next time the keepalive timer might expire.
	 */
	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
		u_int idletime;

		idletime = ticks - tp->t_rcvtime;
		if (idletime < TP_KEEPIDLE(tp)) {
			callout_reset(&tp->t_timers->tt_keep,
			    TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp);
			INP_WUNLOCK(inp);
			CURVNET_RESTORE();
			return;
		}
	}

	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	TCPSTAT_INC(tcps_keeptimeo);
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		TCPSTAT_INC(tcps_keepprobe);
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
				    &t_template->tt_t, (struct mbuf *)NULL,
				    tp->rcv_nxt, tp->snd_una - 1, 0);
			free(t_template, M_TEMP);
		}
		callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
			      tcp_timer_keep, tp);
	} else
		callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
			      tcp_timer_keep, tp);

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return;

dropit:
	TCPSTAT_INC(tcps_keepdrops);

	if (tcp_inpinfo_lock_add(inp)) {
		tcp_inpinfo_lock_del(inp, tp);
		goto out;
	}
	tp = tcp_drop(tp, ETIMEDOUT);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
	tcp_inpinfo_lock_del(inp, tp);
out:
	CURVNET_RESTORE();
}
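
When the keepalive timer fires on an ESTABLISHED connection that has not actually been idle for TP_KEEPIDLE() ticks, the handler above re-arms itself for just the remaining interval instead of probing. A standalone sketch of that rescheduling arithmetic (names illustrative):

#include <stdio.h>

/*
 * Given the configured keepalive idle threshold and the ticks since the
 * last segment arrived, return 0 if a probe is due now, else the number
 * of ticks to sleep before checking again.
 */
static unsigned int
keepalive_next(unsigned int keepidle, unsigned int idletime)
{
	if (idletime >= keepidle)
		return 0;		/* idle long enough: probe now */
	return keepidle - idletime;	/* re-arm for the remainder */
}

int
main(void)
{
	/* With a 7200-tick threshold and 5000 idle ticks, re-arm for 2200. */
	printf("%u\n", keepalive_next(7200, 5000));
	return 0;
}
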
Example #9
void
tcp_timer_2msl(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	inp = tp->t_inpcb;
	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
	INP_WLOCK(inp);
	tcp_free_sackholes(tp);
	if (callout_pending(&tp->t_timers->tt_2msl) ||
	    !callout_active(&tp->t_timers->tt_2msl)) {
		INP_WUNLOCK(tp->t_inpcb);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_2msl);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long delete connection control block.  Otherwise, check
	 * again in a bit.
	 *
	 * If in TIME_WAIT state just ignore as this timeout is handled in
	 * tcp_tw_2msl_scan().
	 *
	 * If the fast FIN_WAIT_2 recycle is enabled, we are in FIN_WAIT_2,
	 * and the receiver has closed, there's no point in hanging onto a
	 * FIN_WAIT_2 socket.  Just close it, ignoring the fact that there
	 * were recent incoming segments.
	 */
	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb && tp->t_inpcb->inp_socket && 
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		TCPSTAT_INC(tcps_finwait2_drops);
		if (tcp_inpinfo_lock_add(inp)) {
			tcp_inpinfo_lock_del(inp, tp);
			goto out;
		}
		tp = tcp_close(tp);             
		tcp_inpinfo_lock_del(inp, tp);
		goto out;
	} else {
		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
			callout_reset(&tp->t_timers->tt_2msl,
				      TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
		} else {
			if (tcp_inpinfo_lock_add(inp)) {
				tcp_inpinfo_lock_del(inp, tp);
				goto out;
			}
			tp = tcp_close(tp);
			tcp_inpinfo_lock_del(inp, tp);
			goto out;
		}
	}

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);

	if (tp != NULL)
		INP_WUNLOCK(inp);
out:
	CURVNET_RESTORE();
}
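
tcp_timer_2msl() makes a three-way choice: recycle a FIN_WAIT_2 connection immediately when the fast-recycle knob is on and the receiver is gone, re-arm while the connection is within its idle allowance, or close it. A standalone sketch of that decision (enum and parameter names are illustrative):

#include <stdbool.h>

enum msl_action { MSL_CLOSE_NOW, MSL_REARM, MSL_CLOSE_IDLE };

/*
 * fast_recycle: sysctl-style knob; fin_wait_2 and cant_rcv_more mirror
 * the state test above; idle and maxidle are in ticks.
 */
static enum msl_action
msl_decide(bool fast_recycle, bool fin_wait_2, bool cant_rcv_more,
    int idle, int maxidle)
{
	if (fast_recycle && fin_wait_2 && cant_rcv_more)
		return MSL_CLOSE_NOW;	/* receiver is gone: don't linger */
	if (idle <= maxidle)
		return MSL_REARM;	/* still within allowance: recheck */
	return MSL_CLOSE_IDLE;		/* idle too long: tear down */
}

int
main(void)
{
	/* Dead receiver in FIN_WAIT_2 with the knob on: close immediately. */
	return (msl_decide(true, true, true, 0, 1000) != MSL_CLOSE_NOW);
}
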
Example #10
void
tcp_timer_rexmt(void * xtp)
{
	struct tcpcb *tp = xtp;
	CURVNET_SET(tp->t_vnet);
	int rexmt;
	int headlocked;
	struct inpcb *inp;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_RLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    callout_pending(&tp->t_timers->tt_rexmt) ||
	    !callout_active(&tp->t_timers->tt_rexmt)) {
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_rexmt);
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		TCPSTAT_INC(tcps_timeoutdrop);
		in_pcbref(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		INP_WUNLOCK(inp);
		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		if (in_pcbrele_wlocked(inp)) {
			INP_INFO_WUNLOCK(&V_tcbinfo);
			CURVNET_RESTORE();
			return;
		}
		if (inp->inp_flags & INP_DROPPED) {
			INP_WUNLOCK(inp);
			INP_INFO_WUNLOCK(&V_tcbinfo);
			CURVNET_RESTORE();
			return;
		}

		tp = tcp_drop(tp, tp->t_softerror ?
			      tp->t_softerror : ETIMEDOUT);
		headlocked = 1;
		goto out;
	}
	INP_INFO_RUNLOCK(&V_tcbinfo);
	headlocked = 0;
	if (tp->t_rxtshift == 1) {
		/*
		 * first retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		if (IN_CONGRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASCRECOVERY;
		else
			tp->t_flags &= ~TF_WASCRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		tp->t_flags |= TF_PREVVALID;
	} else
Example #11
int
tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	struct mbuf *mq, *mp;
	int flags, wakeup;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	/*
	 * XXX: tcp_reass() is rather inefficient with its data structures
	 * and should be rewritten (see NetBSD for optimizations).
	 */

	/*
	 * Call with th==NULL after become established to
	 * force pre-ESTABLISHED data up to user socket.
	 */
	if (th == NULL)
		goto present;

	M_ASSERTPKTHDR(m);
	KASSERT(*tlenp == m->m_pkthdr.len, ("%s: tlenp %u len %u", __func__,
	    *tlenp, m->m_pkthdr.len));

	/*
	 * Limit the number of segments that can be queued to reduce the
	 * potential for mbuf exhaustion. For best performance, we want to be
	 * able to queue a full window's worth of segments. The size of the
	 * socket receive buffer determines our advertised window and grows
	 * automatically when socket buffer autotuning is enabled. Use it as the
	 * basis for our queue limit.
	 * Always let the missing segment through which caused this queue.
	 * NB: Access to the socket buffer is left intentionally unlocked as we
	 * can tolerate stale information here.
	 */
	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
	    tp->t_segqlen + m->m_pkthdr.len >= sbspace(&so->so_rcv)) {
		char *s;

		TCPSTAT_INC(tcps_rcvreassfull);
		*tlenp = 0;
		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
		    NULL))) {
			log(LOG_DEBUG, "%s; %s: queue limit reached, "
			    "segment dropped\n", s, __func__);
			free(s, M_TCPLOG);
		}
		m_freem(m);
		return (0);
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	mp = NULL;
	for (mq = tp->t_segq; mq != NULL; mq = mq->m_nextpkt) {
		if (SEQ_GT(M_TCPHDR(mq)->th_seq, th->th_seq))
			break;
		mp = mq;
	}

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (mp != NULL) {
		int i;

		/* conversion to int (in i) handles seq wraparound */
		i = M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len - th->th_seq;
		if (i > 0) {
			if (i >= *tlenp) {
				TCPSTAT_INC(tcps_rcvduppack);
				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
				m_freem(m);
				/*
				 * Try to present any queued data
				 * at the left window edge to the user.
				 * This is needed after the 3-WHS
				 * completes.
				 */
				goto present;	/* ??? */
			}
			m_adj(m, i);
			*tlenp -= i;
			th->th_seq += i;
		}
	}
	tp->t_rcvoopack++;
	TCPSTAT_INC(tcps_rcvoopack);
	TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (mq) {
		struct mbuf *nq;
		int i;

		i = (th->th_seq + *tlenp) - M_TCPHDR(mq)->th_seq;
		if (i <= 0)
			break;
		if (i < mq->m_pkthdr.len) {
			M_TCPHDR(mq)->th_seq += i;
			m_adj(mq, i);
			tp->t_segqlen -= i;
			break;
		}

		nq = mq->m_nextpkt;
		tp->t_segqlen -= mq->m_pkthdr.len;
		m_freem(mq);
		if (mp)
			mp->m_nextpkt = nq;
		else
			tp->t_segq = nq;
		mq = nq;
	}

	/*
	 * Insert the new segment queue entry into place.  Try to collapse
	 * mbuf chains if segments are adjacent.
	 */
	if (mp) {
		if (M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len == th->th_seq)
			m_catpkt(mp, m);
		else {
			m->m_nextpkt = mp->m_nextpkt;
			mp->m_nextpkt = m;
			m->m_pkthdr.pkt_tcphdr = th;
		}
	} else {
		mq = tp->t_segq;
		tp->t_segq = m;
		if (mq && th->th_seq + *tlenp == M_TCPHDR(mq)->th_seq) {
			m->m_nextpkt = mq->m_nextpkt;
			mq->m_nextpkt = NULL;
			m_catpkt(m, mq);
		} else
			m->m_nextpkt = mq;
		m->m_pkthdr.pkt_tcphdr = th;
	}
	tp->t_segqlen += *tlenp;

present:
	/*
	 * Present data to user, advancing rcv_nxt through
	 * completed sequence space.
	 */
	if (!TCPS_HAVEESTABLISHED(tp->t_state))
		return (0);

	flags = 0;
	wakeup = 0;
	SOCKBUF_LOCK(&so->so_rcv);
	while ((mq = tp->t_segq) != NULL &&
	    M_TCPHDR(mq)->th_seq == tp->rcv_nxt) {
		tp->t_segq = mq->m_nextpkt;

		tp->rcv_nxt += mq->m_pkthdr.len;
		tp->t_segqlen -= mq->m_pkthdr.len;
		flags = M_TCPHDR(mq)->th_flags & TH_FIN;

		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			m_freem(mq);
		else {
			mq->m_nextpkt = NULL;
			sbappendstream_locked(&so->so_rcv, mq, 0);
			wakeup = 1;
		}
	}
	ND6_HINT(tp);
	if (wakeup)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (flags);
}
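
tcp_reass() depends on modular sequence arithmetic: both SEQ_GT() and the "conversion to int handles seq wraparound" trick rely on casting the unsigned difference to a signed integer. A minimal demonstration of why that comparison stays correct across the 2^32 wrap, with SEQ_GT written out as it is conventionally defined in tcp_seq.h:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;

/*
 * Conventional definition: compares correctly as long as the two
 * sequence numbers are within 2^31 of each other.
 */
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

int
main(void)
{
	tcp_seq before_wrap = 0xfffffff0u;	/* just below the 2^32 wrap */
	tcp_seq after_wrap = 0x00000010u;	/* just above it */

	/* Plain '>' gets this backwards; SEQ_GT does not. */
	printf("plain >  : %d\n", after_wrap > before_wrap);		/* 0 */
	printf("SEQ_GT   : %d\n", SEQ_GT(after_wrap, before_wrap));	/* 1 */
	return 0;
}
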
Example #12
void
tcp_timer_keep(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct tcptemp *t_template;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    callout_pending(&tp->t_timers->tt_keep) ||
	    !callout_active(&tp->t_timers->tt_keep)) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_keep);
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	TCPSTAT_INC(tcps_keeptimeo);
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		TCPSTAT_INC(tcps_keepprobe);
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
				    &t_template->tt_t, (struct mbuf *)NULL,
				    tp->rcv_nxt, tp->snd_una - 1, 0);
			free(t_template, M_TEMP);
		}
		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
		    tcp_timer_keep, tp, INP_CPU(inp));
	} else
		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
		    tcp_timer_keep, tp, INP_CPU(inp));

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return;

dropit:
	TCPSTAT_INC(tcps_keepdrops);
	tp = tcp_drop(tp, ETIMEDOUT);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	if (tp != NULL)
		INP_WUNLOCK(tp->t_inpcb);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}
Example #13
void
tcp_timer_2msl(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * XXXRW: Does this actually happen?
	 */
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	tcp_free_sackholes(tp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    callout_pending(&tp->t_timers->tt_2msl) ||
	    !callout_active(&tp->t_timers->tt_2msl)) {
		INP_WUNLOCK(tp->t_inpcb);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_2msl);
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 *
	 * If the fast FIN_WAIT_2 recycle is enabled, we are in FIN_WAIT_2,
	 * and the receiver has closed, there's no point in hanging onto a
	 * FIN_WAIT_2 socket.  Just close it, ignoring the fact that there
	 * were recent incoming segments.
	 */
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb && tp->t_inpcb->inp_socket && 
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		TCPSTAT_INC(tcps_finwait2_drops);
		tp = tcp_close(tp);             
	} else {
		if (tp->t_state != TCPS_TIME_WAIT &&
		    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
			callout_reset_on(&tp->t_timers->tt_2msl,
			    TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
		else
			tp = tcp_close(tp);
	}

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	if (tp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}
Example #14
void
tcp_timer_keep(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct tcptemp *t_template;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_RLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_keep) ||
	    !callout_active(&tp->t_timers->tt_keep)) {
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_keep);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
	KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
		("%s: tp %p keep callout should be running", __func__, tp));
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	TCPSTAT_INC(tcps_keeptimeo);
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		TCPSTAT_INC(tcps_keepprobe);
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
				    &t_template->tt_t, (struct mbuf *)NULL,
				    tp->rcv_nxt, tp->snd_una - 1, 0);
			free(t_template, M_TEMP);
		}
		if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
		    tcp_timer_keep, tp)) {
			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
		}
	} else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
	    tcp_timer_keep, tp)) {
		tp->t_timers->tt_flags &= ~TT_KEEP_RST;
	}

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return;

dropit:
	TCPSTAT_INC(tcps_keepdrops);
	tp = tcp_drop(tp, ETIMEDOUT);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
	if (tp != NULL)
		INP_WUNLOCK(tp->t_inpcb);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}
Example #15
void
tcp_timer_persist(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    callout_pending(&tp->t_timers->tt_persist) ||
	    !callout_active(&tp->t_timers->tt_persist)) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_persist);
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	TCPSTAT_INC(tcps_persisttimeo);
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		TCPSTAT_INC(tcps_persistdrop);
		tp = tcp_drop(tp, ETIMEDOUT);
		goto out;
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

out:
#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	if (tp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}
Example #16
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct in6_addr addr6;
	int error;

	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
	INP_WLOCK_ASSERT(inp);

	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
		if (error)
			return error;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 * in6_pcbladdr() also handles scope zone IDs.
	 */
	error = in6_pcbladdr(inp, nam, &addr6);
	if (error)
		return error;
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
				  &sin6->sin6_addr, sin6->sin6_port,
				  IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
				  ? &addr6
				  : &inp->in6p_laddr,
				  inp->inp_lport,  0, NULL);
	if (oinp)
		return EADDRINUSE;
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		inp->in6p_laddr = addr6;
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
	if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
		inp->inp_flow |=
		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
	in_pcbrehash(inp);

	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tp->t_state = TCPS_SYN_SENT;
	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
	tp->iss = tcp_new_isn(tp);
	tp->t_bw_rtseq = tp->iss;
	tcp_sendseqinit(tp);

	return 0;
}
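
tcp6_connect() installs the random flow label by clearing and re-setting only the masked bits of inp_flow (the kernel applies htonl() because inp_flow is kept in network byte order). A host-order sketch of the same clear-then-set bit manipulation over the 20-bit flow-label field:

#include <stdint.h>
#include <stdio.h>

#define FLOWLABEL_MASK	0x000fffffu	/* low 20 bits carry the flow label */

/* Replace the flow-label bits of 'flow' while preserving the rest. */
static uint32_t
set_flowlabel(uint32_t flow, uint32_t label)
{
	flow &= ~FLOWLABEL_MASK;		/* clear the old label */
	flow |= (label & FLOWLABEL_MASK);	/* install the new one */
	return flow;
}

int
main(void)
{
	/* 0x60000000 keeps its version bits; the label becomes 0x12345. */
	printf("%#x\n", (unsigned)set_flowlabel(0x60000000u, 0x12345u));
	return 0;
}
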
Example #17
/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCPS_ESTABLISHED.
 *
 * The ISNs are from after the exchange of SYNs, i.e., the true ISN + 1.
 */
void
make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
    uint16_t opt)
{
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	long bufsize;
	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
	uint16_t tcpopt = be16toh(opt);
	struct flowc_tx_params ftxp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_SYN_SENT ||
	    tp->t_state == TCPS_SYN_RECEIVED,
	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));

	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
	    __func__, toep->tid, toep, inp);

	tp->t_state = TCPS_ESTABLISHED;
	tp->t_starttime = ticks;
	TCPSTAT_INC(tcps_connects);

	tp->irs = irs;
	tcp_rcvseqinit(tp);
	tp->rcv_wnd = toep->rx_credits << 10;
	tp->rcv_adv += tp->rcv_wnd;
	tp->last_ack_sent = tp->rcv_nxt;

	/*
	 * If we were unable to send all rx credits via opt0, save the remainder
	 * in rx_credits so that they can be handed over with the next credit
	 * update.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	bufsize = select_rcv_wnd(so);
	SOCKBUF_UNLOCK(&so->so_rcv);
	toep->rx_credits = bufsize - tp->rcv_wnd;

	tp->iss = iss;
	tcp_sendseqinit(tp);
	tp->snd_una = iss + 1;
	tp->snd_nxt = iss + 1;
	tp->snd_max = iss + 1;

	assign_rxopt(tp, tcpopt);

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
		bufsize = V_tcp_autosndbuf_max;
	else
		bufsize = sbspace(&so->so_snd);
	SOCKBUF_UNLOCK(&so->so_snd);

	ftxp.snd_nxt = tp->snd_nxt;
	ftxp.rcv_nxt = tp->rcv_nxt;
	ftxp.snd_space = bufsize;
	ftxp.mss = tp->t_maxseg;
	send_flowc_wr(toep, &ftxp);

	soisconnected(so);
}
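
make_established() converts the TOE's receive credits to a window in bytes with rx_credits << 10 and then stashes whatever part of the socket buffer could not be advertised yet, to be handed over in a later credit update. A tiny sketch of that bookkeeping, assuming (as the shift implies) one credit per KB:

#include <stdio.h>

/*
 * Assumed unit: one hardware receive credit == 1 KB (hence the << 10).
 * Returns the part of the socket buffer not yet advertised, in bytes.
 */
static long
leftover_rx_window(long sockbuf_bytes, unsigned int rx_credits)
{
	long rcv_wnd = (long)rx_credits << 10;	/* credits -> bytes */

	return sockbuf_bytes - rcv_wnd;		/* remainder, in bytes */
}

int
main(void)
{
	/* 256 KB buffer with 192 credits (192 KB) leaves 64 KB outstanding. */
	printf("%ld\n", leftover_rx_window(256 * 1024, 192));
	return 0;
}
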