コード例 #1
0
ファイル: uipc_usrreq.c プロジェクト: UnitedMarsupials/kame
static int
uipc_rcvd(struct socket *so, int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0)
		return EINVAL;
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/

	case SOCK_STREAM:
		if (unp->unp_conn == 0)
			break;
		so2 = unp->unp_conn->unp_socket;
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
		unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
		so2->so_snd.sb_hiwat += unp->unp_cc - so->so_rcv.sb_cc;
		(void)chgsbsize(so2->so_cred->cr_uid,
		    (rlim_t)unp->unp_cc - so->so_rcv.sb_cc);
		unp->unp_cc = so->so_rcv.sb_cc;
		sowwakeup(so2);
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
コード例 #2
0
/*
 * Mark a socket as connected.
 *
 * The caller must hold the socket lock (and the lock shared with the
 * listening head, if there is one); both are asserted.
 *
 * If the socket is not sitting on its head's so_q0 queue, sleepers on
 * the socket itself are woken.  Otherwise the socket is either moved to
 * the head's queue 1 and the head is woken, or, when SO_ACCEPTFILTER is
 * set on the socket, the head's accept-filter callback is installed as
 * the receive upcall and invoked once.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;

	/* Not pending on a listener's so_q0: wake users of this socket. */
	if (head == NULL || so->so_onq != &head->so_q0) {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
		return;
	}

	/* Accept filter requested: hand the socket to the filter upcall. */
	if ((so->so_options & SO_ACCEPTFILTER) != 0) {
		so->so_upcall =
		    head->so_accf->so_accept_filter->accf_callback;
		so->so_upcallarg = head->so_accf->so_accept_filter_arg;
		so->so_rcv.sb_flags |= SB_UPCALL;
		so->so_options &= ~SO_ACCEPTFILTER;
		(*so->so_upcall)(so, so->so_upcallarg,
		    POLLIN|POLLRDNORM, M_DONTWAIT);
		return;
	}

	/* Move from queue 0 to queue 1 of the head and wake the head. */
	soqremque(so, 0);
	soqinsque(head, so, 1);
	sorwakeup(head);
	cv_broadcast(&head->so_cv);
}
コード例 #3
0
/*
 * Callout handler for the Nagle timer; data is the struct sdp_sock.
 *
 * Does nothing if the callout is no longer active.  When there is an
 * outstanding unacked count, it is cleared, pending sends are posted and
 * writers are woken (unless the connection is in TCPS_CLOSED, in which
 * case the handler returns without re-arming).  The timer is re-armed
 * whenever the send buffer's sb_sndptr is set.
 */
static void
sdp_nagle_timeout(void *data)
{
	struct sdp_sock *ssk = data;
	struct socket *sk = ssk->socket;

	sdp_dbg_data(sk, "last_unacked = %ld\n", ssk->nagle_last_unacked);

	if (!callout_active(&ssk->nagle_timer))
		return;
	callout_deactivate(&ssk->nagle_timer);

	if (ssk->nagle_last_unacked) {
		if (ssk->state == TCPS_CLOSED)
			return;

		ssk->nagle_last_unacked = 0;
		sdp_post_sends(ssk, M_NOWAIT);
		sowwakeup(ssk->socket);
	}

	/* Re-arm while the send buffer still has a send pointer set. */
	if (sk->so_snd.sb_sndptr) {
		callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT,
		    sdp_nagle_timeout, ssk);
	}
}
コード例 #4
0
ファイル: udp_usrreq.c プロジェクト: iHaD/DragonFlyBSD
/*
 * Notify a udp user of an asynchronous error;
 * just wake up so that he can collect error status.
 */
void
udp_notify(struct inpcb *inp, int error)
{
	inp->inp_socket->so_error = error;
	sorwakeup(inp->inp_socket);
	sowwakeup(inp->inp_socket);
}
コード例 #5
0
ファイル: uipc_socket2.c プロジェクト: ajinkya93/OpenBSD
/*
 * Flag the socket as unable to send any more data and wake anyone
 * waiting to write on it.
 */
void
socantsendmore(struct socket *so)
{
	so->so_state = so->so_state | SS_CANTSENDMORE;
	sowwakeup(so);
}
コード例 #6
0
ファイル: sdp_tx.c プロジェクト: AhmadTux/freebsd
/*
 * Drain the TX completion queue of an SDP socket.
 *
 * The caller must hold the SDP write lock (asserted).  Completions are
 * polled in batches of SDP_NUM_WC and each one is handed to
 * sdp_process_tx_wc(); polling stops as soon as a batch comes back
 * short.  If at least one completion was handled, further sends are
 * posted and writers on the socket are woken.
 *
 * Returns the number of work completions processed, or 0 if the TX
 * completion queue no longer exists.
 */
static int
sdp_process_tx_cq(struct sdp_sock *ssk)
{
	struct ib_wc ibwc[SDP_NUM_WC];
	int n, i;
	int wc_processed = 0;

	SDP_WLOCK_ASSERT(ssk);

	if (!ssk->tx_ring.cq) {
		sdp_dbg(ssk->socket, "tx irq on destroyed tx_cq\n");
		return 0;
	}

	do {
		n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc);
		for (i = 0; i < n; ++i) {
			sdp_process_tx_wc(ssk, ibwc + i);
			wc_processed++;
		}
	} while (n == SDP_NUM_WC);	/* a full batch may mean more pending */

	if (wc_processed) {
		sdp_post_sends(ssk, M_DONTWAIT);
		/*
		 * Pass the socket explicitly: no local named "sk" exists in
		 * this function, so the previous argument only compiled when
		 * the profiling macro discarded its parameters.
		 */
		sdp_prf1(ssk->socket, NULL, "Waking sendmsg. inflight=%d",
		    (u32) tx_ring_posted(ssk));
		sowwakeup(ssk->socket);
	}

	return wc_processed;
}
コード例 #7
0
/*
 * socantsendmore: record that no further data will be sent on the
 * socket and wake any writers.  Normally applied by the protocol code
 * (e.g. from pr_shutdown()) once the user has told the system that no
 * more data is to be sent.
 *
 * The socket lock must be held by the caller (asserted).
 */
void
socantsendmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state = so->so_state | SS_CANTSENDMORE;
	sowwakeup(so);
}
コード例 #8
0
ファイル: rfcomm_socket.c プロジェクト: ryo/netbsd-src
/*
 * rfcomm_complete(arg, length)
 *
 * arg is the socket.  length bytes have been sent, so drop them from
 * the socket's send buffer and wake any writers.
 */
static void
rfcomm_complete(void *arg, int length)
{
	struct socket *so = (struct socket *)arg;
	struct sockbuf *snd = &so->so_snd;

	sbdrop(snd, length);
	sowwakeup(so);
}
コード例 #9
0
ファイル: sdp_rx.c プロジェクト: Lxg1582/freebsd
/*
 * Wake writers on the socket once the TX credit count has reached
 * min_bufs.  Nothing is done when credits are below that threshold or
 * when no socket is attached.
 */
static void
sdp_bzcopy_write_space(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;

	if (tx_credits(ssk) < ssk->min_bufs)
		return;
	if (!sk)
		return;

	sowwakeup(sk);
}
コード例 #10
0
ファイル: l2cap_socket.c プロジェクト: sofuture/bitrig
/*
 * Completion callback: arg is the socket and count is the number of
 * records to drop from the front of its send buffer.  Writers are woken
 * afterwards, even when count is zero or negative.
 */
static void
l2cap_complete(void *arg, int count)
{
	struct socket *so = arg;
	int i;

	for (i = 0; i < count; i++)
		sbdroprecord(&so->so_snd);

	sowwakeup(so);
}
コード例 #11
0
/*
 * Completion callback: arg is the socket and num is the number of
 * records to remove from its send buffer.  Writers are always woken
 * once the records have been dropped.
 */
static void
sco_complete(void *arg, int num)
{
	struct socket *so = (struct socket *)arg;

	for (; num > 0; num--)
		sbdroprecord(&so->so_snd);

	sowwakeup(so);
}
コード例 #12
0
ファイル: uipc_socket2.c プロジェクト: ajinkya93/OpenBSD
/*
 * Mark the socket as disconnecting: clear the connecting flag, forbid
 * further sends and receives, then wake sleepers on so_timeo as well as
 * writers and readers.
 */
void
soisdisconnecting(struct socket *so)
{
	so->so_state = (so->so_state & ~SS_ISCONNECTING) |
	    (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);

	wakeup(&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
コード例 #13
0
ファイル: uipc_socket2.c プロジェクト: 0871087123/rtems
/*
 * Mark the socket as fully disconnected: drop every connection-state
 * flag, forbid further sends and receives, then wake connection
 * waiters, writers and readers.
 */
void
soisdisconnected(struct socket *so)
{
	so->so_state =
	    (so->so_state &
	     ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING)) |
	    (SS_CANTRCVMORE|SS_CANTSENDMORE);

	soconnwakeup(so);
	sowwakeup(so);
	sorwakeup(so);
}
コード例 #14
0
/*
 * Mark the socket as fully disconnected.  The connecting, connected and
 * disconnecting flags are cleared; SS_ISDISCONNECTED is set together
 * with the can't-send/can't-receive flags.  Everybody sleeping on the
 * socket's condvar, plus writers and readers, is woken.
 *
 * The socket lock must be held by the caller (asserted).
 */
void
soisdisconnected(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state =
	    (so->so_state &
	     ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING)) |
	    (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);

	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}
コード例 #15
0
ファイル: sdp_tx.c プロジェクト: AhmadTux/freebsd
/*
 * Handle a single TX work completion.
 *
 * Send completions (SDP_OP_SEND set in wr_id) are passed on to
 * sdp_handle_send_comp().  With SDP_ZCOPY, RDMA-read completions clear
 * the in-flight busy flag and wake writers.  Anything else is counted
 * as a keepalive probe completion; a failure status other than
 * IB_WC_WR_FLUSH_ERR is reported on the socket as ECONNRESET.
 */
static inline void
sdp_process_tx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{

	if (likely(wc->wr_id & SDP_OP_SEND)) {
		sdp_handle_send_comp(ssk, wc);
		return;
	}

#ifdef SDP_ZCOPY
	if (wc->wr_id & SDP_OP_RDMA) {
		/* TODO: handle failed RDMA read cqe */

		sdp_dbg_data(ssk->socket,
		    "TX comp: RDMA read. status: %d\n", wc->status);
		/*
		 * Pass the socket explicitly: no local named "sk" exists in
		 * this function, so the previous argument only compiled when
		 * the profiling macro discarded its parameters.
		 */
		sdp_prf1(ssk->socket, NULL, "TX comp: RDMA read");

		if (!ssk->tx_ring.rdma_inflight) {
			sdp_warn(ssk->socket, "ERROR: unexpected RDMA read\n");
			return;
		}

		if (!ssk->tx_ring.rdma_inflight->busy) {
			sdp_warn(ssk->socket,
			    "ERROR: too many RDMA read completions\n");
			return;
		}

		/* Only last RDMA read WR is signalled. Order is guaranteed -
		 * therefore if Last RDMA read WR is completed - all other
		 * have, too */
		ssk->tx_ring.rdma_inflight->busy = 0;
		sowwakeup(ssk->socket);
		sdp_dbg_data(ssk->socket, "woke up sleepers\n");
		return;
	}
#endif

	/* Keepalive probe sent cleanup */
	sdp_cnt(sdp_keepalive_probes_sent);

	/* A zero status means the probe completed successfully. */
	if (likely(!wc->status))
		return;

	sdp_dbg(ssk->socket, " %s consumes KEEPALIVE status %d\n",
			__func__, wc->status);

	/* Flush errors are not reported to the socket. */
	if (wc->status == IB_WC_WR_FLUSH_ERR)
		return;

	sdp_notify(ssk, ECONNRESET);
}
コード例 #16
0
/*
 * Mark the socket as disconnecting: clear the connecting flag, forbid
 * further sends and receives, then wake writers and readers.  The
 * condvar broadcast is compiled out.
 *
 * The socket lock must be held by the caller (asserted).
 */
void
soisdisconnecting(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state = (so->so_state & ~SS_ISCONNECTING) |
	    (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
#if 0
	cv_broadcast(&so->so_cv);
#endif
	sowwakeup(so);
	sorwakeup(so);
}
コード例 #17
0
ファイル: uipc_socket2.c プロジェクト: ajinkya93/OpenBSD
void
soisconnected(struct socket *so)
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup_one(&head->so_timeo);
	} else {
		wakeup(&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
コード例 #18
0
ファイル: udp_usrreq.c プロジェクト: ChristosKa/freebsd
/*
 * Notify a udp user of an asynchronous error; just wake up so that they can
 * collect error status.
 */
struct inpcb *
udp_notify(struct inpcb *inp, int errno)
{

	/*
	 * While udp_ctlinput() always calls udp_notify() with a read lock
	 * when invoking it directly, in_pcbnotifyall() currently uses write
	 * locks due to sharing code with TCP.  For now, accept either a read
	 * or a write lock, but a read lock is sufficient.
	 */
	INP_LOCK_ASSERT(inp);

	inp->inp_socket->so_error = errno;
	sorwakeup(inp->inp_socket);
	sowwakeup(inp->inp_socket);
	return (inp);
}
コード例 #19
0
/*
 * Drain operation for a socket-backed file.
 *
 * When the file has a socket attached, the socket is flagged
 * SS_DRAINING under the socket lock and sleepers on so_timeo, readers
 * and writers are all woken.  Always returns 0; ctx is unused.
 */
static int
soo_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
    struct socket *so = (struct socket *)fp->f_fglob->fg_data;

    if (!so)
        return (0);

    socket_lock(so, 1);

    so->so_state |= SS_DRAINING;
    wakeup((caddr_t)&so->so_timeo);
    sorwakeup(so);
    sowwakeup(so);

    socket_unlock(so, 1);

    return (0);
}
コード例 #20
0
ファイル: uipc_socket2.c プロジェクト: 0871087123/rtems
void
soisconnected(struct socket *so)
{
	register struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && (so->so_state & SS_INCOMP)) {
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		so->so_state &= ~SS_INCOMP;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
		sorwakeup(head);
		soconnwakeup(head);
	} else {
		soconnwakeup(so);
		sorwakeup(so);
		sowwakeup(so);
	}
}
コード例 #21
0
ファイル: usnet_tcp_input.c プロジェクト: venkynv/libusnet
/*
 * TCP input routine, follows pages 65-76 of the
 * protocol specification dated September, 1981 very closely.
 */
void
tcp_input(usn_mbuf_t *m, int iphlen)
{
	struct tcpiphdr *ti;
	struct inpcb *inp;
	u_char *optp = NULL;
	int optlen;
	int len, tlen, off;
	struct tcpcb *tp = 0;
	int tiflags;
	struct usn_socket *so = 0;
	int todrop, acked, ourfinisacked;
   int needoutput = 0;
	short ostate;
	struct usn_in_addr laddr;
	int dropsocket = 0;
	int iss = 0;
	u_long tiwin, ts_val, ts_ecr;
	int ts_present = 0;

   (void)needoutput;
	g_tcpstat.tcps_rcvtotal++;
 
	// Get IP and TCP header together in first mbuf.
	// Note: IP leaves IP header in first mbuf.
	ti = mtod(m, struct tcpiphdr *);
	if (iphlen > sizeof (usn_ip_t))
		ip_stripoptions(m, (usn_mbuf_t *)0);
	if (m->mlen < sizeof (struct tcpiphdr)) {
		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
			g_tcpstat.tcps_rcvshort++;
			return;
		}
		ti = mtod(m, struct tcpiphdr *);
	}

#ifdef DUMP_PAYLOAD
   dump_chain(m,"tcp");
#endif

   /*
	 * Checksum extended TCP header and data.
    */
	tlen = ntohs(((usn_ip_t *)ti)->ip_len);
	len = sizeof (usn_ip_t) + tlen;
	ti->ti_next = ti->ti_prev = 0;
	ti->ti_x1 = 0;
	ti->ti_len = (u_short)tlen;
	HTONS(ti->ti_len);
   ti->ti_sum = in_cksum(m, len);
	if (ti->ti_sum) {
		g_tcpstat.tcps_rcvbadsum++;
		goto drop;
	}
   /*
	 * Check that TCP offset makes sense,
	 * pull out TCP options and adjust length. XXX
    */
	off = ti->ti_off << 2;
	if (off < sizeof (struct tcphdr) || off > tlen) {
		g_tcpstat.tcps_rcvbadoff++;
		goto drop;
	}
	tlen -= off;
	ti->ti_len = tlen;
	if (off > sizeof (struct tcphdr)) {
		if (m->mlen < sizeof(usn_ip_t) + off) {
			if ((m = m_pullup(m, sizeof (usn_ip_t) + off)) == 0) {
				g_tcpstat.tcps_rcvshort++;
				return;
			}
			ti = mtod(m, struct tcpiphdr *);
		}
		optlen = off - sizeof (struct tcphdr);
		optp = mtod(m, u_char *) + sizeof (struct tcpiphdr);

      //	Do quick retrieval of timestamp options ("options
      // prediction?"). If timestamp is the only option and it's
      // formatted as recommended in RFC 1323 appendix A, we
      // quickly get the values now and not bother calling
      // tcp_dooptions(), etc.
		if ((optlen == TCPOLEN_TSTAMP_APPA ||
		     (optlen > TCPOLEN_TSTAMP_APPA &&
			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
		     *(u_int *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
		     (ti->ti_flags & TH_SYN) == 0) {
			ts_present = 1;
			ts_val = ntohl(*(u_long *)(optp + 4));
			ts_ecr = ntohl(*(u_long *)(optp + 8));
			optp = NULL;	// we've parsed the options
		}
	}
	tiflags = ti->ti_flags;

	// Convert TCP protocol specific fields to host format.
	NTOHL(ti->ti_seq);
	NTOHL(ti->ti_ack);
	NTOHS(ti->ti_win);
	NTOHS(ti->ti_urp);

	// Locate pcb for segment.
findpcb:
	inp = g_tcp_last_inpcb;
	if (inp->inp_lport != ti->ti_dport ||
	    inp->inp_fport != ti->ti_sport ||
	    inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
	    inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
		inp = in_pcblookup(&g_tcb, ti->ti_src, ti->ti_sport,
		    ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
		if (inp)
			g_tcp_last_inpcb = inp;
		++g_tcpstat.tcps_pcbcachemiss;
	}

	// If the state is CLOSED (i.e., TCB does not exist) then
	// all data in the incoming segment is discarded.
	// If the TCB exists but is in CLOSED state, it is embryonic,
	// but should either do a listen or a connect soon.
	if (inp == 0)
		goto dropwithreset;

	tp = intotcpcb(inp);

   DEBUG("found inp cb, laddr=%x, lport=%d, faddr=%x,"
         " fport=%d, tp_state=%d, tp_flags=%d",
         inp->inp_laddr.s_addr,
         inp->inp_lport,
         inp->inp_faddr.s_addr,
         inp->inp_fport, tp->t_state, tp->t_flags);

	if (tp == 0)
		goto dropwithreset;
	if (tp->t_state == TCPS_CLOSED)
		goto drop;
	
	// Unscale the window into a 32-bit value. 
	if ((tiflags & TH_SYN) == 0)
		tiwin = ti->ti_win << tp->snd_scale;
	else
		tiwin = ti->ti_win;

	so = inp->inp_socket;
   DEBUG("socket info, options=%x", so->so_options);

	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
		if (so->so_options & SO_DEBUG) {
			ostate = tp->t_state;
			g_tcp_saveti = *ti;
		}
		if (so->so_options & SO_ACCEPTCONN) {
			if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
				// Note: dropwithreset makes sure we don't
				// send a reset in response to a RST.
				if (tiflags & TH_ACK) {
					g_tcpstat.tcps_badsyn++;
					goto dropwithreset;
				}
            DEBUG("SYN is expected, tiflags=%d", tiflags);
				goto drop;
			}
			so = sonewconn(so, 0);
			if (so == 0) {
            DEBUG("failed to create new connection, tiflags=%d", tiflags);
				goto drop;
         }

			// Mark socket as temporary until we're
			// committed to keeping it.  The code at
			// ``drop'' and ``dropwithreset'' check the
			// flag dropsocket to see if the temporary
			// socket created here should be discarded.
			// We mark the socket as discardable until
			// we're committed to it below in TCPS_LISTEN.
			dropsocket++;
			inp = (struct inpcb *)so->so_pcb;
			inp->inp_laddr = ti->ti_dst;
			inp->inp_lport = ti->ti_dport;

         // BSD >= 4.3
			inp->inp_options = ip_srcroute();

			tp = intotcpcb(inp);
			tp->t_state = TCPS_LISTEN;

			// Compute proper scaling value from buffer space
			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
			   TCP_MAXWIN << tp->request_r_scale < so->so_rcv->sb_hiwat)
				tp->request_r_scale++;
		}
	}

	// Segment received on connection.
	// Reset idle time and keep-alive timer.
	tp->t_idle = 0;
	tp->t_timer[TCPT_KEEP] = g_tcp_keepidle;

	// Process options if not in LISTEN state,
	// else do it below (after getting remote address).
	if (optp && tp->t_state != TCPS_LISTEN)
		tcp_dooptions(tp, optp, optlen, ti,
			&ts_present, &ts_val, &ts_ecr);

	// Header prediction: check for the two common cases
	// of a uni-directional data xfer.  If the packet has
	// no control flags, is in-sequence, the window didn't
	// change and we're not retransmitting, it's a
	// candidate.  If the length is zero and the ack moved
	// forward, we're the sender side of the xfer.  Just
	// free the data acked & wake any higher level process
	// that was blocked waiting for space.  If the length
	// is non-zero and the ack didn't move, we're the
	// receiver side.  If we're getting packets in-order
	// (the reassembly queue is empty), add the data to
	// the socket buffer and note that we need a delayed ack.
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
	    (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
	    ti->ti_seq == tp->rcv_nxt &&
	    tiwin && tiwin == tp->snd_wnd &&
	    tp->snd_nxt == tp->snd_max) {
		// If last ACK falls within this segment's sequence numbers,
		// record the timestamp.
      if ( ts_present && TSTMP_GEQ(ts_val, tp->ts_recent) &&
            SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) ){
			tp->ts_recent_age = g_tcp_now;
			tp->ts_recent = ts_val;
		}

		if (ti->ti_len == 0) {
			if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
			    SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
			    tp->snd_cwnd >= tp->snd_wnd) {
				// this is a pure ack for outstanding data.
				++g_tcpstat.tcps_predack;
				if (ts_present)
					tcp_xmit_timer(tp, g_tcp_now-ts_ecr+1);
				else if (tp->t_rtt &&
					    SEQ_GT(ti->ti_ack, tp->t_rtseq))
					tcp_xmit_timer(tp, tp->t_rtt);

				acked = ti->ti_ack - tp->snd_una;
				g_tcpstat.tcps_rcvackpack++;
				g_tcpstat.tcps_rcvackbyte += acked;
            TRACE("drop so_snd buffer, drop_bytes=%d, len=%d", 
                  acked, so->so_snd.sb_cc);

				sbdrop(so->so_snd, acked);
				tp->snd_una = ti->ti_ack;
				usn_free_cmbuf(m);

				// If all outstanding data are acked, stop
				// retransmit timer, otherwise restart timer
				// using current (possibly backed-off) value.
				// If process is waiting for space,
				// wakeup/selwakeup/signal.  If data
				// are ready to send, let tcp_output
				// decide between more output or persist.
				if (tp->snd_una == tp->snd_max)
					tp->t_timer[TCPT_REXMT] = 0;
				else if (tp->t_timer[TCPT_PERSIST] == 0)
					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;

	         if (so->so_options & SO_DEBUG)
             	tcp_trace(TA_INPUT, ostate, tp, &g_tcp_saveti, 0);

				//if (so->so_snd->sb_flags & SB_NOTIFY) {
            //   usnet_tcpin_wwakeup(so, USN_TCP_IN, usn_tcpev_sbnotify, 0);
				//	sowwakeup(so);
            //}

            // send buffer is available for app thread. 
            usnet_tcpin_wwakeup(so, USN_TCP_IN, USN_TCPEV_WRITE, 0);

				if (so->so_snd->sb_cc)
					tcp_output(tp);
				return;
			}
		} else if (ti->ti_ack == tp->snd_una &&
		    tp->seg_next == (struct tcpiphdr *)tp &&
		    ti->ti_len <= sbspace(so->so_rcv)) {

			// this is a pure, in-sequence data packet
			// with nothing on the reassembly queue and
			// we have enough buffer space to take it.
			++g_tcpstat.tcps_preddat;
			tp->rcv_nxt += ti->ti_len;
			g_tcpstat.tcps_rcvpack++;
			g_tcpstat.tcps_rcvbyte += ti->ti_len;

			// Drop TCP, IP headers and TCP options then add data
			// to socket buffer.
			m->head += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
			m->mlen -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);

         TRACE("add data to rcv buf");
			sbappend(so->so_rcv, m);
			sorwakeup(so);

         // new data is available for app threads.
         usnet_tcpin_rwakeup(so, USN_TCP_IN, USN_TCPEV_READ, m);

	      if (so->so_options & SO_DEBUG) {
            TRACE("tcp trace, so_options=%d", so->so_options);
          	tcp_trace(TA_INPUT, ostate, tp, &g_tcp_saveti, 0);
         }

			tp->t_flags |= TF_DELACK;
			return;
		}
	}

	// Drop TCP, IP headers and TCP options.
	m->head += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
	m->mlen -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);

	// Calculate amount of space in receive window,
	// and then do TCP input processing.
	// Receive window is amount of space in rcv queue,
	// but not less than advertised window.
   {
	   int win;
	   win = sbspace(so->so_rcv);
	   if (win < 0)
	      win = 0;
  	   tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
	}

	switch (tp->t_state) {
	// If the state is LISTEN then ignore segment if it contains an RST.
	// If the segment contains an ACK then it is bad and send a RST.
	// If it does not contain a SYN then it is not interesting; drop it.
	// Don't bother responding if the destination was a broadcast.
	// Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
	// tp->iss, and send a segment:
	//     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
	// Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
	// Fill in remote peer address fields if not previously specified.
	// Enter SYN_RECEIVED state, and process any other fields of this
	// segment in this state.
	case TCPS_LISTEN: {
		usn_mbuf_t *am;
		struct usn_sockaddr_in *sin;

		if (tiflags & TH_RST)
			goto drop;
		if (tiflags & TH_ACK)
			goto dropwithreset;
		if ((tiflags & TH_SYN) == 0)
			goto drop;

		// RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
		// in_broadcast() should never return true on a received
		// packet with M_BCAST not set.

		//if (m->m_flags & (M_BCAST|M_MCAST) ||
		//    IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
		//	goto drop;

		am = usn_get_mbuf(0, BUF_MSIZE, 0);	// XXX: the size!
		if (am == NULL)
			goto drop;
		am->mlen = sizeof (struct usn_sockaddr_in);
		sin = mtod(am, struct usn_sockaddr_in *);
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = ti->ti_src;
		sin->sin_port = ti->ti_sport;
		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));

		laddr = inp->inp_laddr;
		if (inp->inp_laddr.s_addr == USN_INADDR_ANY)
			inp->inp_laddr = ti->ti_dst;

		if (in_pcbconnect(inp, am)) {
			inp->inp_laddr = laddr;
			usn_free_mbuf(am);
			goto drop;
		}
		usn_free_mbuf(am);
		tp->t_template = tcp_template(tp);
		if (tp->t_template == 0) {
			tp = tcp_drop(tp, ENOBUFS);
			dropsocket = 0;		// socket is already gone
			goto drop;
		}
		if (optp)
			tcp_dooptions(tp, optp, optlen, ti,
				&ts_present, &ts_val, &ts_ecr);
		if (iss)
			tp->iss = iss;
		else
			tp->iss = g_tcp_iss;
		g_tcp_iss += TCP_ISSINCR/4;
		tp->irs = ti->ti_seq;
		tcp_sendseqinit(tp);
		tcp_rcvseqinit(tp);
		tp->t_flags |= TF_ACKNOW;
      TRACE("change tcp state to TCPS_SYN_RECEIVED, state=%d, tp_flags=%d",
            tp->t_state, tp->t_flags);
		tp->t_state = TCPS_SYN_RECEIVED;

      // tcp event
      usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_SYN_RECEIVED, 0);

		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
		dropsocket = 0;		// committed to socket
		g_tcpstat.tcps_accepts++;
		goto trimthenstep6;
	}


	// If the state is SYN_SENT:
	//	if seg contains an ACK, but not for our SYN, drop the input.
	//	if seg contains a RST, then drop the connection.
	//	if seg does not contain SYN, then drop it.
	// Otherwise this is an acceptable SYN segment
	//	initialize tp->rcv_nxt and tp->irs
	//	if seg contains ack then advance tp->snd_una
	//	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
	//	arrange for segment to be acked (eventually)
	//	continue processing rest of data/controls, beginning with URG
	case TCPS_SYN_SENT:
		if ((tiflags & TH_ACK) &&
		    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
		     SEQ_GT(ti->ti_ack, tp->snd_max)))
			goto dropwithreset;
		if (tiflags & TH_RST) {
			if (tiflags & TH_ACK)
				tp = tcp_drop(tp, ECONNREFUSED);
			goto drop;
		}
		if ((tiflags & TH_SYN) == 0)
			goto drop;
		if (tiflags & TH_ACK) {
			tp->snd_una = ti->ti_ack;
			if (SEQ_LT(tp->snd_nxt, tp->snd_una))
				tp->snd_nxt = tp->snd_una;
		   tp->t_timer[TCPT_REXMT] = 0; 
		}
		
		tp->irs = ti->ti_seq;
		tcp_rcvseqinit(tp);
		tp->t_flags |= TF_ACKNOW;
      TRACE("ack now, tp flags=%d", tp->t_flags);

      // XXX: remove second test.
		if (tiflags & TH_ACK /*&& SEQ_GT(tp->snd_una, tp->iss)*/) {
			g_tcpstat.tcps_connects++;
			soisconnected(so);
         TRACE("change tcp state to TCPS_ESTABLISHED,"
               " state=%d, tp_flags=%d", tp->t_state, tp->t_flags);
			tp->t_state = TCPS_ESTABLISHED;

			// Do window scaling on this connection?
			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
				tp->snd_scale = tp->requested_s_scale;
				tp->rcv_scale = tp->request_r_scale;
			}
			tcp_reass(tp, (struct tcpiphdr *)0, (usn_mbuf_t *)0);

			// if we didn't have to retransmit the SYN,
			// use its rtt as our initial srtt & rtt var.
			if (tp->t_rtt)
				tcp_xmit_timer(tp, tp->t_rtt);
		} else {
         TRACE("change tcp state to TCPS_SYN_RECEIVED, state=%d, tp_flags=%d", 
               tp->t_state, tp->t_flags);
			tp->t_state = TCPS_SYN_RECEIVED;
         // tcp event
         usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_SYN_RECEIVED, 0);
      }

trimthenstep6:

		// Advance ti->ti_seq to correspond to first data byte.
		// If data, trim to stay within window,
		// dropping FIN if necessary.
		ti->ti_seq++;
		if (ti->ti_len > tp->rcv_wnd) {
			todrop = ti->ti_len - tp->rcv_wnd;
			m_adj(m, -todrop);
			ti->ti_len = tp->rcv_wnd;
			tiflags &= ~TH_FIN;
			g_tcpstat.tcps_rcvpackafterwin++;
			g_tcpstat.tcps_rcvbyteafterwin += todrop;
		}
		tp->snd_wl1 = ti->ti_seq - 1;
		tp->rcv_up = ti->ti_seq;
		goto step6;
	}

	// States other than LISTEN or SYN_SENT.
	// First check timestamp, if present.
	// Then check that at least some bytes of segment are within 
	// receive window.  If segment begins before rcv_nxt,
	// drop leading data (and SYN); if nothing left, just ack.
	// 
	// RFC 1323 PAWS: If we have a timestamp reply on this segment
	// and it's less than ts_recent, drop it.
	if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
	    TSTMP_LT(ts_val, tp->ts_recent)) {
		// Check to see if ts_recent is over 24 days old.
		if ((int)(g_tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
			// Invalidate ts_recent.  If this segment updates
			// ts_recent, the age will be reset later and ts_recent
			// will get a valid value.  If it does not, setting
			// ts_recent to zero will at least satisfy the
			// requirement that zero be placed in the timestamp
			// echo reply when ts_recent isn't valid.  The
			// age isn't reset until we get a valid ts_recent
			// because we don't want out-of-order segments to be
			// dropped when ts_recent is old.
			tp->ts_recent = 0;
		} else {
			g_tcpstat.tcps_rcvduppack++;
			g_tcpstat.tcps_rcvdupbyte += ti->ti_len;
			g_tcpstat.tcps_pawsdrop++;
			goto dropafterack;
		}
	}

	todrop = tp->rcv_nxt - ti->ti_seq;
	if (todrop > 0) {
		if (tiflags & TH_SYN) {
			tiflags &= ~TH_SYN;
			ti->ti_seq++;
			if (ti->ti_urp > 1) 
				ti->ti_urp--;
			else
				tiflags &= ~TH_URG;
			todrop--;
		}
      if ( todrop >= ti->ti_len || 
           ( todrop == ti->ti_len && (tiflags & TH_FIN ) == 0 ) ) {
         // Any valid FIN must be to the left of the window.
         // At this point the FIN must be a duplicate or
         // out of sequence; drop it.
         tiflags &= ~TH_FIN;
         // Send an ACK to resynchronize and drop any data
         // But keep on processing for RST or ACK.
         tp->t_flags |= TF_ACKNOW;
         TRACE("send ack now to resync, tp_flags=%d", tp->t_flags);
         todrop = ti->ti_len;
         g_tcpstat.tcps_rcvdupbyte += ti->ti_len;
         g_tcpstat.tcps_rcvduppack++;
      } else {
         g_tcpstat.tcps_rcvpartduppack++;
         g_tcpstat.tcps_rcvpartdupbyte += ti->ti_len;
      }

		m_adj(m, todrop);
		ti->ti_seq += todrop;
		ti->ti_len -= todrop;
		if (ti->ti_urp > todrop)
			ti->ti_urp -= todrop;
		else {
			tiflags &= ~TH_URG;
			ti->ti_urp = 0;
		}
	}

	// If new data are received on a connection after the
	// user processes are gone, then RST the other end.
	if ((so->so_state & USN_NOFDREF) && 
	    tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
		tp = tcp_close(tp);
		g_tcpstat.tcps_rcvafterclose++;
		goto dropwithreset;
	}


	// If segment ends after window, drop trailing data
	// (and PUSH and FIN); if nothing left, just ACK.
	todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
	if (todrop > 0) {
		g_tcpstat.tcps_rcvpackafterwin++;
		if (todrop >= ti->ti_len) {
			g_tcpstat.tcps_rcvbyteafterwin += ti->ti_len;

			// If a new connection request is received
			// while in TIME_WAIT, drop the old connection
			// and start over if the sequence numbers
			// are above the previous ones.
			if (tiflags & TH_SYN &&
			    tp->t_state == TCPS_TIME_WAIT &&
			    SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
				iss = tp->snd_nxt + TCP_ISSINCR;
				tp = tcp_close(tp);
				goto findpcb;
			}

			// If window is closed can only take segments at
			// window edge, and have to drop data and PUSH from
			// incoming segments.  Continue processing, but
			// remember to ack.  Otherwise, drop segment
			// and ack.
			if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
				tp->t_flags |= TF_ACKNOW;
				g_tcpstat.tcps_rcvwinprobe++;
			} else
				goto dropafterack;
		} else
			g_tcpstat.tcps_rcvbyteafterwin += todrop;
		m_adj(m, -todrop);
		ti->ti_len -= todrop;
		tiflags &= ~(TH_PUSH|TH_FIN);
	}

   // check valid timestamp. Replace code above.
   if (ts_present && TSTMP_GEQ(ts_val, tp->ts_recent) &&
         SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) ) {
		tp->ts_recent_age = g_tcp_now;
		tp->ts_recent = ts_val;
   }

	// If the RST bit is set examine the state:
	//    SYN_RECEIVED STATE:
	//	If passive open, return to LISTEN state.
	//	If active open, inform user that connection was refused.
	//    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
	//	Inform user that connection was reset, and close tcb.
	//    CLOSING, LAST_ACK, TIME_WAIT STATES
	//	Close the tcb.
	if (tiflags&TH_RST) switch (tp->t_state) {

	case TCPS_SYN_RECEIVED:
		so->so_error = ECONNREFUSED;
		goto close;

	case TCPS_ESTABLISHED:
	case TCPS_FIN_WAIT_1:
	case TCPS_FIN_WAIT_2:
	case TCPS_CLOSE_WAIT:
		so->so_error = ECONNRESET;
close:
      DEBUG("change tcp state to TCPS_CLOSED, state=%d", tp->t_state);
		tp->t_state = TCPS_CLOSED;
      // tcp event
      usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_CLOSED, 0);
		g_tcpstat.tcps_drops++;
		tp = tcp_close(tp);
		goto drop;

	case TCPS_CLOSING:
	case TCPS_LAST_ACK:
	case TCPS_TIME_WAIT:
		tp = tcp_close(tp);
		goto drop;
	}

	// If a SYN is in the window, then this is an
	// error and we send an RST and drop the connection.
	if (tiflags & TH_SYN) {
		tp = tcp_drop(tp, ECONNRESET);
		goto dropwithreset;
	}

	// If the ACK bit is off we drop the segment and return.
	if ((tiflags & TH_ACK) == 0)
		goto drop;

	// Ack processing.
	switch (tp->t_state) {

	// In SYN_RECEIVED state if the ack ACKs our SYN then enter
	// ESTABLISHED state and continue processing, otherwise
	// send an RST.
	case TCPS_SYN_RECEIVED:
		if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
		    SEQ_GT(ti->ti_ack, tp->snd_max))
			goto dropwithreset;
		g_tcpstat.tcps_connects++;

      DEBUG("change tcp state to TCPS_ESTABLISHED, state=%d", tp->t_state);
		tp->t_state = TCPS_ESTABLISHED;
		soisconnected(so);

		// Do window scaling?
		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
			tp->snd_scale = tp->requested_s_scale;
			tp->rcv_scale = tp->request_r_scale;
		}
		tcp_reass(tp, (struct tcpiphdr *)0, (usn_mbuf_t *)0);
		tp->snd_wl1 = ti->ti_seq - 1;
		// fall into ...

	// In ESTABLISHED state: drop duplicate ACKs; ACK out of range
	// ACKs.  If the ack is in the range
	//	tp->snd_una < ti->ti_ack <= tp->snd_max
	// then advance tp->snd_una to ti->ti_ack and drop
	// data from the retransmission queue.  If this ACK reflects
	// more up to date window information we update our window information.
	case TCPS_ESTABLISHED:
	case TCPS_FIN_WAIT_1:
	case TCPS_FIN_WAIT_2:
	case TCPS_CLOSE_WAIT:
	case TCPS_CLOSING:
	case TCPS_LAST_ACK:
	case TCPS_TIME_WAIT:

		if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
			if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
				g_tcpstat.tcps_rcvdupack++;
				// If we have outstanding data (other than
				// a window probe), this is a completely
				// duplicate ack (ie, window info didn't
				// change), the ack is the biggest we've
				// seen and we've seen exactly our rexmt
				// threshhold of them, assume a packet
				// has been dropped and retransmit it.
				// Kludge snd_nxt & the congestion
				// window so we send only this one
				// packet.
				//
				// We know we're losing at the current
				// window size so do congestion avoidance
				// (set ssthresh to half the current window
				// and pull our congestion window back to
				// the new ssthresh).
				//
				// Dup acks mean that packets have left the
				// network (they're now cached at the receiver) 
				// so bump cwnd by the amount in the receiver
				// to keep a constant cwnd packets in the
				// network.
				if (tp->t_timer[TCPT_REXMT] == 0 ||
				    ti->ti_ack != tp->snd_una)
					tp->t_dupacks = 0;
				else if (++tp->t_dupacks == g_tcprexmtthresh) {
               // congestion avoidance
					tcp_seq onxt = tp->snd_nxt;
					u_int win =
					    min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;

					if (win < 2)
						win = 2;
					tp->snd_ssthresh = win * tp->t_maxseg;
					tp->t_timer[TCPT_REXMT] = 0;
					tp->t_rtt = 0;
					tp->snd_nxt = ti->ti_ack;
					tp->snd_cwnd = tp->t_maxseg;
					tcp_output(tp);
					tp->snd_cwnd = tp->snd_ssthresh +
					       tp->t_maxseg * tp->t_dupacks;
					if (SEQ_GT(onxt, tp->snd_nxt))
						tp->snd_nxt = onxt;
					goto drop;
				} else if (tp->t_dupacks > g_tcprexmtthresh) {
					tp->snd_cwnd += tp->t_maxseg;
					tcp_output(tp);
					goto drop;
				}
			} else
				tp->t_dupacks = 0;
			break;
		}

		// If the congestion window was inflated to account
		// for the other side's cached packets, retract it.
		if (tp->t_dupacks > g_tcprexmtthresh &&
		    tp->snd_cwnd > tp->snd_ssthresh)
			tp->snd_cwnd = tp->snd_ssthresh;
		tp->t_dupacks = 0;
		if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
			g_tcpstat.tcps_rcvacktoomuch++;
			goto dropafterack;
		}
		acked = ti->ti_ack - tp->snd_una;
		g_tcpstat.tcps_rcvackpack++;
		g_tcpstat.tcps_rcvackbyte += acked;

		// If we have a timestamp reply, update smoothed
		// round trip time.  If no timestamp is present but
		// transmit timer is running and timed sequence
		// number was acked, update smoothed round trip time.
		// Since we now have an rtt measurement, cancel the
		// timer backoff (cf., Phil Karn's retransmit alg.).
		// Recompute the initial retransmit timer.
		if (ts_present)
			tcp_xmit_timer(tp, g_tcp_now-ts_ecr+1);
		else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
			tcp_xmit_timer(tp,tp->t_rtt);

		// If all outstanding data is acked, stop retransmit
		// timer and remember to restart (more output or persist).
		// If there is more data to be acked, restart retransmit
		// timer, using current (possibly backed-off) value.
		if (ti->ti_ack == tp->snd_max) {
			tp->t_timer[TCPT_REXMT] = 0;
         DEBUG("change needoutput to 1");
			needoutput = 1;
         tp->t_flags |= TF_NEEDOUTPUT;
		} else if (tp->t_timer[TCPT_PERSIST] == 0)
			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;

		// When new data is acked, open the congestion window.
		// If the window gives us less than ssthresh packets
		// in flight, open exponentially (maxseg per packet).
		// Otherwise open linearly: maxseg per window
		// (maxseg * (maxseg / cwnd) per packet).
		{
		   u_int cw = tp->snd_cwnd;
	   	u_int incr = tp->t_maxseg;

	   	if (cw > tp->snd_ssthresh)
	   		incr = incr * incr / cw;
   		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
		}

		if (acked > so->so_snd->sb_cc) {
			tp->snd_wnd -= so->so_snd->sb_cc;
         DEBUG("drop all so_snd buffer, drop_bytes=%d, acked=%d", 
               so->so_snd->sb_cc, acked);
			sbdrop(so->so_snd, (int)so->so_snd->sb_cc);
			ourfinisacked = 1;
		} else {
         DEBUG("drop so_snd buffer, drop_bytes=%d, len=%d", acked, so->so_snd->sb_cc);
			sbdrop(so->so_snd, acked);
			tp->snd_wnd -= acked;
			ourfinisacked = 0;
		}
		//if (so->so_snd->sb_flags & SB_NOTIFY) {
			sowwakeup(so);
         usnet_tcpin_wwakeup(so, USN_TCP_IN, USN_TCPEV_WRITE, 0);
      //}

		tp->snd_una = ti->ti_ack;
		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
			tp->snd_nxt = tp->snd_una;

		switch (tp->t_state) {

		// In FIN_WAIT_1 STATE in addition to the processing
		// for the ESTABLISHED state if our FIN is now acknowledged
		// then enter FIN_WAIT_2.
		case TCPS_FIN_WAIT_1:
			if (ourfinisacked) {
				// If we can't receive any more
				// data, then closing user can proceed.
				// Starting the timer is contrary to the
				// specification, but if we don't get a FIN
				// we'll hang forever.
				if (so->so_state & USN_CANTRCVMORE) {
					soisdisconnected(so);
					tp->t_timer[TCPT_2MSL] = g_tcp_maxidle;
				}
            DEBUG("change tcp state to TCPS_FIN_WAIT_2, state=%d", tp->t_state);
				tp->t_state = TCPS_FIN_WAIT_2;
            usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_FIN_WAIT2, 0);
			}
			break;

		// In CLOSING STATE in addition to the processing for
		// the ESTABLISHED state if the ACK acknowledges our FIN
		// then enter the TIME-WAIT state, otherwise ignore
		// the segment.
		case TCPS_CLOSING:
			if (ourfinisacked) {
            DEBUG("change tcp state to TCPS_TIME_WAIT, state=%d", tp->t_state);
				tp->t_state = TCPS_TIME_WAIT;
            usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_TIME_WAIT, 0);
				tcp_canceltimers(tp);
				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
				soisdisconnected(so);
			}
			break;
		
		// In LAST_ACK, we may still be waiting for data to drain
		// and/or to be acked, as well as for the ack of our FIN.
		// If our FIN is now acknowledged, delete the TCB,
		// enter the closed state and return.
		case TCPS_LAST_ACK:
			if (ourfinisacked) {
				tp = tcp_close(tp);
				goto drop;
			}
			break;


		// In TIME_WAIT state the only thing that should arrive
		// is a retransmission of the remote FIN.  Acknowledge
		// it and restart the finack timer.
		case TCPS_TIME_WAIT:
			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			goto dropafterack;
		}
	}

step6:

	// Update window information.
	// Don't look at window if no ACK: TAC's send garbage on first SYN.
	if ((tiflags & TH_ACK) &&
	    (SEQ_LT(tp->snd_wl1, ti->ti_seq) || 
        (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
	     (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd) ))  )) {
		// keep track of pure window updates
		if (ti->ti_len == 0 &&
		    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
			g_tcpstat.tcps_rcvwinupd++;
		tp->snd_wnd = tiwin;
		tp->snd_wl1 = ti->ti_seq;
		tp->snd_wl2 = ti->ti_ack;
		if (tp->snd_wnd > tp->max_sndwnd)
			tp->max_sndwnd = tp->snd_wnd;
      DEBUG("change needoutput to 1");
      tp->t_flags |= TF_NEEDOUTPUT;
		needoutput = 1;
	}

	
	// Process segments with URG.
	if ((tiflags & TH_URG) && ti->ti_urp &&
	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {

		// This is a kludge, but if we receive and accept
		// random urgent pointers, we'll crash in
		// soreceive.  It's hard to imagine someone
		// actually wanting to send this much urgent data.
		if (ti->ti_urp + so->so_rcv->sb_cc > g_sb_max) {
			ti->ti_urp = 0;			// XXX
			tiflags &= ~TH_URG;		// XXX
			goto dodata;			// XXX
		}

		// If this segment advances the known urgent pointer,
		// then mark the data stream.  This should not happen
		// in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
		// a FIN has been received from the remote side. 
		// In these states we ignore the URG.
		//
		// According to RFC961 (Assigned Protocols),
		// the urgent pointer points to the last octet
		// of urgent data.  We continue, however,
		// to consider it to indicate the first octet
		// of data past the urgent section as the original 
		// spec states (in one of two places).
		if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
			tp->rcv_up = ti->ti_seq + ti->ti_urp;
			so->so_oobmark = so->so_rcv->sb_cc +
			    (tp->rcv_up - tp->rcv_nxt) - 1;
			if (so->so_oobmark == 0)
				so->so_state |= USN_RCVATMARK;
			sohasoutofband(so);
         // send async event to app threads.
         usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPEV_OUTOFBOUND, 0);
			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
		}

		// Remove out of band data so doesn't get presented to user.
		// This can happen independent of advancing the URG pointer,
		// but if two URG's are pending at once, some out-of-band
		// data may creep in... ick.
		if (ti->ti_urp <= ti->ti_len
#ifdef SO_OOBINLINE
		     && (so->so_options & SO_OOBINLINE) == 0
#endif
		     )
			tcp_pulloutofband(so, ti, m);
	} else
		// If no out of band data is expected,
		// pull receive urgent pointer along
		// with the receive window.
		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
			tp->rcv_up = tp->rcv_nxt;
dodata:							// XXX
#ifdef DUMP_PAYLOAD
   DEBUG("Handle data");
   dump_chain(m,"tcp");
#endif

	// Process the segment text, merging it into the TCP sequencing queue,
	// and arranging for acknowledgment of receipt if necessary.
	// This process logically involves adjusting tp->rcv_wnd as data
	// is presented to the user (this happens in tcp_usrreq.c,
	// case PRU_RCVD).  If a FIN has already been received on this
	// connection then we just ignore the text.
	if ((ti->ti_len || (tiflags&TH_FIN)) &&
	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		TCP_REASS(tp, ti, m, so, tiflags);
		// Note the amount of data that peer has sent into
		// our window, in order to estimate the sender's
		// buffer size.
		len = so->so_rcv->sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
	} else {
		usn_free_cmbuf(m);
		tiflags &= ~TH_FIN;
	}

	// If FIN is received ACK the FIN and let the user know
	// that the connection is closing.
	if (tiflags & TH_FIN) {
		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
			socantrcvmore(so);
			tp->t_flags |= TF_ACKNOW;
         TRACE("ack FIN now, tp flags=%d", tp->t_flags);
			tp->rcv_nxt++;
		}
		switch (tp->t_state) {

		// In SYN_RECEIVED and ESTABLISHED STATES
		// enter the CLOSE_WAIT state.
		case TCPS_SYN_RECEIVED:
		case TCPS_ESTABLISHED:
         TRACE("change tcp state to TCPS_CLOSE_WAIT, state=%d", tp->t_state);
			tp->t_state = TCPS_CLOSE_WAIT;
         soewakeup(so, 0);
         usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_CLOSE_WAIT, 0);
			break;

		// If still in FIN_WAIT_1 STATE FIN has not been acked so
		// enter the CLOSING state.
		case TCPS_FIN_WAIT_1:
         TRACE("change tcp state to TCPS_CLOSING, state=%d", tp->t_state);
			tp->t_state = TCPS_CLOSING;
         usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_CLOSING, 0);
			break;

		// In FIN_WAIT_2 state enter the TIME_WAIT state,
		// starting the time-wait timer, turning off the other 
		// standard timers.
		case TCPS_FIN_WAIT_2:
         TRACE("change tcp state to TCPS_TIME_WAIT, state=%d", tp->t_state);
			tp->t_state = TCPS_TIME_WAIT;
			tcp_canceltimers(tp);
			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			soisdisconnected(so);
         usnet_tcpin_ewakeup(so, USN_TCP_IN, USN_TCPST_TIME_WAIT, 0);
			break;

		// In TIME_WAIT state restart the 2 MSL time_wait timer.
		case TCPS_TIME_WAIT:
			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			break;
		}
	}
	if (so->so_options & SO_DEBUG) {
      TRACE("tcp trace, so_options=%d", so->so_options);
		tcp_trace(TA_INPUT, ostate, tp, &g_tcp_saveti, 0);
   }

	// Return any desired output.
	//if (needoutput || (tp->t_flags & TF_ACKNOW)){
	if (tp->t_flags & TF_NEEDOUTPUT || (tp->t_flags & TF_ACKNOW)){
      TRACE("ack now or need to ouput, tp->t_flags=%d", tp->t_flags);
		tcp_output(tp);
   }
	return;

dropafterack:
   TRACE("dropafterack");
	// Generate an ACK dropping incoming segment if it occupies
	// sequence space, where the ACK reflects our state.
	if (tiflags & TH_RST)
		goto drop;
	usn_free_cmbuf(m);
	tp->t_flags |= TF_ACKNOW;
   TRACE("ack now, tp flags=%d", tp->t_flags);
	tcp_output(tp);
	return;

dropwithreset:
   TRACE("dropwithreset");
	// Generate a RST, dropping incoming segment.
	// Make ACK acceptable to originator of segment.
	// Don't bother to respond if destination was broadcast/multicast.
#define USN_MULTICAST(i) (((u_int)(i) & 0xf0000000) == 0xe0000000)
	if ((tiflags & TH_RST) || m->flags & (BUF_BCAST|BUF_MCAST) ||
	    USN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
		goto drop;
   
	if (tiflags & TH_ACK)
		tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
	else {
		if (tiflags & TH_SYN)
			ti->ti_len++;
		tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
		    TH_RST|TH_ACK);
	}
	// destroy temporarily created socket
	if (dropsocket)
		soabort(so);
	return;

drop:
   TRACE("drop");
	// Drop space held by incoming segment and return.
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) {
      TRACE("tcp trace: drop a socket");
		tcp_trace(TA_DROP, ostate, tp, &g_tcp_saveti, 0);
   }
	usn_free_cmbuf(m);
	// destroy temporarily created socket
	if (dropsocket)
		soabort(so);
	return;
}
コード例 #22
0
ファイル: nptcp.c プロジェクト: ECE492W2014G4/G4Capstone
/* so_icmpdu() - react to an ICMP destination-unreachable (DU) report.
 *
 * p   - the received packet; it is always released through pk_free()
 *       before this routine returns.
 * pdp - the DU message, including the copy of the header of the datagram
 *       that provoked it (pdp->dip) and the leading bytes of that
 *       datagram's payload (pdp->ddata), from which the ports are read.
 *
 * For TCP the matching connection (if any, and if past LISTEN) is closed
 * and ECONNREFUSED is posted on its socket. For UDP (UDP_SOCKETS builds)
 * ECONNREFUSED is posted on the matching datagram socket and its readers
 * and writers are woken. Any other protocol is ignored. With IP_PMTU,
 * a datagram-too-big report additionally updates the path MTU cache;
 * without IP_PMTU such reports are discarded up front.
 *
 * Returns nothing.
 */
void
so_icmpdu(PACKET p, struct destun * pdp)
{
   /* Addressing of the datagram that generated the DU. */
   ip_addr  remote_addr = htonl(pdp->dip.ip_dest);   /* IP address we sent to */
   ip_addr  local_addr  = htonl(pdp->dip.ip_src);    /* originator (our iface) */
   unshort  local_port  = htons(*(unshort*)(&pdp->ddata[0])); /* port we sent from */
   unshort  remote_port = htons(*(unshort*)(&pdp->ddata[2])); /* port we sent to */
   struct inpcb *  pcb;
   struct socket * so;
   struct tcpcb *  tp;

#ifndef IP_PMTU
   /* This build does no PMTU discovery, so a datagram-too-big report is
    * unexpected and most probably a Denial of Service attempt: drop it.
    */
   if (pdp->dcode == DSTFRAG)
      goto done;
#endif   /* IP_PMTU */

   if (pdp->dip.ip_prot == TCPTP)
   {
      /* TCP: locate the pcb, socket and tcpcb of the connection */
      pcb = in_pcblookup(&tcb, remote_addr, remote_port,
                         local_addr, local_port, INPLOOKUP_WILDCARD);
      if (!pcb)
         goto done;

      so = pcb->inp_socket;
      if (!so)
         goto done;

      tp = intotcpcb(pcb);
      if (tp)
      {
         /* nothing to tear down for closed/listening endpoints */
         if (tp->t_state <= TCPS_LISTEN)
            goto done;

#ifdef ICMP_TCP_DOS
         {
            struct ip *       iph = ip_head(p);   /* IP header of packet */
            struct tcpiphdr * tih = (struct tcpiphdr *)p->nb_prot;

            /* The quoted sequence number must lie inside the span of
             * data we have sent but not yet had acknowledged.
             * NOTE(review): plain relational compares, not wrap-aware
             * sequence arithmetic - confirm this is intended.
             */
            if ((tih->ti_seq < tp->snd_una) || (tih->ti_seq > tp->snd_nxt))
               goto done;

            /* A Protocol Unreachable (code 2) arriving during the life
             * of a connection would mean the peer removed its TCP
             * support mid-connection. That is highly unlikely, so the
             * message is treated as a DoS attempt and ignored.
             */
            if ((pdp->dcode == DSTPROT) &&
                (tp->t_state >= TCPS_ESTABLISHED) &&
                (tp->t_state <= TCPS_TIME_WAIT))
            {
               goto done;
            }

            /* Some routers quote more than the recommended 64 bits of
             * IP data. When enough is present, apply one further sanity
             * check against our receive state to reduce the chance of
             * acting on a spoofed ICMP packet.
             */
            if ((iph->ip_len >= 32) && (tih->ti_seq > tp->rcv_nxt))
               goto done;
         }
#endif

         tcp_close(tp);
      }

      /* Tell the socket owner.
       * NOTE(review): this writes to so after tcp_close(); verify that
       * tcp_close() cannot release the socket on this path.
       */
      so->so_error = ECONNREFUSED;
   }
#ifdef UDP_SOCKETS   /* this sockets layer supports UDP too */
   else if (pdp->dip.ip_prot == UDP_PROT)
   {
      /* walk the UDP table (which keeps hosts in net endian) */
      UDPCONN conn = firstudp;

      while (conn)
      {
         if (((conn->u_fport == remote_port) || (conn->u_fport == 0)) &&
             (conn->u_fhost == htonl(remote_addr)) &&
             (conn->u_lport == local_port))
         {
            break;   /* this is our UDP table entry */
         }
         conn = conn->u_next;
      }
      if (!conn)
         goto done;

      so = (struct socket *)conn->u_data;

      /* The entry may be a non-socket (lightweight) UDP connection. */
      if (so->so_type != SOCK_DGRAM)
         goto done;

      so->so_error = ECONNREFUSED;  /* set error for socket owner */

      /* do a select() notify on the socket */
      sorwakeup(so);
      sowwakeup(so);
   }
#endif   /* UDP_SOCKETS */
   else
   {
      goto done;
   }

#ifdef IP_PMTU
   /* a datagram-too-big message carries a new value for the Path MTU cache */
   if (pdp->dcode == DSTFRAG)
      pmtucache_set(pdp->dip.ip_dest, htons(pdp->dno2));
#endif   /* IP_PMTU */

done:
   /* release the original packet under the free-queue lock */
   LOCK_NET_RESOURCE(FREEQ_RESID);
   pk_free(p);
   UNLOCK_NET_RESOURCE(FREEQ_RESID);
   return;
}
コード例 #23
0
ファイル: tcp_usrreq.c プロジェクト: iHaD/DragonFlyBSD
/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 *
 * The request arrives as a netmsg: the socket, the PRUS_* flags and
 * the data mbuf chain are all read from msg->send.  Control mbufs,
 * destination addresses and PRUS_FREEADDR are not accepted here
 * (asserted on entry).
 *
 * Nothing is returned directly.  The result is accumulated in 'error'
 * and the function finishes through COMMON_END1(), which is passed the
 * request code and the PRUS_NOREPLY bit.
 * NOTE(review): COMMON_END1 is defined outside this block; since no
 * 'out' label appears in the body, it presumably supplies the target
 * of the 'goto out' statements -- confirm against the macro.
 */
static void
tcp_usr_send(netmsg_t msg)
{
	struct socket *so = msg->send.base.nm_so;
	int flags = msg->send.nm_flags;
	struct mbuf *m = msg->send.nm_m;
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	/*
	 * NOTE(review): TCPDEBUG0/TCPDEBUG1() are defined elsewhere.
	 * TCPDEBUG1() is invoked only after tp has been assigned on each
	 * path below, presumably because it reads tp -- confirm.
	 */
	TCPDEBUG0;

	KKASSERT(msg->send.nm_control == NULL);
	KKASSERT(msg->send.nm_addr == NULL);
	KKASSERT((flags & PRUS_FREEADDR) == 0);

	inp = so->so_pcb;

	if (inp == NULL) {
		/*
		 * OOPS! we lost a race, the TCP session got reset after
		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
		 * network interrupt in the non-critical section of sosend().
		 *
		 * The data cannot be queued, so it is freed here and the
		 * caller is told the connection was reset.
		 */
		m_freem(m);
		error = ECONNRESET;	/* XXX EPIPE? */
		tp = NULL;
		TCPDEBUG1();
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();

#ifdef foo
	/*
	 * Disabled code, compiled only if 'foo' is defined.
	 *
	 * This is no longer necessary, since:
	 * - sosendtcp() has already checked it for us
	 * - It does not work with asynchronous send
	 */

	/*
	 * Don't let too much OOB data build up
	 */
	if (flags & PRUS_OOB) {
		if (ssb_space(&so->so_snd) < -512) {
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
	}
#endif

	/*
	 * Pump the data into the socket.  With no mbuf chain there is
	 * nothing to append and only the flag handling below runs.
	 */
	if (m) {
		ssb_appendstream(&so->so_snd, m);
		sowwakeup(so);
	}
	if (flags & PRUS_OOB) {
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 *
		 * TF_FORCE is set only for the duration of this
		 * tcp_output() call and cleared again right after it.
		 */
		tp->snd_up = tp->snd_una + so->so_snd.ssb_cc;
		tp->t_flags |= TF_FORCE;
		error = tcp_output(tp);
		tp->t_flags &= ~TF_FORCE;
	} else {
		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.  tcp_usrclosed() hands back
			 * the tcpcb to continue with; the NULL test
			 * below covers the case where it returns none.
			 */
			socantsendmore(so);
			tp = tcp_usrclosed(tp);
		}
		/*
		 * Output is attempted only if a tcpcb is still present
		 * and tcp_output_pending() reports false for it.
		 * TF_MORETOCOME mirrors PRUS_MORETOCOME just for the
		 * tcp_output_fair() call and is cleared afterwards.
		 */
		if (tp != NULL && !tcp_output_pending(tp)) {
			if (flags & PRUS_MORETOCOME)
				tp->t_flags |= TF_MORETOCOME;
			error = tcp_output_fair(tp);
			if (flags & PRUS_MORETOCOME)
				tp->t_flags &= ~TF_MORETOCOME;
		}
	}
	/* Request code passed on: OOB send, send-with-EOF, or plain send. */
	COMMON_END1((flags & PRUS_OOB) ? PRU_SENDOOB :
		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND),
		   (flags & PRUS_NOREPLY));
}