Example #1
/* XXX: handle_ddp_data code duplication */
void
insert_ddp_data(struct toepcb *toep, uint32_t n)
{
    struct inpcb *inp = toep->inp;
    struct tcpcb *tp = intotcpcb(inp);
    struct sockbuf *sb = &inp->inp_socket->so_rcv;
    struct mbuf *m;

    INP_WLOCK_ASSERT(inp);
    SOCKBUF_LOCK_ASSERT(sb);

    m = get_ddp_mbuf(n);
    tp->rcv_nxt += n;
#ifndef USE_DDP_RX_FLOW_CONTROL
    KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__));
    tp->rcv_wnd -= n;
#endif

    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= n;	/* adjust for F_RX_FC_DDP */
#endif
    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
}
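This credit bookkeeping recurs in most of the examples below. A minimal sketch of the idiom pulled out into a helper (hypothetical: the driver open-codes it at each site, and the names update_rx_credits and placed are illustrative):

static void
update_rx_credits(struct toepcb *toep, struct sockbuf *sb, int placed)
{
	/*
	 * Bytes the application consumed since the last snapshot become
	 * receive-window credits that can be returned to the hardware.
	 */
	toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
	toep->rx_credits -= placed;	/* data the chip placed directly */
#endif
	toep->sb_cc = sbused(sb);	/* new snapshot of the buffer */
}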
Example #2
void
handle_ddp_close(struct toepcb *toep, struct tcpcb *tp, struct sockbuf *sb,
                 __be32 rcv_nxt)
{
    struct mbuf *m;
    int len;

    SOCKBUF_LOCK_ASSERT(sb);
    INP_WLOCK_ASSERT(toep->inp);
    len = be32toh(rcv_nxt) - tp->rcv_nxt;

    /* Signal handle_ddp() to break out of its sleep loop. */
    toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE);
    if (len == 0)
        return;

    tp->rcv_nxt += len;
    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif

    m = get_ddp_mbuf(len);

    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
}
Example #3
/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
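The wrapper is for callers that do not already hold the socket-buffer lock; code that holds the lock, or that appends several chains back to back, calls sbappendstream_locked directly. A sketch of both patterns (function names are illustrative; Example #4 below shows the older signature without the flags argument):

void
append_once(struct sockbuf *sb, struct mbuf *m)
{
	/* Lock not held: the wrapper acquires and releases it. */
	sbappendstream(sb, m, 0);
}

void
append_batch(struct sockbuf *sb, struct mbuf *m1, struct mbuf *m2)
{
	/* Amortize one lock acquisition over several appends. */
	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m1, 0);
	sbappendstream_locked(sb, m2, 0);
	SOCKBUF_UNLOCK(sb);
}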
Example #4
/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
Example #5
/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 */
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	int headlocked = 0;
#ifdef INET6
	int isipv6;
#endif
	TCPDEBUG0;

	/*
	 * We require the pcbinfo lock in two cases:
	 *
	 * (1) An implied connect is taking place, which can result in
	 *     binding IPs and ports and hence modification of the pcb hash
	 *     chains.
	 *
	 * (2) PRUS_EOF is set, resulting in explicit close on the send.
	 */
	if ((nam != NULL) || (flags & PRUS_EOF)) {
		INP_INFO_WLOCK(&V_tcbinfo);
		headlocked = 1;
	}
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		if (control)
			m_freem(control);
		if (m)
			m_freem(m);
		error = ECONNRESET;
		goto out;
	}
#ifdef INET6
	isipv6 = nam && nam->sa_family == AF_INET6;
#endif /* INET6 */
	tp = intotcpcb(inp);
	TCPDEBUG1();
	if (control) {
		/* TCP doesn't do control messages (rights, creds, etc) */
		if (control->m_len) {
			m_freem(control);
			if (m)
				m_freem(m);
			error = EINVAL;
			goto out;
		}
		m_freem(control);	/* empty control, just free it */
	}
	if (!(flags & PRUS_OOB)) {
		sbappendstream(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, td);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, td);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
		}
		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
			socantsendmore(so);
			tcp_usrclosed(tp);
		}
		if (headlocked) {
			INP_INFO_WUNLOCK(&V_tcbinfo);
			headlocked = 0;
		}
		if (!(inp->inp_flags & INP_DROPPED)) {
			if (flags & PRUS_MORETOCOME)
				tp->t_flags |= TF_MORETOCOME;
			error = tcp_output_send(tp);
			if (flags & PRUS_MORETOCOME)
				tp->t_flags &= ~TF_MORETOCOME;
		}
	} else {
		/*
		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
		 */
		SOCKBUF_LOCK(&so->so_snd);
		if (sbspace(&so->so_snd) < -512) {
			SOCKBUF_UNLOCK(&so->so_snd);
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappendstream_locked(&so->so_snd, m);
		SOCKBUF_UNLOCK(&so->so_snd);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, td);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, td);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
			INP_INFO_WUNLOCK(&V_tcbinfo);
			headlocked = 0;
		} else if (nam) {
			INP_INFO_WUNLOCK(&V_tcbinfo);
			headlocked = 0;
		}
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		tp->t_flags |= TF_FORCEDATA;
		error = tcp_output_send(tp);
		tp->t_flags &= ~TF_FORCEDATA;
	}
out:
	TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
		  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
	INP_WUNLOCK(inp);
	if (headlocked)
		INP_INFO_WUNLOCK(&V_tcbinfo);
	return (error);
}
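The ownership rule from the opening comment, enqueue the chains or free them, is why every error path above calls m_freem() before bailing out. The rule in miniature (a hypothetical handler, using the newer three-argument sbappendstream):

static int
example_send(struct socket *so, struct mbuf *m)
{
	if (so->so_error != 0) {
		m_freem(m);	/* error path: we still own the chain */
		return (ECONNRESET);
	}
	sbappendstream(&so->so_snd, m, 0);	/* success: the buffer owns it */
	return (0);
}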
Example #6
static int
handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
{
    uint32_t report = be32toh(ddp_report);
    unsigned int db_flag;
    struct inpcb *inp = toep->inp;
    struct tcpcb *tp;
    struct socket *so;
    struct sockbuf *sb;
    struct mbuf *m;

    db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;

    if (__predict_false(!(report & F_DDP_INV)))
        CXGBE_UNIMPLEMENTED("DDP buffer still valid");

    INP_WLOCK(inp);
    so = inp_inpcbtosocket(inp);
    sb = &so->so_rcv;
    if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {

        /*
         * XXX: think a bit more.
         * tcpcb probably gone, but socket should still be around
         * because we always wait for DDP completion in soreceive no
         * matter what.  Just wake it up and let it clean up.
         */

        CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
             __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
        SOCKBUF_LOCK(sb);
        goto wakeup;
    }

    tp = intotcpcb(inp);
    len += be32toh(rcv_nxt) - tp->rcv_nxt;
    tp->rcv_nxt += len;
    tp->t_rcvtime = ticks;
#ifndef USE_DDP_RX_FLOW_CONTROL
    KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
    tp->rcv_wnd -= len;
#endif
    m = get_ddp_mbuf(len);

    SOCKBUF_LOCK(sb);
    if (report & F_DDP_BUF_COMPLETE)
        toep->ddp_score = DDP_HIGH_SCORE;
    else
        discourage_ddp(toep);

    /* receive buffer autosize */
    if (sb->sb_flags & SB_AUTOSIZE &&
            V_tcp_do_autorcvbuf &&
            sb->sb_hiwat < V_tcp_autorcvbuf_max &&
            len > (sbspace(sb) / 8 * 7)) {
        unsigned int hiwat = sb->sb_hiwat;
        unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
                                   V_tcp_autorcvbuf_max);

        if (!sbreserve_locked(sb, newsize, so, NULL))
            sb->sb_flags &= ~SB_AUTOSIZE;
        else
            toep->rx_credits += newsize - hiwat;
    }

    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif
    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
wakeup:
    KASSERT(toep->ddp_flags & db_flag,
            ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x, report 0x%x",
             __func__, toep, toep->ddp_flags, report));
    toep->ddp_flags &= ~db_flag;
    sorwakeup_locked(so);
    SOCKBUF_UNLOCK_ASSERT(sb);

    INP_WUNLOCK(inp);
    return (0);
}
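The receive-buffer autosize test grows the buffer when a single burst fills more than 7/8 of the remaining space. Worked numbers for that trigger (a standalone sketch; the values are made up):

#include <stdio.h>

int
main(void)
{
	int space = 16384;		/* sbspace(sb): room left */
	int threshold = space / 8 * 7;	/* 14336 */
	int len = 15000;		/* bytes that just arrived */

	/*
	 * len exceeds 7/8 of the free space, so the buffer would grow by
	 * V_tcp_autorcvbuf_inc, capped at V_tcp_autorcvbuf_max.
	 */
	printf("grow: %s\n", len > threshold ? "yes" : "no");
	return (0);
}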
Example #7
int
tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	struct mbuf *mq, *mp;
	int flags, wakeup;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	/*
	 * XXX: tcp_reass() is rather inefficient with its data structures
	 * and should be rewritten (see NetBSD for optimizations).
	 */

	/*
	 * Call with th==NULL after becoming established to
	 * force pre-ESTABLISHED data up to the user socket.
	 */
	if (th == NULL)
		goto present;

	M_ASSERTPKTHDR(m);
	KASSERT(*tlenp == m->m_pkthdr.len, ("%s: tlenp %u len %u", __func__,
	    *tlenp, m->m_pkthdr.len));

	/*
	 * Limit the number of segments that can be queued to reduce the
	 * potential for mbuf exhaustion. For best performance, we want to be
	 * able to queue a full window's worth of segments. The size of the
	 * socket receive buffer determines our advertised window and grows
	 * automatically when socket buffer autotuning is enabled. Use it as the
	 * basis for our queue limit.
	 * Always let through the missing segment that caused this queue to form.
	 * NB: Access to the socket buffer is left intentionally unlocked as we
	 * can tolerate stale information here.
	 */
	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
	    tp->t_segqlen + m->m_pkthdr.len >= sbspace(&so->so_rcv)) {
		char *s;

		TCPSTAT_INC(tcps_rcvreassfull);
		*tlenp = 0;
		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
		    NULL))) {
			log(LOG_DEBUG, "%s; %s: queue limit reached, "
			    "segment dropped\n", s, __func__);
			free(s, M_TCPLOG);
		}
		m_freem(m);
		return (0);
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	mp = NULL;
	for (mq = tp->t_segq; mq != NULL; mq = mq->m_nextpkt) {
		if (SEQ_GT(M_TCPHDR(mq)->th_seq, th->th_seq))
			break;
		mp = mq;
	}

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (mp != NULL) {
		int i;

		/* conversion to int (in i) handles seq wraparound */
		i = M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len - th->th_seq;
		if (i > 0) {
			if (i >= *tlenp) {
				TCPSTAT_INC(tcps_rcvduppack);
				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
				m_freem(m);
				/*
				 * Try to present any queued data
				 * at the left window edge to the user.
				 * This is needed after the 3-WHS
				 * completes.
				 */
				goto present;	/* ??? */
			}
			m_adj(m, i);
			*tlenp -= i;
			th->th_seq += i;
		}
	}
	tp->t_rcvoopack++;
	TCPSTAT_INC(tcps_rcvoopack);
	TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (mq) {
		struct mbuf *nq;
		int i;

		i = (th->th_seq + *tlenp) - M_TCPHDR(mq)->th_seq;
		if (i <= 0)
			break;
		if (i < mq->m_pkthdr.len) {
			M_TCPHDR(mq)->th_seq += i;
			m_adj(mq, i);
			tp->t_segqlen -= i;
			break;
		}

		nq = mq->m_nextpkt;
		tp->t_segqlen -= mq->m_pkthdr.len;
		m_freem(mq);
		if (mp)
			mp->m_nextpkt = nq;
		else
			tp->t_segq = nq;
		mq = nq;
	}

	/*
	 * Insert the new segment queue entry into place.  Try to collapse
	 * mbuf chains if segments are adjacent.
	 */
	if (mp) {
		if (M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len == th->th_seq)
			m_catpkt(mp, m);
		else {
			m->m_nextpkt = mp->m_nextpkt;
			mp->m_nextpkt = m;
			m->m_pkthdr.pkt_tcphdr = th;
		}
	} else {
		mq = tp->t_segq;
		tp->t_segq = m;
		if (mq && th->th_seq + *tlenp == M_TCPHDR(mq)->th_seq) {
			m->m_nextpkt = mq->m_nextpkt;
			mq->m_nextpkt = NULL;
			m_catpkt(m, mq);
		} else
			m->m_nextpkt = mq;
		m->m_pkthdr.pkt_tcphdr = th;
	}
	tp->t_segqlen += *tlenp;

present:
	/*
	 * Present data to user, advancing rcv_nxt through
	 * completed sequence space.
	 */
	if (!TCPS_HAVEESTABLISHED(tp->t_state))
		return (0);

	flags = 0;
	wakeup = 0;
	SOCKBUF_LOCK(&so->so_rcv);
	while ((mq = tp->t_segq) != NULL &&
	    M_TCPHDR(mq)->th_seq == tp->rcv_nxt) {
		tp->t_segq = mq->m_nextpkt;

		tp->rcv_nxt += mq->m_pkthdr.len;
		tp->t_segqlen -= mq->m_pkthdr.len;
		flags = M_TCPHDR(mq)->th_flags & TH_FIN;

		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			m_freem(mq);
		else {
			mq->m_nextpkt = NULL;
			sbappendstream_locked(&so->so_rcv, mq, 0);
			wakeup = 1;
		}
	}
	ND6_HINT(tp);
	if (wakeup)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (flags);
}
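The trimming logic depends on signed sequence arithmetic: the overlap is the queued segment's end minus the new segment's start, and the conversion to int makes 32-bit wraparound come out right. A worked example with concrete numbers (standalone and illustrative):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t prev_seq = 1000, prev_len = 500;	/* queued: covers 1000-1499 */
	uint32_t new_seq = 1300, new_len = 400;		/* arriving segment */

	int i = (int)(prev_seq + prev_len - new_seq);	/* 200 bytes of overlap */
	if (i > 0 && i < (int)new_len)
		/* tcp_reass would m_adj(m, i) and advance th_seq by i. */
		printf("trim %d bytes, seq becomes %u\n", i,
		    (unsigned)(new_seq + i));
	return (0);
}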
Example #8
/*
 * Peer has sent us a FIN.
 */
static int
do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so;
	struct sockbuf *sb;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PEER_CLOSE,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_INFO_WLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	tp->rcv_nxt++;	/* FIN */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
		m = m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			CXGBE_UNIMPLEMENTED("mbuf alloc failure");

		m->m_len = be32toh(cpl->rcv_nxt) - tp->rcv_nxt;
		m->m_flags |= M_DDP;	/* Data is already where it should be */
		m->m_data = "nothing to see here";
		tp->rcv_nxt = be32toh(cpl->rcv_nxt);

		toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE);

		KASSERT(toep->sb_cc >= sb->sb_cc,
		    ("%s: sb %p has more data (%d) than last time (%d).",
		    __func__, sb, sb->sb_cc, toep->sb_cc));
		toep->rx_credits += toep->sb_cc - sb->sb_cc;
#ifdef USE_DDP_RX_FLOW_CONTROL
		toep->rx_credits -= m->m_len;	/* adjust for F_RX_FC_DDP */
#endif
		sbappendstream_locked(sb, m);
		toep->sb_cc = sb->sb_cc;
	}
	socantrcvmore_locked(so);	/* unlocks the sockbuf */

	KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
	    ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
	    be32toh(cpl->rcv_nxt)));

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */ 

	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;

	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;

	case TCPS_FIN_WAIT_2:
		tcp_twstart(tp);
		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
		INP_INFO_WUNLOCK(&V_tcbinfo);

		INP_WLOCK(inp);
		final_cpl_received(toep);
		return (0);

	default:
		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
		    __func__, tid, tp->t_state);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	return (0);
}
Example #9
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
				    DDP_BUF1_ACTIVE);

				if (ddp_placed)
					insert_ddp_data(toep, ddp_placed);
			}
		}

		if ((toep->ddp_flags & DDP_OK) == 0 &&
		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
			toep->ddp_score = DDP_LOW_SCORE;
			toep->ddp_flags |= DDP_OK;
			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
			    __func__, tid, time_uptime);
		}

		if (toep->ddp_flags & DDP_ON) {

			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
			 * soreceive to post a buffer or disable DDP.  The
			 * payload that arrived in this indicate is appended to
			 * the socket buffer as usual.
			 */

#if 0
			CTR5(KTR_CXGBE,
			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
			sb->sb_flags |= SB_DDP_INDICATE;
		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {

			/*
			 * DDP allowed but isn't on (and a request to switch it
			 * on isn't pending either), and conditions are ripe for
			 * it to work.  Switch it on.
			 */

			enable_ddp(sc, toep);
		}
	}

	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
	sbappendstream_locked(sb, m);
	toep->sb_cc = sb->sb_cc;
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	return (0);
}
Example #10
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
				("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_LOCK(toep);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		if (toep->ulp_mode == ULP_MODE_TCPDDP)
			DDP_UNLOCK(toep);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	CURVNET_SET(so->so_vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0)
		CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__,
		    tid, len);

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~DDP_ON;
				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
				    __func__);

				insert_ddp_data(toep, ddp_placed);
			}
		}

		if (toep->ddp_flags & DDP_ON) {
			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.
			 * Start posting queued AIO requests via DDP.  The
			 * payload that arrived in this indicate is appended
			 * to the socket buffer as usual.
			 */
			handle_ddp_indicate(toep);
		}
	}

	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sbused(sb);
	sbappendstream_locked(sb, m, 0);
	toep->sb_cc = sbused(sb);
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}

	if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) {
		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
		    tid);
		ddp_queue_toep(toep);
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_UNLOCK(toep);

	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return (0);
}
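Example #10 also returns accumulated credits inline: once the bytes still buffered plus the window left open to the peer fall below the buffer's low-water mark, send_rx_credits() hands the credits back and the window reopens by the same amount. Illustrative numbers (standalone sketch; the values, including the low-water mark, are made up):

#include <stdio.h>

int
main(void)
{
	unsigned sb_cc = 2048;		/* bytes sitting in so_rcv */
	unsigned rcv_wnd = 4096;	/* window still open to the peer */
	unsigned sb_lowat = 8192;	/* low-water mark */
	unsigned rx_credits = 6144;	/* consumed but not yet returned */

	if (rx_credits > 0 && sb_cc + rcv_wnd < sb_lowat)
		/* send_rx_credits() would reopen the window by rx_credits. */
		printf("return %u credits; window -> %u\n", rx_credits,
		    rcv_wnd + rx_credits);
	return (0);
}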
Example #11
static int
userfw_sosend(struct socket *so,
		int flags,
		struct mbuf *m,
		struct sockaddr *addr_,
		struct mbuf *control,
		struct thread *td)
{
	int err = 0;
	struct userfwpcb *pcb = sotopcb(so);
	struct userfw_io_header msg;
	int cmd_ready = 0;
	unsigned char *data = NULL;
	struct mdchain chain;

	if (pcb == NULL)
		err = ENOTCONN;

	/* Control messages are not supported on this socket. */
	if (control != NULL)
		err = EINVAL;

	SOCKBUF_LOCK(&(so->so_snd));

	if (err == 0)
	{
		/* Queue the data; the socket buffer owns the chain now. */
		sbappendstream_locked(&(so->so_snd), m);
		m = NULL;

		/* Peek at the header to see if a full command has arrived. */
		md_initm(&chain, so->so_snd.sb_mb);
		if (SOCKBUF_LEN(so->so_snd) >= sizeof(msg))
		{
			md_get_mem(&chain, (caddr_t)(&msg), sizeof(msg), MB_MSYSTEM);
			if (SOCKBUF_LEN(so->so_snd) >= msg.length)
				cmd_ready = 1;
		}
	}

	if (err == 0 && cmd_ready)
	{
		/* Drop anything that is not a message or a command call. */
		if (msg.type != T_CONTAINER || (msg.subtype != ST_MESSAGE && msg.subtype != ST_CMDCALL))
		{
			cmd_ready = 0;
			sbdrop_locked(&(so->so_snd), msg.length);
		}
	}

	if (err == 0 && cmd_ready)
	{
		/*
		 * Copy the complete command out, dispatch it, and drop it
		 * from the buffer.  M_NOWAIT: we cannot sleep while the
		 * sockbuf lock is held.
		 */
		data = malloc(msg.length, M_USERFW, M_NOWAIT);
		if (data == NULL)
			err = ENOMEM;
		else
		{
			md_initm(&chain, so->so_snd.sb_mb);
			md_get_mem(&chain, data, msg.length, MB_MSYSTEM);

			err = userfw_cmd_dispatch(data, so, td);
			sbdrop_locked(&(so->so_snd), msg.length);
			free(data, M_USERFW);
		}
	}

	SOCKBUF_UNLOCK(&(so->so_snd));

	if (control != NULL)
		m_freem(control);
	if (m != NULL)
		m_freem(m);

	return (err);
}