Example #1
0
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
				    DDP_BUF1_ACTIVE);

				if (ddp_placed)
					insert_ddp_data(toep, ddp_placed);
			}
		}

		if ((toep->ddp_flags & DDP_OK) == 0 &&
		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
			toep->ddp_score = DDP_LOW_SCORE;
			toep->ddp_flags |= DDP_OK;
			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
			    __func__, tid, time_uptime);
		}

		if (toep->ddp_flags & DDP_ON) {

			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
			 * soreceive to post a buffer or disable DDP.  The
			 * payload that arrived in this indicate is appended to
			 * the socket buffer as usual.
			 */

#if 0
			CTR5(KTR_CXGBE,
			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
			sb->sb_flags |= SB_DDP_INDICATE;
		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {

			/*
			 * DDP allowed but isn't on (and a request to switch it
			 * on isn't pending either), and conditions are ripe for
			 * it to work.  Switch it on.
			 */

			enable_ddp(sc, toep);
		}
	}

	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
	sbappendstream_locked(sb, m);
	toep->sb_cc = sb->sb_cc;
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	return (0);
}
Example #2
0
static int
handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
{
    uint32_t report = be32toh(ddp_report);
    unsigned int db_flag;
    struct inpcb *inp = toep->inp;
    struct tcpcb *tp;
    struct socket *so;
    struct sockbuf *sb;
    struct mbuf *m;

    db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;

    if (__predict_false(!(report & F_DDP_INV)))
        CXGBE_UNIMPLEMENTED("DDP buffer still valid");

    INP_WLOCK(inp);
    so = inp_inpcbtosocket(inp);
    sb = &so->so_rcv;
    if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {

        /*
         * XXX: think a bit more.
         * tcpcb probably gone, but socket should still be around
         * because we always wait for DDP completion in soreceive no
         * matter what.  Just wake it up and let it clean up.
         */

        CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
             __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
        SOCKBUF_LOCK(sb);
        goto wakeup;
    }

    tp = intotcpcb(inp);
    len += be32toh(rcv_nxt) - tp->rcv_nxt;
    tp->rcv_nxt += len;
    tp->t_rcvtime = ticks;
#ifndef USE_DDP_RX_FLOW_CONTROL
    KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
    tp->rcv_wnd -= len;
#endif
    m = get_ddp_mbuf(len);

    SOCKBUF_LOCK(sb);
    if (report & F_DDP_BUF_COMPLETE)
        toep->ddp_score = DDP_HIGH_SCORE;
    else
        discourage_ddp(toep);

    /* receive buffer autosize */
    if (sb->sb_flags & SB_AUTOSIZE &&
            V_tcp_do_autorcvbuf &&
            sb->sb_hiwat < V_tcp_autorcvbuf_max &&
            len > (sbspace(sb) / 8 * 7)) {
        unsigned int hiwat = sb->sb_hiwat;
        unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
                                   V_tcp_autorcvbuf_max);

        if (!sbreserve_locked(sb, newsize, so, NULL))
            sb->sb_flags &= ~SB_AUTOSIZE;
        else
            toep->rx_credits += newsize - hiwat;
    }

    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif
    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
wakeup:
    KASSERT(toep->ddp_flags & db_flag,
            ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x, report 0x%x",
             __func__, toep, toep->ddp_flags, report));
    toep->ddp_flags &= ~db_flag;
    sorwakeup_locked(so);
    SOCKBUF_UNLOCK_ASSERT(sb);

    INP_WUNLOCK(inp);
    return (0);
}
Example #3
0
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
				("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_LOCK(toep);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		if (toep->ulp_mode == ULP_MODE_TCPDDP)
			DDP_UNLOCK(toep);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	CURVNET_SET(so->so_vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0)
		CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__,
		    tid, len);

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~DDP_ON;
				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
				    __func__);

				insert_ddp_data(toep, ddp_placed);
			}
		}

		if (toep->ddp_flags & DDP_ON) {
			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.
			 * Start posting queued AIO requests via DDP.  The
			 * payload that arrived in this indicate is appended
			 * to the socket buffer as usual.
			 */
			handle_ddp_indicate(toep);
		}
	}

	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sbused(sb);
	sbappendstream_locked(sb, m, 0);
	toep->sb_cc = sbused(sb);
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}

	if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) {
		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
		    tid);
		ddp_queue_toep(toep);
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_UNLOCK(toep);

	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return (0);
}