Exemplo n.º 1
0
void
ofp_socantrcvmore_locked(struct socket *so)
{
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
}
Exemplo n.º 2
0
void
socantrcvmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
Exemplo n.º 3
0
static int
udp6_append(struct inpcb *inp, struct mbuf *n, int off,
    struct sockaddr_in6 *fromsa)
{
	struct socket *so;
	struct mbuf *opts;
	struct udpcb *up;

	INP_LOCK_ASSERT(inp);

	/*
	 * Engage the tunneling protocol.
	 */
	up = intoudpcb(inp);
	if (up->u_tun_func != NULL) {
		in_pcbref(inp);
		INP_RUNLOCK(inp);
		(*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
		    up->u_tun_ctx);
		INP_RLOCK(inp);
		return (in_pcbrele_rlocked(inp));
	}
#ifdef IPSEC
	/* Check AH/ESP integrity. */
	if (ipsec6_in_reject(n, inp)) {
		m_freem(n);
		return (0);
	}
#endif /* IPSEC */
#ifdef MAC
	if (mac_inpcb_check_deliver(inp, n) != 0) {
		m_freem(n);
		return (0);
	}
#endif
	opts = NULL;
	if (inp->inp_flags & INP_CONTROLOPTS ||
	    inp->inp_socket->so_options & SO_TIMESTAMP)
		ip6_savecontrol(inp, n, &opts);
	m_adj(n, off + sizeof(struct udphdr));

	so = inp->inp_socket;
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n,
	    opts) == 0) {
		SOCKBUF_UNLOCK(&so->so_rcv);
		m_freem(n);
		if (opts)
			m_freem(opts);
		UDPSTAT_INC(udps_fullsock);
	} else
		sorwakeup_locked(so);
	return (0);
}
Exemplo n.º 4
0
static inline struct mbuf *
sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	struct sdp_bsdh *h;

	h = mtod(mb, struct sdp_bsdh *);

#ifdef SDP_ZCOPY
	SDP_SKB_CB(mb)->seq = rcv_nxt(ssk);
	if (h->mid == SDP_MID_SRCAVAIL) {
		struct sdp_srcah *srcah = (struct sdp_srcah *)(h+1);
		struct rx_srcavail_state *rx_sa;
		
		ssk->srcavail_cancel_mseq = 0;

		ssk->rx_sa = rx_sa = RX_SRCAVAIL_STATE(mb) = kzalloc(
				sizeof(struct rx_srcavail_state), M_NOWAIT);

		rx_sa->mseq = ntohl(h->mseq);
		rx_sa->used = 0;
		rx_sa->len = mb_len = ntohl(srcah->len);
		rx_sa->rkey = ntohl(srcah->rkey);
		rx_sa->vaddr = be64_to_cpu(srcah->vaddr);
		rx_sa->flags = 0;

		if (ssk->tx_sa) {
			sdp_dbg_data(ssk->socket, "got RX SrcAvail while waiting "
					"for TX SrcAvail. waking up TX SrcAvail"
					"to be aborted\n");
			wake_up(sk->sk_sleep);
		}

		atomic_add(mb->len, &ssk->rcv_nxt);
		sdp_dbg_data(sk, "queueing SrcAvail. mb_len = %d vaddr = %lld\n",
			mb_len, rx_sa->vaddr);
	} else
#endif
	{
		atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt);
	}

	m_adj(mb, SDP_HEAD_SIZE);
	SOCKBUF_LOCK(&sk->so_rcv);
	if (unlikely(h->flags & SDP_OOB_PRES))
		sdp_urg(ssk, mb);
	sbappend_locked(&sk->so_rcv, mb);
	sorwakeup_locked(sk);
	return mb;
}
Exemplo n.º 5
0
/*
 * Send a SeND message to user space, that was either received and has to be
 * validated or was about to be send out and has to be handled by the SEND
 * daemon adding SeND ICMPv6 options.
 */
static int
send_input(struct mbuf *m, struct ifnet *ifp, int direction, int msglen __unused)
{
	struct ip6_hdr *ip6;
	struct sockaddr_send sendsrc;

	SEND_LOCK();
	if (V_send_so == NULL) {
		SEND_UNLOCK();
		return (-1);
	}

	/*
	 * Make sure to clear any possible internally embedded scope before
	 * passing the packet to user space for SeND cryptographic signature
	 * validation to succeed.
	 */
	ip6 = mtod(m, struct ip6_hdr *);
	in6_clearscope(&ip6->ip6_src);
	in6_clearscope(&ip6->ip6_dst);

	bzero(&sendsrc, sizeof(sendsrc));
	sendsrc.send_len = sizeof(sendsrc);
	sendsrc.send_family = AF_INET6;
	sendsrc.send_direction = direction;
	sendsrc.send_ifidx = ifp->if_index;

	/*
	 * Send incoming or outgoing traffic to user space either to be
	 * protected (outgoing) or validated (incoming) according to rfc3971.
	 */
	SOCKBUF_LOCK(&V_send_so->so_rcv);
	if (sbappendaddr_locked(&V_send_so->so_rcv,
	    (struct sockaddr *)&sendsrc, m, NULL) == 0) {
		SOCKBUF_UNLOCK(&V_send_so->so_rcv);
		/* XXX stats. */
		m_freem(m);
	} else {
		sorwakeup_locked(V_send_so);
	}

	SEND_UNLOCK();
	return (0);
}
Exemplo n.º 6
0
/*
 * Subroutine of udp_input(), which appends the provided mbuf chain to the
 * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
 * contains the source address.  If the socket ends up being an IPv6 socket,
 * udp_append() will convert to a sockaddr_in6 before passing the address
 * into the socket code.
 */
static void
udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
    struct sockaddr_in *udp_in)
{
	struct sockaddr *append_sa;
	struct socket *so;
	struct mbuf *opts = 0;
#ifdef INET6
	struct sockaddr_in6 udp_in6;
#endif
	struct udpcb *up;

	INP_LOCK_ASSERT(inp);

	/*
	 * Engage the tunneling protocol.
	 */
	up = intoudpcb(inp);
	if (up->u_tun_func != NULL) {
		(*up->u_tun_func)(n, off, inp);
		return;
	}

	if (n == NULL)
		return;

	off += sizeof(struct udphdr);

#ifdef IPSEC
	/* Check AH/ESP integrity. */
	if (ipsec4_in_reject(n, inp)) {
		m_freem(n);
		IPSECSTAT_INC(ips_in_polvio);
		return;
	}
#ifdef IPSEC_NAT_T
	up = intoudpcb(inp);
	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
	if (up->u_flags & UF_ESPINUDP_ALL) {	/* IPSec UDP encaps. */
		n = udp4_espdecap(inp, n, off);
		if (n == NULL)				/* Consumed. */
			return;
	}
#endif /* IPSEC_NAT_T */
#endif /* IPSEC */
#ifdef MAC
	if (mac_inpcb_check_deliver(inp, n) != 0) {
		m_freem(n);
		return;
	}
#endif /* MAC */
	if (inp->inp_flags & INP_CONTROLOPTS ||
	    inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6)
			(void)ip6_savecontrol_v4(inp, n, &opts, NULL);
		else
#endif /* INET6 */
			ip_savecontrol(inp, &opts, ip, n);
	}
#ifdef INET6
	if (inp->inp_vflag & INP_IPV6) {
		bzero(&udp_in6, sizeof(udp_in6));
		udp_in6.sin6_len = sizeof(udp_in6);
		udp_in6.sin6_family = AF_INET6;
		in6_sin_2_v4mapsin6(udp_in, &udp_in6);
		append_sa = (struct sockaddr *)&udp_in6;
	} else
#endif /* INET6 */
		append_sa = (struct sockaddr *)udp_in;
	m_adj(n, off);

	so = inp->inp_socket;
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
		SOCKBUF_UNLOCK(&so->so_rcv);
		m_freem(n);
		if (opts)
			m_freem(opts);
		UDPSTAT_INC(udps_fullsock);
	} else
		sorwakeup_locked(so);
}
Exemplo n.º 7
0
static int
handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
{
    uint32_t report = be32toh(ddp_report);
    unsigned int db_flag;
    struct inpcb *inp = toep->inp;
    struct tcpcb *tp;
    struct socket *so;
    struct sockbuf *sb;
    struct mbuf *m;

    db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;

    if (__predict_false(!(report & F_DDP_INV)))
        CXGBE_UNIMPLEMENTED("DDP buffer still valid");

    INP_WLOCK(inp);
    so = inp_inpcbtosocket(inp);
    sb = &so->so_rcv;
    if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {

        /*
         * XXX: think a bit more.
         * tcpcb probably gone, but socket should still be around
         * because we always wait for DDP completion in soreceive no
         * matter what.  Just wake it up and let it clean up.
         */

        CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
             __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
        SOCKBUF_LOCK(sb);
        goto wakeup;
    }

    tp = intotcpcb(inp);
    len += be32toh(rcv_nxt) - tp->rcv_nxt;
    tp->rcv_nxt += len;
    tp->t_rcvtime = ticks;
#ifndef USE_DDP_RX_FLOW_CONTROL
    KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
    tp->rcv_wnd -= len;
#endif
    m = get_ddp_mbuf(len);

    SOCKBUF_LOCK(sb);
    if (report & F_DDP_BUF_COMPLETE)
        toep->ddp_score = DDP_HIGH_SCORE;
    else
        discourage_ddp(toep);

    /* receive buffer autosize */
    if (sb->sb_flags & SB_AUTOSIZE &&
            V_tcp_do_autorcvbuf &&
            sb->sb_hiwat < V_tcp_autorcvbuf_max &&
            len > (sbspace(sb) / 8 * 7)) {
        unsigned int hiwat = sb->sb_hiwat;
        unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
                                   V_tcp_autorcvbuf_max);

        if (!sbreserve_locked(sb, newsize, so, NULL))
            sb->sb_flags &= ~SB_AUTOSIZE;
        else
            toep->rx_credits += newsize - hiwat;
    }

    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif
    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
wakeup:
    KASSERT(toep->ddp_flags & db_flag,
            ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x, report 0x%x",
             __func__, toep, toep->ddp_flags, report));
    toep->ddp_flags &= ~db_flag;
    sorwakeup_locked(so);
    SOCKBUF_UNLOCK_ASSERT(sb);

    INP_WUNLOCK(inp);
    return (0);
}
Exemplo n.º 8
0
int
tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	struct mbuf *mq, *mp;
	int flags, wakeup;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	/*
	 * XXX: tcp_reass() is rather inefficient with its data structures
	 * and should be rewritten (see NetBSD for optimizations).
	 */

	/*
	 * Call with th==NULL after become established to
	 * force pre-ESTABLISHED data up to user socket.
	 */
	if (th == NULL)
		goto present;

	M_ASSERTPKTHDR(m);
	KASSERT(*tlenp == m->m_pkthdr.len, ("%s: tlenp %u len %u", __func__,
	    *tlenp, m->m_pkthdr.len));

	/*
	 * Limit the number of segments that can be queued to reduce the
	 * potential for mbuf exhaustion. For best performance, we want to be
	 * able to queue a full window's worth of segments. The size of the
	 * socket receive buffer determines our advertised window and grows
	 * automatically when socket buffer autotuning is enabled. Use it as the
	 * basis for our queue limit.
	 * Always let the missing segment through which caused this queue.
	 * NB: Access to the socket buffer is left intentionally unlocked as we
	 * can tolerate stale information here.
	 */
	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
	    tp->t_segqlen + m->m_pkthdr.len >= sbspace(&so->so_rcv)) {
		char *s;

		TCPSTAT_INC(tcps_rcvreassfull);
		*tlenp = 0;
		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
		    NULL))) {
			log(LOG_DEBUG, "%s; %s: queue limit reached, "
			    "segment dropped\n", s, __func__);
			free(s, M_TCPLOG);
		}
		m_freem(m);
		return (0);
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	mp = NULL;
	for (mq = tp->t_segq; mq != NULL; mq = mq->m_nextpkt) {
		if (SEQ_GT(M_TCPHDR(mq)->th_seq, th->th_seq))
			break;
		mp = mq;
	}

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (mp != NULL) {
		int i;

		/* conversion to int (in i) handles seq wraparound */
		i = M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len - th->th_seq;
		if (i > 0) {
			if (i >= *tlenp) {
				TCPSTAT_INC(tcps_rcvduppack);
				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
				m_freem(m);
				/*
				 * Try to present any queued data
				 * at the left window edge to the user.
				 * This is needed after the 3-WHS
				 * completes.
				 */
				goto present;	/* ??? */
			}
			m_adj(m, i);
			*tlenp -= i;
			th->th_seq += i;
		}
	}
	tp->t_rcvoopack++;
	TCPSTAT_INC(tcps_rcvoopack);
	TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (mq) {
		struct mbuf *nq;
		int i;

		i = (th->th_seq + *tlenp) - M_TCPHDR(mq)->th_seq;
		if (i <= 0)
			break;
		if (i < mq->m_pkthdr.len) {
			M_TCPHDR(mq)->th_seq += i;
			m_adj(mq, i);
			tp->t_segqlen -= i;
			break;
		}

		nq = mq->m_nextpkt;
		tp->t_segqlen -= mq->m_pkthdr.len;
		m_freem(mq);
		if (mp)
			mp->m_nextpkt = nq;
		else
			tp->t_segq = nq;
		mq = nq;
	}

	/*
	 * Insert the new segment queue entry into place.  Try to collapse
	 * mbuf chains if segments are adjacent.
	 */
	if (mp) {
		if (M_TCPHDR(mp)->th_seq + mp->m_pkthdr.len == th->th_seq)
			m_catpkt(mp, m);
		else {
			m->m_nextpkt = mp->m_nextpkt;
			mp->m_nextpkt = m;
			m->m_pkthdr.pkt_tcphdr = th;
		}
	} else {
		mq = tp->t_segq;
		tp->t_segq = m;
		if (mq && th->th_seq + *tlenp == M_TCPHDR(mq)->th_seq) {
			m->m_nextpkt = mq->m_nextpkt;
			mq->m_nextpkt = NULL;
			m_catpkt(m, mq);
		} else
			m->m_nextpkt = mq;
		m->m_pkthdr.pkt_tcphdr = th;
	}
	tp->t_segqlen += *tlenp;

present:
	/*
	 * Present data to user, advancing rcv_nxt through
	 * completed sequence space.
	 */
	if (!TCPS_HAVEESTABLISHED(tp->t_state))
		return (0);

	flags = 0;
	wakeup = 0;
	SOCKBUF_LOCK(&so->so_rcv);
	while ((mq = tp->t_segq) != NULL &&
	    M_TCPHDR(mq)->th_seq == tp->rcv_nxt) {
		tp->t_segq = mq->m_nextpkt;

		tp->rcv_nxt += mq->m_pkthdr.len;
		tp->t_segqlen -= mq->m_pkthdr.len;
		flags = M_TCPHDR(mq)->th_flags & TH_FIN;

		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			m_freem(mq);
		else {
			mq->m_nextpkt = NULL;
			sbappendstream_locked(&so->so_rcv, mq, 0);
			wakeup = 1;
		}
	}
	ND6_HINT(tp);
	if (wakeup)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (flags);
}
Exemplo n.º 9
0
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
				    DDP_BUF1_ACTIVE);

				if (ddp_placed)
					insert_ddp_data(toep, ddp_placed);
			}
		}

		if ((toep->ddp_flags & DDP_OK) == 0 &&
		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
			toep->ddp_score = DDP_LOW_SCORE;
			toep->ddp_flags |= DDP_OK;
			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
			    __func__, tid, time_uptime);
		}

		if (toep->ddp_flags & DDP_ON) {

			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
			 * soreceive to post a buffer or disable DDP.  The
			 * payload that arrived in this indicate is appended to
			 * the socket buffer as usual.
			 */

#if 0
			CTR5(KTR_CXGBE,
			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
			sb->sb_flags |= SB_DDP_INDICATE;
		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {

			/*
			 * DDP allowed but isn't on (and a request to switch it
			 * on isn't pending either), and conditions are ripe for
			 * it to work.  Switch it on.
			 */

			enable_ddp(sc, toep);
		}
	}

	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
	sbappendstream_locked(sb, m);
	toep->sb_cc = sb->sb_cc;
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	return (0);
}
Exemplo n.º 10
0
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
				("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_LOCK(toep);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		if (toep->ulp_mode == ULP_MODE_TCPDDP)
			DDP_UNLOCK(toep);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	CURVNET_SET(so->so_vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0)
		CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__,
		    tid, len);

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~DDP_ON;
				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
				    __func__);

				insert_ddp_data(toep, ddp_placed);
			}
		}

		if (toep->ddp_flags & DDP_ON) {
			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.
			 * Start posting queued AIO requests via DDP.  The
			 * payload that arrived in this indicate is appended
			 * to the socket buffer as usual.
			 */
			handle_ddp_indicate(toep);
		}
	}

	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sbused(sb);
	sbappendstream_locked(sb, m, 0);
	toep->sb_cc = sbused(sb);
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}

	if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) {
		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
		    tid);
		ddp_queue_toep(toep);
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_UNLOCK(toep);

	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return (0);
}