Example #1
/*
 * This function measures the RTT of a particular segment/ack pair, or the next
 * closest if this will yield an inaccurate result due to delayed acking or
 * other issues.
 */
static inline void
marked_packet_rtt(struct txseginfo *txsi, struct ertt *e_t, struct tcpcb *tp,
    uint32_t *pmeasurenext, int *pmeasurenext_len, int *prtt_bytes_adjust,
    int mflag)
{

	/*
	 * If we can't measure this one properly due to delayed acking, adjust
	 * the byte counters and flag to measure the next txsi. Note that since
	 * the marked packet's transmitted bytes are measured, we need to
	 * subtract the transmitted bytes. Then pretend the next txsi was marked.
	 */
	if (mflag & (MULTI_ACK|OLD_TXSI)) {
		*pmeasurenext = txsi->tx_ts;
		*pmeasurenext_len = txsi->len;
		*prtt_bytes_adjust += *pmeasurenext_len;
	} else {
		if (mflag & FORCED_MEASUREMENT) {
			e_t->markedpkt_rtt = tcp_ts_getticks() -
			    *pmeasurenext + 1;
			e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt +
			    *pmeasurenext_len - *prtt_bytes_adjust;
		} else {
			e_t->markedpkt_rtt = tcp_ts_getticks() -
			    txsi->tx_ts + 1;
			e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt -
			    *prtt_bytes_adjust;
		}
		e_t->marked_snd_cwnd = tp->snd_cwnd;

		/*
		 * Reset the ERTT_MEASUREMENT_IN_PROGRESS flag to indicate to
		 * add_tx_segment_info that a new measurement should be started.
		 */
		e_t->flags &= ~ERTT_MEASUREMENT_IN_PROGRESS;
		/*
		 * Set ERTT_NEW_MEASUREMENT to tell the congestion control
		 * algorithm that a new marked RTT measurement has been made
		 * and is available for use.
		 */
		e_t->flags |= ERTT_NEW_MEASUREMENT;

		if (tp->t_flags & TF_TSO) {
			/* Temporarily disable TSO to aid a new measurement. */
			tp->t_flags &= ~TF_TSO;
			/* Keep track that we've disabled it. */
			e_t->flags |= ERTT_TSO_DISABLED;
		}
	}
}
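A note on the flag handshake above: ERTT_NEW_MEASUREMENT is what a consuming congestion control module is expected to poll and clear. A minimal sketch of that consumer side, assuming access to the per-connection struct ertt from the ERTT helper module (the function name here is hypothetical, not part of the source above):

static void
example_cc_ack_received(struct ertt *e_t)
{

	/* marked_packet_rtt() has published a fresh marked-RTT sample. */
	if (e_t->flags & ERTT_NEW_MEASUREMENT) {
		/* e_t->markedpkt_rtt and e_t->bytes_tx_in_marked_rtt are valid here. */
		/* ... feed the sample into the algorithm ... */

		/* Clear the flag so the sample is consumed only once. */
		e_t->flags &= ~ERTT_NEW_MEASUREMENT;
	}
}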
Example #2
/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct tcpcb *tp, unsigned int opt)
{
	struct toepcb *toep = tp->t_toe;
	struct adapter *sc = td_adapter(toep->td);

	INP_LOCK_ASSERT(tp->t_inpcb);

	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - 40;

	if (G_TCPOPT_TSTAMP(opt)) {
		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
		tp->ts_recent = 0;		/* hmmm */
		tp->ts_recent_age = tcp_ts_getticks();
		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
	}

	if (G_TCPOPT_SACK(opt))
		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
	else
		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */

	if (G_TCPOPT_WSCALE_OK(opt))
		tp->t_flags |= TF_RCVD_SCALE;

	/* Doing window scaling? */
	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
		tp->rcv_scale = tp->request_r_scale;
		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
	}
}
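The hardcoded 40 above is the fixed IPv4 plus TCP header overhead: sizeof(struct ip) and sizeof(struct tcphdr) are 20 bytes each when no options are present. A standalone userland check of that arithmetic (illustration only, not part of the driver):

#include <sys/types.h>
#include <assert.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

int
main(void)
{

	/* 20-byte IPv4 header + 20-byte TCP header, no options. */
	assert(sizeof(struct ip) + sizeof(struct tcphdr) == 40);
	return (0);
}

Example #3 below replaces the constant with a per-family computation, since an IPv6 header is 40 bytes on its own and the overhead becomes 60 bytes for IPv6 connections.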
Example #3
/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct tcpcb *tp, unsigned int opt)
{
	struct toepcb *toep = tp->t_toe;
	struct inpcb *inp = tp->t_inpcb;
	struct adapter *sc = td_adapter(toep->td);
	int n;

	INP_LOCK_ASSERT(inp);

	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		n = sizeof(struct ip) + sizeof(struct tcphdr);
	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;

	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);

	if (G_TCPOPT_TSTAMP(opt)) {
		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
		tp->ts_recent = 0;		/* hmmm */
		tp->ts_recent_age = tcp_ts_getticks();
		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
	}

	if (G_TCPOPT_SACK(opt))
		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
	else
		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */

	if (G_TCPOPT_WSCALE_OK(opt))
		tp->t_flags |= TF_RCVD_SCALE;

	/* Doing window scaling? */
	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
		tp->rcv_scale = tp->request_r_scale;
		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
	}
}
Example #4
/*
 * Add information about a transmitted segment to a list.
 * This is called via the helper hook in tcp_output.c
 */
static int
ertt_add_tx_segment_info_hook(int hhook_type, int hhook_id, void *udata,
    void *ctx_data, void *hdata, struct osd *hosd)
{
	struct ertt *e_t;
	struct tcpcb *tp;
	struct tcphdr *th;
	struct tcpopt *to;
	struct tcp_hhook_data *thdp;
	struct txseginfo *txsi;
	uint32_t len;
	int tso;

	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__));
	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__));

	e_t = (struct ertt *)hdata;
	thdp = ctx_data;
	tp = thdp->tp;
	th = thdp->th;
	to = thdp->to;
	len = thdp->len;
	tso = thdp->tso;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	if (len > 0) {
		txsi = uma_zalloc(txseginfo_zone, M_NOWAIT);
		if (txsi != NULL) {
			/* Construct txsi setting the necessary flags. */
			txsi->flags = 0; /* Needs to be initialised. */
			txsi->seq = ntohl(th->th_seq);
			txsi->len = len;
			if (tso)
				txsi->flags |= TXSI_TSO;
			else if (e_t->flags & ERTT_TSO_DISABLED) {
				tp->t_flags |= TF_TSO;
				e_t->flags &= ~ERTT_TSO_DISABLED;
			}

			if (e_t->flags & ERTT_MEASUREMENT_IN_PROGRESS) {
				e_t->bytes_tx_in_rtt += len;
			} else {
				txsi->flags |= TXSI_RTT_MEASURE_START;
				e_t->flags |= ERTT_MEASUREMENT_IN_PROGRESS;
				e_t->bytes_tx_in_rtt = len;
			}

			if (((tp->t_flags & TF_NOOPT) == 0) &&
			    (to->to_flags & TOF_TS)) {
				txsi->tx_ts = ntohl(to->to_tsval) -
				    tp->ts_offset;
				txsi->rx_ts = ntohl(to->to_tsecr);
			} else {
				txsi->tx_ts = tcp_ts_getticks();
				txsi->rx_ts = 0; /* No received time stamp. */
			}
			TAILQ_INSERT_TAIL(&e_t->txsegi_q, txsi, txsegi_lnk);
		}
	}

	return (0);
}
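The txsi records queued here are what the measurement hook in the next example walks and frees. The per-segment bookkeeping implied by this code looks roughly like the sketch below (field names are taken from the usage above; the actual declaration in the ERTT module may differ in order and types):

struct txseginfo {
	TAILQ_ENTRY(txseginfo) txsegi_lnk;	/* linkage in e_t->txsegi_q */
	uint32_t flags;		/* TXSI_TSO, TXSI_RTT_MEASURE_START */
	uint32_t len;		/* bytes carried by this transmission */
	tcp_seq seq;		/* starting sequence number */
	uint32_t tx_ts;		/* transmit time stamp (TS option or ticks) */
	uint32_t rx_ts;		/* last reflected time stamp, if any */
};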
Example #5
/*
 * Ertt_packet_measurements uses a small amount of state kept on each packet
 * sent to match incoming acknowledgements. This enables more accurate and
 * secure round trip time measurements. The resulting measurement is used by
 * congestion control algorithms which require a more accurate time.
 * Ertt_packet_measurements is called via the helper hook in tcp_input.c
 */
static int
ertt_packet_measurement_hook(int hhook_type, int hhook_id, void *udata,
    void *ctx_data, void *hdata, struct osd *hosd)
{
	struct ertt *e_t;
	struct tcpcb *tp;
	struct tcphdr *th;
	struct tcpopt *to;
	struct tcp_hhook_data *thdp;
	struct txseginfo *txsi;
	int acked, measurenext_len, multiack, new_sacked_bytes, rtt_bytes_adjust;
	uint32_t measurenext, rts;
	tcp_seq ack;

	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__));
	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__));

	e_t = (struct ertt *)hdata;
	thdp = ctx_data;
	tp = thdp->tp;
	th = thdp->th;
	to = thdp->to;
	new_sacked_bytes = (tp->sackhint.last_sack_ack != 0);
	measurenext = measurenext_len = multiack = rts = rtt_bytes_adjust = 0;
	acked = th->th_ack - tp->snd_una;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	/* Packet has provided new acknowledgements. */
	if (acked > 0 || new_sacked_bytes) {
		if (acked == 0 && new_sacked_bytes) {
			/* Use last sacked data. */
			ack = tp->sackhint.last_sack_ack;
		} else
			ack = th->th_ack;

		txsi = TAILQ_FIRST(&e_t->txsegi_q);
		while (txsi != NULL) {
			rts = 0;

			/* Acknowledgement is acking more than this txsi. */
			if (SEQ_GT(ack, txsi->seq + txsi->len)) {
				if (txsi->flags & TXSI_RTT_MEASURE_START ||
				    measurenext) {
					marked_packet_rtt(txsi, e_t, tp,
					    &measurenext, &measurenext_len,
					    &rtt_bytes_adjust, MULTI_ACK);
				}
				TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk);
				uma_zfree(txseginfo_zone, txsi);
				txsi = TAILQ_FIRST(&e_t->txsegi_q);
				continue;
			}

			/*
			 * Guess if delayed acks are being used by the receiver.
			 *
			 * XXXDH: A simple heuristic that could be improved
			 */
			if (!new_sacked_bytes) {
				if (acked > tp->t_maxseg) {
					e_t->dlyack_rx +=
					    (e_t->dlyack_rx < DLYACK_SMOOTH) ?
					    1 : 0;
					multiack = 1;
				} else if (acked > txsi->len) {
					multiack = 1;
					e_t->dlyack_rx +=
					    (e_t->dlyack_rx < DLYACK_SMOOTH) ?
					    1 : 0;
				} else if (acked == tp->t_maxseg ||
					   acked == txsi->len) {
					e_t->dlyack_rx -=
					    (e_t->dlyack_rx > 0) ? 1 : 0;
				}
				/* Otherwise leave dlyack_rx the way it was. */
			}

			/*
			 * Time stamps are only to help match the txsi with the
			 * received acknowledgements.
			 */
			if (e_t->timestamp_errors < MAX_TS_ERR &&
			    (to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
				/*
				 * Note: All packets sent with the offload will
				 * have the same time stamp. If we are sending
				 * on a fast interface and the t_maxseg is much
				 * smaller than one tick, this will be fine. The
				 * time stamp would be the same whether we were
				 * using tso or not. However, if the interface
				 * is slow, this will cause problems with the
				 * calculations. If the interface is slow, there
				 * is no reason to be using tso, and it should
				 * be turned off.
				 */
				/*
				 * If there are too many time stamp errors, time
				 * stamps won't be trusted.
				 */
				rts = to->to_tsecr;
				/* Before this packet. */
				if (!e_t->dlyack_rx && TSTMP_LT(rts, txsi->tx_ts))
					/*
					 * When delayed acking is used, the
					 * reflected time stamp is of the first
					 * packet and thus may be before
					 * txsi->tx_ts.
					 */
					break;
				if (TSTMP_GT(rts, txsi->tx_ts)) {
					/*
					 * If reflected time stamp is later than
					 * tx_tsi, then this txsi is old.
					 */
					if (txsi->flags & TXSI_RTT_MEASURE_START
					    || measurenext) {
						marked_packet_rtt(txsi, e_t, tp,
						    &measurenext, &measurenext_len,
						    &rtt_bytes_adjust, OLD_TXSI);
					}
					TAILQ_REMOVE(&e_t->txsegi_q, txsi,
					    txsegi_lnk);
					uma_zfree(txseginfo_zone, txsi);
					txsi = TAILQ_FIRST(&e_t->txsegi_q);
					continue;
				}
				if (rts == txsi->tx_ts &&
				    TSTMP_LT(to->to_tsval, txsi->rx_ts)) {
					/*
					 * Segment received before sent!
					 * Something is wrong with the received
					 * timestamps so increment errors. If
					 * this keeps up we will ignore
					 * timestamps.
					 */
					e_t->timestamp_errors++;
				}
			}
			/*
			 * Acknowledging a sequence number before this txsi.
			 * If it is an old txsi that may have had the same seq
			 * numbers, it should have been removed if time stamps
			 * are being used.
			 */
			if (SEQ_LEQ(ack, txsi->seq))
				break; /* Before first packet in txsi. */

			/*
			 * Only ack > txsi->seq and ack <= txsi->seq+txsi->len
			 * past this point.
			 *
			 * If delayed acks are being used, an acknowledgement
			 * for a single segment will have been delayed by the
			 * receiver and will yield an inaccurate measurement. In
			 * this case, we only make the measurement if more than
			 * one segment is being acknowledged or sack is
			 * currently being used.
			 */
			if (!e_t->dlyack_rx || multiack || new_sacked_bytes) {
				/* Make an accurate new measurement. */
				e_t->rtt = tcp_ts_getticks() - txsi->tx_ts + 1;

				if (e_t->rtt < e_t->minrtt || e_t->minrtt == 0)
					e_t->minrtt = e_t->rtt;

				if (e_t->rtt > e_t->maxrtt || e_t->maxrtt == 0)
					e_t->maxrtt = e_t->rtt;
			}

			if (txsi->flags & TXSI_RTT_MEASURE_START || measurenext)
				marked_packet_rtt(txsi, e_t, tp,
				    &measurenext, &measurenext_len,
				    &rtt_bytes_adjust, CORRECT_ACK);

			if (txsi->flags & TXSI_TSO) {
				if (txsi->len > acked) {
					txsi->len -= acked;
					/*
					 * This presumes ack for first bytes in
					 * txsi, this may not be true but it
					 * shouldn't cause problems for the
					 * timing.
					 *
					 * We remeasure RTT even though we only
					 * have a single txsi. The rationale
					 * behind this is that it is better to
					 * have a slightly inaccurate
					 * measurement than no additional
					 * measurement for the rest of the bulk
					 * transfer. TSO is only used on high
					 * speed interface cards, so the packets
					 * should be transmitted at line
					 * rate back to back with little
					 * difference in transmission times (in
					 * ticks).
					 */
					txsi->seq += acked;
					/*
					 * Reset txsi measure flag so we don't
					 * use it for another RTT measurement.
					 */
					txsi->flags &= ~TXSI_RTT_MEASURE_START;
					/*
					 * There is still more data to be acked
					 * from tso bulk transmission, so we
					 * won't remove it from the TAILQ yet.
					 */
					break;
				}
				txsi->len = 0;
			}

			TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk);
			uma_zfree(txseginfo_zone, txsi);
			break;
		}

		if (measurenext) {
			/*
			 * We need to do an RTT measurement. It won't be the best
			 * if we do it here.
			 */
			marked_packet_rtt(txsi, e_t, tp,
			    &measurenext, &measurenext_len,
			    &rtt_bytes_adjust, FORCED_MEASUREMENT);
		}
	}

	return (0);
}
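The new measurement above is tcp_ts_getticks() - txsi->tx_ts + 1; the + 1 keeps the result strictly positive even when the segment is acknowledged within the same tick it was sent in. A standalone illustration of that guard, using a userland millisecond-tick stand-in for tcp_ts_getticks() (hypothetical helper, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Userland stand-in: millisecond ticks from a monotonic clock. */
static uint32_t
ts_getticks(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000));
}

int
main(void)
{
	uint32_t tx_ts = ts_getticks();
	uint32_t rtt = ts_getticks() - tx_ts + 1;	/* never 0, even same-tick */

	printf("rtt = %u tick(s)\n", rtt);
	return (0);
}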
Example #6
static int
do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	uint8_t credits = cpl->credits;
	struct ofld_tx_sdesc *txsd;
	int plen;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	/*
	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
	 * now this comes back carrying the credits for the flowc.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
		    ("%s: credits for a synq entry %p", __func__, toep));
		return (0);
	}

	inp = toep->inp;

	KASSERT(opcode == CPL_FW4_ACK,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_WLOCK(inp);

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		return (0);
	}

	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));

	tp = intotcpcb(inp);

	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
		tcp_seq snd_una = be32toh(cpl->snd_una);

#ifdef INVARIANTS
		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
			log(LOG_ERR,
			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
			    __func__, snd_una, toep->tid, tp->snd_una);
		}
#endif

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->ts_recent_age = tcp_ts_getticks();
		}
	}

	so = inp->inp_socket;
	txsd = &toep->txsd[toep->txsd_cidx];
	plen = 0;
	while (credits) {
		KASSERT(credits >= txsd->tx_credits,
		    ("%s: too many (or partial) credits", __func__));
		credits -= txsd->tx_credits;
		toep->tx_credits += txsd->tx_credits;
		plen += txsd->plen;
		txsd++;
		toep->txsd_avail++;
		KASSERT(toep->txsd_avail <= toep->txsd_total,
		    ("%s: txsd avail > total", __func__));
		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
			txsd = &toep->txsd[0];
			toep->txsd_cidx = 0;
		}
	}

	if (plen > 0) {
		struct sockbuf *sb = &so->so_snd;

		SOCKBUF_LOCK(sb);
		sbdrop_locked(sb, plen);
		sowwakeup_locked(so);
		SOCKBUF_UNLOCK_ASSERT(sb);
	}

	/* XXX */
	if ((toep->flags & TPF_TX_SUSPENDED &&
	    toep->tx_credits >= MIN_OFLD_TX_CREDITS) ||
	    toep->tx_credits == toep->txsd_total *
	    howmany((sizeof(struct fw_ofld_tx_data_wr) + 1), 16)) {
		toep->flags &= ~TPF_TX_SUSPENDED;
		t4_push_frames(sc, toep);
	}
	INP_WUNLOCK(inp);

	return (0);
}
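The credit loop above walks the software tx descriptor array from txsd_cidx, returning each descriptor's credits and wrapping the consumer index when it reaches txsd_total. The wraparound pattern in isolation (a generic sketch, not driver code):

/* Advance a ring consumer index, mirroring the ++toep->txsd_cidx wrap above. */
static unsigned int
ring_advance(unsigned int cidx, unsigned int total)
{

	if (++cidx == total)
		cidx = 0;
	return (cidx);
}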
Example #7
static int
do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	uint8_t credits = cpl->credits;
	struct ofld_tx_sdesc *txsd;
	int plen;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	/*
	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
	 * now this comes back carrying the credits for the flowc.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
		    ("%s: credits for a synq entry %p", __func__, toep));
		return (0);
	}

	inp = toep->inp;

	KASSERT(opcode == CPL_FW4_ACK,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_WLOCK(inp);

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		return (0);
	}

	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));

	tp = intotcpcb(inp);

	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
		tcp_seq snd_una = be32toh(cpl->snd_una);

#ifdef INVARIANTS
		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
			log(LOG_ERR,
			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
			    __func__, snd_una, toep->tid, tp->snd_una);
		}
#endif

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->ts_recent_age = tcp_ts_getticks();
		}
	}

	so = inp->inp_socket;
	txsd = &toep->txsd[toep->txsd_cidx];
	plen = 0;
	while (credits) {
		KASSERT(credits >= txsd->tx_credits,
		    ("%s: too many (or partial) credits", __func__));
		credits -= txsd->tx_credits;
		toep->tx_credits += txsd->tx_credits;
		plen += txsd->plen;
		txsd++;
		toep->txsd_avail++;
		KASSERT(toep->txsd_avail <= toep->txsd_total,
		    ("%s: txsd avail > total", __func__));
		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
			txsd = &toep->txsd[0];
			toep->txsd_cidx = 0;
		}
	}

	if (toep->tx_credits == toep->tx_total) {
		toep->tx_nocompl = 0;
		toep->plen_nocompl = 0;
	}

	if (toep->flags & TPF_TX_SUSPENDED &&
	    toep->tx_credits >= toep->tx_total / 4) {
		toep->flags &= ~TPF_TX_SUSPENDED;
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_push_pdus(sc, toep, plen);
		else
			t4_push_frames(sc, toep, plen);
	} else if (plen > 0) {
		struct sockbuf *sb = &so->so_snd;
		int sbu;

		SOCKBUF_LOCK(sb);
		sbu = sbused(sb);
		if (toep->ulp_mode == ULP_MODE_ISCSI) {
			if (__predict_false(sbu > 0)) {
				/*
				 * The data transmitted before the tid's ULP mode
				 * changed to ISCSI is still in so_snd.
				 * Incoming credits should account for so_snd
				 * first.
				 */
				sbdrop_locked(sb, min(sbu, plen));
				plen -= min(sbu, plen);
			}
			sowwakeup_locked(so);	/* unlocks so_snd */
			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
		} else {
			sbdrop_locked(sb, plen);
			sowwakeup_locked(so);	/* unlocks so_snd */
		}
		SOCKBUF_UNLOCK_ASSERT(sb);
	}

	INP_WUNLOCK(inp);

	return (0);
}
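In the iSCSI branch above, returned credits are charged against so_snd first: any bytes still queued there from before the tid switched to ULP_MODE_ISCSI are dropped, and only the remainder is taken from the ULP PDU reclaim queue. For example, with plen = 5000 and sbused(sb) = 3000, sbdrop_locked() releases min(3000, 5000) = 3000 bytes from so_snd, and rqdrop_locked() is then asked for the remaining 2000.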