Example #1
File: t4_ddp.c Project: fengsi/freebsd
void
handle_ddp_close(struct toepcb *toep, struct tcpcb *tp, struct sockbuf *sb,
                 __be32 rcv_nxt)
{
    struct mbuf *m;
    int len;

    SOCKBUF_LOCK_ASSERT(sb);
    INP_WLOCK_ASSERT(toep->inp);
    len = be32toh(rcv_nxt) - tp->rcv_nxt;

    /* Signal handle_ddp() to break out of its sleep loop. */
    toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE);
    if (len == 0)
        return;

    tp->rcv_nxt += len;
    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif

    m = get_ddp_mbuf(len);

    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
}
Example #2
File: t4_ddp.c Project: fengsi/freebsd
/* XXX: handle_ddp_data code duplication */
void
insert_ddp_data(struct toepcb *toep, uint32_t n)
{
    struct inpcb *inp = toep->inp;
    struct tcpcb *tp = intotcpcb(inp);
    struct sockbuf *sb = &inp->inp_socket->so_rcv;
    struct mbuf *m;

    INP_WLOCK_ASSERT(inp);
    SOCKBUF_LOCK_ASSERT(sb);

    m = get_ddp_mbuf(n);
    tp->rcv_nxt += n;
#ifndef USE_DDP_RX_FLOW_CONTROL
    KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__));
    tp->rcv_wnd -= n;
#endif

    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= n;	/* adjust for F_RX_FC_DDP */
#endif
    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
}
Example #3
void
t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_rcv;
	struct toepcb *toep = tp->t_toe;
	int credits;

	INP_WLOCK_ASSERT(inp);

	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));

	toep->rx_credits += toep->sb_cc - sbused(sb);
	toep->sb_cc = sbused(sb);

	if (toep->rx_credits > 0 &&
	    (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 ||
	    (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
	    toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) {

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
}
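Examples #1-#3 share one accounting pattern: the driver caches the socket buffer's occupancy in toep->sb_cc and, on each event, returns to the rx-credit pool whatever the application has consumed since the last snapshot. A minimal sketch of just that pattern, with a hypothetical helper name that is not part of the driver:

/*
 * Hypothetical helper (illustration only): credit back the bytes the
 * application read since the last snapshot, then resync the cached
 * occupancy.  The caller must hold the sockbuf lock.
 */
static void
toe_refresh_rx_credits(struct toepcb *toep, struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p shrank below cached count", __func__, sb));
	toep->rx_credits += toep->sb_cc - sbused(sb);
	toep->sb_cc = sbused(sb);
}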
Example #4
File: sdp_rx.c Project: Lxg1582/freebsd
static inline int
sdp_post_recvs_needed(struct sdp_sock *ssk)
{
	unsigned long bytes_in_process;
	unsigned long max_bytes;
	int buffer_size;
	int posted;

	if (!ssk->qp_active || !ssk->socket)
		return 0;

	posted = rx_ring_posted(ssk);
	if (posted >= SDP_RX_SIZE)
		return 0;
	if (posted < SDP_MIN_TX_CREDITS)
		return 1;

	buffer_size = ssk->recv_bytes;
	max_bytes = max(ssk->socket->so_snd.sb_hiwat,
	    (1 + SDP_MIN_TX_CREDITS) * buffer_size);
	max_bytes *= rcvbuf_scale;
	/*
	 * Compute bytes in the receive queue and socket buffer.
	 */
	bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size;
	bytes_in_process += sbused(&ssk->socket->so_rcv);

	return bytes_in_process < max_bytes;
}
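A plausible caller keeps posting receive buffers for as long as the predicate above holds. A hedged sketch, assuming a helper sdp_post_recv() that posts a single buffer and returns non-zero on failure:

/* Illustrative posting loop; sdp_post_recv() is an assumed helper. */
static void
sdp_post_recvs(struct sdp_sock *ssk)
{

	while (sdp_post_recvs_needed(ssk)) {
		if (sdp_post_recv(ssk))
			break;	/* stop on allocation or QP error */
	}
}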
Example #5
static bool_t
svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
{

	*ack = atomic_load_acq_32(&xprt->xp_snt_cnt);
	*ack -= sbused(&xprt->xp_socket->so_snd);
	return (TRUE);
}
Example #6
static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	u_int pdu_len, val;

	MPASS(m == NULL);

	/* Must already be assembling a PDU. */
	MPASS(icp != NULL);
	MPASS(icp->icp_flags & ICPF_RX_HDR);	/* Data is optional. */
	MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);

	pdu_len = be16toh(cpl->len);	/* includes everything. */
	val = be32toh(cpl->ddpvld);

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
	    __func__, tid, pdu_len, val, icp->icp_flags);
#endif

	icp->icp_flags |= ICPF_RX_STATUS;
	ip = &icp->ip;
	if (val & F_DDP_PADDING_ERR)
		icp->icp_flags |= ICPF_PAD_ERR;
	if (val & F_DDP_HDRCRC_ERR)
		icp->icp_flags |= ICPF_HCRC_ERR;
	if (val & F_DDP_DATACRC_ERR)
		icp->icp_flags |= ICPF_DCRC_ERR;
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		counter_u64_add(ci->ddp_pdus, 1);
		counter_u64_add(ci->ddp_bytes, ip->ip_data_len);
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
		toep->ulpcb2 = NULL;
#endif
		return (0);
	}

	tp = intotcpcb(inp);
	MPASS(icp->icp_seq == tp->rcv_nxt);
	MPASS(tp->rcv_wnd >= pdu_len);
	tp->rcv_nxt += pdu_len;
	tp->rcv_wnd -= pdu_len;
	tp->t_rcvtime = ticks;

	/* update rx credits */
	toep->rx_credits += pdu_len;
	t4_rcvd(&toep->td->tod, tp);	/* XXX: sc->tom_softc.tod */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	icc = toep->ulpcb;
	if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
		toep->ulpcb2 = NULL;
#endif
		return (0);
	}
	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	icl_cxgbei_new_pdu_set_conn(ip, ic);

	MPASS(m == NULL); /* was unused, we'll use it now. */
	m = sbcut_locked(sb, sbused(sb)); /* XXXNP: toep->sb_cc accounting? */
	if (__predict_false(m != NULL)) {
		int len = m_length(m, NULL);

		/*
		 * PDUs were received before the tid transitioned to ULP mode.
		 * Convert them to icl_cxgbei_pdus and send them to ICL before
		 * the PDU in icp/ip.
		 */
		CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, tid,
		    len);

		/* XXXNP: needs to be rewritten. */
		if (len == sizeof(struct iscsi_bhs) || len == 4 + sizeof(struct
		    iscsi_bhs)) {
			struct icl_cxgbei_pdu *icp0;
			struct icl_pdu *ip0;

			ip0 = icl_cxgbei_new_pdu(M_NOWAIT);
			if (ip0 == NULL)
				CXGBE_UNIMPLEMENTED("PDU allocation failure");
			icl_cxgbei_new_pdu_set_conn(ip0, ic);
			icp0 = ip_to_icp(ip0);
			icp0->icp_seq = 0; /* XXX */
			icp0->icp_flags = ICPF_RX_HDR | ICPF_RX_STATUS;
			m_copydata(m, 0, sizeof(struct iscsi_bhs), (void *)ip0->ip_bhs);
			STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip0, ip_next);
		}
		m_freem(m);
	}

	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if ((icc->rx_flags & RXF_ACTIVE) == 0) {
		struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];

		mtx_lock(&cwt->cwt_lock);
		icc->rx_flags |= RXF_ACTIVE;
		TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
		if (cwt->cwt_state == CWT_SLEEPING) {
			cwt->cwt_state = CWT_RUNNING;
			cv_signal(&cwt->cwt_cv);
		}
		mtx_unlock(&cwt->cwt_lock);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

#ifdef INVARIANTS
	toep->ulpcb2 = NULL;
#endif

	return (0);
}
Example #7
static int
soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
    struct thread *td)
{
	struct socket *so = fp->f_data;
	int error = 0;

	switch (cmd) {
	case FIONBIO:
		SOCK_LOCK(so);
		if (*(int *)data)
			so->so_state |= SS_NBIO;
		else
			so->so_state &= ~SS_NBIO;
		SOCK_UNLOCK(so);
		break;

	case FIOASYNC:
		/*
		 * XXXRW: This code separately acquires SOCK_LOCK(so) and
		 * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
		 * mutex to avoid introducing the assumption that they are
		 * the same.
		 */
		if (*(int *)data) {
			SOCK_LOCK(so);
			so->so_state |= SS_ASYNC;
			SOCK_UNLOCK(so);
			SOCKBUF_LOCK(&so->so_rcv);
			so->so_rcv.sb_flags |= SB_ASYNC;
			SOCKBUF_UNLOCK(&so->so_rcv);
			SOCKBUF_LOCK(&so->so_snd);
			so->so_snd.sb_flags |= SB_ASYNC;
			SOCKBUF_UNLOCK(&so->so_snd);
		} else {
			SOCK_LOCK(so);
			so->so_state &= ~SS_ASYNC;
			SOCK_UNLOCK(so);
			SOCKBUF_LOCK(&so->so_rcv);
			so->so_rcv.sb_flags &= ~SB_ASYNC;
			SOCKBUF_UNLOCK(&so->so_rcv);
			SOCKBUF_LOCK(&so->so_snd);
			so->so_snd.sb_flags &= ~SB_ASYNC;
			SOCKBUF_UNLOCK(&so->so_snd);
		}
		break;

	case FIONREAD:
		/* Unlocked read. */
		*(int *)data = sbavail(&so->so_rcv);
		break;

	case FIONWRITE:
		/* Unlocked read. */
		*(int *)data = sbavail(&so->so_snd);
		break;

	case FIONSPACE:
		/* Unlocked read. */
		if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) ||
		    (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt))
			*(int *)data = 0;
		else
			*(int *)data = sbspace(&so->so_snd);
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)data, &so->so_sigio);
		break;

	case FIOGETOWN:
		*(int *)data = fgetown(&so->so_sigio);
		break;

	case SIOCSPGRP:
		error = fsetown(-(*(int *)data), &so->so_sigio);
		break;

	case SIOCGPGRP:
		*(int *)data = -fgetown(&so->so_sigio);
		break;

	case SIOCATMARK:
		/* Unlocked read. */
		*(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0;
		break;
	default:
		/*
		 * Interface/routing/protocol specific ioctls: interface and
		 * routing ioctls should have a different entry since a
		 * socket is unnecessary.
		 */
		if (IOCGROUP(cmd) == 'i')
			error = ifioctl(so, cmd, data, td);
		else if (IOCGROUP(cmd) == 'r') {
			CURVNET_SET(so->so_vnet);
			error = rtioctl_fib(cmd, data, so->so_fibnum);
			CURVNET_RESTORE();
		} else {
			CURVNET_SET(so->so_vnet);
			error = ((*so->so_proto->pr_usrreqs->pru_control)
			    (so, cmd, data, 0, td));
			CURVNET_RESTORE();
		}
		break;
	}
	return (error);
}
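The FIONREAD and FIONWRITE branches above back the corresponding ioctl(2) requests from userspace. A minimal usage sketch against a connected socket descriptor (FIONWRITE is FreeBSD-specific; error handling kept short):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/filio.h>
#include <stdio.h>

/* Report how many bytes are readable from, and still unsent on, fd. */
static void
print_sock_queues(int fd)
{
	int nread, nwrite;

	if (ioctl(fd, FIONREAD, &nread) == -1 ||
	    ioctl(fd, FIONWRITE, &nwrite) == -1) {
		perror("ioctl");
		return;
	}
	printf("readable: %d bytes, unsent: %d bytes\n", nread, nwrite);
}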
Example #8
File: t4_ddp.c Project: fengsi/freebsd
static int
handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
{
    uint32_t report = be32toh(ddp_report);
    unsigned int db_flag;
    struct inpcb *inp = toep->inp;
    struct tcpcb *tp;
    struct socket *so;
    struct sockbuf *sb;
    struct mbuf *m;

    db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;

    if (__predict_false(!(report & F_DDP_INV)))
        CXGBE_UNIMPLEMENTED("DDP buffer still valid");

    INP_WLOCK(inp);
    so = inp_inpcbtosocket(inp);
    sb = &so->so_rcv;
    if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {

        /*
         * XXX: think a bit more.
         * tcpcb probably gone, but socket should still be around
         * because we always wait for DDP completion in soreceive no
         * matter what.  Just wake it up and let it clean up.
         */

        CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
             __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
        SOCKBUF_LOCK(sb);
        goto wakeup;
    }

    tp = intotcpcb(inp);
    len += be32toh(rcv_nxt) - tp->rcv_nxt;
    tp->rcv_nxt += len;
    tp->t_rcvtime = ticks;
#ifndef USE_DDP_RX_FLOW_CONTROL
    KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
    tp->rcv_wnd -= len;
#endif
    m = get_ddp_mbuf(len);

    SOCKBUF_LOCK(sb);
    if (report & F_DDP_BUF_COMPLETE)
        toep->ddp_score = DDP_HIGH_SCORE;
    else
        discourage_ddp(toep);

    /* receive buffer autosize */
    if (sb->sb_flags & SB_AUTOSIZE &&
            V_tcp_do_autorcvbuf &&
            sb->sb_hiwat < V_tcp_autorcvbuf_max &&
            len > (sbspace(sb) / 8 * 7)) {
        unsigned int hiwat = sb->sb_hiwat;
        unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
                                   V_tcp_autorcvbuf_max);

        if (!sbreserve_locked(sb, newsize, so, NULL))
            sb->sb_flags &= ~SB_AUTOSIZE;
        else
            toep->rx_credits += newsize - hiwat;
    }

    KASSERT(toep->sb_cc >= sbused(sb),
            ("%s: sb %p has more data (%d) than last time (%d).",
             __func__, sb, sbused(sb), toep->sb_cc));
    toep->rx_credits += toep->sb_cc - sbused(sb);
#ifdef USE_DDP_RX_FLOW_CONTROL
    toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif
    sbappendstream_locked(sb, m, 0);
    toep->sb_cc = sbused(sb);
wakeup:
    KASSERT(toep->ddp_flags & db_flag,
            ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x, report 0x%x",
             __func__, toep, toep->ddp_flags, report));
    toep->ddp_flags &= ~db_flag;
    sorwakeup_locked(so);
    SOCKBUF_UNLOCK_ASSERT(sb);

    INP_WUNLOCK(inp);
    return (0);
}
Example #9
/*
 * Send data and/or a FIN to the peer.
 *
 * The socket's so_snd buffer consists of a stream of data starting with sb_mb
 * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
 * was transmitted.
 *
 * drop indicates the number of bytes that should be dropped from the head of
 * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
 * contention on the send buffer lock (before this change it used to do
 * sowwakeup and then t4_push_frames right after that when recovering from tx
 * stalls).  When drop is set this function MUST drop the bytes and wake up any
 * writers.
 */
void
t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m, *sb_sndptr;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_snd;
	int tx_credits, shove, compl, sowwakeup;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));

	KASSERT(toep->ulp_mode == ULP_MODE_NONE ||
	    toep->ulp_mode == ULP_MODE_TCPDDP ||
	    toep->ulp_mode == ULP_MODE_RDMA,
	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
		return;

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
	 */
	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
		KASSERT(drop == 0,
		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
		return;
	}

	do {
		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
		max_imm = max_imm_payload(tx_credits);
		max_nsegs = max_dsgl_nsegs(tx_credits);

		SOCKBUF_LOCK(sb);
		sowwakeup = drop;
		if (drop) {
			sbdrop_locked(sb, drop);
			drop = 0;
		}
		sb_sndptr = sb->sb_sndptr;
		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
		plen = 0;
		nsegs = 0;
		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
		for (m = sndptr; m != NULL; m = m->m_next) {
			int n = sglist_count(mtod(m, void *), m->m_len);

			nsegs += n;
			plen += m->m_len;

			/* This mbuf sent us _over_ the nsegs limit, back out */
			if (plen > max_imm && nsegs > max_nsegs) {
				nsegs -= n;
				plen -= m->m_len;
				if (plen == 0) {
					/* Too few credits */
					toep->flags |= TPF_TX_SUSPENDED;
					if (sowwakeup)
						sowwakeup_locked(so);
					else
						SOCKBUF_UNLOCK(sb);
					SOCKBUF_UNLOCK_ASSERT(sb);
					return;
				}
				break;
			}

			if (max_nsegs_1mbuf < n)
				max_nsegs_1mbuf = n;
			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */

			/* This mbuf put us right at the max_nsegs limit */
			if (plen > max_imm && nsegs == max_nsegs) {
				m = m->m_next;
				break;
			}
		}

		if (sbused(sb) > sb->sb_hiwat * 5 / 8 &&
		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
			compl = 1;
		else
			compl = 0;

		if (sb->sb_flags & SB_AUTOSIZE &&
		    V_tcp_do_autosndbuf &&
		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
			    V_tcp_autosndbuf_max);

			if (!sbreserve_locked(sb, newsize, so, NULL))
				sb->sb_flags &= ~SB_AUTOSIZE;
			else
				sowwakeup = 1;	/* room available */
		}
		if (sowwakeup)
			sowwakeup_locked(so);
		else
			SOCKBUF_UNLOCK(sb);
		SOCKBUF_UNLOCK_ASSERT(sb);

		/* nothing to send */
		if (plen == 0) {
			KASSERT(m == NULL,
			    ("%s: nothing to send, but m != NULL", __func__));
			break;
		}

		if (__predict_false(toep->flags & TPF_FIN_SENT))
			panic("%s: excess tx.", __func__);

		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
		if (plen <= max_imm) {

			/* Immediate data tx */

			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
					toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr->wr_len, 16);
			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
			    sc->tt.tx_align);
			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
			nsegs = 0;
		} else {
			int wr_len;

			/* DSGL tx */

			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr_len, 16);
			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
			    sc->tt.tx_align);
			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
			    max_nsegs_1mbuf);
			if (wr_len & 0xf) {
				uint64_t *pad = (uint64_t *)
				    ((uintptr_t)txwr + wr_len);
				*pad = 0;
			}
		}

		KASSERT(toep->tx_credits >= credits,
			("%s: not enough credits", __func__));

		toep->tx_credits -= credits;
		toep->tx_nocompl += credits;
		toep->plen_nocompl += plen;
		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
		    toep->tx_nocompl >= toep->tx_total / 4)
			compl = 1;

		if (compl || toep->ulp_mode == ULP_MODE_RDMA) {
			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
			toep->tx_nocompl = 0;
			toep->plen_nocompl = 0;
		}

		tp->snd_nxt += plen;
		tp->snd_max += plen;

		SOCKBUF_LOCK(sb);
		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
		sb->sb_sndptr = sb_sndptr;
		SOCKBUF_UNLOCK(sb);

		toep->flags |= TPF_TX_DATA_SENT;
		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
			toep->flags |= TPF_TX_SUSPENDED;

		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
		txsd->plen = plen;
		txsd->tx_credits = credits;
		txsd++;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
			toep->txsd_pidx = 0;
			txsd = &toep->txsd[0];
		}
		toep->txsd_avail--;

		t4_l2t_send(sc, wr, toep->l2te);
	} while (m != NULL);

	/* Send a FIN if requested, but only if there's no more data to send */
	if (m == NULL && toep->flags & TPF_SEND_FIN)
		close_conn(sc, toep);
}
Example #10
static int
do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	uint8_t credits = cpl->credits;
	struct ofld_tx_sdesc *txsd;
	int plen;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	/*
	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
	 * now this comes back carrying the credits for the flowc.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
		    ("%s: credits for a synq entry %p", __func__, toep));
		return (0);
	}

	inp = toep->inp;

	KASSERT(opcode == CPL_FW4_ACK,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_WLOCK(inp);

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		return (0);
	}

	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));

	tp = intotcpcb(inp);

	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
		tcp_seq snd_una = be32toh(cpl->snd_una);

#ifdef INVARIANTS
		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
			log(LOG_ERR,
			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
			    __func__, snd_una, toep->tid, tp->snd_una);
		}
#endif

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->ts_recent_age = tcp_ts_getticks();
		}
	}

	so = inp->inp_socket;
	txsd = &toep->txsd[toep->txsd_cidx];
	plen = 0;
	while (credits) {
		KASSERT(credits >= txsd->tx_credits,
		    ("%s: too many (or partial) credits", __func__));
		credits -= txsd->tx_credits;
		toep->tx_credits += txsd->tx_credits;
		plen += txsd->plen;
		txsd++;
		toep->txsd_avail++;
		KASSERT(toep->txsd_avail <= toep->txsd_total,
		    ("%s: txsd avail > total", __func__));
		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
			txsd = &toep->txsd[0];
			toep->txsd_cidx = 0;
		}
	}

	if (toep->tx_credits == toep->tx_total) {
		toep->tx_nocompl = 0;
		toep->plen_nocompl = 0;
	}

	if (toep->flags & TPF_TX_SUSPENDED &&
	    toep->tx_credits >= toep->tx_total / 4) {
		toep->flags &= ~TPF_TX_SUSPENDED;
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_push_pdus(sc, toep, plen);
		else
			t4_push_frames(sc, toep, plen);
	} else if (plen > 0) {
		struct sockbuf *sb = &so->so_snd;
		int sbu;

		SOCKBUF_LOCK(sb);
		sbu = sbused(sb);
		if (toep->ulp_mode == ULP_MODE_ISCSI) {

			if (__predict_false(sbu > 0)) {
				/*
				 * The data transmitted before the tid's ULP mode
				 * changed to ISCSI is still in so_snd.
				 * Incoming credits should account for so_snd
				 * first.
				 */
				sbdrop_locked(sb, min(sbu, plen));
				plen -= min(sbu, plen);
			}
			sowwakeup_locked(so);	/* unlocks so_snd */
			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
		} else {
			sbdrop_locked(sb, plen);
			sowwakeup_locked(so);	/* unlocks so_snd */
		}
		SOCKBUF_UNLOCK_ASSERT(sb);
	}

	INP_WUNLOCK(inp);

	return (0);
}
Example #11
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
				("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_LOCK(toep);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		if (toep->ulp_mode == ULP_MODE_TCPDDP)
			DDP_UNLOCK(toep);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	CURVNET_SET(so->so_vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0)
		CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__,
		    tid, len);

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~DDP_ON;
				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
				    __func__);

				insert_ddp_data(toep, ddp_placed);
			}
		}

		if (toep->ddp_flags & DDP_ON) {
			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.
			 * Start posting queued AIO requests via DDP.  The
			 * payload that arrived in this indicate is appended
			 * to the socket buffer as usual.
			 */
			handle_ddp_indicate(toep);
		}
	}

	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sbused(sb);
	sbappendstream_locked(sb, m, 0);
	toep->sb_cc = sbused(sb);
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}

	if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) {
		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
		    tid);
		ddp_queue_toep(toep);
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_UNLOCK(toep);

	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return (0);
}
Example #12
static int
handle_ddp(struct socket *so, struct uio *uio, int flags, int error)
{
	struct sockbuf *sb = &so->so_rcv;
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct adapter *sc = td_adapter(toep->td);
	vm_page_t *pages;
	int npages, db_idx, rc, buf_flag;
	struct ddp_buffer *db;
	struct wrqe *wr;
	uint64_t ddp_flags;

	SOCKBUF_LOCK_ASSERT(sb);

#if 0
	if (sbused(sb) + sc->tt.ddp_thres > uio->uio_resid) {
		CTR4(KTR_CXGBE, "%s: sb_cc %d, threshold %d, resid %d",
		    __func__, sbused(sb), sc->tt.ddp_thres, uio->uio_resid);
	}
#endif

	/* XXX: too eager to disable DDP, could handle NBIO better than this. */
	if (sbused(sb) >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres ||
	    uio->uio_resid > MAX_DDP_BUFFER_SIZE || uio->uio_iovcnt > 1 ||
	    so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO) ||
	    error || so->so_error || sb->sb_state & SBS_CANTRCVMORE)
		goto no_ddp;

	/*
	 * Fault in and then hold the pages of the uio buffers.  We'll wire them
	 * a bit later if everything else works out.
	 */
	SOCKBUF_UNLOCK(sb);
	if (hold_uio(uio, &pages, &npages) != 0) {
		SOCKBUF_LOCK(sb);
		goto no_ddp;
	}
	SOCKBUF_LOCK(sb);
	if (__predict_false(so->so_error || sb->sb_state & SBS_CANTRCVMORE)) {
		vm_page_unhold_pages(pages, npages);
		free(pages, M_CXGBE);
		goto no_ddp;
	}

	/*
	 * Figure out which one of the two DDP buffers to use this time.
	 */
	db_idx = select_ddp_buffer(sc, toep, pages, npages,
	    (uintptr_t)uio->uio_iov->iov_base & PAGE_MASK, uio->uio_resid);
	pages = NULL;	/* handed off to select_ddp_buffer */
	if (db_idx < 0)
		goto no_ddp;
	db = toep->db[db_idx];
	buf_flag = db_idx == 0 ? DDP_BUF0_ACTIVE : DDP_BUF1_ACTIVE;

	/*
	 * Build the compound work request that tells the chip where to DMA the
	 * payload.
	 */
	ddp_flags = select_ddp_flags(so, flags, db_idx);
	wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sbused(sb), ddp_flags);
	if (wr == NULL) {
		/*
		 * Just unhold the pages.  The DDP buffer's software state is
		 * left as-is in the toep.  The page pods were written
		 * successfully and we may have an opportunity to use it in the
		 * future.
		 */
		vm_page_unhold_pages(db->pages, db->npages);
		goto no_ddp;
	}

	/* Wire (and then unhold) the pages, and give the chip the go-ahead. */
	wire_ddp_buffer(db);
	t4_wrq_tx(sc, wr);
	sb->sb_flags &= ~SB_DDP_INDICATE;
	toep->ddp_flags |= buf_flag;

	/*
	 * Wait for the DDP operation to complete and then unwire the pages.
	 * The return code from the sbwait will be the final return code of this
	 * function.  But we do need to wait for DDP no matter what.
	 */
	rc = sbwait(sb);
	while (toep->ddp_flags & buf_flag) {
		/* XXXGL: shouldn't here be sbwait() call? */
		sb->sb_flags |= SB_WAIT;
		msleep(&sb->sb_acc, &sb->sb_mtx, PSOCK, "sbwait", 0);
	}
	unwire_ddp_buffer(db);
	return (rc);
no_ddp:
	disable_ddp(sc, toep);
	discourage_ddp(toep);
	sb->sb_flags &= ~SB_DDP_INDICATE;
	return (0);
}
Example #13
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
				    DDP_BUF1_ACTIVE);

				if (ddp_placed)
					insert_ddp_data(toep, ddp_placed);
			}
		}

		if ((toep->ddp_flags & DDP_OK) == 0 &&
		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
			toep->ddp_score = DDP_LOW_SCORE;
			toep->ddp_flags |= DDP_OK;
			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
			    __func__, tid, time_uptime);
		}

		if (toep->ddp_flags & DDP_ON) {

			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
			 * soreceive to post a buffer or disable DDP.  The
			 * payload that arrived in this indicate is appended to
			 * the socket buffer as usual.
			 */

#if 0
			CTR5(KTR_CXGBE,
			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
			sb->sb_flags |= SB_DDP_INDICATE;
		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {

			/*
			 * DDP allowed but isn't on (and a request to switch it
			 * on isn't pending either), and conditions are ripe for
			 * it to work.  Switch it on.
			 */

			enable_ddp(sc, toep);
		}
	}

	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sbused(sb);
	sbappendstream_locked(sb, m, 0);
	toep->sb_cc = sbused(sb);
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	return (0);
}
Example #14
static int
soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{
	struct sockaddr *sa;
	struct inpcb *inpcb;
	struct unpcb *unpcb;
	struct socket *so;
	int error;

	kif->kf_type = KF_TYPE_SOCKET;
	so = fp->f_data;
	kif->kf_un.kf_sock.kf_sock_domain0 =
	    so->so_proto->pr_domain->dom_family;
	kif->kf_un.kf_sock.kf_sock_type0 = so->so_type;
	kif->kf_un.kf_sock.kf_sock_protocol0 = so->so_proto->pr_protocol;
	kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb;
	switch (kif->kf_un.kf_sock.kf_sock_domain0) {
	case AF_INET:
	case AF_INET6:
		if (kif->kf_un.kf_sock.kf_sock_protocol0 == IPPROTO_TCP) {
			if (so->so_pcb != NULL) {
				inpcb = (struct inpcb *)(so->so_pcb);
				kif->kf_un.kf_sock.kf_sock_inpcb =
				    (uintptr_t)inpcb->inp_ppcb;
				kif->kf_un.kf_sock.kf_sock_sendq =
				    sbused(&so->so_snd);
				kif->kf_un.kf_sock.kf_sock_recvq =
				    sbused(&so->so_rcv);
			}
		}
		break;
	case AF_UNIX:
		if (so->so_pcb != NULL) {
			unpcb = (struct unpcb *)(so->so_pcb);
			if (unpcb->unp_conn) {
				kif->kf_un.kf_sock.kf_sock_unpconn =
				    (uintptr_t)unpcb->unp_conn;
				kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
				    so->so_rcv.sb_state;
				kif->kf_un.kf_sock.kf_sock_snd_sb_state =
				    so->so_snd.sb_state;
				kif->kf_un.kf_sock.kf_sock_sendq =
				    sbused(&so->so_snd);
				kif->kf_un.kf_sock.kf_sock_recvq =
				    sbused(&so->so_rcv);
			}
		}
		break;
	}
	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
	if (error == 0 &&
	    sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_local)) {
		bcopy(sa, &kif->kf_un.kf_sock.kf_sa_local, sa->sa_len);
		free(sa, M_SONAME);
	}
	error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
	if (error == 0 &&
	    sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_peer)) {
		bcopy(sa, &kif->kf_un.kf_sock.kf_sa_peer, sa->sa_len);
		free(sa, M_SONAME);
	}
	strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name,
	    sizeof(kif->kf_path));
	return (0);
}
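soo_fill_kinfo() is the kernel half of the kinfo_file export; the sendq/recvq figures it fills in are reachable from userspace through libutil's kinfo_getfile(3). A hedged sketch (link with -lutil):

#include <sys/types.h>
#include <sys/user.h>
#include <libutil.h>
#include <stdio.h>
#include <stdlib.h>

/* Print socket-buffer queue depths for every socket fd of a process. */
static void
print_socket_queues(pid_t pid)
{
	struct kinfo_file *kf;
	int i, cnt;

	kf = kinfo_getfile(pid, &cnt);
	if (kf == NULL)
		return;
	for (i = 0; i < cnt; i++) {
		if (kf[i].kf_type != KF_TYPE_SOCKET)
			continue;
		printf("fd %d: sendq %ju recvq %ju\n", kf[i].kf_fd,
		    (uintmax_t)kf[i].kf_un.kf_sock.kf_sock_sendq,
		    (uintmax_t)kf[i].kf_un.kf_sock.kf_sock_recvq);
	}
	free(kf);
}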