Example #1
0
static int
udp6_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
	struct inpcb *inp;
	struct inpcbinfo *pcbinfo;
	int error = 0;

	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));

	INP_WLOCK(inp);
	if (addr) {
		if (addr->sa_len != sizeof(struct sockaddr_in6)) {
			error = EINVAL;
			goto bad;
		}
		if (addr->sa_family != AF_INET6) {
			error = EAFNOSUPPORT;
			goto bad;
		}
	}

#ifdef INET
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		int hasv4addr;
		struct sockaddr_in6 *sin6 = NULL;

		if (addr == NULL)
			hasv4addr = (inp->inp_vflag & INP_IPV4);
		else {
			sin6 = (struct sockaddr_in6 *)addr;
			hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
			    ? 1 : 0;
		}
		if (hasv4addr) {
			struct pr_usrreqs *pru;
			uint8_t nxt;

			nxt = (inp->inp_socket->so_proto->pr_protocol ==
			    IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
			/*
			 * XXXRW: We release UDP-layer locks before calling
			 * udp_send() in order to avoid recursion.  However,
			 * this does mean there is a short window where inp's
			 * fields are unstable.  Could this lead to a
			 * potential race in which the factors causing us to
			 * select the UDPv4 output routine are invalidated?
			 */
			INP_WUNLOCK(inp);
			if (sin6)
				in6_sin6_2_sin_in_sock(addr);
			pru = inetsw[ip_protox[nxt]].pr_usrreqs;
			/* addr will just be freed in sendit(). */
			return ((*pru->pru_send)(so, flags, m, addr, control,
			    td));
		}
	}
#endif
#ifdef MAC
	mac_inpcb_create_mbuf(inp, m);
#endif
	INP_HASH_WLOCK(pcbinfo);
	error = udp6_output(inp, m, addr, control, td);
	INP_HASH_WUNLOCK(pcbinfo);
	INP_WUNLOCK(inp);
	return (error);

bad:
	INP_WUNLOCK(inp);
	m_freem(m);
	return (error);
}
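For context, the #ifdef INET branch above is taken when a dual-stack UDPv6 socket is aimed at an IPv4 destination via a v4-mapped address, at which point udp6_send() converts the sockaddr and hands the mbuf to the UDPv4 pru_send. A minimal userspace sketch of that situation (hypothetical destination, error handling trimmed, BSD sin6_len convention assumed):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int
send_via_v4mapped(void)
{
	struct sockaddr_in6 dst;
	int off = 0;
	int s = socket(AF_INET6, SOCK_DGRAM, 0);

	if (s < 0)
		return (-1);
	/* Allow v4-mapped use; this is what clears IN6P_IPV6_V6ONLY. */
	setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));

	memset(&dst, 0, sizeof(dst));
	dst.sin6_len = sizeof(dst);
	dst.sin6_family = AF_INET6;
	dst.sin6_port = htons(9);		/* hypothetical peer port */
	/* ::ffff:127.0.0.1 is v4-mapped, so the INET branch fires. */
	inet_pton(AF_INET6, "::ffff:127.0.0.1", &dst.sin6_addr);

	sendto(s, "x", 1, 0, (struct sockaddr *)&dst, sizeof(dst));
	return (close(s));
}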
Example #2
0
/*
 * active open (soconnect).
 *
 * State of affairs on entry:
 * soisconnecting (so_state |= SS_ISCONNECTING)
 * tcbinfo not locked (This has changed - used to be WLOCKed)
 * inp WLOCKed
 * tp->t_state = TCPS_SYN_SENT
 * rtalloc1, RT_UNLOCK on rt.
 */
int
t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
    struct sockaddr *nam)
{
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = NULL;
	struct wrqe *wr = NULL;
	struct ifnet *rt_ifp = rt->rt_ifp;
	struct vi_info *vi;
	int mtu_idx, rscale, qid_atid, rc, isipv6, txqid, rxqid;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	int reason;
	struct offload_settings settings;
	uint16_t vid = 0xfff, pcp = 0;

	INP_WLOCK_ASSERT(inp);
	KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
	    ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family));

	if (rt_ifp->if_type == IFT_ETHER)
		vi = rt_ifp->if_softc;
	else if (rt_ifp->if_type == IFT_L2VLAN) {
		struct ifnet *ifp = VLAN_TRUNKDEV(rt_ifp);

		vi = ifp->if_softc;
		VLAN_TAG(rt_ifp, &vid);
		VLAN_PCP(rt_ifp, &pcp);
	} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */
	else
		DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);

	rw_rlock(&sc->policy_lock);
	settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL,
	    EVL_MAKETAG(vid, pcp, 0), inp);
	rw_runlock(&sc->policy_lock);
	if (!settings.offload)
		DONT_OFFLOAD_ACTIVE_OPEN(EPERM);

	if (settings.txq >= 0 && settings.txq < vi->nofldtxq)
		txqid = settings.txq;
	else
		txqid = arc4random() % vi->nofldtxq;
	txqid += vi->first_ofld_txq;
	if (settings.rxq >= 0 && settings.rxq < vi->nofldrxq)
		rxqid = settings.rxq;
	else
		rxqid = arc4random() % vi->nofldrxq;
	rxqid += vi->first_ofld_rxq;

	toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT | M_ZERO);
	if (toep == NULL)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	toep->tid = alloc_atid(sc, toep);
	if (toep->tid < 0)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	toep->l2te = t4_l2t_get(vi->pi, rt_ifp,
	    rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam);
	if (toep->l2te == NULL)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	isipv6 = nam->sa_family == AF_INET6;
	wr = alloc_wrqe(act_open_cpl_size(sc, isipv6), toep->ctrlq);
	if (wr == NULL)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	toep->vnet = so->so_vnet;
	set_ulp_mode(toep, select_ulp_mode(so, sc, &settings));
	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	/*
	 * The kernel sets request_r_scale based on sb_max whereas we need to
	 * take hardware's MAX_RCV_WND into account too.  This is normally a
	 * no-op as MAX_RCV_WND is much larger than the default sb_max.
	 */
	if (tp->t_flags & TF_REQ_SCALE)
		rscale = tp->request_r_scale = select_rcv_wscale();
	else
		rscale = 0;
	mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, &settings);
	qid_atid = V_TID_QID(toep->ofld_rxq->iq.abs_id) | V_TID_TID(toep->tid) |
	    V_TID_COOKIE(CPL_COOKIE_TOM);

	if (isipv6) {
		struct cpl_act_open_req6 *cpl = wrtod(wr);
		struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl;
		struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl;

		if ((inp->inp_vflag & INP_IPV6) == 0)
			DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);

		toep->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL);
		if (toep->ce == NULL)
			DONT_OFFLOAD_ACTIVE_OPEN(ENOENT);

		switch (chip_id(sc)) {
		case CHELSIO_T4:
			INIT_TP_WR(cpl, 0);
			cpl->params = select_ntuple(vi, toep->l2te);
			break;
		case CHELSIO_T5:
			INIT_TP_WR(cpl5, 0);
			cpl5->iss = htobe32(tp->iss);
			cpl5->params = select_ntuple(vi, toep->l2te);
			break;
		case CHELSIO_T6:
		default:
			INIT_TP_WR(cpl6, 0);
			cpl6->iss = htobe32(tp->iss);
			cpl6->params = select_ntuple(vi, toep->l2te);
			break;
		}
		OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
		    qid_atid));
		cpl->local_port = inp->inp_lport;
		cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
		cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
		cpl->peer_port = inp->inp_fport;
		cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
		cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
		cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale,
		    toep->rx_credits, toep->ulp_mode, &settings);
		cpl->opt2 = calc_opt2a(so, toep, &settings);
	} else {
		struct cpl_act_open_req *cpl = wrtod(wr);
		struct cpl_t5_act_open_req *cpl5 = (void *)cpl;
		struct cpl_t6_act_open_req *cpl6 = (void *)cpl;

		switch (chip_id(sc)) {
		case CHELSIO_T4:
			INIT_TP_WR(cpl, 0);
			cpl->params = select_ntuple(vi, toep->l2te);
			break;
		case CHELSIO_T5:
			INIT_TP_WR(cpl5, 0);
			cpl5->iss = htobe32(tp->iss);
			cpl5->params = select_ntuple(vi, toep->l2te);
			break;
		case CHELSIO_T6:
		default:
			INIT_TP_WR(cpl6, 0);
			cpl6->iss = htobe32(tp->iss);
			cpl6->params = select_ntuple(vi, toep->l2te);
			break;
		}
		OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
		    qid_atid));
		inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
		    &cpl->peer_ip, &cpl->peer_port);
		cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale,
		    toep->rx_credits, toep->ulp_mode, &settings);
		cpl->opt2 = calc_opt2a(so, toep, &settings);
	}

	CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__,
	    toep->tid, tcpstates[tp->t_state], toep, inp);

	offload_socket(so, toep);
	rc = t4_l2t_send(sc, wr, toep->l2te);
	if (rc == 0) {
		toep->flags |= TPF_CPL_PENDING;
		return (0);
	}

	undo_offload_socket(so);
	reason = __LINE__;
failed:
	CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc);

	if (wr)
		free_wrqe(wr);

	if (toep) {
		if (toep->tid >= 0)
			free_atid(sc, toep->tid);
		if (toep->l2te)
			t4_l2t_release(toep->l2te);
		if (toep->ce)
			t4_release_lip(sc, toep->ce);
		free_toepcb(toep);
	}

	return (rc);
}
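DONT_OFFLOAD_ACTIVE_OPEN() is not defined in this excerpt. Judging from how reason, rc, and the failed: label are used, it presumably expands to a record-the-reason-and-bail pattern along these lines:

/* Presumed shape of the helper macro (not shown in this excerpt). */
#define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \
	reason = __LINE__; \
	rc = (x); \
	goto failed; \
} while (0)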
Example #3
0
static int
udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp;
	struct inpcbinfo *pcbinfo;
	struct sockaddr_in6 *sin6;
	int error;

	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
	inp = sotoinpcb(so);
	sin6 = (struct sockaddr_in6 *)nam;
	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));

	/*
	 * XXXRW: Need to clarify locking of v4/v6 flags.
	 */
	INP_WLOCK(inp);
#ifdef INET
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if (inp->inp_faddr.s_addr != INADDR_ANY) {
			error = EISCONN;
			goto out;
		}
		in6_sin6_2_sin(&sin, sin6);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
		if (error != 0)
			goto out;
		INP_HASH_WLOCK(pcbinfo);
		error = in_pcbconnect(inp, (struct sockaddr *)&sin,
		    td->td_ucred);
		INP_HASH_WUNLOCK(pcbinfo);
		if (error == 0)
			soisconnected(so);
		goto out;
	}
#endif
	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
		error = EISCONN;
		goto out;
	}
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
	if (error != 0)
		goto out;
	INP_HASH_WLOCK(pcbinfo);
	error = in6_pcbconnect(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(pcbinfo);
	if (error == 0)
		soisconnected(so);
out:
	INP_WUNLOCK(inp);
	return (error);
}
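Seen from userspace, the v4-mapped branch above is what a connect() to an IPv4 peer on a dual-stack UDPv6 socket turns into; once inp_faddr is set, a second connect() on the same socket fails with EISCONN. A minimal sketch (hypothetical peer, BSD sin6_len convention assumed):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int
connect_v4mapped(void)
{
	struct sockaddr_in6 peer;
	int off = 0, rc;
	int s = socket(AF_INET6, SOCK_DGRAM, 0);

	if (s < 0)
		return (-1);
	setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));

	memset(&peer, 0, sizeof(peer));
	peer.sin6_len = sizeof(peer);
	peer.sin6_family = AF_INET6;
	peer.sin6_port = htons(53);		/* hypothetical peer port */
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &peer.sin6_addr);

	/* First connect takes the INET branch of udp6_connect(). */
	rc = connect(s, (struct sockaddr *)&peer, sizeof(peer));
	/* A second connect on the now-connected socket fails with EISCONN. */
	if (rc == 0)
		rc = connect(s, (struct sockaddr *)&peer, sizeof(peer));
	close(s);
	return (rc);
}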
/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 */
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	int headlocked = 0;
#ifdef INET6
	int isipv6;
#endif
	TCPDEBUG0;

	//printf("tcp_usr_send: called\n");
	/*
	 * We require the pcbinfo lock in two cases:
	 *
	 * (1) An implied connect is taking place, which can result in
	 *     binding IPs and ports and hence modification of the pcb hash
	 *     chains.
	 *
	 * (2) PRUS_EOF is set, resulting in explicit close on the send.
	 */
	if ((nam != NULL) || (flags & PRUS_EOF)) {
		INP_INFO_WLOCK(&tcbinfo);
		headlocked = 1;
	}
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
	INP_LOCK(inp);
	if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
		if (control)
			m_freem(control);
		if (m)
			m_freem(m);
		error = ECONNRESET;
		goto out;
	}
#ifdef INET6
	isipv6 = nam && nam->sa_family == AF_INET6;
#endif /* INET6 */
	tp = intotcpcb(inp);
	TCPDEBUG1();
	if (control) {
		/* TCP doesn't do control messages (rights, creds, etc) */
		if (control->m_len) {
			m_freem(control);
			if (m)
				m_freem(m);
			error = EINVAL;
			goto out;
		}
		m_freem(control);	/* empty control, just free it */
	}
	if (!(flags & PRUS_OOB)) {
        //printf("===tcp_usr_send(1): ===>  so->so_snd.sb_cc=%d\n", so->so_snd.sb_cc);
		sbappendstream(&so->so_snd, m);
        //printf("===tcp_usr_send(1.5): ===>  so->so_snd.sb_cc=%d\n", so->so_snd.sb_cc);
        //printf("===tcp_usr_send(1.7): ===>  m->m_data=0x%lx  m->m_len=%d\n",
        //       (long)m->m_data, m->m_len);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
			INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, td);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, td);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
		}
		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			INP_INFO_WLOCK_ASSERT(&tcbinfo);
			socantsendmore(so);
			tcp_usrclosed(tp);
		}
		if (headlocked) {
			INP_INFO_WUNLOCK(&tcbinfo);
			headlocked = 0;
		}
		if (tp != NULL) {
			if (flags & PRUS_MORETOCOME)
				tp->t_flags |= TF_MORETOCOME;
			error = tcp_output(tp);
			if (flags & PRUS_MORETOCOME)
				tp->t_flags &= ~TF_MORETOCOME;
		}
	} else {
		/*
		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
		 */
		SOCKBUF_LOCK(&so->so_snd);
		if (sbspace(&so->so_snd) < -512) {
			SOCKBUF_UNLOCK(&so->so_snd);
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappendstream_locked(&so->so_snd, m);
        //printf("===tcp_usr_send(2): ===>  m->m_data=0x%lx  m->m_len=%d\n",
        //        (long)m->m_data, m->m_len);

		SOCKBUF_UNLOCK(&so->so_snd);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
			INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, td);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, td);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
			INP_INFO_WUNLOCK(&tcbinfo);
			headlocked = 0;
		} else if (nam) {
			INP_INFO_WUNLOCK(&tcbinfo);
			headlocked = 0;
		}
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		tp->t_flags |= TF_FORCEDATA;
#ifdef MAXHE_TODO
		error = tcp_output(tp);
#else
		if (so->nkn_where == 0)
			error = tcp_output(tp);
		else
			error = 0;
#endif /* MAXHE_TODO */
		tp->t_flags &= ~TF_FORCEDATA;
	}
out:
	TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
		  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
	INP_UNLOCK(inp);
	if (headlocked)
		INP_INFO_WUNLOCK(&tcbinfo);
	return (error);
}
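The PRUS_OOB branch above is what a userspace send(..., MSG_OOB) becomes: the byte is appended to so_snd, snd_up is pointed just past it, and a forced tcp_output() pushes it out. A minimal sketch of the sending side (fd is assumed to be an already-connected TCP socket):

#include <sys/socket.h>

/* Send one byte of urgent data; lands in the PRUS_OOB branch of tcp_usr_send(). */
static int
send_urgent_byte(int fd)
{
	char mark = '!';

	return (send(fd, &mark, 1, MSG_OOB) == 1 ? 0 : -1);
}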
Example #5
0
/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 */
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 
	     struct sockaddr *nam, struct mbuf *control, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
#ifdef INET6
	int isipv6;
#endif
	TCPDEBUG0;

	if (inp == NULL) {
		/*
		 * OOPS! we lost a race, the TCP session got reset after
		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
		 * network interrupt in the non-splnet() section of sosend().
		 */
		if (m)
			m_freem(m);
		if (control)
			m_freem(control);
		error = ECONNRESET;	/* XXX EPIPE? */
		tp = NULL;
		TCPDEBUG1();
		goto out;
	}
#ifdef INET6
	isipv6 = nam && nam->sa_family == AF_INET6;
#endif /* INET6 */
	tp = intotcpcb(inp);
	TCPDEBUG1();
	if (control) {
		/* TCP doesn't do control messages (rights, creds, etc) */
		if (control->m_len) {
			m_freem(control);
			if (m)
				m_freem(m);
			error = EINVAL;
			goto out;
		}
		m_freem(control);	/* empty control, just free it */
	}
	if (!(flags & PRUS_OOB)) {
		sbappend(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
#ifdef INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, p);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, p);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
		}

		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			tp = tcp_usrclosed(tp);
		}
		if (tp != NULL) {
			if (flags & PRUS_MORETOCOME)
				tp->t_flags |= TF_MORETOCOME;
			error = tcp_output(tp);
			if (flags & PRUS_MORETOCOME)
				tp->t_flags &= ~TF_MORETOCOME;
		}
	} else {
		if (sbspace(&so->so_snd) < -512) {
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappend(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
#ifdef INET6
			if (isipv6)
				error = tcp6_connect(tp, nam, p);
			else
#endif /* INET6 */
			error = tcp_connect(tp, nam, p);
			if (error)
				goto out;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
		}
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		tp->t_force = 1;
		error = tcp_output(tp);
		tp->t_force = 0;
	}
	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 
		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
}
/*
 * The new sockopt interface makes it possible for us to block in the
 * copyin/out step (if we take a page fault).  Taking a page fault at
 * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now
 * use TSM, there probably isn't any need for this function to run at
 * splnet() any more.  This needs more examination.)
 *
 * XXXRW: The locking here is wrong; we may take a page fault while holding
 * the inpcb lock.
 */
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int	error, opt, optval;
	struct	inpcb *inp;
	struct	tcpcb *tp;
	struct	tcp_info ti;

	error = 0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
	INP_LOCK(inp);
	if (sopt->sopt_level != IPPROTO_TCP) {
		INP_UNLOCK(inp);
#ifdef INET6
		if (INP_CHECK_SOCKAF(so, AF_INET6))
			error = ip6_ctloutput(so, sopt);
		else
#endif /* INET6 */
		error = ip_ctloutput(so, sopt);
		return (error);
	}
	if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
		error = ECONNRESET;
		goto out;
	}
	tp = intotcpcb(inp);

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		switch (sopt->sopt_name) {
#ifdef TCP_SIGNATURE
		case TCP_MD5SIG:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				break;

			if (optval > 0)
				tp->t_flags |= TF_SIGNATURE;
			else
				tp->t_flags &= ~TF_SIGNATURE;
			break;
#endif /* TCP_SIGNATURE */
		case TCP_NODELAY:
		case TCP_NOOPT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				break;

			switch (sopt->sopt_name) {
			case TCP_NODELAY:
				opt = TF_NODELAY;
				break;
			case TCP_NOOPT:
				opt = TF_NOOPT;
				break;
			default:
				opt = 0; /* dead code to fool gcc */
				break;
			}

			if (optval)
				tp->t_flags |= opt;
			else
				tp->t_flags &= ~opt;
			break;

		case TCP_NOPUSH:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				break;

			if (optval)
				tp->t_flags |= TF_NOPUSH;
			else {
				tp->t_flags &= ~TF_NOPUSH;
				error = tcp_output(tp);
			}
			break;

		case TCP_MAXSEG:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				break;

			if (optval > 0 && optval <= tp->t_maxseg &&
			    optval + 40 >= tcp_minmss)
				tp->t_maxseg = optval;
			else
				error = EINVAL;
			break;

		case TCP_INFO:
			error = EINVAL;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case SOPT_GET:
		switch (sopt->sopt_name) {
#ifdef TCP_SIGNATURE
		case TCP_MD5SIG:
			optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
#endif
		case TCP_NODELAY:
			optval = tp->t_flags & TF_NODELAY;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
		case TCP_MAXSEG:
			optval = tp->t_maxseg;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
		case TCP_NOOPT:
			optval = tp->t_flags & TF_NOOPT;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
		case TCP_NOPUSH:
			optval = tp->t_flags & TF_NOPUSH;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
		case TCP_INFO:
			tcp_fill_info(tp, &ti);
			error = sooptcopyout(sopt, &ti, sizeof ti);
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
out:
	INP_UNLOCK(inp);
	return (error);
}
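For reference, the SOPT_SET and SOPT_GET cases above are driven by ordinary setsockopt()/getsockopt() calls at level IPPROTO_TCP. A minimal userspace sketch (fd is assumed to be a TCP socket):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int
tweak_tcp_options(int fd)
{
	int one = 1, mss = 0;
	socklen_t len = sizeof(mss);

	/* SOPT_SET, TCP_NODELAY: sets TF_NODELAY in tp->t_flags. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) != 0)
		return (-1);
	/* SOPT_GET, TCP_MAXSEG: reads back tp->t_maxseg. */
	if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, &len) != 0)
		return (-1);
	return (mss);
}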
Example #7
0
/*
 * NAME: tp_attach()
 *
 * CALLED FROM:
 *	tp_usrreq, PRU_ATTACH
 *
 * FUNCTION and ARGUMENTS:
 *  given a socket (so) and a protocol family (dom), allocate a tpcb
 *  and ref structure, initialize everything in the structures that
 *  needs to be initialized.
 *
 * RETURN VALUE:
 *  0 ok
 *  EINVAL if DEBUG(X) is on and a disaster has occurred
 *  ENOPROTOOPT if TP hasn't been configured or if the
 *   socket wasn't created with tp as its protocol
 *  EISCONN if this socket is already part of a connection
 *  ETOOMANYREFS if ran out of tp reference numbers.
 *  E* whatever error is returned from soreserve()
 *    or from the network-layer pcb allocation routine
 *
 * SIDE EFFECTS:
 *
 * NOTES:
 */
int
tp_attach(struct socket *so, int protocol)
{
	struct tp_pcb *tpcb;
	int             error = 0;
	int             dom = so->so_proto->pr_domain->dom_family;
	u_long          lref;

#ifdef ARGO_DEBUG
	if (argo_debug[D_CONN]) {
		printf("tp_attach:dom 0x%x so %p ", dom, so);
	}
#endif
#ifdef TPPT
	if (tp_traceflags[D_CONN]) {
		tptrace(TPPTmisc, "tp_attach:dom so", dom, so, 0, 0);
	}
#endif

	if (so->so_pcb != NULL) {
		return EISCONN;	/* socket already part of a connection */
	}
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0)
		error = soreserve(so, tp_sendspace, tp_recvspace);
	/* later an ioctl will allow reallocation IF still in closed state */

	if (error)
		goto bad2;

	MALLOC(tpcb, struct tp_pcb *, sizeof(*tpcb), M_PCB, M_NOWAIT|M_ZERO);
	if (tpcb == NULL) {
		error = ENOBUFS;
		goto bad2;
	}

	if (((lref = tp_getref(tpcb)) & TP_ENOREF) != 0) {
		error = ETOOMANYREFS;
		goto bad3;
	}
	tpcb->tp_lref = lref;
	tpcb->tp_sock = so;
	tpcb->tp_domain = dom;
	tpcb->tp_rhiwat = so->so_rcv.sb_hiwat;
	/* tpcb->tp_proto = protocol; someday maybe? */
	if (protocol && protocol < ISOPROTO_TP4) {
		tpcb->tp_netservice = ISO_CONS;
		tpcb->tp_snduna = (SeqNum) - 1;	/* kludge so the pseudo-ack
						 * from the CR/CC will
						 * generate correct fake-ack
						 * values */
	} else {
		tpcb->tp_netservice = (dom == AF_INET) ? IN_CLNS : ISO_CLNS;
		/* the default */
	}
	tpcb->_tp_param = tp_conn_param[tpcb->tp_netservice];

	tpcb->tp_state = TP_CLOSED;
	tpcb->tp_vers = TP_VERSION;
	tpcb->tp_notdetached = 1;

	/*
	 * Spec says default is 128 octets, that is, if the tpdusize argument
	 * never appears, use 128. As the initiator, we will always "propose"
	 * the 2048 size, that is, we will put this argument in the CR
	 * always, but accept what the other side sends on the CC. If the
	 * initiator sends us something larger on a CR, we'll respond w/
	 * this. Our maximum is 4096.  See tp_chksum.c comments.
	 */
	tpcb->tp_cong_win =
		tpcb->tp_l_tpdusize = 1 << tpcb->tp_tpdusize;

	tpcb->tp_seqmask = TP_NML_FMT_MASK;
	tpcb->tp_seqbit = TP_NML_FMT_BIT;
	tpcb->tp_seqhalf = tpcb->tp_seqbit >> 1;

	/* attach to a network-layer protoswitch */
	if ((error = tp_set_npcb(tpcb)) != 0)
		goto bad4;
	ASSERT(tpcb->tp_nlproto->nlp_afamily == tpcb->tp_domain);

	/* nothing to do for iso case */
	if (dom == AF_INET) {
		/* tp_set_npcb sets it */
		KASSERT(so->so_pcb != NULL);
		sotoinpcb(so)->inp_ppcb = (void *) tpcb;
	}

	return 0;

bad4:
#ifdef ARGO_DEBUG
	if (argo_debug[D_CONN]) {
		printf("BAD4 in tp_attach, so %p\n", so);
	}
#endif
	tp_freeref(tpcb->tp_lref);

bad3:
#ifdef ARGO_DEBUG
	if (argo_debug[D_CONN]) {
		printf("BAD3 in tp_attach, so %p\n", so);
	}
#endif

	free((void *) tpcb, M_PCB);	/* never a cluster  */

bad2:
#ifdef ARGO_DEBUG
	if (argo_debug[D_CONN]) {
		printf("BAD2 in tp_attach, so %p\n", so);
	}
#endif
	so->so_pcb = 0;

	/* bad: */
#ifdef ARGO_DEBUG
	if (argo_debug[D_CONN]) {
		printf("BAD in tp_attach, so %p\n", so);
	}
#endif
	return error;
}
Example #8
0
int
udp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp;
	struct udpcb *up;
	int isudplite, error, optval;

	error = 0;
	isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
	INP_WLOCK(inp);
	if (sopt->sopt_level != so->so_proto->pr_protocol) {
#ifdef INET6
		if (INP_CHECK_SOCKAF(so, AF_INET6)) {
			INP_WUNLOCK(inp);
			error = ip6_ctloutput(so, sopt);
		}
#endif
#if defined(INET) && defined(INET6)
		else
#endif
#ifdef INET
		{
			INP_WUNLOCK(inp);
			error = ip_ctloutput(so, sopt);
		}
#endif
		return (error);
	}

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		switch (sopt->sopt_name) {
		case UDP_ENCAP:
			INP_WUNLOCK(inp);
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				break;
			inp = sotoinpcb(so);
			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
			INP_WLOCK(inp);
#ifdef IPSEC_NAT_T
			up = intoudpcb(inp);
			KASSERT(up != NULL, ("%s: up == NULL", __func__));
#endif
			switch (optval) {
			case 0:
				/* Clear all UDP encap. */
#ifdef IPSEC_NAT_T
				up->u_flags &= ~UF_ESPINUDP_ALL;
#endif
				break;
#ifdef IPSEC_NAT_T
			case UDP_ENCAP_ESPINUDP:
			case UDP_ENCAP_ESPINUDP_NON_IKE:
				up->u_flags &= ~UF_ESPINUDP_ALL;
				if (optval == UDP_ENCAP_ESPINUDP)
					up->u_flags |= UF_ESPINUDP;
				else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
					up->u_flags |= UF_ESPINUDP_NON_IKE;
				break;
#endif
			default:
				error = EINVAL;
				break;
			}
			INP_WUNLOCK(inp);
			break;
		case UDPLITE_SEND_CSCOV:
		case UDPLITE_RECV_CSCOV:
			if (!isudplite) {
				INP_WUNLOCK(inp);
				error = ENOPROTOOPT;
				break;
			}
			INP_WUNLOCK(inp);
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0)
				break;
			inp = sotoinpcb(so);
			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
			INP_WLOCK(inp);
			up = intoudpcb(inp);
			KASSERT(up != NULL, ("%s: up == NULL", __func__));
			if (optval != 0 && optval < 8) {
				INP_WUNLOCK(inp);
				error = EINVAL;
				break;
			}
			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
				up->u_txcslen = optval;
			else
				up->u_rxcslen = optval;
			INP_WUNLOCK(inp);
			break;
		default:
			INP_WUNLOCK(inp);
			error = ENOPROTOOPT;
			break;
		}
		break;
	case SOPT_GET:
		switch (sopt->sopt_name) {
#ifdef IPSEC_NAT_T
		case UDP_ENCAP:
			up = intoudpcb(inp);
			KASSERT(up != NULL, ("%s: up == NULL", __func__));
			optval = up->u_flags & UF_ESPINUDP_ALL;
			INP_WUNLOCK(inp);
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
#endif
		case UDPLITE_SEND_CSCOV:
		case UDPLITE_RECV_CSCOV:
			if (!isudplite) {
				INP_WUNLOCK(inp);
				error = ENOPROTOOPT;
				break;
			}
			up = intoudpcb(inp);
			KASSERT(up != NULL, ("%s: up == NULL", __func__));
			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
				optval = up->u_txcslen;
			else
				optval = up->u_rxcslen;
			INP_WUNLOCK(inp);
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;
		default:
			INP_WUNLOCK(inp);
			error = ENOPROTOOPT;
			break;
		}
		break;
	}	
	return (error);
}
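The UDPLITE_SEND_CSCOV/UDPLITE_RECV_CSCOV cases above correspond to setsockopt() at the UDP-Lite protocol level; per the check above, the coverage value must be 0 or at least 8. A minimal sketch, assuming <netinet/udplite.h> provides the option names:

#include <netinet/in.h>
#include <netinet/udplite.h>
#include <sys/socket.h>
#include <unistd.h>

static int
set_udplite_tx_coverage(void)
{
	int rc, cscov = 16;	/* checksum coverage for transmitted datagrams; 0 or >= 8 */
	int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);

	if (s < 0)
		return (-1);
	/* Hits the UDPLITE_SEND_CSCOV case in udp_ctloutput() above. */
	rc = setsockopt(s, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV,
	    &cscov, sizeof(cscov));
	close(s);
	return (rc);
}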
Example #9
0
int
tcp_usrreq(struct socket * so, 
   struct mbuf *  m,
   struct mbuf *  nam)
{
   struct inpcb * inp;
   struct tcpcb * tp;
   int   error =  0;
   int   req;

#ifdef DO_TCPTRACE
   int   ostate;
#endif

   req = so->so_req;    /* get request from socket struct */
   inp = sotoinpcb(so);
   /*
    * When a TCP is attached to a socket, then there will be
    * a (struct inpcb) pointed at by the socket, and this
    * structure will point at a subsidiary (struct tcpcb).
    */
   if (inp == 0 && req != PRU_ATTACH) 
   {
      return (EINVAL);
   }

   if (inp)
      tp = intotcpcb(inp);
   else  /* inp and tp not set, make sure this is OK: */
   { 
      if (req == PRU_ATTACH)
         tp = NULL;  /* stifle compiler warnings about using unassigned tp*/
      else
      {
         dtrap(); /* programming error? */
         return EINVAL;
      }
   }

   switch (req) 
   {
   /*
    * TCP attaches to socket via PRU_ATTACH, reserving space,
    * and an internet control block.
    */
   case PRU_ATTACH:
      if (inp) 
      {
         error = EISCONN;
         break;
      }
      error = tcp_attach(so);
      if (error)
         break;
      if ((so->so_options & SO_LINGER) && so->so_linger == 0)
         so->so_linger = TCP_LINGERTIME;
#ifdef   DO_TCPTRACE
      SETTP(tp, sototcpcb(so));
#endif
      break;

   /*
    * PRU_DETACH detaches the TCP protocol from the socket.
    * If the protocol state is non-embryonic, then can't
    * do this directly: have to initiate a PRU_DISCONNECT,
    * which may finish later; embryonic TCB's can just
    * be discarded here.
    */
   case PRU_DETACH:
      if (tp->t_state > TCPS_LISTEN)
         SETTP(tp, tcp_disconnect(tp));
      else
         SETTP(tp, tcp_close(tp));
      break;

   /*
    * Give the socket an address.
    */
   case PRU_BIND:

      /* bind is quite different for IPv4 and v6, so we use two 
       * separate pcbbind routines. so_domain was checked for
       * validity way up in t_bind()
       */
#ifdef IP_V4
      if(inp->inp_socket->so_domain == AF_INET)
      {
         error = in_pcbbind(inp, nam);
         break;
      }
#endif /* IP_V4 */
#ifdef IP_V6
      if(inp->inp_socket->so_domain == AF_INET6)
      {
         error = ip6_pcbbind(inp, nam);
         break;
      }
#endif /* IP_V6 */
      dtrap();    /* not v4 or v6? */
      error = EINVAL;
      break;
   /*
    * Prepare to accept connections.
    */
   case PRU_LISTEN:
      if (inp->inp_lport == 0)
         error = in_pcbbind(inp, (struct mbuf *)0);
      if (error == 0)
         tp->t_state = TCPS_LISTEN;
      break;

   /*
    * Initiate connection to peer.
    * Create a template for use in transmissions on this connection.
    * Enter SYN_SENT state, and mark socket as connecting.
    * Start keep-alive timer, and seed output sequence space.
    * Send initial segment on connection.
    */
   case PRU_CONNECT:
      if (inp->inp_lport == 0) 
      {

#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      error = in_pcbbind(inp, (struct mbuf *)0);
#else    /* dual mode */
      if(so->so_domain == AF_INET)
         error = in_pcbbind(inp, (struct mbuf *)0);
      else
         error = ip6_pcbbind(inp, (struct mbuf *)0);
#endif   /* end dual mode code */
#else    /* no v4, v6 only */
      error = ip6_pcbbind(inp, (struct mbuf *)0);
#endif   /* end v6 only */

         if (error)
            break;
      }

#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      error = in_pcbconnect(inp, nam);
#else    /* dual mode */
      if(so->so_domain == AF_INET)
         error = in_pcbconnect(inp, nam);
      else
         error = ip6_pcbconnect(inp, nam);
#endif   /* end dual mode code */
#else    /* no v4, v6 only */
      error = ip6_pcbconnect(inp, nam);
#endif   /* end v6 only */

      if (error)
         break;
      tp->t_template = tcp_template(tp);
      if (tp->t_template == 0) 
      {

#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
         in_pcbdisconnect(inp);
#else    /* dual mode */
         if(so->so_domain == AF_INET)
            in_pcbdisconnect(inp);
         else
            ip6_pcbdisconnect(inp);
#endif   /* end dual mode code */
#else    /* no v4, v6 only */
         ip6_pcbdisconnect(inp);
#endif   /* end v6 only */

         error = ENOBUFS;
         break;
      }

      soisconnecting(so);
      tcpstat.tcps_connattempt++;
      tp->t_state = TCPS_SYN_SENT;
      tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
      tp->iss = tcp_iss; 
      tcp_iss += (tcp_seq)(TCP_ISSINCR/2);
      tcp_sendseqinit(tp);
      error = tcp_output(tp);
      if (!error)
         TCP_MIB_INC(tcpActiveOpens);     /* keep MIB stats */
      break;

   /*
    * Create a TCP connection between two sockets.
    */
   case PRU_CONNECT2:
      error = EOPNOTSUPP;
      break;

   /*
    * Initiate disconnect from peer.
    * If connection never passed embryonic stage, just drop;
    * else if don't need to let data drain, then can just drop anyways,
    * else have to begin TCP shutdown process: mark socket disconnecting,
    * drain unread data, state switch to reflect user close, and
    * send segment (e.g. FIN) to peer.  Socket will be really disconnected
    * when peer sends FIN and acks ours.
    *
    * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
    */
   case PRU_DISCONNECT:
      SETTP(tp, tcp_disconnect(tp));
      break;

   /*
    * Accept a connection.  Essentially all the work is
    * done at higher levels; just return the address
    * of the peer, storing through addr.
    */
   case PRU_ACCEPT: 
   {
         struct sockaddr_in * sin   =  mtod(nam,   struct sockaddr_in *);
#ifdef IP_V6
         struct sockaddr_in6 * sin6 = mtod(nam,   struct sockaddr_in6 *);
#endif

#ifdef IP_V6
         if (so->so_domain == AF_INET6)
         {
            nam->m_len = sizeof (struct sockaddr_in6);
            sin6->sin6_port = inp->inp_fport;
            sin6->sin6_family = AF_INET6;
            IP6CPY(&sin6->sin6_addr, &inp->ip6_faddr);
         }
#endif

#ifdef IP_V4
         if (so->so_domain == AF_INET)
         {
            nam->m_len = sizeof (struct sockaddr_in);
            sin->sin_family = AF_INET;
            sin->sin_port = inp->inp_fport;
            sin->sin_addr = inp->inp_faddr;
         }
#endif
         if ( !(so->so_domain == AF_INET) &&
              !(so->so_domain == AF_INET6)
             )
         {
            dprintf("*** PRU_ACCEPT bad domain = %d\n", so->so_domain);
            dtrap();
         } 
         TCP_MIB_INC(tcpPassiveOpens);    /* keep MIB stats */
         break;
      }

   /*
    * Mark the connection as being incapable of further output.
    */
   case PRU_SHUTDOWN:
      socantsendmore(so);
      tp = tcp_usrclosed(tp);
      if (tp)
         error = tcp_output(tp);
      break;

   /*
    * After a receive, possibly send window update to peer.
    */
   case PRU_RCVD:
      (void) tcp_output(tp);
      break;

   /*
    * Do a send by putting data in output queue and updating urgent
    * marker if URG set.  Possibly send more data.
    */
   case PRU_SEND:
      if (so->so_pcb == NULL)
      {                    /* Return EPIPE error if socket is not connected */
         error = EPIPE;
         break;
      }
      sbappend(&so->so_snd, m);
      error = tcp_output(tp);
      if (error == ENOBUFS)
         sbdropend(&so->so_snd,m);  /* Remove data from socket buffer */
      break;

   /*
    * Abort the TCP.
    */
   case PRU_ABORT:
      SETTP(tp, tcp_drop(tp, ECONNABORTED));
      break;

   case PRU_SENSE:
      /*      ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; */
      dtrap();    /* does this ever happen? */
      return (0);

   case PRU_RCVOOB:
      if ((so->so_oobmark == 0 &&
          (so->so_state & SS_RCVATMARK) == 0) ||
#ifdef SO_OOBINLINE
       so->so_options & SO_OOBINLINE ||
#endif
       tp->t_oobflags & TCPOOB_HADDATA) 
       {
         error = EINVAL;
         break;
      }
      if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) 
      {
         error = EWOULDBLOCK;
         break;
      }
      m->m_len = 1;
      *mtod(m, char *) = tp->t_iobc;
      if ((MBUF2LONG(nam) & MSG_PEEK) == 0)
         tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
      break;

   case PRU_SENDOOB:
      if (so->so_pcb == NULL)
      {                    /* Return EPIPE error if socket is not connected */
         error = EPIPE;
         break;
      }
      if (sbspace(&so->so_snd) == 0) 
      {
         m_freem(m);
         error = ENOBUFS;
         break;
      }
      /*
       * According to RFC961 (Assigned Protocols),
       * the urgent pointer points to the last octet
       * of urgent data.  We continue, however,
       * to consider it to indicate the first octet
       * of data past the urgent section.
       * Otherwise, snd_up should be one lower.
       */
      sbappend(&so->so_snd, m);
      tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
      tp->t_force = 1;
      error = tcp_output(tp);
      if (error == ENOBUFS)
         sbdropend(&so->so_snd,m);  /* Remove data from socket buffer */
      tp->t_force = 0;
      break;

   case PRU_SOCKADDR:

   /* sockaddr and peeraddr have to switch based on IP type */
#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      in_setsockaddr(inp, nam);
#else /* dual mode */
      if(so->so_domain == AF_INET6)
         ip6_setsockaddr(inp, nam);
      else
         in_setsockaddr(inp, nam);
#endif   /* dual mode */
#else    /* IP_V6 */
         ip6_setsockaddr(inp, nam);
#endif
      break;         

   case PRU_PEERADDR:
#ifdef IP_V4
#ifndef IP_V6  /* v4 only */
      in_setpeeraddr(inp, nam);
#else /* dual mode */
      if(so->so_domain == AF_INET6)
         ip6_setpeeraddr(inp, nam);
      else
         in_setpeeraddr(inp, nam);
#endif   /* dual mode */
#else    /* IP_V6 */
         ip6_setpeeraddr(inp, nam);
#endif
      break;

   case PRU_SLOWTIMO:
      SETTP(tp, tcp_timers(tp, (int)MBUF2LONG(nam)));
#ifdef DO_TCPTRACE
      req |= (long)nam << 8;        /* for debug's sake */
#endif
      break;

   default:
      panic("tcp_usrreq");
   }
#ifdef DO_TCPTRACE
   if (tp && (so->so_options & SO_DEBUG))
      tcp_trace("usrreq: state: %d, tcpcb: %x, req: %d",
    ostate, tp, req);
#endif
   return (error);
}
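The comment near the top of tcp_usrreq() describes the socket -> inpcb -> tcpcb layering. The accessor macros themselves are not shown in this excerpt, but they presumably follow the classic BSD pattern of casting the per-layer opaque pointers, along these lines:

/* Presumed shape of the accessors used above (not part of this excerpt). */
#define sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
#define intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)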
Example #10
0
/*
 * active open (soconnect).
 *
 * State of affairs on entry:
 * soisconnecting (so_state |= SS_ISCONNECTING)
 * tcbinfo not locked (This has changed - used to be WLOCKed)
 * inp WLOCKed
 * tp->t_state = TCPS_SYN_SENT
 * rtalloc1, RT_UNLOCK on rt.
 */
int
t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
    struct sockaddr *nam)
{
	struct adapter *sc = tod->tod_softc;
	struct tom_data *td = tod_td(tod);
	struct toepcb *toep = NULL;
	struct wrqe *wr = NULL;
	struct ifnet *rt_ifp = rt->rt_ifp;
	struct port_info *pi;
	int mtu_idx, rscale, qid_atid, rc, isipv6;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	int reason;

	INP_WLOCK_ASSERT(inp);
	KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
	    ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family));

	if (rt_ifp->if_type == IFT_ETHER)
		pi = rt_ifp->if_softc;
	else if (rt_ifp->if_type == IFT_L2VLAN) {
		struct ifnet *ifp = VLAN_COOKIE(rt_ifp);

		pi = ifp->if_softc;
	} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */
	else
		DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);

	toep = alloc_toepcb(pi, -1, -1, M_NOWAIT);
	if (toep == NULL)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	toep->tid = alloc_atid(sc, toep);
	if (toep->tid < 0)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	toep->l2te = t4_l2t_get(pi, rt_ifp,
	    rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam);
	if (toep->l2te == NULL)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	isipv6 = nam->sa_family == AF_INET6;
	wr = alloc_wrqe(isipv6 ? sizeof(struct cpl_act_open_req6) :
	    sizeof(struct cpl_act_open_req), toep->ctrlq);
	if (wr == NULL)
		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);

	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
		set_tcpddp_ulp_mode(toep);
	else
		toep->ulp_mode = ULP_MODE_NONE;
	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	/*
	 * The kernel sets request_r_scale based on sb_max whereas we need to
	 * take hardware's MAX_RCV_WND into account too.  This is normally a
	 * no-op as MAX_RCV_WND is much larger than the default sb_max.
	 */
	if (tp->t_flags & TF_REQ_SCALE)
		rscale = tp->request_r_scale = select_rcv_wscale();
	else
		rscale = 0;
	mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
	qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid;

	if (isipv6) {
		struct cpl_act_open_req6 *cpl = wrtod(wr);

		if ((inp->inp_vflag & INP_IPV6) == 0) {
			/* XXX think about this a bit more */
			log(LOG_ERR,
			    "%s: time to think about AF_INET6 + vflag 0x%x.\n",
			    __func__, inp->inp_vflag);
			DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
		}

		toep->ce = hold_lip(td, &inp->in6p_laddr);
		if (toep->ce == NULL)
			DONT_OFFLOAD_ACTIVE_OPEN(ENOENT);

		INIT_TP_WR(cpl, 0);
		OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
		    qid_atid));

		cpl->local_port = inp->inp_lport;
		cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
		cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
		cpl->peer_port = inp->inp_fport;
		cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
		cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
		cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale,
		    toep->rx_credits, toep->ulp_mode);
		cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode);
		cpl->opt2 = calc_opt2a(so, toep);
	} else {
		struct cpl_act_open_req *cpl = wrtod(wr);

		INIT_TP_WR(cpl, 0);
		OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
		    qid_atid));
		inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
		    &cpl->peer_ip, &cpl->peer_port);
		cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale,
		    toep->rx_credits, toep->ulp_mode);
		cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode);
		cpl->opt2 = calc_opt2a(so, toep);
	}

	CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__,
	    toep->tid, tcpstates[tp->t_state], toep, inp);

	offload_socket(so, toep);
	rc = t4_l2t_send(sc, wr, toep->l2te);
	if (rc == 0) {
		toep->flags |= TPF_CPL_PENDING;
		return (0);
	}

	undo_offload_socket(so);
	reason = __LINE__;
failed:
	CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc);

	if (wr)
		free_wrqe(wr);

	if (toep) {
		if (toep->tid >= 0)
			free_atid(sc, toep->tid);
		if (toep->l2te)
			t4_l2t_release(toep->l2te);
		if (toep->ce)
			release_lip(td, toep->ce);
		free_toepcb(toep);
	}

	return (rc);
}