/* * As above, except the mbuf chain begins a new record. */ void sbappendrecord(struct sockbuf *sb, struct mbuf *m0) { SOCKBUF_LOCK(sb); sbappendrecord_locked(sb, m0); SOCKBUF_UNLOCK(sb); }
/* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK(sb); sbappendstream_locked(sb, m); SOCKBUF_UNLOCK(sb); }
void sbflush(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbflush_locked(sb); SOCKBUF_UNLOCK(sb); }
/* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbdroprecord_locked(sb); SOCKBUF_UNLOCK(sb); }
void sbrelease(struct sockbuf *sb, struct socket *so) { SOCKBUF_LOCK(sb); sbrelease_locked(sb, so); SOCKBUF_UNLOCK(sb); }
void sbdrop(struct sockbuf *sb, int len) { SOCKBUF_LOCK(sb); sbdrop_locked(sb, len); SOCKBUF_UNLOCK(sb); }
void socantsendmore(struct socket *so) { SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); }
/* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappendstream_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); }
/* * Usage: * xprt = svc_vc_create(sock, send_buf_size, recv_buf_size); * * Creates, registers, and returns a (rpc) tcp based transporter. * Once *xprt is initialized, it is registered as a transporter * see (svc.h, xprt_register). This routine returns * a NULL if a problem occurred. * * The filedescriptor passed in is expected to refer to a bound, but * not yet connected socket. * * Since streams do buffered io similar to stdio, the caller can specify * how big the send and receive buffers are via the second and third parms; * 0 => use the system default. */ SVCXPRT * svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize, size_t recvsize) { SVCXPRT *xprt = NULL; struct sockaddr* sa; int error; SOCK_LOCK(so); if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) { SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa); CURVNET_RESTORE(); if (error) return (NULL); xprt = svc_vc_create_conn(pool, so, sa); free(sa, M_SONAME); return (xprt); } SOCK_UNLOCK(so); xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = NULL; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_rendezvous_ops; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) { goto cleanup_svc_vc_create; } memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); solisten(so, -1, curthread); SOCKBUF_LOCK(&so->so_rcv); xprt->xp_upcallset = 1; soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt); SOCKBUF_UNLOCK(&so->so_rcv); return (xprt); cleanup_svc_vc_create: if (xprt) { sx_destroy(&xprt->xp_lock); svc_xprt_free(xprt); } return (NULL); }
int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendcontrol_locked(sb, m0, control); SOCKBUF_UNLOCK(sb); return (retval); }
SVCXPRT * svc_dg_create(SVCPOOL *pool, struct socket *so, size_t sendsize, size_t recvsize) { SVCXPRT *xprt; struct __rpc_sockinfo si; struct sockaddr* sa; int error; if (!__rpc_socket2sockinfo(so, &si)) { printf(svc_dg_str, svc_dg_err1); return (NULL); } /* * Find the receive and the send size */ sendsize = __rpc_get_t_size(si.si_af, si.si_proto, (int)sendsize); recvsize = __rpc_get_t_size(si.si_af, si.si_proto, (int)recvsize); if ((sendsize == 0) || (recvsize == 0)) { printf(svc_dg_str, svc_dg_err2); return (NULL); } xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = NULL; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_dg_ops; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) goto freedata; memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, svc_dg_soupcall, xprt); SOCKBUF_UNLOCK(&so->so_rcv); return (xprt); freedata: (void) printf(svc_dg_str, __no_mem_str); if (xprt) { svc_xprt_free(xprt); } return (NULL); }
int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td) { int error; SOCKBUF_LOCK(sb); error = sbreserve_locked(sb, cc, so, td); SOCKBUF_UNLOCK(sb); return (error); }
static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts; struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } #ifdef IPSEC /* Check AH/ESP integrity. */ if (ipsec6_in_reject(n, inp)) { m_freem(n); return (0); } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); }
void sbdrop(struct sockbuf *sb, int len) { struct mbuf *mfree; SOCKBUF_LOCK(sb); mfree = sbcut_internal(sb, len); SOCKBUF_UNLOCK(sb); m_freem(mfree); }
/* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendaddr_locked(sb, asa, m0, control); SOCKBUF_UNLOCK(sb); return (retval); }
void t4_rcvd(struct toedev *tod, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; SOCKBUF_LOCK(sb); t4_rcvd_locked(tod, tp); SOCKBUF_UNLOCK(sb); }
/* * Set up the socket for TCP offload. */ void offload_socket(struct socket *so, struct toepcb *toep) { struct tom_data *td = toep->td; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb; INP_WLOCK_ASSERT(inp); /* Update socket */ sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; if (toep->ulp_mode == ULP_MODE_TCPDDP) { if (inp->inp_vflag & INP_IPV6) so->so_proto = &ddp6_protosw; else so->so_proto = &ddp_protosw; } SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ tp->tod = &td->tod; tp->t_toe = toep; tp->t_flags |= TF_TOE; /* Install an extra hold on inp */ toep->inp = inp; toep->flags |= TPF_ATTACHED; in_pcbref(inp); /* Add the TOE PCB to the active list */ mtx_lock(&td->toep_list_lock); TAILQ_INSERT_HEAD(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); }
static int soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, struct thread *td) { struct socket *so = fp->f_data; #ifdef MAC int error; #endif bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFSOCK; #ifdef MAC error = mac_socket_check_stat(active_cred, so); if (error) return (error); #endif if (!SOLISTENING(so)) { struct sockbuf *sb; /* * If SBS_CANTRCVMORE is set, but there's still data left * in the receive buffer, the socket is still readable. */ sb = &so->so_rcv; SOCKBUF_LOCK(sb); if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb)) ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH; ub->st_size = sbavail(sb) - sb->sb_ctl; SOCKBUF_UNLOCK(sb); sb = &so->so_snd; SOCKBUF_LOCK(sb); if ((sb->sb_state & SBS_CANTSENDMORE) == 0) ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; SOCKBUF_UNLOCK(sb); } ub->st_uid = so->so_cred->cr_uid; ub->st_gid = so->so_cred->cr_gid; return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub); }
/* * Destroy node */ static int ng_ksocket_shutdown(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); priv_p embryo; /* Close our socket (if any) */ if (priv->so != NULL) { SOCKBUF_LOCK(&priv->so->so_rcv); priv->so->so_rcv.sb_flags &= ~SB_UPCALL; SOCKBUF_UNLOCK(&priv->so->so_rcv); SOCKBUF_LOCK(&priv->so->so_snd); priv->so->so_snd.sb_flags &= ~SB_UPCALL; SOCKBUF_UNLOCK(&priv->so->so_snd); priv->so->so_upcall = NULL; soclose(priv->so); priv->so = NULL; } /* If we are an embryo, take ourselves out of the parent's list */ if (priv->flags & KSF_EMBRYONIC) { LIST_REMOVE(priv, siblings); priv->flags &= ~KSF_EMBRYONIC; } /* Remove any embryonic children we have */ while (!LIST_EMPTY(&priv->embryos)) { embryo = LIST_FIRST(&priv->embryos); ng_rmnode_self(embryo->node); } /* Take down netgraph node */ bzero(priv, sizeof(*priv)); kfree(priv, M_NETGRAPH); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(node); /* let the node escape */ return (0); }
/* * XXX Need to implement reconnecting as necessary. If that were to be * needed, most likely all current vnodes would have to be renegotiated * or otherwise invalidated (a la NFS "stale file handle"). */ static int p9fs_connect(struct mount *mp) { struct p9fsmount *p9mp = VFSTOP9(mp); struct p9fs_session *p9s = &p9mp->p9_session; struct socket *so; int error; error = socreate(p9s->p9s_sockaddr.sa_family, &p9s->p9s_sock, p9s->p9s_socktype, p9s->p9s_proto, curthread->td_ucred, curthread); if (error != 0) { vfs_mount_error(mp, "socreate"); goto out; } so = p9s->p9s_sock; error = soconnect(so, &p9s->p9s_sockaddr, curthread); SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, "connec", 0); if (error) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } SOCK_UNLOCK(so); if (error) { vfs_mount_error(mp, "soconnect"); if (error == EINTR) so->so_state &= ~SS_ISCONNECTING; goto out; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) p9fs_setsockopt(so, SO_KEEPALIVE); if (so->so_proto->pr_protocol == IPPROTO_TCP) p9fs_setsockopt(so, TCP_NODELAY); SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, p9fs_client_upcall, p9mp); SOCKBUF_UNLOCK(&so->so_rcv); error = 0; out: return (error); }
static inline struct mbuf * sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb) { struct sdp_sock *ssk = sdp_sk(sk); struct sdp_bsdh *h; h = mtod(mb, struct sdp_bsdh *); #ifdef SDP_ZCOPY SDP_SKB_CB(mb)->seq = rcv_nxt(ssk); if (h->mid == SDP_MID_SRCAVAIL) { struct sdp_srcah *srcah = (struct sdp_srcah *)(h+1); struct rx_srcavail_state *rx_sa; ssk->srcavail_cancel_mseq = 0; ssk->rx_sa = rx_sa = RX_SRCAVAIL_STATE(mb) = kzalloc( sizeof(struct rx_srcavail_state), M_NOWAIT); rx_sa->mseq = ntohl(h->mseq); rx_sa->used = 0; rx_sa->len = mb_len = ntohl(srcah->len); rx_sa->rkey = ntohl(srcah->rkey); rx_sa->vaddr = be64_to_cpu(srcah->vaddr); rx_sa->flags = 0; if (ssk->tx_sa) { sdp_dbg_data(ssk->socket, "got RX SrcAvail while waiting " "for TX SrcAvail. waking up TX SrcAvail" "to be aborted\n"); wake_up(sk->sk_sleep); } atomic_add(mb->len, &ssk->rcv_nxt); sdp_dbg_data(sk, "queueing SrcAvail. mb_len = %d vaddr = %lld\n", mb_len, rx_sa->vaddr); } else #endif { atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt); } m_adj(mb, SDP_HEAD_SIZE); SOCKBUF_LOCK(&sk->so_rcv); if (unlikely(h->flags & SDP_OOB_PRES)) sdp_urg(ssk, mb); sbappend_locked(&sk->so_rcv, mb); sorwakeup_locked(sk); return mb; }
static void svc_dg_destroy(SVCXPRT *xprt) { SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); soupcall_clear(xprt->xp_socket, SO_RCV); SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); sx_destroy(&xprt->xp_lock); if (xprt->xp_socket) (void)soclose(xprt->xp_socket); if (xprt->xp_netid) (void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); svc_xprt_free(xprt); }
static void svc_vc_destroy_common(SVCXPRT *xprt) { SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_socket) (void)soclose(xprt->xp_socket); if (xprt->xp_netid) (void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); svc_xprt_free(xprt); }
/* * Send a SeND message to user space, that was either received and has to be * validated or was about to be send out and has to be handled by the SEND * daemon adding SeND ICMPv6 options. */ static int send_input(struct mbuf *m, struct ifnet *ifp, int direction, int msglen __unused) { struct ip6_hdr *ip6; struct sockaddr_send sendsrc; SEND_LOCK(); if (V_send_so == NULL) { SEND_UNLOCK(); return (-1); } /* * Make sure to clear any possible internally embedded scope before * passing the packet to user space for SeND cryptographic signature * validation to succeed. */ ip6 = mtod(m, struct ip6_hdr *); in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); bzero(&sendsrc, sizeof(sendsrc)); sendsrc.send_len = sizeof(sendsrc); sendsrc.send_family = AF_INET6; sendsrc.send_direction = direction; sendsrc.send_ifidx = ifp->if_index; /* * Send incoming or outgoing traffic to user space either to be * protected (outgoing) or validated (incoming) according to rfc3971. */ SOCKBUF_LOCK(&V_send_so->so_rcv); if (sbappendaddr_locked(&V_send_so->so_rcv, (struct sockaddr *)&sendsrc, m, NULL) == 0) { SOCKBUF_UNLOCK(&V_send_so->so_rcv); /* XXX stats. */ m_freem(m); } else { sorwakeup_locked(V_send_so); } SEND_UNLOCK(); return (0); }
static int soo_aio_queue(struct file *fp, struct kaiocb *job) { struct socket *so; struct sockbuf *sb; int error; so = fp->f_data; error = (*so->so_proto->pr_usrreqs->pru_aio_queue)(so, job); if (error == 0) return (0); switch (job->uaiocb.aio_lio_opcode) { case LIO_READ: sb = &so->so_rcv; break; case LIO_WRITE: sb = &so->so_snd; break; default: return (EINVAL); } SOCKBUF_LOCK(sb); if (!aio_set_cancel_function(job, soo_aio_cancel)) panic("new job was cancelled"); TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list); if (!(sb->sb_flags & SB_AIO_RUNNING)) { if (soaio_ready(so, sb)) sowakeup_aio(so, sb); else sb->sb_flags |= SB_AIO; } SOCKBUF_UNLOCK(sb); return (0); }
/* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. */ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; int headlocked = 0; #ifdef INET6 int isipv6; #endif TCPDEBUG0; /* * We require the pcbinfo lock in two cases: * * (1) An implied connect is taking place, which can result in * binding IPs and ports and hence modification of the pcb hash * chains. * * (2) PRUS_EOF is set, resulting in explicit close on the send. */ if ((nam != NULL) || (flags & PRUS_EOF)) { INP_INFO_WLOCK(&V_tcbinfo); headlocked = 1; } inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { if (control) m_freem(control); if (m) m_freem(m); error = ECONNRESET; goto out; } #ifdef INET6 isipv6 = nam && nam->sa_family == AF_INET6; #endif /* INET6 */ tp = intotcpcb(inp); TCPDEBUG1(); if (control) { /* TCP doesn't do control messages (rights, creds, etc) */ if (control->m_len) { m_freem(control); if (m) m_freem(m); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if (!(flags & PRUS_OOB)) { sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg/maxopd using peer's cached * MSS. */ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); else #endif /* INET6 */ error = tcp_connect(tp, nam, td); if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); socantsendmore(so); tcp_usrclosed(tp); } if (headlocked) { INP_INFO_WUNLOCK(&V_tcbinfo); headlocked = 0; } if (!(inp->inp_flags & INP_DROPPED)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output_send(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } else { /* * XXXRW: PRUS_EOF not implemented with PRUS_OOB? */ SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) < -512) { SOCKBUF_UNLOCK(&so->so_snd); m_freem(m); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ sbappendstream_locked(&so->so_snd, m); SOCKBUF_UNLOCK(&so->so_snd); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg/maxopd using peer's cached * MSS. */ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); else #endif /* INET6 */ error = tcp_connect(tp, nam, td); if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); INP_INFO_WUNLOCK(&V_tcbinfo); headlocked = 0; } else if (nam) { INP_INFO_WUNLOCK(&V_tcbinfo); headlocked = 0; } tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_flags |= TF_FORCEDATA; error = tcp_output_send(tp); tp->t_flags &= ~TF_FORCEDATA; } out: TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); INP_WUNLOCK(inp); if (headlocked) INP_INFO_WUNLOCK(&V_tcbinfo); return (error); }
/* * active open (soconnect). * * State of affairs on entry: * soisconnecting (so_state |= SS_ISCONNECTING) * tcbinfo not locked (This has changed - used to be WLOCKed) * inp WLOCKed * tp->t_state = TCPS_SYN_SENT * rtalloc1, RT_UNLOCK on rt. */ int t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; struct tom_data *td = tod_td(tod); struct toepcb *toep = NULL; struct wrqe *wr = NULL; struct ifnet *rt_ifp = rt->rt_ifp; struct port_info *pi; int mtu_idx, rscale, qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); int reason; INP_WLOCK_ASSERT(inp); KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) pi = rt_ifp->if_softc; else if (rt_ifp->if_type == IFT_L2VLAN) { struct ifnet *ifp = VLAN_COOKIE(rt_ifp); pi = ifp->if_softc; } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep = alloc_toepcb(pi, -1, -1, M_NOWAIT); if (toep == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->tid = alloc_atid(sc, toep); if (toep->tid < 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->l2te = t4_l2t_get(pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); if (toep->l2te == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); isipv6 = nam->sa_family == AF_INET6; wr = alloc_wrqe(act_open_cpl_size(sc, isipv6), toep->ctrlq); if (wr == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) set_tcpddp_ulp_mode(toep); else toep->ulp_mode = ULP_MODE_NONE; SOCKBUF_LOCK(&so->so_rcv); /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); /* * The kernel sets request_r_scale based on sb_max whereas we need to * take hardware's MAX_RCV_WND into account too. This is normally a * no-op as MAX_RCV_WND is much larger than the default sb_max. */ if (tp->t_flags & TF_REQ_SCALE) rscale = tp->request_r_scale = select_rcv_wscale(); else rscale = 0; mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0); qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid; if (isipv6) { struct cpl_act_open_req6 *cpl = wrtod(wr); if ((inp->inp_vflag & INP_IPV6) == 0) { /* XXX think about this a bit more */ log(LOG_ERR, "%s: time to think about AF_INET6 + vflag 0x%x.\n", __func__, inp->inp_vflag); DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); } toep->ce = hold_lip(td, &inp->in6p_laddr); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); if (is_t4(sc)) { INIT_TP_WR(cpl, 0); cpl->params = select_ntuple(pi, toep->l2te); } else { struct cpl_t5_act_open_req6 *c5 = (void *)cpl; INIT_TP_WR(c5, 0); c5->iss = htobe32(tp->iss); c5->params = select_ntuple(pi, toep->l2te); } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); cpl->local_port = inp->inp_lport; cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; cpl->peer_port = inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->opt2 = calc_opt2a(so, toep); } else { struct cpl_act_open_req *cpl = wrtod(wr); if (is_t4(sc)) { INIT_TP_WR(cpl, 0); cpl->params = select_ntuple(pi, toep->l2te); } else { struct cpl_t5_act_open_req *c5 = (void *)cpl; INIT_TP_WR(c5, 0); c5->iss = htobe32(tp->iss); c5->params = select_ntuple(pi, toep->l2te); } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->opt2 = calc_opt2a(so, toep); } CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__, toep->tid, tcpstates[tp->t_state], toep, inp); offload_socket(so, toep); rc = t4_l2t_send(sc, wr, toep->l2te); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); reason = __LINE__; failed: CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); if (wr) free_wrqe(wr); if (toep) { if (toep->tid >= 0) free_atid(sc, toep->tid); if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) release_lip(td, toep->ce); free_toepcb(toep); } return (rc); }
static bool_t svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct uio uio; struct mbuf *m; XDR xdrs; int error, rcvflag; /* * Serialise access to the socket and our own record parsing * state. */ sx_xlock(&xprt->xp_lock); for (;;) { /* * If we have an mbuf chain in cd->mpending, try to parse a * record from it, leaving the result in cd->mreq. If we don't * have a complete record, leave the partial result in * cd->mreq and try to read more from the socket. */ if (cd->mpending) { /* * If cd->resid is non-zero, we have part of the * record already, otherwise we are expecting a record * marker. */ if (!cd->resid) { /* * See if there is enough data buffered to * make up a record marker. Make sure we can * handle the case where the record marker is * split across more than one mbuf. */ size_t n = 0; uint32_t header; m = cd->mpending; while (n < sizeof(uint32_t) && m) { n += m->m_len; m = m->m_next; } if (n < sizeof(uint32_t)) goto readmore; if (cd->mpending->m_len < sizeof(uint32_t)) cd->mpending = m_pullup(cd->mpending, sizeof(uint32_t)); memcpy(&header, mtod(cd->mpending, uint32_t *), sizeof(header)); header = ntohl(header); cd->eor = (header & 0x80000000) != 0; cd->resid = header & 0x7fffffff; m_adj(cd->mpending, sizeof(uint32_t)); } /* * Start pulling off mbufs from cd->mpending * until we either have a complete record or * we run out of data. We use m_split to pull * data - it will pull as much as possible and * split the last mbuf if necessary. */ while (cd->mpending && cd->resid) { m = cd->mpending; if (cd->mpending->m_next || cd->mpending->m_len > cd->resid) cd->mpending = m_split(cd->mpending, cd->resid, M_WAIT); else cd->mpending = NULL; if (cd->mreq) m_last(cd->mreq)->m_next = m; else cd->mreq = m; while (m) { cd->resid -= m->m_len; m = m->m_next; } } /* * If cd->resid is zero now, we have managed to * receive a record fragment from the stream. Check * for the end-of-record mark to see if we need more. */ if (cd->resid == 0) { if (!cd->eor) continue; /* * Success - we have a complete record in * cd->mreq. */ xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE); cd->mreq = NULL; sx_xunlock(&xprt->xp_lock); if (! xdr_callmsg(&xdrs, msg)) { XDR_DESTROY(&xdrs); return (FALSE); } *addrp = NULL; *mp = xdrmbuf_getall(&xdrs); XDR_DESTROY(&xdrs); return (TRUE); } } readmore: /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to * read as much as possible from the socket and put * the result in cd->mpending. If the read fails, * we have drained both cd->mpending and the socket so * we can call xprt_inactive(). */ uio.uio_resid = 1000000000; uio.uio_td = curthread; m = NULL; rcvflag = MSG_DONTWAIT; CURVNET_SET(xprt->xp_socket->so_vnet); error = soreceive(xprt->xp_socket, NULL, &uio, &m, NULL, &rcvflag); CURVNET_RESTORE(); if (error == EWOULDBLOCK) { /* * We must re-test for readability after * taking the lock to protect us in the case * where a new packet arrives on the socket * after our call to soreceive fails with * EWOULDBLOCK. The pool lock protects us from * racing the upcall after our soreadable() * call returns false. */ mtx_lock(&xprt->xp_pool->sp_lock); if (!soreadable(xprt->xp_socket)) xprt_inactive_locked(xprt); mtx_unlock(&xprt->xp_pool->sp_lock); sx_xunlock(&xprt->xp_lock); return (FALSE); } if (error) { SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); xprt_inactive(xprt); cd->strm_stat = XPRT_DIED; sx_xunlock(&xprt->xp_lock); return (FALSE); } if (!m) { /* * EOF - the other end has closed the socket. */ xprt_inactive(xprt); cd->strm_stat = XPRT_DIED; sx_xunlock(&xprt->xp_lock); return (FALSE); } if (cd->mpending) m_last(cd->mpending)->m_next = m; else cd->mpending = m; }
/*ARGSUSED*/ static bool_t svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct socket *so = NULL; struct sockaddr *sa = NULL; int error; SVCXPRT *new_xprt; /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to accept a * connection from the socket and turn it into a new * transport. If the accept fails, we have drained all pending * connections so we call xprt_inactive(). */ sx_xlock(&xprt->xp_lock); error = svc_vc_accept(xprt->xp_socket, &so); if (error == EWOULDBLOCK) { /* * We must re-test for new connections after taking * the lock to protect us in the case where a new * connection arrives after our call to accept fails * with EWOULDBLOCK. The pool lock protects us from * racing the upcall after our TAILQ_EMPTY() call * returns false. */ ACCEPT_LOCK(); mtx_lock(&xprt->xp_pool->sp_lock); if (TAILQ_EMPTY(&xprt->xp_socket->so_comp)) xprt_inactive_locked(xprt); mtx_unlock(&xprt->xp_pool->sp_lock); ACCEPT_UNLOCK(); sx_xunlock(&xprt->xp_lock); return (FALSE); } if (error) { SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); xprt_inactive(xprt); sx_xunlock(&xprt->xp_lock); return (FALSE); } sx_xunlock(&xprt->xp_lock); sa = 0; error = soaccept(so, &sa); if (error) { /* * XXX not sure if I need to call sofree or soclose here. */ if (sa) free(sa, M_SONAME); return (FALSE); } /* * svc_vc_create_conn will call xprt_register - we don't need * to do anything with the new connection except derefence it. */ new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa); if (!new_xprt) { soclose(so); } else { SVC_RELEASE(new_xprt); } free(sa, M_SONAME); return (FALSE); /* there is never an rpc msg to be processed */ }
/* * Create a new transport for a socket optained via soaccept(). */ SVCXPRT * svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr) { SVCXPRT *xprt = NULL; struct cf_conn *cd = NULL; struct sockaddr* sa = NULL; struct sockopt opt; int one = 1; int error; bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_KEEPALIVE; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); CURVNET_SET(so->so_vnet); error = sosetopt(so, &opt); if (error) { CURVNET_RESTORE(); return (NULL); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error) { CURVNET_RESTORE(); return (NULL); } } CURVNET_RESTORE(); cd = mem_alloc(sizeof(*cd)); cd->strm_stat = XPRT_IDLE; xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = cd; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_ops; /* * See http://www.connectathon.org/talks96/nfstcp.pdf - client * has a 5 minute timer, server has a 6 minute timer. */ xprt->xp_idletimeout = 6 * 60; memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); if (error) goto cleanup_svc_vc_create; memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); SOCKBUF_LOCK(&so->so_rcv); xprt->xp_upcallset = 1; soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt); SOCKBUF_UNLOCK(&so->so_rcv); /* * Throw the transport into the active list in case it already * has some data buffered. */ sx_xlock(&xprt->xp_lock); xprt_active(xprt); sx_xunlock(&xprt->xp_lock); return (xprt); cleanup_svc_vc_create: if (xprt) { mem_free(xprt, sizeof(*xprt)); } if (cd) mem_free(cd, sizeof(*cd)); return (NULL); }