int sosend(struct socket *so, struct mbuf *nam, /* sockaddr, if UDP socket, NULL if TCP */ char *data, /* data to send */ int *data_length, /* IN/OUT length of (remaining) data */ int flags) { struct mbuf *head = (struct mbuf *)NULL; struct mbuf *m; int space; int resid; int len; int error = 0; int dontroute; int first = 1; resid = *data_length; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. */ if (resid < 0) return (EINVAL); INET_TRACE (INETM_IO, ("INET:sosend: so %lx resid %d sb_hiwat %d so_state %x\n", so, resid, so->so_snd.sb_hiwat, so->so_state)); if (sosendallatonce(so) && (resid > (int)so->so_snd.sb_hiwat)) return (EMSGSIZE); dontroute = (flags & MSG_DONTROUTE) && ((so->so_options & SO_DONTROUTE) == 0) && (so->so_proto->pr_flags & PR_ATOMIC); #define snderr(errno) { error = errno; goto release; } restart: sblock(&so->so_snd); do { if (so->so_error) { error = so->so_error; so->so_error = 0; /* ??? */ goto release; } if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) snderr(ENOTCONN); if (nam == 0) snderr(EDESTADDRREQ); } if (flags & MSG_OOB) space = 1024; else { space = (int)sbspace(&so->so_snd); if ((sosendallatonce(so) && (space < resid)) || ((resid >= CLBYTES) && (space < CLBYTES) && (so->so_snd.sb_cc >= CLBYTES) && ((so->so_state & SS_NBIO) == 0) && ((flags & MSG_DONTWAIT) == 0))) { if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { if (first) error = EWOULDBLOCK; goto release; } sbunlock(&so->so_snd); sbwait(&so->so_snd); goto restart; } } if ( space <= 0 ) { /* no space in socket send buffer - see if we can wait */ if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { if (first) /* report first error */ error = EWOULDBLOCK; goto release; } /* If blocking socket, let someone else run */ sbunlock(&so->so_snd); sbwait(&so->so_snd); goto restart; } while (space > 0) { len = resid; if ( so->so_type == SOCK_STREAM ) { m = m_getwithdata(MT_TXDATA, len); if (!m) snderr(ENOBUFS); MEMCPY(m->m_data, data, len); so->so_snd.sb_flags |= SB_MBCOMP; /* allow compression */ } else { m = m_get (M_WAIT, MT_TXDATA); m->m_data = data; } INET_TRACE (INETM_IO, ("sosend:got %d bytes so %lx mlen %d, off %d mtod %x\n", len, so, m->m_len, m->m_off, mtod (m, caddr_t))); *data_length -= len; resid -= len; data += len; m->m_len = len; if (head == (struct mbuf *)NULL) head = m; if (error) goto release; if (*data_length <= 0) break; } if (dontroute) so->so_options |= SO_DONTROUTE; so->so_req = (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND; error = (*so->so_proto->pr_usrreq)(so, head, nam); if (dontroute) so->so_options &= ~SO_DONTROUTE; head = (struct mbuf *)NULL; first = 0; } while ((resid != 0) && (error == 0)); release: sbunlock(&so->so_snd); if (head) m_freem(head); return error; }
/* * Slightly changed version of sosend() */ static int kttcp_sosend(struct socket *so, unsigned long long slen, unsigned long long *done, struct lwp *l, int flags) { struct mbuf **mp, *m, *top; long space, len, mlen; int error, dontroute, atomic; long long resid; atomic = sosendallatonce(so); resid = slen; top = NULL; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. */ if (resid < 0) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); l->l_ru.ru_msgsnd++; #define snderr(errno) { error = errno; goto release; } solock(so); restart: if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) goto out; do { if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) { snderr(ENOTCONN); } else { snderr(EDESTADDRREQ); } } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat)) snderr(EMSGSIZE); if (space < resid && (atomic || space < so->so_snd.sb_lowat)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive sbwait 1"); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); if (error) goto out; goto restart; } mp = ⊤ do { sounlock(so); do { if (top == 0) { m = m_gethdr(M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = NULL; } else { m = m_get(M_WAIT, MT_DATA); mlen = MLEN; } if (resid >= MINCLSIZE && space >= MCLBYTES) { m_clget(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; #ifdef MAPPED_MBUFS len = lmin(MCLBYTES, resid); #else if (atomic && top == 0) { len = lmin(MCLBYTES - max_hdr, resid); m->m_data += max_hdr; } else len = lmin(MCLBYTES, resid); #endif space -= len; } else { nopages: len = lmin(lmin(mlen, resid), space); space -= len; /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } resid -= len; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); solock(so); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (dontroute) so->so_options |= SO_DONTROUTE; if (resid > 0) so->so_state |= SS_MORETOCOME; if (flags & MSG_OOB) error = (*so->so_proto->pr_usrreqs->pr_sendoob)(so, top, NULL); else error = (*so->so_proto->pr_usrreqs->pr_send)(so, top, NULL, NULL, l); if (dontroute) so->so_options &= ~SO_DONTROUTE; if (resid > 0) so->so_state &= ~SS_MORETOCOME; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: sounlock(so); if (top) m_freem(top); *done = slen - resid; #if 0 printf("sosend: error %d slen %llu resid %lld\n", error, slen, resid); #endif return (error); }
static int kttcp_soreceive(struct socket *so, unsigned long long slen, unsigned long long *done, struct lwp *l, int *flagsp) { struct mbuf *m, **mp; int flags, len, error, offset, moff, type; long long orig_resid, resid; const struct protosw *pr; struct mbuf *nextrecord; pr = so->so_proto; mp = NULL; type = 0; resid = orig_resid = slen; if (flagsp) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); solock(so); error = (*pr->pr_usrreqs->pr_recvoob)(so, m, flags & MSG_PEEK); sounlock(so); if (error) goto bad; do { resid -= min(resid, m->m_len); m = m_free(m); } while (resid && error == 0 && m); bad: if (m) m_freem(m); return (error); } if (mp) *mp = NULL; solock(so); restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) return (error); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more * (subject to any timeout) if: * 1. the current count is less than the low water mark, * 2. MSG_WAITALL is set, and it is possible to do the entire * receive operation at once if we block (resid <= hiwat), or * 3. MSG_DONTWAIT is not set. * If MSG_WAITALL is set but resid is larger than the receive buffer, * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < resid) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { #ifdef DIAGNOSTIC if (m == NULL && so->so_rcv.sb_cc) panic("receive 1"); #endif if (so->so_error) { if (m) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { if (m) goto dontblock; else goto release; } for (; m; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { error = ENOTCONN; goto release; } if (resid == 0) goto release; if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { error = EWOULDBLOCK; goto release; } sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { sounlock(so); return (error); } goto restart; } dontblock: /* * On entry here, m points to the first record of the socket buffer. * While we process the initial mbufs containing address and control * info, we save a copy of m->m_nextpkt into nextrecord. */ #ifdef notyet /* XXXX */ if (uio->uio_lwp) uio->uio_lwp->l_ru.ru_msgrcv++; #endif KASSERT(m == so->so_rcv.sb_mb); SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 1"); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC if (m->m_type != MT_SONAME) panic("receive 1a"); #endif orig_resid = 0; if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } while (m && m->m_type == MT_CONTROL && error == 0) { if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } /* * If m is non-NULL, we have some data to read. From now on, * make sure to keep sb_lastrecord consistent when working on * the last packet on the chain (nextrecord == NULL) and we * change m->m_nextpkt. */ if (m) { if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; /* * If nextrecord == NULL (this is a single chain), * then sb_lastrecord may not be valid here if m * was changed earlier. */ if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_mb == m); so->so_rcv.sb_lastrecord = m; } } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } else { if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == m); so->so_rcv.sb_mb = nextrecord; SB_EMPTY_FIXUP(&so->so_rcv); } } SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 2"); SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 2"); moff = 0; offset = 0; while (m && resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; } else if (type == MT_OOBDATA) break; #ifdef DIAGNOSTIC else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) panic("receive 3"); #endif so->so_state &= ~SS_RCVATMARK; len = resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. * Otherwise copy them out via the uio, then free. * Sockbuf must be consistent here (points to current mbuf, * it points to next record) when we drop priority; * we must note any additions to the sockbuf when we * block interrupts again. */ resid -= len; if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp) { *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } /* * If m != NULL, we also know that * so->so_rcv.sb_mb != NULL. */ KASSERT(so->so_rcv.sb_mb == m); if (m) { m->m_nextpkt = nextrecord; if (nextrecord == NULL) so->so_rcv.sb_lastrecord = m; } else { so->so_rcv.sb_mb = nextrecord; SB_EMPTY_FIXUP(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 3"); SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 3"); } } else { if (flags & MSG_PEEK) moff += len; else { if (mp) { sounlock(so); *mp = m_copym(m, 0, len, M_WAIT); solock(so); } m->m_data += len; m->m_len -= len; so->so_rcv.sb_cc -= len; } } if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_state |= SS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), * we must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return * with a short count but without error. * Keep sockbuf locked against other readers. */ while (flags & MSG_WAITALL && m == NULL && resid > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; /* * If we are peeking and the socket receive buffer is * full, stop since we can't get more data to peek at. */ if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) break; /* * If we've drained the socket buffer, tell the * protocol in case it needs to do something to * get it filled again. */ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) { (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); } SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive sbwait 2"); error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); sounlock(so); return (0); } if ((m = so->so_rcv.sb_mb) != NULL) nextrecord = m->m_nextpkt; } } if (m && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an SB_EMPTY_FIXUP(). Second part * makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. */ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) { (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); } } if (orig_resid == resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(&so->so_rcv); goto restart; } if (flagsp) *flagsp |= flags; release: sbunlock(&so->so_rcv); sounlock(so); *done = slen - resid; #if 0 printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid); #endif return (error); }
/* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. * The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. * * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. */ int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { struct mbuf **mp; struct mbuf *m; size_t resid; int space, len; int clen = 0, error, dontroute, mlen; int atomic = sosendallatonce(so) || top; int pru_flags; if (uio) { resid = uio->uio_resid; } else { resid = (size_t)top->m_pkthdr.len; #ifdef INVARIANTS len = 0; for (m = top; m; m = m->m_next) len += m->m_len; KKASSERT(top->m_pkthdr.len == len); #endif } /* * WARNING! resid is unsigned, space and len are signed. space * can wind up negative if the sockbuf is overcommitted. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (td->td_lwp != NULL) td->td_lwp->lwp_ru.ru_msgsnd++; if (control) clen = control->m_len; #define gotoerr(errcode) { error = errcode; goto release; } restart: error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { if (so->so_state & SS_CANTSENDMORE) gotoerr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) gotoerr(ENOTCONN); } else if (addr == 0) gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ? ENOTCONN : EDESTADDRREQ); } if ((atomic && resid > so->so_snd.ssb_hiwat) || clen > so->so_snd.ssb_hiwat) { gotoerr(EMSGSIZE); } space = ssb_space(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((space < 0 || (size_t)space < resid + clen) && uio && (atomic || space < so->so_snd.ssb_lowat || space < clen)) { if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) gotoerr(EWOULDBLOCK); ssb_unlock(&so->so_snd); error = ssb_wait(&so->so_snd); if (error) goto out; goto restart; } mp = ⊤ space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (resid > INT_MAX) resid = INT_MAX; m = m_getl((int)resid, MB_WAIT, MT_DATA, top == NULL ? M_PKTHDR : 0, &mlen); if (top == NULL) { m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = NULL; } len = imin((int)szmin(mlen, resid), space); if (resid < MINCLSIZE) { /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (size_t)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid == 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; if (flags & MSG_OOB) { pru_flags = PRUS_OOB; } else if ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid == 0)) { /* * If the user set MSG_EOF, the protocol * understands this flag and nothing left to * send then use PRU_SEND_EOF instead of PRU_SEND. */ pru_flags = PRUS_EOF; } else if (resid > 0 && space > 0) { /* If there is more to send, set PRUS_MORETOCOME */ pru_flags = PRUS_MORETOCOME; } else { pru_flags = 0; } /* * XXX all the SS_CANTSENDMORE checks previously * done could be out of date. We could have recieved * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We could * probably recheck again inside the splnet() protection * here, but there are probably other places that this * also happens. We must rethink this. */ error = so_pru_send(so, pru_flags, top, addr, control, td); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = NULL; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: ssb_unlock(&so->so_snd); out: if (top) m_freem(top); if (control) m_freem(control); return (error); }