Example #1
0
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * m0:   head of the chain; on success it keeps the first len0 bytes.
 * len0: number of bytes to leave in the leading piece.
 * wait: allocation flag, handed unchanged to MGETHDR/MGET/m_dup_pkthdr.
 *
 * When m0 carries a packet header the returned tail starts with a newly
 * allocated header mbuf whose pkthdr is duplicated from m0 and whose
 * pkthdr.len is reduced by len0; m0's pkthdr.len becomes len0.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	/*
	 * Walk to the mbuf that contains the split point.  Afterwards len
	 * is the number of bytes of m that stay in the leading piece.
	 */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);		/* chain ended before the split point */
	remain = m->m_len - len;	/* bytes of m that move to the tail */
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		/* Remember the old length so a later failure can undo this. */
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			}
			/*
			 * n only carries the packet header here; all tail
			 * data lives in the mbufs returned by the recursive
			 * call.  Nothing above assigns n->m_len, so state
			 * explicitly that n holds no data.
			 */
			n->m_len = 0;
			return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* Split falls exactly on an mbuf boundary: just unlink. */
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the external storage instead of copying the bytes. */
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
Example #2
0
/*
 * Build one ARP request asking for the hardware address of "dst" and
 * pass it to netdump_ether_output() addressed to the Ethernet broadcast
 * address.
 *
 * Parameters:
 *	dst	IPv4 address stored as the ARP target protocol address
 *
 * Return value:
 *	0 on success
 *	errno on error (ENOBUFS when no mbuf is available, otherwise
 *	whatever netdump_ether_output() returns)
 */
static int
netdump_send_arp(in_addr_t dst)
{
	struct ether_addr ether_bcast;
	struct arphdr *arp;
	struct mbuf *pkt;
	int arplen;

	MPASS(nd_ifp != NULL);

	pkt = m_gethdr(M_NOWAIT, MT_DATA);
	if (pkt == NULL) {
		printf("netdump_send_arp: Out of mbufs\n");
		return (ENOBUFS);
	}

	/* Destination is all-ones, i.e. the link-level broadcast address. */
	memset(&ether_bcast, 0xFF, ETHER_ADDR_LEN);

	/* The ARP packet is the only content of the header mbuf. */
	arplen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr));
	pkt->m_pkthdr.len = arplen;
	pkt->m_len = arplen;
	MH_ALIGN(pkt, arplen);

	/* Fixed part of the ARP header. */
	arp = mtod(pkt, struct arphdr *);
	arp->ar_hrd = htons(ARPHRD_ETHER);
	arp->ar_pro = htons(ETHERTYPE_IP);
	arp->ar_hln = ETHER_ADDR_LEN;
	arp->ar_pln = sizeof(struct in_addr);
	arp->ar_op = htons(ARPOP_REQUEST);

	/* Sender: our interface address and the netdump client IP. */
	memcpy(ar_sha(arp), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN);
	((struct in_addr *)ar_spa(arp))->s_addr = nd_client.s_addr;

	/* Target: hardware address zeroed, protocol address is dst. */
	bzero(ar_tha(arp), ETHER_ADDR_LEN);
	((struct in_addr *)ar_tpa(arp))->s_addr = dst;

	return (netdump_ether_output(pkt, nd_ifp, ether_bcast, ETHERTYPE_ARP));
}
Example #3
0
/*
 * Allocate an mbuf for a management frame needing "headroom" bytes in
 * front of "pktlen" bytes of frame body.  The combined size is rounded
 * up to a multiple of 4 bytes (beacon frames, which also come through
 * here, require this) and must not exceed MCLBYTES.
 *
 * On success the mbuf is returned with m_data advanced past the
 * headroom and *frm pointing at that same location.  On allocation
 * failure NULL is returned and *frm is left untouched.
 */
struct mbuf *
ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen)
{
	struct mbuf *mb;
	u_int total;

	/*
	 * NB: the mbuf routines are relied upon to align the data area,
	 *     so nothing special is done for that here.
	 */
	total = roundup2(headroom + pktlen, 4);
	KASSERT(total <= MCLBYTES, ("802.11 mgt frame too large: %u", total));

	if (total >= MINCLSIZE) {
		/* Too big for a plain header mbuf: use a cluster. */
		mb = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (mb == NULL)
			return NULL;
		MC_ALIGN(mb, total);
	} else {
		mb = m_gethdr(M_NOWAIT, MT_DATA);
		if (mb == NULL)
			return NULL;
		/*
		 * Align the data in case additional headers are added.
		 * This should only happen when a WEP header is added,
		 * which only happens for shared key authentication mgt
		 * frames, all of which fit in MHLEN.
		 */
		MH_ALIGN(mb, total);
	}

	/* Skip the reserved headroom and report where the frame starts. */
	mb->m_data += headroom;
	*frm = mb->m_data;
	return mb;
}
Example #4
0
/*
 * Make "len" bytes of contiguous space available at the front of the
 * mbuf chain "m" and return the (possibly new) head of the chain.
 *
 * If the first mbuf has enough leading space it is simply grown
 * downwards.  Otherwise a new mbuf is allocated with "how", takes over
 * the packet header (if any) and is linked in front.  When that
 * allocation fails the whole chain is freed and NULL is returned.
 * Panics if len exceeds MHLEN.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *head;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (M_LEADINGSPACE(m) < len) {
		/* No room in the first mbuf: put a fresh one in front. */
		MGET(head, how, m->m_type);
		if (head == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(head, m);
		}
		head->m_next = m;
		MH_ALIGN(head, len);
		head->m_len = len;
		m = head;
	} else {
		/* Grow the first mbuf into its own leading space. */
		m->m_len += len;
		m->m_data -= len;
	}

	/* Account for the new bytes in the packet header, if there is one. */
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}
Example #5
0
File: igmp.c Project: AoLaD/rtems
static void
igmp_sendpkt(struct in_multi *inm, int type, unsigned long addr)
{
        struct mbuf *m;
        struct igmp *igmp;
        struct ip *ip;
        struct ip_moptions imo;

        MGETHDR(m, M_DONTWAIT, MT_HEADER);
        if (m == NULL)
                return;

	m->m_pkthdr.rcvif = loif;
	m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
	MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip));
	m->m_data += sizeof(struct ip);
        m->m_len = IGMP_MINLEN;
        igmp = mtod(m, struct igmp *);
        igmp->igmp_type   = type;
        igmp->igmp_code   = 0;
        igmp->igmp_group  = inm->inm_addr;
        igmp->igmp_cksum  = 0;
        igmp->igmp_cksum  = in_cksum(m, IGMP_MINLEN);

        m->m_data -= sizeof(struct ip);
        m->m_len += sizeof(struct ip);
        ip = mtod(m, struct ip *);
        ip->ip_tos        = 0;
        ip->ip_len        = sizeof(struct ip) + IGMP_MINLEN;
        ip->ip_off        = 0;
        ip->ip_p          = IPPROTO_IGMP;
        ip->ip_src.s_addr = INADDR_ANY;
        ip->ip_dst.s_addr = addr ? addr : igmp->igmp_group.s_addr;

        imo.imo_multicast_ifp  = inm->inm_ifp;
        imo.imo_multicast_ttl  = 1;
	imo.imo_multicast_vif  = -1;
        /*
         * Request loopback of the report if we are acting as a multicast
         * router, so that the process-level routing demon can hear it.
         */
        imo.imo_multicast_loop = (ip_mrouter != NULL);

	/*
	 * XXX
	 * Do we have to worry about reentrancy here?  Don't think so.
	 */
        ip_output(m, router_alert, &igmprt, 0, &imo);

        ++igmpstat.igps_snd_reports;
}
Example #6
0
/*
 * Slow path behind M_PREPEND: always allocate a new mbuf (using "how"),
 * move the packet header of "m" onto it when there is one, and link it
 * in front of the chain with room for "len" bytes.
 *
 * Returns the new head of the chain.  If the allocation fails the
 * original chain is freed and NULL is returned.  Panics if len exceeds
 * MHLEN.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *front;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	MGET(front, how, m->m_type);
	if (front != NULL) {
		if (m->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(front, m);
		}
		front->m_next = m;
		MH_ALIGN(front, len);
		front->m_len = len;
		return (front);
	}

	/* Out of mbufs: the caller loses the chain. */
	m_freem(m);
	return (NULL);
}
Example #7
0
/*
 * Allocate an mbuf for a management frame with "headroom" bytes reserved
 * in front of "pktlen" bytes of frame body.  The total is rounded up to
 * a multiple of 4 and must not exceed MCLBYTES.
 *
 * Returns the mbuf with m_data advanced past the headroom and stores
 * that address in *frm; returns NULL (leaving *frm alone) when no mbuf
 * could be allocated.
 */
struct mbuf*
ieee80211_getmgtframe(uint8_t** frm, int headroom, int pktlen)
{
	struct mbuf* frame;
	u_int size;
	int small;

	size = roundup2(headroom + pktlen, 4);
	KASSERT(size <= MCLBYTES, ("802.11 mgt frame too large: %u", size));

	/* Small frames go in a header mbuf, everything else in a cluster. */
	small = (size < MINCLSIZE);
	if (small)
		frame = m_gethdr(M_NOWAIT, MT_DATA);
	else
		frame = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (frame == NULL)
		return NULL;

	if (small) {
		MH_ALIGN(frame, size);
	} else {
		MC_ALIGN(frame, size);
	}

	frame->m_data += headroom;
	*frm = (uint8_t*)frame->m_data;
	return frame;
}
Example #8
0
/*
 | Build an mbuf chain for the PDU queued in pq and pass it to sosend()
 | on the session socket.
 |
 | The chain consists of:
 |   - one header mbuf holding the basic header, any AHS and, when the
 |     session has a header digest function, the 4-byte header digest;
 |   - zero or more mbufs that reference the data segment in place
 |     (M_EXT, no copy), each taking a reference on pq;
 |   - when the session has a data digest function, one mbuf with the
 |     4-byte data digest.
 |
 | Returns 0 on success, EMSGSIZE if the header parts do not fit in one
 | header mbuf, ENOBUFS if an mbuf cannot be allocated, or the error
 | returned by sosend().
 */
int
isc_sendPDU(isc_session_t *sp, pduq_t *pq)
{
     struct mbuf *mh, **mp;
     pdu_t		*pp = &pq->pdu;
     size_t		hdrlen;
     int		len, error;

     debug_called(8);

     /*
      | Everything that ends up in the header mbuf must fit in it:
      | MH_ALIGN places the data at the end of the mbuf, so it has to be
      | told the complete size, not just the basic header size.
      */
     hdrlen = sizeof(union ipdu_u) + pp->ahs_len;
     if(sp->hdrDigest)
	  hdrlen += sizeof(int);
     if(hdrlen > MHLEN)
	  return EMSGSIZE;

     /*
      | mbuf for the iSCSI header
      */
     MGETHDR(mh, MB_TRYWAIT, MT_DATA);
     if(mh == NULL)
	  return ENOBUFS;
     mh->m_len = mh->m_pkthdr.len = sizeof(union ipdu_u);
     mh->m_pkthdr.rcvif = NULL;
     MH_ALIGN(mh, hdrlen);
     bcopy(&pp->ipdu, mh->m_data, sizeof(union ipdu_u));
     mh->m_next = NULL;

     if(sp->hdrDigest)
	  pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0);
     if(pp->ahs_len) {
          /*
	   | Add any AHS to the iSCSI hdr mbuf (room was reserved above)
	   */
          bcopy(pp->ahs, (mh->m_data + mh->m_len), pp->ahs_len);
          mh->m_len += pp->ahs_len;
          mh->m_pkthdr.len += pp->ahs_len;

	  /*
	   | NOTE(review): the digest is fed &pp->ahs while the copy above
	   | uses pp->ahs; these only agree if ahs is an array -- confirm.
	   */
	  if(sp->hdrDigest)
	       pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig);
     }
     if(sp->hdrDigest) {
	  debug(2, "hdr_dig=%x", pq->pdu.hdr_dig);
          /*
	   | Add header digest to the iSCSI hdr mbuf (room was reserved above)
	   */
          bcopy(&pp->hdr_dig, (mh->m_data + mh->m_len), sizeof(int));
          mh->m_len += sizeof(int);
          mh->m_pkthdr.len += sizeof(int);
     }
     mp = &mh->m_next;
     if(pq->pdu.ds) {
          struct mbuf   *md;
          int           off = 0;

          len = pp->ds_len;
	  while(len & 03) // the specs say it must be int aligned
	       len++;
          while(len > 0) {
                int       l;

	       MGET(md, MB_TRYWAIT, MT_DATA);
	       if(md == NULL) {
		    /*
		     | Frees the header and the data mbufs linked so far;
		     | presumably ext_free drops the pq references they
		     | hold -- verify against ext_free.
		     */
		    m_freem(mh);
		    return ENOBUFS;
	       }
	       /* only count the reference once the mbuf really exists */
	       pq->refcnt++;

                l = min(MCLBYTES, len);
	       debug(5, "setting ext_free(arg=%p len/l=%d/%d)", pq->buf, len, l);
	       /* reference the data segment in place instead of copying */
	       md->m_ext.ext_buf = pq->buf;
	       md->m_ext.ext_free = ext_free;
	       md->m_ext.ext_ref = ext_ref;
	       md->m_ext.ext_arg = pq;
	       md->m_ext.ext_size = l;
	       md->m_flags |= M_EXT;
	       md->m_data = pp->ds + off;
	       md->m_len = l;
	       md->m_next = NULL;
	       mh->m_pkthdr.len += l;
	       *mp = md;
	       mp = &md->m_next;
	       len -= l;
	       off += l;
          }
     }
     if(sp->dataDigest) {
          struct mbuf   *me;

	  pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0);

	  MGET(me, MB_TRYWAIT, MT_DATA);
	  if(me == NULL) {
	       m_freem(mh);
	       return ENOBUFS;
	  }
          me->m_len = sizeof(int);
	  /*
	   | Align the digest mbuf itself.  The header mbuf must not be
	   | touched here: it is already filled in and moving its m_data
	   | would detach it from the bytes copied above.
	   */
          M_ALIGN(me, sizeof(int));
          bcopy(&pp->ds_dig, me->m_data, sizeof(int));
          me->m_next = NULL;
          mh->m_pkthdr.len += sizeof(int);
          *mp = me;
     }
     if((error = sosend(sp->soc, NULL, NULL, mh, 0, 0, curthread)) != 0) {
	  sdebug(3, "error=%d", error);
	  return error;
     }
     sp->stats.nsent++;
     getmicrouptime(&sp->stats.t_sent);
     return 0;
}
Example #9
0
/*
 * Construct and reliably send a netdump packet.  May fail from a resource
 * shortage or extreme number of unacknowledged retransmissions.  Wait for
 * an acknowledgement before returning.  Splits packets into chunks small
 * enough to be sent without fragmentation (looks up the interface MTU)
 *
 * Each chunk i of one call is tracked as bit i of the 64-bit want_acks
 * mask and compared against rcvd_acks, so the bit operations below are
 * done in 64-bit arithmetic.
 *
 * Parameters:
 *	type	netdump packet type (HERALD, FINISHED, or VMCORE)
 *	offset	vmcore data offset (bytes)
 *	data	vmcore data
 *	datalen	vmcore data size (bytes)
 *
 * Returns:
 *	int see errno.h, 0 for success
 *	(ENOBUFS when out of mbufs, ETIMEDOUT when the retry budget is
 *	exhausted, otherwise the error from netdump_udp_output())
 */
static int
netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen)
{
	struct netdump_msg_hdr *nd_msg_hdr;
	struct mbuf *m, *m2;
	uint64_t want_acks;
	uint32_t i, pktlen, sent_so_far;
	int retries, polls, error;

	want_acks = 0;
	rcvd_acks = 0;
	retries = 0;

	MPASS(nd_ifp != NULL);

retransmit:
	/*
	 * Chunks can be too big to fit in packets.  The second loop
	 * condition lets a zero-length message send exactly one
	 * header-only packet.
	 */
	for (i = sent_so_far = 0; sent_so_far < datalen ||
	    (i == 0 && datalen == 0); i++) {
		pktlen = datalen - sent_so_far;

		/* First bound: the packet structure. */
		pktlen = min(pktlen, NETDUMP_DATASIZE);

		/* Second bound: the interface MTU (assume no IP options). */
		pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) -
		    sizeof(struct netdump_msg_hdr));

		/*
		 * Check if it is retransmitting and this has been ACKed
		 * already.
		 */
		if ((rcvd_acks & ((uint64_t)1 << i)) != 0) {
			sent_so_far += pktlen;
			continue;
		}

		/*
		 * Get and fill a header mbuf, then chain data as an extended
		 * mbuf.
		 */
		m = m_gethdr(M_NOWAIT, MT_DATA);
		if (m == NULL) {
			printf("netdump_send: Out of mbufs\n");
			return (ENOBUFS);
		}
		m->m_len = sizeof(struct netdump_msg_hdr);
		m->m_pkthdr.len = sizeof(struct netdump_msg_hdr);
		MH_ALIGN(m, sizeof(struct netdump_msg_hdr));
		nd_msg_hdr = mtod(m, struct netdump_msg_hdr *);
		nd_msg_hdr->mh_seqno = htonl(nd_seqno + i);
		nd_msg_hdr->mh_type = htonl(type);
		nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far);
		nd_msg_hdr->mh_len = htonl(pktlen);
		nd_msg_hdr->mh__pad = 0;

		if (pktlen != 0) {
			m2 = m_get(M_NOWAIT, MT_DATA);
			if (m2 == NULL) {
				m_freem(m);
				printf("netdump_send: Out of mbufs\n");
				return (ENOBUFS);
			}
			/* Reference the caller's buffer; no copy is made. */
			MEXTADD(m2, data + sent_so_far, pktlen,
			    netdump_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE);
			m2->m_len = pktlen;

			m_cat(m, m2);
			m->m_pkthdr.len += pktlen;
		}
		error = netdump_udp_output(m);
		if (error != 0)
			return (error);

		/*
		 * Note that we're waiting for this packet in the bitfield.
		 * The shift must be done on a 64-bit value: a plain int
		 * "1 << i" cannot represent bits 31 and above.
		 */
		want_acks |= ((uint64_t)1 << i);
		sent_so_far += pktlen;
	}
	if (i >= NETDUMP_MAX_IN_FLIGHT)
		printf("Warning: Sent more than %d packets (%u). "
		    "Acknowledgements will fail unless the size of "
		    "rcvd_acks/want_acks is increased.\n",
		    NETDUMP_MAX_IN_FLIGHT, i);

	/*
	 * Wait for acks.  A *real* window would speed things up considerably.
	 */
	polls = 0;
	while (rcvd_acks != want_acks) {
		if (polls++ > nd_polls) {
			if (retries++ > nd_retries)
				return (ETIMEDOUT);
			printf(". ");
			goto retransmit;
		}
		netdump_network_poll();
		DELAY(500);
	}
	nd_seqno += i;
	return (0);
}
Example #10
0
File: aarp.c Project: MarginC/kame
static void
aarpwhohas( struct arpcom *ac, struct sockaddr_at *sat )
{
    struct mbuf		*m;
    struct ether_header	*eh;
    struct ether_aarp	*ea;
    struct at_ifaddr	*aa;
    struct llc		*llc;
    struct sockaddr	sa;

    if (( m = m_gethdr( M_DONTWAIT, MT_DATA )) == NULL ) {
	return;
    }
#ifdef MAC
    mac_create_mbuf_linklayer(&ac->ac_if, m);
#endif
    m->m_len = sizeof( *ea );
    m->m_pkthdr.len = sizeof( *ea );
    MH_ALIGN( m, sizeof( *ea ));

    ea = mtod( m, struct ether_aarp *);
    bzero((caddr_t)ea, sizeof( *ea ));

    ea->aarp_hrd = htons( AARPHRD_ETHER );
    ea->aarp_pro = htons( ETHERTYPE_AT );
    ea->aarp_hln = sizeof( ea->aarp_sha );
    ea->aarp_pln = sizeof( ea->aarp_spu );
    ea->aarp_op = htons( AARPOP_REQUEST );
    bcopy((caddr_t)ac->ac_enaddr, (caddr_t)ea->aarp_sha,
	    sizeof( ea->aarp_sha ));

    /*
     * We need to check whether the output ethernet type should
     * be phase 1 or 2. We have the interface that we'll be sending
     * the aarp out. We need to find an AppleTalk network on that
     * interface with the same address as we're looking for. If the
     * net is phase 2, generate an 802.2 and SNAP header.
     */
    if ((aa = at_ifawithnet( sat )) == NULL) {
	m_freem( m );
	return;
    }

    eh = (struct ether_header *)sa.sa_data;

    if ( aa->aa_flags & AFA_PHASE2 ) {
	bcopy((caddr_t)atmulticastaddr, (caddr_t)eh->ether_dhost,
		sizeof( eh->ether_dhost ));
	eh->ether_type = htons(sizeof(struct llc) + sizeof(struct ether_aarp));
	M_PREPEND( m, sizeof( struct llc ), M_TRYWAIT );
	llc = mtod( m, struct llc *);
	llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP;
	llc->llc_control = LLC_UI;
	bcopy( aarp_org_code, llc->llc_org_code, sizeof( aarp_org_code ));
	llc->llc_ether_type = htons( ETHERTYPE_AARP );

	bcopy( &AA_SAT( aa )->sat_addr.s_net, ea->aarp_spnet,
	       sizeof( ea->aarp_spnet ));
	bcopy( &sat->sat_addr.s_net, ea->aarp_tpnet,
	       sizeof( ea->aarp_tpnet ));
	ea->aarp_spnode = AA_SAT( aa )->sat_addr.s_node;
	ea->aarp_tpnode = sat->sat_addr.s_node;
    } else {
Example #11
0
/*
 * Slightly changed version of sosend()
 *
 * Sends slen bytes on "so" without a source buffer: mbufs are allocated
 * and their lengths set, but no data is copied into them.
 *
 *	so	socket to send on
 *	slen	number of bytes to send
 *	done	set on return to the number of bytes handed to the protocol
 *		(slen minus what was left over)
 *	l	lwp charged for the send (ru_msgsnd) and passed to pr_send
 *	flags	MSG_* flags; MSG_OOB, MSG_EOR and MSG_DONTROUTE are honoured
 *
 * Returns 0 or an errno value.  The socket lock is taken and released
 * inside this function.
 */
static int
kttcp_sosend(struct socket *so, unsigned long long slen,
	     unsigned long long *done, struct lwp *l, int flags)
{
	struct mbuf **mp, *m, *top;
	long space, len, mlen;
	int error, dontroute, atomic;
	long long resid;

	atomic = sosendallatonce(so);
	resid = slen;
	top = NULL;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		/* The socket is not locked yet, so skip the unlock at "out". */
		goto nolock;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	l->l_ru.ru_msgsnd++;
#define	snderr(errno)	{ error = errno; goto release; }
	solock(so);
 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				snderr(ENOTCONN);
			} else {
				snderr(EDESTADDRREQ);
			}
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			SBLASTRECORDCHK(&so->so_rcv,
			    "kttcp_soreceive sbwait 1");
			SBLASTMBUFCHK(&so->so_rcv,
			    "kttcp_soreceive sbwait 1");
			/* Drop the sockbuf lock while waiting for room. */
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			if (error)
				goto out;
			goto restart;
		}
		mp = &top;
		do {
			/* Build the chain with the socket unlocked. */
			sounlock(so);
			do {
				if (top == 0) {
					m = m_gethdr(M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = NULL;
				} else {
					m = m_get(M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					m_clget(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = lmin(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
#endif
					space -= len;
				} else {
nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				resid -= len;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				/*
				 * Nothing in this loop can fail (there is no
				 * copy-in), so error is still 0 here and is
				 * not checked while the socket is unlocked.
				 */
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			solock(so);

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			if (flags & MSG_OOB)
				error = (*so->so_proto->pr_usrreqs->pr_sendoob)(so,
				    top, NULL);
			else
				error = (*so->so_proto->pr_usrreqs->pr_send)(so,
				    top, NULL, NULL, l);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			/* The chain now belongs to the protocol. */
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
 	sounlock(so);
 nolock:
	if (top)
		m_freem(top);
	*done = slen - resid;
#if 0
	printf("sosend: error %d slen %llu resid %lld\n", error, slen, resid);
#endif
	return (error);
}
Example #12
0
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * Parameters:
 *	so	socket to send on
 *	addr	destination address, passed through to so_pru_send()
 *	uio	describes the user data, or NULL when "top" is used
 *	top	prepackaged mbuf chain, or NULL when "uio" is used
 *	control	optional control mbuf, passed with the first send only
 *	flags	MSG_* flags
 *	td	sending thread; its lwp (if any) is charged for the send
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
	struct mbuf *top, struct mbuf *control, int flags,
	struct thread *td)
{
	struct mbuf **mp;
	struct mbuf *m;
	size_t resid;
	int space, len;
	int clen = 0, error, dontroute, mlen;
	/* A prepackaged chain is always sent as one unit. */
	int atomic = sosendallatonce(so) || top;
	int pru_flags;

	if (uio) {
		resid = uio->uio_resid;
	} else {
		resid = (size_t)top->m_pkthdr.len;
#ifdef INVARIANTS
		/* Cross-check pkthdr.len against the sum of the mbuf lengths. */
		len = 0;
		for (m = top; m; m = m->m_next)
			len += m->m_len;
		KKASSERT(top->m_pkthdr.len == len);
#endif
	}

	/*
	 * WARNING!  resid is unsigned, space and len are signed.  space
	 * 	     can wind up negative if the sockbuf is overcommitted.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Only set SO_DONTROUTE around the send when the caller asked for
	 * it, the socket does not already have it, and the protocol is
	 * atomic.
	 */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td->td_lwp != NULL)
		td->td_lwp->lwp_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	gotoerr(errcode)	{ error = errcode; goto release; }

restart:
	error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

	do {
		if (so->so_state & SS_CANTSENDMORE)
			gotoerr(EPIPE);
		if (so->so_error) {
			/* Report a pending socket error once and clear it. */
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					gotoerr(ENOTCONN);
			} else if (addr == 0)
			    gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				   ENOTCONN : EDESTADDRREQ);
		}
		/* An all-at-once message or its control data can never fit. */
		if ((atomic && resid > so->so_snd.ssb_hiwat) ||
		    clen > so->so_snd.ssb_hiwat) {
			gotoerr(EMSGSIZE);
		}
		space = ssb_space(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		/*
		 * Not enough room right now.  Only uio-based sends wait
		 * here; a prepackaged chain skips this block.
		 */
		if ((space < 0 || (size_t)space < resid + clen) && uio &&
		    (atomic || space < so->so_snd.ssb_lowat || space < clen)) {
			if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
				gotoerr(EWOULDBLOCK);
			/* Drop the sockbuf lock while sleeping, then start over. */
			ssb_unlock(&so->so_snd);
			error = ssb_wait(&so->so_snd);
			if (error)
				goto out;
			goto restart;
		}
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* m_getl() takes an int size, so clamp the request. */
			if (resid > INT_MAX)
				resid = INT_MAX;
			m = m_getl((int)resid, MB_WAIT, MT_DATA,
				   top == NULL ? M_PKTHDR : 0, &mlen);
			if (top == NULL) {
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = NULL;
			}
			len = imin((int)szmin(mlen, resid), space);
			if (resid < MINCLSIZE) {
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
			/* Re-read the true remaining count (undoes the clamp). */
			resid = uio->uio_resid;
			m->m_len = len;
			/*
			 * Link the mbuf before checking error so that the
			 * cleanup at "out" frees it along with the chain.
			 */
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid == 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    if (flags & MSG_OOB) {
		    	    pru_flags = PRUS_OOB;
		    } else if ((flags & MSG_EOF) &&
		    	       (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			       (resid == 0)) {
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
		    	    pru_flags = PRUS_EOF;
		    } else if (resid > 0 && space > 0) {
			    /* If there is more to send, set PRUS_MORETOCOME */
		    	    pru_flags = PRUS_MORETOCOME;
		    } else {
		    	    pru_flags = 0;
		    }
		    /*
		     * XXX all the SS_CANTSENDMORE checks previously
		     * done could be out of date.  We could have received
		     * a reset packet in an interrupt or maybe we slept
		     * while doing page faults in uiomove() etc. We could
		     * probably recheck again inside the splnet() protection
		     * here, but there are probably other places that this
		     * also happens.  We must rethink this.
		     */
		    error = so_pru_send(so, pru_flags, top, addr, control, td);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /*
		     * Forget the data and control mbufs once they have been
		     * passed to so_pru_send(), so the cleanup at "out" does
		     * not free them and control is only sent once.
		     */
		    clen = 0;
		    control = 0;
		    top = NULL;
		    mp = &top;
		    if (error)
			    goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	ssb_unlock(&so->so_snd);
out:
	/* Free whatever was not handed to the protocol. */
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}