/* sockargs() - package a caller-supplied argument buffer into a new mbuf.
 *
 * An mbuf of the requested type with room for arglen bytes is obtained
 * from m_getwithdata() while NET_RESID is held, then the argument bytes
 * are copied in and the mbuf length is set to arglen.
 *
 * PARAMS: arg    - bytes to copy into the mbuf
 *         arglen - number of bytes at arg
 *         type   - mbuf type handed to m_getwithdata()
 *
 * RETURNS: the filled-in mbuf, or NULL if no mbuf could be allocated.
 */
static struct mbuf *
sockargs(void * arg, int arglen, int type)
{
   struct mbuf * argbuf;

   /* the allocation runs with the net resource locked (protects mfreeq) */
   LOCK_NET_RESOURCE(NET_RESID);
   argbuf = m_getwithdata(type, arglen);
   UNLOCK_NET_RESOURCE(NET_RESID);

   if (argbuf != NULL)
   {
      argbuf->m_len = arglen;
      MEMCPY(mtod(argbuf, char *), arg, arglen);
   }

   return argbuf;
}
int tcp_output(struct tcpcb * tp) { struct socket * so = tp->t_inpcb->inp_socket; int len; long win; int off, flags, error; struct mbuf * m; struct tcpiphdr * ti; unsigned optlen = 0; int idle, sendalot; struct mbuf * sendm; /* mbuf which contains data to send */ struct mbuf * tcp_mbuf; /* mbuf containing TCP header */ int bufoff; /* offset of data in sendm->m_data */ #ifdef TCP_SACK int sack_resend; int sack_hole = 0; /* next sack hole to fill */ if(tp->t_flags & TF_SACKREPLY) { /* we are resending based on a received SACK header */ sack_resend = TRUE; tp->t_flags &= ~TF_SACKREPLY; /* clear flag */ } else sack_resend = FALSE; #endif /* TCP_SACK */ /* * Determine length of data that should be transmitted, * and flags that will be used. * If there is some data or critical controls (SYN, RST) * to send, then transmit; otherwise, investigate further. */ idle = (tp->snd_max == tp->snd_una); again: sendalot = 0; off = (int)(tp->snd_nxt - tp->snd_una); win = (long)tp->snd_wnd; /* set basic send window */ if (win > (long)tp->snd_cwnd) /* see if we need congestion control */ { win = (int)(tp->snd_cwnd & ~(ALIGN_TYPE-1)); /* keep data aligned */ } /* * If in persist timeout with window of 0, send 1 byte. * Otherwise, if window is small but nonzero * and timer expired, we will send what we can * and go to transmit state. 
*/ if (tp->t_force) { if (win == 0) win = 1; else { tp->t_timer[TCPT_PERSIST] = 0; tp->t_rxtshift = 0; } } #ifdef TCP_SACK /* See if we need to adjust the offset for a sack resend */ if(sack_resend) { off = (int)(tp->sack_hole_start[sack_hole] - tp->snd_una); /* if this hole's already been acked then punt and move to next hole */ if(off < 0) { /* clear out the acked hole */ tp->sack_hole_start[sack_hole] = tp->sack_hole_end[sack_hole] = 0; /* see if we're done with SACK hole list (2 tests) */ if(++sack_hole >= SACK_BLOCKS) return 0; if(tp->sack_hole_start[sack_hole] == tp->sack_hole_end[sack_hole]) return 0; goto again; } tp->snd_nxt = tp->sack_hole_start[sack_hole]; len = (int)(tp->sack_hole_end[sack_hole] - tp->sack_hole_start[sack_hole]); len = (int)MIN(len, (int)win); } else #endif /* TCP_SACK */ { /* set length of packets which are not sack resends */ len = (int)MIN(so->so_snd.sb_cc, (unsigned)win) - off; } flags = tcp_outflags[tp->t_state]; /* See if we need to build TCP options field. This test should be fast. */ #if (defined(TCP_TIMESTAMP) | defined(TCP_SACK)) if((flags & TH_SYN) || /* !!!??? (so->so_options & SO_TIMESTAMP) || */ (tp->t_flags & TF_SACKNOW) ) { optlen = bld_options(tp, &tcp_optionbuf[optlen], flags, so); } #else /* If other options not defined this build then don't bother to call bld_options() except * on SYN packets */ if(flags & TH_SYN) { optlen = bld_options(tp, &tcp_optionbuf[optlen], flags, so); } #endif if (len < 0) { /* * If FIN has been sent but not acked, * but we haven't been called to retransmit, * len will be -1. Otherwise, window shrank * after we sent into it. If window shrank to 0, * cancel pending retransmit and pull snd_nxt * back to (closed) window. We will enter persist * state below. If the window didn't close completely, * just wait for an ACK. 
*/ len = 0; if (win == 0) { tp->t_timer[TCPT_REXMT] = 0; tp->snd_nxt = tp->snd_una; } } if (len > (int)tp->t_maxseg) { len = tp->t_maxseg; sendalot = 1; } #ifdef IP_V4 #ifdef IP_PMTU { int pmtu = tp->t_inpcb->inp_pmtu - 40; if (len > pmtu) { len = pmtu - 40; sendalot = 1; } } #endif /* IP_PMTU */ /* We don't need a pmtu test for IPv6. V6 code limits t_maxseg to * the Path MTU, so the test above the v4 ifdef above covers us. */ #endif /* IP_V4 */ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) flags &= ~TH_FIN; win = (long)(sbspace(&so->so_rcv)); /* * If our state indicates that FIN should be sent * and we have not yet done so, or we're retransmitting the FIN, * then we need to send. */ if ((flags & TH_FIN) && (so->so_snd.sb_cc == 0) && ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) { goto send; } /* * Send if we owe peer an ACK. */ if (tp->t_flags & TF_ACKNOW) goto send; if (flags & (TH_SYN|TH_RST)) goto send; if (SEQ_GT(tp->snd_up, tp->snd_una)) goto send; /* * Sender silly window avoidance. If connection is idle * and can send all data, a maximum segment, * at least a maximum default-size segment do it, * or are forced, do it; otherwise don't bother. * If peer's buffer is tiny, then send * when window is at least half open. * If retransmitting (possibly after persist timer forced us * to send into a small window), then must resend. */ if (len) { if (len == (int)tp->t_maxseg) goto send; if ((idle || tp->t_flags & TF_NODELAY) && len + off >= (int)so->so_snd.sb_cc) { goto send; } if (tp->t_force) goto send; if (len >= (int)(tp->max_sndwnd / 2)) goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) goto send; } /* * Compare available window to amount of window * known to peer (as advertised window less * next expected input). If the difference is at least two * max size segments or at least 35% of the maximum possible * window, then want to send a window update to peer. 
*/ if (win > 0) { int adv = (int)win - (int)(tp->rcv_adv - tp->rcv_nxt); if (so->so_rcv.sb_cc == 0 && adv >= (int)(tp->t_maxseg * 2)) goto send; if (100 * (u_int)adv / so->so_rcv.sb_hiwat >= 35) goto send; } /* * TCP window updates are not reliable, rather a polling protocol * using ``persist'' packets is used to insure receipt of window * updates. The three ``states'' for the output side are: * idle not doing retransmits or persists * persisting to move a small or zero window * (re)transmitting and thereby not persisting * * tp->t_timer[TCPT_PERSIST] * is set when we are in persist state. * tp->t_force * is set when we are called to send a persist packet. * tp->t_timer[TCPT_REXMT] * is set when we are retransmitting * The output side is idle when both timers are zero. * * If send window is too small, there is data to transmit, and no * retransmit or persist is pending, then go to persist state. * If nothing happens soon, send when timer expires: * if window is nonzero, transmit what we can, * otherwise force out a byte. */ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { tp->t_rxtshift = 0; tcp_setpersist(tp); } /* * No reason to send a segment, just return. */ return (0); send: ENTER_CRIT_SECTION(tp); /* Limit send length to the current buffer so as to * avoid doing the "mbuf shuffle" in m_copy(). 
*/ bufoff = off; sendm = so->so_snd.sb_mb; if (len) { /* find mbuf containing data to send (at "off") */ while (sendm) /* loop through socket send list */ { bufoff -= sendm->m_len; if (bufoff < 0) /* if off is in this buffer, break */ break; sendm = sendm->m_next; } if (!sendm) { dtrap(); /* shouldn't happen */ } bufoff += sendm->m_len; /* index to next data to send in msend */ /* if socket has multiple unsent mbufs, set flag for send to loop */ if ((sendm->m_next) && (len > (int)sendm->m_len)) { flags &= ~TH_FIN; /* don't FIN on segment prior to last */ sendalot = 1; /* set to send more segments */ } if((flags & TH_FIN) && (so->so_snd.sb_cc > (unsigned)len)) { /* This can happen on slow links (PPP) which retry the last * segment - the one with the FIN bit attached to data. */ flags &= ~TH_FIN; /* don't FIN on segment prior to last */ } /* only send the rest of msend */ len = min(len, (int)sendm->m_len); /* if we're not sending starting at sendm->m_data (in which * case bufoff != 0), then we will copy the data; else we would * write IP/TCP headers over sent but un-ack'ed data in sendm. * Similarly, if sendm->m_data is not aligned with respect to * sendm->m_base and ALIGN_TYPE, we will copy the data to * ensure that it (and the then-prepended IP/TCP headers) will * be aligned according to ALIGN_TYPE. */ if ((bufoff != 0) || /* data not front aligned in send mbuf? */ (((sendm->m_data - sendm->m_base) & (ALIGN_TYPE - 1)) != 0)) { len = min(len, (int)(sendm->m_len - bufoff)); /* limit len again */ /* One more test - if this data is not aligned with the front * of the m_data buffer then we can't use it in place, else we * might write the IP/TCP header over data that has not yet * been acked. In this case we must make sure our send * fits into a little buffer and send what we can. */ if ((len > (int)(lilbufsiz - HDRSLEN)) && /* length is bigger the small buffer? 
*/ (bigfreeq.q_len < 2)) /* and we are low on big buffers */ { len = lilbufsiz - HDRSLEN; } } } /* if send data is sufficiently aligned in packet, prepend TCP/IP header * in the space provided. */ if (len && (bufoff == 0) && (sendm->pkt->inuse == 1) && (((sendm->m_data - sendm->m_base) & (ALIGN_TYPE - 1)) == 0) && (optlen == 0)) { /* get an empty mbuf to "clone" the data */ m = m_getnbuf(MT_TXDATA, 0); if (!m) { EXIT_CRIT_SECTION(tp); return (ENOBUFS); } m->pkt = sendm->pkt; /* copy packet location in new mbuf */ m->pkt->inuse++; /* bump packet's use count */ m->m_base = sendm->m_base; /* clone mbuf members */ m->m_memsz = sendm->m_memsz; m->m_len = len + TCPIPHDRSZ; /* adjust clone for header */ m->m_data = sendm->m_data - TCPIPHDRSZ; } else /* either no data or data is not front aligned in mbuf */ { /* Grab a header mbuf, attaching a copy of data to be * transmitted, and initialize the header from * the template for sends on this connection. */ m = m_getwithdata (MT_HEADER, IFNETHDR_SIZE + TCPIPHDRSZ); if (m ==(struct mbuf *)NULL) { EXIT_CRIT_SECTION(tp); return ENOBUFS; } m->m_len = TCPIPHDRSZ; m->m_data += IFNETHDR_SIZE;/* Move this to sizeof tcpip hdr leave*/ /* 14 bytes for ethernet header */ if (len) /* attach any data to send */ { m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); if (m->m_next == 0) { m_freem(m); EXIT_CRIT_SECTION(tp); return ENOBUFS; } } } EXIT_CRIT_SECTION(tp); if (len) { if (tp->t_force && len == 1) tcpstat.tcps_sndprobe++; else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { tcpstat.tcps_sndrexmitpack++; tcpstat.tcps_sndrexmitbyte += len; #ifdef TCP_SACK if(sack_resend) tcpstat.tcps_sackresend++; #endif } else { tcpstat.tcps_sndpack++; tcpstat.tcps_sndbyte += len; } } else if (tp->t_flags & TF_ACKNOW) { tcpstat.tcps_sndacks++; } else if (flags & (TH_SYN|TH_FIN|TH_RST)) tcpstat.tcps_sndctrl++; else if (SEQ_GT(tp->snd_up, tp->snd_una)) tcpstat.tcps_sndurg++; else tcpstat.tcps_sndwinup++; ti = (struct tcpiphdr *)(m->m_data+sizeof(struct 
ip)-sizeof(struct ipovly)); if ((char *)ti < m->pkt->nb_buff) { panic("tcp_out- packet ptr underflow\n"); } tcp_mbuf = m; /* flag TCP header mbuf */ #ifdef IP_V6 /* Dual mode code */ if(so->so_domain == AF_INET6) { m = mbuf_prepend(m, sizeof(struct ipv6)); if(m == NULL) { /* this can happen when we run out of mbufs or pkt buffers * That is, mfreeq is empty or (lilfreeq, bigfreeq) are empty. * One solution is to find out which one is getting full and * then increase them. */ dtrap(); /* This is really rare... */ m_freem(tcp_mbuf); /* Free TCP/data chain */ return ENOBUFS; } /* strip overlay from front of TCP header */ tcp_mbuf->m_data += sizeof(struct ipovly); tcp_mbuf->m_len -= sizeof(struct ipovly); } #endif /* end IP_V6 */ if (tp->t_template == 0) panic("tcp_output"); MEMCPY((char*)ti, (char*)tp->t_template, sizeof(struct tcpiphdr)); /* * Fill in fields, remembering maximum advertised * window for use in delaying messages about window sizes. * If resending a FIN, be sure not to use a new sequence number. */ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && tp->snd_nxt == tp->snd_max) { tp->snd_nxt--; } ti->ti_seq = htonl(tp->snd_nxt); ti->ti_ack = htonl(tp->rcv_nxt); /* * If we're sending a SYN, check the IP address of the interface * that we will (likely) use to send the IP datagram -- if it's * changed from what is in the template (as it might if this is * a retransmission, and the original SYN caused PPP to start * bringing the interface up, and PPP has got a new IP address * via IPCP), update the template and the inpcb with the new * address. 
*/ if (flags & TH_SYN) { struct inpcb * inp; inp = (struct inpcb *)so->so_pcb; switch(so->so_domain) { #ifdef IP_V4 case AF_INET: { ip_addr src; #ifdef INCLUDE_PPP if(((flags & TH_ACK) == 0) && /* SYN only, not SYN/ACK */ (inp->ifp) && /* Make sure we have iface */ (inp->ifp->mib.ifType == PPP)) /* only PPP type */ { dtrap(); /* remove after confirmed to work in PPP */ src = ip_mymach(ti->ti_dst.s_addr); if (src != ti->ti_src.s_addr) { ti->ti_src.s_addr = src; tp->t_template->ti_src.s_addr = src; tp->t_inpcb->inp_laddr.s_addr = src; } } #endif /* INCLUDE_PPP */ /* If this is a SYN (not a SYN/ACK) then set the pmtu */ if((flags & TH_ACK) == 0) { #ifdef IP_PMTU inp->inp_pmtu = pmtucache_get(inp->inp_faddr.s_addr); #else /* not compiled for pathmtu, guess based on iface */ { NET ifp; /* find iface for route. Pass "src" as nexthop return */ ifp = iproute(ti->ti_dst.s_addr, &src); if(ifp) inp->inp_pmtu = ifp->n_mtu - (ifp->n_lnh + 40); else inp->inp_pmtu = 580; /* Ugh. */ } #endif /* IP_PMTU */ } break; } #endif /* IP_V4 */ #ifdef IP_V6 case AF_INET6: { struct ip6_inaddr * local; local = ip6_myaddr(&tp->t_inpcb->ip6_faddr, inp->ifp); /* If we got a local address & it's not the one in the pcb, then * we assume it changed at the iface and fix it in the pcb. Unlike * v4, we don't have an IP header yet, not do we have a template * to worry about. 
*/ if((local) && (!IP6EQ(&local->addr, &tp->t_inpcb->ip6_laddr))) { IP6CPY(&tp->t_inpcb->ip6_laddr, &local->addr); } /* If this is a SYN (not a SYN/ACK) then set the pmtu */ if((flags & TH_ACK) == 0) { inp->inp_pmtu = ip6_pmtulookup(&inp->ip6_laddr, inp->ifp); } break; } #endif /* IP_V6 */ default: dtrap(); /* bad domain setting */ } } /* fill in options if any are set */ if (optlen) { struct mbuf * mopt; mopt = m_getwithdata(MT_TXDATA, MAXOPTLEN); if (mopt == NULL) { m_freem(m); return (ENOBUFS); } /* insert options mbuf after after tmp_mbuf */ mopt->m_next = tcp_mbuf->m_next; tcp_mbuf->m_next = mopt; /* extend options to aligned address */ while(optlen & 0x03) tcp_optionbuf[optlen++] = TCPOPT_EOL; MEMCPY(mtod(mopt, char *), tcp_optionbuf, optlen); mopt->m_len = optlen; /* use portable macro to set tcp data offset bits */ SET_TH_OFF(ti->ti_t, ((sizeof (struct tcphdr) + optlen) >> 2)); } ti->ti_flags = (u_char)flags; /* * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. */ if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) win = 0; if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) win = (long)(tp->rcv_adv - tp->rcv_nxt); /* do check for Iniche buffer limits -JB- */ if (bigfreeq.q_len == 0) /* If queue length is 0, set window to 0 */ { win = 0; } else if(win > (((long)bigfreeq.q_len - 1) * (long)bigbufsiz)) { win = ((long)bigfreeq.q_len - 1) * bigbufsiz; } #ifdef TCP_WIN_SCALE if(tp->t_flags & TF_WINSCALE) { ti->ti_win = htons((u_short)(win >> tp->rcv_wind_scale)); /* apply scale */ }
static int t_getname(long s, struct sockaddr * addr, int * addrlen, int opcode) { struct socket * so; struct mbuf * m; int err; so = LONG2SO(s); SOC_CHECK(so); so->so_error = 0; INET_TRACE (INETM_SOCKET, ("INET:get[sock|peer]name so %x\n", so)); if((opcode == PRU_PEERADDR) && (so->so_state & SS_ISCONNECTED) == 0) { so->so_error = ENOTCONN; return SOCKET_ERROR; } LOCK_NET_RESOURCE(NET_RESID); m = m_getwithdata (MT_SONAME, sizeof (struct sockaddr)); if (m == NULL) { so->so_error = ENOMEM; UNLOCK_NET_RESOURCE(NET_RESID); return SOCKET_ERROR; } so->so_req = opcode; if ((err = (*so->so_proto->pr_usrreq)(so, 0, m)) != 0) goto bad; #ifdef IP_V4 if(so->so_domain == AF_INET) { if(*addrlen < sizeof(struct sockaddr_in)) { dtrap(); /* programming error */ m_freem(m); UNLOCK_NET_RESOURCE(NET_RESID); return EINVAL; } MEMCPY(addr, m->m_data, sizeof(struct sockaddr_in)); *addrlen = sizeof(struct sockaddr_in); } #endif /* IP_V4 */ #ifdef IP_V6 if(so->so_domain == AF_INET6) { if(*addrlen < sizeof(struct sockaddr_in6)) { dtrap(); /* programming error */ m_freem(m); UNLOCK_NET_RESOURCE(NET_RESID); return EINVAL; } MEMCPY(addr, m->m_data, sizeof(struct sockaddr_in6)); *addrlen = sizeof(struct sockaddr_in6); } #endif /* IP_V6 */ bad: m_freem(m); UNLOCK_NET_RESOURCE(NET_RESID); if (err) { so->so_error = err; return SOCKET_ERROR; } return 0; }
long t_accept(long s, struct sockaddr * addr, int * addrlen) { #ifdef SOCKDEBUG char logbuf[10]; #endif struct socket * so; struct mbuf * nam; so = LONG2SO(s); SOC_CHECK(so); DOMAIN_CHECK(so, *addrlen); so->so_error = 0; INET_TRACE (INETM_SOCKET, ("INET:accept:so %x so_qlen %d so_state %x\n", so, so->so_qlen, so->so_state)); if ((so->so_options & SO_ACCEPTCONN) == 0) { so->so_error = EINVAL; #ifdef SOCKDEBUG sprintf(logbuf, "t_accept[%d]: %d", __LINE__, so->so_error); glog_with_type(LOG_TYPE_DEBUG, logbuf, 1); #endif return SOCKET_ERROR; } if ((so->so_state & SS_NBIO) && so->so_qlen == 0) { so->so_error = EWOULDBLOCK; #ifdef SOCKDEBUG sprintf(logbuf, "t_accept[%d]: %d", __LINE__, so->so_error); glog_with_type(LOG_TYPE_DEBUG, logbuf, 1); #endif return SOCKET_ERROR; } LOCK_NET_RESOURCE(NET_RESID); while (so->so_qlen == 0 && so->so_error == 0) { if (so->so_state & SS_CANTRCVMORE) { so->so_error = ECONNABORTED; UNLOCK_NET_RESOURCE(NET_RESID); return SOCKET_ERROR; } tcp_sleep ((char *)&so->so_timeo); } if (so->so_error) { #ifdef SOCKDEBUG sprintf(logbuf, "t_accept[%d]: %d", __LINE__, so->so_error); glog_with_type(LOG_TYPE_DEBUG, logbuf, 1); #endif UNLOCK_NET_RESOURCE(NET_RESID); return SOCKET_ERROR; } nam = m_getwithdata (MT_SONAME, sizeof (struct sockaddr)); if (nam == NULL) { UNLOCK_NET_RESOURCE(NET_RESID); so->so_error = ENOMEM; #ifdef SOCKDEBUG sprintf(logbuf, "t_accept[%d]: %d", __LINE__, so->so_error); glog_with_type(LOG_TYPE_DEBUG, logbuf, 1); #endif return SOCKET_ERROR; } { struct socket *aso = so->so_q; if (soqremque (aso, 1) == 0) panic("accept"); so = aso; } (void)soaccept (so, nam); #ifdef TRACE_DEBUG { struct sockaddr_in *sin; sin = mtod(nam, struct sockaddr_in *); INET_TRACE (INETM_SOCKET, ("INET:accept:done so %lx port %d addr %lx\n", so, sin->sin_port, sin->sin_addr.s_addr)); } #endif /* TRACE_INET */ /* return the addressing info in the passed structure */ if (addr != NULL) MEMCPY(addr, nam->m_data, *addrlen); m_freem (nam); 
UNLOCK_NET_RESOURCE(NET_RESID); SOC_RANGE(so); return SO2LONG(so); }
/* tcp_respond() - send a single header-only TCP segment (or keepalive).
 *
 * Two modes, selected by flags:
 *  - flags == 0: keepalive from the timer. A new mbuf is allocated, the
 *    header at ti is copied into it, TH_ACK is used as the flag set, and
 *    the segment is given a length of one data byte. tp must be non-NULL
 *    in this mode (it is dereferenced to find the socket's domain).
 *  - flags != 0: reply (e.g. a reset) to a received segment. The mbuf
 *    ti_mbuf which holds ti is recycled: anything chained after it is
 *    freed, and the ports (plus, for IPv4, the addresses) are swapped
 *    in place.
 *
 * PARAMS: tp      - connection control block; may be NULL in reply mode
 *         ti      - TCP/IP header: template (keepalive) or the received
 *                   header inside ti_mbuf (reply)
 *         ack     - acknowledgement number to send (host order)
 *         seq     - sequence number to send (host order)
 *         flags   - TCP header flags, or 0 for a keepalive
 *         ti_mbuf - mbuf containing ti; used only when flags != 0
 *
 * RETURNS: nothing; the send is best effort and failures are not reported.
 */
void
tcp_respond(struct tcpcb * tp, struct tcpiphdr * ti, tcp_seq ack, tcp_seq seq,
   int flags, struct mbuf * ti_mbuf)
{
   int tlen;                     /* tcp data len - 0 or 1 */
   int domain;                   /* AF_INET or AF_INET6 */
   int win = 0;                  /* window to use in sent packet */
   struct mbuf * m;              /* mbuf to send */
   struct tcpiphdr * tmp_thdr;   /* scratch */

   if (tp)
      win = (int)sbspace(&tp->t_inpcb->inp_socket->so_rcv);

   /* Figure out if we can recycle the passed buffer or if we need a
    * new one. Construct the easy parts of the TCP and IP headers.
    */
   if (flags == 0)   /* sending keepalive from timer */
   {
      /* no flags == need a new buffer */
      m = m_getwithdata (MT_HEADER, HDRSLEN);
      if (m == NULL)
         return;

      tlen = 1;   /* Keepalives have one byte of data */
      m->m_len = TCPIPHDRSZ + tlen;

      /* Copy template contents into the mbuf and set ti to point
       * to the header structure in the mbuf.
       */
      tmp_thdr = (struct tcpiphdr *)((char *)m->m_data + sizeof(struct ip) - sizeof(struct ipovly));
      if ((char *)tmp_thdr < m->pkt->nb_buff)
      {
         panic("tcp_respond- packet ptr underflow\n");
      }
      MEMCPY(tmp_thdr, ti, sizeof(struct tcpiphdr));
      ti = tmp_thdr;
      flags = TH_ACK;
      domain = tp->t_inpcb->inp_socket->so_domain;
   }
   else   /* Flag was passed (e.g. reset); recycle passed mbuf */
   {
      m = ti_mbuf;   /*dtom(ti);*/
      if (m->pkt->type == IPTP)   /* IPv4 packet */
         domain = AF_INET;
      else
         domain = AF_INET6;

      M_FREEM(m->m_next);   /* drop any data chained behind the header */
      m->m_next = 0;
      tlen = 0;             /* NO data */
      m->m_len = TCPIPHDRSZ;

      /* turn the received header around so it addresses the sender */
      xchg(ti->ti_dport, ti->ti_sport, u_short);
      if (m->pkt->type == IPTP)
      {
         xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
      }

      if (flags & TH_RST)   /* count resets in MIB */
      {
         TCP_MIB_INC(tcpOutRsts);   /* keep MIB stats */
      }
   }

   /* finish constructing the TCP header */
   ti->ti_seq = htonl(seq);
   ti->ti_ack = htonl(ack);
   ti->ti_t.th_doff = 0x50;   /* NetPort: init data offset bits */
   ti->ti_flags = (u_char)flags;
   ti->ti_win = htons((u_short)win);
   ti->ti_urp = 0;
   ti->ti_t.th_sum = 0;

   /* Finish constructing IP header and send, based on IP type in use */
   switch (domain)
   {
#ifdef IP_V4
   case AF_INET:
   {
      struct ip * pip;

      pip = (struct ip *)((char *)ti + sizeof(struct ipovly) - sizeof(struct ip));

      m->pkt->nb_tlen = m->pkt->nb_plen = pip->ip_len =
         (unshort)(TCPIPHDRSZ + tlen);

      /* If our system's max. MAC header size is greater than the size
       * of the MAC header in the received packet then we need to
       * adjust the IP header offset to allow for this. Since the packets
       * are only headers they should always fit.
       */
      if (pip >= (struct ip *)(m->pkt->nb_buff + MaxLnh))
      {
         /* headers will fit, just set pointer */
         m->m_data = m->pkt->nb_prot = (char *)pip;
      }
      else   /* MAC may not fit, adjust pointer and move headers back */
      {
         m->m_data = m->pkt->nb_prot = m->pkt->nb_buff + MaxLnh;   /* new ptr */
         MEMMOVE(m->m_data, pip, TCPIPHDRSZ);   /* move back tcp/ip headers */
      }

#ifdef DOS_SYN
      if (!tp)
      {
         /* In the case of a SYN DOS attack, many RST|ACK replies
          * have no tp structure and need to be freed.
          */
         M_FREEM(m);
      }
      else
#endif   /* DOS_SYN */
      {
         struct ip_socopts * sopts;

         if (tp && tp->t_inpcb && tp->t_inpcb->inp_socket)
            sopts = tp->t_inpcb->inp_socket->so_optsPack;
         else
            sopts = (struct ip_socopts *)NULL;

         /* best effort send; the result is deliberately ignored */
         (void)ip_output(m, sopts);
      }
      break;
   }
#endif   /* IP_V4 */

#ifdef IP_V6
   case AF_INET6:
   {
      struct ipv6 * pip6;
      struct mbuf * ip_m;   /* IP header's mbuf */

      /* Get mbuf space for the IP header. mbuf m should contain the
       * TCP header somewhere, so set m_data to that and try to prepend
       * an IPv6 header.
       * NOTE(review): m_len is set to the bare TCP header size here, so
       * the keepalive's one data byte (tlen) is not counted in the mbuf
       * even though it is included in the length passed to tcp6_send()
       * below - confirm this is intended.
       */
      m->m_data = (char *)&ti->ti_t;   /* TCP header */
      m->m_len = sizeof(struct tcphdr);
      ip_m = mbuf_prepend(m, sizeof(struct ipv6));
      if (!ip_m)
      {
         m_free(m);
         return;
      }

      pip6 = (struct ipv6 *)ip_m->m_data;

      /* We have to find the IPv6 addresses. If a packet was passed
       * then get them from that, otherwise get them from the passed tp.
       * We should always have one or the other.
       */
      if (ti_mbuf)
      {
         ip6_addr tmp;
         struct ipv6 * inpip = ti_mbuf->pkt->ip6_hdr;

         /* pip6 and inpip may be the same, so swap the IP addresses
          * through a tmp variable.
          */
         IP6CPY(&tmp, &inpip->ip_src);
         IP6CPY(&pip6->ip_src, &inpip->ip_dest);
         IP6CPY(&pip6->ip_dest, &tmp);
      }
      else if (tp)
      {
         struct inpcb * inp = tp->t_inpcb;

         IP6CPY(&pip6->ip_src, &inp->ip6_laddr);
         IP6CPY(&pip6->ip_dest, &inp->ip6_faddr);
      }
      else
      {
         dtrap();
         /* No addresses available, so nothing will be sent: release the
          * packet instead of leaking it. ip_m is what the normal path
          * hands to tcp6_send() as the whole packet.
          */
         M_FREEM(ip_m);
         break;
      }

      /* best effort send */
      /* send down to glue layer to IPv6 */
      /* and don't forget the so_optsPack */
#ifdef DOS_SYN
      if (!tp)
      {
         /* In the case of a SYN DOS attack, many RST|ACK replies
          * have no tp structure and need to be freed. Free from ip_m
          * (the head returned by mbuf_prepend) rather than m, so the
          * prepended IPv6 header mbuf is released as well.
          */
         M_FREEM(ip_m);
      }
      else
#endif   /* DOS_SYN */
      {
         struct ip_socopts * sopts;

         if (tp && tp->t_inpcb && tp->t_inpcb->inp_socket)
            sopts = tp->t_inpcb->inp_socket->so_optsPack;
         else
            sopts = (struct ip_socopts *)NULL;

         /* best effort send; the result is deliberately ignored */
         (void)tcp6_send(tp, ip_m, &ti->ti_t,
            sizeof(struct ipv6) + sizeof(struct tcphdr) + tlen, sopts);
      }
      break;
   }
#endif   /* IP_V6 */

   default:
      dtrap();
      break;
   }
   return;
}
int sosend(struct socket *so, struct mbuf *nam, /* sockaddr, if UDP socket, NULL if TCP */ char *data, /* data to send */ int *data_length, /* IN/OUT length of (remaining) data */ int flags) { struct mbuf *head = (struct mbuf *)NULL; struct mbuf *m; int space; int resid; int len; int error = 0; int dontroute; int first = 1; resid = *data_length; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. */ if (resid < 0) return (EINVAL); INET_TRACE (INETM_IO, ("INET:sosend: so %lx resid %d sb_hiwat %d so_state %x\n", so, resid, so->so_snd.sb_hiwat, so->so_state)); if (sosendallatonce(so) && (resid > (int)so->so_snd.sb_hiwat)) return (EMSGSIZE); dontroute = (flags & MSG_DONTROUTE) && ((so->so_options & SO_DONTROUTE) == 0) && (so->so_proto->pr_flags & PR_ATOMIC); #define snderr(errno) { error = errno; goto release; } restart: sblock(&so->so_snd); do { if (so->so_error) { error = so->so_error; so->so_error = 0; /* ??? 
*/ goto release; } if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) snderr(ENOTCONN); if (nam == 0) snderr(EDESTADDRREQ); } if (flags & MSG_OOB) space = 1024; else { space = (int)sbspace(&so->so_snd); if ((sosendallatonce(so) && (space < resid)) || ((resid >= CLBYTES) && (space < CLBYTES) && (so->so_snd.sb_cc >= CLBYTES) && ((so->so_state & SS_NBIO) == 0) && ((flags & MSG_DONTWAIT) == 0))) { if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { if (first) error = EWOULDBLOCK; goto release; } sbunlock(&so->so_snd); sbwait(&so->so_snd); goto restart; } } if ( space <= 0 ) { /* no space in socket send buffer - see if we can wait */ if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { if (first) /* report first error */ error = EWOULDBLOCK; goto release; } /* If blocking socket, let someone else run */ sbunlock(&so->so_snd); sbwait(&so->so_snd); goto restart; } while (space > 0) { len = resid; if ( so->so_type == SOCK_STREAM ) { m = m_getwithdata(MT_TXDATA, len); if (!m) snderr(ENOBUFS); MEMCPY(m->m_data, data, len); so->so_snd.sb_flags |= SB_MBCOMP; /* allow compression */ } else { m = m_get (M_WAIT, MT_TXDATA); m->m_data = data; } INET_TRACE (INETM_IO, ("sosend:got %d bytes so %lx mlen %d, off %d mtod %x\n", len, so, m->m_len, m->m_off, mtod (m, caddr_t))); *data_length -= len; resid -= len; data += len; m->m_len = len; if (head == (struct mbuf *)NULL) head = m; if (error) goto release; if (*data_length <= 0) break; } if (dontroute) so->so_options |= SO_DONTROUTE; so->so_req = (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND; error = (*so->so_proto->pr_usrreq)(so, head, nam); if (dontroute) so->so_options &= ~SO_DONTROUTE; head = (struct mbuf *)NULL; first = 0; } while ((resid != 0) && (error == 0)); release: sbunlock(&so->so_snd); if (head) m_freem(head); return error; }
struct mbuf * m_copy(struct mbuf * m, int off, int len) { struct mbuf * nb, * head, * tail; int tocopy; if (len == 0) /* nothing to do */ return NULL; #ifdef NPDEBUG /* sanity test parms */ if (off < 0 || (len < 0 && len != M_COPYALL)) { dtrap(); return NULL; } #endif /* NPDEBUG */ /* move forward through mbuf q to "off" point */ while (off > 0) { if (!m) { dtrap(); return NULL; } if (off < (int)m->m_len) break; off -= m->m_len; m = m->m_next; } head = tail = NULL; while (len > 0) { if (m == NULL) /* at end of queue? */ { panic("m_copy: bad len"); return NULL; } tocopy = (int)MIN(len, (int)(m->m_len - off)); /* mbuf data is expected to be aligned according to * ALIGN_TYPE, so if the offset isn't aligned, we must * copy the buffer instead of cloning it. * Also, don't permit multiple clones; they sometimes * lead to corrupted data. */ if ((off & (ALIGN_TYPE - 1)) || (m->pkt->inuse != 1)) { if ((nb = m_getwithdata (m->m_type, tocopy)) == NULL) goto nospace; MEMCPY(nb->m_data, m->m_data+off, tocopy); nb->m_len = tocopy; /* set length of data we just moved into new mbuf */ tcpstat.tcps_mcopies++; tcpstat.tcps_mcopiedbytes += tocopy; } else { /* Rather than memcpy every mbuf's data, "clone" the data by * making a duplicate of the mbufs involved and bumping the * inuse count of the actual packet structs */ if ((nb = m_getwithdata (m->m_type, 0)) == NULL) goto nospace; m->pkt->inuse++; /* bump pkt use count to clone it */ /* set up new mbuf with pointers to cloned packet */ nb->pkt = m->pkt; nb->m_base = m->m_base; nb->m_memsz = m->m_memsz; nb->m_data = m->m_data + off; nb->m_len = tocopy; tcpstat.tcps_mclones++; tcpstat.tcps_mclonedbytes += tocopy; } len -= tocopy; off = 0; if (tail) /* head & tail are set by first pass thru loop */ tail->m_next = nb; else head = nb; tail = nb; /* always make new mbuf the tail */ m = m->m_next; } return head; nospace: m_freem (head); return NULL; }