/* * Tcp output routine: figure out what should be sent and send it. */ int tcp_output(struct tcpcb *tp) { struct inpcb * const inp = tp->t_inpcb; struct socket *so = inp->inp_socket; long len, recvwin, sendwin; int nsacked = 0; int off, flags, error = 0; #ifdef TCP_SIGNATURE int sigoff = 0; #endif struct mbuf *m; struct ip *ip; struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned int ipoptlen, optlen, hdrlen; int idle; boolean_t sendalot; struct ip6_hdr *ip6; #ifdef INET6 const boolean_t isipv6 = INP_ISIPV6(inp); #else const boolean_t isipv6 = FALSE; #endif boolean_t can_tso = FALSE, use_tso; boolean_t report_sack, idle_cwv = FALSE; u_int segsz, tso_hlen, tso_lenmax = 0; int segcnt = 0; boolean_t need_sched = FALSE; KKASSERT(so->so_port == &curthread->td_msgport); /* * Determine length of data that should be transmitted, * and flags that will be used. * If there is some data or critical controls (SYN, RST) * to send, then transmit; otherwise, investigate further. */ /* * If we have been idle for a while, the send congestion window * could be no longer representative of the current state of the * link; need to validate congestion window. However, we should * not perform congestion window validation here, since we could * be asked to send pure ACK. */ if (tp->snd_max == tp->snd_una && (ticks - tp->snd_last) >= tp->t_rxtcur && tcp_idle_restart) idle_cwv = TRUE; /* * Calculate whether the transmit stream was previously idle * and adjust TF_LASTIDLE for the next time. 
*/ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); if (idle && (tp->t_flags & TF_MORETOCOME)) tp->t_flags |= TF_LASTIDLE; else tp->t_flags &= ~TF_LASTIDLE; if (TCP_DO_SACK(tp) && tp->snd_nxt != tp->snd_max && !IN_FASTRECOVERY(tp)) nsacked = tcp_sack_bytes_below(&tp->scb, tp->snd_nxt); /* * Find out whether TSO could be used or not * * For TSO capable devices, the following assumptions apply to * the processing of TCP flags: * - If FIN is set on the large TCP segment, the device must set * FIN on the last segment that it creates from the large TCP * segment. * - If PUSH is set on the large TCP segment, the device must set * PUSH on the last segment that it creates from the large TCP * segment. */ #if !defined(IPSEC) && !defined(FAST_IPSEC) if (tcp_do_tso #ifdef TCP_SIGNATURE && (tp->t_flags & TF_SIGNATURE) == 0 #endif ) { if (!isipv6) { struct rtentry *rt = inp->inp_route.ro_rt; if (rt != NULL && (rt->rt_flags & RTF_UP) && (rt->rt_ifp->if_hwassist & CSUM_TSO)) { can_tso = TRUE; tso_lenmax = rt->rt_ifp->if_tsolen; } } } #endif /* !IPSEC && !FAST_IPSEC */ again: m = NULL; ip = NULL; th = NULL; ip6 = NULL; if ((tp->t_flags & (TF_SACK_PERMITTED | TF_NOOPT)) == TF_SACK_PERMITTED && (!TAILQ_EMPTY(&tp->t_segq) || tp->reportblk.rblk_start != tp->reportblk.rblk_end)) report_sack = TRUE; else report_sack = FALSE; /* Make use of SACK information when slow-starting after a RTO. */ if (TCP_DO_SACK(tp) && tp->snd_nxt != tp->snd_max && !IN_FASTRECOVERY(tp)) { tcp_seq old_snd_nxt = tp->snd_nxt; tcp_sack_skip_sacked(&tp->scb, &tp->snd_nxt); nsacked += tp->snd_nxt - old_snd_nxt; } sendalot = FALSE; off = tp->snd_nxt - tp->snd_una; sendwin = min(tp->snd_wnd, tp->snd_cwnd + nsacked); sendwin = min(sendwin, tp->snd_bwnd); flags = tcp_outflags[tp->t_state]; /* * Get standard flags, and add SYN or FIN if requested by 'hidden' * state flags. 
*/ if (tp->t_flags & TF_NEEDFIN) flags |= TH_FIN; if (tp->t_flags & TF_NEEDSYN) flags |= TH_SYN; /* * If in persist timeout with window of 0, send 1 byte. * Otherwise, if window is small but nonzero * and timer expired, we will send what we can * and go to transmit state. */ if (tp->t_flags & TF_FORCE) { if (sendwin == 0) { /* * If we still have some data to send, then * clear the FIN bit. Usually this would * happen below when it realizes that we * aren't sending all the data. However, * if we have exactly 1 byte of unsent data, * then it won't clear the FIN bit below, * and if we are in persist state, we wind * up sending the packet without recording * that we sent the FIN bit. * * We can't just blindly clear the FIN bit, * because if we don't have any more data * to send then the probe will be the FIN * itself. */ if (off < so->so_snd.ssb_cc) flags &= ~TH_FIN; sendwin = 1; } else { tcp_callout_stop(tp, tp->tt_persist); tp->t_rxtshift = 0; } } /* * If snd_nxt == snd_max and we have transmitted a FIN, the * offset will be > 0 even if so_snd.ssb_cc is 0, resulting in * a negative length. This can also occur when TCP opens up * its congestion window while receiving additional duplicate * acks after fast-retransmit because TCP will reset snd_nxt * to snd_max after the fast-retransmit. * * A negative length can also occur when we are in the * TCPS_SYN_RECEIVED state due to a simultanious connect where * our SYN has not been acked yet. * * In the normal retransmit-FIN-only case, however, snd_nxt will * be set to snd_una, the offset will be 0, and the length may * wind up 0. */ len = (long)ulmin(so->so_snd.ssb_cc, sendwin) - off; /* * Lop off SYN bit if it has already been sent. However, if this * is SYN-SENT state and if segment contains data, suppress sending * segment (sending the segment would be an option if we still * did TAO and the remote host supported it). 
*/ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { flags &= ~TH_SYN; off--, len++; if (len > 0 && tp->t_state == TCPS_SYN_SENT) { tp->t_flags &= ~(TF_ACKNOW | TF_XMITNOW); return 0; } } /* * Be careful not to send data and/or FIN on SYN segments. * This measure is needed to prevent interoperability problems * with not fully conformant TCP implementations. */ if (flags & TH_SYN) { len = 0; flags &= ~TH_FIN; } if (len < 0) { /* * A negative len can occur if our FIN has been sent but not * acked, or if we are in a simultanious connect in the * TCPS_SYN_RECEIVED state with our SYN sent but not yet * acked. * * If our window has contracted to 0 in the FIN case * (which can only occur if we have NOT been called to * retransmit as per code a few paragraphs up) then we * want to shift the retransmit timer over to the * persist timer. * * However, if we are in the TCPS_SYN_RECEIVED state * (the SYN case) we will be in a simultanious connect and * the window may be zero degeneratively. In this case we * do not want to shift to the persist timer after the SYN * or the SYN+ACK transmission. */ len = 0; if (sendwin == 0 && tp->t_state != TCPS_SYN_RECEIVED) { tcp_callout_stop(tp, tp->tt_rexmt); tp->t_rxtshift = 0; tp->snd_nxt = tp->snd_una; if (!tcp_callout_active(tp, tp->tt_persist)) tcp_setpersist(tp); } } KASSERT(len >= 0, ("%s: len < 0", __func__)); /* * Automatic sizing of send socket buffer. Often the send buffer * size is not optimally adjusted to the actual network conditions * at hand (delay bandwidth product). Setting the buffer size too * small limits throughput on links with high bandwidth and high * delay (eg. trans-continental/oceanic links). Setting the * buffer size too big consumes too much real kernel memory, * especially with many connections on busy servers. * * The criteria to step up the send buffer one notch are: * 1. receive window of remote host is larger than send buffer * (with a fudge factor of 5/4th); * 2. 
hiwat has not significantly exceeded bwnd (inflight) * (bwnd is a maximal value if inflight is disabled). * 3. send buffer is filled to 7/8th with data (so we actually * have data to make use of it); * 4. hiwat has not hit maximal automatic size; * 5. our send window (slow start and cogestion controlled) is * larger than sent but unacknowledged data in send buffer. * * The remote host receive window scaling factor may limit the * growing of the send buffer before it reaches its allowed * maximum. * * It scales directly with slow start or congestion window * and does at most one step per received ACK. This fast * scaling has the drawback of growing the send buffer beyond * what is strictly necessary to make full use of a given * delay*bandwith product. However testing has shown this not * to be much of an problem. At worst we are trading wasting * of available bandwith (the non-use of it) for wasting some * socket buffer memory. * * The criteria for shrinking the buffer is based solely on * the inflight code (snd_bwnd). If inflight is disabled, * the buffer will not be shrinked. Note that snd_bwnd already * has a fudge factor. Our test adds a little hysteresis. 
*/ if (tcp_do_autosndbuf && (so->so_snd.ssb_flags & SSB_AUTOSIZE)) { const int asbinc = tcp_autosndbuf_inc; const int hiwat = so->so_snd.ssb_hiwat; const int lowat = so->so_snd.ssb_lowat; u_long newsize; if ((tp->snd_wnd / 4 * 5) >= hiwat && so->so_snd.ssb_cc >= (hiwat / 8 * 7) && hiwat < tp->snd_bwnd + hiwat / 10 && hiwat + asbinc < tcp_autosndbuf_max && hiwat < (TCP_MAXWIN << tp->snd_scale) && sendwin >= (so->so_snd.ssb_cc - (tp->snd_nxt - tp->snd_una))) { newsize = ulmin(hiwat + asbinc, tcp_autosndbuf_max); if (!ssb_reserve(&so->so_snd, newsize, so, NULL)) atomic_clear_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE); #if 0 if (newsize >= (TCP_MAXWIN << tp->snd_scale)) atomic_clear_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE); #endif } else if ((long)tp->snd_bwnd < (long)(hiwat * 3 / 4 - lowat - asbinc) && hiwat > tp->t_maxseg * 2 + asbinc && hiwat + asbinc >= tcp_autosndbuf_min && tcp_do_autosndbuf == 1) { newsize = ulmax(hiwat - asbinc, tp->t_maxseg * 2); ssb_reserve(&so->so_snd, newsize, so, NULL); } } /* * Don't use TSO, if: * - Congestion window needs validation * - There are SACK blocks to report * - RST or SYN flags is set * - URG will be set * * XXX * Checking for SYN|RST looks overkill, just to be safe than sorry */ use_tso = can_tso; if (report_sack || idle_cwv || (flags & (TH_RST | TH_SYN))) use_tso = FALSE; if (use_tso) { tcp_seq ugr_nxt = tp->snd_nxt; if ((flags & TH_FIN) && (tp->t_flags & TF_SENTFIN) && tp->snd_nxt == tp->snd_max) --ugr_nxt; if (SEQ_GT(tp->snd_up, ugr_nxt)) use_tso = FALSE; } if (use_tso) { /* * Find out segment size and header length for TSO */ error = tcp_tso_getsize(tp, &segsz, &tso_hlen); if (error) use_tso = FALSE; } if (!use_tso) { segsz = tp->t_maxseg; tso_hlen = 0; /* not used */ } /* * Truncate to the maximum segment length if not TSO, and ensure that * FIN is removed if the length no longer contains the last data byte. 
*/ if (len > segsz) { if (!use_tso) { len = segsz; ++segcnt; } else { int nsegs; if (__predict_false(tso_lenmax < segsz)) tso_lenmax = segsz << 1; /* * Truncate TSO transfers to (IP_MAXPACKET - iphlen - * thoff), and make sure that we send equal size * transfers down the stack (rather than big-small- * big-small-...). */ len = min(len, tso_lenmax); nsegs = min(len, (IP_MAXPACKET - tso_hlen)) / segsz; KKASSERT(nsegs > 0); len = nsegs * segsz; if (len <= segsz) { use_tso = FALSE; ++segcnt; } else { segcnt += nsegs; } } sendalot = TRUE; } else { use_tso = FALSE; if (len > 0) ++segcnt; } if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.ssb_cc)) flags &= ~TH_FIN; recvwin = ssb_space(&so->so_rcv); /* * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * * - We have a full segment * - This is the last buffer in a write()/send() and we are * either idle or running NODELAY * - we've timed out (e.g. persist timer) * - we have more then 1/2 the maximum send window's worth of * data (receiver may be limiting the window size) * - we need to retransmit */ if (len) { if (len >= segsz) goto send; /* * NOTE! on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause * us to flush a buffer queued with moretocome. XXX * * note: the len + off check is almost certainly unnecessary. */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && len + off >= so->so_snd.ssb_cc && !(tp->t_flags & TF_NOPUSH)) { goto send; } if (tp->t_flags & TF_FORCE) /* typ. timeout case */ goto send; if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */ goto send; if (tp->t_flags & TF_XMITNOW) goto send; } /* * Compare available window to amount of window * known to peer (as advertised window less * next expected input). 
If the difference is at least two * max size segments, or at least 50% of the maximum possible * window, then want to send a window update to peer. */ if (recvwin > 0) { /* * "adv" is the amount we can increase the window, * taking into account that we are limited by * TCP_MAXWIN << tp->rcv_scale. */ long adv = min(recvwin, (long)TCP_MAXWIN << tp->rcv_scale) - (tp->rcv_adv - tp->rcv_nxt); long hiwat; /* * This ack case typically occurs when the user has drained * the TCP socket buffer sufficiently to warrent an ack * containing a 'pure window update'... that is, an ack that * ONLY updates the tcp window. * * It is unclear why we would need to do a pure window update * past 2 segments if we are going to do one at 1/2 the high * water mark anyway, especially since under normal conditions * the user program will drain the socket buffer quickly. * The 2-segment pure window update will often add a large * number of extra, unnecessary acks to the stream. * * avoid_pure_win_update now defaults to 1. */ if (avoid_pure_win_update == 0 || (tp->t_flags & TF_RXRESIZED)) { if (adv >= (long) (2 * segsz)) { goto send; } } hiwat = (long)(TCP_MAXWIN << tp->rcv_scale); if (hiwat > (long)so->so_rcv.ssb_hiwat) hiwat = (long)so->so_rcv.ssb_hiwat; if (adv >= hiwat / 2) goto send; } /* * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW * is also a catch-all for the retransmit timer timeout case. */ if (tp->t_flags & TF_ACKNOW) goto send; if ((flags & TH_RST) || ((flags & TH_SYN) && !(tp->t_flags & TF_NEEDSYN))) goto send; if (SEQ_GT(tp->snd_up, tp->snd_una)) goto send; /* * If our state indicates that FIN should be sent * and we have not yet done so, then we need to send. */ if ((flags & TH_FIN) && (!(tp->t_flags & TF_SENTFIN) || tp->snd_nxt == tp->snd_una)) goto send; /* * TCP window updates are not reliable, rather a polling protocol * using ``persist'' packets is used to insure receipt of window * updates. 
The three ``states'' for the output side are: * idle not doing retransmits or persists * persisting to move a small or zero window * (re)transmitting and thereby not persisting * * tcp_callout_active(tp, tp->tt_persist) * is true when we are in persist state. * The TF_FORCE flag in tp->t_flags * is set when we are called to send a persist packet. * tcp_callout_active(tp, tp->tt_rexmt) * is set when we are retransmitting * The output side is idle when both timers are zero. * * If send window is too small, there is data to transmit, and no * retransmit or persist is pending, then go to persist state. * * If nothing happens soon, send when timer expires: * if window is nonzero, transmit what we can, otherwise force out * a byte. * * Don't try to set the persist state if we are in TCPS_SYN_RECEIVED * with data pending. This situation can occur during a * simultanious connect. */ if (so->so_snd.ssb_cc > 0 && tp->t_state != TCPS_SYN_RECEIVED && !tcp_callout_active(tp, tp->tt_rexmt) && !tcp_callout_active(tp, tp->tt_persist)) { tp->t_rxtshift = 0; tcp_setpersist(tp); } /* * No reason to send a segment, just return. */ tp->t_flags &= ~TF_XMITNOW; return (0); send: if (need_sched && len > 0) { tcp_output_sched(tp); return 0; } /* * Before ESTABLISHED, force sending of initial options * unless TCP set not to do any options. * NOTE: we assume that the IP/TCP header plus TCP options * always fit in a single mbuf, leaving room for a maximum * link header, i.e. 
* max_linkhdr + sizeof(struct tcpiphdr) + optlen <= MCLBYTES */ optlen = 0; if (isipv6) hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else hdrlen = sizeof(struct tcpiphdr); if (flags & TH_SYN) { tp->snd_nxt = tp->iss; if (!(tp->t_flags & TF_NOOPT)) { u_short mss; opt[0] = TCPOPT_MAXSEG; opt[1] = TCPOLEN_MAXSEG; mss = htons((u_short) tcp_mssopt(tp)); memcpy(opt + 2, &mss, sizeof mss); optlen = TCPOLEN_MAXSEG; if ((tp->t_flags & TF_REQ_SCALE) && (!(flags & TH_ACK) || (tp->t_flags & TF_RCVD_SCALE))) { *((u_int32_t *)(opt + optlen)) = htonl( TCPOPT_NOP << 24 | TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | tp->request_r_scale); optlen += 4; } if ((tcp_do_sack && !(flags & TH_ACK)) || tp->t_flags & TF_SACK_PERMITTED) { uint32_t *lp = (uint32_t *)(opt + optlen); *lp = htonl(TCPOPT_SACK_PERMITTED_ALIGNED); optlen += TCPOLEN_SACK_PERMITTED_ALIGNED; } } } /* * Send a timestamp and echo-reply if this is a SYN and our side * wants to use timestamps (TF_REQ_TSTMP is set) or both our side * and our peer have sent timestamps in our SYN's. */ if ((tp->t_flags & (TF_REQ_TSTMP | TF_NOOPT)) == TF_REQ_TSTMP && !(flags & TH_RST) && (!(flags & TH_ACK) || (tp->t_flags & TF_RCVD_TSTMP))) { u_int32_t *lp = (u_int32_t *)(opt + optlen); /* Form timestamp option as shown in appendix A of RFC 1323. */ *lp++ = htonl(TCPOPT_TSTAMP_HDR); *lp++ = htonl(ticks); *lp = htonl(tp->ts_recent); optlen += TCPOLEN_TSTAMP_APPA; } /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && (so->so_rcv.ssb_flags & SSB_AUTOSIZE)) tp->rfbuf_ts = ticks; /* * If this is a SACK connection and we have a block to report, * fill in the SACK blocks in the TCP options. 
*/ if (report_sack) tcp_sack_fill_report(tp, opt, &optlen); #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) { int i; u_char *bp; /* * Initialize TCP-MD5 option (RFC2385) */ bp = (u_char *)opt + optlen; *bp++ = TCPOPT_SIGNATURE; *bp++ = TCPOLEN_SIGNATURE; sigoff = optlen + 2; for (i = 0; i < TCP_SIGLEN; i++) *bp++ = 0; optlen += TCPOLEN_SIGNATURE; /* * Terminate options list and maintain 32-bit alignment. */ *bp++ = TCPOPT_NOP; *bp++ = TCPOPT_EOL; optlen += 2; } #endif /* TCP_SIGNATURE */ KASSERT(optlen <= TCP_MAXOLEN, ("too many TCP options")); hdrlen += optlen; if (isipv6) { ipoptlen = ip6_optlen(inp); } else { if (inp->inp_options) { ipoptlen = inp->inp_options->m_len - offsetof(struct ipoption, ipopt_list); } else {
/**
 * Check whether an external port is already taken on this host.
 *
 * Phase 1 looks for a local socket bound to @p eport, using whichever
 * OS-specific socket listing this build supports (Linux /proc/net,
 * OpenBSD kvm, DragonFly/FreeBSD pcblist sysctl).  A socket matches when
 * its local address is the wildcard address or equals the address that
 * getifaddr() reports for @p if_name.  IPv6 sockets are ignored.
 *
 * Phase 2 (only when built with USE_NETFILTER and phase 1 found nothing)
 * looks for an existing NAT redirect rule for @p eport that points
 * somewhere other than @p iaddr : @p iport.
 *
 * @param if_name  external interface name
 * @param eport    external port to test (host byte order)
 * @param proto    IPPROTO_TCP or IPPROTO_UDP; on Linux and OpenBSD any
 *                 value other than IPPROTO_TCP is treated as UDP
 * @param iaddr    internal address the caller wants to redirect to
 *                 (only used with USE_NETFILTER)
 * @param iport    internal port the caller wants to redirect to
 *
 * @return 1 if the port is in use, 0 if it is not, -1 if the socket
 *         listing could not be obtained or parsed.
 */
int
port_in_use(const char *if_name,
            unsigned eport, int proto,
            const char *iaddr, unsigned iport)
{
	int found = 0;
	char ip_addr_str[INET_ADDRSTRLEN];
	struct in_addr ip_addr;
#ifdef __linux__
	/* linux code */
	char line[256];
	FILE *f;
	const char * tcpfile = "/proc/net/tcp";
	const char * udpfile = "/proc/net/udp";
#endif

	/* If the interface has no usable address, only wildcard binds can match. */
	if(getifaddr(if_name, ip_addr_str, INET_ADDRSTRLEN, &ip_addr, NULL) < 0) {
		ip_addr.s_addr = 0;
		ip_addr_str[0] = '\0';
	}

	syslog(LOG_DEBUG, "Check protocol %s for port %u on ext_if %s %s, %08X",
	       (proto==IPPROTO_TCP)?"tcp":"udp", eport, if_name,
	       ip_addr_str, (unsigned)ip_addr.s_addr);

	/* Phase 1 : check for local sockets (would be listed by netstat) */
#if defined(__linux__)
	f = fopen((proto==IPPROTO_TCP)?tcpfile:udpfile, "r");
	if (!f) {
		syslog(LOG_ERR, "cannot open %s", (proto==IPPROTO_TCP)?tcpfile:udpfile);
		return -1;
	}

	while (fgets(line, 255, f)) {
		char eaddr[68];
		unsigned tmp_port;

		/* Only the local "address:port" column is captured. */
		if (sscanf(line, "%*d: %64[0-9A-Fa-f]:%x %*x:%*x %*x %*x:%*x "
		                 "%*x:%*x %*x %*d %*d %*llu",
		           eaddr, &tmp_port) == 2) {
			/* TODO add IPV6 support if enabled
			 * Presently assumes IPV4 */
#ifdef DEBUG
			syslog(LOG_DEBUG, "port_in_use check port %u and address %s", tmp_port, eaddr);
#endif
			if (tmp_port == eport) {
				/*
				 * Parse into unsigned bytes (what %hhx expects) and
				 * read them back through a union, so the 32-bit
				 * address is never accessed through a misaligned or
				 * aliasing pointer.
				 *
				 * NOTE(review): the byte reversal assumes the hex text
				 * is the address as a little-endian host integer;
				 * confirm behaviour on big-endian targets.
				 */
				union {
					unsigned char bytes[4];
					struct in_addr addr;
				} parsed;

				if (sscanf(eaddr, "%2hhx%2hhx%2hhx%2hhx",
				           &parsed.bytes[3], &parsed.bytes[2],
				           &parsed.bytes[1], &parsed.bytes[0]) == 4) {
					if (parsed.addr.s_addr == 0 ||
					    parsed.addr.s_addr == ip_addr.s_addr) {
						found++;
						break;  /* don't care how many, just that we found at least one */
					}
				}
			}
		}
	}
	fclose(f);

#elif defined(__OpenBSD__)
	static struct nlist list[] = {
#if 0
		{"_tcpstat", 0, 0, 0, 0},
		{"_udpstat", 0, 0, 0, 0},
		{"_tcbinfo", 0, 0, 0, 0},
		{"_udbinfo", 0, 0, 0, 0},
#endif
		{"_tcbtable", 0, 0, 0, 0},
		{"_udbtable", 0, 0, 0, 0},
		{NULL,0, 0, 0, 0}
	};
	char errstr[_POSIX2_LINE_MAX];
	kvm_t *kd;
	ssize_t n;
	struct inpcbtable table;
	struct inpcb *next;
	struct inpcb inpcb;

	kd = kvm_openfiles(NULL, NULL, NULL, O_RDONLY, errstr);
	if(!kd) {
		syslog(LOG_ERR, "%s: kvm_openfiles(): %s",
		       "portinuse()", errstr);
		return -1;
	}
	if(kvm_nlist(kd, list) < 0) {
		syslog(LOG_ERR, "%s: kvm_nlist(): %s",
		       "portinuse()", kvm_geterr(kd));
		kvm_close(kd);
		return -1;
	}
	/* list[0] is the TCP table symbol, list[1] the UDP one. */
	n = kvm_read(kd, list[(proto==IPPROTO_TCP)?0:1].n_value, &table, sizeof(table));
	if(n < 0) {
		syslog(LOG_ERR, "%s: kvm_read(): %s",
		       "portinuse()", kvm_geterr(kd));
		kvm_close(kd);
		return -1;
	}
	next = CIRCLEQ_FIRST(&table.inpt_queue); /*TAILQ_FIRST(&table.inpt_queue);*/
	while(next != NULL) {
		/* Stop on a pointer that is not 4-byte aligned. */
		if(((u_long)next & 3) != 0) break;
		n = kvm_read(kd, (u_long)next, &inpcb, sizeof(inpcb));
		if(n < 0) {
			syslog(LOG_ERR, "kvm_read(): %s", kvm_geterr(kd));
			break;
		}
		next = CIRCLEQ_NEXT(&inpcb, inp_queue); /*TAILQ_NEXT(&inpcb, inp_queue);*/
		/* skip IPv6 sockets */
		if((inpcb.inp_flags & INP_IPV6) != 0)
			continue;
#ifdef DEBUG
		syslog(LOG_DEBUG, "%08lx:%hu %08lx:%hu",
		       (u_long)inpcb.inp_laddr.s_addr, ntohs(inpcb.inp_lport),
		       (u_long)inpcb.inp_faddr.s_addr, ntohs(inpcb.inp_fport));
#endif
		if(eport == (unsigned)ntohs(inpcb.inp_lport)) {
			if(inpcb.inp_laddr.s_addr == INADDR_ANY || inpcb.inp_laddr.s_addr == ip_addr.s_addr) {
				found++;
				break;  /* don't care how many, just that we found at least one */
			}
		}
	}
	kvm_close(kd);

#elif defined(__DragonFly__)
	const char *varname;
	struct xinpcb *xip;
	struct xtcpcb *xtp;
	struct inpcb *inp;
	void *buf = NULL;
	void *so_begin, *so_end;
	size_t len;

	switch (proto) {
	case IPPROTO_TCP:
		varname = "net.inet.tcp.pcblist";
		break;
	case IPPROTO_UDP:
		varname = "net.inet.udp.pcblist";
		break;
	default:
		syslog(LOG_ERR, "port_in_use() unknown proto=%d", proto);
		return -1;
	}

	/* First call sizes the buffer, second call fills it. */
	if (sysctlbyname(varname, NULL, &len, NULL, 0) < 0) {
		syslog(LOG_ERR, "sysctlbyname(%s, NULL, ...): %m", varname);
		return -1;
	}
	buf = malloc(len);
	if (buf == NULL) {
		syslog(LOG_ERR, "malloc(%u) failed", (unsigned)len);
		return -1;
	}
	if (sysctlbyname(varname, buf, &len, NULL, 0) < 0) {
		syslog(LOG_ERR, "sysctlbyname(%s, buf, ...): %m", varname);
		free(buf);
		return -1;
	}

	/*
	 * Walk the records: each one starts with its own length as a
	 * size_t.  Stop when the length field or the record it describes
	 * would run past the end of the buffer.
	 */
	for (so_begin = buf, so_end = (uint8_t *)so_begin + len;
	     (uint8_t *)so_begin + sizeof(size_t) < (uint8_t *)so_end &&
	     (uint8_t *)so_begin + *(size_t *)so_begin <= (uint8_t *)so_end;
	     so_begin = (uint8_t *)so_begin + *(size_t *)so_begin) {
		switch (proto) {
		case IPPROTO_TCP:
			xtp = (struct xtcpcb *)so_begin;
			if (xtp->xt_len != sizeof *xtp) {
				/* sizeof yields size_t; cast so it matches %ld */
				syslog(LOG_WARNING, "struct xtcpcb size mismatch; %ld vs %ld",
				       (long)xtp->xt_len, (long)sizeof *xtp);
				free(buf);
				return -1;
			}
			inp = &xtp->xt_inp;
			break;
		case IPPROTO_UDP:
			xip = (struct xinpcb *)so_begin;
			if (xip->xi_len != sizeof *xip) {
				syslog(LOG_WARNING, "struct xinpcb size mismatch : %ld vs %ld",
				       (long)xip->xi_len, (long)sizeof *xip);
				free(buf);
				return -1;
			}
			inp = &xip->xi_inp;
			break;
		default:
			abort();
		}
		/* no support for IPv6 */
		if (INP_ISIPV6(inp) != 0)
			continue;
		syslog(LOG_DEBUG, "%08lx:%hu %08lx:%hu <=> %hu %08lx:%hu",
		       (u_long)inp->inp_laddr.s_addr, ntohs(inp->inp_lport),
		       (u_long)inp->inp_faddr.s_addr, ntohs(inp->inp_fport),
		       eport, (u_long)ip_addr.s_addr, iport
		);
		if (eport == (unsigned)ntohs(inp->inp_lport)) {
			if (inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_laddr.s_addr == ip_addr.s_addr) {
				found++;
				break;  /* don't care how many, just that we found at least one */
			}
		}
	}
	free(buf);
	buf = NULL;

#elif defined(__FreeBSD__)
	const char *varname;
	struct xinpgen *xig, *exig;
	struct xinpcb *xip;
	struct xtcpcb *xtp;
	struct inpcb *inp;
	void *buf = NULL;
	size_t len;

	switch (proto) {
	case IPPROTO_TCP:
		varname = "net.inet.tcp.pcblist";
		break;
	case IPPROTO_UDP:
		varname = "net.inet.udp.pcblist";
		break;
	default:
		syslog(LOG_ERR, "port_in_use() unknown proto=%d", proto);
		return -1;
	}

	/* First call sizes the buffer, second call fills it. */
	if (sysctlbyname(varname, NULL, &len, NULL, 0) < 0) {
		syslog(LOG_ERR, "sysctlbyname(%s, NULL, ...): %m", varname);
		return -1;
	}
	buf = malloc(len);
	if (buf == NULL) {
		syslog(LOG_ERR, "malloc(%u) failed", (unsigned)len);
		return -1;
	}
	if (sysctlbyname(varname, buf, &len, NULL, 0) < 0) {
		syslog(LOG_ERR, "sysctlbyname(%s, buf, ...): %m", varname);
		free(buf);
		return -1;
	}

	/* The list is bracketed by a leading and a trailing struct xinpgen. */
	xig = (struct xinpgen *)buf;
	exig = (struct xinpgen *)(void *)((char *)buf + len - sizeof *exig);
	if (xig->xig_len != sizeof *xig) {
		/* sizeof yields size_t; cast so it matches %ld */
		syslog(LOG_WARNING, "struct xinpgen size mismatch; %ld vs %ld",
		       (long)xig->xig_len, (long)sizeof *xig);
		free(buf);
		return -1;
	}
	if (exig->xig_len != sizeof *exig) {
		syslog(LOG_WARNING, "struct xinpgen size mismatch; %ld vs %ld",
		       (long)exig->xig_len, (long)sizeof *exig);
		free(buf);
		return -1;
	}

	while (1) {
		/* Step over the current record; stop at the trailing xinpgen. */
		xig = (struct xinpgen *)(void *)((char *)xig + xig->xig_len);
		if (xig >= exig)
			break;
		switch (proto) {
		case IPPROTO_TCP:
			xtp = (struct xtcpcb *)xig;
			if (xtp->xt_len != sizeof *xtp) {
				syslog(LOG_WARNING, "struct xtcpcb size mismatch; %ld vs %ld",
				       (long)xtp->xt_len, (long)sizeof *xtp);
				free(buf);
				return -1;
			}
			inp = &xtp->xt_inp;
			break;
		case IPPROTO_UDP:
			xip = (struct xinpcb *)xig;
			if (xip->xi_len != sizeof *xip) {
				syslog(LOG_WARNING, "struct xinpcb size mismatch : %ld vs %ld",
				       (long)xip->xi_len, (long)sizeof *xip);
				free(buf);
				return -1;
			}
			inp = &xip->xi_inp;
			break;
		default:
			abort();
		}
		/* no support for IPv6 */
		if ((inp->inp_vflag & INP_IPV6) != 0)
			continue;
		syslog(LOG_DEBUG, "%08lx:%hu %08lx:%hu <=> %hu %08lx:%hu",
		       (u_long)inp->inp_laddr.s_addr, ntohs(inp->inp_lport),
		       (u_long)inp->inp_faddr.s_addr, ntohs(inp->inp_fport),
		       eport, (u_long)ip_addr.s_addr, iport
		);
		if (eport == (unsigned)ntohs(inp->inp_lport)) {
			if (inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_laddr.s_addr == ip_addr.s_addr) {
				found++;
				break;  /* don't care how many, just that we found at least one */
			}
		}
	}
	free(buf);
	buf = NULL;
/* #elif __NetBSD__ */
#else
/* TODO : NetBSD / Darwin (OS X) / Solaris code */
#error "No port_in_use() implementation available for this OS"
#endif

	/* Phase 2 : check existing mappings
	 * TODO : implement for pf/ipfw/etc. */
#if defined(USE_NETFILTER)
	if (!found) {
		char iaddr_old[16];
		unsigned short iport_old;
		int i;

		for (i = 0; chains_to_check[i]; i++) {
			if (get_nat_redirect_rule(chains_to_check[i], if_name, eport, proto,
			                          iaddr_old, sizeof(iaddr_old), &iport_old,
			                          0, 0, 0, 0, 0, 0, 0) == 0) {
				/* eport is unsigned, so it is printed with %u */
				syslog(LOG_DEBUG, "port_in_use check port %u on nat chain %s redirected to %s port %d", eport,
				       chains_to_check[i], iaddr_old, iport_old);
				if (!(strcmp(iaddr, iaddr_old)==0 && iport==iport_old)) {
					/* only "in use" if redirected to somewhere else */
					found++;
					break;  /* don't care how many, just that we found at least one */
				}
			}
		}
	}
#else /* USE_NETFILTER */
	UNUSED(iport);
	UNUSED(iaddr);
#endif /* USE_NETFILTER */
	return found;
}
/*
 * Input routine for UDP datagrams carried over IPv6.
 *
 * Reads the packet from *mp and the offset of the UDP header from *offp.
 * After validating the length and (when present) the checksum, the
 * datagram is appended to the receive buffer of the matching socket(s):
 * every matching pcb for a multicast destination, or the single pcb found
 * by hash lookup otherwise.
 *
 * The 'proto' argument is not referenced in this function.
 *
 * Every explicit return in this function yields IPPROTO_DONE; the mbuf is
 * either handed to a socket buffer, passed to icmp6_error(), or freed.
 */
int
udp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;
	struct udphdr *uh;
	struct inpcb *in6p;
	struct mbuf *opts = NULL;	/* control mbuf built by ip6_savecontrol() */
	int off = *offp;
	int plen, ulen;
	struct sockaddr_in6 udp_in6;
	struct socket *so;
	struct inpcbinfo *pcbinfo = &udbinfo[0];

	/*
	 * NOTE(review): presumably verifies that a full UDP header is
	 * available at 'off' and returns IPPROTO_DONE otherwise -- confirm
	 * against the macro definition.
	 */
	IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);

	ip6 = mtod(m, struct ip6_hdr *);

	/* Drop packets whose destination matches the faith prefix hook. */
	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
		/* XXX send icmp6 host/port unreach? */
		m_freem(m);
		return IPPROTO_DONE;
	}

	udp_stat.udps_ipackets++;

	/*
	 * plen is the number of bytes from the UDP header to the end of the
	 * IPv6 payload; it must equal the length carried in the UDP header.
	 */
	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
	uh = (struct udphdr *)((caddr_t)ip6 + off);
	ulen = ntohs((u_short)uh->uh_ulen);

	if (plen != ulen) {
		udp_stat.udps_badlen++;
		goto bad;
	}

	/*
	 * Checksum extended UDP header and data.
	 *
	 * NOTE(review): a zero checksum is only counted here and the
	 * datagram is still accepted; the IPv6 specification generally
	 * expects such datagrams to be discarded -- confirm this is intended.
	 */
	if (uh->uh_sum == 0)
		udp_stat.udps_nosum++;
	else if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
		udp_stat.udps_badsum++;
		goto bad;
	}

	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
		struct inpcb *last, *marker;

		/*
		 * Deliver a multicast datagram to all sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multicasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * In a case that laddr should be set to the link-local
		 * address (this happens in RIPng), the multicast address
		 * specified in the received packet does not match with
		 * laddr. To cure this situation, the matching is relaxed
		 * if the receiving interface is the same as one specified
		 * in the socket and if the destination multicast address
		 * matches one of the multicast groups specified in the socket.
		 */

		/*
		 * Construct sockaddr format source address.
		 */
		init_sin6(&udp_in6, m); /* general init */
		udp_in6.sin6_port = uh->uh_sport;
		/*
		 * KAME note: traditionally we dropped udpiphdr from mbuf here.
		 * We need udphdr for IPsec processing so we do that later.
		 */

		/*
		 * Locate pcb(s) for datagram.
		 * (Algorithm copied from raw_intr().)
		 *
		 * A marker pcb is inserted at the head of the list and moved
		 * to just after each pcb as it is visited, so the walk always
		 * resumes from the marker's current position.
		 */
		last = NULL;

		marker = in_pcbmarker(mycpuid);

		GET_PCBINFO_TOKEN(pcbinfo);

		LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list);
		while ((in6p = LIST_NEXT(marker, inp_list)) != NULL) {
			/* Advance the marker past the pcb being examined. */
			LIST_REMOVE(marker, inp_list);
			LIST_INSERT_AFTER(in6p, marker, inp_list);

			/* Skip list markers and non-IPv6 pcbs. */
			if (in6p->inp_flags & INP_PLACEMARKER)
				continue;
			if (!INP_ISIPV6(in6p))
				continue;
			if (in6p->in6p_lport != uh->uh_dport)
				continue;
			/* A bound local address must match, directly or via group membership. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
				if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr,
				    &ip6->ip6_dst) &&
				    !in6_mcmatch(in6p, &ip6->ip6_dst,
				    m->m_pkthdr.rcvif))
					continue;
			}
			/* A connected pcb must match the sender's address and port. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
				if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr,
				    &ip6->ip6_src) ||
				    in6p->in6p_fport != uh->uh_sport)
					continue;
			}

			/*
			 * Delivery is deferred by one match: the previously
			 * matched pcb gets a copy here, and the final match
			 * receives the original mbuf after the loop.
			 */
			if (last != NULL) {
				struct mbuf *n;

#ifdef IPSEC
				/*
				 * Check AH/ESP integrity.
				 */
				if (ipsec6_in_reject_so(m, last->inp_socket))
					ipsec6stat.in_polvio++;
					/* do not inject data into pcb */
				else
#endif /* IPSEC */
#ifdef FAST_IPSEC
				/*
				 * Check AH/ESP integrity.
				 */
				if (ipsec6_in_reject(m, last))
					;
				else
#endif /* FAST_IPSEC */
				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
					/*
					 * KAME NOTE: do not
					 * m_copy(m, offset, ...) above.
					 * ssb_appendaddr() expects M_PKTHDR,
					 * and m_copy() will copy M_PKTHDR
					 * only if offset is 0.
					 */
					so = last->in6p_socket;
					if ((last->in6p_flags & IN6P_CONTROLOPTS) ||
					    (so->so_options & SO_TIMESTAMP)) {
						ip6_savecontrol(last, &opts,
						    ip6, n);
					}
					/* Strip everything up to and including the UDP header. */
					m_adj(n, off + sizeof(struct udphdr));
					lwkt_gettoken(&so->so_rcv.ssb_token);
					if (ssb_appendaddr(&so->so_rcv,
					    (struct sockaddr *)&udp_in6,
					    n, opts) == 0) {
						/* Append failed: free the copy and its control data. */
						m_freem(n);
						if (opts)
							m_freem(opts);
						udp_stat.udps_fullsock++;
					} else {
						sorwakeup(so);
					}
					lwkt_reltoken(&so->so_rcv.ssb_token);
					/* opts was either appended or freed above. */
					opts = NULL;
				}
			}
			last = in6p;
			/*
			 * Don't look for additional matches if this one does
			 * not have either the SO_REUSEPORT or SO_REUSEADDR
			 * socket options set.  This heuristic avoids searching
			 * through all pcbs in the common case of a non-shared
			 * port.  It assumes that an application will never
			 * clear these options after setting them.
			 */
			if ((last->in6p_socket->so_options &
			    (SO_REUSEPORT | SO_REUSEADDR)) == 0)
				break;
		}
		LIST_REMOVE(marker, inp_list);

		REL_PCBINFO_TOKEN(pcbinfo);

		if (last == NULL) {
			/*
			 * No matching pcb found; discard datagram.
			 * (No need to send an ICMP Port Unreachable
			 * for a broadcast or multicast datagram.)
			 */
			udp_stat.udps_noport++;
			udp_stat.udps_noportmcast++;
			goto bad;
		}
#ifdef IPSEC
		/*
		 * Check AH/ESP integrity.
		 */
		if (ipsec6_in_reject_so(m, last->inp_socket)) {
			ipsec6stat.in_polvio++;
			goto bad;
		}
#endif /* IPSEC */
#ifdef FAST_IPSEC
		/*
		 * Check AH/ESP integrity.
		 */
		if (ipsec6_in_reject(m, last)) {
			goto bad;
		}
#endif /* FAST_IPSEC */
		/* Hand the original mbuf to the last matching pcb. */
		if (last->in6p_flags & IN6P_CONTROLOPTS ||
		    last->in6p_socket->so_options & SO_TIMESTAMP)
			ip6_savecontrol(last, &opts, ip6, m);

		m_adj(m, off + sizeof(struct udphdr));
		so = last->in6p_socket;
		lwkt_gettoken(&so->so_rcv.ssb_token);
		if (ssb_appendaddr(&so->so_rcv, (struct sockaddr *)&udp_in6,
		    m, opts) == 0) {
			udp_stat.udps_fullsock++;
			lwkt_reltoken(&so->so_rcv.ssb_token);
			/* 'bad' frees both m and opts. */
			goto bad;
		}
		sorwakeup(so);
		lwkt_reltoken(&so->so_rcv.ssb_token);
		return IPPROTO_DONE;
	}

	/*
	 * Locate pcb for datagram.
	 */
	in6p = in6_pcblookup_hash(pcbinfo, &ip6->ip6_src, uh->uh_sport,
	    &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
	if (in6p == NULL) {
		if (log_in_vain) {
			char buf[INET6_ADDRSTRLEN];

			/*
			 * The destination string is copied before
			 * ip6_sprintf() is called again for the source.
			 * NOTE(review): presumably because ip6_sprintf()
			 * reuses its result buffer -- confirm.
			 */
			strcpy(buf, ip6_sprintf(&ip6->ip6_dst));
			log(LOG_INFO,
			    "Connection attempt to UDP [%s]:%d from [%s]:%d\n",
			    buf, ntohs(uh->uh_dport),
			    ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport));
		}
		udp_stat.udps_noport++;
		if (m->m_flags & M_MCAST) {
			kprintf("UDP6: M_MCAST is set in a unicast packet.\n");
			udp_stat.udps_noportmcast++;
			goto bad;
		}
		/* The mbuf is passed to icmp6_error() and not freed here. */
		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
		return IPPROTO_DONE;
	}
#ifdef IPSEC
	/*
	 * Check AH/ESP integrity.
	 */
	if (ipsec6_in_reject_so(m, in6p->in6p_socket)) {
		ipsec6stat.in_polvio++;
		goto bad;
	}
#endif /* IPSEC */
#ifdef FAST_IPSEC
	/*
	 * Check AH/ESP integrity.
	 */
	if (ipsec6_in_reject(m, in6p)) {
		goto bad;
	}
#endif /* FAST_IPSEC */

	/*
	 * Construct sockaddr format source address.
	 * Stuff source address and datagram in user buffer.
	 */
	init_sin6(&udp_in6, m); /* general init */
	udp_in6.sin6_port = uh->uh_sport;
	if (in6p->in6p_flags & IN6P_CONTROLOPTS ||
	    in6p->in6p_socket->so_options & SO_TIMESTAMP)
		ip6_savecontrol(in6p, &opts, ip6, m);
	/* Strip everything up to and including the UDP header. */
	m_adj(m, off + sizeof(struct udphdr));
	so = in6p->in6p_socket;
	lwkt_gettoken(&so->so_rcv.ssb_token);
	if (ssb_appendaddr(&so->so_rcv, (struct sockaddr *)&udp_in6,
	    m, opts) == 0) {
		udp_stat.udps_fullsock++;
		lwkt_reltoken(&so->so_rcv.ssb_token);
		goto bad;
	}
	sorwakeup(so);
	lwkt_reltoken(&so->so_rcv.ssb_token);
	return IPPROTO_DONE;
bad:
	/* Common drop path: release the packet and any pending control data. */
	if (m)
		m_freem(m);
	if (opts)
		m_freem(opts);
	return IPPROTO_DONE;
}
/**
 * Load the kernel's UDP endpoint list into @p container.
 *
 * The UDP pcb list is fetched with sysctl(), walked record by record, and
 * one netsnmp_udp_endpoint_entry is inserted into the container for each
 * endpoint.  IPv6 endpoints are skipped unless NETSNMP_ENABLE_IPV6 is
 * defined.  The walk stops at the end of the sysctl buffer or at the
 * first record whose length is implausible, so a short or malformed
 * buffer can never be read past its end.
 *
 * @param container   container that receives the new entries
 * @param load_flags  not referenced by this implementation
 *
 * @retval  0 no errors
 * @retval -1 sysctl() or malloc() failed, or the list header is malformed
 * @retval -3 an entry could not be allocated (entries inserted before the
 *            failure stay in the container)
 */
static int
_load(netsnmp_container *container, u_int load_flags)
{
    size_t          len;
    int             sname[] = { CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_PCBLIST };
    char           *udpcb_buf = NULL;
    const char     *buf_end;
#if defined(dragonfly)
    struct xinpcb  *xig = NULL;
#else
    struct xinpgen *xig = NULL;
#endif
    netsnmp_udp_endpoint_entry *entry;
    int             rc = 0;

    /*
     *  Read in the buffer containing the UDP table data:
     *  the first sysctl() sizes the buffer, the second fills it.
     */
    len = 0;
    if (sysctl(sname, 4, 0, &len, 0, 0) < 0 ||
        (udpcb_buf = malloc(len)) == NULL)
        return -1;
    if (sysctl(sname, 4, udpcb_buf, &len, 0, 0) < 0) {
        free(udpcb_buf);
        return -1;
    }
    /* len now holds the number of bytes actually returned. */
    buf_end = udpcb_buf + len;

    /*
     *  Unpick this into the constituent pcb records and add one
     *  container entry per record.
     */
#if defined(dragonfly)
    xig = (struct xinpcb *) udpcb_buf;
#else
    /* Skip the leading 'xinpgen' header, after checking it fits. */
    xig = (struct xinpgen *) udpcb_buf;
    if (len < sizeof(struct xinpgen) ||
        xig->xig_len < sizeof(struct xinpgen) || xig->xig_len > len) {
        free(udpcb_buf);
        return -1;
    }
    xig = (struct xinpgen *) ((char *) xig + xig->xig_len);
#endif

    for (;;) {
        NS_ELEM         pcb;
        size_t          remaining = (size_t) (buf_end - (const char *) xig);
        size_t          reclen;

#if defined(dragonfly)
        /* No terminating record here: stop when no full record is left. */
        if (remaining < sizeof(struct xinpcb))
            break;
        reclen = xig->xi_len;
        if (reclen < sizeof(struct xinpcb))
            break;
#else
        if (remaining < sizeof(struct xinpgen))
            break;
        reclen = xig->xig_len;
        /* A record no larger than 'xinpgen' ends the list. */
        if (reclen <= sizeof(struct xinpgen))
            break;
#endif
        /* Never copy or step beyond the bytes the kernel returned. */
        if (reclen > remaining || remaining < sizeof(pcb))
            break;

        memcpy(&pcb, xig, sizeof(pcb));
#if defined(dragonfly)
        xig = (struct xinpcb *) ((char *) xig + reclen);
#else
        xig = (struct xinpgen *) ((char *) xig + reclen);
#endif

#if !defined(NETSNMP_ENABLE_IPV6)
#ifdef INP_ISIPV6
        if (INP_ISIPV6(&pcb.xi_inp))
#else
        if (pcb.xi_inp.inp_vflag & INP_IPV6)
#endif
            continue;
#endif

        entry = netsnmp_access_udp_endpoint_entry_create();
        if (NULL == entry) {
            rc = -3;
            break;
        }

        /** the pcb ports appear to be in network order; convert to host order */
        entry->loc_port = ntohs(pcb.xi_inp.inp_lport);
        entry->rmt_port = ntohs(pcb.xi_inp.inp_fport);
        entry->pid = 0;

        /** the addr string may need work */
#ifdef INP_ISIPV6
        if (INP_ISIPV6(&pcb.xi_inp)) {
#else
        if (pcb.xi_inp.inp_vflag & INP_IPV6) {
#endif
            entry->loc_addr_len = entry->rmt_addr_len = 16;
            memcpy(entry->loc_addr, &pcb.xi_inp.in6p_laddr, 16);
            memcpy(entry->rmt_addr, &pcb.xi_inp.in6p_faddr, 16);
        } else {
            entry->loc_addr_len = entry->rmt_addr_len = 4;
            memcpy(entry->loc_addr, &pcb.xi_inp.inp_laddr, 4);
            memcpy(entry->rmt_addr, &pcb.xi_inp.inp_faddr, 4);
        }

        /*
         * add entry to container
         */
        entry->index = CONTAINER_SIZE(container) + 1;
        CONTAINER_INSERT(container, entry);
    }

    free(udpcb_buf);

    if (rc < 0)
        return rc;

    return 0;
}