static int icmp_send(struct socket *so, struct mbuf *m, int hlen) { struct ip *ip = mtod(m, struct ip *); struct sockaddr_in addr; so->s = qemu_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); if (so->s == -1) { return -1; } so->so_m = m; so->so_faddr = ip->ip_dst; so->so_laddr = ip->ip_src; so->so_iptos = ip->ip_tos; so->so_type = IPPROTO_ICMP; so->so_state = SS_ISFCONNECTED; so->so_expire = curtime + SO_EXPIRE; addr.sin_family = AF_INET; addr.sin_addr = so->so_faddr; insque(so, &so->slirp->icmp); if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, (struct sockaddr *)&addr, sizeof(addr)) == -1) { DEBUG_MISC((dfd, "icmp_input icmp sendto tx errno = %d-%s\n", errno, strerror(errno))); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); icmp_detach(so); } return 0; }
/* NOTE: icmp dont drop @ipkb */ void icmp_send(unsigned char type, unsigned char code, unsigned int data, struct pkbuf *pkb_in) { struct pkbuf *pkb; struct ip *iphdr = pkb2ip(pkb_in); struct icmp *icmphdr; int paylen = _ntohs(iphdr->ip_len); /* icmp payload length */ if (paylen < iphlen(iphdr) + 8) return; /* * RFC 1812 Section 4.3.2.7 for sanity check * An ICMP error message MUST NOT be sent as the result of receiving: * 1. A packet sent as a Link Layer broadcast or multicast * 2. A packet destined to an IP broadcast or IP multicast address *[3] A packet whose source address has a network prefix of zero or is an * invalid source address (as defined in Section [5.3.7]) * 4. Any fragment of a datagram other then the first fragment (i.e., a * packet for which the fragment offset in the IP header is nonzero). * 5. An ICMP error message */ if (pkb_in->pk_type != PKT_LOCALHOST) return; if (MULTICAST(iphdr->ip_dst) || BROADCAST(iphdr->ip_dst)) return; if (iphdr->ip_fragoff & _htons(IP_FRAG_OFF)) return; if (icmp_type_error(type) && iphdr->ip_pro == IP_P_ICMP) { icmphdr = ip2icmp(iphdr); if (icmphdr->icmp_type > ICMP_T_MAXNUM || icmp_error(icmphdr)) return; } /* build icmp packet and send */ /* ip packet size must be smaller than 576 bytes */ if (IP_HRD_SZ + ICMP_HRD_SZ + paylen > 576) paylen = 576 - IP_HRD_SZ - ICMP_HRD_SZ; pkb = alloc_pkb(ETH_HRD_SZ + IP_HRD_SZ + ICMP_HRD_SZ + paylen); icmphdr = (struct icmp *)(pkb2ip(pkb)->ip_data); icmphdr->icmp_type = type; icmphdr->icmp_code = code; icmphdr->icmp_cksum = 0; icmphdr->icmp_undata = data; memcpy(icmphdr->icmp_data, (unsigned char *)iphdr, paylen); icmphdr->icmp_cksum = icmp_chksum((unsigned short *)icmphdr, ICMP_HRD_SZ + paylen); icmpdbg("to "IPFMT"(payload %d) [type %d code %d]\n", ipfmt(iphdr->ip_src), paylen, type, code); ip_send_info(pkb, 0, IP_HRD_SZ + ICMP_HRD_SZ + paylen, 0, IP_P_ICMP, iphdr->ip_src); }
/* * TCP input routine, follows pages 65-76 of the * protocol specification dated September, 1981 very closely. */ void tcp_input(struct mbuf *m, int iphlen, struct socket *inso) { struct ip save_ip, *ip; register struct tcpiphdr *ti; caddr_t optp = NULL; int optlen = 0; int len, tlen, off; register struct tcpcb *tp = NULL; register int tiflags; struct socket *so = NULL; int todrop, acked, ourfinisacked, needoutput = 0; int iss = 0; u_long tiwin; int ret; struct ex_list *ex_ptr; Slirp *slirp; DEBUG_CALL("tcp_input"); DEBUG_ARGS((dfd, " m = %8lx iphlen = %2d inso = %lx\n", (long )m, iphlen, (long )inso )); /* * If called with m == 0, then we're continuing the connect */ if (m == NULL) { so = inso; slirp = so->slirp; /* Re-set a few variables */ tp = sototcpcb(so); m = so->so_m; so->so_m = NULL; ti = so->so_ti; tiwin = ti->ti_win; tiflags = ti->ti_flags; goto cont_conn; } slirp = m->slirp; /* * Get IP and TCP header together in first mbuf. * Note: IP leaves IP header in first mbuf. */ ti = mtod(m, struct tcpiphdr *); if (iphlen > sizeof(struct ip )) { ip_stripoptions(m, (struct mbuf *)0); iphlen=sizeof(struct ip ); } /* XXX Check if too short */ /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ ip=mtod(m, struct ip *); save_ip = *ip; save_ip.ip_len+= iphlen; /* * Checksum extended TCP header and data. */ tlen = ((struct ip *)ti)->ip_len; tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; memset(&ti->ti_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr)); ti->ti_x1 = 0; ti->ti_len = htons((uint16_t)tlen); len = sizeof(struct ip ) + tlen; if(cksum(m, len)) { goto drop; } /* * Check that TCP offset makes sense, * pull out TCP options and adjust length. XXX */ off = ti->ti_off << 2; if (off < sizeof (struct tcphdr) || off > tlen) { goto drop; } tlen -= off; ti->ti_len = tlen; if (off > sizeof (struct tcphdr)) { optlen = off - sizeof (struct tcphdr); optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr); } tiflags = ti->ti_flags; /* * Convert TCP protocol specific fields to host format. */ NTOHL(ti->ti_seq); NTOHL(ti->ti_ack); NTOHS(ti->ti_win); NTOHS(ti->ti_urp); /* * Drop TCP, IP headers and TCP options. */ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); /* * Locate pcb for segment. */ findso: so = slirp->tcp_last_so; if (so->so_fport != ti->ti_dport || so->so_lport != ti->ti_sport || so->so_laddr.s_addr != ti->ti_src.s_addr || so->so_faddr.s_addr != ti->ti_dst.s_addr) { so = solookup(&slirp->tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport); if (so) slirp->tcp_last_so = so; } /* * If the state is CLOSED (i.e., TCB does not exist) then * all data in the incoming segment is discarded. * If the TCB exists but is in CLOSED state, it is embryonic, * but should either do a listen or a connect soon. * * state == CLOSED means we've done socreate() but haven't * attached it to a protocol yet... * * XXX If a TCB does not exist, and the TH_SYN flag is * the only flag set, then create a session, mark it * as if it was LISTENING, and continue... */ if (so == NULL) { if (slirp->restricted) { /* Any hostfwds will have an existing socket, so we only get here * for non-hostfwd connections. These should be dropped, unless it * happens to be a guestfwd. */ for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { if (ex_ptr->ex_fport == ti->ti_dport && ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { break; } } if (!ex_ptr) { goto dropwithreset; } } if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN) goto dropwithreset; if ((so = socreate(slirp)) == NULL) goto dropwithreset; if (tcp_attach(so) < 0) { free(so); /* Not sofree (if it failed, it's not insqued) */ goto dropwithreset; } sbreserve(&so->so_snd, TCP_SNDSPACE); sbreserve(&so->so_rcv, TCP_RCVSPACE); so->so_laddr = ti->ti_src; so->so_lport = ti->ti_sport; so->so_faddr = ti->ti_dst; so->so_fport = ti->ti_dport; if ((so->so_iptos = tcp_tos(so)) == 0) so->so_iptos = ((struct ip *)ti)->ip_tos; tp = sototcpcb(so); tp->t_state = TCPS_LISTEN; } /* * If this is a still-connecting socket, this probably * a retransmit of the SYN. Whether it's a retransmit SYN * or something else, we nuke it. */ if (so->so_state & SS_ISFCONNECTING) goto drop; tp = sototcpcb(so); /* XXX Should never fail */ if (tp == NULL) goto dropwithreset; if (tp->t_state == TCPS_CLOSED) goto drop; tiwin = ti->ti_win; /* * Segment received on connection. * Reset idle time and keep-alive timer. */ tp->t_idle = 0; if (SO_OPTIONS) tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; else tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; /* * Process options if not in LISTEN state, * else do it below (after getting remote address). */ if (optp && tp->t_state != TCPS_LISTEN) tcp_dooptions(tp, (u_char *)optp, optlen, ti); /* * Header prediction: check for the two common cases * of a uni-directional data xfer. If the packet has * no control flags, is in-sequence, the window didn't * change and we're not retransmitting, it's a * candidate. If the length is zero and the ack moved * forward, we're the sender side of the xfer. Just * free the data acked & wake any higher level process * that was blocked waiting for space. If the length * is non-zero and the ack didn't move, we're the * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. * * XXX Some of these tests are not needed * eg: the tiwin == tp->snd_wnd prevents many more * predictions.. with no *real* advantage.. */ if (tp->t_state == TCPS_ESTABLISHED && (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && tp->snd_nxt == tp->snd_max) { if (ti->ti_len == 0) { if (SEQ_GT(ti->ti_ack, tp->snd_una) && SEQ_LEQ(ti->ti_ack, tp->snd_max) && tp->snd_cwnd >= tp->snd_wnd) { /* * this is a pure ack for outstanding data. */ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) tcp_xmit_timer(tp, tp->t_rtt); acked = ti->ti_ack - tp->snd_una; sbdrop(&so->so_snd, acked); tp->snd_una = ti->ti_ack; m_free(m); /* * If all outstanding data are acked, stop * retransmit timer, otherwise restart timer * using current (possibly backed-off) value. * If process is waiting for space, * wakeup/selwakeup/signal. If data * are ready to send, let tcp_output * decide between more output or persist. */ if (tp->snd_una == tp->snd_max) tp->t_timer[TCPT_REXMT] = 0; else if (tp->t_timer[TCPT_PERSIST] == 0) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; /* * This is called because sowwakeup might have * put data into so_snd. Since we don't so sowwakeup, * we don't need this.. XXX??? */ if (so->so_snd.sb_cc) (void) tcp_output(tp); return; } } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ti->ti_len <= sbspace(&so->so_rcv)) { /* * this is a pure, in-sequence data packet * with nothing on the reassembly queue and * we have enough buffer space to take it. */ tp->rcv_nxt += ti->ti_len; /* * Add data to socket buffer. */ if (so->so_emu) { if (tcp_emu(so,m)) sbappend(so, m); } else sbappend(so, m); /* * If this is a short packet, then ACK now - with Nagel * congestion avoidance sender won't send more until * he gets an ACK. * * It is better to not delay acks at all to maximize * TCP throughput. See RFC 2581. */ tp->t_flags |= TF_ACKNOW; tcp_output(tp); return; } } /* header prediction */ /* * Calculate amount of space in receive window, * and then do TCP input processing. * Receive window is amount of space in rcv queue, * but not less than advertised window. */ { int win; win = sbspace(&so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); } switch (tp->t_state) { /* * If the state is LISTEN then ignore segment if it contains an RST. * If the segment contains an ACK then it is bad and send a RST. * If it does not contain a SYN then it is not interesting; drop it. * Don't bother responding if the destination was a broadcast. * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial * tp->iss, and send a segment: * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. * Fill in remote peer address fields if not previously specified. * Enter SYN_RECEIVED state, and process any other fields of this * segment in this state. */ case TCPS_LISTEN: { if (tiflags & TH_RST) goto drop; if (tiflags & TH_ACK) goto dropwithreset; if ((tiflags & TH_SYN) == 0) goto drop; /* * This has way too many gotos... * But a bit of spaghetti code never hurt anybody :) */ /* * If this is destined for the control address, then flag to * tcp_ctl once connected, otherwise connect */ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == slirp->vnetwork_addr.s_addr) { if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { /* May be an add exec */ for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { if(ex_ptr->ex_fport == so->so_fport && so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { so->so_state |= SS_CTL; break; } } if (so->so_state & SS_CTL) { goto cont_input; } } /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ } if (so->so_emu & EMU_NOCONNECT) { so->so_emu &= ~EMU_NOCONNECT; goto cont_input; } if ((tcp_fconnect(so) == -1) && #if defined(_WIN32) socket_error() != WSAEWOULDBLOCK #else (errno != EINPROGRESS) && (errno != EWOULDBLOCK) #endif ) { u_char code=ICMP_UNREACH_NET; DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n", errno,strerror(errno))); if(errno == ECONNREFUSED) { /* ACK the SYN, send RST to refuse the connection */ tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0, TH_RST|TH_ACK); } else { if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; HTONL(ti->ti_seq); /* restore tcp header */ HTONL(ti->ti_ack); HTONS(ti->ti_win); HTONS(ti->ti_urp); m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); *ip=save_ip; icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno)); } tcp_close(tp); m_free(m); } else { /* * Haven't connected yet, save the current mbuf * and ti, and return * XXX Some OS's don't tell us whether the connect() * succeeded or not. So we must time it out. */ so->so_m = m; so->so_ti = ti; tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; tp->t_state = TCPS_SYN_RECEIVED; tcp_template(tp); } return; cont_conn: /* m==NULL * Check if the connect succeeded */ if (so->so_state & SS_NOFDREF) { tp = tcp_close(tp); goto dropwithreset; } cont_input: tcp_template(tp); if (optp) tcp_dooptions(tp, (u_char *)optp, optlen, ti); if (iss) tp->iss = iss; else tp->iss = slirp->tcp_iss; slirp->tcp_iss += TCP_ISSINCR/2; tp->irs = ti->ti_seq; tcp_sendseqinit(tp); tcp_rcvseqinit(tp); tp->t_flags |= TF_ACKNOW; tp->t_state = TCPS_SYN_RECEIVED; tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; goto trimthenstep6; } /* case TCPS_LISTEN */ /* * If the state is SYN_SENT: * if seg contains an ACK, but not for our SYN, drop the input. * if seg contains a RST, then drop the connection. * if seg does not contain SYN, then drop it. * Otherwise this is an acceptable SYN segment * initialize tp->rcv_nxt and tp->irs * if seg contains ack then advance tp->snd_una * if SYN has been acked change to ESTABLISHED else SYN_RCVD state * arrange for segment to be acked (eventually) * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: if ((tiflags & TH_ACK) && (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) goto dropwithreset; if (tiflags & TH_RST) { if (tiflags & TH_ACK) { tcp_drop(tp, 0); /* XXX Check t_softerror! */ } goto drop; } if ((tiflags & TH_SYN) == 0) goto drop; if (tiflags & TH_ACK) { tp->snd_una = ti->ti_ack; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; } tp->t_timer[TCPT_REXMT] = 0; tp->irs = ti->ti_seq; tcp_rcvseqinit(tp); tp->t_flags |= TF_ACKNOW; if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { soisfconnected(so); tp->t_state = TCPS_ESTABLISHED; (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); /* * if we didn't have to retransmit the SYN, * use its rtt as our initial srtt & rtt var. */ if (tp->t_rtt) tcp_xmit_timer(tp, tp->t_rtt); } else tp->t_state = TCPS_SYN_RECEIVED; trimthenstep6: /* * Advance ti->ti_seq to correspond to first data byte. * If data, trim to stay within window, * dropping FIN if necessary. */ ti->ti_seq++; if (ti->ti_len > tp->rcv_wnd) { todrop = ti->ti_len - tp->rcv_wnd; m_adj(m, -todrop); ti->ti_len = tp->rcv_wnd; tiflags &= ~TH_FIN; } tp->snd_wl1 = ti->ti_seq - 1; tp->rcv_up = ti->ti_seq; goto step6; } /* switch tp->t_state */ /* * States other than LISTEN or SYN_SENT. * Check that at least some bytes of segment are within * receive window. If segment begins before rcv_nxt, * drop leading data (and SYN); if nothing left, just ack. */ todrop = tp->rcv_nxt - ti->ti_seq; if (todrop > 0) { if (tiflags & TH_SYN) { tiflags &= ~TH_SYN; ti->ti_seq++; if (ti->ti_urp > 1) ti->ti_urp--; else tiflags &= ~TH_URG; todrop--; } /* * Following if statement from Stevens, vol. 2, p. 960. */ if (todrop > ti->ti_len || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { /* * Any valid FIN must be to the left of the window. * At this point the FIN must be a duplicate or out * of sequence; drop it. */ tiflags &= ~TH_FIN; /* * Send an ACK to resynchronize and drop any data. * But keep on processing for RST or ACK. */ tp->t_flags |= TF_ACKNOW; todrop = ti->ti_len; } m_adj(m, todrop); ti->ti_seq += todrop; ti->ti_len -= todrop; if (ti->ti_urp > todrop) ti->ti_urp -= todrop; else { tiflags &= ~TH_URG; ti->ti_urp = 0; } } /* * If new data are received on a connection after the * user processes are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { tp = tcp_close(tp); goto dropwithreset; } /* * If segment ends after window, drop trailing data * (and PUSH and FIN); if nothing left, just ACK. */ todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); if (todrop > 0) { if (todrop >= ti->ti_len) { /* * If a new connection request is received * while in TIME_WAIT, drop the old connection * and start over if the sequence numbers * are above the previous ones. */ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { iss = tp->rcv_nxt + TCP_ISSINCR; tp = tcp_close(tp); goto findso; } /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from * incoming segments. Continue processing, but * remember to ack. Otherwise, drop segment * and ack. */ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; } else { goto dropafterack; } } m_adj(m, -todrop); ti->ti_len -= todrop; tiflags &= ~(TH_PUSH|TH_FIN); } /* * If the RST bit is set examine the state: * SYN_RECEIVED STATE: * If passive open, return to LISTEN state. * If active open, inform user that connection was refused. * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: * Inform user that connection was reset, and close tcb. * CLOSING, LAST_ACK, TIME_WAIT STATES * Close the tcb. */ if (tiflags&TH_RST) switch (tp->t_state) { case TCPS_SYN_RECEIVED: case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: tp->t_state = TCPS_CLOSED; tcp_close(tp); goto drop; case TCPS_CLOSING: case TCPS_LAST_ACK: case TCPS_TIME_WAIT: tcp_close(tp); goto drop; } /* * If a SYN is in the window, then this is an * error and we send an RST and drop the connection. */ if (tiflags & TH_SYN) { tp = tcp_drop(tp,0); goto dropwithreset; } /* * If the ACK bit is off we drop the segment and return. */ if ((tiflags & TH_ACK) == 0) goto drop; /* * Ack processing. */ switch (tp->t_state) { /* * In SYN_RECEIVED state if the ack ACKs our SYN then enter * ESTABLISHED state and continue processing, otherwise * send an RST. una<=ack<=max */ case TCPS_SYN_RECEIVED: if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) goto dropwithreset; tp->t_state = TCPS_ESTABLISHED; /* * The sent SYN is ack'ed with our sequence number +1 * The first data byte already in the buffer will get * lost if no correction is made. This is only needed for * SS_CTL since the buffer is empty otherwise. * tp->snd_una++; or: */ tp->snd_una=ti->ti_ack; if (so->so_state & SS_CTL) { /* So tcp_ctl reports the right state */ ret = tcp_ctl(so); if (ret == 1) { soisfconnected(so); so->so_state &= ~SS_CTL; /* success XXX */ } else if (ret == 2) { so->so_state &= SS_PERSISTENT_MASK; so->so_state |= SS_NOFDREF; /* CTL_CMD */ } else { needoutput = 1; tp->t_state = TCPS_FIN_WAIT_1; } } else { soisfconnected(so); } (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); tp->snd_wl1 = ti->ti_seq - 1; /* Avoid ack processing; snd_una==ti_ack => dup ack */ goto synrx_to_est; /* fall into ... */ /* * In ESTABLISHED state: drop duplicate ACKs; ACK out of range * ACKs. If the ack is in the range * tp->snd_una < ti->ti_ack <= tp->snd_max * then advance tp->snd_una to ti->ti_ack and drop * data from the retransmission queue. If this ACK reflects * more up to date window information we update our window information. */ case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: case TCPS_TIME_WAIT: if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { DEBUG_MISC((dfd, " dup ack m = %lx so = %lx\n", (long )m, (long )so)); /* * If we have outstanding data (other than * a window probe), this is a completely * duplicate ack (ie, window info didn't * change), the ack is the biggest we've * seen and we've seen exactly our rexmt * threshold of them, assume a packet * has been dropped and retransmit it. * Kludge snd_nxt & the congestion * window so we send only this one * packet. * * We know we're losing at the current * window size so do congestion avoidance * (set ssthresh to half the current window * and pull our congestion window back to * the new ssthresh). * * Dup acks mean that packets have left the * network (they're now cached at the receiver) * so bump cwnd by the amount in the receiver * to keep a constant cwnd packets in the * network. */ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) tp->t_dupacks = 0; else if (++tp->t_dupacks == TCPREXMTTHRESH) { tcp_seq onxt = tp->snd_nxt; u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; if (win < 2) win = 2; tp->snd_ssthresh = win * tp->t_maxseg; tp->t_timer[TCPT_REXMT] = 0; tp->t_rtt = 0; tp->snd_nxt = ti->ti_ack; tp->snd_cwnd = tp->t_maxseg; (void) tcp_output(tp); tp->snd_cwnd = tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; goto drop; } else if (tp->t_dupacks > TCPREXMTTHRESH) { tp->snd_cwnd += tp->t_maxseg; (void) tcp_output(tp); goto drop; } } else tp->t_dupacks = 0; break; } synrx_to_est: /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. */ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; tp->t_dupacks = 0; if (SEQ_GT(ti->ti_ack, tp->snd_max)) { goto dropafterack; } acked = ti->ti_ack - tp->snd_una; /* * If transmit timer is running and timed sequence * number was acked, update smoothed round trip time. * Since we now have an rtt measurement, cancel the * timer backoff (cf., Phil Karn's retransmit alg.). * Recompute the initial retransmit timer. */ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) tcp_xmit_timer(tp,tp->t_rtt); /* * If all outstanding data is acked, stop retransmit * timer and remember to restart (more output or persist). * If there is more data to be acked, restart retransmit * timer, using current (possibly backed-off) value. */ if (ti->ti_ack == tp->snd_max) { tp->t_timer[TCPT_REXMT] = 0; needoutput = 1; } else if (tp->t_timer[TCPT_PERSIST] == 0) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; /* * When new data is acked, open the congestion window. * If the window gives us less than ssthresh packets * in flight, open exponentially (maxseg per packet). * Otherwise open linearly: maxseg per window * (maxseg^2 / cwnd per packet). */ { register u_int cw = tp->snd_cwnd; register u_int incr = tp->t_maxseg; if (cw > tp->snd_ssthresh) incr = incr * incr / cw; tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); } if (acked > so->so_snd.sb_cc) { tp->snd_wnd -= so->so_snd.sb_cc; sbdrop(&so->so_snd, (int )so->so_snd.sb_cc); ourfinisacked = 1; } else { sbdrop(&so->so_snd, acked); tp->snd_wnd -= acked; ourfinisacked = 0; } tp->snd_una = ti->ti_ack; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; switch (tp->t_state) { /* * In FIN_WAIT_1 STATE in addition to the processing * for the ESTABLISHED state if our FIN is now acknowledged * then enter FIN_WAIT_2. */ case TCPS_FIN_WAIT_1: if (ourfinisacked) { /* * If we can't receive any more * data, then closing user can proceed. * Starting the timer is contrary to the * specification, but if we don't get a FIN * we'll hang forever. */ if (so->so_state & SS_FCANTRCVMORE) { tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; } tp->t_state = TCPS_FIN_WAIT_2; } break; /* * In CLOSING STATE in addition to the processing for * the ESTABLISHED state if the ACK acknowledges our FIN * then enter the TIME-WAIT state, otherwise ignore * the segment. */ case TCPS_CLOSING: if (ourfinisacked) { tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; } break; /* * In LAST_ACK, we may still be waiting for data to drain * and/or to be acked, as well as for the ack of our FIN. * If our FIN is now acknowledged, delete the TCB, * enter the closed state and return. */ case TCPS_LAST_ACK: if (ourfinisacked) { tcp_close(tp); goto drop; } break; /* * In TIME_WAIT state the only thing that should arrive * is a retransmission of the remote FIN. Acknowledge * it and restart the finack timer. */ case TCPS_TIME_WAIT: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; goto dropafterack; } } /* switch(tp->t_state) */ step6: /* * Update window information. * Don't look at window if no ACK: TAC's send garbage on first SYN. */ if ((tiflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, ti->ti_seq) || (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) || (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { tp->snd_wnd = tiwin; tp->snd_wl1 = ti->ti_seq; tp->snd_wl2 = ti->ti_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; needoutput = 1; } /* * Process segments with URG. */ if ((tiflags & TH_URG) && ti->ti_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * This is a kludge, but if we receive and accept * random urgent pointers, we'll crash in * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. */ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ti->ti_urp = 0; tiflags &= ~TH_URG; goto dodata; } /* * If this segment advances the known urgent pointer, * then mark the data stream. This should not happen * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since * a FIN has been received from the remote side. * In these states we ignore the URG. * * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section as the original * spec states (in one of two places). */ if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { tp->rcv_up = ti->ti_seq + ti->ti_urp; so->so_urgc = so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ tp->rcv_up = ti->ti_seq + ti->ti_urp; } } else /* * If no out of band data is expected, * pull receive urgent pointer along * with the receive window. */ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) tp->rcv_up = tp->rcv_nxt; dodata: /* * If this is a small packet, then ACK now - with Nagel * congestion avoidance sender won't send more until * he gets an ACK. */ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { tp->t_flags |= TF_ACKNOW; } /* * Process the segment text, merging it into the TCP sequencing queue, * and arranging for acknowledgment of receipt if necessary. * This process logically involves adjusting tp->rcv_wnd as data * is presented to the user (this happens in tcp_usrreq.c, * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. */ if ((ti->ti_len || (tiflags&TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { TCP_REASS(tp, ti, m, so, tiflags); } else { m_free(m); tiflags &= ~TH_FIN; } /* * If FIN is received ACK the FIN and let the user know * that the connection is closing. */ if (tiflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * If we receive a FIN we can't send more data, * set it SS_FDRAIN * Shutdown the socket if there is no rx data in the * buffer. * soread() is called on completion of shutdown() and * will got to TCPS_LAST_ACK, and use tcp_output() * to send the FIN. */ sofwdrain(so); tp->t_flags |= TF_ACKNOW; tp->rcv_nxt++; } switch (tp->t_state) { /* * In SYN_RECEIVED and ESTABLISHED STATES * enter the CLOSE_WAIT state. */ case TCPS_SYN_RECEIVED: case TCPS_ESTABLISHED: if(so->so_emu == EMU_CTL) /* no shutdown on socket */ tp->t_state = TCPS_LAST_ACK; else tp->t_state = TCPS_CLOSE_WAIT; break; /* * If still in FIN_WAIT_1 STATE FIN has not been acked so * enter the CLOSING state. */ case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; /* * In FIN_WAIT_2 state enter the TIME_WAIT state, * starting the time-wait timer, turning off the other * standard timers. */ case TCPS_FIN_WAIT_2: tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; break; /* * In TIME_WAIT state restart the 2 MSL time_wait timer. */ case TCPS_TIME_WAIT: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; break; } } /* * Return any desired output. */ if (needoutput || (tp->t_flags & TF_ACKNOW)) { (void) tcp_output(tp); } return; dropafterack: /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. */ if (tiflags & TH_RST) goto drop; m_free(m); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); return; dropwithreset: /* reuses m if m!=NULL, m_free() unnecessary */ if (tiflags & TH_ACK) tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); else { if (tiflags & TH_SYN) ti->ti_len++; tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, TH_RST|TH_ACK); } return; drop: /* * Drop space held by incoming segment and return. */ m_free(m); }
void udp_input(register struct mbuf *m, int iphlen) { register struct ip *ip; register struct udphdr *uh; /* struct mbuf *opts = 0;*/ int len; struct ip save_ip; struct socket *so; DEBUG_CALL("udp_input"); DEBUG_ARG("m = %lx", (long)m); DEBUG_ARG("iphlen = %d", iphlen); STAT(udpstat.udps_ipackets++); /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if(iphlen > sizeof(struct ip)) { ip_stripoptions(m, (struct mbuf *)0); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); uh = (struct udphdr *)((caddr_t)ip + iphlen); /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = ntohs((u_int16_t)uh->uh_ulen); if (ip->ip_len != len) { if (len > ip->ip_len) { STAT(udpstat.udps_badlen++); goto bad; } m_adj(m, len - ip->ip_len); ip->ip_len = len; } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ save_ip = *ip; save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ /* * Checksum extended UDP header and data. */ if (UDPCKSUM && uh->uh_sum) { memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ((struct ipovly *)ip)->ih_x1 = 0; ((struct ipovly *)ip)->ih_len = uh->uh_ulen; /* keep uh_sum for ICMP reply * uh->uh_sum = cksum(m, len + sizeof (struct ip)); * if (uh->uh_sum) { */ if(cksum(m, len + sizeof(struct ip))) { STAT(udpstat.udps_badsum++); goto bad; } } /* * handle DHCP/BOOTP */ if (ntohs(uh->uh_dport) == BOOTP_SERVER) { bootp_input(m); goto bad; } if (slirp_restrict) goto bad; /* * handle TFTP */ if (ntohs(uh->uh_dport) == TFTP_SERVER) { tftp_input(m); goto bad; } /* * Locate pcb for datagram. */ so = udp_last_so; if (so->so_lport != uh->uh_sport || so->so_laddr.s_addr != ip->ip_src.s_addr) { struct socket *tmp; for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next) { if (tmp->so_lport == uh->uh_sport && tmp->so_laddr.s_addr == ip->ip_src.s_addr) { tmp->so_faddr.s_addr = ip->ip_dst.s_addr; tmp->so_fport = uh->uh_dport; so = tmp; break; } } if (tmp == &udb) { so = NULL; } else { STAT(udpstat.udpps_pcbcachemiss++); udp_last_so = so; } } if (so == NULL) { /* * If there's no socket for this packet, * create one */ if ((so = socreate()) == NULL) goto bad; if(udp_attach(so) == -1) { DEBUG_MISC((dfd," udp_attach errno = %d-%s\n", errno,strerror(errno))); sofree(so); goto bad; } /* * Setup fields */ /* udp_last_so = so; */ so->so_laddr = ip->ip_src; so->so_lport = uh->uh_sport; if ((so->so_iptos = udp_tos(so)) == 0) so->so_iptos = ip->ip_tos; /* * XXXXX Here, check if it's in udpexec_list, * and if it is, do the fork_exec() etc. */ } so->so_faddr = ip->ip_dst; /* XXX */ so->so_fport = uh->uh_dport; /* XXX */ iphlen += sizeof(struct udphdr); m->m_len -= iphlen; m->m_data += iphlen; /* * Now we sendto() the packet. */ if (so->so_emu) udp_emu(so, m); if(sosendto(so,m) == -1) { m->m_len += iphlen; m->m_data -= iphlen; *ip=save_ip; DEBUG_MISC((dfd,"udp tx errno = %d-%s\n",errno,strerror(errno))); icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); } m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ /* restore the orig mbuf packet */ m->m_len += iphlen; m->m_data -= iphlen; *ip=save_ip; so->so_m=m; /* ICMP backup */ return; bad: m_freem(m); /* if (opts) m_freem(opts); */ return; }
/* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { Slirp *slirp = m->slirp; struct ip *ip; int hlen; DEBUG_CALL("ip_input"); DEBUG_ARG("m = %lx", (long)m); DEBUG_ARG("m_len = %d", m->m_len); if (m->m_len < sizeof (struct ip)) { return; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { goto bad; } hlen = ip->ip_hl << 2; if (hlen<sizeof(struct ip ) || hlen>m->m_len) {/* min header length */ goto bad; /* or packet too short */ } /* keep ip header intact for ICMP reply * ip->ip_sum = cksum(m, hlen); * if (ip->ip_sum) { */ if(cksum(m,hlen)) { goto bad; } /* * Convert fields to host representation. */ NTOHS(ip->ip_len); if (ip->ip_len < hlen) { goto bad; } NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_len < ip->ip_len) { goto bad; } if (slirp->restricted) { if ((ip->ip_dst.s_addr & slirp->vnetwork_mask.s_addr) == slirp->vnetwork_addr.s_addr) { if (ip->ip_dst.s_addr == 0xffffffff && ip->ip_p != IPPROTO_UDP) goto bad; } else { uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; struct ex_list *ex_ptr; if ((ip->ip_dst.s_addr & inv_mask) == inv_mask) { goto bad; } for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) if (ex_ptr->ex_addr.s_addr == ip->ip_dst.s_addr) break; if (!ex_ptr) goto bad; } } /* Should drop packet if mbuf too long? hmmm... */ if (m->m_len > ip->ip_len) m_adj(m, ip->ip_len - m->m_len); /* check ip_ttl for a correct ICMP reply */ if(ip->ip_ttl==0 || ip->ip_ttl==1) { icmp_error(m, ICMP_TIMXCEED,ICMP_TIMXCEED_INTRANS, 0,"ttl"); goto bad; } /* * If offset or IP_MF are set, must reassemble. * Otherwise, nothing need be done. * (We could look in the reassembly queue to see * if the packet was previously fragmented, * but it's not worth the time; just let them time out.) * * XXX This should fail, don't fragment yet */ if (ip->ip_off &~ IP_DF) { struct ipq *fp; struct qlink *l; /* * Look for queue of fragments * of this datagram. */ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; l = l->next) { fp = container_of(l, struct ipq, ip_link); if (ip->ip_id == fp->ipq_id && ip->ip_src.s_addr == fp->ipq_src.s_addr && ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ip->ip_p == fp->ipq_p) goto found; } fp = NULL; found: /* * Adjust ip_len to not reflect header, * set ip_mff if more fragments are expected, * convert offset of this to bytes. */ ip->ip_len -= hlen; if (ip->ip_off & IP_MF) ip->ip_tos |= 1; else ip->ip_tos &= ~1; ip->ip_off <<= 3; /* * If datagram marked as having more fragments * or if this is not the first fragment, * attempt reassembly; if it succeeds, proceed. */ if (ip->ip_tos & 1 || ip->ip_off) { ip = ip_reass(slirp, ip, fp); if (ip == NULL) return; m = dtom(slirp, ip); } else if (fp) ip_freef(slirp, fp); } else
/* * Do option processing on a datagram, possibly discarding it if bad options * are encountered, or forwarding it if source-routed. * * The pass argument is used when operating in the IPSTEALTH mode to tell * what options to process: [LS]SRR (pass 0) or the others (pass 1). The * reason for as many as two passes is that when doing IPSTEALTH, non-routing * options should be processed only if the packet is for us. * * Returns 1 if packet has been forwarded/freed, 0 if the packet should be * processed further. */ int ip_dooptions(struct mbuf *m, int pass) { struct ip *ip = mtod(m, struct ip *); u_char *cp; struct in_ifaddr *ia; int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; struct in_addr *sin, dst; uint32_t ntime; struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; /* Ignore or reject packets with IP options. */ if (ip_doopts == 0) return 0; else if (ip_doopts == 2) { type = ICMP_UNREACH; code = ICMP_UNREACH_FILTER_PROHIB; goto bad; } dst = ip->ip_dst; cp = (u_char *)(ip + 1); cnt = (ip->ip_hl << 2) - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } } switch (opt) { default: break; /* * Source routing with record. Find interface with current * destination address. If none on this machine then drop if * strictly routed, or do nothing if loosely routed. Record * interface address and bring up next address component. If * strictly routed make sure next address is on directly * accessible net. */ case IPOPT_LSRR: case IPOPT_SSRR: #ifdef IPSTEALTH if (V_ipstealth && pass > 0) break; #endif if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } ipaddr.sin_addr = ip->ip_dst; if (ifa_ifwithaddr_check((struct sockaddr *)&ipaddr) == 0) { if (opt == IPOPT_SSRR) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; } if (!ip_dosourceroute) goto nosourcerouting; /* * Loose routing, and not at next destination * yet; nothing to do except forward. */ break; } off--; /* 0 origin */ if (off > optlen - (int)sizeof(struct in_addr)) { /* * End of source route. Should be for us. */ if (!ip_acceptsourceroute) goto nosourcerouting; save_rte(m, cp, ip->ip_src); break; } #ifdef IPSTEALTH if (V_ipstealth) goto dropit; #endif if (!ip_dosourceroute) { if (V_ipforwarding) { char buf[16]; /* aaa.bbb.ccc.ddd\0 */ /* * Acting as a router, so generate * ICMP */ nosourcerouting: strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_WARNING, "attempted source route from %s to %s\n", inet_ntoa(ip->ip_src), buf); type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; } else { /* * Not acting as a router, so * silently drop. */ #ifdef IPSTEALTH dropit: #endif IPSTAT_INC(ips_cantforward); m_freem(m); return (1); } } /* * locate outgoing interface */ (void)memcpy(&ipaddr.sin_addr, cp + off, sizeof(ipaddr.sin_addr)); if (opt == IPOPT_SSRR) { #define INA struct in_ifaddr * #define SA struct sockaddr * if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL) ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0); } else /* XXX MRT 0 for routing */ ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m)); if (ia == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; } ip->ip_dst = ipaddr.sin_addr; (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), sizeof(struct in_addr)); ifa_free(&ia->ia_ifa); cp[IPOPT_OFFSET] += sizeof(struct in_addr); /* * Let ip_intr's mcast routing check handle mcast pkts */ forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr)); break; case IPOPT_RR: #ifdef IPSTEALTH if (V_ipstealth && pass == 0) break; #endif if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } /* * If no space remains, ignore. */ off--; /* 0 origin */ if (off > optlen - (int)sizeof(struct in_addr)) break; (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst, sizeof(ipaddr.sin_addr)); /* * Locate outgoing interface; if we're the * destination, use the incoming interface (should be * same). */ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL && (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; } (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), sizeof(struct in_addr)); ifa_free(&ia->ia_ifa); cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; case IPOPT_TS: #ifdef IPSTEALTH if (V_ipstealth && pass == 0) break; #endif code = cp - (u_char *)ip; if (optlen < 4 || optlen > 40) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } if ((off = cp[IPOPT_OFFSET]) < 5) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } if (off > optlen - (int)sizeof(int32_t)) { cp[IPOPT_OFFSET + 1] += (1 << 4); if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } break; } off--; /* 0 origin */ sin = (struct in_addr *)(cp + off); switch (cp[IPOPT_OFFSET + 1] & 0x0f) { case IPOPT_TS_TSONLY: break; case IPOPT_TS_TSANDADDR: if (off + sizeof(uint32_t) + sizeof(struct in_addr) > optlen) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } ipaddr.sin_addr = dst; ia = (INA)ifaof_ifpforaddr((SA)&ipaddr, m->m_pkthdr.rcvif); if (ia == NULL) continue; (void)memcpy(sin, &IA_SIN(ia)->sin_addr, sizeof(struct in_addr)); ifa_free(&ia->ia_ifa); cp[IPOPT_OFFSET] += sizeof(struct in_addr); off += sizeof(struct in_addr); break; case IPOPT_TS_PRESPEC: if (off + sizeof(uint32_t) + sizeof(struct in_addr) > optlen) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } (void)memcpy(&ipaddr.sin_addr, sin, sizeof(struct in_addr)); if (ifa_ifwithaddr_check((SA)&ipaddr) == 0) continue; cp[IPOPT_OFFSET] += sizeof(struct in_addr); off += sizeof(struct in_addr); break; default: code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip; goto bad; } ntime = iptime(); (void)memcpy(cp + off, &ntime, sizeof(uint32_t)); cp[IPOPT_OFFSET] += sizeof(uint32_t); } } if (forward && V_ipforwarding) { ip_forward(m, 1); return (1); } return (0); bad: icmp_error(m, type, code, 0, 0); IPSTAT_INC(ips_badoptions); return (1); }
/* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(PNATState pData, struct mbuf *m) { register struct ip *ip; int hlen = 0; int mlen = 0; STAM_PROFILE_START(&pData->StatIP_input, a); LogFlowFunc(("ENTER: m = %lx\n", (long)m)); ip = mtod(m, struct ip *); Log2(("ip_dst=%RTnaipv4(len:%d) m_len = %d\n", ip->ip_dst, RT_N2H_U16(ip->ip_len), m->m_len)); ipstat.ips_total++; { int rc; STAM_PROFILE_START(&pData->StatALIAS_input, b); rc = LibAliasIn(select_alias(pData, m), mtod(m, char *), m_length(m, NULL)); STAM_PROFILE_STOP(&pData->StatALIAS_input, b); Log2(("NAT: LibAlias return %d\n", rc)); if (m->m_len != RT_N2H_U16(ip->ip_len)) m->m_len = RT_N2H_U16(ip->ip_len); } mlen = m->m_len; if (mlen < sizeof(struct ip)) { ipstat.ips_toosmall++; goto bad_free_m; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { ipstat.ips_badvers++; goto bad_free_m; } hlen = ip->ip_hl << 2; if ( hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ipstat.ips_badhlen++; /* or packet too short */ goto bad_free_m; } /* keep ip header intact for ICMP reply * ip->ip_sum = cksum(m, hlen); * if (ip->ip_sum) { */ if (cksum(m, hlen)) { ipstat.ips_badsum++; goto bad_free_m; } /* * Convert fields to host representation. */ NTOHS(ip->ip_len); if (ip->ip_len < hlen) { ipstat.ips_badlen++; goto bad_free_m; } NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (mlen < ip->ip_len) { ipstat.ips_tooshort++; goto bad_free_m; } /* Should drop packet if mbuf too long? hmmm... */ if (mlen > ip->ip_len) m_adj(m, ip->ip_len - m->m_len); /* check ip_ttl for a correct ICMP reply */ if (ip->ip_ttl==0 || ip->ip_ttl == 1) { icmp_error(pData, m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); goto no_free_m; } ip->ip_ttl--; /* * If offset or IP_MF are set, must reassemble. * Otherwise, nothing need be done. * (We could look in the reassembly queue to see * if the packet was previously fragmented, * but it's not worth the time; just let them time out.) * */ if (ip->ip_off & (IP_MF | IP_OFFMASK)) { m = ip_reass(pData, m); if (m == NULL) goto no_free_m; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; }
/* * recvfrom() a UDP socket */ void sorecvfrom(struct socket *so) { struct sockaddr_in addr; socklen_t addrlen = sizeof(struct sockaddr_in); DEBUG_CALL("sorecvfrom"); DEBUG_ARG("so = %lx", (long)so); if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ char buff[256]; int len; len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); /* XXX Check if reply is "correct"? */ if(len == -1 || len == 0) { u_char code=ICMP_UNREACH_PORT; if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET; DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n", errno,strerror(errno))); icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno)); } else { icmp_reflect(so->so_m); so->so_m = NULL; /* Don't m_free() it again! */ } /* No need for this socket anymore, udp_detach it */ udp_detach(so); } else { /* A "normal" UDP packet */ struct mbuf *m; int len; #ifdef _WIN32 unsigned long n; #else int n; #endif m = m_get(so->slirp); if (!m) { return; } m->m_data += IF_MAXLINKHDR; /* * XXX Shouldn't FIONREAD packets destined for port 53, * but I don't know the max packet size for DNS lookups */ len = M_FREEROOM(m); /* if (so->so_fport != htons(53)) { */ ioctlsocket(so->s, FIONREAD, &n); if (n > len) { n = (m->m_data - m->m_dat) + m->m_len + n + 1; m_inc(m, n); len = M_FREEROOM(m); } /* } */ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, &addrlen); DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n", m->m_len, errno,strerror(errno))); if(m->m_len<0) { u_char code=ICMP_UNREACH_PORT; if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET; DEBUG_MISC((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code)); icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno)); m_free(m); } else { /* * Hack: domain name lookup will be used the most for UDP, * and since they'll only be used once there's no need * for the 4 minute (or whatever) timeout... So we time them * out much quicker (10 seconds for now...) */ if (so->so_expire) { if (so->so_fport == htons(53)) so->so_expire = curtime + SO_EXPIREFAST; else so->so_expire = curtime + SO_EXPIRE; } /* * If this packet was destined for CTL_ADDR, * make it look like that's where it came from, done by udp_output */ udp_output(so, m, &addr); } /* rx error */ } /* if ping packet */ }
void icmp_input(struct mbuf *m, int hlen) { register struct icmp *icp; register struct ip *ip=mtod(m, struct ip *); int icmplen=ip->ip_len; /* int code; */ DEBUG_CALL("icmp_input"); DEBUG_ARG("m = %lx", (long )m); DEBUG_ARG("m_len = %d", m->m_len); STAT(icmpstat.icps_received++); /* * Locate icmp structure in mbuf, and check * that its not corrupted and of at least minimum length. */ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ STAT(icmpstat.icps_tooshort++); freeit: m_freem(m); goto end_error; } m->m_len -= hlen; m->m_data += hlen; icp = mtod(m, struct icmp *); if (cksum(m, icmplen)) { STAT(icmpstat.icps_checksum++); goto freeit; } m->m_len += hlen; m->m_data -= hlen; /* icmpstat.icps_inhist[icp->icmp_type]++; */ /* code = icp->icmp_code; */ DEBUG_ARG("icmp_type = %d", icp->icmp_type); switch (icp->icmp_type) { case ICMP_ECHO: icp->icmp_type = ICMP_ECHOREPLY; ip->ip_len += hlen; /* since ip_input subtracts this */ if (ip_geth(ip->ip_dst) == alias_addr_ip) { icmp_reflect(m); } else { struct socket *so; SockAddress addr; uint32_t addr_ip; uint16_t addr_port; if ((so = socreate()) == NULL) goto freeit; if(udp_attach(so) == -1) { DEBUG_MISC((dfd,"icmp_input udp_attach errno = %d-%s\n", errno,errno_str)); sofree(so); m_free(m); goto end_error; } so->so_m = m; so->so_faddr_ip = ip_geth(ip->ip_dst); so->so_faddr_port = 7; so->so_laddr_ip = ip_geth(ip->ip_src); so->so_laddr_port = 9; so->so_iptos = ip->ip_tos; so->so_type = IPPROTO_ICMP; so->so_state = SS_ISFCONNECTED; /* Send the packet */ if ((so->so_faddr_ip & 0xffffff00) == special_addr_ip) { /* It's an alias */ int low = so->so_faddr_ip & 0xff; if (low >= CTL_DNS && low < CTL_DNS + dns_addr_count) addr_ip = dns_addr[low - CTL_DNS]; else addr_ip = loopback_addr_ip; } else { addr_ip = so->so_faddr_ip; } addr_port = so->so_faddr_port; sock_address_init_inet( &addr, addr_ip, addr_port ); if(socket_sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), &addr) < 0) { DEBUG_MISC((dfd,"icmp_input udp sendto tx errno = %d-%s\n", errno,errno_str)); icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,errno_str); udp_detach(so); } } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ break; case ICMP_UNREACH: /* XXX? report error? close socket? */ case ICMP_TIMXCEED: case ICMP_PARAMPROB: case ICMP_SOURCEQUENCH: case ICMP_TSTAMP: case ICMP_MASKREQ: case ICMP_REDIRECT: STAT(icmpstat.icps_notsupp++); m_freem(m); break; default: STAT(icmpstat.icps_badtype++); m_freem(m); } /* swith */ end_error: /* m is m_free()'d xor put in a socket xor or given to ip_send */ return; }
/* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { register struct ip *ip; int hlen; DEBUG_CALL("ip_input"); DEBUG_ARG("m = %lx", (long)m); DEBUG_ARG("m_len = %d", m->m_len); STAT(ipstat.ips_total++); if (m->m_len < (int)sizeof (struct ip)) { STAT(ipstat.ips_toosmall++); return; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { STAT(ipstat.ips_badvers++); goto bad; } hlen = ip->ip_hl << 2; if (hlen < (int)sizeof(struct ip ) || hlen>m->m_len) {/* min header length */ STAT(ipstat.ips_badhlen++); /* or packet too short */ goto bad; } /* keep ip header intact for ICMP reply * ip->ip_sum = cksum(m, hlen); * if (ip->ip_sum) { */ if(cksum(m,hlen)) { STAT(ipstat.ips_badsum++); goto bad; } /* * Convert fields to host representation. */ NTOHS(ip->ip_len); if (ip->ip_len < hlen) { STAT(ipstat.ips_badlen++); goto bad; } NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_len < ip->ip_len) { STAT(ipstat.ips_tooshort++); goto bad; } if (slirp_restrict) { if (ip_geth(ip->ip_dst) != special_addr_ip) { if (ip_getn(ip->ip_dst) == 0xffffffffu && ip->ip_p != IPPROTO_UDP) goto bad; } else { int host = ip_geth(ip->ip_dst) & 0xff; struct ex_list *ex_ptr; if (host == 0xff) goto bad; for (ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) if (ex_ptr->ex_addr == host) break; if (!ex_ptr) goto bad; } } /* Should drop packet if mbuf too long? hmmm... */ if (m->m_len > ip->ip_len) m_adj(m, ip->ip_len - m->m_len); /* check ip_ttl for a correct ICMP reply */ if(ip->ip_ttl==0 || ip->ip_ttl==1) { icmp_error(m, ICMP_TIMXCEED,ICMP_TIMXCEED_INTRANS, 0,"ttl"); goto bad; } /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ /* We do no IP options */ /* if (hlen > sizeof (struct ip) && ip_dooptions(m)) * goto next; */ /* * If offset or IP_MF are set, must reassemble. * Otherwise, nothing need be done. * (We could look in the reassembly queue to see * if the packet was previously fragmented, * but it's not worth the time; just let them time out.) * * XXX This should fail, don't fragment yet */ if (ip->ip_off &~ IP_DF) { register struct ipq *fp; struct qlink *l; /* * Look for queue of fragments * of this datagram. */ for (l = ipq.ip_link.next; l != &ipq.ip_link; l = l->next) { fp = container_of(l, struct ipq, ip_link); if (ip->ip_id == fp->ipq_id && ip_equal(ip->ip_src, fp->ipq_src) && ip_equal(ip->ip_dst, fp->ipq_dst) && ip->ip_p == fp->ipq_p) goto found; } fp = NULL; found: /* * Adjust ip_len to not reflect header, * set ip_mff if more fragments are expected, * convert offset of this to bytes. */ ip->ip_len -= hlen; if (ip->ip_off & IP_MF) ip->ip_tos |= 1; else ip->ip_tos &= ~1; ip->ip_off <<= 3; /* * If datagram marked as having more fragments * or if this is not the first fragment, * attempt reassembly; if it succeeds, proceed. */ if (ip->ip_tos & 1 || ip->ip_off) { STAT(ipstat.ips_fragments++); ip = ip_reass(ip, fp); if (ip == NULL) return; STAT(ipstat.ips_reassembled++); m = dtom(ip); } else if (fp) ip_freef(fp); } else
/* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { struct ip *ip; int hlen; DEBUG_CALL("ip_input"); DEBUG_ARG("m = %lx", (long)m); DEBUG_ARG("m_len = %d", m->m_len); ipstat.ips_total++; if (m->m_len < sizeof (struct ip)) { ipstat.ips_toosmall++; return; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { ipstat.ips_badvers++; goto bad; } hlen = ip->ip_hl << 2; if (hlen<sizeof(struct ip ) || hlen>m->m_len) {/* min header length */ ipstat.ips_badhlen++; /* or packet too short */ goto bad; } /* keep ip header intact for ICMP reply * ip->ip_sum = cksum(m, hlen); * if (ip->ip_sum) { */ if(cksum(m,hlen)) { ipstat.ips_badsum++; goto bad; } /* * Convert fields to host representation. */ NTOHS(ip->ip_len); if (ip->ip_len < hlen) { ipstat.ips_badlen++; goto bad; } NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_len < ip->ip_len) { ipstat.ips_tooshort++; goto bad; } /* Should drop packet if mbuf too long? hmmm... */ if (m->m_len > ip->ip_len) m_adj(m, ip->ip_len - m->m_len); /* check ip_ttl for a correct ICMP reply */ if(ip->ip_ttl==0 || ip->ip_ttl==1) { icmp_error(m, ICMP_TIMXCEED,ICMP_TIMXCEED_INTRANS, 0,"ttl"); goto bad; } /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ /* We do no IP options */ /* if (hlen > sizeof (struct ip) && ip_dooptions(m)) * goto next; */ /* * If offset or IP_MF are set, must reassemble. * Otherwise, nothing need be done. * (We could look in the reassembly queue to see * if the packet was previously fragmented, * but it's not worth the time; just let them time out.) * * XXX This should fail, don't fragment yet */ if (ip->ip_off &~ IP_DF) { register struct ipq *fp; /* * Look for queue of fragments * of this datagram. */ for (fp = (struct ipq *) ipq.next; fp != &ipq; fp = (struct ipq *) fp->next) if (ip->ip_id == fp->ipq_id && ip->ip_src.s_addr == fp->ipq_src.s_addr && ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ip->ip_p == fp->ipq_p) goto found; fp = 0; found: /* * Adjust ip_len to not reflect header, * set ip_mff if more fragments are expected, * convert offset of this to bytes. */ ip->ip_len -= hlen; if (ip->ip_off & IP_MF) ((struct ipasfrag *)ip)->ipf_mff |= 1; else ((struct ipasfrag *)ip)->ipf_mff &= ~1; ip->ip_off <<= 3; /* * If datagram marked as having more fragments * or if this is not the first fragment, * attempt reassembly; if it succeeds, proceed. */ if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) { ipstat.ips_fragments++; ip = ip_reass((struct ipasfrag *)ip, fp); if (ip == 0) return; ipstat.ips_reassembled++; m = dtom(ip); } else if (fp) ip_freef(fp); } else ip->ip_len -= hlen; /* * Switch out to protocol's input routine. */ ipstat.ips_delivered++; switch (ip->ip_p) { case IPPROTO_TCP: tcp_input(m, hlen, (struct socket *)NULL); break; case IPPROTO_UDP: udp_input(m, hlen); break; case IPPROTO_ICMP: icmp_input(m, hlen); break; default: ipstat.ips_noproto++; m_free(m); } return; bad: m_freem(m); return; }
/* m->m_data points at ip packet header * m->m_len length ip packet * ip->ip_len length data (IPDU) */ void udp_input(PNATState pData, register struct mbuf *m, int iphlen) { register struct ip *ip; register struct udphdr *uh; int len; struct ip save_ip; struct socket *so; int ret; int ttl; LogFlowFunc(("ENTER: m = %p, iphlen = %d\n", m, iphlen)); ip = mtod(m, struct ip *); Log2(("%RTnaipv4 iphlen = %d\n", ip->ip_dst, iphlen)); udpstat.udps_ipackets++; /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if (iphlen > sizeof(struct ip)) { ip_stripoptions(m, (struct mbuf *)0); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); uh = (struct udphdr *)((caddr_t)ip + iphlen); /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = RT_N2H_U16((u_int16_t)uh->uh_ulen); Assert((ip->ip_len == len)); Assert((ip->ip_len + iphlen == m_length(m, NULL))); if (ip->ip_len != len) { if (len > ip->ip_len) { udpstat.udps_badlen++; Log3(("NAT: IP(id: %hd) has bad size\n", ip->ip_id)); } m_adj(m, len - ip->ip_len); ip->ip_len = len; } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ save_ip = *ip; save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ /* * Checksum extended UDP header and data. */ if (udpcksum && uh->uh_sum) { memset(((struct ipovly *)ip)->ih_x1, 0, 9); ((struct ipovly *)ip)->ih_len = uh->uh_ulen; #if 0 /* keep uh_sum for ICMP reply */ uh->uh_sum = cksum(m, len + sizeof (struct ip)); if (uh->uh_sum) { #endif if (cksum(m, len + iphlen)) { udpstat.udps_badsum++; Log3(("NAT: IP(id: %hd) has bad (udp) cksum\n", ip->ip_id)); goto bad_free_mbuf; } } #if 0 } #endif /* * handle DHCP/BOOTP */ if (uh->uh_dport == RT_H2N_U16_C(BOOTP_SERVER)) { bootp_input(pData, m); goto done_free_mbuf; } LogFunc(("uh src: %RTnaipv4:%d, dst: %RTnaipv4:%d\n", ip->ip_src, RT_H2N_U16_C(uh->uh_sport), ip->ip_dst, RT_H2N_U16_C(uh->uh_dport))); if ( pData->fUseHostResolver && uh->uh_dport == RT_H2N_U16_C(53) && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS)) { struct sockaddr_in dst, src; src.sin_addr.s_addr = ip->ip_dst.s_addr; src.sin_port = uh->uh_dport; dst.sin_addr.s_addr = ip->ip_src.s_addr; dst.sin_port = uh->uh_sport; slirpMbufTagService(pData, m, CTL_DNS); /* udp_output2() expects a pointer to the body of UDP packet. */ m->m_data += sizeof(struct udpiphdr); m->m_len -= sizeof(struct udpiphdr); udp_output2(pData, NULL, m, &src, &dst, IPTOS_LOWDELAY); LogFlowFuncLeave(); return; } /* * handle TFTP */ if ( uh->uh_dport == RT_H2N_U16_C(TFTP_SERVER) && CTL_CHECK(ip->ip_dst.s_addr, CTL_TFTP)) { if (pData->pvTftpSessions) slirpTftpInput(pData, m); goto done_free_mbuf; } /* * Locate pcb for datagram. */ so = udp_last_so; if ( so->so_lport != uh->uh_sport || so->so_laddr.s_addr != ip->ip_src.s_addr) { struct socket *tmp; for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next) { if ( tmp->so_lport == uh->uh_sport && tmp->so_laddr.s_addr == ip->ip_src.s_addr) { so = tmp; break; } } if (tmp == &udb) so = NULL; else { udpstat.udpps_pcbcachemiss++; udp_last_so = so; } } if (so == NULL) { /* * If there's no socket for this packet, * create one */ if ((so = socreate()) == NULL) { Log2(("NAT: IP(id: %hd) failed to create socket\n", ip->ip_id)); goto bad_free_mbuf; } if (udp_attach(pData, so) <= 0) { Log2(("NAT: IP(id: %hd) udp_attach errno = %d (%s)\n", ip->ip_id, errno, strerror(errno))); sofree(pData, so); goto bad_free_mbuf; } /* * Setup fields */ /* udp_last_so = so; */ so->so_laddr = ip->ip_src; so->so_lport = uh->uh_sport; so->so_iptos = ip->ip_tos; /* * XXXXX Here, check if it's in udpexec_list, * and if it is, do the fork_exec() etc. */ } so->so_faddr = ip->ip_dst; /* XXX */ so->so_fport = uh->uh_dport; /* XXX */ Assert(so->so_type == IPPROTO_UDP); /* * DNS proxy */ if ( pData->fUseDnsProxy && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS) && (uh->uh_dport == RT_H2N_U16_C(53))) { dnsproxy_query(pData, so, m, iphlen); goto done_free_mbuf; } iphlen += sizeof(struct udphdr); m->m_len -= iphlen; m->m_data += iphlen; ttl = ip->ip_ttl = save_ip.ip_ttl; ret = setsockopt(so->s, IPPROTO_IP, IP_TTL, (const char*)&ttl, sizeof(ttl)); if (ret < 0) LogRel(("NAT: Error (%s) occurred while setting TTL(%d) attribute " "of IP packet to socket %R[natsock]\n", strerror(errno), ip->ip_ttl, so)); if ( sosendto(pData, so, m) == -1 && ( !soIgnorableErrorCode(errno) && errno != ENOTCONN)) { m->m_len += iphlen; m->m_data -= iphlen; *ip = save_ip; Log2(("NAT: UDP tx errno = %d (%s) on sent to %RTnaipv4\n", errno, strerror(errno), ip->ip_dst)); #if 0 /* ICMP_SOURCEQUENCH haven't got any effect, the idea here * inform guest about the exosting NAT resources with assumption that * that guest reduce traffic. But it doesn't work */ if( errno == EAGAIN || errno == EWOULDBLOCK || errno == EINPROGRESS || errno == ENOTCONN) icmp_error(pData, m, ICMP_SOURCEQUENCH, 0, 1, strerror(errno)); else #endif icmp_error(pData, m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); so->so_m = NULL; LogFlowFuncLeave(); return; } if (so->so_m) m_freem(pData, so->so_m); /* used for ICMP if error on sorecvfrom */ /* restore the orig mbuf packet */ m->m_len += iphlen; m->m_data -= iphlen; *ip = save_ip; so->so_m = m; /* ICMP backup */ LogFlowFuncLeave(); return; bad_free_mbuf: Log2(("NAT: UDP(id: %hd) datagram to %RTnaipv4 with size(%d) claimed as bad\n", ip->ip_id, &ip->ip_dst, ip->ip_len)); done_free_mbuf: /* some services like bootp(built-in), dns(buildt-in) and dhcp don't need sockets * and create new m'buffers to send them to guest, so we'll free their incomming * buffers here. */ m_freem(pData, m); LogFlowFuncLeave(); return; }
/* * Process a received ICMP message. */ void icmp_input(struct mbuf *m, int hlen) { register struct icmp *icp; register struct ip *ip=mtod(m, struct ip *); int icmplen=ip->ip_len; /* int code; */ DEBUG_CALL("icmp_input"); DEBUG_ARG("m = %p", m); DEBUG_ARG("m_len = %d", m->m_len); icmpstat.icps_received++; /* * Locate icmp structure in mbuf, and check * that its not corrupted and of at least minimum length. */ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ icmpstat.icps_tooshort++; freeit: m_freem(m); goto end_error; } m->m_len -= hlen; m->m_data += hlen; icp = mtod(m, struct icmp *); if (cksum(m, icmplen)) { icmpstat.icps_checksum++; goto freeit; } m->m_len += hlen; m->m_data -= hlen; /* icmpstat.icps_inhist[icp->icmp_type]++; */ /* code = icp->icmp_code; */ DEBUG_ARG("icmp_type = %d", icp->icmp_type); switch (icp->icmp_type) { case ICMP_ECHO: icp->icmp_type = ICMP_ECHOREPLY; ip->ip_len += hlen; /* since ip_input subtracts this */ if (ip->ip_dst.s_addr == alias_addr.s_addr) { icmp_reflect(m); } else { struct socket *so; struct sockaddr_in addr; if ((so = socreate()) == NULL) goto freeit; if (icmp_send(so, m, hlen) == 0) return; if(udp_attach(so) == -1) { DEBUG_MISC(("icmp_input udp_attach errno = %d-%s\n", errno,strerror(errno))); sofree(so); m_free(m); goto end_error; } so->so_m = m; so->so_faddr = ip->ip_dst; so->so_fport = htons(7); so->so_laddr = ip->ip_src; so->so_lport = htons(9); so->so_iptos = ip->ip_tos; so->so_type = IPPROTO_ICMP; so->so_state = SS_ISFCONNECTED; /* Send the packet */ addr.sin_family = AF_INET; if ((so->so_faddr.s_addr & htonl(0xffffff00)) == special_addr.s_addr) { /* It's an alias */ switch(ntohl(so->so_faddr.s_addr) & 0xff) { case CTL_DNS: addr.sin_addr = dns_addr; break; case CTL_ALIAS: default: addr.sin_addr = loopback_addr; break; } } else { addr.sin_addr = so->so_faddr; } addr.sin_port = so->so_fport; if(sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, (struct sockaddr *)&addr, sizeof(addr)) == -1) { DEBUG_MISC(("icmp_input udp sendto tx errno = %d-%s\n", errno,strerror(errno))); icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); udp_detach(so); } } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ break; case ICMP_UNREACH: /* XXX? report error? close socket? */ case ICMP_TIMXCEED: case ICMP_PARAMPROB: case ICMP_SOURCEQUENCH: case ICMP_TSTAMP: case ICMP_MASKREQ: case ICMP_REDIRECT: icmpstat.icps_notsupp++; m_freem(m); break; default: icmpstat.icps_badtype++; m_freem(m); } /* swith */ end_error: /* m is m_free()'d xor put in a socket xor or given to ip_send */ return; }
void udp_input(struct mbuf *m, int off) { int iphlen = off; struct ip *ip; struct udphdr *uh; struct ifnet *ifp; struct inpcb *inp; uint16_t len, ip_len; struct inpcbinfo *pcbinfo; struct ip save_ip; struct sockaddr_in udp_in; struct m_tag *fwd_tag; int cscov_partial; uint8_t pr; ifp = m->m_pkthdr.rcvif; UDPSTAT_INC(udps_ipackets); /* * Strip IP options, if any; should skip this, make available to * user, and use on returned packets, but we don't yet have a way to * check the checksum with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) { UDPSTAT_INC(udps_hdrops); return; } ip = mtod(m, struct ip *); } uh = (struct udphdr *)((caddr_t)ip + iphlen); pr = ip->ip_p; cscov_partial = (pr == IPPROTO_UDPLITE) ? 1 : 0; /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; /* * Construct sockaddr format source address. Stuff source address * and datagram in user buffer. */ bzero(&udp_in, sizeof(udp_in)); udp_in.sin_len = sizeof(udp_in); udp_in.sin_family = AF_INET; udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; /* * Make mbuf data length reflect UDP length. If not enough data to * reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); ip_len = ntohs(ip->ip_len) - iphlen; if (pr == IPPROTO_UDPLITE && len == 0) { /* Zero means checksum over the complete packet. */ len = ip_len; cscov_partial = 0; } if (ip_len != len) { if (len > ip_len || len < sizeof(struct udphdr)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (pr == IPPROTO_UDP) m_adj(m, len - ip_len); } /* * Save a copy of the IP header in case we want restore it for * sending an ICMP error message in response. */ if (!V_udp_blackhole) save_ip = *ip; else memset(&save_ip, 0, sizeof(save_ip)); /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { u_short uh_sum; if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + pr)); uh_sum ^= 0xffff; } else { char b[9]; bcopy(((struct ipovly *)ip)->ih_x1, b, 9); bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = (pr == IPPROTO_UDP) ? uh->uh_ulen : htons(ip_len); uh_sum = in_cksum(m, len + sizeof (struct ip)); bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); } if (uh_sum) { UDPSTAT_INC(udps_badsum); m_freem(m); return; } } else UDPSTAT_INC(udps_nosum); pcbinfo = get_inpcbinfo(pr); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, ifp)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip_moptions *imo; INP_INFO_RLOCK(pcbinfo); pcblist = get_pcblist(pr); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if (inp->inp_lport != uh->uh_dport) continue; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; INP_RLOCK(inp); /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->inp_moptions; if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct sockaddr_in group; int blocked; if (imo == NULL) { INP_RUNLOCK(inp); continue; } bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(imo, ifp, (struct sockaddr *)&group, (struct sockaddr *)&udp_in); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IPSTAT_INC(ips_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); continue; } } if (last != NULL) { struct mbuf *n; n = m_copy(m, 0, M_COPYALL); udp_append(last, ip, n, iphlen, &udp_in); INP_RUNLOCK(last); } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datgram.) */ UDPSTAT_INC(udps_noportbcast); if (inp) INP_RUNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); goto badunlocked; } udp_append(last, ip, m, iphlen, &udp_in); INP_RUNLOCK(last); INP_INFO_RUNLOCK(pcbinfo); return; } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in_pcblookup(pcbinfo, ip->ip_src, uh->uh_sport, next_hop->sin_addr, next_hop->sin_port ? htons(next_hop->sin_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } else inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp, m); if (inp == NULL) { if (udp_log_in_vain) { char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & (M_BCAST | M_MCAST)) { UDPSTAT_INC(udps_noportbcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto badunlocked; *ip = save_ip; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); return; } /* * Check the minimum TTL for socket. */ INP_RLOCK_ASSERT(inp); if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { INP_RUNLOCK(inp); m_freem(m); return; } if (cscov_partial) { struct udpcb *up; up = intoudpcb(inp); if (up->u_rxcslen > len) { INP_RUNLOCK(inp); m_freem(m); return; } } UDP_PROBE(receive, NULL, inp, ip, inp, uh); udp_append(inp, ip, m, iphlen, &udp_in); INP_RUNLOCK(inp); return; badunlocked: m_freem(m); }
/* m->m_data points at ip packet header * m->m_len length ip packet * ip->ip_len length data (IPDU) */ void udp_input(PNATState pData, register struct mbuf *m, int iphlen) { register struct ip *ip; register struct udphdr *uh; int len; struct ip save_ip; struct socket *so; int ret; int ttl, tos; LogFlowFunc(("ENTER: m = %p, iphlen = %d\n", m, iphlen)); ip = mtod(m, struct ip *); Log2(("%RTnaipv4 iphlen = %d\n", ip->ip_dst, iphlen)); udpstat.udps_ipackets++; /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if (iphlen > sizeof(struct ip)) { ip_stripoptions(m, (struct mbuf *)0); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); uh = (struct udphdr *)((caddr_t)ip + iphlen); /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = RT_N2H_U16((u_int16_t)uh->uh_ulen); Assert(ip->ip_len + iphlen == (ssize_t)m_length(m, NULL)); if (ip->ip_len != len) { if (len > ip->ip_len) { udpstat.udps_badlen++; Log3(("NAT: IP(id: %hd) has bad size\n", ip->ip_id)); goto bad_free_mbuf; } m_adj(m, len - ip->ip_len); ip->ip_len = len; } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ save_ip = *ip; save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ /* * Checksum extended UDP header and data. */ if (udpcksum && uh->uh_sum) { memset(((struct ipovly *)ip)->ih_x1, 0, 9); ((struct ipovly *)ip)->ih_len = uh->uh_ulen; #if 0 /* keep uh_sum for ICMP reply */ uh->uh_sum = cksum(m, len + sizeof (struct ip)); if (uh->uh_sum) { #endif if (cksum(m, len + iphlen)) { udpstat.udps_badsum++; Log3(("NAT: IP(id: %hd) has bad (udp) cksum\n", ip->ip_id)); goto bad_free_mbuf; } } #if 0 } #endif /* * handle DHCP/BOOTP */ if (uh->uh_dport == RT_H2N_U16_C(BOOTP_SERVER)) { bootp_input(pData, m); goto done_free_mbuf; } LogFunc(("uh src: %RTnaipv4:%d, dst: %RTnaipv4:%d\n", ip->ip_src.s_addr, RT_N2H_U16(uh->uh_sport), ip->ip_dst.s_addr, RT_N2H_U16(uh->uh_dport))); /* * handle DNS host resolver without creating a socket */ if ( pData->fUseHostResolver && uh->uh_dport == RT_H2N_U16_C(53) && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS)) { struct sockaddr_in dst, src; src.sin_addr.s_addr = ip->ip_dst.s_addr; src.sin_port = uh->uh_dport; dst.sin_addr.s_addr = ip->ip_src.s_addr; dst.sin_port = uh->uh_sport; m_adj(m, sizeof(struct udpiphdr)); m = hostresolver(pData, m, ip->ip_src.s_addr, uh->uh_sport); if (m == NULL) goto done_free_mbuf; slirpMbufTagService(pData, m, CTL_DNS); udp_output2(pData, NULL, m, &src, &dst, IPTOS_LOWDELAY); LogFlowFuncLeave(); return; } /* * handle TFTP */ if ( uh->uh_dport == RT_H2N_U16_C(TFTP_SERVER) && CTL_CHECK(ip->ip_dst.s_addr, CTL_TFTP)) { if (pData->pvTftpSessions) slirpTftpInput(pData, m); goto done_free_mbuf; } /* * XXX: DNS proxy currently relies on the fact that each socket * only serves one request. */ if ( pData->fUseDnsProxy && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS) && (uh->uh_dport == RT_H2N_U16_C(53))) { so = NULL; goto new_socket; } /* * Locate pcb for datagram. */ so = udp_last_so; if ( so->so_lport != uh->uh_sport || so->so_laddr.s_addr != ip->ip_src.s_addr) { struct socket *tmp; for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next) { if ( tmp->so_lport == uh->uh_sport && tmp->so_laddr.s_addr == ip->ip_src.s_addr) { so = tmp; break; } } if (tmp == &udb) so = NULL; else { udpstat.udpps_pcbcachemiss++; udp_last_so = so; } } new_socket: if (so == NULL) { /* * If there's no socket for this packet, * create one */ if ((so = socreate()) == NULL) { Log2(("NAT: IP(id: %hd) failed to create socket\n", ip->ip_id)); goto bad_free_mbuf; } /* * Setup fields */ so->so_laddr = ip->ip_src; so->so_lport = uh->uh_sport; so->so_iptos = ip->ip_tos; if (udp_attach(pData, so) <= 0) { Log2(("NAT: IP(id: %hd) udp_attach errno = %d (%s)\n", ip->ip_id, errno, strerror(errno))); sofree(pData, so); goto bad_free_mbuf; } /* udp_last_so = so; */ /* * XXXXX Here, check if it's in udpexec_list, * and if it is, do the fork_exec() etc. */ } so->so_faddr = ip->ip_dst; /* XXX */ so->so_fport = uh->uh_dport; /* XXX */ Assert(so->so_type == IPPROTO_UDP); /* * DNS proxy */ if ( pData->fUseDnsProxy && CTL_CHECK(ip->ip_dst.s_addr, CTL_DNS) && (uh->uh_dport == RT_H2N_U16_C(53))) { dnsproxy_query(pData, so, m, iphlen); goto done_free_mbuf; } iphlen += sizeof(struct udphdr); m->m_len -= iphlen; m->m_data += iphlen; ttl = ip->ip_ttl = save_ip.ip_ttl; if (ttl != so->so_sottl) { ret = setsockopt(so->s, IPPROTO_IP, IP_TTL, (char *)&ttl, sizeof(ttl)); if (RT_LIKELY(ret == 0)) so->so_sottl = ttl; } tos = save_ip.ip_tos; if (tos != so->so_sotos) { ret = setsockopt(so->s, IPPROTO_IP, IP_TOS, (char *)&tos, sizeof(tos)); if (RT_LIKELY(ret == 0)) so->so_sotos = tos; } { /* * Different OSes have different socket options for DF. We * can't use IP_HDRINCL here as it's only valid for SOCK_RAW. */ # define USE_DF_OPTION(_Optname) \ const int dfopt = _Optname #if defined(IP_MTU_DISCOVER) USE_DF_OPTION(IP_MTU_DISCOVER); #elif defined(IP_DONTFRAG) /* Solaris 11+, FreeBSD */ USE_DF_OPTION(IP_DONTFRAG); #elif defined(IP_DONTFRAGMENT) /* Windows */ USE_DF_OPTION(IP_DONTFRAGMENT); #else USE_DF_OPTION(0); #endif if (dfopt) { int df = (save_ip.ip_off & IP_DF) != 0; #if defined(IP_MTU_DISCOVER) df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; #endif if (df != so->so_sodf) { ret = setsockopt(so->s, IPPROTO_IP, dfopt, (char *)&df, sizeof(df)); if (RT_LIKELY(ret == 0)) so->so_sodf = df; } } } if ( sosendto(pData, so, m) == -1 && ( !soIgnorableErrorCode(errno) && errno != ENOTCONN)) { m->m_len += iphlen; m->m_data -= iphlen; *ip = save_ip; Log2(("NAT: UDP tx errno = %d (%s) on sent to %RTnaipv4\n", errno, strerror(errno), ip->ip_dst)); icmp_error(pData, m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); so->so_m = NULL; LogFlowFuncLeave(); return; } if (so->so_m) m_freem(pData, so->so_m); /* used for ICMP if error on sorecvfrom */ /* restore the orig mbuf packet */ m->m_len += iphlen; m->m_data -= iphlen; *ip = save_ip; so->so_m = m; /* ICMP backup */ LogFlowFuncLeave(); return; bad_free_mbuf: Log2(("NAT: UDP(id: %hd) datagram to %RTnaipv4 with size(%d) claimed as bad\n", ip->ip_id, &ip->ip_dst, ip->ip_len)); done_free_mbuf: /* some services like bootp(built-in), dns(buildt-in) and dhcp don't need sockets * and create new m'buffers to send them to guest, so we'll free their incomming * buffers here. */ if (m != NULL) m_freem(pData, m); LogFlowFuncLeave(); return; }
int udp_input(struct mbuf **mp, int *offp, int proto) { struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; int iphlen; struct ip *ip; struct udphdr *uh; struct inpcb *inp; struct mbuf *m; struct mbuf *opts = NULL; int len, off; struct ip save_ip; struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; off = *offp; m = *mp; *mp = NULL; iphlen = off; udp_stat.udps_ipackets++; /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if (iphlen > sizeof(struct ip)) { ip_stripoptions(m); iphlen = sizeof(struct ip); } /* * IP and UDP headers are together in first mbuf. * Already checked and pulled up in ip_demux(). */ KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), ("UDP header not in one mbuf")); ip = mtod(m, struct ip *); uh = (struct udphdr *)((caddr_t)ip + iphlen); /* destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto bad; /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); if (ip->ip_len != len) { if (len > ip->ip_len || len < sizeof(struct udphdr)) { udp_stat.udps_badlen++; goto bad; } m_adj(m, len - ip->ip_len); /* ip->ip_len = len; */ } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ save_ip = *ip; /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh->uh_sum = m->m_pkthdr.csum_data; else uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_UDP)); uh->uh_sum ^= 0xffff; } else { char b[9]; bcopy(((struct ipovly *)ip)->ih_x1, b, 9); bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = uh->uh_ulen; uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); } if (uh->uh_sum) { udp_stat.udps_badsum++; m_freem(m); return(IPPROTO_DONE); } } else udp_stat.udps_nosum++; if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { struct inpcbhead *connhead; struct inpcontainer *ic, *ic_marker; struct inpcontainerhead *ichead; struct udp_mcast_arg arg; struct inpcb *last; int error; /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multi/broadcasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * Construct sockaddr format source address. */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; arg.udp_in = &udp_in; /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; arg.iphlen = iphlen; connhead = &pcbinfo->hashbase[ INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; LIST_FOREACH(inp, connhead, inp_hash) { #ifdef INET6 if (!INP_ISIPV4(inp)) continue; #endif if (!in_hosteq(inp->inp_faddr, ip->ip_src) || !in_hosteq(inp->inp_laddr, ip->ip_dst) || inp->inp_fport != uh->uh_sport || inp->inp_lport != uh->uh_dport) continue; arg.inp = inp; arg.last = last; arg.ip = ip; arg.m = m; error = udp_mcast_input(&arg); if (error == ERESTART) continue; last = arg.last; if (error == EJUSTRETURN) goto done; } ichead = &pcbinfo->wildcardhashbase[ INP_PCBWILDCARDHASH(uh->uh_dport, pcbinfo->wildcardhashmask)]; ic_marker = in_pcbcontainer_marker(mycpuid); GET_PCBINFO_TOKEN(pcbinfo); LIST_INSERT_HEAD(ichead, ic_marker, ic_list); while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { LIST_REMOVE(ic_marker, ic_list); LIST_INSERT_AFTER(ic, ic_marker, ic_list); inp = ic->ic_inp; if (inp->inp_flags & INP_PLACEMARKER) continue; #ifdef INET6 if (!INP_ISIPV4(inp)) continue; #endif if (inp->inp_lport != uh->uh_dport) continue; if (inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; arg.inp = inp; arg.last = last; arg.ip = ip; arg.m = m; error = udp_mcast_input(&arg); if (error == ERESTART) continue; last = arg.last; if (error == EJUSTRETURN) break; } LIST_REMOVE(ic_marker, ic_list); REL_PCBINFO_TOKEN(pcbinfo); done: if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udp_stat.udps_noportbcast++; goto bad; } #ifdef IPSEC /* check AH/ESP integrity. */ if (ipsec4_in_reject_so(m, last->inp_socket)) { ipsecstat.in_polvio++; goto bad; } #endif /*IPSEC*/ #ifdef FAST_IPSEC /* check AH/ESP integrity. */ if (ipsec4_in_reject(m, last)) goto bad; #endif /*FAST_IPSEC*/ udp_append(last, ip, m, iphlen + sizeof(struct udphdr), &udp_in); return(IPPROTO_DONE); } /* * Locate pcb for datagram. */ inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, udp_reuseport_ext ? m : NULL); if (inp == NULL) { if (log_in_vain) { char buf[sizeof "aaa.bbb.ccc.ddd"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } udp_stat.udps_noport++; if (m->m_flags & (M_BCAST | M_MCAST)) { udp_stat.udps_noportbcast++; goto bad; } if (blackhole) goto bad; #ifdef ICMP_BANDLIM if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto bad; #endif *ip = save_ip; ip->ip_len += iphlen; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); return(IPPROTO_DONE); } KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); #ifdef IPSEC if (ipsec4_in_reject_so(m, inp->inp_socket)) { ipsecstat.in_polvio++; goto bad; } #endif /*IPSEC*/ #ifdef FAST_IPSEC if (ipsec4_in_reject(m, inp)) goto bad; #endif /*FAST_IPSEC*/ /* * Check the minimum TTL for socket. */ if (ip->ip_ttl < inp->inp_ip_minttl) goto bad; /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; if ((inp->inp_flags & INP_CONTROLOPTS) || (inp->inp_socket->so_options & SO_TIMESTAMP)) ip_savecontrol(inp, &opts, ip, m); m_adj(m, iphlen + sizeof(struct udphdr)); lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); if (ssb_appendaddr(&inp->inp_socket->so_rcv, (struct sockaddr *)&udp_in, m, opts) == 0) { lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); udp_stat.udps_fullsock++; goto bad; } lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); sorwakeup(inp->inp_socket); return(IPPROTO_DONE); bad: m_freem(m); if (opts) m_freem(opts); return(IPPROTO_DONE); }
/* * Process a received ICMP message. */ void icmp_input(struct mbuf *m, int hlen) { register struct icmp *icp; register struct ip *ip=mtod(m, struct ip *); int icmplen=ip->ip_len; Slirp *slirp = m->slirp; DEBUG_CALL("icmp_input"); DEBUG_ARG("m = %lx", (long )m); DEBUG_ARG("m_len = %d", m->m_len); /* * Locate icmp structure in mbuf, and check * that its not corrupted and of at least minimum length. */ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ freeit: m_free(m); goto end_error; } m->m_len -= hlen; m->m_data += hlen; icp = mtod(m, struct icmp *); if (cksum(m, icmplen)) { goto freeit; } m->m_len += hlen; m->m_data -= hlen; DEBUG_ARG("icmp_type = %d", icp->icmp_type); switch (icp->icmp_type) { case ICMP_ECHO: ip->ip_len += hlen; /* since ip_input subtracts this */ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { icmp_reflect(m); } else if (slirp->restricted) { goto freeit; } else { struct socket *so; struct sockaddr_in addr; if ((so = socreate(slirp)) == NULL) goto freeit; if (icmp_send(so, m, hlen) == 0) { return; } if(udp_attach(so) == -1) { DEBUG_MISC((dfd,"icmp_input udp_attach errno = %d-%s\n", errno,strerror(errno))); sofree(so); m_free(m); goto end_error; } so->so_m = m; so->so_faddr = ip->ip_dst; so->so_fport = htons(7); so->so_laddr = ip->ip_src; so->so_lport = htons(9); so->so_iptos = ip->ip_tos; so->so_type = IPPROTO_ICMP; so->so_state = SS_ISFCONNECTED; /* Send the packet */ addr.sin_family = AF_INET; if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == slirp->vnetwork_addr.s_addr) { /* It's an alias */ if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { if (get_dns_addr(&addr.sin_addr) < 0) addr.sin_addr = loopback_addr; } else { addr.sin_addr = loopback_addr; } } else { addr.sin_addr = so->so_faddr; } addr.sin_port = so->so_fport; if(sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, (struct sockaddr *)&addr, sizeof(addr)) == -1) { DEBUG_MISC((dfd,"icmp_input udp sendto tx errno = %d-%s\n", errno,strerror(errno))); icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); udp_detach(so); } } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ break; case ICMP_UNREACH: /* XXX? report error? close socket? */ case ICMP_TIMXCEED: case ICMP_PARAMPROB: case ICMP_SOURCEQUENCH: case ICMP_TSTAMP: case ICMP_MASKREQ: case ICMP_REDIRECT: m_free(m); break; default: m_free(m); } /* swith */ end_error: /* m is m_free()'d xor put in a socket xor or given to ip_send */ return; }
static int ip_fw_chk(struct ip **pip, int hlen, struct ifnet *oif, int ignport, struct mbuf **m) { struct ip_fw_chain *chain; struct ip_fw *rule = NULL; struct ip *ip = *pip; struct ifnet *const rif = (*m)->m_pkthdr.rcvif; u_short offset = (ip->ip_off & IP_OFFMASK); u_short src_port, dst_port; /* * Go down the chain, looking for enlightment */ for (chain=ip_fw_chain.lh_first; chain; chain = chain->chain.le_next) { register struct ip_fw *const f = chain->rule; /* Check direction inbound */ if (!oif && !(f->fw_flg & IP_FW_F_IN)) continue; /* Check direction outbound */ if (oif && !(f->fw_flg & IP_FW_F_OUT)) continue; /* Fragments */ if ((f->fw_flg & IP_FW_F_FRAG) && !(ip->ip_off & IP_OFFMASK)) continue; /* If src-addr doesn't match, not this rule. */ if (((f->fw_flg & IP_FW_F_INVSRC) != 0) ^ ((ip->ip_src.s_addr & f->fw_smsk.s_addr) != f->fw_src.s_addr)) continue; /* If dest-addr doesn't match, not this rule. */ if (((f->fw_flg & IP_FW_F_INVDST) != 0) ^ ((ip->ip_dst.s_addr & f->fw_dmsk.s_addr) != f->fw_dst.s_addr)) continue; /* Interface check */ if ((f->fw_flg & IF_FW_F_VIAHACK) == IF_FW_F_VIAHACK) { struct ifnet *const iface = oif ? oif : rif; /* Backwards compatibility hack for "via" */ if (!iface || !iface_match(iface, &f->fw_in_if, f->fw_flg & IP_FW_F_OIFNAME)) continue; } else { /* Check receive interface */ if ((f->fw_flg & IP_FW_F_IIFACE) && (!rif || !iface_match(rif, &f->fw_in_if, f->fw_flg & IP_FW_F_IIFNAME))) continue; /* Check outgoing interface */ if ((f->fw_flg & IP_FW_F_OIFACE) && (!oif || !iface_match(oif, &f->fw_out_if, f->fw_flg & IP_FW_F_OIFNAME))) continue; } /* Check IP options */ if (f->fw_ipopt != f->fw_ipnopt && !ipopts_match(ip, f)) continue; /* Check protocol; if wildcard, match */ if (f->fw_prot == IPPROTO_IP) goto got_match; /* If different, don't match */ if (ip->ip_p != f->fw_prot) continue; #define PULLUP_TO(len) do { \ if ((*m)->m_len < (len) \ && (*m = m_pullup(*m, (len))) == 0) { \ goto bogusfrag; \ } \ *pip = ip = mtod(*m, struct ip *); \ offset = (ip->ip_off & IP_OFFMASK); \ } while (0) /* Protocol specific checks */ switch (ip->ip_p) { case IPPROTO_TCP: { struct tcphdr *tcp; if (offset == 1) /* cf. RFC 1858 */ goto bogusfrag; if (offset != 0) { /* * TCP flags and ports aren't available in this * packet -- if this rule specified either one, * we consider the rule a non-match. */ if (f->fw_nports != 0 || f->fw_tcpf != f->fw_tcpnf) continue; break; } PULLUP_TO(hlen + 14); tcp = (struct tcphdr *) ((u_long *)ip + ip->ip_hl); if (f->fw_tcpf != f->fw_tcpnf && !tcpflg_match(tcp, f)) continue; src_port = ntohs(tcp->th_sport); dst_port = ntohs(tcp->th_dport); goto check_ports; } case IPPROTO_UDP: { struct udphdr *udp; if (offset != 0) { /* * Port specification is unavailable -- if this * rule specifies a port, we consider the rule * a non-match. */ if (f->fw_nports != 0) continue; break; } PULLUP_TO(hlen + 4); udp = (struct udphdr *) ((u_long *)ip + ip->ip_hl); src_port = ntohs(udp->uh_sport); dst_port = ntohs(udp->uh_dport); check_ports: if (!port_match(&f->fw_pts[0], IP_FW_GETNSRCP(f), src_port, f->fw_flg & IP_FW_F_SRNG)) continue; if (!port_match(&f->fw_pts[IP_FW_GETNSRCP(f)], IP_FW_GETNDSTP(f), dst_port, f->fw_flg & IP_FW_F_DRNG)) continue; break; } case IPPROTO_ICMP: { struct icmp *icmp; if (offset != 0) /* Type isn't valid */ break; PULLUP_TO(hlen + 2); icmp = (struct icmp *) ((u_long *)ip + ip->ip_hl); if (!icmptype_match(icmp, f)) continue; break; } #undef PULLUP_TO bogusfrag: if (fw_verbose) ipfw_report(NULL, ip, rif, oif); goto dropit; } got_match: /* Ignore divert/tee rule if socket port is "ignport" */ switch (f->fw_flg & IP_FW_F_COMMAND) { case IP_FW_F_DIVERT: case IP_FW_F_TEE: if (f->fw_divert_port == ignport) continue; /* ignore this rule */ break; } /* Update statistics */ f->fw_pcnt += 1; f->fw_bcnt += ip->ip_len; f->timestamp = rtems_bsdnet_seconds_since_boot(); /* Log to console if desired */ if ((f->fw_flg & IP_FW_F_PRN) && fw_verbose) ipfw_report(f, ip, rif, oif); /* Take appropriate action */ switch (f->fw_flg & IP_FW_F_COMMAND) { case IP_FW_F_ACCEPT: return(0); case IP_FW_F_COUNT: continue; case IP_FW_F_DIVERT: return(f->fw_divert_port); case IP_FW_F_TEE: /* * XXX someday tee packet here, but beware that you * can't use m_copym() or m_copypacket() because * the divert input routine modifies the mbuf * (and these routines only increment reference * counts in the case of mbuf clusters), so need * to write custom routine. */ continue; case IP_FW_F_SKIPTO: #ifdef DIAGNOSTIC while (chain->chain.le_next && chain->chain.le_next->rule->fw_number < f->fw_skipto_rule) #else while (chain->chain.le_next->rule->fw_number < f->fw_skipto_rule) #endif chain = chain->chain.le_next; continue; } /* Deny/reject this packet using this rule */ rule = f; break; } #ifdef DIAGNOSTIC /* Rule 65535 should always be there and should always match */ if (!chain) panic("ip_fw: chain"); #endif /* * At this point, we're going to drop the packet. * Send a reject notice if all of the following are true: * * - The packet matched a reject rule * - The packet is not an ICMP packet * - The packet is not a multicast or broadcast packet */ if ((rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_REJECT && ip->ip_p != IPPROTO_ICMP && !((*m)->m_flags & (M_BCAST|M_MCAST)) && !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { switch (rule->fw_reject_code) { case IP_FW_REJECT_RST: { struct tcphdr *const tcp = (struct tcphdr *) ((u_long *)ip + ip->ip_hl); struct tcpiphdr ti, *const tip = (struct tcpiphdr *) ip; if (offset != 0 || (tcp->th_flags & TH_RST)) break; ti.ti_i = *((struct ipovly *) ip); ti.ti_t = *tcp; bcopy(&ti, ip, sizeof(ti)); NTOHL(tip->ti_seq); NTOHL(tip->ti_ack); tip->ti_len = ip->ip_len - hlen - (tip->ti_off << 2); if (tcp->th_flags & TH_ACK) { tcp_respond(NULL, tip, *m, (tcp_seq)0, ntohl(tcp->th_ack), TH_RST); } else { if (tcp->th_flags & TH_SYN) tip->ti_len++; tcp_respond(NULL, tip, *m, tip->ti_seq + tip->ti_len, (tcp_seq)0, TH_RST|TH_ACK); } *m = NULL; break; } default: /* Send an ICMP unreachable using code */ icmp_error(*m, ICMP_UNREACH, rule->fw_reject_code, 0L, 0); *m = NULL; break; } } dropit: /* * Finally, drop the packet. */ if (*m) { m_freem(*m); *m = NULL; } return(0); }
static int ip_fw_chk(struct ip **pip, int hlen, struct ifnet *oif, u_int16_t *cookie, struct mbuf **m, struct ip_fw_chain **flow_id) { struct ip_fw_chain *chain; struct ip_fw *rule = NULL; struct ip *ip = NULL ; struct ifnet *const rif = (*m)->m_pkthdr.rcvif; u_short offset ; u_short src_port, dst_port; #ifdef IPFW_DIVERT_RESTART u_int16_t skipto = *cookie; #else u_int16_t ignport = ntohs(*cookie); #endif if (pip) { /* normal ip packet */ ip = *pip; offset = (ip->ip_off & IP_OFFMASK); } else { /* bridged or non-ip packet */ struct ether_header *eh = mtod(*m, struct ether_header *); switch (ntohs(eh->ether_type)) { case ETHERTYPE_IP : if ((*m)->m_len<sizeof(struct ether_header) + sizeof(struct ip)) goto non_ip ; ip = (struct ip *)(eh + 1 ); if (ip->ip_v != IPVERSION) goto non_ip ; hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) /* minimum header length */ goto non_ip ; if ((*m)->m_len < 14 + hlen + 14) { printf("-- m_len %d, need more...\n", (*m)->m_len); goto non_ip ; } offset = (ip->ip_off & IP_OFFMASK); break ; default : non_ip: ip = NULL ; break ; } } if (*flow_id) { if (fw_one_pass) return 0 ; /* accept if passed first test */ /* * pkt has already been tagged. Look for the next rule * to restart processing */ if ( (chain = (*flow_id)->rule->next_rule_ptr) == NULL ) chain = (*flow_id)->rule->next_rule_ptr = lookup_next_rule(*flow_id) ; if (!chain) goto dropit; } else { chain=LIST_FIRST(&ip_fw_chain); #ifdef IPFW_DIVERT_RESTART if ( skipto ) { /* * If we've been asked to start at a given rule immediatly, * do so. */ if (skipto >= 65535) goto dropit; while (chain && (chain->rule->fw_number <= skipto)) { chain = LIST_NEXT(chain, chain); } if (! chain) goto dropit; } #endif /* IPFW_DIVERT_RESTART */ } *cookie = 0; for (; chain; chain = LIST_NEXT(chain, chain)) { register struct ip_fw *f; again: f = chain->rule; if (oif) { /* Check direction outbound */ if (!(f->fw_flg & IP_FW_F_OUT)) continue; } else { /* Check direction inbound */ if (!(f->fw_flg & IP_FW_F_IN)) continue; } if (ip == NULL ) { /* * do relevant checks for non-ip packets: * after this, only goto got_match or continue */ struct ether_header *eh = mtod(*m, struct ether_header *); int i, h, l ; #if 0 printf("-- ip_fw: rule %d(%d) for %6D <- %6D type 0x%04x\n", f->fw_number, IP_FW_GETNSRCP(f), eh->ether_dhost, ".", eh->ether_shost, ".", ntohs(eh->ether_type) ); #endif /* * make default rule always match or we have a panic */ if (f->fw_number == 65535) goto got_match ; /* * temporary hack: * udp from 0.0.0.0 means this rule applies. * 1 src port is match ether type * 2 src ports (interval) is match ether type * 3 src ports is match ether address */ if (f->fw_src.s_addr != 0 || f->fw_prot != IPPROTO_UDP) continue ; switch (IP_FW_GETNSRCP(f)) { case 1: /* match one type */ if ( /* ( (f->fw_flg & IP_FW_F_INVSRC) != 0) ^ */ ( f->fw_pts[0] == ntohs(eh->ether_type) ) ) { printf("match!\n"); goto got_match ; } default: break ; } continue; } /* Fragments */ if ((f->fw_flg & IP_FW_F_FRAG) && !(ip->ip_off & IP_OFFMASK)) continue; /* If src-addr doesn't match, not this rule. */ if (((f->fw_flg & IP_FW_F_INVSRC) != 0) ^ ((ip->ip_src.s_addr & f->fw_smsk.s_addr) != f->fw_src.s_addr)) continue; /* If dest-addr doesn't match, not this rule. */ if (((f->fw_flg & IP_FW_F_INVDST) != 0) ^ ((ip->ip_dst.s_addr & f->fw_dmsk.s_addr) != f->fw_dst.s_addr)) continue; /* Interface check */ if ((f->fw_flg & IF_FW_F_VIAHACK) == IF_FW_F_VIAHACK) { struct ifnet *const iface = oif ? oif : rif; /* Backwards compatibility hack for "via" */ if (!iface || !iface_match(iface, &f->fw_in_if, f->fw_flg & IP_FW_F_OIFNAME)) continue; } else { /* Check receive interface */ if ((f->fw_flg & IP_FW_F_IIFACE) && (!rif || !iface_match(rif, &f->fw_in_if, f->fw_flg & IP_FW_F_IIFNAME))) continue; /* Check outgoing interface */ if ((f->fw_flg & IP_FW_F_OIFACE) && (!oif || !iface_match(oif, &f->fw_out_if, f->fw_flg & IP_FW_F_OIFNAME))) continue; } /* Check IP options */ if (f->fw_ipopt != f->fw_ipnopt && !ipopts_match(ip, f)) continue; /* Check protocol; if wildcard, match */ if (f->fw_prot == IPPROTO_IP) goto got_match; /* If different, don't match */ if (ip->ip_p != f->fw_prot) continue; #define PULLUP_TO(len) do { \ if ((*m)->m_len < (len) ) { \ if ( (*m = m_pullup(*m, (len))) == 0) \ goto bogusfrag; \ *pip = ip = mtod(*m, struct ip *); \ offset = (ip->ip_off & IP_OFFMASK); \ } \ } while (0) /* Protocol specific checks */ switch (ip->ip_p) { case IPPROTO_TCP: { struct tcphdr *tcp; if (offset == 1) /* cf. RFC 1858 */ goto bogusfrag; if (offset != 0) { /* * TCP flags and ports aren't available in this * packet -- if this rule specified either one, * we consider the rule a non-match. */ if (f->fw_nports != 0 || f->fw_tcpf != f->fw_tcpnf) continue; break; } PULLUP_TO(hlen + 14); tcp = (struct tcphdr *) ((u_long *)ip + ip->ip_hl); if (f->fw_tcpf != f->fw_tcpnf && !tcpflg_match(tcp, f)) continue; src_port = ntohs(tcp->th_sport); dst_port = ntohs(tcp->th_dport); goto check_ports; } case IPPROTO_UDP: { struct udphdr *udp; if (offset != 0) { /* * Port specification is unavailable -- if this * rule specifies a port, we consider the rule * a non-match. */ if (f->fw_nports != 0) continue; break; } PULLUP_TO(hlen + 4); udp = (struct udphdr *) ((u_long *)ip + ip->ip_hl); src_port = ntohs(udp->uh_sport); dst_port = ntohs(udp->uh_dport); check_ports: if (!port_match(&f->fw_pts[0], IP_FW_GETNSRCP(f), src_port, f->fw_flg & IP_FW_F_SRNG)) continue; if (!port_match(&f->fw_pts[IP_FW_GETNSRCP(f)], IP_FW_GETNDSTP(f), dst_port, f->fw_flg & IP_FW_F_DRNG)) continue; break; } case IPPROTO_ICMP: { struct icmp *icmp; if (offset != 0) /* Type isn't valid */ break; PULLUP_TO(hlen + 2); icmp = (struct icmp *) ((u_long *)ip + ip->ip_hl); if (!icmptype_match(icmp, f)) continue; break; } #undef PULLUP_TO bogusfrag: if (fw_verbose) ipfw_report(NULL, ip, rif, oif); goto dropit; } got_match: *flow_id = chain ; /* XXX set flow id */ #ifndef IPFW_DIVERT_RESTART /* Ignore divert/tee rule if socket port is "ignport" */ switch (f->fw_flg & IP_FW_F_COMMAND) { case IP_FW_F_DIVERT: case IP_FW_F_TEE: if (f->fw_divert_port == ignport) continue; /* ignore this rule */ break; } #endif /* IPFW_DIVERT_RESTART */ /* Update statistics */ f->fw_pcnt += 1; /* * note -- bridged-ip packets still have some fields * in network order, including ip_len */ if (ip) { if (pip) f->fw_bcnt += ip->ip_len; else f->fw_bcnt += ntohs(ip->ip_len); } f->timestamp = time.tv_sec; /* Log to console if desired */ if ((f->fw_flg & IP_FW_F_PRN) && fw_verbose) ipfw_report(f, ip, rif, oif); /* Take appropriate action */ switch (f->fw_flg & IP_FW_F_COMMAND) { case IP_FW_F_ACCEPT: return(0); case IP_FW_F_COUNT: continue; #ifdef IPDIVERT case IP_FW_F_DIVERT: #ifdef IPFW_DIVERT_RESTART *cookie = f->fw_number; #else *cookie = htons(f->fw_divert_port); #endif /* IPFW_DIVERT_RESTART */ return(f->fw_divert_port); #endif case IP_FW_F_TEE: /* * XXX someday tee packet here, but beware that you * can't use m_copym() or m_copypacket() because * the divert input routine modifies the mbuf * (and these routines only increment reference * counts in the case of mbuf clusters), so need * to write custom routine. */ continue; case IP_FW_F_SKIPTO: /* XXX check */ if ( f->next_rule_ptr ) chain = f->next_rule_ptr ; else chain = lookup_next_rule(chain) ; if (!chain) goto dropit; goto again ; #ifdef DUMMYNET case IP_FW_F_PIPE: return(f->fw_pipe_nr | 0x10000 ); #endif } /* Deny/reject this packet using this rule */ rule = f; break; } #ifdef DIAGNOSTIC /* Rule 65535 should always be there and should always match */ if (!chain) panic("ip_fw: chain"); #endif /* * At this point, we're going to drop the packet. * Send a reject notice if all of the following are true: * * - The packet matched a reject rule * - The packet is not an ICMP packet, or is an ICMP query packet * - The packet is not a multicast or broadcast packet */ if ((rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_REJECT && ip && (ip->ip_p != IPPROTO_ICMP || is_icmp_query(ip)) && !((*m)->m_flags & (M_BCAST|M_MCAST)) && !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { switch (rule->fw_reject_code) { case IP_FW_REJECT_RST: { struct tcphdr *const tcp = (struct tcphdr *) ((u_long *)ip + ip->ip_hl); struct tcpiphdr ti, *const tip = (struct tcpiphdr *) ip; if (offset != 0 || (tcp->th_flags & TH_RST)) break; ti.ti_i = *((struct ipovly *) ip); ti.ti_t = *tcp; bcopy(&ti, ip, sizeof(ti)); NTOHL(tip->ti_seq); NTOHL(tip->ti_ack); tip->ti_len = ip->ip_len - hlen - (tip->ti_off << 2); if (tcp->th_flags & TH_ACK) { tcp_respond(NULL, tip, *m, (tcp_seq)0, ntohl(tcp->th_ack), TH_RST); } else { if (tcp->th_flags & TH_SYN) tip->ti_len++; tcp_respond(NULL, tip, *m, tip->ti_seq + tip->ti_len, (tcp_seq)0, TH_RST|TH_ACK); } *m = NULL; break; } default: /* Send an ICMP unreachable using code */ icmp_error(*m, ICMP_UNREACH, rule->fw_reject_code, 0L, 0); *m = NULL; break; } } dropit: /* * Finally, drop the packet. */ if (*m) { m_freem(*m); *m = NULL; } return(0); }