/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ static int tcp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct iphdr *iph = skb->nh.iph; struct tcphdr _tcph, *th; unsigned int tcplen = skb->len - iph->ihl * 4; u_int8_t tcpflags; /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, iph->ihl * 4, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: short packet "); return -NF_ACCEPT; } if(sysctl_spi_enable){ /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); printk(KERN_NOTICE "Blocked incoming TCP packet from %u.%u.%u.%u:%hu to %u.%u.%u.%u:%hu with unexpected sequence\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); return -NF_ACCEPT; } /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the semantic of CHECKSUM_HW is different there * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ if (hooknum == NF_IP_PRE_ROUTING && skb->ip_summed != CHECKSUM_UNNECESSARY && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, skb->ip_summed == CHECKSUM_HW ? skb->csum : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: bad TCP checksum "); printk(KERN_NOTICE "Blocked incoming TCP packet from %u.%u.%u.%u:%hu to %u.%u.%u.%u:%hu with unexpected sequence\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); return -NF_ACCEPT; } } /* Check TCP flags. */ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } return NF_ACCEPT; }
/* Small and modified version of icmp_rcv */ static int icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct icmphdr _ih, *icmph; /* Not enough header? */ icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ if (hooknum != NF_IP_PRE_ROUTING) goto checksum_skipped; switch (skb->ip_summed) { case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad ICMP checksum "); return -NF_ACCEPT; } } checksum_skipped: /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } /* Need to track icmp error message? */ if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_SOURCE_QUENCH && icmph->type != ICMP_TIME_EXCEEDED && icmph->type != ICMP_PARAMETERPROB && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; return icmp_error_message(skb, ctinfo, hooknum); }
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ static int tcp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct tcphdr *th; struct tcphdr _tcph; unsigned int tcplen = skb->len - dataoff; u_int8_t tcpflags; /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } /* Check TCP flags. */ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } return NF_ACCEPT; }
static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; const struct udphdr *hdr; struct udphdr _hdr; unsigned int cscov; /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; } cscov = ntohs(hdr->len); if (cscov == 0) cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; } /* UDPLITE mandates checksums */ if (!hdr->check) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } /* Checksum invalid? Ignore. */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; } return NF_ACCEPT; }
static int udp_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; const struct udphdr *hdr; struct udphdr _hdr; /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } #if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE) if (nf_conntrack_fastnat) return NF_ACCEPT; #endif /* Packet with no checksum */ if (!hdr->check) return NF_ACCEPT; /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } return NF_ACCEPT; }
static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct tcphdr *th; struct tcphdr _tcph; unsigned int tcplen = skb->len - dataoff; u_int8_t tcpflags; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } return NF_ACCEPT; }
/* Small and modified version of icmp_rcv */ static int icmp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; } /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } /* Need to track icmp error message? */ if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_SOURCE_QUENCH && icmph->type != ICMP_TIME_EXCEEDED && icmph->type != ICMP_PARAMETERPROB && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); }
static int icmpv6_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); return -NF_ACCEPT; } /* is not error message ? */ if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); }
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { struct net *net = nf_ct_net(ct); struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); BUG_ON(dh == NULL); state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: if (nf_ct_dccp_loose == 0) { msg = "nf_ct_dccp: not picking up existing connection "; goto out_invalid; } case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: msg = "nf_ct_dccp: invalid state transition "; goto out_invalid; } ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.state = CT_DCCP_NONE; return true; out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); return false; }
static int udp_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum, int (*csum)(const struct sk_buff *, unsigned int)) { unsigned int udplen = skb->len - dataoff; struct udphdr _hdr, *hdr; /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } /* Packet with no checksum */ if (!hdr->check) return NF_ACCEPT; /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the semantic of CHECKSUM_HW is different there * and moreover root might send raw packets. * FIXME: Source route IP option packets --RR */ if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) && skb->ip_summed != CHECKSUM_UNNECESSARY && csum(skb, dataoff)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } return NF_ACCEPT; }
static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; const struct udphdr *hdr; struct udphdr _hdr; /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } /* Packet with no checksum */ if (!hdr->check) return NF_ACCEPT; /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } return NF_ACCEPT; }
static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; const struct udphdr *hdr; struct udphdr _hdr; /* */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } /* */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } /* */ if (!hdr->check) return NF_ACCEPT; /* */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } return NF_ACCEPT; }
static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; int type; icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed "); return -NF_ACCEPT; } type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); return NF_ACCEPT; } if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); }
static int dccp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; unsigned int dccp_len = skb->len - dataoff; unsigned int cscov; const char *msg; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); if (dh == NULL) { msg = "nf_ct_dccp: short packet "; goto out_invalid; } if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || dh->dccph_doff * 4 > dccp_len) { msg = "nf_ct_dccp: truncated/malformed packet "; goto out_invalid; } cscov = dccp_len; if (dh->dccph_cscov) { cscov = (dh->dccph_cscov - 1) * 4; if (cscov > dccp_len) { msg = "nf_ct_dccp: bad checksum coverage "; goto out_invalid; } } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, pf)) { msg = "nf_ct_dccp: bad checksum "; goto out_invalid; } if (dh->dccph_type >= DCCP_PKT_INVALID) { msg = "nf_ct_dccp: reserved packet type "; goto out_invalid; } return NF_ACCEPT; out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; }
/* Called when a new connection for this protocol found. */ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { static const u_int8_t valid_new[] = { [ICMPV6_ECHO_REQUEST - 128] = 1, [ICMPV6_NI_QUERY - 128] = 1 }; int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; } return true; }
static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; bool res; seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_maxwin == 0) { if (tcph->syn) { sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; if (!tcph->ack) return true; } else { sender->td_end = end; swin = win << sender->td_scale; sender->td_maxwin = (swin == 0 ? 1 : swin); sender->td_maxend = end + sender->td_maxwin; if (receiver->td_maxwin == 0) receiver->td_end = receiver->td_maxend = sack; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); } if (!(tcph->ack)) { ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { ack = sack = receiver->td_end; } if (seq == end && (!tcph->rst || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) seq = end = sender->td_end; pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { if (!tcph->syn) win <<= sender->td_scale; swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) { sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } if (tcph->ack) { if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { sender->td_maxack = ack; sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; } else if (after(ack, sender->td_maxack)) sender->td_maxack = ack; } if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } if (ack == receiver->td_end) receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win; state->retrans = 0; } } res = true; } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " "receiver end=%u maxend=%u maxwin=%u\n", res, sender->td_end, sender->td_maxend, sender->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; }
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); type = dh->dccph_type; if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* Tear down connection immediately if only reply is a RESET */ nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; new_state = dccp_state_table[role][type][old_state]; switch (new_state) { case CT_DCCP_REQUEST: if (old_state == CT_DCCP_TIMEWAIT && role == CT_DCCP_ROLE_SERVER) { /* Reincarnation in the reverse direction: reopen and * reverse client/server roles. */ ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; } break; case CT_DCCP_RESPOND: if (old_state == CT_DCCP_REQUEST) ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); break; case CT_DCCP_PARTOPEN: if (old_state == CT_DCCP_RESPOND && type == DCCP_PKT_ACK && dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) set_bit(IPS_ASSURED_BIT, &ct->status); break; case CT_DCCP_IGNORE: /* * Connection tracking might be out of sync, so we ignore * packets that might establish a new connection and resync * if the server responds with a valid Response. */ if (ct->proto.dccp.last_dir == !dir && ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && type == DCCP_PKT_RESPONSE) { ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); new_state = CT_DCCP_RESPOND; break; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; }
/* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum) { struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; #if defined(CONFIG_MV_ETH_NFP_CT_LEARN) struct nf_conntrack_tuple *tupleInverseDir; #endif enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; tuple = &ct->tuplehash[dir].tuple; #if defined(CONFIG_MV_ETH_NFP_CT_LEARN) tupleInverseDir = &ct->tuplehash[!dir].tuple; #endif switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; /* RFC 1122: "When a connection is closed actively, * it MUST linger in TIME-WAIT state for a time 2xMSL * (Maximum Segment Lifetime). However, it MAY accept * a new SYN from the remote TCP to reopen the connection * directly from TIME-WAIT state, if..." * We ignore the conditions because we are in the * TIME-WAIT state anyway. * * Handle aborted connections: we and the server * think there is an existing connection but the client * aborts it and starts a new one. */ if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process * context and we must give it a chance to terminate. */ if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: * * Our connection entry may be out of sync, so ignore * packets which may signal the real connection between * the client and the server. * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. * * If the ignored packet is invalid, the receiver will send * a RST we'll catch below. */ if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We kill this session and block the SYN/ACK so * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); nf_ct_kill(ct); return NF_DROP; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then * c) we hold a half-open connection. * * Delete our connection entry. * We skip window checking, because packet might ACK * segments we ignored. */ goto in_window; } /* Just fall through */ default: /* Keep compilers happy. */ break; } #if defined(CONFIG_MV_ETH_NFP_CT_LEARN) /* * When connection is handled by NFP, we have to relax TCP tracking * rules as not all packets goes through Linux conntrack. */ if ((tuple->nfp) || (tupleInverseDir->nfp)) goto in_window; #endif /* CONFIG_MV_ETH_NFP_CT_LEARN */ if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: /* From now on we have got in-window packets */ ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); nf_ct_dump_tuple(tuple); pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) timeout = nf_ct_tcp_timeout_max_retrans; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) timeout = nf_ct_tcp_timeout_unacknowledged; else timeout = tcp_timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_STATUS, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; }
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum) { struct net *net = nf_ct_net(ct); struct dccp_net *dn; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); BUG_ON(dh == NULL); type = dh->dccph_type; if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; new_state = dccp_state_table[role][type][old_state]; switch (new_state) { case CT_DCCP_REQUEST: if (old_state == CT_DCCP_TIMEWAIT && role == CT_DCCP_ROLE_SERVER) { ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; } break; case CT_DCCP_RESPOND: if (old_state == CT_DCCP_REQUEST) ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); break; case CT_DCCP_PARTOPEN: if (old_state == CT_DCCP_RESPOND && type == DCCP_PKT_ACK && dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) set_bit(IPS_ASSURED_BIT, &ct->status); break; case CT_DCCP_IGNORE: if (ct->proto.dccp.last_dir == !dir && ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && type == DCCP_PKT_RESPONSE) { ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); new_state = CT_DCCP_RESPOND; break; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); return NF_ACCEPT; }
static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; bool res; /* * Get the required data from the packet. */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_maxwin == 0) { /* * Initialize sender data. */ if (tcph->syn) { /* * SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. */ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; if (!tcph->ack) /* Simultaneous open */ return true; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); } if (!(tcph->ack)) { /* * If there is no ACK, just pretend it was set and OK. */ ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { /* * Broken TCP stacks, that set ACK in RST packets as well * with zero ack value. */ ack = sack = receiver->td_end; } if (seq == end && (!tcph->rst || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) /* * Packets contains no data: we assume it is valid * and check the ack value only. * However RST segments are always validated by their * SEQ number, except when seq == 0 (reset sent answering * SYN. */ seq = end = sender->td_end; pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ if (!tcph->syn) win <<= sender->td_scale; /* * Update sender data. */ swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) { sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } if (tcph->ack) { if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { sender->td_maxack = ack; sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; } else if (after(ack, sender->td_maxack)) sender->td_maxack = ack; } /* * Update receiver data. */ if (after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } if (ack == receiver->td_end) receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. */ if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win; state->retrans = 0; } } res = true; } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " "receiver end=%u maxend=%u maxwin=%u\n", res, sender->td_end, sender->td_maxend, sender->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; }
static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; struct ip_nat_info *info; /* maniptype == SRC for postrouting. */ enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); /* We never see fragments: conntrack defrags on pre-routing and local-out, and ip_nat_out protects post-routing. */ IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to packet filter it out, or implement conntrack/NAT for that protocol. 8) --RR */ if (!ct) { /* Exception: ICMP redirect to new connection (not in hash table yet). We must not let this through, in case we're doing NAT to the same network. */ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { struct icmphdr _hdr, *hp; hp = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, sizeof(_hdr), &_hdr); if (hp != NULL && hp->type == ICMP_REDIRECT) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, *pskb, NULL, NULL, NULL, "ip_nat_fn: " "untracked ICMP redirect "); return NF_DROP; } } return NF_ACCEPT; } /* Don't try to NAT if this packet is not conntracked */ if (ct == &ip_conntrack_untracked) return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { if (!ip_nat_icmp_reply_translation(ct, ctinfo, hooknum, pskb)) return NF_DROP; else return NF_ACCEPT; } /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ case IP_CT_NEW: info = &ct->nat.info; /* Seen it before? This can happen for loopback, retrans, or local packets.. */ if (!ip_nat_initialized(ct, maniptype)) { unsigned int ret; if (unlikely(is_confirmed(ct))) /* NAT module was loaded late */ ret = alloc_null_binding_confirmed(ct, info, hooknum); else if (hooknum == NF_IP_LOCAL_IN) /* LOCAL_IN hook doesn't have a chain! */ ret = alloc_null_binding(ct, info, hooknum); else ret = ip_nat_rule_find(pskb, hooknum, in, out, ct, info); if (ret != NF_ACCEPT) { return ret; } } else DEBUGP("Already setup manip %s for ct %p\n", maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", ct); break; default: /* ESTABLISHED */ IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); info = &ct->nat.info; } IP_NF_ASSERT(info); return ip_nat_packet(ct, ctinfo, hooknum, pskb); }
/* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; tuple = &ct->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; /* RFC 1122: "When a connection is closed actively, * it MUST linger in TIME-WAIT state for a time 2xMSL * (Maximum Segment Lifetime). However, it MAY accept * a new SYN from the remote TCP to reopen the connection * directly from TIME-WAIT state, if..." * We ignore the conditions because we are in the * TIME-WAIT state anyway. * * Handle aborted connections: we and the server * think there is an existing connection but the client * aborts it and starts a new one. */ if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process * context and we must give it a chance to terminate. */ if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: * * Our connection entry may be out of sync, so ignore * packets which may signal the real connection between * the client and the server. * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. * * If the ignored packet is invalid, the receiver will send * a RST we'll catch below. */ if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We get in sync from the previously annotated * values. */ old_state = TCP_CONNTRACK_SYN_SENT; new_state = TCP_CONNTRACK_SYN_RECV; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = ct->proto.tcp.last_win == 0 ? 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; memset(&ct->proto.tcp.seen[dir], 0, sizeof(struct ip_ct_tcp_state)); break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.last_win = ntohs(th->window); /* a) This is a SYN in ORIGINAL. The client and the server * may be in sync but we are not. In that case, we annotate * the TCP options and let the packet go through. If it is a * valid SYN packet, the server will reply with a SYN/ACK, and * then we'll get in sync. Otherwise, the server potentially * responds with a challenge ACK if implementing RFC5961. */ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; ct->proto.tcp.last_flags = ct->proto.tcp.last_wscale = 0; tcp_options(skb, dataoff, th, &seen); if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; ct->proto.tcp.last_wscale = seen.td_scale; } if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } /* Mark the potential for RFC5961 challenge ACK, * this pose a special problem for LAST_ACK state * as ACK is intrepretated as ACKing last FIN. */ if (old_state == TCP_CONNTRACK_LAST_ACK) ct->proto.tcp.last_flags |= IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or * the SYN/ACK from the server is lost, the client may transmit * a keep-alive packet while in SYN_SENT state. This needs to * be associated with the original conntrack entry in order to * generate a new SYN with the correct sequence number. */ if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); spin_unlock_bh(&ct->lock); return NF_ACCEPT; } /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" * e.g. in response to spurious SYNs. Conntrack MUST * not believe this ACK is acking last FIN. */ if (old_state == TCP_CONNTRACK_LAST_ACK && index == TCP_ACK_SET && ct->proto.tcp.last_dir != dir && ct->proto.tcp.last_index == TCP_SYN_SET && (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: challenge-ACK ignored "); return NF_ACCEPT; /* Don't change state */ } break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then * c) we hold a half-open connection. * * Delete our connection entry. * We skip window checking, because packet might ACK * segments we ignored. */ goto in_window; } /* Just fall through */ default: /* Keep compilers happy. */ break; } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: /* From now on we have got in-window packets */ ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); nf_ct_dump_tuple(tuple); pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ if (new_state == TCP_CONNTRACK_ESTABLISHED && timeout > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; }
/* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct ip_conntrack *conntrack, const struct sk_buff *skb, enum ip_conntrack_info ctinfo) { enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; struct iphdr *iph = skb->nh.iph; struct tcphdr *th, _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, iph->ihl * 4, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); WRITE_LOCK(&tcp_lock); old_state = conntrack->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; switch (new_state) { case TCP_CONNTRACK_IGNORE: /* Either SYN in ORIGINAL * or SYN/ACK in REPLY * or ACK in REPLY direction (half-open connection). */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir && after(ntohl(th->ack_seq), conntrack->proto.tcp.last_seq)) { /* This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We kill this session and block the SYN/ACK so * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ WRITE_UNLOCK(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, "ip_ct_tcp: killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return -NF_DROP; } conntrack->proto.tcp.last_index = index; conntrack->proto.tcp.last_dir = dir; conntrack->proto.tcp.last_seq = ntohl(th->seq); WRITE_UNLOCK(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, "ip_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); WRITE_UNLOCK(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, "ip_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: if (old_state >= TCP_CONNTRACK_TIME_WAIT) { /* Attempt to reopen a closed connection. * Delete this connection and look up again. */ WRITE_UNLOCK(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return -NF_REPEAT; } break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET) || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && after(ntohl(th->ack_seq), conntrack->proto.tcp.last_seq)) { /* Ignore RST closing down invalid SYN or ACK we had let trough. */ WRITE_UNLOCK(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, "ip_ct_tcp: invalid RST (ignored) "); return NF_ACCEPT; } /* Just fall trough */ default: /* Keep compilers happy. */ break; } if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, skb, iph, th)) { WRITE_UNLOCK(&tcp_lock); return -NF_ACCEPT; } /* From now on we have got in-window packets */ /* If FIN was trimmed off, we don't change state. */ conntrack->proto.tcp.last_index = index; new_state = tcp_conntracks[dir][index][old_state]; DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest), (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); conntrack->proto.tcp.state = new_state; timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; WRITE_UNLOCK(&tcp_lock); if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &conntrack->status); } ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); return NF_ACCEPT; }
static int tcp_in_window(struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, struct tcphdr *tcph, int pf) { struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; __u32 seq, ack, sack, end, win; #if defined (CONFIG_RA_NAT_NONE) __u32 swin; #endif int res; /* * Get the required data from the packet. */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); DEBUGP("tcp_in_window: START\n"); DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " "seq=%u ack=%u sack=%u win=%u end=%u\n", NIPQUAD(iph->saddr), ntohs(tcph->source), NIPQUAD(iph->daddr), ntohs(tcph->dest), seq, ack, sack, win, end); DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_end == 0) { /* * Initialize sender data. */ if (tcph->syn && tcph->ack) { /* * Outgoing SYN-ACK in reply to a SYN. */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. */ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); } if (!(tcph->ack)) { /* * If there is no ACK, just pretend it was set and OK. */ ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { /* * Broken TCP stacks, that set ACK in RST packets as well * with zero ack value. */ ack = sack = receiver->td_end; } if (seq == end && (!tcph->rst || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) /* * Packets contains no data: we assume it is valid * and check the ack value only. * However RST segments are always validated by their * SEQ number, except when seq == 0 (reset sent answering * SYN. */ seq = end = sender->td_end; DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " "seq=%u ack=%u sack =%u win=%u end=%u\n", NIPQUAD(iph->saddr), ntohs(tcph->source), NIPQUAD(iph->daddr), ntohs(tcph->dest), seq, ack, sack, win, end); DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); #if defined (CONFIG_RA_NAT_NONE) if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). */ if (!tcph->syn) win <<= sender->td_scale; /* * Update sender data. */ swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) sender->td_end = end; /* * Update receiver data. */ if (after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } /* * Check retransmissions. */ if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win; state->retrans = 0; } } res = 1; } else { res = 0; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = 1; if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); } #else res = 1; #endif DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " "receiver end=%u maxend=%u maxwin=%u\n", res, sender->td_end, sender->td_maxend, sender->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; }
static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; tuple = &ct->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { spin_unlock_bh(&ct->lock); if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } case TCP_CONNTRACK_IGNORE: if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { old_state = TCP_CONNTRACK_SYN_SENT; new_state = TCP_CONNTRACK_SYN_RECV; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = ct->proto.tcp.last_win == 0 ? 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; memset(&ct->proto.tcp.seen[dir], 0, sizeof(struct ip_ct_tcp_state)); break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.last_win = ntohs(th->window); if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; ct->proto.tcp.last_flags = ct->proto.tcp.last_wscale = 0; tcp_options(skb, dataoff, th, &seen); if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; ct->proto.tcp.last_wscale = seen.td_scale; } if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { goto in_window; } default: break; } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); nf_ct_dump_tuple(tuple); pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; }
/* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *conntrack, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, int pf, unsigned int hooknum) { enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; struct tcphdr *th, _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); write_lock_bh(&tcp_lock); old_state = conntrack->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; switch (new_state) { case TCP_CONNTRACK_IGNORE: /* Ignored packets: * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { /* This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We kill this session and block the SYN/ACK so * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return -NF_DROP; } conntrack->proto.tcp.last_index = index; conntrack->proto.tcp.last_dir = dir; conntrack->proto.tcp.last_seq = ntohl(th->seq); conntrack->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packed ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; if ((conntrack->proto.tcp.seen[dir].flags & IP_CT_TCP_FLAG_CLOSE_INIT) || after(ntohl(th->seq), conntrack->proto.tcp.seen[dir].td_end)) { /* Attempt to reopen a closed connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return -NF_REPEAT; } else { write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid SYN"); return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && conntrack->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then * c) we hold a half-open connection. * * Delete our connection entry. * We skip window checking, because packet might ACK * segments we ignored. */ goto in_window; } /* Just fall through */ default: /* Keep compilers happy. */ break; } if (!tcp_in_window(&conntrack->proto.tcp, dir, index, skb, dataoff, th, pf)) { write_unlock_bh(&tcp_lock); return -NF_ACCEPT; } in_window: /* From now on we have got in-window packets */ conntrack->proto.tcp.last_index = index; DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest), (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); conntrack->proto.tcp.state = new_state; if (old_state != new_state && (new_state == TCP_CONNTRACK_FIN_WAIT || new_state == TCP_CONNTRACK_CLOSE)) conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, skb); if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &conntrack->status); nf_conntrack_event_cache(IPCT_STATUS, skb); } nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); return NF_ACCEPT; }