int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, struct nf_conn *ct) { struct net *net = nf_ct_net(ct); int ret; ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) /* NUL mapping */ ret = alloc_null_binding(ct, hooknum); } return ret; }
static unsigned int ipv4_confirm(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; unsigned int ret; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) goto out; help = nfct_help(ct); if (!help) goto out; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) goto out; ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); if (ret != NF_ACCEPT) return ret; if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } } out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); }
static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, union nf_inet_addr *addr, __be16 port) { struct net *net = nf_ct_net(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; memset(&tuple.src.u3, 0, sizeof(tuple.src.u3)); tuple.src.u.tcp.port = 0; memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3)); tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) return exp; return NULL; }
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, const struct nf_conn *ct, const char *fmt, ...) { struct va_format vaf; struct net *net; va_list args; net = nf_ct_net(ct); if (likely(net->ct.sysctl_log_invalid == 0)) return; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), nf_ct_protonum(ct), "%pV", &vaf); va_end(args); }
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct dccp_net *dn; struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: dn = dccp_pernet(net); if (dn->dccp_loose == 0) { msg = "nf_ct_dccp: not picking up existing connection "; goto out_invalid; } case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: msg = "nf_ct_dccp: invalid state transition "; goto out_invalid; } ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.state = CT_DCCP_NONE; ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; ct->proto.dccp.handshake_seq = 0; return true; out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "%s", msg); return false; }
static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { struct net *net = nf_ct_net(ct); typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; pr_debug("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; /* Can you see how rusty this code is, compared with the pre-2.6.11 * one? That's what happened to my shiny newnat of 2002 ;( -HW */ rcu_read_lock(); nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) nf_nat_pptp_expectfn(ct, exp); else { struct nf_conntrack_tuple inv_t; struct nf_conntrack_expect *exp_other; /* obviously this tuple inversion only works until you do NAT */ nf_ct_invert_tuplepr(&inv_t, &exp->tuple); pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); exp_other = nf_ct_expect_find_get(net, &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); nf_ct_unexpect_related(exp_other); nf_ct_expect_put(exp_other); } else { pr_debug("not found\n"); } } rcu_read_unlock(); }
/* Called when a new connection for this protocol found. */ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { static const u_int8_t valid_new[] = { [ICMPV6_ECHO_REQUEST - 128] = 1, [ICMPV6_NI_QUERY - 128] = 1 }; int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; } return true; }
/* Returns verdict for packet, and may modify conntracktype */ int nf_conntrack_udp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { unsigned int *timeouts; if (udp_error(skb, dataoff, state)) return -NF_ACCEPT; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = udp_get_timeouts(nf_ct_net(ct)); if (!nf_ct_is_confirmed(ct)) ct->proto.udp.stream_ts = 2 * HZ + jiffies; /* If we've seen traffic both ways, this is some kind of UDP * stream. Set Assured. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { unsigned long extra = timeouts[UDP_CT_UNREPLIED]; /* Still active after two seconds? Extend timeout. */ if (time_after(jiffies, ct->proto.udp.stream_ts)) extra = timeouts[UDP_CT_REPLIED]; nf_ct_refresh_acct(ct, ctinfo, skb, extra); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; }
static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } } out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); }
/* Returns verdict for packet, or -1 for invalid. */ static int icmpv6_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { unsigned int *timeout = nf_ct_timeout_lookup(ct); static const u8 valid_new[] = { [ICMPV6_ECHO_REQUEST - 128] = 1, [ICMPV6_NI_QUERY - 128] = 1 }; if (state->pf != NFPROTO_IPV6) return -NF_ACCEPT; if (!nf_ct_is_confirmed(ct)) { int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); return -NF_ACCEPT; } } if (!timeout) timeout = icmpv6_get_timeouts(nf_ct_net(ct)); /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic and also to handle correctly ICMP echo reply duplicates. */ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; }
/* timeout GRE data connections */ static void pptp_destroy_siblings(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_tuple t; nf_ct_gre_keymap_destroy(ct); /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(net, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(net, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); }
static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { struct net *net = nf_ct_net(ct); const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t = {}; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_range range; ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; /* And here goes the grand finale of corrosion... */ if (exp->dir == IP_CT_DIR_ORIGINAL) { pr_debug("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; t.src.u.gre.key = ct_pptp_info->pac_call_id; t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; t.dst.u.gre.key = ct_pptp_info->pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { pr_debug("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; t.src.u.gre.key = nat_pptp_info->pns_call_id; t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; t.dst.u.gre.key = nat_pptp_info->pac_call_id; t.dst.protonum = IPPROTO_GRE; } pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); pr_debug("success\n"); } else { pr_debug("not found!\n"); } /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; range.min_addr = range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; if (exp->dir == IP_CT_DIR_ORIGINAL) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = NF_NAT_RANGE_MAP_IPS; range.min_addr = range.max_addr = ct->master->tuplehash[!exp->dir].tuple.src.u3; if (exp->dir == IP_CT_DIR_REPLY) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); }
static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; bool res; seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_maxwin == 0) { if (tcph->syn) { sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; if (!tcph->ack) return true; } else { sender->td_end = end; swin = win << sender->td_scale; sender->td_maxwin = (swin == 0 ? 1 : swin); sender->td_maxend = end + sender->td_maxwin; if (receiver->td_maxwin == 0) receiver->td_end = receiver->td_maxend = sack; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); } if (!(tcph->ack)) { ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { ack = sack = receiver->td_end; } if (seq == end && (!tcph->rst || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) seq = end = sender->td_end; pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { if (!tcph->syn) win <<= sender->td_scale; swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) { sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } if (tcph->ack) { if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { sender->td_maxack = ack; sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; } else if (after(ack, sender->td_maxack)) sender->td_maxack = ack; } if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } if (ack == receiver->td_end) receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win; state->retrans = 0; } } res = true; } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " "receiver end=%u maxend=%u maxwin=%u\n", res, sender->td_end, sender->td_maxend, sender->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; }
static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; tuple = &ct->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { spin_unlock_bh(&ct->lock); if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } case TCP_CONNTRACK_IGNORE: if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { old_state = TCP_CONNTRACK_SYN_SENT; new_state = TCP_CONNTRACK_SYN_RECV; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = ct->proto.tcp.last_win == 0 ? 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; memset(&ct->proto.tcp.seen[dir], 0, sizeof(struct ip_ct_tcp_state)); break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.last_win = ntohs(th->window); if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; ct->proto.tcp.last_flags = ct->proto.tcp.last_wscale = 0; tcp_options(skb, dataoff, th, &seen); if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; ct->proto.tcp.last_wscale = seen.td_scale; } if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { goto in_window; } default: break; } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); nf_ct_dump_tuple(tuple); pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; }
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum) { struct net *net = nf_ct_net(ct); struct dccp_net *dn; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); BUG_ON(dh == NULL); type = dh->dccph_type; if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; new_state = dccp_state_table[role][type][old_state]; switch (new_state) { case CT_DCCP_REQUEST: if (old_state == CT_DCCP_TIMEWAIT && role == CT_DCCP_ROLE_SERVER) { ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; } break; case CT_DCCP_RESPOND: if (old_state == CT_DCCP_REQUEST) ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); break; case CT_DCCP_PARTOPEN: if (old_state == CT_DCCP_RESPOND && type == DCCP_PKT_ACK && dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) set_bit(IPS_ASSURED_BIT, &ct->status); break; case CT_DCCP_IGNORE: if (ct->proto.dccp.last_dir == !dir && ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && type == DCCP_PKT_RESPONSE) { ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); new_state = CT_DCCP_RESPOND; break; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); return NF_ACCEPT; }
static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, enum sip_expectation_classes class, unsigned int mediaoff, unsigned int medialen) { struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr *saddr; struct nf_conntrack_tuple tuple; int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; saddr = NULL; if (sip_direct_media) { if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3)) return NF_ACCEPT; saddr = &ct->tuplehash[!dir].tuple.src.u3; } /* We need to check whether the registration exists before attempting * to register it since we can see the same media description multiple * times on different connections in case multiple endpoints receive * the same call. *
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the * range. It might not be possible to get a unique tuple, but we try. * At worst (or if we race), we will end up with a final duplicate in * __ip_conntrack_confirm and drop the packet. */ static void get_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig_tuple, const struct nf_nat_range *range, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { const struct nf_conntrack_zone *zone; const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); rcu_read_lock(); l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num, orig_tuple->dst.protonum); /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. * * This is only required for source (ie. NAT/masq) mappings. * So far, we don't do local source mappings, so multiple * manips not an issue. */ if (maniptype == NF_NAT_MANIP_SRC && !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(l3proto, l4proto, orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { *tuple = *orig_tuple; goto out; } } else if (find_appropriate_src(net, zone, l3proto, l4proto, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) goto out; } } /* 2) Select the least-used IP/proto combination in the given range */ *tuple = *orig_tuple; find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into * the range to make a unique tuple. */ /* Only bother mapping if it's not already in range and unique */ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (l4proto->in_range(tuple, maniptype, &range->min_proto, &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) goto out; } else if (!nf_nat_used_tuple(tuple, ct)) { goto out; } } /* Last change: get protocol to try to obtain unique tuple. */ l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); out: rcu_read_unlock(); }
static unsigned int conenat_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct net *net; struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; struct nf_nat_range newrange; const struct nf_nat_multi_range_compat *mr; struct rtable *rt; __be32 newsrc; NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); net = nf_ct_net(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; mr = par->targinfo; rt = skb->rtable; newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("CONENAT: %s ate my IP address\n", par->out->name); return NF_DROP; } write_lock_bh(&conenat_lock); nat->masq_index = par->out->ifindex; write_unlock_bh(&conenat_lock); if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == IPPROTO_UDP) { unsigned int ret,expectcount = net->ct.expect_count; u_int16_t minport, maxport; u_int16_t newport, tmpport; struct nf_conntrack_expect *exp=NULL; struct nf_conntrack_tuple tuple; struct nf_conn_help *help = nfct_help(ct); /* Choose port */ spin_lock_bh(&nf_conntrack_lock); #if 0 exp = LIST_FIND(&nf_conntrack_expect_list, exp_src_cmp, struct nf_conntrack_expect *, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif memset(&tuple,0,sizeof(tuple)); //src tuple.src.l3num = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num; tuple.src.u3.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; tuple.src.u.udp.port = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.udp.port; //dst tuple.dst.u3.ip = newsrc; //tuple.dst.u.udp.port = htons(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port); newport = htons(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port); tuple.dst.protonum = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; pr_debug("tupple1 = %pI4:%hu\n", &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip,ntohs(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port)); if(expectcount > 0){ for(tmpport=0; (tmpport<=expectcount)&&(newport<=65535); tmpport++,newport++){ tuple.dst.u.udp.port=newport; exp = __nf_ct_expect_find_bysave(net, &tuple, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); if(exp) break; } } if (exp) { minport = maxport = exp->tuple.dst.u.udp.port; pr_debug("existing mapped port = %hu\n", ntohs(minport)); } else { minport = mr->range[0].min.udp.port == 0 ? ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port : mr->range[0].min.udp.port; maxport = mr->range[0].max.udp.port == 0 ? htons(65535) : mr->range[0].max.udp.port; for (newport = ntohs(minport),tmpport = ntohs(maxport); newport <= tmpport; newport++) { #if 0 exp = LIST_FIND(&ip_conntrack_expect_list, exp_cmp, struct nf_conntrack_expect *, newsrc, htons(newport), ct->tuplehash[IP_CT_DIR_ORIGINAL]. tuple.dst.protonum); #endif //dst tuple.dst.u.udp.port = htons(newport); exp = __nf_ct_expect_find(net, &tuple); if (!exp) { pr_debug("new mapping: %pI4:%hu -> %pI4:%hu\n", &(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip), ntohs(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.udp.port), &newsrc, newport); minport = maxport = htons(newport); break; } } } spin_unlock_bh(&nf_conntrack_lock); newrange.flags = mr->range[0].flags | IP_NAT_RANGE_MAP_IPS |IP_NAT_RANGE_PROTO_SPECIFIED; newrange.min_ip = newrange.max_ip = newsrc; newrange.min.udp.port = minport; newrange.max.udp.port = maxport; /* Set ct helper */ ret = nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC); if (ret == NF_ACCEPT) { rcu_read_lock(); if (help == NULL) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help == NULL) { return NF_ACCEPT; } } else { memset(&help->help, 0, sizeof(help->help)); } rcu_assign_pointer(help->helper, &nf_conntrack_helper_cone_nat); rcu_read_unlock(); pr_debug("helper setup, skb=%p\n", skb); } return ret; }
static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { struct net *net = nf_ct_net(ct); const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_ipv4_range range; ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; if (exp->dir == IP_CT_DIR_ORIGINAL) { pr_debug("we are PNS->PAC\n"); t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; t.src.u.gre.key = ct_pptp_info->pac_call_id; t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; t.dst.u.gre.key = ct_pptp_info->pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { pr_debug("we are PAC->PNS\n"); t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; t.src.u.gre.key = nat_pptp_info->pns_call_id; t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; t.dst.u.gre.key = nat_pptp_info->pac_call_id; t.dst.protonum = IPPROTO_GRE; } pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); pr_debug("success\n"); } else { pr_debug("not found!\n"); } BUG_ON(ct->status & IPS_NAT_DONE_MASK); range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; if (exp->dir == IP_CT_DIR_ORIGINAL) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min = range.max = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; if (exp->dir == IP_CT_DIR_REPLY) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min = range.max = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); }
/* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum) { struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; #if defined(CONFIG_MV_ETH_NFP_CT_LEARN) struct nf_conntrack_tuple *tupleInverseDir; #endif enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; tuple = &ct->tuplehash[dir].tuple; #if defined(CONFIG_MV_ETH_NFP_CT_LEARN) tupleInverseDir = &ct->tuplehash[!dir].tuple; #endif switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; /* RFC 1122: "When a connection is closed actively, * it MUST linger in TIME-WAIT state for a time 2xMSL * (Maximum Segment Lifetime). However, it MAY accept * a new SYN from the remote TCP to reopen the connection * directly from TIME-WAIT state, if..." * We ignore the conditions because we are in the * TIME-WAIT state anyway. * * Handle aborted connections: we and the server * think there is an existing connection but the client * aborts it and starts a new one. */ if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process * context and we must give it a chance to terminate. */ if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: * * Our connection entry may be out of sync, so ignore * packets which may signal the real connection between * the client and the server. * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. * * If the ignored packet is invalid, the receiver will send * a RST we'll catch below. */ if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We kill this session and block the SYN/ACK so * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); nf_ct_kill(ct); return NF_DROP; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then * c) we hold a half-open connection. * * Delete our connection entry. * We skip window checking, because packet might ACK * segments we ignored. */ goto in_window; } /* Just fall through */ default: /* Keep compilers happy. */ break; } #if defined(CONFIG_MV_ETH_NFP_CT_LEARN) /* * When connection is handled by NFP, we have to relax TCP tracking * rules as not all packets goes through Linux conntrack. */ if ((tuple->nfp) || (tupleInverseDir->nfp)) goto in_window; #endif /* CONFIG_MV_ETH_NFP_CT_LEARN */ if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: /* From now on we have got in-window packets */ ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); nf_ct_dump_tuple(tuple); pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) timeout = nf_ct_tcp_timeout_max_retrans; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) timeout = nf_ct_tcp_timeout_unacknowledged; else timeout = tcp_timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_STATUS, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; }
static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; bool res; /* * Get the required data from the packet. */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_maxwin == 0) { /* * Initialize sender data. */ if (tcph->syn) { /* * SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. */ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; if (!tcph->ack) /* Simultaneous open */ return true; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; <<<<<<< HEAD win <<= sender->td_scale; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; /* * We haven't seen traffic in the other direction yet * but we have to tweak window tracking to pass III * and IV until that happens. */ if (receiver->td_maxwin == 0) receiver->td_end = receiver->td_maxend = sack; ======= sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; >>>>>>> 296c66da8a02d52243f45b80521febece5ed498a } } else if (((state->state == TCP_CONNTRACK_SYN_SENT
static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *th) { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); const struct nf_tcp_net *tn = nf_tcp_pernet(net); const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; /* Don't need lock here: this conntrack not in circulation yet */ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { pr_debug("nf_ct_tcp: invalid new deleting.\n"); return false; } if (new_state == TCP_CONNTRACK_SYN_SENT) { memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* SYN packet */ ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (ct->proto.tcp.seen[0].td_maxwin == 0) ct->proto.tcp.seen[0].td_maxwin = 1; ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end; tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); } else if (tn->tcp_loose == 0) { /* Don't try to pick up connections. */ return false; } else { memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (ct->proto.tcp.seen[0].td_maxwin == 0) ct->proto.tcp.seen[0].td_maxwin = 1; ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end + ct->proto.tcp.seen[0].td_maxwin; /* We assume SACK and liberal window checking to handle * window scaling */ ct->proto.tcp.seen[0].flags = ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | IP_CT_TCP_FLAG_BE_LIBERAL; } /* tcp_packet will set them */ ct->proto.tcp.last_index = TCP_NONE_SET; pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", __func__, sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); return true; }
/* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) return -NF_ACCEPT; if (tcp_error(th, skb, dataoff, state)) return -NF_ACCEPT; if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th)) return -NF_ACCEPT; spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; tuple = &ct->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; /* RFC 1122: "When a connection is closed actively, * it MUST linger in TIME-WAIT state for a time 2xMSL * (Maximum Segment Lifetime). However, it MAY accept * a new SYN from the remote TCP to reopen the connection * directly from TIME-WAIT state, if..." * We ignore the conditions because we are in the * TIME-WAIT state anyway. * * Handle aborted connections: we and the server * think there is an existing connection but the client * aborts it and starts a new one. */ if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process * context and we must give it a chance to terminate. */ if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: * * Our connection entry may be out of sync, so ignore * packets which may signal the real connection between * the client and the server. * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. * * If the ignored packet is invalid, the receiver will send * a RST we'll catch below. */ if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We get in sync from the previously annotated * values. */ old_state = TCP_CONNTRACK_SYN_SENT; new_state = TCP_CONNTRACK_SYN_RECV; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = ct->proto.tcp.last_win == 0 ? 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; memset(&ct->proto.tcp.seen[dir], 0, sizeof(struct ip_ct_tcp_state)); break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.last_win = ntohs(th->window); /* a) This is a SYN in ORIGINAL. The client and the server * may be in sync but we are not. In that case, we annotate * the TCP options and let the packet go through. If it is a * valid SYN packet, the server will reply with a SYN/ACK, and * then we'll get in sync. Otherwise, the server potentially * responds with a challenge ACK if implementing RFC5961. */ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; ct->proto.tcp.last_flags = ct->proto.tcp.last_wscale = 0; tcp_options(skb, dataoff, th, &seen); if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; ct->proto.tcp.last_wscale = seen.td_scale; } if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } /* Mark the potential for RFC5961 challenge ACK, * this pose a special problem for LAST_ACK state * as ACK is intrepretated as ACKing last FIN. */ if (old_state == TCP_CONNTRACK_LAST_ACK) ct->proto.tcp.last_flags |= IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or * the SYN/ACK from the server is lost, the client may transmit * a keep-alive packet while in SYN_SENT state. This needs to * be associated with the original conntrack entry in order to * generate a new SYN with the correct sequence number. */ if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); spin_unlock_bh(&ct->lock); return NF_ACCEPT; } /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" * e.g. in response to spurious SYNs. Conntrack MUST * not believe this ACK is acking last FIN. */ if (old_state == TCP_CONNTRACK_LAST_ACK && index == TCP_ACK_SET && ct->proto.tcp.last_dir != dir && ct->proto.tcp.last_index == TCP_SYN_SET && (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); return NF_ACCEPT; /* Don't change state */ } break; case TCP_CONNTRACK_SYN_SENT2: /* tcp_conntracks table is not smart enough to handle * simultaneous open. */ ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; break; case TCP_CONNTRACK_SYN_RECV: if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) new_state = TCP_CONNTRACK_ESTABLISHED; break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); return -NF_ACCEPT; } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then * c) we hold a half-open connection. * * Delete our connection entry. * We skip window checking, because packet might ACK * segments we ignored. */ goto in_window; } /* Just fall through */ default: /* Keep compilers happy. */ break; } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: /* From now on we have got in-window packets */ ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); nf_ct_dump_tuple(tuple); pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = tn->timeouts; if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; else if (ct->proto.tcp.last_win == 0 && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ if (new_state == TCP_CONNTRACK_ESTABLISHED && timeout > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; }
static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; bool res; /* * Get the required data from the packet. */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_maxwin == 0) { /* * Initialize sender data. */ if (tcph->syn) { /* * SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. */ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; if (!tcph->ack) /* Simultaneous open */ return true; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); } if (!(tcph->ack)) { /* * If there is no ACK, just pretend it was set and OK. */ ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { /* * Broken TCP stacks, that set ACK in RST packets as well * with zero ack value. */ ack = sack = receiver->td_end; } if (seq == end && (!tcph->rst || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) /* * Packets contains no data: we assume it is valid * and check the ack value only. * However RST segments are always validated by their * SEQ number, except when seq == 0 (reset sent answering * SYN. */ seq = end = sender->td_end; pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ if (!tcph->syn) win <<= sender->td_scale; /* * Update sender data. */ swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) { sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } if (tcph->ack) { if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { sender->td_maxack = ack; sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; } else if (after(ack, sender->td_maxack)) sender->td_maxack = ack; } /* * Update receiver data. */ if (after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } if (ack == receiver->td_end) receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. */ if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win; state->retrans = 0; } } res = true; } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " "receiver end=%u maxend=%u maxwin=%u\n", res, sender->td_end, sender->td_maxend, sender->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; }
unsigned int rtl_find_appropriate_newrange(struct nf_conn *ct, __be32 newsrc, const struct nf_nat_multi_range_compat *mr) { struct net *net; unsigned int ret,expectcount = net->ct.expect_count; u_int16_t minport, maxport; u_int16_t newport, tmpport; struct nf_conntrack_expect *exp=NULL; struct nf_conntrack_tuple tuple; struct nf_nat_range newrange; struct nf_conn_help *help = nfct_help(ct); net = nf_ct_net(ct); expectcount = net->ct.expect_count; /* Choose port */ spin_lock_bh(&nf_conntrack_lock); #if 0 exp = LIST_FIND(&nf_conntrack_expect_list, exp_src_cmp, struct nf_conntrack_expect *, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif memset(&tuple,0,sizeof(tuple)); //src tuple.src.l3num = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num; tuple.src.u3.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; tuple.src.u.udp.port = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.udp.port; //dst tuple.dst.u3.ip = newsrc; //tuple.dst.u.udp.port = htons(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port); newport = htons(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port); tuple.dst.protonum = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; pr_debug("tupple1 = %pI4:%hu\n", &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip,ntohs(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port)); if(expectcount > 0){ for(tmpport=0; (tmpport<=expectcount)&&(newport<=65535); tmpport++,newport++){ tuple.dst.u.udp.port=newport; exp = __nf_ct_expect_find_bysave(net, &tuple, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); if(exp) break; } } if (exp) { minport = maxport = exp->tuple.dst.u.udp.port; pr_debug("existing mapped port = %hu\n", ntohs(minport)); } else { minport = mr->range[0].min.udp.port == 0 ? ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port : mr->range[0].min.udp.port; maxport = mr->range[0].max.udp.port == 0 ? htons(65535) : mr->range[0].max.udp.port; for (newport = ntohs(minport),tmpport = ntohs(maxport); newport <= tmpport; newport++) { #if 0 exp = LIST_FIND(&ip_conntrack_expect_list, exp_cmp, struct nf_conntrack_expect *, newsrc, htons(newport), ct->tuplehash[IP_CT_DIR_ORIGINAL]. tuple.dst.protonum); #endif //dst tuple.dst.u.udp.port = htons(newport); exp = __nf_ct_expect_find(net, &tuple); if (!exp) { pr_debug("new mapping: %pI4:%hu -> %pI4:%hu\n", &(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip), ntohs(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.udp.port), &newsrc, newport); minport = maxport = htons(newport); break; } } } spin_unlock_bh(&nf_conntrack_lock); newrange.flags = mr->range[0].flags | IP_NAT_RANGE_MAP_IPS |IP_NAT_RANGE_PROTO_SPECIFIED; newrange.min_ip = newrange.max_ip = newsrc; newrange.min.udp.port = minport; newrange.max.udp.port = maxport; /* Set ct helper */ ret = nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC); if (ret == NF_ACCEPT) { rcu_read_lock(); if (help == NULL) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help == NULL) { return NF_ACCEPT; } } else { memset(&help->help, 0, sizeof(help->help)); } rcu_assign_pointer(help->helper, &nf_conntrack_helper_cone_nat); rcu_read_unlock(); } return ret; }
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); type = dh->dccph_type; if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* Tear down connection immediately if only reply is a RESET */ nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; new_state = dccp_state_table[role][type][old_state]; switch (new_state) { case CT_DCCP_REQUEST: if (old_state == CT_DCCP_TIMEWAIT && role == CT_DCCP_ROLE_SERVER) { /* Reincarnation in the reverse direction: reopen and * reverse client/server roles. */ ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; } break; case CT_DCCP_RESPOND: if (old_state == CT_DCCP_REQUEST) ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); break; case CT_DCCP_PARTOPEN: if (old_state == CT_DCCP_RESPOND && type == DCCP_PKT_ACK && dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) set_bit(IPS_ASSURED_BIT, &ct->status); break; case CT_DCCP_IGNORE: /* * Connection tracking might be out of sync, so we ignore * packets that might establish a new connection and resync * if the server responds with a valid Response. */ if (ct->proto.dccp.last_dir == !dir && ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && type == DCCP_PKT_RESPONSE) { ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); new_state = CT_DCCP_RESPOND; break; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; }
static int ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) { struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; const struct ipv6_pinfo *inet6 = inet6_sk(sk); const struct inet_sock *inet = inet_sk(sk); const struct nf_conntrack_tuple_hash *h; struct sockaddr_in6 sin6; struct nf_conn *ct; __be32 flow_label; int bound_dev_if; lock_sock(sk); tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; bound_dev_if = sk->sk_bound_dev_if; flow_label = inet6->flow_label; release_sock(sk); if (tuple.dst.protonum != IPPROTO_TCP && tuple.dst.protonum != IPPROTO_SCTP) return -ENOPROTOOPT; if (*len < 0 || (unsigned int)*len < sizeof(sin6)) return -EINVAL; h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); if (!h) { pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); return -ENOENT; } ct = nf_ct_tuplehash_to_ctrack(h); sin6.sin6_family = AF_INET6; sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; memcpy(&sin6.sin6_addr, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, sizeof(sin6.sin6_addr)); nf_ct_put(ct); sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; } static struct nf_sockopt_ops so_getorigdst6 = { .pf = NFPROTO_IPV6, .get_optmin = IP6T_SO_ORIGINAL_DST, .get_optmax = IP6T_SO_ORIGINAL_DST + 1, .get = ipv6_getorigdst, .owner = THIS_MODULE, }; static unsigned int ipv6_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned char pnum = ipv6_hdr(skb)->nexthdr; int protoff; __be16 frag_off; ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); goto out; } /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } } out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } static unsigned int ipv6_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } static unsigned int ipv6_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conn *ct; const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; __be16 frag_off; int protoff; u8 nexthdr; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) return NF_ACCEPT; help = nfct_help(ct); if (!help) return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; nexthdr = ipv6_hdr(skb)->nexthdr; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); return NF_ACCEPT; } return helper->help(skb, protoff, ct, ctinfo); } static const struct nf_hook_ops ipv6_conntrack_ops[] = { { .hook = ipv6_conntrack_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_helper,