static struct nf_conn_colo *nfct_create_colo(struct nf_conn *ct,
					     u32 vm_pid, u32 flag)
{
	struct nf_conn_colo *conn = NULL;
	size_t length = 0;

	if (nf_ct_is_confirmed(ct)) {
		pr_dbg("conntrack %p is confirmed!\n", ct);
		//return NULL;
	}

	if (nf_ct_protonum(ct) == IPPROTO_TCP) {
		length = sizeof(union nf_conn_colo_tcp);
		if (flag & COLO_CONN_SECONDARY) {
			/* seq adjust is only meaningful for a TCP conn */
			if (!nfct_seqadj_ext_add(ct))
				pr_dbg("failed to add SEQADJ extension\n");
		}
	}

	conn = (struct nf_conn_colo *)nf_ct_ext_add_length(ct, NF_CT_EXT_COLO,
							   length, GFP_ATOMIC);
	if (!conn) {
		pr_dbg("failed to add COLO extension\n");
		return NULL;
	}

	conn->nfct = &ct->ct_general;
	return conn;
}
static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
			      struct sk_buff *skb,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conn_nat *nat;
	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
	unsigned int ret;

	if (ct == NULL || nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	nat = nfct_nat(ct);
	if (nat == NULL) {
		/* Conntrack module was loaded late, can't add extension. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL)
			return NF_ACCEPT;
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED + IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   ops->hooknum))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall through */
	case IP_CT_NEW:
		if (nf_nat_initialized(ct, maniptype))
			break;

		ret = nft_do_chain(ops, skb, in, out, okfn);
		if (ret != NF_ACCEPT)
			return ret;
		if (!nf_nat_initialized(ct, maniptype)) {
			ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
			if (ret != NF_ACCEPT)
				return ret;
		}
	default:
		break;
	}

	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
}
struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
{
	struct nf_conn_nat *nat = nfct_nat(ct);

	if (nat)
		return nat;

	if (!nf_ct_is_confirmed(ct))
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);

	return nat;
}
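nf_ct_nat_ext_add() wraps the late-extension pattern that several nf_nat_fn() variants below open-code. A minimal hedged sketch of a caller; example_nat_hook() is purely hypothetical and only the helper name comes from the snippet above:

/* Hedged usage sketch, not from the source tree: a hook entry point
 * fetches or creates the NAT extension and skips NAT gracefully when
 * the conntrack is already confirmed or allocation fails. */
static unsigned int example_nat_hook(struct nf_conn *ct, struct sk_buff *skb)
{
	struct nf_conn_nat *nat;

	nat = nf_ct_nat_ext_add(ct);
	if (nat == NULL)	/* confirmed too early or out of memory */
		return NF_ACCEPT;

	/* ... NAT setup and packet mangling would follow here ... */
	return NF_ACCEPT;
}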
/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
	    && ecache->events)
		atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
					   ecache->ct);

	ecache->events = 0;
	nf_ct_put(ecache->ct);
	ecache->ct = NULL;
}
/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned long events, missed;
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	int ret;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (notify == NULL)
		goto out_unlock;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		goto out_unlock;

	events = xchg(&e->cache, 0);

	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
		goto out_unlock;

	/* We make a copy of the missed event cache without taking
	 * the lock, thus we may send missed events twice. However,
	 * this does not harm and it happens very rarely.
	 */
	missed = e->missed;

	if (!((events | missed) & e->ctmask))
		goto out_unlock;

	item.ct = ct;
	item.portid = 0;
	item.report = 0;

	ret = notify->fcn(events | missed, &item);

	if (likely(ret >= 0 && !missed))
		goto out_unlock;

	spin_lock_bh(&ct->lock);
	if (ret < 0)
		e->missed |= events;
	else
		e->missed &= ~missed;
	spin_unlock_bh(&ct->lock);

out_unlock:
	rcu_read_unlock();
}
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	unsigned long events, missed;
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	int ret;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		return;

	events = xchg(&e->cache, 0);

	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
		return;

	/* We make a copy of the missed event cache without taking
	 * the lock, thus we may send missed events twice. However,
	 * this does not harm and it happens very rarely.
	 */
	missed = e->missed;

	if (!((events | missed) & e->ctmask))
		return;

	item.ct = ct;
	item.pid = 0;
	item.report = 0;

	/* Capture the notifier chain's verdict so the missed-event
	 * bookkeeping below can see failures. */
	ret = atomic_notifier_call_chain(&nf_conntrack_chain,
					 events | missed, &item);

	if (likely(ret >= 0 && !missed))
		return;

	spin_lock_bh(&ct->lock);
	if (ret < 0)
		e->missed |= events;
	else
		e->missed &= ~missed;
	spin_unlock_bh(&ct->lock);
}
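For context, a hedged sketch of how a listener could attach to the nf_conntrack_chain used by the notifier-based delivery paths above; the callback and notifier-block names are illustrative, not from the source:

/* Hedged sketch: an event consumer on the notifier-chain variant of the
 * ecache API. 'events' carries the IPCT_* bits, 'ptr' the event item. */
static int example_ct_event(struct notifier_block *this,
			    unsigned long events, void *ptr)
{
	struct nf_ct_event *item = ptr;

	/* react to the IPCT_* bits in 'events' for item->ct here */
	return NOTIFY_DONE;
}

static struct notifier_block example_ct_nb = {
	.notifier_call = example_ct_event,
};

/* in module init, under the same assumption:
 *	atomic_notifier_chain_register(&nf_conntrack_chain, &example_ct_nb);
 */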
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action.
 */
static void ovs_ct_update_key(const struct sk_buff *skb,
			      struct sw_flow_key *key, bool post_ct)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u8 state = 0;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		state = ovs_ct_get_state(ctinfo);
		if (!nf_ct_is_confirmed(ct))
			state |= OVS_CS_F_NEW;
		if (ct->master)
			state |= OVS_CS_F_RELATED;
		zone = nf_ct_zone(ct);
	} else if (post_ct) {
		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
	}
	__ovs_ct_update_key(key, state, zone, ct);
}
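Both ovs_ct_update_key() variants assume an ovs_ct_get_state() helper that maps the conntrack info to OVS_CS_F_* bits. A plausible hedged sketch of that mapping; the real helper lives elsewhere in net/openvswitch/conntrack.c and may differ in detail:

/* Hedged sketch of the assumed helper: derive OVS connection-state
 * flags from the ctinfo attached to the skb. */
static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
{
	u8 ct_state = OVS_CS_F_TRACKED;

	switch (ctinfo) {
	case IP_CT_ESTABLISHED_REPLY:
	case IP_CT_RELATED_REPLY:
		ct_state |= OVS_CS_F_REPLY_DIR;
		break;
	default:
		break;
	}

	switch (ctinfo) {
	case IP_CT_ESTABLISHED:
	case IP_CT_ESTABLISHED_REPLY:
		ct_state |= OVS_CS_F_ESTABLISHED;
		break;
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		ct_state |= OVS_CS_F_RELATED;
		break;
	case IP_CT_NEW:
		ct_state |= OVS_CS_F_NEW;
		break;
	default:
		break;
	}

	return ct_state;
}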
/* Returns verdict for packet, and may modify conntracktype */
int nf_conntrack_udp_packet(struct nf_conn *ct,
			    struct sk_buff *skb,
			    unsigned int dataoff,
			    enum ip_conntrack_info ctinfo,
			    const struct nf_hook_state *state)
{
	unsigned int *timeouts;

	if (udp_error(skb, dataoff, state))
		return -NF_ACCEPT;

	timeouts = nf_ct_timeout_lookup(ct);
	if (!timeouts)
		timeouts = udp_get_timeouts(nf_ct_net(ct));

	if (!nf_ct_is_confirmed(ct))
		ct->proto.udp.stream_ts = 2 * HZ + jiffies;

	/* If we've seen traffic both ways, this is some kind of UDP
	 * stream. Set Assured.
	 */
	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
		unsigned long extra = timeouts[UDP_CT_UNREPLIED];

		/* Still active after two seconds? Extend timeout. */
		if (time_after(jiffies, ct->proto.udp.stream_ts))
			extra = timeouts[UDP_CT_REPLIED];

		nf_ct_refresh_acct(ct, ctinfo, skb, extra);

		/* Also, more likely to be important, and not a probe */
		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
			nf_conntrack_event_cache(IPCT_ASSURED, ct);
	} else {
		nf_ct_refresh_acct(ct, ctinfo, skb,
				   timeouts[UDP_CT_UNREPLIED]);
	}
	return NF_ACCEPT;
}
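The udp_get_timeouts() fallback above is assumed rather than shown. A minimal sketch consistent with its use here, returning the per-netns default timeout table; nf_udp_pernet() is likewise an assumption of this sketch:

/* Hedged sketch: fall back to the per-netns UDP timeout table when no
 * per-conntrack timeout policy is attached. */
static unsigned int *udp_get_timeouts(struct net *net)
{
	return nf_udp_pernet(net)->timeouts;
}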
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action. If
 * 'keep_nat_flags' is true, the existing NAT flags are retained, else they
 * are initialized from the connection status.
 */
static void ovs_ct_update_key(const struct sk_buff *skb,
			      const struct ovs_conntrack_info *info,
			      struct sw_flow_key *key, bool post_ct,
			      bool keep_nat_flags)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u8 state = 0;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		state = ovs_ct_get_state(ctinfo);
		/* All unconfirmed entries are NEW connections. */
		if (!nf_ct_is_confirmed(ct))
			state |= OVS_CS_F_NEW;
		/* OVS persists the related flag for the duration of the
		 * connection.
		 */
		if (ct->master)
			state |= OVS_CS_F_RELATED;
		if (keep_nat_flags) {
			state |= key->ct.state & OVS_CS_F_NAT_MASK;
		} else {
			if (ct->status & IPS_SRC_NAT)
				state |= OVS_CS_F_SRC_NAT;
			if (ct->status & IPS_DST_NAT)
				state |= OVS_CS_F_DST_NAT;
		}
		zone = nf_ct_zone(ct);
	} else if (post_ct) {
		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
		if (info)
			zone = &info->zone;
	}
	__ovs_ct_update_key(key, state, zone, ct);
}
/* Returns verdict for packet, or -1 for invalid. */
static int icmpv6_packet(struct nf_conn *ct,
			 struct sk_buff *skb,
			 unsigned int dataoff,
			 enum ip_conntrack_info ctinfo,
			 const struct nf_hook_state *state)
{
	unsigned int *timeout = nf_ct_timeout_lookup(ct);
	static const u8 valid_new[] = {
		[ICMPV6_ECHO_REQUEST - 128] = 1,
		[ICMPV6_NI_QUERY - 128] = 1
	};

	if (state->pf != NFPROTO_IPV6)
		return -NF_ACCEPT;

	if (!nf_ct_is_confirmed(ct)) {
		int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;

		if (type < 0 || type >= sizeof(valid_new) ||
		    !valid_new[type]) {
			/* Can't create a new ICMPv6 `conn' with this. */
			pr_debug("icmpv6: can't create new conn with type %u\n",
				 type + 128);
			nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
			return -NF_ACCEPT;
		}
	}

	if (!timeout)
		timeout = icmpv6_get_timeouts(nf_ct_net(ct));

	/* Do not immediately delete the connection after the first
	 * successful reply to avoid excessive conntrackd traffic and also
	 * to handle correctly ICMP echo reply duplicates. */
	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);

	return NF_ACCEPT;
}
static unsigned int
nf_nat_ipv6_fn(unsigned int hooknum,
	       struct sk_buff *skb,
	       const struct net_device *in,
	       const struct net_device *out,
	       int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
	__be16 frag_off;
	int hdrlen;
	u8 nexthdr;

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat) {
		/* NAT module was loaded late. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		nexthdr = ipv6_hdr(skb)->nexthdr;
		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
					  &nexthdr, &frag_off);

		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
							     hooknum, hdrlen))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
				goto oif_changed;
		}
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
			goto oif_changed;
	}

	return nf_nat_packet(ct, ctinfo, hooknum, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}
static unsigned int nf_nat_fn(unsigned int hooknum,
			      struct sk_buff *skb,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	 * and local-out, and nf_nat_out protects post-routing. */
	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (ct == &nf_conntrack_untracked)
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat) {
		/* NAT module was loaded late. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED + IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
							   hooknum, skb))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.. */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			if (hooknum == NF_INET_LOCAL_IN)
				/* LOCAL_IN hook doesn't have a chain! */
				ret = alloc_null_binding(ct, hooknum);
			else
				ret = nf_nat_rule_find(skb, hooknum, in, out,
						       ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == (IP_CT_ESTABLISHED + IP_CT_IS_REPLY));
	}

	return nf_nat_packet(ct, ctinfo, hooknum, skb);
}
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  enum nf_nat_manip_type maniptype)
{
	struct nf_conntrack_tuple curr_tuple, new_tuple;

	/* Can't setup nat info for confirmed ct. */
	if (nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
		     maniptype == NF_NAT_MANIP_DST);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply. Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so it will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;

		if (nfct_help(ct))
			if (!nfct_seqadj_ext_add(ct))
				return NF_DROP;
	}

	if (maniptype == NF_NAT_MANIP_SRC) {
		struct nf_nat_conn_key key = {
			.net = nf_ct_net(ct),
			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
			.zone = nf_ct_zone(ct),
		};
		int err;

		err = rhltable_insert_key(&nf_nat_bysource_table, &key,
					  &ct->nat_bysource,
					  nf_nat_bysource_params);
		if (err)
			return NF_DROP;
	}

	/* It's done. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);

static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 * Use reply in case it's already been mangled (eg local packet).
	 */
	union nf_inet_addr ip =
		(manip == NF_NAT_MANIP_SRC ?
		 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
		 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
	struct nf_nat_range range = {
		.flags		= NF_NAT_RANGE_MAP_IPS,
		.min_addr	= ip,
		.max_addr	= ip,
	};
	return nf_nat_setup_info(ct, &range, manip);
}

unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		l3proto = __nf_nat_l3proto_find(target.src.l3num);
		l4proto = __nf_nat_l4proto_find(target.src.l3num,
						target.dst.protonum);
		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);

struct nf_nat_proto_clean {
	u8	l3proto;
	u8	l4proto;
};

/* kill conntracks with affected NAT section */
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
	const struct nf_nat_proto_clean *clean = data;

	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
		return 0;

	return i->status & IPS_NAT_MASK ? 1 : 0;
}

static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
	if (nf_nat_proto_remove(ct, data))
		return 1;

	if ((ct->status & IPS_SRC_NAT_DONE) == 0)
		return 0;

	/* This netns is being destroyed, and conntrack has nat null binding.
	 * Remove it from bysource hash, as the table will be freed soon.
	 *
	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
	 * will delete entry from already-freed table.
	 */
	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
			nf_nat_bysource_params);

	/* don't delete conntrack. Although that would make things a lot
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
	 */
	return 0;
}

static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
		.l4proto = l4proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

static void nf_nat_l3proto_clean(u8 l3proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	const struct nf_nat_l4proto **l4protos;
	unsigned int i;
	int ret = 0;

	mutex_lock(&nf_nat_proto_mutex);
	if (nf_nat_l4protos[l3proto] == NULL) {
		l4protos = kmalloc(IPPROTO_MAX *
				   sizeof(struct nf_nat_l4proto *),
				   GFP_KERNEL);
		if (l4protos == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		for (i = 0; i < IPPROTO_MAX; i++)
			RCU_INIT_POINTER(l4protos[i],
					 &nf_nat_l4proto_unknown);

		/* Before making proto_array visible to lockless readers,
		 * we must make sure its content is committed to memory.
		 */
		smp_wmb();

		nf_nat_l4protos[l3proto] = l4protos;
	}

	if (rcu_dereference_protected(
			nf_nat_l4protos[l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_nat_proto_mutex)
			) != &nf_nat_l4proto_unknown) {
		ret = -EBUSY;
		goto out;
	}
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
 out:
	mutex_unlock(&nf_nat_proto_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);

/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_l4proto_unregister(u8 l3proto,
			       const struct nf_nat_l4proto *l4proto)
{
	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
			 &nf_nat_l4proto_unknown);
	mutex_unlock(&nf_nat_proto_mutex);
	synchronize_rcu();

	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
}
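Every hook function in this section derives its manip type via HOOK2MANIP(). A hedged sketch of the mapping it is assumed to implement: SRC manipulation for POSTROUTING/LOCAL_IN (SNAT/masquerade on the way out), DST manipulation otherwise (DNAT/redirect on the way in):

/* Hedged sketch of the assumed macro. With NF_NAT_MANIP_SRC == 0 and
 * NF_NAT_MANIP_DST == 1, the boolean expression selects the manip type
 * directly from the hook number. */
#define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POSTROUTING && \
			     (hooknum) != NF_INET_LOCAL_IN)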
static unsigned int nf_nat_fn(unsigned int hooknum,
			      struct sk_buff *skb,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return NF_ACCEPT;

	if (ct == &nf_conntrack_untracked)
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat) {
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED + IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
							   hooknum, skb))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
	case IP_CT_NEW:
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			if (hooknum == NF_INET_LOCAL_IN)
				ret = alloc_null_binding(ct, hooknum);
			else
				ret = nf_nat_rule_find(skb, hooknum, in, out,
						       ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
		break;

	default:
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == (IP_CT_ESTABLISHED + IP_CT_IS_REPLY));
	}

	return nf_nat_packet(ct, ctinfo, hooknum, skb);
}
static unsigned int nf_nat_fn(unsigned int hooknum,
			      struct sk_buff *skb,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	 * and local-out, and nf_nat_out protects post-routing. */
	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR */
	if (!ct) {
		/* Exception: ICMP redirect to new connection (not in hash
		 * table yet). We must not let this through, in case we're
		 * doing NAT to the same network. */
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			struct icmphdr _hdr, *hp;

			hp = skb_header_pointer(skb, ip_hdrlen(skb),
						sizeof(_hdr), &_hdr);
			if (hp != NULL && hp->type == ICMP_REDIRECT)
				return NF_DROP;
		}
		return NF_ACCEPT;
	}

	/* Don't try to NAT if this packet is not conntracked */
	if (ct == &nf_conntrack_untracked)
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat)
		return NF_ACCEPT;

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED + IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
							   hooknum, skb))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.. */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			if (unlikely(nf_ct_is_confirmed(ct)))
				/* NAT module was loaded late */
				ret = alloc_null_binding_confirmed(ct,
								   hooknum);
			else if (hooknum == NF_IP_LOCAL_IN)
				/* LOCAL_IN hook doesn't have a chain! */
				ret = alloc_null_binding(ct, hooknum);
			else
				ret = nf_nat_rule_find(skb, hooknum, in, out,
						       ct);
			if (ret != NF_ACCEPT)
				return ret;
			ipt_cone_place_in_hashes(ct);
		} else
			DEBUGP("Already setup manip %s for ct %p\n",
			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
			       ct);
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == (IP_CT_ESTABLISHED + IP_CT_IS_REPLY));
	}

	return nf_nat_packet(ct, ctinfo, hooknum, skb);
}
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
		      struct sk_buff *skb,
		      unsigned int dataoff,
		      enum ip_conntrack_info ctinfo,
		      const struct nf_hook_state *state)
{
	struct net *net = nf_ct_net(ct);
	struct nf_tcp_net *tn = nf_tcp_pernet(net);
	struct nf_conntrack_tuple *tuple;
	enum tcp_conntrack new_state, old_state;
	unsigned int index, *timeouts;
	enum ip_conntrack_dir dir;
	const struct tcphdr *th;
	struct tcphdr _tcph;
	unsigned long timeout;

	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
	if (th == NULL)
		return -NF_ACCEPT;

	if (tcp_error(th, skb, dataoff, state))
		return -NF_ACCEPT;

	if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
		return -NF_ACCEPT;

	spin_lock_bh(&ct->lock);
	old_state = ct->proto.tcp.state;
	dir = CTINFO2DIR(ctinfo);
	index = get_conntrack_index(th);
	new_state = tcp_conntracks[dir][index][old_state];
	tuple = &ct->tuplehash[dir].tuple;

	switch (new_state) {
	case TCP_CONNTRACK_SYN_SENT:
		if (old_state < TCP_CONNTRACK_TIME_WAIT)
			break;
		/* RFC 1122: "When a connection is closed actively,
		 * it MUST linger in TIME-WAIT state for a time 2xMSL
		 * (Maximum Segment Lifetime). However, it MAY accept
		 * a new SYN from the remote TCP to reopen the connection
		 * directly from TIME-WAIT state, if..."
		 * We ignore the conditions because we are in the
		 * TIME-WAIT state anyway.
		 *
		 * Handle aborted connections: we and the server
		 * think there is an existing connection but the client
		 * aborts it and starts a new one.
		 */
		if (((ct->proto.tcp.seen[dir].flags
		      | ct->proto.tcp.seen[!dir].flags)
		     & IP_CT_TCP_FLAG_CLOSE_INIT)
		    || (ct->proto.tcp.last_dir == dir
			&& ct->proto.tcp.last_index == TCP_RST_SET)) {
			/* Attempt to reopen a closed/aborted connection.
			 * Delete this connection and look up again. */
			spin_unlock_bh(&ct->lock);

			/* Only repeat if we can actually remove the timer.
			 * Destruction may already be in progress in process
			 * context and we must give it a chance to terminate.
			 */
			if (nf_ct_kill(ct))
				return -NF_REPEAT;
			return NF_DROP;
		}
		/* Fall through */
	case TCP_CONNTRACK_IGNORE:
		/* Ignored packets:
		 *
		 * Our connection entry may be out of sync, so ignore
		 * packets which may signal the real connection between
		 * the client and the server.
		 *
		 * a) SYN in ORIGINAL
		 * b) SYN/ACK in REPLY
		 * c) ACK in reply direction after initial SYN in original.
		 *
		 * If the ignored packet is invalid, the receiver will send
		 * a RST we'll catch below.
		 */
		if (index == TCP_SYNACK_SET
		    && ct->proto.tcp.last_index == TCP_SYN_SET
		    && ct->proto.tcp.last_dir != dir
		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
			/* b) This SYN/ACK acknowledges a SYN that we earlier
			 * ignored as invalid. This means that the client and
			 * the server are both in sync, while the firewall is
			 * not. We get in sync from the previously annotated
			 * values.
			 */
			old_state = TCP_CONNTRACK_SYN_SENT;
			new_state = TCP_CONNTRACK_SYN_RECV;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
				ct->proto.tcp.last_end;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
				ct->proto.tcp.last_end;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
				ct->proto.tcp.last_win == 0 ?
					1 : ct->proto.tcp.last_win;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
				ct->proto.tcp.last_wscale;
			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
				ct->proto.tcp.last_flags;
			memset(&ct->proto.tcp.seen[dir], 0,
			       sizeof(struct ip_ct_tcp_state));
			break;
		}
		ct->proto.tcp.last_index = index;
		ct->proto.tcp.last_dir = dir;
		ct->proto.tcp.last_seq = ntohl(th->seq);
		ct->proto.tcp.last_end =
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     dataoff, th);
		ct->proto.tcp.last_win = ntohs(th->window);

		/* a) This is a SYN in ORIGINAL. The client and the server
		 * may be in sync but we are not. In that case, we annotate
		 * the TCP options and let the packet go through. If it is a
		 * valid SYN packet, the server will reply with a SYN/ACK, and
		 * then we'll get in sync. Otherwise, the server potentially
		 * responds with a challenge ACK if implementing RFC5961.
		 */
		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
			struct ip_ct_tcp_state seen = {};

			ct->proto.tcp.last_flags =
			ct->proto.tcp.last_wscale = 0;
			tcp_options(skb, dataoff, th, &seen);
			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
				ct->proto.tcp.last_flags |=
					IP_CT_TCP_FLAG_WINDOW_SCALE;
				ct->proto.tcp.last_wscale = seen.td_scale;
			}
			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
				ct->proto.tcp.last_flags |=
					IP_CT_TCP_FLAG_SACK_PERM;
			}
			/* Mark the potential for an RFC5961 challenge ACK;
			 * this poses a special problem for LAST_ACK state
			 * as the ACK is interpreted as ACKing the last FIN.
			 */
			if (old_state == TCP_CONNTRACK_LAST_ACK)
				ct->proto.tcp.last_flags |=
					IP_CT_EXP_CHALLENGE_ACK;
		}
		spin_unlock_bh(&ct->lock);
		nf_ct_l4proto_log_invalid(skb, ct,
					  "invalid packet ignored in state %s ",
					  tcp_conntrack_names[old_state]);
		return NF_ACCEPT;
	case TCP_CONNTRACK_MAX:
		/* Special case for SYN proxy: when the SYN to the server or
		 * the SYN/ACK from the server is lost, the client may transmit
		 * a keep-alive packet while in SYN_SENT state. This needs to
		 * be associated with the original conntrack entry in order to
		 * generate a new SYN with the correct sequence number.
		 */
		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
			spin_unlock_bh(&ct->lock);
			return NF_ACCEPT;
		}

		/* Invalid packet */
		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
			 dir, get_conntrack_index(th), old_state);
		spin_unlock_bh(&ct->lock);
		nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
		return -NF_ACCEPT;
	case TCP_CONNTRACK_TIME_WAIT:
		/* RFC5961 compliance causes the stack to send a
		 * "challenge ACK", e.g. in response to spurious SYNs.
		 * Conntrack MUST not believe this ACK is acking the last FIN.
		 */
		if (old_state == TCP_CONNTRACK_LAST_ACK &&
		    index == TCP_ACK_SET &&
		    ct->proto.tcp.last_dir != dir &&
		    ct->proto.tcp.last_index == TCP_SYN_SET &&
		    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
			/* Detected RFC5961 challenge ACK */
			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
			spin_unlock_bh(&ct->lock);
			nf_ct_l4proto_log_invalid(skb, ct,
						  "challenge-ack ignored");
			return NF_ACCEPT; /* Don't change state */
		}
		break;
	case TCP_CONNTRACK_SYN_SENT2:
		/* tcp_conntracks table is not smart enough to handle
		 * simultaneous open.
		 */
		ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
		break;
	case TCP_CONNTRACK_SYN_RECV:
		if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
		    ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
			new_state = TCP_CONNTRACK_ESTABLISHED;
		break;
	case TCP_CONNTRACK_CLOSE:
		if (index == TCP_RST_SET &&
		    (ct->proto.tcp.seen[!dir].flags &
		     IP_CT_TCP_FLAG_MAXACK_SET) &&
		    before(ntohl(th->seq),
			   ct->proto.tcp.seen[!dir].td_maxack)) {
			/* Invalid RST */
			spin_unlock_bh(&ct->lock);
			nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
			return -NF_ACCEPT;
		}
		if (index == TCP_RST_SET &&
		    ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) &&
		      ct->proto.tcp.last_index == TCP_SYN_SET) ||
		     (!test_bit(IPS_ASSURED_BIT, &ct->status) &&
		      ct->proto.tcp.last_index == TCP_ACK_SET)) &&
		    ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
			/* RST sent to invalid SYN or ACK we had let through
			 * at a) and c) above:
			 *
			 * a) SYN was in window then
			 * c) we hold a half-open connection.
			 *
			 * Delete our connection entry.
			 * We skip window checking, because packet might ACK
			 * segments we ignored. */
			goto in_window;
		}
		/* Just fall through */
	default:
		/* Keep compilers happy. */
		break;
	}

	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
			   skb, dataoff, th)) {
		spin_unlock_bh(&ct->lock);
		return -NF_ACCEPT;
	}
in_window:
	/* From now on we have got in-window packets */
	ct->proto.tcp.last_index = index;
	ct->proto.tcp.last_dir = dir;

	pr_debug("tcp_conntracks: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
		 old_state, new_state);

	ct->proto.tcp.state = new_state;
	if (old_state != new_state
	    && new_state == TCP_CONNTRACK_FIN_WAIT)
		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;

	timeouts = nf_ct_timeout_lookup(ct);
	if (!timeouts)
		timeouts = tn->timeouts;

	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
		timeout = timeouts[TCP_CONNTRACK_RETRANS];
	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
		timeout = timeouts[TCP_CONNTRACK_UNACK];
	else if (ct->proto.tcp.last_win == 0 &&
		 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
		timeout = timeouts[TCP_CONNTRACK_RETRANS];
	else
		timeout = timeouts[new_state];
	spin_unlock_bh(&ct->lock);

	if (new_state != old_state)
		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);

	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
		/* If only reply is a RST, we can consider ourselves not to
		 * have an established connection: this is a fairly common
		 * problem case, so we can delete the conntrack
		 * immediately. --RR */
		if (th->rst) {
			nf_ct_kill_acct(ct, ctinfo, skb);
			return NF_ACCEPT;
		}

		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
		 */
		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
		    timeout > timeouts[TCP_CONNTRACK_UNACK])
			timeout = timeouts[TCP_CONNTRACK_UNACK];
	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
		   && (old_state == TCP_CONNTRACK_SYN_RECV
		       || old_state == TCP_CONNTRACK_ESTABLISHED)
		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
		/* Set ASSURED if we see a valid ack in ESTABLISHED
		 * after SYN_RECV or a valid answer for a picked up
		 * connection. */
		set_bit(IPS_ASSURED_BIT, &ct->status);
		nf_conntrack_event_cache(IPCT_ASSURED, ct);
	}
	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);

	return NF_ACCEPT;
}
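tcp_packet() indexes the tcp_conntracks state table via get_conntrack_index(). A hedged sketch of that helper, consistent with its use above; the TCP_*_SET index names appear in the snippet, while the flag-priority order is this sketch's assumption:

/* Hedged sketch of the assumed helper: classify a segment by its most
 * significant flag, in RST > SYN/SYN-ACK > FIN > ACK order. */
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
	if (tcph->rst)
		return TCP_RST_SET;
	else if (tcph->syn)
		return tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET;
	else if (tcph->fin)
		return TCP_FIN_SET;
	else if (tcph->ack)
		return TCP_ACK_SET;
	else
		return TCP_NONE_SET;
}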
static unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
				   struct sk_buff *skb,
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	 * and local-out, and nf_nat_out protects post-routing.
	 */
	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat) {
		/* NAT module was loaded late. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   ops->hooknum))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
				goto oif_changed;
		}
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
			goto oif_changed;
	}

	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}
/* after ipt_filter */
static unsigned int ezp_nat_pre_hook(unsigned int hooknum,
				     struct sk_buff *skb,
				     const struct net_device *indev,
				     const struct net_device *outdev,
				     int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	int ret = NF_ACCEPT;
	enum ip_conntrack_dir dir;
	__u32 dnat_addr = 0, snat_addr = 0;
	int *nat_flag;
	struct dst_entry **dst_to_use = NULL;
	struct iphdr *iph = ip_hdr(skb);
	struct icmphdr *hdr = icmp_hdr(skb);
	struct tcphdr *tcph = tcp_hdr(skb);
	/* EZP: enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); */

	if (!ezp_nat_enable_flag)
		return NF_ACCEPT;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct) {
		if (iph->protocol == IPPROTO_ICMP &&
		    hdr->type == ICMP_REDIRECT)
			return NF_DROP;
		return NF_ACCEPT;
	}

	/* TCP or UDP. */
	if ((iph->protocol != IPPROTO_TCP) &&
	    (iph->protocol != IPPROTO_UDP))
		return NF_ACCEPT;

	if ((iph->protocol == IPPROTO_TCP) &&
	    ((tcp_flag_word(tcph) & (TCP_FLAG_RST | TCP_FLAG_SYN)) ==
	     TCP_FLAG_SYN))
		return NF_ACCEPT;

	/* Make sure it is confirmed. */
	if (!nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	/* We comment out this part since
	 * 1. conntrack establishing is a 2-way process, but after routing we
	 *    have an established routing entry and address resolution table,
	 *    so we don't need to check the ESTABLISHED state.
	 * 2. With establishing state, we need to go through forward state and
	 *    routing several times. It may occur that our held entry gets
	 *    replaced.
	 */
	/* if ((ctinfo != IP_CT_ESTABLISHED) &&
	 *     (ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)) {
	 *	return NF_ACCEPT;
	 * } */

	dir = CTINFO2DIR(ctinfo);
	if (dir == IP_CT_DIR_ORIGINAL) {
		if (!ct->orgdir_dst) {
			return NF_ACCEPT;
		} else {
			nat_flag = &ct->orgdir_rid;
			if (!(*nat_flag & ((1 << IP_NAT_MANIP_DST) |
					   (1 << IP_NAT_MANIP_SRC) |
					   (1 << EZP_IP_LOCAL_IN))))
				return NF_ACCEPT;
			/* Check only in forward case and ignore input case */
			if (!(*nat_flag & (1 << EZP_IP_LOCAL_IN))) {
				if ((!ct->orgdir_dst->hh) &&
				    (!ct->orgdir_dst->neighbour)) {
					printk("%s:orig dst and neighbour null dir\n",
					       __FUNCTION__);
					return NF_ACCEPT;
				}
			}
			if (skb->dst) {
				/* skb might have its own dst already,
				 * e.g. output to local input */
				dst_release(skb->dst);
			}
			skb->protocol = htons(ETH_P_IP);
			skb->dst = ct->orgdir_dst;
			/* XXX: */
			skb->dev = ct->orgdir_dst->dev;
			/* skb uses this dst_entry */
			dst_use(skb->dst, jiffies);
			dst_to_use = &ct->orgdir_dst;
		}
	} else { /* IP_CT_DIR_REPLY */
		if (!ct->replydir_dst) {
			return NF_ACCEPT;
		} else {
			nat_flag = &ct->replydir_rid;
			if (!(*nat_flag & ((1 << IP_NAT_MANIP_DST) |
					   (1 << IP_NAT_MANIP_SRC) |
					   (1 << EZP_IP_LOCAL_IN))))
				return NF_ACCEPT;
			/* Check only in forward case and ignore input case */
			if (!(*nat_flag & (1 << EZP_IP_LOCAL_IN))) {
				if ((!ct->replydir_dst->hh) &&
				    (!ct->replydir_dst->neighbour)) {
					printk("%s:reply dst and neighbour null\n",
					       __FUNCTION__);
					return NF_ACCEPT;
				}
			}
			if (skb->dst) {
				/* skb might have its own dst already,
				 * e.g. output to local input */
				dst_release(skb->dst);
			}
			skb->protocol = htons(ETH_P_IP);
			skb->dst = ct->replydir_dst;
			/* XXX: */
			skb->dev = ct->replydir_dst->dev;
			/* skb uses this dst_entry */
			dst_use(skb->dst, jiffies);
			dst_to_use = &ct->replydir_dst;
		}
	}

	/* After this point, every "return NF_ACCEPT" action needs to release
	 * the held dst entry. So we use "goto release_dst_and_return" to
	 * handle that action in one place.
	 */
	/* EZP:
	 * if (!nf_nat_initialized(ct, maniptype)) {
	 *	goto release_dst_and_return;
	 * } */

	/* If we have a helper, we need to go through the original path
	 * until the conntrack is confirmed. */
	if (nfct_help(ct))
		goto release_dst_and_return;

	if (dir == IP_CT_DIR_ORIGINAL)
		skb->imq_flags = ct->ct_orig_imq_flags;
	else
		skb->imq_flags = ct->ct_repl_imq_flags;

	/* PRE_ROUTING NAT */
	/* Assume DNAT conntrack is ready. */
	if (*nat_flag & (1 << IP_NAT_MANIP_DST)) {
		dnat_addr = iph->daddr;
		ret = nf_nat_packet(ct, ctinfo, NF_INET_PRE_ROUTING, skb);
		if (ret != NF_ACCEPT)
			goto release_dst_and_return;
		if (dnat_addr == iph->daddr)
			*nat_flag &= ~(1 << IP_NAT_MANIP_DST);
	}

	/* INPUT */
	if (*nat_flag & (1 << EZP_IP_LOCAL_IN)) {
		/* TODO: use ip_local_deliver_finish() and add ip_defrag(). */
		/* XXX: Not sure this will hit or not. */
		/* Reassemble IP fragments. */
		if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
			if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) {
				/* If return value is not 0, defrag error */
				/* return 0; */
				/* XXX: return NF_STOLEN? */
				goto release_dst_and_return;
			}
		}
		/* For INPUT path, there is no need to check dst_mtu, only
		 * defrag.
		if (skb->len > dst_mtu(&((struct rtable *)skb->dst)->u.dst)) {
			goto release_dst_and_return;
		} */
		if (ezp_nat_queue_enable_flag) {
			if (skb->imq_flags & IMQ_F_ENQUEUE) {
				struct nf_hook_ops *elem = nf_get_imq_ops();

				/* To apply IMQ, we have to check the IMQ
				 * flag; if it is set, we enqueue this skb
				 * and leave it to IMQ. */
				if (elem != NULL) {
					nf_queue(skb,
						 (struct list_head *)elem,
						 AF_INET,
						 NF_INET_POST_ROUTING,
						 (struct net_device *)indev,
						 (struct net_device *)
						 ((struct rtable *)skb->dst)->u.dst.dev,
						 ip_local_deliver_finish,
						 NF_ACCEPT >> NF_VERDICT_BITS);
					return NF_STOLEN;
				}
			}
		}
		ret = ip_local_deliver_finish(skb);
		return NF_STOLEN;
	}