Example 1
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action.
 */
static void ovs_ct_update_key(const struct sk_buff *skb,
			      struct sw_flow_key *key, bool post_ct)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u8 state = 0;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		state = ovs_ct_get_state(ctinfo);
		if (!nf_ct_is_confirmed(ct))
			state |= OVS_CS_F_NEW;
		if (ct->master)
			state |= OVS_CS_F_RELATED;
		zone = nf_ct_zone(ct);
	} else if (post_ct) {
		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
	}
	__ovs_ct_update_key(key, state, zone, ct);
}
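
To see the flag derivation in isolation: below is a minimal, compilable sketch of the same bit logic. The CS_F_* values are stand-ins chosen for illustration (the real OVS_CS_F_* constants live in the OVS uAPI header), and the single "established" assignment simplifies what ovs_ct_get_state() derives from ctinfo.

#include <stdint.h>
#include <stdio.h>

#define CS_F_NEW	 0x01	/* stand-in for OVS_CS_F_NEW */
#define CS_F_ESTABLISHED 0x02	/* stand-in for OVS_CS_F_ESTABLISHED */
#define CS_F_RELATED	 0x04	/* stand-in for OVS_CS_F_RELATED */
#define CS_F_INVALID	 0x10	/* stand-in for OVS_CS_F_INVALID */
#define CS_F_TRACKED	 0x20	/* stand-in for OVS_CS_F_TRACKED */

/* Mirrors the branches of ovs_ct_update_key(): a found entry is
 * tracked (and possibly new/related); a miss after the ct action is
 * tracked but invalid; anything else stays untracked (state 0). */
static uint8_t ct_state(int found, int confirmed, int has_master, int post_ct)
{
	uint8_t state = 0;

	if (found) {
		state = CS_F_TRACKED | CS_F_ESTABLISHED;
		if (!confirmed)
			state |= CS_F_NEW;	/* unconfirmed entries are NEW */
		if (has_master)
			state |= CS_F_RELATED;	/* spawned from an expectation */
	} else if (post_ct) {
		state = CS_F_TRACKED | CS_F_INVALID;
	}
	return state;
}

int main(void)
{
	printf("unconfirmed hit: 0x%02x\n", ct_state(1, 0, 0, 0));
	printf("post-ct miss:    0x%02x\n", ct_state(0, 0, 0, 1));
	return 0;
}
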
Example 2
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
						struct sk_buff *skb)
{
	u16 zone = NF_CT_DEFAULT_ZONE;

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (skb->nfct)
		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif

#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	if (skb->nf_bridge &&
	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
		return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
#endif
	if (hooknum == NF_INET_PRE_ROUTING)
		return IP6_DEFRAG_CONNTRACK_IN + zone;
	else
		return IP6_DEFRAG_CONNTRACK_OUT + zone;
}
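
The additions in the return statements are doing namespace separation: each IP6_DEFRAG_CONNTRACK_* enumerator is the base of a band wide enough to hold any 16-bit zone id, so fragments belonging to different zones (or arriving via different paths) are reassembled in disjoint queues. A self-contained sketch of the scheme, with hypothetical bases spaced the way the kernel spaces its enum:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical bases, 65536 apart so that base + zone never collides
 * across bands (the kernel sizes its bands similarly). */
enum frag_user {
	FRAG_CT_IN        = 0,
	FRAG_CT_OUT       = 65536,
	FRAG_CT_BRIDGE_IN = 131072,
};

static unsigned int frag_user(int bridged, int prerouting, uint16_t zone)
{
	if (bridged)
		return FRAG_CT_BRIDGE_IN + zone;
	return (prerouting ? FRAG_CT_IN : FRAG_CT_OUT) + zone;
}

int main(void)
{
	/* Same hook, different zones: distinct reassembly queues. */
	printf("%u vs %u\n", frag_user(0, 1, 1), frag_user(0, 1, 2));
	return 0;
}
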
Example 3
static void pptp_expectfn(struct nf_conn *ct,
			 struct nf_conntrack_expect *exp)
{
	struct net *net = nf_ct_net(ct);
	typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
	pr_debug("increasing timeouts\n");

	/* increase timeout of GRE data channel conntrack entry */
	ct->proto.gre.timeout	     = PPTP_GRE_TIMEOUT;
	ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;

	/* Can you see how rusty this code is, compared with the pre-2.6.11
	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */

	rcu_read_lock();
	nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
	if (nf_nat_pptp_expectfn && (ct->master->status & IPS_NAT_MASK)) {
		nf_nat_pptp_expectfn(ct, exp);
	} else {
		struct nf_conntrack_tuple inv_t;
		struct nf_conntrack_expect *exp_other;

		/* obviously this tuple inversion only works until you do NAT */
		nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
		pr_debug("trying to unexpect other dir: ");
		nf_ct_dump_tuple(&inv_t);

		exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t);
		if (exp_other) {
			/* delete other expectation.  */
			pr_debug("found\n");
			nf_ct_unexpect_related(exp_other);
			nf_ct_expect_put(exp_other);
		} else {
			pr_debug("not found\n");
		}
	}
	rcu_read_unlock();
}
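
The else branch relies on tuple inversion to name the expectation the other direction would have set up, and, as the comment warns, that only works while no NAT has rewritten the addresses. Stripped of kernel types, the inversion is just an endpoint swap; a toy version over a simplified tuple:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct nf_conntrack_tuple: only the fields
 * this illustration needs. */
struct tuple {
	uint32_t src_ip, dst_ip;
	uint16_t src_key, dst_key;	/* GRE call ids */
};

/* Toy analogue of nf_ct_invert_tuplepr(): describe the reverse flow by
 * swapping endpoints. Once NAT rewrites one side, the real reverse flow
 * no longer matches this inversion. */
static void invert(struct tuple *inv, const struct tuple *t)
{
	inv->src_ip  = t->dst_ip;
	inv->dst_ip  = t->src_ip;
	inv->src_key = t->dst_key;
	inv->dst_key = t->src_key;
}

int main(void)
{
	struct tuple t = { 0x0a000001, 0x0a000002, 42, 99 }, inv;

	invert(&inv, &t);
	printf("inverted keys: %u/%u\n", inv.src_key, inv.dst_key);
	return 0;
}
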
Example 4
/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
			    const struct ovs_conntrack_info *info)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return false;
	if (!net_eq(net, read_pnet(&ct->ct_net)))
		return false;
	if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
		return false;
	if (info->helper) {
		struct nf_conn_help *help;

		help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
		if (help && rcu_access_pointer(help->helper) != info->helper)
			return false;
	}

	return true;
}
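
The cache test is a chain of increasingly specific guards: no entry, wrong network namespace, wrong zone, and wrong helper each disqualify the cached skb->nfct (nf_ct_zone_equal_any() treats a NULL template as matching any zone). The same shape, reduced to plain values in a standalone sketch:

#include <stdbool.h>
#include <stddef.h>

/* Simplified stand-ins for what skb_nfct_cached() actually compares. */
struct cached { int net_id; int zone_id; const void *helper; };
struct wanted { int net_id; int zone_id; const void *helper; };	/* zone_id < 0: any */

static bool cache_usable(const struct cached *c, const struct wanted *w)
{
	if (!c)
		return false;		/* nothing cached */
	if (c->net_id != w->net_id)
		return false;		/* different netns */
	if (w->zone_id >= 0 && c->zone_id != w->zone_id)
		return false;		/* zone mismatch */
	if (w->helper && c->helper != w->helper)
		return false;		/* helper mismatch */
	return true;
}

int main(void)
{
	struct cached c = { 1, 5, NULL };
	struct wanted w = { 1, -1, NULL };	/* any zone, no helper required */

	return cache_usable(&c, &w) ? 0 : 1;
}
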
Example 5
/* Update 'key' based on skb->nfct.  If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action.  If
 * 'keep_nat_flags' is true, the existing NAT flags are retained, else
 * they are initialized from the connection status.
 */
static void ovs_ct_update_key(const struct sk_buff *skb,
			      const struct ovs_conntrack_info *info,
			      struct sw_flow_key *key, bool post_ct,
			      bool keep_nat_flags)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u8 state = 0;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		state = ovs_ct_get_state(ctinfo);
		/* All unconfirmed entries are NEW connections. */
		if (!nf_ct_is_confirmed(ct))
			state |= OVS_CS_F_NEW;
		/* OVS persists the related flag for the duration of the
		 * connection.
		 */
		if (ct->master)
			state |= OVS_CS_F_RELATED;
		if (keep_nat_flags) {
			state |= key->ct.state & OVS_CS_F_NAT_MASK;
		} else {
			if (ct->status & IPS_SRC_NAT)
				state |= OVS_CS_F_SRC_NAT;
			if (ct->status & IPS_DST_NAT)
				state |= OVS_CS_F_DST_NAT;
		}
		zone = nf_ct_zone(ct);
	} else if (post_ct) {
		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
		if (info)
			zone = &info->zone;
	}
	__ovs_ct_update_key(key, state, zone, ct);
}
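
The new piece relative to Example 1 is the NAT-bit handling: either the bits already present in the flow key survive (keep_nat_flags), or they are rebuilt from the connection's status word. That choice in isolation, again with stand-in constants:

#include <stdint.h>
#include <stdio.h>

#define CS_F_SRC_NAT	0x40	/* stand-ins for OVS_CS_F_SRC_NAT / _DST_NAT */
#define CS_F_DST_NAT	0x80
#define CS_F_NAT_MASK	(CS_F_SRC_NAT | CS_F_DST_NAT)

#define ST_SRC_NAT	0x10	/* stand-ins for IPS_SRC_NAT / IPS_DST_NAT */
#define ST_DST_NAT	0x20

static uint8_t nat_flags(uint8_t old_key_state, unsigned long status, int keep)
{
	uint8_t state = 0;

	if (keep) {
		/* Carry over the NAT bits the key already holds. */
		state |= old_key_state & CS_F_NAT_MASK;
	} else {
		/* Rebuild them from the connection status. */
		if (status & ST_SRC_NAT)
			state |= CS_F_SRC_NAT;
		if (status & ST_DST_NAT)
			state |= CS_F_DST_NAT;
	}
	return state;
}

int main(void)
{
	printf("kept:    0x%02x\n", nat_flags(CS_F_SRC_NAT, 0, 1));
	printf("rebuilt: 0x%02x\n", nat_flags(0, ST_DST_NAT, 0));
	return 0;
}
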
Example 6
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
 * we change the source to map into the range. For NF_INET_PRE_ROUTING
 * and NF_INET_LOCAL_OUT, we change the destination to map into the
 * range. It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	const struct nf_conntrack_zone *zone;
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	struct net *net = nf_ct_net(ct);

	zone = nf_ct_zone(ct);

	rcu_read_lock();
	l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
	l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
					orig_tuple->dst.protonum);

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	 * and that same mapping gives a unique tuple within the given
	 * range, use that.
	 *
	 * This is only required for source (ie. NAT/masq) mappings.
	 * So far, we don't do local source mappings, so multiple
	 * manips not an issue.
	 */
	if (maniptype == NF_NAT_MANIP_SRC &&
	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
		/* try the original tuple first */
		if (in_range(l3proto, l4proto, orig_tuple, range)) {
			if (!nf_nat_used_tuple(orig_tuple, ct)) {
				*tuple = *orig_tuple;
				goto out;
			}
		} else if (find_appropriate_src(net, zone, l3proto, l4proto,
						orig_tuple, tuple, range)) {
			pr_debug("get_unique_tuple: Found current src map\n");
			if (!nf_nat_used_tuple(tuple, ct))
				goto out;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given range */
	*tuple = *orig_tuple;
	find_best_ips_proto(zone, tuple, range, ct, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	 * the range to make a unique tuple.
	 */

	/* Only bother mapping if it's not already in range and unique */
	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
			if (l4proto->in_range(tuple, maniptype,
					      &range->min_proto,
					      &range->max_proto) &&
			    (range->min_proto.all == range->max_proto.all ||
			     !nf_nat_used_tuple(tuple, ct)))
				goto out;
		} else if (!nf_nat_used_tuple(tuple, ct)) {
			goto out;
		}
	}

	/* Last chance: get protocol to try to obtain unique tuple. */
	l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
out:
	rcu_read_unlock();
}
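
The function is a three-stage fallback: (1) reuse an existing source mapping when it is in range and still unique, (2) pick the best address via find_best_ips_proto(), (3) let the layer-4 protocol vary its part (port, GRE key, ...) until the tuple is unique. Stage 3 in miniature, with a hypothetical port_used() oracle standing in for nf_nat_used_tuple():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical occupancy oracle; the kernel asks the conntrack table. */
static bool port_used(uint16_t port)
{
	return port == 4000 || port == 4001;	/* pretend these are taken */
}

/* Keep the current mapping if it already qualifies, otherwise scan the
 * allowed range for a free id; on failure the caller must drop. */
static bool pick_port(uint16_t *port, uint16_t lo, uint16_t hi)
{
	if (*port >= lo && *port <= hi && !port_used(*port))
		return true;

	for (uint16_t p = lo; ; p++) {
		if (!port_used(p)) {
			*port = p;
			return true;
		}
		if (p == hi)
			return false;	/* checked before ++ to survive hi == 65535 */
	}
}

int main(void)
{
	uint16_t port = 4000;

	if (pick_port(&port, 4000, 4010))
		printf("chose port %u\n", port);
	return 0;
}
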
Example 7
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  enum nf_nat_manip_type maniptype)
{
	struct nf_conntrack_tuple curr_tuple, new_tuple;

	/* Can't setup nat info for confirmed ct. */
	if (nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
		     maniptype == NF_NAT_MANIP_DST);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply. Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;

		if (nfct_help(ct) && !nfct_seqadj_ext_add(ct))
			return NF_DROP;
	}

	if (maniptype == NF_NAT_MANIP_SRC) {
		struct nf_nat_conn_key key = {
			.net = nf_ct_net(ct),
			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
			.zone = nf_ct_zone(ct),
		};
		int err;

		err = rhltable_insert_key(&nf_nat_bysource_table,
					  &key,
					  &ct->nat_bysource,
					  nf_nat_bysource_params);
		if (err)
			return NF_DROP;
	}

	/* It's done. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);

static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 * Use reply in case it's already been mangled (eg local packet).
	 */
	union nf_inet_addr ip =
		(manip == NF_NAT_MANIP_SRC ?
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
	struct nf_nat_range range = {
		.flags		= NF_NAT_RANGE_MAP_IPS,
		.min_addr	= ip,
		.max_addr	= ip,
	};
	return nf_nat_setup_info(ct, &range, manip);
}

unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		l3proto = __nf_nat_l3proto_find(target.src.l3num);
		l4proto = __nf_nat_l4proto_find(target.src.l3num,
						target.dst.protonum);
		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);

struct nf_nat_proto_clean {
	u8	l3proto;
	u8	l4proto;
};

/* kill conntracks with affected NAT section */
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
	const struct nf_nat_proto_clean *clean = data;

	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
		return 0;

	return i->status & IPS_NAT_MASK ? 1 : 0;
}

static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
	if (nf_nat_proto_remove(ct, data))
		return 1;

	if ((ct->status & IPS_SRC_NAT_DONE) == 0)
		return 0;

	/* This netns is being destroyed, and the conntrack has a NAT null
	 * binding. Remove it from the bysource hash, as the table will be
	 * freed soon.
	 *
	 * Otherwise, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
	 * would delete the entry from an already-freed table.
	 */
	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
			nf_nat_bysource_params);

	/* don't delete conntrack.  Although that would make things a lot
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
	 */
	return 0;
}

static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
		.l4proto = l4proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

static void nf_nat_l3proto_clean(u8 l3proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	const struct nf_nat_l4proto **l4protos;
	unsigned int i;
	int ret = 0;

	mutex_lock(&nf_nat_proto_mutex);
	if (nf_nat_l4protos[l3proto] == NULL) {
		l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
				   GFP_KERNEL);
		if (l4protos == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		for (i = 0; i < IPPROTO_MAX; i++)
			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);

		/* Before making proto_array visible to lockless readers,
		 * we must make sure its content is committed to memory.
		 */
		smp_wmb();

		nf_nat_l4protos[l3proto] = l4protos;
	}

	if (rcu_dereference_protected(
			nf_nat_l4protos[l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_nat_proto_mutex)
			) != &nf_nat_l4proto_unknown) {
		ret = -EBUSY;
		goto out;
	}
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
 out:
	mutex_unlock(&nf_nat_proto_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);

/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
			 &nf_nat_l4proto_unknown);
	mutex_unlock(&nf_nat_proto_mutex);
	synchronize_rcu();

	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
}
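
One detail of nf_nat_packet() worth isolating is `statusbit ^= IPS_NAT_MASK`. Because the mask is exactly the two NAT bits, the XOR swaps IPS_SRC_NAT and IPS_DST_NAT: a source manipulation set up for the original direction must be applied as a destination rewrite on replies. With stand-in bit values:

#include <stdio.h>

#define ST_SRC_NAT	0x10	/* stand-ins for IPS_SRC_NAT / IPS_DST_NAT */
#define ST_DST_NAT	0x20
#define ST_NAT_MASK	(ST_SRC_NAT | ST_DST_NAT)

int main(void)
{
	unsigned long statusbit = ST_SRC_NAT;

	/* Reply direction: flip SRC <-> DST in one operation. */
	statusbit ^= ST_NAT_MASK;
	printf("reply-dir bit: %#lx (DST = %#x)\n", statusbit, ST_DST_NAT);
	return 0;
}
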
Example 8
static void pptp_nat_expected(struct nf_conn *ct,
			      struct nf_conntrack_expect *exp)
{
	struct net *net = nf_ct_net(ct);
	const struct nf_conn *master = ct->master;
	struct nf_conntrack_expect *other_exp;
	struct nf_conntrack_tuple t = {};
	const struct nf_ct_pptp_master *ct_pptp_info;
	const struct nf_nat_pptp *nat_pptp_info;
	struct nf_nat_range range;

	ct_pptp_info = nfct_help_data(master);
	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;

	/* And here goes the grand finale of corrosion... */
	if (exp->dir == IP_CT_DIR_ORIGINAL) {
		pr_debug("we are PNS->PAC\n");
		/* therefore, build tuple for PAC->PNS */
		t.src.l3num = AF_INET;
		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
		t.src.u.gre.key = ct_pptp_info->pac_call_id;
		t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
		t.dst.u.gre.key = ct_pptp_info->pns_call_id;
		t.dst.protonum = IPPROTO_GRE;
	} else {
		pr_debug("we are PAC->PNS\n");
		/* build tuple for PNS->PAC */
		t.src.l3num = AF_INET;
		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
		t.src.u.gre.key = nat_pptp_info->pns_call_id;
		t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
		t.dst.u.gre.key = nat_pptp_info->pac_call_id;
		t.dst.protonum = IPPROTO_GRE;
	}

	pr_debug("trying to unexpect other dir: ");
	nf_ct_dump_tuple_ip(&t);
	other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
	if (other_exp) {
		nf_ct_unexpect_related(other_exp);
		nf_ct_expect_put(other_exp);
		pr_debug("success\n");
	} else {
		pr_debug("not found!\n");
	}

	/* This must be a fresh one. */
	BUG_ON(ct->status & IPS_NAT_DONE_MASK);

	/* Change src to where master sends to */
	range.flags = NF_NAT_RANGE_MAP_IPS;
	range.min_addr = range.max_addr
		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
	if (exp->dir == IP_CT_DIR_ORIGINAL) {
		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range.min_proto = range.max_proto = exp->saved_proto;
	}
	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);

	/* For DST manip, map port here to where it's expected. */
	range.flags = NF_NAT_RANGE_MAP_IPS;
	range.min_addr = range.max_addr
		= ct->master->tuplehash[!exp->dir].tuple.src.u3;
	if (exp->dir == IP_CT_DIR_REPLY) {
		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range.min_proto = range.max_proto = exp->saved_proto;
	}
	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}

/* Older variant of the same handler, written against the IPv4-only
 * struct nf_nat_ipv4_range API that preceded the family-independent
 * struct nf_nat_range used above.
 */
static void pptp_nat_expected(struct nf_conn *ct,
			      struct nf_conntrack_expect *exp)
{
	struct net *net = nf_ct_net(ct);
	const struct nf_conn *master = ct->master;
	struct nf_conntrack_expect *other_exp;
	struct nf_conntrack_tuple t = {};	/* zeroed: tuple compares look at all of u3 */
	const struct nf_ct_pptp_master *ct_pptp_info;
	const struct nf_nat_pptp *nat_pptp_info;
	struct nf_nat_ipv4_range range;

	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;

	/* And here goes the grand finale of corrosion... */
	if (exp->dir == IP_CT_DIR_ORIGINAL) {
		pr_debug("we are PNS->PAC\n");
		/* therefore, build tuple for PAC->PNS */
		t.src.l3num = AF_INET;
		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
		t.src.u.gre.key = ct_pptp_info->pac_call_id;
		t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
		t.dst.u.gre.key = ct_pptp_info->pns_call_id;
		t.dst.protonum = IPPROTO_GRE;
	} else {
		pr_debug("we are PAC->PNS\n");
		/* build tuple for PNS->PAC */
		t.src.l3num = AF_INET;
		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
		t.src.u.gre.key = nat_pptp_info->pns_call_id;
		t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
		t.dst.u.gre.key = nat_pptp_info->pac_call_id;
		t.dst.protonum = IPPROTO_GRE;
	}

	pr_debug("trying to unexpect other dir: ");
	nf_ct_dump_tuple_ip(&t);
	other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
	if (other_exp) {
		nf_ct_unexpect_related(other_exp);
		nf_ct_expect_put(other_exp);
		pr_debug("success\n");
	} else {
		pr_debug("not found!\n");
	}

	/* This must be a fresh one. */
	BUG_ON(ct->status & IPS_NAT_DONE_MASK);

	/* Change src to where master sends to */
	range.flags = NF_NAT_RANGE_MAP_IPS;
	range.min_ip = range.max_ip
		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
	if (exp->dir == IP_CT_DIR_ORIGINAL) {
		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range.min = range.max = exp->saved_proto;
	}
	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);

	/* For DST manip, map port here to where it's expected. */
	range.flags = NF_NAT_RANGE_MAP_IPS;
	range.min_ip = range.max_ip
		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
	if (exp->dir == IP_CT_DIR_REPLY) {
		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range.min = range.max = exp->saved_proto;
	}
	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
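
Both variants lean on the "range of one" idiom: min and max set to the same address (and, once NF_NAT_RANGE_PROTO_SPECIFIED is added, the same call id) leave nf_nat_setup_info() no freedom, turning a range-based NAT API into an exact binding. A compact sketch with simplified field names:

#include <stdint.h>

#define MAP_IPS		0x1	/* stand-in for NF_NAT_RANGE_MAP_IPS */
#define PROTO_SPECIFIED	0x2	/* stand-in for NF_NAT_RANGE_PROTO_SPECIFIED */

/* Simplified stand-in for the kernel's NAT range structure. */
struct range {
	unsigned int flags;
	uint32_t min_ip, max_ip;
	uint16_t min_id, max_id;
};

/* A degenerate range (min == max) pins the mapping to one address and
 * one id instead of letting the NAT core choose from a pool. */
static struct range exact_binding(uint32_t ip, uint16_t id)
{
	struct range r = { .flags = MAP_IPS | PROTO_SPECIFIED };

	r.min_ip = r.max_ip = ip;
	r.min_id = r.max_id = id;
	return r;
}

int main(void)
{
	struct range r = exact_binding(0x0a000001, 1701);

	return (r.min_ip == r.max_ip && r.min_id == r.max_id) ? 0 : 1;
}
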