Example #1
/* No one is using the conntrack by the time this is called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	if (ct->status & IPS_SRC_NAT_DONE)
		rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
				nf_nat_bysource_params);
}
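
For context, nf_nat_bysource_table is an rhltable whose hash is computed from the conntrack object itself rather than from a flat key blob. A minimal sketch of the table parameters this cleanup path assumes, paraphrased from the nf_nat_core.c of the same era (exact sizing fields may differ between kernel versions):

static const struct rhashtable_params nf_nat_bysource_params = {
	/* the rhlist head is embedded in struct nf_conn */
	.head_offset = offsetof(struct nf_conn, nat_bysource),
	/* the key covers net, original tuple and zone, which are
	 * scattered through the object, hence obj_hashfn/obj_cmpfn
	 * instead of key_offset/key_len */
	.obj_hashfn = nf_nat_bysource_hash,
	.obj_cmpfn = nf_nat_bysource_cmp,
	.nelem_hint = 256,
	.min_size = 1024,
};
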
static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array,
				  unsigned int entries)
{
	struct test_obj *obj;
	int err;
	unsigned int i, insert_retries = 0;
	s64 start, end;

	/*
	 * Insertion Test:
	 * Insert entries into table with all keys even numbers
	 */
	pr_info("  Adding %d keys\n", entries);
	start = ktime_get_ns();
	for (i = 0; i < entries; i++) {
		struct test_obj *obj = &array[i];

		obj->value.id = i * 2;
		err = insert_retry(ht, obj, test_rht_params);
		if (err > 0)
			insert_retries += err;
		else if (err)
			return err;
	}

	if (insert_retries)
		pr_info("  %u insertions retried due to memory pressure\n",
			insert_retries);

	test_bucket_stats(ht, entries);
	rcu_read_lock();
	test_rht_lookup(ht, array, entries);
	rcu_read_unlock();

	test_bucket_stats(ht, entries);

	pr_info("  Deleting %d keys\n", entries);
	for (i = 0; i < entries; i++) {
		struct test_obj_val key = {
			.id = i * 2,
		};

		if (array[i].value.id != TEST_INSERT_FAIL) {
			obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
			BUG_ON(!obj);

			rhashtable_remove_fast(ht, &obj->node, test_rht_params);
		}

		cond_resched();
	}

	end = ktime_get_ns();
	pr_info("  Duration of test: %lld ns\n", end - start);

	return end - start;
}
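
The timing test above leans on a few definitions from the same module. A condensed sketch, paraphrased from lib/test_rhashtable.c of this era (exact values can differ between kernel versions):

#define TEST_INSERT_FAIL INT_MAX

struct test_obj_val {
	int id;
	int tid;
};

struct test_obj {
	struct test_obj_val value;
	struct rhash_head node;		/* hashed into struct rhashtable */
};

struct test_obj_rhl {
	struct test_obj_val value;
	struct rhlist_head list_node;	/* hashed into struct rhltable */
};

static struct rhashtable_params test_rht_params = {
	.head_offset = offsetof(struct test_obj, node),
	.key_offset = offsetof(struct test_obj, value),
	.key_len = sizeof(struct test_obj_val),
	.hashfn = jhash,
	.nulls_base = (3U << RHT_BASE_SHIFT),
};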

static struct rhashtable ht;
static struct rhltable rhlt;

static int __init test_rhltable(unsigned int entries)
{
	struct test_obj_rhl *rhl_test_objects;
	unsigned long *obj_in_table;
	unsigned int i, j, k;
	int ret, err;

	if (entries == 0)
		entries = 1;

	rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries);
	if (!rhl_test_objects)
		return -ENOMEM;

	ret = -ENOMEM;
	obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long));
	if (!obj_in_table)
		goto out_free;

	/* nulls_base not supported in rhlist interface */
	test_rht_params.nulls_base = 0;
	err = rhltable_init(&rhlt, &test_rht_params);
	if (WARN_ON(err))
		goto out_free;

	k = prandom_u32();
	ret = 0;
	for (i = 0; i < entries; i++) {
		rhl_test_objects[i].value.id = k;
		err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		if (WARN(err, "error %d on element %d\n", err, i))
			break;
		if (err == 0)
			set_bit(i, obj_in_table);
	}

	if (err)
		ret = err;

	pr_info("test %d add/delete pairs into rhlist\n", entries);
	for (i = 0; i < entries; i++) {
		struct rhlist_head *h, *pos;
		struct test_obj_rhl *obj;
		struct test_obj_val key = {
			.id = k,
		};
		bool found;

		rcu_read_lock();
		h = rhltable_lookup(&rhlt, &key, test_rht_params);
		if (WARN(!h, "key not found during iteration %d of %d", i, entries)) {
			rcu_read_unlock();
			break;
		}

		if (i) {
			j = i - 1;
			rhl_for_each_entry_rcu(obj, pos, h, list_node) {
				if (WARN(pos == &rhl_test_objects[j].list_node, "old element found, should be gone"))
					break;
			}
		}

		cond_resched_rcu();

		found = false;

		rhl_for_each_entry_rcu(obj, pos, h, list_node) {
			if (pos == &rhl_test_objects[i].list_node) {
				found = true;
				break;
			}
		}

		rcu_read_unlock();

		if (WARN(!found, "element %d not found", i))
			break;

		err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
		WARN(err, "rhltable_remove: err %d for iteration %d\n", err, i);
		if (err == 0)
			clear_bit(i, obj_in_table);
	}

	if (ret == 0 && err)
		ret = err;

	for (i = 0; i < entries; i++) {
		WARN(test_bit(i, obj_in_table), "elem %d allegedly still present", i);

		err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		if (WARN(err, "error %d on element %d\n", err, i))
			break;
		if (err == 0)
			set_bit(i, obj_in_table);
	}

	pr_info("test %d random rhlist add/delete operations\n", entries);
	for (j = 0; j < entries; j++) {
		u32 i = prandom_u32_max(entries);
		u32 prand = prandom_u32();

		cond_resched();

		if (prand == 0)
			prand = prandom_u32();

		if (prand & 1) {
			prand >>= 1;
			continue;
		}

		err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
		if (test_bit(i, obj_in_table)) {
			clear_bit(i, obj_in_table);
			if (WARN(err, "cannot remove element at slot %d", i))
				continue;
		} else {
			if (WARN(err != -ENOENT, "removed non-existent element %d, error %d not %d",
			     i, err, -ENOENT))
				continue;
		}

		if (prand & 1) {
			prand >>= 1;
			continue;
		}

		err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
		if (err == 0) {
			if (WARN(test_and_set_bit(i, obj_in_table), "succeeded in inserting same object %d", i))
				continue;
		} else {
			if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i))
				continue;
		}

		if (prand & 1) {
			prand >>= 1;
			continue;
		}
	/* ... remainder of test_rhltable() elided in this excerpt ... */

Example #3
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  enum nf_nat_manip_type maniptype)
{
	struct nf_conntrack_tuple curr_tuple, new_tuple;

	/* Can't setup nat info for confirmed ct. */
	if (nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
		     maniptype == NF_NAT_MANIP_DST);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply. Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;

		if (nfct_help(ct))
			if (!nfct_seqadj_ext_add(ct))
				return NF_DROP;
	}

	if (maniptype == NF_NAT_MANIP_SRC) {
		struct nf_nat_conn_key key = {
			.net = nf_ct_net(ct),
			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
			.zone = nf_ct_zone(ct),
		};
		int err;

		err = rhltable_insert_key(&nf_nat_bysource_table,
					  &key,
					  &ct->nat_bysource,
					  nf_nat_bysource_params);
		if (err)
			return NF_DROP;
	}

	/* It's done. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);
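
Note the use of rhltable_insert_key() above: the bysource hash covers the conntrack's net, original tuple and zone, which do not sit contiguously inside struct nf_conn, so insert and lookup pass an explicit key instead of relying on key_offset/key_len. The composite key looks roughly like this (paraphrased from the same file):

struct nf_nat_conn_key {
	const struct net *net;
	const struct nf_conntrack_tuple *tuple;
	const struct nf_conntrack_zone *zone;
};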

static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 * Use reply in case it's already been mangled (eg local packet).
	 */
	union nf_inet_addr ip =
		(manip == NF_NAT_MANIP_SRC ?
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
	struct nf_nat_range range = {
		.flags		= NF_NAT_RANGE_MAP_IPS,
		.min_addr	= ip,
		.max_addr	= ip,
	};
	return nf_nat_setup_info(ct, &range, manip);
}

unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
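
HOOK2MANIP() derives the manipulation type from the hook number: SNAT happens in NF_INET_POST_ROUTING and NF_INET_LOCAL_IN, every other NAT hook does DNAT. A sketch of the macro as defined in include/net/netfilter/nf_nat.h of this era (quoted from memory; verify against your tree):

/* NF_NAT_MANIP_SRC is 0 and NF_NAT_MANIP_DST is 1, so the boolean
 * result of the comparison doubles as the manip type. */
#define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \
			     (hooknum) != NF_INET_LOCAL_IN)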

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		l3proto = __nf_nat_l3proto_find(target.src.l3num);
		l4proto = __nf_nat_l4proto_find(target.src.l3num,
						target.dst.protonum);
		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);
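
The statusbit ^= IPS_NAT_MASK line in nf_nat_packet() works because the mask is exactly the union of the two NAT bits, so the XOR swaps IPS_SRC_NAT for IPS_DST_NAT (and vice versa) when handling a reply-direction packet. The relevant flags, excerpted from include/uapi/linux/netfilter/nf_conntrack_common.h:

enum ip_conntrack_status {
	/* ... */
	IPS_SRC_NAT_BIT = 4,
	IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),

	IPS_DST_NAT_BIT = 5,
	IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
	/* ... */
	IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
	/* ... */
};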

struct nf_nat_proto_clean {
	u8	l3proto;
	u8	l4proto;
};

/* kill conntracks with affected NAT section */
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
	const struct nf_nat_proto_clean *clean = data;

	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
		return 0;

	return i->status & IPS_NAT_MASK ? 1 : 0;
}

static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
	if (nf_nat_proto_remove(ct, data))
		return 1;

	if ((ct->status & IPS_SRC_NAT_DONE) == 0)
		return 0;

	/* This netns is being destroyed, and conntrack has nat null binding.
	 * Remove it from bysource hash, as the table will be freed soon.
	 *
	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
	 * will delete the entry from the already-freed table.
	 */
	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
			nf_nat_bysource_params);

	/* don't delete conntrack.  Although that would make things a lot
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
	 */
	return 0;
}

static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
		.l4proto = l4proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

static void nf_nat_l3proto_clean(u8 l3proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
	};
	struct net *net;

	rtnl_lock();

	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	const struct nf_nat_l4proto **l4protos;
	unsigned int i;
	int ret = 0;

	mutex_lock(&nf_nat_proto_mutex);
	if (nf_nat_l4protos[l3proto] == NULL) {
		l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
				   GFP_KERNEL);
		if (l4protos == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		for (i = 0; i < IPPROTO_MAX; i++)
			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);

		/* Before making proto_array visible to lockless readers,
		 * we must make sure its content is committed to memory.
		 */
		smp_wmb();

		nf_nat_l4protos[l3proto] = l4protos;
	}

	if (rcu_dereference_protected(
			nf_nat_l4protos[l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_nat_proto_mutex)
			) != &nf_nat_l4proto_unknown) {
		ret = -EBUSY;
		goto out;
	}
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
 out:
	mutex_unlock(&nf_nat_proto_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);
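
A typical caller is one of the per-protocol NAT modules. The hypothetical init function below is modeled on the in-tree L4 protocol modules of this era (e.g. nf_nat_proto_dccp.c); the name nf_nat_proto_example_init is illustrative, and the point is the unwind of the IPv4 registration when the IPv6 one fails:

static int __init nf_nat_proto_example_init(void)
{
	int err;

	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
	if (err < 0)
		return err;

	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
	if (err < 0)
		/* undo the IPv4 registration on partial failure */
		nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);

	return err;
}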

/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
			 &nf_nat_l4proto_unknown);
	mutex_unlock(&nf_nat_proto_mutex);
	synchronize_rcu();

	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
}