Example #1
void flow_table_rem(flow_table_t *ft, struct flow_keys match) {
    int index = 0;
    unsigned long flags = 0;
    match_stk_t *curr = NULL;
    match_stk_t *last = NULL;

    index = flow_keys_hash(match) % ft->size;

    curr = rcu_dereference_bh(ft->table[index]);

    pr_debug("FT: removing bucket entry at %d\n", index);
    while(curr != NULL && !flow_key_equal(match, curr->match)) {
        last = curr;
        curr = rcu_dereference_bh(curr->next);
    }

    if(curr != NULL && flow_key_equal(match, curr->match)) {
        // entry exists
        ft_lock(flags);
        if(curr == ft->table[index]){
            // if at the beginning of bucket
            rcu_assign_pointer(ft->table[index], curr->next);
        } else {
            // otherwise
            rcu_assign_pointer(last->next, curr->next);
        }
        ft_unlock(flags);
        // free both stk and entry
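        // NOTE: curr is freed immediately after being unlinked; this assumes no
        // concurrent rcu_dereference_bh() reader can still hold a reference to it
        // (no RCU grace period elapses before the free)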
        free_match_stk_entry(curr);
        atomic_dec(&ft->num_flows);
    }
    // else entry doesn't exist
}
Example #2
/**
 * Copies this module's current configuration to "clone".
 *
 * @param[out] clone a copy of the current config will be placed here. Must be already allocated.
 * @return zero on success, nonzero on failure.
 */
int filtering_clone_config(struct filtering_config *clone)
{
	rcu_read_lock_bh();
	*clone = *rcu_dereference_bh(config);
	rcu_read_unlock_bh();
	return 0;
}
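For contrast, a minimal sketch of the matching update side, assuming "config" is an RCU-managed pointer to a kmalloc'd struct filtering_config and that writers are already serialized by a mutex such as the config_lock named here (both the helper and the lock are illustrative, not taken from the module):

/* Illustrative writer: publish a new config, then reclaim the old one once
 * every rcu_read_lock_bh() reader (such as the clone above) has finished. */
static int filtering_set_config(struct filtering_config *new)
{
	struct filtering_config *old;

	old = rcu_dereference_protected(config, lockdep_is_held(&config_lock));
	rcu_assign_pointer(config, new);	/* publish the new config */
	synchronize_rcu_bh();			/* wait for _bh readers to drain */
	kfree(old);				/* now safe to free */
	return 0;
}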
Example #3
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	struct tcf_proto *fl;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->divisor)
		return TC_H_MIN(skb->priority);

	fl = rcu_dereference_bh(q->filter_list);
	if (!fl)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, fl, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
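			/* fall through */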
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->divisor)
			return TC_H_MIN(res.classid);
	}
	return 0;
}
Example #4
static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct ingress_qdisc_data *p = qdisc_priv(sch);
	struct tcf_result res;
	struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
	int result;

	result = tc_classify(skb, fl, &res);

	qdisc_bstats_update(sch, skb);
	switch (result) {
	case TC_ACT_SHOT:
		result = TC_ACT_SHOT;
		qdisc_qstats_drop(sch);
		break;
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
		result = TC_ACT_STOLEN;
		break;
	case TC_ACT_RECLASSIFY:
	case TC_ACT_OK:
		skb->tc_index = TC_H_MIN(res.classid);
	default:
		result = TC_ACT_OK;
		break;
	}

	return result;
}
Example #5
static struct Qdisc *
multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct multiq_sched_data *q = qdisc_priv(sch);
	u32 band;
	struct tcf_result res;
	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
	int err;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	err = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
	switch (err) {
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
	case TC_ACT_TRAP:
		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
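		/* fall through */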
	case TC_ACT_SHOT:
		return NULL;
	}
#endif
	band = skb_get_queue_mapping(skb);

	if (band >= q->bands)
		return q->queues[0];

	return q->queues[band];
}
Example #6
static void tcf_action_goto_chain_exec(const struct tc_action *a,
				       struct tcf_result *res)
{
	const struct tcf_chain *chain = a->goto_chain;

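	/* Point the classification result at the head filter of the destination chain. */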
	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
Example #7
/* Hash to port mapping select tx port */
static struct team_port *lb_htpm_select_tx_port(struct team *team,
						struct lb_priv *lb_priv,
						struct sk_buff *skb,
						unsigned char hash)
{
	return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
}
Example #8
static struct Qdisc *
prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct prio_sched_data *q = qdisc_priv(sch);
	u32 band = skb->priority;
	struct tcf_result res;
	struct tcf_proto *fl;
	int err;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	if (TC_H_MAJ(skb->priority) != sch->handle) {
		fl = rcu_dereference_bh(q->filter_list);
		err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
		switch (err) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
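			/* fall through */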
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if (!fl || err < 0) {
			if (TC_H_MAJ(band))
				band = 0;
			return q->queues[q->prio2band[band & TC_PRIO_MAX]];
		}
		band = res.classid;
	}
	band = TC_H_MIN(band) - 1;
	if (band >= q->bands)
		return q->queues[q->prio2band[0]];

	return q->queues[band];
}
Example #9
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
			  struct tcf_result *res)
{
	struct tcf_tunnel_key *t = to_tunnel_key(a);
	struct tcf_tunnel_key_params *params;
	int action;

	params = rcu_dereference_bh(t->params);

	tcf_lastuse_update(&t->tcf_tm);
	bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
	action = READ_ONCE(t->tcf_action);

	switch (params->tcft_action) {
	case TCA_TUNNEL_KEY_ACT_RELEASE:
		skb_dst_drop(skb);
		break;
	case TCA_TUNNEL_KEY_ACT_SET:
		skb_dst_drop(skb);
		skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
		break;
	default:
		WARN_ONCE(1, "Bad tunnel_key action %d.\n",
			  params->tcft_action);
		break;
	}

	return action;
}
Example #10
int flush_bytes_array(void)
{
	struct bytes *tmp, *old;

	rcu_read_lock_bh();
	if (!(rcu_dereference_bh(bytes_to_skip)->array)) {
		log_info("Byte array list is empty nothing to flush");
		rcu_read_unlock_bh();
		return 0;
	}
	rcu_read_unlock_bh();

	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
	if (!tmp) {
		log_err("Could not allocate struct bytes.");
		return -ENOMEM;
	}

	old = bytes_to_skip;
	*tmp = *bytes_to_skip;

	/* Delete. */
	tmp->array = NULL;
	tmp->count = 0;

	rcu_assign_pointer(bytes_to_skip, tmp);
	synchronize_rcu_bh();

	if (old->array)
		kfree(old->array);

	kfree(old);
	return 0;
}
Example #11
static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
			      struct padata_pcrypt *pcrypt)
{
	unsigned int cpu_index, cpu, i;
	struct pcrypt_cpumask *cpumask;

	cpu = *cb_cpu;

	rcu_read_lock_bh();
	cpumask = rcu_dereference_bh(pcrypt->cb_cpumask);
	if (cpumask_test_cpu(cpu, cpumask->mask))
		goto out;

	if (!cpumask_weight(cpumask->mask))
		goto out;

	cpu_index = cpu % cpumask_weight(cpumask->mask);

	cpu = cpumask_first(cpumask->mask);
	for (i = 0; i < cpu_index; i++)
		cpu = cpumask_next(cpu, cpumask->mask);

	*cb_cpu = cpu;

out:
	rcu_read_unlock_bh();
	return padata_do_parallel(pcrypt->pinst, padata, cpu);
}
Example #12
/*
 * Classify flow using either:
 *  1. pre-existing classification result in skb
 *  2. fast internal classification
 *  3. TC filter based classification
 */
static bool choke_classify(struct sk_buff *skb,
			   struct Qdisc *sch, int *qerr)

{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	struct tcf_proto *fl;
	int result;

	fl = rcu_dereference_bh(q->filter_list);
	result = tc_classify(skb, fl, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
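			/* fall through */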
		case TC_ACT_SHOT:
			return false;
		}
#endif
		choke_set_classid(skb, TC_H_MIN(res.classid));
		return true;
	}

	return false;
}
Example #13
static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
			   struct tcf_result *res)
{
	struct tcf_skbedit *d = to_skbedit(a);
	struct tcf_skbedit_params *params;
	int action;

	tcf_lastuse_update(&d->tcf_tm);
	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);

	params = rcu_dereference_bh(d->params);
	action = READ_ONCE(d->tcf_action);

	if (params->flags & SKBEDIT_F_PRIORITY)
		skb->priority = params->priority;
	if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
		int wlen = skb_network_offset(skb);

		switch (tc_skb_protocol(skb)) {
		case htons(ETH_P_IP):
			wlen += sizeof(struct iphdr);
			if (!pskb_may_pull(skb, wlen))
				goto err;
			skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
			break;

		case htons(ETH_P_IPV6):
			wlen += sizeof(struct ipv6hdr);
			if (!pskb_may_pull(skb, wlen))
				goto err;
			skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
			break;
		}
	}

	/* (handling of the remaining SKBEDIT_F_* flags is omitted from this excerpt) */
	return action;

err:
	qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
	return TC_ACT_SHOT;
}
Example #14
struct stack flow_table_get( flow_table_t *ft, struct flow_keys match, routing_table_t* routing_table, u32 dst_ip) {
    struct stack no_stack;
    struct stack new_stk;
    struct stack old_stk;
    unsigned long flags = 0;
    match_stk_t *curr = NULL;
    match_stk_t *prev = NULL;
    int index = 0;
    no_stack.num_tags = -1;
    index = flow_keys_hash(match) % ft->size;

    curr = rcu_dereference_bh(ft->table[index]);
    
    while(curr != NULL && !flow_key_equal(match, curr->match)) {
        pr_debug("FT: active non-matching flow\n");
        prev = curr;
        curr = rcu_dereference_bh(curr->next);
    }

    // flow_table returns "no_stack" on miss
    if(curr == NULL || !flow_key_equal(match, curr->match)) {
        pr_debug("FT: no matching flow\n");
        return no_stack;
    }

    // found a matching stack - check idle timeout before returning
    if(flow_idle_time(curr->last_used) > IDLE_TIMEOUT){
        pr_debug("FT: matched flow - timeout.. num_flows %d -> %d\n", 
                atomic_read(&ft->num_flows), atomic_read(&ft->num_flows) - 1);
        // if idle timed-out, remove flow entry and return no_stack
        new_stk = stack_dup(get_random_stack_for_dst(dst_ip, routing_table));
        old_stk = curr->stk;
        ft_lock(flags);
        curr->stk = new_stk;
        ft_unlock(flags);
        stack_free(old_stk);
    }

    pr_debug("FT: matched flow - updating last_used\n");
    // update the last_used value for successful match
    curr->last_used = jiffies;
    return curr->stk;
}
Example #15
/**
 * Use this function to safely obtain the configuration value which dictates whether Jool should
 * drop all informational ICMP packets that are traveling from IPv6 to IPv4.
 *
 * @return whether Jool should drop all ICMPv6 info packets.
 */
static bool filter_icmpv6_info(void)
{
	bool result;

	rcu_read_lock_bh();
	result = rcu_dereference_bh(config)->drop_icmp6_info;
	rcu_read_unlock_bh();

	return result;
}
Example #16
/**
 * Use this function to safely obtain the configuration value which dictates whether Jool should
 * be applying "address-dependent filtering" (Look that up in the RFC).
 *
 * @return whether Jool should apply "address-dependent filtering".
 */
static bool address_dependent_filtering(void)
{
	bool result;

	rcu_read_lock_bh();
	result = rcu_dereference_bh(config)->drop_by_addr;
	rcu_read_unlock_bh();

	return result;
}
Example #17
/**
 * Use this function to safely obtain the configuration value which dictates whether IPv4 nodes
 * should be allowed to initiate conversations with IPv6 nodes.
 *
 * @return whether IPv4 nodes should be allowed to initiate conversations with IPv6 nodes.
 */
static bool drop_external_connections(void)
{
	bool result;

	rcu_read_lock_bh();
	result = rcu_dereference_bh(config)->drop_external_tcp;
	rcu_read_unlock_bh();

	return result;
}
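The three getters above repeat the same lock/dereference/unlock pattern; as a hedged sketch, one way to factor it out is shown below (the helper is hypothetical and assumes the drop_* booleans live directly in the struct filtering_config that "config" points to):

/* Hypothetical helper: read one boolean field of the RCU-protected "config"
 * under the _bh read-side lock, exactly as the getters above do one by one. */
static bool read_config_bool(size_t field_offset)
{
	bool result;

	rcu_read_lock_bh();
	result = *(const bool *)((const char *)rcu_dereference_bh(config) + field_offset);
	rcu_read_unlock_bh();

	return result;
}

/* e.g.: return read_config_bool(offsetof(struct filtering_config, drop_by_addr)); */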
Example #18
static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct cls_mall_head *head = rcu_dereference_bh(tp->root);
	struct cls_mall_filter *f = head->filter;

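	/* Hardware-only (skip_sw) filters are not run in software here. */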
	if (tc_skip_sw(f->flags))
		return -1;

	return tcf_exts_exec(skb, &f->exts, res);
}
Example #19
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter) {
		rcu_assign_pointer(sk->sk_filter, NULL);
		sk_filter_delayed_uncharge(sk, filter);
		ret = 0;
	}
	rcu_read_unlock_bh();
	return ret;
}
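sk_detach_filter() cannot free the old filter on the spot, because another CPU may still be running it under rcu_read_lock_bh(); a minimal sketch of the kind of deferred release sk_filter_delayed_uncharge() arranges is shown below (the helper names are illustrative, not the kernel's exact implementation, and assume struct sk_filter embeds a struct rcu_head named "rcu"):

/* Illustrative only: free a detached filter after a _bh grace period. */
static void filter_rcu_free(struct rcu_head *head)
{
	struct sk_filter *fp = container_of(head, struct sk_filter, rcu);

	kfree(fp);
}

static void filter_release_deferred(struct sk_filter *fp)
{
	call_rcu_bh(&fp->rcu, filter_rcu_free);	/* runs once all readers are done */
}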
Example #20
/**
 * padata_do_parallel - padata parallelization function
 *
 * @pinst: padata instance
 * @padata: object to be parallelized
 * @cb_cpu: cpu the serialization callback function will run on,
 *          must be in the serial cpumask of padata (i.e. cpumask.cbcpu).
 *
 * The parallelization callback function will run with BHs off.
 * Note: Every object which is parallelized by padata_do_parallel
 * must be seen by padata_do_serial.
 */
int padata_do_parallel(struct padata_instance *pinst,
		       struct padata_priv *padata, int cb_cpu)
{
	int target_cpu, err;
	struct padata_parallel_queue *queue;
	struct parallel_data *pd;

	rcu_read_lock_bh();

	pd = rcu_dereference_bh(pinst->pd);

	err = -EINVAL;
	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
		goto out;

	if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
		goto out;

	err = -EBUSY;
	if ((pinst->flags & PADATA_RESET))
		goto out;

	if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
		goto out;

	err = 0;
	atomic_inc(&pd->refcnt);
	padata->pd = pd;
	padata->cb_cpu = cb_cpu;

	target_cpu = padata_cpu_hash(pd);
	padata->cpu = target_cpu;
	queue = per_cpu_ptr(pd->pqueue, target_cpu);

	spin_lock(&queue->parallel.lock);
	list_add_tail(&padata->list, &queue->parallel.list);
	spin_unlock(&queue->parallel.lock);

	queue_work_on(target_cpu, pinst->wq, &queue->work);

out:
	rcu_read_unlock_bh();

	return err;
}
Example #21
/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then trim skb->data to the size returned by
 * sk_run_filter. If pkt_len is 0 the packet is tossed. If skb->len is
 * smaller than pkt_len the whole skb->data is kept. This is the
 * socket-level wrapper around sk_run_filter. It returns 0 if the packet
 * should be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock_bh();

	return err;
}
Example #22
bool skb_compare(struct sk_buff *expected, struct sk_buff *actual)
{
	struct bytes *skip_byte;
	unsigned char *expected_ptr, *actual_ptr;
	unsigned int i, min_len, skip_count;
	int errors = 0;

	if (expected->len != actual->len) {
		print_error_table_hdr(errors);
		log_info("    Length\t%d\t    %d", expected->len, actual->len);
		errors++;
	}

	expected_ptr = skb_network_header(expected);
	actual_ptr = skb_network_header(actual);
	min_len = (expected->len < actual->len) ? expected->len : actual->len;

	rcu_read_lock_bh();
	skip_byte = rcu_dereference_bh(bytes_to_skip);
	skip_count = 0;

	for (i = 0; i < min_len; i++) {
		if (skip_count < skip_byte->count && skip_byte->array[skip_count] == i) {
			skip_count++;
			continue;
		}

		if (expected_ptr[i] != actual_ptr[i]) {
			print_error_table_hdr(errors);
			log_info("    byte %u\t0x%x\t    0x%x", i,
					expected_ptr[i], actual_ptr[i]);
			errors++;
			if (errors >= 8)
				break;
		}
	}

	rcu_read_unlock_bh();
	return !errors;
}
Example #23
static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
			  struct tcf_result *res)
{
	struct tcf_sample *s = to_sample(a);
	struct psample_group *psample_group;
	int retval;
	int size;
	int iif;
	int oif;

	tcf_lastuse_update(&s->tcf_tm);
	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
	retval = READ_ONCE(s->tcf_action);

	psample_group = rcu_dereference_bh(s->psample_group);

	/* randomly sample packets according to rate */
	if (psample_group && (prandom_u32() % s->rate == 0)) {
		if (!skb_at_tc_ingress(skb)) {
			iif = skb->skb_iif;
			oif = skb->dev->ifindex;
		} else {
			iif = skb->dev->ifindex;
			oif = 0;
		}

		/* on ingress, the mac header gets popped, so push it back */
		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
			skb_push(skb, skb->mac_len);

		size = s->truncate ? s->trunc_size : skb->len;
		psample_sample_packet(psample_group, skb, size, iif, oif,
				      s->rate);

		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
			skb_pull(skb, skb->mac_len);
	}

	return retval;
}
Example #24
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter, a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;
	fp->bpf_func = sk_run_filter;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
	}

	bpf_jit_compile(fp);

	rcu_read_lock_bh();
	old_fp = rcu_dereference_bh(sk->sk_filter);
	rcu_assign_pointer(sk->sk_filter, fp);
	rcu_read_unlock_bh();

	if (old_fp)
		sk_filter_delayed_uncharge(sk, old_fp);
	return 0;
}
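sk_attach_filter() is what ultimately services the SO_ATTACH_FILTER socket option; a minimal userspace sketch of handing it a classic BPF program looks like the following (the single-instruction filter simply accepts every packet):

#include <linux/filter.h>
#include <sys/socket.h>

/* One classic-BPF instruction: return 0xffff, i.e. accept the whole packet. */
static struct sock_filter accept_all[] = {
	{ BPF_RET | BPF_K, 0, 0, 0x0000ffff },
};

static int attach_accept_all(int sockfd)
{
	struct sock_fprog prog = {
		.len    = sizeof(accept_all) / sizeof(accept_all[0]),
		.filter = accept_all,
	};

	return setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}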
Example #25
/**
 * Returns in "result" the IPv4 address an ICMP error towards "out"'s
 * destination should be sourced with.
 */
static int get_rfc6791_address(struct packet *in, __u64 count, __be32 *result)
{
    struct list_head *list;
    struct list_head *node;
    struct pool_entry *entry = NULL;
    unsigned int addr_index;

    if (config_randomize_rfc6791_pool())
        get_random_bytes(&addr_index, sizeof(addr_index));
    else
        addr_index = pkt_ip6_hdr(in)->hop_limit;

    /* unsigned int % __u64 does something weird, hence the trouble. */
    if (count <= 0xFFFFFFFFU)
        addr_index %= (unsigned int) count;

    list = rcu_dereference_bh(pool);
    list_for_each_rcu_bh(node, list) {
        entry = list_entry(node, struct pool_entry, list_hook);
        count = prefix4_get_addr_count(&entry->prefix);
        if (count >= addr_index)
            break;
        addr_index -= count;
    }
Example #26
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct tcf_proto *filter;
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->flows_cnt)
		return TC_H_MIN(skb->priority);

	filter = rcu_dereference_bh(q->filter_list);
	if (!filter)
		return fq_codel_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tcf_classify(skb, filter, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->flows_cnt)
			return TC_H_MIN(res.classid);
	}
	return 0;
}
Example #27
static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{

	struct sfb_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	struct tcf_proto *fl;
	int i;
	u32 p_min = ~0;
	u32 minqlen = ~0;
	u32 r, sfbhash;
	u32 slot = q->slot;
	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;

	if (unlikely(sch->q.qlen >= q->limit)) {
		qdisc_qstats_overlimit(sch);
		q->stats.queuedrop++;
		goto drop;
	}

	if (q->rehash_interval > 0) {
		unsigned long limit = q->rehash_time + q->rehash_interval;

		if (unlikely(time_after(jiffies, limit))) {
			sfb_swap_slot(q);
			q->rehash_time = jiffies;
		} else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
				    time_after(jiffies, limit - q->warmup_time))) {
			q->double_buffering = true;
		}
	}

	fl = rcu_dereference_bh(q->filter_list);
	if (fl) {
		u32 salt;

		/* If using external classifiers, get result and record it. */
		if (!sfb_classify(skb, fl, &ret, &salt))
			goto other_drop;
		sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
	} else {
		sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
	}


	if (!sfbhash)
		sfbhash = 1;
	sfb_skb_cb(skb)->hashes[slot] = sfbhash;

	for (i = 0; i < SFB_LEVELS; i++) {
		u32 hash = sfbhash & SFB_BUCKET_MASK;
		struct sfb_bucket *b = &q->bins[slot].bins[i][hash];

		sfbhash >>= SFB_BUCKET_SHIFT;
		if (b->qlen == 0)
			decrement_prob(b, q);
		else if (b->qlen >= q->bin_size)
			increment_prob(b, q);
		if (minqlen > b->qlen)
			minqlen = b->qlen;
		if (p_min > b->p_mark)
			p_min = b->p_mark;
	}

	slot ^= 1;
	sfb_skb_cb(skb)->hashes[slot] = 0;

	if (unlikely(minqlen >= q->max)) {
		qdisc_qstats_overlimit(sch);
		q->stats.bucketdrop++;
		goto drop;
	}

	if (unlikely(p_min >= SFB_MAX_PROB)) {
		/* Inelastic flow */
		if (q->double_buffering) {
			sfbhash = skb_get_hash_perturb(skb,
			    q->bins[slot].perturbation);
			if (!sfbhash)
				sfbhash = 1;
			sfb_skb_cb(skb)->hashes[slot] = sfbhash;

			for (i = 0; i < SFB_LEVELS; i++) {
				u32 hash = sfbhash & SFB_BUCKET_MASK;
				struct sfb_bucket *b = &q->bins[slot].bins[i][hash];

				sfbhash >>= SFB_BUCKET_SHIFT;
				if (b->qlen == 0)
					decrement_prob(b, q);
				else if (b->qlen >= q->bin_size)
					increment_prob(b, q);
			}
		}
		if (sfb_rate_limit(skb, q)) {
			qdisc_qstats_overlimit(sch);
			q->stats.penaltydrop++;
			goto drop;
		}
		goto enqueue;
	}
Example #28
void flow_table_set( flow_table_t *ft, struct flow_keys match,
        struct stack orig_stk ) {
    unsigned long flags = 0;
    int index = 0;
    match_stk_t *new_match_stk = NULL;
    match_stk_t *curr = NULL;
    match_stk_t *prev = NULL;
    match_stk_t *tmp = NULL;
    match_stk_t *to_free = NULL;

    // keep separate copies of stack in routing and flow tables
    struct stack stk = stack_dup(orig_stk);

    new_match_stk = flow_table_new_match_stk(match, stk);
    if(new_match_stk == NULL){
        stack_free(stk);
        return;
    }
    index = flow_keys_hash(match) % ft->size;

    pr_debug("FT: setting flow entry %d ... num_flows: %d -> %d\n", index, 
            atomic_read(&ft->num_flows), atomic_read(&ft->num_flows)+1);

    ft_lock(flags);
    curr = rcu_dereference_bh(ft->table[index]);

    while(curr != NULL && !flow_key_equal(match, curr->match)) {
        // Opportunistically expire stale entries while walking the bucket
        // (note: the "&& 0" in the condition below currently disables this path)
        if(flow_idle_time(curr->last_used) > IDLE_TIMEOUT && 0){
            // if idle timed-out, remove flow entry
            pr_debug("FT: non-matching flow timeout remove it %d -> %d flows\n",
                    atomic_read(&ft->num_flows), 
                    atomic_read(&ft->num_flows)-1);
            if(prev == NULL) { 
                // at the beginning of bucket
                tmp = curr->next;
                rcu_assign_pointer(ft->table[index], tmp); 
                curr->next = to_free;
                to_free = curr;
                curr = rcu_dereference_bh(ft->table[index]);
            }
            else { 
                // otherwise
                prev->next = curr->next;
                curr->next = to_free;
                to_free = curr;
                curr = rcu_dereference_bh(prev->next);
            }
            atomic_dec(&ft->num_flows);
        }
        else {
            // Else advance to next entry in bucket
            pr_debug("FT: active non-matching flow\n");
            prev = curr;
            curr = rcu_dereference_bh(curr->next);
        }
    }

    if(curr != NULL && flow_key_equal(match, curr->match)) {
        curr->stk = stk;
        free_match_stk_entry(new_match_stk);
    }
    else {
        tmp = rcu_dereference_bh(ft->table[index]);
        if(curr == tmp) {
          pr_debug("FT: creating new bucket entry at %d\n", index);
          new_match_stk->next = curr;
          rcu_assign_pointer(ft->table[index], new_match_stk);
        } 
        else if (curr == NULL) {
          pr_debug("FT: appending bucket entry at %d\n", index);
          rcu_assign_pointer(prev->next, new_match_stk);
        } 
        else {
          pr_debug("FT: inserting bucket entry at %d\n", index);
          new_match_stk->next = curr;
          rcu_assign_pointer(prev->next, new_match_stk);
        }
       atomic_inc(&ft->num_flows);
    }
    ft_unlock(flags);

}
Example #29
#ifdef HAVE_KIOCB_MSG_PARAM
static int
kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
	   struct msghdr *m, size_t len, int flags)
#else
static int
kni_sock_rcvmsg(struct socket *sock,
	   struct msghdr *m, size_t len, int flags)
#endif /* HAVE_KIOCB_MSG_PARAM */
{
	int vnet_hdr_len = 0;
	int pkt_len = 0;
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	static struct virtio_net_hdr
		__attribute__ ((unused)) vnet_hdr = {
		.flags = 0,
		.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};

	if (unlikely(q == NULL || q->kni == NULL))
		return 0;

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
	if (likely(q->flags & IFF_VNET_HDR)) {
		vnet_hdr_len = q->vnet_hdr_sz;
		if (len < (size_t)vnet_hdr_len)
			return -EINVAL;
		len -= vnet_hdr_len;
	}
#endif

	if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
		m, vnet_hdr_len, len))))
		return 0;

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
	/* no need to copy hdr when no pkt received */
#ifdef HAVE_IOV_ITER_MSGHDR
	if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len,
		&m->msg_iter) != vnet_hdr_len))
#else
	if (unlikely(memcpy_toiovecend(m->msg_iov,
		(void *)&vnet_hdr, 0, vnet_hdr_len)))
#endif /* HAVE_IOV_ITER_MSGHDR */
		return -EFAULT;
#endif /* RTE_KNI_VHOST_VNET_HDR_EN */
	KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
		   (unsigned long)len, q->flags, pkt_len);

	return (pkt_len + vnet_hdr_len);
}

/* dummy tap-like ioctl */
static int
kni_sock_ioctl(struct socket *sock, unsigned int cmd,
	      unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct ifreq __user *ifr = argp;
	unsigned int __user *up = argp;
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	struct kni_dev *kni;
	unsigned int u;
	int __user *sp = argp;
	int s;
	int ret;

	KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);

	switch (cmd) {
	case TUNSETIFF:
		KNI_DBG("TUNSETIFF\n");
		/* ignore the name, just look at flags */
		if (get_user(u, &ifr->ifr_flags))
			return -EFAULT;

		ret = 0;
		if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
			ret = -EINVAL;
		else
			q->flags = u;

		return ret;

	case TUNGETIFF:
		KNI_DBG("TUNGETIFF\n");
		rcu_read_lock_bh();
		kni = rcu_dereference_bh(q->kni);
		if (kni)
			dev_hold(kni->net_dev);
		rcu_read_unlock_bh();

		if (!kni)
			return -ENOLINK;

		ret = 0;
		if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
		    put_user(q->flags, &ifr->ifr_flags))
			ret = -EFAULT;
		dev_put(kni->net_dev);
		return ret;

	case TUNGETFEATURES:
		KNI_DBG("TUNGETFEATURES\n");
		u = IFF_TAP | IFF_NO_PI;
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
		u |= IFF_VNET_HDR;
#endif
		if (put_user(u, up))
			return -EFAULT;
		return 0;

	case TUNSETSNDBUF:
		KNI_DBG("TUNSETSNDBUF\n");
		if (get_user(u, up))
			return -EFAULT;

		q->sk.sk_sndbuf = u;
		return 0;

	case TUNGETVNETHDRSZ:
		s = q->vnet_hdr_sz;
		if (put_user(s, sp))
			return -EFAULT;
		KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
		return 0;

	case TUNSETVNETHDRSZ:
		if (get_user(s, sp))
			return -EFAULT;
		if (s < (int)sizeof(struct virtio_net_hdr))
			return -EINVAL;

		KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
		q->vnet_hdr_sz = s;
		return 0;

	case TUNSETOFFLOAD:
		KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
		/* no offloads supported yet */
		if (!(q->flags & IFF_VNET_HDR))
			return -EINVAL;

		return 0;
#else
		return -EINVAL;
#endif

	default:
		KNI_DBG("NOT SUPPORT\n");
		return -EINVAL;
	}
}