void flow_table_rem(flow_table_t *ft, struct flow_keys match)
{
    int index = 0;
    unsigned long flags = 0;
    match_stk_t *curr = NULL;
    match_stk_t *last = NULL;

    index = flow_keys_hash(match) % ft->size;
    curr = rcu_dereference_bh(ft->table[index]);
    pr_debug("FT: removing bucket entry at %d\n", index);

    while (curr != NULL && !flow_key_equal(match, curr->match)) {
        last = curr;
        curr = rcu_dereference_bh(curr->next);
    }

    if (curr != NULL && flow_key_equal(match, curr->match)) {
        /* Entry exists: unlink it under the table lock. */
        ft_lock(flags);
        if (curr == ft->table[index]) {
            /* Entry is at the head of the bucket. */
            rcu_assign_pointer(ft->table[index], curr->next);
        } else {
            rcu_assign_pointer(last->next, curr->next);
        }
        ft_unlock(flags);
        /*
         * Free both the stack and the entry. Note this happens
         * immediately after unlinking, with no grace period for
         * concurrent RCU readers (see the sketch below).
         */
        free_match_stk_entry(curr);
        atomic_dec(&ft->num_flows);
    }
    /* Else the entry doesn't exist; nothing to do. */
}
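/*
 * Hedged sketch (not from the original source): readers traverse this table
 * under rcu_read_lock_bh() without holding ft's lock, so freeing an entry
 * immediately after unlinking can free memory a reader is still using. The
 * conventional fix is to defer the free past a grace period with call_rcu().
 * The "rcu" field and the callback below are hypothetical additions,
 * assuming match_stk_t can carry a struct rcu_head member.
 */
static void free_match_stk_entry_rcu(struct rcu_head *head)
{
    /* "rcu" is the assumed rcu_head member inside match_stk_t. */
    match_stk_t *entry = container_of(head, match_stk_t, rcu);

    free_match_stk_entry(entry);
}

/*
 * In flow_table_rem(), after unlinking under ft_lock(), the direct
 * free_match_stk_entry(curr) call would become:
 *
 *     call_rcu(&curr->rcu, free_match_stk_entry_rcu);
 *
 * which waits out all pre-existing RCU readers before reclaiming.
 */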
/**
 * Copies this module's current configuration to "clone".
 *
 * @param[out] clone a copy of the current config will be placed here.
 *             Must already be allocated.
 * @return zero on success, nonzero on failure.
 */
int filtering_clone_config(struct filtering_config *clone)
{
    rcu_read_lock_bh();
    *clone = *rcu_dereference_bh(config);
    rcu_read_unlock_bh();
    return 0;
}
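/*
 * Hedged usage sketch (assumed caller, not from the original source): the
 * copy is taken inside the read-side critical section, so the caller gets a
 * consistent snapshot it can use afterwards without holding any lock.
 */
static void example_read_config(void)
{
    struct filtering_config snapshot;

    if (filtering_clone_config(&snapshot) != 0)
        return;
    /* ... use snapshot fields freely; it is a private copy ... */
}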
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                                 int *qerr)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct tcf_result res;
    struct tcf_proto *fl;
    int result;

    if (TC_H_MAJ(skb->priority) == sch->handle &&
        TC_H_MIN(skb->priority) > 0 &&
        TC_H_MIN(skb->priority) <= q->divisor)
        return TC_H_MIN(skb->priority);

    fl = rcu_dereference_bh(q->filter_list);
    if (!fl)
        return sfq_hash(q, skb) + 1;

    *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    result = tc_classify(skb, fl, &res, false);
    if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
        switch (result) {
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
            *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
            /* fall through */
        case TC_ACT_SHOT:
            return 0;
        }
#endif
        if (TC_H_MIN(res.classid) <= q->divisor)
            return TC_H_MIN(res.classid);
    }
    return 0;
}
static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct ingress_qdisc_data *p = qdisc_priv(sch);
    struct tcf_result res;
    struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
    int result;

    result = tc_classify(skb, fl, &res);

    qdisc_bstats_update(sch, skb);
    switch (result) {
    case TC_ACT_SHOT:
        result = TC_ACT_SHOT;
        qdisc_qstats_drop(sch);
        break;
    case TC_ACT_STOLEN:
    case TC_ACT_QUEUED:
        result = TC_ACT_STOLEN;
        break;
    case TC_ACT_RECLASSIFY:
    case TC_ACT_OK:
        skb->tc_index = TC_H_MIN(res.classid);
        /* fall through */
    default:
        result = TC_ACT_OK;
        break;
    }

    return result;
}
static struct Qdisc *multiq_classify(struct sk_buff *skb, struct Qdisc *sch,
                                     int *qerr)
{
    struct multiq_sched_data *q = qdisc_priv(sch);
    u32 band;
    struct tcf_result res;
    struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
    int err;

    *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    err = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
    switch (err) {
    case TC_ACT_STOLEN:
    case TC_ACT_QUEUED:
    case TC_ACT_TRAP:
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
        /* fall through */
    case TC_ACT_SHOT:
        return NULL;
    }
#endif
    band = skb_get_queue_mapping(skb);
    if (band >= q->bands)
        return q->queues[0];

    return q->queues[band];
}
static void tcf_action_goto_chain_exec(const struct tc_action *a,
                                       struct tcf_result *res)
{
    const struct tcf_chain *chain = a->goto_chain;

    res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
/* Hash-to-port-mapping tx port selection */
static struct team_port *lb_htpm_select_tx_port(struct team *team,
                                                struct lb_priv *lb_priv,
                                                struct sk_buff *skb,
                                                unsigned char hash)
{
    return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
}
static struct Qdisc *prio_classify(struct sk_buff *skb, struct Qdisc *sch,
                                   int *qerr)
{
    struct prio_sched_data *q = qdisc_priv(sch);
    u32 band = skb->priority;
    struct tcf_result res;
    struct tcf_proto *fl;
    int err;

    *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    if (TC_H_MAJ(skb->priority) != sch->handle) {
        fl = rcu_dereference_bh(q->filter_list);
        err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
        switch (err) {
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
            *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
            /* fall through */
        case TC_ACT_SHOT:
            return NULL;
        }
#endif
        if (!fl || err < 0) {
            if (TC_H_MAJ(band))
                band = 0;
            return q->queues[q->prio2band[band & TC_PRIO_MAX]];
        }
        band = res.classid;
    }
    band = TC_H_MIN(band) - 1;
    if (band >= q->bands)
        return q->queues[q->prio2band[0]];

    return q->queues[band];
}
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
                          struct tcf_result *res)
{
    struct tcf_tunnel_key *t = to_tunnel_key(a);
    struct tcf_tunnel_key_params *params;
    int action;

    params = rcu_dereference_bh(t->params);

    tcf_lastuse_update(&t->tcf_tm);
    bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
    action = READ_ONCE(t->tcf_action);

    switch (params->tcft_action) {
    case TCA_TUNNEL_KEY_ACT_RELEASE:
        skb_dst_drop(skb);
        break;
    case TCA_TUNNEL_KEY_ACT_SET:
        skb_dst_drop(skb);
        skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
        break;
    default:
        WARN_ONCE(1, "Bad tunnel_key action %d.\n", params->tcft_action);
        break;
    }

    return action;
}
int flush_bytes_array(void)
{
    struct bytes *tmp, *old;

    rcu_read_lock_bh();
    if (!(rcu_dereference_bh(bytes_to_skip)->array)) {
        log_info("Byte array list is empty; nothing to flush.");
        rcu_read_unlock_bh();
        return 0;
    }
    rcu_read_unlock_bh();

    tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
    if (!tmp) {
        log_err("Could not allocate struct bytes.");
        return -ENOMEM;
    }

    old = bytes_to_skip;
    *tmp = *bytes_to_skip;

    /* Delete. */
    tmp->array = NULL;
    tmp->count = 0;

    rcu_assign_pointer(bytes_to_skip, tmp);
    synchronize_rcu_bh();

    if (old->array)
        kfree(old->array);
    kfree(old);
    return 0;
}
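/*
 * Hedged alternative sketch (not from the original source): if blocking in
 * synchronize_rcu_bh() is undesirable, the old structure can be reclaimed
 * asynchronously with call_rcu_bh(), the callback-based counterpart of the
 * same RCU-bh flavor. This assumes struct bytes is extended with a
 * hypothetical struct rcu_head member named "rcu".
 */
static void bytes_free_rcu(struct rcu_head *head)
{
    struct bytes *old = container_of(head, struct bytes, rcu);

    kfree(old->array);  /* kfree(NULL) is a no-op */
    kfree(old);
}

/*
 * In flush_bytes_array(), after rcu_assign_pointer(bytes_to_skip, tmp):
 *
 *     call_rcu_bh(&old->rcu, bytes_free_rcu);
 *
 * would replace the synchronize_rcu_bh() + kfree() sequence.
 */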
static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
                              struct padata_pcrypt *pcrypt)
{
    unsigned int cpu_index, cpu, i;
    struct pcrypt_cpumask *cpumask;

    cpu = *cb_cpu;

    rcu_read_lock_bh();
    cpumask = rcu_dereference_bh(pcrypt->cb_cpumask);
    if (cpumask_test_cpu(cpu, cpumask->mask))
        goto out;

    if (!cpumask_weight(cpumask->mask))
        goto out;

    cpu_index = cpu % cpumask_weight(cpumask->mask);

    cpu = cpumask_first(cpumask->mask);
    for (i = 0; i < cpu_index; i++)
        cpu = cpumask_next(cpu, cpumask->mask);

    *cb_cpu = cpu;

out:
    rcu_read_unlock_bh();
    return padata_do_parallel(pcrypt->pinst, padata, cpu);
}
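/*
 * Hedged update-side sketch (assumed, not from the original source): the
 * callback cpumask read above must be swapped via RCU publication so that
 * pcrypt_do_parallel() never dereferences a freed mask. The function name
 * is hypothetical; it assumes struct pcrypt_cpumask holds a cpumask_var_t
 * "mask" as in the read side.
 */
static void pcrypt_swap_cpumask(struct padata_pcrypt *pcrypt,
                                struct pcrypt_cpumask *new_mask)
{
    struct pcrypt_cpumask *old_mask = pcrypt->cb_cpumask;

    rcu_assign_pointer(pcrypt->cb_cpumask, new_mask);
    synchronize_rcu_bh();   /* wait out rcu_read_lock_bh() readers */
    free_cpumask_var(old_mask->mask);
    kfree(old_mask);
}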
/*
 * Classify flow using either:
 *  1. pre-existing classification result in skb
 *  2. fast internal classification
 *  3. TC filter based classification
 */
static bool choke_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct tcf_result res;
    struct tcf_proto *fl;
    int result;

    fl = rcu_dereference_bh(q->filter_list);
    result = tc_classify(skb, fl, &res);
    if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
        switch (result) {
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
            *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
            /* fall through */
        case TC_ACT_SHOT:
            return false;
        }
#endif
        choke_set_classid(skb, TC_H_MIN(res.classid));
        return true;
    }

    return false;
}
static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
                           struct tcf_result *res)
{
    struct tcf_skbedit *d = to_skbedit(a);
    struct tcf_skbedit_params *params;
    int action;

    tcf_lastuse_update(&d->tcf_tm);
    bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);

    params = rcu_dereference_bh(d->params);
    action = READ_ONCE(d->tcf_action);

    if (params->flags & SKBEDIT_F_PRIORITY)
        skb->priority = params->priority;
    if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
        int wlen = skb_network_offset(skb);

        switch (tc_skb_protocol(skb)) {
        case htons(ETH_P_IP):
            wlen += sizeof(struct iphdr);
            if (!pskb_may_pull(skb, wlen))
                goto err;
            skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
            break;
        case htons(ETH_P_IPV6):
            wlen += sizeof(struct ipv6hdr);
            if (!pskb_may_pull(skb, wlen))
                goto err;
            skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
            break;
        }
    }

    /*
     * The remaining flag handling is elided in this excerpt; a minimal
     * close so the function compiles and the "goto err" has a target.
     */
    return action;

err:
    qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
    return TC_ACT_SHOT;
}
struct stack flow_table_get(flow_table_t *ft, struct flow_keys match,
                            routing_table_t *routing_table, u32 dst_ip)
{
    struct stack no_stack;
    struct stack new_stk;
    struct stack old_stk;
    unsigned long flags = 0;
    match_stk_t *curr = NULL;
    match_stk_t *prev = NULL;
    int index = 0;

    no_stack.num_tags = -1;
    index = flow_keys_hash(match) % ft->size;
    curr = rcu_dereference_bh(ft->table[index]);

    while (curr != NULL && !flow_key_equal(match, curr->match)) {
        pr_debug("FT: active non-matching flow\n");
        prev = curr;
        curr = rcu_dereference_bh(curr->next);
    }

    /* The flow table returns "no_stack" on a miss. */
    if (curr == NULL || !flow_key_equal(match, curr->match)) {
        pr_debug("FT: no matching flow\n");
        return no_stack;
    }

    /* Found a matching stack; check the idle timeout before returning. */
    if (flow_idle_time(curr->last_used) > IDLE_TIMEOUT) {
        pr_debug("FT: matched flow - timeout.. num_flows %d -> %d\n",
                 atomic_read(&ft->num_flows),
                 atomic_read(&ft->num_flows) - 1);
        /*
         * On idle timeout the entry is not actually removed (and
         * num_flows is not decremented, despite the debug message);
         * instead its stack is swapped for a fresh random one for
         * this destination.
         */
        new_stk = stack_dup(get_random_stack_for_dst(dst_ip, routing_table));
        old_stk = curr->stk;
        ft_lock(flags);
        curr->stk = new_stk;
        ft_unlock(flags);
        stack_free(old_stk);
    }

    pr_debug("FT: matched flow - updating last_used\n");
    /* Update last_used on a successful match. */
    curr->last_used = jiffies;
    return curr->stk;
}
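/*
 * Hedged caller sketch (assumed, not from the original source):
 * flow_table_get() calls rcu_dereference_bh() but takes no RCU lock itself,
 * so callers must wrap the lookup and any use of the returned stack in an
 * RCU-bh read-side critical section. Everything here except
 * flow_table_get() itself is hypothetical.
 */
static void example_lookup(flow_table_t *ft, struct flow_keys match,
                           routing_table_t *rt, u32 dst_ip)
{
    struct stack stk;

    rcu_read_lock_bh();
    stk = flow_table_get(ft, match, rt, dst_ip);
    if (stk.num_tags >= 0) {
        /* Hit: use stk while still inside the read section. */
    }
    rcu_read_unlock_bh();
}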
/**
 * Use this function to safely obtain the configuration value which dictates
 * whether Jool should drop all informational ICMP packets that are traveling
 * from IPv6 to IPv4.
 *
 * @return whether Jool should drop all ICMPv6 info packets.
 */
static bool filter_icmpv6_info(void)
{
    bool result;

    rcu_read_lock_bh();
    result = rcu_dereference_bh(config)->drop_icmp6_info;
    rcu_read_unlock_bh();

    return result;
}
/**
 * Use this function to safely obtain the configuration value which dictates
 * whether Jool should be applying "address-dependent filtering" (look that
 * up in the RFC).
 *
 * @return whether Jool should apply "address-dependent filtering".
 */
static bool address_dependent_filtering(void)
{
    bool result;

    rcu_read_lock_bh();
    result = rcu_dereference_bh(config)->drop_by_addr;
    rcu_read_unlock_bh();

    return result;
}
/**
 * Use this function to safely obtain the configuration value which dictates
 * whether IPv4 nodes should be allowed to initiate conversations with IPv6
 * nodes.
 *
 * @return whether IPv4 nodes should be allowed to initiate conversations
 *	with IPv6 nodes.
 */
static bool drop_external_connections(void)
{
    bool result;

    rcu_read_lock_bh();
    result = rcu_dereference_bh(config)->drop_external_tcp;
    rcu_read_unlock_bh();

    return result;
}
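/*
 * Hedged update-side sketch (assumed, not from the original source): the
 * three getters above are only safe if writers replace the whole config
 * object through rcu_assign_pointer() and defer freeing the old one until
 * all rcu_read_lock_bh() readers are done. The function name and locking
 * assumptions are modeled on the surrounding Jool code, not taken from it.
 */
static int set_filtering_config(struct filtering_config *new)
{
    struct filtering_config *old;

    old = config;               /* updater side; serialized by caller */
    rcu_assign_pointer(config, new);
    synchronize_rcu_bh();       /* wait for rcu_read_lock_bh() readers */
    kfree(old);
    return 0;
}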
static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                         struct tcf_result *res)
{
    struct cls_mall_head *head = rcu_dereference_bh(tp->root);
    struct cls_mall_filter *f = head->filter;

    if (tc_skip_sw(f->flags))
        return -1;

    return tcf_exts_exec(skb, &f->exts, res);
}
int sk_detach_filter(struct sock *sk)
{
    int ret = -ENOENT;
    struct sk_filter *filter;

    rcu_read_lock_bh();
    filter = rcu_dereference_bh(sk->sk_filter);
    if (filter) {
        rcu_assign_pointer(sk->sk_filter, NULL);
        sk_filter_delayed_uncharge(sk, filter);
        ret = 0;
    }
    rcu_read_unlock_bh();

    return ret;
}
/**
 * padata_do_parallel - padata parallelization function
 *
 * @pinst: padata instance
 * @padata: object to be parallelized
 * @cb_cpu: cpu the serialization callback function will run on,
 *          must be in the serial cpumask of padata (i.e. cpumask.cbcpu).
 *
 * The parallelization callback function will run with BHs off.
 * Note: Every object which is parallelized by padata_do_parallel
 * must be seen by padata_do_serial.
 */
int padata_do_parallel(struct padata_instance *pinst,
                       struct padata_priv *padata, int cb_cpu)
{
    int target_cpu, err;
    struct padata_parallel_queue *queue;
    struct parallel_data *pd;

    rcu_read_lock_bh();

    pd = rcu_dereference_bh(pinst->pd);

    err = -EINVAL;
    if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
        goto out;

    if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
        goto out;

    err = -EBUSY;
    if ((pinst->flags & PADATA_RESET))
        goto out;

    if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
        goto out;

    err = 0;
    atomic_inc(&pd->refcnt);
    padata->pd = pd;
    padata->cb_cpu = cb_cpu;

    target_cpu = padata_cpu_hash(pd);
    padata->cpu = target_cpu;
    queue = per_cpu_ptr(pd->pqueue, target_cpu);

    spin_lock(&queue->parallel.lock);
    list_add_tail(&padata->list, &queue->parallel.list);
    spin_unlock(&queue->parallel.lock);

    queue_work_on(target_cpu, pinst->wq, &queue->work);

out:
    rcu_read_unlock_bh();

    return err;
}
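/*
 * Hedged caller sketch (assumed, not from the original source): a padata
 * user embeds struct padata_priv in its own work item and supplies the
 * parallel/serial callbacks; the parallel callback must eventually hand the
 * object back via padata_do_serial() so results are delivered in submission
 * order. struct my_work and its callbacks are hypothetical.
 */
struct my_work {
    struct padata_priv padata;
    /* ... request state ... */
};

static void my_parallel(struct padata_priv *padata)
{
    /* Runs with BHs off on the selected parallel CPU. */
    /* ... do the heavy lifting, then hand back for ordering: */
    padata_do_serial(padata);
}

static void my_serial(struct padata_priv *padata)
{
    /* Runs in the original submission order on cb_cpu. */
}

static int my_submit(struct padata_instance *pinst, struct my_work *w,
                     int cb_cpu)
{
    w->padata.parallel = my_parallel;
    w->padata.serial = my_serial;
    return padata_do_parallel(pinst, &w->padata, cb_cpu);
}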
/**
 * sk_filter - run a packet through a socket filter
 * @sk: sock associated with &sk_buff
 * @skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to the correct size returned
 * by sk_run_filter. If pkt_len is 0 we toss the packet. If skb->len is
 * smaller than pkt_len we keep the whole skb->data. This is the socket
 * level wrapper to sk_run_filter. It returns 0 if the packet should be
 * accepted or -EPERM if the packet should be tossed.
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
    int err;
    struct sk_filter *filter;

    err = security_sock_rcv_skb(sk, skb);
    if (err)
        return err;

    rcu_read_lock_bh();
    filter = rcu_dereference_bh(sk->sk_filter);
    if (filter) {
        unsigned int pkt_len = SK_RUN_FILTER(filter, skb);

        err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
    }
    rcu_read_unlock_bh();

    return err;
}
bool skb_compare(struct sk_buff *expected, struct sk_buff *actual)
{
    struct bytes *skip_byte;
    unsigned char *expected_ptr, *actual_ptr;
    unsigned int i, min_len, skip_count;
    int errors = 0;

    if (expected->len != actual->len) {
        print_error_table_hdr(errors);
        log_info(" Length\t%d\t %d", expected->len, actual->len);
        errors++;
    }

    expected_ptr = skb_network_header(expected);
    actual_ptr = skb_network_header(actual);
    min_len = (expected->len < actual->len) ? expected->len : actual->len;

    rcu_read_lock_bh();
    skip_byte = rcu_dereference_bh(bytes_to_skip);

    skip_count = 0;
    for (i = 0; i < min_len; i++) {
        if (skip_count < skip_byte->count &&
            skip_byte->array[skip_count] == i) {
            skip_count++;
            continue;
        }

        if (expected_ptr[i] != actual_ptr[i]) {
            print_error_table_hdr(errors);
            log_info(" byte %u\t0x%x\t 0x%x", i, expected_ptr[i],
                     actual_ptr[i]);
            errors++;
            if (errors >= 8)
                break;
        }
    }
    rcu_read_unlock_bh();

    return !errors;
}
static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
                          struct tcf_result *res)
{
    struct tcf_sample *s = to_sample(a);
    struct psample_group *psample_group;
    int retval;
    int size;
    int iif;
    int oif;

    tcf_lastuse_update(&s->tcf_tm);
    bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
    retval = READ_ONCE(s->tcf_action);

    psample_group = rcu_dereference_bh(s->psample_group);

    /* randomly sample packets according to rate */
    if (psample_group && (prandom_u32() % s->rate == 0)) {
        if (!skb_at_tc_ingress(skb)) {
            iif = skb->skb_iif;
            oif = skb->dev->ifindex;
        } else {
            iif = skb->dev->ifindex;
            oif = 0;
        }

        /* on ingress, the mac header gets popped, so push it back */
        if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
            skb_push(skb, skb->mac_len);

        size = s->truncate ? s->trunc_size : skb->len;
        psample_sample_packet(psample_group, skb, size, iif, oif, s->rate);

        if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
            skb_pull(skb, skb->mac_len);
    }

    return retval;
}
/**
 * sk_attach_filter - attach a socket filter
 * @fprog: the filter program
 * @sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
    struct sk_filter *fp, *old_fp;
    unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
    int err;

    /* Make sure the new filter is there and in the right amounts. */
    if (fprog->filter == NULL)
        return -EINVAL;

    fp = sock_kmalloc(sk, fsize + sizeof(*fp), GFP_KERNEL);
    if (!fp)
        return -ENOMEM;
    if (copy_from_user(fp->insns, fprog->filter, fsize)) {
        sock_kfree_s(sk, fp, fsize + sizeof(*fp));
        return -EFAULT;
    }

    atomic_set(&fp->refcnt, 1);
    fp->len = fprog->len;
    fp->bpf_func = sk_run_filter;

    err = sk_chk_filter(fp->insns, fp->len);
    if (err) {
        sk_filter_uncharge(sk, fp);
        return err;
    }

    bpf_jit_compile(fp);

    rcu_read_lock_bh();
    old_fp = rcu_dereference_bh(sk->sk_filter);
    rcu_assign_pointer(sk->sk_filter, fp);
    rcu_read_unlock_bh();

    if (old_fp)
        sk_filter_delayed_uncharge(sk, old_fp);
    return 0;
}
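/*
 * Hedged usage sketch (user-space view, not from the original source):
 * sk_attach_filter() is what the kernel runs when an application sets
 * SO_ATTACH_FILTER on a socket. A minimal classic-BPF program that accepts
 * every packet looks like this; attach_accept_all() is a hypothetical
 * helper name.
 */
#include <linux/filter.h>
#include <sys/socket.h>

static int attach_accept_all(int fd)
{
    struct sock_filter insns[] = {
        /* return unsigned max: accept the whole packet */
        BPF_STMT(BPF_RET | BPF_K, 0xFFFFFFFF),
    };
    struct sock_fprog prog = {
        .len = sizeof(insns) / sizeof(insns[0]),
        .filter = insns,
    };

    return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                      &prog, sizeof(prog));
}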
/** * Returns in "result" the IPv4 address an ICMP error towards "out"'s * destination should be sourced with. */ static int get_rfc6791_address(struct packet *in, __u64 count, __be32 *result) { struct list_head *list; struct list_head *node; struct pool_entry *entry = NULL; unsigned int addr_index; if (config_randomize_rfc6791_pool()) get_random_bytes(&addr_index, sizeof(addr_index)); else addr_index = pkt_ip6_hdr(in)->hop_limit; /* unsigned int % __u64 does something weird, hence the trouble. */ if (count <= 0xFFFFFFFFU) addr_index %= (unsigned int) count; list = rcu_dereference_bh(pool); list_for_each_rcu_bh(node, list) { entry = list_entry(node, struct pool_entry, list_hook); count = prefix4_get_addr_count(&entry->prefix); if (count >= addr_index) break; addr_index -= count; }
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
                                      int *qerr)
{
    struct fq_codel_sched_data *q = qdisc_priv(sch);
    struct tcf_proto *filter;
    struct tcf_result res;
    int result;

    if (TC_H_MAJ(skb->priority) == sch->handle &&
        TC_H_MIN(skb->priority) > 0 &&
        TC_H_MIN(skb->priority) <= q->flows_cnt)
        return TC_H_MIN(skb->priority);

    filter = rcu_dereference_bh(q->filter_list);
    if (!filter)
        return fq_codel_hash(q, skb) + 1;

    *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    result = tcf_classify(skb, filter, &res, false);
    if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
        switch (result) {
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
        case TC_ACT_TRAP:
            *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
            /* fall through */
        case TC_ACT_SHOT:
            return 0;
        }
#endif
        if (TC_H_MIN(res.classid) <= q->flows_cnt)
            return TC_H_MIN(res.classid);
    }
    return 0;
}
static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct sfb_sched_data *q = qdisc_priv(sch);
    struct Qdisc *child = q->qdisc;
    struct tcf_proto *fl;
    int i;
    u32 p_min = ~0;
    u32 minqlen = ~0;
    u32 r, sfbhash;
    u32 slot = q->slot;
    int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;

    if (unlikely(sch->q.qlen >= q->limit)) {
        qdisc_qstats_overlimit(sch);
        q->stats.queuedrop++;
        goto drop;
    }

    if (q->rehash_interval > 0) {
        unsigned long limit = q->rehash_time + q->rehash_interval;

        if (unlikely(time_after(jiffies, limit))) {
            sfb_swap_slot(q);
            q->rehash_time = jiffies;
        } else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
                            time_after(jiffies, limit - q->warmup_time))) {
            q->double_buffering = true;
        }
    }

    fl = rcu_dereference_bh(q->filter_list);
    if (fl) {
        u32 salt;

        /* If using external classifiers, get result and record it. */
        if (!sfb_classify(skb, fl, &ret, &salt))
            goto other_drop;
        sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
    } else {
        sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
    }

    if (!sfbhash)
        sfbhash = 1;
    sfb_skb_cb(skb)->hashes[slot] = sfbhash;

    for (i = 0; i < SFB_LEVELS; i++) {
        u32 hash = sfbhash & SFB_BUCKET_MASK;
        struct sfb_bucket *b = &q->bins[slot].bins[i][hash];

        sfbhash >>= SFB_BUCKET_SHIFT;
        if (b->qlen == 0)
            decrement_prob(b, q);
        else if (b->qlen >= q->bin_size)
            increment_prob(b, q);
        if (minqlen > b->qlen)
            minqlen = b->qlen;
        if (p_min > b->p_mark)
            p_min = b->p_mark;
    }

    slot ^= 1;
    sfb_skb_cb(skb)->hashes[slot] = 0;

    if (unlikely(minqlen >= q->max)) {
        qdisc_qstats_overlimit(sch);
        q->stats.bucketdrop++;
        goto drop;
    }

    if (unlikely(p_min >= SFB_MAX_PROB)) {
        /* Inelastic flow */
        if (q->double_buffering) {
            sfbhash = skb_get_hash_perturb(skb,
                                           q->bins[slot].perturbation);
            if (!sfbhash)
                sfbhash = 1;
            sfb_skb_cb(skb)->hashes[slot] = sfbhash;

            for (i = 0; i < SFB_LEVELS; i++) {
                u32 hash = sfbhash & SFB_BUCKET_MASK;
                struct sfb_bucket *b = &q->bins[slot].bins[i][hash];

                sfbhash >>= SFB_BUCKET_SHIFT;
                if (b->qlen == 0)
                    decrement_prob(b, q);
                else if (b->qlen >= q->bin_size)
                    increment_prob(b, q);
            }
        }
        if (sfb_rate_limit(skb, q)) {
            qdisc_qstats_overlimit(sch);
            q->stats.penaltydrop++;
            goto drop;
        }
        goto enqueue;
    }

    /*
     * The excerpt is cut short here (the marking/ECN decision is elided);
     * what follows is a hedged minimal close, modeled on common qdisc
     * conventions, so the goto targets exist and the function compiles.
     */
enqueue:
    ret = qdisc_enqueue(skb, child);
    if (likely(ret == NET_XMIT_SUCCESS)) {
        sch->q.qlen++;
    } else if (net_xmit_drop_count(ret)) {
        q->stats.childdrop++;
        qdisc_qstats_drop(sch);
    }
    return ret;

drop:
    qdisc_drop(skb, sch);
    return NET_XMIT_CN;

other_drop:
    if (ret & __NET_XMIT_BYPASS)
        qdisc_qstats_drop(sch);
    kfree_skb(skb);
    return ret;
}
void flow_table_set(flow_table_t *ft, struct flow_keys match,
                    struct stack orig_stk)
{
    unsigned long flags = 0;
    int index = 0;
    match_stk_t *new_match_stk = NULL;
    match_stk_t *curr = NULL;
    match_stk_t *prev = NULL;
    match_stk_t *tmp = NULL;
    match_stk_t *to_free = NULL;

    /* Keep separate copies of the stack in the routing and flow tables. */
    struct stack stk = stack_dup(orig_stk);

    new_match_stk = flow_table_new_match_stk(match, stk);
    if (new_match_stk == NULL) {
        stack_free(stk);
        return;
    }

    index = flow_keys_hash(match) % ft->size;
    pr_debug("FT: setting flow entry %d ... num_flows: %d -> %d\n", index,
             atomic_read(&ft->num_flows), atomic_read(&ft->num_flows) + 1);

    ft_lock(flags);
    curr = rcu_dereference_bh(ft->table[index]);
    while (curr != NULL && !flow_key_equal(match, curr->match)) {
        /* Note: the "&& 0" deliberately disables this eviction branch. */
        if (flow_idle_time(curr->last_used) > IDLE_TIMEOUT && 0) {
            /* If idle timed-out, unlink the flow entry. */
            pr_debug("FT: non-matching flow timeout remove it %d -> %d flows\n",
                     atomic_read(&ft->num_flows),
                     atomic_read(&ft->num_flows) - 1);
            if (prev == NULL) {
                /* At the head of the bucket. */
                tmp = curr->next;
                rcu_assign_pointer(ft->table[index], tmp);
                curr->next = to_free;
                to_free = curr;
                curr = rcu_dereference_bh(ft->table[index]);
            } else {
                prev->next = curr->next;
                curr->next = to_free;
                to_free = curr;
                curr = rcu_dereference_bh(prev->next);
            }
            atomic_dec(&ft->num_flows);
        } else {
            /* Otherwise advance to the next entry in the bucket. */
            pr_debug("FT: active non-matching flow\n");
            prev = curr;
            curr = rcu_dereference_bh(curr->next);
        }
    }

    if (curr != NULL && flow_key_equal(match, curr->match)) {
        /* Entry already exists: replace its stack in place. */
        curr->stk = stk;
        free_match_stk_entry(new_match_stk);
    } else {
        tmp = rcu_dereference_bh(ft->table[index]);
        if (curr == tmp) {
            pr_debug("FT: creating new bucket entry at %d\n", index);
            new_match_stk->next = curr;
            rcu_assign_pointer(ft->table[index], new_match_stk);
        } else if (curr == NULL) {
            pr_debug("FT: appending bucket entry at %d\n", index);
            rcu_assign_pointer(prev->next, new_match_stk);
        } else {
            pr_debug("FT: inserting bucket entry at %d\n", index);
            new_match_stk->next = curr;
            rcu_assign_pointer(prev->next, new_match_stk);
        }
        atomic_inc(&ft->num_flows);
    }
    ft_unlock(flags);
    /* Entries chained on to_free are never released in this excerpt. */
}
/*
 * The excerpt begins mid-declaration; the HAVE_KIOCB_MSG_PARAM branch of
 * the signature is a hedged reconstruction.
 */
#ifdef HAVE_KIOCB_MSG_PARAM
static int
kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
                struct msghdr *m, size_t len, int flags)
#else
static int
kni_sock_rcvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
#endif /* HAVE_KIOCB_MSG_PARAM */
{
    int vnet_hdr_len = 0;
    int pkt_len = 0;
    struct kni_vhost_queue *q =
        container_of(sock->sk, struct kni_vhost_queue, sk);
    static struct virtio_net_hdr __attribute__((unused)) vnet_hdr = {
        .flags = 0,
        .gso_type = VIRTIO_NET_HDR_GSO_NONE
    };

    if (unlikely(q == NULL || q->kni == NULL))
        return 0;

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
    if (likely(q->flags & IFF_VNET_HDR)) {
        vnet_hdr_len = q->vnet_hdr_sz;
        /* len is size_t (unsigned), so test before subtracting rather
         * than checking "(len -= vnet_hdr_len) < 0", which is never
         * true for an unsigned type. */
        if (len < (size_t)vnet_hdr_len)
            return -EINVAL;
        len -= vnet_hdr_len;
    }
#endif

    pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len);
    if (unlikely(pkt_len == 0))
        return 0;

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
    /* no need to copy hdr when no pkt received */
#ifdef HAVE_IOV_ITER_MSGHDR
    /* copy_to_iter() returns the number of bytes copied, so a short
     * copy (not a nonzero return) signals failure. */
    if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len,
                              &m->msg_iter) != vnet_hdr_len))
#else
    if (unlikely(memcpy_toiovecend(m->msg_iov, (void *)&vnet_hdr,
                                   0, vnet_hdr_len)))
#endif /* HAVE_IOV_ITER_MSGHDR */
        return -EFAULT;
#endif /* RTE_KNI_VHOST_VNET_HDR_EN */

    KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
               (unsigned long)len, q->flags, pkt_len);

    return pkt_len + vnet_hdr_len;
}

/* dummy tap-like ioctl */
static int kni_sock_ioctl(struct socket *sock, unsigned int cmd,
                          unsigned long arg)
{
    void __user *argp = (void __user *)arg;
    struct ifreq __user *ifr = argp;
    unsigned int __user *up = argp;
    struct kni_vhost_queue *q =
        container_of(sock->sk, struct kni_vhost_queue, sk);
    struct kni_dev *kni;
    unsigned int u;
    int __user *sp = argp;
    int s;
    int ret;

    KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);

    switch (cmd) {
    case TUNSETIFF:
        KNI_DBG("TUNSETIFF\n");
        /* ignore the name, just look at flags */
        if (get_user(u, &ifr->ifr_flags))
            return -EFAULT;

        ret = 0;
        if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
            ret = -EINVAL;
        else
            q->flags = u;

        return ret;

    case TUNGETIFF:
        KNI_DBG("TUNGETIFF\n");
        rcu_read_lock_bh();
        kni = rcu_dereference_bh(q->kni);
        if (kni)
            dev_hold(kni->net_dev);
        rcu_read_unlock_bh();

        if (!kni)
            return -ENOLINK;

        ret = 0;
        if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
            put_user(q->flags, &ifr->ifr_flags))
            ret = -EFAULT;
        dev_put(kni->net_dev);
        return ret;

    case TUNGETFEATURES:
        KNI_DBG("TUNGETFEATURES\n");
        u = IFF_TAP | IFF_NO_PI;
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
        u |= IFF_VNET_HDR;
#endif
        if (put_user(u, up))
            return -EFAULT;
        return 0;

    case TUNSETSNDBUF:
        KNI_DBG("TUNSETSNDBUF\n");
        if (get_user(u, up))
            return -EFAULT;

        q->sk.sk_sndbuf = u;
        return 0;

    case TUNGETVNETHDRSZ:
        s = q->vnet_hdr_sz;
        if (put_user(s, sp))
            return -EFAULT;
        KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
        return 0;

    case TUNSETVNETHDRSZ:
        if (get_user(s, sp))
            return -EFAULT;
        if (s < (int)sizeof(struct virtio_net_hdr))
            return -EINVAL;

        KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
        q->vnet_hdr_sz = s;
        return 0;

    case TUNSETOFFLOAD:
        KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
        /* no offloads supported yet */
        if (!(q->flags & IFF_VNET_HDR))
            return -EINVAL;

        return 0;
#else
        return -EINVAL;
#endif

    default:
        KNI_DBG("NOT SUPPORTED\n");
        return -EINVAL;
    }
}