static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
			struct tcf_result *res)
{
	const struct nf_conntrack_tuple_hash *thash;
	struct nf_conntrack_tuple tuple;
	enum ip_conntrack_info ctinfo;
	struct tcf_connmark_info *ca = a->priv;
	struct nf_conntrack_zone zone;
	struct nf_conn *c;
	int proto;

	spin_lock(&ca->tcf_lock);
	ca->tcf_tm.lastuse = jiffies;
	bstats_update(&ca->tcf_bstats, skb);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (skb->len < sizeof(struct iphdr))
			goto out;

		proto = NFPROTO_IPV4;
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (skb->len < sizeof(struct ipv6hdr))
			goto out;

		proto = NFPROTO_IPV6;
	} else {
		goto out;
	}

	c = nf_ct_get(skb, &ctinfo);
	if (c) {
		skb->mark = c->mark;
		/* using overlimits stats to count how many packets marked */
		ca->tcf_qstats.overlimits++;
		goto out;
	}

	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
			       proto, ca->net, &tuple))
		goto out;

	zone.id = ca->zone;
	zone.dir = NF_CT_DEFAULT_ZONE_DIR;

	thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
	if (!thash)
		goto out;

	c = nf_ct_tuplehash_to_ctrack(thash);
	/* using overlimits stats to count how many packets marked */
	ca->tcf_qstats.overlimits++;
	skb->mark = c->mark;
	nf_ct_put(c);

out:
	spin_unlock(&ca->tcf_lock);
	return ca->tcf_action;
}
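For context, a minimal sketch of how a per-packet handler like tcf_connmark() is exposed to the tc action subsystem: it is wired in as the .act hook of a tc_action_ops table and registered at module init. The exact field set of tc_action_ops and the tcf_register_action() signature vary across kernel versions, so treat this as illustrative rather than the definitive act_connmark registration.

/* Illustrative only: ops table and module init, assuming the
 * kernel-4.x-era registration API (second argument is a hash
 * table mask in this era; later kernels pass per-netns ops).
 */
static struct tc_action_ops act_connmark_ops = {
	.kind	=	"connmark",
	.type	=	TCA_ACT_CONNMARK,
	.owner	=	THIS_MODULE,
	.act	=	tcf_connmark,		/* per-packet handler above */
	.dump	=	tcf_connmark_dump,	/* netlink dump counterpart */
	.init	=	tcf_connmark_init,	/* parses TCA_CONNMARK_* attrs */
};

static int __init connmark_init_module(void)
{
	return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK);
}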
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
		   struct tcf_result *res)
{
	struct tcf_bpf *prog = act->priv;
	int action, filter_res;
	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;

	if (unlikely(!skb_mac_header_was_set(skb)))
		return TC_ACT_UNSPEC;

	spin_lock(&prog->tcf_lock);

	prog->tcf_tm.lastuse = jiffies;
	bstats_update(&prog->tcf_bstats, skb);

	/* Needed here for accessing maps. */
	rcu_read_lock();
	if (at_ingress) {
		__skb_push(skb, skb->mac_len);
		filter_res = BPF_PROG_RUN(prog->filter, skb);
		__skb_pull(skb, skb->mac_len);
	} else {
		filter_res = BPF_PROG_RUN(prog->filter, skb);
	}
	rcu_read_unlock();

	/* A BPF program may overwrite the default action opcode.
	 * As in cls_bpf, if filter_res == -1 we use the default
	 * action specified from tc.
	 *
	 * In case a different well-known TC_ACT opcode has been
	 * returned, it will overwrite the default one.
	 *
	 * For everything else that is unknown, TC_ACT_UNSPEC is
	 * returned.
	 */
	switch (filter_res) {
	case TC_ACT_PIPE:
	case TC_ACT_RECLASSIFY:
	case TC_ACT_OK:
		action = filter_res;
		break;
	case TC_ACT_SHOT:
		action = filter_res;
		prog->tcf_qstats.drops++;
		break;
	case TC_ACT_UNSPEC:
		action = prog->tcf_action;
		break;
	default:
		action = TC_ACT_UNSPEC;
		break;
	}

	spin_unlock(&prog->tcf_lock);
	return action;
}
static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
		   struct tcf_result *res)
{
	int ret = 0, result = 0;
	struct tcf_ipt *ipt = a->priv;
	struct xt_action_param par;

	if (skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			return TC_ACT_UNSPEC;
	}

	spin_lock(&ipt->tcf_lock);

	ipt->tcf_tm.lastuse = jiffies;
	bstats_update(&ipt->tcf_bstats, skb);

	/* yes, we have to worry about both in and out dev
	 * worry later - danger - this API seems to have changed
	 * from earlier kernels
	 */
	par.in       = skb->dev;
	par.out      = NULL;
	par.hooknum  = ipt->tcfi_hook;
	par.target   = ipt->tcfi_t->u.kernel.target;
	par.targinfo = ipt->tcfi_t->data;
	ret = par.target->target(skb, &par);

	switch (ret) {
	case NF_ACCEPT:
		result = TC_ACT_OK;
		break;
	case NF_DROP:
		result = TC_ACT_SHOT;
		ipt->tcf_qstats.drops++;
		break;
	case XT_CONTINUE:
		result = TC_ACT_PIPE;
		break;
	default:
		if (net_ratelimit())
			pr_notice("tc filter: Bogus netfilter code %d assume ACCEPT\n",
				  ret);
		result = TC_POLICE_OK; /* legacy alias for TC_ACT_OK */
		break;
	}
	spin_unlock(&ipt->tcf_lock);
	return result;
}
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
		    struct tcf_result *res)
{
	struct tcf_defact *d = to_defact(a);

	spin_lock(&d->tcf_lock);
	tcf_lastuse_update(&d->tcf_tm);
	bstats_update(&d->tcf_bstats, skb);

	/* Print the policy string followed by '_' and the packet count.
	 * For example, if this is the 3rd packet and the string is "hello",
	 * the output is "hello_3" (without the quotes).
	 */
	pr_info("simple: %s_%d\n",
		(char *)d->tcfd_defdata, d->tcf_bstats.packets);
	spin_unlock(&d->tcf_lock);
	return d->tcf_action;
}
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
		       struct tcf_result *res)
{
	struct tcf_skbedit *d = a->priv;

	spin_lock(&d->tcf_lock);
	d->tcf_tm.lastuse = jiffies;
	bstats_update(&d->tcf_bstats, skb);

	if (d->flags & SKBEDIT_F_PRIORITY)
		skb->priority = d->priority;
	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
	    skb->dev->real_num_tx_queues > d->queue_mapping)
		skb_set_queue_mapping(skb, d->queue_mapping);
	if (d->flags & SKBEDIT_F_MARK)
		skb->mark = d->mark;

	spin_unlock(&d->tcf_lock);
	return d->tcf_action;
}
static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
		    struct tcf_result *res)
{
	struct tcf_csum *p = a->priv;
	int action;
	u32 update_flags;

	spin_lock(&p->tcf_lock);
	p->tcf_tm.lastuse = jiffies;
	bstats_update(&p->tcf_bstats, skb);
	action = p->tcf_action;
	update_flags = p->update_flags;
	spin_unlock(&p->tcf_lock);

	if (unlikely(action == TC_ACT_SHOT))
		goto drop;

	switch (skb->protocol) {
	case cpu_to_be16(ETH_P_IP):
		if (!tcf_csum_ipv4(skb, update_flags))
			goto drop;
		break;
	case cpu_to_be16(ETH_P_IPV6):
		if (!tcf_csum_ipv6(skb, update_flags))
			goto drop;
		break;
	}

	return action;

drop:
	spin_lock(&p->tcf_lock);
	p->tcf_qstats.drops++;
	spin_unlock(&p->tcf_lock);
	return TC_ACT_SHOT;
}
static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
		    struct tcf_result *res)
{
	struct tcf_vlan *v = a->priv;
	int action;
	int err;

	spin_lock(&v->tcf_lock);
	v->tcf_tm.lastuse = jiffies;
	bstats_update(&v->tcf_bstats, skb);
	action = v->tcf_action;

	switch (v->tcfv_action) {
	case TCA_VLAN_ACT_POP:
		err = skb_vlan_pop(skb);
		if (err)
			goto drop;
		break;
	case TCA_VLAN_ACT_PUSH:
		err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid);
		if (err)
			goto drop;
		break;
	default:
		BUG();
	}

	goto unlock;

drop:
	action = TC_ACT_SHOT;
	v->tcf_qstats.drops++;

unlock:
	spin_unlock(&v->tcf_lock);
	return action;
}
static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
		   struct tcf_result *res)
{
	struct tcf_nat *p = a->priv;
	struct iphdr *iph;
	__be32 old_addr;
	__be32 new_addr;
	__be32 mask;
	__be32 addr;
	int egress;
	int action;
	int ihl;
	int noff;

	spin_lock(&p->tcf_lock);

	p->tcf_tm.lastuse = jiffies;
	old_addr = p->old_addr;
	new_addr = p->new_addr;
	mask = p->mask;
	egress = p->flags & TCA_NAT_FLAG_EGRESS;
	action = p->tcf_action;

	bstats_update(&p->tcf_bstats, skb);

	spin_unlock(&p->tcf_lock);

	if (unlikely(action == TC_ACT_SHOT))
		goto drop;

	noff = skb_network_offset(skb);
	if (!pskb_may_pull(skb, sizeof(*iph) + noff))
		goto drop;

	iph = ip_hdr(skb);

	if (egress)
		addr = iph->saddr;
	else
		addr = iph->daddr;

	if (!((old_addr ^ addr) & mask)) {
		if (skb_cloned(skb) &&
		    !skb_clone_writable(skb, sizeof(*iph) + noff) &&
		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			goto drop;

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* Rewrite IP header */
		iph = ip_hdr(skb);
		if (egress)
			iph->saddr = new_addr;
		else
			iph->daddr = new_addr;

		csum_replace4(&iph->check, addr, new_addr);
	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
		   iph->protocol != IPPROTO_ICMP) {
		goto out;
	}

	ihl = iph->ihl * 4;

	/* It would be nice to share code with stateful NAT. */
	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
	case IPPROTO_TCP:
	{
		struct tcphdr *tcph;

		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
		    (skb_cloned(skb) &&
		     !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			goto drop;

		tcph = (void *)(skb_network_header(skb) + ihl);
		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
		break;
	}
	case IPPROTO_UDP:
	{
		struct udphdr *udph;

		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
		    (skb_cloned(skb) &&
		     !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			goto drop;

		udph = (void *)(skb_network_header(skb) + ihl);
		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
			inet_proto_csum_replace4(&udph->check, skb, addr,
						 new_addr, 1);
			if (!udph->check)
				udph->check = CSUM_MANGLED_0;
		}
		break;
	}
	case IPPROTO_ICMP:
	{
		struct icmphdr *icmph;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);

		if ((icmph->type != ICMP_DEST_UNREACH) &&
		    (icmph->type != ICMP_TIME_EXCEEDED) &&
		    (icmph->type != ICMP_PARAMETERPROB))
			break;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
				   noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);
		if (egress)
			addr = iph->daddr;
		else
			addr = iph->saddr;

		if ((old_addr ^ addr) & mask)
			break;

		if (skb_cloned(skb) &&
		    !skb_clone_writable(skb, ihl + sizeof(*icmph) +
					sizeof(*iph) + noff) &&
		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* XXX Fix up the inner checksums. */
		if (egress)
			iph->daddr = new_addr;
		else
			iph->saddr = new_addr;

		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
					 0);
		break;
	}
	default:
		break;
	}

out:
	return action;

drop:
	spin_lock(&p->tcf_lock);
	p->tcf_qstats.drops++;
	spin_unlock(&p->tcf_lock);
	return TC_ACT_SHOT;
}
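The masked rewrite above keeps the host bits of the packet's address and swaps only the network bits: new = (new_addr & mask) | (addr & ~mask). A standalone, userspace-style computation with hypothetical addresses (illustrative only, not kernel code):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Illustrative only: reproduces the masked rewrite from tcf_nat() with
 * made-up values. Rewriting 10.0.0.0/24 -> 192.168.1.0/24 maps 10.0.0.5
 * to 192.168.1.5: the mask takes the network bits from new_addr and the
 * host bits from the packet's address.
 */
int main(void)
{
	uint32_t old_addr = ntohl(inet_addr("10.0.0.0"));
	uint32_t new_addr = ntohl(inet_addr("192.168.1.0"));
	uint32_t mask     = ntohl(inet_addr("255.255.255.0"));
	uint32_t addr     = ntohl(inet_addr("10.0.0.5"));

	if (!((old_addr ^ addr) & mask)) {	/* same match test as tcf_nat() */
		uint32_t rewritten = (new_addr & mask) | (addr & ~mask);
		struct in_addr out = { .s_addr = htonl(rewritten) };

		printf("rewritten: %s\n", inet_ntoa(out));	/* 192.168.1.5 */
	}
	return 0;
}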
static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
			  struct tcf_result *res)
{
	struct tcf_police *police = to_police(a);
	s64 now;
	s64 toks;
	s64 ptoks = 0;

	spin_lock(&police->tcf_lock);

	bstats_update(&police->tcf_bstats, skb);
	tcf_lastuse_update(&police->tcf_tm);

	if (police->tcfp_ewma_rate) {
		struct gnet_stats_rate_est64 sample;

		if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
		    sample.bps >= police->tcfp_ewma_rate) {
			police->tcf_qstats.overlimits++;
			if (police->tcf_action == TC_ACT_SHOT)
				police->tcf_qstats.drops++;
			spin_unlock(&police->tcf_lock);
			return police->tcf_action;
		}
	}

	if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
		if (!police->rate_present) {
			spin_unlock(&police->tcf_lock);
			return police->tcfp_result;
		}

		now = ktime_get_ns();
		toks = min_t(s64, now - police->tcfp_t_c, police->tcfp_burst);
		if (police->peak_present) {
			ptoks = toks + police->tcfp_ptoks;
			if (ptoks > police->tcfp_mtu_ptoks)
				ptoks = police->tcfp_mtu_ptoks;
			ptoks -= (s64) psched_l2t_ns(&police->peak,
						     qdisc_pkt_len(skb));
		}
		toks += police->tcfp_toks;
		if (toks > police->tcfp_burst)
			toks = police->tcfp_burst;
		toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb));
		if ((toks|ptoks) >= 0) {
			/* OR of two s64s is non-negative only if both
			 * buckets are non-negative: the packet conforms.
			 */
			police->tcfp_t_c = now;
			police->tcfp_toks = toks;
			police->tcfp_ptoks = ptoks;
			if (police->tcfp_result == TC_ACT_SHOT)
				police->tcf_qstats.drops++;
			spin_unlock(&police->tcf_lock);
			return police->tcfp_result;
		}
	}

	police->tcf_qstats.overlimits++;
	if (police->tcf_action == TC_ACT_SHOT)
		police->tcf_qstats.drops++;
	spin_unlock(&police->tcf_lock);
	return police->tcf_action;
}
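Tokens in this policer are nanoseconds of accumulated idle time, capped at tcfp_burst, and psched_l2t_ns() converts a packet length into the time needed to transmit it at the configured rate. A standalone sketch of the single-rate conformance check with hypothetical numbers, where len2ns() stands in for psched_l2t_ns() (illustrative only):

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: token bucket arithmetic as in tcf_act_police(),
 * with tokens in nanoseconds. Assumed numbers: 1 Mbyte/s rate, 10 ms
 * burst, a 1500-byte packet arriving after 3 ms of idle time.
 */
static int64_t len2ns(int64_t bytes_per_sec, int64_t len)
{
	return len * 1000000000LL / bytes_per_sec;	/* length-to-time */
}

int main(void)
{
	int64_t rate  = 1000000;	/* bytes per second */
	int64_t burst = 10000000;	/* 10 ms of tokens, in ns */
	int64_t toks  = 0;		/* bucket currently empty */
	int64_t idle  = 3000000;	/* 3 ms since last packet */
	int64_t len   = 1500;		/* packet length in bytes */

	/* credit the idle time, cap at burst, then charge the packet */
	toks += idle > burst ? burst : idle;
	toks -= len2ns(rate, len);	/* 1500 B at 1 MB/s = 1.5 ms */

	/* toks = 3 ms - 1.5 ms = 1.5 ms >= 0, so the packet conforms */
	printf("toks = %lld ns -> %s\n", (long long)toks,
	       toks >= 0 ? "conform" : "exceed");
	return 0;
}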
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
		   struct tcf_result *res)
{
	int ret = 0, result = 0;
	struct tcf_ipt *ipt = to_ipt(a);
	struct xt_action_param par;
	struct nf_hook_state state = {
		.net	= dev_net(skb->dev),
		.in	= skb->dev,
		.hook	= ipt->tcfi_hook,
		.pf	= NFPROTO_IPV4,
	};

	if (skb_unclone(skb, GFP_ATOMIC))
		return TC_ACT_UNSPEC;

	spin_lock(&ipt->tcf_lock);

	tcf_lastuse_update(&ipt->tcf_tm);
	bstats_update(&ipt->tcf_bstats, skb);

	/* yes, we have to worry about both in and out dev
	 * worry later - danger - this API seems to have changed
	 * from earlier kernels
	 */
	par.state    = &state;
	par.target   = ipt->tcfi_t->u.kernel.target;
	par.targinfo = ipt->tcfi_t->data;
	ret = par.target->target(skb, &par);

	switch (ret) {
	case NF_ACCEPT:
		result = TC_ACT_OK;
		break;
	case NF_DROP:
		result = TC_ACT_SHOT;
		ipt->tcf_qstats.drops++;
		break;
	case XT_CONTINUE:
		result = TC_ACT_PIPE;
		break;
	default:
		net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n",
				       ret);
		result = TC_ACT_OK;
		break;
	}

	spin_unlock(&ipt->tcf_lock);
	return result;
}

static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
			int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_ipt *ipt = to_ipt(a);
	struct xt_entry_target *t;
	struct tcf_t tm;
	struct tc_cnt c;

	/* For simple targets, kernel size == user size and
	 * user name == target name. To be foolproof, you should
	 * not assume this.
	 */
	t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
	if (unlikely(!t))
		goto nla_put_failure;

	c.bindcnt = ipt->tcf_bindcnt - bind;
	c.refcnt = ipt->tcf_refcnt - ref;
	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);

	if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
	    nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) ||
	    nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) ||
	    nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
	    nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
		goto nla_put_failure;

	tcf_tm_dump(&tm, &ipt->tcf_tm);
	if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
		goto nla_put_failure;

	kfree(t);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	kfree(t);
	return -1;
}