static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_skbedit_params *params; int action; tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); params = rcu_dereference_bh(d->params); action = READ_ONCE(d->tcf_action); if (params->flags & SKBEDIT_F_PRIORITY) skb->priority = params->priority; if (params->flags & SKBEDIT_F_INHERITDSFIELD) { int wlen = skb_network_offset(skb); switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto err; skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2; break; case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto err; skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; break; } }
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; int action; params = rcu_dereference_bh(t->params); tcf_lastuse_update(&t->tcf_tm); bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); action = READ_ONCE(t->tcf_action); switch (params->tcft_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: skb_dst_drop(skb); break; case TCA_TUNNEL_KEY_ACT_SET: skb_dst_drop(skb); skb_dst_set(skb, dst_clone(¶ms->tcft_enc_metadata->dst)); break; default: WARN_ONCE(1, "Bad tunnel_key action %d.\n", params->tcft_action); break; } return action; }
static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; struct tcf_connmark_info *ca = to_connmark(a); struct nf_conntrack_zone zone; struct nf_conn *c; int proto; spin_lock(&ca->tcf_lock); tcf_lastuse_update(&ca->tcf_tm); bstats_update(&ca->tcf_bstats, skb); if (skb->protocol == htons(ETH_P_IP)) { if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; } else if (skb->protocol == htons(ETH_P_IPV6)) { if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; } else { goto out; } c = nf_ct_get(skb, &ctinfo); if (c) { skb->mark = c->mark; /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; goto out; } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, ca->net, &tuple)) goto out; zone.id = ca->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; thash = nf_conntrack_find_get(ca->net, &zone, &tuple); if (!thash) goto out; c = nf_ct_tuplehash_to_ctrack(thash); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; skb->mark = c->mark; nf_ct_put(c); out: spin_unlock(&ca->tcf_lock); return ca->tcf_action; }
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); int i; unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { char *d, _d; d = skb_header_pointer(skb, off + tkey->at, 1, &_d); if (!d) goto bad; offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { pr_info("tc filter pedit" " offset must be on 32 bit boundaries\n"); goto bad; } if (offset > 0 && offset > skb->len) { pr_info("tc filter pedit" " offset %d can't exceed pkt length %d\n", offset, skb->len); goto bad; } ptr = skb_header_pointer(skb, off + offset, 4, &_data); if (!ptr) goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); if (ptr == &_data) skb_store_bits(skb, off + offset, ptr, 4); } goto done; } else
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { struct tcf_bpf *prog = act->priv; struct bpf_prog *filter; int action, filter_res; bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; if (unlikely(!skb_mac_header_was_set(skb))) return TC_ACT_UNSPEC; tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); rcu_read_lock(); filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); /* A BPF program may overwrite the default action opcode. * Similarly as in cls_bpf, if filter_res == -1 we use the * default action specified from tc. * * In case a different well-known TC_ACT opcode has been * returned, it will overwrite the default one. * * For everything else that is unkown, TC_ACT_UNSPEC is * returned. */ switch (filter_res) { case TC_ACT_PIPE: case TC_ACT_RECLASSIFY: case TC_ACT_OK: case TC_ACT_REDIRECT: action = filter_res; break; case TC_ACT_SHOT: action = filter_res; qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats)); break; case TC_ACT_UNSPEC: action = prog->tcf_action; break; default: action = TC_ACT_UNSPEC; break; } return action; }
static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action; struct tcf_skbmod_params *p; u64 flags; int err; tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); /* XXX: if you are going to edit more fields beyond ethernet header * (example when you add IP header replacement or vlan swap) * then MAX_EDIT_LEN needs to change appropriately */ err = skb_ensure_writable(skb, MAX_EDIT_LEN); if (unlikely(err)) { /* best policy is to drop on the floor */ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); return TC_ACT_SHOT; } rcu_read_lock(); action = READ_ONCE(d->tcf_action); if (unlikely(action == TC_ACT_SHOT)) { qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); rcu_read_unlock(); return action; } p = rcu_dereference(d->skbmod_p); flags = p->flags; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); if (flags & SKBMOD_F_SMAC) ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); if (flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = p->eth_type; rcu_read_unlock(); if (flags & SKBMOD_F_SWAPMAC) { u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ /*XXX: I am sure we can come up with more efficient swapping*/ ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); } return action; }
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_defact *d = to_defact(a); spin_lock(&d->tcf_lock); tcf_lastuse_update(&d->tcf_tm); bstats_update(&d->tcf_bstats, skb); /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) */ pr_info("simple: %s_%d\n", (char *)d->tcfd_defdata, d->tcf_bstats.packets); spin_unlock(&d->tcf_lock); return d->tcf_action; }
static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; int retval; int size; int iif; int oif; tcf_lastuse_update(&s->tcf_tm); bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); retval = READ_ONCE(s->tcf_action); rcu_read_lock(); psample_group = rcu_dereference(s->psample_group); /* randomly sample packets according to rate */ if (psample_group && (prandom_u32() % s->rate == 0)) { if (!skb_at_tc_ingress(skb)) { iif = skb->skb_iif; oif = skb->dev->ifindex; } else { iif = skb->dev->ifindex; oif = 0; } /* on ingress, the mac header gets popped, so push it back */ if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) skb_push(skb, skb->mac_len); size = s->truncate ? s->trunc_size : skb->len; psample_sample_packet(psample_group, skb, size, iif, oif, s->rate); if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) skb_pull(skb, skb->mac_len); } rcu_read_unlock(); return retval; }
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbedit *d = a->priv; spin_lock(&d->tcf_lock); tcf_lastuse_update(&d->tcf_tm); bstats_update(&d->tcf_bstats, skb); if (d->flags & SKBEDIT_F_PRIORITY) skb->priority = d->priority; if (d->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > d->queue_mapping) skb_set_queue_mapping(skb, d->queue_mapping); if (d->flags & SKBEDIT_F_MARK) skb->mark = d->mark; spin_unlock(&d->tcf_lock); return d->tcf_action; }
static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_gact *gact = a->priv; int action = READ_ONCE(gact->tcf_action); #ifdef CONFIG_GACT_PROB { u32 ptype = READ_ONCE(gact->tcfg_ptype); if (ptype) action = gact_rand[ptype](gact); } #endif bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb); if (action == TC_ACT_SHOT) qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats)); tcf_lastuse_update(&gact->tcf_tm); return action; }
static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); int action; u32 update_flags; spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); bstats_update(&p->tcf_bstats, skb); action = p->tcf_action; update_flags = p->update_flags; spin_unlock(&p->tcf_lock); if (unlikely(action == TC_ACT_SHOT)) goto drop; switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; break; case cpu_to_be16(ETH_P_IPV6): if (!tcf_csum_ipv6(skb, update_flags)) goto drop; break; } return action; drop: spin_lock(&p->tcf_lock); p->tcf_qstats.drops++; spin_unlock(&p->tcf_lock); return TC_ACT_SHOT; }
static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_nat *p = to_tcf_nat(a); struct iphdr *iph; __be32 old_addr; __be32 new_addr; __be32 mask; __be32 addr; int egress; int action; int ihl; int noff; spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); old_addr = p->old_addr; new_addr = p->new_addr; mask = p->mask; egress = p->flags & TCA_NAT_FLAG_EGRESS; action = p->tcf_action; bstats_update(&p->tcf_bstats, skb); spin_unlock(&p->tcf_lock); if (unlikely(action == TC_ACT_SHOT)) goto drop; noff = skb_network_offset(skb); if (!pskb_may_pull(skb, sizeof(*iph) + noff)) goto drop; iph = ip_hdr(skb); if (egress) addr = iph->saddr; else addr = iph->daddr; if (!((old_addr ^ addr) & mask)) { if (skb_try_make_writable(skb, sizeof(*iph) + noff)) goto drop; new_addr &= mask; new_addr |= addr & ~mask; /* Rewrite IP header */ iph = ip_hdr(skb); if (egress) iph->saddr = new_addr; else iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); } else if ((iph->frag_off & htons(IP_OFFSET)) || iph->protocol != IPPROTO_ICMP) { goto out; } ihl = iph->ihl * 4; /* It would be nice to share code with stateful NAT. */ switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { case IPPROTO_TCP: { struct tcphdr *tcph; if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff)) goto drop; tcph = (void *)(skb_network_header(skb) + ihl); inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); break; } case IPPROTO_UDP: { struct udphdr *udph; if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || skb_try_make_writable(skb, ihl + sizeof(*udph) + noff)) goto drop; udph = (void *)(skb_network_header(skb) + ihl); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&udph->check, skb, addr, new_addr, true); if (!udph->check) udph->check = CSUM_MANGLED_0; } break; } case IPPROTO_ICMP: { struct icmphdr *icmph; if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); if ((icmph->type != ICMP_DEST_UNREACH) && (icmph->type != ICMP_TIME_EXCEEDED) && (icmph->type != ICMP_PARAMETERPROB)) break; if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); iph = (void *)(icmph + 1); if (egress) addr = iph->daddr; else addr = iph->saddr; if ((old_addr ^ addr) & mask) break; if (skb_try_make_writable(skb, ihl + sizeof(*icmph) + sizeof(*iph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); iph = (void *)(icmph + 1); new_addr &= mask; new_addr |= addr & ~mask; /* XXX Fix up the inner checksums. */ if (egress) iph->daddr = new_addr; else iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, false); break; } default: break; } out: return action; drop: spin_lock(&p->tcf_lock); p->tcf_qstats.drops++; spin_unlock(&p->tcf_lock); return TC_ACT_SHOT; }
static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); int i; if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex; enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr, hdata; int offset = tkey->off; int hoffset; u32 val; int rc; if (tkey_ex) { htype = tkey_ex->htype; cmd = tkey_ex->cmd; tkey_ex++; } rc = pedit_skb_hdr_offset(skb, htype, &hoffset); if (rc) { pr_info("tc action pedit bad header type specified (0x%x)\n", htype); goto bad; } if (tkey->offmask) { u8 *d, _d; if (!offset_valid(skb, hoffset + tkey->at)) { pr_info("tc action pedit 'at' offset %d out of bounds\n", hoffset + tkey->at); goto bad; } d = skb_header_pointer(skb, hoffset + tkey->at, sizeof(_d), &_d); if (!d) goto bad; offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { pr_info("tc action pedit offset must be on 32 bit boundaries\n"); goto bad; } if (!offset_valid(skb, hoffset + offset)) { pr_info("tc action pedit offset %d out of bounds\n", hoffset + offset); goto bad; } ptr = skb_header_pointer(skb, hoffset + offset, sizeof(hdata), &hdata); if (!ptr) goto bad; /* just do it, baby */ switch (cmd) { case TCA_PEDIT_KEY_EX_CMD_SET: val = tkey->val; break; case TCA_PEDIT_KEY_EX_CMD_ADD: val = (*ptr + tkey->val) & ~tkey->mask; break; default: pr_info("tc action pedit bad command (%d)\n", cmd); goto bad; } *ptr = ((*ptr & tkey->mask) ^ val); if (ptr == &hdata) skb_store_bits(skb, hoffset + offset, ptr, 4); } goto done; } else {
static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = to_police(a); s64 now; s64 toks; s64 ptoks = 0; spin_lock(&police->tcf_lock); bstats_update(&police->tcf_bstats, skb); tcf_lastuse_update(&police->tcf_tm); if (police->tcfp_ewma_rate) { struct gnet_stats_rate_est64 sample; if (!gen_estimator_read(&police->tcf_rate_est, &sample) || sample.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; if (police->tcf_action == TC_ACT_SHOT) police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } } if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { if (!police->rate_present) { spin_unlock(&police->tcf_lock); return police->tcfp_result; } now = ktime_get_ns(); toks = min_t(s64, now - police->tcfp_t_c, police->tcfp_burst); if (police->peak_present) { ptoks = toks + police->tcfp_ptoks; if (ptoks > police->tcfp_mtu_ptoks) ptoks = police->tcfp_mtu_ptoks; ptoks -= (s64) psched_l2t_ns(&police->peak, qdisc_pkt_len(skb)); } toks += police->tcfp_toks; if (toks > police->tcfp_burst) toks = police->tcfp_burst; toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; police->tcfp_ptoks = ptoks; if (police->tcfp_result == TC_ACT_SHOT) police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcfp_result; } } police->tcf_qstats.overlimits++; if (police->tcf_action == TC_ACT_SHOT) police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; }
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); struct xt_action_param par; struct nf_hook_state state = { .net = dev_net(skb->dev), .in = skb->dev, .hook = ipt->tcfi_hook, .pf = NFPROTO_IPV4, }; if (skb_unclone(skb, GFP_ATOMIC)) return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); tcf_lastuse_update(&ipt->tcf_tm); bstats_update(&ipt->tcf_bstats, skb); /* yes, we have to worry about both in and out dev * worry later - danger - this API seems to have changed * from earlier kernels */ par.state = &state; par.target = ipt->tcfi_t->u.kernel.target; par.targinfo = ipt->tcfi_t->data; ret = par.target->target(skb, &par); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; break; case NF_DROP: result = TC_ACT_SHOT; ipt->tcf_qstats.drops++; break; case XT_CONTINUE: result = TC_ACT_PIPE; break; default: net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n", ret); result = TC_ACT_OK; break; } spin_unlock(&ipt->tcf_lock); return result; } static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_ipt *ipt = to_ipt(a); struct xt_entry_target *t; struct tcf_t tm; struct tc_cnt c; /* for simple targets kernel size == user size * user name = target name * for foolproof you need to not assume this */ t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); if (unlikely(!t)) goto nla_put_failure; c.bindcnt = ipt->tcf_bindcnt - bind; c.refcnt = ipt->tcf_refcnt - ref; strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) || nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) || nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) || nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) || nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname)) goto nla_put_failure; tcf_tm_dump(&tm, &ipt->tcf_tm); if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) goto nla_put_failure; kfree(t); return skb->len; nla_put_failure: nlmsg_trim(skb, b); kfree(t); return -1; }