static int show_dev_tc_bpf(int sock, unsigned int nl_pid, struct ip_devname_ifindex *dev) { struct bpf_filter_t filter_info; struct bpf_tcinfo_t tcinfo; int i, handle, ret = 0; tcinfo.handle_array = NULL; tcinfo.used_len = 0; tcinfo.array_len = 0; tcinfo.is_qdisc = false; ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg, &tcinfo); if (ret) goto out; tcinfo.is_qdisc = true; ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg, &tcinfo); if (ret) goto out; filter_info.devname = dev->devname; filter_info.ifindex = dev->ifindex; for (i = 0; i < tcinfo.used_len; i++) { filter_info.kind = tcinfo.handle_array[i].kind; ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, tcinfo.handle_array[i].handle, dump_filter_nlmsg, &filter_info); if (ret) goto out; } /* root, ingress and egress handle */ handle = TC_H_ROOT; filter_info.kind = "root"; ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, dump_filter_nlmsg, &filter_info); if (ret) goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); filter_info.kind = "clsact/ingress"; ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, dump_filter_nlmsg, &filter_info); if (ret) goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); filter_info.kind = "clsact/egress"; ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, dump_filter_nlmsg, &filter_info); if (ret) goto out; out: free(tcinfo.handle_array); return 0; }
/*
 * Pick a qdisc handle that is not yet in use on @dev.
 *
 * A function-static cursor is advanced by one major number per attempt and
 * wraps back to 0x8000:0 when it reaches the major part of TC_H_ROOT.  The
 * cursor persists across calls.  At most 0x10000 candidates are tried.
 *
 * Returns the free handle, or 0 when every candidate tried was found by
 * qdisc_lookup().
 */
u32 qdisc_alloc_handle(struct device *dev)
{
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
	int attempts;

	for (attempts = 0x10000; attempts > 0; attempts--) {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);

		/* Nothing registered under this handle: it is ours. */
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
	}

	return 0;
}
/*
 * Create the default bfifo child of a TBF qdisc and set its limit.
 *
 * The child is created with handle <sch->handle>:1.  The limit is passed
 * to it through its ->change() operation, wrapped in a temporary rtattr
 * carrying a struct tc_fifo_qopt.
 *
 * Returns the new child, or NULL if the child could not be created, the
 * attribute could not be allocated, or ->change() returned non-zero (in
 * the latter two cases the child is destroyed again).
 */
static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
{
	struct Qdisc *child;
	struct rtattr *attr;
	int err;

	child = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
				  TC_H_MAKE(sch->handle, 1));
	if (!child)
		return NULL;

	attr = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
	if (!attr)
		goto fail;

	attr->rta_type = RTM_NEWQDISC;
	attr->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
	((struct tc_fifo_qopt *)RTA_DATA(attr))->limit = limit;

	err = child->ops->change(child, attr);
	kfree(attr);
	if (err == 0)
		return child;

fail:
	qdisc_destroy(child);
	return NULL;
}
static int mq_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); struct netdev_queue *dev_queue; struct Qdisc *qdisc; unsigned int ntx; if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { dev_queue = netdev_get_tx_queue(dev, ntx); qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); if (!qdisc) return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; return 0; }
/*
 * Classifier entry that counts its invocations in skb->cb[0].
 *
 * The current counter value is printed, the stored counter is bumped by
 * one, and a tail call into slot 0 of jmp_tc is attempted.  The statements
 * after tail_call() only run if control comes back here; they tag the
 * packet with class 1:42 and return TC_ACT_OK.
 */
int cls_loop(struct __sk_buff *skb)
{
	unsigned int seen = skb->cb[0];

	/* Same effect as printing skb->cb[0]++: print old value, store +1. */
	skb->cb[0] = seen + 1;
	printt("cb: %u\n", seen);

	tail_call(skb, &jmp_tc, 0);

	skb->tc_classid = TC_H_MAKE(1, 42);
	return TC_ACT_OK;
}
/*
 * Choose an automatic priority for a new filter relative to @tp.
 *
 * @tp:   neighbouring filter in the chain (may be NULL)
 * @prio: placeholder priority the caller searched with
 *
 * Without a neighbour, or when the neighbour is the last chain element,
 * the default 0xC000:0 is returned.  Otherwise the result is tp->prio + 1
 * when @prio is the 0xFFFF:0 placeholder and tp->prio - 1 for any other
 * @prio.  Should that value equal @prio itself, tp->prio is returned
 * instead.
 */
static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp, u32 prio)
{
	u32 candidate;

	if (tp == NULL || tp->next == NULL)
		return TC_H_MAKE(0xC0000000U, 0U);

	candidate = (prio == TC_H_MAKE(0xFFFF0000U, 0U)) ? tp->prio + 1
							 : tp->prio - 1;

	return (candidate == prio) ? tp->prio : candidate;
}
/*
 * Choose an automatic priority for a filter inserted in front of @tp:
 * one less than tp->prio, or the default 0xC000:0 when @tp is NULL.
 */
static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
{
	if (!tp)
		return TC_H_MAKE(0xC0000000U, 0U);

	return tp->prio - 1;
}
static int prio_tune(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); struct Qdisc *queues[TCQ_PRIO_BANDS]; int oldbands = q->bands, i; struct tc_prio_qopt *qopt; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i = 0; i <= TC_PRIO_MAX; i++) { if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } /* Before commit, make sure we can allocate all new qdiscs */ for (i = oldbands; i < qopt->bands; i++) { queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1), extack); if (!queues[i]) { while (i > oldbands) qdisc_put(queues[--i]); return -ENOMEM; } } prio_offload(sch, qopt); sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); } for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; if (q->queues[i] != &noop_qdisc) qdisc_hash_add(q->queues[i], true); } sch_tree_unlock(sch); for (i = q->bands; i < oldbands; i++) qdisc_put(q->queues[i]); return 0; }
static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct multiq_sched_data *q = qdisc_priv(sch); struct tc_multiq_qopt *qopt; int i; if (!netif_is_multiqueue(qdisc_dev(sch))) return -EOPNOTSUPP; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); qopt->bands = qdisc_dev(sch)->real_num_tx_queues; sch_tree_lock(sch); q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); qdisc_put(child); } } sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1), extack); if (child) { sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; if (child != &noop_qdisc) qdisc_hash_add(child, true); if (old != &noop_qdisc) { qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); qdisc_put(old); } sch_tree_unlock(sch); } } } return 0; }
/*
 * Parse a "MAJOR:MINOR" class id written in hexadecimal.
 *
 * @s: input string
 * @p: receives TC_H_MAKE(MAJOR << 16, MINOR) on success
 *
 * Returns 0 on success and 1 when sscanf() could not convert both numbers
 * (in which case *p is left untouched).
 */
static int CLASSIFY_string_to_priority(const char *s, unsigned int *p)
{
	unsigned int hi, lo;
	int converted = sscanf(s, "%x:%x", &hi, &lo);

	if (converted != 2)
		return 1;

	*p = TC_H_MAKE(hi << 16, lo);
	return 0;
}
static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt *qopt; int i; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i = 0; i <= TC_PRIO_MAX; i++) { if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } } sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; if (old != &noop_qdisc) { qdisc_tree_decrease_qlen(old, old->q.qlen); qdisc_destroy(old); } sch_tree_unlock(sch); } } } return 0; }
static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) { struct multiq_sched_data *q = qdisc_priv(sch); struct tc_multiq_qopt *qopt; int i; if (!netif_is_multiqueue(qdisc_dev(sch))) return -EOPNOTSUPP; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); qopt->bands = qdisc_dev(sch)->real_num_tx_queues; sch_tree_lock(sch); q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } } sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; if (old != &noop_qdisc) { qdisc_tree_decrease_qlen(old, old->q.qlen); qdisc_destroy(old); } sch_tree_unlock(sch); } } } return 0; }
/**
 * Install this traffic-control filter on the given link.
 *
 * Builds an RTM_NEWTFILTER request (NLM_F_EXCL | NLM_F_CREATE) whose tcmsg
 * header carries Handle, Parent and FilterPrio/ETH_P_ALL, followed by
 * TCA_KIND = FilterType and an empty TCA_OPTIONS attribute, sends it and
 * waits for the kernel's answer.
 *
 * @param link  link whose index and netlink socket are used
 * @return TError::Success() on success; EError::Unknown with a description
 *         when the message cannot be built, the kernel rejects it, or the
 *         filter cannot be found after a successful send.
 */
TError TNlCgFilter::Create(const TNlLink &link) {
    TError error = TError::Success();
    struct nl_msg *msg;
    int ret;

    /* Value-initialise so that no uninitialised stack bytes are copied
     * into the netlink message through fields not assigned below. */
    struct tcmsg tchdr = {};

    tchdr.tcm_family = AF_UNSPEC;
    tchdr.tcm_ifindex = link.GetIndex();
    tchdr.tcm_handle = Handle;
    tchdr.tcm_parent = Parent;
    tchdr.tcm_info = TC_H_MAKE(FilterPrio << 16, htons(ETH_P_ALL));

    msg = nlmsg_alloc_simple(RTM_NEWTFILTER, NLM_F_EXCL|NLM_F_CREATE);
    if (!msg)
        return TError(EError::Unknown, "Unable to add filter: no memory");

    ret = nlmsg_append(msg, &tchdr, sizeof(tchdr), NLMSG_ALIGNTO);
    if (ret < 0) {
        error = TError(EError::Unknown, std::string("Unable to add filter: ") + nl_geterror(ret));
        goto free_msg;
    }

    ret = nla_put(msg, TCA_KIND, strlen(FilterType) + 1, FilterType);
    if (ret < 0) {
        error = TError(EError::Unknown, std::string("Unable to add filter: ") + nl_geterror(ret));
        goto free_msg;
    }

    ret = nla_put(msg, TCA_OPTIONS, 0, NULL);
    if (ret < 0) {
        error = TError(EError::Unknown, std::string("Unable to add filter: ") + nl_geterror(ret));
        goto free_msg;
    }

    L() << "netlink " << link.GetDesc() << ": add tfilter id 0x" << std::hex << Handle << " parent 0x" << Parent << std::dec << std::endl;

    /* No nlmsg_free() after this point: per the libnl documentation
     * nl_send_sync() releases the message itself. */
    ret = nl_send_sync(link.GetSock(), msg);
    if (ret) {
        error = TError(EError::Unknown, std::string("Unable to add filter: ") + nl_geterror(ret));
    } else if (!Exists(link)) {
        /* Only meaningful after a successful send; otherwise it would
         * replace the real netlink error with a misleading one. */
        error = TError(EError::Unknown, "BUG: created filter doesn't exist");
    }

    return error;

free_msg:
    nlmsg_free(msg);

    return error;
}
/*
 * Build a netlink message for a classifier request.
 *
 * @cls:    classifier to describe
 * @type:   netlink message type, passed to rtnl_tc_msg_build()
 * @flags:  netlink message flags, passed to rtnl_tc_msg_build()
 * @result: receives the message built by rtnl_tc_msg_build()
 *
 * After the generic tc message has been built, tcm_info in its header is
 * filled with the classifier's priority in the upper 16 bits and its
 * protocol (network byte order) in the lower 16 bits.
 *
 * Returns 0 on success or the negative error from rtnl_tc_msg_build().
 */
static int cls_build(struct rtnl_cls *cls, int type, int flags,
		     struct nl_msg **result)
{
	int err, prio, proto;
	struct tcmsg *tchdr;

	err = rtnl_tc_msg_build(TC_CAST(cls), type, flags, result);
	if (err < 0)
		return err;

	tchdr = nlmsg_data(nlmsg_hdr(*result));
	prio = rtnl_cls_get_prio(cls);
	proto = rtnl_cls_get_protocol(cls);

	/*
	 * Shift as unsigned: a signed "prio << 16" overflows int (undefined
	 * behaviour) for every priority of 0x8000 and above.
	 */
	tchdr->tcm_info = TC_H_MAKE((unsigned int)prio << 16, htons(proto));

	return 0;
}
static int mq_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); struct netdev_queue *dev_queue; struct Qdisc *qdisc; unsigned int ntx; if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); if (priv->qdiscs == NULL) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { dev_queue = netdev_get_tx_queue(dev, ntx); qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); if (qdisc == NULL) goto err; qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[ntx] = qdisc; } sch->flags |= TCQ_F_MQROOT; return 0; err: mq_destroy(sch); return -ENOMEM; }
static int cls_build(struct rtnl_cls *cls, int type, int flags, struct nl_msg **result) { int err, prio, proto; struct tcmsg *tchdr; uint32_t required = TCA_ATTR_IFINDEX; if ((cls->ce_mask & required) != required) { APPBUG("ifindex must be specified"); return -NLE_MISSING_ATTR; } err = rtnl_tc_msg_build(TC_CAST(cls), type, flags, result); if (err < 0) return err; tchdr = nlmsg_data(nlmsg_hdr(*result)); prio = rtnl_cls_get_prio(cls); proto = rtnl_cls_get_protocol(cls); tchdr->tcm_info = TC_H_MAKE(prio << 16, htons(proto)); return 0; }
static int install_tbf(struct rtnl_handle *rth, int ifindex, int rate, int burst) { struct qdisc_opt opt = { .kind = "tbf", .handle = 0x00010000, .parent = TC_H_ROOT, .rate = rate, .buffer = burst, .latency = conf_latency, .qdisc = qdisc_tbf, }; return tc_qdisc_modify(rth, ifindex, RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, &opt); } static int install_htb(struct rtnl_handle *rth, int ifindex, int rate, int burst) { struct qdisc_opt opt1 = { .kind = "htb", .handle = 0x00010000, .parent = TC_H_ROOT, .quantum = conf_r2q, .defcls = 1, .qdisc = qdisc_htb_root, }; struct qdisc_opt opt2 = { .kind = "htb", .handle = 0x00010001, .parent = 0x00010000, .rate = rate, .buffer = burst, .quantum = conf_quantum, .qdisc = qdisc_htb_class, }; if (tc_qdisc_modify(rth, ifindex, RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, &opt1)) return -1; if (tc_qdisc_modify(rth, ifindex, RTM_NEWTCLASS, NLM_F_EXCL|NLM_F_CREATE, &opt2)) return -1; return 0; } static int install_police(struct rtnl_handle *rth, int ifindex, int rate, int burst) { __u32 rtab[256]; struct rtattr *tail, *tail1, *tail2, *tail3; int Rcell_log = -1; int mtu = conf_mtu, flowid = 1; unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ struct { struct nlmsghdr n; struct tcmsg t; char buf[TCA_BUF_MAX]; } req; struct qdisc_opt opt1 = { .kind = "ingress", .handle = 0xffff0000, .parent = TC_H_INGRESS, }; struct sel { struct tc_u32_sel sel; struct tc_u32_key key; } sel = { .sel.nkeys = 1, .sel.flags = TC_U32_TERMINAL, // .key.off = 12, }; struct tc_police police = { .action = TC_POLICE_SHOT, .rate.rate = rate, .rate.mpu = conf_mpu, .limit = (double)rate * conf_latency + burst, .burst = tc_calc_xmittime(rate, burst), }; if (tc_qdisc_modify(rth, ifindex, RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, &opt1)) return -1; if (tc_calc_rtable(&police.rate, rtab, Rcell_log, mtu, linklayer) < 0) { log_ppp_error("shaper: failed to calculate ceil rate table.\n"); return -1; } memset(&req, 0, sizeof(req)); req.n.nlmsg_len = 
NLMSG_LENGTH(sizeof(struct tcmsg)); req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_EXCL|NLM_F_CREATE; req.n.nlmsg_type = RTM_NEWTFILTER; req.t.tcm_family = AF_UNSPEC; req.t.tcm_ifindex = ifindex; req.t.tcm_handle = 1; req.t.tcm_parent = 0xffff0000; req.t.tcm_info = TC_H_MAKE(100 << 16, ntohs(ETH_P_ALL)); addattr_l(&req.n, sizeof(req), TCA_KIND, "u32", 4); tail = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_OPTIONS, NULL, 0); tail1 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_U32_ACT, NULL, 0); tail2 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, 1, NULL, 0); addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, "police", 7); tail3 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_ACT_OPTIONS, NULL, 0); addattr_l(&req.n, MAX_MSG, TCA_POLICE_TBF, &police, sizeof(police)); addattr_l(&req.n, MAX_MSG, TCA_POLICE_RATE, rtab, 1024); tail3->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail3; tail2->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail2; tail1->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail1; addattr_l(&req.n, MAX_MSG, TCA_U32_CLASSID, &flowid, 4); addattr_l(&req.n, MAX_MSG, TCA_U32_SEL, &sel, sizeof(sel)); tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail; if (rtnl_talk(rth, &req.n, 0, 0, NULL, NULL, NULL, 0) < 0) return -1; return 0; } static int install_htb_ifb(struct rtnl_handle *rth, int ifindex, __u32 priority, int rate, int burst) { struct rtattr *tail, *tail1, *tail2, *tail3; struct { struct nlmsghdr n; struct tcmsg t; char buf[TCA_BUF_MAX]; } req; struct qdisc_opt opt1 = { .kind = "htb", .handle = 0x00010000 + priority, .parent = 0x00010000, .rate = rate, .buffer = burst, .quantum = conf_quantum, .qdisc = qdisc_htb_class, }; struct qdisc_opt opt2 = { .kind = "ingress", .handle = 0xffff0000, .parent = TC_H_INGRESS, }; struct sel { struct tc_u32_sel sel; struct tc_u32_key key; } sel = { .sel.nkeys = 1, .sel.flags = TC_U32_TERMINAL, .key.off = 0, }; struct tc_skbedit p1 = { .action = TC_ACT_PIPE, }; struct tc_mirred p2 = { .eaction = 
TCA_EGRESS_REDIR, .action = TC_ACT_STOLEN, .ifindex = conf_ifb_ifindex, }; if (tc_qdisc_modify(rth, conf_ifb_ifindex, RTM_NEWTCLASS, NLM_F_EXCL|NLM_F_CREATE, &opt1)) return -1; if (tc_qdisc_modify(rth, ifindex, RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, &opt2)) return -1; memset(&req, 0, sizeof(req)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_EXCL|NLM_F_CREATE; req.n.nlmsg_type = RTM_NEWTFILTER; req.t.tcm_family = AF_UNSPEC; req.t.tcm_ifindex = ifindex; req.t.tcm_handle = 1; req.t.tcm_parent = 0xffff0000; req.t.tcm_info = TC_H_MAKE(100 << 16, ntohs(ETH_P_ALL)); addattr_l(&req.n, sizeof(req), TCA_KIND, "u32", 4); tail = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_OPTIONS, NULL, 0); tail1 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_U32_ACT, NULL, 0); // action skbedit priority X pipe tail2 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, 1, NULL, 0); addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, "skbedit", 8); tail3 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_ACT_OPTIONS, NULL, 0); addattr_l(&req.n, MAX_MSG, TCA_SKBEDIT_PARMS, &p1, sizeof(p1)); priority--; addattr_l(&req.n, MAX_MSG, TCA_SKBEDIT_PRIORITY, &priority, sizeof(priority)); tail3->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail3; tail2->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail2; tail1->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail1; // action mirred egress redirect dev ifb0 tail2 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, 2, NULL, 0); addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, "mirred", 7); tail3 = NLMSG_TAIL(&req.n); addattr_l(&req.n, MAX_MSG, TCA_ACT_OPTIONS, NULL, 0); addattr_l(&req.n, MAX_MSG, TCA_MIRRED_PARMS, &p2, sizeof(p2)); tail3->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail3; tail2->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail2; tail1->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail1; // addattr32(&req.n, TCA_BUF_MAX, TCA_U32_CLASSID, 1); addattr_l(&req.n, MAX_MSG, TCA_U32_SEL, &sel, 
sizeof(sel)); tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail; if (rtnl_talk(rth, &req.n, 0, 0, NULL, NULL, NULL, 0) < 0) return -1; return 0; } static int install_fwmark(struct rtnl_handle *rth, int ifindex, int parent) { struct rtattr *tail; struct { struct nlmsghdr n; struct tcmsg t; char buf[1024]; } req; memset(&req, 0, sizeof(req) - 1024); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_EXCL|NLM_F_CREATE; req.n.nlmsg_type = RTM_NEWTFILTER; req.t.tcm_family = AF_UNSPEC; req.t.tcm_ifindex = ifindex; req.t.tcm_handle = conf_fwmark; req.t.tcm_parent = parent; req.t.tcm_info = TC_H_MAKE(90 << 16, ntohs(ETH_P_IP)); addattr_l(&req.n, sizeof(req), TCA_KIND, "fw", 3); tail = NLMSG_TAIL(&req.n); addattr_l(&req.n, TCA_BUF_MAX, TCA_OPTIONS, NULL, 0); addattr32(&req.n, TCA_BUF_MAX, TCA_FW_CLASSID, TC_H_MAKE(1 << 16, 0)); tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail; return rtnl_talk(rth, &req.n, 0, 0, NULL, NULL, NULL, 0); } static int remove_root(struct rtnl_handle *rth, int ifindex) { struct qdisc_opt opt = { .handle = 0x00010000, .parent = TC_H_ROOT, }; return tc_qdisc_modify(rth, ifindex, RTM_DELQDISC, 0, &opt); } static int remove_ingress(struct rtnl_handle *rth, int ifindex) { struct qdisc_opt opt = { .handle = 0xffff0000, .parent = TC_H_INGRESS, }; return tc_qdisc_modify(rth, ifindex, RTM_DELQDISC, 0, &opt); } static int remove_htb_ifb(struct rtnl_handle *rth, int ifindex, int priority) { struct qdisc_opt opt = { .handle = 0x00010000 + priority, .parent = 0x00010000, }; return tc_qdisc_modify(rth, conf_ifb_ifindex, RTM_DELTCLASS, 0, &opt); } int install_limiter(struct ap_session *ses, int down_speed, int down_burst, int up_speed, int up_burst, int idx) { struct rtnl_handle *rth = net->rtnl_get(); int r = 0; if (!rth) { log_ppp_error("shaper: cannot open rtnetlink\n"); return -1; } if (down_speed) { down_speed = down_speed * 1000 / 8; down_burst = down_burst ? 
down_burst : conf_down_burst_factor * down_speed; if (conf_down_limiter == LIM_TBF) r = install_tbf(rth, ses->ifindex, down_speed, down_burst); else { r = install_htb(rth, ses->ifindex, down_speed, down_burst); if (r == 0) r = install_leaf_qdisc(rth, ses->ifindex, 0x00010001, 0x00020000); } } if (up_speed) { up_speed = up_speed * 1000 / 8; up_burst = up_burst ? up_burst : conf_up_burst_factor * up_speed; if (conf_up_limiter == LIM_POLICE) r = install_police(rth, ses->ifindex, up_speed, up_burst); else { r = install_htb_ifb(rth, ses->ifindex, idx, up_speed, up_burst); if (r == 0) r = install_leaf_qdisc(rth, conf_ifb_ifindex, 0x00010000 + idx, idx << 16); } } if (conf_fwmark) install_fwmark(rth, ses->ifindex, 0x00010000); net->rtnl_put(rth); return r; } int remove_limiter(struct ap_session *ses, int idx) { struct rtnl_handle *rth = net->rtnl_get(); if (!rth) { log_ppp_error("shaper: cannot open rtnetlink\n"); return -1; } remove_root(rth, ses->ifindex); remove_ingress(rth, ses->ifindex); if (conf_up_limiter == LIM_HTB) remove_htb_ifb(rth, ses->ifindex, idx); net->rtnl_put(rth); return 0; } int init_ifb(const char *name) { struct rtnl_handle rth; struct rtattr *tail; struct ifreq ifr; int r; int sock_fd = socket(AF_INET, SOCK_DGRAM, 0); struct { struct nlmsghdr n; struct tcmsg t; char buf[TCA_BUF_MAX]; } req; struct qdisc_opt opt = { .kind = "htb", .handle = 0x00010000, .parent = TC_H_ROOT, .quantum = conf_r2q, .qdisc = qdisc_htb_root, }; if (system("modprobe -q ifb")) log_warn("failed to load ifb kernel module\n"); memset(&ifr, 0, sizeof(ifr)); strcpy(ifr.ifr_name, name); if (ioctl(sock_fd, SIOCGIFINDEX, &ifr)) { log_emerg("shaper: ioctl(SIOCGIFINDEX): %s\n", strerror(errno)); close(sock_fd); return -1; } conf_ifb_ifindex = ifr.ifr_ifindex; ifr.ifr_flags |= IFF_UP; if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr)) { log_emerg("shaper: ioctl(SIOCSIFINDEX): %s\n", strerror(errno)); close(sock_fd); return -1; } if (rtnl_open(&rth, 0)) { log_emerg("shaper: cannot open 
rtnetlink\n"); close(sock_fd); return -1; } tc_qdisc_modify(&rth, conf_ifb_ifindex, RTM_DELQDISC, 0, &opt); r = tc_qdisc_modify(&rth, conf_ifb_ifindex, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE, &opt); if (r) goto out; memset(&req, 0, sizeof(req)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_EXCL|NLM_F_CREATE; req.n.nlmsg_type = RTM_NEWTFILTER; req.t.tcm_family = AF_UNSPEC; req.t.tcm_ifindex = conf_ifb_ifindex; req.t.tcm_handle = 1; req.t.tcm_parent = 0x00010000; req.t.tcm_info = TC_H_MAKE(100 << 16, ntohs(ETH_P_ALL)); addattr_l(&req.n, sizeof(req), TCA_KIND, "flow", 5); tail = NLMSG_TAIL(&req.n); addattr_l(&req.n, TCA_BUF_MAX, TCA_OPTIONS, NULL, 0); addattr32(&req.n, TCA_BUF_MAX, TCA_FLOW_KEYS, 1 << FLOW_KEY_PRIORITY); addattr32(&req.n, TCA_BUF_MAX, TCA_FLOW_MODE, FLOW_MODE_MAP); tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail; r = rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL, 0); out: rtnl_close(&rth); close(sock_fd); return r; }
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct rtattr **tca = arg; struct tcmsg *t = NLMSG_DATA(n); u32 protocol = TC_H_MIN(t->tcm_info); u32 prio = TC_H_MAJ(t->tcm_info); u32 nprio = prio; u32 parent = t->tcm_parent; struct device *dev; struct Qdisc *q; struct tcf_proto **back, **chain; struct tcf_proto *tp = NULL; struct tcf_proto_ops *tp_ops; struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long fh; int err; if (prio == 0) { /* If no priority is given, user wants we allocated it. */ if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (n->nlmsg_flags&NLM_F_APPEND) prio = TC_H_MAKE(0xFFFF0000U,0U); else prio = TC_H_MAKE(0x80000000U,0U); } /* Find head of filter chain. */ /* Find link */ if ((dev = dev_get_by_index(t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ if (!parent) { q = dev->qdisc_sleeping; parent = q->handle; } else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) return -EINVAL; /* Is it classful? */ if ((cops = q->ops->cl_ops) == NULL) return -EINVAL; /* Do we search for filter, attached to class? 
*/ if (TC_H_MIN(parent)) { cl = cops->get(q, parent); if (cl == 0) return -ENOENT; } /* And the last stroke */ chain = cops->tcf_chain(q, cl); err = -EINVAL; if (chain == NULL) goto errout; /* Check the chain for existence of proto-tcf with this priority */ for (back = chain; (tp=*back) != NULL; back = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (!nprio || (tp->protocol != protocol && protocol)) goto errout; } else tp = NULL; break; } } if (tp == NULL) { /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND-1] == NULL || !protocol) goto errout; err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) goto errout; /* Create new proto tcf */ err = -ENOBUFS; if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL) goto errout; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); if (tp_ops == NULL) { err = -EINVAL; kfree(tp); goto errout; } memset(tp, 0, sizeof(*tp)); tp->ops = tp_ops; tp->protocol = protocol; tp->prio = nprio ? : tcf_auto_prio(*back, prio); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; err = tp_ops->init(tp); if (err) { kfree(tp); goto errout; } tp->next = *back; *back = tp; } else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind))
static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tc_fifo_qopt opt = { .limit = sch->limit }; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; nla_put_failure: return -1; } struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", .priv_size = 0, .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, }; EXPORT_SYMBOL(pfifo_qdisc_ops); struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .id = "bfifo", .priv_size = 0, .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, }; EXPORT_SYMBOL(bfifo_qdisc_ops); struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .id = "pfifo_head_drop", .priv_size = 0, .enqueue = pfifo_tail_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, }; /* Pass size change message down to embedded FIFO */ int fifo_set_limit(struct Qdisc *q, unsigned int limit) { struct nlattr *nla; int ret = -ENOMEM; /* Hack to avoid sending change message to non-FIFO */ if (strncmp(q->ops->id + 1, "fifo", 4) != 0) return 0; nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (nla) { nla->nla_type = RTM_NEWQDISC; nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; ret = q->ops->change(q, nla); kfree(nla); } return ret; } EXPORT_SYMBOL(fifo_set_limit); struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, unsigned int limit) { struct Qdisc *q; int err = -ENOMEM; q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1)); if (q) { err = fifo_set_limit(q, limit); if (err < 0) { 
qdisc_destroy(q); q = NULL; } } return q ? : ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt);
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; spinlock_t *root_lock; struct tcmsg *t; u32 protocol; u32 prio; u32 nprio; u32 parent; struct net_device *dev; struct Qdisc *q; struct tcf_proto **back, **chain; struct tcf_proto *tp; const struct tcf_proto_ops *tp_ops; const struct Qdisc_class_ops *cops; unsigned long cl; unsigned long fh; int err; int tp_created = 0; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); nprio = prio; parent = t->tcm_parent; cl = 0; if (prio == 0) { /* If no priority is given, user wants we allocated it. */ if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; prio = TC_H_MAKE(0x80000000U, 0U); } /* Find head of filter chain. */ /* Find link */ dev = __dev_get_by_index(net, t->tcm_ifindex); if (dev == NULL) return -ENODEV; /* Find qdisc */ if (!parent) { q = dev->qdisc; parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); if (q == NULL) return -EINVAL; } /* Is it classful? */ cops = q->ops->cl_ops; if (!cops) return -EINVAL; if (cops->tcf_chain == NULL) return -EOPNOTSUPP; /* Do we search for filter, attached to class? 
*/ if (TC_H_MIN(parent)) { cl = cops->get(q, parent); if (cl == 0) return -ENOENT; } /* And the last stroke */ chain = cops->tcf_chain(q, cl); err = -EINVAL; if (chain == NULL) goto errout; /* Check the chain for existence of proto-tcf with this priority */ for (back = chain; (tp = *back) != NULL; back = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (!nprio || (tp->protocol != protocol && protocol)) goto errout; } else tp = NULL; break; } } root_lock = qdisc_root_sleeping_lock(q); if (tp == NULL) { /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) goto errout; err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE)) goto errout; /* Create new proto tcf */ err = -ENOBUFS; tp = kzalloc(sizeof(*tp), GFP_KERNEL); if (tp == NULL) goto errout; err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { #ifdef CONFIG_MODULES struct nlattr *kind = tca[TCA_KIND]; char name[IFNAMSIZ]; if (kind != NULL && nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { rtnl_unlock(); request_module("cls_%s", name); rtnl_lock(); tp_ops = tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to * replay the request. We indicate this using * -EAGAIN. */ if (tp_ops != NULL) { module_put(tp_ops->owner); err = -EAGAIN; } } #endif kfree(tp); goto errout; } tp->ops = tp_ops; tp->protocol = protocol; tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back)); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; err = tp_ops->init(tp); if (err != 0) { module_put(tp_ops->owner); kfree(tp); goto errout; } tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt *qopt; struct nlattr *tb[TCA_PRIO_MAX + 1] = {0}; int err; int i; qopt = nla_data(opt); if (nla_len(opt) < sizeof(*qopt)) return -1; if (nla_len(opt) >= sizeof(*qopt) + sizeof(struct nlattr)) { err = nla_parse_nested(tb, TCA_PRIO_MAX, (struct nlattr *) (qopt + 1), NULL); if (err < 0) return err; } q->bands = qopt->bands; /* If we're multiqueue, make sure the number of incoming bands * matches the number of queues on the device we're associating with. * If the number of bands requested is zero, then set q->bands to * dev->egress_subqueue_count. Also, the root qdisc must be the * only one that is enabled for multiqueue, since it's the only one * that interacts with the underlying device. */ q->mq = nla_get_flag(tb[TCA_PRIO_MQ]); if (q->mq) { if (sch->parent != TC_H_ROOT) return -EINVAL; if (netif_is_multiqueue(sch->dev)) { if (q->bands == 0) q->bands = sch->dev->egress_subqueue_count; else if (q->bands != sch->dev->egress_subqueue_count) return -EINVAL; } else return -EOPNOTSUPP; } if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { if (qopt->priomap[i] >= q->bands) return -EINVAL; } sch_tree_lock(sch); memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } } sch_tree_unlock(sch); for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); child = xchg(&q->queues[i], child); if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } sch_tree_unlock(sch); } } } return 0; }
static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) { struct qdisc_util *q = NULL; struct tc_estimator est; struct { struct tc_sizespec szopts; __u16 *data; } stab; char d[16]; char k[16]; struct { struct nlmsghdr n; struct tcmsg t; char buf[TCA_BUF_MAX]; } req; memset(&req, 0, sizeof(req)); memset(&stab, 0, sizeof(stab)); memset(&est, 0, sizeof(est)); memset(&d, 0, sizeof(d)); memset(&k, 0, sizeof(k)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); req.n.nlmsg_flags = NLM_F_REQUEST|flags; req.n.nlmsg_type = cmd; req.t.tcm_family = AF_UNSPEC; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); if (d[0]) duparg("dev", *argv); strncpy(d, *argv, sizeof(d)-1); } else if (strcmp(*argv, "handle") == 0) { __u32 handle; if (req.t.tcm_handle) duparg("handle", *argv); NEXT_ARG(); if (get_qdisc_handle(&handle, *argv)) invarg("invalid qdisc ID", *argv); req.t.tcm_handle = handle; } else if (strcmp(*argv, "root") == 0) { if (req.t.tcm_parent) { fprintf(stderr, "Error: \"root\" is duplicate parent ID\n"); return -1; } req.t.tcm_parent = TC_H_ROOT; } else if (strcmp(*argv, "clsact") == 0) { if (req.t.tcm_parent) { fprintf(stderr, "Error: \"clsact\" is a duplicate parent ID\n"); return -1; } req.t.tcm_parent = TC_H_CLSACT; strncpy(k, "clsact", sizeof(k) - 1); q = get_qdisc_kind(k); req.t.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); NEXT_ARG_FWD(); break; } else if (strcmp(*argv, "ingress") == 0) { if (req.t.tcm_parent) { fprintf(stderr, "Error: \"ingress\" is a duplicate parent ID\n"); return -1; } req.t.tcm_parent = TC_H_INGRESS; strncpy(k, "ingress", sizeof(k) - 1); q = get_qdisc_kind(k); req.t.tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); NEXT_ARG_FWD(); break; } else if (strcmp(*argv, "parent") == 0) { __u32 handle; NEXT_ARG(); if (req.t.tcm_parent) duparg("parent", *argv); if (get_tc_classid(&handle, *argv)) invarg("invalid parent ID", *argv); req.t.tcm_parent = handle; } else if (matches(*argv, "estimator") == 0) { if 
(parse_estimator(&argc, &argv, &est)) return -1; } else if (matches(*argv, "stab") == 0) { if (parse_size_table(&argc, &argv, &stab.szopts) < 0) return -1; continue; } else if (matches(*argv, "help") == 0) { usage(); } else { strncpy(k, *argv, sizeof(k)-1); q = get_qdisc_kind(k); argc--; argv++; break; } argc--; argv++; } if (k[0]) addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); if (est.ewma_log) addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); if (q) { if (q->parse_qopt) { if (q->parse_qopt(q, argc, argv, &req.n)) return 1; } else if (argc) { fprintf(stderr, "qdisc '%s' does not support option parsing\n", k); return -1; } } else { if (argc) { if (matches(*argv, "help") == 0) usage(); fprintf(stderr, "Garbage instead of arguments \"%s ...\". Try \"tc qdisc help\".\n", *argv); return -1; } } if (check_size_table_opts(&stab.szopts)) { struct rtattr *tail; if (tc_calc_size_table(&stab.szopts, &stab.data) < 0) { fprintf(stderr, "failed to calculate size table.\n"); return -1; } tail = NLMSG_TAIL(&req.n); addattr_l(&req.n, sizeof(req), TCA_STAB, NULL, 0); addattr_l(&req.n, sizeof(req), TCA_STAB_BASE, &stab.szopts, sizeof(stab.szopts)); if (stab.data) addattr_l(&req.n, sizeof(req), TCA_STAB_DATA, stab.data, stab.szopts.tsize * sizeof(__u16)); tail->rta_len = (void *)NLMSG_TAIL(&req.n) - (void *)tail; if (stab.data) free(stab.data); } if (d[0]) { int idx; ll_init_map(&rth); if ((idx = ll_name_to_index(d)) == 0) { fprintf(stderr, "Cannot find device \"%s\"\n", d); return 1; } req.t.tcm_ifindex = idx; } if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) return 2; return 0; }
static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt *qopt; int i; int flow_change = 0; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i = 0; i <= TC_PRIO_MAX; i++) { if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } sch_tree_lock(sch); if (q->enable_flow != qopt->enable_flow) { q->enable_flow = qopt->enable_flow; flow_change = 1; } q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } } sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; if (old != &noop_qdisc) { qdisc_tree_decrease_qlen(old, old->q.qlen); qdisc_destroy(old); } sch_tree_unlock(sch); } } } /* Schedule qdisc when flow re-enabled */ if (flow_change && q->enable_flow) { if (!test_bit(__QDISC_STATE_DEACTIVATED, &sch->state)) __netif_schedule(qdisc_root(sch)); } return 0; }
static int cls_request_update(struct nl_cache *cache, struct nl_sock *sk) { struct tcmsg tchdr = { .tcm_family = AF_UNSPEC, .tcm_ifindex = cache->c_iarg1, .tcm_parent = cache->c_iarg2, }; return nl_send_simple(sk, RTM_GETTFILTER, NLM_F_DUMP, &tchdr, sizeof(tchdr)); } static int cls_build(struct rtnl_cls *cls, int type, int flags, struct nl_msg **result) { struct rtnl_cls_ops *cops; int err, prio, proto; struct tcmsg *tchdr; err = tca_build_msg((struct rtnl_tca *) cls, type, flags, result); if (err < 0) return err; tchdr = nlmsg_data(nlmsg_hdr(*result)); prio = rtnl_cls_get_prio(cls); proto = rtnl_cls_get_protocol(cls); tchdr->tcm_info = TC_H_MAKE(prio << 16, htons(proto)); cops = rtnl_cls_lookup_ops(cls); if (cops && cops->co_get_opts) { struct nl_msg *opts; if (!(opts = nlmsg_alloc())) { err = -NLE_NOMEM; goto errout; } if (!(err = cops->co_get_opts(cls, opts))) err = nla_put_nested(*result, TCA_OPTIONS, opts); nlmsg_free(opts); if (err < 0) goto errout; } return 0; errout: nlmsg_free(*result); return err; } /** * @name Classifier Addition/Modification/Deletion * @{ */ /** * Build a netlink message to add a new classifier * @arg cls classifier to add * @arg flags additional netlink message flags * @arg result Pointer to store resulting message. * * Builds a new netlink message requesting an addition of a classifier * The netlink message header isn't fully equipped with all relevant * fields and must be sent out via nl_send_auto_complete() or * supplemented as needed. \a classifier must contain the attributes of * the new classifier set via \c rtnl_cls_set_* functions. \a opts * may point to the clsasifier specific options. * * @return 0 on success or a negative error code. */ int rtnl_cls_build_add_request(struct rtnl_cls *cls, int flags, struct nl_msg **result) { return cls_build(cls, RTM_NEWTFILTER, NLM_F_CREATE | flags, result); } /** * Add a new classifier * @arg sk Netlink socket. 
* @arg cls classifier to add * @arg flags additional netlink message flags * * Builds a netlink message by calling rtnl_cls_build_add_request(), * sends the request to the kernel and waits for the next ACK to be * received and thus blocks until the request has been processed. * * @return 0 on sucess or a negative error if an error occured. */ int rtnl_cls_add(struct nl_sock *sk, struct rtnl_cls *cls, int flags) { struct nl_msg *msg; int err; if ((err = rtnl_cls_build_add_request(cls, flags, &msg)) < 0) return err; err = nl_send_auto_complete(sk, msg); nlmsg_free(msg); if (err < 0) return err; return nl_wait_for_ack(sk); } /** * Build a netlink message to change classifier attributes * @arg cls classifier to change * @arg flags additional netlink message flags * @arg result Pointer to store resulting message. * * Builds a new netlink message requesting a change of a neigh * attributes. The netlink message header isn't fully equipped with * all relevant fields and must thus be sent out via nl_send_auto_complete() * or supplemented as needed. * * @return 0 on success or a negative error code. */ int rtnl_cls_build_change_request(struct rtnl_cls *cls, int flags, struct nl_msg **result) { return cls_build(cls, RTM_NEWTFILTER, NLM_F_REPLACE | flags, result); } /** * Change a classifier * @arg sk Netlink socket. * @arg cls classifier to change * @arg flags additional netlink message flags * * Builds a netlink message by calling rtnl_cls_build_change_request(), * sends the request to the kernel and waits for the next ACK to be * received and thus blocks until the request has been processed. * * @return 0 on sucess or a negative error if an error occured. 
*/ int rtnl_cls_change(struct nl_sock *sk, struct rtnl_cls *cls, int flags) { struct nl_msg *msg; int err; if ((err = rtnl_cls_build_change_request(cls, flags, &msg)) < 0) return err; err = nl_send_auto_complete(sk, msg); nlmsg_free(msg); if (err < 0) return err; return nl_wait_for_ack(sk); } /** * Build a netlink request message to delete a classifier * @arg cls classifier to delete * @arg flags additional netlink message flags * @arg result Pointer to store resulting message. * * Builds a new netlink message requesting a deletion of a classifier. * The netlink message header isn't fully equipped with all relevant * fields and must thus be sent out via nl_send_auto_complete() * or supplemented as needed. * * @return 0 on success or a negative error code. */ int rtnl_cls_build_delete_request(struct rtnl_cls *cls, int flags, struct nl_msg **result) { return cls_build(cls, RTM_DELTFILTER, flags, result); } /** * Delete a classifier * @arg sk Netlink socket. * @arg cls classifier to delete * @arg flags additional netlink message flags * * Builds a netlink message by calling rtnl_cls_build_delete_request(), * sends the request to the kernel and waits for the next ACK to be * received and thus blocks until the request has been processed. * * @return 0 on sucess or a negative error if an error occured. */ int rtnl_cls_delete(struct nl_sock *sk, struct rtnl_cls *cls, int flags) { struct nl_msg *msg; int err; if ((err = rtnl_cls_build_delete_request(cls, flags, &msg)) < 0) return err; err = nl_send_auto_complete(sk, msg); nlmsg_free(msg); if (err < 0) return err; return nl_wait_for_ack(sk); } /** @} */ /** * @name Cache Management * @{ */ /** * Build a classifier cache including all classifiers attached to the * specified class/qdisc on eht specified interface. * @arg sk Netlink socket. * @arg ifindex interface index of the link the classes are * attached to. * @arg parent parent qdisc/class * @arg result Pointer to store resulting cache. 
* * Allocates a new cache, initializes it properly and updates it to * include all classes attached to the specified interface. * * @note The caller is responsible for destroying and freeing the * cache after using it. * @return 0 on success or a negative error code. */ int rtnl_cls_alloc_cache(struct nl_sock *sk, int ifindex, uint32_t parent, struct nl_cache **result) { struct nl_cache * cache; int err; if (!(cache = nl_cache_alloc(&rtnl_cls_ops))) return -NLE_NOMEM; cache->c_iarg1 = ifindex; cache->c_iarg2 = parent; if (sk && (err = nl_cache_refill(sk, cache)) < 0) { nl_cache_free(cache); return err; } *result = cache; return 0; } /** @} */ static struct nl_cache_ops rtnl_cls_ops = { .co_name = "route/cls", .co_hdrsize = sizeof(struct tcmsg), .co_msgtypes = { { RTM_NEWTFILTER, NL_ACT_NEW, "new" }, { RTM_DELTFILTER, NL_ACT_DEL, "del" }, { RTM_GETTFILTER, NL_ACT_GET, "get" }, END_OF_MSGTYPES_LIST, }, .co_protocol = NETLINK_ROUTE, .co_request_update = cls_request_update, .co_msg_parser = cls_msg_parser, .co_obj_ops = &cls_obj_ops, }; static void __init cls_init(void) { nl_cache_mngt_register(&rtnl_cls_ops); } static void __exit cls_exit(void) { nl_cache_mngt_unregister(&rtnl_cls_ops); }
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); struct netdev_queue *dev_queue; struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); if (mqprio_parse_opt(dev, qopt)) return -EINVAL; /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); if (priv->qdiscs == NULL) { err = -ENOMEM; goto err; } for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); if (qdisc == NULL) { err = -ENOMEM; goto err; } qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[i] = qdisc; } netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) netdev_set_tc_queue(dev, i, qopt->count[i], qopt->offset[i]); /* Always use supplied priority mappings */ for (i = 0; i < TC_BITMASK + 1; i++) netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); sch->flags |= TCQ_F_MQROOT; return 0; err: mqprio_destroy(sch); return err; }
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct rtattr **tca; struct tcmsg *t; u32 protocol; u32 prio; u32 nprio; u32 parent; struct net_device *dev; struct Qdisc *q; struct tcf_proto **back, **chain; struct tcf_proto *tp; struct tcf_proto_ops *tp_ops; struct Qdisc_class_ops *cops; unsigned long cl; unsigned long fh; int err; replay: tca = arg; t = NLMSG_DATA(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); nprio = prio; parent = t->tcm_parent; cl = 0; if (prio == 0) { /* If no priority is given, user wants we allocated it. */ if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; prio = TC_H_MAKE(0x80000000U,0U); } /* Find head of filter chain. */ /* Find link */ if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ if (!parent) { q = dev->qdisc_sleeping; parent = q->handle; } else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) return -EINVAL; /* Is it classful? */ if ((cops = q->ops->cl_ops) == NULL) return -EINVAL; /* Do we search for filter, attached to class? 
*/ if (TC_H_MIN(parent)) { cl = cops->get(q, parent); if (cl == 0) return -ENOENT; } /* And the last stroke */ chain = cops->tcf_chain(q, cl); err = -EINVAL; if (chain == NULL) goto errout; /* Check the chain for existence of proto-tcf with this priority */ for (back = chain; (tp=*back) != NULL; back = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (!nprio || (tp->protocol != protocol && protocol)) goto errout; } else tp = NULL; break; } } if (tp == NULL) { /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND-1] == NULL || !protocol) goto errout; err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) goto errout; /* Create new proto tcf */ err = -ENOBUFS; if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL) goto errout; err = -EINVAL; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); if (tp_ops == NULL) { #ifdef CONFIG_KMOD struct rtattr *kind = tca[TCA_KIND-1]; char name[IFNAMSIZ]; if (kind != NULL && rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { rtnl_unlock(); request_module("cls_%s", name); rtnl_lock(); tp_ops = tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to * replay the request. We indicate this using * -EAGAIN. */ if (tp_ops != NULL) { module_put(tp_ops->owner); err = -EAGAIN; } } #endif kfree(tp); goto errout; } memset(tp, 0, sizeof(*tp)); tp->ops = tp_ops; tp->protocol = protocol; tp->prio = nprio ? : tcf_auto_prio(*back); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; if ((err = tp_ops->init(tp)) != 0) { module_put(tp_ops->owner); kfree(tp); goto errout; } qdisc_lock_tree(dev); tp->next = *back; *back = tp; qdisc_unlock_tree(dev); } else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind))
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); struct netdev_queue *dev_queue; struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); if (mqprio_parse_opt(dev, qopt)) return -EINVAL; /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); if (priv->qdiscs == NULL) { err = -ENOMEM; goto err; } for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); if (qdisc == NULL) { err = -ENOMEM; goto err; } priv->qdiscs[i] = qdisc; } /* If the mqprio options indicate that hardware should own * the queue mapping then run ndo_setup_tc otherwise use the * supplied and verified mapping */ if (qopt->hw) { priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); if (err) goto err; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) netdev_set_tc_queue(dev, i, qopt->count[i], qopt->offset[i]); } /* Always use supplied priority mappings */ for (i = 0; i < TC_BITMASK + 1; i++) netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); sch->flags |= TCQ_F_MQROOT; return 0; err: mqprio_destroy(sch); return err; }