static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct tbf_sched_data *q = qdisc_priv(sch);
    struct nlattr *nest;
    struct tc_tbf_qopt opt;

    sch->qstats.backlog = q->qdisc->qstats.backlog;
    nest = nla_nest_start(skb, TCA_OPTIONS);
    if (nest == NULL)
        goto nla_put_failure;

    opt.limit = q->limit;
    psched_ratecfg_getrate(&opt.rate, &q->rate);
    if (q->peak_present)
        psched_ratecfg_getrate(&opt.peakrate, &q->peak);
    else
        memset(&opt.peakrate, 0, sizeof(opt.peakrate));
    opt.mtu = PSCHED_NS2TICKS(q->mtu);
    opt.buffer = PSCHED_NS2TICKS(q->buffer);
    if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
        goto nla_put_failure;
    if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
        nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps))
        goto nla_put_failure;
    if (q->peak_present &&
        q->peak.rate_bytes_ps >= (1ULL << 32) &&
        nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps))
        goto nla_put_failure;

    nla_nest_end(skb, nest);
    return skb->len;

nla_put_failure:
    nla_nest_cancel(skb, nest);
    return -1;
}
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned char *b = skb_tail_pointer(skb);
    struct tc_sfq_qopt_v1 opt;
    struct red_parms *p = q->red_parms;

    memset(&opt, 0, sizeof(opt));
    opt.v0.quantum = q->quantum;
    opt.v0.perturb_period = q->perturb_period / HZ;
    opt.v0.limit = q->limit;
    opt.v0.divisor = q->divisor;
    opt.v0.flows = q->maxflows;
    opt.depth = q->maxdepth;
    opt.headdrop = q->headdrop;

    if (p) {
        opt.qth_min = p->qth_min >> p->Wlog;
        opt.qth_max = p->qth_max >> p->Wlog;
        opt.Wlog = p->Wlog;
        opt.Plog = p->Plog;
        opt.Scell_log = p->Scell_log;
        opt.max_P = p->max_P;
    }
    memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
    opt.flags = q->flags;

    if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
        goto nla_put_failure;

    return skb->len;

nla_put_failure:
    nlmsg_trim(skb, b);
    return -1;
}
static void teql_destroy(struct Qdisc *sch)
{
    struct Qdisc *q, *prev;
    struct teql_sched_data *dat = qdisc_priv(sch);
    struct teql_master *master = dat->m;

    prev = master->slaves;
    if (prev) {
        do {
            q = NEXT_SLAVE(prev);
            if (q == sch) {
                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                if (q == master->slaves) {
                    master->slaves = NEXT_SLAVE(q);
                    if (q == master->slaves) {
                        struct netdev_queue *txq;
                        spinlock_t *root_lock;

                        txq = netdev_get_tx_queue(master->dev, 0);
                        master->slaves = NULL;

                        root_lock = qdisc_root_sleeping_lock(txq->qdisc);
                        spin_lock_bh(root_lock);
                        qdisc_reset(txq->qdisc);
                        spin_unlock_bh(root_lock);
                    }
                }
                skb_queue_purge(&dat->q);
                teql_neigh_release(xchg(&dat->ncache, NULL));
                break;
            }
        } while ((prev = q) != master->slaves);
    }
}
static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
                          struct net_device *dev, struct netdev_queue *txq,
                          struct neighbour *mn)
{
    struct teql_sched_data *q = qdisc_priv(txq->qdisc);
    struct neighbour *n = q->ncache;

    if (mn->tbl == NULL)
        return -EINVAL;
    if (n && n->tbl == mn->tbl &&
        memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
        atomic_inc(&n->refcnt);
    } else {
        n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
        if (IS_ERR(n))
            return PTR_ERR(n);
    }
    if (neigh_event_send(n, skb_res) == 0) {
        int err;
        char haddr[MAX_ADDR_LEN];

        neigh_ha_snapshot(haddr, n, dev);
        err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
                              NULL, skb->len);

        if (err < 0) {
            neigh_release(n);
            return -EINVAL;
        }
        teql_neigh_release(xchg(&q->ncache, n));
        return 0;
    }
    neigh_release(n);
    return (skb_res == NULL) ? -EAGAIN : 1;
}
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                      struct sk_buff **to_free)
{
    struct fq_sched_data *q = qdisc_priv(sch);
    struct fq_flow *f;

    if (unlikely(sch->q.qlen >= sch->limit))
        return qdisc_drop(skb, sch, to_free);

    f = fq_classify(skb, q);
    if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
        q->stat_flows_plimit++;
        return qdisc_drop(skb, sch, to_free);
    }

    f->qlen++;
    if (skb_is_retransmit(skb))
        q->stat_tcp_retrans++;
    qdisc_qstats_backlog_inc(sch, skb);
    if (fq_flow_is_detached(f)) {
        fq_flow_add_tail(&q->new_flows, f);
        if (time_after(jiffies, f->age + q->flow_refill_delay))
            f->credit = max_t(u32, f->credit, q->quantum);
        q->inactive_flows--;
    }

    /* Note: this overwrites f->age */
    flow_queue_add(f, skb);

    if (unlikely(f == &q->internal)) {
        q->stat_internal_packets++;
    }
    sch->q.qlen++;

    return NET_XMIT_SUCCESS;
}
static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    struct fq_sched_data *q = qdisc_priv(sch);
    struct tc_fq_qd_stats st;

    sch_tree_lock(sch);

    st.gc_flows = q->stat_gc_flows;
    st.highprio_packets = q->stat_internal_packets;
    st.tcp_retrans = q->stat_tcp_retrans;
    st.throttled = q->stat_throttled;
    st.flows_plimit = q->stat_flows_plimit;
    st.pkts_too_long = q->stat_pkts_too_long;
    st.allocation_errors = q->stat_allocation_errors;
    st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
    st.flows = q->flows;
    st.inactive_flows = q->inactive_flows;
    st.throttled_flows = q->throttled_flows;
    st.unthrottle_latency_ns = min_t(unsigned long,
                                     q->unthrottle_latency_ns, ~0U);
    sch_tree_unlock(sch);

    return gnet_stats_copy_app(d, &st, sizeof(st));
}
static int codel_init(struct Qdisc *sch, struct nlattr *opt)
{
    struct codel_sched_data *q = qdisc_priv(sch);

    sch->limit = DEFAULT_CODEL_LIMIT;

    codel_params_init(&q->params);
    codel_vars_init(&q->vars);
    codel_stats_init(&q->stats);

    if (opt) {
        int err = codel_change(sch, opt);

        if (err)
            return err;
    }

    if (sch->limit >= 1)
        sch->flags |= TCQ_F_CAN_BYPASS;
    else
        sch->flags &= ~TCQ_F_CAN_BYPASS;

    return 0;
}
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
    struct tbf_sched_data *q = qdisc_priv(sch);
    unsigned int len = qdisc_pkt_len(skb);
    int ret;

    if (qdisc_pkt_len(skb) > q->max_size) {
        if (skb_is_gso(skb) &&
            skb_gso_validate_mac_len(skb, q->max_size))
            return tbf_segment(skb, sch, to_free);
        return qdisc_drop(skb, sch, to_free);
    }
    ret = qdisc_enqueue(skb, q->qdisc, to_free);
    if (ret != NET_XMIT_SUCCESS) {
        if (net_xmit_drop_count(ret))
            qdisc_qstats_drop(sch);
        return ret;
    }

    sch->qstats.backlog += len;
    sch->q.qlen++;
    return NET_XMIT_SUCCESS;
}
static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    struct fq_codel_sched_data *q = qdisc_priv(sch);
    struct tc_fq_codel_xstats st = {
        .type = TCA_FQ_CODEL_XSTATS_QDISC,
    };
    struct list_head *pos;

    st.qdisc_stats.maxpacket = q->cstats.maxpacket;
    st.qdisc_stats.drop_overlimit = q->drop_overlimit;
    st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
    st.qdisc_stats.new_flow_count = q->new_flow_count;
    st.qdisc_stats.ce_mark = q->cstats.ce_mark;
    st.qdisc_stats.memory_usage = q->memory_usage;
    st.qdisc_stats.drop_overmemory = q->drop_overmemory;

    list_for_each(pos, &q->new_flows)
        st.qdisc_stats.new_flows_len++;

    list_for_each(pos, &q->old_flows)
        st.qdisc_stats.old_flows_len++;

    return gnet_stats_copy_app(d, &st, sizeof(st));
}
static void fq_codel_reset(struct Qdisc *sch)
{
    struct fq_codel_sched_data *q = qdisc_priv(sch);
    int i;

    INIT_LIST_HEAD(&q->new_flows);
    INIT_LIST_HEAD(&q->old_flows);
    for (i = 0; i < q->flows_cnt; i++) {
        struct fq_codel_flow *flow = q->flows + i;

        while (flow->head) {
            struct sk_buff *skb = dequeue_head(flow);

            qdisc_qstats_backlog_dec(sch, skb);
            kfree_skb(skb);
        }

        INIT_LIST_HEAD(&flow->flowchain);
        codel_vars_init(&flow->cvars);
    }
    memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
    sch->q.qlen = 0;
    q->memory_usage = 0;
}
static void netem_watchdog(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc *)arg;
    struct netem_sched_data *q = qdisc_priv(sch);
    struct net_device *dev = sch->dev;
    struct sk_buff *skb;
    psched_time_t now;

    pr_debug("netem_watchdog: fired @%lu\n", jiffies);

    spin_lock_bh(&dev->queue_lock);
    PSCHED_GET_TIME(now);

    while ((skb = skb_peek(&q->delayed)) != NULL) {
        const struct netem_skb_cb *cb =
            (const struct netem_skb_cb *)skb->cb;
        long delay =
            PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));

        pr_debug("netem_watchdog: skb %p@%lu %ld\n",
                 skb, jiffies, delay);

        /* if more time remaining? */
        if (delay > 0) {
            mod_timer(&q->timer, jiffies + delay);
            break;
        }

        __skb_unlink(skb, &q->delayed);

        if (q->qdisc->enqueue(skb, q->qdisc)) {
            sch->q.qlen--;
            sch->qstats.drops++;
        }
    }
    qdisc_run(dev);
    spin_unlock_bh(&dev->queue_lock);
}
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
    struct netem_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;

    skb = q->qdisc->dequeue(q->qdisc);
    if (skb) {
        const struct netem_skb_cb *cb =
            (const struct netem_skb_cb *)skb->cb;
        psched_time_t now;

        /* if more time remaining? */
        PSCHED_GET_TIME(now);

        if (PSCHED_TLESS(cb->time_to_send, now)) {
            pr_debug("netem_dequeue: return skb=%p\n", skb);
            sch->q.qlen--;
            sch->flags &= ~TCQ_F_THROTTLED;
            return skb;
        } else {
            psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);

            if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
                qdisc_tree_decrease_qlen(q->qdisc, 1);
                sch->qstats.drops++;
                printk(KERN_ERR "netem: queue discipline %s could not requeue\n",
                       q->qdisc->ops->id);
            }

            mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
            sch->flags |= TCQ_F_THROTTLED;
        }
    }

    return NULL;
}
static struct sk_buff *sfq_dequeue(struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;
    sfq_index a, old_a;

    /* No active slots */
    if (q->tail == SFQ_DEPTH)
        return NULL;

    a = old_a = q->next[q->tail];

    /* Grab packet */
    skb = __skb_dequeue(&q->qs[a]);
    sfq_dec(q, a);
    sch->q.qlen--;
    sch->qstats.backlog -= qdisc_pkt_len(skb);

    /* Is the slot empty? */
    if (q->qs[a].qlen == 0) {
        q->ht[q->hash[a]] = SFQ_DEPTH;
        a = q->next[a];
        if (a == old_a) {
            q->tail = SFQ_DEPTH;
            return skb;
        }
        q->next[q->tail] = a;
        q->allot[a] += q->quantum;
    } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
        q->tail = a;
        a = q->next[a];
        q->allot[a] += q->quantum;
    }
    return skb;
}
static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
                           struct netlink_ext_ack *extack)
{
    unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
    struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
    int prio;

    /* guard against zero length rings */
    if (!qlen)
        return -EINVAL;

    for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
        struct skb_array *q = band2list(priv, prio);
        int err;

        err = skb_array_init(q, qlen, GFP_KERNEL);
        if (err)
            return -ENOMEM;
    }

    /* Can by-pass the queue discipline */
    qdisc->flags |= TCQ_F_CAN_BYPASS;
    return 0;
}
/* Put skb in the private delayed queue. */
static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
{
    struct netem_sched_data *q = qdisc_priv(sch);
    struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
    psched_tdiff_t td;
    psched_time_t now;

    PSCHED_GET_TIME(now);
    td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
    PSCHED_TADD2(now, td, cb->time_to_send);

    /* Always queue at tail to keep packets in order */
    if (likely(q->delayed.qlen < q->limit)) {
        __skb_queue_tail(&q->delayed, skb);
        if (!timer_pending(&q->timer)) {
            q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
            add_timer(&q->timer);
        }
        return NET_XMIT_SUCCESS;
    }

    kfree_skb(skb);
    return NET_XMIT_DROP;
}
static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct sfb_sched_data *q = qdisc_priv(sch);
    struct Qdisc *child = q->qdisc;
    int i;
    u32 p_min = ~0;
    u32 minqlen = ~0;
    u32 r, slot, salt, sfbhash;
    int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    struct flow_keys keys;

    if (unlikely(sch->q.qlen >= q->limit)) {
        sch->qstats.overlimits++;
        q->stats.queuedrop++;
        goto drop;
    }

    if (q->rehash_interval > 0) {
        unsigned long limit = q->rehash_time + q->rehash_interval;

        if (unlikely(time_after(jiffies, limit))) {
            sfb_swap_slot(q);
            q->rehash_time = jiffies;
        } else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
                            time_after(jiffies, limit - q->warmup_time))) {
            q->double_buffering = true;
        }
    }

    if (q->filter_list) {
        /* If using external classifiers, get result and record it. */
        if (!sfb_classify(skb, q, &ret, &salt))
            goto other_drop;
        keys.src = salt;
        keys.dst = 0;
        keys.ports = 0;
    } else {
        skb_flow_dissect(skb, &keys);
    }

    slot = q->slot;

    sfbhash = jhash_3words((__force u32)keys.dst,
                           (__force u32)keys.src,
                           (__force u32)keys.ports,
                           q->bins[slot].perturbation);
    if (!sfbhash)
        sfbhash = 1;
    sfb_skb_cb(skb)->hashes[slot] = sfbhash;

    for (i = 0; i < SFB_LEVELS; i++) {
        u32 hash = sfbhash & SFB_BUCKET_MASK;
        struct sfb_bucket *b = &q->bins[slot].bins[i][hash];

        sfbhash >>= SFB_BUCKET_SHIFT;
        if (b->qlen == 0)
            decrement_prob(b, q);
        else if (b->qlen >= q->bin_size)
            increment_prob(b, q);
        if (minqlen > b->qlen)
            minqlen = b->qlen;
        if (p_min > b->p_mark)
            p_min = b->p_mark;
    }

    slot ^= 1;
    sfb_skb_cb(skb)->hashes[slot] = 0;

    if (unlikely(minqlen >= q->max)) {
        sch->qstats.overlimits++;
        q->stats.bucketdrop++;
        goto drop;
    }

    if (unlikely(p_min >= SFB_MAX_PROB)) {
        /* Inelastic flow */
        if (q->double_buffering) {
            sfbhash = jhash_3words((__force u32)keys.dst,
                                   (__force u32)keys.src,
                                   (__force u32)keys.ports,
                                   q->bins[slot].perturbation);
            if (!sfbhash)
                sfbhash = 1;
            sfb_skb_cb(skb)->hashes[slot] = sfbhash;

            for (i = 0; i < SFB_LEVELS; i++) {
                u32 hash = sfbhash & SFB_BUCKET_MASK;
                struct sfb_bucket *b = &q->bins[slot].bins[i][hash];

                sfbhash >>= SFB_BUCKET_SHIFT;
                if (b->qlen == 0)
                    decrement_prob(b, q);
                else if (b->qlen >= q->bin_size)
                    increment_prob(b, q);
            }
        }
        if (sfb_rate_limit(skb, q)) {
            sch->qstats.overlimits++;
            q->stats.penaltydrop++;
            goto drop;
        }
        goto enqueue;
    }
static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
                                             struct Qdisc *qdisc)
{
    struct sk_buff_head *list = qdisc_priv(qdisc);

    return list + prio2band[skb->priority & TC_PRIO_MAX];
}
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct tc_sfq_qopt *ctl = nla_data(opt);
    struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
    unsigned int qlen;
    struct red_parms *p = NULL;

    if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
        return -EINVAL;
    if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
        ctl_v1 = nla_data(opt);
    if (ctl->divisor &&
        (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
        return -EINVAL;
    if (ctl_v1 && ctl_v1->qth_min) {
        p = kmalloc(sizeof(*p), GFP_KERNEL);
        if (!p)
            return -ENOMEM;
    }
    sch_tree_lock(sch);
    if (ctl->quantum) {
        q->quantum = ctl->quantum;
        q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
    }
    q->perturb_period = ctl->perturb_period * HZ;
    if (ctl->flows)
        q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
    if (ctl->divisor) {
        q->divisor = ctl->divisor;
        q->maxflows = min_t(u32, q->maxflows, q->divisor);
    }
    if (ctl_v1) {
        if (ctl_v1->depth)
            q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
        if (p) {
            swap(q->red_parms, p);
            red_set_parms(q->red_parms,
                          ctl_v1->qth_min, ctl_v1->qth_max,
                          ctl_v1->Wlog,
                          ctl_v1->Plog, ctl_v1->Scell_log,
                          NULL,
                          ctl_v1->max_P);
        }
        q->flags = ctl_v1->flags;
        q->headdrop = ctl_v1->headdrop;
    }
    if (ctl->limit) {
        q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
        q->maxflows = min_t(u32, q->maxflows, q->limit);
    }

    qlen = sch->q.qlen;
    while (sch->q.qlen > q->limit)
        sfq_drop(sch);
    qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

    del_timer(&q->perturb_timer);
    if (q->perturb_period) {
        mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
        q->perturbation = prandom_u32();
    }
    sch_tree_unlock(sch);
    kfree(p);
    return 0;
}
/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
 * counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;
    int i;
    struct sfq_slot *slot;
    struct sk_buff_head list;
    int dropped = 0;

    __skb_queue_head_init(&list);

    for (i = 0; i < q->maxflows; i++) {
        slot = &q->slots[i];
        if (!slot->qlen)
            continue;
        while (slot->qlen) {
            skb = slot_dequeue_head(slot);
            sfq_dec(q, i);
            __skb_queue_tail(&list, skb);
        }
        slot->backlog = 0;
        red_set_vars(&slot->vars);
        q->ht[slot->hash] = SFQ_EMPTY_SLOT;
    }
    q->tail = NULL;

    while ((skb = __skb_dequeue(&list)) != NULL) {
        unsigned int hash = sfq_hash(q, skb);
        sfq_index x = q->ht[hash];

        slot = &q->slots[x];
        if (x == SFQ_EMPTY_SLOT) {
            x = q->dep[0].next; /* get a free slot */
            if (x >= SFQ_MAX_FLOWS) {
drop:
                sch->qstats.backlog -= qdisc_pkt_len(skb);
                kfree_skb(skb);
                dropped++;
                continue;
            }
            q->ht[hash] = x;
            slot = &q->slots[x];
            slot->hash = hash;
        }
        if (slot->qlen >= q->maxdepth)
            goto drop;
        slot_queue_add(slot, skb);
        if (q->red_parms)
            slot->vars.qavg = red_calc_qavg(q->red_parms,
                                            &slot->vars,
                                            slot->backlog);
        slot->backlog += qdisc_pkt_len(skb);
        sfq_inc(q, x);
        if (slot->qlen == 1) {          /* The flow is new */
            if (q->tail == NULL) {      /* It is the first flow */
                slot->next = x;
            } else {
                slot->next = q->tail->next;
                q->tail->next = x;
            }
            q->tail = slot;
            slot->allot = q->scaled_quantum;
        }
    }
    sch->q.qlen -= dropped;
    qdisc_tree_decrease_qlen(sch, dropped);
}
static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned int hash;
    sfq_index x, qlen;
    struct sfq_slot *slot;
    int uninitialized_var(ret);
    struct sk_buff *head;
    int delta;

    hash = sfq_classify(skb, sch, &ret);
    if (hash == 0) {
        if (ret & __NET_XMIT_BYPASS)
            sch->qstats.drops++;
        kfree_skb(skb);
        return ret;
    }
    hash--;

    x = q->ht[hash];
    slot = &q->slots[x];
    if (x == SFQ_EMPTY_SLOT) {
        x = q->dep[0].next; /* get a free slot */
        if (x >= SFQ_MAX_FLOWS)
            return qdisc_drop(skb, sch);
        q->ht[hash] = x;
        slot = &q->slots[x];
        slot->hash = hash;
        slot->backlog = 0; /* should already be 0 anyway... */
        red_set_vars(&slot->vars);
        goto enqueue;
    }
    if (q->red_parms) {
        slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
                                                     &slot->vars,
                                                     slot->backlog);
        switch (red_action(q->red_parms,
                           &slot->vars,
                           slot->vars.qavg)) {
        case RED_DONT_MARK:
            break;

        case RED_PROB_MARK:
            sch->qstats.overlimits++;
            if (sfq_prob_mark(q)) {
                /* We know we have at least one packet in queue */
                if (sfq_headdrop(q) &&
                    INET_ECN_set_ce(slot->skblist_next)) {
                    q->stats.prob_mark_head++;
                    break;
                }
                if (INET_ECN_set_ce(skb)) {
                    q->stats.prob_mark++;
                    break;
                }
            }
            q->stats.prob_drop++;
            goto congestion_drop;

        case RED_HARD_MARK:
            sch->qstats.overlimits++;
            if (sfq_hard_mark(q)) {
                /* We know we have at least one packet in queue */
                if (sfq_headdrop(q) &&
                    INET_ECN_set_ce(slot->skblist_next)) {
                    q->stats.forced_mark_head++;
                    break;
                }
                if (INET_ECN_set_ce(skb)) {
                    q->stats.forced_mark++;
                    break;
                }
            }
            q->stats.forced_drop++;
            goto congestion_drop;
        }
    }

    if (slot->qlen >= q->maxdepth) {
congestion_drop:
        if (!sfq_headdrop(q))
            return qdisc_drop(skb, sch);

        /* We know we have at least one packet in queue */
        head = slot_dequeue_head(slot);
        delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
        sch->qstats.backlog -= delta;
        slot->backlog -= delta;
        qdisc_drop(head, sch);

        slot_queue_add(slot, skb);
        return NET_XMIT_CN;
    }

enqueue:
    sch->qstats.backlog += qdisc_pkt_len(skb);
    slot->backlog += qdisc_pkt_len(skb);
    slot_queue_add(slot, skb);
    sfq_inc(q, x);
    if (slot->qlen == 1) {          /* The flow is new */
        if (q->tail == NULL) {      /* It is the first flow */
            slot->next = x;
        } else {
            slot->next = q->tail->next;
            q->tail->next = x;
        }
        /* We put this flow at the end of our flow list.
         * This might sound unfair for a new flow to wait after old ones,
         * but we could end up servicing new flows only,
         * and freeze old ones.
         */
        q->tail = slot;
        /* We could use a bigger initial quantum for new flows */
        slot->allot = q->scaled_quantum;
    }
    if (++sch->q.qlen <= q->limit)
        return NET_XMIT_SUCCESS;

    qlen = slot->qlen;
    sfq_drop(sch);
    /* Return Congestion Notification only if we dropped a packet
     * from this flow.
     */
    if (qlen != slot->qlen)
        return NET_XMIT_CN;

    /* As we dropped a packet, better let upper stack know this */
    qdisc_tree_decrease_qlen(sch, 1);
    return NET_XMIT_SUCCESS;
}
static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    const struct codel_sched_data *q = qdisc_priv(sch);
    struct tc_codel_xstats st = {
        .maxpacket      = q->stats.maxpacket,
        .count          = q->vars.count,
        .lastcount      = q->vars.lastcount,
        .drop_overlimit = q->drop_overlimit,
        .ldelay         = codel_time_to_us(q->vars.ldelay),
        .dropping       = q->vars.dropping,
        .ecn_mark       = q->stats.ecn_mark,
        .ce_mark        = q->stats.ce_mark,
    };

    if (q->vars.dropping) {
        codel_tdiff_t delta = q->vars.drop_next - codel_get_time();

        if (delta >= 0)
            st.drop_next = codel_time_to_us(delta);
        else
            st.drop_next = -codel_time_to_us(-delta);
    }

    return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void codel_reset(struct Qdisc *sch)
{
    struct codel_sched_data *q = qdisc_priv(sch);

    qdisc_reset_queue(sch);
    codel_vars_init(&q->vars);
}

static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
    .id         = "codel",
    .priv_size  = sizeof(struct codel_sched_data),
    .enqueue    = codel_qdisc_enqueue,
    .dequeue    = codel_qdisc_dequeue,
    .peek       = qdisc_peek_dequeued,
    .init       = codel_init,
    .reset      = codel_reset,
    .change     = codel_change,
    .dump       = codel_dump,
    .dump_stats = codel_dump_stats,
    .owner      = THIS_MODULE,
};

static int __init codel_module_init(void)
{
    return register_qdisc(&codel_qdisc_ops);
}

static void __exit codel_module_exit(void)
{
    unregister_qdisc(&codel_qdisc_ops);
}

module_init(codel_module_init)
module_exit(codel_module_exit)

MODULE_DESCRIPTION("Controlled Delay queue discipline");
MODULE_AUTHOR("Dave Taht");
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("Dual BSD/GPL");
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
    struct netem_sched_data *q = qdisc_priv(sch);
    struct tc_netem_qopt *qopt;
    int ret;

    if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
        return -EINVAL;

    qopt = RTA_DATA(opt);
    ret = set_fifo_limit(q->qdisc, qopt->limit);
    if (ret) {
        pr_debug("netem: can't set fifo limit\n");
        return ret;
    }

    q->latency = qopt->latency;
    q->jitter = qopt->jitter;
    q->limit = qopt->limit;
    q->gap = qopt->gap;
    q->counter = 0;
    q->loss = qopt->loss;
    q->duplicate = qopt->duplicate;

    /* for compatibility with earlier versions.
     * if gap is set, need to assume 100% probability
     */
    if (q->gap)
        q->reorder = ~0;

    /* Handle nested options after initial queue options.
     * Should have put all options in nested format but too late now.
     */
    if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
        struct rtattr *tb[TCA_NETEM_MAX];

        if (rtattr_parse(tb, TCA_NETEM_MAX,
                         RTA_DATA(opt) + sizeof(*qopt),
                         RTA_PAYLOAD(opt) - sizeof(*qopt)))
            return -EINVAL;

        if (tb[TCA_NETEM_CORR-1]) {
            ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
            if (ret)
                return ret;
        }

        if (tb[TCA_NETEM_DELAY_DIST-1]) {
            ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
            if (ret)
                return ret;
        }

        if (tb[TCA_NETEM_REORDER-1]) {
            ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
            if (ret)
                return ret;
        }

        if (tb[TCA_NETEM_CORRUPT-1]) {
            ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
            if (ret)
                return ret;
        }
    }

    return 0;
}
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct netem_sched_data *q = qdisc_priv(sch);
    /* We don't fill cb now as skb_unshare() may invalidate it */
    struct netem_skb_cb *cb;
    struct sk_buff *skb2;
    int ret;
    int count = 1;

    pr_debug("netem_enqueue skb=%p\n", skb);

    /* Random duplication */
    if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
        ++count;

    /* Random packet drop 0 => none, ~0 => all */
    if (q->loss && q->loss >= get_crandom(&q->loss_cor))
        --count;

    if (count == 0) {
        sch->qstats.drops++;
        kfree_skb(skb);
        return NET_XMIT_BYPASS;
    }

    skb_orphan(skb);

    /*
     * If we need to duplicate packet, then re-insert at top of the
     * qdisc tree, since parent queuer expects that only one
     * skb will be queued.
     */
    if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
        struct Qdisc *rootq = sch->dev->qdisc;
        u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

        q->duplicate = 0;
        rootq->enqueue(skb2, rootq);
        q->duplicate = dupsave;
    }

    /*
     * Randomized packet corruption.
     * Make copy if needed since we are modifying
     * If packet is going to be hardware checksummed, then
     * do it now in software before we mangle it.
     */
    if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
        if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
            (skb->ip_summed == CHECKSUM_PARTIAL &&
             skb_checksum_help(skb))) {
            sch->qstats.drops++;
            return NET_XMIT_DROP;
        }

        skb->data[net_random() % skb_headlen(skb)] ^=
            1 << (net_random() % 8);
    }

    cb = (struct netem_skb_cb *)skb->cb;
    if (q->gap == 0                 /* not doing reordering */
        || q->counter < q->gap      /* inside last reordering gap */
        || q->reorder < get_crandom(&q->reorder_cor)) {
        psched_time_t now;
        psched_tdiff_t delay;

        delay = tabledist(q->latency, q->jitter,
                          &q->delay_cor, q->delay_dist);

        now = psched_get_time();
        cb->time_to_send = now + delay;
        ++q->counter;
        ret = q->qdisc->enqueue(skb, q->qdisc);
    } else {
        /*
         * Do re-ordering by putting one out of N packets at the front
         * of the queue.
         */
        cb->time_to_send = psched_get_time();
        q->counter = 0;
        ret = q->qdisc->ops->requeue(skb, q->qdisc);
    }

    if (likely(ret == NET_XMIT_SUCCESS)) {
        sch->q.qlen++;
        sch->bstats.bytes += skb->len;
        sch->bstats.packets++;
    } else
        sch->qstats.drops++;

    pr_debug("netem: enqueue ret %d\n", ret);
    return ret;
}
static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned int hash;
    sfq_index x, qlen;
    struct sfq_slot *slot;
    int uninitialized_var(ret);

    hash = sfq_classify(skb, sch, &ret);
    if (hash == 0) {
        if (ret & __NET_XMIT_BYPASS)
            sch->qstats.drops++;
        kfree_skb(skb);
        return ret;
    }
    hash--;

    x = q->ht[hash];
    slot = &q->slots[x];
    if (x == SFQ_EMPTY_SLOT) {
        x = q->dep[0].next; /* get a free slot */
        q->ht[hash] = x;
        slot = &q->slots[x];
        slot->hash = hash;
    }

    /* If selected queue has length q->limit, do simple tail drop,
     * i.e. drop _this_ packet.
     */
    if (slot->qlen >= q->limit)
        return qdisc_drop(skb, sch);

    sch->qstats.backlog += qdisc_pkt_len(skb);
    slot_queue_add(slot, skb);
    sfq_inc(q, x);
    if (slot->qlen == 1) {          /* The flow is new */
        if (q->tail == NULL) {      /* It is the first flow */
            slot->next = x;
        } else {
            slot->next = q->tail->next;
            q->tail->next = x;
        }
        q->tail = slot;
        slot->allot = q->scaled_quantum;
    }
    if (++sch->q.qlen <= q->limit)
        return NET_XMIT_SUCCESS;

    qlen = slot->qlen;
    sfq_drop(sch);
    /* Return Congestion Notification only if we dropped a packet
     * from this flow.
     */
    if (qlen != slot->qlen)
        return NET_XMIT_CN;

    /* As we dropped a packet, better let upper stack know this */
    qdisc_tree_decrease_qlen(sch, 1);
    return NET_XMIT_SUCCESS;
}
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct netem_sched_data *q = qdisc_priv(sch);
    /* We don't fill cb now as skb_unshare() may invalidate it */
    struct netem_skb_cb *cb;
    struct sk_buff *skb2;
    int count = 1;

    /* Random duplication */
    if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
        ++count;

    /* Drop packet? */
    if (loss_event(q))
        --count;

    if (count == 0) {
        sch->qstats.drops++;
        kfree_skb(skb);
        return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    }

    skb_orphan(skb);

    /*
     * If we need to duplicate packet, then re-insert at top of the
     * qdisc tree, since parent queuer expects that only one
     * skb will be queued.
     */
    if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
        struct Qdisc *rootq = qdisc_root(sch);
        u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

        q->duplicate = 0;
        qdisc_enqueue_root(skb2, rootq);
        q->duplicate = dupsave;
    }

    /*
     * Randomized packet corruption.
     * Make copy if needed since we are modifying
     * If packet is going to be hardware checksummed, then
     * do it now in software before we mangle it.
     */
    if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
        if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
            (skb->ip_summed == CHECKSUM_PARTIAL &&
             skb_checksum_help(skb)))
            return qdisc_drop(skb, sch);

        skb->data[net_random() % skb_headlen(skb)] ^=
            1 << (net_random() % 8);
    }

    if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
        return qdisc_reshape_fail(skb, sch);

    sch->qstats.backlog += qdisc_pkt_len(skb);

    cb = netem_skb_cb(skb);
    if (q->gap == 0 ||                  /* not doing reordering */
        q->counter < q->gap - 1 ||      /* inside last reordering gap */
        q->reorder < get_crandom(&q->reorder_cor)) {
        psched_time_t now;
        psched_tdiff_t delay;

        delay = tabledist(q->latency, q->jitter,
                          &q->delay_cor, q->delay_dist);

        now = psched_get_time();

        if (q->rate) {
            struct sk_buff_head *list = &sch->q;

            delay += packet_len_2_sched_time(skb->len, q);

            if (!skb_queue_empty(list)) {
                /*
                 * Last packet in queue is reference point (now).
                 * First packet in queue is already in flight,
                 * calculate this time bonus and subtract
                 * from delay.
                 */
                delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
                now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
            }
        }

        cb->time_to_send = now + delay;
        ++q->counter;
        tfifo_enqueue(skb, sch);
    } else {
        /*
         * Do re-ordering by putting one out of N packets at the front
         * of the queue.
         */
        cb->time_to_send = psched_get_time();
        q->counter = 0;

        __skb_queue_head(&sch->q, skb);
        sch->qstats.requeues++;
    }

    return NET_XMIT_SUCCESS;
}
        goto nla_put_failure;

    if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
        goto nla_put_failure;

    return skb->len;

nla_put_failure:
    nlmsg_trim(skb, b);
    return -1;
}

static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                      struct Qdisc **old, struct netlink_ext_ack *extack)
{
    struct prio_sched_data *q = qdisc_priv(sch);
    struct tc_prio_qopt_offload graft_offload;
    unsigned long band = arg - 1;

    if (new == NULL)
        new = &noop_qdisc;

    *old = qdisc_replace(sch, new, &q->queues[band]);

    graft_offload.handle = sch->handle;
    graft_offload.parent = sch->parent;
    graft_offload.graft_params.band = band;
    graft_offload.graft_params.child_handle = new->handle;
    graft_offload.command = TC_PRIO_GRAFT;

    qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct nlattr *opts = NULL;
    struct tc_red_qopt opt = {
        .limit     = q->limit,
        .flags     = q->flags,
        .qth_min   = q->parms.qth_min >> q->parms.Wlog,
        .qth_max   = q->parms.qth_max >> q->parms.Wlog,
        .Wlog      = q->parms.Wlog,
        .Plog      = q->parms.Plog,
        .Scell_log = q->parms.Scell_log,
    };

    opts = nla_nest_start(skb, TCA_OPTIONS);
    if (opts == NULL)
        goto nla_put_failure;

    NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
    NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
    return nla_nest_end(skb, opts);

nla_put_failure:
    nla_nest_cancel(skb, opts);
    return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct tc_choke_xstats st = {
        .early   = q->stats.prob_drop + q->stats.forced_drop,
        .marked  = q->stats.prob_mark + q->stats.forced_mark,
        .pdrop   = q->stats.pdrop,
        .other   = q->stats.other,
        .matched = q->stats.matched,
    };

    return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
    struct choke_sched_data *q = qdisc_priv(sch);

    tcf_destroy_chain(&q->filter_list);
    choke_free(q->tab);
}

static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
{
    return NULL;
}

static unsigned long choke_get(struct Qdisc *sch, u32 classid)
{
    return 0;
}

static void choke_put(struct Qdisc *q, unsigned long cl)
{
}

static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
                                u32 classid)
{
    return 0;
}

static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
{
    struct choke_sched_data *q = qdisc_priv(sch);

    if (cl)
        return NULL;
    return &q->filter_list;
}

static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
                            struct sk_buff *skb, struct tcmsg *tcm)
{
    tcm->tcm_handle |= TC_H_MIN(cl);
    return 0;
}

static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
    if (!arg->stop) {
        if (arg->fn(sch, 1, arg) < 0) {
            arg->stop = 1;
            return;
        }
        arg->count++;
    }
}

static const struct Qdisc_class_ops choke_class_ops = {
    .leaf       = choke_leaf,
    .get        = choke_get,
    .put        = choke_put,
    .tcf_chain  = choke_find_tcf,
    .bind_tcf   = choke_bind,
    .unbind_tcf = choke_put,
    .dump       = choke_dump_class,
    .walk       = choke_walk,
};

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
    struct choke_sched_data *q = qdisc_priv(sch);

    return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
    .id         = "choke",
    .priv_size  = sizeof(struct choke_sched_data),
    .enqueue    = choke_enqueue,
    .dequeue    = choke_dequeue,
    .peek       = choke_peek_head,
    .drop       = choke_drop,
    .init       = choke_init,
    .destroy    = choke_destroy,
    .reset      = choke_reset,
    .change     = choke_change,
    .dump       = choke_dump,
    .dump_stats = choke_dump_stats,
    .owner      = THIS_MODULE,
};

static int __init choke_module_init(void)
{
    return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
    unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");
static int sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned int hash;
    sfq_index x;
    int ret;

    hash = sfq_classify(skb, sch, &ret);
    if (hash == 0) {
        if (ret == NET_XMIT_BYPASS)
            sch->qstats.drops++;
        kfree_skb(skb);
        return ret;
    }
    hash--;

    x = q->ht[hash];
    if (x == SFQ_DEPTH) {
        q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
        q->hash[x] = hash;
    }

    sch->qstats.backlog += skb->len;
    __skb_queue_head(&q->qs[x], skb);

    /* If selected queue has length q->limit + 1, this means that
     * all other queues are empty and we do simple tail drop.
     * This packet is still requeued at head of queue, tail packet
     * is dropped.
     */
    if (q->qs[x].qlen > q->limit) {
        skb = q->qs[x].prev;
        __skb_unlink(skb, &q->qs[x]);
        sch->qstats.drops++;
        sch->qstats.backlog -= skb->len;
        kfree_skb(skb);
        return NET_XMIT_CN;
    }

    sfq_inc(q, x);
    if (q->qs[x].qlen == 1) {           /* The flow is new */
        if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
            q->tail = x;
            q->next[x] = x;
            q->allot[x] = q->quantum;
        } else {
            q->next[x] = q->next[q->tail];
            q->next[q->tail] = x;
            q->tail = x;
        }
    }

    if (++sch->q.qlen <= q->limit) {
        sch->qstats.requeues++;
        return 0;
    }

    sch->qstats.drops++;
    sfq_drop(sch);
    return NET_XMIT_CN;
}
static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                                struct gnet_dump *d)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    sfq_index idx = q->ht[cl - 1];
    struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
    struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };

    if (gnet_stats_copy_queue(d, &qs) < 0)
        return -1;
    return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned int i;

    if (arg->stop)
        return;

    for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
        if (q->ht[i] == SFQ_DEPTH ||
            arg->count < arg->skip) {
            arg->count++;
            continue;
        }
        if (arg->fn(sch, i + 1, arg) < 0) {
            arg->stop = 1;
            break;
        }
        arg->count++;
    }
}

static const struct Qdisc_class_ops sfq_class_ops = {
    .get        = sfq_get,
    .change     = sfq_change_class,
    .tcf_chain  = sfq_find_tcf,
    .dump       = sfq_dump_class,
    .dump_stats = sfq_dump_class_stats,
    .walk       = sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
    .cl_ops     = &sfq_class_ops,
    .id         = "sfq",
    .priv_size  = sizeof(struct sfq_sched_data),
    .enqueue    = sfq_enqueue,
    .dequeue    = sfq_dequeue,
    .requeue    = sfq_requeue,
    .drop       = sfq_drop,
    .init       = sfq_init,
    .reset      = sfq_reset,
    .destroy    = sfq_destroy,
    .change     = NULL,
    .dump       = sfq_dump,
    .owner      = THIS_MODULE,
};

static int __init sfq_module_init(void)
{
    return register_qdisc(&sfq_qdisc_ops);
}

static void __exit sfq_module_exit(void)
{
    unregister_qdisc(&sfq_qdisc_ops);
}

module_init(sfq_module_init)
module_exit(sfq_module_exit)

MODULE_LICENSE("GPL");
static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
    struct choke_sched_data *q = qdisc_priv(sch);
    struct nlattr *tb[TCA_CHOKE_MAX + 1];
    const struct tc_red_qopt *ctl;
    int err;
    struct sk_buff **old = NULL;
    unsigned int mask;
    u32 max_P;

    if (opt == NULL)
        return -EINVAL;

    err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
    if (err < 0)
        return err;

    if (tb[TCA_CHOKE_PARMS] == NULL ||
        tb[TCA_CHOKE_STAB] == NULL)
        return -EINVAL;

    max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

    ctl = nla_data(tb[TCA_CHOKE_PARMS]);

    if (ctl->limit > CHOKE_MAX_QUEUE)
        return -EINVAL;

    mask = roundup_pow_of_two(ctl->limit + 1) - 1;
    if (mask != q->tab_mask) {
        struct sk_buff **ntab;

        ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
        if (!ntab)
            ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
        if (!ntab)
            return -ENOMEM;

        sch_tree_lock(sch);
        old = q->tab;
        if (old) {
            unsigned int oqlen = sch->q.qlen, tail = 0;

            while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                    continue;
                if (tail < mask) {
                    ntab[tail++] = skb;
                    continue;
                }
                sch->qstats.backlog -= qdisc_pkt_len(skb);
                --sch->q.qlen;
                qdisc_drop(skb, sch);
            }
            qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
            q->head = 0;
            q->tail = tail;
        }

        q->tab_mask = mask;
        q->tab = ntab;
    } else
        sch_tree_lock(sch);

    q->flags = ctl->flags;
    q->limit = ctl->limit;

    red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                  ctl->Plog, ctl->Scell_log,
                  nla_data(tb[TCA_CHOKE_STAB]),
                  max_P);
    red_set_vars(&q->vars);

    if (q->head == q->tail)
        red_end_of_idle_period(&q->vars);

    sch_tree_unlock(sch);
    choke_free(old);
    return 0;
}