static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	struct fq_flow *f;

	if (unlikely(sch->q.qlen >= sch->limit))
		return qdisc_drop(skb, sch);

	f = fq_classify(skb, q);
	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
		q->stat_flows_plimit++;
		return qdisc_drop(skb, sch);
	}

	f->qlen++;
	flow_queue_add(f, skb);
	if (skb_is_retransmit(skb))
		q->stat_tcp_retrans++;
	sch->qstats.backlog += qdisc_pkt_len(skb);
	if (fq_flow_is_detached(f)) {
		fq_flow_add_tail(&q->new_flows, f);
		if (q->quantum > f->credit)
			f->credit = q->quantum;
		q->inactive_flows--;
		qdisc_unthrottled(sch);
	}
	if (unlikely(f == &q->internal)) {
		q->stat_internal_packets++;
		qdisc_unthrottled(sch);
	}
	sch->q.qlen++;

	return NET_XMIT_SUCCESS;
}
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	struct fq_flow *f;

	if (unlikely(sch->q.qlen >= sch->limit))
		return qdisc_drop(skb, sch);

	f = fq_classify(skb, q);
	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
		q->stat_flows_plimit++;
		return qdisc_drop(skb, sch);
	}

	f->qlen++;
	if (skb_is_retransmit(skb))
		q->stat_tcp_retrans++;
	qdisc_qstats_backlog_inc(sch, skb);
	if (fq_flow_is_detached(f)) {
		fq_flow_add_tail(&q->new_flows, f);
		if (time_after(jiffies, f->age + q->flow_refill_delay))
			f->credit = max_t(u32, f->credit, q->quantum);
		q->inactive_flows--;
	}

	/* Note: this overwrites f->age */
	flow_queue_add(f, skb);

	if (unlikely(f == &q->internal)) {
		q->stat_internal_packets++;
	}
	sch->q.qlen++;

	return NET_XMIT_SUCCESS;
}
static void fq_check_throttled(struct fq_sched_data *q, u64 now)
{
	unsigned long sample;
	struct rb_node *p;

	if (q->time_next_delayed_flow > now)
		return;

	/* Update unthrottle latency EWMA.
	 * This is cheap and can help diagnosing timer/latency problems.
	 */
	sample = (unsigned long)(now - q->time_next_delayed_flow);
	q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
	q->unthrottle_latency_ns += sample >> 3;

	q->time_next_delayed_flow = ~0ULL;
	while ((p = rb_first(&q->delayed)) != NULL) {
		struct fq_flow *f = container_of(p, struct fq_flow, rate_node);

		if (f->time_next_packet > now) {
			q->time_next_delayed_flow = f->time_next_packet;
			break;
		}
		rb_erase(p, &q->delayed);
		q->throttled_flows--;
		fq_flow_add_tail(&q->old_flows, f);
	}
}
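The two shift operations above form an exponentially weighted moving average with weight 1/8: each unthrottle-latency sample replaces one eighth of the running value, which keeps the update cheap, as the in-code comment notes. A minimal userspace sketch of the same arithmetic (hypothetical names, not part of the kernel source):

#include <stdint.h>
#include <stdio.h>

/* Same update as in fq_check_throttled:
 * avg = avg - avg/8 + sample/8, i.e. roughly 7/8 * avg + 1/8 * sample.
 */
static uint64_t ewma_shift3(uint64_t avg, uint64_t sample)
{
	avg -= avg >> 3;
	avg += sample >> 3;
	return avg;
}

int main(void)
{
	uint64_t avg = 0;
	int i;

	/* Feed a constant 8000 ns sample; avg converges toward ~8000 ns. */
	for (i = 0; i < 64; i++)
		avg = ewma_shift3(avg, 8000);
	printf("%llu\n", (unsigned long long)avg);
	return 0;
}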
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		      struct sk_buff **to_free)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	struct fq_flow *f;

	if (unlikely(sch->q.qlen >= sch->limit))
		return qdisc_drop(skb, sch, to_free);

	f = fq_classify(skb, q);
	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
		q->stat_flows_plimit++;
		return qdisc_drop(skb, sch, to_free);
	}

	f->qlen++;
	if (skb_is_retransmit(skb))
		q->stat_tcp_retrans++;
	qdisc_qstats_backlog_inc(sch, skb);
	if (fq_flow_is_detached(f)) {
		struct sock *sk = skb->sk;

		fq_flow_add_tail(&q->new_flows, f);
		if (time_after(jiffies, f->age + q->flow_refill_delay))
			f->credit = max_t(u32, f->credit, q->quantum);
		if (sk && q->rate_enable) {
			if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
				     SK_PACING_FQ))
				smp_store_release(&sk->sk_pacing_status,
						  SK_PACING_FQ);
		}
		q->inactive_flows--;
	}

	/* Note: this overwrites f->age */
	flow_queue_add(f, skb);

	if (unlikely(f == &q->internal)) {
		q->stat_internal_packets++;
	}
	sch->q.qlen++;

	return NET_XMIT_SUCCESS;
}
static void fq_check_throttled(struct fq_sched_data *q, u64 now)
{
	struct rb_node *p;

	if (q->time_next_delayed_flow > now)
		return;

	q->time_next_delayed_flow = ~0ULL;
	while ((p = rb_first(&q->delayed)) != NULL) {
		struct fq_flow *f = container_of(p, struct fq_flow, rate_node);

		if (f->time_next_packet > now) {
			q->time_next_delayed_flow = f->time_next_packet;
			break;
		}
		rb_erase(p, &q->delayed);
		q->throttled_flows--;
		fq_flow_add_tail(&q->old_flows, f);
	}
}
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	u64 now = ktime_get_ns();
	struct fq_flow_head *head;
	struct sk_buff *skb;
	struct fq_flow *f;
	u32 rate;

	skb = fq_dequeue_head(sch, &q->internal);
	if (skb)
		goto out;
	fq_check_throttled(q, now);
begin:
	head = &q->new_flows;
	if (!head->first) {
		head = &q->old_flows;
		if (!head->first) {
			if (q->time_next_delayed_flow != ~0ULL)
				qdisc_watchdog_schedule_ns(&q->watchdog,
							   q->time_next_delayed_flow);
			return NULL;
		}
	}
	f = head->first;

	if (f->credit <= 0) {
		f->credit += q->quantum;
		head->first = f->next;
		fq_flow_add_tail(&q->old_flows, f);
		goto begin;
	}

	skb = f->head;
	if (unlikely(skb && now < f->time_next_packet &&
		     !skb_is_tcp_pure_ack(skb))) {
		head->first = f->next;
		fq_flow_set_throttled(q, f);
		goto begin;
	}

	skb = fq_dequeue_head(sch, f);
	if (!skb) {
		head->first = f->next;
		/* force a pass through old_flows to prevent starvation */
		if ((head == &q->new_flows) && q->old_flows.first) {
			fq_flow_add_tail(&q->old_flows, f);
		} else {
			fq_flow_set_detached(f);
			q->inactive_flows++;
		}
		goto begin;
	}
	prefetch(&skb->end);
	f->credit -= qdisc_pkt_len(skb);

	if (f->credit > 0 || !q->rate_enable)
		goto out;

	/* Do not pace locally generated ack packets */
	if (skb_is_tcp_pure_ack(skb))
		goto out;

	rate = q->flow_max_rate;
	if (skb->sk)
		rate = min(skb->sk->sk_pacing_rate, rate);

	if (rate != ~0U) {
		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
		u64 len = (u64)plen * NSEC_PER_SEC;

		if (likely(rate))
			do_div(len, rate);
		/* Since socket rate can change later,
		 * clamp the delay to 1 second.
		 * Really, providers of too big packets should be fixed !
		 */
		if (unlikely(len > NSEC_PER_SEC)) {
			len = NSEC_PER_SEC;
			q->stat_pkts_too_long++;
		}

		f->time_next_packet = now + len;
	}
out:
	qdisc_bstats_update(sch, skb);
	return skb;
}
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	u64 now = ktime_to_ns(ktime_get());
	struct fq_flow_head *head;
	struct sk_buff *skb;
	struct fq_flow *f;
	u32 rate;

	skb = fq_dequeue_head(sch, &q->internal);
	if (skb)
		goto out;
	fq_check_throttled(q, now);
begin:
	head = &q->new_flows;
	if (!head->first) {
		head = &q->old_flows;
		if (!head->first) {
			if (q->time_next_delayed_flow != ~0ULL)
				qdisc_watchdog_schedule_ns(&q->watchdog,
							   q->time_next_delayed_flow);
			return NULL;
		}
	}
	f = head->first;

	if (f->credit <= 0) {
		f->credit += q->quantum;
		head->first = f->next;
		fq_flow_add_tail(&q->old_flows, f);
		goto begin;
	}

	if (unlikely(f->head && now < f->time_next_packet)) {
		head->first = f->next;
		fq_flow_set_throttled(q, f);
		goto begin;
	}

	skb = fq_dequeue_head(sch, f);
	if (!skb) {
		head->first = f->next;
		/* force a pass through old_flows to prevent starvation */
		if ((head == &q->new_flows) && q->old_flows.first) {
			fq_flow_add_tail(&q->old_flows, f);
		} else {
			fq_flow_set_detached(f);
			f->age = jiffies;
			q->inactive_flows++;
		}
		goto begin;
	}
	prefetch(&skb->end);
	f->time_next_packet = now;
	f->credit -= qdisc_pkt_len(skb);

	if (f->credit > 0 || !q->rate_enable)
		goto out;

	rate = q->flow_max_rate;
	if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
		rate = min(skb->sk->sk_pacing_rate, rate);

	if (rate != ~0U) {
		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
		u64 len = (u64)plen * NSEC_PER_SEC;

		if (likely(rate))
			do_div(len, rate);
		/* Since socket rate can change later,
		 * clamp the delay to 125 ms.
		 * TODO: maybe segment the too big skb, as in commit
		 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
		 */
		if (unlikely(len > 125 * NSEC_PER_MSEC)) {
			len = 125 * NSEC_PER_MSEC;
			q->stat_pkts_too_long++;
		}

		f->time_next_packet = now + len;
	}
out:
	qdisc_bstats_update(sch, skb);
	qdisc_unthrottled(sch);
	return skb;
}
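In both fq_dequeue variants the pacing delay is just the packet length scaled by the flow's pacing rate (bytes per second), then clamped because the socket's rate may change while the skb sits in the queue: 125 ms in the variant above, 1 second in the other listing. A standalone userspace sketch of that arithmetic (hypothetical helper, not kernel code):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_MSEC	1000000ULL

/* Delay charged to a flow for sending plen bytes at `rate` bytes/sec,
 * clamped the way fq_dequeue clamps it (the kernel would also bump
 * stat_pkts_too_long when the clamp hits).
 */
static uint64_t pacing_delay_ns(uint32_t plen, uint32_t rate, uint64_t clamp_ns)
{
	uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

	if (rate)
		len /= rate;
	if (len > clamp_ns)
		len = clamp_ns;
	return len;
}

int main(void)
{
	/* A 64 KB GSO packet paced at 10 MB/s: ~6.55 ms until the next packet. */
	printf("%llu ns\n",
	       (unsigned long long)pacing_delay_ns(65536, 10 * 1000 * 1000,
						   125 * NSEC_PER_MSEC));
	return 0;
}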
static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
{
	rb_erase(&f->rate_node, &q->delayed);
	q->throttled_flows--;
	fq_flow_add_tail(&q->old_flows, f);
}