static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	u64 now = ktime_get_ns();
	struct fq_flow_head *head;
	struct sk_buff *skb;
	struct fq_flow *f;
	u32 rate;

	skb = fq_dequeue_head(sch, &q->internal);
	if (skb)
		goto out;
	fq_check_throttled(q, now);
begin:
	head = &q->new_flows;
	if (!head->first) {
		head = &q->old_flows;
		if (!head->first) {
			if (q->time_next_delayed_flow != ~0ULL)
				qdisc_watchdog_schedule_ns(&q->watchdog,
							   q->time_next_delayed_flow);
			return NULL;
		}
	}
	f = head->first;

	if (f->credit <= 0) {
		f->credit += q->quantum;
		head->first = f->next;
		fq_flow_add_tail(&q->old_flows, f);
		goto begin;
	}

	skb = f->head;
	if (unlikely(skb && now < f->time_next_packet &&
		     !skb_is_tcp_pure_ack(skb))) {
		head->first = f->next;
		fq_flow_set_throttled(q, f);
		goto begin;
	}

	skb = fq_dequeue_head(sch, f);
	if (!skb) {
		head->first = f->next;
		/* force a pass through old_flows to prevent starvation */
		if ((head == &q->new_flows) && q->old_flows.first) {
			fq_flow_add_tail(&q->old_flows, f);
		} else {
			fq_flow_set_detached(f);
			q->inactive_flows++;
		}
		goto begin;
	}
	prefetch(&skb->end);
	f->credit -= qdisc_pkt_len(skb);

	if (f->credit > 0 || !q->rate_enable)
		goto out;

	/* Do not pace locally generated ack packets */
	if (skb_is_tcp_pure_ack(skb))
		goto out;

	rate = q->flow_max_rate;
	if (skb->sk)
		rate = min(skb->sk->sk_pacing_rate, rate);

	if (rate != ~0U) {
		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
		u64 len = (u64)plen * NSEC_PER_SEC;

		if (likely(rate))
			do_div(len, rate);
		/* Since socket rate can change later,
		 * clamp the delay to 1 second.
		 * Really, providers of too big packets should be fixed !
		 */
		if (unlikely(len > NSEC_PER_SEC)) {
			len = NSEC_PER_SEC;
			q->stat_pkts_too_long++;
		}

		f->time_next_packet = now + len;
	}
out:
	qdisc_bstats_update(sch, skb);
	return skb;
}
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
	struct fq_sched_data *q = qdisc_priv(sch);
	u64 now = ktime_to_ns(ktime_get());
	struct fq_flow_head *head;
	struct sk_buff *skb;
	struct fq_flow *f;
	u32 rate;

	skb = fq_dequeue_head(sch, &q->internal);
	if (skb)
		goto out;
	fq_check_throttled(q, now);
begin:
	head = &q->new_flows;
	if (!head->first) {
		head = &q->old_flows;
		if (!head->first) {
			if (q->time_next_delayed_flow != ~0ULL)
				qdisc_watchdog_schedule_ns(&q->watchdog,
							   q->time_next_delayed_flow);
			return NULL;
		}
	}
	f = head->first;

	if (f->credit <= 0) {
		f->credit += q->quantum;
		head->first = f->next;
		fq_flow_add_tail(&q->old_flows, f);
		goto begin;
	}

	if (unlikely(f->head && now < f->time_next_packet)) {
		head->first = f->next;
		fq_flow_set_throttled(q, f);
		goto begin;
	}

	skb = fq_dequeue_head(sch, f);
	if (!skb) {
		head->first = f->next;
		/* force a pass through old_flows to prevent starvation */
		if ((head == &q->new_flows) && q->old_flows.first) {
			fq_flow_add_tail(&q->old_flows, f);
		} else {
			fq_flow_set_detached(f);
			f->age = jiffies;
			q->inactive_flows++;
		}
		goto begin;
	}
	prefetch(&skb->end);
	f->time_next_packet = now;
	f->credit -= qdisc_pkt_len(skb);

	if (f->credit > 0 || !q->rate_enable)
		goto out;

	rate = q->flow_max_rate;
	if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
		rate = min(skb->sk->sk_pacing_rate, rate);

	if (rate != ~0U) {
		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
		u64 len = (u64)plen * NSEC_PER_SEC;

		if (likely(rate))
			do_div(len, rate);
		/* Since socket rate can change later,
		 * clamp the delay to 125 ms.
		 * TODO: maybe segment the too big skb, as in commit
		 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
		 */
		if (unlikely(len > 125 * NSEC_PER_MSEC)) {
			len = 125 * NSEC_PER_MSEC;
			q->stat_pkts_too_long++;
		}

		f->time_next_packet = now + len;
	}
out:
	qdisc_bstats_update(sch, skb);
	qdisc_unthrottled(sch);
	return skb;
}
static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
{
	struct rb_node **p, *parent;
	struct sock *sk = skb->sk;
	struct rb_root *root;
	struct fq_flow *f;

	/* warning: no starvation prevention... */
	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
		return &q->internal;

	/* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
	 * or a listener (SYNCOOKIE mode)
	 * 1) request sockets are not full blown,
	 *    they do not contain sk_pacing_rate
	 * 2) They are not part of a 'flow' yet
	 * 3) We do not want to rate limit them (eg SYNFLOOD attack),
	 *    especially if the listener set SO_MAX_PACING_RATE
	 * 4) We pretend they are orphaned
	 */
	if (!sk || sk_listener(sk)) {
		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;

		/* By forcing low order bit to 1, we make sure to not
		 * collide with a local flow (socket pointers are word aligned)
		 */
		sk = (struct sock *)((hash << 1) | 1UL);
		skb_orphan(skb);
	}

	root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];

	if (q->flows >= (2U << q->fq_trees_log) &&
	    q->inactive_flows > q->flows/2)
		fq_gc(q, root, sk);

	p = &root->rb_node;
	parent = NULL;
	while (*p) {
		parent = *p;

		f = container_of(parent, struct fq_flow, fq_node);
		if (f->sk == sk) {
			/* socket might have been reallocated, so check
			 * if its sk_hash is the same.
			 * If not, we need to refill credit with
			 * initial quantum
			 */
			if (unlikely(skb->sk &&
				     f->socket_hash != sk->sk_hash)) {
				f->credit = q->initial_quantum;
				f->socket_hash = sk->sk_hash;
				f->time_next_packet = 0ULL;
			}
			return f;
		}
		if (f->sk > sk)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!f)) {
		q->stat_allocation_errors++;
		return &q->internal;
	}
	fq_flow_set_detached(f);
	f->sk = sk;
	if (skb->sk)
		f->socket_hash = sk->sk_hash;
	f->credit = q->initial_quantum;

	rb_link_node(&f->fq_node, parent, p);
	rb_insert_color(&f->fq_node, root);

	q->flows++;
	q->inactive_flows++;
	return f;
}
static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
{
	struct rb_node **p, *parent;
	struct sock *sk = skb->sk;
	struct rb_root *root;
	struct fq_flow *f;
	int band;

	/* warning: no starvation prevention... */
	band = prio2band[skb->priority & TC_PRIO_MAX];
	if (unlikely(band == 0))
		return &q->internal;

	if (unlikely(!sk)) {
		/* By forcing low order bit to 1, we make sure to not
		 * collide with a local flow (socket pointers are word aligned)
		 */
		sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
	}

	root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];

	if (q->flows >= (2U << q->fq_trees_log) &&
	    q->inactive_flows > q->flows/2)
		fq_gc(q, root, sk);

	p = &root->rb_node;
	parent = NULL;
	while (*p) {
		parent = *p;

		f = container_of(parent, struct fq_flow, fq_node);
		if (f->sk == sk) {
			/* socket might have been reallocated, so check
			 * if its sk_hash is the same.
			 * If not, we need to refill credit with
			 * initial quantum
			 */
			if (unlikely(skb->sk &&
				     f->socket_hash != sk->sk_hash)) {
				f->credit = q->initial_quantum;
				f->socket_hash = sk->sk_hash;
			}
			return f;
		}
		if (f->sk > sk)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!f)) {
		q->stat_allocation_errors++;
		return &q->internal;
	}
	fq_flow_set_detached(f);
	f->sk = sk;
	if (skb->sk)
		f->socket_hash = sk->sk_hash;
	f->credit = q->initial_quantum;

	rb_link_node(&f->fq_node, parent, p);
	rb_insert_color(&f->fq_node, root);

	q->flows++;
	q->inactive_flows++;
	return f;
}