/* Compute the elapsed time since the per-CPU start timestamp and
 * bump the matching slot of a per-CPU log2 latency histogram.
 */
int bpf_prog2(struct pt_regs *ctx)
{
	u64 *ts, cur_ts, delta;
	int key, cpu;
	long *val;

	cpu = bpf_get_smp_processor_id();
	ts = bpf_map_lookup_elem(&my_map, &cpu);
	if (!ts)
		return 0;

	cur_ts = bpf_ktime_get_ns();
	delta = log2l(cur_ts - *ts);

	if (delta > MAX_ENTRIES - 1)
		delta = MAX_ENTRIES - 1;

	key = cpu * MAX_ENTRIES + delta;
	val = bpf_map_lookup_elem(&my_lat, &key);
	if (val)
		__sync_fetch_and_add((long *)val, 1);

	return 0;
}
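/*
 * The histogram program above keys my_map by CPU id and counts log2
 * buckets in my_lat.  A minimal sketch of plausible map definitions,
 * using the legacy struct bpf_map_def layout from samples/bpf; the
 * sizes and the MAX_CPU constant are assumptions for illustration, not
 * taken from the original source (other snippets in this section reuse
 * the name my_map for differently typed maps):
 */
struct bpf_map_def SEC("maps") my_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(int),
	.value_size  = sizeof(u64),
	.max_entries = MAX_CPU,
};

struct bpf_map_def SEC("maps") my_lat = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(int),
	.value_size  = sizeof(long),
	.max_entries = MAX_CPU * MAX_ENTRIES,
};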
/* Record the start timestamp for this request, keyed by the
 * struct request pointer passed as the first kprobe argument.
 */
int bpf_prog1(struct pt_regs *ctx)
{
	long rq = PT_REGS_PARM1(ctx);
	u64 val = bpf_ktime_get_ns();

	bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
	return 0;
}
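/*
 * In the probe above, my_map is keyed by the struct request pointer
 * rather than by CPU id, so it has to be a hash map.  A minimal sketch
 * of one plausible definition (key/value sizes and max_entries are
 * assumed for illustration):
 */
struct bpf_map_def SEC("maps") my_map = {
	.type        = BPF_MAP_TYPE_HASH,
	.key_size    = sizeof(long),
	.value_size  = sizeof(u64),
	.max_entries = 4096,
};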
/* Same start probe as above, but storing into a map named start_ts.
 * PT_REGS_PARM1() fetches the first argument portably instead of
 * reading the x86-64 rdi register directly.
 */
int bpf_prog1(struct pt_regs *ctx)
{
	long rq = PT_REGS_PARM1(ctx);
	u64 val = bpf_ktime_get_ns();

	bpf_map_update_elem(&start_ts, &rq, &val, BPF_ANY);
	return 0;
}
/* Record the current timestamp in this CPU's slot of my_map. */
int bpf_prog1(struct pt_regs *ctx)
{
	int cpu = bpf_get_smp_processor_id();
	u64 *ts = bpf_map_lookup_elem(&my_map, &cpu);

	if (ts)
		*ts = bpf_ktime_get_ns();

	return 0;
}
int bpf_prog2(struct pt_regs *ctx)
{
	long rq = PT_REGS_PARM1(ctx);
	u64 *value, l, base;
	u32 index;

	value = bpf_map_lookup_elem(&my_map, &rq);
	if (!value)
		return 0;

	u64 cur_time = bpf_ktime_get_ns();
	u64 delta = cur_time - *value;

	bpf_map_delete_elem(&my_map, &rq);

	/* The lines below compute index = log10(delta) * 10
	 * using integer arithmetic:
	 * index = 29 ~ 1 usec
	 * index = 59 ~ 1 msec
	 * index = 89 ~ 1 sec
	 * index = 99 ~ 10 sec or more
	 * log10(x) * 10 = log2(x) * 10 / log2(10) ~= log2(x) * 3
	 */
	l = log2l(delta);
	base = 1ll << l;
	index = (l * 64 + (delta - base) * 64 / base) * 3 / 64;

	if (index >= SLOTS)
		index = SLOTS - 1;

	value = bpf_map_lookup_elem(&lat_map, &index);
	if (value)
		*value += 1;

	return 0;
}
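/*
 * A worked example of the index arithmetic above, assuming log2l()
 * returns the position of the highest set bit (floor of log2): for
 * delta = 1000 ns (~1 usec), l = 9 and base = 512, so
 * (l * 64 + (delta - base) * 64 / base) = 576 + 61 = 637, and
 * 637 * 3 / 64 = 29, which matches the "index = 29 ~ 1 usec" note.
 * The factor 3 approximates 10 / log2(10) ~= 3.01, and the
 * (delta - base) * 64 / base term interpolates between powers of two
 * for finer resolution than a plain log2 histogram.
 */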
int bpf_prog2(struct pt_regs *ctx)
{
	long rq = PT_REGS_PARM1(ctx);
	struct request *req = (struct request *)rq;
	u64 *value, cur_time, delta;

	/* calculate latency */
	value = bpf_map_lookup_elem(&start_ts, &rq);
	if (!value)
		return 0;

	cur_time = bpf_ktime_get_ns();
	delta = cur_time - *value;

	bpf_map_delete_elem(&start_ts, &rq);

	/* using bpf_trace_printk() for DEBUG ONLY; limited to 3 args. */
	char fmt[] = "%d %x %d\n";

	bpf_trace_printk(fmt, sizeof(fmt),
			 _(req->__data_len),	/* bytes */
			 _(req->cmd_flags),	/* flags */
			 delta / 1000);		/* lat_us */
	return 0;
}
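/*
 * The _() wrapper above dereferences kernel memory through a probe
 * read.  A minimal sketch of how such a helper is typically defined
 * (an assumption here, not taken from this source); the formatted
 * output of bpf_trace_printk() can be read from
 * /sys/kernel/debug/tracing/trace_pipe.
 */
#define _(P)							\
	({							\
		typeof(P) val;					\
		bpf_probe_read_kernel(&val, sizeof(val), &(P));	\
		val;						\
	})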
int bpf_prog2(struct cpu_args *ctx)
{
	u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
	u32 key, cpu, pstate_idx;
	u64 *val;

	cpu = ctx->cpu_id;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
	pts = bpf_map_lookup_elem(&my_map, &key);
	if (!pts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
	pstate = bpf_map_lookup_elem(&my_map, &key);
	if (!pstate)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
	cstate = bpf_map_lookup_elem(&my_map, &key);
	if (!cstate)
		return 0;

	prev_state = *pstate;
	*pstate = ctx->state;

	if (!*pts) {
		*pts = bpf_ktime_get_ns();
		return 0;
	}

	cur_ts = bpf_ktime_get_ns();
	delta = cur_ts - *pts;
	*pts = cur_ts;

	/* When the CPU is in an idle state, bail out and skip the
	 * pstate statistics.
	 */
	if (*cstate != (u32)(-1))
		return 0;

	/*
	 * The CPU changes to a different OPP (in the diagram below,
	 * from OPP3 to OPP1): record the interval spent at the previous
	 * frequency OPP3 and update the timestamp as the start time for
	 * the new frequency OPP1.
	 *
	 *                 OPP3
	 *            +---------------------+
	 *     OPP2   |                     |
	 *  ----------+                     |
	 *                                  |   OPP1
	 *                                  +---------------
	 *
	 *            |<- pstate duration ->|
	 *            ^                     ^
	 *           pts                  cur_ts
	 */
	pstate_idx = find_cpu_pstate_idx(*pstate);
	if (pstate_idx >= MAX_PSTATE_ENTRIES)
		return 0;

	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
	val = bpf_map_lookup_elem(&pstate_duration, &key);
	if (val)
		__sync_fetch_and_add((long *)val, delta);

	return 0;
}
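/*
 * The key arithmetic above packs several bookkeeping values per CPU
 * into a single array map, addressed as cpu * MAP_OFF_NUM + offset.
 * A sketch of the assumed slot layout (offsets are illustrative):
 */
enum {
	MAP_OFF_CSTATE_TIME,
	MAP_OFF_CSTATE_IDX,
	MAP_OFF_PSTATE_TIME,
	MAP_OFF_PSTATE_IDX,
	MAP_OFF_NUM,
};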
int bpf_prog1(struct cpu_args *ctx)
{
	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
	u32 key, cpu, pstate_idx;
	u64 *val;

	if (ctx->cpu_id > MAX_CPU)
		return 0;

	cpu = ctx->cpu_id;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
	cts = bpf_map_lookup_elem(&my_map, &key);
	if (!cts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
	cstate = bpf_map_lookup_elem(&my_map, &key);
	if (!cstate)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
	pts = bpf_map_lookup_elem(&my_map, &key);
	if (!pts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
	pstate = bpf_map_lookup_elem(&my_map, &key);
	if (!pstate)
		return 0;

	prev_state = *cstate;
	*cstate = ctx->state;

	if (!*cts) {
		*cts = bpf_ktime_get_ns();
		return 0;
	}

	cur_ts = bpf_ktime_get_ns();
	delta = cur_ts - *cts;
	*cts = cur_ts;

	/*
	 * When state is not (u32)-1, the CPU is entering an idle state;
	 * in this case we need to record the interval for the pstate.
	 *
	 *                 OPP2
	 *            +---------------------+
	 *     OPP1   |                     |
	 *  ----------+                     |
	 *                                  |  Idle state
	 *                                  +---------------
	 *
	 *            |<- pstate duration ->|
	 *            ^                     ^
	 *           pts                  cur_ts
	 */
	if (ctx->state != (u32)-1) {
		/* Only record the pstate after the first cpu_frequency
		 * event has been seen.
		 */
		if (!*pts)
			return 0;

		delta = cur_ts - *pts;

		pstate_idx = find_cpu_pstate_idx(*pstate);
		if (pstate_idx >= MAX_PSTATE_ENTRIES)
			return 0;

		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
		val = bpf_map_lookup_elem(&pstate_duration, &key);
		if (val)
			__sync_fetch_and_add((long *)val, delta);

	/*
	 * When state equals (u32)-1, the CPU is exiting an idle state;
	 * in this case we need to record the interval for the cstate.
	 *
	 *     OPP2
	 *  -----------+
	 *             |                      OPP1
	 *             |                   +-----------
	 *             |    Idle state     |
	 *             +-------------------+
	 *
	 *             |<- cstate duration ->|
	 *             ^                     ^
	 *            cts                  cur_ts
	 */
	} else {
		key = cpu * MAX_CSTATE_ENTRIES + prev_state;
		val = bpf_map_lookup_elem(&cstate_duration, &key);
		if (val)
			__sync_fetch_and_add((long *)val, delta);
	}

	/* Update the pstate timestamp as the new start time. */
	if (*pts)
		*pts = cur_ts;

	return 0;
}
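/*
 * Both tracepoint programs above call find_cpu_pstate_idx() to map a
 * raw frequency onto a histogram slot.  A minimal sketch, assuming a
 * cpu_opps[] table of supported frequencies (the table name and the
 * "not found" return value are assumptions for illustration):
 */
static u32 find_cpu_pstate_idx(u32 frequency)
{
	u32 i;

	for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
		if (frequency == cpu_opps[i])
			return i;
	}

	/* Unknown OPP: callers treat an out-of-range index as "skip". */
	return ~0;
}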
/* Emit a timestamp (in microseconds) to user space via a perf buffer. */
int trace_undnat_return(struct pt_regs *ctx)
{
	struct trace_data d = {};

	d.ts_us = bpf_ktime_get_ns() / 1000;
	dnat_events.perf_submit(ctx, &d, sizeof(d));
	return 0;
}
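/*
 * This snippet uses bcc syntax; the event struct and the perf output
 * table it references are declared elsewhere in the bcc program.  A
 * minimal sketch of those declarations (the real struct likely carries
 * more fields than the timestamp shown here):
 */
struct trace_data {
	u64 ts_us;
};
BPF_PERF_OUTPUT(dnat_events);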
int _hbm_out_cg(struct __sk_buff *skb)
{
	struct hbm_pkt_info pkti;
	int len = skb->len;
	unsigned int queue_index = 0;
	unsigned long long curtime;
	int credit;
	signed long long delta = 0, zero = 0;
	int max_credit = MAX_CREDIT;
	bool congestion_flag = false;
	bool drop_flag = false;
	bool cwr_flag = false;
	struct hbm_vqueue *qdp;
	struct hbm_queue_stats *qsp = NULL;
	int rv = ALLOW_PKT;

	qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
	if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
		return ALLOW_PKT;

	hbm_get_pkt_info(skb, &pkti);

	// We may want to account for the length of headers in len
	// calculation, like ETH header + overhead, especially if it
	// is a GSO packet. But I am not doing it right now.

	qdp = bpf_get_local_storage(&queue_state, 0);
	if (!qdp)
		return ALLOW_PKT;
	else if (qdp->lasttime == 0)
		hbm_init_vqueue(qdp, 1024);

	curtime = bpf_ktime_get_ns();

	// Begin critical section
	bpf_spin_lock(&qdp->lock);
	credit = qdp->credit;
	delta = curtime - qdp->lasttime;
	/* delta < 0 implies that another process with a curtime greater
	 * than ours beat us to the critical section and already added
	 * the new credit, so we should not add it ourselves
	 */
	if (delta > 0) {
		qdp->lasttime = curtime;
		credit += CREDIT_PER_NS(delta, qdp->rate);
		if (credit > MAX_CREDIT)
			credit = MAX_CREDIT;
	}
	credit -= len;
	qdp->credit = credit;
	bpf_spin_unlock(&qdp->lock);
	// End critical section

	// Check if we should update rate
	if (qsp != NULL && (qsp->rate * 128) != qdp->rate) {
		qdp->rate = qsp->rate * 128;
		bpf_printk("Updating rate: %d (1sec:%llu bits)\n",
			   (int)qdp->rate,
			   CREDIT_PER_NS(1000000000, qdp->rate) * 8);
	}

	// Set flags (drop, congestion, cwr)
	// Dropping => we are congested, so ignore congestion flag
	if (credit < -DROP_THRESH ||
	    (len > LARGE_PKT_THRESH && credit < -LARGE_PKT_DROP_THRESH)) {
		// Very congested, set drop flag
		drop_flag = true;
	} else if (credit < 0) {
		// Congested, set congestion flag
		if (pkti.ecn) {
			if (credit < -MARK_THRESH)
				congestion_flag = true;
			else
				congestion_flag = false;
		} else {
			congestion_flag = true;
		}
	}

	if (congestion_flag) {
		if (!bpf_skb_ecn_set_ce(skb)) {
			if (len > LARGE_PKT_THRESH) {
				// Problem if too many small packets?
				drop_flag = true;
			}
		}
	}

	if (drop_flag)
		rv = DROP_PKT;

	hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag);

	if (rv == DROP_PKT)
		__sync_add_and_fetch(&(qdp->credit), len);

	return rv;
}
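/*
 * CREDIT_PER_NS() converts elapsed nanoseconds into byte credits at the
 * configured rate.  A sketch of one plausible definition, assuming
 * qdp->rate is expressed in bytes per 2^20 ns; that assumption is
 * consistent with the qsp->rate * 128 conversion above if qsp->rate is
 * in Mbps (1 Mbps = 125000 bytes/s, about 131 bytes per 2^20 ns,
 * rounded down to a power-of-two-friendly 128):
 */
#define CREDIT_PER_NS(delta, rate)	((((u64)(delta)) * (rate)) >> 20)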