int bpf_iw(struct bpf_sock_ops *skops)
{
    int bufsize = 1500000;
    int rwnd_init = 40;
    int iw = 40;
    int rv = 0;
    int op;

    /* For testing purposes, only execute the rest of the BPF program
     * if one of the port numbers is 55601.
     */
    if (bpf_ntohl(skops->remote_port) != 55601 &&
        skops->local_port != 55601) {
        skops->reply = -1;
        return 1;
    }

    op = (int) skops->op;

#ifdef DEBUG
    bpf_printk("BPF command: %d\n", op);
#endif

    /* Usually there would be a check to ensure the hosts are far
     * enough from each other that it makes sense to increase the
     * buffer sizes.
     */
    switch (op) {
    case BPF_SOCK_OPS_RWND_INIT:
        rv = rwnd_init;
        break;
    case BPF_SOCK_OPS_TCP_CONNECT_CB:
        /* Set sndbuf and rcvbuf of active connections */
        rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
                            sizeof(bufsize));
        rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, &bufsize,
                             sizeof(bufsize));
        break;
    case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
        rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, sizeof(iw));
        break;
    case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
        /* Set sndbuf and rcvbuf of passive connections */
        rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
                            sizeof(bufsize));
        rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, &bufsize,
                             sizeof(bufsize));
        break;
    default:
        rv = -1;
    }

#ifdef DEBUG
    bpf_printk("Returning %d\n", rv);
#endif

    skops->reply = rv;
    return 1;
}
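bpf_iw (and bpf_synrto further down) are sock_ops programs, so they are attached to a cgroup rather than to a network interface. A minimal user-space sketch of loading and attaching such a program with libbpf; the object path, program name, and cgroup path here are assumptions for illustration, not part of the original samples:

#include <fcntl.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Hypothetical helper: load an object file and attach its sock_ops
 * program to the given cgroup.
 */
int attach_sockops(const char *obj_path, const char *cgroup_path)
{
    struct bpf_object *obj;
    struct bpf_program *prog;
    int cg_fd, prog_fd;

    cg_fd = open(cgroup_path, O_RDONLY);
    if (cg_fd < 0)
        return -1;

    obj = bpf_object__open_file(obj_path, NULL);
    if (libbpf_get_error(obj))
        return -1;
    if (bpf_object__load(obj))
        return -1;

    prog = bpf_object__find_program_by_name(obj, "bpf_iw");
    if (!prog)
        return -1;
    prog_fd = bpf_program__fd(prog);

    /* Attach as a cgroup sock_ops program; the skops callbacks then
     * fire for TCP sockets created within that cgroup.
     */
    return bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
}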
int bpf_prog2(struct __sk_buff *skb)
{
    void *data_end = (void *)(long) skb->data_end;
    void *data = (void *)(long) skb->data;
    __u32 lport = skb->local_port;
    __u32 rport = skb->remote_port;
    __u8 *d = data;
    __u8 sk, map;

    if (data + 8 > data_end)
        return SK_DROP;

    map = d[0];
    sk = d[1];

    d[0] = 0xd;
    d[1] = 0xe;
    d[2] = 0xa;
    d[3] = 0xd;
    d[4] = 0xb;
    d[5] = 0xe;
    d[6] = 0xe;
    d[7] = 0xf;

    bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);

    if (!map)
        return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);

    return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
}
int bpf_sockmap(struct bpf_sock_ops *skops)
{
    __u32 lport, rport;
    int op, err = 0, index, key, ret;

    op = (int) skops->op;

    switch (op) {
    case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
        lport = skops->local_port;
        rport = skops->remote_port;

        if (lport == 10000) {
            ret = 1;
            err = bpf_sock_map_update(skops, &sock_map, &ret,
                                      BPF_NOEXIST);
            bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
                       lport, bpf_ntohl(rport), err);
        }
        break;
    case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
        lport = skops->local_port;
        rport = skops->remote_port;

        if (bpf_ntohl(rport) == 10001) {
            ret = 10;
            err = bpf_sock_map_update(skops, &sock_map, &ret,
                                      BPF_NOEXIST);
            bpf_printk("active(%i -> %i) map ctx update err: %d\n",
                       lport, bpf_ntohl(rport), err);
        }
        break;
    default:
        break;
    }

    return 0;
}
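The sockmap programs here reference maps (sock_map, and sock_map_rx/sock_map_tx in the verdict program above) whose definitions are not part of these listings. A plausible sketch in the legacy bpf_map_def style these samples use; the key/value sizes follow the SOCKMAP map type, but the capacity is an assumption:

struct bpf_map_def SEC("maps") sock_map = {
    .type = BPF_MAP_TYPE_SOCKMAP,
    .key_size = sizeof(int),
    .value_size = sizeof(int),
    .max_entries = 20,  /* assumed capacity */
};

/* sock_map_rx and sock_map_tx used by the verdict program above would be
 * declared the same way, one map per redirect direction.
 */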
int bpf_synrto(struct bpf_sock_ops *skops)
{
    int rv = -1;
    int op;

    /* For testing purposes, only execute the rest of the BPF program
     * if one of the port numbers is 55601.
     */
    if (bpf_ntohl(skops->remote_port) != 55601 &&
        skops->local_port != 55601) {
        skops->reply = -1;
        return 1;
    }

    op = (int) skops->op;

#ifdef DEBUG
    bpf_printk("BPF command: %d\n", op);
#endif

    /* Check for TIMEOUT_INIT operation and IPv6 addresses */
    if (op == BPF_SOCK_OPS_TIMEOUT_INIT &&
        skops->family == AF_INET6) {
        /* If the first 5.5 bytes of the IPv6 addresses are the same,
         * then both hosts are in the same datacenter,
         * so use an RTO of 10ms.
         */
        if (skops->local_ip6[0] == skops->remote_ip6[0] &&
            (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
            (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
            rv = 10;
    }

#ifdef DEBUG
    bpf_printk("Returning %d\n", rv);
#endif

    skops->reply = rv;
    return 1;
}
int bpf_prog2(struct __sk_buff *skb)
{
    __u32 lport = skb->local_port;
    __u32 rport = skb->remote_port;
    int ret = 0;

    if (lport == 10000)
        ret = 10;
    else
        ret = 1;

    bpf_printk("sockmap: %d -> %d @ %d\n",
               lport, bpf_ntohl(rport), ret);

    /* bpf_sk_redirect_map() takes the skb as its first argument,
     * as in the verdict program earlier in this section.
     */
    return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
}
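Verdict programs like this one are attached to the sockmap itself rather than to a hook point. A hypothetical user-space sketch, reduced to the essential call; the helper name and error handling are assumptions:

#include <bpf/bpf.h>

/* prog_fd: fd of the loaded sk_skb verdict program;
 * map_fd:  fd of sock_map (a BPF_MAP_TYPE_SOCKMAP map).
 */
int attach_verdict(int prog_fd, int map_fd)
{
    /* Run the verdict program on every skb received on sockets
     * stored in sock_map.
     */
    return bpf_prog_attach(prog_fd, map_fd,
                           BPF_SK_SKB_STREAM_VERDICT, 0);
}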
int xdp_sample_prog(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    /* Metadata will be in the perf event before the packet data. */
    struct S {
        u16 cookie;
        u16 pkt_len;
    } __packed metadata;

    if (data < data_end) {
        /* The XDP perf_event_output handler will use the upper 32 bits
         * of the flags argument as a number of bytes to include of the
         * packet payload in the event data. If the size is too big, the
         * call to bpf_perf_event_output will fail and return -EFAULT.
         *
         * See bpf_xdp_event_output in net/core/filter.c.
         *
         * The BPF_F_CURRENT_CPU flag means that the event output fd
         * will be indexed by the CPU number in the event map.
         */
        u64 flags = BPF_F_CURRENT_CPU;
        u16 sample_size;
        int ret;

        metadata.cookie = 0xdead;
        metadata.pkt_len = (u16)(data_end - data);
        sample_size = min(metadata.pkt_len, SAMPLE_SIZE);

        flags |= (u64)sample_size << 32;

        ret = bpf_perf_event_output(ctx, &my_map, flags,
                                    &metadata, sizeof(metadata));
        if (ret)
            bpf_printk("perf_event_output failed: %d\n", ret);
    }

    return XDP_PASS;
}
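The my_map, SAMPLE_SIZE, and min() identifiers are defined outside this listing. A plausible sketch of what the program assumes, in the same legacy map style; the sample size and CPU count are assumptions:

#define SAMPLE_SIZE 64ul        /* assumed: bytes of payload to sample */
#define MAX_CPUS    128         /* assumed upper bound on CPU count */

#define min(a, b) ((a) < (b) ? (a) : (b))

/* One perf event ring per CPU; bpf_perf_event_output() with
 * BPF_F_CURRENT_CPU picks the entry for the CPU the program runs on.
 */
struct bpf_map_def SEC("maps") my_map = {
    .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    .key_size = sizeof(int),
    .value_size = sizeof(u32),
    .max_entries = MAX_CPUS,
};

User space would then consume the sampled packets from this map, for example with libbpf's perf_buffer__new() or the raw perf_event_open() interface.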
int _hbm_out_cg(struct __sk_buff *skb)
{
    struct hbm_pkt_info pkti;
    int len = skb->len;
    unsigned int queue_index = 0;
    unsigned long long curtime;
    int credit;
    signed long long delta = 0, zero = 0;
    int max_credit = MAX_CREDIT;
    bool congestion_flag = false;
    bool drop_flag = false;
    bool cwr_flag = false;
    struct hbm_vqueue *qdp;
    struct hbm_queue_stats *qsp = NULL;
    int rv = ALLOW_PKT;

    qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
    if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
        return ALLOW_PKT;

    hbm_get_pkt_info(skb, &pkti);

    // We may want to account for the length of headers in the len
    // calculation, like ETH header + overhead, especially if it
    // is a GSO packet. But we are not doing that right now.

    qdp = bpf_get_local_storage(&queue_state, 0);
    if (!qdp)
        return ALLOW_PKT;
    else if (qdp->lasttime == 0)
        hbm_init_vqueue(qdp, 1024);

    curtime = bpf_ktime_get_ns();

    // Begin critical section
    bpf_spin_lock(&qdp->lock);
    credit = qdp->credit;
    delta = curtime - qdp->lasttime;
    /* delta < 0 implies that another process with a curtime greater
     * than ours beat us to the critical section and already added
     * the new credit, so we should not add it ourselves
     */
    if (delta > 0) {
        qdp->lasttime = curtime;
        credit += CREDIT_PER_NS(delta, qdp->rate);
        if (credit > MAX_CREDIT)
            credit = MAX_CREDIT;
    }
    credit -= len;
    qdp->credit = credit;
    bpf_spin_unlock(&qdp->lock);
    // End critical section

    // Check if we should update the rate
    if (qsp != NULL && (qsp->rate * 128) != qdp->rate) {
        qdp->rate = qsp->rate * 128;
        bpf_printk("Updating rate: %d (1sec:%llu bits)\n",
                   (int)qdp->rate,
                   CREDIT_PER_NS(1000000000, qdp->rate) * 8);
    }

    // Set flags (drop, congestion, cwr)
    // Dropping => we are congested, so ignore congestion flag
    if (credit < -DROP_THRESH ||
        (len > LARGE_PKT_THRESH && credit < -LARGE_PKT_DROP_THRESH)) {
        // Very congested, set drop flag
        drop_flag = true;
    } else if (credit < 0) {
        // Congested, set congestion flag
        if (pkti.ecn) {
            if (credit < -MARK_THRESH)
                congestion_flag = true;
            else
                congestion_flag = false;
        } else {
            congestion_flag = true;
        }
    }

    if (congestion_flag) {
        if (!bpf_skb_ecn_set_ce(skb)) {
            if (len > LARGE_PKT_THRESH) {
                // Problem if too many small packets?
                drop_flag = true;
            }
        }
    }

    if (drop_flag)
        rv = DROP_PKT;

    hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag);

    if (rv == DROP_PKT)
        __sync_add_and_fetch(&(qdp->credit), len);

    return rv;
}
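The per-cgroup virtual queue and the statistics array referenced above come from the sample's shared headers. A plausible sketch of those definitions; the struct layouts are assumptions inferred from the fields this program touches, not the original declarations:

/* Assumed shape: fields used by the spin-lock credit accounting above. */
struct hbm_vqueue {
    struct bpf_spin_lock lock;
    unsigned long long lasttime;   /* last credit update, in ns */
    int credit;                    /* available credit, in bytes */
    unsigned long long rate;       /* scaled rate consumed by CREDIT_PER_NS() */
};

/* Assumed shape: configuration written by user space plus counters
 * (the counters updated by hbm_update_stats() are elided here).
 */
struct hbm_queue_stats {
    unsigned long long rate;       /* configured rate */
    unsigned long long loopback;   /* if set, also police loopback traffic */
};

/* One virtual queue per cgroup, retrieved with bpf_get_local_storage(). */
struct bpf_map_def SEC("maps") queue_state = {
    .type = BPF_MAP_TYPE_CGROUP_STORAGE,
    .key_size = sizeof(struct bpf_cgroup_storage_key),
    .value_size = sizeof(struct hbm_vqueue),
};

/* Shared configuration and statistics, read and written from user space. */
struct bpf_map_def SEC("maps") queue_stats = {
    .type = BPF_MAP_TYPE_ARRAY,
    .key_size = sizeof(unsigned int),
    .value_size = sizeof(struct hbm_queue_stats),
    .max_entries = 1,
};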