Example #1
int bpf_iw(struct bpf_sock_ops *skops)
{
	int bufsize = 1500000;
	int rwnd_init = 40;
	int iw = 40;
	int rv = 0;
	int op;

	/* For testing purposes, only execute the rest of the BPF program
	 * if one of the port numbers is 55601 (note that remote_port is
	 * in network byte order while local_port is in host byte order)
	 */
	if (bpf_ntohl(skops->remote_port) != 55601 &&
	    skops->local_port != 55601) {
		skops->reply = -1;
		return 1;
	}

	op = (int) skops->op;

#ifdef DEBUG
	bpf_printk("BPF command: %d\n", op);
#endif

	/* Usually there would be a check to ensure the hosts are far
	 * enough apart that increasing the buffer sizes makes sense
	 */
	switch (op) {
	case BPF_SOCK_OPS_RWND_INIT:
		rv = rwnd_init;
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		/* Set sndbuf and rcvbuf of active connections */
		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
				    sizeof(bufsize));
		rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
				     &bufsize, sizeof(bufsize));
		break;
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw,
				    sizeof(iw));
		break;
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* Set sndbuf and rcvbuf of passive connections */
		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
				    sizeof(bufsize));
		rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
				     &bufsize, sizeof(bufsize));
		break;
	default:
		rv = -1;
	}
#ifdef DEBUG
	bpf_printk("Returning %d\n", rv);
#endif
	skops->reply = rv;
	return 1;
}
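The sock_ops programs in this listing (this one and Examples #3 and #4) run attached to a cgroup. A minimal user-space attach sketch, assuming libbpf 1.x and a cgroup-v2 mount; the object path, program selection, and cgroup path are placeholders:

#include <fcntl.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int attach_sockops(const char *obj_path, const char *cg_path)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	int cg_fd;

	obj = bpf_object__open_file(obj_path, NULL);
	if (!obj || bpf_object__load(obj))
		return -1;
	/* assume the first program in the object is the sock_ops program */
	prog = bpf_object__next_program(obj, NULL);
	cg_fd = open(cg_path, O_RDONLY | O_DIRECTORY);
	if (!prog || cg_fd < 0)
		return -1;
	/* invoke the program for every socket in the cgroup */
	return bpf_prog_attach(bpf_program__fd(prog), cg_fd,
			       BPF_CGROUP_SOCK_OPS, 0);
}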
Example #2
int bpf_prog2(struct __sk_buff *skb)
{
	void *data_end = (void *)(long) skb->data_end;
	void *data = (void *)(long) skb->data;
	__u8 *d = data;
	__u8 sk, map;

	if (data + 8 > data_end)
		return SK_DROP;

	map = d[0];
	sk = d[1];

	d[0] = 0xd;
	d[1] = 0xe;
	d[2] = 0xa;
	d[3] = 0xd;
	d[4] = 0xb;
	d[5] = 0xe;
	d[6] = 0xe;
	d[7] = 0xf;

	bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);

	if (!map)
		return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
	return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
}
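This verdict program indexes into two sockmaps that must be declared in the same object. A plausible sketch of those declarations in the legacy map style these samples use; the entry counts are assumptions:

struct bpf_map_def SEC("maps") sock_map_rx = {
	.type = BPF_MAP_TYPE_SOCKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 20,
};

struct bpf_map_def SEC("maps") sock_map_tx = {
	.type = BPF_MAP_TYPE_SOCKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 20,
};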
Example #3
int bpf_sockmap(struct bpf_sock_ops *skops)
{
	__u32 lport, rport;
	int op, err = 0, ret;

	op = (int) skops->op;

	switch (op) {
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		lport = skops->local_port;
		rport = skops->remote_port;

		if (lport == 10000) {
			ret = 1;
			err = bpf_sock_map_update(skops, &sock_map, &ret,
						  BPF_NOEXIST);
			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
				   lport, bpf_ntohl(rport), err);
		}
		break;
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		lport = skops->local_port;
		rport = skops->remote_port;

		if (bpf_ntohl(rport) == 10001) {
			ret = 10;
			err = bpf_sock_map_update(skops, &sock_map, &ret,
						  BPF_NOEXIST);
			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
				   lport, bpf_ntohl(rport), err);
		}
		break;
	default:
		break;
	}

	return 0;
}
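bpf_sock_map_update() stores the new socket in sock_map at key 1 (passive side) or key 10 (active side); a verdict program such as the one in Example #5 can then redirect traffic to those slots. A sketch of the assumed map declaration, again in the legacy style with an assumed entry count:

struct bpf_map_def SEC("maps") sock_map = {
	.type = BPF_MAP_TYPE_SOCKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 20,
};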
Example #4
int bpf_synrto(struct bpf_sock_ops *skops)
{
	int rv = -1;
	int op;

	/* For testing purposes, only execute the rest of the BPF program
	 * if one of the port numbers is 55601
	 */
	if (bpf_ntohl(skops->remote_port) != 55601 &&
	    skops->local_port != 55601) {
		skops->reply = -1;
		return 1;
	}

	op = (int) skops->op;

#ifdef DEBUG
	bpf_printk("BPF command: %d\n", op);
#endif

	/* Check for TIMEOUT_INIT operation and IPv6 addresses */
	if (op == BPF_SOCK_OPS_TIMEOUT_INIT &&
	    skops->family == AF_INET6) {

		/* If the first 5.5 bytes (a 44-bit prefix) of the IPv6
		 * addresses are the same, both hosts are in the same
		 * datacenter, so use an RTO of 10ms
		 */
		if (skops->local_ip6[0] == skops->remote_ip6[0] &&
		    (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
		    (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
			rv = 10;
	}
#ifdef DEBUG
	bpf_printk("Returning %d\n", rv);
#endif
	skops->reply = rv;
	return 1;
}
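A quick worked example of that 44-bit comparison, using hypothetical addresses written as host-order words:

/* local  2001:0db8:45a0::1 -> ip6[0] = 0x20010db8, ip6[1] = 0x45a00000
 * remote 2001:0db8:45af::2 -> ip6[0] = 0x20010db8, ip6[1] = 0x45af0000
 * The ip6[0] words match, and masking ip6[1] with 0xfff00000 yields
 * 0x45a00000 on both sides, so the hosts count as same-datacenter
 * and the program replies with a 10ms RTO.
 */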
Example #5
int bpf_prog2(struct __sk_buff *skb)
{
	__u32 lport = skb->local_port;
	__u32 rport = skb->remote_port;
	int ret = 0;

	if (lport == 10000)
		ret = 10;
	else
		ret = 1;

	bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
	return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
}
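A stream verdict program like this one is normally paired with a stream parser program on the same sockmap; keys 10 and 1 here match the slots populated by the sock_ops program in Example #3. A minimal parser that accepts the whole buffer, following the classic samples/sockmap pattern (the section name assumes modern libbpf conventions):

SEC("sk_skb/stream_parser")
int bpf_prog1(struct __sk_buff *skb)
{
	return skb->len;
}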
Example #6
int xdp_sample_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;

	/* Metadata will be in the perf event before the packet data. */
	struct S {
		u16 cookie;
		u16 pkt_len;
	} __packed metadata;

	if (data < data_end) {
		/* The XDP perf_event_output handler will use the upper 32 bits
		 * of the flags argument as a number of bytes to include of the
		 * packet payload in the event data. If the size is too big, the
		 * call to bpf_perf_event_output will fail and return -EFAULT.
		 *
		 * See bpf_xdp_event_output in net/core/filter.c.
		 *
		 * The BPF_F_CURRENT_CPU flag means that the event output fd
		 * will be indexed by the CPU number in the event map.
		 */
		u64 flags = BPF_F_CURRENT_CPU;
		u16 sample_size;
		int ret;

		metadata.cookie = 0xdead;
		metadata.pkt_len = (u16)(data_end - data);
		sample_size = min(metadata.pkt_len, SAMPLE_SIZE);
		flags |= (u64)sample_size << 32;

		ret = bpf_perf_event_output(ctx, &my_map, flags,
					    &metadata, sizeof(metadata));
		if (ret)
			bpf_printk("perf_event_output failed: %d\n", ret);
	}

	return XDP_PASS;
}
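bpf_perf_event_output() with BPF_F_CURRENT_CPU requires my_map to be a perf event array indexed by CPU. A plausible declaration in the legacy map style; max_entries is an assumption and should be at least the number of possible CPUs:

struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(u32),
	.max_entries = 64,	/* assumed upper bound on possible CPUs */
};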
Example #7
int _hbm_out_cg(struct __sk_buff *skb)
{
	struct hbm_pkt_info pkti;
	int len = skb->len;
	unsigned int queue_index = 0;
	unsigned long long curtime;
	int credit;
	signed long long delta = 0;
	int max_credit = MAX_CREDIT;
	bool congestion_flag = false;
	bool drop_flag = false;
	bool cwr_flag = false;
	struct hbm_vqueue *qdp;
	struct hbm_queue_stats *qsp = NULL;
	int rv = ALLOW_PKT;

	qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
	if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
		return ALLOW_PKT;

	hbm_get_pkt_info(skb, &pkti);

	// We may want to account for header overhead (e.g. the Ethernet
	// header) in the len calculation, especially for GSO packets,
	// but that is not done here.

	qdp = bpf_get_local_storage(&queue_state, 0);
	if (!qdp)
		return ALLOW_PKT;
	else if (qdp->lasttime == 0)
		hbm_init_vqueue(qdp, 1024);

	curtime = bpf_ktime_get_ns();

	// Begin critical section
	bpf_spin_lock(&qdp->lock);
	credit = qdp->credit;
	delta = curtime - qdp->lasttime;
	/* delta < 0 implies that another process with a curtime greater
	 * than ours beat us to the critical section and already added
	 * the new credit, so we should not add it ourselves
	 */
	if (delta > 0) {
		qdp->lasttime = curtime;
		credit += CREDIT_PER_NS(delta, qdp->rate);
		if (credit > MAX_CREDIT)
			credit = MAX_CREDIT;
	}
	credit -= len;
	qdp->credit = credit;
	bpf_spin_unlock(&qdp->lock);
	// End critical section

	// Check if we should update rate
	if (qsp != NULL && (qsp->rate * 128) != qdp->rate) {
		qdp->rate = qsp->rate * 128;
		bpf_printk("Updating rate: %d (1sec:%llu bits)\n",
			   (int)qdp->rate,
			   CREDIT_PER_NS(1000000000, qdp->rate) * 8);
	}

	// Set flags (drop, congestion, cwr)
	// Dropping => we are congested, so ignore congestion flag
	if (credit < -DROP_THRESH ||
	    (len > LARGE_PKT_THRESH &&
	     credit < -LARGE_PKT_DROP_THRESH)) {
		// Very congested, set drop flag
		drop_flag = true;
	} else if (credit < 0) {
		// Congested, set congestion flag
		if (pkti.ecn) {
			if (credit < -MARK_THRESH)
				congestion_flag = true;
			else
				congestion_flag = false;
		} else {
			congestion_flag = true;
		}
	}

	if (congestion_flag) {
		if (!bpf_skb_ecn_set_ce(skb)) {
			if (len > LARGE_PKT_THRESH) {
				// Problem if too many small packets?
				drop_flag = true;
			}
		}
	}

	if (drop_flag)
		rv = DROP_PKT;

	hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag);

	if (rv == DROP_PKT)
		__sync_add_and_fetch(&(qdp->credit), len);

	return rv;
}
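The credit arithmetic depends on a CREDIT_PER_NS() helper that is not shown here. A sketch consistent with the rate handling above, where qdp->rate holds bytes per nanosecond scaled by 2^20 (which is why a rate given in Mbps is multiplied by 128: 1 Mbps is 1.25e-4 bytes/ns, and 1.25e-4 * 2^20 is roughly 131, approximated as 128):

// credit (bytes) earned over "delta" ns at "rate" (bytes/ns << 20)
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)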