/*
 * bpf_jit_compile - translate a classic BPF filter into native code.
 * @fp: program to compile.  fp->insns and fp->len are read; on success
 *      fp->bpf_func is pointed at the generated image and fp->jited is set.
 *
 * Returns immediately when bpf_jit_enable is zero.  Otherwise the filter is
 * translated repeatedly:
 *
 *  - passes that run without an image only measure the code (proglen) and
 *    refresh addrs[], the table of per-instruction end offsets that branch
 *    targets are taken from;
 *  - when two consecutive passes produce the same proglen, an image of that
 *    size is obtained with module_alloc();
 *  - the following pass copies each instruction's code into the image and
 *    ends the loop.
 *
 * On allocation failure, on an unsupported opcode, or when the size has not
 * settled within the pass budget, fp is left untouched.
 *
 * NOTE(review): the emit_*() helpers are defined outside this function and
 * presumably operate implicitly on locals such as 'prog' (its advance from
 * 'temp' is what ilen measures below) -- confirm against their definitions
 * before renaming or reordering anything in here.
 */
void bpf_jit_compile(struct bpf_prog *fp)
{
	unsigned int cleanup_addr, proglen, oldproglen = 0;
	/*
	 * temp is the staging buffer code is copied from; prog is reset to
	 * temp before the prologue and after every filter instruction.
	 * NOTE(review): temp holds only 8 words -- confirm that the prologue
	 * plus the longest single-instruction sequence always fits.
	 */
	u32 temp[8], *prog, *func, seen = 0, pass;
	const struct sock_filter *filter = fp->insns;
	/* pc_ret0: index of the first "RET #0" instruction met, -1 if none. */
	int i, flen = fp->len, pc_ret0 = -1;
	/* addrs[i]: byte offset just past the code of filter instruction i. */
	unsigned int *addrs;
	void *image;

	if (!bpf_jit_enable)
		return;

	addrs = kmalloc_array(flen, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return;

	/* Before first pass, make a rough estimation of addrs[]
	 * each bpf instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < flen; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	cleanup_addr = proglen; /* epilogue address */
	image = NULL;
	/*
	 * Nominal budget is 10 passes.  The "|| image" term grants one extra
	 * pass when the buffer was allocated on the last budgeted pass:
	 * without it the loop would end with an allocated but never written
	 * image, which would then be installed as fp->bpf_func below.  A pass
	 * that runs with an image always leaves the loop (break or return),
	 * so this cannot iterate forever.
	 */
	for (pass = 0; pass < 10 || image; pass++) {
		/*
		 * Pass 0 has not collected 'seen' yet, so it assumes every
		 * feature is used; later passes use what was actually seen.
		 */
		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;

		/* no prologue/epilogue for trivial filters (RET something) */
		proglen = 0;
		prog = temp;

		/* Prologue */
		if (seen_or_pass0) {
			if (seen_or_pass0 & SEEN_MEM) {
				/* Stack frame with room for the BPF_MEMWORDS scratch words. */
				unsigned int sz = BASE_STACKFRAME;
				sz += BPF_MEMWORDS * sizeof(u32);
				emit_alloc_stack(sz);
			}

			/* Make sure we dont leak kernel memory. */
			if (seen_or_pass0 & SEEN_XREG)
				emit_clear(r_X);

			/* If this filter needs to access skb data,
			 * load %o4 and %o5 with:
			 *  %o4 = skb->len - skb->data_len
			 *  %o5 = skb->data
			 * And also back up %o7 into r_saved_O7 so we can
			 * invoke the stubs using 'call'.
			 */
			if (seen_or_pass0 & SEEN_DATAREF) {
				emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
				emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
				emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
				emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
			}
		}
		/* Emitted unconditionally, even when there is no other prologue. */
		emit_reg_move(O7, r_saved_O7);

		/* Make sure we dont leak kernel information to the user. */
		if (bpf_needs_clear_a(&filter[0]))
			emit_clear(r_A); /* A = 0 */

		/*
		 * prog is not reset between the prologue and the first filter
		 * instruction, so whatever the prologue emitted is accounted
		 * to (and copied together with) instruction 0.
		 */
		for (i = 0; i < flen; i++) {
			unsigned int K = filter[i].k;
			unsigned int t_offset;
			unsigned int f_offset;
			u32 t_op, f_op;
			u16 code = bpf_anc_helper(&filter[i]);
			int ilen;

			switch (code) {
			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
				emit_alu_X(ADD);
				break;
			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
				emit_alu_K(ADD, K);
				break;
			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
				emit_alu_X(SUB);
				break;
			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
				emit_alu_K(SUB, K);
				break;
			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
				emit_alu_X(AND);
				break;
			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
				emit_alu_K(AND, K);
				break;
			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
				emit_alu_X(OR);
				break;
			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
				emit_alu_K(OR, K);
				break;
			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
			case BPF_ALU | BPF_XOR | BPF_X:
				emit_alu_X(XOR);
				break;
			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
				emit_alu_K(XOR, K);
				break;
			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
				emit_alu_X(SLL);
				break;
			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
				emit_alu_K(SLL, K);
				break;
			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
				emit_alu_X(SRL);
				break;
			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
				emit_alu_K(SRL, K);
				break;
			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
				emit_alu_X(MUL);
				break;
			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
				emit_alu_K(MUL, K);
				break;
			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
				/* Dividing by one changes nothing: emit no code. */
				if (K == 1)
					break;
				emit_write_y(G0);
				/* The Sparc v8 architecture requires
				 * three instructions between a %y
				 * register write and the first use.
				 */
				emit_nop();
				emit_nop();
				emit_nop();
				emit_alu_K(DIV, K);
				break;
			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
				/* X == 0: leave the filter instead of dividing. */
				emit_cmpi(r_X, 0);
				if (pc_ret0 > 0) {
					/*
					 * A "RET #0" was already met: branch
					 * to it.  addrs[pc_ret0 - 1] is the
					 * end of the preceding instruction,
					 * i.e. where instruction pc_ret0
					 * starts.
					 * NOTE(review): the "+ 20" bias is not
					 * explained here; presumably it
					 * matches how emit_branch() derives
					 * the displacement -- confirm.
					 */
					t_offset = addrs[pc_ret0 - 1];
					emit_branch(BE, t_offset + 20);
					emit_nop(); /* delay slot */
				} else {
					/*
					 * No usable "RET #0" yet: when X != 0
					 * skip the exit sequence, otherwise
					 * jump to the epilogue.
					 * NOTE(review): the clear of r_A
					 * presumably sits in the jump's delay
					 * slot so that 0 is returned --
					 * confirm.
					 */
					emit_branch_off(BNE, 16);
					emit_nop();
					emit_jump(cleanup_addr + 20);
					emit_clear(r_A);
				}
				emit_write_y(G0);
				/* The Sparc v8 architecture requires
				 * three instructions between a %y
				 * register write and the first use.
				 */
				emit_nop();
				emit_nop();
				emit_nop();
				emit_alu_X(DIV);
				break;
			case BPF_ALU | BPF_NEG:
				emit_neg();
				break;
			case BPF_RET | BPF_K:
				if (!K) {
					/* Remember the first "RET #0" for the DIV_X case. */
					if (pc_ret0 == -1)
						pc_ret0 = i;
					emit_clear(r_A);
				} else {
					emit_loadimm(K, r_A);
				}
				/* Fallthrough */
			case BPF_RET | BPF_A:
				if (seen_or_pass0) {
					/*
					 * With a prologue in place, only the
					 * last instruction carries the
					 * epilogue; earlier returns jump to it.
					 */
					if (i != flen - 1) {
						emit_jump(cleanup_addr);
						emit_nop();
						break;
					}
					if (seen_or_pass0 & SEEN_MEM) {
						unsigned int sz = BASE_STACKFRAME;
						sz += BPF_MEMWORDS * sizeof(u32);
						emit_release_stack(sz);
					}
				}
				/* jmpl %r_saved_O7 + 8, %g0 */
				emit_jmpl(r_saved_O7, 8, G0);
				emit_reg_move(r_A, O0); /* delay slot */
				break;
			case BPF_MISC | BPF_TAX:
				seen |= SEEN_XREG;
				emit_reg_move(r_A, r_X);
				break;
			case BPF_MISC | BPF_TXA:
				seen |= SEEN_XREG;
				emit_reg_move(r_X, r_A);
				break;
			case BPF_ANC | SKF_AD_CPU:
				emit_load_cpu(r_A);
				break;
			case BPF_ANC | SKF_AD_PROTOCOL:
				emit_skb_load16(protocol, r_A);
				break;
			case BPF_ANC | SKF_AD_PKTTYPE:
				__emit_skb_load8(__pkt_type_offset, r_A);
				emit_andi(r_A, PKT_TYPE_MAX, r_A);
				emit_alu_K(SRL, 5);
				break;
			case BPF_ANC | SKF_AD_IFINDEX:
				/* skb->dev == NULL: leave via the epilogue. */
				emit_skb_loadptr(dev, r_A);
				emit_cmpi(r_A, 0);
				emit_branch(BE_PTR, cleanup_addr + 4);
				emit_nop();
				emit_load32(r_A, struct net_device, ifindex, r_A);
				break;
			case BPF_ANC | SKF_AD_MARK:
				emit_skb_load32(mark, r_A);
				break;
			case BPF_ANC | SKF_AD_QUEUE:
				emit_skb_load16(queue_mapping, r_A);
				break;
			case BPF_ANC | SKF_AD_HATYPE:
				/* skb->dev == NULL: leave via the epilogue. */
				emit_skb_loadptr(dev, r_A);
				emit_cmpi(r_A, 0);
				emit_branch(BE_PTR, cleanup_addr + 4);
				emit_nop();
				emit_load16(r_A, struct net_device, type, r_A);
				break;
			case BPF_ANC | SKF_AD_RXHASH:
				emit_skb_load32(hash, r_A);
				break;
			case BPF_ANC | SKF_AD_VLAN_TAG:
				emit_skb_load16(vlan_tci, r_A);
				break;
			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
				/* Reduce the loaded byte to the single "present" bit. */
				__emit_skb_load8(__pkt_vlan_present_offset, r_A);
				if (PKT_VLAN_PRESENT_BIT)
					emit_alu_K(SRL, PKT_VLAN_PRESENT_BIT);
				if (PKT_VLAN_PRESENT_BIT < 7)
					emit_andi(r_A, 1, r_A);
				break;
			case BPF_LD | BPF_W | BPF_LEN:
				emit_skb_load32(len, r_A);
				break;
			case BPF_LDX | BPF_W | BPF_LEN:
				emit_skb_load32(len, r_X);
				break;
			case BPF_LD | BPF_IMM:
				emit_loadimm(K, r_A);
				break;
			case BPF_LDX | BPF_IMM:
				emit_loadimm(K, r_X);
				break;
			/* Scratch memory word K is addressed at byte offset K * 4. */
			case BPF_LD | BPF_MEM:
				seen |= SEEN_MEM;
				emit_ldmem(K * 4, r_A);
				break;
			case BPF_LDX | BPF_MEM:
				seen |= SEEN_MEM | SEEN_XREG;
				emit_ldmem(K * 4, r_X);
				break;
			case BPF_ST:
				seen |= SEEN_MEM;
				emit_stmem(K * 4, r_A);
				break;
			case BPF_STX:
				seen |= SEEN_MEM | SEEN_XREG;
				emit_stmem(K * 4, r_X);
				break;

/*
 * Select the load stub by the sign of K: K >= 0 uses the _positive_offset
 * stub, SKF_LL_OFF <= K < 0 the _negative_offset stub, and anything below
 * SKF_LL_OFF the generic stub.
 */
#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

			case BPF_LD | BPF_W | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
common_load:			seen |= SEEN_DATAREF;
				emit_loadimm(K, r_OFF);
				emit_call(func);
				break;
			case BPF_LD | BPF_H | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
				goto common_load;
			case BPF_LD | BPF_B | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
				goto common_load;
			case BPF_LDX | BPF_B | BPF_MSH:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
				goto common_load;
			case BPF_LD | BPF_W | BPF_IND:
				func = bpf_jit_load_word;
common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
				/* r_OFF = X + K, using the immediate form when K fits. */
				if (K) {
					if (is_simm13(K)) {
						emit_addi(r_X, K, r_OFF);
					} else {
						emit_loadimm(K, r_TMP);
						emit_add(r_X, r_TMP, r_OFF);
					}
				} else {
					emit_reg_move(r_X, r_OFF);
				}
				emit_call(func);
				break;
			case BPF_LD | BPF_H | BPF_IND:
				func = bpf_jit_load_half;
				goto common_load_ind;
			case BPF_LD | BPF_B | BPF_IND:
				func = bpf_jit_load_byte;
				goto common_load_ind;
			case BPF_JMP | BPF_JA:
				/*
				 * addrs[n] is the end of instruction n, i.e.
				 * the start of n + 1, so addrs[i + K] is the
				 * start of instruction i + 1 + K.
				 */
				emit_jump(addrs[i + K]);
				emit_nop();
				break;

/* Record the branch opcodes for the taken (TOP) and not-taken (FOP) sense. */
#define COND_SEL(CODE, TOP, FOP)	\
	case CODE:			\
		t_op = TOP;		\
		f_op = FOP;		\
		goto cond_branch

			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);

cond_branch:			f_offset = addrs[i + filter[i].jf];
				t_offset = addrs[i + filter[i].jt];

				/* same targets, can avoid doing the test :) */
				if (filter[i].jt == filter[i].jf) {
					emit_jump(t_offset);
					emit_nop();
					break;
				}

				/* Emit the comparison / bit test that sets the condition codes. */
				switch (code) {
				case BPF_JMP | BPF_JGT | BPF_X:
				case BPF_JMP | BPF_JGE | BPF_X:
				case BPF_JMP | BPF_JEQ | BPF_X:
					seen |= SEEN_XREG;
					emit_cmp(r_A, r_X);
					break;
				case BPF_JMP | BPF_JSET | BPF_X:
					seen |= SEEN_XREG;
					emit_btst(r_A, r_X);
					break;
				case BPF_JMP | BPF_JEQ | BPF_K:
				case BPF_JMP | BPF_JGT | BPF_K:
				case BPF_JMP | BPF_JGE | BPF_K:
					if (is_simm13(K)) {
						emit_cmpi(r_A, K);
					} else {
						emit_loadimm(K, r_TMP);
						emit_cmp(r_A, r_TMP);
					}
					break;
				case BPF_JMP | BPF_JSET | BPF_K:
					if (is_simm13(K)) {
						emit_btsti(r_A, K);
					} else {
						emit_loadimm(K, r_TMP);
						emit_btst(r_A, r_TMP);
					}
					break;
				}
				if (filter[i].jt != 0) {
					/*
					 * NOTE(review): the 8-byte bias
					 * presumably accounts for the extra
					 * jump + nop emitted below for the
					 * false path -- confirm against
					 * emit_branch().
					 */
					if (filter[i].jf)
						t_offset += 8;
					emit_branch(t_op, t_offset);
					emit_nop(); /* delay slot */
					if (filter[i].jf) {
						emit_jump(f_offset);
						emit_nop();
					}
					break;
				}
				/* jt == 0: true path falls through, branch on the false sense. */
				emit_branch(f_op, f_offset);
				emit_nop(); /* delay slot */
				break;

			default:
				/* hmm, too complex filter, give up with jit compiler */
				goto out;
			}
			/* Bytes staged for this instruction (plus prologue when i == 0). */
			ilen = (void *) prog - (void *) temp;
			if (image) {
				/* The fill pass must never outgrow the allocated size. */
				if (unlikely(proglen + ilen > oldproglen)) {
					pr_err("bpb_jit_compile fatal error\n");
					kfree(addrs);
					module_memfree(image);
					return;
				}
				memcpy(image + proglen, temp, ilen);
			}
			proglen += ilen;
			addrs[i] = proglen;
			prog = temp;
		}
		/* last bpf instruction is always a RET :
		 * use it to give the cleanup instruction(s) addr
		 */
		cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
		if (seen_or_pass0 & SEEN_MEM)
			cleanup_addr -= 4; /* add %sp, X, %sp; */

		if (image) {
			/* This was the fill pass; a size mismatch is only logged. */
			if (proglen != oldproglen)
				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
				       proglen, oldproglen);
			break;
		}
		/* Size unchanged since the previous pass: allocate, fill on next pass. */
		if (proglen == oldproglen) {
			image = module_alloc(proglen);
			if (!image)
				goto out;
		}
		oldproglen = proglen;
	}

	/* image is still NULL here when the size never settled. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(flen, proglen, pass + 1, image);

	if (image) {
		fp->bpf_func = (void *)image;
		fp->jited = 1;
	}
out:
	kfree(addrs);
	return;
}
/* Example #2 */
/* 0 */
/* File: bpf_jit_comp.c  Project: 3null/linux */
/*
 * Assemble the body code between the prologue & epilogue.
 *
 * @fp:    classic BPF program; fp->insns and fp->len are read.
 * @image: output buffer.  It is never named in this function.
 *         NOTE(review): presumably the PPC_*() macros write through it
 *         (and advance ctx->idx) implicitly -- confirm against their
 *         definitions.
 * @ctx:   code generation state: ctx->idx is read to derive byte offsets
 *         (idx * 4), ctx->seen accumulates SEEN_* flags, ctx->pc_ret0
 *         records the index of the first "RET #0" instruction.
 * @addrs: table of flen + 1 entries.  addrs[i] is set to the byte offset
 *         at which filter instruction i starts; addrs[flen] receives the
 *         end-of-body offset.  Entries for forward targets are read before
 *         this call rewrites them, so they carry whatever the caller (or a
 *         previous pass) stored there.
 *
 * Returns 0 on success, or -ENOTSUPP when an opcode is not handled.
 */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      unsigned int *addrs)
{
	const struct sock_filter *filter = fp->insns;
	int flen = fp->len;
	u8 *func;
	unsigned int true_cond;
	int i;

	/*
	 * Start of epilogue code.  This is read before addrs[flen] is
	 * rewritten at the end of this function.
	 */
	unsigned int exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		unsigned int K = filter[i].k;
		u16 code = bpf_anc_helper(&filter[i]);

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		switch (code) {
			/*** ALU ops ***/
		case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
			ctx->seen |= SEEN_XREG;
			PPC_ADD(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
			/* Adding zero needs no code. */
			if (!K)
				break;
			/* Low half first; the high half only when K needs it. */
			PPC_ADDI(r_A, r_A, IMM_L(K));
			if (K >= 32768)
				PPC_ADDIS(r_A, r_A, IMM_HA(K));
			break;
		case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SUB(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
			if (!K)
				break;
			/* Subtraction is done by adding -K. */
			PPC_ADDI(r_A, r_A, IMM_L(-K));
			if (K >= 32768)
				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
			break;
		case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
			ctx->seen |= SEEN_XREG;
			PPC_MUL(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
			/* Immediate form for small K, otherwise load K first. */
			if (K < 32768)
				PPC_MULI(r_A, r_A, K);
			else {
				PPC_LI32(r_scratch1, K);
				PPC_MUL(r_A, r_A, r_scratch1);
			}
			break;
		case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
			ctx->seen |= SEEN_XREG;
			/* X == 0: leave the filter returning 0. */
			PPC_CMPWI(r_X, 0);
			if (ctx->pc_ret0 != -1) {
				/* Reuse the "RET #0" instruction already recorded. */
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/*
				 * No "RET #0" recorded yet: when X != 0 hop
				 * over an inline "return 0" sequence.
				 * NOTE(review): the +12 presumably spans the
				 * three instructions emitted here -- confirm.
				 */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			/* Remainder via divide, multiply back and subtract. */
			PPC_DIVWU(r_scratch1, r_A, r_X);
			PPC_MUL(r_scratch1, r_X, r_scratch1);
			PPC_SUB(r_A, r_A, r_scratch1);
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
			/* Same divide / multiply / subtract scheme with K in a scratch register. */
			PPC_LI32(r_scratch2, K);
			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
			PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
			PPC_SUB(r_A, r_A, r_scratch1);
			break;
		case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
			ctx->seen |= SEEN_XREG;
			/* X == 0: leave the filter returning 0. */
			PPC_CMPWI(r_X, 0);
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/*
				 * Exit, returning 0; first pass hits here
				 * (longer worst-case code size).
				 */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			PPC_DIVWU(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
			/* Dividing by one changes nothing: emit no code. */
			if (K == 1)
				break;
			PPC_LI32(r_scratch1, K);
			PPC_DIVWU(r_A, r_A, r_scratch1);
			break;
		case BPF_ALU | BPF_AND | BPF_X:
			ctx->seen |= SEEN_XREG;
			PPC_AND(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			/* Immediate form only when the upper half of K is zero. */
			if (!IMM_H(K))
				PPC_ANDI(r_A, r_A, K);
			else {
				PPC_LI32(r_scratch1, K);
				PPC_AND(r_A, r_A, r_scratch1);
			}
			break;
		case BPF_ALU | BPF_OR | BPF_X:
			ctx->seen |= SEEN_XREG;
			PPC_OR(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_OR | BPF_K:
			/* Each half of K is applied only when it contributes bits. */
			if (IMM_L(K))
				PPC_ORI(r_A, r_A, IMM_L(K));
			if (K >= 65536)
				PPC_ORIS(r_A, r_A, IMM_H(K));
			break;
		case BPF_ANC | SKF_AD_ALU_XOR_X:
		case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
			ctx->seen |= SEEN_XREG;
			PPC_XOR(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
			if (IMM_L(K))
				PPC_XORI(r_A, r_A, IMM_L(K));
			if (K >= 65536)
				PPC_XORIS(r_A, r_A, IMM_H(K));
			break;
		case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SLW(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
			/* A shift by zero needs no code. */
			if (K == 0)
				break;
			else
				PPC_SLWI(r_A, r_A, K);
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SRW(r_A, r_A, r_X);
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
			if (K == 0)
				break;
			else
				PPC_SRWI(r_A, r_A, K);
			break;
		case BPF_ALU | BPF_NEG:
			PPC_NEG(r_A, r_A);
			break;
		case BPF_RET | BPF_K:
			PPC_LI32(r_ret, K);
			if (!K) {
				/* Remember the first "RET #0" as a shared exit target. */
				if (ctx->pc_ret0 == -1)
					ctx->pc_ret0 = i;
			}
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue if we've stuff to clean up.  Otherwise,
			 * if there's nothing to tidy, just return.  If we /are/
			 * the last instruction, we're about to fall through to
			 * the epilogue to return.
			 */
			if (i != flen - 1) {
				/*
				 * Note: 'seen' is properly valid only on pass
				 * #2.	Both parts of this conditional are the
				 * same instruction size though, meaning the
				 * first pass will still correctly determine the
				 * code size/addresses.
				 */
				if (ctx->seen)
					PPC_JMP(exit_addr);
				else
					PPC_BLR();
			}
			break;
		case BPF_RET | BPF_A:
			/* Same exit handling as RET K, with A as the result. */
			PPC_MR(r_ret, r_A);
			if (i != flen - 1) {
				if (ctx->seen)
					PPC_JMP(exit_addr);
				else
					PPC_BLR();
			}
			break;
		case BPF_MISC | BPF_TAX: /* X = A */
			PPC_MR(r_X, r_A);
			break;
		case BPF_MISC | BPF_TXA: /* A = X */
			ctx->seen |= SEEN_XREG;
			PPC_MR(r_A, r_X);
			break;

			/*** Constant loads/M[] access ***/
		case BPF_LD | BPF_IMM: /* A = K */
			PPC_LI32(r_A, K);
			break;
		case BPF_LDX | BPF_IMM: /* X = K */
			PPC_LI32(r_X, K);
			break;
		/*
		 * M[] slots are addressed as r_M + (K & 0xf); each slot touched
		 * is also recorded as bit (K & 0xf) of ctx->seen.
		 */
		case BPF_LD | BPF_MEM: /* A = mem[K] */
			PPC_MR(r_A, r_M + (K & 0xf));
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_LDX | BPF_MEM: /* X = mem[K] */
			PPC_MR(r_X, r_M + (K & 0xf));
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_ST: /* mem[K] = A */
			PPC_MR(r_M + (K & 0xf), r_A);
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_STX: /* mem[K] = X */
			PPC_MR(r_M + (K & 0xf), r_X);
			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
			break;
		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
			break;

			/*** Ancillary info loads ***/
		case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							    protocol));
			break;
		case BPF_ANC | SKF_AD_IFINDEX:
			/* A = skb->dev->ifindex; a NULL skb->dev exits returning 0. */
			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
								dev));
			PPC_CMPDI(r_scratch1, 0);
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/* Exit, returning 0; first pass hits here. */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			PPC_LWZ_OFFS(r_A, r_scratch1,
				     offsetof(struct net_device, ifindex));
			break;
		case BPF_ANC | SKF_AD_MARK:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  mark));
			break;
		case BPF_ANC | SKF_AD_RXHASH:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  hash));
			break;
		case BPF_ANC | SKF_AD_VLAN_TAG:
		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
			BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

			/*
			 * Both variants load vlan_tci: the tag variant masks
			 * the "present" bit out, the other isolates it and
			 * shifts it down to bit 0 (0x1000 >> 12).
			 */
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  vlan_tci));
			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
				PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT);
			} else {
				PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
				PPC_SRWI(r_A, r_A, 12);
			}
			break;
		case BPF_ANC | SKF_AD_QUEUE:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  queue_mapping));
			break;
		case BPF_ANC | SKF_AD_CPU:
#ifdef CONFIG_SMP
			/*
			 * PACA ptr is r13:
			 * raw_smp_processor_id() = local_paca->paca_index
			 */
			BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct,
						  paca_index) != 2);
			PPC_LHZ_OFFS(r_A, 13,
				     offsetof(struct paca_struct, paca_index));
#else
			/* Without CONFIG_SMP the CPU number is the constant 0. */
			PPC_LI(r_A, 0);
#endif
			break;

			/*** Absolute loads from packet header/data ***/
		case BPF_LD | BPF_W | BPF_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
			goto common_load;
		case BPF_LD | BPF_H | BPF_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
			goto common_load;
		case BPF_LD | BPF_B | BPF_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
		common_load:
			/* Load from [K]. */
			ctx->seen |= SEEN_DATAREF;
			/* Call the helper through LR with the offset in r_addr. */
			PPC_LI64(r_scratch1, func);
			PPC_MTLR(r_scratch1);
			PPC_LI32(r_addr, K);
			PPC_BLRL();
			/*
			 * Helper returns 'lt' condition on error, and an
			 * appropriate return value in r3
			 */
			PPC_BCC(COND_LT, exit_addr);
			break;

			/*** Indirect loads from packet header/data ***/
		case BPF_LD | BPF_W | BPF_IND:
			func = sk_load_word;
			goto common_load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			func = sk_load_half;
			goto common_load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			func = sk_load_byte;
		common_load_ind:
			/*
			 * Load from [X + K].  Negative offsets are tested for
			 * in the helper functions.
			 */
			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
			PPC_LI64(r_scratch1, func);
			PPC_MTLR(r_scratch1);
			/* r_addr = X + K, built from the low and (if needed) high half. */
			PPC_ADDI(r_addr, r_X, IMM_L(K));
			if (K >= 32768)
				PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
			PPC_BLRL();
			/* If error, cr0.LT set */
			PPC_BCC(COND_LT, exit_addr);
			break;

		case BPF_LDX | BPF_B | BPF_MSH:
			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
			goto common_load;
			/* Not reached: the goto above always transfers control. */
			break;

			/*** Jump and branches ***/
		case BPF_JMP | BPF_JA:
			/* K == 0 targets the next instruction: nothing to emit. */
			if (K != 0)
				PPC_JMP(addrs[i + 1 + K]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */
		cond_branch:
			/* same targets, can avoid doing the test :) */
			if (filter[i].jt == filter[i].jf) {
				/* Both offsets zero means plain fallthrough: no code. */
				if (filter[i].jt > 0)
					PPC_JMP(addrs[i + 1 + filter[i].jt]);
				break;
			}

			/* Emit the comparison / bit test that sets the condition. */
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
				ctx->seen |= SEEN_XREG;
				PPC_CMPLW(r_A, r_X);
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
				ctx->seen |= SEEN_XREG;
				PPC_AND_DOT(r_scratch1, r_A, r_X);
				break;
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
				if (K < 32768)
					PPC_CMPLWI(r_A, K);
				else {
					PPC_LI32(r_scratch1, K);
					PPC_CMPLW(r_A, r_scratch1);
				}
				break;
			case BPF_JMP | BPF_JSET | BPF_K:
				if (K < 32768)
					/* PPC_ANDI is /only/ dot-form */
					PPC_ANDI(r_scratch1, r_A, K);
				else {
					PPC_LI32(r_scratch1, K);
					PPC_AND_DOT(r_scratch1, r_A,
						    r_scratch1);
				}
				break;
			}
			/* Sometimes branches are constructed "backward", with
			 * the false path being the branch and true path being
			 * a fallthrough to the next instruction.
			 */
			if (filter[i].jt == 0)
				/* Swap the sense of the branch */
				PPC_BCC(true_cond ^ COND_CMP_TRUE,
					addrs[i + 1 + filter[i].jf]);
			else {
				/* Branch on true; add a jump only for a non-zero jf. */
				PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
				if (filter[i].jf != 0)
					PPC_JMP(addrs[i + 1 + filter[i].jf]);
			}
			break;
		default:
			/* The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			if (printk_ratelimit())
				pr_err("BPF filter opcode %04x (@%d) unsupported\n",
				       filter[i].code, i);
			return -ENOTSUPP;
		}

	}
	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}