コード例 #1
0
void bpf2_jit_compile(struct bpf_program *prog)
{
	struct bpf_binary_header *header = NULL;
	int proglen, oldproglen = 0;
	int *addrs;
	u8 *image = NULL;
	int pass;
	int i;

	if (!prog || !prog->cb || !prog->cb->jit_select_func)
		return;

	addrs = kmalloc(prog->insn_cnt * sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return;

	for (proglen = 0, i = 0; i < prog->insn_cnt; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	for (pass = 0; pass < 10; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen);
		if (proglen <= 0) {
			image = NULL;
			goto out;
		}
		if (image) {
			if (proglen != oldproglen)
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_alloc_binary(proglen, &image);
			if (!header)
				goto out;
		}
		oldproglen = proglen;
	}

	if (image) {
		bpf_flush_icache(header, image + proglen);
		set_memory_ro((unsigned long)header, header->pages);
	}
out:
	kfree(addrs);
	prog->jit_image = (void (*)(struct bpf_context *ctx))image;
	return;
}
コード例 #2
0
ファイル: bpf_jit_comp.c プロジェクト: 020gzh/linux
void bpf_jit_compile(struct bpf_prog *fp)
{
	unsigned int cleanup_addr, proglen, oldproglen = 0;
	u32 temp[8], *prog, *func, seen = 0, pass;
	const struct sock_filter *filter = fp->insns;
	int i, flen = fp->len, pc_ret0 = -1;
	unsigned int *addrs;
	void *image;

	if (!bpf_jit_enable)
		return;

	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return;

	/* Before first pass, make a rough estimation of addrs[]
	 * each bpf instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < flen; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	cleanup_addr = proglen; /* epilogue address */
	image = NULL;
	for (pass = 0; pass < 10; pass++) {
		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;

		/* no prologue/epilogue for trivial filters (RET something) */
		proglen = 0;
		prog = temp;

		/* Prologue */
		if (seen_or_pass0) {
			if (seen_or_pass0 & SEEN_MEM) {
				unsigned int sz = BASE_STACKFRAME;
				sz += BPF_MEMWORDS * sizeof(u32);
				emit_alloc_stack(sz);
			}

			/* Make sure we dont leek kernel memory. */
			if (seen_or_pass0 & SEEN_XREG)
				emit_clear(r_X);

			/* If this filter needs to access skb data,
			 * load %o4 and %o5 with:
			 *  %o4 = skb->len - skb->data_len
			 *  %o5 = skb->data
			 * And also back up %o7 into r_saved_O7 so we can
			 * invoke the stubs using 'call'.
			 */
			if (seen_or_pass0 & SEEN_DATAREF) {
				emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
				emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
				emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
				emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
			}
		}
		emit_reg_move(O7, r_saved_O7);

		/* Make sure we dont leak kernel information to the user. */
		if (bpf_needs_clear_a(&filter[0]))
			emit_clear(r_A); /* A = 0 */

		for (i = 0; i < flen; i++) {
			unsigned int K = filter[i].k;
			unsigned int t_offset;
			unsigned int f_offset;
			u32 t_op, f_op;
			u16 code = bpf_anc_helper(&filter[i]);
			int ilen;

			switch (code) {
			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
				emit_alu_X(ADD);
				break;
			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
				emit_alu_K(ADD, K);
				break;
			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
				emit_alu_X(SUB);
				break;
			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
				emit_alu_K(SUB, K);
				break;
			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
				emit_alu_X(AND);
				break;
			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
				emit_alu_K(AND, K);
				break;
			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
				emit_alu_X(OR);
				break;
			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
				emit_alu_K(OR, K);
				break;
			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
			case BPF_ALU | BPF_XOR | BPF_X:
				emit_alu_X(XOR);
				break;
			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
				emit_alu_K(XOR, K);
				break;
			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
				emit_alu_X(SLL);
				break;
			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
				emit_alu_K(SLL, K);
				break;
			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
				emit_alu_X(SRL);
				break;
			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
				emit_alu_K(SRL, K);
				break;
			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
				emit_alu_X(MUL);
				break;
			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
				emit_alu_K(MUL, K);
				break;
			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
				if (K == 1)
					break;
				emit_write_y(G0);
#ifdef CONFIG_SPARC32
				/* The Sparc v8 architecture requires
				 * three instructions between a %y
				 * register write and the first use.
				 */
				emit_nop();
				emit_nop();
				emit_nop();
#endif
				emit_alu_K(DIV, K);
				break;
			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
				emit_cmpi(r_X, 0);
				if (pc_ret0 > 0) {
					t_offset = addrs[pc_ret0 - 1];
#ifdef CONFIG_SPARC32
					emit_branch(BE, t_offset + 20);
#else
					emit_branch(BE, t_offset + 8);
#endif
					emit_nop(); /* delay slot */
				} else {
					emit_branch_off(BNE, 16);
					emit_nop();
#ifdef CONFIG_SPARC32
					emit_jump(cleanup_addr + 20);
#else
					emit_jump(cleanup_addr + 8);
#endif
					emit_clear(r_A);
				}
				emit_write_y(G0);
#ifdef CONFIG_SPARC32
				/* The Sparc v8 architecture requires
				 * three instructions between a %y
				 * register write and the first use.
				 */
				emit_nop();
				emit_nop();
				emit_nop();
#endif
				emit_alu_X(DIV);
				break;
			case BPF_ALU | BPF_NEG:
				emit_neg();
				break;
			case BPF_RET | BPF_K:
				if (!K) {
					if (pc_ret0 == -1)
						pc_ret0 = i;
					emit_clear(r_A);
				} else {
					emit_loadimm(K, r_A);
				}
				/* Fallthrough */
			case BPF_RET | BPF_A:
				if (seen_or_pass0) {
					if (i != flen - 1) {
						emit_jump(cleanup_addr);
						emit_nop();
						break;
					}
					if (seen_or_pass0 & SEEN_MEM) {
						unsigned int sz = BASE_STACKFRAME;
						sz += BPF_MEMWORDS * sizeof(u32);
						emit_release_stack(sz);
					}
				}
				/* jmpl %r_saved_O7 + 8, %g0 */
				emit_jmpl(r_saved_O7, 8, G0);
				emit_reg_move(r_A, O0); /* delay slot */
				break;
			case BPF_MISC | BPF_TAX:
				seen |= SEEN_XREG;
				emit_reg_move(r_A, r_X);
				break;
			case BPF_MISC | BPF_TXA:
				seen |= SEEN_XREG;
				emit_reg_move(r_X, r_A);
				break;
			case BPF_ANC | SKF_AD_CPU:
				emit_load_cpu(r_A);
				break;
			case BPF_ANC | SKF_AD_PROTOCOL:
				emit_skb_load16(protocol, r_A);
				break;
			case BPF_ANC | SKF_AD_PKTTYPE:
				__emit_skb_load8(__pkt_type_offset, r_A);
				emit_andi(r_A, PKT_TYPE_MAX, r_A);
				emit_alu_K(SRL, 5);
				break;
			case BPF_ANC | SKF_AD_IFINDEX:
				emit_skb_loadptr(dev, r_A);
				emit_cmpi(r_A, 0);
				emit_branch(BE_PTR, cleanup_addr + 4);
				emit_nop();
				emit_load32(r_A, struct net_device, ifindex, r_A);
				break;
			case BPF_ANC | SKF_AD_MARK:
				emit_skb_load32(mark, r_A);
				break;
			case BPF_ANC | SKF_AD_QUEUE:
				emit_skb_load16(queue_mapping, r_A);
				break;
			case BPF_ANC | SKF_AD_HATYPE:
				emit_skb_loadptr(dev, r_A);
				emit_cmpi(r_A, 0);
				emit_branch(BE_PTR, cleanup_addr + 4);
				emit_nop();
				emit_load16(r_A, struct net_device, type, r_A);
				break;
			case BPF_ANC | SKF_AD_RXHASH:
				emit_skb_load32(hash, r_A);
				break;
			case BPF_ANC | SKF_AD_VLAN_TAG:
			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
				emit_skb_load16(vlan_tci, r_A);
				if (code != (BPF_ANC | SKF_AD_VLAN_TAG)) {
					emit_alu_K(SRL, 12);
					emit_andi(r_A, 1, r_A);
				} else {
					emit_loadimm(~VLAN_TAG_PRESENT, r_TMP);
					emit_and(r_A, r_TMP, r_A);
				}
				break;
			case BPF_LD | BPF_W | BPF_LEN:
				emit_skb_load32(len, r_A);
				break;
			case BPF_LDX | BPF_W | BPF_LEN:
				emit_skb_load32(len, r_X);
				break;
			case BPF_LD | BPF_IMM:
				emit_loadimm(K, r_A);
				break;
			case BPF_LDX | BPF_IMM:
				emit_loadimm(K, r_X);
				break;
			case BPF_LD | BPF_MEM:
				seen |= SEEN_MEM;
				emit_ldmem(K * 4, r_A);
				break;
			case BPF_LDX | BPF_MEM:
				seen |= SEEN_MEM | SEEN_XREG;
				emit_ldmem(K * 4, r_X);
				break;
			case BPF_ST:
				seen |= SEEN_MEM;
				emit_stmem(K * 4, r_A);
				break;
			case BPF_STX:
				seen |= SEEN_MEM | SEEN_XREG;
				emit_stmem(K * 4, r_X);
				break;

#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

			case BPF_LD | BPF_W | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
common_load:			seen |= SEEN_DATAREF;
				emit_loadimm(K, r_OFF);
				emit_call(func);
				break;
			case BPF_LD | BPF_H | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
				goto common_load;
			case BPF_LD | BPF_B | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
				goto common_load;
			case BPF_LDX | BPF_B | BPF_MSH:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
				goto common_load;
			case BPF_LD | BPF_W | BPF_IND:
				func = bpf_jit_load_word;
common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
				if (K) {
					if (is_simm13(K)) {
						emit_addi(r_X, K, r_OFF);
					} else {
						emit_loadimm(K, r_TMP);
						emit_add(r_X, r_TMP, r_OFF);
					}
				} else {
					emit_reg_move(r_X, r_OFF);
				}
				emit_call(func);
				break;
			case BPF_LD | BPF_H | BPF_IND:
				func = bpf_jit_load_half;
				goto common_load_ind;
			case BPF_LD | BPF_B | BPF_IND:
				func = bpf_jit_load_byte;
				goto common_load_ind;
			case BPF_JMP | BPF_JA:
				emit_jump(addrs[i + K]);
				emit_nop();
				break;

#define COND_SEL(CODE, TOP, FOP)	\
	case CODE:			\
		t_op = TOP;		\
		f_op = FOP;		\
		goto cond_branch

			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);

cond_branch:			f_offset = addrs[i + filter[i].jf];
				t_offset = addrs[i + filter[i].jt];

				/* same targets, can avoid doing the test :) */
				if (filter[i].jt == filter[i].jf) {
					emit_jump(t_offset);
					emit_nop();
					break;
				}

				switch (code) {
				case BPF_JMP | BPF_JGT | BPF_X:
				case BPF_JMP | BPF_JGE | BPF_X:
				case BPF_JMP | BPF_JEQ | BPF_X:
					seen |= SEEN_XREG;
					emit_cmp(r_A, r_X);
					break;
				case BPF_JMP | BPF_JSET | BPF_X:
					seen |= SEEN_XREG;
					emit_btst(r_A, r_X);
					break;
				case BPF_JMP | BPF_JEQ | BPF_K:
				case BPF_JMP | BPF_JGT | BPF_K:
				case BPF_JMP | BPF_JGE | BPF_K:
					if (is_simm13(K)) {
						emit_cmpi(r_A, K);
					} else {
						emit_loadimm(K, r_TMP);
						emit_cmp(r_A, r_TMP);
					}
					break;
				case BPF_JMP | BPF_JSET | BPF_K:
					if (is_simm13(K)) {
						emit_btsti(r_A, K);
					} else {
						emit_loadimm(K, r_TMP);
						emit_btst(r_A, r_TMP);
					}
					break;
				}
				if (filter[i].jt != 0) {
					if (filter[i].jf)
						t_offset += 8;
					emit_branch(t_op, t_offset);
					emit_nop(); /* delay slot */
					if (filter[i].jf) {
						emit_jump(f_offset);
						emit_nop();
					}
					break;
				}
				emit_branch(f_op, f_offset);
				emit_nop(); /* delay slot */
				break;

			default:
				/* hmm, too complex filter, give up with jit compiler */
				goto out;
			}
			ilen = (void *) prog - (void *) temp;
			if (image) {
				if (unlikely(proglen + ilen > oldproglen)) {
					pr_err("bpb_jit_compile fatal error\n");
					kfree(addrs);
					module_memfree(image);
					return;
				}
				memcpy(image + proglen, temp, ilen);
			}
			proglen += ilen;
			addrs[i] = proglen;
			prog = temp;
		}
		/* last bpf instruction is always a RET :
		 * use it to give the cleanup instruction(s) addr
		 */
		cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
		if (seen_or_pass0 & SEEN_MEM)
			cleanup_addr -= 4; /* add %sp, X, %sp; */

		if (image) {
			if (proglen != oldproglen)
				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
				       proglen, oldproglen);
			break;
		}
		if (proglen == oldproglen) {
			image = module_alloc(proglen);
			if (!image)
				goto out;
		}
		oldproglen = proglen;
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(flen, proglen, pass + 1, image);

	if (image) {
		bpf_flush_icache(image, image + proglen);
		fp->bpf_func = (void *)image;
		fp->jited = 1;
	}
out:
	kfree(addrs);
	return;
}
コード例 #3
0
ファイル: bpf_jit_comp.c プロジェクト: 3null/linux
void bpf_jit_compile(struct bpf_prog *fp)
{
	unsigned int proglen;
	unsigned int alloclen;
	u32 *image = NULL;
	u32 *code_base;
	unsigned int *addrs;
	struct codegen_context cgctx;
	int pass;
	int flen = fp->len;

	if (!bpf_jit_enable)
		return;

	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return;

	/*
	 * There are multiple assembly passes as the generated code will change
	 * size as it settles down, figuring out the max branch offsets/exit
	 * paths required.
	 *
	 * The range of standard conditional branches is +/- 32Kbytes.	Since
	 * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
	 * finish with 8 bytes/instruction.  Not feasible, so long jumps are
	 * used, distinct from short branches.
	 *
	 * Current:
	 *
	 * For now, both branch types assemble to 2 words (short branches padded
	 * with a NOP); this is less efficient, but assembly will always complete
	 * after exactly 3 passes:
	 *
	 * First pass: No code buffer; Program is "faux-generated" -- no code
	 * emitted but maximum size of output determined (and addrs[] filled
	 * in).	 Also, we note whether we use M[], whether we use skb data, etc.
	 * All generation choices assumed to be 'worst-case', e.g. branches all
	 * far (2 instructions), return path code reduction not available, etc.
	 *
	 * Second pass: Code buffer allocated with size determined previously.
	 * Prologue generated to support features we have seen used.  Exit paths
	 * determined and addrs[] is filled in again, as code may be slightly
	 * smaller as a result.
	 *
	 * Third pass: Code generated 'for real', and branch destinations
	 * determined from now-accurate addrs[] map.
	 *
	 * Ideal:
	 *
	 * If we optimise this, near branches will be shorter.	On the
	 * first assembly pass, we should err on the side of caution and
	 * generate the biggest code.  On subsequent passes, branches will be
	 * generated short or long and code size will reduce.  With smaller
	 * code, more branches may fall into the short category, and code will
	 * reduce more.
	 *
	 * Finally, if we see one pass generate code the same size as the
	 * previous pass we have converged and should now generate code for
	 * real.  Allocating at the end will also save the memory that would
	 * otherwise be wasted by the (small) current code shrinkage.
	 * Preferably, we should do a small number of passes (e.g. 5) and if we
	 * haven't converged by then, get impatient and force code to generate
	 * as-is, even if the odd branch would be left long.  The chances of a
	 * long jump are tiny with all but the most enormous of BPF filter
	 * inputs, so we should usually converge on the third pass.
	 */

	cgctx.idx = 0;
	cgctx.seen = 0;
	cgctx.pc_ret0 = -1;
	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
		/* We hit something illegal or unsupported. */
		goto out;

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update ctgtx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(fp, 0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;
	image = module_alloc(alloclen);
	if (!image)
		goto out;

	code_base = image + (FUNCTION_DESCR_SIZE/4);

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(fp, code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

	if (bpf_jit_enable > 1)
		/* Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

	if (image) {
		bpf_flush_icache(code_base, code_base + (proglen/4));
		/* Function descriptor nastiness: Address + TOC */
		((u64 *)image)[0] = (u64)code_base;
		((u64 *)image)[1] = local_paca->kernel_toc;
		fp->bpf_func = (void *)image;
		fp->jited = true;
	}
out:
	kfree(addrs);
	return;
}
コード例 #4
0
ファイル: bpf_jit_comp.c プロジェクト: 020gzh/linux
void bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!bpf_jit_enable)
		return;

	if (!prog || !prog->len)
		return;

	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return;

	/* Before first pass, make a rough estimation of addrs[]
	 * each bpf instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;

	/* JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large bpf programs
	 * may converge on the last pass. In such case do one more
	 * pass to emit the final image
	 */
	for (pass = 0; pass < 10 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			goto out;
		}
		if (image) {
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				goto out;
			}
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header)
				goto out;
		}
		oldproglen = proglen;
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);

	if (image) {
		bpf_flush_icache(header, image + proglen);
		set_memory_ro((unsigned long)header, header->pages);
		prog->bpf_func = (void *)image;
		prog->jited = 1;
	}
out:
	kfree(addrs);
}
コード例 #5
0
ファイル: bpf_jit_comp64.c プロジェクト: AlexShiLucky/linux
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update ctgtx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
			bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}