Exemple #1
0
/*===-------------------------------------------------------------------------
expr1 * expr2 * expr3
-------------------------------------------------------------------------===*/
static symbol *equ_expr2ic(ast *root) {
	ast *t = root, *t1, *t2;
	symbol *s1, *s2, *val, *lab;
	type *dt, *ty = t->ty;
	int pre;

	if (t->node != AST_EQU && t->node != AST_UE)
		return rel_expr2ic(t);
	assert(ty == int_type);
	// s1 = expr1
	t1 = t->left;
	s1 = rel_expr2ic(t1);
	// more than one operator of equal precedence
	//ic = eval_icode_node(t->node);
	pre = t->node;
	t = t->right;
	while (t->node == AST_EQU || t->node == AST_UE) {
		t2 = t->left;
		s2 = rel_expr2ic(t2);
		dt = get_arith_type(s1->ty, s2->ty);
		emit_ic(IC_CMP + gen_post(dt->cat, dt->cat), cast(s1, dt), cast(s2, dt));
		lab = gen_label(NULL);
		emit_branch(IC_JE, lab);
		val = gen_vit_reg(ty);
		if (pre == AST_EQU)
			emit_ic(IC_MOV + i32, val, cnst_zero);
		else
			emit_ic(IC_MOV + i32, val, cnst_one);
		new_label(lab);
		if (pre == AST_EQU)
			emit_ic(IC_MOV + i32, val, cnst_one);
		else
			emit_ic(IC_MOV + i32, val, cnst_zero);
		///
		s1 = val;
		pre = t->node;
		t = t->right;
	}
	// last sub-expression or only has one operator(etc, a + b)
	s2 = rel_expr2ic(t);
	dt = get_arith_type(s1->ty, s2->ty);
	emit_ic(IC_CMP + gen_post(dt->cat, dt->cat), cast(s1, dt), cast(s2, dt));
	lab = gen_label(NULL);
	emit_branch(IC_JE, lab);
	val = gen_vit_reg(ty);
	if (pre == AST_EQU)
		emit_ic(IC_MOV + i32, val, cnst_zero);
	else
		emit_ic(IC_MOV + i32, val, cnst_one);
	new_label(lab);
	if (pre == AST_EQU)
		emit_ic(IC_MOV + i32, val, cnst_one);
	else
		emit_ic(IC_MOV + i32, val, cnst_zero);

	return val;
}
Exemple #2
0
/*===-------------------------------------------------------------------------
expr1 * expr2 * expr3
-------------------------------------------------------------------------===*/
static symbol *rel_expr2ic(ast *root) {
	ast *t = root, *t1, *t2;
	symbol *s1, *s2, *val, *lab;
	type *dt, *ty = t->ty;
	int ic;

	assert(t->node != AST_G && t->node != AST_GE);
	if (t->node != AST_L && t->node != AST_LE)
		return arith_expr2ic(t);
	assert(ty == int_type);
	// s1 = expr1
	t1 = t->left;
	s1 = arith_expr2ic(t1);
	// more than one operator of equal precedence
	if (t->node == AST_L)
		ic = IC_JL;
	else
		ic = IC_JLE;
	t = t->right;
	while (t->node == AST_L || t->node == AST_LE) {
		t2 = t->left;
		s2 = arith_expr2ic(t2);
		dt = get_arith_type(s1->ty, s2->ty);
		emit_ic(IC_CMP + gen_post(dt->cat, dt->cat), cast(s1, dt), cast(s2, dt));
		lab = gen_label(NULL);
		emit_branch(ic, lab);
		val = gen_vit_reg(ty);
		emit_ic(IC_MOV + i32, val, cnst_zero);
		new_label(lab);
		emit_ic(IC_MOV + i32, val, cnst_one);
		///
		s1 = val;
		if (t->node == AST_L)
			ic = IC_JL;
		else
			ic = IC_JLE;
		t = t->right;
	}
	// last sub-expression or only has one operator(etc, a + b)
	s2 = arith_expr2ic(t);
	dt = get_arith_type(s1->ty, s2->ty);
	emit_ic(IC_CMP + gen_post(dt->cat, dt->cat), cast(s1, dt), cast(s2, dt));
	lab = gen_label(NULL);
	emit_branch(ic, lab);
	val = gen_vit_reg(ty);
	emit_ic(IC_MOV + i32, val, cnst_zero);
	new_label(lab);
	emit_ic(IC_MOV + i32, val, cnst_one);

	return val;
}
Exemple #3
0
/*
 * Emit a conditional branch on `reg` to the basic block `target`.
 *
 * If the target has no machine-code position yet (mpc < 0) the branch is
 * recorded through codegen_add_branch_ref() and BRANCH_NOPS is emitted as
 * a placeholder.  Otherwise the displacement from the current code position
 * to the target is computed and the branch is emitted right away.
 */
void emit_bccz(codegendata *cd, basicblock *target, s4 condition, s4 reg, u4 options)
{
	s4 here;

	if (target->mpc < 0) {
		/* Unresolved target: the current mcodeptr is the position to
		   record; the placeholder NOPs follow it. */

		codegen_add_branch_ref(cd, target, condition, reg, options);

		BRANCH_NOPS;

		return;
	}

	/* Target already placed: branch directly. */

	STATISTICS(count_branches_resolved++);

	/* offset of the branch instruction from the start of the code */

	here = cd->mcodeptr - cd->mcodebase;

	emit_branch(cd, target->mpc - here, condition, reg, options);
}
Exemple #4
0
/*
 * Compile a conditional form.
 *
 * `expr` is validated by validate_contional(); a returned length of 0 means
 * failure and R_FAILURE is returned.  A length of 3 means there is no
 * alternative, in which case R_UNSPECIFIED is compiled in its place.
 *
 * Both arms are compiled with `next` as their continuation, joined by
 * emit_branch(), and the test is compiled with that branch as its
 * continuation.  If any step fails, `code` holds the failing value and is
 * what gets returned.
 */
static rsexp compile_conditional (RState* r, rsexp expr, rsexp next)
{
    rsexp test_expr, then_expr, else_expr;
    rsexp then_code, else_code, code;
    rsize length = validate_contional (r, expr);

    if (length == 0)
        return R_FAILURE;

    test_expr = r_cadr (expr);
    then_expr = r_caddr (expr);

    /* the fourth element is only read when it is present */
    if (length == 3)
        else_expr = R_UNSPECIFIED;
    else
        else_expr = r_cadddr (expr);

    r_gc_scope_open (r);

    /* arms first, then the branch joining them, then the test on top */
    ensure_or_goto (code = then_code = compile (r, then_expr, next), finish);
    ensure_or_goto (code = else_code = compile (r, else_expr, next), finish);
    ensure_or_goto (code = emit_branch (r, then_code, else_code), finish);
    ensure_or_goto (code = compile (r, test_expr, code), finish);

finish:
    r_gc_scope_close_and_protect (r, code);

    return code;
}
Exemple #5
0
/*===-------------------------------------------------------------------------
logical-OR-expression:
	expr1 || expr2 || expr3 ...
-------------------------------------------------------------------------===*/
/*
 * Lower a logical-OR expression to icode.
 *
 * Nodes other than AST_OR are forwarded to and_expr2ic().  Operands hang off
 * the ->right chain (each node's ->left is an operand, the final ->right is
 * the last operand).  Every operand is compared against zero and, if
 * non-zero, control jumps to the shared "true" label.
 *
 * Emitted tail:
 *		mov  val, 0
 *		jmp  end_lab
 *	true_lab:
 *		mov  val, 1
 *	end_lab:
 *
 * The jump to end_lab is required: without it the all-zero path falls
 * through into "mov val, 1" and the expression always evaluates to 1.
 *
 * Returns the int-typed virtual register holding the 0/1 result.
 */
static symbol *or_expr2ic(ast *t) {
	symbol *s, *val, *true_lab, *end_lab;
	type *dt = int_type;

	if (t->node != AST_OR)
		/* less precedent expression*/
		return and_expr2ic(t);
	/* label reached as soon as any operand is non-zero */
	true_lab = gen_label(NULL);
	/* first operand; cast like every other operand since the compare is i32 */
	s = and_expr2ic(t->left);
	emit_ic(IC_CMP_0 + i32, cast(s, dt), NULL);
	emit_branch(IC_JNE, true_lab);
	/* more than one operator of equal precedence */
	t = t->right;
	while (t->node == AST_OR) {
		s = and_expr2ic(t->left);
		emit_ic(IC_CMP_0 + i32, cast(s, dt), NULL);
		emit_branch(IC_JNE, true_lab);
		t = t->right;
	}
	/* last operand (or the only right-hand operand, e.g. a || b) */
	s = and_expr2ic(t);
	emit_ic(IC_CMP_0 + i32, cast(s, dt), NULL);
	emit_branch(IC_JNE, true_lab);
	/* every operand was zero */
	val = gen_vit_reg(int_type);
	emit_ic(IC_MOV + i32, val, cnst_zero);
	end_lab = gen_label(NULL);
	emit_branch(IC_JMP, end_lab);
	/* some operand was non-zero */
	new_label(true_lab);
	emit_ic(IC_MOV + i32, val, cnst_one);
	new_label(end_lab);

	return val;
}
Exemple #6
0
/*
 * Define `label` at the current code position.
 *
 * The label list cd->brancheslabel is searched for an entry with the same
 * label.  If one exists, the branch recorded in it is emitted at the
 * recorded position, targeting the current position, and the entry is
 * removed.  If none exists, the label itself is added to the list (with
 * invalid condition and register) through codegen_branch_label_add().
 */
void emit_label(codegendata *cd, s4 label)
{
	list_t             *refs = cd->brancheslabel;
	branch_label_ref_t *ref;
	u1                 *saved_ptr;
	s4                  here;

	/* look for an entry that carries this label */

	ref = list_first_unsynced(refs);

	while (ref != NULL && ref->label != label)
		ref = list_next_unsynced(refs, ref);

	if (ref == NULL) {
		/* nothing recorded yet: remember the label (use invalid values
		   for condition and register) */

		codegen_branch_label_add(cd, label, -1, -1, BRANCH_OPT_NONE );

		return;
	}

	/* current position, i.e. where the recorded branch has to go */

	here = cd->mcodeptr - cd->mcodebase;

	/* temporarily move mcodeptr back to the recorded branch position,
	   emit the branch there, then restore it */

	saved_ptr    = cd->mcodeptr;
	cd->mcodeptr = cd->mcodebase + ref->mpc;

	emit_branch(cd, here - ref->mpc, ref->condition, ref->reg, ref->options);

	cd->mcodeptr = saved_ptr;

	/* the reference is resolved, drop it */

	list_remove_unsynced(refs, ref);
}
Exemple #7
0
/*===--------------------------------------------------------------------------
Generate a compare of an expression against s2 followed by one conditional
jump.

  t1           node whose expr[0] is lowered and compared
  s2           right-hand side of the compare; immediate 0 and immediate 1
               select the dedicated IC_CMP_0 / IC_CMP_1 forms
  true_label   if set (and false_label is NULL): emit IC_JNE to it
  false_label  if set (and true_label is NULL): emit IC_JE to it

Exactly one of the two labels must be given; anything else trips assert(0).

NOTE(review): the callers visible in this file already pass t1->expr[0], so
this function ends up reading expr[0] of that expression - confirm that the
double indexing is intended.
---------------------------------------------------------------------------===*/
static void expr2ic_psw(ast *t1, symbol *s2, symbol *true_label, symbol *false_label) {
	symbol *lhs = expr2ic(t1->expr[0]);
	type *lhs_ty = lhs->ty;

	assert(lhs_ty);

	/* choose the compare form */
	if (!s2->is_immediate || (s2->u.cnst.i != 0 && s2->u.cnst.i != 1)) {
		/* general case: compare against s2 converted to the left type */
		emit_ic(IC_CMP + gen_post(lhs_ty->cat, lhs_ty->cat), lhs, cast(s2, lhs_ty));
	} else if (s2->u.cnst.i == 0) {
		emit_ic(IC_CMP_0 + gen_post(lhs_ty->cat, lhs_ty->cat), lhs, NULL);
	} else {
		emit_ic(IC_CMP_1 + gen_post(lhs_ty->cat, lhs_ty->cat), lhs, NULL);
	}

	/* choose the jump: exactly one label must be supplied */
	if ((true_label != NULL) == (false_label != NULL)) {
		assert(0);
	} else if (true_label != NULL) {
		emit_branch(IC_JNE, true_label);
	} else {
		emit_branch(IC_JE, false_label);
	}
}
Exemple #8
0
/*
 * Emit a conditional branch on `reg` to `label`.
 *
 * The label list cd->brancheslabel is searched for an entry with the same
 * label.  If one exists, the branch is emitted immediately with the
 * displacement from the current position to the recorded position, and the
 * entry is removed.  If none exists, the branch is recorded through
 * codegen_branch_label_add() and BRANCH_NOPS is emitted as a placeholder.
 */
void emit_label_bccz(codegendata *cd, s4 label, s4 condition, s4 reg, u4 options)
{
	list_t             *refs = cd->brancheslabel;
	branch_label_ref_t *ref;
	s4                  here;

	/* look for an entry that carries this label */

	ref = list_first_unsynced(refs);

	while (ref != NULL && ref->label != label)
		ref = list_next_unsynced(refs, ref);

	if (ref == NULL) {
		/* label not placed yet: the current mcodeptr is the position to
		   record; the placeholder NOPs follow it */

		codegen_branch_label_add(cd, label, condition, reg, options);

		BRANCH_NOPS;

		return;
	}

	/* label already placed: branch from here to its recorded position */

	here = cd->mcodeptr - cd->mcodebase;

	emit_branch(cd, ref->mpc - here, condition, reg, options);

	/* the entry has been consumed, drop it */

	list_remove_unsynced(refs, ref);
}
Exemple #9
0
/*===-------------------------------------------------------------------------
conditional-expression:
	cond ? expr1 : expr2
-------------------------------------------------------------------------===*/
/*
 * Lower a conditional expression to icode.
 *
 * A node is treated as a conditional only when its ->right exists and is an
 * AST_COND node; everything else is forwarded to or_expr2ic().
 *
 * Emitted tail:
 *		cmp0 cond
 *		je   else_lab
 *		mov  val, expr1
 *		jmp  end_lab
 *	else_lab:
 *		mov  val, expr2
 *	end_lab:
 *
 * The jump to end_lab is required: without it the "then" path falls through
 * into the else store and the expression always yields expr2.
 *
 * NOTE(review): both arms are still lowered before the test is emitted, so
 * any code they generate runs unconditionally; only the selection of the
 * result is conditional.  Confirm whether the arms should be lowered inside
 * their respective branches instead.
 *
 * Returns the virtual register (of the node's type) holding the result.
 */
static symbol *cond2ic(ast *t1) {
	symbol *cond, *s1, *s2, *val, *else_lab, *end_lab;
	type *dt = t1->ty;

	if (!(t1->right && t1->right->node == AST_COND))
		return or_expr2ic(t1);
	cond = or_expr2ic(t1->cond);
	s1 = expr2ic(t1->left);
	s2 = cond2ic(t1->right);
	assert(cond->ty == int_type);
	emit_ic(IC_CMP_0 + i32, cond, NULL);
	else_lab = gen_label(NULL);
	emit_branch(IC_JE, else_lab);
	val = gen_vit_reg(dt);
	/* condition is non-zero: take the first arm, then skip the second */
	emit_ic(IC_MOV + gen_post(dt->cat, dt->cat), val, cast(s1, dt));
	end_lab = gen_label(NULL);
	emit_branch(IC_JMP, end_lab);
	/* condition is zero: take the second arm */
	new_label(else_lab);
	emit_ic(IC_MOV + gen_post(dt->cat, dt->cat), val, cast(s2, dt));
	new_label(end_lab);

	return val;
}
Exemple #10
0
/*===-------------------------------------------------------------------------
Lower one statement AST node to icode.

	statement =>
		labeled-statement
		compound-statement
		expression-statement
		selection-statement
		iteration-statement
		jump-statement

A NULL tree is accepted and ignored.  Any node kind that is not handled
explicitly is treated as an expression statement and passed to expr2ic().
-------------------------------------------------------------------------===*/
static void stmt2ic(ast *tree) {
	ast *t1 = tree;

	if (tree == NULL)
		return;

	/*==-- deal with all the kinds of AST of all the statement--==*/
	switch (t1->node) {
	/* ==--statement--==*/
	case AST_BLOCK:
		block2ic(t1);
		break;
	case AST_IF:
		/*==-------------------------------------------------------
		if (expr) statement1
		else statement2
		===--->
			if (expr == 0) goto L
			statement1
			goto L+1
		L:	statement2
		L+1:
		-------------------------------------------------------==*/
		{
			symbol *lab0, *lab1;
			lab0 = gen_label(NULL);
			lab1 = gen_label(NULL);

			expr2ic_psw(t1->expr[0], cnst_zero, NULL, lab0);
			stmt2ic(t1->left);
			emit_branch(IC_JMP, lab1);
			emit_ic(IC_LABEL, lab0, NULL);
			/* if there is no else statement, peephole optimization can
			   delete the redundant jmp */
			stmt2ic(t1->right);
			emit_ic(IC_LABEL, lab1, NULL);
		}
		break;
	case AST_DO:
		/*==-------------------------------------------------------
		do statement
		while (expr);
		===--->
		L:	statement
		L+1:if (expr != 0) goto L
		L+2:
		-------------------------------------------------------==*/
		{
			symbol *lab0 = gen_label(NULL);
			symbol *lab1 = gen_label(NULL);
			symbol *lab2 = gen_label(NULL);

			/* break leaves the loop, continue re-evaluates the condition */
			t1->bk_label = lab2;
			t1->ct_label = lab1;
			emit_ic(IC_LABEL, lab0, NULL);
			stmt2ic(t1->right);
			emit_ic(IC_LABEL, lab1, NULL);
			expr2ic_psw(t1->expr[0], cnst_zero, lab0, NULL);
			emit_ic(IC_LABEL, lab2, NULL);
		}
		break;
	case AST_FOR:
		/*==-------------------------------------------------------
		for (expr1; expr2; expr3) statement
		===--->
			expr1
			goto L+2
		L:	statement
		L+1:expr3
		L+2:if (expr2 != 0) goto L
		L+3:
		-------------------------------------------------------==*/
		{
			symbol *lab0 = gen_label(NULL);
			symbol *lab1 = gen_label(NULL);
			symbol *lab2 = gen_label(NULL);
			symbol *lab3 = gen_label(NULL);

			t1->bk_label = lab3;
			/* continue must run expr3 before the condition, hence L+1 */
			t1->ct_label = lab1;
			expr2ic(t1->expr[0]);
			emit_branch(IC_JMP, lab2);
			emit_ic(IC_LABEL, lab0, NULL);
			stmt2ic(t1->right);
			emit_ic(IC_LABEL, lab1, NULL);
			expr2ic(t1->expr[2]);
			emit_ic(IC_LABEL, lab2, NULL);
			expr2ic_psw(t1->expr[1], cnst_zero, lab0, NULL);
			emit_ic(IC_LABEL, lab3, NULL);
		}
		break;
	case AST_WHILE:
		/*==-------------------------------------------------------
		while (expr)
		statement
		===--->
			goto L+1
		L:	statement
		L+1:if (expr != 0) goto L
		L+2:
		-------------------------------------------------------==*/
		{
			symbol *lab0 = gen_label(NULL);
			symbol *lab1 = gen_label(NULL);
			symbol *lab2 = gen_label(NULL);

			t1->bk_label = lab2;
			t1->ct_label = lab1;
			emit_branch(IC_JMP, lab1);
			emit_ic(IC_LABEL, lab0, NULL);
			stmt2ic(t1->right);
			emit_ic(IC_LABEL, lab1, NULL);
			expr2ic_psw(t1->expr[0], cnst_zero, lab0, NULL);
			emit_ic(IC_LABEL, lab2, NULL);
		}
		break;
	case AST_SWITCH:
		/*==-------------------------------------------------------
		switch (expr)
		case c1: statement1;
		case c2: statement2;
		case c3: statement3;
		default: statement
		===--->
			t1 = expr
		L+1:if(t1 != c1)
			goto L+2
			statement1
		L+2:if(t1 != c2)
			goto L+3
			statement2
		L+3:if(t1 != c3)
			goto def_lab
			statement3
		def_lab:
			statement
		lab_break:
		-------------------------------------------------------==*/
		{
			symbol *cur_lab, *next_lab;
			ast *it;
			dlist *idx = t1->cases;
			ast *def = t1->def;

			/* icode time*/
			cur_lab = gen_label(NULL);
			/* With an empty case list the loop never assigns next_lab;
			   start it at cur_lab so the code below never reads an
			   uninitialized pointer. */
			next_lab = cur_lab;
			for (; idx; idx = idx->next) {
				it = (ast *)idx->it;
				next_lab = gen_label(NULL);
				new_label(cur_lab);
				/* NOTE(review): this tests the switch expression against
				   cnst_zero, not against the case constant (it->u.cnst),
				   so it does not match the scheme sketched above.
				   Confirm the intended comparison. */
				expr2ic_psw(t1->expr[0], cnst_zero, next_lab, NULL);
				stmt2ic(it->right);
				cur_lab = next_lab;
			}
			/* default statement*/
			if (def) {
				new_label(next_lab);
				stmt2ic(def->right);
				/* NOTE(review): this fresh label is never placed or
				   stored; kept as in the original. */
				next_lab = gen_label(NULL);
			} else
				t1->bk_label = next_lab;
		}
		/* Do not fall into AST_GOTO: that would emit a jump to t1->dst
		   for a switch node. */
		break;
	case AST_GOTO:
		emit_branch(IC_JMP, (symbol *)t1->dst);
		break;
	case AST_BREAK:
		assert(t1->bk_label);
		emit_branch(IC_JMP, t1->bk_label);
		break;
	case AST_CONTINUE:
		assert(t1->ct_label);
		emit_branch(IC_JMP, t1->ct_label);
		break;
	case AST_RETURN:
		{
			/* convert the returned value to _F.cur_func->ty, hand it to
			   IC_FUNC_END, then jump to the function's end label */
			symbol *s1 = expr2ic(t1->right);
			s1 = cast(s1, _F.cur_func->ty);
			emit_ic(IC_FUNC_END, s1, NULL);
			emit_branch(IC_JMP, _F.cur_func_end);
		}
		break;
	case AST_CASE:
	case AST_DEFAULT:
		/* already processed by the enclosing switch, just pass it */
		break;
	/* ==--expression--==*/
	default:
		expr2ic(t1);
		break;
	}

	return;
}
/*
 * JIT-compile the classic BPF filter in fp->insns into native code.
 *
 * Does nothing when bpf_jit_enable is zero or when the addrs[] table cannot
 * be allocated.  On success fp->bpf_func is set to the generated image and
 * fp->jited to 1.  If an instruction is not handled by the switch below, or
 * the image allocation fails, the function bails out through "out" and fp
 * is left untouched.
 *
 * Code is generated in up to 10 passes.  Each pass emits every instruction
 * into the small temp[] buffer and records in addrs[i] the byte offset at
 * which instruction i ends.  Once a pass produces the same total length as
 * the previous one, the image is allocated; the following pass copies the
 * code into it and the loop stops.
 *
 * NOTE(review): the emit_*() helpers are defined elsewhere; from the way
 * ilen is computed they presumably write through, and advance, the local
 * "prog" pointer.
 */
void bpf_jit_compile(struct bpf_prog *fp)
{
	unsigned int cleanup_addr, proglen, oldproglen = 0;
	u32 temp[8], *prog, *func, seen = 0, pass;
	const struct sock_filter *filter = fp->insns;
	int i, flen = fp->len, pc_ret0 = -1;
	unsigned int *addrs;
	void *image;

	if (!bpf_jit_enable)
		return;

	addrs = kmalloc_array(flen, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return;

	/* Before first pass, make a rough estimation of addrs[]
	 * each bpf instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < flen; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	cleanup_addr = proglen; /* epilogue address */
	image = NULL;
	for (pass = 0; pass < 10; pass++) {
		/* The first pass has no usage information yet, so it assumes
		 * every feature is needed; later passes use what the previous
		 * passes accumulated in "seen".
		 */
		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;

		/* no prologue/epilogue for trivial filters (RET something) */
		proglen = 0;
		prog = temp;

		/* Prologue */
		if (seen_or_pass0) {
			if (seen_or_pass0 & SEEN_MEM) {
				unsigned int sz = BASE_STACKFRAME;
				sz += BPF_MEMWORDS * sizeof(u32);
				emit_alloc_stack(sz);
			}

			/* Make sure we don't leak kernel memory. */
			if (seen_or_pass0 & SEEN_XREG)
				emit_clear(r_X);

			/* If this filter needs to access skb data,
			 * load %o4 and %o5 with:
			 *  %o4 = skb->len - skb->data_len
			 *  %o5 = skb->data
			 * And also back up %o7 into r_saved_O7 so we can
			 * invoke the stubs using 'call'.
			 */
			if (seen_or_pass0 & SEEN_DATAREF) {
				emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
				emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
				emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
				emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
			}
		}
		emit_reg_move(O7, r_saved_O7);

		/* Make sure we don't leak kernel information to the user. */
		if (bpf_needs_clear_a(&filter[0]))
			emit_clear(r_A); /* A = 0 */

		/* Translate the filter one instruction at a time. */
		for (i = 0; i < flen; i++) {
			unsigned int K = filter[i].k;
			unsigned int t_offset;
			unsigned int f_offset;
			u32 t_op, f_op;
			u16 code = bpf_anc_helper(&filter[i]);
			int ilen;

			switch (code) {
			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
				emit_alu_X(ADD);
				break;
			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
				emit_alu_K(ADD, K);
				break;
			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
				emit_alu_X(SUB);
				break;
			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
				emit_alu_K(SUB, K);
				break;
			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
				emit_alu_X(AND);
				break;
			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
				emit_alu_K(AND, K);
				break;
			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
				emit_alu_X(OR);
				break;
			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
				emit_alu_K(OR, K);
				break;
			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
			case BPF_ALU | BPF_XOR | BPF_X:
				emit_alu_X(XOR);
				break;
			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
				emit_alu_K(XOR, K);
				break;
			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
				emit_alu_X(SLL);
				break;
			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
				emit_alu_K(SLL, K);
				break;
			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
				emit_alu_X(SRL);
				break;
			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
				emit_alu_K(SRL, K);
				break;
			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
				emit_alu_X(MUL);
				break;
			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
				emit_alu_K(MUL, K);
				break;
			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
				/* dividing by 1 changes nothing: emit no code */
				if (K == 1)
					break;
				emit_write_y(G0);
				/* The Sparc v8 architecture requires
				 * three instructions between a %y
				 * register write and the first use.
				 */
				emit_nop();
				emit_nop();
				emit_nop();
				emit_alu_K(DIV, K);
				break;
			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
				/* X == 0 must not reach the divide: branch away
				 * to an earlier "RET 0" when one has been seen
				 * (pc_ret0), otherwise clear A and jump towards
				 * the cleanup code.
				 */
				emit_cmpi(r_X, 0);
				if (pc_ret0 > 0) {
					t_offset = addrs[pc_ret0 - 1];
					emit_branch(BE, t_offset + 20);
					emit_nop(); /* delay slot */
				} else {
					emit_branch_off(BNE, 16);
					emit_nop();
					emit_jump(cleanup_addr + 20);
					emit_clear(r_A);
				}
				emit_write_y(G0);
				/* The Sparc v8 architecture requires
				 * three instructions between a %y
				 * register write and the first use.
				 */
				emit_nop();
				emit_nop();
				emit_nop();
				emit_alu_X(DIV);
				break;
			case BPF_ALU | BPF_NEG:
				emit_neg();
				break;
			case BPF_RET | BPF_K:
				if (!K) {
					/* remember the first "RET 0" instruction */
					if (pc_ret0 == -1)
						pc_ret0 = i;
					emit_clear(r_A);
				} else {
					emit_loadimm(K, r_A);
				}
				/* Fallthrough */
			case BPF_RET | BPF_A:
				if (seen_or_pass0) {
					/* a RET that is not the last instruction
					 * jumps to the shared cleanup code
					 */
					if (i != flen - 1) {
						emit_jump(cleanup_addr);
						emit_nop();
						break;
					}
					if (seen_or_pass0 & SEEN_MEM) {
						unsigned int sz = BASE_STACKFRAME;
						sz += BPF_MEMWORDS * sizeof(u32);
						emit_release_stack(sz);
					}
				}
				/* jmpl %r_saved_O7 + 8, %g0 */
				emit_jmpl(r_saved_O7, 8, G0);
				emit_reg_move(r_A, O0); /* delay slot */
				break;
			case BPF_MISC | BPF_TAX:
				seen |= SEEN_XREG;
				emit_reg_move(r_A, r_X);
				break;
			case BPF_MISC | BPF_TXA:
				seen |= SEEN_XREG;
				emit_reg_move(r_X, r_A);
				break;
			case BPF_ANC | SKF_AD_CPU:
				emit_load_cpu(r_A);
				break;
			case BPF_ANC | SKF_AD_PROTOCOL:
				emit_skb_load16(protocol, r_A);
				break;
			case BPF_ANC | SKF_AD_PKTTYPE:
				__emit_skb_load8(__pkt_type_offset, r_A);
				emit_andi(r_A, PKT_TYPE_MAX, r_A);
				emit_alu_K(SRL, 5);
				break;
			case BPF_ANC | SKF_AD_IFINDEX:
				/* skb->dev may be NULL: branch to the cleanup
				 * code instead of dereferencing it
				 */
				emit_skb_loadptr(dev, r_A);
				emit_cmpi(r_A, 0);
				emit_branch(BE_PTR, cleanup_addr + 4);
				emit_nop();
				emit_load32(r_A, struct net_device, ifindex, r_A);
				break;
			case BPF_ANC | SKF_AD_MARK:
				emit_skb_load32(mark, r_A);
				break;
			case BPF_ANC | SKF_AD_QUEUE:
				emit_skb_load16(queue_mapping, r_A);
				break;
			case BPF_ANC | SKF_AD_HATYPE:
				/* same NULL check on skb->dev as for IFINDEX */
				emit_skb_loadptr(dev, r_A);
				emit_cmpi(r_A, 0);
				emit_branch(BE_PTR, cleanup_addr + 4);
				emit_nop();
				emit_load16(r_A, struct net_device, type, r_A);
				break;
			case BPF_ANC | SKF_AD_RXHASH:
				emit_skb_load32(hash, r_A);
				break;
			case BPF_ANC | SKF_AD_VLAN_TAG:
				emit_skb_load16(vlan_tci, r_A);
				break;
			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
				__emit_skb_load8(__pkt_vlan_present_offset, r_A);
				if (PKT_VLAN_PRESENT_BIT)
					emit_alu_K(SRL, PKT_VLAN_PRESENT_BIT);
				if (PKT_VLAN_PRESENT_BIT < 7)
					emit_andi(r_A, 1, r_A);
				break;
			case BPF_LD | BPF_W | BPF_LEN:
				emit_skb_load32(len, r_A);
				break;
			case BPF_LDX | BPF_W | BPF_LEN:
				emit_skb_load32(len, r_X);
				break;
			case BPF_LD | BPF_IMM:
				emit_loadimm(K, r_A);
				break;
			case BPF_LDX | BPF_IMM:
				emit_loadimm(K, r_X);
				break;
			case BPF_LD | BPF_MEM:
				seen |= SEEN_MEM;
				emit_ldmem(K * 4, r_A);
				break;
			case BPF_LDX | BPF_MEM:
				seen |= SEEN_MEM | SEEN_XREG;
				emit_ldmem(K * 4, r_X);
				break;
			case BPF_ST:
				seen |= SEEN_MEM;
				emit_stmem(K * 4, r_A);
				break;
			case BPF_STX:
				seen |= SEEN_MEM | SEEN_XREG;
				emit_stmem(K * 4, r_X);
				break;

/* Pick the load stub variant from the sign and range of the offset K. */
#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

			case BPF_LD | BPF_W | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
				/* shared tail for all absolute loads: offset K in r_OFF */
common_load:			seen |= SEEN_DATAREF;
				emit_loadimm(K, r_OFF);
				emit_call(func);
				break;
			case BPF_LD | BPF_H | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
				goto common_load;
			case BPF_LD | BPF_B | BPF_ABS:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
				goto common_load;
			case BPF_LDX | BPF_B | BPF_MSH:
				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
				goto common_load;
			case BPF_LD | BPF_W | BPF_IND:
				func = bpf_jit_load_word;
				/* shared tail for all indirect loads: r_OFF = X + K */
common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
				if (K) {
					if (is_simm13(K)) {
						emit_addi(r_X, K, r_OFF);
					} else {
						emit_loadimm(K, r_TMP);
						emit_add(r_X, r_TMP, r_OFF);
					}
				} else {
					emit_reg_move(r_X, r_OFF);
				}
				emit_call(func);
				break;
			case BPF_LD | BPF_H | BPF_IND:
				func = bpf_jit_load_half;
				goto common_load_ind;
			case BPF_LD | BPF_B | BPF_IND:
				func = bpf_jit_load_byte;
				goto common_load_ind;
			case BPF_JMP | BPF_JA:
				emit_jump(addrs[i + K]);
				emit_nop();
				break;

/* One case label per conditional jump: select the branch opcode used when
 * the condition is true (TOP) or false (FOP), then share the code below.
 */
#define COND_SEL(CODE, TOP, FOP)	\
	case CODE:			\
		t_op = TOP;		\
		f_op = FOP;		\
		goto cond_branch

			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);

				/* targets come from addrs[] of the instruction at
				 * i + jf / i + jt
				 */
cond_branch:			f_offset = addrs[i + filter[i].jf];
				t_offset = addrs[i + filter[i].jt];

				/* same targets, can avoid doing the test :) */
				if (filter[i].jt == filter[i].jf) {
					emit_jump(t_offset);
					emit_nop();
					break;
				}

				/* emit the compare / bit test that sets the condition codes */
				switch (code) {
				case BPF_JMP | BPF_JGT | BPF_X:
				case BPF_JMP | BPF_JGE | BPF_X:
				case BPF_JMP | BPF_JEQ | BPF_X:
					seen |= SEEN_XREG;
					emit_cmp(r_A, r_X);
					break;
				case BPF_JMP | BPF_JSET | BPF_X:
					seen |= SEEN_XREG;
					emit_btst(r_A, r_X);
					break;
				case BPF_JMP | BPF_JEQ | BPF_K:
				case BPF_JMP | BPF_JGT | BPF_K:
				case BPF_JMP | BPF_JGE | BPF_K:
					if (is_simm13(K)) {
						emit_cmpi(r_A, K);
					} else {
						emit_loadimm(K, r_TMP);
						emit_cmp(r_A, r_TMP);
					}
					break;
				case BPF_JMP | BPF_JSET | BPF_K:
					if (is_simm13(K)) {
						emit_btsti(r_A, K);
					} else {
						emit_loadimm(K, r_TMP);
						emit_btst(r_A, r_TMP);
					}
					break;
				}
				/* jt != 0: branch on the true condition, followed by
				 * a jump for the false target when jf != 0 as well.
				 * jt == 0: the true path is the next instruction, so
				 * only a branch on the false condition is needed.
				 */
				if (filter[i].jt != 0) {
					if (filter[i].jf)
						t_offset += 8;
					emit_branch(t_op, t_offset);
					emit_nop(); /* delay slot */
					if (filter[i].jf) {
						emit_jump(f_offset);
						emit_nop();
					}
					break;
				}
				emit_branch(f_op, f_offset);
				emit_nop(); /* delay slot */
				break;

			default:
				/* hmm, too complex filter, give up with jit compiler */
				goto out;
			}
			/* number of bytes written to temp[] for this instruction */
			ilen = (void *) prog - (void *) temp;
			if (image) {
				/* final pass: the code must still fit in the size
				 * the image was allocated with
				 */
				if (unlikely(proglen + ilen > oldproglen)) {
					pr_err("bpb_jit_compile fatal error\n");
					kfree(addrs);
					module_memfree(image);
					return;
				}
				memcpy(image + proglen, temp, ilen);
			}
			proglen += ilen;
			/* addrs[i] is the offset just past instruction i */
			addrs[i] = proglen;
			prog = temp;
		}
		/* last bpf instruction is always a RET :
		 * use it to give the cleanup instruction(s) addr
		 */
		cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
		if (seen_or_pass0 & SEEN_MEM)
			cleanup_addr -= 4; /* add %sp, X, %sp; */

		if (image) {
			/* this pass filled the image: done */
			if (proglen != oldproglen)
				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
				       proglen, oldproglen);
			break;
		}
		/* size unchanged since the previous pass: allocate the image
		 * so that the next pass can fill it
		 */
		if (proglen == oldproglen) {
			image = module_alloc(proglen);
			if (!image)
				goto out;
		}
		oldproglen = proglen;
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(flen, proglen, pass + 1, image);

	if (image) {
		fp->bpf_func = (void *)image;
		fp->jited = 1;
	}
out:
	kfree(addrs);
	return;
}
/*
 * Generate, into `buffer`, an ARM routine that accumulates a sum of
 * absolute differences between a reference block and a source block.
 *
 *   buffer               destination for the generated instructions; also
 *                        the address returned as the callable routine
 *   buffer_length        not used by this function.
 *                        NOTE(review): nothing here checks that the
 *                        generated code fits in the buffer.
 *   bytes_per_entry_ref  element format of the reference data
 *                        (MEM_FMT_8 / MEM_FMT_16 / MEM_FMT_32)
 *   bytes_per_entry_src  element format of the source data (same values)
 *   offset_ref           post-increment applied to the reference pointer by
 *                        the load at the end of a row, after adding
 *                        (1 - square_length)
 *   offset_src           likewise for the source pointer
 *   square_length        number of elements per row; the row body is fully
 *                        unrolled, and the value is also OR-ed into the
 *                        immediate of "mov ip" as the loop counter.
 *                        NOTE(review): values that do not fit that
 *                        immediate field would corrupt the instruction -
 *                        confirm the caller's range.
 *
 * Returns the routine (== original `buffer`), or NULL when square_length is
 * 0 or when either adjusted offset is not below 1024.
 */
sad_fn gen_sad( uint32_t* buffer, uint32_t buffer_length,
                uint32_t bytes_per_entry_ref, uint32_t bytes_per_entry_src,
                uint16_t offset_ref, uint16_t offset_src,
                uint32_t square_length )
{

  /* Register usage in generated code
      lr/r14 : sad accumulation
      r0 : ref - r2 : data read with r0
      r1 : src - r3 : data read with r1
      ip : counter
  */

  uint32_t *b;
  int8_t *buffer_save, *buffer_current;
  sad_fn sad = (sad_fn) NULL;

  uint32_t prologue[]   = {
                            0xe52de004, // str lr, [sp, #-4]!
                            0xe3a0c000, // mov ip, 0x0
                            0xe3a0e000  // mov lr, 0x0
                          };

  uint32_t epilogue[]   = {
                            0xe1a0000e, // mov r0, lr
                            0xe49df004  // ldr pc, [sp], #4
                          };

  uint32_t sad_acc[]    = {
                            0xe0732002, // rsbs r2, r3, r2
                            0xb2622000, // rsblt r2, r2, #0
                            0xe08ee002  // add lr, lr, r2
                          };

  uint32_t dec_count[]  = {
                            0xe25cc001  // subs ip, ip, #1
                          };

  /* The in-row loads advance each pointer by 1 per element, so the
     end-of-row load only has to add the remainder of the stride. */
  offset_ref += ( 1 - square_length );
  offset_src += ( 1 - square_length );

  /* square_length == 0 would make the unroll counter below wrap around and
     emit code far beyond the buffer, so it is rejected like a bad offset. */
  if( square_length > 0 && offset_ref < 1024 && offset_src < 1024 )
  {
    sad = (sad_fn) buffer;

    b = &buffer[0];
    buffer = emit_buffer( buffer, prologue, sizeof(prologue) );
    /* patch the row count into the immediate of "mov ip, 0x0" */
    b[1] |= square_length;

    /* first pair of loads, before the loop entry point */
    switch( bytes_per_entry_ref )
    {
      case MEM_FMT_8:
        buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
        break;

      case MEM_FMT_16:
        buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
        break;

      case MEM_FMT_32:
        buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
        break;
    }

    switch( bytes_per_entry_src )
    {
      case MEM_FMT_8:
        buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
        break;

      case MEM_FMT_16:
        buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
        break;

      case MEM_FMT_32:
        buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
        break;
    }

    /* the loop branch emitted at the end targets this position */
    buffer_save = (int8_t*)&buffer[0];

    square_length --;

    /* unrolled row body: accumulate the pair just loaded, load the next */
    while( square_length > 0 )
    {
      /* keep the advanced pointer: otherwise the loads below would be
         written on top of the accumulate sequence */
      buffer = emit_buffer( buffer, sad_acc, sizeof(sad_acc) );

      switch( bytes_per_entry_ref )
      {
        case MEM_FMT_8:
          buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
          break;

        case MEM_FMT_16:
          buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
          break;

        case MEM_FMT_32:
          buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
          break;
      }

      switch( bytes_per_entry_src )
      {
        case MEM_FMT_8:
          buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
          break;

        case MEM_FMT_16:
          buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
          break;

        case MEM_FMT_32:
          buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
          break;
      }

      square_length --;
    }

    /* last element of the row, then decrement the row counter */
    buffer = emit_buffer( buffer, sad_acc, sizeof(sad_acc) );
    buffer = emit_buffer( buffer, dec_count, sizeof(dec_count) );

    /* loads for the first element of the next row, emitted with the NE
       condition so they only take effect while the counter is non-zero */
    switch( bytes_per_entry_ref )
    {
      case MEM_FMT_8:
        buffer = emit_load8_pi( COND_FIELD_NE, buffer, ARM_REG_0, ARM_REG_2, offset_ref );
        break;

      case MEM_FMT_16:
        buffer = emit_load16_pi( COND_FIELD_NE, buffer, ARM_REG_0, ARM_REG_2, offset_ref );
        break;

      case MEM_FMT_32:
        buffer = emit_load32_pi( COND_FIELD_NE, buffer, ARM_REG_0, ARM_REG_2, offset_ref );
        break;
    }

    switch( bytes_per_entry_src )
    {
      case MEM_FMT_8:
        buffer = emit_load8_pi( COND_FIELD_NE, buffer, ARM_REG_1, ARM_REG_3, offset_src );
        break;

      case MEM_FMT_16:
        buffer = emit_load16_pi( COND_FIELD_NE, buffer, ARM_REG_1, ARM_REG_3, offset_src );
        break;

      case MEM_FMT_32:
        buffer = emit_load32_pi( COND_FIELD_NE, buffer, ARM_REG_1, ARM_REG_3, offset_src );
        break;
    }

    /* loop back to the row body; the extra -8 presumably accounts for the
       ARM pc reading two instructions ahead - TODO confirm against
       emit_branch() */
    buffer_current = (int8_t*)&buffer[0];
    buffer = emit_branch( COND_FIELD_NE, buffer, buffer_save - buffer_current - 8 );

    buffer = emit_buffer( buffer, epilogue, sizeof(epilogue) );

    arm_mmu_flush_dcache();
  }

  return sad;
}