Пример #1
0
/*
 * Emit the JIT sequence for a single op: a label for the op, a call to the
 * op's op_ppaddr function, and a store of the call's return register into
 * PL_op.
 */
static void unroll_op(pTHX_ struct sljit_compiler* compiler, OP* op)
{
    sljit_w pp_target;

    DEBUGf(("  ; %s [%p]\n", OP_NAME(op), op));

    /* TODO(dgl): a label is emitted for every op; can that be avoided? */
    emit_label(compiler, op);

    /* Call the op's implementation (no arguments). */
    pp_target = (sljit_w) op->op_ppaddr;
    sljit_emit_ijump(compiler, SLJIT_CALL0, SLJIT_IMM, pp_target);

    /* PL_op = value left in the return register by the call above. */
    sljit_emit_op1(compiler, SLJIT_MOV,
                   SLJIT_MEM, (sljit_w) &PL_op,
                   SLJIT_RETURN_REG, 0);
}
Пример #2
0
/*
 * Emits the function prologue for the x86-64 target.
 *
 * Steps, in emission order:
 *   1. push the saved registers, then the scratch registers numbered from
 *      `scratches` down to SLJIT_FIRST_SAVED_REG;
 *   2. copy up to three incoming arguments into SLJIT_S0..SLJIT_S2;
 *   3. round the frame so that locals + SLJIT_LOCALS_OFFSET + pushed
 *      registers is a multiple of 16, then subtract it from the stack pointer;
 *   4. Win64 only: call sljit_grow_stack for frames above 1024 bytes and
 *      spill xmm6 when the float register counts require it.
 *
 * Returns SLJIT_SUCCESS on the normal path. Early exits come from
 * CHECK_ERROR / CHECK / FAIL_IF, whose definitions are outside this excerpt.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, size, saved_register_size;
	/* NOTE: INC_SIZE and PUSH_REG are used without an explicit buffer argument;
	   they appear to operate on the local named `inst`. */
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	/* Push saved registers from SLJIT_S0 downwards; tmp is the lowest index pushed. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		/* Registers mapped to 8 or above take one extra byte for the REX_B prefix. */
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	/* Push scratch registers whose index reaches into the saved-register range. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	if (args > 0) {
		/* Each argument move below is three bytes: REX prefix, opcode, ModRM. */
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		/* Non-Windows: arguments are copied from rdi, rsi, rdx. */
		/* This inner test is always true here; it mirrors the two checks below. */
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		/* Win64: arguments are copied from rcx, rdx, r8. */
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	/* Round (locals + offset + pushed bytes) up to 16, then drop the pushed bytes again. */
	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		/* One buffer holds both instructions: a 4-byte sub and a 7-byte mov. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
		/* Allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 byte. */
		/* The bytes subtracted here are removed from local_size, so the final
		   sub below only reserves the remainder. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction */
		/* Loads the remaining local_size into SLJIT_R0 as a 32-bit immediate;
		   presumably the argument consumed by sljit_grow_stack (SLJIT_CALL1). */
		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	/* Reserve the (remaining) local area: sub sp, imm. */
	if (local_size > 0) {
		if (local_size <= 127) {
			/* Fits the one-byte immediate form (4 bytes total). */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			*inst++ = local_size;
		}
		else {
			/* Needs the four-byte immediate form (7 bytes total). */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			sljit_unaligned_store_s32(inst, local_size);
			inst += sizeof(sljit_s32);
		}
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	/* Same condition as the one that selected the 6-slot locals_offset above. */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		/* Remaining four instruction bytes written as one 32-bit store. */
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif

	return SLJIT_SUCCESS;
}
Пример #3
0
/*
 * Emits the function prologue for the x86-32 target.
 *
 * Records the register/argument counts in the compiler, pushes TMP_REGISTER
 * and up to three saved registers, loads up to three arguments into the saved
 * registers (from registers/stack under SLJIT_X86_32_FASTCALL, otherwise from
 * the caller's stack via TMP_REGISTER), lays out the spill areas for scratch
 * and saved registers beyond the third, and finally subtracts the frame size
 * from SLJIT_LOCALS_REG.
 *
 * Returns the result of the final emit_non_cum_binary call; early exits come
 * from CHECK_ERROR / FAIL_IF (definitions not visible in this excerpt).
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
{
	sljit_si size;
	sljit_si locals_offset;
	/* NOTE: INC_SIZE and PUSH_REG take no buffer argument; they appear to
	   operate on the local named `inst`. */
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);

	compiler->scratches = scratches;
	compiler->saveds = saveds;
	compiler->args = args;
	compiler->flags_saved = 0;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->logical_local_size = local_size;
#endif

	/*
	 * Byte count of everything written through `inst` below:
	 *   1 byte for pushing TMP_REGISTER, 1 byte per pushed saved register
	 *   (at most 3), plus the argument loads.
	 * Fastcall: 2 bytes per argument, and 2 more for the third one (it is
	 * read from the stack with a 4-byte instruction).
	 * Otherwise: 2 bytes to copy esp into TMP_REGISTER and 3 bytes per argument.
	 */
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REGISTER]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* TMP_REGISTER = esp, taken before the saved registers are pushed so the
	   argument offsets below do not depend on `saveds`. */
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REGISTER] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2)
		PUSH_REG(reg_map[SLJIT_SAVED_REG3]);
	if (saveds > 1)
		PUSH_REG(reg_map[SLJIT_SAVED_REG2]);
	if (saveds > 0)
		PUSH_REG(reg_map[SLJIT_SAVED_REG1]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* Fastcall: first two arguments come from SCRATCH_REG3 / SCRATCH_REG2,
	   the third is read from [esp + disp8]. */
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3];
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2];
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */;
		*inst++ = 0x24;
		*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	/* Arguments are read relative to TMP_REGISTER at word offsets 2, 3 and 4. */
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REGISTER];
		*inst++ = sizeof(sljit_sw) * 2;
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REGISTER];
		*inst++ = sizeof(sljit_sw) * 3;
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REGISTER];
		*inst++ = sizeof(sljit_sw) * 4;
	}
#endif

	/*
	 * Frame layout (offsets from the new stack pointer):
	 *   [0, scratches_start)            fixed area (2 words or FIXED_LOCALS_OFFSET)
	 *   [scratches_start, saveds_start) one word per scratch register beyond 3
	 *   [saveds_start, locals_offset)   one word per saved register beyond 3
	 *   [locals_offset, local_size)     user locals, rounded up to a word
	 */
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	locals_offset = 2 * sizeof(sljit_uw);
#else
	SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words);
	locals_offset = FIXED_LOCALS_OFFSET;
#endif
	compiler->scratches_start = locals_offset;
	if (scratches > 3)
		locals_offset += (scratches - 3) * sizeof(sljit_uw);
	compiler->saveds_start = locals_offset;
	if (saveds > 3)
		locals_offset += (saveds - 3) * sizeof(sljit_uw);
	compiler->locals_offset = locals_offset;
	local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));

	compiler->local_size = local_size;
#ifdef _WIN32
	/* Large frames: call sljit_grow_stack with the size in SCRATCH_REG1 before
	   moving the stack pointer by the full amount. */
	if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size));
#else
		/* FIXED_LOCALS_OFFSET bytes are reserved up front, before the call;
		   the final subtraction below then covers only the remainder. */
		local_size -= FIXED_LOCALS_OFFSET;
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size));
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET));
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	/* locals_offset is at least two words, so the frame is never empty. */
	SLJIT_ASSERT(local_size > 0);
	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
		SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size);
}
Пример #4
0
/*
 * Emits the function prologue for the x86-32 target (revision that uses raw
 * opcode bytes and the temporaries/generals naming).
 *
 * Pushes TMP_REGISTER and up to three general registers, loads up to three
 * arguments into the general registers, computes the frame size (user locals
 * followed by spill words for temporaries and generals beyond the third) and,
 * if the frame is non-empty, subtracts it from SLJIT_LOCALS_REG.
 *
 * Returns SLJIT_SUCCESS or the result of emit_non_cum_binary; early exits
 * come from CHECK_ERROR / FAIL_IF (definitions not visible in this excerpt).
 */
int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size)
{
	int size;
	/* NOTE: INC_SIZE and PUSH_REG take no buffer argument; they appear to
	   operate on the local named `buf`. */
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, args, temporaries, generals, local_size);

	compiler->temporaries = temporaries;
	compiler->generals = generals;
	compiler->args = args;
	compiler->flags_saved = 0;

	/*
	 * Byte count of everything written through `buf` below: 1 byte for the
	 * TMP_REGISTER push, 1 byte per pushed general register (at most 3), plus
	 * the argument loads (fastcall: 2 bytes each and 2 extra for the third;
	 * otherwise 2 bytes for copying esp plus 3 bytes each).
	 */
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size = 1 + (generals <= 3 ? generals : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size = 1 + (generals <= 3 ? generals : 3) + (args > 0 ? (2 + args * 3) : 0);
#endif
	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REGISTER]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* mov TMP_REGISTER, esp: 0x8b is MOV r32, r/m32; ModRM 0xc4 selects
	   register-direct mode with esp as the source. */
	if (args > 0) {
		*buf++ = 0x8b;
		*buf++ = 0xc4 | (reg_map[TMP_REGISTER] << 3);
	}
#endif
	if (generals > 2)
		PUSH_REG(reg_map[SLJIT_GENERAL_REG3]);
	if (generals > 1)
		PUSH_REG(reg_map[SLJIT_GENERAL_REG2]);
	if (generals > 0)
		PUSH_REG(reg_map[SLJIT_GENERAL_REG1]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* Fastcall: register-to-register moves (ModRM 0xc0 | dst << 3 | src) for
	   the first two arguments; the third is read from [esp + disp8]
	   (ModRM 0x44 | dst << 3, SIB 0x24). */
	if (args > 0) {
		*buf++ = 0x8b;
		*buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | reg_map[SLJIT_TEMPORARY_REG3];
	}
	if (args > 1) {
		*buf++ = 0x8b;
		*buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG2] << 3) | reg_map[SLJIT_TEMPORARY_REG2];
	}
	if (args > 2) {
		*buf++ = 0x8b;
		*buf++ = 0x44 | (reg_map[SLJIT_GENERAL_REG3] << 3);
		*buf++ = 0x24;
		*buf++ = sizeof(sljit_w) * (3 + 2); /* generals >= 3 as well. */
	}
#else
	/* mov general, [TMP_REGISTER + disp8] (ModRM 0x40 | dst << 3 | base) at
	   word offsets 2, 3 and 4. */
	if (args > 0) {
		*buf++ = 0x8b;
		*buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG1] << 3) | reg_map[TMP_REGISTER];
		*buf++ = sizeof(sljit_w) * 2;
	}
	if (args > 1) {
		*buf++ = 0x8b;
		*buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG2] << 3) | reg_map[TMP_REGISTER];
		*buf++ = sizeof(sljit_w) * 3;
	}
	if (args > 2) {
		*buf++ = 0x8b;
		*buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG3] << 3) | reg_map[TMP_REGISTER];
		*buf++ = sizeof(sljit_w) * 4;
	}
#endif

	/*
	 * Frame layout (offsets from the new stack pointer):
	 *   [0, temporaries_start)              user locals, rounded up to a word
	 *   [temporaries_start, generals_start) one word per temporary beyond 3
	 *   [generals_start, local_size)        one word per general beyond 3
	 */
	local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
	compiler->temporaries_start = local_size;
	if (temporaries > 3)
		local_size += (temporaries - 3) * sizeof(sljit_uw);
	compiler->generals_start = local_size;
	if (generals > 3)
		local_size += (generals - 3) * sizeof(sljit_uw);

#ifdef _WIN32
	/* Large frames: load the size into TEMPORARY_REG1 (0xb8 + reg is
	   MOV r32, imm32) and call sljit_touch_stack before moving the stack pointer. */
	if (local_size > 1024) {
		FAIL_IF(emit_do_imm(compiler, 0xb8 + reg_map[SLJIT_TEMPORARY_REG1], local_size));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack)));
	}
#endif

	compiler->local_size = local_size;
	/* Reserve the frame. The opcode arguments sit in the same positions where
	   later revisions pass SUB_r_rm, SUB_rm_r, SUB and SUB_EAX_i32. */
	if (local_size > 0)
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size);

	/* Empty frame: nothing to reserve. */
	return SLJIT_SUCCESS;
}
Пример #5
0
/*
 * Emits the function prologue for the x86-64 target (scratches/saveds
 * revision with individually named saved registers).
 *
 * In one buffer it pushes up to five saved registers (plus
 * SLJIT_TEMPORARY_EREG2 on Win64 when scratches >= 5) and copies up to three
 * incoming arguments into the saved registers. It then rounds the frame so
 * that locals + FIXED_LOCALS_OFFSET + pushed bytes is a multiple of 16 and
 * subtracts it from rsp. On Win64 it additionally calls sljit_grow_stack for
 * frames above 1024 bytes and stores xmm6 to the stack.
 *
 * Returns SLJIT_SUCCESS on the normal path; early exits come from
 * CHECK_ERROR / FAIL_IF (definitions not visible in this excerpt).
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
{
	sljit_si size, pushed_size;
	/* NOTE: INC_SIZE and PUSH_REG take no buffer argument; they appear to
	   operate on the local named `inst`. */
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);

	compiler->scratches = scratches;
	compiler->saveds = saveds;
	compiler->flags_saved = 0;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->logical_local_size = local_size;
#endif

	/* `size` counts instruction bytes: one per push to start with. */
	size = saveds;
	/* Including the return address saved by the call instruction. */
	pushed_size = (saveds + 1) * sizeof(sljit_sw);
#ifndef _WIN64
	/* Every saved register except the first needs a REX_B prefix byte here
	   (see the compile-time asserts below). */
	if (saveds >= 2)
		size += saveds - 1;
#else
	/* On Win64 only the fourth and fifth saved registers need a prefix byte. */
	if (saveds >= 4)
		size += saveds - 3;
	/* One extra two-byte push (prefix + opcode) for SLJIT_TEMPORARY_EREG2. */
	if (scratches >= 5) {
		size += (5 - 4) * 2;
		pushed_size += sizeof(sljit_sw);
	}
#endif
	/* Each argument move is three bytes: REX prefix, opcode, ModRM. */
	size += args * 3;
	if (size > 0) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);
		if (saveds >= 5) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
		}
		if (saveds >= 4) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
		}
		if (saveds >= 3) {
#ifndef _WIN64
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
			*inst++ = REX_B;
#else
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
#endif
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
		}
		if (saveds >= 2) {
#ifndef _WIN64
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
			*inst++ = REX_B;
#else
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
#endif
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
		}
		if (saveds >= 1) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
		}
#ifdef _WIN64
		if (scratches >= 5) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
		}
#endif

#ifndef _WIN64
		/* Non-Windows: arguments are copied from rdi, rsi, rdx. */
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */;
		}
#else
		/* Win64: arguments are copied from rcx, rdx, r8. */
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	/* Round (locals + offset + pushed bytes) up to 16, then drop the pushed bytes again. */
	local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
	compiler->local_size = local_size;
#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		/* One buffer holds both instructions: a 4-byte sub and a 7-byte mov. */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_si)));
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | 4;
		/* Pushed size must be divisible by 8. */
		SLJIT_ASSERT(!(pushed_size & 0x7));
		/* Subtract 4 or 5 words depending on the parity of the pushed words;
		   the same amount is removed from local_size so the final sub below
		   reserves only the remainder. */
		if (pushed_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction */
		/* Loads the remaining local_size into SLJIT_SCRATCH_REG1; presumably
		   the argument consumed by sljit_grow_stack (SLJIT_CALL1). */
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1];
		/* NOTE(review): 32-bit store through a cast byte pointer; the address
		   is not guaranteed to be 4-byte aligned. */
		*(sljit_si*)inst = local_size;
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif
	SLJIT_ASSERT(local_size > 0);
	/* Reserve the (remaining) local area: sub rsp, imm (ModRM r/m field 4 is rsp). */
	if (local_size <= 127) {
		/* Fits the one-byte immediate form (4 bytes total). */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | 4;
		*inst++ = local_size;
	}
	else {
		/* Needs the four-byte immediate form (7 bytes total). */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
		FAIL_IF(!inst);
		INC_SIZE(7);
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_81;
		*inst++ = MOD_REG | SUB | 4;
		*(sljit_si*)inst = local_size;
		inst += sizeof(sljit_si);
	}
#ifdef _WIN64
	/* Save xmm6 with MOVAPS instruction. */
	/* Emitted unconditionally in this revision; the four bytes after GROUP_0F
	   are written as one 32-bit store. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = GROUP_0F;
	*(sljit_si*)inst = 0x20247429;
#endif

	return SLJIT_SUCCESS;
}
Пример #6
0
/*
 * Emits the function prologue for the x86-64 target (oldest revision here:
 * temporaries/saveds naming, raw opcode bytes).
 *
 * In one buffer it pushes up to five saved registers and copies up to three
 * incoming arguments into the saved registers; on Win64 it also pushes
 * SLJIT_TEMPORARY_EREG2 (temporaries >= 5) and SLJIT_LOCALS_REG (when locals
 * are requested). It then rounds the frame so that locals + pushed bytes is a
 * multiple of 16 and subtracts it from rsp. On Win64 four extra words are
 * always added to the frame, sljit_touch_stack is called for frames above
 * 1024 bytes, and SLJIT_LOCALS_REG is pointed past those four words.
 *
 * Returns SLJIT_SUCCESS on the normal path; early exits come from
 * CHECK_ERROR / FAIL_IF (definitions not visible in this excerpt).
 */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
{
	int size, pushed_size;
	/* NOTE: INC_SIZE and PUSH_REG take no buffer argument; they appear to
	   operate on the local named `buf`. */
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);

	compiler->temporaries = temporaries;
	compiler->saveds = saveds;
	compiler->flags_saved = 0;

	/* `size` counts instruction bytes: one per push to start with. */
	size = saveds;
	/* Including the return address saved by the call instruction. */
	pushed_size = (saveds + 1) * sizeof(sljit_w);
#ifndef _WIN64
	/* Every saved register except the first needs a REX_B prefix byte here
	   (see the compile-time asserts below). */
	if (saveds >= 2)
		size += saveds - 1;
#else
	/* Saving the virtual stack pointer. */
	compiler->has_locals = local_size > 0;
	if (local_size > 0) {
		/* Two bytes: REX_B prefix + push of SLJIT_LOCALS_REG. */
		size += 2;
		pushed_size += sizeof(sljit_w);
	}
	/* On Win64 only the fourth and fifth saved registers need a prefix byte. */
	if (saveds >= 4)
		size += saveds - 3;
	/* One extra two-byte push (prefix + opcode) for SLJIT_TEMPORARY_EREG2. */
	if (temporaries >= 5) {
		size += (5 - 4) * 2;
		pushed_size += sizeof(sljit_w);
	}
#endif
	/* Each argument move is three bytes: REX prefix, opcode, ModRM. */
	size += args * 3;
	if (size > 0) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);

		INC_SIZE(size);
		if (saveds >= 5) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
		}
		if (saveds >= 4) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
		}
		if (saveds >= 3) {
#ifndef _WIN64
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
			*buf++ = REX_B;
#else
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
#endif
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
		}
		if (saveds >= 2) {
#ifndef _WIN64
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
			*buf++ = REX_B;
#else
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
#endif
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
		}
		if (saveds >= 1) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
		}
#ifdef _WIN64
		if (temporaries >= 5) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
		}
		if (local_size > 0) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_LOCALS_REG] >= 8, locals_reg_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_LOCALS_REG]);
		}
#endif

		/* Argument moves: 0x8b is MOV r64, r/m64 (with REX_W); ModRM
		   0xc0 | dst << 3 | src selects register-direct mode. */
#ifndef _WIN64
		/* Non-Windows sources: 0x7 = rdi, 0x6 = rsi, 0x2 = rdx. */
		if (args > 0) {
			*buf++ = REX_W;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7;
		}
		if (args > 1) {
			*buf++ = REX_W | REX_R;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6;
		}
		if (args > 2) {
			*buf++ = REX_W | REX_R;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2;
		}
#else
		/* Win64 sources: 0x1 = rcx, 0x2 = rdx, 0x0 with REX_B = r8. */
		if (args > 0) {
			*buf++ = REX_W;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1;
		}
		if (args > 1) {
			*buf++ = REX_W;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2;
		}
		if (args > 2) {
			*buf++ = REX_W | REX_B;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0;
		}
#endif
	}

	/* Round (locals + pushed bytes) up to 16, then drop the pushed bytes again. */
	local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
#ifdef _WIN64
	/* Four extra words are always part of the Win64 frame. */
	local_size += 4 * sizeof(sljit_w);
	compiler->local_size = local_size;
	if (local_size > 1024) {
		/* Allocate the stack for the function itself. */
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!buf);
		INC_SIZE(4);
		/* sub rsp, imm8: opcode 0x83, ModRM 0xc0 | (5 << 3) | 4
		   (register-direct, /5 = SUB, r/m 4 = rsp). */
		*buf++ = REX_W;
		*buf++ = 0x83;
		*buf++ = 0xc0 | (5 << 3) | 4;
		/* Pushed size must be divisible by 8. */
		SLJIT_ASSERT(!(pushed_size & 0x7));
		/* Subtract 4 or 5 words depending on the parity of the pushed words;
		   the same amount is removed from local_size so the final sub below
		   reserves only the remainder. */
		if (pushed_size & 0x8) {
			*buf++ = 5 * sizeof(sljit_w);
			local_size -= 5 * sizeof(sljit_w);
		} else {
			*buf++ = 4 * sizeof(sljit_w);
			local_size -= 4 * sizeof(sljit_w);
		}
		/* Remaining size goes into TEMPORARY_REG1; presumably the argument
		   consumed by sljit_touch_stack (SLJIT_CALL1). */
		FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack)));
	}
#else
	compiler->local_size = local_size;
	/* This `if` is closed by the `#ifndef _WIN64` brace further down; on Win64
	   the block below runs unconditionally. */
	if (local_size > 0) {
#endif
		/* In case of Win64, local_size is always > 4 * sizeof(sljit_w) */
		if (local_size <= 127) {
			/* sub rsp, imm8 (4 bytes total). */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!buf);
			INC_SIZE(4);
			*buf++ = REX_W;
			*buf++ = 0x83;
			*buf++ = 0xc0 | (5 << 3) | 4;
			*buf++ = local_size;
		}
		else {
			/* sub rsp, imm32 (7 bytes total, opcode 0x81). */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!buf);
			INC_SIZE(7);
			*buf++ = REX_W;
			*buf++ = 0x81;
			*buf++ = 0xc0 | (5 << 3) | 4;
			/* NOTE(review): store through a cast byte pointer; the address is
			   not guaranteed to be aligned for sljit_hw. */
			*(sljit_hw*)buf = local_size;
			buf += sizeof(sljit_hw);
		}
#ifndef _WIN64
	}
#endif

#ifdef _WIN64
	if (compiler->has_locals) {
		/* lea SLJIT_LOCALS_REG, [rsp + 4 words]: opcode 0x8d, ModRM with
		   disp8 and SIB byte 0x24 (base rsp, no index). */
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!buf);
		INC_SIZE(5);
		*buf++ = REX_W | REX_R;
		*buf++ = 0x8d;
		*buf++ = 0x40 | (reg_lmap[SLJIT_LOCALS_REG] << 3) | 0x4;
		*buf++ = 0x24;
		*buf = 4 * sizeof(sljit_w);
	}
#endif

	return SLJIT_SUCCESS;
}
Пример #7
0
/*
 * Emits the function prologue for the x86-32 target (revision with the
 * options / fscratches / fsaveds parameters).
 *
 * Pushes TMP_REG1 and up to three of SLJIT_S2..SLJIT_S0 (a register is pushed
 * when it is used either as a saved register or as a high-numbered scratch
 * register), loads up to three arguments into SLJIT_S0..SLJIT_S2, computes
 * the frame size (16-byte aligned on Apple targets, optionally 8-byte aligned
 * via SLJIT_DOUBLE_ALIGNMENT elsewhere) and subtracts it from SLJIT_SP.
 *
 * Returns the result of the final emit_non_cum_binary call; early exits come
 * from CHECK_ERROR / CHECK / FAIL_IF (definitions not visible in this excerpt).
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
    sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
{
    sljit_si size;
    /* NOTE: INC_SIZE and PUSH_REG take no buffer argument; they appear to
       operate on the local named `inst`. */
    sljit_ub *inst;

    CHECK_ERROR();
    CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

    compiler->args = args;
    compiler->flags_saved = 0;

    /*
     * Byte count of everything written through `inst` in the first buffer:
     * 1 byte for the TMP_REG1 push, 1 byte per pushed S register, plus the
     * argument loads (fastcall: 2 bytes each and 2 extra for the third;
     * otherwise 2 bytes for copying esp plus 3 bytes each).
     * NOTE(review): this matches the push conditions below only when
     * scratches and saveds do not claim the same S register — presumably
     * enforced by check_sljit_emit_enter; confirm.
     */
    size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
    size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
    size += (args > 0 ? (2 + args * 3) : 0);
#endif
    inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
    FAIL_IF(!inst);

    INC_SIZE(size);
    PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
    /* TMP_REG1 = esp, taken before the S registers are pushed so the argument
       offsets below do not depend on how many are pushed. */
    if (args > 0) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
    }
#endif
    if (saveds > 2 || scratches > 7)
        PUSH_REG(reg_map[SLJIT_S2]);
    if (saveds > 1 || scratches > 8)
        PUSH_REG(reg_map[SLJIT_S1]);
    if (saveds > 0 || scratches > 9)
        PUSH_REG(reg_map[SLJIT_S0]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
    /* Fastcall: first two arguments come from SLJIT_R2 / SLJIT_R1, the third
       is read from [esp + disp8]. */
    if (args > 0) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
    }
    if (args > 1) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
    }
    if (args > 2) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
        *inst++ = 0x24;
        *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
    }
#else
    /* Arguments are read relative to TMP_REG1 at word offsets 2, 3 and 4. */
    if (args > 0) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
        *inst++ = sizeof(sljit_sw) * 2;
    }
    if (args > 1) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
        *inst++ = sizeof(sljit_sw) * 3;
    }
    if (args > 2) {
        *inst++ = MOV_r_rm;
        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
        *inst++ = sizeof(sljit_sw) * 4;
    }
#endif

    /* The condition requires (2 + 4) words even though the assertion's
       message identifier still says "two words". */
    SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
#if defined(__APPLE__)
    /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
    /* From here on `saveds` is reused as a byte count: two fixed words plus
       the pushed S registers. */
    saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
    local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
    if (options & SLJIT_DOUBLE_ALIGNMENT) {
        local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);

        inst = (sljit_ub*)ensure_buf(compiler, 1 + 17);
        FAIL_IF(!inst);

        INC_SIZE(17);
        /*
         * Fixed 17-byte sequence written at explicit offsets:
         *   [0..1]   mov  TMP_REG1, sp
         *   [2..7]   test sp, 0x4
         *   [8..9]   jne  +6        (skips the 6-byte sub below)
         *   [10..15] sub  sp, 0x4
         *   [16]     push TMP_REG1
         * NOTE(review): the two immediates are stored through a cast byte
         * pointer at offsets that are not guaranteed to be aligned.
         */
        inst[0] = MOV_r_rm;
        inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
        inst[2] = GROUP_F7;
        inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
        *(sljit_sw*)(inst + 4) = 0x4;
        inst[8] = JNE_i8;
        inst[9] = 6;
        inst[10] = GROUP_BINARY_81;
        inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
        *(sljit_sw*)(inst + 12) = 0x4;
        inst[16] = PUSH_r + reg_map[TMP_REG1];
    }
    else
        local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
#endif

    compiler->local_size = local_size;
#ifdef _WIN32
    /* Large frames: call sljit_grow_stack with the size in SLJIT_R0 before
       moving the stack pointer by the full amount. */
    if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
        FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
        /* SLJIT_LOCALS_OFFSET bytes are reserved up front, before the call;
           the final subtraction below then covers only the remainder. */
        local_size -= SLJIT_LOCALS_OFFSET;
        FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
        FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
            SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET));
#endif
        FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
    }
#endif

    SLJIT_ASSERT(local_size > 0);
    return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
        SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}