Example #1
gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	guint8 *code, *buf;
	int buf_len, cfa_offset;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	guint8 *br_out, *br [16];
	int info_offset, mrgctx_offset;

	buf_len = 320;
	buf = code = mono_global_codeman_reserve (buf_len);

	/*
	 * This trampoline is responsible for marshalling calls between normal code and gsharedvt code. The
	 * caller is a normal or gshared method which uses the signature of the inflated method to make the call, while
	 * the callee is a gsharedvt method which has a signature which uses valuetypes in place of type parameters, i.e.
	 * caller:
	 * foo<bool> (bool b)
	 * callee:
	 * T=<type used to represent vtype type arguments, currently TypedByRef>
	 * foo<T> (T b)
	 * The trampoline is responsible for marshalling the arguments and marshalling the result back. To simplify
	 * things, we create our own stack frame, and do most of the work in a C function, which receives a
	 * GSharedVtCallInfo structure as an argument. The structure should contain all the information needed for the
	 * C function to be as fast as possible. The argument is received in EAX from a gsharedvt arg trampoline. So the real
	 * call sequence looks like this:
	 * caller -> gsharedvt arg trampoline -> gsharedvt in trampoline -> start_gsharedvt_call
	 * FIXME: Optimize this.
	 */

	cfa_offset = sizeof (gpointer);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, cfa_offset);
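	/* IP saved at CFA - 4 */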
	mono_add_unwind_op_offset (unwind_ops, code, buf, X86_NREG, -cfa_offset);
	x86_push_reg (code, X86_EBP);
	cfa_offset += sizeof (gpointer);
	mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, X86_EBP, - cfa_offset);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, sizeof (gpointer));
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, X86_EBP);
	/* Alloc stack frame/align stack */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
	info_offset = -4;
	mrgctx_offset = -8;
	/* The info struct is put into EAX by the gsharedvt trampoline */
	/* Save info struct addr */
	x86_mov_membase_reg (code, X86_EBP, info_offset, X86_EAX, 4);
	/* Save rgctx */
	x86_mov_membase_reg (code, X86_EBP, mrgctx_offset, MONO_ARCH_RGCTX_REG, 4);

	/* Allocate stack area used to pass arguments to the method */
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage), sizeof (gpointer));
	x86_alu_reg_reg (code, X86_SUB, X86_ESP, X86_EAX);

#if 0
	/* Stack alignment check */
	x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
	x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
	x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
	x86_breakpoint (code);
#endif

	/* ecx = caller argument area */
	x86_mov_reg_reg (code, X86_ECX, X86_EBP, 4);
	x86_alu_reg_imm (code, X86_ADD, X86_ECX, 8);
	/* eax = callee argument area */
	x86_mov_reg_reg (code, X86_EAX, X86_ESP, 4);

	/* Call start_gsharedvt_call */
	/* Arg 4 */
	x86_push_membase (code, X86_EBP, mrgctx_offset);
	/* Arg3 */
	x86_push_reg (code, X86_EAX);
	/* Arg2 */
	x86_push_reg (code, X86_ECX);
	/* Arg1 */
	x86_push_membase (code, X86_EBP, info_offset);
	if (aot) {
		code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_x86_start_gsharedvt_call");
		x86_call_reg (code, X86_EAX);
	} else {
		x86_call_code (code, mono_x86_start_gsharedvt_call);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4 * 4);
	/* The address to call is in eax */
	/* The stack is now setup for the real call */
	/* Load info struct */
	x86_mov_reg_membase (code, X86_ECX, X86_EBP, info_offset, 4);
	/* Load rgctx */
	x86_mov_reg_membase (code, MONO_ARCH_RGCTX_REG, X86_EBP, mrgctx_offset, sizeof (gpointer));
	/* Make the call */
	x86_call_reg (code, X86_EAX);
	/* The return value is either in registers, or stored to an area beginning at sp [info->vret_slot] */
	/* EAX/EDX might contain the return value, only ECX is free */
	/* Load info struct */
	x86_mov_reg_membase (code, X86_ECX, X86_EBP, info_offset, 4);

	/* Branch to the in/out handling code */
	x86_alu_membase_imm (code, X86_CMP, X86_ECX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in), 1);	
	br_out = code;
	x86_branch32 (code, X86_CC_NE, 0, TRUE);

	/*
	 * IN CASE
	 */

	/* Load ret marshal type */
	x86_mov_reg_membase (code, X86_ECX, X86_ECX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal), 4);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_NONE);
	br [0] = code;
	x86_branch8 (code, X86_CC_NE, 0, TRUE);

	/* Normal return, no marshalling required */
	x86_leave (code);
	x86_ret (code);

	/* Return value marshalling */
	x86_patch (br [0], code);
	/* Load info struct */
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, info_offset, 4);
	/* Load 'vret_slot' */
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot), 4);
	/* Compute ret area address */
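	/* vret_slot is a slot index: scale it by 4 (the x86 word size) and add it to sp */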
	x86_shift_reg_imm (code, X86_SHL, X86_EAX, 2);
	x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_ESP);
	/* The callee does a ret $4, so sp is off by 4 */
	x86_alu_reg_imm (code, X86_SUB, X86_EAX, sizeof (gpointer));

	/* Branch to specific marshalling code */
	/* FIXME: Move the I4 case to the top */
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_DOUBLE_FPSTACK);
	br [1] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_FLOAT_FPSTACK);
	br [2] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_STACK_POP);
	br [3] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_I1);
	br [4] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_U1);
	br [5] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_I2);
	br [6] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_U2);
	br [7] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	/* IREGS case */
	/* Load both eax and edx for simplicity */
	x86_mov_reg_membase (code, X86_EDX, X86_EAX, sizeof (gpointer), sizeof (gpointer));
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, 0, sizeof (gpointer));
	x86_leave (code);
	x86_ret (code);
	/* DOUBLE_FPSTACK case */
	x86_patch (br [1], code);
	x86_fld_membase (code, X86_EAX, 0, TRUE);
	x86_jump8 (code, 0);
	x86_leave (code);
	x86_ret (code);
	/* FLOAT_FPSTACK case */
	x86_patch (br [2], code);
	x86_fld_membase (code, X86_EAX, 0, FALSE);
	x86_leave (code);
	x86_ret (code);
	/* STACK_POP case */
	x86_patch (br [3], code);
	x86_leave (code);
	x86_ret_imm (code, 4);
	/* I1 case */
	x86_patch (br [4], code);
	x86_widen_membase (code, X86_EAX, X86_EAX, 0, TRUE, FALSE);
	x86_leave (code);
	x86_ret (code);
	/* U1 case */
	x86_patch (br [5], code);
	x86_widen_membase (code, X86_EAX, X86_EAX, 0, FALSE, FALSE);
	x86_leave (code);
	x86_ret (code);
	/* I2 case */
	x86_patch (br [6], code);
	x86_widen_membase (code, X86_EAX, X86_EAX, 0, TRUE, TRUE);
	x86_leave (code);
	x86_ret (code);
	/* U2 case */
	x86_patch (br [7], code);
	x86_widen_membase (code, X86_EAX, X86_EAX, 0, FALSE, TRUE);
	x86_leave (code);
	x86_ret (code);

	/*
	 * OUT CASE
	 */

	x86_patch (br_out, code);
	/* Load ret marshal type into ECX */
	x86_mov_reg_membase (code, X86_ECX, X86_ECX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal), 4);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_NONE);
	br [0] = code;
	x86_branch8 (code, X86_CC_NE, 0, TRUE);

	/* Normal return, no marshalling required */
	x86_leave (code);
	x86_ret (code);

	/* Return value marshalling */
	x86_patch (br [0], code);

	/* EAX might contain the return value */
	// FIXME: Use moves
	x86_push_reg (code, X86_EAX);

	/* Load info struct */
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, info_offset, 4);
	/* Load 'vret_arg_slot' */
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_slot), 4);
	/* Compute ret area address in the caller frame in EAX */
	x86_shift_reg_imm (code, X86_SHL, X86_EAX, 2);
	x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EBP);
	x86_alu_reg_imm (code, X86_ADD, X86_EAX, 8);
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, 0, sizeof (gpointer));

	/* Branch to specific marshalling code */
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_DOUBLE_FPSTACK);
	br [1] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_FLOAT_FPSTACK);
	br [2] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_STACK_POP);
	br [3] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	x86_alu_reg_imm (code, X86_CMP, X86_ECX, GSHAREDVT_RET_IREGS);
	br [4] = code;
	x86_branch8 (code, X86_CC_E, 0, TRUE);
	/* IREG case */
	x86_mov_reg_reg (code, X86_ECX, X86_EAX, sizeof (gpointer));
	x86_pop_reg (code, X86_EAX);
	x86_mov_membase_reg (code, X86_ECX, 0, X86_EAX, sizeof (gpointer));
	x86_leave (code);
	x86_ret_imm (code, 4);
	/* IREGS case */
	x86_patch (br [4], code);
	x86_mov_reg_reg (code, X86_ECX, X86_EAX, sizeof (gpointer));
	x86_pop_reg (code, X86_EAX);
	x86_mov_membase_reg (code, X86_ECX, sizeof (gpointer), X86_EDX, sizeof (gpointer));
	x86_mov_membase_reg (code, X86_ECX, 0, X86_EAX, sizeof (gpointer));
	x86_leave (code);
	x86_ret_imm (code, 4);
	/* DOUBLE_FPSTACK case */
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
	x86_patch (br [1], code);
	x86_fst_membase (code, X86_EAX, 0, TRUE, TRUE);
	x86_jump8 (code, 0);
	x86_leave (code);
	x86_ret_imm (code, 4);
	/* FLOAT_FPSTACK case */
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
	x86_patch (br [2], code);
	x86_fst_membase (code, X86_EAX, 0, FALSE, TRUE);
	x86_leave (code);
	x86_ret_imm (code, 4);
	/* STACK_POP case */
	x86_patch (br [3], code);
	x86_leave (code);
	x86_ret_imm (code, 4);

	g_assert ((code - buf) < buf_len);

	if (info)
		*info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops);

	mono_arch_flush_icache (buf, code - buf);
	return buf;
}
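
A minimal usage sketch (not part of the example above): how a JIT startup path might create the trampoline once and register its unwind info. mono_tramp_info_register () and its (info, domain) signature are assumptions here; the exact call site differs between Mono versions.

static gpointer
get_gsharedvt_trampoline_addr (void)
{
	static gpointer addr;

	if (!addr) {
		MonoTrampInfo *tinfo = NULL;

		/* aot == FALSE: emit a direct call to mono_x86_start_gsharedvt_call */
		addr = mono_arch_get_gsharedvt_trampoline (&tinfo, FALSE);
		/* Assumed helper: registers the trampoline's unwind info with the runtime */
		mono_tramp_info_register (tinfo, NULL);
	}
	return addr;
}
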
Example #2
gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	guint8 *code, *buf;
	int buf_len, cfa_offset;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	int n_arg_regs, n_arg_fregs, framesize, i;
	int info_offset, offset, rgctx_arg_reg_offset;
	int caller_reg_area_offset, callee_reg_area_offset, callee_stack_area_offset;
	guint8 *br_out, *br [64], *br_ret [64];
	int b_ret_index;
	int reg_area_size;

	buf_len = 2048;
	buf = code = mono_global_codeman_reserve (buf_len + MONO_MAX_TRAMPOLINE_UNWINDINFO_SIZE);

	/*
	 * We are being called by a gsharedvt arg trampoline; the info argument is in AMD64_RAX.
	 */
	n_arg_regs = PARAM_REGS;
	n_arg_fregs = FLOAT_PARAM_REGS;

	/* Compute stack frame size and offsets */
	offset = 0;
	/* info reg */
	info_offset = offset;
	offset += 8;

	/* rgctx reg */
	rgctx_arg_reg_offset = offset;
	offset += 8;

	/* callconv in regs */
	caller_reg_area_offset = offset;
	reg_area_size = ALIGN_TO ((n_arg_regs + n_arg_fregs) * 8, MONO_ARCH_FRAME_ALIGNMENT);
	offset += reg_area_size;

	framesize = offset;

	g_assert (framesize % MONO_ARCH_FRAME_ALIGNMENT == 0);
	g_assert (reg_area_size % MONO_ARCH_FRAME_ALIGNMENT == 0);

	/* unwind markers 1/3 */
	cfa_offset = sizeof (gpointer);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, AMD64_RSP, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RIP, -cfa_offset);

	/* save the old frame pointer */
	amd64_push_reg (code, AMD64_RBP);

	/* unwind markers 2/3 */
	cfa_offset += sizeof (gpointer);
	mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RBP, - cfa_offset);

	/* set it as the new frame pointer */
	amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t));

	/* unwind markers 3/3 */
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, AMD64_RBP);
	mono_add_unwind_op_fp_alloc (unwind_ops, code, buf, AMD64_RBP, 0);

	/* setup the frame */
	amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, framesize);
	
	/* save stuff */

	/* save info */
	amd64_mov_membase_reg (code, AMD64_RSP, info_offset, AMD64_RAX, sizeof (mgreg_t));
	/* save rgctx */
	amd64_mov_membase_reg (code, AMD64_RSP, rgctx_arg_reg_offset, MONO_ARCH_RGCTX_REG, sizeof (mgreg_t));

	for (i = 0; i < n_arg_regs; ++i)
		amd64_mov_membase_reg (code, AMD64_RSP, caller_reg_area_offset + i * 8, param_regs [i], sizeof (mgreg_t));

	for (i = 0; i < n_arg_fregs; ++i)
		amd64_sse_movsd_membase_reg (code, AMD64_RSP, caller_reg_area_offset + (i + n_arg_regs) * 8, i);

	/* TODO Allocate stack area used to pass arguments to the method */


	/* Allocate callee register area just below the caller area so it can be accessed from start_gsharedvt_call using negative offsets */
	/* XXX figure out alignment */
	callee_reg_area_offset = reg_area_size - ((n_arg_regs + n_arg_fregs) * 8); /* Ensure alignment */
	callee_stack_area_offset = callee_reg_area_offset + reg_area_size;
	amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, reg_area_size);

	/* Allocate stack area used to pass arguments to the method */
	amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage), 4);
	amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, AMD64_R11);

	/* The stack now looks like this:

	<caller stack params area>
	<return address>
	<old frame pointer>
	<caller registers area>
	<rgctx>
	<gsharedvt info>
	<callee stack area>
	<callee reg area>
	 */

	/* Call start_gsharedvt_call () */
	/* arg1 == info */
	amd64_mov_reg_reg (code, MONO_AMD64_ARG_REG1, AMD64_RAX, sizeof(mgreg_t));
	/* arg2 = caller stack area */
	amd64_lea_membase (code, MONO_AMD64_ARG_REG2, AMD64_RBP, -(framesize - caller_reg_area_offset)); 

	/* arg3 == callee stack area */
	amd64_lea_membase (code, MONO_AMD64_ARG_REG3, AMD64_RSP, callee_reg_area_offset);

	/* arg4 = mrgctx reg */
	amd64_mov_reg_reg (code, MONO_AMD64_ARG_REG4, MONO_ARCH_RGCTX_REG, sizeof(mgreg_t));

	if (aot) {
		code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_amd64_start_gsharedvt_call");
		#ifdef TARGET_WIN32
			/* Since we are doing a call as part of setting up the stack frame, the shadow space used by the Windows
			platform would be allocated in the callee stack area, but currently the callee reg area is in between.
			The Windows calling convention dictates that room is made on the stack where the callee can spill any
			parameters passed in registers. Since the Windows x64 calling convention uses 4 registers for the first
			4 parameters, the stack needs to be adjusted before making the call.
			NOTE: the Windows calling convention assumes that space for all 4 registers has been reserved, regardless
			of the number of function parameters actually used.
			*/
			int shadow_reg_size = 0;

			shadow_reg_size = ALIGN_TO (PARAM_REGS * sizeof(gpointer), MONO_ARCH_FRAME_ALIGNMENT);
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, shadow_reg_size);
			amd64_call_reg (code, AMD64_R11);
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, shadow_reg_size);
		#else
			amd64_call_reg (code, AMD64_R11);
		#endif
	} else {
		amd64_call_code (code, mono_amd64_start_gsharedvt_call);
	}

	/* Method to call is now on RAX. Restore regs and jump */
	amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, sizeof(mgreg_t));

	for (i = 0; i < n_arg_regs; ++i)
		amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, callee_reg_area_offset + i * 8, sizeof (mgreg_t));

	for (i = 0; i < n_arg_fregs; ++i)
		amd64_sse_movsd_reg_membase (code, i, AMD64_RSP, callee_reg_area_offset + (i + n_arg_regs) * 8);

	//load rgctx
	amd64_mov_reg_membase (code, MONO_ARCH_RGCTX_REG, AMD64_RBP, -(framesize - rgctx_arg_reg_offset), sizeof (mgreg_t));

	/* Clear callee reg area */
	amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, reg_area_size);

	/* Call the thing */
	amd64_call_reg (code, AMD64_R11);

	/* Marshal return value. Available registers: R10 and R11 */
	/* Load info struct */
	amd64_mov_reg_membase (code, AMD64_R10, AMD64_RBP, -(framesize - info_offset), sizeof (mgreg_t));

	/* Branch to the in/out handling code */
	amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R10, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in), 1, 4);

	b_ret_index = 0;
	br_out = code;
	x86_branch32 (code, X86_CC_NE, 0, TRUE);

	/*
	 * IN CASE
	 */

	/* Load vret_slot */
	/* Use first input parameter register as scratch since it is volatile on all platforms */
	amd64_mov_reg_membase (code, MONO_AMD64_ARG_REG1, AMD64_R10, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot), 4);
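	/* vret_slot is a slot index; subtract the register save slots and scale by 8 to turn it into a byte offset from SP */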
	amd64_alu_reg_imm (code, X86_SUB, MONO_AMD64_ARG_REG1, n_arg_regs + n_arg_fregs);
	amd64_shift_reg_imm (code, X86_SHL, MONO_AMD64_ARG_REG1, 3);

	/* vret address is RBP - (framesize - caller_reg_area_offset) */
	amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, sizeof(mgreg_t));
	amd64_alu_reg_reg (code, X86_ADD, AMD64_R11, MONO_AMD64_ARG_REG1);

	/* Load ret marshal type */
	/* Load vret address in R11 */
	amd64_mov_reg_membase (code, AMD64_R10, AMD64_R10, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal), 4);

	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		amd64_alu_reg_imm (code, X86_CMP, AMD64_R10, i);
		br [i] = code;
		amd64_branch8 (code, X86_CC_EQ, 0, TRUE);
	}
	x86_breakpoint (code); /* unhandled case */

	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		mono_amd64_patch (br [i], code);
		switch (i) {
		case GSHAREDVT_RET_NONE:
			break;
		case GSHAREDVT_RET_I1:
			amd64_widen_membase (code, AMD64_RAX, AMD64_R11, 0, TRUE, FALSE);
			break;
		case GSHAREDVT_RET_U1:
			amd64_widen_membase (code, AMD64_RAX, AMD64_R11, 0, FALSE, FALSE);
			break;
		case GSHAREDVT_RET_I2:
			amd64_widen_membase (code, AMD64_RAX, AMD64_R11, 0, TRUE, TRUE);
			break;
		case GSHAREDVT_RET_U2:
			amd64_widen_membase (code, AMD64_RAX, AMD64_R11, 0, FALSE, TRUE);
			break;
		case GSHAREDVT_RET_I4: // CORRECT
		case GSHAREDVT_RET_U4: // THIS IS INCORRECT. WHY IS IT NOT FAILING?
			amd64_movsxd_reg_membase (code, AMD64_RAX, AMD64_R11, 0);
			break;
		case GSHAREDVT_RET_I8:
			amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, 0, 8);
			break;
		case GSHAREDVT_RET_IREGS_1:
			amd64_mov_reg_membase (code, return_regs [i - GSHAREDVT_RET_IREGS_1], AMD64_R11, 0, 8);
			break;
		case GSHAREDVT_RET_R8:
			amd64_sse_movsd_reg_membase (code, AMD64_XMM0, AMD64_R11, 0);
			break;
		default:
			x86_breakpoint (code); /* can't handle specific case */
		}

		br_ret [b_ret_index ++] = code;
		x86_jump32 (code, 0);
	}

	/*
	 * OUT CASE
	 */
	mono_amd64_patch (br_out, code);

	/*
		Address to write return to is in the original value of the register specified by vret_arg_reg.
		This will be either RSI, RDI (System V) or RCX, RDX (Windows) depending on whether this is a static call.
		Its location:
		We alloc 'framesize' bytes below RBP to save regs, info and rgctx. RSP = RBP - framesize
		We store RDI (System V), RCX (Windows) at RSP + caller_reg_area_offset + slot_index_of (register) * 8.

		address: RBP - framesize + caller_reg_area_offset + 8*slot
	*/

	int caller_vret_offset = caller_reg_area_offset - framesize;

	/* Load vret address in R11 */
	/* Position to return to is passed as a hidden argument. Load 'vret_arg_slot' to find it */
	amd64_movsxd_reg_membase (code, AMD64_R11, AMD64_R10, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));

	// In the GSHAREDVT_RET_NONE case, vret_arg_slot is -1. In this case, skip marshalling.
	amd64_alu_reg_imm (code, X86_CMP, AMD64_R11, 0);
	br_ret [b_ret_index ++] = code;
	amd64_branch32 (code, X86_CC_LT, 0, TRUE);

	/* Compute ret area address in the caller frame, *( ((gpointer *)RBP) [R11+2] ) */
	amd64_shift_reg_imm (code, X86_SHL, AMD64_R11, 3);
	amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, caller_vret_offset);
	amd64_alu_reg_reg (code, X86_ADD, AMD64_R11, AMD64_RBP);
	amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, sizeof (gpointer));

	/* Load ret marshal type in R10 */
	amd64_mov_reg_membase (code, AMD64_R10, AMD64_R10, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal), 4);

	// Switch table for ret_marshal value
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		amd64_alu_reg_imm (code, X86_CMP, AMD64_R10, i);
		br [i] = code;
		amd64_branch8 (code, X86_CC_EQ, 0, TRUE);
	}
	x86_breakpoint (code); /* unhandled case */

	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		mono_amd64_patch (br [i], code);
		switch (i) {
		case GSHAREDVT_RET_NONE:
			break;
		case GSHAREDVT_RET_IREGS_1:
			amd64_mov_membase_reg (code, AMD64_R11, 0, return_regs [i - GSHAREDVT_RET_IREGS_1], 8);
			break;
		case GSHAREDVT_RET_R8:
			amd64_sse_movsd_membase_reg (code, AMD64_R11, 0, AMD64_XMM0);
			break;
		default:
			x86_breakpoint (code); /* can't handle specific case */
		}

		br_ret [b_ret_index ++] = code;
		x86_jump32 (code, 0);
	}

	/* exit path */
	for (i = 0; i < b_ret_index; ++i)
		mono_amd64_patch (br_ret [i], code);

	/* Exit code path */
#if TARGET_WIN32
	amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, 0);
	amd64_pop_reg (code, AMD64_RBP);
	mono_add_unwind_op_same_value (unwind_ops, code, buf, AMD64_RBP);
#else
	amd64_leave (code);
#endif
	amd64_ret (code);

	g_assert ((code - buf) < buf_len);
	g_assert_checked (mono_arch_unwindinfo_validate_size (unwind_ops, MONO_MAX_TRAMPOLINE_UNWINDINFO_SIZE));

	if (info)
		*info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops);

	mono_arch_flush_icache (buf, code - buf);
	return buf;
}
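
For orientation, these are the GSharedVtCallInfo fields the trampolines above read via MONO_STRUCT_OFFSET. This is an illustrative sketch only, with assumed field types; the real declaration lives in the per-architecture mini headers and contains more fields.

typedef struct {
	gint32 stack_usage;    /* bytes needed for the callee stack argument area */
	gint32 vret_slot;      /* slot index of the return buffer in the callee area (IN case) */
	gint32 vret_arg_slot;  /* stack slot of the hidden vret argument (x86 OUT case) */
	gint32 vret_arg_reg;   /* register carrying the vret address (amd64/arm OUT case) */
	gint32 ret_marshal;    /* one of the GSHAREDVT_RET_* values */
	gint32 gsharedvt_in;   /* 1 = normal -> gsharedvt (IN), 0 = gsharedvt -> normal (OUT) */
} GSharedVtCallInfoSketch;
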
Example #3
gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	guint8 *code, *buf;
	int buf_len, cfa_offset;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	guint8 *br_out, *br [64], *br_ret [64], *bcc_ret [64];
	int i, n_arg_regs, n_arg_fregs, offset, arg_reg, info_offset, rgctx_arg_reg_offset;
	int caller_reg_area_offset, callee_reg_area_offset, callee_stack_area_offset;
	int br_ret_index, bcc_ret_index;

	buf_len = 2048;
	buf = code = mono_global_codeman_reserve (buf_len);

	/*
	 * We are being called by a gsharedvt arg trampoline; the info argument is in IP1.
	 */
	arg_reg = ARMREG_IP1;
	n_arg_regs = NUM_GSHAREDVT_ARG_GREGS;
	n_arg_fregs = NUM_GSHAREDVT_ARG_FREGS;

	/* Compute stack frame size and offsets */
	offset = 0;
	/* frame block */
	offset += 2 * 8;
	/* info argument */
	info_offset = offset;
	offset += 8;
	/* saved rgctx */
	rgctx_arg_reg_offset = offset;
	offset += 8;
	/* alignment */
	offset += 8;
	/* argument regs */
	caller_reg_area_offset = offset;
	offset += (n_arg_regs + n_arg_fregs) * 8;

	/* We need the argument regs to be saved at the top of the frame */
	g_assert (offset % MONO_ARCH_FRAME_ALIGNMENT == 0);

	cfa_offset = offset;

	/* Setup frame */
	arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -cfa_offset);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_FP, -cfa_offset + 0);
	mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_LR, -cfa_offset + 8);
	arm_movspx (code, ARMREG_FP, ARMREG_SP);
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, ARMREG_FP);

	/* Save info argument */
	arm_strx (code, arg_reg, ARMREG_FP, info_offset);

	/* Save rgctx */
	arm_strx (code, MONO_ARCH_RGCTX_REG, ARMREG_FP, rgctx_arg_reg_offset);

	/* Save argument regs below the stack arguments */
	for (i = 0; i < n_arg_regs; ++i)
		arm_strx (code, i, ARMREG_SP, caller_reg_area_offset + (i * 8));
	// FIXME: Only do this if fp regs are used
	for (i = 0; i < n_arg_fregs; ++i)
		arm_strfpx (code, i, ARMREG_SP, caller_reg_area_offset + ((n_arg_regs + i) * 8));

	/* Allocate callee area */
	arm_ldrw (code, ARMREG_IP0, arg_reg, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage));
	arm_movspx (code, ARMREG_LR, ARMREG_SP);
	arm_subx (code, ARMREG_LR, ARMREG_LR, ARMREG_IP0);
	arm_movspx (code, ARMREG_SP, ARMREG_LR);
	/* Allocate callee register area just below the callee area so it can be accessed from start_gsharedvt_call using negative offsets */
	/* The + 8 is for alignment */
	callee_reg_area_offset = 8;
	callee_stack_area_offset = callee_reg_area_offset + (n_arg_regs * sizeof (gpointer));
	arm_subx_imm (code, ARMREG_SP, ARMREG_SP, ((n_arg_regs + n_arg_fregs) * sizeof (gpointer)) + 8);

	/*
	 * The stack now looks like this:
	 * <caller frame>
	 * <saved r0-r8>
	 * <our frame>
	 * <saved fp, lr> <- fp
	 * <callee area> <- sp
	 */

	/* Call start_gsharedvt_call () */
	/* arg1 == info */
	arm_ldrx (code, ARMREG_R0, ARMREG_FP, info_offset);
	/* arg2 = caller stack area */
	arm_addx_imm (code, ARMREG_R1, ARMREG_FP, caller_reg_area_offset);
	/* arg3 == callee stack area */
	arm_addx_imm (code, ARMREG_R2, ARMREG_SP, callee_reg_area_offset);
	/* arg4 = mrgctx reg */
	arm_ldrx (code, ARMREG_R3, ARMREG_FP, rgctx_arg_reg_offset);

	if (aot)
		code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_arm_start_gsharedvt_call");
	else
		code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)mono_arm_start_gsharedvt_call);
	arm_blrx (code, ARMREG_IP0);

	/* Make the real method call */
	/* R0 contains the addr to call */
	arm_movx (code, ARMREG_IP1, ARMREG_R0);
	/* Load rgctx */
	arm_ldrx (code, MONO_ARCH_RGCTX_REG, ARMREG_FP, rgctx_arg_reg_offset);
	/* Load argument registers */
	// FIXME:
	for (i = 0; i < n_arg_regs; ++i)
		arm_ldrx (code, i, ARMREG_SP, callee_reg_area_offset + (i * 8));
	// FIXME: Only do this if needed
	for (i = 0; i < n_arg_fregs; ++i)
		arm_ldrfpx (code, i, ARMREG_SP, callee_reg_area_offset + ((n_arg_regs + i) * 8));
	/* Clear callee reg area */
	arm_addx_imm (code, ARMREG_SP, ARMREG_SP, ((n_arg_regs + n_arg_fregs) * sizeof (gpointer)) + 8);
	/* Make the call */
	arm_blrx (code, ARMREG_IP1);

	br_ret_index = 0;
	bcc_ret_index = 0;

	// FIXME: Use a switch
	/* Branch between IN/OUT cases */
	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in));
	br_out = code;
	arm_cbzx (code, ARMREG_IP1, 0);

	/* IN CASE */

	/* IP1 == return marshalling type */
	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	/* Continue if no marshalling required */
	// FIXME: Use cmpx_imm
	code = mono_arm_emit_imm64 (code, ARMREG_IP0, GSHAREDVT_RET_NONE);
	arm_cmpx (code, ARMREG_IP0, ARMREG_IP1);
	bcc_ret [bcc_ret_index ++] = code;
	arm_bcc (code, ARMCOND_EQ, 0);

	/* Compute vret area address in LR */
	arm_ldrx (code, ARMREG_LR, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_LR, ARMREG_LR, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot));
	arm_subx_imm (code, ARMREG_LR, ARMREG_LR, n_arg_regs + n_arg_fregs);
	arm_lslx (code, ARMREG_LR, ARMREG_LR, 3);
	arm_movspx (code, ARMREG_IP0, ARMREG_SP);
	arm_addx (code, ARMREG_LR, ARMREG_IP0, ARMREG_LR);

	/* Branch to specific marshalling code */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		code = mono_arm_emit_imm64 (code, ARMREG_IP0, i);
		arm_cmpx (code, ARMREG_IP0, ARMREG_IP1);
		br [i] = code;
		arm_bcc (code, ARMCOND_EQ, 0);
	}

	arm_brk (code, 0);

	/*
	 * The address of the return value area is in LR, have to load it into
	 * registers.
	 */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		mono_arm_patch (br [i], code, MONO_R_ARM64_BCC);
		switch (i) {
		case GSHAREDVT_RET_NONE:
			break;
		case GSHAREDVT_RET_I8:
			arm_ldrx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I1:
			arm_ldrsbx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_U1:
			arm_ldrb (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I2:
			arm_ldrshx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_U2:
			arm_ldrh (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I4:
			arm_ldrswx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_U4:
			arm_ldrw (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R8:
			arm_ldrfpx (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R4:
			arm_ldrfpw (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_IREGS_1:
		case GSHAREDVT_RET_IREGS_2:
		case GSHAREDVT_RET_IREGS_3:
		case GSHAREDVT_RET_IREGS_4:
		case GSHAREDVT_RET_IREGS_5:
		case GSHAREDVT_RET_IREGS_6:
		case GSHAREDVT_RET_IREGS_7:
		case GSHAREDVT_RET_IREGS_8: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_IREGS_1 + 1; ++j)
				arm_ldrx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR8_1:
		case GSHAREDVT_RET_HFAR8_2:
		case GSHAREDVT_RET_HFAR8_3:
		case GSHAREDVT_RET_HFAR8_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR8_1 + 1; ++j)
				arm_ldrfpx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR4_1:
		case GSHAREDVT_RET_HFAR4_2:
		case GSHAREDVT_RET_HFAR4_3:
		case GSHAREDVT_RET_HFAR4_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR4_1 + 1; ++j)
				arm_ldrfpw (code, j, ARMREG_LR, j * 4);
			break;
		}
		default:
			g_assert_not_reached ();
			break;
		}
		br_ret [br_ret_index ++] = code;
		arm_b (code, 0);
	}

	/* OUT CASE */
	mono_arm_patch (br_out, code, MONO_R_ARM64_CBZ);

	/* Compute vret area address in LR */
	arm_ldrx (code, ARMREG_LR, ARMREG_FP, caller_reg_area_offset + (ARMREG_R8 * 8));

	/* IP1 == return marshalling type */
	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	/* Branch to specific marshalling code */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		code = mono_arm_emit_imm64 (code, ARMREG_IP0, i);
		arm_cmpx (code, ARMREG_IP0, ARMREG_IP1);
		br [i] = code;
		arm_bcc (code, ARMCOND_EQ, 0);
	}

	/*
	 * The return value is in registers, need to save to the return area passed by the caller in
	 * R8.
	 */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		mono_arm_patch (br [i], code, MONO_R_ARM64_BCC);
		switch (i) {
		case GSHAREDVT_RET_NONE:
			break;
		case GSHAREDVT_RET_I8:
			arm_strx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I1:
		case GSHAREDVT_RET_U1:
			arm_strb (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I2:
		case GSHAREDVT_RET_U2:
			arm_strh (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I4:
		case GSHAREDVT_RET_U4:
			arm_strw (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R8:
			arm_strfpx (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R4:
			arm_strfpw (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_IREGS_1:
		case GSHAREDVT_RET_IREGS_2:
		case GSHAREDVT_RET_IREGS_3:
		case GSHAREDVT_RET_IREGS_4:
		case GSHAREDVT_RET_IREGS_5:
		case GSHAREDVT_RET_IREGS_6:
		case GSHAREDVT_RET_IREGS_7:
		case GSHAREDVT_RET_IREGS_8: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_IREGS_1 + 1; ++j)
				arm_strx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR8_1:
		case GSHAREDVT_RET_HFAR8_2:
		case GSHAREDVT_RET_HFAR8_3:
		case GSHAREDVT_RET_HFAR8_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR8_1 + 1; ++j)
				arm_strfpx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR4_1:
		case GSHAREDVT_RET_HFAR4_2:
		case GSHAREDVT_RET_HFAR4_3:
		case GSHAREDVT_RET_HFAR4_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR4_1 + 1; ++j)
				arm_strfpw (code, j, ARMREG_LR, j * 4);
			break;
		}
		default:
			arm_brk (code, i);
			break;
		}
		br_ret [br_ret_index ++] = code;
		arm_b (code, 0);
	}

	arm_brk (code, 0);

	for (i = 0; i < br_ret_index; ++i)
		mono_arm_patch (br_ret [i], code, MONO_R_ARM64_B);
	for (i = 0; i < bcc_ret_index; ++i)
		mono_arm_patch (bcc_ret [i], code, MONO_R_ARM64_BCC);

	/* Normal return */
	arm_movspx (code, ARMREG_SP, ARMREG_FP);
	arm_ldpx_post (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, offset);
	arm_retx (code, ARMREG_LR);

	g_assert ((code - buf) < buf_len);

	if (info)
		*info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops);

	mono_arch_flush_icache (buf, code - buf);
	return buf;
}
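
A rough C equivalent of the IN-case return marshalling emitted above, as a sketch only: 'vret' stands for the computed return-area address and 'regs'/'fregs' for the integer/FP return registers; the IREGS/HFAR cases, which copy several consecutive slots, are elided.

static void
marshal_in_return_sketch (int ret_marshal, guint8 *vret, gint64 *regs, double *fregs)
{
	switch (ret_marshal) {
	case GSHAREDVT_RET_NONE:
		break;
	case GSHAREDVT_RET_I1:
		regs [0] = *(gint8*)vret;
		break;
	case GSHAREDVT_RET_U1:
		regs [0] = *(guint8*)vret;
		break;
	case GSHAREDVT_RET_I2:
		regs [0] = *(gint16*)vret;
		break;
	case GSHAREDVT_RET_U2:
		regs [0] = *(guint16*)vret;
		break;
	case GSHAREDVT_RET_I4:
		regs [0] = *(gint32*)vret;   /* sign-extended, like ldrsw */
		break;
	case GSHAREDVT_RET_U4:
		regs [0] = *(guint32*)vret;  /* zero-extended, like ldrw */
		break;
	case GSHAREDVT_RET_I8:
		regs [0] = *(gint64*)vret;
		break;
	case GSHAREDVT_RET_R8:
		fregs [0] = *(double*)vret;
		break;
	default:
		/* IREGS_n / HFAR8_n / HFAR4_n load n consecutive slots; not shown here */
		break;
	}
}
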
Example #4
gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	guint8 *code, *buf;
	int buf_len, cfa_offset;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	guint8 *br_out, *br [16], *br_ret [16];
	int i, offset, arg_reg, npushed, info_offset, mrgctx_offset;
	int caller_reg_area_offset, caller_freg_area_offset, callee_reg_area_offset, callee_freg_area_offset;
	int lr_offset, fp, br_ret_index, args_size;

	buf_len = 784;
	buf = code = mono_global_codeman_reserve (buf_len);

	arg_reg = ARMREG_R0;
	/* Registers pushed by the arg trampoline */
	npushed = 4;

	// iOS ABI compatible frame
	fp = ARMREG_R7;
	cfa_offset = npushed * TARGET_SIZEOF_VOID_P;
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, cfa_offset);
	ARM_PUSH (code, (1 << fp) | (1 << ARMREG_LR));
	cfa_offset += 2 * TARGET_SIZEOF_VOID_P;
	mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, fp, (- cfa_offset));
	mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_LR, ((- cfa_offset) + 4));
	ARM_MOV_REG_REG (code, fp, ARMREG_SP);
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, fp);
	/* Allocate stack frame */
	ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 32 + (16 * sizeof (double)));
	if (MONO_ARCH_FRAME_ALIGNMENT > 8)
		ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, (MONO_ARCH_FRAME_ALIGNMENT - 8));
	offset = 4;
	info_offset = -offset;
	offset += 4;
	mrgctx_offset = -offset;
	offset += 4 * 4;
	callee_reg_area_offset = -offset;
	offset += 8 * 8;
	caller_freg_area_offset = -offset;
	offset += 8 * 8;
	callee_freg_area_offset = -offset;

	caller_reg_area_offset = cfa_offset - (npushed * TARGET_SIZEOF_VOID_P);
	lr_offset = 4;
	/* Save info struct which is in r0 */
	ARM_STR_IMM (code, arg_reg, fp, info_offset);
	/* Save rgctx reg */
	ARM_STR_IMM (code, MONO_ARCH_RGCTX_REG, fp, mrgctx_offset);
	/* Allocate callee area */
	ARM_LDR_IMM (code, ARMREG_IP, arg_reg, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage));
	ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
	/* Allocate callee register area just below the callee area so the slots are correct */
	ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 4 * TARGET_SIZEOF_VOID_P);
	if (mono_arm_is_hard_float ()) {
		/* Save caller fregs */
		ARM_SUB_REG_IMM8 (code, ARMREG_IP, fp, -caller_freg_area_offset);
		for (i = 0; i < 8; ++i)
			ARM_FSTD (code, i * 2, ARMREG_IP, (i * sizeof (double)));
	}

	/*
	 * The stack now looks like this:
	 * <caller frame>
	 * <saved r0-r3, lr>
	 * <saved fp> <- fp
	 * <our frame>
	 * <callee area> <- sp
	 */
	g_assert (mono_arm_thumb_supported ());

	/* Call start_gsharedvt_call () */
	/* 6 arguments; 2 of them go in stack slots, which we need to clean up after the call */
	args_size = 2 * TARGET_SIZEOF_VOID_P;
	ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, args_size);
	/* arg1 == info */
	ARM_LDR_IMM (code, ARMREG_R0, fp, info_offset);
	/* arg2 == caller stack area */
	ARM_ADD_REG_IMM8 (code, ARMREG_R1, fp, cfa_offset - 4 * TARGET_SIZEOF_VOID_P);
	/* arg3 == callee stack area */
	ARM_ADD_REG_IMM8 (code, ARMREG_R2, ARMREG_SP, args_size);
	/* arg4 == mrgctx reg */
	ARM_LDR_IMM (code, ARMREG_R3, fp, mrgctx_offset);
	/* arg5 == caller freg area */
	ARM_SUB_REG_IMM8 (code, ARMREG_IP, fp, -caller_freg_area_offset);
	ARM_STR_IMM (code, ARMREG_IP, ARMREG_SP, 0);
	/* arg6 == callee freg area */
	ARM_SUB_REG_IMM8 (code, ARMREG_IP, fp, -callee_freg_area_offset);
	ARM_STR_IMM (code, ARMREG_IP, ARMREG_SP, 4);
	/* Make the call */
	if (aot) {
		ji = mono_patch_info_list_prepend (ji, code - buf, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_arm_start_gsharedvt_call");
		ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
		ARM_B (code, 0);
		*(gpointer*)code = NULL;
		code += 4;
		ARM_LDR_REG_REG (code, ARMREG_IP, ARMREG_PC, ARMREG_IP);
	} else {
		ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
		ARM_B (code, 0);
		*(gpointer*)code = (gpointer)mono_arm_start_gsharedvt_call;
		code += 4;
	}
	ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
	code = emit_bx (code, ARMREG_IP);
	/* Clean up stack */
	ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, args_size);

	/* Make the real method call */
	/* R0 contains the addr to call */
	ARM_MOV_REG_REG (code, ARMREG_IP, ARMREG_R0);
	/* Load argument registers */
	ARM_LDM (code, ARMREG_SP, (1 << ARMREG_R0) | (1 << ARMREG_R1) | (1 << ARMREG_R2) | (1 << ARMREG_R3));
	if (mono_arm_is_hard_float ()) {
		/* Load argument fregs */
		ARM_SUB_REG_IMM8 (code, ARMREG_LR, fp, -callee_freg_area_offset);
		for (i = 0; i < 8; ++i)
			ARM_FLDD (code, i * 2, ARMREG_LR, (i * sizeof (double)));
	}
	/* Pop callee register area */
	ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 4 * TARGET_SIZEOF_VOID_P);
	/* Load rgctx */
	ARM_LDR_IMM (code, MONO_ARCH_RGCTX_REG, fp, mrgctx_offset);
	/* Make the call */
#if 0
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, addr));
#endif
	/* mono_arch_find_imt_method () depends on this */
	ARM_ADD_REG_IMM8 (code, ARMREG_LR, ARMREG_PC, 4);
	ARM_BX (code, ARMREG_IP);
	*((gpointer*)code) = NULL;
	code += 4;

	br_ret_index = 0;

	/* Branch between IN/OUT cases */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in));

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, 1);
	br_out = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* IN CASE */

	/* LR == return marshalling type */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	/* Continue if no marshalling required */
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_NONE);
	br_ret [br_ret_index ++] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);

	/* Compute vret area address in LR */
	ARM_LDR_IMM (code, ARMREG_LR, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_LR, ARMREG_LR, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot));
	/* The slot value is off by 4 */
	ARM_SUB_REG_IMM8 (code, ARMREG_LR, ARMREG_LR, 4);
	ARM_SHL_IMM (code, ARMREG_LR, ARMREG_LR, 2);
	ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ARMREG_SP);

	/* Branch to specific marshalling code */
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREG);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREGS);
	br [1] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_I1);
	br [2] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_U1);
	br [3] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_I2);
	br [4] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_U2);
	br [5] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R4);
	br [6] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R8);
	br [7] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);

	/* IN IREG case */
	arm_patch (br [0], code);
	ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* IN IREGS case */
	arm_patch (br [1], code);
	ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	ARM_LDR_IMM (code, ARMREG_R1, ARMREG_LR, 4);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* I1 case */
	arm_patch (br [2], code);
	ARM_LDRSB_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* U1 case */
	arm_patch (br [3], code);
	ARM_LDRB_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* I2 case */
	arm_patch (br [4], code);
	ARM_LDRSH_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* U2 case */
	arm_patch (br [5], code);
	ARM_LDRH_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* R4 case */
	arm_patch (br [6], code);
	ARM_FLDS (code, ARM_VFP_D0, ARMREG_LR, 0);
	code += 4;
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* R8 case */
	arm_patch (br [7], code);
	ARM_FLDD (code, ARM_VFP_D0, ARMREG_LR, 0);
	code += 4;
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);

	/* OUT CASE */
	arm_patch (br_out, code);

	/* Marshal return value */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREGS);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT IREGS case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save both registers for simplicity */
	ARM_STR_IMM (code, ARMREG_R0, ARMREG_IP, 0);
	ARM_STR_IMM (code, ARMREG_R1, ARMREG_IP, 4);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREG);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT IREG case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_STR_IMM (code, ARMREG_R0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_U1);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT U1 case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_STRB_IMM (code, ARMREG_R0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R4);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT R4 case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_FSTS (code, ARM_VFP_D0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R8);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT R8 case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_FSTD (code, ARM_VFP_D0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	/* OUT other cases */
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);

	for (i = 0; i < br_ret_index; ++i)
		arm_patch (br_ret [i], code);

	/* Normal return */
	/* Restore registers + stack */
	ARM_MOV_REG_REG (code, ARMREG_SP, fp);
	ARM_LDM (code, fp, (1 << fp) | (1 << ARMREG_LR));
	ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, cfa_offset);
	/* Return */
	ARM_BX (code, ARMREG_LR);

	g_assert ((code - buf) < buf_len);

	if (info)
		*info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops);

	mono_arch_flush_icache (buf, code - buf);
	return buf;
}
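
The OUT cases above repeat the same address computation five times; written as C it is just an indexed load from the saved caller argument registers. This is an illustrative sketch with hypothetical names, not runtime code.

static gpointer
load_vret_addr_sketch (guint8 *fp, int caller_reg_area_offset, int vret_arg_reg)
{
	/* r0-r3 were pushed by the arg trampoline just above the saved fp/lr pair;
	 * index them with the register number that carried the hidden vret argument */
	return *(gpointer*)(fp + caller_reg_area_offset + (vret_arg_reg * TARGET_SIZEOF_VOID_P));
}
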
Example #5
/*
 * mono_arch_create_sdb_trampoline:
 *
 *   Return a trampoline which captures the current context, passes it to
 * mini_get_dbg_callbacks ()->single_step_from_context ()/mini_get_dbg_callbacks ()->breakpoint_from_context (),
 * then restores the (potentially changed) context.
 */
guint8*
mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot)
{
	int tramp_size = 256;
	int framesize, ctx_offset, cfa_offset;
	guint8 *code, *buf;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;

	code = buf = mono_global_codeman_reserve (tramp_size);

	framesize = 0;

	/* Argument area */
	framesize += sizeof (target_mgreg_t);

	framesize = ALIGN_TO (framesize, 8);
	ctx_offset = framesize;
	framesize += sizeof (MonoContext);

	framesize = ALIGN_TO (framesize, MONO_ARCH_FRAME_ALIGNMENT);

	// CFA = sp + 4
	cfa_offset = 4;
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, 4);
	// IP saved at CFA - 4
	mono_add_unwind_op_offset (unwind_ops, code, buf, X86_NREG, -cfa_offset);

	x86_push_reg (code, X86_EBP);
	cfa_offset += sizeof (target_mgreg_t);
	mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, X86_EBP, - cfa_offset);

	x86_mov_reg_reg (code, X86_EBP, X86_ESP);
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, X86_EBP);
	/* The + 8 makes the stack aligned */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, framesize + 8);

	/* Initialize a MonoContext structure on the stack */
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, eax), X86_EAX, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, ebx), X86_EBX, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, ecx), X86_ECX, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, edx), X86_EDX, sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, 0, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, ebp), X86_EAX, sizeof (target_mgreg_t));
	x86_mov_reg_reg (code, X86_EAX, X86_EBP);
	x86_alu_reg_imm (code, X86_ADD, X86_EAX, cfa_offset);
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, esp), X86_EAX, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, esi), X86_ESI, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, edi), X86_EDI, sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, 4, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, eip), X86_EAX, sizeof (target_mgreg_t));

	/* Call the single step/breakpoint function in sdb */
	x86_lea_membase (code, X86_EAX, X86_ESP, ctx_offset);
	x86_mov_membase_reg (code, X86_ESP, 0, X86_EAX, sizeof (target_mgreg_t));

	if (aot) {
		x86_breakpoint (code);
	} else {
		if (single_step)
			x86_call_code (code, mini_get_dbg_callbacks ()->single_step_from_context);
		else
			x86_call_code (code, mini_get_dbg_callbacks ()->breakpoint_from_context);
	}

	/* Restore registers from ctx */
	/* Overwrite the saved ebp */
	x86_mov_reg_membase (code, X86_EAX, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, ebp), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, 0, X86_EAX, sizeof (target_mgreg_t));
	/* Overwrite saved eip */
	x86_mov_reg_membase (code, X86_EAX, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, eip), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, 4, X86_EAX, sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EAX, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, eax), sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EBX, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, ebx), sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_ECX, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, ecx), sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EDX, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, edx), sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_ESI, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, esi), sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EDI, X86_ESP, ctx_offset + G_STRUCT_OFFSET (MonoContext, edi), sizeof (target_mgreg_t));

	x86_leave (code);
	cfa_offset -= sizeof (target_mgreg_t);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, cfa_offset);
	x86_ret (code);

	mono_arch_flush_icache (buf, code - buf);
	MONO_PROFILER_RAISE (jit_code_buffer, (buf, code - buf, MONO_PROFILER_CODE_BUFFER_HELPER, NULL));
	g_assert (code - buf <= tramp_size);

	const char *tramp_name = single_step ? "sdb_single_step_trampoline" : "sdb_breakpoint_trampoline";
	*info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops);

	return buf;
}
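
A hypothetical usage sketch: the debugger support code would create both variants once and keep the returned addresses. mono_tramp_info_register () and its signature are assumptions; note that the function always writes *info, so a valid pointer must be passed.

static void
install_sdb_trampolines_sketch (void)
{
	MonoTrampInfo *ss_info = NULL, *bp_info = NULL;
	guint8 *ss_tramp, *bp_tramp;

	ss_tramp = mono_arch_create_sdb_trampoline (TRUE, &ss_info, FALSE);   /* single step */
	bp_tramp = mono_arch_create_sdb_trampoline (FALSE, &bp_info, FALSE);  /* breakpoint */
	/* Assumed helper: registers the trampolines' unwind info with the runtime */
	mono_tramp_info_register (ss_info, NULL);
	mono_tramp_info_register (bp_info, NULL);

	(void)ss_tramp;
	(void)bp_tramp;
}
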
Example #6
guchar*
mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot)
{
	const char *tramp_name;
	guint8 *buf, *code, *tramp, *br_ex_check;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	int i, offset, frame_size, regarray_offset, lmf_offset, caller_ip_offset, arg_offset;
	int cfa_offset; /* cfa = cfa_reg + cfa_offset */

	code = buf = mono_global_codeman_reserve (256);

	/* Note that there is a single argument to the trampoline
	 * and it is stored at: esp + pushed_args * sizeof (target_mgreg_t)
	 * the ret address is at: esp + (pushed_args + 1) * sizeof (target_mgreg_t)
	 */

	/* Compute frame offsets relative to the frame pointer %ebp */
	arg_offset = sizeof (target_mgreg_t);
	caller_ip_offset = 2 * sizeof (target_mgreg_t);
	offset = 0;
	offset += sizeof (MonoLMF);
	lmf_offset = -offset;
	offset += X86_NREG * sizeof (target_mgreg_t);
	regarray_offset = -offset;
	/* Argument area */
	offset += 4 * sizeof (target_mgreg_t);
	frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);

	/* ret addr and arg are on the stack */
	cfa_offset = 2 * sizeof (target_mgreg_t);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, cfa_offset);
	// IP saved at CFA - 4
	mono_add_unwind_op_offset (unwind_ops, code, buf, X86_NREG, -4);

	/* Allocate frame */
	x86_push_reg (code, X86_EBP);
	cfa_offset += sizeof (target_mgreg_t);
	mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, X86_EBP, -cfa_offset);

	x86_mov_reg_reg (code, X86_EBP, X86_ESP);
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, X86_EBP);

	/* There are three words on the stack, adding + 4 aligns the stack to 16, which is needed on osx */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, frame_size + sizeof (target_mgreg_t));

	/* Save all registers */
	for (i = X86_EAX; i <= X86_EDI; ++i) {
		int reg = i;

		if (i == X86_EBP) {
			/* Save original ebp */
			/* EAX is already saved */
			x86_mov_reg_membase (code, X86_EAX, X86_EBP, 0, sizeof (target_mgreg_t));
			reg = X86_EAX;
		} else if (i == X86_ESP) {
			/* Save original esp */
			/* EAX is already saved */
			x86_mov_reg_reg (code, X86_EAX, X86_EBP);
			/* Saved ebp + trampoline arg + return addr */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, 3 * sizeof (target_mgreg_t));
			reg = X86_EAX;
		}
		x86_mov_membase_reg (code, X86_EBP, regarray_offset + (i * sizeof (target_mgreg_t)), reg, sizeof (target_mgreg_t));
	}

	/* Setup LMF */
	/* eip */
	if (tramp_type == MONO_TRAMPOLINE_JUMP) {
		x86_mov_membase_imm (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, eip), 0, sizeof (target_mgreg_t));
	} else {
		x86_mov_reg_membase (code, X86_EAX, X86_EBP, caller_ip_offset, sizeof (target_mgreg_t));
		x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, eip), X86_EAX, sizeof (target_mgreg_t));
	}
	/* method */
	if ((tramp_type == MONO_TRAMPOLINE_JIT) || (tramp_type == MONO_TRAMPOLINE_JUMP)) {
		x86_mov_reg_membase (code, X86_EAX, X86_EBP, arg_offset, sizeof (target_mgreg_t));
		x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), X86_EAX, sizeof (target_mgreg_t));
	} else {
		x86_mov_membase_imm (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), 0, sizeof (target_mgreg_t));
	}
	/* esp */
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_ESP * sizeof (target_mgreg_t)), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esp), X86_EAX, sizeof (target_mgreg_t));
	/* callee save registers */
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_EBX * sizeof (target_mgreg_t)), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), X86_EAX, sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_EDI * sizeof (target_mgreg_t)), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), X86_EAX, sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_ESI * sizeof (target_mgreg_t)), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), X86_EAX, sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_EBP * sizeof (target_mgreg_t)), sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), X86_EAX, sizeof (target_mgreg_t));

	/* Push LMF */
	/* get the address of lmf for the current thread */
	if (aot) {
		code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr");
		x86_call_reg (code, X86_EAX);
	} else {
		x86_call_code (code, mono_get_lmf_addr);
	}
	/* lmf->lmf_addr = lmf_addr (%eax) */
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), X86_EAX, sizeof (target_mgreg_t));
	/* lmf->previous_lmf = *(lmf_addr) */
	x86_mov_reg_membase (code, X86_ECX, X86_EAX, 0, sizeof (target_mgreg_t));
	/* Signal to mono_arch_unwind_frame () that this is a trampoline frame */
	x86_alu_reg_imm (code, X86_ADD, X86_ECX, 1);
	x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), X86_ECX, sizeof (target_mgreg_t));
	/* *lmf_addr = lmf */
	x86_lea_membase (code, X86_ECX, X86_EBP, lmf_offset);
	x86_mov_membase_reg (code, X86_EAX, 0, X86_ECX, sizeof (target_mgreg_t));

	/* Call trampoline function */
	/* Arg 1 - registers */
	x86_lea_membase (code, X86_EAX, X86_EBP, regarray_offset);
	x86_mov_membase_reg (code, X86_ESP, (0 * sizeof (target_mgreg_t)), X86_EAX, sizeof (target_mgreg_t));
	/* Arg2 - calling code */
	if (tramp_type == MONO_TRAMPOLINE_JUMP) {
		x86_mov_membase_imm (code, X86_ESP, (1 * sizeof (target_mgreg_t)), 0, sizeof (target_mgreg_t));
	} else {
		x86_mov_reg_membase (code, X86_EAX, X86_EBP, caller_ip_offset, sizeof (target_mgreg_t));
		x86_mov_membase_reg (code, X86_ESP, (1 * sizeof (target_mgreg_t)), X86_EAX, sizeof (target_mgreg_t));
	}
	/* Arg3 - trampoline argument */
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, arg_offset, sizeof (target_mgreg_t));
	x86_mov_membase_reg (code, X86_ESP, (2 * sizeof (target_mgreg_t)), X86_EAX, sizeof (target_mgreg_t));
	/* Arg4 - trampoline address */
	// FIXME:
	x86_mov_membase_imm (code, X86_ESP, (3 * sizeof (target_mgreg_t)), 0, sizeof (target_mgreg_t));

#ifdef __APPLE__
	/* check the stack is aligned after the ret ip is pushed */
	/*
	x86_mov_reg_reg (code, X86_EDX, X86_ESP);
	x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
	x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
	x86_branch_disp (code, X86_CC_Z, 3, FALSE);
	x86_breakpoint (code);
	*/
#endif

	if (aot) {
		code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_TRAMPOLINE_FUNC_ADDR, GINT_TO_POINTER (tramp_type));
		x86_call_reg (code, X86_EAX);
	} else {
		tramp = (guint8*)mono_get_trampoline_func (tramp_type);
		x86_call_code (code, tramp);
	}

	/*
	 * Overwrite the trampoline argument with the address we need to jump to,
	 * to free %eax.
	 */
	x86_mov_membase_reg (code, X86_EBP, arg_offset, X86_EAX, 4);

	/* Restore LMF */
	x86_mov_reg_membase (code, X86_EAX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), sizeof (target_mgreg_t));
	x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), sizeof (target_mgreg_t));
	x86_alu_reg_imm (code, X86_SUB, X86_ECX, 1);
	x86_mov_membase_reg (code, X86_EAX, 0, X86_ECX, sizeof (target_mgreg_t));

	/* Check for interruptions */
	if (aot) {
		code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_thread_force_interruption_checkpoint_noraise");
		x86_call_reg (code, X86_EAX);
	} else {
		x86_call_code (code, (guint8*)mono_thread_force_interruption_checkpoint_noraise);
	}

	x86_test_reg_reg (code, X86_EAX, X86_EAX);
	br_ex_check = code;
	x86_branch8 (code, X86_CC_Z, -1, 1);

	/*
	 * Exception case:
	 * We have an exception we want to throw in the caller's frame, so pop
	 * the trampoline frame and throw from the caller.
	 */
	x86_leave (code);
	/*
	 * The exception is in eax.
	 * We are calling the throw trampoline used by OP_THROW, so we have to setup the
	 * stack to look the same.
	 * The stack contains the ret addr, and the trampoline argument, the throw trampoline
	 * expects it to contain the ret addr and the exception. It also needs to be aligned
	 * after the exception is pushed.
	 */
	/* Align stack */
	x86_push_reg (code, X86_EAX);
	/* Push the exception */
	x86_push_reg (code, X86_EAX);
	//x86_breakpoint (code);
	/* Push the original return value */
	x86_push_membase (code, X86_ESP, 3 * 4);
	/*
	 * EH is initialized after trampolines, so get the address of the variable
	 * which contains throw_exception, and load it from there.
	 */
	if (aot) {
		/* Not really a jit icall */
		code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "rethrow_preserve_exception_addr");
	} else {
		x86_mov_reg_imm (code, X86_ECX, (guint8*)mono_get_rethrow_preserve_exception_addr ());
	}
	x86_mov_reg_membase (code, X86_ECX, X86_ECX, 0, sizeof (target_mgreg_t));
	x86_jump_reg (code, X86_ECX);

	/* Normal case */
	mono_x86_patch (br_ex_check, code);

	/* Restore registers */
	for (i = X86_EAX; i <= X86_EDI; ++i) {
		if (i == X86_ESP || i == X86_EBP)
			continue;
		if (i == X86_EAX && tramp_type != MONO_TRAMPOLINE_AOT_PLT)
			continue;
		x86_mov_reg_membase (code, i, X86_EBP, regarray_offset + (i * 4), 4);
	}

	/* Restore frame */
	x86_leave (code);
	cfa_offset -= sizeof (target_mgreg_t);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, cfa_offset);
	mono_add_unwind_op_same_value (unwind_ops, code, buf, X86_EBP);

	if (MONO_TRAMPOLINE_TYPE_MUST_RETURN (tramp_type)) {
		/* Load the value returned by the trampoline */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 0, 4);
		/* The trampoline returns normally, pop the trampoline argument */
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
		cfa_offset -= sizeof (target_mgreg_t);
		mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
		x86_ret (code);
	} else {
		x86_ret (code);
	}

	g_assert ((code - buf) <= 256);
	MONO_PROFILER_RAISE (jit_code_buffer, (buf, code - buf, MONO_PROFILER_CODE_BUFFER_HELPER, NULL));

	tramp_name = mono_get_generic_trampoline_name (tramp_type);
	*info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops);

	return buf;
}
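
A hypothetical usage sketch: mini creates one generic trampoline per MonoTrampolineType during startup; MONO_TRAMPOLINE_JIT is one of the types the example above special-cases. The registration helper is an assumption.

static void
create_jit_trampoline_sketch (void)
{
	MonoTrampInfo *tinfo = NULL;
	guchar *tramp;

	tramp = mono_arch_create_generic_trampoline (MONO_TRAMPOLINE_JIT, &tinfo, FALSE);
	/* Assumed helper: registers the trampoline's unwind info with the runtime */
	mono_tramp_info_register (tinfo, NULL);
	(void)tramp;
}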