gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	guint8 *code, *buf;
	int buf_len, cfa_offset;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	guint8 *br_out, *br [16], *br_ret [16];
	int i, offset, arg_reg, npushed, info_offset, mrgctx_offset;
	int caller_reg_area_offset, caller_freg_area_offset, callee_reg_area_offset, callee_freg_area_offset;
	int lr_offset, fp, br_ret_index, args_size;

	buf_len = 784;
	buf = code = mono_global_codeman_reserve (buf_len);

	arg_reg = ARMREG_R0;
	/* Registers pushed by the arg trampoline */
	npushed = 4;

	// ios abi compatible frame
	fp = ARMREG_R7;
	cfa_offset = npushed * TARGET_SIZEOF_VOID_P;
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, cfa_offset);
	ARM_PUSH (code, (1 << fp) | (1 << ARMREG_LR));
	cfa_offset += 2 * TARGET_SIZEOF_VOID_P;
	mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, fp, (- cfa_offset));
	mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_LR, ((- cfa_offset) + 4));
	ARM_MOV_REG_REG (code, fp, ARMREG_SP);
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, fp);
	/* Allocate stack frame */
	ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 32 + (16 * sizeof (double)));
	if (MONO_ARCH_FRAME_ALIGNMENT > 8)
		ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, (MONO_ARCH_FRAME_ALIGNMENT - 8));
	offset = 4;
	info_offset = -offset;
	offset += 4;
	mrgctx_offset = -offset;
	offset += 4 * 4;
	callee_reg_area_offset = -offset;
	offset += 8 * 8;
	caller_freg_area_offset = -offset;
	offset += 8 * 8;
	callee_freg_area_offset = -offset;

	caller_reg_area_offset = cfa_offset - (npushed * TARGET_SIZEOF_VOID_P);
	lr_offset = 4;
	/* Save info struct which is in r0 */
	ARM_STR_IMM (code, arg_reg, fp, info_offset);
	/* Save rgctx reg */
	ARM_STR_IMM (code, MONO_ARCH_RGCTX_REG, fp, mrgctx_offset);
	/* Allocate callee area */
	ARM_LDR_IMM (code, ARMREG_IP, arg_reg, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage));
	ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
	/* Allocate callee register area just below the callee area so the slots are correct */
	ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 4 * TARGET_SIZEOF_VOID_P);
	if (mono_arm_is_hard_float ()) {
		/* Save caller fregs */
		ARM_SUB_REG_IMM8 (code, ARMREG_IP, fp, -caller_freg_area_offset);
		for (i = 0; i < 8; ++i)
			ARM_FSTD (code, i * 2, ARMREG_IP, (i * sizeof (double)));
	}

	/*
	 * The stack now looks like this:
	 * <caller frame>
	 * <saved r0-r3, lr>
	 * <saved fp> <- fp
	 * <our frame>
	 * <callee area> <- sp
	 */
	g_assert (mono_arm_thumb_supported ());

	/* Call start_gsharedvt_call () */
	/* 6 arguments, needs 2 stack slot, need to clean it up after the call */
	args_size = 2 * TARGET_SIZEOF_VOID_P;
	ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, args_size);
	/* arg1 == info */
	ARM_LDR_IMM (code, ARMREG_R0, fp, info_offset);
	/* arg2 == caller stack area */
	ARM_ADD_REG_IMM8 (code, ARMREG_R1, fp, cfa_offset - 4 * TARGET_SIZEOF_VOID_P);
	/* arg3 == callee stack area */
	ARM_ADD_REG_IMM8 (code, ARMREG_R2, ARMREG_SP, args_size);
	/* arg4 == mrgctx reg */
	ARM_LDR_IMM (code, ARMREG_R3, fp, mrgctx_offset);
	/* arg5 == caller freg area */
	ARM_SUB_REG_IMM8 (code, ARMREG_IP, fp, -caller_freg_area_offset);
	ARM_STR_IMM (code, ARMREG_IP, ARMREG_SP, 0);
	/* arg6 == callee freg area */
	ARM_SUB_REG_IMM8 (code, ARMREG_IP, fp, -callee_freg_area_offset);
	ARM_STR_IMM (code, ARMREG_IP, ARMREG_SP, 4);
	/* Make the call */
	if (aot) {
		ji = mono_patch_info_list_prepend (ji, code - buf, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_arm_start_gsharedvt_call");
		ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
		ARM_B (code, 0);
		*(gpointer*)code = NULL;
		code += 4;
		ARM_LDR_REG_REG (code, ARMREG_IP, ARMREG_PC, ARMREG_IP);
	} else {
		ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
		ARM_B (code, 0);
		*(gpointer*)code = (gpointer)mono_arm_start_gsharedvt_call;
		code += 4;
	}
	ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
	code = emit_bx (code, ARMREG_IP);
	/* Clean up stack */
	ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, args_size);

	/* Make the real method call */
	/* R0 contains the addr to call */
	ARM_MOV_REG_REG (code, ARMREG_IP, ARMREG_R0);
	/* Load argument registers */
	ARM_LDM (code, ARMREG_SP, (1 << ARMREG_R0) | (1 << ARMREG_R1) | (1 << ARMREG_R2) | (1 << ARMREG_R3));
	if (mono_arm_is_hard_float ()) {
		/* Load argument fregs */
		ARM_SUB_REG_IMM8 (code, ARMREG_LR, fp, -callee_freg_area_offset);
		for (i = 0; i < 8; ++i)
			ARM_FLDD (code, i * 2, ARMREG_LR, (i * sizeof (double)));
	}
	/* Pop callee register area */
	ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 4 * TARGET_SIZEOF_VOID_P);
	/* Load rgctx */
	ARM_LDR_IMM (code, MONO_ARCH_RGCTX_REG, fp, mrgctx_offset);
	/* Make the call */
#if 0
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, addr));
#endif
	/* mono_arch_find_imt_method () depends on this */
	ARM_ADD_REG_IMM8 (code, ARMREG_LR, ARMREG_PC, 4);
	ARM_BX (code, ARMREG_IP);
	*((gpointer*)code) = NULL;
	code += 4;

	br_ret_index = 0;

	/* Branch between IN/OUT cases */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in));

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, 1);
	br_out = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* IN CASE */

	/* LR == return marshalling type */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	/* Continue if no marshalling required */
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_NONE);
	br_ret [br_ret_index ++] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);

	/* Compute vret area address in LR */
	ARM_LDR_IMM (code, ARMREG_LR, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_LR, ARMREG_LR, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot));
	/* The slot value is off by 4 */
	ARM_SUB_REG_IMM8 (code, ARMREG_LR, ARMREG_LR, 4);
	ARM_SHL_IMM (code, ARMREG_LR, ARMREG_LR, 2);
	ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ARMREG_SP);

	/* Branch to specific marshalling code */
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREG);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREGS);
	br [1] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_I1);
	br [2] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_U1);
	br [3] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_I2);
	br [4] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_U2);
	br [5] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R4);
	br [6] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R8);
	br [7] = code;
	ARM_B_COND (code, ARMCOND_EQ, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);

	/* IN IREG case */
	arm_patch (br [0], code);
	ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* IN IREGS case */
	arm_patch (br [1], code);
	ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	ARM_LDR_IMM (code, ARMREG_R1, ARMREG_LR, 4);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* I1 case */
	arm_patch (br [2], code);
	ARM_LDRSB_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* U1 case */
	arm_patch (br [3], code);
	ARM_LDRB_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* I2 case */
	arm_patch (br [4], code);
	ARM_LDRSH_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* U2 case */
	arm_patch (br [5], code);
	ARM_LDRH_IMM (code, ARMREG_R0, ARMREG_LR, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* R4 case */
	arm_patch (br [6], code);
	ARM_FLDS (code, ARM_VFP_D0, ARMREG_LR, 0);
	code += 4;
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	/* R8 case */
	arm_patch (br [7], code);
	ARM_FLDD (code, ARM_VFP_D0, ARMREG_LR, 0);
	code += 4;
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);

	/* OUT CASE */
	arm_patch (br_out, code);

	/* Marshal return value */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREGS);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT IREGS case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save both registers for simplicity */
	ARM_STR_IMM (code, ARMREG_R0, ARMREG_IP, 0);
	ARM_STR_IMM (code, ARMREG_R1, ARMREG_IP, 4);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_IREG);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT IREG case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_STR_IMM (code, ARMREG_R0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_U1);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT U1 case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_STRB_IMM (code, ARMREG_R0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R4);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT R4 case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_FSTS (code, ARM_VFP_D0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	ARM_CMP_REG_IMM8 (code, ARMREG_IP, GSHAREDVT_RET_VFP_R8);
	br [0] = code;
	ARM_B_COND (code, ARMCOND_NE, 0);

	/* OUT R8 case */
	/* Load vtype ret addr from the caller arg regs */
	ARM_LDR_IMM (code, ARMREG_IP, fp, info_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_arg_reg));
	ARM_SHL_IMM (code, ARMREG_IP, ARMREG_IP, 2);
	ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, fp);
	ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_IP, caller_reg_area_offset);
	ARM_LDR_IMM (code, ARMREG_IP, ARMREG_IP, 0);
	/* Save the return value to the buffer pointed to by the vret addr */
	ARM_FSTD (code, ARM_VFP_D0, ARMREG_IP, 0);
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);
	arm_patch (br [0], code);

	/* OUT other cases */
	br_ret [br_ret_index ++] = code;
	ARM_B (code, 0);

	for (i = 0; i < br_ret_index; ++i)
		arm_patch (br_ret [i], code);

	/* Normal return */
	/* Restore registers + stack */
	ARM_MOV_REG_REG (code, ARMREG_SP, fp);
	ARM_LDM (code, fp, (1 << fp) | (1 << ARMREG_LR));
	ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, cfa_offset);
	/* Return */
	ARM_BX (code, ARMREG_LR);

	g_assert ((code - buf) < buf_len);

	if (info)
		*info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops);

	mono_arch_flush_icache (buf, code - buf);
	return buf;
}
Exemple #2
0
/*
 * Refer to ARM Procedure Call Standard (APCS) for more info.
 */
MonoPIFunc mono_arch_create_trampoline (MonoMethodSignature *sig, gboolean string_ctor)
{
    MonoType* param;
    MonoPIFunc code_buff;
    arminstr_t* p;
    guint32 code_size, stack_size;
    guint32 simple_type;
    int i, hasthis, aregs, regc, stack_offs;
    int this_loaded;
    guchar reg_alloc [ARM_NUM_ARG_REGS];

    /* pessimistic estimation for prologue/epilogue size */
    code_size = 16 + 16;
    /* push/pop work regs */
    code_size += 2;
    /* call */
    code_size += 2;
    /* handle retval */
    code_size += 2;

    stack_size = 0;
    hasthis = sig->hasthis ? 1 : 0;

    aregs = ARM_NUM_ARG_REGS - hasthis;

    for (i = 0, regc = aregs; i < sig->param_count; ++i) {
        param = sig->params [i];

        /* keep track of argument sizes */
        if (i < ARM_NUM_ARG_REGS) reg_alloc [i] = 0;

        if (param->byref) {
            if (regc > 0) {
                code_size += 1;
                reg_alloc [i] = regc;
                --regc;
            } else {
                code_size += 2;
                stack_size += sizeof(gpointer);
            }
        } else {
            simple_type = param->type;
enum_calc_size:
            switch (simple_type) {
            case MONO_TYPE_BOOLEAN:
            case MONO_TYPE_CHAR:
            case MONO_TYPE_I1:
            case MONO_TYPE_U1:
            case MONO_TYPE_I2:
            case MONO_TYPE_U2:
            case MONO_TYPE_I4:
            case MONO_TYPE_U4:
            case MONO_TYPE_I:
            case MONO_TYPE_U:
            case MONO_TYPE_PTR:
            case MONO_TYPE_R4:
            case MONO_TYPE_SZARRAY:
            case MONO_TYPE_CLASS:
            case MONO_TYPE_OBJECT:
            case MONO_TYPE_STRING:
                if (regc > 0) {
                    /* register arg */
                    code_size += 1;
                    reg_alloc [i] = regc;
                    --regc;
                } else {
                    /* stack arg */
                    code_size += 2;
                    stack_size += 4;
                }
                break;
            case MONO_TYPE_I8:
            case MONO_TYPE_U8:
            case MONO_TYPE_R8:
                /* keep track of argument sizes */
                if (regc > 1) {
                    /* fits into registers, two LDRs */
                    code_size += 2;
                    reg_alloc [i] = regc;
                    regc -= 2;
                } else if (regc > 0) {
                    /* first half fits into register, one LDR */
                    code_size += 1;
                    reg_alloc [i] = regc;
                    --regc;
                    /* the rest on the stack, LDR/STR */
                    code_size += 2;
                    stack_size += 4;
                } else {
                    /* stack arg, 4 instrs - 2x(LDR/STR) */
                    code_size += 4;
                    stack_size += 2 * 4;
                }
                break;
            case MONO_TYPE_VALUETYPE:
                if (param->data.klass->enumtype) {
                    simple_type = param->data.klass->enum_basetype->type;
                    goto enum_calc_size;
                }

                if (mono_class_value_size(param->data.klass, NULL) != 4) {
                    g_error("can only marshal enums, not generic structures (size: %d)", mono_class_value_size(param->data.klass, NULL));
                }
                if (regc > 0) {
                    /* register arg */
                    code_size += 1;
                    reg_alloc [i] = regc;
                    --regc;
                } else {
                    /* stack arg */
                    code_size += 2;
                    stack_size += 4;
                }
                break;
            default :
                break;
            }
        }
    }

    code_buff = (MonoPIFunc)alloc_code_buff(code_size);
    p = (arminstr_t*)code_buff;

    /* prologue */
    p = arm_emit_lean_prologue(p, stack_size,
                               /* save workset (r4-r7) */
                               (1 << ARMREG_R4) | (1 << ARMREG_R5) | (1 << ARMREG_R6) | (1 << ARMREG_R7));


    /* copy args into workset */
    /* callme - always present */
    ARM_MOV_REG_REG(p, ARMREG_R4, ARMREG_A1);
    /* retval */
    if (sig->ret->byref || string_ctor || (sig->ret->type != MONO_TYPE_VOID)) {
        ARM_MOV_REG_REG(p, ARMREG_R5, ARMREG_A2);
    }
    /* this_obj */
    if (sig->hasthis) {
        this_loaded = 0;
        if (stack_size == 0) {
            ARM_MOV_REG_REG(p, ARMREG_A1, ARMREG_A3);
            this_loaded = 1;
        } else {
            ARM_MOV_REG_REG(p, ARMREG_R6, ARMREG_A3);
        }
    }
    /* args */
    if (sig->param_count != 0) {
        ARM_MOV_REG_REG(p, ARMREG_R7, ARMREG_A4);
    }

    stack_offs = stack_size;

    /* handle arguments */
    /* in reverse order so we could use r0 (arg1) for memory transfers */
    for (i = sig->param_count; --i >= 0;) {
        param = sig->params [i];
        if (param->byref) {
            if (i < aregs && reg_alloc[i] > 0) {
                ARM_LDR_IMM(p, ARMREG_A1 + i, REG_ARGP, i*ARG_SIZE);
            } else {
                stack_offs -= sizeof(armword_t);
                ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
                ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
            }
        } else {
            simple_type = param->type;
enum_marshal:
            switch (simple_type) {
            case MONO_TYPE_BOOLEAN:
            case MONO_TYPE_CHAR:
            case MONO_TYPE_I1:
            case MONO_TYPE_U1:
            case MONO_TYPE_I2:
            case MONO_TYPE_U2:
            case MONO_TYPE_I4:
            case MONO_TYPE_U4:
            case MONO_TYPE_I:
            case MONO_TYPE_U:
            case MONO_TYPE_PTR:
            case MONO_TYPE_R4:
            case MONO_TYPE_SZARRAY:
            case MONO_TYPE_CLASS:
            case MONO_TYPE_OBJECT:
            case MONO_TYPE_STRING:
                if (i < aregs && reg_alloc [i] > 0) {
                    /* pass in register */
                    ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]), REG_ARGP, i*ARG_SIZE);
                } else {
                    stack_offs -= sizeof(armword_t);
                    ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
                    ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
                }
                break;
            case MONO_TYPE_I8:
            case MONO_TYPE_U8:
            case MONO_TYPE_R8:
                if (i < aregs && reg_alloc [i] > 0) {
                    if (reg_alloc [i] > 1) {
                        /* pass in registers */
                        ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]), REG_ARGP, i*ARG_SIZE);
                        ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]) + 1, REG_ARGP, i*ARG_SIZE + 4);
                    } else {
                        stack_offs -= sizeof(armword_t);
                        ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE + 4);
                        ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
                        ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]), REG_ARGP, i*ARG_SIZE);
                    }
                } else {
                    /* two words transferred on the stack */
                    stack_offs -= 2*sizeof(armword_t);
                    ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
                    ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
                    ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE + 4);
                    ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs + 4);
                }
                break;
            case MONO_TYPE_VALUETYPE:
                if (param->data.klass->enumtype) {
                    /* it's an enum value, proceed based on its base type */
                    simple_type = param->data.klass->enum_basetype->type;
                    goto enum_marshal;
                } else {
                    if (i < aregs && reg_alloc[i] > 0) {
                        int vtreg = ARMREG_A1 + hasthis +
                                    hasthis + (aregs - reg_alloc[i]);
                        ARM_LDR_IMM(p, vtreg, REG_ARGP, i * ARG_SIZE);
                        ARM_LDR_IMM(p, vtreg, vtreg, 0);
                    } else {
                        stack_offs -= sizeof(armword_t);
                        ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i * ARG_SIZE);
                        ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R0, 0);
                        ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
                    }
                }
                break;

            default:
                break;
            }
        }
    }

    if (sig->hasthis && !this_loaded) {
        /* [this] always passed in A1, regardless of sig->call_convention */
        ARM_MOV_REG_REG(p, ARMREG_A1, REG_THIS);
    }

    /* call [func] */
    ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
    ARM_MOV_REG_REG(p, ARMREG_PC, REG_FUNC_ADDR);

    /* handle retval */
    if (sig->ret->byref || string_ctor) {
        ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
    } else {
        simple_type = sig->ret->type;
enum_retvalue:
        switch (simple_type) {
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
            ARM_STRB_IMM(p, ARMREG_R0, REG_RETVAL, 0);
            break;
        case MONO_TYPE_CHAR:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
            ARM_STRH_IMM(p, ARMREG_R0, REG_RETVAL, 0);
            break;
        /*
         * A 32-bit integer and integer-equivalent return value
         * is returned in R0.
         * Single-precision floating-point values are returned in R0.
         */
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_R4:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_STRING:
            ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
            break;
        /*
         * A 64-bit integer is returned in R0 and R1.
         * Double-precision floating-point values are returned in R0 and R1.
         */
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
        case MONO_TYPE_R8:
            ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
            ARM_STR_IMM(p, ARMREG_R1, REG_RETVAL, 4);
            break;
        case MONO_TYPE_VALUETYPE:
            if (sig->ret->data.klass->enumtype) {
                simple_type = sig->ret->data.klass->enum_basetype->type;
                goto enum_retvalue;
            }
            break;
        case MONO_TYPE_VOID:
            break;
        default:
            break;
        }
    }

    p = arm_emit_std_epilogue(p, stack_size,
                              /* restore R4-R7 */
                              (1 << ARMREG_R4) | (1 << ARMREG_R5) | (1 << ARMREG_R6) | (1 << ARMREG_R7));

    flush_icache();

#ifdef ARM_DUMP_DISASM
    _armdis_decode((arminstr_t*)code_buff, ((guint8*)p) - ((guint8*)code_buff));
#endif

    return code_buff;
}