gpointer mono_arch_get_static_rgctx_trampoline (gpointer arg, gpointer addr) { guint8 *code, *start; int buf_len; GSList *unwind_ops; MonoDomain *domain = mono_domain_get (); buf_len = 10; start = code = mono_domain_code_reserve (domain, buf_len); unwind_ops = mono_arch_get_cie_program (); x86_mov_reg_imm (code, MONO_ARCH_RGCTX_REG, arg); x86_jump_code (code, addr); g_assert ((code - start) <= buf_len); mono_arch_flush_icache (start, code - start); MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_GENERICS_TRAMPOLINE, NULL)); mono_tramp_info_register (mono_tramp_info_create (NULL, start, code - start, NULL, unwind_ops), domain); return start; }
void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); if(dst.mod == mod_REG) x86_mov_reg_imm(p, dst, imm); else { emit_1ub(p, 0xc7); emit_modrm_noreg(p, 0, dst); emit_1i(p, imm); } }
/* * get_throw_trampoline: * * Generate a call to mono_x86_throw_exception/ * mono_x86_throw_corlib_exception. * If LLVM is true, generate code which assumes the caller is LLVM generated code, * which doesn't push the arguments. */ static guint8* get_throw_trampoline (const char *name, gboolean rethrow, gboolean llvm, gboolean corlib, gboolean llvm_abs, gboolean resume_unwind, MonoTrampInfo **info, gboolean aot) { guint8 *start, *code, *labels [16]; int i, stack_size, stack_offset, arg_offsets [5], regs_offset; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; guint kMaxCodeSize = 192; start = code = mono_global_codeman_reserve (kMaxCodeSize); stack_size = 128; /* * On apple, the stack is misaligned by the pushing of the return address. */ if (!llvm && corlib) /* On OSX, we don't generate alignment code to save space */ stack_size += 4; else stack_size += MONO_ARCH_FRAME_ALIGNMENT - 4; /* * The stack looks like this: * <pc offset> (only if corlib is TRUE) * <exception object>/<type token> * <return addr> <- esp (unaligned on apple) */ unwind_ops = mono_arch_get_cie_program (); /* Alloc frame */ x86_alu_reg_imm (code, X86_SUB, X86_ESP, stack_size); mono_add_unwind_op_def_cfa_offset (unwind_ops, code, start, stack_size + 4); arg_offsets [0] = 0; arg_offsets [1] = 4; arg_offsets [2] = 8; arg_offsets [3] = 12; regs_offset = 16; /* Save registers */ for (i = 0; i < X86_NREG; ++i) if (i != X86_ESP) x86_mov_membase_reg (code, X86_ESP, regs_offset + (i * 4), i, 4); /* Calculate the offset between the current sp and the sp of the caller */ if (llvm) { /* LLVM doesn't push the arguments */ stack_offset = stack_size + 4; } else { if (corlib) { /* Two arguments */ stack_offset = stack_size + 4 + 8; #ifdef __APPLE__ /* We don't generate stack alignment code on osx to save space */ #endif } else { /* One argument + stack alignment */ stack_offset = stack_size + 4 + 4; #ifdef __APPLE__ /* Pop the alignment added by OP_THROW too */ stack_offset += MONO_ARCH_FRAME_ALIGNMENT - 4; #else if (mono_do_x86_stack_align) stack_offset += MONO_ARCH_FRAME_ALIGNMENT - 4; #endif } } /* Save ESP */ x86_lea_membase (code, X86_EAX, X86_ESP, stack_offset); x86_mov_membase_reg (code, X86_ESP, regs_offset + (X86_ESP * 4), X86_EAX, 4); /* Clear fp stack */ labels [0] = code; x86_fnstsw (code); x86_shift_reg_imm (code, X86_SHR, X86_EAX, 11); x86_alu_reg_imm (code, X86_AND, X86_EAX, 7); x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0); labels [1] = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE); x86_fstp (code, 0); x86_jump_code (code, labels [0]); mono_x86_patch (labels [1], code); /* Set arg1 == regs */ x86_lea_membase (code, X86_EAX, X86_ESP, regs_offset); x86_mov_membase_reg (code, X86_ESP, arg_offsets [0], X86_EAX, 4); /* Set arg2 == exc/ex_token_index */ if (resume_unwind) x86_mov_reg_imm (code, X86_EAX, 0); else x86_mov_reg_membase (code, X86_EAX, X86_ESP, stack_size + 4, 4); x86_mov_membase_reg (code, X86_ESP, arg_offsets [1], X86_EAX, 4); /* Set arg3 == eip */ if (llvm_abs) x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX); else x86_mov_reg_membase (code, X86_EAX, X86_ESP, stack_size, 4); x86_mov_membase_reg (code, X86_ESP, arg_offsets [2], X86_EAX, 4); /* Set arg4 == rethrow/pc_offset */ if (resume_unwind) { x86_mov_membase_imm (code, X86_ESP, arg_offsets [3], 0, 4); } else if (corlib) { x86_mov_reg_membase (code, X86_EAX, X86_ESP, stack_size + 8, 4); if (llvm_abs) { /* * The caller is LLVM code which passes the absolute address not a pc offset, * so compensate by passing 0 as 'ip' and passing the negated abs address as * the pc offset. */ x86_neg_reg (code, X86_EAX); } x86_mov_membase_reg (code, X86_ESP, arg_offsets [3], X86_EAX, 4); } else { x86_mov_membase_imm (code, X86_ESP, arg_offsets [3], rethrow, 4); } /* Make the call */ if (aot) { // This can be called from runtime code, which can't guarantee that // ebx contains the got address. // So emit the got address loading code too code = mono_arch_emit_load_got_addr (start, code, NULL, &ji); code = mono_arch_emit_load_aotconst (start, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, corlib ? "mono_x86_throw_corlib_exception" : "mono_x86_throw_exception"); x86_call_reg (code, X86_EAX); } else { x86_call_code (code, resume_unwind ? (gpointer)(mono_x86_resume_unwind) : (corlib ? (gpointer)mono_x86_throw_corlib_exception : (gpointer)mono_x86_throw_exception)); } x86_breakpoint (code); g_assert ((code - start) < kMaxCodeSize); if (info) *info = mono_tramp_info_create (name, start, code - start, ji, unwind_ops); else { GSList *l; for (l = unwind_ops; l; l = l->next) g_free (l->data); g_slist_free (unwind_ops); } mono_arch_flush_icache (start, code - start); mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL); return start; }
guchar* mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot) { const char *tramp_name; guint8 *buf, *code, *tramp, *br_ex_check; GSList *unwind_ops = NULL; MonoJumpInfo *ji = NULL; int i, offset, frame_size, regarray_offset, lmf_offset, caller_ip_offset, arg_offset; int cfa_offset; /* cfa = cfa_reg + cfa_offset */ code = buf = mono_global_codeman_reserve (256); /* Note that there is a single argument to the trampoline * and it is stored at: esp + pushed_args * sizeof (target_mgreg_t) * the ret address is at: esp + (pushed_args + 1) * sizeof (target_mgreg_t) */ /* Compute frame offsets relative to the frame pointer %ebp */ arg_offset = sizeof (target_mgreg_t); caller_ip_offset = 2 * sizeof (target_mgreg_t); offset = 0; offset += sizeof (MonoLMF); lmf_offset = -offset; offset += X86_NREG * sizeof (target_mgreg_t); regarray_offset = -offset; /* Argument area */ offset += 4 * sizeof (target_mgreg_t); frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT); /* ret addr and arg are on the stack */ cfa_offset = 2 * sizeof (target_mgreg_t); mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, cfa_offset); // IP saved at CFA - 4 mono_add_unwind_op_offset (unwind_ops, code, buf, X86_NREG, -4); /* Allocate frame */ x86_push_reg (code, X86_EBP); cfa_offset += sizeof (target_mgreg_t); mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset); mono_add_unwind_op_offset (unwind_ops, code, buf, X86_EBP, -cfa_offset); x86_mov_reg_reg (code, X86_EBP, X86_ESP); mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, X86_EBP); /* There are three words on the stack, adding + 4 aligns the stack to 16, which is needed on osx */ x86_alu_reg_imm (code, X86_SUB, X86_ESP, frame_size + sizeof (target_mgreg_t)); /* Save all registers */ for (i = X86_EAX; i <= X86_EDI; ++i) { int reg = i; if (i == X86_EBP) { /* Save original ebp */ /* EAX is already saved */ x86_mov_reg_membase (code, X86_EAX, X86_EBP, 0, sizeof (target_mgreg_t)); reg = X86_EAX; } else if (i == X86_ESP) { /* Save original esp */ /* EAX is already saved */ x86_mov_reg_reg (code, X86_EAX, X86_EBP); /* Saved ebp + trampoline arg + return addr */ x86_alu_reg_imm (code, X86_ADD, X86_EAX, 3 * sizeof (target_mgreg_t)); reg = X86_EAX; } x86_mov_membase_reg (code, X86_EBP, regarray_offset + (i * sizeof (target_mgreg_t)), reg, sizeof (target_mgreg_t)); } /* Setup LMF */ /* eip */ if (tramp_type == MONO_TRAMPOLINE_JUMP) { x86_mov_membase_imm (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, eip), 0, sizeof (target_mgreg_t)); } else { x86_mov_reg_membase (code, X86_EAX, X86_EBP, caller_ip_offset, sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, eip), X86_EAX, sizeof (target_mgreg_t)); } /* method */ if ((tramp_type == MONO_TRAMPOLINE_JIT) || (tramp_type == MONO_TRAMPOLINE_JUMP)) { x86_mov_reg_membase (code, X86_EAX, X86_EBP, arg_offset, sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), X86_EAX, sizeof (target_mgreg_t)); } else { x86_mov_membase_imm (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), 0, sizeof (target_mgreg_t)); } /* esp */ x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_ESP * sizeof (target_mgreg_t)), sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esp), X86_EAX, sizeof (target_mgreg_t)); /* callee save registers */ x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_EBX * sizeof (target_mgreg_t)), sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), X86_EAX, sizeof (target_mgreg_t)); x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_EDI * sizeof (target_mgreg_t)), sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), X86_EAX, sizeof (target_mgreg_t)); x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_ESI * sizeof (target_mgreg_t)), sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), X86_EAX, sizeof (target_mgreg_t)); x86_mov_reg_membase (code, X86_EAX, X86_EBP, regarray_offset + (X86_EBP * sizeof (target_mgreg_t)), sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), X86_EAX, sizeof (target_mgreg_t)); /* Push LMF */ /* get the address of lmf for the current thread */ if (aot) { code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr"); x86_call_reg (code, X86_EAX); } else { x86_call_code (code, mono_get_lmf_addr); } /* lmf->lmf_addr = lmf_addr (%eax) */ x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), X86_EAX, sizeof (target_mgreg_t)); /* lmf->previous_lmf = *(lmf_addr) */ x86_mov_reg_membase (code, X86_ECX, X86_EAX, 0, sizeof (target_mgreg_t)); /* Signal to mono_arch_unwind_frame () that this is a trampoline frame */ x86_alu_reg_imm (code, X86_ADD, X86_ECX, 1); x86_mov_membase_reg (code, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), X86_ECX, sizeof (target_mgreg_t)); /* *lmf_addr = lmf */ x86_lea_membase (code, X86_ECX, X86_EBP, lmf_offset); x86_mov_membase_reg (code, X86_EAX, 0, X86_ECX, sizeof (target_mgreg_t)); /* Call trampoline function */ /* Arg 1 - registers */ x86_lea_membase (code, X86_EAX, X86_EBP, regarray_offset); x86_mov_membase_reg (code, X86_ESP, (0 * sizeof (target_mgreg_t)), X86_EAX, sizeof (target_mgreg_t)); /* Arg2 - calling code */ if (tramp_type == MONO_TRAMPOLINE_JUMP) { x86_mov_membase_imm (code, X86_ESP, (1 * sizeof (target_mgreg_t)), 0, sizeof (target_mgreg_t)); } else { x86_mov_reg_membase (code, X86_EAX, X86_EBP, caller_ip_offset, sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_ESP, (1 * sizeof (target_mgreg_t)), X86_EAX, sizeof (target_mgreg_t)); } /* Arg3 - trampoline argument */ x86_mov_reg_membase (code, X86_EAX, X86_EBP, arg_offset, sizeof (target_mgreg_t)); x86_mov_membase_reg (code, X86_ESP, (2 * sizeof (target_mgreg_t)), X86_EAX, sizeof (target_mgreg_t)); /* Arg4 - trampoline address */ // FIXME: x86_mov_membase_imm (code, X86_ESP, (3 * sizeof (target_mgreg_t)), 0, sizeof (target_mgreg_t)); #ifdef __APPLE__ /* check the stack is aligned after the ret ip is pushed */ /* x86_mov_reg_reg (code, X86_EDX, X86_ESP); x86_alu_reg_imm (code, X86_AND, X86_EDX, 15); x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0); x86_branch_disp (code, X86_CC_Z, 3, FALSE); x86_breakpoint (code); */ #endif if (aot) { code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_TRAMPOLINE_FUNC_ADDR, GINT_TO_POINTER (tramp_type)); x86_call_reg (code, X86_EAX); } else { tramp = (guint8*)mono_get_trampoline_func (tramp_type); x86_call_code (code, tramp); } /* * Overwrite the trampoline argument with the address we need to jump to, * to free %eax. */ x86_mov_membase_reg (code, X86_EBP, arg_offset, X86_EAX, 4); /* Restore LMF */ x86_mov_reg_membase (code, X86_EAX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), sizeof (target_mgreg_t)); x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), sizeof (target_mgreg_t)); x86_alu_reg_imm (code, X86_SUB, X86_ECX, 1); x86_mov_membase_reg (code, X86_EAX, 0, X86_ECX, sizeof (target_mgreg_t)); /* Check for interruptions */ if (aot) { code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_thread_force_interruption_checkpoint_noraise"); x86_call_reg (code, X86_EAX); } else { x86_call_code (code, (guint8*)mono_thread_force_interruption_checkpoint_noraise); } x86_test_reg_reg (code, X86_EAX, X86_EAX); br_ex_check = code; x86_branch8 (code, X86_CC_Z, -1, 1); /* * Exception case: * We have an exception we want to throw in the caller's frame, so pop * the trampoline frame and throw from the caller. */ x86_leave (code); /* * The exception is in eax. * We are calling the throw trampoline used by OP_THROW, so we have to setup the * stack to look the same. * The stack contains the ret addr, and the trampoline argument, the throw trampoline * expects it to contain the ret addr and the exception. It also needs to be aligned * after the exception is pushed. */ /* Align stack */ x86_push_reg (code, X86_EAX); /* Push the exception */ x86_push_reg (code, X86_EAX); //x86_breakpoint (code); /* Push the original return value */ x86_push_membase (code, X86_ESP, 3 * 4); /* * EH is initialized after trampolines, so get the address of the variable * which contains throw_exception, and load it from there. */ if (aot) { /* Not really a jit icall */ code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "rethrow_preserve_exception_addr"); } else { x86_mov_reg_imm (code, X86_ECX, (guint8*)mono_get_rethrow_preserve_exception_addr ()); } x86_mov_reg_membase (code, X86_ECX, X86_ECX, 0, sizeof (target_mgreg_t)); x86_jump_reg (code, X86_ECX); /* Normal case */ mono_x86_patch (br_ex_check, code); /* Restore registers */ for (i = X86_EAX; i <= X86_EDI; ++i) { if (i == X86_ESP || i == X86_EBP) continue; if (i == X86_EAX && tramp_type != MONO_TRAMPOLINE_AOT_PLT) continue; x86_mov_reg_membase (code, i, X86_EBP, regarray_offset + (i * 4), 4); } /* Restore frame */ x86_leave (code); cfa_offset -= sizeof (target_mgreg_t); mono_add_unwind_op_def_cfa (unwind_ops, code, buf, X86_ESP, cfa_offset); mono_add_unwind_op_same_value (unwind_ops, code, buf, X86_EBP); if (MONO_TRAMPOLINE_TYPE_MUST_RETURN (tramp_type)) { /* Load the value returned by the trampoline */ x86_mov_reg_membase (code, X86_EAX, X86_ESP, 0, 4); /* The trampoline returns normally, pop the trampoline argument */ x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4); cfa_offset -= sizeof (target_mgreg_t); mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset); x86_ret (code); } else { x86_ret (code); } g_assert ((code - buf) <= 256); MONO_PROFILER_RAISE (jit_code_buffer, (buf, code - buf, MONO_PROFILER_CODE_BUFFER_HELPER, NULL)); tramp_name = mono_get_generic_trampoline_name (tramp_type); *info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops); return buf; }
void _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value) { jit_type_t type; int src_reg, other_src_reg; void *ptr; int offset; /* Make sure that we have sufficient space */ jit_cache_setup_output(16); type = jit_type_normalize(value->type); /* Load zero */ if(value->is_constant) { switch(type->kind) { case JIT_TYPE_SBYTE: case JIT_TYPE_UBYTE: case JIT_TYPE_SHORT: case JIT_TYPE_USHORT: case JIT_TYPE_INT: case JIT_TYPE_UINT: { if((jit_nint)(value->address) == 0) { x86_clear_reg(inst, _jit_reg_info[reg].cpu_reg); } else { x86_mov_reg_imm(inst, _jit_reg_info[reg].cpu_reg, (jit_nint)(value->address)); } } break; case JIT_TYPE_LONG: case JIT_TYPE_ULONG: { jit_long long_value; long_value = jit_value_get_long_constant(value); if(long_value == 0) { #ifdef JIT_NATIVE_INT64 x86_clear_reg(inst, _jit_reg_info[reg].cpu_reg); #else x86_clear_reg(inst, _jit_reg_info[reg].cpu_reg); x86_clear_reg(inst, _jit_reg_info[other_reg].cpu_reg); #endif } else { #ifdef JIT_NATIVE_INT64 x86_mov_reg_imm(inst, _jit_reg_info[reg].cpu_reg, (jit_nint)long_value); #else x86_mov_reg_imm(inst, _jit_reg_info[reg].cpu_reg, (jit_int)long_value); x86_mov_reg_imm(inst, _jit_reg_info[other_reg].cpu_reg, (jit_int)(long_value >> 32)); #endif } } break; case JIT_TYPE_FLOAT32: { jit_float32 float32_value; float32_value = jit_value_get_float32_constant(value); if(IS_WORD_REG(reg)) { union { jit_float32 float32_value; jit_int int_value; } un; un.float32_value = float32_value; x86_mov_reg_imm(inst, _jit_reg_info[reg].cpu_reg, un.int_value); } else { if(float32_value == (jit_float32) 0.0) { x86_fldz(inst); } else if(float32_value == (jit_float32) 1.0) { x86_fld1(inst); } else { ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float32)); jit_memcpy(ptr, &float32_value, sizeof(float32_value)); x86_fld(inst, ptr, 0); } } } break; case JIT_TYPE_FLOAT64: { jit_float64 float64_value; float64_value = jit_value_get_float64_constant(value); if(IS_WORD_REG(reg)) { union { jit_float64 float64_value; jit_int int_value[2]; } un; un.float64_value = float64_value; x86_mov_reg_imm(inst, _jit_reg_info[reg].cpu_reg, un.int_value[0]); x86_mov_reg_imm(inst, _jit_reg_info[other_reg].cpu_reg, un.int_value[1]); } else { if(float64_value == (jit_float64) 0.0) { x86_fldz(inst); } else if(float64_value == (jit_float64) 1.0) { x86_fld1(inst); } else { ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float64)); jit_memcpy(ptr, &float64_value, sizeof(float64_value)); x86_fld(inst, ptr, 1); } } } break; case JIT_TYPE_NFLOAT: { jit_nfloat nfloat_value; nfloat_value = jit_value_get_nfloat_constant(value); if(IS_WORD_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64)) { union { jit_nfloat nfloat_value; jit_int int_value[2]; } un; un.nfloat_value = nfloat_value; x86_mov_reg_imm(inst, _jit_reg_info[reg].cpu_reg, un.int_value[0]); x86_mov_reg_imm(inst, _jit_reg_info[other_reg].cpu_reg, un.int_value[1]); } else { if(nfloat_value == (jit_nfloat) 0.0) { x86_fldz(inst); } else if(nfloat_value == (jit_nfloat) 1.0) { x86_fld1(inst); } else { ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_nfloat)); jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value)); if(sizeof(jit_nfloat) == sizeof(jit_float64)) { x86_fld(inst, ptr, 1); } else { x86_fld80_mem(inst, ptr); } } } } break; } } else if(value->in_register || value->in_global_register)
/* * get_throw_trampoline: * * Generate a call to mono_x86_throw_exception/ * mono_x86_throw_corlib_exception. * If LLVM is true, generate code which assumes the caller is LLVM generated code, * which doesn't push the arguments. */ static guint8* get_throw_trampoline (const char *name, gboolean rethrow, gboolean llvm, gboolean corlib, gboolean llvm_abs, gboolean resume_unwind, MonoTrampInfo **info, gboolean aot) { guint8 *start, *code; int i, stack_size, stack_offset, arg_offsets [5], regs_offset; MonoJumpInfo *ji = NULL; GSList *unwind_ops = NULL; start = code = mono_global_codeman_reserve (128); stack_size = 128; /* * On apple, the stack is misaligned by the pushing of the return address. */ if (!llvm && corlib) /* On OSX, we don't generate alignment code to save space */ stack_size += 4; else stack_size += MONO_ARCH_FRAME_ALIGNMENT - 4; /* * The stack looks like this: * <pc offset> (only if corlib is TRUE) * <exception object>/<type token> * <return addr> <- esp (unaligned on apple) */ mono_add_unwind_op_def_cfa (unwind_ops, (guint8*)NULL, (guint8*)NULL, X86_ESP, 4); mono_add_unwind_op_offset (unwind_ops, (guint8*)NULL, (guint8*)NULL, X86_NREG, -4); /* Alloc frame */ x86_alu_reg_imm (code, X86_SUB, X86_ESP, stack_size); mono_add_unwind_op_def_cfa_offset (unwind_ops, code, start, stack_size + 4); arg_offsets [0] = 0; arg_offsets [1] = 4; arg_offsets [2] = 8; arg_offsets [3] = 12; regs_offset = 16; /* Save registers */ for (i = 0; i < X86_NREG; ++i) if (i != X86_ESP) x86_mov_membase_reg (code, X86_ESP, regs_offset + (i * 4), i, 4); /* Calculate the offset between the current sp and the sp of the caller */ if (llvm) { /* LLVM doesn't push the arguments */ stack_offset = stack_size + 4; } else { if (corlib) { /* Two arguments */ stack_offset = stack_size + 4 + 8; #ifdef __APPLE__ /* We don't generate stack alignment code on osx to save space */ #endif } else { /* One argument */ stack_offset = stack_size + 4 + 4; #ifdef __APPLE__ /* Pop the alignment added by OP_THROW too */ stack_offset += MONO_ARCH_FRAME_ALIGNMENT - 4; #endif } } /* Save ESP */ x86_lea_membase (code, X86_EAX, X86_ESP, stack_offset); x86_mov_membase_reg (code, X86_ESP, regs_offset + (X86_ESP * 4), X86_EAX, 4); /* Set arg1 == regs */ x86_lea_membase (code, X86_EAX, X86_ESP, regs_offset); x86_mov_membase_reg (code, X86_ESP, arg_offsets [0], X86_EAX, 4); /* Set arg2 == exc/ex_token_index */ if (resume_unwind) x86_mov_reg_imm (code, X86_EAX, 0); else x86_mov_reg_membase (code, X86_EAX, X86_ESP, stack_size + 4, 4); x86_mov_membase_reg (code, X86_ESP, arg_offsets [1], X86_EAX, 4); /* Set arg3 == eip */ if (llvm_abs) x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX); else x86_mov_reg_membase (code, X86_EAX, X86_ESP, stack_size, 4); x86_mov_membase_reg (code, X86_ESP, arg_offsets [2], X86_EAX, 4); /* Set arg4 == rethrow/pc_offset */ if (resume_unwind) { x86_mov_membase_imm (code, X86_ESP, arg_offsets [3], 0, 4); } else if (corlib) { x86_mov_reg_membase (code, X86_EAX, X86_ESP, stack_size + 8, 4); if (llvm_abs) { /* * The caller is LLVM code which passes the absolute address not a pc offset, * so compensate by passing 0 as 'ip' and passing the negated abs address as * the pc offset. */ x86_neg_reg (code, X86_EAX); } x86_mov_membase_reg (code, X86_ESP, arg_offsets [3], X86_EAX, 4); } else { x86_mov_membase_imm (code, X86_ESP, arg_offsets [3], rethrow, 4); } /* Make the call */ if (aot) { // This can be called from runtime code, which can't guarantee that // ebx contains the got address. // So emit the got address loading code too code = mono_arch_emit_load_got_addr (start, code, NULL, &ji); code = mono_arch_emit_load_aotconst (start, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, corlib ? "mono_x86_throw_corlib_exception" : "mono_x86_throw_exception"); x86_call_reg (code, X86_EAX); } else { x86_call_code (code, resume_unwind ? (mono_x86_resume_unwind) : (corlib ? (gpointer)mono_x86_throw_corlib_exception : (gpointer)mono_x86_throw_exception)); } x86_breakpoint (code); g_assert ((code - start) < 128); if (info) *info = mono_tramp_info_create (g_strdup (name), start, code - start, ji, unwind_ops); return start; }
transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) { uint32_t offsets[8] = {0, 4*N, 2*N, 6*N, N, 5*N, 7*N, 3*N}; uint32_t offsets_o[8] = {0, 4*N, 2*N, 6*N, 7*N, 3*N, N, 5*N}; int32_t pAddr = 0; int32_t pN = 0; int32_t pLUT = 0; insns_t *fp; insns_t *start; insns_t *x_4_addr; insns_t *x_8_addr; uint32_t loop_count; int count; ptrdiff_t len; size_t *ps; size_t *pps; count = ffts_tree_count(N, leaf_N, 0) + 1; ps = pps = malloc(2 * count * sizeof(*ps)); if (!ps) { return NULL; } ffts_elaborate_tree(&pps, N, leaf_N, 0); pps[0] = 0; pps[1] = 0; pps = ps; #ifdef HAVE_SSE if (sign < 0) { p->constants = (const void*) sse_constants; } else { p->constants = (const void*) sse_constants_inv; } #endif fp = (insns_t*) p->transform_base; /* generate base cases */ x_4_addr = generate_size4_base_case(&fp, sign); x_8_addr = generate_size8_base_case(&fp, sign); #ifdef __arm__ start = generate_prologue(&fp, p); #ifdef HAVE_NEON memcpy(fp, neon_ee, neon_oo - neon_ee); if (sign < 0) { fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000; fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[44] ^= 0x00200000; fp[45] ^= 0x00200000; fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000; } fp += (neon_oo - neon_ee) / 4; #else memcpy(fp, vfp_e, vfp_o - vfp_e); if (sign > 0) { fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[68] ^= 0x00000040; fp[75] ^= 0x00000040; fp[76] ^= 0x00000040; fp[79] ^= 0x00000040; fp[80] ^= 0x00000040; fp[83] ^= 0x00000040; fp[84] ^= 0x00000040; fp[87] ^= 0x00000040; fp[91] ^= 0x00000040; fp[93] ^= 0x00000040; } fp += (vfp_o - vfp_e) / 4; #endif #else /* generate functions */ start = generate_prologue(&fp, p); loop_count = 4 * p->i0; generate_leaf_init(&fp, loop_count); if (ffts_ctzl(N) & 1) { generate_leaf_ee(&fp, offsets, p->i1 ? 6 : 0); if (p->i1) { loop_count += 4 * p->i1; generate_leaf_oo(&fp, loop_count, offsets_o, 7); } loop_count += 4; generate_leaf_oe(&fp, offsets_o); } else { generate_leaf_ee(&fp, offsets, N >= 256 ? 2 : 8); loop_count += 4; generate_leaf_eo(&fp, offsets); if (p->i1) { loop_count += 4 * p->i1; generate_leaf_oo(&fp, loop_count, offsets_o, N >= 256 ? 4 : 7); } } if (p->i1) { uint32_t offsets_oe[8] = {7*N, 3*N, N, 5*N, 0, 4*N, 6*N, 2*N}; loop_count += 4 * p->i1; /* align loop/jump destination */ #ifdef _M_X64 x86_mov_reg_imm(fp, X86_EBX, loop_count); #else x86_mov_reg_imm(fp, X86_ECX, loop_count); ffts_align_mem16(&fp, 9); #endif generate_leaf_ee(&fp, offsets_oe, 0); } generate_transform_init(&fp); /* generate subtransform calls */ count = 2; while (pps[0]) { size_t ws_is; if (!pN) { #ifdef _M_X64 x86_mov_reg_imm(fp, X86_EBX, pps[0]); #else x86_mov_reg_imm(fp, X86_ECX, pps[0] / 4); #endif } else { int offset = (4 * pps[1]) - pAddr; if (offset) { #ifdef _M_X64 x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8); #else x64_alu_reg_imm_size(fp, X86_ADD, X64_RDX, offset, 8); #endif } if (pps[0] > leaf_N && pps[0] - pN) { int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN); #ifdef _M_X64 if (factor > 0) { x86_shift_reg_imm(fp, X86_SHL, X86_EBX, factor); } else { x86_shift_reg_imm(fp, X86_SHR, X86_EBX, -factor); } #else if (factor > 0) { x86_shift_reg_imm(fp, X86_SHL, X86_ECX, factor); } else { x86_shift_reg_imm(fp, X86_SHR, X86_ECX, -factor); } #endif } } ws_is = 8 * p->ws_is[ffts_ctzl(pps[0] / leaf_N) - 1]; if (ws_is != pLUT) { int offset = (int) (ws_is - pLUT); #ifdef _M_X64 x64_alu_reg_imm_size(fp, X86_ADD, X64_R9, offset, 8); #else x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8); #endif } if (pps[0] == 2 * leaf_N) { x64_call_code(fp, x_4_addr); } else { x64_call_code(fp, x_8_addr); } pAddr = 4 * pps[1]; if (pps[0] > leaf_N) { pN = pps[0]; } pLUT = ws_is;//LUT_offset(pps[0], leafN); //fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT); count += 4; pps += 2; } #endif #ifdef __arm__ #ifdef HAVE_NEON if (ffts_ctzl(N) & 1) { ADDI(&fp, 2, 7, 0); ADDI(&fp, 7, 9, 0); ADDI(&fp, 9, 2, 0); ADDI(&fp, 2, 8, 0); ADDI(&fp, 8, 10, 0); ADDI(&fp, 10, 2, 0); if(p->i1) { MOVI(&fp, 11, p->i1); memcpy(fp, neon_oo, neon_eo - neon_oo); if(sign < 0) { fp[12] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[15] ^= 0x00200000; fp[27] ^= 0x00200000; fp[29] ^= 0x00200000; fp[30] ^= 0x00200000; fp[31] ^= 0x00200000; fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000; } fp += (neon_eo - neon_oo) / 4; } *fp = LDRI(11, 1, ((uint32_t)&p->oe_ws) - ((uint32_t)p)); fp++; memcpy(fp, neon_oe, neon_end - neon_oe); if(sign < 0) { fp[19] ^= 0x00200000; fp[20] ^= 0x00200000; fp[22] ^= 0x00200000; fp[23] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[64] ^= 0x00200000; fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[67] ^= 0x00200000; } fp += (neon_end - neon_oe) / 4; } else { *fp = LDRI(11, 1, ((uint32_t)&p->eo_ws) - ((uint32_t)p)); fp++; memcpy(fp, neon_eo, neon_oe - neon_eo); if(sign < 0) { fp[10] ^= 0x00200000; fp[11] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[31] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000; fp[35] ^= 0x00200000; fp[59] ^= 0x00200000; fp[60] ^= 0x00200000; fp[61] ^= 0x00200000; fp[62] ^= 0x00200000; } fp += (neon_oe - neon_eo) / 4; ADDI(&fp, 2, 7, 0); ADDI(&fp, 7, 9, 0); ADDI(&fp, 9, 2, 0); ADDI(&fp, 2, 8, 0); ADDI(&fp, 8, 10, 0); ADDI(&fp, 10, 2, 0); if(p->i1) { MOVI(&fp, 11, p->i1); memcpy(fp, neon_oo, neon_eo - neon_oo); if(sign < 0) { fp[12] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[15] ^= 0x00200000; fp[27] ^= 0x00200000; fp[29] ^= 0x00200000; fp[30] ^= 0x00200000; fp[31] ^= 0x00200000; fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000; } fp += (neon_eo - neon_oo) / 4; } } if(p->i1) { ADDI(&fp, 2, 3, 0); ADDI(&fp, 3, 7, 0); ADDI(&fp, 7, 2, 0); ADDI(&fp, 2, 4, 0); ADDI(&fp, 4, 8, 0); ADDI(&fp, 8, 2, 0); ADDI(&fp, 2, 5, 0); ADDI(&fp, 5, 9, 0); ADDI(&fp, 9, 2, 0); ADDI(&fp, 2, 6, 0); ADDI(&fp, 6, 10, 0); ADDI(&fp, 10, 2, 0); ADDI(&fp, 2, 9, 0); ADDI(&fp, 9, 10, 0); ADDI(&fp, 10, 2, 0); *fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++; MOVI(&fp, 11, p->i1); memcpy(fp, neon_ee, neon_oo - neon_ee); if(sign < 0) { fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000; fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[44] ^= 0x00200000; fp[45] ^= 0x00200000; fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000; } fp += (neon_oo - neon_ee) / 4; } #else ADDI(&fp, 2, 7, 0); ADDI(&fp, 7, 9, 0); ADDI(&fp, 9, 2, 0); ADDI(&fp, 2, 8, 0); ADDI(&fp, 8, 10, 0); ADDI(&fp, 10, 2, 0); MOVI(&fp, 11, (p->i1>0) ? p->i1 : 1); memcpy(fp, vfp_o, vfp_x4 - vfp_o); if(sign > 0) { fp[22] ^= 0x00000040; fp[24] ^= 0x00000040; fp[25] ^= 0x00000040; fp[26] ^= 0x00000040; fp[62] ^= 0x00000040; fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[66] ^= 0x00000040; } fp += (vfp_x4 - vfp_o) / 4; ADDI(&fp, 2, 3, 0); ADDI(&fp, 3, 7, 0); ADDI(&fp, 7, 2, 0); ADDI(&fp, 2, 4, 0); ADDI(&fp, 4, 8, 0); ADDI(&fp, 8, 2, 0); ADDI(&fp, 2, 5, 0); ADDI(&fp, 5, 9, 0); ADDI(&fp, 9, 2, 0); ADDI(&fp, 2, 6, 0); ADDI(&fp, 6, 10, 0); ADDI(&fp, 10, 2, 0); ADDI(&fp, 2, 9, 0); ADDI(&fp, 9, 10, 0); ADDI(&fp, 10, 2, 0); *fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++; MOVI(&fp, 11, (p->i2>0) ? p->i2 : 1); memcpy(fp, vfp_e, vfp_o - vfp_e); if(sign > 0) { fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[68] ^= 0x00000040; fp[75] ^= 0x00000040; fp[76] ^= 0x00000040; fp[79] ^= 0x00000040; fp[80] ^= 0x00000040; fp[83] ^= 0x00000040; fp[84] ^= 0x00000040; fp[87] ^= 0x00000040; fp[91] ^= 0x00000040; fp[93] ^= 0x00000040; } fp += (vfp_o - vfp_e) / 4; #endif *fp = LDRI(2, 1, ((uint32_t)&p->ws) - ((uint32_t)p)); fp++; // load offsets into r12 //ADDI(&fp, 2, 1, 0); MOVI(&fp, 1, 0); // args: r0 - out // r1 - N // r2 - ws // ADDI(&fp, 3, 1, 0); // put N into r3 for counter count = 2; while(pps[0]) { // fprintf(stderr, "size %zu at %zu - diff %zu\n", pps[0], pps[1]*4, (pps[1]*4) - pAddr); if(!pN) { MOVI(&fp, 1, pps[0]); } else { if((pps[1]*4)-pAddr) ADDI(&fp, 0, 0, (pps[1] * 4)- pAddr); if(pps[0] - pN) ADDI(&fp, 1, 1, pps[0] - pN); } if (p->ws_is[ffts_ctzl(pps[0]/leaf_N)-1]*8 - pLUT) { ADDI(&fp, 2, 2, p->ws_is[ffts_ctzl(pps[0]/leaf_N)-1]*8 - pLUT); } if(pps[0] == 2 * leaf_N) { *fp = BL(fp+2, x_4_addr); fp++; } else if(!pps[2]) { //uint32_t *x_8_t_addr = fp; #ifdef HAVE_NEON memcpy(fp, neon_x8_t, neon_ee - neon_x8_t); if(sign < 0) { fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000; fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[70] ^= 0x00200000; fp[74] ^= 0x00200000; fp[97] ^= 0x00200000; fp[98] ^= 0x00200000; fp[102] ^= 0x00200000; fp[104] ^= 0x00200000; } fp += (neon_ee - neon_x8_t) / 4; //*fp++ = BL(fp+2, x_8_t_addr); #else *fp = BL(fp+2, x_8_addr); fp++; #endif } else { *fp = BL(fp+2, x_8_addr); fp++; } pAddr = pps[1] * 4; pN = pps[0]; pLUT = p->ws_is[ffts_ctzl(pps[0]/leaf_N)-1]*8;//LUT_offset(pps[0], leafN); // fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT); count += 4; pps += 2; } *fp++ = 0xecbd8b10; *fp++ = POP_LR(); count++; #else generate_epilogue(&fp); #endif // *fp++ = B(14); count++; //for(int i=0;i<(neon_x8 - neon_x4)/4;i++) // fprintf(stderr, "%08x\n", x_4_addr[i]); //fprintf(stderr, "\n"); //for(int i=0;i<count;i++) //fprintf(stderr, "size of transform %u = %d\n", N, (fp - x_8_addr) * sizeof(*fp)); free(ps); #if defined(_MSC_VER) #pragma warning(push) /* disable type cast warning from data pointer to function pointer */ #pragma warning(disable : 4055) #endif return (transform_func_t) start; #if defined(_MSC_VER) #pragma warning(pop) #endif }