/* output a single import thunk */ static void output_import_thunk( const char *name, const char *table, int pos ) { output( "\n\t.align %d\n", get_alignment(4) ); output( "\t%s\n", func_declaration(name) ); output( "%s\n", asm_globl(name) ); output_cfi( ".cfi_startproc" ); switch(target_cpu) { case CPU_x86: if (!UsePIC) { output( "\tjmp *(%s+%d)\n", table, pos ); } else { output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") ); output( "1:\tjmp *%s+%d-1b(%%eax)\n", table, pos ); } break; case CPU_x86_64: output( "\tjmpq *%s+%d(%%rip)\n", table, pos ); break; case CPU_SPARC: if ( !UsePIC ) { output( "\tsethi %%hi(%s+%d), %%g1\n", table, pos ); output( "\tld [%%g1+%%lo(%s+%d)], %%g1\n", table, pos ); output( "\tjmp %%g1\n" ); output( "\tnop\n" ); } else { /* Hmpf. Stupid sparc assembler always interprets global variable names as GOT offsets, so we have to do it the long way ... */ output( "\tsave %%sp, -96, %%sp\n" ); output( "0:\tcall 1f\n" ); output( "\tnop\n" ); output( "1:\tsethi %%hi(%s+%d-0b), %%g1\n", table, pos ); output( "\tor %%g1, %%lo(%s+%d-0b), %%g1\n", table, pos ); output( "\tld [%%g1+%%o7], %%g1\n" ); output( "\tjmp %%g1\n" ); output( "\trestore\n" ); } break; case CPU_ARM: output( "\tldr IP,[PC,#0]\n"); output( "\tldr PC,[IP,#%d]\n", pos); output( "\t.long %s\n", table ); break; case CPU_ARM64: output( "\tadr x9, 1f\n" ); output( "\tldur x9, [x9, #0]\n" ); if (pos & 0xf000) output( "\tadd x9, x9, #%u\n", pos & 0xf000 ); if (pos & 0x0f00) output( "\tadd x9, x9, #%u\n", pos & 0x0f00 ); if (pos & 0x00f0) output( "\tadd x9, x9, #%u\n", pos & 0x00f0 ); if (pos & 0x000f) output( "\tadd x9, x9, #%u\n", pos & 0x000f ); output( "\tldur x9, [x9, #0]\n" ); output( "\tbr x9\n" ); output( "1:\t.quad %s\n", table ); break; case CPU_POWERPC: output( "\tmr %s, %s\n", ppc_reg(0), ppc_reg(31) ); if (target_platform == PLATFORM_APPLE) { output( "\tlis %s, ha16(%s+%d+32768)\n", ppc_reg(31), table, pos ); output( "\tla %s, lo16(%s+%d)(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) ); } else { output( "\tlis %s, (%s+%d+32768)@h\n", ppc_reg(31), table, pos ); output( "\tla %s, (%s+%d)@l(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) ); } output( "\tlwz %s, 0(%s)\n", ppc_reg(31), ppc_reg(31) ); output( "\tmtctr %s\n", ppc_reg(31) ); output( "\tmr %s, %s\n", ppc_reg(31), ppc_reg(0) ); output( "\tbctr\n" ); break; } output_cfi( ".cfi_endproc" ); output_function_size( name ); }
/* output the delayed import thunks of a Win32 module */ static void output_delayed_import_thunks( const DLLSPEC *spec ) { int i, idx, j, pos, extra_stack_storage = 0; static const char delayed_import_loaders[] = "__wine_spec_delayed_import_loaders"; static const char delayed_import_thunks[] = "__wine_spec_delayed_import_thunks"; if (!nb_delayed) return; output( "\n/* delayed import thunks */\n\n" ); output( "\t.text\n" ); output( "\t.align %d\n", get_alignment(8) ); output( "%s:\n", asm_name(delayed_import_loaders)); output( "\t%s\n", func_declaration("__wine_delay_load_asm") ); output( "%s:\n", asm_name("__wine_delay_load_asm") ); output_cfi( ".cfi_startproc" ); switch(target_cpu) { case CPU_x86: output( "\tpushl %%ecx\n" ); output_cfi( ".cfi_adjust_cfa_offset 4" ); output( "\tpushl %%edx\n" ); output_cfi( ".cfi_adjust_cfa_offset 4" ); output( "\tpushl %%eax\n" ); output_cfi( ".cfi_adjust_cfa_offset 4" ); output( "\tcall %s\n", asm_name("__wine_spec_delay_load") ); output_cfi( ".cfi_adjust_cfa_offset -4" ); output( "\tpopl %%edx\n" ); output_cfi( ".cfi_adjust_cfa_offset -4" ); output( "\tpopl %%ecx\n" ); output_cfi( ".cfi_adjust_cfa_offset -4" ); output( "\tjmp *%%eax\n" ); break; case CPU_x86_64: output( "\tsubq $88,%%rsp\n" ); output_cfi( ".cfi_adjust_cfa_offset 88" ); output( "\tmovq %%rdx,80(%%rsp)\n" ); output( "\tmovq %%rcx,72(%%rsp)\n" ); output( "\tmovq %%r8,64(%%rsp)\n" ); output( "\tmovq %%r9,56(%%rsp)\n" ); output( "\tmovq %%r10,48(%%rsp)\n" ); output( "\tmovq %%r11,40(%%rsp)\n" ); output( "\tmovq %%rax,%%rcx\n" ); output( "\tcall %s\n", asm_name("__wine_spec_delay_load") ); output( "\tmovq 40(%%rsp),%%r11\n" ); output( "\tmovq 48(%%rsp),%%r10\n" ); output( "\tmovq 56(%%rsp),%%r9\n" ); output( "\tmovq 64(%%rsp),%%r8\n" ); output( "\tmovq 72(%%rsp),%%rcx\n" ); output( "\tmovq 80(%%rsp),%%rdx\n" ); output( "\taddq $88,%%rsp\n" ); output_cfi( ".cfi_adjust_cfa_offset -88" ); output( "\tjmp *%%rax\n" ); break; case CPU_SPARC: output( "\tsave %%sp, -96, %%sp\n" ); output( "\tcall %s\n", asm_name("__wine_spec_delay_load") ); output( "\tmov %%g1, %%o0\n" ); output( "\tjmp %%o0\n" ); output( "\trestore\n" ); break; case CPU_ARM: output( "\tstmfd SP!, {r4-r10,FP,LR}\n" ); output( "\tmov LR,PC\n"); output( "\tadd LR,LR,#8\n"); output( "\tldr PC,[PC,#-4]\n"); output( "\t.long %s\n", asm_name("__wine_spec_delay_load") ); output( "\tmov IP,r0\n"); output( "\tldmfd SP!, {r4-r10,FP,LR}\n" ); output( "\tldmfd SP!, {r0-r3}\n" ); output( "\tmov PC,IP\n"); break; case CPU_ARM64: output( "\tstp x29, x30, [sp,#-16]!\n" ); output( "\tmov x29, sp\n" ); output( "\tadr x9, 1f\n" ); output( "\tldur x9, [x9, #0]\n" ); output( "\tblr x9\n" ); output( "\tmov x9, x0\n" ); output( "\tldp x29, x30, [sp],#16\n" ); output( "\tldp x0, x1, [sp,#16]\n" ); output( "\tldp x2, x3, [sp,#32]\n" ); output( "\tldp x4, x5, [sp,#48]\n" ); output( "\tldp x6, x7, [sp],#80\n" ); output( "\tbr x9\n" ); /* or "ret x9" */ output( "1:\t.quad %s\n", asm_name("__wine_spec_delay_load") ); break; case CPU_POWERPC: if (target_platform == PLATFORM_APPLE) extra_stack_storage = 56; /* Save all callee saved registers into a stackframe. */ output( "\tstwu %s, -%d(%s)\n",ppc_reg(1), 48+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(3), 4+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(4), 8+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(5), 12+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(6), 16+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(7), 20+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(8), 24+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(9), 28+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(10),32+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(11),36+extra_stack_storage, ppc_reg(1)); output( "\tstw %s, %d(%s)\n", ppc_reg(12),40+extra_stack_storage, ppc_reg(1)); /* r0 -> r3 (arg1) */ output( "\tmr %s, %s\n", ppc_reg(3), ppc_reg(0)); /* save return address */ output( "\tmflr %s\n", ppc_reg(0)); output( "\tstw %s, %d(%s)\n", ppc_reg(0), 44+extra_stack_storage, ppc_reg(1)); /* Call the __wine_delay_load function, arg1 is arg1. */ output( "\tbl %s\n", asm_name("__wine_spec_delay_load") ); /* Load return value from call into ctr register */ output( "\tmtctr %s\n", ppc_reg(3)); /* restore all saved registers and drop stackframe. */ output( "\tlwz %s, %d(%s)\n", ppc_reg(3), 4+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(4), 8+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(5), 12+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(6), 16+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(7), 20+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(8), 24+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(9), 28+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(10),32+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(11),36+extra_stack_storage, ppc_reg(1)); output( "\tlwz %s, %d(%s)\n", ppc_reg(12),40+extra_stack_storage, ppc_reg(1)); /* Load return value from call into return register */ output( "\tlwz %s, %d(%s)\n", ppc_reg(0), 44+extra_stack_storage, ppc_reg(1)); output( "\tmtlr %s\n", ppc_reg(0)); output( "\taddi %s, %s, %d\n", ppc_reg(1), ppc_reg(1), 48+extra_stack_storage); /* branch to ctr register. */ output( "\tbctr\n"); break; } output_cfi( ".cfi_endproc" ); output_function_size( "__wine_delay_load_asm" ); output( "\n" ); for (i = idx = 0; i < nb_imports; i++) { if (!dll_imports[i]->delay) continue; for (j = 0; j < dll_imports[i]->nb_imports; j++) { ORDDEF *odp = dll_imports[i]->imports[j]; const char *name = odp->name ? odp->name : odp->export_name; output( ".L__wine_delay_imp_%d_%s:\n", i, name ); output_cfi( ".cfi_startproc" ); switch(target_cpu) { case CPU_x86: output( "\tmovl $%d, %%eax\n", (idx << 16) | j ); output( "\tjmp %s\n", asm_name("__wine_delay_load_asm") ); break; case CPU_x86_64: output( "\tmovq $%d,%%rax\n", (idx << 16) | j ); output( "\tjmp %s\n", asm_name("__wine_delay_load_asm") ); break; case CPU_SPARC: output( "\tset %d, %%g1\n", (idx << 16) | j ); output( "\tb,a %s\n", asm_name("__wine_delay_load_asm") ); output( "\tnop\n" ); break; case CPU_ARM: output( "\tstmfd SP!, {r0-r3}\n" ); output( "\tmov r0, #%d\n", idx ); output( "\tmov r1, #16384\n" ); output( "\tmul r1, r0, r1\n" ); output( "\tmov r0, r1\n" ); output( "\tmov r1, #4\n" ); output( "\tmul r1, r0, r1\n" ); output( "\tmov r0, r1\n" ); output( "\tadd r0, #%d\n", j ); output( "\tldr PC,[PC,#-4]\n"); output( "\t.long %s\n", asm_name("__wine_delay_load_asm") ); break; case CPU_ARM64: output( "\tstp x6, x7, [sp,#-80]!\n" ); output( "\tstp x4, x5, [sp,#48]\n" ); output( "\tstp x2, x3, [sp,#32]\n" ); output( "\tstp x0, x1, [sp,#16]\n" ); output( "\tmov x0, #%d\n", idx ); output( "\tmov x1, #16384\n" ); output( "\tmul x1, x0, x1\n" ); output( "\tmov x0, x1\n" ); output( "\tmov x1, #4\n" ); output( "\tmul x1, x0, x1\n" ); output( "\tmov x0, x1\n" ); output( "\tadd x0, x0, #%d\n", j ); output( "\tadr x9, 1f\n" ); output( "\tldur x9, [x9, #0]\n" ); output( "\tbr x9\n" ); output( "1:\t.quad %s\n", asm_name("__wine_delay_load_asm") ); break; case CPU_POWERPC: switch(target_platform) { case PLATFORM_APPLE: /* On Darwin we can use r0 and r2 */ /* Upper part in r2 */ output( "\tlis %s, %d\n", ppc_reg(2), idx); /* Lower part + r2 -> r0, Note we can't use r0 directly */ output( "\taddi %s, %s, %d\n", ppc_reg(0), ppc_reg(2), j); output( "\tb %s\n", asm_name("__wine_delay_load_asm") ); break; default: /* On linux we can't use r2 since r2 is not a scratch register (hold the TOC) */ /* Save r13 on the stack */ output( "\taddi %s, %s, -0x4\n", ppc_reg(1), ppc_reg(1)); output( "\tstw %s, 0(%s)\n", ppc_reg(13), ppc_reg(1)); /* Upper part in r13 */ output( "\tlis %s, %d\n", ppc_reg(13), idx); /* Lower part + r13 -> r0, Note we can't use r0 directly */ output( "\taddi %s, %s, %d\n", ppc_reg(0), ppc_reg(13), j); /* Restore r13 */ output( "\tstw %s, 0(%s)\n", ppc_reg(13), ppc_reg(1)); output( "\taddic %s, %s, 0x4\n", ppc_reg(1), ppc_reg(1)); output( "\tb %s\n", asm_name("__wine_delay_load_asm") ); break; } break; } output_cfi( ".cfi_endproc" ); } idx++; } output_function_size( delayed_import_loaders ); output( "\n\t.align %d\n", get_alignment(get_ptr_size()) ); output( "%s:\n", asm_name(delayed_import_thunks)); for (i = pos = 0; i < nb_imports; i++) { if (!dll_imports[i]->delay) continue; for (j = 0; j < dll_imports[i]->nb_imports; j++, pos += get_ptr_size()) { ORDDEF *odp = dll_imports[i]->imports[j]; output_import_thunk( odp->name ? odp->name : odp->export_name, ".L__wine_delay_IAT", pos ); } } output_function_size( delayed_import_thunks ); }
/* output a single import thunk */ static void output_import_thunk( const char *name, const char *table, int pos ) { output( "\n\t.align %d\n", get_alignment(4) ); output( "\t%s\n", func_declaration(name) ); output( "%s\n", asm_globl(name) ); output_cfi( ".cfi_startproc" ); switch(target_cpu) { case CPU_x86: if (!UsePIC) { output( "\tjmp *(%s+%d)\n", table, pos ); } else { output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") ); output( "1:\tjmp *%s+%d-1b(%%eax)\n", table, pos ); } break; case CPU_x86_64: output( "\tjmpq *%s+%d(%%rip)\n", table, pos ); break; case CPU_ARM: output( "\tldr IP,1f\n"); output( "\tldr PC,[PC,IP]\n" ); output( "1:\t.long %s+%u-(1b+4)\n", table, pos ); break; case CPU_ARM64: output( "\tadr x9, 1f\n" ); output( "\tldur x9, [x9, #0]\n" ); if (pos & 0xf000) output( "\tadd x9, x9, #%u\n", pos & 0xf000 ); if (pos & 0x0f00) output( "\tadd x9, x9, #%u\n", pos & 0x0f00 ); if (pos & 0x00f0) output( "\tadd x9, x9, #%u\n", pos & 0x00f0 ); if (pos & 0x000f) output( "\tadd x9, x9, #%u\n", pos & 0x000f ); output( "\tldur x9, [x9, #0]\n" ); output( "\tbr x9\n" ); output( "1:\t.quad %s\n", table ); break; case CPU_POWERPC: output( "\tmr %s, %s\n", ppc_reg(0), ppc_reg(31) ); if (target_platform == PLATFORM_APPLE) { output( "\tlis %s, ha16(%s+%d+32768)\n", ppc_reg(31), table, pos ); output( "\tla %s, lo16(%s+%d)(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) ); } else { output( "\tlis %s, (%s+%d+32768)@h\n", ppc_reg(31), table, pos ); output( "\tla %s, (%s+%d)@l(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) ); } output( "\tlwz %s, 0(%s)\n", ppc_reg(31), ppc_reg(31) ); output( "\tmtctr %s\n", ppc_reg(31) ); output( "\tmr %s, %s\n", ppc_reg(31), ppc_reg(0) ); output( "\tbctr\n" ); break; } output_cfi( ".cfi_endproc" ); output_function_size( name ); }