Пример #1
0
/*******************************************************************
 *         BuildPendingEventCheck
 *
 * Build a function that checks whether there are any
 * pending DPMI events.
 *
 * Stack layout:
 *   
 * (sp+10) long   eflags
 * (sp+6)  long   cs
 * (sp+2)  long   ip
 * (sp)    word   fs
 *
 * On entry to function, fs register points to a valid TEB.
 * On exit from function, stack will be popped.
 */
static void BuildPendingEventCheck(void)
{
    static const char func_name[]     = "DPMI_PendingEventCheck";
    static const char cleanup_label[] = "DPMI_PendingEventCheck_Cleanup";
    static const char return_label[]  = "DPMI_PendingEventCheck_Return";

    /* Emit the function header for the stub. */
    function_header( func_name );

    /* Test two dwords through the fs segment (0x64 is the fs override
     * prefix): the vm86_pending and dpmi_vif fields of the
     * WINE_VM86_TEB_INFO block located at TEB.GdiTebBatch.  If either
     * one is zero, branch straight to the cleanup label. */
    output( "\t.byte 0x64\n\ttestl $0xffffffff,(%d)\n",
            STRUCTOFFSET(TEB,GdiTebBatch) + STRUCTOFFSET(WINE_VM86_TEB_INFO,vm86_pending) );
    output( "\tje %s\n", asm_name(cleanup_label) );
    output( "\t.byte 0x64\n\ttestl $0xffffffff,(%d)\n",
            STRUCTOFFSET(TEB,GdiTebBatch) + STRUCTOFFSET(WINE_VM86_TEB_INFO,dpmi_vif) );
    output( "\tje %s\n", asm_name(cleanup_label) );

    /* Both fields are non-zero: process the pending events. */
    output( "\tsti\n" );

    /* Cleanup entry point: pop the saved fs register off the stack. */
    output( "%s\n", asm_globl(cleanup_label) );
    output( "\tpopw %%fs\n" );

    /* Return entry point: leave the stub with an interrupt return. */
    output( "%s\n", asm_globl(return_label) );
    output( "\tiret\n" );

    output_function_size( func_name );
}
Пример #2
0
/* Emit one jump thunk per immediately-bound (non-delayed) import of a
 * Win32 module.  Each thunk jumps through its slot in
 * .L__wine_spec_import_data_ptrs; every non-delayed dll's slots are
 * followed by one extra pointer-sized slot that is skipped over here. */
static void output_immediate_import_thunks(void)
{
    static const char thunks_symbol[] = "__wine_spec_import_thunks";
    int dll, entry;
    int offset = 0;

    /* nothing to emit when every import is a delayed one */
    if (nb_imports == nb_delayed) return;

    output( "\n/* immediate import thunks */\n\n" );
    output( "\t.text\n" );
    output( "\t.align %d\n", get_alignment(8) );
    output( "%s:\n", asm_name(thunks_symbol));

    for (dll = 0; dll < nb_imports; dll++)
    {
        if (!dll_imports[dll]->delay)
        {
            for (entry = 0; entry < dll_imports[dll]->nb_imports; entry++)
            {
                ORDDEF *odp = dll_imports[dll]->imports[entry];
                const char *thunk_name = odp->name;

                /* fall back to the export name when the entry has no name */
                if (!thunk_name) thunk_name = odp->export_name;
                output_import_thunk( thunk_name, ".L__wine_spec_import_data_ptrs", offset );
                offset += get_ptr_size();
            }
            /* step over the extra slot that follows this dll's entries */
            offset += get_ptr_size();
        }
    }
    output_function_size( thunks_symbol );
}
Пример #3
0
/*******************************************************************
 *         BuildRet16Func
 *
 * Emit __wine_call_to_16_ret, the stub that 16-bit callbacks return to.
 * The generated code reloads the 32-bit segment registers and stack
 * pointer, then does a far return to the 32-bit caller.
 */
static void BuildRet16Func(void)
{
    static const char stub_name[] = "__wine_call_to_16_ret";

    function_header( stub_name );

    /* Keep a copy of the current %esp in %esi */
    output( "\tmovl %%esp,%%esi\n" );

    /* Reload %ds and %es: fetch the data selector through %cs (0x2e is
     * the cs override prefix), addressed relative to __wine_call16_start.
     * Each asm_name() result is consumed by its own output() call. */
    output( "\t.byte 0x2e\n\tmovl %s", asm_name("CallTo16_DataSelector") );
    output( "-%s,%%edi\n", asm_name("__wine_call16_start") );
    output( "\tmovw %%di,%%ds\n" );
    output( "\tmovw %%di,%%es\n" );

    /* Reload %fs from the stored TEB selector, same addressing scheme */
    output( "\t.byte 0x2e\n\tmov %s", asm_name("CallTo16_TebSelector") );
    output( "-%s,%%fs\n", asm_name("__wine_call16_start") );

    /* Reload %gs from the fs-relative slot at GS_OFFSET (0x64 = fs prefix) */
    output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );

    /* Switch back to the 32-bit stack: %ss from %di, %esp from the
     * fs-relative slot at STACKOFFSET */
    output( "\tmovw %%di,%%ss\n" );
    output( "\t.byte 0x64\n\tmovl (%d),%%esp\n", STACKOFFSET );

    /* Far return to the caller */
    output( "\tlret\n" );
    output_function_size( stub_name );
}
Пример #4
0
/* Emit the pointer table and jump thunks for exported entry points that
 * link to external symbols.  The names come from ext_link_imports; the
 * spec argument is not used by this function. */
static void output_external_link_imports( DLLSPEC *spec )
{
    static const char links_table[]   = ".L__wine_spec_external_links";
    static const char thunks_symbol[] = "__wine_spec_external_link_thunks";
    unsigned int idx, offset;

    if (ext_link_imports.count == 0) return;  /* nothing to do */

    sort_names( &ext_link_imports );

    /* drop duplicates: after sorting, equal names are adjacent.  When an
     * entry is removed the index stays put so the element that moved
     * into its place is compared next. */
    idx = 1;
    while (idx < ext_link_imports.count)
    {
        if (strcmp( ext_link_imports.names[idx-1], ext_link_imports.names[idx] ))
            idx++;
        else
            remove_name( &ext_link_imports, idx );
    }

    /* table of pointers to the external symbols */
    output( "\n/* external link thunks */\n\n" );
    output( "\t.data\n" );
    output( "\t.align %d\n", get_alignment(get_ptr_size()) );
    output( ".L__wine_spec_external_links:\n" );
    for (idx = 0; idx < ext_link_imports.count; idx++)
        output( "\t%s %s\n", get_asm_ptr_keyword(), asm_name(ext_link_imports.names[idx]) );

    /* one thunk per table slot */
    output( "\n\t.text\n" );
    output( "\t.align %d\n", get_alignment(get_ptr_size()) );
    output( "%s:\n", asm_name(thunks_symbol) );

    for (idx = 0, offset = 0; idx < ext_link_imports.count; idx++, offset += get_ptr_size())
    {
        char *thunk_name = strmake( "__wine_spec_ext_link_%s", ext_link_imports.names[idx] );
        output_import_thunk( thunk_name, links_table, offset );
        free( thunk_name );
    }
    output_function_size( thunks_symbol );
}
Пример #5
0
/* output a single import thunk
 *
 * name  - symbol of the thunk function to emit
 * table - assembler symbol of the pointer table holding the target address
 * pos   - byte offset of the target's slot inside that table
 *
 * The emitted thunk is an indirect jump through table+pos, using the
 * addressing sequence required by target_cpu (and, on x86, by UsePIC).
 */
static void output_import_thunk( const char *name, const char *table, int pos )
{
    output( "\n\t.align %d\n", get_alignment(4) );
    output( "\t%s\n", func_declaration(name) );
    output( "%s\n", asm_globl(name) );
    output_cfi( ".cfi_startproc" );

    switch(target_cpu)
    {
    case CPU_x86:
        if (!UsePIC)
        {
            output( "\tjmp *(%s+%d)\n", table, pos );
        }
        else
        {
            /* get the address of label 1 into %eax, then address the
             * slot relative to it */
            output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") );
            output( "1:\tjmp *%s+%d-1b(%%eax)\n", table, pos );
        }
        break;
    case CPU_x86_64:
        output( "\tjmpq *%s+%d(%%rip)\n", table, pos );
        break;
    case CPU_ARM:
        /* load the pc-relative distance to the slot, then load pc from it */
        output( "\tldr IP,1f\n");
        output( "\tldr PC,[PC,IP]\n" );
        /* pos is an int, so print it with %d */
        output( "1:\t.long %s+%d-(1b+4)\n", table, pos );
        break;
    case CPU_ARM64:
        /* x9 = address of the table (read from the literal at label 1) */
        output( "\tadr x9, 1f\n" );
        output( "\tldur x9, [x9, #0]\n" );
        /* Add the offset piecewise.  Bits 16-23 are added as well so that
         * offsets of 0x10000 and above are no longer silently truncated;
         * offsets of 0x1000000 and above still cannot be expressed here. */
        if (pos & 0xff0000) output( "\tadd x9, x9, #%d\n", pos & 0xff0000 );
        if (pos & 0xf000) output( "\tadd x9, x9, #%d\n", pos & 0xf000 );
        if (pos & 0x0f00) output( "\tadd x9, x9, #%d\n", pos & 0x0f00 );
        if (pos & 0x00f0) output( "\tadd x9, x9, #%d\n", pos & 0x00f0 );
        if (pos & 0x000f) output( "\tadd x9, x9, #%d\n", pos & 0x000f );
        /* x9 = target address stored in the slot */
        output( "\tldur x9, [x9, #0]\n" );
        output( "\tbr x9\n" );
        output( "1:\t.quad %s\n", table );
        break;
    case CPU_POWERPC:
        /* r31 is used as scratch; its value is parked in r0 meanwhile */
        output( "\tmr %s, %s\n", ppc_reg(0), ppc_reg(31) );
        if (target_platform == PLATFORM_APPLE)
        {
            output( "\tlis %s, ha16(%s+%d+32768)\n", ppc_reg(31), table, pos );
            output( "\tla  %s, lo16(%s+%d)(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) );
        }
        else
        {
            output( "\tlis %s, (%s+%d+32768)@h\n", ppc_reg(31), table, pos );
            output( "\tla  %s, (%s+%d)@l(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) );
        }
        output( "\tlwz   %s, 0(%s)\n", ppc_reg(31), ppc_reg(31) );
        output( "\tmtctr %s\n", ppc_reg(31) );
        output( "\tmr    %s, %s\n", ppc_reg(31), ppc_reg(0) );
        output( "\tbctr\n" );
        break;
    }
    output_cfi( ".cfi_endproc" );
    output_function_size( name );
}
Пример #6
0
/*******************************************************************
 *         BuildRelays16
 *
 * Emit the complete 16-bit relay file: every CallFrom16 / CallTo16
 * core routine, the return stub, the CBClient thunks and the pending
 * DPMI event check, followed by the selector variables they use.
 * For any CPU other than x86 only a placeholder comment is emitted.
 */
void BuildRelays16(void)
{
    static const char text_symbol[] = "__wine_spec_thunk_text_16";
    /* argument pairs for the standard, register and C16ThkSL
     * CallFrom16 routines, in that order */
    static const int from16_variants[][2] =
    {
        { FALSE, FALSE },
        { TRUE,  FALSE },
        { FALSE, TRUE  }
    };
    unsigned int idx;
    int flag;

    if (target_cpu != CPU_x86)
    {
        output( "/* File not used with this architecture. Do not edit! */\n\n" );
        return;
    }

    /* File header */
    output( "/* File generated automatically. Do not edit! */\n\n" );
    output( "\t.text\n" );
    output( "%s:\n\n", asm_name(text_symbol) );
    output( "%s\n", asm_globl("__wine_call16_start") );

    /* CallFrom16 routines */
    for (idx = 0; idx < sizeof(from16_variants)/sizeof(from16_variants[0]); idx++)
        BuildCallFrom16Core( from16_variants[idx][0], from16_variants[idx][1] );

    /* CallTo16 routines: standard (0), then register (1) */
    for (flag = 0; flag <= 1; flag++)
        BuildCallTo16Core( flag );

    /* Standard CallTo16 return stub */
    BuildRet16Func();

    /* CBClientThunkSL routine, then CBClientThunkSLEx routine */
    for (flag = 0; flag <= 1; flag++)
        BuildCallTo32CBClient( flag ? TRUE : FALSE );

    /* Pending DPMI events check stub */
    BuildPendingEventCheck();

    output( "%s\n", asm_globl("__wine_call16_end") );
    output_function_size( text_symbol );

    /* Declare the return address and data selector variables */
    output( "\n\t.data\n\t.align %d\n", get_alignment(4) );
    output( "%s\n\t.long 0\n", asm_globl("CallTo16_DataSelector") );
    output( "%s\n\t.long 0\n", asm_globl("CallTo16_TebSelector") );
    if (UsePIC) output( "wine_ldt_copy_ptr:\t.long %s\n", asm_name("wine_ldt_copy") );
    output_gnu_stack_note();
}
Пример #7
0
/* Emit __wine_spec_get_pc_thunk_eax, a helper that returns its own
 * return address in %eax.  Only emitted for x86 when UsePIC is set. */
void output_get_pc_thunk(void)
{
    static const char thunk[] = "__wine_spec_get_pc_thunk_eax";

    if (target_cpu != CPU_x86 || !UsePIC) return;

    output( "\n\t.text\n" );
    output( "\t.align %d\n", get_alignment(4) );
    output( "\t%s\n", func_declaration(thunk) );
    output( "%s:\n", asm_name(thunk) );
    output_cfi( ".cfi_startproc" );
    /* the word at the top of the stack is the caller's return address */
    output( "\tmovl (%%esp),%%eax\n" );
    output( "\tret\n" );
    output_cfi( ".cfi_endproc" );
    output_function_size( thunk );
}
Пример #8
0
/*******************************************************************
 *         BuildRelays32
 *
 * Emit the 32-bit relay file, which holds the register entry point
 * built by BuildCallFrom32Regs.  For any CPU other than x86 only a
 * placeholder comment is emitted.
 */
void BuildRelays32(void)
{
    static const char text_symbol[] = "__wine_spec_thunk_text_32";

    if (target_cpu == CPU_x86)
    {
        /* File header */
        output( "/* File generated automatically. Do not edit! */\n\n" );
        output( "\t.text\n" );
        output( "%s:\n\n", asm_name(text_symbol) );

        /* 32-bit register entry point */
        BuildCallFrom32Regs();

        output_function_size( text_symbol );
        output_gnu_stack_note();
    }
    else
    {
        output( "/* File not used with this architecture. Do not edit! */\n\n" );
    }
}
Пример #9
0
/*******************************************************************
 *         output_asm_relays16
 *
 * Emit all the 16-bit relay callbacks: the CallFrom16 / CallTo16 core
 * routines, the return stub and the CBClient thunks, followed by the
 * selector variables they use.
 */
void output_asm_relays16(void)
{
    static const char text_symbol[] = "__wine_spec_thunk_text_16";
    /* argument pairs for the standard, register and C16ThkSL
     * CallFrom16 routines, in that order */
    static const int from16_variants[][2] = { { 0, 0 }, { 1, 0 }, { 0, 1 } };
    unsigned int idx;
    int flag;

    /* File header */
    output( "\t.text\n" );
    output( "%s:\n\n", asm_name(text_symbol) );
    output( "%s\n", asm_globl("__wine_call16_start") );

    /* CallFrom16 routines */
    for (idx = 0; idx < sizeof(from16_variants)/sizeof(from16_variants[0]); idx++)
        BuildCallFrom16Core( from16_variants[idx][0], from16_variants[idx][1] );

    /* CallTo16 routines: standard (0), then register (1) */
    for (flag = 0; flag <= 1; flag++)
        BuildCallTo16Core( flag );

    /* Standard CallTo16 return stub */
    BuildRet16Func();

    /* CBClientThunkSL (0), then CBClientThunkSLEx (1) */
    for (flag = 0; flag <= 1; flag++)
        BuildCallTo32CBClient( flag );

    output( "%s\n", asm_globl("__wine_call16_end") );
    output_function_size( text_symbol );

    /* Declare the return address and data selector variables */
    output( "\n\t.data\n\t.align %d\n", get_alignment(4) );
    output( "%s\n\t.long 0\n", asm_globl("CallTo16_DataSelector") );
    output( "%s\n\t.long 0\n", asm_globl("CallTo16_TebSelector") );
}
Пример #10
0
/*******************************************************************
 *         BuildCallTo16Core
 *
 * This routine builds the core routines used in 32->16 thunks:
 *
 * extern DWORD WINAPI wine_call_to_16( FARPROC16 target, DWORD cbArgs, PEXCEPTION_HANDLER handler );
 * extern void WINAPI wine_call_to_16_regs( CONTEXT86 *context, DWORD cbArgs, PEXCEPTION_HANDLER handler );
 *
 * These routines can be called directly from 32-bit code.
 *
 * All routines expect that the 16-bit stack contents (arguments) and the
 * return address (segptr to CallTo16_Ret) were already set up by the
 * caller; nb_args must contain the number of bytes to be conserved.  The
 * 16-bit SS:SP will be set accordingly.
 *
 * All other registers are either taken from the CONTEXT86 structure
 * or else set to default values.  The target routine address is either
 * given directly or taken from the CONTEXT86.
 *
 * Parameter:
 *   reg_func  non-zero emits wine_call_to_16_regs (register variant),
 *             zero emits wine_call_to_16.
 *
 * The emitted code has two parts: a 32-bit wrapper (prologue, exception
 * frame, return value handling, epilogue) and, at the local label
 * .L<name>, the code that switches stacks and jumps to the 16-bit target.
 */
static void BuildCallTo16Core( int reg_func )
{
    const char *name = reg_func ? "wine_call_to_16_regs" : "wine_call_to_16";

    /* Function header */
    function_header( name );

    /* Function entry sequence */
    output_cfi( ".cfi_startproc" );
    output( "\tpushl %%ebp\n" );
    output_cfi( ".cfi_adjust_cfa_offset 4" );
    output_cfi( ".cfi_rel_offset %%ebp,0" );
    output( "\tmovl %%esp, %%ebp\n" );
    output_cfi( ".cfi_def_cfa_register %%ebp" );

    /* Save the 32-bit registers */
    output( "\tpushl %%ebx\n" );
    output_cfi( ".cfi_rel_offset %%ebx,-4" );
    output( "\tpushl %%esi\n" );
    output_cfi( ".cfi_rel_offset %%esi,-8" );
    output( "\tpushl %%edi\n" );
    output_cfi( ".cfi_rel_offset %%edi,-12" );
    /* store %gs in the fs-relative slot at GS_OFFSET (0x64 = fs prefix) */
    output( "\t.byte 0x64\n\tmov %%gs,(%d)\n", GS_OFFSET );

    /* Setup exception frame: push the fs-relative dword at STACKOFFSET,
     * the handler argument and the dword at fs:0, then make fs:0 point
     * at the new frame */
    output( "\t.byte 0x64\n\tpushl (%d)\n", STACKOFFSET );
    output( "\tpushl 16(%%ebp)\n" ); /* handler */
    output( "\t.byte 0x64\n\tpushl (0)\n" );
    output( "\t.byte 0x64\n\tmovl %%esp,(0)\n" );

    /* Call the actual CallTo16 routine (simulate a lcall) */
    output( "\tpushl %%cs\n" );
    output( "\tcall .L%s\n", name );

    /* Remove exception frame: restore fs:0, skip the handler slot,
     * restore the fs-relative dword at STACKOFFSET */
    output( "\t.byte 0x64\n\tpopl (0)\n" );
    output( "\taddl $4, %%esp\n" );
    output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

    if ( !reg_func )
    {
        /* Convert return value: combine the low word of %eax with
         * %edx shifted into the high word */
        output( "\tandl $0xffff,%%eax\n" );
        output( "\tshll $16,%%edx\n" );
        output( "\torl %%edx,%%eax\n" );
    }
    else
    {
        /*
         * Modify CONTEXT86 structure to contain new values
         *
         * NOTE:  We restore only EAX, EBX, ECX, EDX, EBP, and ESP.
         *        The segment registers as well as ESI and EDI should
         *        not be modified by a well-behaved 16-bit routine in
         *        any case.  [If necessary, we could restore them as well,
         *        at the cost of a somewhat less efficient return path.]
         */

        output( "\tmovl 0x14(%%esp),%%edi\n" ); /* FIELD_OFFSET(STACK32FRAME,target) - FIELD_OFFSET(STACK32FRAME,edi) */
                /* everything above edi has been popped already */

        output( "\tmovl %%eax,0xb0(%%edi)\n");  /* Eax */
        output( "\tmovl %%ebx,0xa4(%%edi)\n");  /* Ebx */
        output( "\tmovl %%ecx,0xac(%%edi)\n");  /* Ecx */
        output( "\tmovl %%edx,0xa8(%%edi)\n");  /* Edx */
        output( "\tmovl %%ebp,0xb4(%%edi)\n");  /* Ebp */
        output( "\tmovl %%esi,0xc4(%%edi)\n");  /* Esp */
                 /* The return glue code saved %esp into %esi */
    }

    /* Restore the 32-bit registers */
    output( "\tpopl %%edi\n" );
    output_cfi( ".cfi_same_value %%edi" );
    output( "\tpopl %%esi\n" );
    output_cfi( ".cfi_same_value %%esi" );
    output( "\tpopl %%ebx\n" );
    output_cfi( ".cfi_same_value %%ebx" );

    /* Function exit sequence; ret $12 also removes 12 bytes of
     * arguments from the stack */
    output( "\tpopl %%ebp\n" );
    output_cfi( ".cfi_def_cfa %%esp,4" );
    output_cfi( ".cfi_same_value %%ebp" );
    output( "\tret $12\n" );
    output_cfi( ".cfi_endproc" );


    /* Start of the actual CallTo16 routine */

    output( ".L%s:\n", name );

    /* Switch to the 16-bit stack: load ss:sp from the fs-relative slot
     * at STACKOFFSET and store the old 32-bit %esp there instead */
    output( "\tmovl %%esp,%%edx\n" );
    output( "\t.byte 0x64\n\tmovw (%d),%%ss\n", STACKOFFSET + 2);
    output( "\t.byte 0x64\n\tmovw (%d),%%sp\n", STACKOFFSET );
    output( "\t.byte 0x64\n\tmovl %%edx,(%d)\n", STACKOFFSET );

    /* Make %bp point to the previous stackframe (built by CallFrom16) */
    output( "\tmovzwl %%sp,%%ebp\n" );
    output( "\tleal 0x2a(%%ebp),%%ebp\n");  /* FIELD_OFFSET(STACK16FRAME,bp) */

    /* Add the specified offset to the new sp */
    output( "\tsubw 0x2c(%%edx), %%sp\n");  /* FIELD_OFFSET(STACK32FRAME,nb_args) */

    if (reg_func)
    {
        /* Push the called routine address */
        output( "\tmovl 0x28(%%edx),%%edx\n");  /* FIELD_OFFSET(STACK32FRAME,target) */
        output( "\tpushw 0xbc(%%edx)\n");  /* SegCs */
        output( "\tpushw 0xb8(%%edx)\n");  /* Eip */

        /* Get the registers; %edx holds the context pointer and is
         * therefore loaded last, SegDs is pushed now and popped into
         * %ds once the other loads are done */
        output( "\tpushw 0x98(%%edx)\n");  /* SegDs */
        output( "\tpushl 0x94(%%edx)\n");  /* SegEs */
        output( "\tpopl %%es\n" );
        output( "\tpushl 0x90(%%edx)\n");  /* SegFs */
        output( "\tpopl %%fs\n" );
        output( "\tpushl 0x8c(%%edx)\n");  /* SegGs */
        output( "\tpopl %%gs\n" );
        output( "\tmovl 0xb4(%%edx),%%ebp\n");  /* Ebp */
        output( "\tmovl 0xa0(%%edx),%%esi\n");  /* Esi */
        output( "\tmovl 0x9c(%%edx),%%edi\n");  /* Edi */
        output( "\tmovl 0xb0(%%edx),%%eax\n");  /* Eax */
        output( "\tmovl 0xa4(%%edx),%%ebx\n");  /* Ebx */
        output( "\tmovl 0xac(%%edx),%%ecx\n");  /* Ecx */
        output( "\tmovl 0xa8(%%edx),%%edx\n");  /* Edx */

        /* Get the 16-bit ds */
        output( "\tpopw %%ds\n" );
    }
    else  /* not a register function */
    {
        /* Push the called routine address */
        output( "\tpushl 0x28(%%edx)\n"); /* FIELD_OFFSET(STACK32FRAME,target) */

        /* Set %fs and %gs to the value saved by the last CallFrom16 */
        output( "\tpushw -22(%%ebp)\n" ); /* FIELD_OFFSET(STACK16FRAME,fs)-FIELD_OFFSET(STACK16FRAME,bp) */
        output( "\tpopw %%fs\n" );
        output( "\tpushw -20(%%ebp)\n" ); /* FIELD_OFFSET(STACK16FRAME,gs)-FIELD_OFFSET(STACK16FRAME,bp) */
        output( "\tpopw %%gs\n" );

        /* Set %ds and %es (and %ax just in case) equal to %ss */
        output( "\tmovw %%ss,%%ax\n" );
        output( "\tmovw %%ax,%%ds\n" );
        output( "\tmovw %%ax,%%es\n" );
    }

    /* Jump to the called routine: far return with an operand-size
     * prefix (0x66) to the address pushed above */
    output( "\t.byte 0x66\n" );
    output( "\tlret\n" );

    /* Function footer */
    output_function_size( name );
}
Пример #11
0
/* output a single import thunk
 *
 * name  - symbol of the thunk function to emit
 * table - assembler symbol of the pointer table holding the target address
 * pos   - byte offset of the target's slot inside that table
 *
 * The emitted thunk is an indirect jump through table+pos, using the
 * addressing sequence required by target_cpu (and, on x86 and sparc,
 * by UsePIC).
 */
static void output_import_thunk( const char *name, const char *table, int pos )
{
    output( "\n\t.align %d\n", get_alignment(4) );
    output( "\t%s\n", func_declaration(name) );
    output( "%s\n", asm_globl(name) );
    output_cfi( ".cfi_startproc" );

    switch(target_cpu)
    {
    case CPU_x86:
        if (!UsePIC)
        {
            output( "\tjmp *(%s+%d)\n", table, pos );
        }
        else
        {
            /* get the address of label 1 into %eax, then address the
             * slot relative to it */
            output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") );
            output( "1:\tjmp *%s+%d-1b(%%eax)\n", table, pos );
        }
        break;
    case CPU_x86_64:
        output( "\tjmpq *%s+%d(%%rip)\n", table, pos );
        break;
    case CPU_SPARC:
        if ( !UsePIC )
        {
            output( "\tsethi %%hi(%s+%d), %%g1\n", table, pos );
            output( "\tld [%%g1+%%lo(%s+%d)], %%g1\n", table, pos );
            output( "\tjmp %%g1\n" );
            output( "\tnop\n" );
        }
        else
        {
            /* Hmpf.  Stupid sparc assembler always interprets global variable
               names as GOT offsets, so we have to do it the long way ... */
            output( "\tsave %%sp, -96, %%sp\n" );
            output( "0:\tcall 1f\n" );
            output( "\tnop\n" );
            output( "1:\tsethi %%hi(%s+%d-0b), %%g1\n", table, pos );
            output( "\tor %%g1, %%lo(%s+%d-0b), %%g1\n", table, pos );
            output( "\tld [%%g1+%%o7], %%g1\n" );
            output( "\tjmp %%g1\n" );
            output( "\trestore\n" );
        }
        break;
    case CPU_ARM:
        /* IP = table address from the literal word below, then load
         * PC from the slot at offset pos */
        output( "\tldr IP,[PC,#0]\n");
        output( "\tldr PC,[IP,#%d]\n", pos);
        output( "\t.long %s\n", table );
        break;
    case CPU_ARM64:
        /* x9 = address of the table (read from the literal at label 1) */
        output( "\tadr x9, 1f\n" );
        output( "\tldur x9, [x9, #0]\n" );
        /* Add the offset piecewise.  Bits 16-23 are added as well so that
         * offsets of 0x10000 and above are no longer silently truncated;
         * offsets of 0x1000000 and above still cannot be expressed here.
         * The masked values are ints, hence %d. */
        if (pos & 0xff0000) output( "\tadd x9, x9, #%d\n", pos & 0xff0000 );
        if (pos & 0xf000) output( "\tadd x9, x9, #%d\n", pos & 0xf000 );
        if (pos & 0x0f00) output( "\tadd x9, x9, #%d\n", pos & 0x0f00 );
        if (pos & 0x00f0) output( "\tadd x9, x9, #%d\n", pos & 0x00f0 );
        if (pos & 0x000f) output( "\tadd x9, x9, #%d\n", pos & 0x000f );
        /* x9 = target address stored in the slot */
        output( "\tldur x9, [x9, #0]\n" );
        output( "\tbr x9\n" );
        output( "1:\t.quad %s\n", table );
        break;
    case CPU_POWERPC:
        /* r31 is used as scratch; its value is parked in r0 meanwhile */
        output( "\tmr %s, %s\n", ppc_reg(0), ppc_reg(31) );
        if (target_platform == PLATFORM_APPLE)
        {
            output( "\tlis %s, ha16(%s+%d+32768)\n", ppc_reg(31), table, pos );
            output( "\tla  %s, lo16(%s+%d)(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) );
        }
        else
        {
            output( "\tlis %s, (%s+%d+32768)@h\n", ppc_reg(31), table, pos );
            output( "\tla  %s, (%s+%d)@l(%s)\n", ppc_reg(31), table, pos, ppc_reg(31) );
        }
        output( "\tlwz   %s, 0(%s)\n", ppc_reg(31), ppc_reg(31) );
        output( "\tmtctr %s\n", ppc_reg(31) );
        output( "\tmr    %s, %s\n", ppc_reg(31), ppc_reg(0) );
        output( "\tbctr\n" );
        break;
    }
    output_cfi( ".cfi_endproc" );
    output_function_size( name );
}
Пример #12
0
/*******************************************************************
 *         output_stubs
 *
 * Output the functions for stub entry points.
 *
 * For every TYPE_STUB entry point of spec a small function is emitted
 * that calls __wine_spec_unimplemented_stub with the spec file name
 * string and either the entry point's name string or, when it has no
 * name, its ordinal.  The name strings referenced by the stubs are
 * emitted afterwards in the string section.
 *
 * Only x86, x86_64 and ARM are handled; any other CPU trips the assert.
 */
void output_stubs( DLLSPEC *spec )
{
    const char *name, *exp_name;
    int i, pad, count;

    if (!has_stubs( spec )) return;

    output( "\n/* stub functions */\n\n" );
    output( "\t.text\n" );

    /* count tracks how many stubs reference a name string */
    for (i = count = 0; i < spec->nb_entry_points; i++)
    {
        ORDDEF *odp = &spec->entry_points[i];
        if (odp->type != TYPE_STUB) continue;

        name = get_stub_name( odp, spec );
        exp_name = odp->name ? odp->name : odp->export_name;
        output( "\t.align %d\n", get_alignment(4) );
        output( "\t%s\n", func_declaration(name) );
        output( "%s:\n", asm_name(name) );
        output_cfi( ".cfi_startproc" );

        switch (target_cpu)
        {
        case CPU_x86:
            /* flesh out the stub a bit to make safedisc happy */
            for (pad = 0; pad < 9; pad++) output(" \tnop\n" );

            /* room for the two arguments of the call below */
            output( "\tsubl $12,%%esp\n" );
            output_cfi( ".cfi_adjust_cfa_offset 12" );
            if (UsePIC)
            {
                /* address the strings relative to label 1 */
                output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") );
                output( "1:" );
                if (exp_name)
                {
                    output( "\tleal .L%s_string-1b(%%eax),%%ecx\n", name );
                    output( "\tmovl %%ecx,4(%%esp)\n" );
                    count++;
                }
                else
                    output( "\tmovl $%d,4(%%esp)\n", odp->ordinal );
                output( "\tleal .L__wine_spec_file_name-1b(%%eax),%%ecx\n" );
                output( "\tmovl %%ecx,(%%esp)\n" );
            }
            else
            {
                if (exp_name)
                {
                    output( "\tmovl $.L%s_string,4(%%esp)\n", name );
                    count++;
                }
                else
                    output( "\tmovl $%d,4(%%esp)\n", odp->ordinal );
                output( "\tmovl $.L__wine_spec_file_name,(%%esp)\n" );
            }
            output( "\tcall %s\n", asm_name("__wine_spec_unimplemented_stub") );
            break;
        case CPU_x86_64:
            output( "\tsubq $8,%%rsp\n" );
            output_cfi( ".cfi_adjust_cfa_offset 8" );
            output( "\tleaq .L__wine_spec_file_name(%%rip),%%rdi\n" );
            if (exp_name)
            {
                /* leading tab added: this was the only instruction
                 * emitted without indentation */
                output( "\tleaq .L%s_string(%%rip),%%rsi\n", name );
                count++;
            }
            else
                output( "\tmovq $%d,%%rsi\n", odp->ordinal );
            output( "\tcall %s\n", asm_name("__wine_spec_unimplemented_stub") );
            break;
        case CPU_ARM:
            /* each argument is loaded from a literal word placed right
             * after the load; "mov PC,PC" jumps over that word */
            output( "\tldr r0,[PC,#0]\n");
            output( "\tmov PC,PC\n");
            output( "\t.long .L__wine_spec_file_name\n" );
            output( "\tldr r1,[PC,#0]\n");
            output( "\tmov PC,PC\n");
            if (exp_name)
            {
                output( "\t.long .L%s_string\n", name );
                count++;
            }
            else
                output( "\t.long %d\n", odp->ordinal );
            output( "\tbl %s\n", asm_name("__wine_spec_unimplemented_stub") );
            break;
        default:
            assert(0);
        }
        output_cfi( ".cfi_endproc" );
        output_function_size( name );
    }

    /* emit the name strings referenced above, if there are any */
    if (count)
    {
        output( "\t%s\n", get_asm_string_section() );
        for (i = 0; i < spec->nb_entry_points; i++)
        {
            ORDDEF *odp = &spec->entry_points[i];
            if (odp->type != TYPE_STUB) continue;
            exp_name = odp->name ? odp->name : odp->export_name;
            if (exp_name)
            {
                name = get_stub_name( odp, spec );
                output( ".L%s_string:\n", name );
                output( "\t%s \"%s\"\n", get_asm_string_keyword(), exp_name );
            }
        }
    }
}
Пример #13
0
/*******************************************************************
 *         BuildCallTo32CBClient
 *
 * Call a CBClient relay stub from 32-bit code (KERNEL.620).
 *
 * Since the relay stub is itself 32-bit, this should not be a problem;
 * unfortunately, the relay stubs are expected to switch back to a
 * 16-bit stack (and 16-bit code) after completion :-(
 *
 * This would conflict with our 16- vs. 32-bit stack handling, so
 * we simply switch *back* to our 32-bit stack before returning to
 * the caller ...
 *
 * The CBClient relay stub expects to be called with the following
 * 16-bit stack layout, and with ebp and ebx pointing into the 16-bit
 * stack at the designated places:
 *
 *    ...
 *  (ebp+14) original arguments to the callback routine
 *  (ebp+10) far return address to original caller
 *  (ebp+6)  Thunklet target address
 *  (ebp+2)  Thunklet relay ID code
 *  (ebp)    BP (saved by CBClientGlueSL)
 *  (ebp-2)  SI (saved by CBClientGlueSL)
 *  (ebp-4)  DI (saved by CBClientGlueSL)
 *  (ebp-6)  DS (saved by CBClientGlueSL)
 *
 *   ...     buffer space used by the 16-bit side glue for temp copies
 *
 *  (ebx+4)  far return address to 16-bit side glue code
 *  (ebx)    saved 16-bit ss:sp (pointing to ebx+4)
 *
 * The 32-bit side glue code accesses both the original arguments (via ebp)
 * and the temporary copies prepared by the 16-bit side glue (via ebx).
 * After completion, the stub will load ss:sp from the buffer at ebx
 * and perform a far return to 16-bit code.
 *
 * To trick the relay stub into returning to us, we replace the 16-bit
 * return address to the glue code by a cs:ip pair pointing to our
 * return entry point (the original return address is saved first).
 * Our return stub thus called will then reload the 32-bit ss:esp and
 * return to 32-bit code (by using an ss:esp value that we have also
 * pushed onto the 16-bit stack before and a cs:eip value found at
 * that position on the 32-bit stack).  The ss:esp to be restored is
 * found relative to the 16-bit stack pointer at:
 *
 *  (ebx-4)   ss  (flat)
 *  (ebx-8)   sp  (32-bit stack pointer)
 *
 * The second variant of this routine, CALL32_CBClientEx, which is used
 * to implement KERNEL.621, has to cope with yet another problem: Here,
 * the 32-bit side directly returns to the caller of the CBClient thunklet,
 * restoring registers saved by CBClientGlueSL and cleaning up the stack.
 * As we have to return to our 32-bit code first, we have to adapt the
 * layout of our temporary area so as to include values for the registers
 * that are to be restored, and later (in the implementation of KERNEL.621)
 * we *really* restore them. The return stub restores DS, DI, SI, and BP
 * from the stack, skips the next 8 bytes (CBClient relay code / target),
 * and then performs a lret NN, where NN is the number of arguments to be
 * removed. Thus, we prepare our temporary area as follows:
 *
 *     (ebx+22) 16-bit cs  (this segment)
 *     (ebx+20) 16-bit ip  ('16-bit' return entry point)
 *     (ebx+16) 32-bit ss  (flat)
 *     (ebx+12) 32-bit sp  (32-bit stack pointer)
 *     (ebx+10) 16-bit bp  (points to ebx+24)
 *     (ebx+8)  16-bit si  (ignored)
 *     (ebx+6)  16-bit di  (ignored)
 *     (ebx+4)  16-bit ds  (we actually use the flat DS here)
 *     (ebx+2)  16-bit ss  (16-bit stack segment)
 *     (ebx+0)  16-bit sp  (points to ebx+4)
 *
 * Note that we ensure that DS is not changed and remains the flat segment,
 * and the 32-bit stack pointer our own return stub needs fits just
 * perfectly into the 8 bytes that are skipped by the Windows stub.
 * One problem is that we have to determine the number of removed arguments,
 * as these have to be really removed in KERNEL.621. Thus, the BP value
 * that we place in the temporary area to be restored, contains the value
 * that SP would have if no arguments were removed. By comparing the actual
 * value of SP with this value in our return stub we can compute the number
 * of removed arguments. This is then returned to KERNEL.621.
 *
 * The stack layout of this function:
 * (ebp+20)  nArgs     pointer to variable receiving nr. of args (Ex only)
 * (ebp+16)  esi       pointer to caller's esi value
 * (ebp+12)  arg       ebp value to be set for relay stub
 * (ebp+8)   func      CBClient relay stub address
 * (ebp+4)   ret addr
 * (ebp)     ebp
 */
static void BuildCallTo32CBClient( int isEx )
{
    /* Emits two functions: the 32-bit entry point (CALL32_CBClientEx when
     * isEx is non-zero, CALL32_CBClient otherwise) and the matching
     * '16-bit' return stub (same name with a _Ret suffix).
     *
     * NOTE(review): the stack layout table in the header comment puts the
     * esi pointer at ebp+16 and nArgs at ebp+20, but the code below reads
     * the temporary-area pointer from 16(%ebp) and the esi pointer from
     * 20(%ebp); the table looks one slot short from ebp+16 upwards.
     * Confirm against the C prototypes of the callers. */
    function_header( isEx ? "CALL32_CBClientEx" : "CALL32_CBClient" );

    /* Entry code */

    output_cfi( ".cfi_startproc" );
    output( "\tpushl %%ebp\n" );
    output_cfi( ".cfi_adjust_cfa_offset 4" );
    output_cfi( ".cfi_rel_offset %%ebp,0" );
    output( "\tmovl %%esp,%%ebp\n" );
    output_cfi( ".cfi_def_cfa_register %%ebp" );
    output( "\tpushl %%edi\n" );
    output_cfi( ".cfi_rel_offset %%edi,-4" );
    output( "\tpushl %%esi\n" );
    output_cfi( ".cfi_rel_offset %%esi,-8" );
    output( "\tpushl %%ebx\n" );
    output_cfi( ".cfi_rel_offset %%ebx,-12" );

    /* Get pointer to temporary area and save the 32-bit stack pointer */

    output( "\tmovl 16(%%ebp), %%ebx\n" );
    /* saved value is esp-8; presumably this accounts for the %cs and
     * return address pushed by the simulated far call below */
    output( "\tleal -8(%%esp), %%eax\n" );

    /* the slot used for the saved stack pointer differs between the
     * two variants (ebx-8 vs. ebx+12, see the header comment) */
    if ( !isEx )
        output( "\tmovl %%eax, -8(%%ebx)\n" );
    else
        output( "\tmovl %%eax, 12(%%ebx)\n" );

    /* Set up registers and call CBClient relay stub (simulating a far call) */

    /* esi = value pointed to by the argument at 20(%ebp) */
    output( "\tmovl 20(%%ebp), %%esi\n" );
    output( "\tmovl (%%esi), %%esi\n" );

    /* eax = relay stub address; ebp is loaded last because the
     * arguments are addressed through it */
    output( "\tmovl 8(%%ebp), %%eax\n" );
    output( "\tmovl 12(%%ebp), %%ebp\n" );

    output( "\tpushl %%cs\n" );
    output( "\tcall *%%eax\n" );

    /* Return new esi value to caller */

    /* %ebp no longer points at our frame here, so the arguments are
     * addressed through %esp */
    output( "\tmovl 32(%%esp), %%edi\n" );
    output( "\tmovl %%esi, (%%edi)\n" );

    /* Return argument size to caller */
    if ( isEx )
    {
        /* the return stub left the value to report in %ebp */
        output( "\tmovl 36(%%esp), %%ebx\n" );
        output( "\tmovl %%ebp, (%%ebx)\n" );
    }

    /* Restore registers and return */

    output( "\tpopl %%ebx\n" );
    output_cfi( ".cfi_same_value %%ebx" );
    output( "\tpopl %%esi\n" );
    output_cfi( ".cfi_same_value %%esi" );
    output( "\tpopl %%edi\n" );
    output_cfi( ".cfi_same_value %%edi" );
    output( "\tpopl %%ebp\n" );
    output_cfi( ".cfi_def_cfa %%esp,4" );
    output_cfi( ".cfi_same_value %%ebp" );
    output( "\tret\n" );
    output_cfi( ".cfi_endproc" );
    output_function_size( isEx ? "CALL32_CBClientEx" : "CALL32_CBClient" );

    /* '16-bit' return stub */

    function_header( isEx ? "CALL32_CBClientEx_Ret" : "CALL32_CBClient_Ret" );
    if ( !isEx )
    {
        /* reload ss:esp from the pair stored below the 16-bit sp */
        output( "\tmovzwl %%sp, %%ebx\n" );
        output( "\tlssl %%ss:-16(%%ebx), %%esp\n" );
    }
    else
    {
        /* ebp = sp - bp (16-bit arithmetic, zero-extended), computed
         * before ss:esp is reloaded relative to the old bp */
        output( "\tmovzwl %%bp, %%ebx\n" );
        output( "\tsubw %%bp, %%sp\n" );
        output( "\tmovzwl %%sp, %%ebp\n" );
        output( "\tlssl %%ss:-12(%%ebx), %%esp\n" );
    }
    /* far return into the 32-bit code after the simulated far call */
    output( "\tlret\n" );
    output_function_size( isEx ? "CALL32_CBClientEx_Ret" : "CALL32_CBClient_Ret" );
}
Пример #14
0
/*******************************************************************
 *         BuildCallFrom32Regs
 *
 * Build a 32-bit-to-Wine call-back function for a 'register' function.
 *
 * Two routines are emitted:
 *   __wine_call_from_32_regs          the call-back itself
 *   __wine_call_from_32_restore_regs  loads the CONTEXT86 pointer found at
 *                                     4(%esp) into %ecx and jumps into the
 *                                     register-restore sequence below
 *
 * The return address into the relay code (ebp+4) is used as a pointer to
 * a small descriptor read by the generated code:
 *   +0  long   offset of the entry point, relative to the descriptor address
 *   +4  byte   number of argument bytes to copy
 *   +5  byte   number of argument bytes to remove on return
 *
 * Stack layout (STACK_SPACE = 128 + sizeof(CONTEXT86)):
 *   ...
 * (ebp+16)  first arg
 * (ebp+12)  ret addr to user code
 * (ebp+8)   eax saved by relay code
 * (ebp+4)   ret addr to relay code
 * (ebp+0)   saved ebp
 * (ebp-128) buffer area to allow stack frame manipulation
 * (ebp-STACK_SPACE)  CONTEXT86 struct
 *  ....     padding for 16-byte stack alignment
 * (esp+n)   CONTEXT86 *argument (n = number of argument bytes copied)
 * (esp)     arguments copied from (ebp+16)
 *
 * The entry point routine is called with a CONTEXT* extra argument,
 * following the normal args. In this context structure, EIP_reg
 * contains the return address to user code, and ESP_reg the stack
 * pointer on return (with the return address and arguments already
 * removed).
 */
static void BuildCallFrom32Regs(void)
{
    static const int STACK_SPACE = 128 + sizeof(CONTEXT86);

    /* Function header */

    function_header( "__wine_call_from_32_regs" );

    /* Allocate some buffer space on the stack */

    output( "\tpushl %%ebp\n" );
    output( "\tmovl %%esp,%%ebp\n" );
    output( "\tleal -%d(%%esp), %%esp\n", STACK_SPACE + 4 /* for context arg */);

    /* Build the context structure */

    output( "\tpushfl\n" );
    output( "\tpopl %%eax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(EFlags) - STACK_SPACE );
    output( "\tmovl 0(%%ebp),%%eax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(Ebp) - STACK_SPACE );
    output( "\tmovl 8(%%ebp),%%eax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(Eax) - STACK_SPACE );
    output( "\tmovl %%ebx,%d(%%ebp)\n", CONTEXTOFFSET(Ebx) - STACK_SPACE );
    output( "\tmovl %%ecx,%d(%%ebp)\n", CONTEXTOFFSET(Ecx) - STACK_SPACE );
    output( "\tmovl %%edx,%d(%%ebp)\n", CONTEXTOFFSET(Edx) - STACK_SPACE );
    output( "\tmovl %%esi,%d(%%ebp)\n", CONTEXTOFFSET(Esi) - STACK_SPACE );
    output( "\tmovl %%edi,%d(%%ebp)\n", CONTEXTOFFSET(Edi) - STACK_SPACE );

    /* Segment registers are stored zero-extended: %eax is cleared once and
     * only its low word is reloaded for each register. */
    output( "\txorl %%eax,%%eax\n" );
    output( "\tmovw %%cs,%%ax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(SegCs) - STACK_SPACE );
    output( "\tmovw %%es,%%ax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(SegEs) - STACK_SPACE );
    output( "\tmovw %%fs,%%ax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(SegFs) - STACK_SPACE );
    output( "\tmovw %%gs,%%ax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(SegGs) - STACK_SPACE );
    output( "\tmovw %%ss,%%ax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(SegSs) - STACK_SPACE );
    output( "\tmovw %%ds,%%ax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(SegDs) - STACK_SPACE );
    output( "\tmovw %%ax,%%es\n" );  /* set %es equal to %ds just in case */

    output( "\tmovl $0x%x,%%eax\n", CONTEXT86_FULL );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(ContextFlags) - STACK_SPACE );

    output( "\tmovl 12(%%ebp),%%eax\n" ); /* Get %eip at time of call */
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(Eip) - STACK_SPACE );

    /* Transfer the arguments */

    output( "\tmovl 4(%%ebp),%%ebx\n" );   /* get relay code addr */
    output( "\tmovzbl 4(%%ebx),%%ecx\n" ); /* fetch number of args to copy */
    output( "\tsubl %%ecx,%%esp\n" );
    output( "\tandl $~15,%%esp\n" );       /* keep the stack 16-byte aligned */
    output( "\tleal 16(%%ebp),%%esi\n" );  /* get %esp at time of call */
    output( "\tmovl %%esp,%%edi\n" );
    output( "\tshrl $2,%%ecx\n" );         /* byte count -> dword count */
    output( "\tjz 1f\n" );
    output( "\tcld\n" );
    output( "\trep\n\tmovsl\n" );  /* copy args */
    output( "1:\tleal %d(%%ebp),%%eax\n", -STACK_SPACE );  /* get addr of context struct */
    output( "\tmovl %%eax,(%%edi)\n" );    /* and pass it as extra arg */
    output( "\tmovzbl 5(%%ebx),%%eax\n" ); /* fetch number of args to remove */
    output( "\tleal 16(%%ebp,%%eax),%%eax\n" );
    output( "\tmovl %%eax,%d(%%ebp)\n", CONTEXTOFFSET(Esp) - STACK_SPACE );

    /* Call the entry point */

    output( "\taddl (%%ebx),%%ebx\n" );    /* entry point = descriptor addr + stored offset */
    output( "\tcall *%%ebx\n" );
    output( "\tleal -%d(%%ebp),%%ecx\n", STACK_SPACE );

    /* Restore the context structure (%ecx points to the CONTEXT86) */

    output( "2:\tpushl %d(%%ecx)\n", CONTEXTOFFSET(SegEs) );
    output( "\tpopl %%es\n" );
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(SegFs) );
    output( "\tpopl %%fs\n" );
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(SegGs) );
    output( "\tpopl %%gs\n" );

    output( "\tmovl %d(%%ecx),%%edi\n", CONTEXTOFFSET(Edi) );
    output( "\tmovl %d(%%ecx),%%esi\n", CONTEXTOFFSET(Esi) );
    output( "\tmovl %d(%%ecx),%%edx\n", CONTEXTOFFSET(Edx) );
    output( "\tmovl %d(%%ecx),%%ebx\n", CONTEXTOFFSET(Ebx) );
    output( "\tmovl %d(%%ecx),%%eax\n", CONTEXTOFFSET(Eax) );
    output( "\tmovl %d(%%ecx),%%ebp\n", CONTEXTOFFSET(Ebp) );

    /* Switch to the stack recorded in the context; %ss is loaded
     * immediately before %esp. */
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(SegSs) );
    output( "\tpopl %%ss\n" );
    output( "\tmovl %d(%%ecx),%%esp\n", CONTEXTOFFSET(Esp) );

    /* Build an iret frame (eflags, cs, eip) on the new stack, with the
     * saved %ds pushed last so it can be popped just before the iret.
     * %ecx is reloaded last because it is the base pointer until then. */
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(EFlags) );
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(SegCs) );
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(Eip) );
    output( "\tpushl %d(%%ecx)\n", CONTEXTOFFSET(SegDs) );
    output( "\tmovl %d(%%ecx),%%ecx\n", CONTEXTOFFSET(Ecx) );

    output( "\tpopl %%ds\n" );
    output( "\tiret\n" );
    output_function_size( "__wine_call_from_32_regs" );

    /* Secondary entry point: restore registers from the CONTEXT86 whose
     * address is at 4(%esp), reusing the sequence at label 2 above. */
    function_header( "__wine_call_from_32_restore_regs" );
    output( "\tmovl 4(%%esp),%%ecx\n" );
    output( "\tjmp 2b\n" );
    output_function_size( "__wine_call_from_32_restore_regs" );
}
Пример #15
0
/* Output the delayed import table of a Win32 module.
 *
 * Emits, in the .data section: the list of delay-load descriptors (eight
 * pointer-sized fields per delayed dll, followed by an all-zero descriptor),
 * the .L__wine_delay_IAT and .L__wine_delay_INT arrays, one module handle
 * slot per delayed dll, and the dll / import name strings.
 * Nothing is emitted when there are no delayed imports.
 */
static void output_delayed_imports( const DLLSPEC *spec )
{
    int dll, entry, field;
    int first_entry = 0;  /* index of the current dll's first IAT/INT slot */
    int module_slot = 0;  /* index of the current dll's module handle slot */

    if (!nb_delayed) return;

    output( "\n/* delayed imports */\n\n" );
    output( "\t.data\n" );
    output( "\t.align %d\n", get_alignment(get_ptr_size()) );
    output( "%s\n", asm_globl("__wine_spec_delay_imports") );

    /* descriptor list: one record per delayed dll */

    for (dll = 0; dll < nb_imports; dll++)
    {
        if (dll_imports[dll]->delay)
        {
            /* grAttrs */
            output( "\t%s 0\n", get_asm_ptr_keyword() );
            /* szName */
            output( "\t%s .L__wine_delay_name_%d\n",
                    get_asm_ptr_keyword(), dll );
            /* phmod */
            output( "\t%s .L__wine_delay_modules+%d\n",
                    get_asm_ptr_keyword(), module_slot * get_ptr_size() );
            /* pIAT */
            output( "\t%s .L__wine_delay_IAT+%d\n",
                    get_asm_ptr_keyword(), first_entry * get_ptr_size() );
            /* pINT */
            output( "\t%s .L__wine_delay_INT+%d\n",
                    get_asm_ptr_keyword(), first_entry * get_ptr_size() );
            /* pBoundIAT */
            output( "\t%s 0\n", get_asm_ptr_keyword() );
            /* pUnloadIAT */
            output( "\t%s 0\n", get_asm_ptr_keyword() );
            /* dwTimeStamp */
            output( "\t%s 0\n", get_asm_ptr_keyword() );

            first_entry += dll_imports[dll]->nb_imports;
            module_slot++;
        }
    }

    /* all-zero descriptor after the last dll: grAttrs, szName, phmod, pIAT,
     * pINT, pBoundIAT, pUnloadIAT, dwTimeStamp */
    for (field = 0; field < 8; field++)
        output( "\t%s 0\n", get_asm_ptr_keyword() );

    /* import address table: one pointer to the per-import label each */

    output( "\n.L__wine_delay_IAT:\n" );
    for (dll = 0; dll < nb_imports; dll++)
    {
        if (dll_imports[dll]->delay)
        {
            for (entry = 0; entry < dll_imports[dll]->nb_imports; entry++)
            {
                ORDDEF *import = dll_imports[dll]->imports[entry];
                const char *label;

                /* imports without a name are labelled by their export name */
                if (import->name) label = import->name;
                else label = import->export_name;

                output( "\t%s .L__wine_delay_imp_%d_%s\n",
                        get_asm_ptr_keyword(), dll, label );
            }
        }
    }

    /* import name table: pointer to the name string, or the ordinal */

    output( "\n.L__wine_delay_INT:\n" );
    for (dll = 0; dll < nb_imports; dll++)
    {
        if (dll_imports[dll]->delay)
        {
            for (entry = 0; entry < dll_imports[dll]->nb_imports; entry++)
            {
                ORDDEF *import = dll_imports[dll]->imports[entry];

                if (import->name)
                    output( "\t%s .L__wine_delay_data_%d_%s\n",
                            get_asm_ptr_keyword(), dll, import->name );
                else
                    output( "\t%s %d\n", get_asm_ptr_keyword(), import->ordinal );
            }
        }
    }

    /* module handle slots, initially zero */

    output( "\n.L__wine_delay_modules:\n" );
    for (dll = 0; dll < nb_imports; dll++)
    {
        if (!dll_imports[dll]->delay) continue;
        output( "\t%s 0\n", get_asm_ptr_keyword() );
    }

    /* dll file names */

    for (dll = 0; dll < nb_imports; dll++)
    {
        if (dll_imports[dll]->delay)
        {
            output( ".L__wine_delay_name_%d:\n", dll );
            output( "\t%s \"%s\"\n",
                    get_asm_string_keyword(), dll_imports[dll]->spec->file_name );
        }
    }

    /* names of the imports that are imported by name */

    for (dll = 0; dll < nb_imports; dll++)
    {
        if (dll_imports[dll]->delay)
        {
            for (entry = 0; entry < dll_imports[dll]->nb_imports; entry++)
            {
                ORDDEF *import = dll_imports[dll]->imports[entry];

                if (import->name)
                {
                    output( ".L__wine_delay_data_%d_%s:\n", dll, import->name );
                    output( "\t%s \"%s\"\n", get_asm_string_keyword(), import->name );
                }
            }
        }
    }
    output_function_size( "__wine_spec_delay_imports" );
}
Пример #16
0
/*******************************************************************
 *         BuildCallFrom16Core
 *
 * This routine builds the core routines used in 16->32 thunks:
 * CallFrom16Word, CallFrom16Long, CallFrom16Register, and CallFrom16Thunk.
 *
 * Parameters:
 *   reg_func  non-zero to emit the 'register' variant
 *             (__wine_call_from_16_regs), which saves the flags and
 *             builds a CONTEXT86 for the relay routine.
 *   thunk     non-zero to emit the C16ThkSL variant
 *             (__wine_call_from_16_thunk); when set, it takes precedence
 *             over reg_func for the function name and the function
 *             returns right after emitting the thunk code.
 * With both parameters zero, __wine_call_from_16 is emitted.
 *
 * These routines are intended to be called via a far call (with 32-bit
 * operand size) from 16-bit code.  The 16-bit code stub must push %bp,
 * the 32-bit entry point to be called, and the argument conversion
 * routine to be used (see stack layout below).
 *
 * The core routine completes the STACK16FRAME on the 16-bit stack and
 * switches to the 32-bit stack.  Then, the argument conversion routine
 * is called; it gets passed the 32-bit entry point and a pointer to the
 * 16-bit arguments (on the 16-bit stack) as parameters. (You can either
 * use conversion routines automatically generated by BuildCallFrom16,
 * or write your own for special purposes.)
 *
 * The conversion routine must call the 32-bit entry point, passing it
 * the converted arguments, and return its return value to the core.
 * After the conversion routine has returned, the core switches back
 * to the 16-bit stack, converts the return value to the DX:AX format
 * (CallFrom16Long), and returns to the 16-bit call stub.  All parameters,
 * including %bp, are popped off the stack.
 *
 * The 16-bit call stub now returns to the caller, popping the 16-bit
 * arguments if necessary (pascal calling convention).
 *
 * In the case of a 'register' function, CallFrom16Register fills a
 * CONTEXT86 structure with the values all registers had at the point
 * the first instruction of the 16-bit call stub was about to be
 * executed.  A pointer to this CONTEXT86 is passed as third parameter
 * to the argument conversion routine, which typically passes it on
 * to the called 32-bit entry point.
 *
 * CallFrom16Thunk is a special variant used by the implementation of
 * the Win95 16->32 thunk functions C16ThkSL and C16ThkSL01 and is
 * implemented as follows:
 * On entry, the EBX register is set up to contain a flat pointer to the
 * 16-bit stack such that EBX+22 points to the first argument.
 * Then, the entry point is called, while EBP is set up to point
 * to the return address (on the 32-bit stack).
 * The called function returns with CX set to the number of bytes
 * to be popped off the caller's stack.
 *
 * Stack layout upon entry to the core routine (STACK16FRAME):
 *  ...           ...
 * (sp+24) word   first 16-bit arg
 * (sp+22) word   cs
 * (sp+20) word   ip
 * (sp+18) word   bp
 * (sp+14) long   32-bit entry point (reused for Win16 mutex recursion count)
 * (sp+12) word   ip of actual entry point (necessary for relay debugging)
 * (sp+8)  long   relay (argument conversion) function entry point
 * (sp+4)  long   cs of 16-bit entry point
 * (sp)    long   ip of 16-bit entry point
 *
 * Added on the stack:
 * (sp-2)  word   saved gs
 * (sp-4)  word   saved fs
 * (sp-6)  word   saved es
 * (sp-8)  word   saved ds
 * (sp-12) long   saved ebp
 * (sp-16) long   saved ecx
 * (sp-20) long   saved edx
 * (sp-24) long   saved previous stack
 */
static void BuildCallFrom16Core( int reg_func, int thunk )
{
    /* Function header */
    if (thunk) function_header( "__wine_call_from_16_thunk" );
    else if (reg_func) function_header( "__wine_call_from_16_regs" );
    else function_header( "__wine_call_from_16" );

    /* Create STACK16FRAME (except STACK32FRAME link) */
    output( "\tpushw %%gs\n" );
    output( "\tpushw %%fs\n" );
    output( "\tpushw %%es\n" );
    output( "\tpushw %%ds\n" );
    output( "\tpushl %%ebp\n" );
    output( "\tpushl %%ecx\n" );
    output( "\tpushl %%edx\n" );

    /* Save original EFlags register */
    if (reg_func) output( "\tpushfl\n" );

    /* Fetch the flat data selector into %edx.  The 0x2e byte is a %cs
     * segment override prefix: %ds is not usable yet.  With PIC, %ecx is
     * first loaded with the address of local label 1. */
    if ( UsePIC )
    {
        output( "\tcall 1f\n" );
        output( "1:\tpopl %%ecx\n" );
        output( "\t.byte 0x2e\n\tmovl %s-1b(%%ecx),%%edx\n", asm_name("CallTo16_DataSelector") );
    }
    else
        output( "\t.byte 0x2e\n\tmovl %s,%%edx\n", asm_name("CallTo16_DataSelector") );

    /* Load 32-bit segment registers */
    output( "\tmovw %%dx, %%ds\n" );
    output( "\tmovw %%dx, %%es\n" );

    if ( UsePIC )
        output( "\tmovw %s-1b(%%ecx), %%fs\n", asm_name("CallTo16_TebSelector") );
    else
        output( "\tmovw %s, %%fs\n", asm_name("CallTo16_TebSelector") );

    /* 0x64 is an %fs segment override prefix: load %gs from the TEB */
    output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );

    /* Translate STACK16FRAME base to flat offset in %edx */
    output( "\tmovw %%ss, %%dx\n" );
    output( "\tandl $0xfff8, %%edx\n" );  /* clear the low 3 selector bits */
    output( "\tshrl $1, %%edx\n" );       /* (sel >> 3) * 4: byte offset into the LDT copy */
    if (UsePIC)
    {
        output( "\taddl wine_ldt_copy_ptr-1b(%%ecx),%%edx\n" );
        output( "\tmovl (%%edx), %%edx\n" );
    }
    else
        output( "\tmovl %s(%%edx), %%edx\n", asm_name("wine_ldt_copy") );
    output( "\tmovzwl %%sp, %%ebp\n" );
    /* For register functions %sp still includes the flags pushed above,
     * which occupy the same 4 bytes that the frame link will take once
     * they are popped; otherwise account for the link with -4. */
    output( "\tleal %d(%%ebp,%%edx), %%edx\n", reg_func ? 0 : -4 );

    /* Get saved flags into %ecx */
    if (reg_func) output( "\tpopl %%ecx\n" );

    /* Get the 32-bit stack pointer from the TEB and complete STACK16FRAME */
    output( "\t.byte 0x64\n\tmovl (%d), %%ebp\n", STACKOFFSET );
    output( "\tpushl %%ebp\n" );

    /* Switch stacks */
    output( "\t.byte 0x64\n\tmovw %%ss, (%d)\n", STACKOFFSET + 2 );
    output( "\t.byte 0x64\n\tmovw %%sp, (%d)\n", STACKOFFSET );
    output( "\tpushl %%ds\n" );
    output( "\tpopl %%ss\n" );
    output( "\tmovl %%ebp, %%esp\n" );
    output( "\taddl $0x20,%%ebp\n");  /* FIELD_OFFSET(STACK32FRAME,ebp) */


    /* At this point:
       STACK16FRAME is completely set up
       DS, ES, SS: flat data segment
       FS: current TEB
       ESP: points to last STACK32FRAME
       EBP: points to ebp member of last STACK32FRAME
       EDX: points to current STACK16FRAME
       ECX: contains saved flags (register functions only)
       all other registers: unchanged */

    /* Special case: C16ThkSL stub */
    if ( thunk )
    {
        /* Set up registers as expected and call thunk */
        output( "\tleal 0x1a(%%edx),%%ebx\n" );  /* sizeof(STACK16FRAME)-22 */
        output( "\tleal -4(%%esp), %%ebp\n" );

        output( "\tcall *0x26(%%edx)\n");  /* FIELD_OFFSET(STACK16FRAME,entry_point) */

        /* Switch stack back */
        output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Restore registers and return directly to caller */
        output( "\taddl $8, %%esp\n" );   /* skip saved edx and ecx (%cx holds the thunk's result) */
        output( "\tpopl %%ebp\n" );
        output( "\tpopw %%ds\n" );
        output( "\tpopw %%es\n" );
        output( "\tpopw %%fs\n" );
        output( "\tpopw %%gs\n" );
        output( "\taddl $20, %%esp\n" );  /* skip the rest of the frame up to the caller's ip/cs */

        /* Pop the 16-bit return address, drop %cl bytes of arguments,
         * then push the return address back for the 16-bit far return. */
        output( "\txorb %%ch, %%ch\n" );
        output( "\tpopl %%ebx\n" );
        output( "\taddw %%cx, %%sp\n" );
        output( "\tpush %%ebx\n" );

        output( "\t.byte 0x66\n" );       /* operand-size prefix for the lret */
        output( "\tlret\n" );

        output_function_size( "__wine_call_from_16_thunk" );
        return;
    }


    /* Build register CONTEXT */
    if ( reg_func )
    {
        output( "\tsubl $0x2cc,%%esp\n" );       /* sizeof(CONTEXT86) */

        output( "\tmovl %%ecx,0xc0(%%esp)\n" );  /* EFlags */

        output( "\tmovl %%eax,0xb0(%%esp)\n" );  /* Eax */
        output( "\tmovl %%ebx,0xa4(%%esp)\n" );  /* Ebx */
        output( "\tmovl %%esi,0xa0(%%esp)\n" );  /* Esi */
        output( "\tmovl %%edi,0x9c(%%esp)\n" );  /* Edi */

        /* Registers clobbered above are taken from their saved copies
         * in the STACK16FRAME */
        output( "\tmovl 0x0c(%%edx),%%eax\n");   /* FIELD_OFFSET(STACK16FRAME,ebp) */
        output( "\tmovl %%eax,0xb4(%%esp)\n" );  /* Ebp */
        output( "\tmovl 0x08(%%edx),%%eax\n");   /* FIELD_OFFSET(STACK16FRAME,ecx) */
        output( "\tmovl %%eax,0xac(%%esp)\n" );  /* Ecx */
        output( "\tmovl 0x04(%%edx),%%eax\n");   /* FIELD_OFFSET(STACK16FRAME,edx) */
        output( "\tmovl %%eax,0xa8(%%esp)\n" );  /* Edx */

        output( "\tmovzwl 0x10(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ds) */
        output( "\tmovl %%eax,0x98(%%esp)\n" );  /* SegDs */
        output( "\tmovzwl 0x12(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,es) */
        output( "\tmovl %%eax,0x94(%%esp)\n" );  /* SegEs */
        output( "\tmovzwl 0x14(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,fs) */
        output( "\tmovl %%eax,0x90(%%esp)\n" );  /* SegFs */
        output( "\tmovzwl 0x16(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,gs) */
        output( "\tmovl %%eax,0x8c(%%esp)\n" );  /* SegGs */

        output( "\tmovzwl 0x2e(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,cs) */
        output( "\tmovl %%eax,0xbc(%%esp)\n" );  /* SegCs */
        output( "\tmovzwl 0x2c(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ip) */
        output( "\tmovl %%eax,0xb8(%%esp)\n" );  /* Eip */

        /* SegSs/Esp come from the 16-bit stack pointer saved in the TEB */
        output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET+2 );
        output( "\tmovl %%eax,0xc8(%%esp)\n" );  /* SegSs */
        output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET );
        output( "\taddl $0x2c,%%eax\n");         /* FIELD_OFFSET(STACK16FRAME,ip) */
        output( "\tmovl %%eax,0xc4(%%esp)\n" );  /* Esp */
#if 0
        output( "\tfsave 0x1c(%%esp)\n" ); /* FloatSave */
#endif

        /* Push address of CONTEXT86 structure -- popped by the relay routine */
        output( "\tmovl %%esp,%%eax\n" );
        output( "\tandl $~15,%%esp\n" );
        output( "\tsubl $4,%%esp\n" );   /* with the 3 pushes (context, args, entry point): 16 bytes */
        output( "\tpushl %%eax\n" );
    }
    else
    {
        /* Align so that %esp is 16-byte aligned again after the two
         * 4-byte pushes below */
        output( "\tsubl $8,%%esp\n" );
        output( "\tandl $~15,%%esp\n" );
        output( "\taddl $8,%%esp\n" );
    }

    /* Call relay routine (which will call the API entry point) */
    output( "\tleal 0x30(%%edx),%%eax\n" ); /* sizeof(STACK16FRAME) */
    output( "\tpushl %%eax\n" );
    output( "\tpushl 0x26(%%edx)\n");  /* FIELD_OFFSET(STACK16FRAME,entry_point) */
    output( "\tcall *0x20(%%edx)\n");  /* FIELD_OFFSET(STACK16FRAME,relay) */

    if ( reg_func )
    {
        /* %ebx = address of the CONTEXT86 built above */
        output( "\tleal -748(%%ebp),%%ebx\n" ); /* sizeof(CONTEXT) + FIELD_OFFSET(STACK32FRAME,ebp) */

        /* Switch stack back */
        output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Get return address to CallFrom16 stub */
        output( "\taddw $0x14,%%sp\n" ); /* FIELD_OFFSET(STACK16FRAME,callfrom_ip)-4 */
        output( "\tpopl %%eax\n" );
        output( "\tpopl %%edx\n" );

        /* Restore all registers from CONTEXT */
        output( "\tmovw 0xc8(%%ebx),%%ss\n");   /* SegSs */
        output( "\tmovl 0xc4(%%ebx),%%esp\n");  /* Esp */
        output( "\taddl $4, %%esp\n" );  /* room for final return address */

        /* Stack built here, from the top down: final cs:ip (words), the
         * CallFrom16 stub return address (cs in %edx, ip in %eax),
         * eflags and ds.  ds and eflags are popped at the end, and the
         * lret then returns to the stub. */
        output( "\tpushw 0xbc(%%ebx)\n");  /* SegCs */
        output( "\tpushw 0xb8(%%ebx)\n");  /* Eip */
        output( "\tpushl %%edx\n" );
        output( "\tpushl %%eax\n" );
        output( "\tpushl 0xc0(%%ebx)\n");  /* EFlags */
        output( "\tpushl 0x98(%%ebx)\n");  /* SegDs */

        output( "\tpushl 0x94(%%ebx)\n");  /* SegEs */
        output( "\tpopl %%es\n" );
        output( "\tpushl 0x90(%%ebx)\n");  /* SegFs */
        output( "\tpopl %%fs\n" );
        output( "\tpushl 0x8c(%%ebx)\n");  /* SegGs */
        output( "\tpopl %%gs\n" );

        /* %ebx is reloaded last because it is the base pointer until then */
        output( "\tmovl 0xb4(%%ebx),%%ebp\n");  /* Ebp */
        output( "\tmovl 0xa0(%%ebx),%%esi\n");  /* Esi */
        output( "\tmovl 0x9c(%%ebx),%%edi\n");  /* Edi */
        output( "\tmovl 0xb0(%%ebx),%%eax\n");  /* Eax */
        output( "\tmovl 0xa8(%%ebx),%%edx\n");  /* Edx */
        output( "\tmovl 0xac(%%ebx),%%ecx\n");  /* Ecx */
        output( "\tmovl 0xa4(%%ebx),%%ebx\n");  /* Ebx */

        output( "\tpopl %%ds\n" );
        output( "\tpopfl\n" );
        output( "\tlret\n" );

        output_function_size( "__wine_call_from_16_regs" );
    }
    else
    {
        /* Switch stack back */
        output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Restore registers */
        output( "\tpopl %%edx\n" );
        output( "\tpopl %%ecx\n" );
        output( "\tpopl %%ebp\n" );
        output( "\tpopw %%ds\n" );
        output( "\tpopw %%es\n" );
        output( "\tpopw %%fs\n" );
        output( "\tpopw %%gs\n" );

        /* Return to return stub which will return to caller */
        output( "\tlret $12\n" );  /* also drops relay, entry ip, entry point and bp (12 bytes) */

        output_function_size( "__wine_call_from_16" );
    }
}
Пример #17
0
/*******************************************************************
 *         output_call16_function
 *
 * Build a 16-bit-to-Wine callback glue function.
 *
 * The generated routines serve as the argument conversion routines
 * invoked by the CallFrom16... core, so their prototypes are
 * (see also CallFrom16):
 *
 *  extern WORD WINAPI __wine_spec_call16_C_xxx( FARPROC func, LPBYTE args );
 *  extern LONG WINAPI __wine_spec_call16_C_xxx( FARPROC func, LPBYTE args );
 *  extern void WINAPI __wine_spec_call16_C_xxx_regs( FARPROC func, LPBYTE args, CONTEXT86 *context );
 *
 * where 'C' is the calling convention ('p' for pascal or 'c' for cdecl),
 * and each 'x' is an argument  ('w'=word, 's'=signed word, 'l'=long,
 * 'p'=linear pointer, 't'=linear pointer to null-terminated string,
 * 'T'=segmented pointer to null-terminated string).
 *
 * Each generated routine reads the arguments from the 16-bit stack
 * ('args'), at offsets derived from the calling convention and the
 * argument types, pushes their 32-bit form and calls the 32-bit entry
 * point ('func').
 *
 * Register functions get their arguments (if any) converted like normal
 * functions, and additionally receive the CONTEXT86 pointer.  Varargs
 * functions additionally receive a pointer just past the fixed arguments.
 */
static void output_call16_function( ORDDEF *odp )
{
    char *label;
    int arg, pos, slot, dwords, words, pad;
    int argsize = get_function_argsize( odp );
    int uses_ldt = (strpbrk( get_args_str( odp ), "pt" ) != NULL);
    const int is_pascal = (odp->type == TYPE_PASCAL);
    const int is_varargs = (odp->type == TYPE_VARARGS);
    const int is_register = (odp->flags & FLAG_REGISTER) != 0;

    label = strmake( ".L__wine_spec_call16_%s", get_relay_name(odp) );

    /* prologue: standard %ebp frame */
    output( "\t.align %d\n", get_alignment(4) );
    output( "\t%s\n", func_declaration(label) );
    output( "%s:\n", label );
    output_cfi( ".cfi_startproc" );
    output( "\tpushl %%ebp\n" );
    output_cfi( ".cfi_adjust_cfa_offset 4" );
    output_cfi( ".cfi_rel_offset %%ebp,0" );
    output( "\tmovl %%esp,%%ebp\n" );
    output_cfi( ".cfi_def_cfa_register %%ebp" );

    /* 'words' counts the dwords on the stack at the time of the call */
    words = 2;
    if (uses_ldt)
    {
        /* linear pointer arguments are translated through the table
         * whose address is kept in %esi */
        output( "\tpushl %%esi\n" );
        output_cfi( ".cfi_rel_offset %%esi,-4" );
        words++;
        if (!UsePIC)
            output( "\tmovl $%s,%%esi\n", asm_name("_imp__wine_ldt_copy") );
        else
        {
            output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") );
            output( "1:\tmovl wine_ldt_copy_ptr-1b(%%eax),%%esi\n" );
        }
    }

    /* preserve 16-byte stack alignment */
    for (arg = 0; arg < odp->u.func.nb_args; arg++)
    {
        words++;
        /* 64-bit arguments take two dwords */
        if (odp->u.func.args[arg] == ARG_DOUBLE || odp->u.func.args[arg] == ARG_INT64) words++;
    }
    if (is_register || is_varargs) words++;  /* extra context / va_list16 argument */
    pad = words % 4;
    if (pad) output( "\tsubl $%d,%%esp\n", 16 - 4 * pad );

    if (odp->u.func.nb_args || is_varargs)
        output( "\tmovl 12(%%ebp),%%ecx\n" );  /* args */

    if (is_register)
    {
        output( "\tpushl 16(%%ebp)\n" );  /* context */
    }
    else if (is_varargs)
    {
        output( "\tleal %d(%%ecx),%%eax\n", argsize );
        output( "\tpushl %%eax\n" );  /* va_list16 */
    }

    /* Push the arguments, last one first.  For pascal, offsets start at 0
     * and grow after each use; otherwise they start at argsize and shrink
     * before each use. */
    pos = is_pascal ? 0 : argsize;
    arg = odp->u.func.nb_args;
    while (arg-- > 0)
    {
        switch (odp->u.func.args[arg])
        {
        case ARG_WORD:
            slot = is_pascal ? pos : pos - 2;
            output( "\tmovzwl %d(%%ecx),%%eax\n", slot );
            output( "\tpushl %%eax\n" );
            pos = is_pascal ? pos + 2 : slot;
            break;

        case ARG_SWORD:
            slot = is_pascal ? pos : pos - 2;
            output( "\tmovswl %d(%%ecx),%%eax\n", slot );
            output( "\tpushl %%eax\n" );
            pos = is_pascal ? pos + 2 : slot;
            break;

        case ARG_INT64:
        case ARG_DOUBLE:
        case ARG_LONG:
        case ARG_FLOAT:
        case ARG_SEGPTR:
        case ARG_SEGSTR:
            /* 64-bit values are pushed as two consecutive dwords */
            dwords = (odp->u.func.args[arg] == ARG_INT64 ||
                      odp->u.func.args[arg] == ARG_DOUBLE) ? 2 : 1;
            while (dwords-- > 0)
            {
                slot = is_pascal ? pos : pos - 4;
                output( "\tpushl %d(%%ecx)\n", slot );
                pos = is_pascal ? pos + 4 : slot;
            }
            break;

        case ARG_PTR:
        case ARG_STR:
        case ARG_WSTR:
        case ARG_INT128:
            /* sel:offset -> table[sel >> 3] + offset */
            slot = is_pascal ? pos : pos - 4;
            output( "\tmovzwl %d(%%ecx),%%edx\n", slot + 2 ); /* sel */
            output( "\tshr $3,%%edx\n" );
            output( "\tmovzwl %d(%%ecx),%%eax\n", slot ); /* offset */
            output( "\taddl (%%esi,%%edx,4),%%eax\n" );
            output( "\tpushl %%eax\n" );
            pos = is_pascal ? pos + 4 : slot;
            break;
        }
    }

    output( "\tcall *8(%%ebp)\n" );  /* func */

    /* epilogue */
    if (uses_ldt)
    {
        output( "\tmovl -4(%%ebp),%%esi\n" );
        output_cfi( ".cfi_same_value %%esi" );
    }
    output( "\tleave\n" );
    output_cfi( ".cfi_def_cfa %%esp,4" );
    output_cfi( ".cfi_same_value %%ebp" );
    output( "\tret\n" );
    output_cfi( ".cfi_endproc" );
    output_function_size( label );
    free( label );
}
Пример #18
0
/*
 * Output the delayed import thunks of a Win32 module.
 *
 * Nothing is emitted when there are no delayed imports (nb_delayed == 0).
 * Otherwise three pieces of assembly are generated, in this order:
 *
 *   1. __wine_delay_load_asm: a common routine that preserves registers,
 *      calls __wine_spec_delay_load and then jumps to the address that
 *      call returned.
 *   2. One loader stub per delayed import, labelled
 *      .L__wine_delay_imp_<dll index>_<name>.  Each stub builds the value
 *      (idx << 16) | j, where idx counts delayed DLLs only and j is the
 *      import's position inside its DLL, and branches to
 *      __wine_delay_load_asm.
 *   3. The table of import thunks, one output_import_thunk() entry per
 *      delayed import, referencing .L__wine_delay_IAT at pointer-sized
 *      increments.
 *
 * The spec parameter is not used by this function.
 */
static void output_delayed_import_thunks( const DLLSPEC *spec )
{
    int i, idx, j, pos, extra_stack_storage = 0;
    static const char delayed_import_loaders[] = "__wine_spec_delayed_import_loaders";
    static const char delayed_import_thunks[] = "__wine_spec_delayed_import_thunks";

    if (!nb_delayed) return;

    output( "\n/* delayed import thunks */\n\n" );
    output( "\t.text\n" );
    output( "\t.align %d\n", get_alignment(8) );
    output( "%s:\n", asm_name(delayed_import_loaders));
    output( "\t%s\n", func_declaration("__wine_delay_load_asm") );
    output( "%s:\n", asm_name("__wine_delay_load_asm") );
    output_cfi( ".cfi_startproc" );
    switch(target_cpu)
    {
    case CPU_x86:
        /* Preserve %ecx/%edx and push %eax (the value built by the stub). */
        output( "\tpushl %%ecx\n" );
        output_cfi( ".cfi_adjust_cfa_offset 4" );
        output( "\tpushl %%edx\n" );
        output_cfi( ".cfi_adjust_cfa_offset 4" );
        output( "\tpushl %%eax\n" );
        output_cfi( ".cfi_adjust_cfa_offset 4" );
        output( "\tcall %s\n", asm_name("__wine_spec_delay_load") );
        /* NOTE(review): the -4 here presumably reflects the callee popping
         * its own argument -- confirm against __wine_spec_delay_load. */
        output_cfi( ".cfi_adjust_cfa_offset -4" );
        output( "\tpopl %%edx\n" );
        output_cfi( ".cfi_adjust_cfa_offset -4" );
        output( "\tpopl %%ecx\n" );
        output_cfi( ".cfi_adjust_cfa_offset -4" );
        output( "\tjmp *%%eax\n" );
        break;
    case CPU_x86_64:
        /* Spill %rdx, %rcx and %r8-%r11 into an 88-byte area, hand the
         * stub's value over in %rcx, and reload everything after the call. */
        output( "\tsubq $88,%%rsp\n" );
        output_cfi( ".cfi_adjust_cfa_offset 88" );
        output( "\tmovq %%rdx,80(%%rsp)\n" );
        output( "\tmovq %%rcx,72(%%rsp)\n" );
        output( "\tmovq %%r8,64(%%rsp)\n" );
        output( "\tmovq %%r9,56(%%rsp)\n" );
        output( "\tmovq %%r10,48(%%rsp)\n" );
        output( "\tmovq %%r11,40(%%rsp)\n" );
        output( "\tmovq %%rax,%%rcx\n" );
        output( "\tcall %s\n", asm_name("__wine_spec_delay_load") );
        output( "\tmovq 40(%%rsp),%%r11\n" );
        output( "\tmovq 48(%%rsp),%%r10\n" );
        output( "\tmovq 56(%%rsp),%%r9\n" );
        output( "\tmovq 64(%%rsp),%%r8\n" );
        output( "\tmovq 72(%%rsp),%%rcx\n" );
        output( "\tmovq 80(%%rsp),%%rdx\n" );
        output( "\taddq $88,%%rsp\n" );
        output_cfi( ".cfi_adjust_cfa_offset -88" );
        output( "\tjmp *%%rax\n" );
        break;
    case CPU_SPARC:
        output( "\tsave %%sp, -96, %%sp\n" );
        output( "\tcall %s\n", asm_name("__wine_spec_delay_load") );
        output( "\tmov %%g1, %%o0\n" );
        output( "\tjmp %%o0\n" );
        output( "\trestore\n" );
        break;
    case CPU_ARM:
        /* The address of __wine_spec_delay_load is embedded as a literal
         * word and loaded straight into PC. */
        output( "\tstmfd  SP!, {r4-r10,FP,LR}\n" );
        output( "\tmov LR,PC\n");
        output( "\tadd LR,LR,#8\n");
        output( "\tldr PC,[PC,#-4]\n");
        output( "\t.long %s\n", asm_name("__wine_spec_delay_load") );
        output( "\tmov IP,r0\n");
        output( "\tldmfd  SP!, {r4-r10,FP,LR}\n" );
        /* r0-r3 were pushed by the per-import stub below. */
        output( "\tldmfd  SP!, {r0-r3}\n" );
        output( "\tmov PC,IP\n");
        break;
    case CPU_ARM64:
        output( "\tstp x29, x30, [sp,#-16]!\n" );
        output( "\tmov x29, sp\n" );
        output( "\tadr x9, 1f\n" );
        output( "\tldur x9, [x9, #0]\n" );
        output( "\tblr x9\n" );
        output( "\tmov x9, x0\n" );
        output( "\tldp x29, x30, [sp],#16\n" );
        /* x0-x7 were stored in an 80-byte area by the per-import stub
         * below; reload them and release that area. */
        output( "\tldp x0, x1, [sp,#16]\n" );
        output( "\tldp x2, x3, [sp,#32]\n" );
        output( "\tldp x4, x5, [sp,#48]\n" );
        output( "\tldp x6, x7, [sp],#80\n" );
        output( "\tbr x9\n" ); /* or "ret x9" */
        output( "1:\t.quad %s\n", asm_name("__wine_spec_delay_load") );
        break;
    case CPU_POWERPC:
        if (target_platform == PLATFORM_APPLE) extra_stack_storage = 56;

        /* Create a stack frame and save r3-r12 in it. */
        output( "\tstwu %s, -%d(%s)\n",ppc_reg(1), 48+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(3),  4+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(4),  8+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(5), 12+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(6), 16+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(7), 20+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(8), 24+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(9), 28+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(10),32+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(11),36+extra_stack_storage, ppc_reg(1));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(12),40+extra_stack_storage, ppc_reg(1));

        /* r0 -> r3 (arg1) */
        output( "\tmr %s, %s\n", ppc_reg(3), ppc_reg(0));

        /* save return address */
        output( "\tmflr %s\n", ppc_reg(0));
        output( "\tstw  %s, %d(%s)\n", ppc_reg(0), 44+extra_stack_storage, ppc_reg(1));

        /* Call the __wine_delay_load function, arg1 is arg1. */
        output( "\tbl %s\n", asm_name("__wine_spec_delay_load") );

        /* Load return value from call into ctr register */
        output( "\tmtctr %s\n", ppc_reg(3));

        /* restore all saved registers and drop stackframe. */
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(3),  4+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(4),  8+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(5), 12+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(6), 16+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(7), 20+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(8), 24+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(9), 28+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(10),32+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(11),36+extra_stack_storage, ppc_reg(1));
        output( "\tlwz  %s, %d(%s)\n", ppc_reg(12),40+extra_stack_storage, ppc_reg(1));

        /* Reload the saved return address into the link register */
        output( "\tlwz  %s,  %d(%s)\n", ppc_reg(0), 44+extra_stack_storage, ppc_reg(1));
        output( "\tmtlr %s\n", ppc_reg(0));
        output( "\taddi %s, %s, %d\n", ppc_reg(1), ppc_reg(1),  48+extra_stack_storage);

        /* branch to ctr register. */
        output( "\tbctr\n");
        break;
    }
    output_cfi( ".cfi_endproc" );
    output_function_size( "__wine_delay_load_asm" );
    output( "\n" );

    /* Per-import loader stubs; idx only advances for delayed DLLs. */
    for (i = idx = 0; i < nb_imports; i++)
    {
        if (!dll_imports[i]->delay) continue;
        for (j = 0; j < dll_imports[i]->nb_imports; j++)
        {
            ORDDEF *odp = dll_imports[i]->imports[j];
            const char *name = odp->name ? odp->name : odp->export_name;

            output( ".L__wine_delay_imp_%d_%s:\n", i, name );
            output_cfi( ".cfi_startproc" );
            switch(target_cpu)
            {
            case CPU_x86:
                output( "\tmovl $%d, %%eax\n", (idx << 16) | j );
                output( "\tjmp %s\n", asm_name("__wine_delay_load_asm") );
                break;
            case CPU_x86_64:
                output( "\tmovq $%d,%%rax\n", (idx << 16) | j );
                output( "\tjmp %s\n", asm_name("__wine_delay_load_asm") );
                break;
            case CPU_SPARC:
                output( "\tset %d, %%g1\n", (idx << 16) | j );
                output( "\tb,a %s\n", asm_name("__wine_delay_load_asm") );
                output( "\tnop\n" );
                break;
            case CPU_ARM:
                /* r0 = idx * 16384 * 4 + j, i.e. (idx << 16) + j */
                output( "\tstmfd  SP!, {r0-r3}\n" );
                output( "\tmov r0, #%d\n", idx );
                output( "\tmov r1, #16384\n" );
                output( "\tmul r1, r0, r1\n" );
                output( "\tmov r0, r1\n" );
                output( "\tmov r1, #4\n" );
                output( "\tmul r1, r0, r1\n" );
                output( "\tmov r0, r1\n" );
                output( "\tadd r0, #%d\n", j );
                output( "\tldr PC,[PC,#-4]\n");
                output( "\t.long %s\n", asm_name("__wine_delay_load_asm") );
                break;
            case CPU_ARM64:
                /* Store x0-x7 in an 80-byte stack area, then compute
                 * x0 = idx * 16384 * 4 + j, i.e. (idx << 16) + j */
                output( "\tstp x6, x7, [sp,#-80]!\n" );
                output( "\tstp x4, x5, [sp,#48]\n" );
                output( "\tstp x2, x3, [sp,#32]\n" );
                output( "\tstp x0, x1, [sp,#16]\n" );
                output( "\tmov x0, #%d\n", idx );
                output( "\tmov x1, #16384\n" );
                output( "\tmul x1, x0, x1\n" );
                output( "\tmov x0, x1\n" );
                output( "\tmov x1, #4\n" );
                output( "\tmul x1, x0, x1\n" );
                output( "\tmov x0, x1\n" );
                output( "\tadd x0, x0, #%d\n", j );
                output( "\tadr x9, 1f\n" );
                output( "\tldur x9, [x9, #0]\n" );
                output( "\tbr x9\n" );
                output( "1:\t.quad %s\n", asm_name("__wine_delay_load_asm") );
                break;
            case CPU_POWERPC:
                switch(target_platform)
                {
                case PLATFORM_APPLE:
                    /* On Darwin we can use r0 and r2 */
                    /* Upper part in r2 */
                    output( "\tlis %s, %d\n", ppc_reg(2), idx);
                    /* Lower part + r2 -> r0, Note we can't use r0 directly */
                    output( "\taddi %s, %s, %d\n", ppc_reg(0), ppc_reg(2), j);
                    output( "\tb %s\n", asm_name("__wine_delay_load_asm") );
                    break;
                default:
                    /* On linux we can't use r2 since r2 is not a scratch register (hold the TOC) */
                    /* Save r13 on the stack */
                    output( "\taddi %s, %s, -0x4\n", ppc_reg(1), ppc_reg(1));
                    output( "\tstw  %s, 0(%s)\n",    ppc_reg(13), ppc_reg(1));
                    /* Upper part in r13 */
                    output( "\tlis %s, %d\n", ppc_reg(13), idx);
                    /* Lower part + r13 -> r0, Note we can't use r0 directly */
                    output( "\taddi %s, %s, %d\n", ppc_reg(0), ppc_reg(13), j);
                    /* Restore r13: this must be a load (lwz); a store here
                     * would leave r13 clobbered and overwrite the saved value. */
                    output( "\tlwz  %s, 0(%s)\n",    ppc_reg(13), ppc_reg(1));
                    output( "\taddic %s, %s, 0x4\n", ppc_reg(1), ppc_reg(1));
                    output( "\tb %s\n", asm_name("__wine_delay_load_asm") );
                    break;
                }
                break;
            }
            output_cfi( ".cfi_endproc" );
        }
        idx++;
    }
    output_function_size( delayed_import_loaders );

    /* Thunk table: pos advances by one pointer per delayed import. */
    output( "\n\t.align %d\n", get_alignment(get_ptr_size()) );
    output( "%s:\n", asm_name(delayed_import_thunks));
    for (i = pos = 0; i < nb_imports; i++)
    {
        if (!dll_imports[i]->delay) continue;
        for (j = 0; j < dll_imports[i]->nb_imports; j++, pos += get_ptr_size())
        {
            ORDDEF *odp = dll_imports[i]->imports[j];
            output_import_thunk( odp->name ? odp->name : odp->export_name,
                                 ".L__wine_delay_IAT", pos );
        }
    }
    output_function_size( delayed_import_thunks );
}
Пример #19
0
/*******************************************************************
 *         BuildCallTo16Core
 *
 * Emit one of the two core routines used by 32->16 thunks:
 *
 * extern DWORD WINAPI wine_call_to_16( FARPROC16 target, DWORD cbArgs, PEXCEPTION_HANDLER handler );
 * extern void WINAPI wine_call_to_16_regs( CONTEXT86 *context, DWORD cbArgs, PEXCEPTION_HANDLER handler );
 *
 * A non-zero reg_func selects wine_call_to_16_regs, zero selects
 * wine_call_to_16.  Both routines are callable directly from 32-bit code.
 *
 * The caller is expected to have prepared the 16-bit stack contents
 * (arguments) and the return address (segptr to CallTo16_Ret) beforehand;
 * nb_args must hold the number of bytes to be conserved, and the 16-bit
 * SS:SP is set up accordingly.
 *
 * Every other register either comes from the CONTEXT86 structure or is
 * given a default value.  The target routine address is either passed
 * directly or read from the CONTEXT86.
 */
static void BuildCallTo16Core( int reg_func )
{
    const char *routine;

    if (reg_func)
        routine = "wine_call_to_16_regs";
    else
        routine = "wine_call_to_16";

    /* Header of the generated function */
    function_header( routine );

    /* Prologue: establish the frame pointer */
    output( "\tpushl %%ebp\n" );
    output( "\tmovl %%esp, %%ebp\n" );

    /* Preserve the 32-bit registers and stash %gs */
    output( "\tpushl %%ebx\n" );
    output( "\tpushl %%esi\n" );
    output( "\tpushl %%edi\n" );
    output( "\t.byte 0x64\n\tmov %%gs,(%d)\n", GS_OFFSET );

    /* Build the exception frame */
    output( "\t.byte 0x64\n\tpushl (%d)\n", STACKOFFSET );
    output( "\tpushl 16(%%ebp)\n" ); /* handler */
    output( "\t.byte 0x64\n\tpushl (0)\n" );
    output( "\t.byte 0x64\n\tmovl %%esp,(0)\n" );

    /* Invoke the real CallTo16 code below, imitating an lcall */
    output( "\tpushl %%cs\n" );
    output( "\tcall .L%s\n", routine );

    /* Tear the exception frame down again */
    output( "\t.byte 0x64\n\tpopl (0)\n" );
    output( "\taddl $4, %%esp\n" );
    output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

    if (reg_func)
    {
        /*
         * Write the new register values back into the CONTEXT86 structure.
         *
         * NOTE:  Only EAX, EBX, ECX, EDX, EBP, and ESP are stored.
         *        A well-behaved 16-bit routine should leave the segment
         *        registers, ESI and EDI untouched anyway.  [They could be
         *        stored too if needed, at the price of a somewhat slower
         *        return path.]
         */

        output( "\tmovl %d(%%esp), %%edi\n", STACK32OFFSET(target) - STACK32OFFSET(edi));
                /* everything above edi has been popped already */

        output( "\tmovl %%eax, %d(%%edi)\n", CONTEXTOFFSET(Eax) );
        output( "\tmovl %%ebx, %d(%%edi)\n", CONTEXTOFFSET(Ebx) );
        output( "\tmovl %%ecx, %d(%%edi)\n", CONTEXTOFFSET(Ecx) );
        output( "\tmovl %%edx, %d(%%edi)\n", CONTEXTOFFSET(Edx) );
        output( "\tmovl %%ebp, %d(%%edi)\n", CONTEXTOFFSET(Ebp) );
        output( "\tmovl %%esi, %d(%%edi)\n", CONTEXTOFFSET(Esp) );
                 /* The return glue code saved %esp into %esi */
    }
    else
    {
        /* Combine the low word of %eax and the low word of %edx into %eax */
        output( "\tandl $0xffff,%%eax\n" );
        output( "\tshll $16,%%edx\n" );
        output( "\torl %%edx,%%eax\n" );
    }

    /* Bring back the saved 32-bit registers */
    output( "\tpopl %%edi\n" );
    output( "\tpopl %%esi\n" );
    output( "\tpopl %%ebx\n" );

    /* Epilogue: return and discard the 12 bytes of arguments */
    output( "\tpopl %%ebp\n" );
    output( "\tret $12\n" );


    /* The actual CallTo16 code starts here */

    output( ".L%s:\n", routine );

    /* Move over to the 16-bit stack, remembering the 32-bit %esp */
    output( "\tmovl %%esp,%%edx\n" );
    output( "\t.byte 0x64\n\tmovw (%d),%%ss\n", STACKOFFSET + 2);
    output( "\t.byte 0x64\n\tmovw (%d),%%sp\n", STACKOFFSET );
    output( "\t.byte 0x64\n\tmovl %%edx,(%d)\n", STACKOFFSET );

    /* Point %bp at the previous stack frame (the one CallFrom16 built) */
    output( "\tmovzwl %%sp,%%ebp\n" );
    output( "\tleal %d(%%ebp),%%ebp\n", STACK16OFFSET(bp) );

    /* Adjust the new sp by the requested number of argument bytes */
    output( "\tsubw %d(%%edx), %%sp\n", STACK32OFFSET(nb_args) );

    if (!reg_func)
    {
        /* Plain call: push the target routine address */
        output( "\tpushl %d(%%edx)\n", STACK32OFFSET(target) );

        /* Load %fs and %gs with what the last CallFrom16 saved */
        output( "\tpushw %d(%%ebp)\n", STACK16OFFSET(fs)-STACK16OFFSET(bp) );
        output( "\tpopw %%fs\n" );
        output( "\tpushw %d(%%ebp)\n", STACK16OFFSET(gs)-STACK16OFFSET(bp) );
        output( "\tpopw %%gs\n" );

        /* Make %ds and %es (and %ax, just in case) match %ss */
        output( "\tmovw %%ss,%%ax\n" );
        output( "\tmovw %%ax,%%ds\n" );
        output( "\tmovw %%ax,%%es\n" );
    }
    else
    {
        /* Register call: push the target routine address from the context */
        output( "\tmovl %d(%%edx),%%edx\n", STACK32OFFSET(target) );
        output( "\tpushw %d(%%edx)\n", CONTEXTOFFSET(SegCs) );
        output( "\tpushw %d(%%edx)\n", CONTEXTOFFSET(Eip) );

        /* Load the registers from the context; %edx goes last because
         * it is the base pointer for all the other loads */
        output( "\tpushw %d(%%edx)\n", CONTEXTOFFSET(SegDs) );
        output( "\tpushl %d(%%edx)\n", CONTEXTOFFSET(SegEs) );
        output( "\tpopl %%es\n" );
        output( "\tpushl %d(%%edx)\n", CONTEXTOFFSET(SegFs) );
        output( "\tpopl %%fs\n" );
        output( "\tpushl %d(%%edx)\n", CONTEXTOFFSET(SegGs) );
        output( "\tpopl %%gs\n" );
        output( "\tmovl %d(%%edx),%%ebp\n", CONTEXTOFFSET(Ebp) );
        output( "\tmovl %d(%%edx),%%esi\n", CONTEXTOFFSET(Esi) );
        output( "\tmovl %d(%%edx),%%edi\n", CONTEXTOFFSET(Edi) );
        output( "\tmovl %d(%%edx),%%eax\n", CONTEXTOFFSET(Eax) );
        output( "\tmovl %d(%%edx),%%ebx\n", CONTEXTOFFSET(Ebx) );
        output( "\tmovl %d(%%edx),%%ecx\n", CONTEXTOFFSET(Ecx) );
        output( "\tmovl %d(%%edx),%%edx\n", CONTEXTOFFSET(Edx) );

        /* Finally pop the 16-bit ds pushed above */
        output( "\tpopw %%ds\n" );
    }

    /* Transfer control to the target via an operand-size-prefixed lret */
    output( "\t.byte 0x66\n" );
    output( "\tlret\n" );

    /* Footer of the generated function */
    output_function_size( routine );
}