Exemple #1
0
/*******************************************************************
 *         BuildCallTo16Core
 *
 * This routine builds the core routines used in 32->16 thunks:
 *
 *   extern void WINAPI wine_call_to_16_word( SEGPTR target, int nb_args );
 *   extern void WINAPI wine_call_to_16_long( SEGPTR target, int nb_args );
 *   extern void WINAPI wine_call_to_16_regs_short( const CONTEXT86 *context, int nb_args );
 *   extern void WINAPI wine_call_to_16_regs_long ( const CONTEXT86 *context, int nb_args );
 *
 * These routines can be called directly from 32-bit code.
 *
 * All routines expect that the 16-bit stack contents (arguments) were
 * already set up by the caller; nb_args must contain the number of bytes
 * to be conserved.  The 16-bit SS:SP will be set accordingly.
 *
 * All other registers are either taken from the CONTEXT86 structure
 * or else set to default values.  The target routine address is either
 * given directly or taken from the CONTEXT86.
 *
 * If you want to call a 16-bit routine taking only standard argument types
 * (WORD and LONG), you can also have an appropriate argument conversion
 * stub automatically generated (see BuildCallTo16); you'd then call this
 * stub, which in turn would prepare the 16-bit stack and call the appropriate
 * core routine.
 *
 */
static void BuildCallTo16Core( FILE *outfile, int short_ret, int reg_func )
{
    /*
     * Parameters:
     *   outfile   - stream that receives the generated assembly text
     *   short_ret - only looked at when reg_func is 0: non-zero selects the
     *               "word" variant (%ax zero-extended into %eax), zero the
     *               "long" variant (%dx:%ax merged into one 32-bit value)
     *   reg_func  - 0: plain function, 1: "regs_short" variant,
     *               2: "regs_long" variant (a 16:32 return address is pushed)
     *
     * 'name' is the full symbol of the generated routine.  The inner routine
     * reached by the simulated far call is the local label ".L<name>", so
     * the symbol and the label are derived from this single string.
     */
    const char *name = reg_func == 2 ? "wine_call_to_16_regs_long" :
                       reg_func == 1 ? "wine_call_to_16_regs_short" :
                       short_ret ? "wine_call_to_16_word" : "wine_call_to_16_long";

    /* Function header */
    function_header( outfile, name );

    /* Function entry sequence */
    fprintf( outfile, "\tpushl %%ebp\n" );
    fprintf( outfile, "\tmovl %%esp, %%ebp\n" );

    /* Save the 32-bit registers */
    fprintf( outfile, "\tpushl %%ebx\n" );
    fprintf( outfile, "\tpushl %%ecx\n" );
    fprintf( outfile, "\tpushl %%edx\n" );
    fprintf( outfile, "\tpushl %%esi\n" );
    fprintf( outfile, "\tpushl %%edi\n" );

    if ( UsePIC )
    {
        /* Load the address of local label 1 into %ebx; it is the base for
         * the "symbol-1b(%ebx)" reference emitted below */
        fprintf( outfile, "\tcall 1f\n" );
        fprintf( outfile, "1:\tpopl %%ebx\n" );
    }

    /* Enter Win16 Mutex */
    fprintf( outfile, "\tcall " PREFIX "_EnterWin16Lock\n" );

    /* Print debugging info */
    if (debugging)
    {
        /* Push flags, number of arguments, and target */
        fprintf( outfile, "\tpushl $%d\n", reg_func );
        fprintf( outfile, "\tpushl 12(%%ebp)\n" );
        fprintf( outfile, "\tpushl  8(%%ebp)\n" );

        fprintf( outfile, "\tcall " PREFIX "RELAY_DebugCallTo16\n" );

        fprintf( outfile, "\taddl $12, %%esp\n" );
    }

    /* Get return address */
    if ( UsePIC )
    {
        fprintf( outfile, "\tmovl " PREFIX "CallTo16_RetAddr-1b(%%ebx), %%ecx\n" );
    }
    else
    {
        fprintf( outfile, "\tmovl " PREFIX "CallTo16_RetAddr, %%ecx\n" );
    }

    /* Call the actual CallTo16 routine (simulate a lcall) */
    fprintf( outfile, "\tpushl %%cs\n" );
    fprintf( outfile, "\tcall .L%s\n", name );

    if ( !reg_func )
    {
        /* Convert and push return value */
        if ( short_ret )
        {
            fprintf( outfile, "\tmovzwl %%ax, %%eax\n" );
            fprintf( outfile, "\tpushl %%eax\n" );
        }
        else
        {
            fprintf( outfile, "\tshll $16,%%edx\n" );
            fprintf( outfile, "\tmovw %%ax,%%dx\n" );
            fprintf( outfile, "\tpushl %%edx\n" );
        }
    }
    else
    {
        /*
         * Modify CONTEXT86 structure to contain new values
         *
         * NOTE:  We restore only EAX, EBX, ECX, EDX, EBP, and ESP.
         *        The segment registers as well as ESI and EDI should
         *        not be modified by a well-behaved 16-bit routine in
         *        any case.  [If necessary, we could restore them as well,
         *        at the cost of a somewhat less efficient return path.]
         */

        fprintf( outfile, "\tmovl %d(%%esp), %%edi\n", STACK32OFFSET(target)-12 );
        fprintf( outfile, "\tmovl %%eax, %d(%%edi)\n", CONTEXTOFFSET(Eax) );
        fprintf( outfile, "\tmovl %%ebx, %d(%%edi)\n", CONTEXTOFFSET(Ebx) );
        fprintf( outfile, "\tmovl %%ecx, %d(%%edi)\n", CONTEXTOFFSET(Ecx) );
        fprintf( outfile, "\tmovl %%edx, %d(%%edi)\n", CONTEXTOFFSET(Edx) );
        fprintf( outfile, "\tmovl %%ebp, %d(%%edi)\n", CONTEXTOFFSET(Ebp) );
        fprintf( outfile, "\tmovl %%esi, %d(%%edi)\n", CONTEXTOFFSET(Esp) );
                 /* The return glue code saved %esp into %esi */

        /* The context pointer takes the stack slot that holds the return
         * value in the non-register case; it is popped into %eax below */
        fprintf( outfile, "\tpushl %%edi\n" );
    }

    /* Print debugging info */
    if (debugging)
    {
        fprintf( outfile, "\tpushl $%d\n", reg_func );

        fprintf( outfile, "\tcall " PREFIX "RELAY_DebugCallTo16Ret\n" );

        fprintf( outfile, "\taddl $4, %%esp\n" );
    }

    /* Leave Win16 Mutex */
    fprintf( outfile, "\tcall " PREFIX "_LeaveWin16Lock\n" );

    /* Get return value */
    fprintf( outfile, "\tpopl %%eax\n" );

    /* Restore the 32-bit registers */
    fprintf( outfile, "\tpopl %%edi\n" );
    fprintf( outfile, "\tpopl %%esi\n" );
    fprintf( outfile, "\tpopl %%edx\n" );
    fprintf( outfile, "\tpopl %%ecx\n" );
    fprintf( outfile, "\tpopl %%ebx\n" );

    /* Function exit sequence */
    fprintf( outfile, "\tpopl %%ebp\n" );
    fprintf( outfile, "\tret $8\n" );


    /* Start of the actual CallTo16 routine */

    fprintf( outfile, ".L%s:\n", name );

    /* Complete STACK32FRAME */
    fprintf( outfile, "\t.byte 0x64\n\tpushl (%d)\n", STACKOFFSET );
    fprintf( outfile, "\tmovl %%esp,%%edx\n" );

    /* Switch to the 16-bit stack */
#ifdef __svr4__
    fprintf( outfile,"\tdata16\n");
#endif
    fprintf( outfile, "\t.byte 0x64\n\tmovw (%d),%%ss\n", STACKOFFSET + 2);
    fprintf( outfile, "\t.byte 0x64\n\tmovw (%d),%%sp\n", STACKOFFSET );
    fprintf( outfile, "\t.byte 0x64\n\tmovl %%edx,(%d)\n", STACKOFFSET );

    /* Make %bp point to the previous stackframe (built by CallFrom16) */
    fprintf( outfile, "\tmovzwl %%sp,%%ebp\n" );
    fprintf( outfile, "\tleal %d(%%ebp),%%ebp\n", STACK16OFFSET(bp) );

    /* Add the specified offset to the new sp */
    fprintf( outfile, "\tsubw %d(%%edx), %%sp\n", STACK32OFFSET(nb_args) );

    /* Push the return address
     * With sreg suffix, we push 16:16 address (normal lret)
     * With lreg suffix, we push 16:32 address (0x66 lret, for KERNEL32_45)
     */
    if (reg_func != 2)
    {
        fprintf( outfile, "\tpushl %%ecx\n" );
    }
    else
    {
        fprintf( outfile, "\tshldl $16, %%ecx, %%eax\n" );
        fprintf( outfile, "\tpushw $0\n" );
        fprintf( outfile, "\tpushw %%ax\n" );
        fprintf( outfile, "\tpushw $0\n" );
        fprintf( outfile, "\tpushw %%cx\n" );
    }

    if (reg_func)
    {
        /* Push the called routine address */
        fprintf( outfile, "\tmovl %d(%%edx),%%edx\n", STACK32OFFSET(target) );
        fprintf( outfile, "\tpushw %d(%%edx)\n", CONTEXTOFFSET(SegCs) );
        fprintf( outfile, "\tpushw %d(%%edx)\n", CONTEXTOFFSET(Eip) );

        /* Get the registers; %edx is the context pointer and is therefore
         * loaded last */
        fprintf( outfile, "\tpushw %d(%%edx)\n", CONTEXTOFFSET(SegDs) );
        fprintf( outfile, "\tmovl %d(%%edx),%%eax\n", CONTEXTOFFSET(SegEs) );
        fprintf( outfile, "\tmovw %%ax,%%es\n" );
        fprintf( outfile, "\tmovl %d(%%edx),%%eax\n", CONTEXTOFFSET(SegFs) );
        fprintf( outfile, "\tmovw %%ax,%%fs\n" );
        fprintf( outfile, "\tmovl %d(%%edx),%%ebp\n", CONTEXTOFFSET(Ebp) );
        fprintf( outfile, "\tmovl %d(%%edx),%%esi\n", CONTEXTOFFSET(Esi) );
        fprintf( outfile, "\tmovl %d(%%edx),%%edi\n", CONTEXTOFFSET(Edi) );
        fprintf( outfile, "\tmovl %d(%%edx),%%eax\n", CONTEXTOFFSET(Eax) );
        fprintf( outfile, "\tmovl %d(%%edx),%%ebx\n", CONTEXTOFFSET(Ebx) );
        fprintf( outfile, "\tmovl %d(%%edx),%%ecx\n", CONTEXTOFFSET(Ecx) );
        fprintf( outfile, "\tmovl %d(%%edx),%%edx\n", CONTEXTOFFSET(Edx) );

        /* Get the 16-bit ds */
        fprintf( outfile, "\tpopw %%ds\n" );
    }
    else  /* not a register function */
    {
        /* Push the called routine address */
        fprintf( outfile, "\tpushl %d(%%edx)\n", STACK32OFFSET(target) );

        /* Set %fs to the value saved by the last CallFrom16 */
        fprintf( outfile, "\tmovw %d(%%ebp),%%ax\n", STACK16OFFSET(fs)-STACK16OFFSET(bp) );
        fprintf( outfile, "\tmovw %%ax,%%fs\n" );

        /* Set %ds and %es (and %ax just in case) equal to %ss */
        fprintf( outfile, "\tmovw %%ss,%%ax\n" );
        fprintf( outfile, "\tmovw %%ax,%%ds\n" );
        fprintf( outfile, "\tmovw %%ax,%%es\n" );
    }

    /* Jump to the called routine */
    fprintf( outfile, "\t.byte 0x66\n" );
    fprintf( outfile, "\tlret\n" );
}
Exemple #2
0
/*******************************************************************
 *         BuildCallTo16Core
 *
 * This routine builds the core routines used in 32->16 thunks:
 *
 * extern DWORD WINAPI wine_call_to_16( FARPROC16 target, DWORD cbArgs, PEXCEPTION_HANDLER handler );
 * extern void WINAPI wine_call_to_16_regs( CONTEXT86 *context, DWORD cbArgs, PEXCEPTION_HANDLER handler );
 *
 * These routines can be called directly from 32-bit code.
 *
 * All routines expect that the 16-bit stack contents (arguments) and the
 * return address (segptr to CallTo16_Ret) were already set up by the
 * caller; nb_args must contain the number of bytes to be conserved.  The
 * 16-bit SS:SP will be set accordingly.
 *
 * All other registers are either taken from the CONTEXT86 structure
 * or else set to default values.  The target routine address is either
 * given directly or taken from the CONTEXT86.
 */
static void BuildCallTo16Core( int reg_func )
{
    /*
     * reg_func selects which routine is emitted: non-zero produces
     * wine_call_to_16_regs (registers come from a CONTEXT86), zero
     * produces wine_call_to_16 (plain far-procedure target).
     */
    const char *symbol;

    if (reg_func)
        symbol = "wine_call_to_16_regs";
    else
        symbol = "wine_call_to_16";

    /* Emit the function header for the chosen symbol */
    function_header( symbol );

    /* Prologue: establish the %ebp frame */
    output( "\tpushl %%ebp\n" );
    output( "\tmovl %%esp, %%ebp\n" );

    /* Preserve %ebx/%esi/%edi and stash %gs at %fs:GS_OFFSET */
    output( "\tpushl %%ebx\n" );
    output( "\tpushl %%esi\n" );
    output( "\tpushl %%edi\n" );
    output( "\t.byte 0x64\n\tmov %%gs,(%d)\n", GS_OFFSET );

    /* Build the exception frame: saved %fs:STACKOFFSET value, the handler
     * argument (16(%ebp)), and the previous %fs:0 value; then make %fs:0
     * point at the new frame */
    output( "\t.byte 0x64\n\tpushl (%d)\n", STACKOFFSET );
    output( "\tpushl 16(%%ebp)\n" ); /* handler */
    output( "\t.byte 0x64\n\tpushl (0)\n" );
    output( "\t.byte 0x64\n\tmovl %%esp,(0)\n" );

    /* Simulated far call into the inner routine: push %cs, then near call
     * to the local label */
    output( "\tpushl %%cs\n" );
    output( "\tcall .L%s\n", symbol );

    /* Tear the exception frame down again, in reverse order */
    output( "\t.byte 0x64\n\tpopl (0)\n" );
    output( "\taddl $4, %%esp\n" );
    output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

    if ( reg_func )
    {
        /*
         * Write the register values back into the caller's CONTEXT86.
         *
         * Only EAX, EBX, ECX, EDX, EBP and ESP are stored.  Per the
         * original note, the segment registers as well as ESI and EDI
         * should not be modified by a well-behaved 16-bit routine; they
         * could be stored too at the cost of a less efficient return path.
         */

        output( "\tmovl %d(%%esp), %%edi\n", STACK32OFFSET(target) - STACK32OFFSET(edi));
                /* everything above edi has been popped already */

        output( "\tmovl %%eax, %d(%%edi)\n", CONTEXTOFFSET(Eax) );
        output( "\tmovl %%ebx, %d(%%edi)\n", CONTEXTOFFSET(Ebx) );
        output( "\tmovl %%ecx, %d(%%edi)\n", CONTEXTOFFSET(Ecx) );
        output( "\tmovl %%edx, %d(%%edi)\n", CONTEXTOFFSET(Edx) );
        output( "\tmovl %%ebp, %d(%%edi)\n", CONTEXTOFFSET(Ebp) );
        output( "\tmovl %%esi, %d(%%edi)\n", CONTEXTOFFSET(Esp) );
                 /* The return glue code saved %esp into %esi */
    }
    else
    {
        /* Merge %dx:%ax into a single 32-bit result in %eax */
        output( "\tandl $0xffff,%%eax\n" );
        output( "\tshll $16,%%edx\n" );
        output( "\torl %%edx,%%eax\n" );
    }

    /* Epilogue: restore callee-saved registers in reverse push order */
    output( "\tpopl %%edi\n" );
    output( "\tpopl %%esi\n" );
    output( "\tpopl %%ebx\n" );

    /* Leave the frame and drop the 12 bytes of arguments */
    output( "\tpopl %%ebp\n" );
    output( "\tret $12\n" );


    /* ---- Inner routine, reached via the simulated far call above ---- */

    output( ".L%s:\n", symbol );

    /* Remember the 32-bit %esp in %edx, load %ss:%sp from %fs:STACKOFFSET,
     * and store the 32-bit stack pointer there instead */
    output( "\tmovl %%esp,%%edx\n" );
    output( "\t.byte 0x64\n\tmovw (%d),%%ss\n", STACKOFFSET + 2);
    output( "\t.byte 0x64\n\tmovw (%d),%%sp\n", STACKOFFSET );
    output( "\t.byte 0x64\n\tmovl %%edx,(%d)\n", STACKOFFSET );

    /* Point %bp at the bp slot of the previous 16-bit frame
     * (built by CallFrom16) */
    output( "\tmovzwl %%sp,%%ebp\n" );
    output( "\tleal %d(%%ebp),%%ebp\n", STACK16OFFSET(bp) );

    /* Lower %sp by the nb_args value found in the 32-bit frame */
    output( "\tsubw %d(%%edx), %%sp\n", STACK32OFFSET(nb_args) );

    if (!reg_func)
    {
        /* Plain function: the target far pointer goes on the stack for
         * the final lret */
        output( "\tpushl %d(%%edx)\n", STACK32OFFSET(target) );

        /* Reload %fs and %gs from the values saved by the last CallFrom16 */
        output( "\tpushw %d(%%ebp)\n", STACK16OFFSET(fs)-STACK16OFFSET(bp) );
        output( "\tpopw %%fs\n" );
        output( "\tpushw %d(%%ebp)\n", STACK16OFFSET(gs)-STACK16OFFSET(bp) );
        output( "\tpopw %%gs\n" );

        /* %ds and %es (and %ax as a side effect) become equal to %ss */
        output( "\tmovw %%ss,%%ax\n" );
        output( "\tmovw %%ax,%%ds\n" );
        output( "\tmovw %%ax,%%es\n" );
    }
    else
    {
        /* Register function: %edx becomes the CONTEXT86 pointer; push the
         * target cs:ip taken from it for the final lret */
        output( "\tmovl %d(%%edx),%%edx\n", STACK32OFFSET(target) );
        output( "\tpushw %d(%%edx)\n", CONTEXTOFFSET(SegCs) );
        output( "\tpushw %d(%%edx)\n", CONTEXTOFFSET(Eip) );

        /* Load registers from the context; ds is parked on the stack and
         * %edx (the context pointer) is loaded last */
        output( "\tpushw %d(%%edx)\n", CONTEXTOFFSET(SegDs) );
        output( "\tpushl %d(%%edx)\n", CONTEXTOFFSET(SegEs) );
        output( "\tpopl %%es\n" );
        output( "\tpushl %d(%%edx)\n", CONTEXTOFFSET(SegFs) );
        output( "\tpopl %%fs\n" );
        output( "\tpushl %d(%%edx)\n", CONTEXTOFFSET(SegGs) );
        output( "\tpopl %%gs\n" );
        output( "\tmovl %d(%%edx),%%ebp\n", CONTEXTOFFSET(Ebp) );
        output( "\tmovl %d(%%edx),%%esi\n", CONTEXTOFFSET(Esi) );
        output( "\tmovl %d(%%edx),%%edi\n", CONTEXTOFFSET(Edi) );
        output( "\tmovl %d(%%edx),%%eax\n", CONTEXTOFFSET(Eax) );
        output( "\tmovl %d(%%edx),%%ebx\n", CONTEXTOFFSET(Ebx) );
        output( "\tmovl %d(%%edx),%%ecx\n", CONTEXTOFFSET(Ecx) );
        output( "\tmovl %d(%%edx),%%edx\n", CONTEXTOFFSET(Edx) );

        /* Finally pick up the parked 16-bit ds */
        output( "\tpopw %%ds\n" );
    }

    /* Far return with operand-size prefix transfers control to the
     * pushed target address */
    output( "\t.byte 0x66\n" );
    output( "\tlret\n" );

    /* Emit the size directive for the function */
    output_function_size( symbol );
}
Exemple #3
0
/*******************************************************************
 *         BuildCallFrom16Core
 *
 * This routine builds the core routines used in 16->32 thunks:
 * CallFrom16Word, CallFrom16Long, CallFrom16Register, and CallFrom16Thunk.
 *
 * These routines are intended to be called via a far call (with 32-bit
 * operand size) from 16-bit code.  The 16-bit code stub must push %bp,
 * the 32-bit entry point to be called, and the argument conversion
 * routine to be used (see stack layout below).
 *
 * The core routine completes the STACK16FRAME on the 16-bit stack and
 * switches to the 32-bit stack.  Then, the argument conversion routine
 * is called; it gets passed the 32-bit entry point and a pointer to the
 * 16-bit arguments (on the 16-bit stack) as parameters. (You can either
 * use conversion routines automatically generated by BuildCallFrom16,
 * or write your own for special purposes.)
 *
 * The conversion routine must call the 32-bit entry point, passing it
 * the converted arguments, and return its return value to the core.
 * After the conversion routine has returned, the core switches back
 * to the 16-bit stack, converts the return value to the DX:AX format
 * (CallFrom16Long), and returns to the 16-bit call stub.  All parameters,
 * including %bp, are popped off the stack.
 *
 * The 16-bit call stub now returns to the caller, popping the 16-bit
 * arguments if necessary (pascal calling convention).
 *
 * In the case of a 'register' function, CallFrom16Register fills a
 * CONTEXT86 structure with the values all registers had at the point
 * the first instruction of the 16-bit call stub was about to be
 * executed.  A pointer to this CONTEXT86 is passed as third parameter
 * to the argument conversion routine, which typically passes it on
 * to the called 32-bit entry point.
 *
 * CallFrom16Thunk is a special variant used by the implementation of
 * the Win95 16->32 thunk functions C16ThkSL and C16ThkSL01 and is
 * implemented as follows:
 * On entry, the EBX register is set up to contain a flat pointer to the
 * 16-bit stack such that EBX+22 points to the first argument.
 * Then, the entry point is called, while EBP is set up to point
 * to the return address (on the 32-bit stack).
 * The called function returns with CX set to the number of bytes
 * to be popped off the caller's stack.
 *
 * Stack layout upon entry to the core routine (STACK16FRAME):
 *  ...           ...
 * (sp+24) word   first 16-bit arg
 * (sp+22) word   cs
 * (sp+20) word   ip
 * (sp+18) word   bp
 * (sp+14) long   32-bit entry point (reused for Win16 mutex recursion count)
 * (sp+12) word   ip of actual entry point (necessary for relay debugging)
 * (sp+8)  long   relay (argument conversion) function entry point
 * (sp+4)  long   cs of 16-bit entry point
 * (sp)    long   ip of 16-bit entry point
 *
 * Added on the stack:
 * (sp-2)  word   saved gs
 * (sp-4)  word   saved fs
 * (sp-6)  word   saved es
 * (sp-8)  word   saved ds
 * (sp-12) long   saved ebp
 * (sp-16) long   saved ecx
 * (sp-20) long   saved edx
 * (sp-24) long   saved previous stack
 */
static void BuildCallFrom16Core( FILE *outfile, int reg_func, int thunk, int short_ret )
{
    /*
     * Parameters:
     *   outfile   - stream that receives the generated assembly text
     *   reg_func  - non-zero: emit __wine_call_from_16_regs, which builds a
     *               CONTEXT86 on the 32-bit stack and restores all
     *               registers from it on return
     *   thunk     - non-zero: emit __wine_call_from_16_thunk; this takes
     *               priority over reg_func/short_ret and returns early
     *   short_ret - only used when neither thunk nor reg_func is set:
     *               non-zero emits __wine_call_from_16_word, zero emits
     *               __wine_call_from_16_long (high word of %eax is shifted
     *               into %dx before returning)
     *
     * sizeof values are cast to unsigned long before being printed with
     * %lu, since size_t need not be unsigned long.
     */

    /* Function header */
    if (thunk) function_header( outfile, "__wine_call_from_16_thunk" );
    else if (reg_func) function_header( outfile, "__wine_call_from_16_regs" );
    else if (short_ret) function_header( outfile, "__wine_call_from_16_word" );
    else function_header( outfile, "__wine_call_from_16_long" );

    /* Create STACK16FRAME (except STACK32FRAME link) */
    fprintf( outfile, "\tpushw %%gs\n" );
    fprintf( outfile, "\tpushw %%fs\n" );
    fprintf( outfile, "\tpushw %%es\n" );
    fprintf( outfile, "\tpushw %%ds\n" );
    fprintf( outfile, "\tpushl %%ebp\n" );
    fprintf( outfile, "\tpushl %%ecx\n" );
    fprintf( outfile, "\tpushl %%edx\n" );

    /* Save original EFlags register */
    fprintf( outfile, "\tpushfl\n" );

    if ( UsePIC )
    {
        /* Load the address of local label 1 into %ecx; it is the base for
         * the "symbol-1b(%ecx)" references emitted below */
        fprintf( outfile, "\tcall 1f\n" );
        fprintf( outfile, "1:\tpopl %%ecx\n" );
        fprintf( outfile, "\t.byte 0x2e\n\tmovl " PREFIX "CallTo16_DataSelector-1b(%%ecx),%%edx\n" );
    }
    else
    {
        fprintf( outfile, "\t.byte 0x2e\n\tmovl " PREFIX "CallTo16_DataSelector,%%edx\n" );
    }

    /* Load 32-bit segment registers */
#ifdef __svr4__
    fprintf( outfile, "\tdata16\n");
#endif
    fprintf( outfile, "\tmovw %%dx, %%ds\n" );
#ifdef __svr4__
    fprintf( outfile, "\tdata16\n");
#endif
    fprintf( outfile, "\tmovw %%dx, %%es\n" );

    if ( UsePIC )
        fprintf( outfile, "\tmovw " PREFIX "SYSLEVEL_Win16CurrentTeb-1b(%%ecx), %%fs\n" );
    else
        fprintf( outfile, "\tmovw " PREFIX "SYSLEVEL_Win16CurrentTeb, %%fs\n" );

    /* Get address of wine_ldt_copy array into %ecx */
    if ( UsePIC )
        fprintf( outfile, "\tmovl wine_ldt_copy_ptr-1b(%%ecx), %%ecx\n" );
    else
        fprintf( outfile, "\tmovl $" PREFIX "wine_ldt_copy, %%ecx\n" );

    /* Translate STACK16FRAME base to flat offset in %edx */
    fprintf( outfile, "\tmovw %%ss, %%dx\n" );
    fprintf( outfile, "\tandl $0xfff8, %%edx\n" );
    fprintf( outfile, "\tshrl $1, %%edx\n" );
    fprintf( outfile, "\tmovl (%%ecx,%%edx), %%edx\n" );
    fprintf( outfile, "\tmovzwl %%sp, %%ebp\n" );
    fprintf( outfile, "\tleal (%%ebp,%%edx), %%edx\n" );

    /* Get saved flags into %ecx */
    fprintf( outfile, "\tpopl %%ecx\n" );

    /* Get the 32-bit stack pointer from the TEB and complete STACK16FRAME */
    fprintf( outfile, "\t.byte 0x64\n\tmovl (%d), %%ebp\n", STACKOFFSET );
    fprintf( outfile, "\tpushl %%ebp\n" );

    /* Switch stacks */
#ifdef __svr4__
    fprintf( outfile,"\tdata16\n");
#endif
    fprintf( outfile, "\t.byte 0x64\n\tmovw %%ss, (%d)\n", STACKOFFSET + 2 );
    fprintf( outfile, "\t.byte 0x64\n\tmovw %%sp, (%d)\n", STACKOFFSET );
    fprintf( outfile, "\tpushl %%ds\n" );
    fprintf( outfile, "\tpopl %%ss\n" );
    fprintf( outfile, "\tmovl %%ebp, %%esp\n" );
    fprintf( outfile, "\taddl $%d, %%ebp\n", STRUCTOFFSET(STACK32FRAME, ebp) );


    /* At this point:
       STACK16FRAME is completely set up
       DS, ES, SS: flat data segment
       FS: current TEB
       ESP: points to last STACK32FRAME
       EBP: points to ebp member of last STACK32FRAME
       EDX: points to current STACK16FRAME
       ECX: contains saved flags
       all other registers: unchanged */

    /* Special case: C16ThkSL stub */
    if ( thunk )
    {
        /* Set up registers as expected and call thunk */
        fprintf( outfile, "\tleal %lu(%%edx), %%ebx\n",
                 (unsigned long)(sizeof(STACK16FRAME)-22) );
        fprintf( outfile, "\tleal -4(%%esp), %%ebp\n" );

        fprintf( outfile, "\tcall *%d(%%edx)\n", STACK16OFFSET(entry_point) );

        /* Switch stack back */
        fprintf( outfile, "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        fprintf( outfile, "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        fprintf( outfile, "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Restore registers and return directly to caller */
        fprintf( outfile, "\taddl $8, %%esp\n" );
        fprintf( outfile, "\tpopl %%ebp\n" );
        fprintf( outfile, "\tpopw %%ds\n" );
        fprintf( outfile, "\tpopw %%es\n" );
        fprintf( outfile, "\tpopw %%fs\n" );
        fprintf( outfile, "\tpopw %%gs\n" );
        fprintf( outfile, "\taddl $20, %%esp\n" );

        /* Pop the return address, drop %cx bytes (with %ch cleared) from
         * the stack, then push the return address back for the lret */
        fprintf( outfile, "\txorb %%ch, %%ch\n" );
        fprintf( outfile, "\tpopl %%ebx\n" );
        fprintf( outfile, "\taddw %%cx, %%sp\n" );
        fprintf( outfile, "\tpush %%ebx\n" );

        fprintf( outfile, "\t.byte 0x66\n" );
        fprintf( outfile, "\tlret\n" );

        return;
    }


    /* Build register CONTEXT */
    if ( reg_func )
    {
        fprintf( outfile, "\tsubl $%lu, %%esp\n", (unsigned long)sizeof(CONTEXT86) );

        fprintf( outfile, "\tmovl %%ecx, %d(%%esp)\n", CONTEXTOFFSET(EFlags) );

        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Eax) );
        fprintf( outfile, "\tmovl %%ebx, %d(%%esp)\n", CONTEXTOFFSET(Ebx) );
        fprintf( outfile, "\tmovl %%esi, %d(%%esp)\n", CONTEXTOFFSET(Esi) );
        fprintf( outfile, "\tmovl %%edi, %d(%%esp)\n", CONTEXTOFFSET(Edi) );

        /* %ebp, %ecx and %edx were clobbered above; take the values saved
         * in the STACK16FRAME instead */
        fprintf( outfile, "\tmovl %d(%%edx), %%eax\n", STACK16OFFSET(ebp) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Ebp) );
        fprintf( outfile, "\tmovl %d(%%edx), %%eax\n", STACK16OFFSET(ecx) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Ecx) );
        fprintf( outfile, "\tmovl %d(%%edx), %%eax\n", STACK16OFFSET(edx) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Edx) );

        fprintf( outfile, "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(ds) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegDs) );
        fprintf( outfile, "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(es) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegEs) );
        fprintf( outfile, "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(fs) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegFs) );
        fprintf( outfile, "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(gs) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegGs) );

        fprintf( outfile, "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(cs) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegCs) );
        fprintf( outfile, "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(ip) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Eip) );

        fprintf( outfile, "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET+2 );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegSs) );
        fprintf( outfile, "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET );
        fprintf( outfile, "\taddl $%d, %%eax\n", STACK16OFFSET(ip) );
        fprintf( outfile, "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Esp) );
#if 0
        fprintf( outfile, "\tfsave %d(%%esp)\n", CONTEXTOFFSET(FloatSave) );
#endif

        /* Push address of CONTEXT86 structure -- popped by the relay routine */
        fprintf( outfile, "\tpushl %%esp\n" );
    }


    /* Print debug info before call */
    if ( debugging )
    {
        fprintf( outfile, "\tpushl %%edx\n" );
        if ( reg_func )
        {
            /* The context was allocated with sizeof(CONTEXT86) directly
             * below the STACK32FRAME, so its address must be computed
             * with the same size */
            fprintf( outfile, "\tleal -%lu(%%ebp), %%eax\n\tpushl %%eax\n",
                     (unsigned long)(sizeof(CONTEXT86) + STRUCTOFFSET(STACK32FRAME, ebp)) );
        }
        else
        {
            fprintf( outfile, "\tpushl $0\n" );
        }

        /* Emitted for both cases */
        fprintf( outfile, "\tcall " PREFIX "RELAY_DebugCallFrom16\n ");

        fprintf( outfile, "\tpopl %%edx\n" );
        fprintf( outfile, "\tpopl %%edx\n" );
    }

    /* Call relay routine (which will call the API entry point) */
    fprintf( outfile, "\tleal %lu(%%edx), %%eax\n", (unsigned long)sizeof(STACK16FRAME) );
    fprintf( outfile, "\tpushl %%eax\n" );
    fprintf( outfile, "\tpushl %d(%%edx)\n", STACK16OFFSET(entry_point) );
    fprintf( outfile, "\tcall *%d(%%edx)\n", STACK16OFFSET(relay) );

    /* Print debug info after call */
    if ( debugging )
    {
        fprintf( outfile, "\tpushl %%eax\n" );
        if ( reg_func )
        {
            fprintf( outfile, "\tleal -%lu(%%ebp), %%eax\n\tpushl %%eax\n",
                     (unsigned long)(sizeof(CONTEXT86) + STRUCTOFFSET(STACK32FRAME, ebp)) );
        }
        else
        {
            fprintf( outfile, "\tpushl $0\n" );
        }

        fprintf( outfile, "\tcall " PREFIX "RELAY_DebugCallFrom16Ret\n ");

        fprintf( outfile, "\tpopl %%eax\n" );
        fprintf( outfile, "\tpopl %%eax\n" );
    }


    if ( reg_func )
    {
        /* Keep the 32-bit stack pointer in %ebx; it is used as the
         * CONTEXT base below */
        fprintf( outfile, "\tmovl %%esp, %%ebx\n" );

        /* Switch stack back */
        fprintf( outfile, "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        fprintf( outfile, "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        fprintf( outfile, "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Get return address to CallFrom16 stub */
        fprintf( outfile, "\taddw $%d, %%sp\n", STACK16OFFSET(callfrom_ip)-4 );
        fprintf( outfile, "\tpopl %%eax\n" );
        fprintf( outfile, "\tpopl %%edx\n" );

        /* Restore all registers from CONTEXT */
        fprintf( outfile, "\tmovw %d(%%ebx), %%ss\n", CONTEXTOFFSET(SegSs) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%esp\n", CONTEXTOFFSET(Esp) );
        fprintf( outfile, "\taddl $4, %%esp\n" );  /* room for final return address */

        fprintf( outfile, "\tpushw %d(%%ebx)\n", CONTEXTOFFSET(SegCs) );
        fprintf( outfile, "\tpushw %d(%%ebx)\n", CONTEXTOFFSET(Eip) );
        fprintf( outfile, "\tpushl %%edx\n" );
        fprintf( outfile, "\tpushl %%eax\n" );
        fprintf( outfile, "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(EFlags) );
        fprintf( outfile, "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(SegDs) );

        fprintf( outfile, "\tmovw %d(%%ebx), %%es\n", CONTEXTOFFSET(SegEs) );
        fprintf( outfile, "\tmovw %d(%%ebx), %%fs\n", CONTEXTOFFSET(SegFs) );
        fprintf( outfile, "\tmovw %d(%%ebx), %%gs\n", CONTEXTOFFSET(SegGs) );

        /* %ebx is the context pointer and is therefore loaded last */
        fprintf( outfile, "\tmovl %d(%%ebx), %%ebp\n", CONTEXTOFFSET(Ebp) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%esi\n", CONTEXTOFFSET(Esi) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%edi\n", CONTEXTOFFSET(Edi) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%eax\n", CONTEXTOFFSET(Eax) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%edx\n", CONTEXTOFFSET(Edx) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%ecx\n", CONTEXTOFFSET(Ecx) );
        fprintf( outfile, "\tmovl %d(%%ebx), %%ebx\n", CONTEXTOFFSET(Ebx) );

        fprintf( outfile, "\tpopl %%ds\n" );
        fprintf( outfile, "\tpopfl\n" );
        fprintf( outfile, "\tlret\n" );
    }
    else
    {
        /* Switch stack back */
        fprintf( outfile, "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        fprintf( outfile, "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        fprintf( outfile, "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Restore registers */
        fprintf( outfile, "\tpopl %%edx\n" );
        fprintf( outfile, "\tpopl %%ecx\n" );
        fprintf( outfile, "\tpopl %%ebp\n" );
        fprintf( outfile, "\tpopw %%ds\n" );
        fprintf( outfile, "\tpopw %%es\n" );
        fprintf( outfile, "\tpopw %%fs\n" );
        fprintf( outfile, "\tpopw %%gs\n" );

        /* Prepare return value and set flags accordingly */
        if ( !short_ret )
            fprintf( outfile, "\tshldl $16, %%eax, %%edx\n" );
        fprintf( outfile, "\torl %%eax, %%eax\n" );

        /* Return to return stub which will return to caller */
        fprintf( outfile, "\tlret $12\n" );
    }
}
Exemple #4
0
/*******************************************************************
 *         BuildCallFrom16Core
 *
 * This routine builds the core routines used in 16->32 thunks:
 * CallFrom16Word, CallFrom16Long, CallFrom16Register, and CallFrom16Thunk.
 *
 * These routines are intended to be called via a far call (with 32-bit
 * operand size) from 16-bit code.  The 16-bit code stub must push %bp,
 * the 32-bit entry point to be called, and the argument conversion
 * routine to be used (see stack layout below).
 *
 * The core routine completes the STACK16FRAME on the 16-bit stack and
 * switches to the 32-bit stack.  Then, the argument conversion routine
 * is called; it gets passed the 32-bit entry point and a pointer to the
 * 16-bit arguments (on the 16-bit stack) as parameters. (You can either
 * use conversion routines automatically generated by BuildCallFrom16,
 * or write your own for special purposes.)
 *
 * The conversion routine must call the 32-bit entry point, passing it
 * the converted arguments, and return its return value to the core.
 * After the conversion routine has returned, the core switches back
 * to the 16-bit stack, converts the return value to the DX:AX format
 * (CallFrom16Long), and returns to the 16-bit call stub.  All parameters,
 * including %bp, are popped off the stack.
 *
 * The 16-bit call stub now returns to the caller, popping the 16-bit
 * arguments if necessary (pascal calling convention).
 *
 * In the case of a 'register' function, CallFrom16Register fills a
 * CONTEXT86 structure with the values all registers had at the point
 * the first instruction of the 16-bit call stub was about to be
 * executed.  A pointer to this CONTEXT86 is passed as third parameter
 * to the argument conversion routine, which typically passes it on
 * to the called 32-bit entry point.
 *
 * CallFrom16Thunk is a special variant used by the implementation of
 * the Win95 16->32 thunk functions C16ThkSL and C16ThkSL01 and is
 * implemented as follows:
 * On entry, the EBX register is set up to contain a flat pointer to the
 * 16-bit stack such that EBX+22 points to the first argument.
 * Then, the entry point is called, while EBP is set up to point
 * to the return address (on the 32-bit stack).
 * The called function returns with CX set to the number of bytes
 * to be popped off the caller's stack.
 *
 * Stack layout upon entry to the core routine (STACK16FRAME):
 *  ...           ...
 * (sp+24) word   first 16-bit arg
 * (sp+22) word   cs
 * (sp+20) word   ip
 * (sp+18) word   bp
 * (sp+14) long   32-bit entry point (reused for Win16 mutex recursion count)
 * (sp+12) word   ip of actual entry point (necessary for relay debugging)
 * (sp+8)  long   relay (argument conversion) function entry point
 * (sp+4)  long   cs of 16-bit entry point
 * (sp)    long   ip of 16-bit entry point
 *
 * Added on the stack:
 * (sp-2)  word   saved gs
 * (sp-4)  word   saved fs
 * (sp-6)  word   saved es
 * (sp-8)  word   saved ds
 * (sp-12) long   saved ebp
 * (sp-16) long   saved ecx
 * (sp-20) long   saved edx
 * (sp-24) long   saved previous stack
 */
static void BuildCallFrom16Core( int reg_func, int thunk )
{
    /*
     * Parameters:
     *   reg_func  non-zero to generate the 'register' variant, which saves
     *             EFlags, builds a CONTEXT86 on the 32-bit stack, passes its
     *             address to the relay routine and restores all registers
     *             from it afterwards.
     *   thunk     non-zero to generate the C16ThkSL variant, which calls the
     *             entry point directly and returns to the 16-bit caller
     *             without going through the relay routine.
     *
     * 'thunk' takes precedence over 'reg_func' when choosing the routine
     * name.  The same name is passed to function_header() and to
     * output_function_size() so that both always agree.
     */
    const char *name = thunk    ? "__wine_call_from_16_thunk" :
                       reg_func ? "__wine_call_from_16_regs" :
                                  "__wine_call_from_16";

    /* Function header */
    function_header( name );

    /* Create STACK16FRAME (except STACK32FRAME link) */
    output( "\tpushw %%gs\n" );
    output( "\tpushw %%fs\n" );
    output( "\tpushw %%es\n" );
    output( "\tpushw %%ds\n" );
    output( "\tpushl %%ebp\n" );
    output( "\tpushl %%ecx\n" );
    output( "\tpushl %%edx\n" );

    /* Save original EFlags register */
    if (reg_func) output( "\tpushfl\n" );

    /* Fetch the flat data selector.  The 0x2e byte is a CS segment override
     * prefix, so the variable is read through %cs (the data segment
     * registers still hold the 16-bit caller's values at this point).
     * In PIC mode the call/pop pair leaves the address of label 1 in %ecx,
     * and variables are addressed relative to it. */
    if ( UsePIC )
    {
        output( "\tcall 1f\n" );
        output( "1:\tpopl %%ecx\n" );
        output( "\t.byte 0x2e\n\tmovl %s-1b(%%ecx),%%edx\n", asm_name("CallTo16_DataSelector") );
    }
    else
        output( "\t.byte 0x2e\n\tmovl %s,%%edx\n", asm_name("CallTo16_DataSelector") );

    /* Load 32-bit segment registers */
    output( "\tmovw %%dx, %%ds\n" );
    output( "\tmovw %%dx, %%es\n" );

    if ( UsePIC )
        output( "\tmovw %s-1b(%%ecx), %%fs\n", asm_name("CallTo16_TebSelector") );
    else
        output( "\tmovw %s, %%fs\n", asm_name("CallTo16_TebSelector") );

    /* 0x64 is the FS segment override prefix: load %gs from %fs:GS_OFFSET */
    output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );

    /* Translate STACK16FRAME base to flat offset in %edx.
     * Clearing the low three bits of the %ss selector and halving the
     * result gives (selector index * 4), which is used as the byte offset
     * into wine_ldt_copy. */
    output( "\tmovw %%ss, %%dx\n" );
    output( "\tandl $0xfff8, %%edx\n" );
    output( "\tshrl $1, %%edx\n" );
    if (UsePIC)
    {
        output( "\taddl wine_ldt_copy_ptr-1b(%%ecx),%%edx\n" );
        output( "\tmovl (%%edx), %%edx\n" );
    }
    else
        output( "\tmovl %s(%%edx), %%edx\n", asm_name("wine_ldt_copy") );
    output( "\tmovzwl %%sp, %%ebp\n" );
    /* %edx must point 4 bytes below the last saved register, where the
     * 32-bit stack link is pushed further down.  In the register case %sp
     * is already 4 bytes lower because of the pushfl above. */
    output( "\tleal %d(%%ebp,%%edx), %%edx\n", reg_func ? 0 : -4 );

    /* Get saved flags into %ecx */
    if (reg_func) output( "\tpopl %%ecx\n" );

    /* Get the 32-bit stack pointer from the TEB and complete STACK16FRAME */
    output( "\t.byte 0x64\n\tmovl (%d), %%ebp\n", STACKOFFSET );
    output( "\tpushl %%ebp\n" );

    /* Switch stacks */
    output( "\t.byte 0x64\n\tmovw %%ss, (%d)\n", STACKOFFSET + 2 );
    output( "\t.byte 0x64\n\tmovw %%sp, (%d)\n", STACKOFFSET );
    output( "\tpushl %%ds\n" );
    output( "\tpopl %%ss\n" );
    output( "\tmovl %%ebp, %%esp\n" );
    output( "\taddl $%d, %%ebp\n", STACK32OFFSET(ebp) );


    /* At this point:
       STACK16FRAME is completely set up
       DS, ES, SS: flat data segment
       FS: current TEB
       ESP: points to last STACK32FRAME
       EBP: points to ebp member of last STACK32FRAME
       EDX: points to current STACK16FRAME
       ECX: contains saved flags
       all other registers: unchanged */

    /* Special case: C16ThkSL stub */
    if ( thunk )
    {
        /* Set up registers as expected and call thunk */
        output( "\tleal %d(%%edx), %%ebx\n", (int)sizeof(STACK16FRAME)-22 );
        output( "\tleal -4(%%esp), %%ebp\n" );

        output( "\tcall *%d(%%edx)\n", STACK16OFFSET(entry_point) );

        /* Switch stack back */
        output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Restore registers and return directly to caller */
        output( "\taddl $8, %%esp\n" );   /* skip saved edx and ecx */
        output( "\tpopl %%ebp\n" );
        output( "\tpopw %%ds\n" );
        output( "\tpopw %%es\n" );
        output( "\tpopw %%fs\n" );
        output( "\tpopw %%gs\n" );
        output( "\taddl $20, %%esp\n" );

        /* Pop the return address, drop %cl bytes of arguments (the thunk
         * returns the count in CX; only the low byte is used), then push
         * the return address back for the far return. */
        output( "\txorb %%ch, %%ch\n" );
        output( "\tpopl %%ebx\n" );
        output( "\taddw %%cx, %%sp\n" );
        output( "\tpush %%ebx\n" );

        /* 0x66 is the operand-size override prefix for the far return */
        output( "\t.byte 0x66\n" );
        output( "\tlret\n" );

        /* The thunk variant ends here, so its size must be emitted before
         * returning; the common epilogue below is never reached. */
        output_function_size( name );
        return;
    }


    /* Build register CONTEXT */
    if ( reg_func )
    {
        output( "\tsubl $%d, %%esp\n", (int)sizeof(CONTEXT86) );

        output( "\tmovl %%ecx, %d(%%esp)\n", CONTEXTOFFSET(EFlags) );

        /* Registers that have not been touched yet are stored directly */
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Eax) );
        output( "\tmovl %%ebx, %d(%%esp)\n", CONTEXTOFFSET(Ebx) );
        output( "\tmovl %%esi, %d(%%esp)\n", CONTEXTOFFSET(Esi) );
        output( "\tmovl %%edi, %d(%%esp)\n", CONTEXTOFFSET(Edi) );

        /* The remaining ones are copied from the STACK16FRAME via %eax */
        output( "\tmovl %d(%%edx), %%eax\n", STACK16OFFSET(ebp) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Ebp) );
        output( "\tmovl %d(%%edx), %%eax\n", STACK16OFFSET(ecx) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Ecx) );
        output( "\tmovl %d(%%edx), %%eax\n", STACK16OFFSET(edx) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Edx) );

        output( "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(ds) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegDs) );
        output( "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(es) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegEs) );
        output( "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(fs) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegFs) );
        output( "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(gs) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegGs) );

        output( "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(cs) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegCs) );
        output( "\tmovzwl %d(%%edx), %%eax\n", STACK16OFFSET(ip) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Eip) );

        /* SS:SP of the 16-bit stack were saved in the TEB when switching
         * stacks; Esp is the saved SP plus the offset of the ip member. */
        output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET+2 );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(SegSs) );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET );
        output( "\taddl $%d, %%eax\n", STACK16OFFSET(ip) );
        output( "\tmovl %%eax, %d(%%esp)\n", CONTEXTOFFSET(Esp) );
#if 0
        output( "\tfsave %d(%%esp)\n", CONTEXTOFFSET(FloatSave) );
#endif

        /* Push address of CONTEXT86 structure -- popped by the relay routine.
         * The and/sub pair leaves %esp 16-byte aligned once the context
         * pointer and the two relay arguments below have been pushed. */
        output( "\tmovl %%esp,%%eax\n" );
        output( "\tandl $~15,%%esp\n" );
        output( "\tsubl $4,%%esp\n" );
        output( "\tpushl %%eax\n" );
    }
    else
    {
        /* Leave %esp 16-byte aligned once the two relay arguments below
         * have been pushed. */
        output( "\tsubl $8,%%esp\n" );
        output( "\tandl $~15,%%esp\n" );
        output( "\taddl $8,%%esp\n" );
    }

    /* Call relay routine (which will call the API entry point) */
    output( "\tleal %d(%%edx), %%eax\n", (int)sizeof(STACK16FRAME) );
    output( "\tpushl %%eax\n" );
    output( "\tpushl %d(%%edx)\n", STACK16OFFSET(entry_point) );
    output( "\tcall *%d(%%edx)\n", STACK16OFFSET(relay) );

    if ( reg_func )
    {
        /* Recompute the CONTEXT86 address in %ebx from %ebp.  The size used
         * here must match the sizeof(CONTEXT86) that was subtracted from
         * %esp when the structure was allocated above. */
        output( "\tleal -%d(%%ebp), %%ebx\n", (int)sizeof(CONTEXT86) + STACK32OFFSET(ebp) );

        /* Switch stack back */
        output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Get return address to CallFrom16 stub */
        output( "\taddw $%d, %%sp\n", STACK16OFFSET(callfrom_ip)-4 );
        output( "\tpopl %%eax\n" );
        output( "\tpopl %%edx\n" );

        /* Restore all registers from CONTEXT */
        output( "\tmovw %d(%%ebx), %%ss\n", CONTEXTOFFSET(SegSs) );
        output( "\tmovl %d(%%ebx), %%esp\n", CONTEXTOFFSET(Esp) );
        output( "\taddl $4, %%esp\n" );  /* room for final return address */

        /* Build the return frame on the 16-bit stack: final CS:IP, the
         * return address to the stub, then EFlags and DS, which are popped
         * last because %ebx is still addressed through %ds until then. */
        output( "\tpushw %d(%%ebx)\n", CONTEXTOFFSET(SegCs) );
        output( "\tpushw %d(%%ebx)\n", CONTEXTOFFSET(Eip) );
        output( "\tpushl %%edx\n" );
        output( "\tpushl %%eax\n" );
        output( "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(EFlags) );
        output( "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(SegDs) );

        output( "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(SegEs) );
        output( "\tpopl %%es\n" );
        output( "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(SegFs) );
        output( "\tpopl %%fs\n" );
        output( "\tpushl %d(%%ebx)\n", CONTEXTOFFSET(SegGs) );
        output( "\tpopl %%gs\n" );

        /* %ebx is the base register for all of these loads, so it must be
         * reloaded last. */
        output( "\tmovl %d(%%ebx), %%ebp\n", CONTEXTOFFSET(Ebp) );
        output( "\tmovl %d(%%ebx), %%esi\n", CONTEXTOFFSET(Esi) );
        output( "\tmovl %d(%%ebx), %%edi\n", CONTEXTOFFSET(Edi) );
        output( "\tmovl %d(%%ebx), %%eax\n", CONTEXTOFFSET(Eax) );
        output( "\tmovl %d(%%ebx), %%edx\n", CONTEXTOFFSET(Edx) );
        output( "\tmovl %d(%%ebx), %%ecx\n", CONTEXTOFFSET(Ecx) );
        output( "\tmovl %d(%%ebx), %%ebx\n", CONTEXTOFFSET(Ebx) );

        output( "\tpopl %%ds\n" );
        output( "\tpopfl\n" );
        output( "\tlret\n" );
    }
    else
    {
        /* Switch stack back */
        output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
        output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
        output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );

        /* Restore registers */
        output( "\tpopl %%edx\n" );
        output( "\tpopl %%ecx\n" );
        output( "\tpopl %%ebp\n" );
        output( "\tpopw %%ds\n" );
        output( "\tpopw %%es\n" );
        output( "\tpopw %%fs\n" );
        output( "\tpopw %%gs\n" );

        /* Return to return stub which will return to caller */
        output( "\tlret $12\n" );
    }

    output_function_size( name );
}