Code example #1
File: nudge.c Project: derekbruening/dynamorio
#ifdef WINDOWS
static
#endif
void
handle_nudge(dcontext_t *dcontext, nudge_arg_t *arg)
{
    uint nudge_action_mask = arg->nudge_action_mask;

    /* Future version checks would go here. */
    ASSERT_CURIOSITY(arg->version == NUDGE_ARG_CURRENT_VERSION);

    /* Nudge shouldn't start with any locks held.  Do this assert after the
     * dynamo_exited check; otherwise the locks may be deleted. */
    ASSERT_OWN_NO_LOCKS();

    STATS_INC(num_nudges);

#ifdef WINDOWS
    /* Linux does this in signal.c */
    SYSLOG_INTERNAL_INFO("received nudge mask=0x%x id=0x%08x arg=0x"ZHEX64_FORMAT_STRING,
                         arg->nudge_action_mask, arg->client_id, arg->client_arg);
#endif

    if (nudge_action_mask == 0) {
        ASSERT_CURIOSITY(false && "Nudge: no action specified");
        return;
    } else if (nudge_action_mask >= NUDGE_GENERIC(PARAMETRIZED_END)) {
        ASSERT(false && "Nudge: unknown nudge action");
        return;
    }

    /* In -thin_client mode only detach and process_control nudges are allowed;
     * case 8888. */
#define VALID_THIN_CLIENT_NUDGES (NUDGE_GENERIC(process_control)|NUDGE_GENERIC(detach))
    if (DYNAMO_OPTION(thin_client)) {
        if (TEST(VALID_THIN_CLIENT_NUDGES, nudge_action_mask)) {
            /* If it is a valid thin client nudge, then disable all others. */
            nudge_action_mask &= VALID_THIN_CLIENT_NUDGES;
        } else {
            return;   /* invalid nudge for thin_client, so mute it */
        }
    }

    /* FIXME: NYI action handlers.  As they are implemented, move them into the desired order. */
    if (TEST(NUDGE_GENERIC(upgrade), nudge_action_mask)) {
        /* FIXME: watch out for flushed clean-call fragment */
        nudge_action_mask &= ~NUDGE_GENERIC(upgrade);
        ASSERT_NOT_IMPLEMENTED(false && "case 4179");
    }
    if (TEST(NUDGE_GENERIC(kstats), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(kstats);
        ASSERT_NOT_IMPLEMENTED(false);
    }
#ifdef INTERNAL
    if (TEST(NUDGE_GENERIC(stats), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(stats);
        ASSERT_NOT_IMPLEMENTED(false);
    }
    if (TEST(NUDGE_GENERIC(invalidate), nudge_action_mask)) {
        /* FIXME: watch out for flushed clean-call fragment  */
        nudge_action_mask &= ~NUDGE_GENERIC(invalidate);
        ASSERT_NOT_IMPLEMENTED(false);
    }
    if (TEST(NUDGE_GENERIC(recreate_pc), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(recreate_pc);
        ASSERT_NOT_IMPLEMENTED(false);
    }
    if (TEST(NUDGE_GENERIC(recreate_state), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(recreate_state);
        ASSERT_NOT_IMPLEMENTED(false);
    }
    if (TEST(NUDGE_GENERIC(reattach), nudge_action_mask)) {
        /* FIXME: watch out for flushed clean-call fragment */
        nudge_action_mask &= ~NUDGE_GENERIC(reattach);
        ASSERT_NOT_IMPLEMENTED(false);
    }
#endif /* INTERNAL */
    if (TEST(NUDGE_GENERIC(diagnose), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(diagnose);
        ASSERT_NOT_IMPLEMENTED(false);
    }

    /* Implemented action handlers */
    if (TEST(NUDGE_GENERIC(opt), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(opt);
        synchronize_dynamic_options();
    }
    if (TEST(NUDGE_GENERIC(ldmp), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(ldmp);
        os_dump_core("Nudge triggered ldmp.");
    }
    if (TEST(NUDGE_GENERIC(freeze), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(freeze);
        coarse_units_freeze_all(true/*in-place: FIXME: separate nudge for non?*/);
    }
    if (TEST(NUDGE_GENERIC(persist), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(persist);
        coarse_units_freeze_all(false/*!in-place==persist*/);
    }
#ifdef CLIENT_INTERFACE
    if (TEST(NUDGE_GENERIC(client), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(client);
        instrument_nudge(dcontext, arg->client_id, arg->client_arg);
    }
#endif
#ifdef PROCESS_CONTROL
    if (TEST(NUDGE_GENERIC(process_control), nudge_action_mask)) {  /* Case 8594 */
        nudge_action_mask &= ~NUDGE_GENERIC(process_control);
        /* Need to synchronize because process control can be switched between
         * on (white or black list) & off.  FIXME - the nudge mask should specify this,
         * but doesn't hurt to do it again. */
        synchronize_dynamic_options();
        if (IS_PROCESS_CONTROL_ON())
            process_control();

        /* If process control is enforced then control won't come back.  If
         * either -detect_mode is on or if there was nothing to enforce, control
         * comes back in which case it is safe to let remaining nudges be
         * processed because no core state would have been changed. */
    }
#endif
#ifdef HOTPATCHING
    if (DYNAMO_OPTION(hot_patching) && DYNAMO_OPTION(liveshields) &&
        TEST_ANY(NUDGE_GENERIC(policy)|NUDGE_GENERIC(mode)|NUDGE_GENERIC(lstats),
                 nudge_action_mask)) {
        hotp_nudge_update(nudge_action_mask &
                          (NUDGE_GENERIC(policy)|NUDGE_GENERIC(mode)|NUDGE_GENERIC(lstats)));
        nudge_action_mask &= ~(NUDGE_GENERIC(policy)|NUDGE_GENERIC(mode)|NUDGE_GENERIC(lstats));
    }
#endif
#ifdef PROGRAM_SHEPHERDING
    if (TEST(NUDGE_GENERIC(violation), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(violation);
        /* Use the nudge mechanism to trigger a security violation at an
         * arbitrary time.  Note: this is only useful for testing kill-process
         * attack handling, as this is not an app thread (we injected it). */
        /* see bug 652 for planned improvements */
        security_violation(dcontext, dcontext->next_tag,
                           ATTACK_SIM_NUDGE_VIOLATION, OPTION_BLOCK|OPTION_REPORT);
    }
#endif
    if (TEST(NUDGE_GENERIC(reset), nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_GENERIC(reset);
        if (DYNAMO_OPTION(enable_reset)) {
            mutex_lock(&reset_pending_lock);
            /* fcache_reset_all_caches_proactively() will unlock */
            fcache_reset_all_caches_proactively(RESET_ALL);
            /* NOTE - reset is safe since we won't return to the code cache below (we
             * will in fact not return at all). */
        } else {
            SYSLOG_INTERNAL_WARNING("nudge reset ignored since resets are disabled");
        }
    }
#ifdef WINDOWS
    /* The detach handler is last since in the common case it doesn't return. */
    if (TEST(NUDGE_GENERIC(detach), nudge_action_mask)) {
        dcontext->free_app_stack = false;
        nudge_action_mask &= ~NUDGE_GENERIC(detach);
        detach_helper(DETACH_NORMAL_TYPE);
    }
#endif
}
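
To make the dispatch pattern above concrete, here is a minimal, self-contained sketch of the bitmask idiom handle_nudge() uses: each action is one bit in the mask, TEST() checks for membership, and every handler clears its bit so that leftover bits expose unknown or unimplemented actions. The TEST() and NUDGE_* definitions below are illustrative stand-ins, not DynamoRIO's real macros.

#include <stdio.h>

typedef unsigned int uint;

#define TEST(mask, var) (((var) & (mask)) != 0)
#define NUDGE_BIT(n) (1U << (n))
/* Hypothetical bit assignments, for illustration only. */
#define NUDGE_OPT   NUDGE_BIT(0)
#define NUDGE_RESET NUDGE_BIT(1)

static void
dispatch_nudge(uint nudge_action_mask)
{
    if (TEST(NUDGE_OPT, nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_OPT;    /* mark this action handled */
        printf("would synchronize dynamic options\n");
    }
    if (TEST(NUDGE_RESET, nudge_action_mask)) {
        nudge_action_mask &= ~NUDGE_RESET;
        printf("would reset code caches\n");
    }
    if (nudge_action_mask != 0)             /* leftover bits: unknown actions */
        printf("unhandled nudge bits: 0x%x\n", nudge_action_mask);
}

int
main(void)
{
    dispatch_nudge(NUDGE_OPT | NUDGE_RESET | NUDGE_BIT(7));
    return 0;
}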
Code example #2
File: tls_linux_x86.c Project: FirstBlue/dynamorio
void
tls_thread_init(os_local_state_t *os_tls, byte *segment)
{
    /* We have four different ways to obtain TLS, each with its own limitations:
     *
     * 1) Piggyback on the threading system (like we do on Windows): here that would
     *    be pthreads, which uses a segment since at least RH9, and uses gdt-based
     *    segments for NPTL.  The advantage is we won't run out of ldt or gdt entries
     *    (except when the app itself would).  The disadvantage is we're stealing
     *    application slots and we rely on user mode interfaces.
     *
     * 2) Steal an ldt entry via SYS_modify_ldt.  This suffers from the 8K ldt entry
     *    limit and requires that we update manually on a new thread.  For 64-bit
     *    we're limited here to a 32-bit base.  (Strangely, the kernel's
     *    include/asm-x86_64/ldt.h implies that the base is ignored: but it doesn't
     *    seem to be.)
     *
     * 3) Steal a gdt entry via SYS_set_thread_area.  There is a 3rd unused entry
     *    (after pthreads and wine) we could use.  The kernel swaps for us, and with
     *    CLONE_TLS the kernel will set up the entry for a new thread for us.  Xref
     *    PR 192231 and PR 285898.  This system call is disabled on 64-bit 2.6
     *    kernels (though the man page for arch_prctl implies it isn't for 2.5
     *    kernels?!?)
     *
     * 4) Use SYS_arch_prctl.  This is only implemented on 64-bit kernels, and can
     *    only be used to set the gdt entries that fs and gs select for.  Faster to
     *    use <4GB base (obtain with mmap MAP_32BIT) since can use gdt; else have to
     *    use wrmsr.  The man pages say "ARCH_SET_GS is disabled in some kernels".
     */
    uint selector;
    int index = -1;
    int res;
#ifdef X64
    /* First choice is gdt, which means arch_prctl.  Since this may fail
     * on some kernels, we require -heap_in_lower_4GB so we can fall back
     * on modify_ldt.
     */
    byte *cur_gs;
    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_GET_GS, &cur_gs);
    if (res >= 0) {
        LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: cur gs base is "PFX"\n", cur_gs);
        /* If we're a non-initial thread, gs will be set to the parent thread's value */
        if (cur_gs == NULL || is_dynamo_address(cur_gs) ||
            /* By resolving i#107, we can handle gs conflicts between app and dr. */
            INTERNAL_OPTION(mangle_app_seg)) {
            res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_GS, segment);
            if (res >= 0) {
                os_tls->tls_type = TLS_TYPE_ARCH_PRCTL;
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl successful for base "PFX"\n", segment);
                res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_GET_GS, &cur_gs);
                if (res >= 0 && cur_gs != segment && !on_WSL) {
                    /* XXX i#1896: on WSL, ARCH_GET_GS is broken and does not return
                     * the true value.  (Plus, fs and gs start out equal to ss (0x2b)
                     * and are not set by ARCH_SET_*).  i#2089's safe read TLS
                     * solution solves this, but we still warn as we haven't fixed
                     * later issues.  Without the safe read we have to abort.
                     */
                    on_WSL = true;
                    LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: running on WSL\n");
                    if (INTERNAL_OPTION(safe_read_tls_init)) {
                        SYSLOG_INTERNAL_WARNING
                            ("Support for the Windows Subsystem for Linux is still "
                             "preliminary, due to missing kernel features.  "
                             "Continuing, but please report any problems encountered.");
                    } else {
                        SYSLOG(SYSLOG_ERROR, WSL_UNSUPPORTED_FATAL, 2,
                               get_application_name(), get_application_pid());
                        os_terminate(NULL, TERMINATE_PROCESS);
                        ASSERT_NOT_REACHED();
                    }
                }
                /* Kernel should have written %gs for us if using GDT */
                if (!dynamo_initialized &&
                    /* We assume that WSL is using MSR */
                    (on_WSL || read_thread_register(SEG_TLS) == 0)) {
                    LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: using MSR\n");
                    tls_using_msr = true;
                }
                if (IF_CLIENT_INTERFACE_ELSE(INTERNAL_OPTION(private_loader), false)) {
                    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_FS,
                                            os_tls->os_seg_info.priv_lib_tls_base);
                    /* Assume that setting fs succeeds if setting gs succeeded. */
                    ASSERT(res >= 0);
                }
            } else {
                /* we've found a kernel where ARCH_SET_GS is disabled */
                ASSERT_CURIOSITY(false && "arch_prctl failed on set but not get");
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl failed: error %d\n", res);
            }
        } else {
            /* FIXME PR 205276: we don't currently handle it: fall back on ldt, but
             * we'll have the same conflict w/ the selector...
             */
            ASSERT_BUG_NUM(205276, cur_gs == NULL);
        }
    }
#endif

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Second choice is set_thread_area */
        /* PR 285898: if we added CLONE_SETTLS to all clone calls (and emulated vfork
         * with clone) we could avoid having to set tls up for each thread (as well
         * as solve race PR 207903), at least for kernel 2.5.32+.  For now we stick
         * w/ manual setup.
         */
        our_modify_ldt_t desc;

        /* Pick which GDT slots we'll use for DR TLS and for library TLS if
         * using the private loader.
         */
        choose_gdt_slots(os_tls);

        if (tls_gdt_index > -1) {
            /* Now that we know which GDT slot to use, install the per-thread base
             * into it.
             */
            /* Base here must be 32-bit */
            IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) &&
                          segment <= (byte*)UINT_MAX));
            initialize_ldt_struct(&desc, segment, PAGE_SIZE, tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, tls_gdt_index, res, desc.entry_number);
            ASSERT(res < 0 || desc.entry_number == tls_gdt_index);
        } else {
            res = -1;  /* fall back on LDT */
        }

        if (res >= 0) {
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area successful for base "PFX" @index %d\n",
                segment, tls_gdt_index);
            os_tls->tls_type = TLS_TYPE_GDT;
            index = tls_gdt_index;
            selector = GDT_SELECTOR(index);
            WRITE_DR_SEG(selector); /* macro needs lvalue! */
        } else {
            IF_VMX86(ASSERT_NOT_REACHED()); /* since no modify_ldt */
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area failed: error %d\n", res);
        }

#ifdef CLIENT_INTERFACE
        /* Install the library TLS base. */
        if (INTERNAL_OPTION(private_loader) && res >= 0) {
            app_pc base = os_tls->os_seg_info.priv_lib_tls_base;
            /* lib_tls_gdt_index is picked in choose_gdt_slots. */
            ASSERT(lib_tls_gdt_index >= gdt_entry_tls_min);
            initialize_ldt_struct(&desc, base, GDT_NO_SIZE_LIMIT,
                                  lib_tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, lib_tls_gdt_index, res, desc.entry_number);
            if (res >= 0) {
                /* i#558: update lib seg reg to enforce the segment changes */
                selector = GDT_SELECTOR(lib_tls_gdt_index);
                LOG(GLOBAL, LOG_THREADS, 2, "%s: setting %s to selector 0x%x\n",
                    __FUNCTION__, reg_names[LIB_SEG_TLS], selector);
                WRITE_LIB_SEG(selector);
            }
        }
#endif
    }

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Third choice: modify_ldt, which should be available on kernel 2.3.99+ */
        /* Base here must be 32-bit */
        IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) && segment <= (byte*)UINT_MAX));
        /* we have the thread_initexit_lock so no race here */
        index = find_unused_ldt_index();
        selector = LDT_SELECTOR(index);
        ASSERT(index != -1);
        create_ldt_entry((void *)segment, PAGE_SIZE, index);
        os_tls->tls_type = TLS_TYPE_LDT;
        WRITE_DR_SEG(selector); /* macro needs lvalue! */
        LOG(GLOBAL, LOG_THREADS, 1,
            "os_tls_init: modify_ldt successful for base "PFX" w/ index %d\n",
            segment, index);
    }

    os_tls->ldt_index = index;
}
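
Both the GDT and LDT paths above finish by loading a segment register with a selector built from the chosen descriptor index. As a reminder of what GDT_SELECTOR()/LDT_SELECTOR() compute, here is a sketch based on the standard x86 selector encoding (these definitions are assumptions for illustration, not taken from DynamoRIO's headers):

#include <stdio.h>

/* An x86 segment selector packs the descriptor index together with a
 * table-indicator bit (0 = GDT, 1 = LDT) and a requested privilege
 * level (3 for user mode).
 */
#define SELECTOR(index, ti, rpl) \
    ((unsigned short)(((index) << 3) | ((ti) << 2) | (rpl)))
#define GDT_SELECTOR(index) SELECTOR(index, 0, 3)
#define LDT_SELECTOR(index) SELECTOR(index, 1, 3)

int
main(void)
{
    printf("GDT index 6 -> selector 0x%02x\n", GDT_SELECTOR(6)); /* 0x33 */
    printf("LDT index 0 -> selector 0x%02x\n", LDT_SELECTOR(0)); /* 0x07 */
    return 0;
}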
Code example #3
File: inject.c Project: kumarak/Granary-Drk
/* pass non-NULL for thandle if you want this routine to use
 *   Get/SetThreadContext to get the context -- you must still pass
 *   in a pointer to a cxt
 */
BOOL
inject_into_thread(HANDLE phandle, CONTEXT *cxt, HANDLE thandle,
                   char *dynamo_path)
{
    size_t              nbytes;
    BOOL                success = FALSE;
    ptr_uint_t          dynamo_entry_esp;
    ptr_uint_t          dynamo_path_esp;
    LPVOID              load_dynamo_code = NULL; /* = base of code allocation */
    ptr_uint_t          addr;
    reg_t               *bufptr;
    char                buf[MAX_PATH];
    uint                old_prot;

    ASSERT(cxt != NULL);

#ifndef NOT_DYNAMORIO_CORE_PROPER
    /* FIXME - if we were early injected we couldn't call inject_init during
     * startup because kernel32 wasn't loaded yet, so we call it here, which
     * isn't safe because it uses app locks.  If we want to support a mix
     * of early and late follow-children injection we should change load_dynamo
     * to use Nt functions (which we can link) rather than kernel32 functions
     * (which we have to look up).  We could also use module.c code to safely
     * walk the exports of kernel32.dll (we can cache its mod handle when it
     * is loaded). */
    if (!inject_initialized) {
        SYSLOG_INTERNAL_WARNING("Using late inject follow children from early injected process, unsafe LdrLock usage");
        SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
        inject_init();
        SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
    }
#else
    ASSERT(inject_initialized);
#endif

    /* soon we'll start using alternative injection with case 102 - leaving block */
    {
        reg_t app_xsp;
        if (thandle != NULL) {
            /* grab the context of the app's main thread */                 
            cxt->ContextFlags = CONTEXT_DR_STATE;
            if (!NT_SUCCESS(nt_get_context(thandle, cxt))) {
                display_error("GetThreadContext failed");
                goto error;
            }
        }
        app_xsp = cxt->CXT_XSP;

        /* copy load_dynamo() into the address space of the new process */
        ASSERT(BUFFER_SIZE_BYTES(buf) > SIZE_OF_LOAD_DYNAMO);
        memcpy(buf, (char*)load_dynamo, SIZE_OF_LOAD_DYNAMO);
        /* R-X protection is adequate for our non-self modifying code,
         * and we'll update that after we're done with
         * nt_write_virtual_memory() calls */

        /* get allocation; this will be freed by os_heap_free, so make sure
         * it uses a compatible allocation method */
        if (!NT_SUCCESS(nt_remote_allocate_virtual_memory(phandle, &load_dynamo_code, 
                                                          SIZE_OF_LOAD_DYNAMO,
                                                          PAGE_EXECUTE_READWRITE,
                                                          MEMORY_COMMIT))) {
            display_error("Failed to allocate memory for injection code");
            goto error;
        }
        if (!nt_write_virtual_memory(phandle, load_dynamo_code, buf,
                                     SIZE_OF_LOAD_DYNAMO, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* Xref PR 252745 & PR 252008 - we can use the app's stack to hold our data
         * even on WOW64 and 64-bit since we're using set context to set xsp. */
   
        /* copy the DYNAMORIO_ENTRY string to the app's stack */
        _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%s", DYNAMORIO_ENTRY);
        NULL_TERMINATE_BUFFER(buf);
        nbytes = strlen(buf) + 1; // include the trailing '\0'
        /* keep esp at pointer-sized alignment */
        cxt->CXT_XSP -= ALIGN_FORWARD(nbytes, XSP_SZ);
        dynamo_entry_esp = cxt->CXT_XSP;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     buf, nbytes, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* copy the dynamorio_path string to the app's stack */
        _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%s", dynamo_path);
        NULL_TERMINATE_BUFFER(buf);
        nbytes = strlen(buf) + 1; // include the trailing '\0'
        /* keep esp at pointer-sized byte alignment */
        cxt->CXT_XSP -= ALIGN_FORWARD(nbytes, XSP_SZ);
        dynamo_path_esp = cxt->CXT_XSP;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     buf, nbytes, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* copy the current context to the app's stack. Only need the
         * control registers, so we use a dr_mcontext_t layout.
         */
        bufptr = (reg_t*) buf;
        *bufptr++ = cxt->CXT_XDI;
        *bufptr++ = cxt->CXT_XSI;
        *bufptr++ = cxt->CXT_XBP;
        *bufptr++ = app_xsp;
        *bufptr++ = cxt->CXT_XBX;
        *bufptr++ = cxt->CXT_XDX;
        *bufptr++ = cxt->CXT_XCX;
        *bufptr++ = cxt->CXT_XAX;
#ifdef X64
        *bufptr++ = cxt->R8;
        *bufptr++ = cxt->R9;
        *bufptr++ = cxt->R10;
        *bufptr++ = cxt->R11;
        *bufptr++ = cxt->R12;
        *bufptr++ = cxt->R13;
        *bufptr++ = cxt->R14;
        *bufptr++ = cxt->R15;
#endif
        /* It would be nice to use preserve_xmm_caller_saved(), but we'd need to
         * link proc.c and deal w/ messy dependencies to get it into arch_exports.h,
         * so we do our own check.  We go ahead and put in the xmm slots even
         * if the underlying processor has no xmm support: no harm done.
         */
        if (IF_X64_ELSE(true, is_wow64_process(NT_CURRENT_PROCESS))) {
            /* PR 264138: preserve xmm0-5.  We fill in all slots even though
             * for 32-bit we don't use them (PR 306394).
             */
            int i, j;
            for (i = 0; i < NUM_XMM_SLOTS; i++) {
                for (j = 0; j < IF_X64_ELSE(2,4); j++) {
                    *bufptr++ = CXT_XMM(cxt, i)->reg[j];
                }
            }
        } else {
            /* skip xmm slots */
            bufptr += XMM_SLOTS_SIZE/sizeof(*bufptr);
        }
        *bufptr++ = cxt->CXT_XFLAGS;
        *bufptr++ = cxt->CXT_XIP;
        ASSERT((char *)bufptr - (char *)buf == sizeof(dr_mcontext_t));
        *bufptr++ = (ptr_uint_t)load_dynamo_code;
        *bufptr++ = SIZE_OF_LOAD_DYNAMO;
        nbytes = sizeof(dr_mcontext_t) + 2*sizeof(reg_t);
        cxt->CXT_XSP -= nbytes;
#ifdef X64
        /* We need xsp to be aligned prior to each call, but we can only pad
         * before the context as all later users assume the info they need is
         * at TOS.
         */
        cxt->CXT_XSP = ALIGN_BACKWARD(cxt->CXT_XSP, XMM_ALIGN);
#endif
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP,
                                     buf, nbytes, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of the DYNAMORIO_ENTRY string on the app's stack */
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &dynamo_entry_esp, sizeof(dynamo_entry_esp),
                                     &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of GetProcAddress on the app's stack */
        ASSERT(addr_getprocaddr);
        addr = addr_getprocaddr;
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &addr, sizeof(addr), &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of the dynamorio_path string on the app's stack */
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &dynamo_path_esp, sizeof(dynamo_path_esp),
                                     &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of LoadLibraryA on the app's stack */
        ASSERT(addr_loadlibrarya);
        addr = addr_loadlibrarya;
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &addr, sizeof(addr), &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

#ifdef LOAD_DYNAMO_DEBUGBREAK
        /* push the address of DebugBreak on the app's stack */
        ASSERT(addr_debugbreak);
        addr = addr_debugbreak;
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &addr, sizeof(addr), &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }
#endif

        /* make the code R-X now */
        if (!nt_remote_protect_virtual_memory(phandle, load_dynamo_code, 
                                              SIZE_OF_LOAD_DYNAMO,
                                              PAGE_EXECUTE_READ, &old_prot)) {
            display_error("Failed to make injection code R-X");
            goto error;
        }
        ASSERT(old_prot == PAGE_EXECUTE_READWRITE);

        /* now change Eip to point to the entry point of load_dynamo(), so that
           when we resume, load_dynamo is invoked automatically */
        cxt->CXT_XIP = (ptr_uint_t)load_dynamo_code;
        cxt->CXT_XFLAGS = 0;
        if (thandle != NULL) {
            if (!NT_SUCCESS(nt_set_context(thandle, cxt))) {
                display_error("SetThreadContext failed");
                goto error;
            }
        }

        success = TRUE;
    }
    error:
        /* we do not recover any changes in the child's address space */

    return success;
}
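
The bulk of the routine above repeats one idiom: decrement the target thread's xsp by a pointer-aligned amount, then write data at the new top of stack via nt_write_virtual_memory(). Below is a minimal local sketch of that push pattern; a plain buffer stands in for the remote stack and memcpy for the remote write, while ALIGN_FORWARD and XSP_SZ mirror the snippet's macros.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XSP_SZ sizeof(void *)
#define ALIGN_FORWARD(x, align) \
    (((uintptr_t)(x) + ((align) - 1)) & ~((uintptr_t)(align) - 1))

static unsigned char fake_stack[4096];

/* Push nbytes of data: move xsp down by an aligned amount, then write
 * at the new top.  inject_into_thread() does the same, but the write
 * is a remote nt_write_virtual_memory() call instead of memcpy.
 */
static uintptr_t
push_bytes(uintptr_t xsp, const void *data, size_t nbytes)
{
    xsp -= ALIGN_FORWARD(nbytes, XSP_SZ); /* keep pointer-sized alignment */
    memcpy((void *)xsp, data, nbytes);
    return xsp;
}

int
main(void)
{
    /* Stacks grow downward, so start at the high end of the buffer. */
    uintptr_t xsp = (uintptr_t)(fake_stack + sizeof(fake_stack));
    const char *path = "dynamorio.dll"; /* stand-in for dynamo_path */
    uintptr_t path_xsp = push_bytes(xsp, path, strlen(path) + 1);
    printf("pushed \"%s\" %zu bytes below the stack top\n",
           (char *)path_xsp, (size_t)(xsp - path_xsp));
    return 0;
}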