Exemple #1
0
/* Looks for the DR marker in \p process, searching for the marker flavor
 * that matches this build's bitwidth (x64 marker for a 64-bit build, x86
 * marker otherwise).  The result is whatever
 * read_and_verify_dr_marker_common() returns.
 */
int
read_and_verify_dr_marker(HANDLE process, dr_marker_t *marker)
{
    const bool native_is_x64 = IF_X64_ELSE(true, false);

    return read_and_verify_dr_marker_common(process, marker, native_is_x64);
}
Exemple #2
0
/* Checks whether this frontend's bitwidth matches that of the target module
 * \p dll.
 *
 * If it matches, returns true and the caller proceeds in this process.
 *
 * If it does not match, tries to hand off to the sibling frontend with the
 * same executable name located in "../bin" (when this is a 64-bit build) or
 * "../bin64" (when this is a 32-bit build), relative to the directory of
 * argv[0].  argv[0] is temporarily pointed at that frontend for the launch
 * and is restored before returning.  In that case the function always returns
 * false (on UNIX a successful launch is not expected to return at all), so
 * false means "do not continue in this process", not necessarily "error".
 */
static bool
check_architecture(const char *dll, char **argv)
{
    bool is_64bit, also_32bit;
    if (drfront_is_64bit_app(dll, &is_64bit, &also_32bit) != DRFRONT_SUCCESS) {
        printf("ERROR: unable to get the architecture infomation of"
               " the target module %s\n", dll);
        return false;
    }
    if (IF_X64_ELSE(!is_64bit, is_64bit && !also_32bit)) {
        char *orig_argv0 = argv[0];
        char root[MAXIMUM_PATH];
        char buf[MAXIMUM_PATH];
        char *basename;
        size_t root_len;
        int errcode;
        void *inject_data;
        bool is_readable;
        if (drfront_get_app_full_path(argv[0], root, BUFFER_SIZE_ELEMENTS(root)) !=
            DRFRONT_SUCCESS) {
            printf("ERROR: unable to get base dir of %s\n", argv[0]);
            return false;
        }
        /* Guard against an empty path: root + 0 - 1 would point before the
         * array, which is undefined behavior.
         */
        root_len = strlen(root);
        if (root_len == 0) {
            printf("ERROR: unable to get base dir of %s\n", argv[0]);
            return false;
        }
        /* Split root into "<dir>" and "<basename>" at the last separator. */
        basename = root + root_len - 1;
        while (basename > root && *basename != DIRSEP && *basename != ALT_DIRSEP)
            basename--;
        if (basename <= root) {
            printf("ERROR: unable to get base dir of %s\n", argv[0]);
            return false;
        }
        *basename = '\0';
        basename++;
        /* Build "<dir>/../<other-bitwidth bin dir>/<basename>". */
        _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf) ,
                  "%s%c..%c%s%c%s", root, DIRSEP, DIRSEP,
                  IF_X64_ELSE("bin", "bin64"), DIRSEP, basename);
        NULL_TERMINATE_BUFFER(buf);
        if (drfront_access(buf, DRFRONT_READ, &is_readable) != DRFRONT_SUCCESS ||
            !is_readable) {
            printf("ERROR: unable to find frontend %s to match target file bitwidth: "
                   "is this an incomplete installation?\n", buf);
            /* There is nothing to launch: bail out here rather than attempting
             * (and failing) to start a frontend we just reported as missing.
             * argv[0] has not been modified yet.
             */
            return false;
        }
        /* buf is a local: every path below restores argv[0] before returning. */
        argv[0] = buf;
#ifdef UNIX
        errcode = dr_inject_prepare_to_exec(buf, (const char **)argv, &inject_data);
        if (errcode == 0 || errcode == WARN_IMAGE_MACHINE_TYPE_MISMATCH_EXE)
            dr_inject_process_run(inject_data); /* shouldn't return */
        printf("ERROR (%d): unable to launch frontend to match target file bitwidth\n",
               errcode);
        argv[0] = orig_argv0;
        return false;
#else
        errcode = dr_inject_process_create(buf, argv, &inject_data);
        if (errcode == 0 || errcode == WARN_IMAGE_MACHINE_TYPE_MISMATCH_EXE) {
            dr_inject_process_run(inject_data);
            /* Wait for the child so user's shell prompt doesn't come back early */
            errcode = WaitForSingleObject(dr_inject_get_process_handle(inject_data),
                                          INFINITE);
            if (errcode != WAIT_OBJECT_0)
                printf("WARNING: failed to wait for cross-arch frontend\n");
            dr_inject_process_exit(inject_data, false);
            argv[0] = orig_argv0;
            /* The other frontend did the work: tell the caller not to continue. */
            return false;
        } else {
            printf("ERROR (%d): unable to launch frontend to match target file bitwidth\n",
                  errcode);
            argv[0] = orig_argv0;
            return false;
        }
#endif
    }
    return true;
}
Exemple #3
0
                        )
{
    uint references_found = 0;  /* only for debugging  */
    DEBUG_DECLARE(uint references_already_known = 0;)

    app_pc cur_addr;
    app_pc last_addr = text_end - sizeof(app_pc); /* inclusive */

    LOG(GLOBAL, LOG_RCT, 2,
        "find_address_references: text["PFX", "PFX"), referto["PFX", "PFX")\n",
        text_start, text_end, referto_start, referto_end);

    ASSERT(text_start <= text_end); /* empty ok */
    ASSERT(referto_start <= referto_end); /* empty ok */

    ASSERT(sizeof(app_pc) == IF_X64_ELSE(8,4));
    ASSERT((ptr_uint_t)(last_addr+1) == (((ptr_uint_t)last_addr)+1));/* byte increments */

    ASSERT(is_readable_without_exception(text_start, text_end - text_start));

    /* FIXME: could try to read dword[pc] dword[pc+4] and then merging them with shifts
     * and | to get dword[pc+1] dword[pc+2] dword[pc+3]  instead of reading memory
     * but of course only if KSTAT says the latter is indeed faster!
     */

    KSTART(rct_no_reloc);
    for (cur_addr = text_start; cur_addr <= last_addr; cur_addr++) {
        DEBUG_DECLARE(bool known_ref = false;)

        app_pc ref = *(app_pc*)cur_addr; /* note dereference here */
        if (rct_check_ref_and_add(dcontext, ref, referto_start, referto_end
Exemple #4
0
/* For 32-bit build, supports looking for x64 marker (in WOW64 process).
 * For 64-bit build, only supports looking for x64 marker.
 *
 * Locates the DR marker in \p process by following the jmp found at the start
 * of the hooked function (DR_MARKER_HOOKED_FUNCTION) to its landing pad,
 * taking the page start of the address found there, and reading a dr_marker_t
 * from that page into \p marker.
 *
 * Returns:
 *   DR_MARKER_FOUND     - the marker was read and dr_marker_verify() accepted it.
 *   DR_MARKER_NOT_FOUND - the hooked function does not start with OP_jmp_byte,
 *                         the marker page could not be read in full, or
 *                         verification failed.
 *   DR_MARKER_ERROR     - the requested bitwidth is not supported by this
 *                         build/process, the hooked function could not be
 *                         resolved, a read of the hook or landing pad failed,
 *                         or the computed target is NULL.
 *
 * Note on structure: in the non-core build the body below is wrapped in
 * "if (x64) { ... }" (plus an "else" arm on 32-bit builds); that brace is
 * opened and closed under preprocessor conditionals, so the indentation of
 * the shared code reflects the non-core layout.
 */
static int
read_and_verify_dr_marker_common(HANDLE process, dr_marker_t *marker, bool x64)
{
    /* Holds either the 5 bytes of a jmp or an 8-byte pointer read from the
     * landing pad.
     */
    byte buf[8]; /* only needs to be 5, but dword pad just in case */
    size_t res;
    void *target = NULL;
#if !defined(NOT_DYNAMORIO_CORE) && !defined(NOT_DYNAMORIO_CORE_PROPER)
    /* Core build: resolve the hooked function through GET_NTDLL. */
    GET_NTDLL(DR_MARKER_HOOKED_FUNCTION, DR_MARKER_HOOKED_FUNCTION_ARGS);
    void *hook_func = (void *)DR_MARKER_HOOKED_FUNCTION;
#else
    /* Reject unsupported requests: a 64-bit build only handles x64, and a
     * 32-bit build handles x64 only when the current process is WOW64.
     */
    if (IF_X64_ELSE(!x64, x64 && !is_wow64_process(NT_CURRENT_PROCESS)))
        return DR_MARKER_ERROR;
    if (x64) {
# ifndef X64
        /* 32-bit build looking for an x64 marker: all addresses are 64-bit,
         * so use the *_64 lookup and read routines.
         */
        uint64 hook_func = get_proc_address_64
            (get_module_handle_64(L_DR_MARKER_HOOKED_DLL),
             DR_MARKER_HOOKED_FUNCTION_STRING);
        uint64 landing_pad = 0;
        if (hook_func == 0)
            return DR_MARKER_ERROR;
        if (!NT_SUCCESS(nt_wow64_read_virtual_memory64(process, hook_func, buf, 5, &res))
            || res != 5) {
            return DR_MARKER_ERROR;
        }
        /* No jmp at the start of the function means it is not hooked. */
        if (buf[0] != OP_jmp_byte)
            return DR_MARKER_NOT_FOUND;

        /* jmp offset + EIP (after jmp = hook_func + size of jmp (5 bytes)) */
        /* for 64-bit, the target is stored in front of the trampoline */
        landing_pad = *(int *)&buf[1] + hook_func + 5 - 8;
         if (!NT_SUCCESS(nt_wow64_read_virtual_memory64(process, landing_pad, buf, 8,
                                                        &res)) ||
            res != 8U)
            return DR_MARKER_ERROR;
        /* trampoline address is stored at the top of the landing pad for 64-bit */
        target = (void *)PAGE_START(*(ptr_int_t *)buf);
    } else {
# endif /* !X64 */
        /* Same-bitwidth lookup in the non-core build. */
        void *hook_func = (void *)GetProcAddress(GetModuleHandle(DR_MARKER_HOOKED_DLL),
                                                 DR_MARKER_HOOKED_FUNCTION_STRING);
#endif
        /* From here down to the computation of target is shared by the core
         * build and the same-bitwidth path of the non-core build.
         */
        void *landing_pad;
        if (hook_func == NULL)
            return DR_MARKER_ERROR;
        if (!READ_FUNC(process, hook_func, buf, 5, &res) || res != 5)
            return DR_MARKER_ERROR;
        /* No jmp at the start of the function means it is not hooked. */
        if (buf[0] != OP_jmp_byte)
            return DR_MARKER_NOT_FOUND;

        /* jmp offset + EIP (after jmp = hook_func + size of jmp (5 bytes)) */
        landing_pad = (void *)(*(int *)&buf[1] + (ptr_int_t)hook_func + 5);
        /* for 64-bit, the target is stored in front of the trampoline */
        if (x64)
            landing_pad = (byte *)landing_pad - 8;
        /* see emit_landing_pad_code() for layout of landing pad */
        if (!READ_FUNC(process, landing_pad, buf, (x64 ? 8 : 5), &res) ||
            res != (x64 ? 8U : 5U))
            return DR_MARKER_ERROR;
        if (x64) {
            /* trampoline address is stored at the top of the landing pad for 64-bit */
            target = (void *)PAGE_START(*(ptr_int_t *)buf);
        } else {
            /* jmp offset + EIP (after jmp = landing_pad + size of jmp (5 bytes)) */
            target = (void *)PAGE_START(*(int *)&buf[1] + (ptr_int_t)landing_pad + 5);
        }
#if defined(NOT_DYNAMORIO_CORE) || defined(NOT_DYNAMORIO_CORE_PROPER)
    /* Closes the "if (x64) {" (or its "else {") opened above. */
    }
#endif

    if (target == NULL)
        return DR_MARKER_ERROR;
    /* The marker is read from the start of the page containing the target.
     * A failed or short read here is reported as "not found" rather than as
     * an error.
     */
    if (!READ_FUNC(process, target, marker, sizeof(dr_marker_t), &res) ||
        res != sizeof(dr_marker_t)) {
        return DR_MARKER_NOT_FOUND;
    }

    if (dr_marker_verify(process, marker)) {
        return DR_MARKER_FOUND;
    }

    return DR_MARKER_NOT_FOUND; /* probably some other hooker */
}
Exemple #5
0
# define GS_TLS 1 /* used in arch_prctl handling */
#else
/* Linux GDT layout in x86_32
 * 6 - TLS segment #1 0x33 [ glibc's TLS segment ]
 * 7 - TLS segment #2 0x3b [ Wine's %fs Win32 segment ]
 * 8 - TLS segment #3 0x43
 * FS and GS are not hardcoded.
 */
#endif
/* Index of the first TLS entry in the GDT: 6 in the x86_32 layout, 12 on an
 * x86-64 kernel (which only accepts indices 12 to 14 for TLS entries).
 */
#define GDT_ENTRY_TLS_MIN_32 6
#define GDT_ENTRY_TLS_MIN_64 12
/* When an x86-64 kernel emulates i386, it still uses entries 12-14, so an
 * ifdef on X64 cannot detect the right value.
 * The actual value will be updated later in os_tls_app_seg_init.
 * (On non-X64 builds choose_gdt_slots() also switches this to
 * GDT_ENTRY_TLS_MIN_64 once it sees which index the kernel hands out.)
 */
static uint gdt_entry_tls_min = IF_X64_ELSE(GDT_ENTRY_TLS_MIN_64,
                                            GDT_ENTRY_TLS_MIN_32);

/* Set by choose_gdt_slots() on its first call so that the GDT slot selection
 * is performed only once.
 */
static bool tls_global_init = false;

/* GDT slot we use for set_thread_area.
 * This depends on the kernel, not on the app!
 * Stays -1 until choose_gdt_slots() obtains a slot.
 */
static int tls_gdt_index = -1;
/* GDT slot we use for private library TLS. */
static int lib_tls_gdt_index = -1;

#ifdef X64
/* Set to true in tls_thread_init() when arch_prctl(ARCH_SET_GS) succeeded but
 * the TLS segment register still reads as zero during initialization.
 */
static bool tls_using_msr;
#endif

/* Indicates that on the next request for a GDT entry, we should return the GDT
Exemple #6
0
/* Redirects a thread of the process \p phandle so that it runs a copy of
 * load_dynamo() (which is handed the path \p dynamo_path and the
 * DYNAMORIO_ENTRY name) the next time it executes.
 *
 * pass non-NULL for thandle if you want this routine to use
 *   Get/SetThreadContext to get the context -- you must still pass
 *   in a pointer to a cxt
 *
 * When \p thandle is NULL the context in \p cxt is used as given and is only
 * modified in place: it is not applied to any thread here, so applying it is
 * left to the caller.
 *
 * What is written into the target process:
 *  - a copy of load_dynamo() in freshly allocated remote memory, which is
 *    switched to PAGE_EXECUTE_READ once all writes are done;
 *  - on the app's stack, from higher to lower addresses: the DYNAMORIO_ENTRY
 *    string, the \p dynamo_path string, [x64: alignment padding,] a block
 *    holding the saved register state followed by the base and size of the
 *    injected code, the address of the DYNAMORIO_ENTRY string, the address of
 *    GetProcAddress, the address of the dynamo_path string, the address of
 *    LoadLibraryA, and (only with LOAD_DYNAMO_DEBUGBREAK) the address of
 *    DebugBreak.
 * Finally cxt's XIP is pointed at the injected code, XFLAGS is cleared and XSP
 * is left at the last value pushed.
 *
 * Returns TRUE on success and FALSE on any failure.  On failure nothing is
 * rolled back: the remote allocation, any stack writes already performed and
 * the modifications made to \p cxt all remain.
 */
BOOL
inject_into_thread(HANDLE phandle, CONTEXT *cxt, HANDLE thandle,
                   char *dynamo_path)
{
    size_t              nbytes;
    BOOL                success = FALSE;
    ptr_uint_t          dynamo_entry_esp; /* remote address of DYNAMORIO_ENTRY string */
    ptr_uint_t          dynamo_path_esp;  /* remote address of dynamo_path string */
    LPVOID              load_dynamo_code = NULL; /* = base of code allocation */
    ptr_uint_t          addr;
    reg_t               *bufptr;
    /* Staging buffer reused for the code copy, both strings and the saved
     * register block before each is written to the target process.
     * NOTE(review): nothing visible here checks that the register block fits
     * in MAX_PATH bytes -- confirm against sizeof(dr_mcontext_t).
     */
    char                buf[MAX_PATH];
    uint                old_prot;

    ASSERT(cxt != NULL);

#ifndef NOT_DYNAMORIO_CORE_PROPER
    /* FIXME - if we were early injected we couldn't call inject_init during
     * startup because kernel32 wasn't loaded yet, so we call it here which
     * isn't safe because it uses app locks. If we want to support a mix
     * of early and late follow children injection we should change load_dynamo
     * to use Nt functions (which we can link) rather then kernel32 functions
     * (which we have to look up).  We could also use module.c code to safely
     * walk the exports of kernel32.dll (we can cache its mod handle when it
     * is loaded). */ 
    if (!inject_initialized) {
        SYSLOG_INTERNAL_WARNING("Using late inject follow children from early injected process, unsafe LdrLock usage");
        SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
        inject_init();
        SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
    }
#else
    ASSERT(inject_initialized);
#endif

    /* soon we'll start using alternative injection with case 102 - leaving block */
    {
        /* The app's original stack pointer, saved before we start pushing. */
        reg_t app_xsp;
        if (thandle != NULL) {
            /* grab the context of the app's main thread */                 
            cxt->ContextFlags = CONTEXT_DR_STATE;
            if (!NT_SUCCESS(nt_get_context(thandle, cxt))) {
                display_error("GetThreadContext failed");
                goto error;
            }
        }
        app_xsp = cxt->CXT_XSP;

        /* copy load_dynamo() into the address space of the new process */
        ASSERT(BUFFER_SIZE_BYTES(buf) > SIZE_OF_LOAD_DYNAMO);
        memcpy(buf, (char*)load_dynamo, SIZE_OF_LOAD_DYNAMO);
        /* R-X protection is adequate for our non-self modifying code,
         * and we'll update that after we're done with
         * nt_write_virtual_memory() calls */

        /* get allocation, this will be freed by os_heap_free, so make sure
         * is compatible allocation method */
        if (!NT_SUCCESS(nt_remote_allocate_virtual_memory(phandle, &load_dynamo_code, 
                                                          SIZE_OF_LOAD_DYNAMO,
                                                          PAGE_EXECUTE_READWRITE,
                                                          MEMORY_COMMIT))) {
            display_error("Failed to allocate memory for injection code");
            goto error;
        }
        if (!nt_write_virtual_memory(phandle, load_dynamo_code, buf,
                                     SIZE_OF_LOAD_DYNAMO, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* Xref PR 252745 & PR 252008 - we can use the app's stack to hold our data
         * even on WOW64 and 64-bit since we're using set context to set xsp. */
   
        /* copy the DYNAMORIO_ENTRY string to the app's stack */
        _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%s", DYNAMORIO_ENTRY);
        NULL_TERMINATE_BUFFER(buf);
        nbytes = strlen(buf) + 1; // include the trailing '\0'
        /* keep esp at pointer-sized alignment */
        cxt->CXT_XSP -= ALIGN_FORWARD(nbytes, XSP_SZ);
        dynamo_entry_esp = cxt->CXT_XSP;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     buf, nbytes, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* copy the dynamorio_path string to the app's stack */
        /* A path that does not fit in buf is truncated here without any
         * error being reported.
         */
        _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%s", dynamo_path);
        NULL_TERMINATE_BUFFER(buf);
        nbytes = strlen(buf) + 1; // include the trailing '\0'
        /* keep esp at pointer-sized byte alignment */
        cxt->CXT_XSP -= ALIGN_FORWARD(nbytes, XSP_SZ);
        dynamo_path_esp = cxt->CXT_XSP;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     buf, nbytes, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* copy the current context to the app's stack. Only need the
         * control registers, so we use a dr_mcontext_t layout.
         */
        /* The order of the stores below must match the field order of
         * dr_mcontext_t (checked only by the size ASSERT further down).
         */
        bufptr = (reg_t*) buf;
        *bufptr++ = cxt->CXT_XDI;
        *bufptr++ = cxt->CXT_XSI;
        *bufptr++ = cxt->CXT_XBP;
        /* Save the original stack pointer, not the one we have been lowering. */
        *bufptr++ = app_xsp;
        *bufptr++ = cxt->CXT_XBX;
        *bufptr++ = cxt->CXT_XDX;
        *bufptr++ = cxt->CXT_XCX;
        *bufptr++ = cxt->CXT_XAX;
#ifdef X64
        *bufptr++ = cxt->R8;
        *bufptr++ = cxt->R9;
        *bufptr++ = cxt->R10;
        *bufptr++ = cxt->R11;
        *bufptr++ = cxt->R12;
        *bufptr++ = cxt->R13;
        *bufptr++ = cxt->R14;
        *bufptr++ = cxt->R15;
#endif
        /* It would be nice to use preserve_xmm_caller_saved(), but we'd need to
         * link proc.c and deal w/ messy dependencies to get it into arch_exports.h,
         * so we do our own check.  We go ahead and put in the xmm slots even
         * if the underlying processor has no xmm support: no harm done.
         */
        if (IF_X64_ELSE(true, is_wow64_process(NT_CURRENT_PROCESS))) {
            /* PR 264138: preserve xmm0-5.  We fill in all slots even though
             * for 32-bit we don't use them (PR 306394).
             */
            int i, j;
            for (i = 0; i < NUM_XMM_SLOTS; i++) {
                /* Each slot is copied as 2 (x64) or 4 (32-bit) reg_t-sized pieces. */
                for (j = 0; j < IF_X64_ELSE(2,4); j++) {
                    *bufptr++ = CXT_XMM(cxt, i)->reg[j];
                }
            }
        } else {
            /* skip xmm slots */
            bufptr += XMM_SLOTS_SIZE/sizeof(*bufptr);
        }
        *bufptr++ = cxt->CXT_XFLAGS;
        *bufptr++ = cxt->CXT_XIP;
        ASSERT((char *)bufptr - (char *)buf == sizeof(dr_mcontext_t));
        /* Two extra words follow the register block: base and size of the
         * injected code.
         */
        *bufptr++ = (ptr_uint_t)load_dynamo_code;
        *bufptr++ = SIZE_OF_LOAD_DYNAMO;
        nbytes = sizeof(dr_mcontext_t) + 2*sizeof(reg_t);
        cxt->CXT_XSP -= nbytes;
#ifdef X64
        /* We need xsp to be aligned prior to each call, but we can only pad
         * before the context as all later users assume the info they need is
         * at TOS.
         */
        cxt->CXT_XSP = ALIGN_BACKWARD(cxt->CXT_XSP, XMM_ALIGN);
#endif
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP,
                                     buf, nbytes, &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of the DYNAMORIO_ENTRY string on the app's stack */
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &dynamo_entry_esp, sizeof(dynamo_entry_esp),
                                     &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of GetProcAddress on the app's stack */
        ASSERT(addr_getprocaddr);
        addr = addr_getprocaddr;
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &addr, sizeof(addr), &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of the dynamorio_path string on the app's stack */
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &dynamo_path_esp, sizeof(dynamo_path_esp),
                                     &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

        /* push the address of LoadLibraryA on the app's stack */
        ASSERT(addr_loadlibrarya);
        addr = addr_loadlibrarya;
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &addr, sizeof(addr), &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }

#ifdef LOAD_DYNAMO_DEBUGBREAK
        /* push the address of DebugBreak on the app's stack */
        ASSERT(addr_debugbreak);
        addr = addr_debugbreak;
        cxt->CXT_XSP -= XSP_SZ;
        if (!nt_write_virtual_memory(phandle, (LPVOID)cxt->CXT_XSP, 
                                     &addr, sizeof(addr), &nbytes)) {
            display_error("WriteMemory failed");
            goto error;
        }
#endif

        /* make the code R-X now */
        if (!nt_remote_protect_virtual_memory(phandle, load_dynamo_code, 
                                              SIZE_OF_LOAD_DYNAMO,
                                              PAGE_EXECUTE_READ, &old_prot)) {
            display_error("Failed to make injection code R-X");
            goto error;
        }
        ASSERT(old_prot == PAGE_EXECUTE_READWRITE);

        /* now change Eip to point to the entry point of load_dynamo(), so that
           when we resume, load_dynamo is invoked automatically */
        cxt->CXT_XIP = (ptr_uint_t)load_dynamo_code;
        cxt->CXT_XFLAGS = 0;
        /* Only apply the context ourselves when we were given a thread handle. */
        if (thandle != NULL) {
            if (!NT_SUCCESS(nt_set_context(thandle, cxt))) {
                display_error("SetThreadContext failed");
                goto error;
            }
        }

        success = TRUE;
    }
    /* Reached both on success (success == TRUE) and via goto on failure. */
    error:
        /* we do not recover any changes in the child's address space */

    return success;
}
Exemple #7
0
/* Sets up DR's thread-local storage segment for the current thread so that it
 * refers to \p segment, and records in \p os_tls how that was achieved.
 *
 * The mechanisms are tried in this order, stopping at the first that works:
 *   1. (X64 only) arch_prctl(ARCH_SET_GS)      -> TLS_TYPE_ARCH_PRCTL
 *   2. set_thread_area on a GDT slot picked by
 *      choose_gdt_slots()                      -> TLS_TYPE_GDT
 *   3. an LDT entry via create_ldt_entry()     -> TLS_TYPE_LDT
 *
 * The code relies on os_tls->tls_type being TLS_TYPE_NONE on entry (it is
 * only ever compared against and overwritten here).  On return
 * os_tls->tls_type names the mechanism used and os_tls->ldt_index holds the
 * GDT or LDT index used, or -1 for the arch_prctl path.
 *
 * With the private loader enabled, the library TLS base is installed too: via
 * ARCH_SET_FS on the arch_prctl path, or in the lib_tls_gdt_index GDT slot
 * when set_thread_area succeeded.
 */
void
tls_thread_init(os_local_state_t *os_tls, byte *segment)
{
    /* We have four different ways to obtain TLS, each with its own limitations:
     *
     * 1) Piggyback on the threading system (like we do on Windows): here that would
     *    be pthreads, which uses a segment since at least RH9, and uses gdt-based
     *    segments for NPTL.  The advantage is we won't run out of ldt or gdt entries
     *    (except when the app itself would).  The disadvantage is we're stealing
     *    application slots and we rely on user mode interfaces.
     *
     * 2) Steal an ldt entry via SYS_modify_ldt.  This suffers from the 8K ldt entry
     *    limit and requires that we update manually on a new thread.  For 64-bit
     *    we're limited here to a 32-bit base.  (Strangely, the kernel's
     *    include/asm-x86_64/ldt.h implies that the base is ignored: but it doesn't
     *    seem to be.)
     *
     * 3) Steal a gdt entry via SYS_set_thread_area.  There is a 3rd unused entry
     *    (after pthreads and wine) we could use.  The kernel swaps for us, and with
     *    CLONE_TLS the kernel will set up the entry for a new thread for us.  Xref
     *    PR 192231 and PR 285898.  This system call is disabled on 64-bit 2.6
     *    kernels (though the man page for arch_prctl implies it isn't for 2.5
     *    kernels?!?)
     *
     * 4) Use SYS_arch_prctl.  This is only implemented on 64-bit kernels, and can
     *    only be used to set the gdt entries that fs and gs select for.  Faster to
     *    use <4GB base (obtain with mmap MAP_32BIT) since can use gdt; else have to
     *    use wrmsr.  The man pages say "ARCH_SET_GS is disabled in some kernels".
     */
    uint selector;
    int index = -1;
    int res;
#ifdef X64
    /* First choice is gdt, which means arch_prctl.  Since this may fail
     * on some kernels, we require -heap_in_lower_4GB so we can fall back
     * on modify_ldt.
     */
    byte *cur_gs;
    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_GET_GS, &cur_gs);
    if (res >= 0) {
        LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: cur gs base is "PFX"\n", cur_gs);
        /* If we're a non-initial thread, gs will be set to the parent thread's value */
        if (cur_gs == NULL || is_dynamo_address(cur_gs) ||
            /* By resolving i#107, we can handle gs conflicts between app and dr. */
            INTERNAL_OPTION(mangle_app_seg)) {
            res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_GS, segment);
            if (res >= 0) {
                os_tls->tls_type = TLS_TYPE_ARCH_PRCTL;
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl successful for base "PFX"\n", segment);
                /* Kernel should have written %gs for us if using GDT */
                if (!dynamo_initialized && read_thread_register(SEG_TLS) == 0) {
                    LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: using MSR\n");
                    tls_using_msr = true;
                }
                if (IF_CLIENT_INTERFACE_ELSE(INTERNAL_OPTION(private_loader), false)) {
                    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_FS,
                                            os_tls->os_seg_info.dr_fs_base);
                    /* Assuming set fs must be successful if set gs succeeded. */
                    ASSERT(res >= 0);
                }
            } else {
                /* we've found a kernel where ARCH_SET_GS is disabled */
                ASSERT_CURIOSITY(false && "arch_prctl failed on set but not get");
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl failed: error %d\n", res);
            }
        } else {
            /* FIXME PR 205276: we don't currently handle it: fall back on ldt, but
             * we'll have the same conflict w/ the selector...
             */
            ASSERT_BUG_NUM(205276, cur_gs == NULL);
        }
    }
#endif

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Second choice is set_thread_area */
        /* PR 285898: if we added CLONE_SETTLS to all clone calls (and emulated vfork
         * with clone) we could avoid having to set tls up for each thread (as well
         * as solve race PR 207903), at least for kernel 2.5.32+.  For now we stick
         * w/ manual setup.
         */
        our_modify_ldt_t desc;

        /* Pick which GDT slots we'll use for DR TLS and for library TLS if
         * using the private loader.
         */
        choose_gdt_slots(os_tls);

        if (tls_gdt_index > -1) {
            /* Now that we know which GDT slot to use, install the per-thread base
             * into it.
             */
            /* Base here must be 32-bit */
            IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) &&
                          segment <= (byte*)UINT_MAX));
            initialize_ldt_struct(&desc, segment, PAGE_SIZE, tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, tls_gdt_index, res, desc.entry_number);
            ASSERT(res < 0 || desc.entry_number == tls_gdt_index);
        } else {
            res = -1;  /* fall back on LDT */
        }

        if (res >= 0) {
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area successful for base "PFX" @index %d\n",
                segment, tls_gdt_index);
            os_tls->tls_type = TLS_TYPE_GDT;
            index = tls_gdt_index;
            selector = GDT_SELECTOR(index);
            WRITE_DR_SEG(selector); /* macro needs lvalue! */
        } else {
            IF_VMX86(ASSERT_NOT_REACHED()); /* since no modify_ldt */
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area failed: error %d\n", res);
        }

#ifdef CLIENT_INTERFACE
        /* Install the library TLS base. */
        if (INTERNAL_OPTION(private_loader) && res >= 0) {
            app_pc base = IF_X64_ELSE(os_tls->os_seg_info.dr_fs_base,
                                      os_tls->os_seg_info.dr_gs_base);
            /* lib_tls_gdt_index is picked in choose_gdt_slots. */
            /* lib_tls_gdt_index is a signed int while gdt_entry_tls_min is
             * unsigned: comparing them directly would convert an unset index
             * of -1 to a huge unsigned value and let the check pass, so rule
             * out negative values explicitly first.
             */
            ASSERT(lib_tls_gdt_index >= 0 &&
                   (uint)lib_tls_gdt_index >= gdt_entry_tls_min);
            initialize_ldt_struct(&desc, base, GDT_NO_SIZE_LIMIT,
                                  lib_tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, lib_tls_gdt_index, res, desc.entry_number);
            if (res >= 0) {
                /* i558 update lib seg reg to enforce the segment changes */
                selector = GDT_SELECTOR(lib_tls_gdt_index);
                LOG(GLOBAL, LOG_THREADS, 2, "%s: setting %s to selector 0x%x\n",
                    __FUNCTION__, reg_names[LIB_SEG_TLS], selector);
                WRITE_LIB_SEG(selector);
            }
        }
#endif
    }

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Third choice: modify_ldt, which should be available on kernel 2.3.99+ */
        /* Base here must be 32-bit */
        IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) && segment <= (byte*)UINT_MAX));
        /* we have the thread_initexit_lock so no race here */
        index = find_unused_ldt_index();
        /* Validate the index before it is used to build a selector. */
        ASSERT(index != -1);
        selector = LDT_SELECTOR(index);
        create_ldt_entry((void *)segment, PAGE_SIZE, index);
        os_tls->tls_type = TLS_TYPE_LDT;
        WRITE_DR_SEG(selector); /* macro needs lvalue! */
        LOG(GLOBAL, LOG_THREADS, 1,
            "os_tls_init: modify_ldt successful for base "PFX" w/ index %d\n",
            segment, index);
    }

    /* -1 on the arch_prctl path; otherwise the GDT or LDT index in use. */
    os_tls->ldt_index = index;
}
Exemple #8
0
/* Queries the set of available GDT slots, and initializes:
 * - tls_gdt_index
 * - gdt_entry_tls_min on ia32
 * - lib_tls_gdt_index if using private loader
 * GDT slots are initialized with a base and limit of zero.  The caller is
 * responsible for setting them to a real base.
 *
 * Only the first call does any work; later calls return immediately
 * (guarded by tls_global_init).  If the kernel hands out no slot at all,
 * tls_gdt_index is left at -1 and the other two values are left untouched.
 */
static void
choose_gdt_slots(os_local_state_t *os_tls)
{
    our_modify_ldt_t desc;
    int i;
    int avail_index[GDT_NUM_TLS_SLOTS];
    our_modify_ldt_t clear_desc;
    int res;

    /* using local static b/c dynamo_initialized is not set for a client thread
     * when created in client's dr_init routine
     */
    /* FIXME: Could be racy if we have multiple threads initializing during
     * startup.
     */
    if (tls_global_init)
        return;
    tls_global_init = true;

    /* We don't want to break the assumptions of pthreads or wine,
     * so we try to take the last slot.  We don't want to hardcode
     * the index b/c the kernel will let us clobber entries so we want
     * to only pass in -1.
     */
    ASSERT(!dynamo_initialized);
    ASSERT(tls_gdt_index == -1);
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++)
        avail_index[i] = -1;
    /* Grab free slots until the kernel refuses: the last one granted becomes
     * tls_gdt_index and the earlier ones are released again below.
     */
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++) {
        /* We use a base and limit of 0 for testing what's available. */
        initialize_ldt_struct(&desc, NULL, 0, -1);
        res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
        LOG(GLOBAL, LOG_THREADS, 4,
            "%s: set_thread_area -1 => %d res, %d index\n",
            __FUNCTION__, res, desc.entry_number);
        if (res >= 0) {
            /* We assume monotonic increases */
            avail_index[i] = desc.entry_number;
            ASSERT(avail_index[i] > tls_gdt_index);
            tls_gdt_index = desc.entry_number;
        } else
            break;
    }

#ifndef X64
    /* In x86-64's ia32 emulation,
     * set_thread_area(6 <= entry_number && entry_number <= 8) fails
     * with EINVAL (22) because x86-64 only accepts GDT indices 12 to 14
     * for TLS entries.
     */
    /* tls_gdt_index is a signed int while gdt_entry_tls_min is unsigned: a
     * direct comparison would convert "no slot" (-1) to a huge unsigned value
     * and wrongly conclude that the kernel is x64, so only compare an index
     * we actually obtained.
     */
    if (tls_gdt_index > -1 &&
        (uint)tls_gdt_index > (gdt_entry_tls_min + GDT_NUM_TLS_SLOTS))
        gdt_entry_tls_min = GDT_ENTRY_TLS_MIN_64;  /* The kernel is x64. */
#endif

    /* Now give up the earlier slots */
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++) {
        if (avail_index[i] > -1 &&
            avail_index[i] != tls_gdt_index) {
            LOG(GLOBAL, LOG_THREADS, 4,
                "clearing set_thread_area index %d\n", avail_index[i]);
            clear_ldt_struct(&clear_desc, avail_index[i]);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &clear_desc);
            ASSERT(res >= 0);
        }
    }

#ifndef VMX86_SERVER
    ASSERT_CURIOSITY(tls_gdt_index ==
                     (kernel_is_64bit() ? GDT_64BIT : GDT_32BIT));
#endif

#ifdef CLIENT_INTERFACE
    if (INTERNAL_OPTION(private_loader) && tls_gdt_index != -1) {
        /* Use the app's selector with our own TLS base for libraries.  app_fs
         * and app_gs are initialized by the caller in os_tls_app_seg_init().
         */
        int index = SELECTOR_INDEX(IF_X64_ELSE(os_tls->app_fs,
                                               os_tls->app_gs));
        if (index == 0) {
            /* An index of zero means the app has no TLS (yet), and happens
             * during early injection.  We use -1 to grab a new entry.  When the
             * app asks for its first table entry with set_thread_area, we give
             * it this one and emulate its usage of the segment.
             */
            ASSERT_CURIOSITY(DYNAMO_OPTION(early_inject) && "app has "
                             "no TLS, but we used non-early injection");
            initialize_ldt_struct(&desc, NULL, 0, -1);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 4,
                "%s: set_thread_area -1 => %d res, %d index\n",
                __FUNCTION__, res, desc.entry_number);
            ASSERT(res >= 0);
            if (res >= 0) {
                return_stolen_lib_tls_gdt = true;
                index = desc.entry_number;
            }
        }
        lib_tls_gdt_index = index;
    }
#endif
}
Exemple #9
0
int
main()
{
    int *pc;
    int release_build = 0; /* 1 == release, 0 == debug */
    void *dcontext;
    int *dstack;
    int tls_offs;
    ptr_int_t owning_thread;
    INIT();

#ifdef UNIX
    intercept_signal(SIGSEGV, (handler_3_t) signal_handler, false);
#else
    SetUnhandledExceptionFilter((LPTOP_LEVEL_EXCEPTION_FILTER) our_top_handler);
#endif

#ifdef WINDOWS
    /* brute force loop over all TLS entries,
     * and see whether owning_thread is GetCurrentThreadId()
     *     0:001> dt getdc owning_thread
     *        +0x05c owning_thread : 0xed8
     *
     *      0:001> dt _TEB TLS64
     *        +0xe10 TLS64 : [64] Ptr32 Void
     */
    for (tls_offs = 63; tls_offs >=0; tls_offs--) {
        enum {offsetof_TLS64_in_TEB = IF_X64_ELSE(0x1480, 0xe10)};
        dcontext_tls_offset = offsetof_TLS64_in_TEB +
            tls_offs*sizeof(void*);
        GET_DCONTEXT(dcontext);
#if VERBOSE
        print("%d idx, %x offs\n", tls_offs, dcontext_tls_offset);
#endif
        where = SIGSETJMP(mark);
        if (where == 0) {
            owning_thread = *(ptr_int_t *)(((char *)dcontext) +
                                           OWNING_THREAD_OFFSET_IN_DCONTEXT);
            /* we didn't crash reading, is it really thread ID? */
#if VERBOSE
            print("     %d thread %d vs %d\n", tls_offs, owning_thread, GetCurrentThreadId());
#endif
            if (owning_thread == GetCurrentThreadId()) {
#if VERBOSE
                print("     %d is dcontext!\n", tls_offs);
#endif
                break;
            }
        } else {
#if VERBOSE
            print("     %d crashed\n", tls_offs);
#endif
            /* we crashed reading, try next offset */
        }
    }
    if (tls_offs < 0) {
        print("error obtaining dcontext (TLS offset not found): "
              "are you running natively?!?\n");
        exit(1);
    }
#endif
    where = SIGSETJMP(mark);
    if (where != 0) {
        print("error obtaining dcontext (SIGSETJMP failed): "
              "are you running natively?!?\n");
        exit(1);
    }
    GET_DCONTEXT(dcontext)
#if VERBOSE
    print("dcontext is "PFX"\n", dcontext);
#endif
    dstack = *(int **)(((char *)dcontext) + DSTACK_OFFSET_IN_DCONTEXT);
    if (dstack == NULL || !ALIGNED(dstack, PAGE_SIZE)) {
        print("can't find dstack: old build, or new where dstack offset changed?\n");
        while (1)
            ;
        exit(-1);
    }
    dstack_base = (int *) (((char *)dstack) - DSTACK_SIZE);
#if VERBOSE
    print("dstack is "PFX"-"PFX"\n", dstack_base, dstack);
#endif
    print("dcontext->dstack successfully obtained\n");
    where = SIGSETJMP(mark);
#if VERBOSE
    print("SIGSETJMP returned %d\n", where);
#endif
    if (where == 0) {
        /* if we do the copy in a C loop, trace heads cause us to exit before
         * we've hit the cxt switch return address, so we crash rather than taking
         * control -- so we hand-code a copy that in C looks like this:
         *          for (pc = dstack_base; pc++; pc < dstack)
         *              *pc = (int) evil;
         * we assume df is cleared
         * FIXME: popf in old fcache_return can trigger a trap crash before
         * get to ret that goes to evil!
         * FIXME: I had this getting to evil w/o crashing first, but it's
         * a little fragile, and on win32 I get issues later b/c we have
         * trampolines, etc. and so don't completely lose control.
         * But, in all cases we fail, so whether it's a nice shell code
         * execution or a crash doesn't matter -- the test does what it's supposed
         * to do!
         */
        evil_copy(dstack_base, DSTACK_SIZE / sizeof(int), (ptr_int_t)evil);
        print("wrote to entire dstack without incident!\n");
    } else if (where == 1) {
        print("error writing to "PFX" in expected dstack "PFX"-"PFX"\n",
              pc, dstack_base, dstack);
    } else if (where == 2) {
        print("DR has been cracked!  Malicious code is now runnning...\n");
    }
}