/* Queries the set of available GDT slots, and initializes:
 * - tls_gdt_index
 * - gdt_entry_tls_min on ia32
 * - lib_tls_gdt_index if using private loader
 * GDT slots are initialized with a base and limit of zero.  The caller is
 * responsible for setting them to a real base.
 */
static void
choose_gdt_slots(os_local_state_t *os_tls)
{
    our_modify_ldt_t desc;
    int i;
    int avail_index[GDT_NUM_TLS_SLOTS];
    our_modify_ldt_t clear_desc;
    int res;

    /* using local static b/c dynamo_initialized is not set for a client thread
     * when created in client's dr_client_main routine
     */
    /* FIXME: Could be racy if we have multiple threads initializing during
     * startup.
     */
    if (tls_global_init)
        return;
    tls_global_init = true;

    /* We don't want to break the assumptions of pthreads or wine,
     * so we try to take the last slot.  We don't want to hardcode
     * the index b/c the kernel will let us clobber entries so we want
     * to only pass in -1.
     */
    ASSERT(!dynamo_initialized);
    ASSERT(tls_gdt_index == -1);
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++)
        avail_index[i] = -1;
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++) {
        /* We use a base and limit of 0 for testing what's available. */
        initialize_ldt_struct(&desc, NULL, 0, -1);
        res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
        LOG(GLOBAL, LOG_THREADS, 4, "%s: set_thread_area -1 => %d res, %d index\n",
            __FUNCTION__, res, desc.entry_number);
        if (res >= 0) {
            /* We assume monotonic increases */
            avail_index[i] = desc.entry_number;
            ASSERT(avail_index[i] > tls_gdt_index);
            tls_gdt_index = desc.entry_number;
        } else
            break;
    }

#ifndef X64
    /* In x86-64's ia32 emulation,
     * set_thread_area(6 <= entry_number && entry_number <= 8) fails
     * with EINVAL (22) because x86-64 only accepts GDT indices 12 to 14
     * for TLS entries.
     */
    if (tls_gdt_index > (gdt_entry_tls_min + GDT_NUM_TLS_SLOTS))
        gdt_entry_tls_min = GDT_ENTRY_TLS_MIN_64; /* The kernel is x64. */
#endif

    /* Now give up the earlier slots */
    for (i = 0; i < GDT_NUM_TLS_SLOTS; i++) {
        if (avail_index[i] > -1 && avail_index[i] != tls_gdt_index) {
            LOG(GLOBAL, LOG_THREADS, 4,
                "clearing set_thread_area index %d\n", avail_index[i]);
            clear_ldt_struct(&clear_desc, avail_index[i]);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &clear_desc);
            ASSERT(res >= 0);
        }
    }

#ifndef VMX86_SERVER
    ASSERT_CURIOSITY(tls_gdt_index ==
                     (kernel_is_64bit() ? GDT_64BIT : GDT_32BIT));
#endif

#ifdef CLIENT_INTERFACE
    if (INTERNAL_OPTION(private_loader) && tls_gdt_index != -1) {
        /* Use the app's selector with our own TLS base for libraries.  app_fs
         * and app_gs are initialized by the caller in os_tls_app_seg_init().
         */
        int index = SELECTOR_INDEX(os_tls->app_lib_tls_reg);
        if (index == 0) {
            /* An index of zero means the app has no TLS (yet), and happens
             * during early injection.  We use -1 to grab a new entry.  When the
             * app asks for its first table entry with set_thread_area, we give
             * it this one and emulate its usage of the segment.
             */
            ASSERT_CURIOSITY(DYNAMO_OPTION(early_inject) &&
                             "app has no TLS, but we used non-early injection");
            initialize_ldt_struct(&desc, NULL, 0, -1);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 4,
                "%s: set_thread_area -1 => %d res, %d index\n",
                __FUNCTION__, res, desc.entry_number);
            ASSERT(res >= 0);
            if (res >= 0) {
                return_stolen_lib_tls_gdt = true;
                index = desc.entry_number;
            }
        }
        lib_tls_gdt_index = index;
    } else {
        /* For no private loader, e.g., app statically linked with DR,
         * we use app's lib tls gdt index.
         */
        lib_tls_gdt_index = SELECTOR_INDEX(os_tls->app_lib_tls_reg);
    }
#endif
}
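/* For illustration, the probe-and-release pattern used above in miniature:
 * passing entry_number == -1 to SYS_set_thread_area asks the kernel to pick a
 * free GDT TLS slot and write the chosen index back into the descriptor, and
 * writing a cleared descriptor to that index releases it again.  This helper
 * (probe_one_gdt_slot) is a hypothetical sketch, not part of DR, and is not
 * called anywhere.
 */
static inline int
probe_one_gdt_slot(void)
{
    our_modify_ldt_t desc;
    int res, idx;
    /* Base and limit of 0, index -1: just ask which slot is free. */
    initialize_ldt_struct(&desc, NULL, 0, -1);
    res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
    if (res < 0)
        return -1; /* no free slot, or the syscall is disabled (64-bit 2.6) */
    idx = desc.entry_number;
    /* Give the slot back so the caller's view of the GDT is unchanged. */
    clear_ldt_struct(&desc, idx);
    dynamorio_syscall(SYS_set_thread_area, 1, &desc);
    return idx;
}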
void
tls_thread_init(os_local_state_t *os_tls, byte *segment)
{
    /* We have four different ways to obtain TLS, each with its own limitations:
     *
     * 1) Piggyback on the threading system (like we do on Windows): here that would
     *    be pthreads, which uses a segment since at least RH9, and uses gdt-based
     *    segments for NPTL.  The advantage is we won't run out of ldt or gdt entries
     *    (except when the app itself would).  The disadvantage is we're stealing
     *    application slots and we rely on user mode interfaces.
     *
     * 2) Steal an ldt entry via SYS_modify_ldt.  This suffers from the 8K ldt entry
     *    limit and requires that we update manually on a new thread.  For 64-bit
     *    we're limited here to a 32-bit base.  (Strangely, the kernel's
     *    include/asm-x86_64/ldt.h implies that the base is ignored: but it doesn't
     *    seem to be.)
     *
     * 3) Steal a gdt entry via SYS_set_thread_area.  There is a 3rd unused entry
     *    (after pthreads and wine) we could use.  The kernel swaps for us, and with
     *    CLONE_SETTLS the kernel will set up the entry for a new thread for us.
     *    Xref PR 192231 and PR 285898.  This system call is disabled on 64-bit 2.6
     *    kernels (though the man page for arch_prctl implies it isn't for 2.5
     *    kernels?!?)
     *
     * 4) Use SYS_arch_prctl.  This is only implemented on 64-bit kernels, and can
     *    only be used to set the gdt entries that fs and gs select for.  Faster to
     *    use a <4GB base (obtain with mmap MAP_32BIT) since can use gdt; else have
     *    to use wrmsr.  The man pages say "ARCH_SET_GS is disabled in some kernels".
     */
    uint selector;
    int index = -1;
    int res;
#ifdef X64
    /* First choice is gdt, which means arch_prctl.  Since this may fail
     * on some kernels, we require -heap_in_lower_4GB so we can fall back
     * on modify_ldt.
     */
    byte *cur_gs;
    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_GET_GS, &cur_gs);
    if (res >= 0) {
        LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: cur gs base is "PFX"\n", cur_gs);
        /* If we're a non-initial thread, gs will be set to the parent thread's value */
        if (cur_gs == NULL || is_dynamo_address(cur_gs) ||
            /* By resolving i#107, we can handle gs conflicts between app and dr. */
            INTERNAL_OPTION(mangle_app_seg)) {
            res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_GS, segment);
            if (res >= 0) {
                os_tls->tls_type = TLS_TYPE_ARCH_PRCTL;
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl successful for base "PFX"\n", segment);
                /* Kernel should have written %gs for us if using GDT */
                if (!dynamo_initialized && read_thread_register(SEG_TLS) == 0) {
                    LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: using MSR\n");
                    tls_using_msr = true;
                }
                if (IF_CLIENT_INTERFACE_ELSE(INTERNAL_OPTION(private_loader), false)) {
                    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_FS,
                                            os_tls->os_seg_info.priv_lib_tls_base);
                    /* Assuming set fs must be successful if set gs succeeded. */
                    ASSERT(res >= 0);
                }
            } else {
                /* we've found a kernel where ARCH_SET_GS is disabled */
                ASSERT_CURIOSITY(false && "arch_prctl failed on set but not get");
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl failed: error %d\n", res);
            }
        } else {
            /* FIXME PR 205276: we don't currently handle it: fall back on ldt, but
             * we'll have the same conflict w/ the selector...
             */
            ASSERT_BUG_NUM(205276, cur_gs == NULL);
        }
    }
#endif

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Second choice is set_thread_area */
        /* PR 285898: if we added CLONE_SETTLS to all clone calls (and emulated vfork
         * with clone) we could avoid having to set tls up for each thread (as well
         * as solve race PR 207903), at least for kernel 2.5.32+.  For now we stick
         * w/ manual setup.
         */
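        /* For illustration only: with CLONE_SETTLS, the descriptor would
         * instead be passed at thread creation and installed by the kernel,
         * roughly like the following (glibc clone() wrapper argument order;
         * child_fn, stack, flags, arg, parent_tid, and child_tid are
         * hypothetical):
         *
         *   our_modify_ldt_t desc;
         *   initialize_ldt_struct(&desc, segment, PAGE_SIZE, tls_gdt_index);
         *   clone(child_fn, stack, flags | CLONE_SETTLS, arg,
         *         parent_tid, &desc, child_tid);
         */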
        our_modify_ldt_t desc;

        /* Pick which GDT slots we'll use for DR TLS and for library TLS if
         * using the private loader.
         */
        choose_gdt_slots(os_tls);

        if (tls_gdt_index > -1) {
            /* Now that we know which GDT slot to use, install the per-thread base
             * into it.
             */
            /* Base here must be 32-bit */
            IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) &&
                          segment <= (byte*)UINT_MAX));
            initialize_ldt_struct(&desc, segment, PAGE_SIZE, tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, tls_gdt_index, res, desc.entry_number);
            ASSERT(res < 0 || desc.entry_number == tls_gdt_index);
        } else {
            res = -1; /* fall back on LDT */
        }

        if (res >= 0) {
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area successful for base "PFX" @index %d\n",
                segment, tls_gdt_index);
            os_tls->tls_type = TLS_TYPE_GDT;
            index = tls_gdt_index;
            selector = GDT_SELECTOR(index);
            WRITE_DR_SEG(selector); /* macro needs lvalue! */
        } else {
            IF_VMX86(ASSERT_NOT_REACHED()); /* since no modify_ldt */
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area failed: error %d\n", res);
        }

#ifdef CLIENT_INTERFACE
        /* Install the library TLS base. */
        if (INTERNAL_OPTION(private_loader) && res >= 0) {
            app_pc base = os_tls->os_seg_info.priv_lib_tls_base;
            /* lib_tls_gdt_index is picked in choose_gdt_slots. */
            ASSERT(lib_tls_gdt_index >= gdt_entry_tls_min);
            initialize_ldt_struct(&desc, base, GDT_NO_SIZE_LIMIT,
                                  lib_tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, lib_tls_gdt_index, res, desc.entry_number);
            if (res >= 0) {
                /* i#558: update lib seg reg to enforce the segment changes */
                selector = GDT_SELECTOR(lib_tls_gdt_index);
                LOG(GLOBAL, LOG_THREADS, 2, "%s: setting %s to selector 0x%x\n",
                    __FUNCTION__, reg_names[LIB_SEG_TLS], selector);
                WRITE_LIB_SEG(selector);
            }
        }
#endif
    }

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Third choice: modify_ldt, which should be available on kernel 2.3.99+ */
        /* Base here must be 32-bit */
        IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) &&
                      segment <= (byte*)UINT_MAX));
        /* we have the thread_initexit_lock so no race here */
        index = find_unused_ldt_index();
        selector = LDT_SELECTOR(index);
        ASSERT(index != -1);
        create_ldt_entry((void *)segment, PAGE_SIZE, index);
        os_tls->tls_type = TLS_TYPE_LDT;
        WRITE_DR_SEG(selector); /* macro needs lvalue! */
        LOG(GLOBAL, LOG_THREADS, 1,
            "os_tls_init: modify_ldt successful for base "PFX" w/ index %d\n",
            segment, index);
    }

    os_tls->ldt_index = index;
}
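#ifdef X64
/* For illustration, the first-choice path above in isolation: on 64-bit,
 * ARCH_SET_GS installs a full 64-bit base directly (via the GDT or the MSR),
 * with no descriptor or selector management needed.  This helper
 * (set_gs_base_sketch) is a hypothetical sketch, not part of DR, and is not
 * called anywhere.
 */
static inline bool
set_gs_base_sketch(byte *segment)
{
    return dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_GS, segment) >= 0;
}
#endif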
    if (htable->free_payload_func != NULL)
        (*htable->free_payload_func)(entry->payload);
    HEAP_TYPE_FREE(dcontext, entry, generic_entry_t, ACCT_OTHER, PROTECTED);
}

/* Wrapper routines to implement our generic_entry_t and free-func layer */

generic_table_t *
generic_hash_create(dcontext_t *dcontext, uint bits, uint load_factor_percent,
                    uint table_flags, void (*free_payload_func)(void*)
                    _IF_DEBUG(const char *table_name))
{
    generic_table_t *table = HEAP_TYPE_ALLOC(dcontext, generic_table_t,
                                             ACCT_OTHER, PROTECTED);
    hashtable_generic_init(dcontext, table, bits, load_factor_percent,
                           (hash_function_t)INTERNAL_OPTION(alt_hash_func),
                           0 /* hash_mask_offset */, table_flags
                           _IF_DEBUG(table_name));
    table->free_payload_func = free_payload_func;
    return table;
}

void
generic_hash_clear(dcontext_t *dcontext, generic_table_t *htable)
{
    hashtable_generic_clear(dcontext, htable);
}

void
generic_hash_destroy(dcontext_t *dcontext, generic_table_t *htable)
{
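    /* Illustrative usage of the wrapper layer above (sketch only; the payload
     * free routine and table name are hypothetical):
     *
     *   static void my_free_payload(void *p) { ... }
     *   generic_table_t *t =
     *       generic_hash_create(dcontext, 8, 80, 0, my_free_payload
     *                           _IF_DEBUG("my table"));
     *   ...
     *   generic_hash_destroy(dcontext, t);
     */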
void
clean_call_opt_exit(void)
{
    /* FIXME i#1569: NYI on AArch64 */
    ASSERT_NOT_IMPLEMENTED(INTERNAL_OPTION(opt_cleancall) == 0);
}