/** Install the I/O Permission bitmap.
 *
 * The current task's I/O permission bitmap, if any, is installed
 * in the current CPU's TSS.
 *
 * Interrupts must be disabled prior to this call.
 *
 */
void io_perm_bitmap_install(void)
{
    /* First, copy the I/O Permission Bitmap. */
    irq_spinlock_lock(&TASK->lock, false);
    size_t ver = TASK->arch.iomapver;
    size_t elements = TASK->arch.iomap.elements;

    if (elements > 0) {
        ASSERT(TASK->arch.iomap.bits);

        bitmap_t iomap;
        bitmap_initialize(&iomap, TSS_IOMAP_SIZE * 8,
            CPU->arch.tss->iomap);
        bitmap_copy(&iomap, &TASK->arch.iomap, elements);

        /*
         * Set the trailing bits in the last byte of the map to disable
         * I/O access.
         */
        bitmap_set_range(&iomap, elements,
            ALIGN_UP(elements, 8) - elements);

        /*
         * It is safe to set the trailing eight bits because of the
         * extra convenience byte in TSS_IOMAP_SIZE.
         */
        bitmap_set_range(&iomap, ALIGN_UP(elements, 8), 8);
    }

    irq_spinlock_unlock(&TASK->lock, false);

    /*
     * Second, adjust the TSS segment limit.
     * Take the extra ending byte with all bits set into account.
     */
    ptr_16_64_t cpugdtr;
    gdtr_store(&cpugdtr);

    descriptor_t *gdt_p = (descriptor_t *) cpugdtr.base;
    size_t size = bitmap_size(elements);
    gdt_tss_setlimit(&gdt_p[TSS_DES], TSS_BASIC_SIZE + size);
    gdtr_load(&cpugdtr);

    /*
     * Before we load the new TSS limit, the current TSS descriptor
     * type must be changed to describe an inactive TSS.
     */
    tss_descriptor_t *tss_desc = (tss_descriptor_t *) &gdt_p[TSS_DES];
    tss_desc->type = AR_TSS;
    tr_load(GDT_SELECTOR(TSS_DES));

    /*
     * Update the generation count so that faults caused by
     * early accesses can be serviced.
     */
    CPU->arch.iomapver_copy = ver;
}
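For context, the CPU consults the bitmap installed above on every IN/OUT executed at CPL > IOPL: the access is allowed only if every bit covering the accessed port range is clear, and any port whose bits lie beyond the TSS limit is implicitly denied, which is why the trailing bits and the extra convenience byte are forced to 1. A minimal conceptual sketch of that check (the function name and the byte-array view of the bitmap are illustrative, not part of the kernel):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Conceptual model of the CPU's I/O permission check: a set bit denies the port.
 * limit_bytes stands for the number of bitmap bytes covered by the TSS limit. */
static bool io_access_allowed(const uint8_t *iomap, size_t limit_bytes,
    uint16_t port, unsigned width)
{
    for (unsigned i = 0; i < width; i++) {
        size_t bit = (size_t) port + i;
        if (bit / 8 >= limit_bytes)
            return false;  /* beyond the TSS limit: implicitly denied */
        if (iomap[bit / 8] & (1 << (bit % 8)))
            return false;  /* bit set: access denied */
    }
    return true;
}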
void pm_init(void)
{
    descriptor_t *gdt_p = (descriptor_t *) gdtr.base;
    ptr_16_32_t idtr;

    /*
     * Update addresses in GDT and IDT to their virtual counterparts.
     */
    idtr.limit = sizeof(idt);
    idtr.base = (uintptr_t) idt;
    gdtr_load(&gdtr);
    idtr_load(&idtr);

    /*
     * Each CPU has its private GDT and TSS.
     * All CPUs share one IDT.
     */
    if (config.cpu_active == 1) {
        idt_init();
        /*
         * NOTE: bootstrap CPU has statically allocated TSS, because
         * the heap hasn't been initialized so far.
         */
        tss_p = &tss0;
    } else {
        tss_p = (tss_t *) malloc(sizeof(tss_t), FRAME_ATOMIC);
        if (!tss_p)
            panic("Cannot allocate TSS.");
    }

    tss_initialize(tss_p);

    gdt_p[TSS_DES].access = AR_PRESENT | AR_TSS | DPL_KERNEL;
    gdt_p[TSS_DES].special = 1;
    gdt_p[TSS_DES].granularity = 0;

    gdt_setbase(&gdt_p[TSS_DES], (uintptr_t) tss_p);
    gdt_setlimit(&gdt_p[TSS_DES], TSS_BASIC_SIZE - 1);

    /*
     * As of this moment, the current CPU has its own GDT pointing
     * to its own TSS. We just need to load the TR register.
     */
    tr_load(GDT_SELECTOR(TSS_DES));

    /* Disable I/O on nonprivileged levels and clear NT flag. */
    write_eflags(read_eflags() & ~(EFLAGS_IOPL | EFLAGS_NT));

    /* Disable alignment check. */
    write_cr0(read_cr0() & ~CR0_AM);
}
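gdt_setbase() and gdt_setlimit() above scatter the 32-bit TSS base and the 20-bit limit across the packed fields of a legacy 8-byte descriptor. A sketch of what such helpers typically do, assuming an illustrative descriptor layout (the real descriptor_t may name and order the fields differently):

#include <stdint.h>

/* Hypothetical ia32 segment descriptor layout (packed, 8 bytes). */
typedef struct {
    uint16_t limit_0_15;
    uint16_t base_0_15;
    uint8_t base_16_23;
    uint8_t access;
    unsigned limit_16_19 : 4;
    unsigned available : 1;
    unsigned unused : 1;
    unsigned special : 1;      /* D/B bit */
    unsigned granularity : 1;  /* limit in bytes (0) or 4 KiB units (1) */
    uint8_t base_24_31;
} __attribute__((packed)) descriptor_sketch_t;

static void sketch_setbase(descriptor_sketch_t *d, uint32_t base)
{
    d->base_0_15 = base & 0xffff;
    d->base_16_23 = (base >> 16) & 0xff;
    d->base_24_31 = (base >> 24) & 0xff;
}

static void sketch_setlimit(descriptor_sketch_t *d, uint32_t limit)
{
    /* With granularity = 0 the limit is interpreted in bytes; the 20 bits
     * cover at most 1 MiB - 1, which is plenty for a TSS. */
    d->limit_0_15 = limit & 0xffff;
    d->limit_16_19 = (limit >> 16) & 0x0f;
}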
/** Initialize segmentation - code/data/idt tables.
 *
 */
void pm_init(void)
{
    descriptor_t *gdt_p = (descriptor_t *) gdtr.base;
    tss_descriptor_t *tss_desc;

    /*
     * Each CPU has its private GDT and TSS.
     * All CPUs share one IDT.
     */
    if (config.cpu_active == 1) {
        idt_init();
        /*
         * NOTE: bootstrap CPU has statically allocated TSS, because
         * the heap hasn't been initialized so far.
         */
        tss_p = &tss;
    } else {
        /*
         * We are going to use malloc, which may return a pointer that
         * is not mapped by the boot page tables, so initialize the CR3
         * register ahead of page_init().
         */
        write_cr3((uintptr_t) AS_KERNEL->genarch.page_table);

        tss_p = (tss_t *) malloc(sizeof(tss_t), FRAME_ATOMIC);
        if (!tss_p)
            panic("Cannot allocate TSS.");
    }

    tss_initialize(tss_p);

    tss_desc = (tss_descriptor_t *) (&gdt_p[TSS_DES]);
    tss_desc->present = 1;
    tss_desc->type = AR_TSS;
    tss_desc->dpl = PL_KERNEL;

    gdt_tss_setbase(&gdt_p[TSS_DES], (uintptr_t) tss_p);
    gdt_tss_setlimit(&gdt_p[TSS_DES], TSS_BASIC_SIZE - 1);

    gdtr_load(&gdtr);
    idtr_load(&idtr);

    /*
     * As of this moment, the current CPU has its own GDT pointing
     * to its own TSS. We just need to load the TR register.
     */
    tr_load(GDT_SELECTOR(TSS_DES));
}
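On amd64 the TSS descriptor is a 16-byte system descriptor spanning two consecutive GDT slots, so gdt_tss_setbase() also has to store the upper 32 bits of the base. A compact sketch under the same caveat as the ia32 sketch above (field names are illustrative, not the kernel's actual tss_descriptor_t):

#include <stdint.h>

/* Hypothetical amd64 TSS descriptor layout (packed, 16 bytes = two GDT slots). */
typedef struct {
    uint16_t limit_0_15;
    uint16_t base_0_15;
    uint8_t base_16_23;
    unsigned type : 4;
    unsigned : 1;
    unsigned dpl : 2;
    unsigned present : 1;
    unsigned limit_16_19 : 4;
    unsigned : 3;
    unsigned granularity : 1;
    uint8_t base_24_31;
    uint32_t base_32_63;
    uint32_t reserved;
} __attribute__((packed)) tss_descriptor_sketch_t;

static void sketch_tss_setbase(tss_descriptor_sketch_t *d, uint64_t base)
{
    d->base_0_15 = base & 0xffff;
    d->base_16_23 = (base >> 16) & 0xff;
    d->base_24_31 = (base >> 24) & 0xff;
    d->base_32_63 = (uint32_t) (base >> 32);  /* the extra dword vs. ia32 */
}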
/** Perform ia32-specific tasks needed before the new thread is scheduled.
 *
 * THREAD is locked and interrupts are disabled.
 */
void before_thread_runs_arch(void)
{
    uintptr_t kstk = (uintptr_t) &THREAD->kstack[STACK_SIZE];

#ifndef PROCESSOR_i486
    if (CPU->arch.fi.bits.sep) {
        /* Set kernel stack for CPL3 -> CPL0 switch via SYSENTER */
        write_msr(IA32_MSR_SYSENTER_ESP, kstk - sizeof(istate_t));
    }
#endif

    /* Set kernel stack for CPL3 -> CPL0 switch via interrupt */
    CPU->arch.tss->esp0 = kstk;
    CPU->arch.tss->ss0 = GDT_SELECTOR(KDATA_DES);

    /* Set up TLS in GS register */
    set_tls_desc(THREAD->arch.tls);
}
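write_msr() above targets IA32_MSR_SYSENTER_ESP, one of the three MSRs that parameterize the SYSENTER fast path (CS selector at 0x174, ESP at 0x175, EIP at 0x176). A minimal sketch of a wrmsr-based write_msr(), assuming GCC-style inline assembly; the kernel's own implementation may differ:

#include <stdint.h>

#define IA32_MSR_SYSENTER_CS   0x174
#define IA32_MSR_SYSENTER_ESP  0x175
#define IA32_MSR_SYSENTER_EIP  0x176

/* WRMSR takes the MSR number in ECX and the 64-bit value in EDX:EAX. */
static inline void write_msr_sketch(uint32_t msr, uint64_t value)
{
    asm volatile ("wrmsr"
        :
        : "c" (msr), "a" ((uint32_t) value), "d" ((uint32_t) (value >> 32))
        : "memory");
}

IA32_MSR_SYSENTER_CS and IA32_MSR_SYSENTER_EIP would normally be programmed once at CPU initialization; only the stack pointer has to be refreshed per thread, which is exactly what before_thread_runs_arch() does here.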
/*
 * This function takes care of proper setup of IDT and IDTR.
 */
void idt_init(void)
{
    idescriptor_t *d;
    unsigned int i;

    for (i = 0; i < IDT_ITEMS; i++) {
        d = &idt[i];
        d->unused = 0;
        d->selector = GDT_SELECTOR(KTEXT_DES);

        if (i == VECTOR_SYSCALL) {
            /*
             * The syscall trap gate must be callable from
             * userland. Interrupts will remain enabled.
             */
            d->access = AR_PRESENT | AR_TRAP | DPL_USER;
        } else {
            /*
             * Other interrupts use interrupt gates which
             * disable interrupts.
             */
            d->access = AR_PRESENT | AR_INTERRUPT;
        }
    }

    d = &idt[0];
    idt_setoffset(d++, (uintptr_t) &int_0);
    idt_setoffset(d++, (uintptr_t) &int_1);
    idt_setoffset(d++, (uintptr_t) &int_2);
    idt_setoffset(d++, (uintptr_t) &int_3);
    idt_setoffset(d++, (uintptr_t) &int_4);
    idt_setoffset(d++, (uintptr_t) &int_5);
    idt_setoffset(d++, (uintptr_t) &int_6);
    idt_setoffset(d++, (uintptr_t) &int_7);
    idt_setoffset(d++, (uintptr_t) &int_8);
    idt_setoffset(d++, (uintptr_t) &int_9);
    idt_setoffset(d++, (uintptr_t) &int_10);
    idt_setoffset(d++, (uintptr_t) &int_11);
    idt_setoffset(d++, (uintptr_t) &int_12);
    idt_setoffset(d++, (uintptr_t) &int_13);
    idt_setoffset(d++, (uintptr_t) &int_14);
    idt_setoffset(d++, (uintptr_t) &int_15);
    idt_setoffset(d++, (uintptr_t) &int_16);
    idt_setoffset(d++, (uintptr_t) &int_17);
    idt_setoffset(d++, (uintptr_t) &int_18);
    idt_setoffset(d++, (uintptr_t) &int_19);
    idt_setoffset(d++, (uintptr_t) &int_20);
    idt_setoffset(d++, (uintptr_t) &int_21);
    idt_setoffset(d++, (uintptr_t) &int_22);
    idt_setoffset(d++, (uintptr_t) &int_23);
    idt_setoffset(d++, (uintptr_t) &int_24);
    idt_setoffset(d++, (uintptr_t) &int_25);
    idt_setoffset(d++, (uintptr_t) &int_26);
    idt_setoffset(d++, (uintptr_t) &int_27);
    idt_setoffset(d++, (uintptr_t) &int_28);
    idt_setoffset(d++, (uintptr_t) &int_29);
    idt_setoffset(d++, (uintptr_t) &int_30);
    idt_setoffset(d++, (uintptr_t) &int_31);
    idt_setoffset(d++, (uintptr_t) &int_32);
    idt_setoffset(d++, (uintptr_t) &int_33);
    idt_setoffset(d++, (uintptr_t) &int_34);
    idt_setoffset(d++, (uintptr_t) &int_35);
    idt_setoffset(d++, (uintptr_t) &int_36);
    idt_setoffset(d++, (uintptr_t) &int_37);
    idt_setoffset(d++, (uintptr_t) &int_38);
    idt_setoffset(d++, (uintptr_t) &int_39);
    idt_setoffset(d++, (uintptr_t) &int_40);
    idt_setoffset(d++, (uintptr_t) &int_41);
    idt_setoffset(d++, (uintptr_t) &int_42);
    idt_setoffset(d++, (uintptr_t) &int_43);
    idt_setoffset(d++, (uintptr_t) &int_44);
    idt_setoffset(d++, (uintptr_t) &int_45);
    idt_setoffset(d++, (uintptr_t) &int_46);
    idt_setoffset(d++, (uintptr_t) &int_47);
    idt_setoffset(d++, (uintptr_t) &int_48);
    idt_setoffset(d++, (uintptr_t) &int_49);
    idt_setoffset(d++, (uintptr_t) &int_50);
    idt_setoffset(d++, (uintptr_t) &int_51);
    idt_setoffset(d++, (uintptr_t) &int_52);
    idt_setoffset(d++, (uintptr_t) &int_53);
    idt_setoffset(d++, (uintptr_t) &int_54);
    idt_setoffset(d++, (uintptr_t) &int_55);
    idt_setoffset(d++, (uintptr_t) &int_56);
    idt_setoffset(d++, (uintptr_t) &int_57);
    idt_setoffset(d++, (uintptr_t) &int_58);
    idt_setoffset(d++, (uintptr_t) &int_59);
    idt_setoffset(d++, (uintptr_t) &int_60);
    idt_setoffset(d++, (uintptr_t) &int_61);
    idt_setoffset(d++, (uintptr_t) &int_62);
    idt_setoffset(d++, (uintptr_t) &int_63);

    idt_setoffset(&idt[VECTOR_SYSCALL], (uintptr_t) &int_syscall);
}
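Each idt_setoffset() call splits a 32-bit handler address across the two 16-bit offset fields of an ia32 gate descriptor; the selector and access byte were already filled in by the loop above. A sketch with an illustrative field layout (the real idescriptor_t may differ):

#include <stdint.h>

/* Hypothetical ia32 interrupt/trap gate layout (packed, 8 bytes). */
typedef struct {
    uint16_t offset_0_15;
    uint16_t selector;
    uint8_t unused;
    uint8_t access;
    uint16_t offset_16_31;
} __attribute__((packed)) idescriptor_sketch_t;

static void sketch_idt_setoffset(idescriptor_sketch_t *d, uint32_t offset)
{
    d->offset_0_15 = offset & 0xffff;
    d->offset_16_31 = offset >> 16;
}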
void tls_thread_init(os_local_state_t *os_tls, byte *segment)
{
    /* We have four different ways to obtain TLS, each with its own limitations:
     *
     * 1) Piggyback on the threading system (like we do on Windows): here that would
     *    be pthreads, which uses a segment since at least RH9, and uses gdt-based
     *    segments for NPTL.  The advantage is we won't run out of ldt or gdt entries
     *    (except when the app itself would).  The disadvantage is we're stealing
     *    application slots and we rely on user mode interfaces.
     *
     * 2) Steal an ldt entry via SYS_modify_ldt.  This suffers from the 8K ldt entry
     *    limit and requires that we update manually on a new thread.  For 64-bit
     *    we're limited here to a 32-bit base.  (Strangely, the kernel's
     *    include/asm-x86_64/ldt.h implies that the base is ignored: but it doesn't
     *    seem to be.)
     *
     * 3) Steal a gdt entry via SYS_set_thread_area.  There is a 3rd unused entry
     *    (after pthreads and wine) we could use.  The kernel swaps for us, and with
     *    CLONE_SETTLS the kernel will set up the entry for a new thread for us.  Xref
     *    PR 192231 and PR 285898.  This system call is disabled on 64-bit 2.6
     *    kernels (though the man page for arch_prctl implies it isn't for 2.5
     *    kernels?!?)
     *
     * 4) Use SYS_arch_prctl.  This is only implemented on 64-bit kernels, and can
     *    only be used to set the gdt entries that fs and gs select for.  Faster to
     *    use <4GB base (obtain with mmap MAP_32BIT) since can use gdt; else have to
     *    use wrmsr.  The man pages say "ARCH_SET_GS is disabled in some kernels".
     */
    uint selector;
    int index = -1;
    int res;
#ifdef X64
    /* First choice is gdt, which means arch_prctl.  Since this may fail
     * on some kernels, we require -heap_in_lower_4GB so we can fall back
     * on modify_ldt.
     */
    byte *cur_gs;
    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_GET_GS, &cur_gs);
    if (res >= 0) {
        LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: cur gs base is "PFX"\n", cur_gs);
        /* If we're a non-initial thread, gs will be set to the parent thread's value */
        if (cur_gs == NULL || is_dynamo_address(cur_gs) ||
            /* By resolving i#107, we can handle gs conflicts between app and dr. */
            INTERNAL_OPTION(mangle_app_seg)) {
            res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_GS, segment);
            if (res >= 0) {
                os_tls->tls_type = TLS_TYPE_ARCH_PRCTL;
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl successful for base "PFX"\n", segment);
                /* Kernel should have written %gs for us if using GDT */
                if (!dynamo_initialized && read_thread_register(SEG_TLS) == 0) {
                    LOG(GLOBAL, LOG_THREADS, 1, "os_tls_init: using MSR\n");
                    tls_using_msr = true;
                }
                if (IF_CLIENT_INTERFACE_ELSE(INTERNAL_OPTION(private_loader), false)) {
                    res = dynamorio_syscall(SYS_arch_prctl, 2, ARCH_SET_FS,
                                            os_tls->os_seg_info.priv_lib_tls_base);
                    /* Assuming set fs must be successful if set gs succeeded. */
                    ASSERT(res >= 0);
                }
            } else {
                /* we've found a kernel where ARCH_SET_GS is disabled */
                ASSERT_CURIOSITY(false && "arch_prctl failed on set but not get");
                LOG(GLOBAL, LOG_THREADS, 1,
                    "os_tls_init: arch_prctl failed: error %d\n", res);
            }
        } else {
            /* FIXME PR 205276: we don't currently handle it: fall back on ldt, but
             * we'll have the same conflict w/ the selector...
             */
            ASSERT_BUG_NUM(205276, cur_gs == NULL);
        }
    }
#endif

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Second choice is set_thread_area */
        /* PR 285898: if we added CLONE_SETTLS to all clone calls (and emulated vfork
         * with clone) we could avoid having to set tls up for each thread (as well
         * as solve race PR 207903), at least for kernel 2.5.32+.  For now we stick
         * w/ manual setup.
         */
        our_modify_ldt_t desc;

        /* Pick which GDT slots we'll use for DR TLS and for library TLS if
         * using the private loader.
         */
        choose_gdt_slots(os_tls);

        if (tls_gdt_index > -1) {
            /* Now that we know which GDT slot to use, install the per-thread base
             * into it.
             */
            /* Base here must be 32-bit */
            IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) &&
                          segment <= (byte*)UINT_MAX));
            initialize_ldt_struct(&desc, segment, PAGE_SIZE, tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, tls_gdt_index, res, desc.entry_number);
            ASSERT(res < 0 || desc.entry_number == tls_gdt_index);
        } else {
            res = -1;  /* fall back on LDT */
        }

        if (res >= 0) {
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area successful for base "PFX" @index %d\n",
                segment, tls_gdt_index);
            os_tls->tls_type = TLS_TYPE_GDT;
            index = tls_gdt_index;
            selector = GDT_SELECTOR(index);
            WRITE_DR_SEG(selector); /* macro needs lvalue! */
        } else {
            IF_VMX86(ASSERT_NOT_REACHED()); /* since no modify_ldt */
            LOG(GLOBAL, LOG_THREADS, 1,
                "os_tls_init: set_thread_area failed: error %d\n", res);
        }

#ifdef CLIENT_INTERFACE
        /* Install the library TLS base. */
        if (INTERNAL_OPTION(private_loader) && res >= 0) {
            app_pc base = os_tls->os_seg_info.priv_lib_tls_base;
            /* lib_tls_gdt_index is picked in choose_gdt_slots. */
            ASSERT(lib_tls_gdt_index >= gdt_entry_tls_min);
            initialize_ldt_struct(&desc, base, GDT_NO_SIZE_LIMIT, lib_tls_gdt_index);
            res = dynamorio_syscall(SYS_set_thread_area, 1, &desc);
            LOG(GLOBAL, LOG_THREADS, 3,
                "%s: set_thread_area %d => %d res, %d index\n",
                __FUNCTION__, lib_tls_gdt_index, res, desc.entry_number);
            if (res >= 0) {
                /* i#558: update lib seg reg to enforce the segment changes */
                selector = GDT_SELECTOR(lib_tls_gdt_index);
                LOG(GLOBAL, LOG_THREADS, 2, "%s: setting %s to selector 0x%x\n",
                    __FUNCTION__, reg_names[LIB_SEG_TLS], selector);
                WRITE_LIB_SEG(selector);
            }
        }
#endif
    }

    if (os_tls->tls_type == TLS_TYPE_NONE) {
        /* Third choice: modify_ldt, which should be available on kernel 2.3.99+ */
        /* Base here must be 32-bit */
        IF_X64(ASSERT(DYNAMO_OPTION(heap_in_lower_4GB) && segment <= (byte*)UINT_MAX));
        /* we have the thread_initexit_lock so no race here */
        index = find_unused_ldt_index();
        selector = LDT_SELECTOR(index);
        ASSERT(index != -1);
        create_ldt_entry((void *)segment, PAGE_SIZE, index);
        os_tls->tls_type = TLS_TYPE_LDT;
        WRITE_DR_SEG(selector); /* macro needs lvalue! */
        LOG(GLOBAL, LOG_THREADS, 1,
            "os_tls_init: modify_ldt successful for base "PFX" w/ index %d\n",
            segment, index);
    }

    os_tls->ldt_index = index;
}
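For reference, the initialize_ldt_struct()/SYS_set_thread_area path above corresponds to the plain Linux set_thread_area interface, which takes a struct user_desc naming a GDT TLS slot (or -1 to let the kernel pick one). A standalone sketch of that interface outside DynamoRIO, assuming a 32-bit build where the syscall is available; the selector math (index << 3 | 3) mirrors what a GDT_SELECTOR-style macro would produce:

#include <asm/ldt.h>       /* struct user_desc */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static char tls_block[4096] __attribute__((aligned(4096)));

int main(void)
{
    struct user_desc desc = {
        .entry_number = -1,               /* let the kernel pick a free TLS slot */
        .base_addr = (unsigned long) tls_block,
        .limit = 0xfffff,
        .seg_32bit = 1,
        .limit_in_pages = 1,
        .useable = 1,
    };

    if (syscall(SYS_set_thread_area, &desc) != 0) {
        perror("set_thread_area");
        return 1;
    }

    /* Build a user-mode GDT selector (RPL 3, TI = 0) and load it into %gs. */
    unsigned short sel = (unsigned short) ((desc.entry_number << 3) | 3);
    asm volatile ("movw %0, %%gs" : : "r" (sel));

    printf("TLS installed at GDT index %u, selector 0x%x\n",
        desc.entry_number, sel);
    return 0;
}

Unlike this sketch, tls_thread_init() passes a specific entry_number chosen by choose_gdt_slots() so every thread reuses the same slot and the selector stays constant across the process.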
/*
 * This function takes care of proper setup of IDT and IDTR.
 */
void idt_init(void)
{
    idescriptor_t *d;
    unsigned int i;

    for (i = 0; i < IDT_ITEMS; i++) {
        d = &idt[i];
        d->unused = 0;
        d->selector = GDT_SELECTOR(KTEXT_DES);
        d->present = 1;
        d->type = AR_INTERRUPT;  /* interrupt gate: interrupts are masked on entry */
    }

    d = &idt[0];
    idt_setoffset(d++, (uintptr_t) &int_0);
    idt_setoffset(d++, (uintptr_t) &int_1);
    idt_setoffset(d++, (uintptr_t) &int_2);
    idt_setoffset(d++, (uintptr_t) &int_3);
    idt_setoffset(d++, (uintptr_t) &int_4);
    idt_setoffset(d++, (uintptr_t) &int_5);
    idt_setoffset(d++, (uintptr_t) &int_6);
    idt_setoffset(d++, (uintptr_t) &int_7);
    idt_setoffset(d++, (uintptr_t) &int_8);
    idt_setoffset(d++, (uintptr_t) &int_9);
    idt_setoffset(d++, (uintptr_t) &int_10);
    idt_setoffset(d++, (uintptr_t) &int_11);
    idt_setoffset(d++, (uintptr_t) &int_12);
    idt_setoffset(d++, (uintptr_t) &int_13);
    idt_setoffset(d++, (uintptr_t) &int_14);
    idt_setoffset(d++, (uintptr_t) &int_15);
    idt_setoffset(d++, (uintptr_t) &int_16);
    idt_setoffset(d++, (uintptr_t) &int_17);
    idt_setoffset(d++, (uintptr_t) &int_18);
    idt_setoffset(d++, (uintptr_t) &int_19);
    idt_setoffset(d++, (uintptr_t) &int_20);
    idt_setoffset(d++, (uintptr_t) &int_21);
    idt_setoffset(d++, (uintptr_t) &int_22);
    idt_setoffset(d++, (uintptr_t) &int_23);
    idt_setoffset(d++, (uintptr_t) &int_24);
    idt_setoffset(d++, (uintptr_t) &int_25);
    idt_setoffset(d++, (uintptr_t) &int_26);
    idt_setoffset(d++, (uintptr_t) &int_27);
    idt_setoffset(d++, (uintptr_t) &int_28);
    idt_setoffset(d++, (uintptr_t) &int_29);
    idt_setoffset(d++, (uintptr_t) &int_30);
    idt_setoffset(d++, (uintptr_t) &int_31);
    idt_setoffset(d++, (uintptr_t) &int_32);
    idt_setoffset(d++, (uintptr_t) &int_33);
    idt_setoffset(d++, (uintptr_t) &int_34);
    idt_setoffset(d++, (uintptr_t) &int_35);
    idt_setoffset(d++, (uintptr_t) &int_36);
    idt_setoffset(d++, (uintptr_t) &int_37);
    idt_setoffset(d++, (uintptr_t) &int_38);
    idt_setoffset(d++, (uintptr_t) &int_39);
    idt_setoffset(d++, (uintptr_t) &int_40);
    idt_setoffset(d++, (uintptr_t) &int_41);
    idt_setoffset(d++, (uintptr_t) &int_42);
    idt_setoffset(d++, (uintptr_t) &int_43);
    idt_setoffset(d++, (uintptr_t) &int_44);
    idt_setoffset(d++, (uintptr_t) &int_45);
    idt_setoffset(d++, (uintptr_t) &int_46);
    idt_setoffset(d++, (uintptr_t) &int_47);
    idt_setoffset(d++, (uintptr_t) &int_48);
    idt_setoffset(d++, (uintptr_t) &int_49);
    idt_setoffset(d++, (uintptr_t) &int_50);
    idt_setoffset(d++, (uintptr_t) &int_51);
    idt_setoffset(d++, (uintptr_t) &int_52);
    idt_setoffset(d++, (uintptr_t) &int_53);
    idt_setoffset(d++, (uintptr_t) &int_54);
    idt_setoffset(d++, (uintptr_t) &int_55);
    idt_setoffset(d++, (uintptr_t) &int_56);
    idt_setoffset(d++, (uintptr_t) &int_57);
    idt_setoffset(d++, (uintptr_t) &int_58);
    idt_setoffset(d++, (uintptr_t) &int_59);
    idt_setoffset(d++, (uintptr_t) &int_60);
    idt_setoffset(d++, (uintptr_t) &int_61);
    idt_setoffset(d++, (uintptr_t) &int_62);
    idt_setoffset(d++, (uintptr_t) &int_63);
}
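This amd64 variant fills the same table, but each gate is a 16-byte descriptor whose handler offset is split into three parts (bits 0-15, 16-31, and 32-63), with room for an IST index; there is no syscall gate because amd64 system calls use the SYSCALL instruction rather than an IDT vector. A sketch of the corresponding 64-bit idt_setoffset(), with illustrative field names:

#include <stdint.h>

/* Hypothetical amd64 interrupt gate layout (packed, 16 bytes). */
typedef struct {
    uint16_t offset_0_15;
    uint16_t selector;
    unsigned ist : 3;          /* interrupt stack table index, 0 = legacy stack */
    unsigned unused : 5;
    unsigned type : 4;
    unsigned : 1;
    unsigned dpl : 2;
    unsigned present : 1;
    uint16_t offset_16_31;
    uint32_t offset_32_63;
    uint32_t reserved;
} __attribute__((packed)) idescriptor64_sketch_t;

static void sketch_idt_setoffset64(idescriptor64_sketch_t *d, uint64_t offset)
{
    d->offset_0_15 = offset & 0xffff;
    d->offset_16_31 = (offset >> 16) & 0xffff;
    d->offset_32_63 = (uint32_t) (offset >> 32);
}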