ac_bool test_crs(void) { ac_bool error = AC_FALSE; union cr0_u cr0 = { .raw = get_cr0() }; // cr1 is reserved ac_uint cr2 = get_cr2(); union cr3_u cr3 = { .raw = get_cr3() }; union cr4_u cr4 = { .raw = get_cr4() }; ac_uint cr8 = get_cr8(); print_cr0("cr0", cr0.raw); ac_printf("cr2: 0x%p\n", cr2); print_cr3("cr3", cr3.raw); print_cr4("cr4", cr4.raw); ac_printf("cr8: 0x%p\n", cr8); set_cr0(cr0.raw); // cr2 is read only set_cr3(cr3.raw); set_cr4(cr4.raw); set_cr8(cr8); ac_uint cr0_1 = get_cr0(); ac_uint cr3_1 = get_cr3(); ac_uint cr4_1 = get_cr4(); ac_uint cr8_1 = get_cr8(); error |= AC_TEST(cr0.raw == cr0_1); error |= AC_TEST(cr3.raw == cr3_1); error |= AC_TEST(cr4.raw == cr4_1); error |= AC_TEST(cr8 == cr8_1); return error; }
/**
 * @brief C entry point, reached from the multiboot assembly stub.
 *
 * Computes the physical frame count from the boot loader's memory map,
 * initializes the LMM allocator and fences off memory it must not hand
 * out, parses the kernel command line, disables the FPU, programs the
 * PIC, and finally transfers control to kernel_main() (never returns).
 *
 * @param info   multiboot information structure from the boot loader
 * @param istack initial stack pointer (unused in this function)
 */
void mb_entry(mbinfo_t *info, void *istack)
{
    int argc;
    char **argv;
    char **envp;

    /* Want (kilobytes*1024)/PAGE_SIZE, but definitely avoid overflow */
    /* mem_upper is the KB count above 1MB; +1024 accounts for the low 1MB. */
    n_phys_frames = (info->mem_upper+1024)/(PAGE_SIZE/1024);
    /* The kernel assumes there is at least some memory above USER_MEM_START. */
    assert(n_phys_frames > (USER_MEM_START/PAGE_SIZE));

    // LMM: init malloc_lmm and reserve memory holding this executable
    mb_util_lmm(info, &malloc_lmm);
    // LMM: don't give out memory under 1 megabyte
    lmm_remove_free(&malloc_lmm, (void*)0, 0x100000);
    // LMM: don't give out memory between USER_MEM_START and infinity
    // (-8 - USER_MEM_START wraps to "almost everything above USER_MEM_START";
    //  presumably -8 rather than -1 keeps the end aligned — TODO confirm)
    lmm_remove_free(&malloc_lmm, (void*)USER_MEM_START, -8 - USER_MEM_START);
    // lmm_dump(&malloc_lmm);

    mb_util_cmdline(info, &argc, &argv, &envp);

    // Having done that, let's tell Simics we've booted.
    sim_booted(argv[0]);

    /* Disable floating-point unit:
     * inadvisable for kernel, requires context-switch code for users */
    set_cr0(get_cr0() | CR0_EM);

    /* Initialize the PIC so that IRQs use different IDT slots than
     * CPU-defined exceptions. */
    interrupt_setup();

    kernel_main(info, argc, argv, envp);
}
void init_early_pagination (void) { unsigned i; u32 *pd0; u32 *pt0; u32 page_it; pd0 = (u32*)PDBOOT_ADDR; for (i = 0; i < PD_SIZE; i++) pd0[i] = P_NULL; pd0[0] = PTBOOT_ADDR | P_PRESENT | P_WRITABLE; pt0 = (u32*)PTBOOT_ADDR; page_it = 0; for (i = 0; i < PT_SIZE; i++) { pt0[i] = page_it | P_PRESENT | P_WRITABLE; page_it += PAGE_SIZE; } SET_PAGE_DIR(PDBOOT_ADDR); set_cr0(get_cr0() | CR0_PG | CR0_WP); }
/*
 * Look for FPU and initialize it.
 * Called on each CPU.
 *
 * Probes for a coprocessor by executing fninit and checking that the
 * status/control words read back with the documented post-reset
 * patterns.  On success, enables FXSAVE/FXRSTOR and SSE when the CPU
 * advertises them, then sets CR0.TS|MP so the first FPU use traps
 * (lazy FPU switching).  On failure, sets CR0.EM to emulate/trap all
 * FPU instructions.  Side effects: writes fp_kind, CR0 and CR4.
 */
void init_fpu(void)
{
	unsigned short status, control;

	/*
	 * Check for FPU by initializing it,
	 * then trying to read the correct bit patterns from
	 * the control and status registers.
	 */
	set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */

	fninit();
	status = fnstsw();
	fnstcw(&control);

	/* After fninit a real x87 clears the status byte and sets the
	 * control word to 0x037F; the masks below verify those patterns. */
	if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) {
		fp_kind = FP_387; /* assume we have a 387 compatible instruction set */
		/* Use FPU save/restore instructions if available */
		if (cpuid_features() & CPUID_FEATURE_FXSR) {
			fp_kind = FP_FXSR;
			set_cr4(get_cr4() | CR4_FXS);
			printf("Enabling XMM register save/restore");
			/* And allow SIMD instructions if present */
			if (cpuid_features() & CPUID_FEATURE_SSE) {
				printf(" and SSE/SSE2");
				set_cr4(get_cr4() | CR4_XMM);
			}
			printf(" opcodes\n");
		}
		/*
		 * Trap wait instructions. Turn off FPU for now.
		 * (TS makes the next FPU/WAIT instruction fault so the
		 * kernel can lazily load the thread's FPU state.)
		 */
		set_cr0(get_cr0() | CR0_TS | CR0_MP);
	} else {
		/*
		 * NO FPU.
		 */
		fp_kind = FP_NO;
		set_cr0(get_cr0() | CR0_EM);
	}
}
/* * Look for FPU and initialize it. * Called on each CPU. */ void init_fpu() { unsigned short status, control; #ifdef MACH_HYP clear_ts(); #else /* MACH_HYP */ unsigned int native = 0; if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486) native = CR0_NE; /* * Check for FPU by initializing it, * then trying to read the correct bit patterns from * the control and status registers. */ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | native); /* allow use of FPU */ #endif /* MACH_HYP */ fninit(); status = fnstsw(); fnstcw(&control); if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { /* * We have a FPU of some sort. * Compare -infinity against +infinity * to check whether we have a 287 or a 387. */ volatile double fp_infinity, fp_one, fp_zero; fp_one = 1.0; fp_zero = 0.0; fp_infinity = fp_one / fp_zero; if (fp_infinity == -fp_infinity) { /* * We have an 80287. */ fp_kind = FP_287; asm volatile(".byte 0xdb; .byte 0xe4"); /* fnsetpm */ } else { /* * We have a 387. */ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
/*
 * Module init: hijack syscall slot 337 so it runs a private copy of
 * hook_337_syscall().
 *
 * Allocates a zeroed kernel page, copies a message string followed by
 * the machine code of hook_337_syscall() into it, patches two immediate
 * operands inside the copied code (the page address and a rel32 call
 * displacement to printk()), and finally — with write protection
 * dropped via set_cr0() — points sys_call_table[337] at the copy.
 *
 * Returns 0 on success, -1 if the page allocation fails.
 */
static int __init hidemod_init(void)
{
	unsigned long **sys_call_table;
	unsigned int cr0, call_offset;
	int len;
	char *page_vaddr, *p;
	char *str = "<1>The module has been removed!\nJust a memory leak\n";

	/* NOTE(review): hard-coded sys_call_table address — only valid for
	 * the exact kernel build it was lifted from; verify against
	 * System.map before reuse. */
	sys_call_table = (unsigned long **)0xc0798328;

	/*
	 * I learn it from kernel func: sysenter_setup
	 * (The code of memory management in 2.6 is hard :0)
	 */
	page_vaddr = (char *)get_zeroed_page(GFP_ATOMIC);
	if (page_vaddr == NULL) {
		printk(KERN_ALERT "Unable get zero page \n");
		return -1;
	}

	/*copy string(printk()'s argument) to my page*/
	len = strlen(str);
	memcpy(page_vaddr, str, len);

	/*copy code to my page*/
	/* skip one extra byte so the string keeps its NUL terminator */
	p = page_vaddr + len + 1;
	len = text_len(hook_337_syscall);
	memcpy(p, (void *)hook_337_syscall, len);

	/*
	 * change the offset
	 * number 9/14 is counted from the assembled code
	 */
	/* rel32 call displacement = target - address of the next instruction;
	 * apparently the call opcode sits at p[13] with its displacement at
	 * p[14] — TODO confirm against a disassembly of hook_337_syscall */
	call_offset = (unsigned int)printk - (unsigned int)(&p[13] + 5);
	*(unsigned int *)&p[9] = (unsigned int)page_vaddr;
	*(unsigned int *)&p[14] = call_offset;

	/* clear CR0.WP so the read-only syscall table can be patched,
	 * swap in the hook, then restore the saved CR0 value */
	cr0 = set_cr0();
	sys_call_table[337] = (unsigned long *)p;
	set_back_cr0(cr0);

	return 0;
}
/* * enable the Write Protection bit in CR0 register */ kern_return_t enable_wp(void) { uintptr_t cr0; // retrieve current value cr0 = get_cr0(); // add the WP bit cr0 = cr0 | CR0_WP; // and write it back set_cr0(cr0); // verify if we were successful if ((get_cr0() & CR0_WP) != 0) { return KERN_SUCCESS; } else { return KERN_FAILURE; } }
/*
 * enable/disable the Write Protection bit in CR0 register
 *
 * @param enable TRUE to set CR0.WP, FALSE to clear it
 * @return KERN_SUCCESS if CR0 reads back with the requested WP state,
 *         KERN_FAILURE otherwise
 */
kern_return_t enable_wp(boolean_t enable)
{
    uintptr_t cr0;
    // retrieve current value
    cr0 = get_cr0();
    if (enable) {
        // add the WP bit
        cr0 = cr0 | CR0_WP;
    } else {
        // remove the WP bit
        cr0 = cr0 & ~CR0_WP;
    }
    // and write it back
    set_cr0(cr0);
    // verify if we were successful: read CR0 back exactly once
    // (the original re-read it twice) and compare against the request
    boolean_t wp_set = (get_cr0() & CR0_WP) != 0;
    if ((wp_set && enable) || (!wp_set && !enable)) {
        return KERN_SUCCESS;
    } else {
        return KERN_FAILURE;
    }
}
/*
 * One-time CPU setup: install the trap/IRQ/GDT/TSS tables and turn on
 * alignment checking (EFLAGS.AC plus CR0.AM) for all memory accesses.
 */
void base_cpu_init(void)
{
    /* Initialize the processor tables. */
    base_trap_init();
    base_irq_init();
    base_gdt_init();
    base_tss_init();

    /*
     * Setting these flags sets up alignment checking of
     * all memory accesses.
     */
    unsigned int flags = get_eflags();
    flags |= EFL_AC;
    set_eflags(flags);

    unsigned int control = get_cr0();
    control |= CR0_AM;
    set_cr0(control);
}
/*
 * One-shot kernel memory bring-up.
 *
 * Reserves the kernel/video/AHCI/physical-map virtual regions in the
 * kernel mm, walks the boot loader's memory map (tag 0x9001 in modulep)
 * to register usable physical chunks, marks the pages already in use,
 * builds the kernel page tables, enables CR0.WP, loads CR3, and records
 * the video/phys-map virtual bases for later use.
 *
 * @param kernmem          kernel virtual base address
 * @param p_kern_start     physical start of the kernel image
 * @param p_kern_end       physical end of the kernel image
 * @param p_vdo_buff_start physical address of the video buffer
 * @param modulep          boot information tag list
 * @return 0 on success, -1 on any failure
 */
int setup_kernel_memory(uint64_t kernmem, uint64_t p_kern_start,
                        uint64_t p_kern_end, uint64_t p_vdo_buff_start,
                        uint32_t *modulep)
{
    struct kernel_mm_struct *mm = get_kernel_mm();

    // Set up vma
    // Kernel virtual memory space
    if(-1 == set_kernel_memory(kernmem, kernmem - p_kern_start + p_kern_end)) {
        return -1;
    }
    // Video buffer memory
    // TODO: Check return value
    uint64_t vdo_start_addr = get_unmapped_area(&(mm->mmap),
        kernmem + p_vdo_buff_start, SIZEOF_PAGE);
    if(-1 == set_video_buffer_memory(vdo_start_addr, vdo_start_addr + SIZEOF_PAGE)) {
        return -1;
    }
    //ASCI memory
    uint64_t ahci_start_addr = get_unmapped_area(&(mm->mmap), kernmem, SIZEOF_PAGE);
    if(-1 == set_ahci_memory(ahci_start_addr, ahci_start_addr + SIZEOF_PAGE)) {
        return -1;
    }

    // Scan physical pages
    // Layout of one entry in the boot loader's physical memory map.
    struct smap_t {
        uint64_t base, length;
        uint32_t type;
    }__attribute__((packed)) *smap;
    uint64_t phys_end_addr = 0;
    int lower_chunk = 0;
    uint64_t lower_chunk_start = 0;
    uint64_t lower_chunk_end = 0;

    // Advance through the tag list until the memory-map tag (0x9001).
    while(modulep[0] != 0x9001)
        modulep += modulep[1]+2;
    for(smap = (struct smap_t*)(modulep+2);
        smap < (struct smap_t*)((char*)modulep + modulep[1] + 2*4);
        ++smap) {
        // type 1 == usable RAM; skip zero-length entries
        if (smap->type == 1 && smap->length != 0) {
            // track the highest usable physical address
            if(phys_end_addr < smap->base + smap->length) {
                phys_end_addr = smap->base + smap->length;
            }
            // remember the first (lowest) usable chunk separately
            if(!lower_chunk) {
                lower_chunk_start = smap->base;
                lower_chunk_end = smap->base + smap->length;
                lower_chunk ++;
            }
            if(!new_chunk(smap->base, smap->base + smap->length)) {
                return -1;
            }
        }
    }

    // Reserve a virtual window covering ALL of physical memory.
    // TODO: Check return value
    uint64_t phys_mem_offset = get_unmapped_area(&(mm->mmap), kernmem, phys_end_addr);
    if(-1 == set_phys_memory(phys_mem_offset, phys_mem_offset + phys_end_addr)) {
        return -1;
    }

    if(-1 == scan_all_chunks()) {
        return -1;
    }

    // Mark used physical pages
    // The first page - just like that
    if(0 > inc_ref_count_pages(0, SIZEOF_PAGE)) {
        return -1;
    }
    // Video buffer memory - is not part of chunks obtained from modulep. No
    // need to mark.
    // Kernel physical pages
    if(0 > inc_ref_count_pages(p_kern_start, p_kern_end)) {
        return -1;
    }
    // Ignore lower chunk (ref-counting it keeps it out of the free pool)
    if(0 > inc_ref_count_pages(lower_chunk_start, lower_chunk_end)) {
        return -1;
    }
    // Initialize free pages
    if(-1 == init_free_phys_page_manager()) {
        return -1;
    }

    /*
    printf("start kernel: %p\n", mm->start_kernel);
    printf("end kernel  : %p\n", mm->end_kernel);
    printf("start vdo   : %p\n", mm->start_vdo_buff);
    printf("end vdo     : %p\n", mm->end_vdo_buff);
    printf("start phys  : %p\n", mm->start_phys_mem);
    printf("end phys    : %p\n", mm->end_phys_mem);
    printf("start ahci  : %p\n", mm->start_ahci_mem);
    printf("end ahci    : %p\n", mm->end_ahci_mem);
    */

    // Set up page tables: map the kernel image page by page
    uint64_t pml4_page = get_selfref_PML4(NULL);
    uint64_t paddr = p_kern_start;
    uint64_t vaddr = kernmem;
    while(paddr < p_kern_end) {
        update_page_table_idmap(pml4_page, paddr, vaddr, PAGE_TRANS_READ_WRITE);
        paddr += SIZEOF_PAGE;
        vaddr += SIZEOF_PAGE;
    }

    // TODO: Remove user supervisor permission from video buffer
    update_page_table_idmap(pml4_page, p_vdo_buff_start, vdo_start_addr,
        PAGE_TRANS_READ_WRITE | PAGE_TRANS_USER_SUPERVISOR);
    update_page_table_idmap(pml4_page, P_AHCI_START, ahci_start_addr,
        PAGE_TRANS_READ_WRITE | PAGE_TRANS_USER_SUPERVISOR);
    phys_mem_offset_map(pml4_page, phys_mem_offset);

    // Protect read-only pages from supervisor-level writes
    set_cr0(get_cr0() | CR0_WP);

    // Set cr3 (PML4 physical frame number goes in bits 51:12)
    struct str_cr3 cr3 = get_default_cr3();
    cr3.p_PML4E_4Kb = pml4_page >> 12;
    set_cr3(cr3);

    // Indicate memory set up done
    kmDeviceMemorySetUpDone();

    global_video_vaddr = (void *)vdo_start_addr;
    set_phys_mem_virt_map_base(phys_mem_offset);
    return 0;
}
/** * Set up the Boot Page-Directory and Tables, and enable Paging. * * This function is executed at the physicallKernel address. * Therefore it can not use global variables or switch() statements. **/ void paging_init(void) { uint index; uint loop; uint* table; // // Boot Page-Directory // uint* dir = (uint*)0x124000; index = 0; // The 1e Page-Table. dir[index++] = 0x125000 | X86_PAGE_PRESENT | X86_PAGE_WRITE; // The rest of the tables till 3GB are not present. for (loop = 0; loop < 768 - 1; loop++) dir[index++] = 0; // Not present. // The 2e Page-Table. dir[index++] = 0x126000 | X86_PAGE_PRESENT | X86_PAGE_WRITE; // The rest of the tables is not present. for (loop = 0; loop < 256 - 1; loop++) dir[index++] = 0; // // Page-Table 1. // table = (uint*)0x125000; index = 0; // Map the First 2MB (512 Pages) for (loop = 0; loop < 512; loop++) table[index++] = (PAGE_SIZE * loop) | X86_PAGE_PRESENT | X86_PAGE_WRITE; // The rest is not present. for (loop = 0; loop < 512; loop++) table[index++] = 0; // // Page-Table 2. // table = (uint*)0x126000; index = 0; // Map the First 2MB (512 Pages) for (loop = 0; loop < 512; loop++) table[index++] = (PAGE_SIZE * loop) | X86_PAGE_PRESENT | X86_PAGE_WRITE; // The rest is not present. for (loop = 0; loop < 512; loop++) table[index++] = 0; // load cr3 (Page-Directory Base Register) with the Page-Directory we are going to use, // which is the Page-Directory from the Kernel process. set_cr3((uint)dir); // // Enable paging. // set_cr0(get_cr0() | 0x80000000); // Set the paging bit. }
/*
 * Call a 32-bit EFI runtime-services entry point from the 64-bit kernel.
 *
 * Validates arguments, then (on x86_64) takes the EFI lock with
 * interrupts off, switches to page tables that double-map high kernel
 * space into the low 4GB, drops into compatibility-mode segments, makes
 * the call via _pal_efi_call_in_32bit_mode_asm, and restores segments,
 * GS base, CR3 and CR0 afterwards.  The EFI status is returned through
 * *efi_status (taken from efi_reg->rax).
 *
 * @param func                 physical/low address of the EFI function (non-zero)
 * @param efi_reg              register file passed to/from the call
 * @param stack_contents       arguments to copy onto the 32-bit stack
 * @param stack_contents_size  byte count, must be a 16-byte multiple
 * @param efi_status           out: low 32 bits of efi_reg->rax
 * @return KERN_SUCCESS, or an error for bad arguments / missing EFI tables
 */
kern_return_t
pal_efi_call_in_32bit_mode(uint32_t func,
                           struct pal_efi_registers *efi_reg,
                           void *stack_contents,
                           size_t stack_contents_size, /* 16-byte multiple */
                           uint32_t *efi_status)
{
    DBG("pal_efi_call_in_32bit_mode(0x%08x, %p, %p, %lu, %p)\n",
        func, efi_reg, stack_contents, stack_contents_size, efi_status);

    if (func == 0) {
        return KERN_INVALID_ADDRESS;
    }

    if ((efi_reg == NULL)
        || (stack_contents == NULL)
        || (stack_contents_size % 16 != 0)) {
        return KERN_INVALID_ARGUMENT;
    }

    if (!gPEEFISystemTable || !gPEEFIRuntimeServices) {
        return KERN_NOT_SUPPORTED;
    }

    DBG("pal_efi_call_in_32bit_mode() efi_reg:\n");
    DBG("  rcx: 0x%016llx\n", efi_reg->rcx);
    DBG("  rdx: 0x%016llx\n", efi_reg->rdx);
    DBG("  r8:  0x%016llx\n", efi_reg->r8);
    DBG("  r9:  0x%016llx\n", efi_reg->r9);
    DBG("  rax: 0x%016llx\n", efi_reg->rax);

    DBG("pal_efi_call_in_32bit_mode() stack:\n");
#if PAL_DEBUG
    size_t i;
    for (i = 0; i < stack_contents_size; i += sizeof(uint32_t)) {
        uint32_t *p = (uint32_t *) ((uintptr_t)stack_contents + i);
        DBG("  %p: 0x%08x\n", p, *p);
    }
#endif

#ifdef __x86_64__
    /*
     * Ensure no interruptions.
     * Taking a spinlock for serialization is technically unnecessary
     * because the EFIRuntime kext should serialize.
     */
    boolean_t istate = ml_set_interrupts_enabled(FALSE);
    simple_lock(&pal_efi_lock);

    /*
     * Switch to special page tables with the entire high kernel space
     * double-mapped into the bottom 4GB.
     *
     * NB: We assume that all data passed exchanged with RuntimeServices is
     * located in the 4GB of KVA based at VM_MIN_ADDRESS. In particular, kexts
     * loaded the basement (below VM_MIN_ADDRESS) cannot pass static data.
     * Kernel stack and heap space is OK.
     */
    MARK_CPU_IDLE(cpu_number());
    /* Save CR3/CR0 so they can be restored after the call. */
    pal_efi_saved_cr3 = get_cr3_raw();
    pal_efi_saved_cr0 = get_cr0();
    /* Alias the kernel PML4 slot into both its own slot and slot 0,
     * producing the low-4GB double mapping described above. */
    IDPML4[KERNEL_PML4_INDEX] = IdlePML4[KERNEL_PML4_INDEX];
    IDPML4[0]                 = IdlePML4[KERNEL_PML4_INDEX];
    clear_ts();
    set_cr3_raw((uint64_t) ID_MAP_VTOP(IDPML4));

    swapgs();                       /* Save kernel's GS base */

    /* Set segment state ready for compatibility mode */
    set_gs(NULL_SEG);
    set_fs(NULL_SEG);
    set_es(KERNEL_DS);
    set_ds(KERNEL_DS);
    set_ss(KERNEL_DS);

    _pal_efi_call_in_32bit_mode_asm(func,
                                    efi_reg,
                                    stack_contents,
                                    stack_contents_size);

    /* Restore NULL segment state */
    set_ss(NULL_SEG);
    set_es(NULL_SEG);
    set_ds(NULL_SEG);

    swapgs();                       /* Restore kernel's GS base */

    /* Restore the 64-bit user GS base we just destroyed */
    wrmsr64(MSR_IA32_KERNEL_GS_BASE,
            current_cpu_datap()->cpu_uber.cu_user_gs_base);

    /* End of mapping games */
    set_cr3_raw(pal_efi_saved_cr3);
    set_cr0(pal_efi_saved_cr0);

    MARK_CPU_ACTIVE(cpu_number());

    simple_unlock(&pal_efi_lock);
    ml_set_interrupts_enabled(istate);
#else
    /* 32-bit kernel: no mode switching needed, call directly. */
    _pal_efi_call_in_32bit_mode_asm(func,
                                    efi_reg,
                                    stack_contents,
                                    stack_contents_size);
#endif

    *efi_status = (uint32_t)efi_reg->rax;
    DBG("pal_efi_call_in_32bit_mode() efi_status: 0x%x\n", *efi_status);

    return KERN_SUCCESS;
}