/*
 * We can receive a page fault from a migrating PTE at any time.
 * Handle it by just waiting until the fault resolves.
 *
 * It's also possible to get a migrating kernel PTE that resolves
 * itself during the downcall from hypervisor to Linux.  We just check
 * here to see if the PTE seems valid, and if so we retry it.
 *
 * NOTE! We MUST NOT take any locks for this case.  We may be in an
 * interrupt or a critical region, and must do as little as possible.
 * Similarly, we can't use atomic ops here, since we may be handling a
 * fault caused by an atomic op access.
 *
 * If we find a migrating PTE while we're in an NMI context, and we're
 * at a PC that has a registered exception handler, we don't wait,
 * since this thread may (e.g.) have been interrupted while migrating
 * its own stack, which would then cause us to self-deadlock.
 */
static int handle_migrating_pte(pgd_t *pgd, int fault_num,
				unsigned long address, unsigned long pc,
				int is_kernel_mode, int write)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;

	if (pgd_addr_invalid(address))
		return 0;

	pgd += pgd_index(address);
	pud = pud_offset(pgd, address);
	if (!pud || !pud_present(*pud))
		return 0;
	pmd = pmd_offset(pud, address);
	if (!pmd || !pmd_present(*pmd))
		return 0;
	pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
		pte_offset_kernel(pmd, address);
	pteval = *pte;
	if (pte_migrating(pteval)) {
		if (in_nmi() && search_exception_tables(pc))
			return 0;
		wait_for_migration(pte);
		return 1;
	}

	if (!is_kernel_mode || !pte_present(pteval))
		return 0;
	if (fault_num == INT_ITLB_MISS) {
		if (pte_exec(pteval))
			return 1;
	} else if (write) {
		if (pte_write(pteval))
			return 1;
	} else {
		if (pte_read(pteval))
			return 1;
	}

	return 0;
}
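
/*
 * A minimal caller-side sketch, not taken from the code above: a fault
 * entry path would typically try the lock-free migration check first
 * and only fall back to the normal, lock-taking fault handler when it
 * returns 0.  The get_current_pgd() and handle_fault_slow_path()
 * helpers named here are assumptions for illustration only.
 */
static int try_fault_fast_path(int fault_num, unsigned long address,
			       unsigned long pc, int is_kernel_mode,
			       int write)
{
	pgd_t *pgd = get_current_pgd();		/* assumed helper */

	/* Returns 1 when the fault was only a migrating PTE; just retry. */
	if (handle_migrating_pte(pgd, fault_num, address, pc,
				 is_kernel_mode, write))
		return 1;

	/* Otherwise take the slow path, which may sleep and take locks. */
	return handle_fault_slow_path(fault_num, address, pc,
				      is_kernel_mode, write);
}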
/*
 * skas-mode variant: brings the host mappings for [start_addr, end_addr)
 * back in sync with the page tables, driving the remote address space
 * through the mm's file descriptor.
 */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pud_t *npud;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr, end;
	int r, w, x, err, fd;

	if (mm == NULL)
		return;

	fd = mm->context.skas.mm_fd;
	for (addr = start_addr; addr < end_addr;) {
		npgd = pgd_offset(mm, addr);
		if (!pgd_present(*npgd)) {
			if (force || pgd_newpage(*npgd)) {
				end = addr + PGDIR_SIZE;
				if (end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pgd_mkuptodate(*npgd);
			}
			addr += PGDIR_SIZE;
			continue;
		}

		npud = pud_offset(npgd, addr);
		if (!pud_present(*npud)) {
			if (force || pud_newpage(*npud)) {
				end = addr + PUD_SIZE;
				if (end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pud_mkuptodate(*npud);
			}
			addr += PUD_SIZE;
			continue;
		}

		npmd = pmd_offset(npud, addr);
		if (!pmd_present(*npmd)) {
			if (force || pmd_newpage(*npmd)) {
				end = addr + PMD_SIZE;
				if (end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
			continue;
		}

		npte = pte_offset_kernel(npmd, addr);
		r = pte_read(*npte);
		w = pte_write(*npte);
		x = pte_exec(*npte);
		/*
		 * Withhold write access until the PTE is dirty, and all
		 * access until it is young, so the resulting host faults
		 * keep the dirty/accessed bits up to date.
		 */
		if (!pte_dirty(*npte))
			w = 0;
		if (!pte_young(*npte)) {
			r = 0;
			w = 0;
		}
		if (force || pte_newpage(*npte)) {
			err = unmap(fd, (void *) addr, PAGE_SIZE);
			if (err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if (pte_present(*npte))
				map(fd, addr, pte_val(*npte) & PAGE_MASK,
				    PAGE_SIZE, r, w, x);
		}
		else if (pte_newprot(*npte))
			protect(fd, addr, PAGE_SIZE, r, w, x, 1);
		*npte = pte_mkuptodate(*npte);
		addr += PAGE_SIZE;
	}
}
/*
 * tt-mode variant: updates the mappings of the current process's own
 * address space, so first check that we really are running in the
 * process that owns it.
 */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr;
	int r, w, x, err;

	if ((current->thread.mode.tt.extern_pid != -1) &&
	    (current->thread.mode.tt.extern_pid != os_getpid()))
		panic("fix_range fixing wrong address space, current = 0x%p",
		      current);
	if (mm == NULL)
		return;

	for (addr = start_addr; addr < end_addr;) {
		if (addr == TASK_SIZE) {
			/* Skip over kernel text, kernel data, and physical
			 * memory, which don't have ptes, plus kernel virtual
			 * memory, which is flushed separately, and remap
			 * the process stack.  The only way to get here is
			 * if (end_addr == STACK_TOP) > TASK_SIZE, which is
			 * only true in the honeypot case.
			 */
			addr = STACK_TOP - ABOVE_KMEM;
			continue;
		}

		npgd = pgd_offset(mm, addr);
		npmd = pmd_offset(npgd, addr);
		if (pmd_present(*npmd)) {
			npte = pte_offset_kernel(npmd, addr);
			r = pte_read(*npte);
			w = pte_write(*npte);
			x = pte_exec(*npte);
			/* As in the skas variant, withhold permissions so
			 * that faults keep dirty/accessed bits up to date.
			 */
			if (!pte_dirty(*npte))
				w = 0;
			if (!pte_young(*npte)) {
				r = 0;
				w = 0;
			}
			if (force || pte_newpage(*npte)) {
				err = os_unmap_memory((void *) addr,
						      PAGE_SIZE);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				if (pte_present(*npte))
					map_memory(addr,
						   pte_val(*npte) & PAGE_MASK,
						   PAGE_SIZE, r, w, x);
			}
			else if (pte_newprot(*npte))
				protect_memory(addr, PAGE_SIZE, r, w, x, 1);
			*npte = pte_mkuptodate(*npte);
			addr += PAGE_SIZE;
		}
		else {
			if (force || pmd_newpage(*npmd)) {
				err = os_unmap_memory((void *) addr,
						      PMD_SIZE);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
		}
	}
}
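
/*
 * A minimal sketch, assumed rather than taken from the code above, of
 * how a flush_tlb_range()-style wrapper might drive fix_range(): an
 * ordinary flush passes force = 0, so only entries marked as new or
 * with changed protections are remapped or reprotected, whereas a full
 * address-space teardown would pass force = 1 to unmap everything in
 * the range.  The wrapper name is hypothetical.
 */
static inline void flush_user_range(struct mm_struct *mm,
				    unsigned long start, unsigned long end)
{
	/* Walk whole pages only; fix_range() advances in PAGE_SIZE steps. */
	fix_range(mm, start & PAGE_MASK, PAGE_ALIGN(end), 0);
}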