void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
		   pte_t *ptep, pte_t orig, int fullmm)
{
	if (tlb_type != hypervisor &&
	    pte_dirty(orig)) {
		unsigned long paddr, pfn = pte_pfn(orig);
		struct address_space *mapping;
		struct page *page;

		if (!pfn_valid(pfn))
			goto no_cache_flush;

		page = pfn_to_page(pfn);
		if (PageReserved(page))
			goto no_cache_flush;

		/* A real file page? */
		mapping = page_mapping(page);
		if (!mapping)
			goto no_cache_flush;

		paddr = (unsigned long) page_address(page);
		if ((paddr ^ vaddr) & (1 << 13))
			flush_dcache_page_all(mm, page);
	}

no_cache_flush:
	if (!fullmm)
		tlb_batch_add_one(mm, vaddr, pte_exec(orig));
}
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
		   pte_t *ptep, pte_t orig)
{
	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
	unsigned long nr;

	vaddr &= PAGE_MASK;
	if (pte_exec(orig))
		vaddr |= 0x1UL;

	if (tlb_type != hypervisor &&
	    pte_dirty(orig)) {
		unsigned long paddr, pfn = pte_pfn(orig);
		struct address_space *mapping;
		struct page *page;

		if (!pfn_valid(pfn))
			goto no_cache_flush;

		page = pfn_to_page(pfn);
		if (PageReserved(page))
			goto no_cache_flush;

		/* A real file page? */
		mapping = page_mapping(page);
		if (!mapping)
			goto no_cache_flush;

		paddr = (unsigned long) page_address(page);
		if ((paddr ^ vaddr) & (1 << 13))
			flush_dcache_page_all(mm, page);
	}

no_cache_flush:
	if (tb->fullmm) {
		put_cpu_var(tlb_batch);
		return;
	}

	nr = tb->tlb_nr;

	if (unlikely(nr != 0 && mm != tb->mm)) {
		flush_tlb_pending();
		nr = 0;
	}

	if (nr == 0)
		tb->mm = mm;

	tb->vaddrs[nr] = vaddr;
	tb->tlb_nr = ++nr;
	if (nr >= TLB_BATCH_NR)
		flush_tlb_pending();

	put_cpu_var(tlb_batch);
}
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
	pmd_t orig = *pmdp;

	*pmdp = pmd;

	if (mm == &init_mm)
		return;

	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
		/*
		 * Note that this routine only sets pmds for THP pages.
		 * Hugetlb pages are handled elsewhere.  We need to check
		 * for huge zero page.  Huge zero pages are like hugetlb
		 * pages in that there is no RSS, but there is the need
		 * for TSB entries.  So, huge zero page counts go into
		 * hugetlb_pte_count.
		 */
		if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
			if (is_huge_zero_page(pmd_page(pmd)))
				mm->context.hugetlb_pte_count++;
			else
				mm->context.thp_pte_count++;
		} else {
			if (is_huge_zero_page(pmd_page(orig)))
				mm->context.hugetlb_pte_count--;
			else
				mm->context.thp_pte_count--;
		}

		/* Do not try to allocate the TSB hash table if we
		 * don't have one already.  We have various locks held
		 * and thus we'll end up doing a GFP_KERNEL allocation
		 * in an atomic context.
		 *
		 * Instead, we let the first TLB miss on a hugepage
		 * take care of this.
		 */
	}

	if (!pmd_none(orig)) {
		addr &= HPAGE_MASK;
		if (pmd_trans_huge(orig)) {
			pte_t orig_pte = __pte(pmd_val(orig));
			bool exec = pte_exec(orig_pte);

			tlb_batch_add_one(mm, addr, exec, REAL_HPAGE_SHIFT);
			tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
					  REAL_HPAGE_SHIFT);
		} else {
			tlb_batch_pmd_scan(mm, addr, orig);
		}
	}
}
/*
 * We can receive a page fault from a migrating PTE at any time.
 * Handle it by just waiting until the fault resolves.
 *
 * It's also possible to get a migrating kernel PTE that resolves
 * itself during the downcall from hypervisor to Linux.  We just check
 * here to see if the PTE seems valid, and if so we retry it.
 *
 * NOTE! We MUST NOT take any locks for this case.  We may be in an
 * interrupt or a critical region, and must do as little as possible.
 * Similarly, we can't use atomic ops here, since we may be handling a
 * fault caused by an atomic op access.
 *
 * If we find a migrating PTE while we're in an NMI context, and we're
 * at a PC that has a registered exception handler, we don't wait,
 * since this thread may (e.g.) have been interrupted while migrating
 * its own stack, which would then cause us to self-deadlock.
 */
static int handle_migrating_pte(pgd_t *pgd, int fault_num,
				unsigned long address, unsigned long pc,
				int is_kernel_mode, int write)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;

	if (pgd_addr_invalid(address))
		return 0;

	pgd += pgd_index(address);
	pud = pud_offset(pgd, address);
	if (!pud || !pud_present(*pud))
		return 0;
	pmd = pmd_offset(pud, address);
	if (!pmd || !pmd_present(*pmd))
		return 0;
	pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
		pte_offset_kernel(pmd, address);
	pteval = *pte;
	if (pte_migrating(pteval)) {
		if (in_nmi() && search_exception_tables(pc))
			return 0;
		wait_for_migration(pte);
		return 1;
	}

	if (!is_kernel_mode || !pte_present(pteval))
		return 0;
	if (fault_num == INT_ITLB_MISS) {
		if (pte_exec(pteval))
			return 1;
	} else if (write) {
		if (pte_write(pteval))
			return 1;
	} else {
		if (pte_read(pteval))
			return 1;
	}

	return 0;
}
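/* Presumably invoked from set_pmd_at() when the PMD being replaced did not
 * map a transparent hugepage: walk each base-page PTE under @pmd and queue
 * a TLB flush for every valid entry, preserving its executable bit. */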
static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
			       pmd_t pmd)
{
	unsigned long end;
	pte_t *pte;

	pte = pte_offset_map(&pmd, vaddr);
	end = vaddr + HPAGE_SIZE;
	while (vaddr < end) {
		if (pte_val(*pte) & _PAGE_VALID) {
			bool exec = pte_exec(*pte);

			tlb_batch_add_one(mm, vaddr, exec);
		}
		pte++;
		vaddr += PAGE_SIZE;
	}
	pte_unmap(pte);
}
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
	pmd_t orig = *pmdp;

	*pmdp = pmd;

	if (mm == &init_mm)
		return;

	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
		if (pmd_val(pmd) & _PAGE_PMD_HUGE)
			mm->context.huge_pte_count++;
		else
			mm->context.huge_pte_count--;

		/* Do not try to allocate the TSB hash table if we
		 * don't have one already.  We have various locks held
		 * and thus we'll end up doing a GFP_KERNEL allocation
		 * in an atomic context.
		 *
		 * Instead, we let the first TLB miss on a hugepage
		 * take care of this.
		 */
	}

	if (!pmd_none(orig)) {
		addr &= HPAGE_MASK;
		if (pmd_trans_huge(orig)) {
			pte_t orig_pte = __pte(pmd_val(orig));
			bool exec = pte_exec(orig_pte);

			tlb_batch_add_one(mm, addr, exec);
			tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
		} else {
			tlb_batch_pmd_scan(mm, addr, orig);
		}
	}
}
/* Check the permissions of an address and return its type. */
static int memory_check_addr_perm_task(const void *addr, word *size, int write,
				       byte *read_only, byte *executable,
				       struct task_struct *task)
{
	struct vm_area_struct *vma;
	word start = ROUNDDOWN((word)addr, PAGE_SIZE);
	word end = ROUNDUP((word)addr + *size, PAGE_SIZE);
	word total_size = 0;
	byte local_read_only = 0;
	byte local_executable = 0;
	int ret = ADDR_UNDEF;
	int atomic;
#ifdef HAS_LOOKUP_ADDRESS
	pte_t *pte;
	unsigned int level;
#endif

	if (NULL == read_only) {
		read_only = &local_read_only;
	}
	if (NULL == executable) {
		executable = &local_executable;
	}

	*read_only = 0;
	*executable = 0;

	atomic = in_atomic();

	if (!atomic) {
		down_read(&task->mm->mmap_sem);
	}

	while (start < end) {
		if (task && task->mm) {
			/* check if it's a user address */
			vma = find_vma(task->mm, start);
			if (vma && vma->vm_start <= start) {
				if (ret != ADDR_UNDEF && ret != ADDR_OUTSIDE) {
					goto end;
				}

				if (!(vma->vm_flags & VM_READ)) {
					goto end;
				}

				if (!(vma->vm_flags & VM_WRITE)) {
					if (write) {
						/* no more writable bytes */
						goto end;
					} else if (ret != ADDR_UNDEF && !(*read_only)) {
						/* the permissions have changed.
						 * this is where we stop the buffer */
						goto end;
					}

					*read_only = 1;
				}

				start = vma->vm_end;
				total_size = start - (word)addr;
				ret = ADDR_OUTSIDE;
				continue;
			}
		}

		/* check if it's a kernel virtual address */
#ifdef HAS_LOOKUP_ADDRESS
		pte = lookup_address((unsigned long)addr, &level);
		if (NULL == pte) {
			goto end;
		}

		if (ret == ADDR_UNDEF) {
			*executable = pte_exec(*pte);
		}

		if (pte_present(*pte)) {
			if (ret != ADDR_UNDEF && ret != ADDR_INSIDE) {
				goto end;
			}

			if (!pte_write(*pte)) {
				if (write) {
					/* no more writable bytes */
					goto end;
				} else if (ret != ADDR_UNDEF && !(*read_only)) {
					/* the permissions have changed.
					 * this is where we stop the buffer */
					goto end;
				}

				*read_only = 1;
			}

			start += PAGE_SIZE;
			total_size = start - (word)addr;
			ret = ADDR_INSIDE;
			continue;
		}

		goto end;
#else
		if (ret != ADDR_UNDEF && ret != ADDR_INSIDE) {
			goto end;
		}

		if (start >= PAGE_OFFSET ||
		    (start >= MODULES_VADDR && start < MODULES_END) ||
		    (start >= VMALLOC_START && start < VMALLOC_END)) {
			/* this is not totally safe, but it's enough for now */
			*executable = 1;
			start += PAGE_SIZE;
			total_size = start - (word)addr;
			ret = ADDR_INSIDE;
			continue;
		}

		goto end;
#endif
	}

end:
	if (!atomic) {
		up_read(&task->mm->mmap_sem);
	}

	if (total_size) {
		if (total_size < *size) {
			*size = total_size;
		}

		return ret;
	} else {
		return ADDR_UNDEF;
	}
}
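/* Appears to be the skas-mode page-table walker of UML: bring the host
 * address space (reached through mm->context.skas.mm_fd) back in sync with
 * the guest page tables, unmapping, remapping or reprotecting each range
 * whose pgd/pud/pmd/pte is marked as new. */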
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pud_t *npud;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr, end;
	int r, w, x, err, fd;

	if(mm == NULL)
		return;

	fd = mm->context.skas.mm_fd;
	for(addr = start_addr; addr < end_addr;){
		npgd = pgd_offset(mm, addr);
		if(!pgd_present(*npgd)){
			if(force || pgd_newpage(*npgd)){
				end = addr + PGDIR_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pgd_mkuptodate(*npgd);
			}
			addr += PGDIR_SIZE;
			continue;
		}

		npud = pud_offset(npgd, addr);
		if(!pud_present(*npud)){
			if(force || pud_newpage(*npud)){
				end = addr + PUD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pud_mkuptodate(*npud);
			}
			addr += PUD_SIZE;
			continue;
		}

		npmd = pmd_offset(npud, addr);
		if(!pmd_present(*npmd)){
			if(force || pmd_newpage(*npmd)){
				end = addr + PMD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
			continue;
		}

		npte = pte_offset_kernel(npmd, addr);
		r = pte_read(*npte);
		w = pte_write(*npte);
		x = pte_exec(*npte);
		if(!pte_dirty(*npte))
			w = 0;
		if(!pte_young(*npte)){
			r = 0;
			w = 0;
		}
		if(force || pte_newpage(*npte)){
			err = unmap(fd, (void *) addr, PAGE_SIZE);
			if(err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if(pte_present(*npte))
				map(fd, addr, pte_val(*npte) & PAGE_MASK,
				    PAGE_SIZE, r, w, x);
		}
		else if(pte_newprot(*npte))
			protect(fd, addr, PAGE_SIZE, r, w, x, 1);
		*npte = pte_mkuptodate(*npte);
		addr += PAGE_SIZE;
	}
}
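/* Presumably the tt-mode counterpart of the skas fix_range() above: the same
 * walk, but operating on the current host process directly through
 * os_unmap_memory()/map_memory()/protect_memory() rather than an mm_fd. */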
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr;
	int r, w, x, err;

	if((current->thread.mode.tt.extern_pid != -1) &&
	   (current->thread.mode.tt.extern_pid != os_getpid()))
		panic("fix_range fixing wrong address space, current = 0x%p",
		      current);
	if(mm == NULL)
		return;

	for(addr = start_addr; addr < end_addr;){
		if(addr == TASK_SIZE){
			/* Skip over kernel text, kernel data, and physical
			 * memory, which don't have ptes, plus kernel virtual
			 * memory, which is flushed separately, and remap
			 * the process stack.  The only way to get here is
			 * if (end_addr == STACK_TOP) > TASK_SIZE, which is
			 * only true in the honeypot case.
			 */
			addr = STACK_TOP - ABOVE_KMEM;
			continue;
		}
		npgd = pgd_offset(mm, addr);
		npmd = pmd_offset(npgd, addr);
		if(pmd_present(*npmd)){
			npte = pte_offset_kernel(npmd, addr);
			r = pte_read(*npte);
			w = pte_write(*npte);
			x = pte_exec(*npte);
			if(!pte_dirty(*npte))
				w = 0;
			if(!pte_young(*npte)){
				r = 0;
				w = 0;
			}
			if(force || pte_newpage(*npte)){
				err = os_unmap_memory((void *) addr, PAGE_SIZE);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				if(pte_present(*npte))
					map_memory(addr,
						   pte_val(*npte) & PAGE_MASK,
						   PAGE_SIZE, r, w, x);
			}
			else if(pte_newprot(*npte)){
				protect_memory(addr, PAGE_SIZE, r, w, x, 1);
			}
			*npte = pte_mkuptodate(*npte);
			addr += PAGE_SIZE;
		}
		else {
			if(force || pmd_newpage(*npmd)){
				err = os_unmap_memory((void *) addr, PMD_SIZE);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
		}
	}
}