/*H:480
 * (vi) Mapping the Switcher when the Guest is about to run.
 *
 * The Switcher and the two pages for this CPU need to be visible in the Guest
 * (and not the pages for other CPUs).
 *
 * The pages for the pagetables have all been allocated before: we just need
 * to make sure the actual PTEs are up-to-date for the CPU we're about to run
 * on.
 */
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
{
	unsigned long base;
	struct page *percpu_switcher_page, *regs_page;
	pte_t *pte;
	struct pgdir *pgdir = &cpu->lg->pgdirs[cpu->cpu_pgd];

	/* Switcher page should always be mapped by now! */
	BUG_ON(!pgdir->switcher_mapped);

	/*
	 * Remember that we have two pages for each Host CPU, so we can run a
	 * Guest on each CPU without them interfering.  We need to make sure
	 * those pages are mapped correctly in the Guest, but since we usually
	 * run on the same CPU, we cache that, and only update the mappings
	 * when we move.
	 */
	if (pgdir->last_host_cpu == raw_smp_processor_id())
		return;

	/* -1 means unknown so we remove everything. */
	if (pgdir->last_host_cpu == -1) {
		unsigned int i;

		for_each_possible_cpu(i)
			remove_switcher_percpu_map(cpu, i);
	} else {
		/* We know exactly what CPU mapping to remove. */
		remove_switcher_percpu_map(cpu, pgdir->last_host_cpu);
	}

	/*
	 * When we're running the Guest, we want the Guest's "regs" page to
	 * appear where the first Switcher page for this CPU is.  This is an
	 * optimization: when the Switcher saves the Guest registers, it saves
	 * them into the first page of this CPU's "struct lguest_pages": if we
	 * make sure the Guest's register page is already mapped there, we
	 * don't have to copy them out again.
	 */
	/* Find the shadow PTE for this regs page. */
	base = switcher_addr + PAGE_SIZE
		+ raw_smp_processor_id() * sizeof(struct lguest_pages);
	pte = find_spte(cpu, base, false, 0, 0);
	regs_page = pfn_to_page(__pa(cpu->regs_page) >> PAGE_SHIFT);
	get_page(regs_page);
	set_pte(pte, mk_pte(regs_page, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL)));

	/*
	 * We map the second page of the struct lguest_pages read-only in
	 * the Guest: the IDT, GDT and other things it's not supposed to
	 * change.
	 */
	pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0);
	percpu_switcher_page
		= lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1];
	get_page(percpu_switcher_page);
	set_pte(pte, mk_pte(percpu_switcher_page,
			    __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)));

	pgdir->last_host_cpu = raw_smp_processor_id();
}
static int __change_page_attr(struct page *page, pgprot_t prot)
{
	pte_t *kpte;
	unsigned long address;
	struct page *kpte_page;

	BUG_ON(PageHighMem(page));
	address = (unsigned long)page_address(page);

	kpte = lookup_address(address);
	if (!kpte)
		return -EINVAL;
	kpte_page = virt_to_page(kpte);
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
		if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
			set_pte_atomic(kpte, mk_pte(page, prot));
		} else {
			pgprot_t ref_prot;
			struct page *split;

			ref_prot =
			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
				? PAGE_KERNEL_EXEC : PAGE_KERNEL;
			split = split_large_page(address, prot, ref_prot);
			if (!split)
				return -ENOMEM;
			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
			kpte_page = split;
		}
		page_private(kpte_page)++;
	} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
		BUG_ON(page_private(kpte_page) == 0);
		page_private(kpte_page)--;
	} else
		BUG();

	/*
	 * If the pte was reserved, it means it was created at boot
	 * time (not via split_large_page) and in turn we must not
	 * replace it with a largepage.
	 */
	if (!PageReserved(kpte_page)) {
		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
			ClearPagePrivate(kpte_page);
			list_add(&kpte_page->lru, &df_list);
			revert_page(kpte_page, address);
		}
	}
	return 0;
}
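/*
 * Illustrative sketch, not from the source above: __change_page_attr() only
 * converts a single kernel mapping, so callers normally batch it over a range
 * of pages under a lock that serializes attribute updates.  This assumes a
 * hypothetical cpa_lock spinlock and that the deferred df_list pages are later
 * flushed by a global_flush_tlb()-style helper; treat it as a pattern, not the
 * exact upstream wrapper.
 */
static DEFINE_SPINLOCK(cpa_lock);	/* assumed: serializes attribute updates */

int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	int err = 0;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&cpa_lock, flags);
	for (i = 0; i < numpages; i++, page++) {
		/* Stop at the first failure; earlier pages stay converted. */
		err = __change_page_attr(page, prot);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&cpa_lock, flags);
	return err;
}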
/*
 * This routine is used to map in a page into an address space: needed by
 * execve() for the initial stack and environment pages.
 */
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page,
	unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	if (page >= high_memory)
		printk("put_dirty_page: trying to put page %08lx at %08lx\n", page, address);
	if (mem_map[MAP_NR(page)].count != 1)
		printk("mem_map disagrees with %08lx at %08lx\n", page, address);
	pgd = pgd_offset(tsk->mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	pte = pte_alloc(pmd, address);
	if (!pte) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	if (!pte_none(*pte)) {
		printk("put_dirty_page: page already exists\n");
		free_page(page);
		return 0;
	}
	flush_page_to_ram(page);
	set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
	/* no need for invalidate */
	return page;
}
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the array which helps higher level
	 * callers keep track of where we're up to.
	 */
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
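/*
 * Illustrative sketch, not from the source above: the PTE-level helper is the
 * leaf of a standard page-table walk.  One level up, a PMD-level walker splits
 * the range at PMD boundaries and calls vmap_pte_range() for each entry,
 * propagating the same pages/nr running index.  This follows the usual
 * mm/vmalloc.c pattern and is meant as an illustration of that structure.
 */
static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		/* Clamp this step at the end of the PMD (or the overall end). */
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}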
/*
 * page not present ... go through shm_pages
 */
static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	pte_t pte;
	struct shmid_kernel *shp;
	unsigned int id, idx;

	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;

#ifdef DEBUG_SHM
	if (id > max_shmid) {
		printk ("shm_nopage: id=%d too big. proc mem corrupted\n", id);
		return 0;
	}
#endif
	shp = shm_segs[id];

#ifdef DEBUG_SHM
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		printk ("shm_nopage: id=%d invalid. Race.\n", id);
		return 0;
	}
#endif
	/* This can occur on a remap */
	if (idx >= shp->shm_npages) {
		return 0;
	}

	pte = __pte(shp->shm_pages[idx]);
	if (!pte_present(pte)) {
		unsigned long page = get_free_page(GFP_USER);
		if (!page)
			return -1;
		pte = __pte(shp->shm_pages[idx]);
		if (pte_present(pte)) {
			free_page (page); /* doesn't sleep */
			goto done;
		}
		if (!pte_none(pte)) {
			rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
			pte = __pte(shp->shm_pages[idx]);
			if (pte_present(pte)) {
				free_page (page); /* doesn't sleep */
				goto done;
			}
			swap_free(pte_val(pte));
			shm_swp--;
		}
		shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte_val(pte);
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
	current->min_flt++;
	atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
	return pte_page(pte);
}
/*
 * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
 */
static inline void break_cow(struct vm_area_struct * vma, struct page * new_page,
		unsigned long address, pte_t *page_table)
{
	flush_page_to_ram(new_page);
	flush_cache_page(vma, address);
	establish_pte(vma, address, page_table,
		      pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares (but this is /really/
 * inefficient, ugh).
 *
 * We return 1 after having slept, which makes the process start over
 * from the beginning for this process..
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		unsigned long page_nr = MAP_NR(pte_page(pte));
		if (page_nr >= MAP_NR(high_memory))
			return 0;
		if (!in_swap_cache(page_nr))
			return 0;
		if (SWP_TYPE(in_swap_cache(page_nr)) != type)
			return 0;
		delete_from_swap_cache(page_nr);
		set_pte(dir, pte_mkdirty(pte));
		return 0;
	}
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	read_swap_page(pte_val(pte), (char *) page);
#if 0 /* Is this really needed here, hasn't it been solved elsewhere? */
	flush_page_to_ram(page);
#endif
	if (pte_val(*dir) != pte_val(pte)) {
		free_page(page);
		return 1;
	}
	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	flush_tlb_page(vma, address);
	++vma->vm_mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
/*
 * Map the 32bit vsyscall page on demand.
 *
 * RED-PEN: This knows too much about high level VM.
 *
 * Alternative would be to generate a vma with appropriate backing options
 * and let it be handled by generic VM.
 */
int __map_syscall32(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte;
	pmd_t *pmd;
	int err = -ENOMEM;

	spin_lock(&mm->page_table_lock);
	pgd = pgd_offset(mm, address);
	pud = pud_alloc(mm, pgd, address);
	if (pud) {
		pmd = pmd_alloc(mm, pud, address);
		if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
			if (pte_none(*pte)) {
				set_pte(pte,
					mk_pte(virt_to_page(syscall32_page),
					       PAGE_KERNEL_VSYSCALL32));
			}
			/*
			 * Flush only the local CPU.  Other CPUs taking a
			 * fault will just end up here again.  This is
			 * probably not needed and just paranoia.
			 */
			__flush_tlb_one(address);
			err = 0;
		}
	}
	spin_unlock(&mm->page_table_lock);
	return err;
}
/**
 * Create a temporary kernel mapping.
 * The type and the CPU together determine which fixed-mapped linear
 * address is used to map the requested page.
 */
void *kmap_atomic(struct page *page, enum km_type type)
{
	enum fixed_addresses idx;
	unsigned long vaddr;

	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
	inc_preempt_count();

	/*
	 * If the page is not in high memory there is nothing to map:
	 * just return its linear address.
	 */
	if (!PageHighMem(page))
		return page_address(page);

	/*
	 * Pick the fixed-map linear address from the type and the CPU.
	 */
	idx = type + KM_TYPE_NR*smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
#ifdef CONFIG_DEBUG_HIGHMEM
	if (!pte_none(*(kmap_pte-idx)))
		BUG();
#endif
	/*
	 * Wire the linear address to the page via its page-table entry.
	 */
	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));

	/*
	 * Finally, flush the TLB entry before returning the linear address.
	 */
	__flush_tlb_one(vaddr);

	return (void*) vaddr;
}
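/*
 * Illustrative usage sketch, not from the source above: the classic pattern
 * pairs kmap_atomic() with kunmap_atomic() around a short, non-sleeping
 * access.  This assumes the old two-argument API with an explicit km_type
 * slot (KM_USER0), matching the function above; the helper name is
 * hypothetical.
 */
static void copy_page_contents(struct page *page, void *dst)
{
	char *vaddr;

	vaddr = kmap_atomic(page, KM_USER0);	/* lowmem pages return their address directly */
	memcpy(dst, vaddr, PAGE_SIZE);		/* must not sleep while the slot is mapped */
	kunmap_atomic(vaddr, KM_USER0);		/* release the temporary fixmap slot */
}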
int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = address;
	unsigned long end = address + size;
	pte_t zero_pte;

	zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
	dir = pgd_offset(current->mm, address);
	flush_cache_range(current->mm, beg, end);
	while (address < end) {
		pmd_t *pmd = pmd_alloc(dir, address);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
#ifdef CONFIG_BESTA
		if (!address)
			break;	/* unsigned overflow */
#endif
		dir++;
	}
	flush_tlb_range(current->mm, beg, end);
	return error;
}
void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte)
{
	unsigned int idx, cpu = smp_processor_id();
	int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
	unsigned long vaddr, flags;
	pte_t pte, *ptep;

	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
	ptep = TOP_PTE(vaddr);
	pte = mk_pte(page, kmap_prot);

	BUG_ON(pte_val(*ptep) != pte_val(pte));
	BUG_ON(*depth <= 0);

	raw_local_irq_save(flags);
	(*depth)--;
	if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) {
		set_pte_ext(ptep, saved_pte, 0);
		local_flush_tlb_kernel_page(vaddr);
	}
	raw_local_irq_restore(flags);

	if (!in_interrupt())
		preempt_enable();
}
void *kmap_atomic(struct page *page, enum km_type type)
{
	unsigned long idx;
	unsigned long vaddr;

	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);

	debug_kmap_atomic(type);
	idx = type + KM_TYPE_NR*smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);

/* XXX Fix - Anton */
#if 0
	__flush_cache_one(vaddr);
#else
	flush_cache_all();
#endif

#ifdef CONFIG_DEBUG_HIGHMEM
	BUG_ON(!pte_none(*(kmap_pte-idx)));
#endif
	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));

/* XXX Fix - Anton */
#if 0
	__flush_tlb_one(vaddr);
#else
	flush_tlb_all();
#endif

	return (void*) vaddr;
}
static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
			 unsigned long kernel)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(mm, proc);
	pud = pud_alloc(mm, pgd, proc);
	if (!pud)
		goto out;

	pmd = pmd_alloc(mm, pud, proc);
	if (!pmd)
		goto out_pmd;

	pte = pte_alloc_map(mm, NULL, pmd, proc);
	if (!pte)
		goto out_pte;

	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
	*pte = pte_mkread(*pte);
	return 0;

 out_pte:
	pmd_free(mm, pmd);
 out_pmd:
	pud_free(mm, pud);
 out:
	return -ENOMEM;
}
void *kmap_atomic(struct page *page, enum km_type type)
{
	unsigned int idx;
	unsigned long vaddr;
	void *kmap;

	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);

	debug_kmap_atomic(type);

	kmap = kmap_high_get(page);
	if (kmap)
		return kmap;

	idx = type + KM_TYPE_NR * smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
#ifdef CONFIG_DEBUG_HIGHMEM
	/*
	 * With debugging enabled, kunmap_atomic forces that entry to 0.
	 * Make sure it was indeed properly unmapped.
	 */
	BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
#endif
	set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0);
	/*
	 * When debugging is off, kunmap_atomic leaves the previous mapping
	 * in place, so this TLB flush ensures the TLB is updated with the
	 * new mapping.
	 */
	local_flush_tlb_kernel_page(vaddr);

	return (void *)vaddr;
}
/*H:501
 * We do need the Switcher code mapped at all times, so we allocate that
 * part of the Guest page table here.  We map the Switcher code immediately,
 * but defer mapping of the guest register page and IDT/LDT etc page until
 * just before we run the guest in map_switcher_in_guest().
 *
 * We *could* do this setup in map_switcher_in_guest(), but at that point
 * we have interrupts disabled, and allocating pages like that is fraught: we
 * can't sleep if we need to free up some memory.
 */
static bool allocate_switcher_mapping(struct lg_cpu *cpu)
{
	int i;

	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
		pte_t *pte = find_spte(cpu, switcher_addr + i * PAGE_SIZE, true,
				       CHECK_GPGD_MASK, _PAGE_TABLE);
		if (!pte)
			return false;

		/*
		 * Map the switcher page if not already there.  It might
		 * already be there because we call allocate_switcher_mapping()
		 * in guest_set_pgd() just in case it did discard our Switcher
		 * mapping, but it probably didn't.
		 */
		if (i == 0 && !(pte_flags(*pte) & _PAGE_PRESENT)) {
			/* Get a reference to the Switcher page. */
			get_page(lg_switcher_pages[0]);
			/* Create a read-only, executable, kernel-style PTE */
			set_pte(pte,
				mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX));
		}
	}
	cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped = true;
	return true;
}
void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
{
	unsigned int idx, cpu = smp_processor_id();
	int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
	unsigned long vaddr, flags;
	pte_t pte, *ptep;

	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
	ptep = TOP_PTE(vaddr);
	pte = mk_pte(page, kmap_prot);

	if (!in_interrupt())
		preempt_disable();

	raw_local_irq_save(flags);
	(*depth)++;
	if (pte_val(*ptep) == pte_val(pte)) {
		*saved_pte = pte;
	} else {
		*saved_pte = *ptep;
		set_pte_ext(ptep, pte, 0);
		local_flush_tlb_kernel_page(vaddr);
	}
	raw_local_irq_restore(flags);

	return (void *)vaddr;
}
/*
 * maps a range of vmalloc()ed memory into the requested pages. the old
 * mappings are removed.
 */
static inline void vmap_pte_range (pte_t *pte, unsigned long address,
				   unsigned long size, unsigned long vaddr)
{
	unsigned long end;
	pgd_t *vdir;
	pmd_t *vpmd;
	pte_t *vpte;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t oldpage = *pte;
		struct page * page;

		pte_clear(pte);

		vdir = pgd_offset_k (vaddr);
		vpmd = pmd_offset (vdir, vaddr);
		vpte = pte_offset (vpmd, vaddr);
		page = pte_page (*vpte);

		set_pte(pte, mk_pte(page, PAGE_USERIO));
		forget_pte(oldpage);

		address += PAGE_SIZE;
		vaddr += PAGE_SIZE;
		pte++;
	} while (address < end);
}
/*H:520
 * Setting up the Switcher PTE page for given CPU is fairly easy, given
 * the CPU number and the "struct page"s for the Switcher code itself.
 *
 * Currently the Switcher is less than a page long, so "pages" is always 1.
 */
static __init void populate_switcher_pte_page(unsigned int cpu,
					      struct page *switcher_page[],
					      unsigned int pages)
{
	unsigned int i;
	pte_t *pte = switcher_pte_page(cpu);

	/* The first entries are easy: they map the Switcher code. */
	for (i = 0; i < pages; i++) {
		set_pte(&pte[i], mk_pte(switcher_page[i],
				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
	}

	/* The only other thing we map is this CPU's pair of pages. */
	i = pages + cpu*2;

	/* First page (Guest registers) is writable from the Guest */
	set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));

	/*
	 * The second page contains the "struct lguest_ro_state", and is
	 * read-only.
	 */
	set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
}
/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	spinlock_t *ptl;
	pte_t *pte;
	int ret = 1;

	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
		ret = -ENOMEM;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
		if (ret > 0)
			mem_cgroup_uncharge_page(page);
		ret = 0;
		goto out;
	}

	inc_mm_counter(vma->vm_mm, anon_rss);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	page_add_anon_rmap(page, vma, addr);
	swap_free(entry);
	/*
	 * Move the page to the active list so it is not
	 * immediately swapped out again after swapon.
	 */
	activate_page(page);
out:
	pte_unmap_unlock(pte, ptl);
	return ret;
}
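/*
 * Illustrative sketch, not from the source above: unuse_pte() is normally
 * driven by a PTE-range walker that scans each slot of a PMD for the swap
 * entry being eliminated.  This follows the usual mm/swapfile.c structure of
 * that era; the point being illustrated is the locking dance (the mapped PTE
 * is dropped before calling unuse_pte(), which takes the PTE lock itself),
 * not a verbatim copy of the upstream helper.
 */
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	pte_t *pte;
	int ret = 0;

	pte = pte_offset_map(pmd, addr);
	do {
		/* Cheap inline test first; unuse_pte() rechecks under lock. */
		if (unlikely(pte_same(*pte, swp_pte))) {
			pte_unmap(pte);
			ret = unuse_pte(vma, pmd, addr, entry, page);
			if (ret)
				goto out;
			pte = pte_offset_map(pmd, addr);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
out:
	return ret;
}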
void *kmap_atomic(struct page *page)
{
	unsigned long vaddr;
	long idx, type;

	preempt_disable();
	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);

	type = kmap_atomic_idx_push();
	idx = type + KM_TYPE_NR*smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);

/* XXX Fix - Anton */
#if 0
	__flush_cache_one(vaddr);
#else
	flush_cache_all();
#endif

#ifdef CONFIG_DEBUG_HIGHMEM
	BUG_ON(!pte_none(*(kmap_pte-idx)));
#endif
	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));

/* XXX Fix - Anton */
#if 0
	__flush_tlb_one(vaddr);
#else
	flush_tlb_all();
#endif

	return (void*) vaddr;
}
static int __change_page_attr(struct page *page, pgprot_t prot)
{
	pte_t *kpte;
	unsigned long address;
	struct page *kpte_page;

	BUG_ON(PageHighMem(page));
	address = (unsigned long)page_address(page);

	kpte = lookup_address(address);
	if (!kpte)
		return -EINVAL;
	kpte_page = virt_to_page(kpte);
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
		if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
			set_pte_atomic(kpte, mk_pte(page, prot));
		} else {
			struct page *split = split_large_page(address, prot);
			if (!split)
				return -ENOMEM;
			set_pmd_pte(kpte, address, mk_pte(split, PAGE_KERNEL));
			kpte_page = split;
		}
		get_page(kpte_page);
	} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
		__put_page(kpte_page);
	} else
		BUG();

	/*
	 * If the pte was reserved, it means it was created at boot
	 * time (not via split_large_page) and in turn we must not
	 * replace it with a largepage.
	 */
	if (!PageReserved(kpte_page)) {
		/* memleak and potential failed 2M page regeneration */
		BUG_ON(!page_count(kpte_page));

		if (cpu_has_pse && (page_count(kpte_page) == 1)) {
			list_add(&kpte_page->lru, &df_list);
			revert_page(kpte_page, address);
		}
	}
	return 0;
}
/*
 * this routine handles present pages, when users try to write
 * to a shared page.
 */
void do_wp_page(struct vm_area_struct *vma, unsigned long address, int write_access)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	new_page = get_free_page(GFP_KERNEL);
	pgd = pgd_offset(vma->vm_task, address);
	if (pgd_none(*pgd))
		goto end_wp_page;
	if (pgd_bad(*pgd))
		goto bad_wp_page;
	pmd = pmd_offset(pgd, address);
	if (pmd_none(*pmd))
		goto end_wp_page;
	if (pmd_bad(*pmd))
		goto bad_wp_page;
	page_table = pte_offset(pmd, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= main_memory_end)
		goto bad_wp_page;
	(vma->vm_task->mm->min_flt)++;
	if (mem_map[MAP_NR(old_page)].flags & PAGE_PRESENT) {
		if (new_page) {
			if (mem_map[MAP_NR(old_page)].flags & MAP_PAGE_RESERVED)
				++(vma->vm_task->mm->rss);
			copy_page(old_page, new_page);
			/* Map the freshly copied page, not the address of the local. */
			*page_table = pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)));
			free_page(old_page);
			return;
		}
		pte_val(*page_table) &= PAGE_BAD;
		free_page(old_page);
		oom();
		return;
	}
	*page_table = pte_mkdirty(pte_mkwrite(pte));
	if (new_page)
		free_page(new_page);
	return;

bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n", address, old_page);
	goto end_wp_page;
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}
pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];
	unsigned long page = (unsigned long)empty_bad_page;

	clear_page(page);
	return pte_mkdirty(mk_pte(page, PAGE_SHARED));
}
/*
 * This only needs the MM semaphore
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
	pte_t *page_table, int write_access, unsigned long addr)
{
	struct page *page = NULL;
	pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	if (write_access) {
		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			return -1;
		clear_user_highpage(page, addr);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		mm->rss++;
		flush_page_to_ram(page);
	}
	set_pte(page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	return 1;	/* Minor fault */
}
static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
			    void *data)
{
	struct page *page = virt_to_page(addr);
	pgprot_t prot = *(pgprot_t *)data;

	set_pte(pte, mk_pte(page, prot));
	return 0;
}
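/*
 * Illustrative sketch, not from the source above: this callback is shaped for
 * apply_to_page_range(), which walks an existing kernel mapping and invokes
 * the callback on every PTE in the range (assuming the older pte_fn_t
 * signature with the pgtable_t token, as used by the callback above).  The
 * helper name is hypothetical; it shows the usual pattern of rewriting the
 * PTEs of a lowmem buffer and then flushing the stale translations.
 */
static void remap_buffer_prot(struct page *page, size_t size, pgprot_t prot)
{
	unsigned long start = (unsigned long) page_address(page);
	unsigned long end = start + size;

	/* Rewrite each PTE covering [start, start + size) via the callback. */
	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);

	/* Old translations may still be cached in the TLB; flush them. */
	flush_tlb_kernel_range(start, end);
}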
static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
	const void *caller)
{
	struct arm_vmregion *c;
	size_t align;
	int bit;

	if (!consistent_pte) {
		printk(KERN_ERR "%s: not initialised\n", __func__);
		dump_stack();
		return NULL;
	}

	/*
	 * Align the virtual region allocation - maximum alignment is
	 * a section size, minimum is a page size.  This helps reduce
	 * fragmentation of the DMA space, and also prevents allocations
	 * smaller than a section from crossing a section boundary.
	 */
	bit = fls(size - 1);
	if (bit > SECTION_SHIFT)
		bit = SECTION_SHIFT;
	align = 1 << bit;

	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = arm_vmregion_alloc(&consistent_head, align, size,
			       gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
	if (c) {
		pte_t *pte;
		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);

		pte = consistent_pte[idx] + off;
		c->vm_pages = page;

		do {
			BUG_ON(!pte_none(*pte));

			set_pte_ext(pte, mk_pte(page, prot), 0);
			page++;
			pte++;
			off++;
			if (off >= PTRS_PER_PTE) {
				off = 0;
				pte = consistent_pte[++idx];
			}
		} while (size -= PAGE_SIZE);

		dsb();

		return (void *)c->vm_start;
	}
	return NULL;
}
/**
 * __replace_page - replace page in vma by new page.
 * based on replace_page in mm/ksm.c
 *
 * @vma:      vma that holds the pte pointing to page
 * @addr:     address the old @page is mapped at
 * @page:     the cowed page we are replacing by kpage
 * @kpage:    the modified page we replace page by
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
				struct page *old_page, struct page *new_page)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	pte_t *ptep;
	int err;
	/* For mmu_notifiers */
	const unsigned long mmun_start = addr;
	const unsigned long mmun_end   = addr + PAGE_SIZE;
	struct mem_cgroup *memcg;

	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
			false);
	if (err)
		return err;

	/* For try_to_free_swap() and munlock_vma_page() below */
	lock_page(old_page);

	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
	err = -EAGAIN;
	ptep = page_check_address(old_page, mm, addr, &ptl, 0);
	if (!ptep) {
		mem_cgroup_cancel_charge(new_page, memcg, false);
		goto unlock;
	}

	get_page(new_page);
	page_add_new_anon_rmap(new_page, vma, addr, false);
	mem_cgroup_commit_charge(new_page, memcg, false, false);
	lru_cache_add_active_or_unevictable(new_page, vma);

	if (!PageAnon(old_page)) {
		dec_mm_counter(mm, mm_counter_file(old_page));
		inc_mm_counter(mm, MM_ANONPAGES);
	}

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush_notify(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));

	page_remove_rmap(old_page, false);
	if (!page_mapped(old_page))
		try_to_free_swap(old_page);
	pte_unmap_unlock(ptep, ptl);

	if (vma->vm_flags & VM_LOCKED)
		munlock_vma_page(old_page);
	put_page(old_page);

	err = 0;
 unlock:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	unlock_page(old_page);
	return err;
}
/*
 * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
 */
static inline void break_cow(struct vm_area_struct * vma, struct page * new_page,
		unsigned long address, pte_t *page_table)
{
	flush_page_to_ram(new_page);
#ifndef CONFIG_SUPERH
	/* Not needed for VIPT cache (need better API for caches) */
	flush_cache_page(vma, address);
#endif
	establish_pte(vma, address, page_table,
		      pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
/*
 * We are called with the MM semaphore and page_table_lock
 * spinlock held to protect against concurrent faults in
 * multithreaded programs.
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
	pte_t *page_table, int write_access, unsigned long addr)
{
	pte_t entry;

	/* Read-only mapping of ZERO_PAGE. */
	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	/* ..except if it's a write access */
	if (write_access) {
		struct page *page;

		/* Allocate our own private page. */
		spin_unlock(&mm->page_table_lock);
		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			goto no_mem;
		clear_user_highpage(page, addr);

		spin_lock(&mm->page_table_lock);
		if (!pte_none(*page_table)) {
			page_cache_release(page);
			spin_unlock(&mm->page_table_lock);
			return 1;
		}
		mm->rss++;
		flush_page_to_ram(page);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		lru_cache_add(page);
		mark_page_accessed(page);
	}

	set_pte(page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;	/* Minor fault */

no_mem:
	return -1;
}
pte_t __bad_page(void)
{
	extern char empty_bad_page[PAGE_SIZE];

	__asm__ __volatile__("cld ; rep ; stosl":
		:"a" (0),
		 "D" ((long) empty_bad_page),
		 "c" (PAGE_SIZE/4)
		:"di","cx");
	return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
}