// swap_out_vma - try unmap pte & move pages into swap active list. static int swap_out_vma(struct mm_struct *mm, struct vma_struct *vma, uintptr_t addr, size_t require) { if (require == 0 || !(addr >= vma->vm_start && addr < vma->vm_end)) { return 0; } uintptr_t end; size_t free_count = 0; addr = ROUNDDOWN(addr, PGSIZE), end = ROUNDUP(vma->vm_end, PGSIZE); while (addr < end && require != 0) { pte_t *ptep = get_pte(mm->pgdir, addr, 0); if (ptep == NULL) { if (get_pud(mm->pgdir, addr, 0) == NULL) { addr = ROUNDDOWN(addr + PUSIZE, PUSIZE); } else if (get_pmd(mm->pgdir, addr, 0) == NULL) { addr = ROUNDDOWN(addr + PMSIZE, PMSIZE); } else { addr = ROUNDDOWN(addr + PTSIZE, PTSIZE); } continue ; } if (ptep_present(ptep)) { struct Page *page = pte2page(*ptep); assert(!PageReserved(page)); if (ptep_accessed(ptep)) { ptep_unset_accessed(ptep); mp_tlb_invalidate(mm->pgdir, addr); goto try_next_entry; } if (!PageSwap(page)) { if (!swap_page_add(page, 0)) { goto try_next_entry; } swap_active_list_add(page); } else if (ptep_dirty(ptep)) { SetPageDirty(page); } swap_entry_t entry = page->index; swap_duplicate(entry); page_ref_dec(page); ptep_copy(ptep, &entry); mp_tlb_invalidate(mm->pgdir, addr); mm->swap_address = addr + PGSIZE; free_count ++, require --; if ((vma->vm_flags & VM_SHARE) && page_ref(page) == 1) { uintptr_t shmem_addr = addr - vma->vm_start + vma->shmem_off; pte_t *sh_ptep = shmem_get_entry(vma->shmem, shmem_addr, 0); assert(sh_ptep != NULL && ! ptep_invalid(sh_ptep)); if (ptep_present(sh_ptep)) { shmem_insert_entry(vma->shmem, shmem_addr, entry); } } } try_next_entry: addr += PGSIZE; } return free_count; }
int shmem_insert_entry(struct shmem_struct *shmem, uintptr_t addr, pte_t entry) { pte_t *ptep = shmem_get_entry(shmem, addr, 1); if (ptep == NULL) { return -E_NO_MEM; } if (! ptep_invalid(ptep)) { shmem_remove_entry_pte(ptep); } if (ptep_present(&entry)) { page_ref_inc(pte2page(entry)); } else if (! ptep_invalid(&entry)) { swap_duplicate(entry); } ptep_copy(ptep, &entry); return 0; }
// check_swap - check the correctness of swap & page replacement algorithm static void check_swap(void) { size_t nr_used_pages_store = nr_used_pages(); size_t slab_allocated_store = slab_allocated(); size_t offset; for (offset = 2; offset < max_swap_offset; offset ++) { mem_map[offset] = 1; } struct mm_struct *mm = mm_create(); assert(mm != NULL); extern struct mm_struct *check_mm_struct; assert(check_mm_struct == NULL); check_mm_struct = mm; pgd_t *pgdir = mm->pgdir = init_pgdir_get(); assert(pgdir[PGX(TEST_PAGE)] == 0); struct vma_struct *vma = vma_create(TEST_PAGE, TEST_PAGE + PTSIZE, VM_WRITE | VM_READ); assert(vma != NULL); insert_vma_struct(mm, vma); struct Page *rp0 = alloc_page(), *rp1 = alloc_page(); assert(rp0 != NULL && rp1 != NULL); pte_perm_t perm; ptep_unmap (&perm); ptep_set_u_write(&perm); int ret = page_insert(pgdir, rp1, TEST_PAGE, perm); assert(ret == 0 && page_ref(rp1) == 1); page_ref_inc(rp1); ret = page_insert(pgdir, rp0, TEST_PAGE, perm); assert(ret == 0 && page_ref(rp1) == 1 && page_ref(rp0) == 1); // check try_alloc_swap_entry swap_entry_t entry = try_alloc_swap_entry(); assert(swap_offset(entry) == 1); mem_map[1] = 1; assert(try_alloc_swap_entry() == 0); // set rp1, Swap, Active, add to hash_list, active_list swap_page_add(rp1, entry); swap_active_list_add(rp1); assert(PageSwap(rp1)); mem_map[1] = 0; entry = try_alloc_swap_entry(); assert(swap_offset(entry) == 1); assert(!PageSwap(rp1)); // check swap_remove_entry assert(swap_hash_find(entry) == NULL); mem_map[1] = 2; swap_remove_entry(entry); assert(mem_map[1] == 1); swap_page_add(rp1, entry); swap_inactive_list_add(rp1); swap_remove_entry(entry); assert(PageSwap(rp1)); assert(rp1->index == entry && mem_map[1] == 0); // check page_launder, move page from inactive_list to active_list assert(page_ref(rp1) == 1); assert(nr_active_pages == 0 && nr_inactive_pages == 1); assert(list_next(&(inactive_list.swap_list)) == &(rp1->swap_link)); page_launder(); assert(nr_active_pages == 1 && nr_inactive_pages == 0); assert(PageSwap(rp1) && PageActive(rp1)); entry = try_alloc_swap_entry(); assert(swap_offset(entry) == 1); assert(!PageSwap(rp1) && nr_active_pages == 0); assert(list_empty(&(active_list.swap_list))); // set rp1 inactive again assert(page_ref(rp1) == 1); swap_page_add(rp1, 0); assert(PageSwap(rp1) && swap_offset(rp1->index) == 1); swap_inactive_list_add(rp1); mem_map[1] = 1; assert(nr_inactive_pages == 1); page_ref_dec(rp1); size_t count = nr_used_pages(); swap_remove_entry(entry); assert(nr_inactive_pages == 0 && nr_used_pages() == count - 1); // check swap_out_mm pte_t *ptep0 = get_pte(pgdir, TEST_PAGE, 0), *ptep1; assert(ptep0 != NULL && pte2page(*ptep0) == rp0); ret = swap_out_mm(mm, 0); assert(ret == 0); ret = swap_out_mm(mm, 10); assert(ret == 1 && mm->swap_address == TEST_PAGE + PGSIZE); ret = swap_out_mm(mm, 10); assert(ret == 0 && *ptep0 == entry && mem_map[1] == 1); assert(PageDirty(rp0) && PageActive(rp0) && page_ref(rp0) == 0); assert(nr_active_pages == 1 && list_next(&(active_list.swap_list)) == &(rp0->swap_link)); // check refill_inactive_scan() refill_inactive_scan(); assert(!PageActive(rp0) && page_ref(rp0) == 0); assert(nr_inactive_pages == 1 && list_next(&(inactive_list.swap_list)) == &(rp0->swap_link)); page_ref_inc(rp0); page_launder(); assert(PageActive(rp0) && page_ref(rp0) == 1); assert(nr_active_pages == 1 && list_next(&(active_list.swap_list)) == &(rp0->swap_link)); page_ref_dec(rp0); refill_inactive_scan(); assert(!PageActive(rp0)); // save data in rp0 int i; for (i = 0; i < PGSIZE; i ++) { ((char *)page2kva(rp0))[i] = (char)i; } page_launder(); assert(nr_inactive_pages == 0 && list_empty(&(inactive_list.swap_list))); assert(mem_map[1] == 1); rp1 = alloc_page(); assert(rp1 != NULL); ret = swapfs_read(entry, rp1); assert(ret == 0); for (i = 0; i < PGSIZE; i ++) { assert(((char *)page2kva(rp1))[i] == (char)i); } // page fault now *(char *)(TEST_PAGE) = 0xEF; rp0 = pte2page(*ptep0); assert(page_ref(rp0) == 1); assert(PageSwap(rp0) && PageActive(rp0)); entry = try_alloc_swap_entry(); assert(swap_offset(entry) == 1 && mem_map[1] == SWAP_UNUSED); assert(!PageSwap(rp0) && nr_active_pages == 0 && nr_inactive_pages == 0); // clear accessed flag assert(rp0 == pte2page(*ptep0)); assert(!PageSwap(rp0)); ret = swap_out_mm(mm, 10); assert(ret == 0); assert(!PageSwap(rp0) && ptep_present(ptep0)); // change page table ret = swap_out_mm(mm, 10); assert(ret == 1); assert(*ptep0 == entry && page_ref(rp0) == 0 && mem_map[1] == 1); count = nr_used_pages(); refill_inactive_scan(); page_launder(); assert(count - 1 == nr_used_pages()); ret = swapfs_read(entry, rp1); assert(ret == 0 && *(char *)(page2kva(rp1)) == (char)0xEF); free_page(rp1); // duplictate *ptep0 ptep1 = get_pte(pgdir, TEST_PAGE + PGSIZE, 0); assert(ptep1 != NULL && ptep_invalid(ptep1)); swap_duplicate(*ptep0); ptep_copy(ptep1, ptep0); mp_tlb_invalidate (pgdir, TEST_PAGE + PGSIZE); // page fault again // update for copy on write *(char *)(TEST_PAGE + 1) = 0x88; *(char *)(TEST_PAGE + PGSIZE) = 0x8F; *(char *)(TEST_PAGE + PGSIZE + 1) = 0xFF; assert(pte2page(*ptep0) != pte2page(*ptep1)); assert(*(char *)(TEST_PAGE) == (char)0xEF); assert(*(char *)(TEST_PAGE + 1) == (char)0x88); assert(*(char *)(TEST_PAGE + PGSIZE) == (char)0x8F); assert(*(char *)(TEST_PAGE + PGSIZE + 1) == (char)0xFF); rp0 = pte2page(*ptep0); rp1 = pte2page(*ptep1); assert(!PageSwap(rp0) && PageSwap(rp1) && PageActive(rp1)); entry = try_alloc_swap_entry(); assert(!PageSwap(rp0) && !PageSwap(rp1)); assert(swap_offset(entry) == 1 && mem_map[1] == SWAP_UNUSED); assert(list_empty(&(active_list.swap_list))); assert(list_empty(&(inactive_list.swap_list))); ptep_set_accessed(&perm); page_insert(pgdir, rp0, TEST_PAGE + PGSIZE, perm); // check swap_out_mm *(char *)(TEST_PAGE) = *(char *)(TEST_PAGE + PGSIZE) = 0xEE; mm->swap_address = TEST_PAGE + PGSIZE * 2; ret = swap_out_mm(mm, 2); assert(ret == 0); assert(ptep_present(ptep0) && ! ptep_accessed(ptep0)); assert(ptep_present(ptep1) && ! ptep_accessed(ptep1)); ret = swap_out_mm(mm, 2); assert(ret == 2); assert(mem_map[1] == 2 && page_ref(rp0) == 0); refill_inactive_scan(); page_launder(); assert(mem_map[1] == 2 && swap_hash_find(entry) == NULL); // check copy entry swap_remove_entry(entry); ptep_unmap(ptep1); assert(mem_map[1] == 1); swap_entry_t store; ret = swap_copy_entry(entry, &store); assert(ret == -E_NO_MEM); mem_map[2] = SWAP_UNUSED; ret = swap_copy_entry(entry, &store); assert(ret == 0 && swap_offset(store) == 2 && mem_map[2] == 0); mem_map[2] = 1; ptep_copy(ptep1, &store); assert(*(char *)(TEST_PAGE + PGSIZE) == (char)0xEE && *(char *)(TEST_PAGE + PGSIZE + 1)== (char)0x88); *(char *)(TEST_PAGE + PGSIZE) = 1, *(char *)(TEST_PAGE + PGSIZE + 1) = 2; assert(*(char *)TEST_PAGE == (char)0xEE && *(char *)(TEST_PAGE + 1) == (char)0x88); ret = swap_in_page(entry, &rp0); assert(ret == 0); ret = swap_in_page(store, &rp1); assert(ret == 0); assert(rp1 != rp0); // free memory swap_list_del(rp0), swap_list_del(rp1); swap_page_del(rp0), swap_page_del(rp1); assert(page_ref(rp0) == 1 && page_ref(rp1) == 1); assert(nr_active_pages == 0 && list_empty(&(active_list.swap_list))); assert(nr_inactive_pages == 0 && list_empty(&(inactive_list.swap_list))); for (i = 0; i < HASH_LIST_SIZE; i ++) { assert(list_empty(hash_list + i)); } page_remove(pgdir, TEST_PAGE); page_remove(pgdir, (TEST_PAGE + PGSIZE)); #if PMXSHIFT != PUXSHIFT free_page(pa2page(PMD_ADDR(*get_pmd(pgdir, TEST_PAGE, 0)))); #endif #if PUXSHIFT != PGXSHIFT free_page(pa2page(PUD_ADDR(*get_pud(pgdir, TEST_PAGE, 0)))); #endif free_page(pa2page(PGD_ADDR(*get_pgd(pgdir, TEST_PAGE, 0)))); pgdir[PGX(TEST_PAGE)] = 0; mm->pgdir = NULL; mm_destroy(mm); check_mm_struct = NULL; assert(nr_active_pages == 0 && nr_inactive_pages == 0); for (offset = 0; offset < max_swap_offset; offset ++) { mem_map[offset] = SWAP_UNUSED; } assert(nr_used_pages_store == nr_used_pages()); assert(slab_allocated_store == slab_allocated()); kprintf("check_swap() succeeded.\n"); }
/* ucore use copy-on-write when forking a new process, * thus copy_range only copy pdt/pte and set their permission to * READONLY, a write will be handled in pgfault */ int copy_range(pgd_t *to, pgd_t *from, uintptr_t start, uintptr_t end, bool share) { assert(start % PGSIZE == 0 && end % PGSIZE == 0); assert(USER_ACCESS(start, end)); do { pte_t *ptep = get_pte(from, start, 0), *nptep; if (ptep == NULL) { if (get_pud(from, start, 0) == NULL) { start = ROUNDDOWN(start + PUSIZE, PUSIZE); } else if (get_pmd(from, start, 0) == NULL) { start = ROUNDDOWN(start + PMSIZE, PMSIZE); } else { start = ROUNDDOWN(start + PTSIZE, PTSIZE); } continue ; } if (*ptep != 0) { if ((nptep = get_pte(to, start, 1)) == NULL) { return -E_NO_MEM; } int ret; //kprintf("%08x %08x %08x\n", nptep, *nptep, start); assert(*ptep != 0 && *nptep == 0); #ifdef ARCH_ARM //TODO add code to handle swap if (ptep_present(ptep)){ //no body should be able to write this page //before a W-pgfault pte_perm_t perm = PTE_P; if(ptep_u_read(ptep)) perm |= PTE_U; if(!share){ //Original page should be set to readonly! //because Copy-on-write may happen //after the current proccess modifies its page ptep_set_perm(ptep, perm); }else{ if(ptep_u_write(ptep)){ perm |= PTE_W; } } struct Page *page = pte2page(*ptep); ret = page_insert(to, page, start, perm); } #else /* ARCH_ARM */ if (ptep_present(ptep)) { pte_perm_t perm = ptep_get_perm(ptep, PTE_USER); struct Page *page = pte2page(*ptep); if (!share && ptep_s_write(ptep)) { ptep_unset_s_write(&perm); pte_perm_t perm_with_swap_stat = ptep_get_perm(ptep, PTE_SWAP); ptep_set_perm(&perm_with_swap_stat, perm); page_insert(from, page, start, perm_with_swap_stat); } ret = page_insert(to, page, start, perm); assert(ret == 0); } #endif /* ARCH_ARM */ else { #ifdef CONFIG_NO_SWAP assert(0); #endif swap_entry_t entry; ptep_copy(&entry, ptep); swap_duplicate(entry); ptep_copy(nptep, &entry); } } start += PGSIZE; } while (start != 0 && start < end); #ifdef ARCH_ARM /* we have modified the PTE of the original * process, so invalidate TLB */ tlb_invalidate_all(); #endif return 0; }
int do_pgfault(struct mm_struct *mm, machine_word_t error_code, uintptr_t addr) { if (mm == NULL) { assert(current != NULL); /* Chen Yuheng * give handler a chance to deal with it */ kprintf ("page fault in kernel thread: pid = %d, name = %s, %d %08x.\n", current->pid, current->name, error_code, addr); return -E_KILLED; } bool need_unlock = 1; if (!try_lock_mm(mm)) { if (current != NULL && mm->locked_by == current->pid) { need_unlock = 0; } else { lock_mm(mm); } } int ret = -E_INVAL; struct vma_struct *vma = find_vma(mm, addr); if (vma == NULL || vma->vm_start > addr) { goto failed; } if (vma->vm_flags & VM_STACK) { if (addr < vma->vm_start + PGSIZE) { goto failed; } } //kprintf("@ %x %08x\n", vma->vm_flags, vma->vm_start); //assert((vma->vm_flags & VM_IO)==0); if (vma->vm_flags & VM_IO) { ret = -E_INVAL; goto failed; } switch (error_code & 3) { default: /* default is 3: write, present */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) { goto failed; } break; case 1: /* read, present */ goto failed; case 0: /* read, not present */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) { goto failed; } } pte_perm_t perm, nperm; #ifdef ARCH_ARM /* ARM9 software emulated PTE_xxx */ perm = PTE_P | PTE_U; if (vma->vm_flags & VM_WRITE) { perm |= PTE_W; } #else ptep_unmap(&perm); ptep_set_u_read(&perm); if (vma->vm_flags & VM_WRITE) { ptep_set_u_write(&perm); } #endif addr = ROUNDDOWN(addr, PGSIZE); ret = -E_NO_MEM; pte_t *ptep; if ((ptep = get_pte(mm->pgdir, addr, 1)) == NULL) { goto failed; } if (ptep_invalid(ptep)) { #ifdef UCONFIG_BIONIC_LIBC if (vma->mfile.file != NULL) { struct file *file = vma->mfile.file; off_t old_pos = file->pos, new_pos = vma->mfile.offset + addr - vma->vm_start; #ifdef SHARE_MAPPED_FILE struct mapped_addr *maddr = find_maddr(file, new_pos, NULL); if (maddr == NULL) { #endif // SHARE_MAPPED_FILE struct Page *page; if ((page = alloc_page()) == NULL) { assert(false); goto failed; } nperm = perm; #ifdef ARCH_ARM /* ARM9 software emulated PTE_xxx */ nperm &= ~PTE_W; #else ptep_unset_s_write(&nperm); #endif page_insert_pte(mm->pgdir, page, ptep, addr, nperm); if ((ret = filestruct_setpos(file, new_pos)) != 0) { assert(false); goto failed; } filestruct_read(file, page2kva(page), PGSIZE); if ((ret = filestruct_setpos(file, old_pos)) != 0) { assert(false); goto failed; } #ifdef SHARE_MAPPED_FILE if ((maddr = (struct mapped_addr *) kmalloc(sizeof(struct mapped_addr))) != NULL) { maddr->page = page; maddr->offset = new_pos; page->maddr = maddr; list_add(& (file->node->mapped_addr_list), &(maddr->list)); } else { assert(false); } } else { nperm = perm; #ifdef ARCH_ARM /* ARM9 software emulated PTE_xxx */ nperm &= ~PTE_W; #else ptep_unset_s_write(&nperm); #endif page_insert_pte(mm->pgdir, maddr->page, ptep, addr, nperm); } #endif //SHARE_MAPPED_FILE } else #endif //UCONFIG_BIONIC_LIBC if (!(vma->vm_flags & VM_SHARE)) { if (pgdir_alloc_page(mm->pgdir, addr, perm) == NULL) { goto failed; } #ifdef UCONFIG_BIONIC_LIBC if (vma->vm_flags & VM_ANONYMOUS) { memset((void *)addr, 0, PGSIZE); } #endif //UCONFIG_BIONIC_LIBC } else { //shared mem lock_shmem(vma->shmem); uintptr_t shmem_addr = addr - vma->vm_start + vma->shmem_off; pte_t *sh_ptep = shmem_get_entry(vma->shmem, shmem_addr, 1); if (sh_ptep == NULL || ptep_invalid(sh_ptep)) { unlock_shmem(vma->shmem); goto failed; } unlock_shmem(vma->shmem); if (ptep_present(sh_ptep)) { page_insert(mm->pgdir, pa2page(*sh_ptep), addr, perm); } else { #ifdef UCONFIG_SWAP swap_duplicate(*ptep); ptep_copy(ptep, sh_ptep); #else panic("NO SWAP\n"); #endif } } } else { //a present page, handle copy-on-write (cow) struct Page *page, *newpage = NULL; bool cow = ((vma->vm_flags & (VM_SHARE | VM_WRITE)) == VM_WRITE), may_copy = 1; #if 1 if (!(!ptep_present(ptep) || ((error_code & 2) && !ptep_u_write(ptep) && cow))) { //assert(PADDR(mm->pgdir) == rcr3()); kprintf("%p %p %d %d %x\n", *ptep, addr, error_code, cow, vma->vm_flags); assert(0); } #endif if (cow) { newpage = alloc_page(); } if (ptep_present(ptep)) { page = pte2page(*ptep); } else { #ifdef UCONFIG_SWAP if ((ret = swap_in_page(*ptep, &page)) != 0) { if (newpage != NULL) { free_page(newpage); } goto failed; } #else assert(0); #endif if (!(error_code & 2) && cow) { #ifdef ARCH_ARM //#warning ARM9 software emulated PTE_xxx perm &= ~PTE_W; #else ptep_unset_s_write(&perm); #endif may_copy = 0; } } if (cow && may_copy) { #ifdef UCONFIG_SWAP if (page_ref(page) + swap_page_count(page) > 1) { #else if (page_ref(page) > 1) { #endif if (newpage == NULL) { goto failed; } memcpy(page2kva(newpage), page2kva(page), PGSIZE); //kprintf("COW!\n"); page = newpage, newpage = NULL; } } #ifdef UCONFIG_BIONIC_LIBC else if (vma->mfile.file != NULL) { #ifdef UCONFIG_SWAP assert(page_reg(page) + swap_page_count(page) == 1); #else assert(page_ref(page) == 1); #endif #ifdef SHARE_MAPPED_FILE off_t offset = vma->mfile.offset + addr - vma->vm_start; struct mapped_addr *maddr = find_maddr(vma->mfile.file, offset, page); if (maddr != NULL) { list_del(&(maddr->list)); kfree(maddr); page->maddr = NULL; assert(find_maddr(vma->mfile.file, offset, page) == NULL); } else { } #endif //SHARE_MAPPED_FILE } #endif //UCONFIG_BIONIC_LIBC else { } page_insert(mm->pgdir, page, addr, perm); if (newpage != NULL) { free_page(newpage); } } ret = 0; failed: if (need_unlock) { unlock_mm(mm); } return ret; }