Example #1
0
// swap_copy_entry - copy the contents of the page behind a swap entry into
//                   a freshly allocated page and register that new page
//                   with the swap subsystem (swap_page_add + active list)
//   entry: swap entry whose page contents are copied
//   store: out-parameter; receives newpage->index on success
// Returns 0 on success, -E_INVAL if store is NULL, -E_NO_MEM if alloc_page()
// or swap_page_add() fails, or whatever error swap_in_page() reported.
// swap_duplicate(entry) is called up front and swap_remove_entry(entry) is
// called on every path after the NULL check, success or failure.
int
swap_copy_entry(swap_entry_t entry, swap_entry_t *store) {
    if (store == NULL) {
        return -E_INVAL;
    }

    int rc = -E_NO_MEM;
    struct Page *src, *copy;

    swap_duplicate(entry);
    copy = alloc_page();
    if (copy != NULL) {
        rc = swap_in_page(entry, &src);
        if (rc == 0) {
            if (swap_page_add(copy, 0)) {
                swap_active_list_add(copy);
                memcpy(page2kva(copy), page2kva(src), PGSIZE);
                *store = copy->index;
            }
            else {
                rc = -E_NO_MEM;
            }
        }
        // any failure after the allocation gives the new page back
        if (rc != 0) {
            free_page(copy);
        }
    }
    swap_remove_entry(entry);
    return rc;
}
Example #2
0
/*
 * copy_one_pte - propagate a single page-table entry from old_pte to new_pte.
 *
 * - An empty entry is ignored.
 * - A non-present entry is handed to swap_duplicate() and copied verbatim.
 * - A present entry whose page number is at or above MAP_NR(high_memory),
 *   or whose page is reserved, is copied verbatim.
 * - Any other present entry is write-protected when cow is non-zero, marked
 *   dirty if delete_from_swap_cache() returns non-zero, installed in new_pte
 *   (via pte_mkold) and written back to old_pte; the page's count is bumped.
 */
static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte, int cow)
{
	pte_t entry = *old_pte;
	unsigned long nr;

	if (pte_none(entry))
		return;

	if (pte_present(entry)) {
		nr = MAP_NR(pte_page(entry));
		if (nr < MAP_NR(high_memory) && !PageReserved(mem_map+nr)) {
			if (cow)
				entry = pte_wrprotect(entry);
			if (delete_from_swap_cache(nr))
				entry = pte_mkdirty(entry);
			set_pte(new_pte, pte_mkold(entry));
			set_pte(old_pte, entry);
			mem_map[nr].count++;
			return;
		}
	} else {
		swap_duplicate(pte_val(entry));
	}

	/* swap entries and untracked/reserved pages: plain copy */
	set_pte(new_pte, entry);
}
Example #3
0
// swap_out_vma - try unmap pte & move pages into swap active list.
//   mm:      address space whose page table (mm->pgdir) is scanned
//   vma:     region being scanned; the scan ends at vma->vm_end rounded up
//            to a page boundary
//   addr:    address to start scanning from; must lie in [vm_start, vm_end)
//   require: upper bound on the number of ptes to turn into swap entries
// Returns the number of ptes that were replaced by swap entries; 0 when
// require is 0 or addr lies outside the vma.
static int
swap_out_vma(struct mm_struct *mm, struct vma_struct *vma, uintptr_t addr, size_t require) {
    if (require == 0 || !(addr >= vma->vm_start && addr < vma->vm_end)) {
        return 0;
    }
    uintptr_t end;
    size_t free_count = 0;
    addr = ROUNDDOWN(addr, PGSIZE), end = ROUNDUP(vma->vm_end, PGSIZE);
    while (addr < end && require != 0) {
        pte_t *ptep = get_pte(mm->pgdir, addr, 0);
        if (ptep == NULL) {
            // no page table here: jump to the start of the next region at the
            // highest level that is missing (PUSIZE, PMSIZE or PTSIZE)
            if (get_pud(mm->pgdir, addr, 0) == NULL) {
                addr = ROUNDDOWN(addr + PUSIZE, PUSIZE);
            }
            else if (get_pmd(mm->pgdir, addr, 0) == NULL) {
                addr = ROUNDDOWN(addr + PMSIZE, PMSIZE);
            }
            else {
                addr = ROUNDDOWN(addr + PTSIZE, PTSIZE);
            }
            continue ;
        }
        if (ptep_present(ptep)) {
            struct Page *page = pte2page(*ptep);
            assert(!PageReserved(page));
            // accessed since the last scan: only clear the accessed bit and
            // leave the mapping in place for this pass
            if (ptep_accessed(ptep)) {
                ptep_unset_accessed(ptep);
                mp_tlb_invalidate(mm->pgdir, addr);
                goto try_next_entry;
            }
            if (!PageSwap(page)) {
                // page is not yet known to swap: register it, or skip the
                // pte if swap_page_add() fails
                if (!swap_page_add(page, 0)) {
                    goto try_next_entry;
                }
                swap_active_list_add(page);
            }
            else if (ptep_dirty(ptep)) {
                // carry the pte's dirty state over to the page
                SetPageDirty(page);
            }
            // replace the mapping by the page's swap entry: the pte takes a
            // reference on the entry and gives up its reference on the page
            swap_entry_t entry = page->index;
            swap_duplicate(entry);
            page_ref_dec(page);
			ptep_copy(ptep, &entry);
            mp_tlb_invalidate(mm->pgdir, addr);
            // remember where to resume scanning
            mm->swap_address = addr + PGSIZE;
            free_count ++, require --;
            // shared mapping with exactly one page reference left: if the
            // shmem slot still holds a present entry, store the swap entry
            // there as well
            if ((vma->vm_flags & VM_SHARE) && page_ref(page) == 1) {
                uintptr_t shmem_addr = addr - vma->vm_start + vma->shmem_off;
                pte_t *sh_ptep = shmem_get_entry(vma->shmem, shmem_addr, 0);
                assert(sh_ptep != NULL && ! ptep_invalid(sh_ptep));
                if (ptep_present(sh_ptep)) {
                    shmem_insert_entry(vma->shmem, shmem_addr, entry);
                }
            }
        }
    try_next_entry:
        addr += PGSIZE;
    }
    return free_count;
}
Example #4
0
/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 *
 * @entry:    swap entry to look up or read in
 * @gfp_mask: allocation flags for the new page; masked with GFP_KERNEL
 *            before being handed to add_to_swap_cache()
 * @vma:      passed through to alloc_page_vma()
 * @addr:     passed through to alloc_page_vma()
 *
 * Returns the page found in swapper_space, or the newly inserted page
 * (locked, with the read already started), or NULL on failure.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 * The page is allocated at most once and reused across retries.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		if (!swap_duplicate(entry))
			break;

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-EEXIST) if there is already a page associated
		 * with this entry in the swap cache: added by a racing
		 * read_swap_cache_async, or add_to_swap or shmem_writepage
		 * re-using the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
		if (likely(!err)) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
		/*
		 * Insertion failed: undo the page flags and the swap
		 * reference taken by swap_duplicate() above.
		 */
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		swap_free(entry);
		/*
		 * err is only read here, after add_to_swap_cache() assigned it;
		 * every earlier exit leaves the loop via break.  Any error other
		 * than -ENOMEM goes round again, starting with the cache lookup.
		 */
	} while (err != -ENOMEM);

	/* Drop the page we allocated but did not end up inserting. */
	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
Example #5
0
/*
 * read_swap_cache_async - return the swap-cache page for entry, starting a
 * read into a new page if none is cached yet.
 *
 * @entry: swap entry to look up / read
 * @wait:  passed through to rw_swap_page()
 *
 * swap_duplicate(entry) is called first; if it fails, NULL is returned.
 * If the entry is already in the swap cache (checked before and again after
 * the page allocation), that page is returned and both the freshly
 * allocated page (if any) and the extra swap reference are released.
 * Otherwise the new page is added to the swap cache, gets PG_locked set and
 * is passed to rw_swap_page(READ, ...) before being returned.
 * Returns NULL when the page allocation or add_to_swap_cache() fails.
 */
struct page * read_swap_cache_async(unsigned long entry, int wait)
{
	struct page *cached = NULL, *fresh;
	unsigned long fresh_addr;

#ifdef DEBUG_SWAP
	printk("DebugVM: read_swap_cache_async entry %08lx%s\n",
	       entry, wait ? ", wait" : "");
#endif
	/* Make sure the swap entry is still in use (accounts for the swap cache). */
	if (!swap_duplicate(entry))
		return cached;

	cached = lookup_swap_cache(entry);
	if (!cached) {
		fresh_addr = __get_free_page(GFP_USER);
		if (fresh_addr) {
			fresh = mem_map + MAP_NR(fresh_addr);

			/* Look again, in case we stalled in the allocation. */
			cached = lookup_swap_cache(entry);
			if (!cached && add_to_swap_cache(fresh, entry)) {
				set_bit(PG_locked, &fresh->flags);
				rw_swap_page(READ, entry, (char *) fresh_addr, wait);
#ifdef DEBUG_SWAP
				printk("DebugVM: read_swap_cache_async created "
				       "entry %08lx at %p\n",
				       entry, (char *) page_address(fresh));
#endif
				return fresh;
			}
			/* Lost the race or could not insert: give the page back. */
			__free_page(fresh);
		}
	}

	swap_free(entry);
	return cached;
}
Example #6
0
/*
 * Strange swizzling function only for use by shmem_writepage.
 *
 * Tries to insert page into the swap cache under entry (GFP_ATOMIC).
 * On success the page is removed from the page cache, the page-cache
 * reference is dropped, the swap entry gains a reference and the page is
 * marked dirty.  Returns 0 on success or the error from
 * __add_to_swap_cache(); an -EEXIST failure is counted as exist_race.
 */
int move_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int rc = __add_to_swap_cache(page, entry, GFP_ATOMIC);

	if (rc) {
		if (rc == -EEXIST)
			INC_CACHE_INFO(exist_race);
		return rc;
	}

	remove_from_page_cache(page);
	page_cache_release(page);	/* pagecache ref */
	if (!swap_duplicate(entry))
		BUG();
	SetPageDirty(page);
	INC_CACHE_INFO(add_total);
	return rc;
}
Example #7
0
// shmem_insert_entry - store entry in the shared-memory slot for addr
//   shmem: shared-memory object that owns the slot
//   addr:  offset inside the object; the slot is looked up with
//          shmem_get_entry(shmem, addr, 1)
//   entry: value to store (present pte, non-zero non-present entry, or 0)
// Returns 0 on success, -E_NO_MEM if no slot could be obtained.
int
shmem_insert_entry(struct shmem_struct *shmem, uintptr_t addr, pte_t entry) {
    pte_t *slot = shmem_get_entry(shmem, addr, 1);
    if (slot == NULL) {
        return -E_NO_MEM;
    }

    // an occupied slot is first handed to shmem_remove_entry_pte()
    if (*slot != 0) {
        shmem_remove_entry_pte(slot);
    }

    // take a reference on whatever the new entry designates
    if (entry != 0) {
        if (entry & PTE_P) {
            page_ref_inc(pte2page(entry));
        }
        else {
            swap_duplicate(entry);
        }
    }

    *slot = entry;
    return 0;
}
Example #8
0
// shmem_insert_entry - store entry in the shared-memory slot for addr
//   shmem: shared-memory object that owns the slot
//   addr:  offset inside the object; the slot is looked up with
//          shmem_get_entry(shmem, addr, 1)
//   entry: value to store (present, non-present but valid, or invalid)
// Returns 0 on success, -E_NO_MEM if no slot could be obtained.
int
shmem_insert_entry(struct shmem_struct *shmem, uintptr_t addr, pte_t entry) {
    pte_t *slot = shmem_get_entry(shmem, addr, 1);
    if (slot == NULL) {
        return -E_NO_MEM;
    }

    // a slot that already holds a valid entry is first handed to
    // shmem_remove_entry_pte()
    if (!ptep_invalid(slot)) {
        shmem_remove_entry_pte(slot);
    }

    // take a reference on whatever the new entry designates
    if (!ptep_present(&entry)) {
        if (!ptep_invalid(&entry)) {
            swap_duplicate(entry);
        }
    }
    else {
        page_ref_inc(pte2page(entry));
    }

    ptep_copy(slot, &entry);
    return 0;
}
Example #9
0
/*
 * read_swap_cache_async - return the swap-cache page for entry, starting a
 * read into a new page if none is cached yet.
 *
 * @entry: swap entry to look up / read
 * @wait:  passed through to rw_swap_page()
 *
 * swap_duplicate(entry) is called first; if it fails, NULL is returned.
 * If the entry is found in the swap cache (checked before and again after
 * the page allocation), that page is returned and both the freshly
 * allocated page (if any) and the extra swap reference are released.
 * Otherwise the new page is locked, added to the swap cache and passed to
 * rw_swap_page(READ, ...) before being returned.
 * Returns NULL when the page allocation fails.
 */
struct page * read_swap_cache_async(swp_entry_t entry, int wait)
{
	struct page *cached = NULL, *fresh;
	unsigned long fresh_addr;

	/* Make sure the swap entry is still in use (accounts for the swap cache). */
	if (!swap_duplicate(entry))
		return cached;

	cached = lookup_swap_cache(entry);
	if (!cached) {
		fresh_addr = __get_free_page(GFP_USER);
		if (fresh_addr) {
			fresh = virt_to_page(fresh_addr);

			/* Look again, in case we stalled in the allocation. */
			cached = lookup_swap_cache(entry);
			if (!cached) {
				/* Add it to the swap cache and read its contents. */
				lock_page(fresh);
				add_to_swap_cache(fresh, entry);
				rw_swap_page(READ, fresh, wait);
				return fresh;
			}
			/* Somebody else cached it meanwhile: drop our page. */
			page_cache_release(fresh);
		}
	}

	swap_free(entry);
	return cached;
}
Example #10
0
/*
 * add_to_swap_cache - insert page into the swap cache under entry.
 *
 * The page must not have a mapping yet (BUG otherwise).  A reference on the
 * swap entry is taken with swap_duplicate(); it is released again if the
 * page-cache insertion fails.
 *
 * Returns 0 on success, -ENOENT if swap_duplicate() fails, -EEXIST if
 * add_to_page_cache_unique() reports a non-zero result.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	if (page->mapping)
		BUG();

	if (!swap_duplicate(entry)) {
		INC_CACHE_INFO(noent_race);
		return -ENOENT;
	}

	if (add_to_page_cache_unique(page, &swapper_space, entry.val,
				     page_hash(&swapper_space, entry.val)) != 0) {
		/* insertion failed: undo the swap reference */
		swap_free(entry);
		INC_CACHE_INFO(exist_race);
		return -EEXIST;
	}

	/* sanity: the page is expected to be locked and flagged as swap cache */
	if (!PageLocked(page) || !PageSwapCache(page))
		BUG();

	INC_CACHE_INFO(add_total);
	return 0;
}
Example #11
0
/*
 * add_to_swap_cache - take a reference on entry and insert page into the
 * swap cache under it (GFP_KERNEL).
 *
 * Returns 0 on success, -ENOENT if swap_duplicate() fails, otherwise the
 * error from __add_to_swap_cache() (the swap reference is released first;
 * -EEXIST is additionally counted as exist_race).
 */
static int add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int rc;

	if (!swap_duplicate(entry)) {
		INC_CACHE_INFO(noent_race);
		return -ENOENT;
	}

	rc = __add_to_swap_cache(page, entry, GFP_KERNEL);
	if (!rc) {
		/*
		 * Anon pages are already on the LRU, we don't run
		 * lru_cache_add here.
		 */
		INC_CACHE_INFO(add_total);
		return 0;
	}

	swap_free(entry);
	if (rc == -EEXIST)
		INC_CACHE_INFO(exist_race);
	return rc;
}
Example #12
0
// page_launder - walk the inactive swap list (at most nr_inactive_pages
//                entries as counted on entry) and try to free its pages
//   - pages that still have references go back to the active list
//   - dirty pages whose swap entry cannot be freed are written out with
//     swapfs_write(); if they are still dirty afterwards they are put
//     back on the inactive list
//   - everything else is released with swap_free_page()
// Returns the number of pages freed.
int
page_launder(void) {
    size_t budget = nr_inactive_pages, freed = 0;
    list_entry_t *head = &(inactive_list.swap_list), *cur = list_next(head);
    while (budget -- > 0 && cur != head) {
        struct Page *page = le2page(cur, swap_link);
        // advance first: the page is unlinked from this list below
        cur = list_next(cur);
        if (!PageSwap(page) || PageActive(page)) {
            panic("inactive: wrong swap list.\n");
        }
        swap_list_del(page);
        if (page_ref(page) != 0) {
            swap_active_list_add(page);
            continue ;
        }
        swap_entry_t entry = page->index;
        if (!try_free_swap_entry(entry) && PageDirty(page)) {
            ClearPageDirty(page);
            // hold an extra count on the entry around the write-out
            swap_duplicate(entry);
            if (swapfs_write(entry, page) != 0) {
                SetPageDirty(page);
            }
            mem_map[swap_offset(entry)] --;
            if (page_ref(page) != 0) {
                swap_active_list_add(page);
                continue ;
            }
            if (PageDirty(page)) {
                swap_inactive_list_add(page);
                continue ;
            }
            try_free_swap_entry(entry);
        }
        freed ++;
        swap_free_page(page);
    }
    return freed;
}
Example #13
0
/*
 * do_pgfault - resolve a page fault at addr inside address space mm.
 *
 * @mm:         faulting address space; NULL is reported and rejected
 * @error_code: fault description; only the low two bits are examined
 *              (0: read/not present, 1: read/present, 2: write/not present,
 *               3: write/present)
 * @addr:       faulting address; rounded down to a page boundary before the
 *              page table is touched
 *
 * The mm is locked for the duration of the call unless try_lock_mm() fails
 * and mm->locked_by already names the current process, in which case the
 * lock is left alone.
 *
 * Returns 0 on success, -E_KILLED if mm is NULL, -E_INVAL if no suitable
 * vma covers addr or the access is not permitted, -E_NO_MEM if a page
 * table or page cannot be obtained, or the error reported by
 * swap_in_page() / filestruct_setpos().
 */
int do_pgfault(struct mm_struct *mm, machine_word_t error_code, uintptr_t addr)
{
	if (mm == NULL) {
		assert(current != NULL);
		/* Chen Yuheng 
		 * give handler a chance to deal with it 
		 */
		kprintf
		    ("page fault in kernel thread: pid = %d, name = %s, %d %08x.\n",
		     current->pid, current->name, error_code, addr);
		return -E_KILLED;
	}

	/* Only unlock on exit if this call is the one that took the lock. */
	bool need_unlock = 1;
	if (!try_lock_mm(mm)) {
		if (current != NULL && mm->locked_by == current->pid) {
			need_unlock = 0;
		} else {
			lock_mm(mm);
		}
	}

	int ret = -E_INVAL;
	struct vma_struct *vma = find_vma(mm, addr);
	if (vma == NULL || vma->vm_start > addr) {
		goto failed;
	}
	/* The lowest page of a stack vma is never mapped by a fault. */
	if (vma->vm_flags & VM_STACK) {
		if (addr < vma->vm_start + PGSIZE) {
			goto failed;
		}
	}
	//kprintf("@ %x %08x\n", vma->vm_flags, vma->vm_start);
	//assert((vma->vm_flags & VM_IO)==0);
	if (vma->vm_flags & VM_IO) {
		ret = -E_INVAL;
		goto failed;
	}
	switch (error_code & 3) {
	default:
		/* default is 3: write, present */
	case 2:		/* write, not present */
		if (!(vma->vm_flags & VM_WRITE)) {
			goto failed;
		}
		break;
	case 1:		/* read, present */
		goto failed;
	case 0:		/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC))) {
			goto failed;
		}
	}

	/* Build the permissions for the new mapping from the vma flags. */
	pte_perm_t perm, nperm;
#ifdef ARCH_ARM
	/* ARM9 software emulated PTE_xxx */
	perm = PTE_P | PTE_U;
	if (vma->vm_flags & VM_WRITE) {
		perm |= PTE_W;
	}
#else
	ptep_unmap(&perm);
	ptep_set_u_read(&perm);
	if (vma->vm_flags & VM_WRITE) {
		ptep_set_u_write(&perm);
	}
#endif
	addr = ROUNDDOWN(addr, PGSIZE);

	ret = -E_NO_MEM;

	pte_t *ptep;
	if ((ptep = get_pte(mm->pgdir, addr, 1)) == NULL) {
		goto failed;
	}
	if (ptep_invalid(ptep)) {
		/* Nothing mapped yet at addr. */
#ifdef UCONFIG_BIONIC_LIBC
		if (vma->mfile.file != NULL) {
			/* File-backed mapping: fill a page from the file. */
			struct file *file = vma->mfile.file;
			off_t old_pos = file->pos, new_pos =
			    vma->mfile.offset + addr - vma->vm_start;
#ifdef SHARE_MAPPED_FILE
			struct mapped_addr *maddr =
			    find_maddr(file, new_pos, NULL);
			if (maddr == NULL) {
#endif // SHARE_MAPPED_FILE
				struct Page *page;
				if ((page = alloc_page()) == NULL) {
					assert(false);
					goto failed;
				}
				nperm = perm;
#ifdef ARCH_ARM
				/* ARM9 software emulated PTE_xxx */
				nperm &= ~PTE_W;
#else
				ptep_unset_s_write(&nperm);
#endif
				page_insert_pte(mm->pgdir, page, ptep, addr,
						nperm);

				/* Read one page at new_pos, then restore the
				 * file position that was current before. */
				if ((ret =
				     filestruct_setpos(file, new_pos)) != 0) {
					assert(false);
					goto failed;
				}
				filestruct_read(file, page2kva(page), PGSIZE);
				if ((ret =
				     filestruct_setpos(file, old_pos)) != 0) {
					assert(false);
					goto failed;
				}
#ifdef SHARE_MAPPED_FILE
				/* Record the page so later faults on the same
				 * file offset can find it via find_maddr(). */
				if ((maddr = (struct mapped_addr *)
				     kmalloc(sizeof(struct mapped_addr))) !=
				    NULL) {
					maddr->page = page;
					maddr->offset = new_pos;
					page->maddr = maddr;
					list_add(&
						 (file->node->mapped_addr_list),
						 &(maddr->list));
				} else {
					assert(false);
				}
			} else {
				/* A page for this file offset is already
				 * recorded: map it without write permission. */
				nperm = perm;
#ifdef ARCH_ARM
				/* ARM9 software emulated PTE_xxx */
				nperm &= ~PTE_W;
#else
				ptep_unset_s_write(&nperm);
#endif
				page_insert_pte(mm->pgdir, maddr->page, ptep,
						addr, nperm);
			}
#endif //SHARE_MAPPED_FILE

		} else
#endif //UCONFIG_BIONIC_LIBC
		if (!(vma->vm_flags & VM_SHARE)) {
			/* Private mapping: allocate and map a new page. */
			if (pgdir_alloc_page(mm->pgdir, addr, perm) == NULL) {
				goto failed;
			}
#ifdef UCONFIG_BIONIC_LIBC
			if (vma->vm_flags & VM_ANONYMOUS) {
				memset((void *)addr, 0, PGSIZE);
			}
#endif //UCONFIG_BIONIC_LIBC
		} else {	//shared mem
			lock_shmem(vma->shmem);
			uintptr_t shmem_addr =
			    addr - vma->vm_start + vma->shmem_off;
			pte_t *sh_ptep =
			    shmem_get_entry(vma->shmem, shmem_addr, 1);
			if (sh_ptep == NULL || ptep_invalid(sh_ptep)) {
				unlock_shmem(vma->shmem);
				goto failed;
			}
			unlock_shmem(vma->shmem);
			if (ptep_present(sh_ptep)) {
				page_insert(mm->pgdir, pa2page(*sh_ptep), addr,
					    perm);
			} else {
#ifdef UCONFIG_SWAP
				/*
				 * The shmem slot holds a swap entry that is
				 * about to be copied into this page table, so
				 * the reference must be taken on that entry.
				 * (*ptep is still invalid on this path and
				 * does not name a swap entry.)
				 */
				swap_duplicate(*sh_ptep);
				ptep_copy(ptep, sh_ptep);
#else
				panic("NO SWAP\n");
#endif
			}
		}
	} else {		//a present page, handle copy-on-write (cow) 
		struct Page *page, *newpage = NULL;
		/* cow: writable but not shared vma */
		bool cow =
		    ((vma->vm_flags & (VM_SHARE | VM_WRITE)) == VM_WRITE),
		    may_copy = 1;

#if 1
		/* Sanity check: the only valid-pte faults expected here are a
		 * swapped-out page or a write to a read-only cow page. */
		if (!(!ptep_present(ptep)
		      || ((error_code & 2) && !ptep_u_write(ptep) && cow))) {
			//assert(PADDR(mm->pgdir) == rcr3());
			kprintf("%p %p %d %d %x\n", *ptep, addr, error_code,
				cow, vma->vm_flags);
			assert(0);
		}
#endif

		/* Allocate the potential copy target up front; a NULL result
		 * is only treated as an error if a copy turns out to be needed. */
		if (cow) {
			newpage = alloc_page();
		}
		if (ptep_present(ptep)) {
			page = pte2page(*ptep);
		} else {
#ifdef UCONFIG_SWAP
			if ((ret = swap_in_page(*ptep, &page)) != 0) {
				if (newpage != NULL) {
					free_page(newpage);
				}
				goto failed;
			}
#else
			assert(0);
#endif
			/* Read fault on a swapped-out cow page: map it without
			 * write permission and skip the copy for now. */
			if (!(error_code & 2) && cow) {
#ifdef ARCH_ARM
//#warning ARM9 software emulated PTE_xxx
				perm &= ~PTE_W;
#else
				ptep_unset_s_write(&perm);
#endif
				may_copy = 0;
			}
		}

		if (cow && may_copy) {
			/* Copy only if someone else still refers to the page. */
#ifdef UCONFIG_SWAP
			if (page_ref(page) + swap_page_count(page) > 1) {
#else
			if (page_ref(page) > 1) {
#endif
				if (newpage == NULL) {
					goto failed;
				}
				memcpy(page2kva(newpage), page2kva(page),
				       PGSIZE);
				//kprintf("COW!\n");
				page = newpage, newpage = NULL;
			}
		}
#ifdef UCONFIG_BIONIC_LIBC
		else if (vma->mfile.file != NULL) {
#ifdef UCONFIG_SWAP
			assert(page_ref(page) + swap_page_count(page) == 1);
#else
			assert(page_ref(page) == 1);
#endif

#ifdef SHARE_MAPPED_FILE
			off_t offset = vma->mfile.offset + addr - vma->vm_start;
			struct mapped_addr *maddr =
			    find_maddr(vma->mfile.file, offset, page);
			if (maddr != NULL) {
				list_del(&(maddr->list));
				kfree(maddr);
				page->maddr = NULL;
				assert(find_maddr(vma->mfile.file, offset, page)
				       == NULL);
			} else {
			}
#endif //SHARE_MAPPED_FILE
		}
#endif //UCONFIG_BIONIC_LIBC
		else {
		}
		page_insert(mm->pgdir, page, addr, perm);
		/* The pre-allocated page was not needed after all. */
		if (newpage != NULL) {
			free_page(newpage);
		}
	}
	ret = 0;

failed:
	if (need_unlock) {
		unlock_mm(mm);
	}
	return ret;
}
Example #14
0
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	unsigned int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering, which clusters forked mms
	 * together, child after parent.  If we race with dup_mmap(), we
	 * prefer to resolve parent before child, lest we miss entries
	 * duplicated after we scanned child: using last mm would invert
	 * that.  Though it's only a serious concern when an overflowed
	 * swap count is reset from SWAP_MAP_MAX, preventing a rescan.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * there are races when an instance of an entry might be missed.
	 */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/* 
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it. 
		 */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry, NULL, 0);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page_locked" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * Remove all references to entry.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_mm(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (*swap_map > 1 && !retval &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				if (!atomic_inc_not_zero(&mm->mm_users))
					continue;
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (swcount <= 1)
					;
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_mm(mm, entry, page);
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (retval) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			spin_lock(&swap_lock);
			*swap_map = 1;
			spin_unlock(&swap_lock);
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_unmap could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 *
		 * Note shmem_unuse already deleted a swappage from
		 * the swap cache, unless the move to filepage failed:
		 * in which case it left swappage in cache, lowered its
		 * swap count to pass quickly through the loops above,
		 * and now we must reincrement count to try again later.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}
		if (PageSwapCache(page)) {
			if (shmem)
				swap_duplicate(entry);
			else
				delete_from_swap_cache(page);
		}

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so shrink_page_list will preserve it.
		 */
		SetPageDirty(page);
		unlock_page(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.
		 */
		cond_resched();
	}
Example #15
0
/* copy_range - duplicate the mappings of [start, end) from page directory
 * `from` into page directory `to`.
 *
 * ucore uses copy-on-write when forking a new process, so no page contents
 * are copied here: only the ptes are replicated.  Unless `share` is set,
 * write permission is removed so that a later write is handled in pgfault.
 * Non-present, non-empty entries are treated as swap entries: they are
 * passed to swap_duplicate() and copied as-is.
 *
 * start and end must be page aligned and lie in user space.
 * Returns 0 on success, -E_NO_MEM if a destination page table cannot be
 * obtained.
 */
int
copy_range(pgd_t *to, pgd_t *from, uintptr_t start, uintptr_t end, bool share) {
    assert(start % PGSIZE == 0 && end % PGSIZE == 0);
    assert(USER_ACCESS(start, end));

    do {
        pte_t *src = get_pte(from, start, 0), *dst;
        if (src == NULL) {
            /* no page table here: skip to the next region at the highest
             * level that is missing */
            if (get_pud(from, start, 0) == NULL) {
                start = ROUNDDOWN(start + PUSIZE, PUSIZE);
            }
            else if (get_pmd(from, start, 0) == NULL) {
                start = ROUNDDOWN(start + PMSIZE, PMSIZE);
            }
            else {
                start = ROUNDDOWN(start + PTSIZE, PTSIZE);
            }
            continue ;
        }
        if (*src == 0) {
            /* empty slot: nothing to replicate */
            start += PGSIZE;
            continue ;
        }

        if ((dst = get_pte(to, start, 1)) == NULL) {
            return -E_NO_MEM;
        }
        int rc;
        //kprintf("%08x %08x %08x\n", dst, *dst, start);
        assert(*src != 0 && *dst == 0);
#ifdef ARCH_ARM
        //TODO  add code to handle swap
        if (ptep_present(src)) {
            /* nobody should be able to write this page before a
             * write page fault */
            pte_perm_t perm = PTE_P;
            if (ptep_u_read(src)) {
                perm |= PTE_U;
            }
            if (share) {
                if (ptep_u_write(src)) {
                    perm |= PTE_W;
                }
            }
            else {
                /* the original page must become read-only as well,
                 * because copy-on-write may happen after the current
                 * process modifies its page */
                ptep_set_perm(src, perm);
            }
            struct Page *page = pte2page(*src);
            rc = page_insert(to, page, start, perm);
        }
#else /* ARCH_ARM */
        if (ptep_present(src)) {
            pte_perm_t perm = ptep_get_perm(src, PTE_USER);
            struct Page *page = pte2page(*src);
            if (!share && ptep_s_write(src)) {
                /* re-insert the source mapping without write permission,
                 * keeping its PTE_SWAP status bits */
                ptep_unset_s_write(&perm);
                pte_perm_t src_perm = ptep_get_perm(src, PTE_SWAP);
                ptep_set_perm(&src_perm, perm);
                page_insert(from, page, start, src_perm);
            }
            rc = page_insert(to, page, start, perm);
            assert(rc == 0);
        }
#endif /* ARCH_ARM */
        else {
#ifdef CONFIG_NO_SWAP
            assert(0);
#endif
            swap_entry_t entry;
            ptep_copy(&entry, src);
            swap_duplicate(entry);
            ptep_copy(dst, &entry);
        }
        start += PGSIZE;
    } while (start != 0 && start < end);
#ifdef ARCH_ARM
    /* we have modified the PTE of the original
     * process, so invalidate TLB */
    tlb_invalidate_all();
#endif
    return 0;
}
Example #16
0
/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * NOTE! If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 *
 * NOTE(review): in this version every return statement in the
 * function returns 0, so the 1 / "other value" cases described
 * above are not produced by the code below.
 *
 * @mm:         address space the pte belongs to (swap_cnt and rss are updated)
 * @vma:        area containing address; used for the cache/TLB flushes
 * @address:    virtual address mapped by page_table
 * @page_table: the pte to try to unmap
 * @gfp_mask:   not referenced in this function body
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
	pte_t pte;
	swp_entry_t entry;
	struct page * page;
	int onlist;

	pte = *page_table;
	if (!pte_present(pte))
		goto out_failed;
	page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		goto out_failed;

	if (mm->swap_cnt)
		mm->swap_cnt--;

	onlist = PageActive(page);
	/* Don't look at this pte if it's been accessed recently. */
	if (ptep_test_and_clear_young(page_table)) {
		age_page_up(page);
		goto out_failed;
	}
	if (!onlist)
		/* The page is still mapped, so it can't be freeable... */
		age_page_down_ageonly(page);

	/*
	 * If the page is in active use by us, or if the page
	 * is in active use by others, don't unmap it or
	 * (worse) start unneeded IO.
	 */
	if (page->age > 0)
		goto out_failed;

	if (TryLockPage(page))
		goto out_failed;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	pte = ptep_get_and_clear(page_table);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 *
	 * Return 0, as we didn't actually free any real
	 * memory, and we should just continue our scan.
	 *
	 * Note: the three labels inside this block are shared exit
	 * paths that later code (and the early checks above) jump
	 * into with goto, bypassing the PageSwapCache() test.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		if (pte_dirty(pte))
			set_page_dirty(page);
set_swap_pte:
		/* Install the swap entry in place of the cleared pte. */
		swap_duplicate(entry);
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		/* Common tail once the pte no longer maps the page. */
		UnlockPage(page);
		mm->rss--;
		flush_tlb_page(vma, address);
		deactivate_page(page);
		page_cache_release(page);
out_failed:
		return 0;
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	flush_cache_page(vma, address);
	if (!pte_dirty(pte))
		goto drop_pte;

	/*
	 * Ok, it's really dirty. That means that
	 * we should either create a new swap cache
	 * entry for it, or we should write it back
	 * to its own backing store.
	 */
	if (page->mapping) {
		set_page_dirty(page);
		goto drop_pte;
	}

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock_restore; /* No swap space left */

	/* Add it to the swap cache and mark it dirty */
	add_to_swap_cache(page, entry);
	set_page_dirty(page);
	goto set_swap_pte;

out_unlock_restore:
	/* Put the original pte back; the page stays mapped. */
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
Example #17
0
File: vmm.c Project: jefjin/ucore
// do_pgfault - interrupt handler to process the page fault exception
//
// @mm:         address space the fault happened in; a NULL mm means the
//              fault came from a kernel thread and causes a panic
// @error_code: fault error code; only the low two bits are examined
//              (bit 0: page present, bit 1: write access)
// @addr:       faulting address
//
// Returns 0 when the fault has been resolved, -E_INVAL when no vma covers
// addr or the vma does not permit the access, -E_NO_MEM when a page table
// or page frame cannot be obtained, or the error code of swap_in_page().
// mm is locked for the duration of the call.
int
do_pgfault(struct mm_struct *mm, uint32_t error_code, uintptr_t addr) {
    if (mm == NULL) {
        assert(current != NULL);
        panic("page fault in kernel thread: pid = %d, %d %08x.\n",
                current->pid, error_code, addr);
    }
    lock_mm(mm);

    int ret = -E_INVAL;
    struct vma_struct *vma = find_vma(mm, addr);
    if (vma == NULL || vma->vm_start > addr) {
        goto failed;
    }
    if (vma->vm_flags & VM_STACK) {
        // faults inside the lowest page of a stack vma are always rejected
        if (addr < vma->vm_start + PGSIZE) {
            goto failed;
        }
    }

    switch (error_code & 3) {
    default:
            /* default is 3: write, present */
    case 2: /* write, not present */
        if (!(vma->vm_flags & VM_WRITE)) {
            goto failed;
        }
        break;
    case 1: /* read, present */
        goto failed;
    case 0: /* read, not present */
        if (!(vma->vm_flags & (VM_READ | VM_EXEC))) {
            goto failed;
        }
        break;
    }

    uint32_t perm = PTE_U;
    if (vma->vm_flags & VM_WRITE) {
        perm |= PTE_W;
    }
    addr = ROUNDDOWN(addr, PGSIZE);

    ret = -E_NO_MEM;
    pte_t *ptep;

    if ((ptep = get_pte(mm->pgdir, addr, 1)) == NULL) {
        goto failed;
    }
    if (*ptep == 0) {
        // nothing has ever been mapped at addr
        if (!(vma->vm_flags & VM_SHARE)) {
            if (pgdir_alloc_page(mm->pgdir, addr, perm) == NULL) {
                goto failed;
            }
        }
        else {
            lock_shmem(vma->shmem);
            uintptr_t shmem_addr = addr - vma->vm_start + vma->shmem_off;
            pte_t *sh_ptep = shmem_get_entry(vma->shmem, shmem_addr, 1);
            // read the shared entry exactly once, while the shmem lock is
            // still held, and work on that snapshot afterwards
            pte_t sh_entry = (sh_ptep != NULL) ? *sh_ptep : 0;
            unlock_shmem(vma->shmem);
            if (sh_entry == 0) {
                goto failed;
            }
            if (sh_entry & PTE_P) {
                page_insert(mm->pgdir, pa2page(sh_entry), addr, perm);
            }
            else {
                // the shared page is swapped out: this pte becomes one more
                // user of the shared swap entry. The reference has to be
                // taken on the shared entry; *ptep is still 0 at this point.
                swap_duplicate(sh_entry);
                *ptep = sh_entry;
            }
        }
    }
    else {
        struct Page *page, *newpage = NULL;
        // cow: writable mapping that is not shared
        bool cow = ((vma->vm_flags & (VM_SHARE | VM_WRITE)) == VM_WRITE), may_copy = 1;

        // a present pte can only get here through a write to a
        // write-protected page of a cow mapping
        assert(!(*ptep & PTE_P) || ((error_code & 2) && !(*ptep & PTE_W) && cow));
        if (cow) {
            // allocated up front; released again below if no copy is made
            newpage = alloc_page();
        }
        if (*ptep & PTE_P) {
            page = pte2page(*ptep);
        }
        else {
            if ((ret = swap_in_page(*ptep, &page)) != 0) {
                if (newpage != NULL) {
                    free_page(newpage);
                }
                goto failed;
            }
            if (!(error_code & 2) && cow) {
                // read fault on a swapped-out cow page: map it without
                // PTE_W and leave the copy to a later write fault
                perm &= ~PTE_W;
                may_copy = 0;
            }
        }

        if (cow && may_copy) {
            // the page has other users (mappings and/or swap references),
            // so this mapping needs its own private copy
            if (page_ref(page) + swap_page_count(page) > 1) {
                if (newpage == NULL) {
                    // ret may have been overwritten with 0 by a successful
                    // swap_in_page(); report the allocation failure
                    ret = -E_NO_MEM;
                    goto failed;
                }
                memcpy(page2kva(newpage), page2kva(page), PGSIZE);
                page = newpage, newpage = NULL;
            }
        }
        page_insert(mm->pgdir, page, addr, perm);
        if (newpage != NULL) {
            free_page(newpage);
        }
    }
    ret = 0;

failed:
    unlock_mm(mm);
    return ret;
}
Example #18
0
// check_swap - check the correctness of swap & page replacement algorithm
//
// Self-test driven entirely by assert(). It builds a temporary mm with a
// single vma at TEST_PAGE, edits mem_map[] by hand so that swap offset 1
// (and, near the end, offset 2) is the only slot the allocator can hand
// out, and then exercises swap entry allocation and removal, the active
// and inactive lists, swap_out_mm(), page faults on swapped-out pages and
// swap_copy_entry(). Before returning it asserts that the used-page count
// and the slab allocation count are back at the values recorded on entry,
// then prints a success message.
static void
check_swap(void) {
    // baselines for the leak checks at the very end
    size_t nr_used_pages_store = nr_used_pages();
    size_t slab_allocated_store = slab_allocated();

    // set every slot from offset 2 upward to 1; per the asserts below this
    // leaves offset 1 as the slot try_alloc_swap_entry() hands out
    size_t offset;
    for (offset = 2; offset < max_swap_offset; offset ++) {
        mem_map[offset] = 1;
    }

    struct mm_struct *mm = mm_create();
    assert(mm != NULL);

    extern struct mm_struct *check_mm_struct;
    assert(check_mm_struct == NULL);

    check_mm_struct = mm;

    pgd_t *pgdir = mm->pgdir = init_pgdir_get();
    assert(pgdir[PGX(TEST_PAGE)] == 0);

    struct vma_struct *vma = vma_create(TEST_PAGE, TEST_PAGE + PTSIZE, VM_WRITE | VM_READ);
    assert(vma != NULL);

    insert_vma_struct(mm, vma);

    struct Page *rp0 = alloc_page(), *rp1 = alloc_page();
    assert(rp0 != NULL && rp1 != NULL);

    pte_perm_t perm;
	ptep_unmap (&perm);
	ptep_set_u_write(&perm);
    int ret = page_insert(pgdir, rp1, TEST_PAGE, perm);
    assert(ret == 0 && page_ref(rp1) == 1);

    // take an extra reference on rp1, then map rp0 over it at TEST_PAGE;
    // the assert expects rp1's count to be back at 1 afterwards
    page_ref_inc(rp1);
    ret = page_insert(pgdir, rp0, TEST_PAGE, perm);
    assert(ret == 0 && page_ref(rp1) == 1 && page_ref(rp0) == 1);

    // check try_alloc_swap_entry
    // (once slot 1 is marked as well, allocation is expected to return 0)

    swap_entry_t entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1);
    mem_map[1] = 1;
    assert(try_alloc_swap_entry() == 0);

    // set rp1, Swap, Active, add to hash_list, active_list

    swap_page_add(rp1, entry);
    swap_active_list_add(rp1);
    assert(PageSwap(rp1));

    // with mem_map[1] reset to 0 the allocator is expected to hand out
    // offset 1 again and rp1 is expected to lose its Swap flag
    mem_map[1] = 0;
    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1);
    assert(!PageSwap(rp1));

    // check swap_remove_entry

    assert(swap_hash_find(entry) == NULL);
    mem_map[1] = 2;
    swap_remove_entry(entry);
    assert(mem_map[1] == 1);

    swap_page_add(rp1, entry);
    swap_inactive_list_add(rp1);
    swap_remove_entry(entry);
    assert(PageSwap(rp1));
    assert(rp1->index == entry && mem_map[1] == 0);

    // check page_launder, move page from inactive_list to active_list

    assert(page_ref(rp1) == 1);
    assert(nr_active_pages == 0 && nr_inactive_pages == 1);
    assert(list_next(&(inactive_list.swap_list)) == &(rp1->swap_link));

    page_launder();
    assert(nr_active_pages == 1 && nr_inactive_pages == 0);
    assert(PageSwap(rp1) && PageActive(rp1));

    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1);
    assert(!PageSwap(rp1) && nr_active_pages == 0);
    assert(list_empty(&(active_list.swap_list)));

    // set rp1 inactive again

    assert(page_ref(rp1) == 1);
    swap_page_add(rp1, 0);
    assert(PageSwap(rp1) && swap_offset(rp1->index) == 1);
    swap_inactive_list_add(rp1);
    mem_map[1] = 1;
    assert(nr_inactive_pages == 1);
    page_ref_dec(rp1);

    // removing the entry is expected to take rp1 off the inactive list and
    // lower the used-page count by one
    size_t count = nr_used_pages();
    swap_remove_entry(entry);
    assert(nr_inactive_pages == 0 && nr_used_pages() == count - 1);

    // check swap_out_mm

    pte_t *ptep0 = get_pte(pgdir, TEST_PAGE, 0), *ptep1;
    assert(ptep0 != NULL && pte2page(*ptep0) == rp0);

    ret = swap_out_mm(mm, 0);
    assert(ret == 0);

    ret = swap_out_mm(mm, 10);
    assert(ret == 1 && mm->swap_address == TEST_PAGE + PGSIZE);

    ret = swap_out_mm(mm, 10);
    assert(ret == 0 && *ptep0 == entry && mem_map[1] == 1);
    assert(PageDirty(rp0) && PageActive(rp0) && page_ref(rp0) == 0);
    assert(nr_active_pages == 1 && list_next(&(active_list.swap_list)) == &(rp0->swap_link));

    // check refill_inactive_scan()

    refill_inactive_scan();
    assert(!PageActive(rp0) && page_ref(rp0) == 0);
    assert(nr_inactive_pages == 1 && list_next(&(inactive_list.swap_list)) == &(rp0->swap_link));

    // with a reference held, page_launder() is expected to move rp0 back
    // to the active list
    page_ref_inc(rp0);
    page_launder();
    assert(PageActive(rp0) && page_ref(rp0) == 1);
    assert(nr_active_pages == 1 && list_next(&(active_list.swap_list)) == &(rp0->swap_link));

    page_ref_dec(rp0);
    refill_inactive_scan();
    assert(!PageActive(rp0));

    // save data in rp0

    int i;
    for (i = 0; i < PGSIZE; i ++) {
        ((char *)page2kva(rp0))[i] = (char)i;
    }

    page_launder();
    assert(nr_inactive_pages == 0 && list_empty(&(inactive_list.swap_list)));
    assert(mem_map[1] == 1);

    // read the slot back into a fresh page and compare with the pattern
    // written above
    rp1 = alloc_page();
    assert(rp1 != NULL);
    ret = swapfs_read(entry, rp1);
    assert(ret == 0);

    for (i = 0; i < PGSIZE; i ++) {
        assert(((char *)page2kva(rp1))[i] == (char)i);
    }

    // page fault now
    // (write through the swapped-out mapping; the asserts below expect the
    // pte to reference a swap-cached, active page afterwards)

    *(char *)(TEST_PAGE) = 0xEF;

    rp0 = pte2page(*ptep0);
    assert(page_ref(rp0) == 1);
    assert(PageSwap(rp0) && PageActive(rp0));

    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1 && mem_map[1] == SWAP_UNUSED);
    assert(!PageSwap(rp0) && nr_active_pages == 0 && nr_inactive_pages == 0);

    // clear accessed flag
    // (the first swap_out_mm() pass is expected to leave the page mapped)

    assert(rp0 == pte2page(*ptep0));
    assert(!PageSwap(rp0));

    ret = swap_out_mm(mm, 10);
    assert(ret == 0);
    assert(!PageSwap(rp0) && ptep_present(ptep0));

    // change page table
    // (the second pass is expected to replace the pte with the swap entry)

    ret = swap_out_mm(mm, 10);
    assert(ret == 1);
    assert(*ptep0 == entry && page_ref(rp0) == 0 && mem_map[1] == 1);

    count = nr_used_pages();
    refill_inactive_scan();
    page_launder();
    assert(count - 1 == nr_used_pages());

    ret = swapfs_read(entry, rp1);
    assert(ret == 0 && *(char *)(page2kva(rp1)) == (char)0xEF);
    free_page(rp1);

    // duplicate *ptep0
    // (the pte of the following page gets the same swap entry, with one
    // more reference taken on it)

    ptep1 = get_pte(pgdir, TEST_PAGE + PGSIZE, 0);
    assert(ptep1 != NULL && ptep_invalid(ptep1));
    swap_duplicate(*ptep0);
	ptep_copy(ptep1, ptep0);
	mp_tlb_invalidate (pgdir, TEST_PAGE + PGSIZE);

    // page fault again
    // update for copy on write
    // (after the writes the two addresses are expected to be backed by
    // different pages holding independent contents)

    *(char *)(TEST_PAGE + 1) = 0x88;
    *(char *)(TEST_PAGE + PGSIZE) = 0x8F;
    *(char *)(TEST_PAGE + PGSIZE + 1) = 0xFF;
    assert(pte2page(*ptep0) != pte2page(*ptep1));
    assert(*(char *)(TEST_PAGE) == (char)0xEF);
    assert(*(char *)(TEST_PAGE + 1) == (char)0x88);
    assert(*(char *)(TEST_PAGE + PGSIZE) == (char)0x8F);
    assert(*(char *)(TEST_PAGE + PGSIZE + 1) == (char)0xFF);

    rp0 = pte2page(*ptep0);
    rp1 = pte2page(*ptep1);
    assert(!PageSwap(rp0) && PageSwap(rp1) && PageActive(rp1));

    entry = try_alloc_swap_entry();
    assert(!PageSwap(rp0) && !PageSwap(rp1));
    assert(swap_offset(entry) == 1 && mem_map[1] == SWAP_UNUSED);
    assert(list_empty(&(active_list.swap_list)));
    assert(list_empty(&(inactive_list.swap_list)));

    // map rp0 at the second address as well, so both ptes share one page
	ptep_set_accessed(&perm);
    page_insert(pgdir, rp0, TEST_PAGE + PGSIZE, perm);

    // check swap_out_mm
    // (first pass: both ptes stay present but lose the accessed flag;
    // second pass: both are swapped out, giving slot 1 a count of 2)

    *(char *)(TEST_PAGE) = *(char *)(TEST_PAGE + PGSIZE) = 0xEE;
    mm->swap_address = TEST_PAGE + PGSIZE * 2;
    ret = swap_out_mm(mm, 2);
    assert(ret == 0);
    assert(ptep_present(ptep0) && ! ptep_accessed(ptep0));
    assert(ptep_present(ptep1) && ! ptep_accessed(ptep1));

    ret = swap_out_mm(mm, 2);
    assert(ret == 2);
    assert(mem_map[1] == 2 && page_ref(rp0) == 0);

    refill_inactive_scan();
    page_launder();
    assert(mem_map[1] == 2 && swap_hash_find(entry) == NULL);

    // check copy entry

    swap_remove_entry(entry);
	ptep_unmap(ptep1);
    assert(mem_map[1] == 1);

    // no slot is available yet, so the first copy attempt must fail with
    // -E_NO_MEM; releasing slot 2 lets the second attempt succeed
    swap_entry_t store;
    ret = swap_copy_entry(entry, &store);
    assert(ret == -E_NO_MEM);
    mem_map[2] = SWAP_UNUSED;

    ret = swap_copy_entry(entry, &store);
    assert(ret == 0 && swap_offset(store) == 2 && mem_map[2] == 0);
    mem_map[2] = 1;
	ptep_copy(ptep1, &store);

    // the copy must carry the same contents as the original ...
    assert(*(char *)(TEST_PAGE + PGSIZE) == (char)0xEE && *(char *)(TEST_PAGE + PGSIZE + 1)== (char)0x88);

    // ... and writes to the copy must not show through the original
    *(char *)(TEST_PAGE + PGSIZE) = 1, *(char *)(TEST_PAGE + PGSIZE + 1) = 2;
    assert(*(char *)TEST_PAGE == (char)0xEE && *(char *)(TEST_PAGE + 1) == (char)0x88);

    ret = swap_in_page(entry, &rp0);
    assert(ret == 0);
    ret = swap_in_page(store, &rp1);
    assert(ret == 0);
    assert(rp1 != rp0);

    // free memory

    swap_list_del(rp0), swap_list_del(rp1);
    swap_page_del(rp0), swap_page_del(rp1);

    assert(page_ref(rp0) == 1 && page_ref(rp1) == 1);
    assert(nr_active_pages == 0 && list_empty(&(active_list.swap_list)));
    assert(nr_inactive_pages == 0 && list_empty(&(inactive_list.swap_list)));

    for (i = 0; i < HASH_LIST_SIZE; i ++) {
        assert(list_empty(hash_list + i));
    }

    page_remove(pgdir, TEST_PAGE);
    page_remove(pgdir, (TEST_PAGE + PGSIZE));

    // release the page-table pages that were created for TEST_PAGE; the
    // intermediate levels are only freed when the configuration has them
#if PMXSHIFT != PUXSHIFT
    free_page(pa2page(PMD_ADDR(*get_pmd(pgdir, TEST_PAGE, 0))));
#endif
#if PUXSHIFT != PGXSHIFT
    free_page(pa2page(PUD_ADDR(*get_pud(pgdir, TEST_PAGE, 0))));
#endif
    free_page(pa2page(PGD_ADDR(*get_pgd(pgdir, TEST_PAGE, 0))));
    pgdir[PGX(TEST_PAGE)] = 0;

    // detach the borrowed page directory before destroying the mm
    mm->pgdir = NULL;
    mm_destroy(mm);
    check_mm_struct = NULL;

    // reset every swap slot that was edited by hand
    assert(nr_active_pages == 0 && nr_inactive_pages == 0);
    for (offset = 0; offset < max_swap_offset; offset ++) {
        mem_map[offset] = SWAP_UNUSED;
    }

    // leak checks against the baselines taken on entry
    assert(nr_used_pages_store == nr_used_pages());
    assert(slab_allocated_store == slab_allocated());

    kprintf("check_swap() succeeded.\n");
}
Example #19
0
File: swap.c Project: jefjin/ucore
// check_swap - check the correctness of swap & page replacement algorithm
//
// Self-test driven entirely by assert(). It builds a temporary mm on top of
// boot_pgdir with a single vma covering [0, PTSIZE), edits mem_map[] by hand
// so that swap offset 1 (and, near the end, offset 2) is the only slot the
// allocator can hand out, and then exercises swap entry allocation and
// removal, the active and inactive lists, swap_out_mm(), page faults on
// swapped-out pages and swap_copy_entry(). Before returning it asserts that
// the free-page count and the slab allocation count are back at the values
// recorded on entry, then prints a success message.
static void
check_swap(void) {
    // baselines for the leak checks at the very end
    size_t nr_free_pages_store = nr_free_pages();
    size_t slab_allocated_store = slab_allocated();

    // set every slot from offset 2 upward to 1; per the asserts below this
    // leaves offset 1 as the slot try_alloc_swap_entry() hands out
    size_t offset;
    for (offset = 2; offset < max_swap_offset; offset ++) {
        mem_map[offset] = 1;
    }

    struct mm_struct *mm = mm_create();
    assert(mm != NULL);

    extern struct mm_struct *check_mm_struct;
    assert(check_mm_struct == NULL);

    check_mm_struct = mm;

    pde_t *pgdir = mm->pgdir = boot_pgdir;
    assert(pgdir[0] == 0);

    struct vma_struct *vma = vma_create(0, PTSIZE, VM_WRITE | VM_READ);
    assert(vma != NULL);

    insert_vma_struct(mm, vma);

    struct Page *rp0 = alloc_page(), *rp1 = alloc_page();
    assert(rp0 != NULL && rp1 != NULL);

    uint32_t perm = PTE_U | PTE_W;
    int ret = page_insert(pgdir, rp1, 0, perm);
    assert(ret == 0 && page_ref(rp1) == 1);

    // take an extra reference on rp1, then map rp0 over it at address 0;
    // the assert expects rp1's count to be back at 1 afterwards
    page_ref_inc(rp1);
    ret = page_insert(pgdir, rp0, 0, perm);
    assert(ret == 0 && page_ref(rp1) == 1 && page_ref(rp0) == 1);

    // check try_alloc_swap_entry
    // (once slot 1 is marked as well, allocation is expected to return 0)

    swap_entry_t entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1);
    mem_map[1] = 1;
    assert(try_alloc_swap_entry() == 0);

    // set rp1, Swap, Active, add to hash_list, active_list

    swap_page_add(rp1, entry);
    swap_active_list_add(rp1);
    assert(PageSwap(rp1));

    // with mem_map[1] reset to 0 the allocator is expected to hand out
    // offset 1 again and rp1 is expected to lose its Swap flag
    mem_map[1] = 0;
    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1);
    assert(!PageSwap(rp1));

    // check swap_remove_entry

    assert(swap_hash_find(entry) == NULL);
    mem_map[1] = 2;
    swap_remove_entry(entry);
    assert(mem_map[1] == 1);

    swap_page_add(rp1, entry);
    swap_inactive_list_add(rp1);
    swap_remove_entry(entry);
    assert(PageSwap(rp1));
    assert(rp1->index == entry && mem_map[1] == 0);

    // check page_launder, move page from inactive_list to active_list

    assert(page_ref(rp1) == 1);
    assert(nr_active_pages == 0 && nr_inactive_pages == 1);
    assert(list_next(&(inactive_list.swap_list)) == &(rp1->swap_link));

    page_launder();
    assert(nr_active_pages == 1 && nr_inactive_pages == 0);
    assert(PageSwap(rp1) && PageActive(rp1));

    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1);
    assert(!PageSwap(rp1) && nr_active_pages == 0);
    assert(list_empty(&(active_list.swap_list)));

    // set rp1 inactive again

    assert(page_ref(rp1) == 1);
    swap_page_add(rp1, 0);
    assert(PageSwap(rp1) && swap_offset(rp1->index) == 1);
    swap_inactive_list_add(rp1);
    mem_map[1] = 1;
    assert(nr_inactive_pages == 1);
    page_ref_dec(rp1);

    // removing the entry is expected to take rp1 off the inactive list and
    // raise the free-page count by one
    size_t count = nr_free_pages();
    swap_remove_entry(entry);
    assert(nr_inactive_pages == 0 && nr_free_pages() == count + 1);

    // check swap_out_mm

    pte_t *ptep0 = get_pte(pgdir, 0, 0), *ptep1;
    assert(ptep0 != NULL && pte2page(*ptep0) == rp0);

    ret = swap_out_mm(mm, 0);
    assert(ret == 0);

    ret = swap_out_mm(mm, 10);
    assert(ret == 1 && mm->swap_address == PGSIZE);

    ret = swap_out_mm(mm, 10);
    assert(ret == 0 && *ptep0 == entry && mem_map[1] == 1);
    assert(PageDirty(rp0) && PageActive(rp0) && page_ref(rp0) == 0);
    assert(nr_active_pages == 1 && list_next(&(active_list.swap_list)) == &(rp0->swap_link));

    // check refill_inactive_scan()

    refill_inactive_scan();
    assert(!PageActive(rp0) && page_ref(rp0) == 0);
    assert(nr_inactive_pages == 1 && list_next(&(inactive_list.swap_list)) == &(rp0->swap_link));

    // with a reference held, page_launder() is expected to move rp0 back
    // to the active list
    page_ref_inc(rp0);
    page_launder();
    assert(PageActive(rp0) && page_ref(rp0) == 1);
    assert(nr_active_pages == 1 && list_next(&(active_list.swap_list)) == &(rp0->swap_link));

    page_ref_dec(rp0);
    refill_inactive_scan();
    assert(!PageActive(rp0));

    // save data in rp0

    int i;
    for (i = 0; i < PGSIZE; i ++) {
        ((char *)page2kva(rp0))[i] = (char)i;
    }

    page_launder();
    assert(nr_inactive_pages == 0 && list_empty(&(inactive_list.swap_list)));
    assert(mem_map[1] == 1);

    // read the slot back into a fresh page and compare with the pattern
    // written above
    rp1 = alloc_page();
    assert(rp1 != NULL);
    ret = swapfs_read(entry, rp1);
    assert(ret == 0);

    for (i = 0; i < PGSIZE; i ++) {
        assert(((char *)page2kva(rp1))[i] == (char)i);
    }

    // page fault now
    // (address 0 lies inside the test vma; the write goes through the
    // swapped-out mapping and the asserts below expect the pte to reference
    // a swap-cached, active page afterwards)

    *(char *)0 = 0xEF;

    rp0 = pte2page(*ptep0);
    assert(page_ref(rp0) == 1);
    assert(PageSwap(rp0) && PageActive(rp0));

    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1 && mem_map[1] == SWAP_UNUSED);
    assert(!PageSwap(rp0) && nr_active_pages == 0 && nr_inactive_pages == 0);

    // clear accessed flag
    // (the first swap_out_mm() pass is expected to leave the page mapped)

    assert(rp0 == pte2page(*ptep0));
    assert(!PageSwap(rp0));

    ret = swap_out_mm(mm, 10);
    assert(ret == 0);
    assert(!PageSwap(rp0) && (*ptep0 & PTE_P));

    // change page table
    // (the second pass is expected to replace the pte with the swap entry)

    ret = swap_out_mm(mm, 10);
    assert(ret == 1);
    assert(*ptep0 == entry && page_ref(rp0) == 0 && mem_map[1] == 1);

    count = nr_free_pages();
    refill_inactive_scan();
    page_launder();
    assert(count + 1 == nr_free_pages());

    ret = swapfs_read(entry, rp1);
    assert(ret == 0 && *(char *)(page2kva(rp1)) == (char)0xEF);
    free_page(rp1);

    // duplicate *ptep0
    // (the pte of the following page gets the same swap entry, with one
    // more reference taken on it)

    ptep1 = get_pte(pgdir, PGSIZE, 0);
    assert(ptep1 != NULL && *ptep1 == 0);
    swap_duplicate(*ptep0);
    *ptep1 = *ptep0;

    // page fault again
    // (after the faults both ptes are expected to map the same page, so a
    // write through one address is visible through the other)

    *(char *)0 = 0xFF;
    *(char *)(PGSIZE + 1) = 0x88;
    assert(pte2page(*ptep0) == pte2page(*ptep1));
    rp0 = pte2page(*ptep0);
    assert(*(char *)1 == (char)0x88 && *(char *)PGSIZE == (char)0xFF);

    assert(page_ref(rp0) == 2 && rp0->index == entry && mem_map[1] == 0);

    assert(PageSwap(rp0) && PageActive(rp0));
    entry = try_alloc_swap_entry();
    assert(swap_offset(entry) == 1 && mem_map[1] == SWAP_UNUSED);
    assert(!PageSwap(rp0));
    assert(list_empty(&(active_list.swap_list)));
    assert(list_empty(&(inactive_list.swap_list)));

    // check swap_out_mm
    // (first pass: both ptes stay present but lose PTE_A;
    // second pass: both are swapped out, giving slot 1 a count of 2)

    *(char *)0 = *(char *)PGSIZE = 0xEE;
    mm->swap_address = PGSIZE * 2;
    ret = swap_out_mm(mm, 2);
    assert(ret == 0);
    assert((*ptep0 & PTE_P) && !(*ptep0 & PTE_A));
    assert((*ptep1 & PTE_P) && !(*ptep1 & PTE_A));

    ret = swap_out_mm(mm, 2);
    assert(ret == 2);
    assert(mem_map[1] == 2 && page_ref(rp0) == 0);

    refill_inactive_scan();
    page_launder();
    assert(mem_map[1] == 2 && swap_hash_find(entry) == NULL);

    // check copy entry

    swap_remove_entry(entry);
    *ptep1 = 0;
    assert(mem_map[1] == 1);

    // no slot is available yet, so the first copy attempt must fail with
    // -E_NO_MEM; releasing slot 2 lets the second attempt succeed
    swap_entry_t store;
    ret = swap_copy_entry(entry, &store);
    assert(ret == -E_NO_MEM);
    mem_map[2] = SWAP_UNUSED;

    ret = swap_copy_entry(entry, &store);
    assert(ret == 0 && swap_offset(store) == 2 && mem_map[2] == 0);
    mem_map[2] = 1;
    *ptep1 = store;

    // the copy must carry the same contents as the original ...
    assert(*(char *)PGSIZE == (char)0xEE && *(char *)(PGSIZE + 1)== (char)0x88);

    // ... and writes to the copy must not show through the original
    *(char *)PGSIZE = 1, *(char *)(PGSIZE + 1) = 2;
    assert(*(char *)0 == (char)0xEE && *(char *)1 == (char)0x88);

    ret = swap_in_page(entry, &rp0);
    assert(ret == 0);
    ret = swap_in_page(store, &rp1);
    assert(ret == 0);
    assert(rp1 != rp0);

    // free memory

    swap_list_del(rp0), swap_list_del(rp1);
    swap_page_del(rp0), swap_page_del(rp1);

    assert(page_ref(rp0) == 1 && page_ref(rp1) == 1);
    assert(nr_active_pages == 0 && list_empty(&(active_list.swap_list)));
    assert(nr_inactive_pages == 0 && list_empty(&(inactive_list.swap_list)));

    for (i = 0; i < HASH_LIST_SIZE; i ++) {
        assert(list_empty(hash_list + i));
    }

    page_remove(pgdir, 0);
    page_remove(pgdir, PGSIZE);

    // release the page referenced by directory slot 0 and clear the slot
    free_page(pa2page(pgdir[0]));
    pgdir[0] = 0;

    // detach the borrowed boot_pgdir before destroying the mm
    mm->pgdir = NULL;
    mm_destroy(mm);
    check_mm_struct = NULL;

    // reset every swap slot that was edited by hand
    assert(nr_active_pages == 0 && nr_inactive_pages == 0);
    for (offset = 0; offset < max_swap_offset; offset ++) {
        mem_map[offset] = SWAP_UNUSED;
    }

    // leak checks against the baselines taken on entry
    assert(nr_free_pages_store == nr_free_pages());
    assert(slab_allocated_store == slab_allocated());

    cprintf("check_swap() succeeded.\n");
}
Example #20
0
/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * 08Jan98 Merged into one routine from several inline routines to reduce
 *         variable count and make things faster. -jj
 *
 * Walks the pgd/pmd/pte levels of src for the range
 * [vma->vm_start, vma->vm_end) and mirrors every populated pte into dst,
 * allocating dst pmd and pte tables on demand.
 *
 * Returns 0 on success, or -ENOMEM when pmd_alloc() or pte_alloc() fails.
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	/* non-shared mapping that may be written: handled copy-on-write below */
	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	/* start one entry early: the loop increments before first use */
	src_pgd = pgd_offset(src, address)-1;
	dst_pgd = pgd_offset(dst, address)-1;
	
	for (;;) {
		pmd_t * src_pmd, * dst_pmd;

		src_pgd++; dst_pgd++;
		
		/* copy_pmd_range */
		
		if (pgd_none(*src_pgd))
			goto skip_copy_pmd_range;
		if (pgd_bad(*src_pgd)) {
			/* report the bad entry, clear it, then skip its range */
			pgd_ERROR(*src_pgd);
			pgd_clear(src_pgd);
			/*
			 * Round address up to the next PGDIR_SIZE boundary. The
			 * !address test presumably catches wrap-around to 0 at the
			 * top of the address space.
			 */
skip_copy_pmd_range:	address = (address + PGDIR_SIZE) & PGDIR_MASK;
			if (!address || (address >= end))
				goto out;
			continue;
		}
		if (pgd_none(*dst_pgd)) {
			if (!pmd_alloc(dst_pgd, 0))
				goto nomem;
		}
		
		src_pmd = pmd_offset(src_pgd, address);
		dst_pmd = pmd_offset(dst_pgd, address);

		do {
			pte_t * src_pte, * dst_pte;
		
			/* copy_pte_range */
		
			if (pmd_none(*src_pmd))
				goto skip_copy_pte_range;
			if (pmd_bad(*src_pmd)) {
				/* report the bad entry, clear it, then skip its range */
				pmd_ERROR(*src_pmd);
				pmd_clear(src_pmd);
				/* round address up to the next PMD_SIZE boundary */
skip_copy_pte_range:		address = (address + PMD_SIZE) & PMD_MASK;
				if (address >= end)
					goto out;
				goto cont_copy_pmd_range;
			}
			if (pmd_none(*dst_pmd)) {
				if (!pte_alloc(dst_pmd, 0))
					goto nomem;
			}
			
			src_pte = pte_offset(src_pmd, address);
			dst_pte = pte_offset(dst_pmd, address);
			
			do {
				pte_t pte = *src_pte;
				struct page *ptepage;
				
				/* copy_one_pte */

				/* empty source pte: leave the destination pte untouched */
				if (pte_none(pte))
					goto cont_copy_pte_range_noset;
				/*
				 * Non-empty but not present: the pte holds a swap entry.
				 * Take another reference on the entry and copy the pte
				 * unchanged.
				 */
				if (!pte_present(pte)) {
					swap_duplicate(pte_to_swp_entry(pte));
					goto cont_copy_pte_range;
				}
				ptepage = pte_page(pte);
				/*
				 * Invalid or reserved pages: the pte is copied as is and
				 * no page reference is taken.
				 */
				if ((!VALID_PAGE(ptepage)) || 
				    PageReserved(ptepage))
					goto cont_copy_pte_range;

				/* If it's a COW mapping, write protect it both in the parent and the child */
				if (cow) {
					ptep_set_wrprotect(src_pte);
					/* re-read so the child gets the write-protected value */
					pte = *src_pte;
				}

				/* If it's a shared mapping, mark it clean in the child */
				if (vma->vm_flags & VM_SHARED)
					pte = pte_mkclean(pte);
				pte = pte_mkold(pte);
				/* the destination mapping is one more user of the page */
				get_page(ptepage);

cont_copy_pte_range:		set_pte(dst_pte, pte);
cont_copy_pte_range_noset:	address += PAGE_SIZE;
				if (address >= end)
					goto out;
				src_pte++;
				dst_pte++;
			/*
			 * Presumably the masked bits become zero once src_pte steps
			 * past the last entry of its page table, ending this loop.
			 */
			} while ((unsigned long)src_pte & PTE_TABLE_MASK);
		
cont_copy_pmd_range:	src_pmd++;
			dst_pmd++;
		/* same termination scheme, one level up (presumably) */
		} while ((unsigned long)src_pmd & PMD_TABLE_MASK);
	}
out:
	return 0;

nomem:
	return -ENOMEM;
}
Example #21
0
/*
 * Try to unmap the page behind one pte of vma at the given address, so
 * that the page can be reclaimed later.
 *
 * mm->page_table_lock is held. mmap_sem is not held
 *
 * Returns 0 when the pte is left alone or restored (recently accessed or
 * locked vma, active page, page outside classzone, page already locked,
 * page with buffers, no swap space). When the pte is dropped or replaced
 * by a swap entry, returns the value of
 * "page_count(page) - !!page->buffers <= 2" as evaluated just before this
 * mapping's page reference is released.
 */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page->zone, classzone))
		return 0;

	/* give up if the page lock cannot be taken right away */
	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	/* carry the pte's dirty bit over to the page before the pte is lost */
	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
		/*
		 * The two labels below are also jumped to from outside this
		 * if-body: set_swap_pte once a fresh swap entry has been
		 * attached to the page, drop_pte when the pte is simply dropped.
		 */
set_swap_pte:
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		mm->rss--;
		UnlockPage(page);
		{
			/* evaluated before the reference is released below */
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		/* a zero entry means no swap slot could be obtained */
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		/* give the entry back and retry with another one */
		swap_free(entry);
	}

	/* No swap space left */
preserve:
	/* put the original pte back: the page stays mapped */
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
Example #22
0
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering (now preserved by swap_out()),
	 * which clusters forked address spaces together, most recent
	 * child immediately after parent.  If we race with dup_mmap(),
	 * we very much want to resolve parent before child, otherwise
	 * we may miss some entries: using last mm would invert that.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * mmput() removes mm from mmlist before exit_mmap() and its
	 * zap_page_range().  That's not too bad, those entries are
	 * on their way out, and handled faster there than here.
	 * do_munmap() behaves similarly, taking the range out of mm's
	 * vma list before zap_page_range().  But unfortunately, when
	 * unmapping a part of a vma, it takes the whole out first,
	 * then reinserts what's left after (might even reschedule if
	 * open() method called) - so swap entries may be invisible
	 * to swapoff for a while, then reappear - but that is rare.
	 */
	while ((i = find_next_to_unuse(si, i))) {
		/* 
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it. 
		 */
		swap_map = &si->swap_map[i];
		entry = SWP_ENTRY(type, i);
		page = read_swap_cache_async(entry);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page(page);
		lock_page(page);

		/*
		 * Remove all references to entry, without blocking.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			flush_page_to_ram(page);
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				unuse_process(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *mm;

			spin_lock(&mmlist_lock);
			while (*swap_map > 1 &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				swcount = *swap_map;
				if (mm == &init_mm) {
					set_start_mm = 1;
					spin_unlock(&mmlist_lock);
					shmem = shmem_unuse(entry, page);
					spin_lock(&mmlist_lock);
				} else
					unuse_process(mm, entry, page);
				if (set_start_mm && *swap_map < swcount) {
					new_start_mm = mm;
					set_start_mm = 0;
				}
			}
			atomic_inc(&new_start_mm->mm_users);
			spin_unlock(&mmlist_lock);
			mmput(start_mm);
			start_mm = new_start_mm;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			swap_list_lock();
			swap_device_lock(si);
			nr_swap_pages++;
			*swap_map = 1;
			swap_device_unlock(si);
			swap_list_unlock();
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_swap_out could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 *
		 * Note shmem_unuse already deleted swappage from cache,
		 * unless corresponding filepage found already in cache:
		 * in which case it left swappage in cache, lowered its
		 * swap count to pass quickly through the loops above,
		 * and now we must reincrement count to try again later.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			rw_swap_page(WRITE, page);
			lock_page(page);
		}
		if (PageSwapCache(page)) {
			if (shmem)
				swap_duplicate(entry);
			else
				delete_from_swap_cache(page);
		}

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so try_to_swap_out will preserve it.
		 */
		SetPageDirty(page);
		UnlockPage(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.  Interruptible check on
		 * signal_pending() would be nice, but changes the spec?
		 */
		if (current->need_resched)
			schedule();
	}