Example #1
static void mmu_spte_walk(struct vmmr0_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct vmmr0_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;

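	/* Long mode: the whole walk starts from the single root at PT64_ROOT_LEVEL. */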
	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;

		sp = page_header(root);
		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
		return;
	}

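	/* 32-bit/PAE guest: walk each of the four pae_root page directories as level-2 tables. */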
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = page_header(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}

	return;
}
Example #2
static struct page *lookup_page_table(const struct mm_struct *mm,
				      unsigned long address, int clear)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	struct page *page = NOPAGE_SIGBUS;	/* default: no page */

	pgd = pgd_offset(mm, address);
	if (!pgd_none(*pgd)) {
		/* Go on to the PMD lookup */
		pmd = pmd_offset(pgd, address);
		if (!pmd_none(*pmd)) {
			pte = pte_offset(pmd, address);	/* get the pte entry */
			if (pte_present(*pte)) {
				page = pte_page(*pte);	/* get the page from the entry */
				if (clear && VALID_PAGE(page) && !PageReserved(page)) {
					pte_t x = ptep_get_and_clear(pte);	/* clear the pte */
					(void)x;
#ifdef DEBUG_NOT_NOW
					printk(KERN_ALERT "Non-contiguous page cleared off the list (%lx)\n",
					       PAGE_VIRTUAL(page));
#endif
					__free_page(page);	/* free the page */
					page = (struct page *)1;	/* sentinel: page was freed */
				}
			}
		}
	}
	return page;
}
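A hypothetical caller, purely for illustration (example_vm_nopage and its wiring are not from the original driver): a 2.4-style nopage handler can pass the faulting address straight through and take its own reference before returning the page.

static struct page *example_vm_nopage(struct vm_area_struct *vma,
				      unsigned long address, int unused)
{
	struct page *page = lookup_page_table(vma->vm_mm, address, 0);

	if (page != NOPAGE_SIGBUS)
		get_page(page);	/* nopage must hand back a referenced page */
	return page;
}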
Example #3
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We hold the mm semaphore and the page_table_lock on entry and exit
 * with the page_table_lock released.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;

	if (!TryLockPage(old_page)) {
		int reuse = can_share_swap_page(old_page);
		unlock_page(old_page);
		if (reuse) {
#ifndef CONFIG_SUPERH
			/* Not needed for VIPT cache */
			flush_cache_page(vma, address);
#endif
			establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
			spin_unlock(&mm->page_table_lock);
			return 1;	/* Minor fault */
		}
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	page_cache_get(old_page);
	spin_unlock(&mm->page_table_lock);

	new_page = alloc_page(GFP_HIGHUSER);
	if (!new_page)
		goto no_mem;
	copy_cow_page(old_page,new_page,address);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	spin_lock(&mm->page_table_lock);
	if (pte_same(*page_table, pte)) {
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, new_page, address, page_table);
		lru_cache_add(new_page);

		/* Free the old page.. */
		new_page = old_page;
	}
	spin_unlock(&mm->page_table_lock);
	page_cache_release(new_page);
	page_cache_release(old_page);
	return 1;	/* Minor fault */

bad_wp_page:
	spin_unlock(&mm->page_table_lock);
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
no_mem:
	page_cache_release(old_page);
	return -1;
}
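The establish_pte() helper used above (and again in Example #17) installs the new pte and keeps the TLB and MMU caches coherent. As a sketch of its 2.4-era mm/memory.c form, reproduced from memory rather than quoted from this tree:

static inline void establish_pte(struct vm_area_struct *vma, unsigned long address,
				 pte_t *page_table, pte_t entry)
{
	set_pte(page_table, entry);		/* install the new (writable, dirty) pte */
	flush_tlb_page(vma, address);		/* drop any stale TLB entry */
	update_mmu_cache(vma, address, entry);	/* arch hook, see Examples #5 and #12 */
}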
Example #4
/*
 * Called by TLB shootdown 
 */
void __free_pte(pte_t pte)
{
	struct page *page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		return;
	if (pte_dirty(pte))
		set_page_dirty(page);		
	free_page_and_swap_cache(page);
}
Example #5
void
update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
	struct page *page = pte_page(pte);

	if (VALID_PAGE(page) && page->mapping &&
	    test_bit(PG_dcache_dirty, &page->flags)) {

		flush_kernel_dcache_page(page_address(page));
		clear_bit(PG_dcache_dirty, &page->flags);
	}
}
Example #6
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		struct page *ptpage = pte_page(page);
		if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
			return;
		page_cache_release(ptpage);
		return;
	}
	swap_free(pte_to_swp_entry(page));
}
Example #7
static inline void free_pte(pte_t page)
{
	if (pte_present(page)) {
		struct page *ptpage = pte_page(page);
		if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
			return;
		__free_page(ptpage);
		if (current->mm->rss <= 0)
			return;
		current->mm->rss--;
		return;
	}
	swap_free(pte_to_swp_entry(page));
}
Example #8
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone)
{
	pte_t * pte;
	unsigned long pmd_end;

	DEFINE_LOCK_COUNT();

	if (pmd_none(*dir))
		return count;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return count;
	}
	
	pte = pte_offset(dir, address);
	
	pmd_end = (address + PMD_SIZE) & PMD_MASK;
	if (end > pmd_end)
		end = pmd_end;

	do {
		if (pte_present(*pte)) {
			struct page *page = pte_page(*pte);

			if (VALID_PAGE(page) && !PageReserved(page)) {
				count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
				if (!count) {
					address += PAGE_SIZE;
					break;
				}
				/* we reach this with a lock depth of 1 or 2 */
#if 0
				if (TEST_LOCK_COUNT(4)) {
					if (conditional_schedule_needed())
						return count;
					RESET_LOCK_COUNT();
				}
#endif
			}
		}
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
	mm->swap_address = address;
	return count;
}
Example #9
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		struct page *ptpage = pte_page(page);
		if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
			return;
		/* 
		 * free_page() used to be able to clear swap cache
		 * entries.  We may now have to do it manually.  
		 */
		free_page_and_swap_cache(ptpage);
		return;
	}
	swap_free(pte_to_swp_entry(page));
}
Example #10
/*
 * Return indicates whether a page was freed so caller can adjust rss
 */
static inline int free_pte(pte_t pte)
{
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);
		if ((!VALID_PAGE(page)) || PageReserved(page))
			return 0;
		/* 
		 * free_page() used to be able to clear swap cache
		 * entries.  We may now have to do it manually.  
		 */
		if (pte_dirty(pte) && page->mapping)
			set_page_dirty(page);
		free_page_and_swap_cache(page);
		return 1;
	}
	swap_free(pte_to_swp_entry(pte));
	return 0;
}
Example #11
struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
{
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long kaddr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep, pte;
	struct page *page = NULL;

	/* address is user VA; convert to kernel VA of desired page */
	kaddr = (address - vma->vm_start) + offset;
	kaddr = VMALLOC_VMADDR(kaddr);

	spin_lock(&init_mm.page_table_lock);

	/* Lookup page structure for kernel VA */
	pgd = pgd_offset(&init_mm, kaddr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;
	pmd = pmd_offset(pgd, kaddr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;
	ptep = pte_offset(pmd, kaddr);
	if (!ptep)
		goto out;
	pte = *ptep;
	if (!pte_present(pte))
		goto out;
	if (write && !pte_write(pte))
		goto out;
	page = pte_page(pte);
	if (!VALID_PAGE(page)) {
		page = NULL;
		goto out;
	}

	/* Increment reference count on page */
	get_page(page);

out:
	spin_unlock(&init_mm.page_table_lock);

	return page;
}
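A hedged sketch of how such a handler is typically hooked up in a 2.4 driver; kmem_vm_ops and the mmap path shown here are illustrative, not quoted from the original file:

static struct vm_operations_struct kmem_vm_ops = {
	nopage:	kmem_vm_nopage,
};

static int example_mmap_kmem(struct file *file, struct vm_area_struct *vma)
{
	/* serve the mapping page-by-page through the nopage handler above */
	vma->vm_ops = &kmem_vm_ops;
	return 0;
}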
Example #12
void __update_cache(struct vm_area_struct *vma, unsigned long address,
        pte_t pte)
{
	unsigned long addr;
	struct page *page;

	if (!cpu_has_dc_aliases)
		return;

	page = pte_page(pte);
	if (VALID_PAGE(page) && page->mapping &&
	    (page->flags & (1UL << PG_dcache_dirty))) {
		if (pages_do_alias((unsigned long)page_address(page), address & PAGE_MASK)) {
			addr = (unsigned long) page_address(page);
			flush_data_cache_page(addr);
		}

		ClearPageDcacheDirty(page);
	}
}
Example #13
/*
 * maps a range of physical memory into the requested pages. the old
 * mappings are removed. any references to nonexistent pages results
 * in null mappings (currently treated as "copy-on-access")
 */
static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
	unsigned long phys_addr, pgprot_t prot)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		struct page *page;
		pte_t oldpage;
		oldpage = ptep_get_and_clear(pte);

		page = virt_to_page(__va(phys_addr));
		if ((!VALID_PAGE(page)) || PageReserved(page))
 			set_pte(pte, mk_pte_phys(phys_addr, prot));
		forget_pte(oldpage);
		address += PAGE_SIZE;
		phys_addr += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
}
Example #14
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone)
{
	pte_t * pte;
	unsigned long pmd_end;

	if (pmd_none(*dir))
		return count;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return count;
	}
	
	pte = pte_offset(dir, address);
	
	pmd_end = (address + PMD_SIZE) & PMD_MASK;
	if (end > pmd_end)
		end = pmd_end;

	do {
		if (pte_present(*pte)) {
			struct page *page = pte_page(*pte);

			if (VALID_PAGE(page) && !PageReserved(page)) {
				count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
				if (!count) {
					address += PAGE_SIZE;
					break;
				}
			}
		}
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
	mm->swap_address = address;
	return count;
}
Example #15
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{
	unsigned long offset;
	pte_t * ptep;
	int freed = 0;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		return 0;
	}
	ptep = pte_offset(pmd, address);
	offset = address & ~PMD_MASK;
	if (offset + size > PMD_SIZE)
		size = PMD_SIZE - offset;
	size &= PAGE_MASK;
	for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
		pte_t pte = *ptep;
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			struct page *page = pte_page(pte);
			if (VALID_PAGE(page) && !PageReserved(page))
				freed ++;
			/* This will eventually call __free_pte on the pte. */
			tlb_remove_page(tlb, ptep, address + offset);
		} else {
			free_swap_and_cache(pte_to_swp_entry(pte));
			pte_clear(ptep);
		}
	}

	return freed;
}
Example #16
static void __free_pages_ok (struct page *page, unsigned int order)
{
	unsigned long index, page_idx, mask, flags;
	free_area_t *area;
	struct page *base;
	zone_t *zone;

	/*
	 * Yes, think what happens when other parts of the kernel take 
	 * a reference to a page in order to pin it for io. -ben
	 */
	if (PageLRU(page)) {
		if (unlikely(in_interrupt()))
			BUG();
		lru_cache_del(page);
	}

	if (page->buffers)
		BUG();
	if (page->mapping)
		BUG();
	if (!VALID_PAGE(page))
		BUG();
	if (PageLocked(page))
		BUG();
	if (PageActive(page))
		BUG();
	page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));

	if (current->flags & PF_FREE_PAGES)
		goto local_freelist;
 back_local_freelist:

	zone = page_zone(page);

	mask = (~0UL) << order;
	base = zone->zone_mem_map;
	page_idx = page - base;
	if (page_idx & ~mask)
		BUG();
	index = page_idx >> (1 + order);

	area = zone->free_area + order;

	spin_lock_irqsave(&zone->lock, flags);

	zone->free_pages -= mask;

	while (mask + (1 << (MAX_ORDER-1))) {
		struct page *buddy1, *buddy2;

		if (area >= zone->free_area + MAX_ORDER)
			BUG();
		if (!__test_and_change_bit(index, area->map))
			/*
			 * the buddy page is still allocated.
			 */
			break;
		/*
		 * Move the buddy up one level.
		 * This code is taking advantage of the identity:
		 * 	-mask = 1+~mask
		 */
		buddy1 = base + (page_idx ^ -mask);
		buddy2 = base + page_idx;
		if (BAD_RANGE(zone,buddy1))
			BUG();
		if (BAD_RANGE(zone,buddy2))
			BUG();

		list_del(&buddy1->list);
		mask <<= 1;
		area++;
		index >>= 1;
		page_idx &= mask;
	}
	list_add(&(base + page_idx)->list, &area->free_list);

	spin_unlock_irqrestore(&zone->lock, flags);
	return;

 local_freelist:
	if (current->nr_local_pages)
		goto back_local_freelist;
	if (in_interrupt())
		goto back_local_freelist;		

	list_add(&page->list, &current->local_pages);
	page->index = order;
	current->nr_local_pages++;
}
Example #17
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We enter with the page table read-lock held, and need to exit without
 * it.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;
	
	/*
	 * We can avoid the copy if:
	 * - we're the only user (count == 1)
	 * - the only other user is the swap cache,
	 *   and the only swap cache user is itself,
	 *   in which case we can just continue to
	 *   use the same swap cache (it will be
	 *   marked dirty).
	 */
	switch (page_count(old_page)) {
	case 2:
		/*
		 * Lock the page so that no one can look it up from
		 * the swap cache, grab a reference and start using it.
		 * Can not do lock_page, holding page_table_lock.
		 */
		if (!PageSwapCache(old_page) || TryLockPage(old_page))
			break;
		if (is_page_shared(old_page)) {
			UnlockPage(old_page);
			break;
		}
		UnlockPage(old_page);
		/* FallThrough */
	case 1:
		flush_cache_page(vma, address);
		establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
		spin_unlock(&mm->page_table_lock);
		return 1;	/* Minor fault */
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	spin_unlock(&mm->page_table_lock);
	new_page = page_cache_alloc();
	if (!new_page)
		return -1;
	spin_lock(&mm->page_table_lock);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	if (pte_same(*page_table, pte)) {
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, old_page, new_page, address, page_table);

		/* Free the old page.. */
		new_page = old_page;
	}
	spin_unlock(&mm->page_table_lock);
	page_cache_release(new_page);
	return 1;	/* Minor fault */

bad_wp_page:
	spin_unlock(&mm->page_table_lock);
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
}
Example #18
static inline struct page * get_page_map(struct page *page)
{
	if (!VALID_PAGE(page))
		return 0;
	return page;
}
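Every snippet in this collection hinges on VALID_PAGE(). Two different macros share that name here; as a hedged sketch (check the headers of the tree you are actually reading):

/* 2.4.x flat-memory definition (e.g. include/asm-i386/page.h): a struct page
 * pointer is valid iff it falls inside the mem_map array. This is the form
 * the mm/ and driver examples above rely on. */
#define VALID_PAGE(page)	((page - mem_map) < max_mapnr)

/* KVM-derived code such as Example #1 uses a macro of the same name that
 * operates on a host physical address rather than a struct page pointer. */
#define INVALID_PAGE		(~(hpa_t)0)
#define VALID_PAGE(x)		((x) != INVALID_PAGE)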
Example #19
/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * 08Jan98 Merged into one routine from several inline routines to reduce
 *         variable count and make things faster. -jj
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	src_pgd = pgd_offset(src, address)-1;
	dst_pgd = pgd_offset(dst, address)-1;
	
	for (;;) {
		pmd_t * src_pmd, * dst_pmd;

		src_pgd++; dst_pgd++;
		
		/* copy_pmd_range */
		
		if (pgd_none(*src_pgd))
			goto skip_copy_pmd_range;
		if (pgd_bad(*src_pgd)) {
			pgd_ERROR(*src_pgd);
			pgd_clear(src_pgd);
skip_copy_pmd_range:	address = (address + PGDIR_SIZE) & PGDIR_MASK;
			if (!address || (address >= end))
				goto out;
			continue;
		}
		if (pgd_none(*dst_pgd)) {
			if (!pmd_alloc(dst_pgd, 0))
				goto nomem;
		}
		
		src_pmd = pmd_offset(src_pgd, address);
		dst_pmd = pmd_offset(dst_pgd, address);

		do {
			pte_t * src_pte, * dst_pte;
		
			/* copy_pte_range */
		
			if (pmd_none(*src_pmd))
				goto skip_copy_pte_range;
			if (pmd_bad(*src_pmd)) {
				pmd_ERROR(*src_pmd);
				pmd_clear(src_pmd);
skip_copy_pte_range:		address = (address + PMD_SIZE) & PMD_MASK;
				if (address >= end)
					goto out;
				goto cont_copy_pmd_range;
			}
			if (pmd_none(*dst_pmd)) {
				if (!pte_alloc(dst_pmd, 0))
					goto nomem;
			}
			
			src_pte = pte_offset(src_pmd, address);
			dst_pte = pte_offset(dst_pmd, address);
			
			do {
				pte_t pte = *src_pte;
				struct page *ptepage;
				
				/* copy_one_pte */

				if (pte_none(pte))
					goto cont_copy_pte_range_noset;
				if (!pte_present(pte)) {
					swap_duplicate(pte_to_swp_entry(pte));
					goto cont_copy_pte_range;
				}
				ptepage = pte_page(pte);
				if ((!VALID_PAGE(ptepage)) || 
				    PageReserved(ptepage))
					goto cont_copy_pte_range;

				/* If it's a COW mapping, write protect it both in the parent and the child */
				if (cow) {
					ptep_set_wrprotect(src_pte);
					pte = *src_pte;
				}

				/* If it's a shared mapping, mark it clean in the child */
				if (vma->vm_flags & VM_SHARED)
					pte = pte_mkclean(pte);
				pte = pte_mkold(pte);
				get_page(ptepage);

cont_copy_pte_range:		set_pte(dst_pte, pte);
cont_copy_pte_range_noset:	address += PAGE_SIZE;
				if (address >= end)
					goto out;
				src_pte++;
				dst_pte++;
			} while ((unsigned long)src_pte & PTE_TABLE_MASK);
		
cont_copy_pmd_range:	src_pmd++;
			dst_pmd++;
		} while ((unsigned long)src_pmd & PMD_TABLE_MASK);
	}
out:
	return 0;

nomem:
	return -ENOMEM;
}
Example #20
static void __free_pages_ok (struct page *page, unsigned int order)
{
	unsigned long index, page_idx, mask, flags;
	free_area_t *area;
	struct page *base;
	zone_t *zone;

	/*
	 * Subtle. We do not want to test this in the inlined part of
	 * __free_page() - it's a rare condition and just increases
	 * cache footprint unnecessarily. So we do an 'incorrect'
	 * decrement on page->count for reserved pages, but this part
	 * makes it safe.
	 */
	if (PageReserved(page))
		return;

	/*
	 * Yes, think what happens when other parts of the kernel take 
	 * a reference to a page in order to pin it for io. -ben
	 */
	if (PageLRU(page)) {
		if (unlikely(in_interrupt())) {
			unsigned long flags;

			spin_lock_irqsave(&free_pages_ok_no_irq_lock, flags);
			page->next_hash = free_pages_ok_no_irq_head;
			free_pages_ok_no_irq_head = page;
			page->index = order;
	
			spin_unlock_irqrestore(&free_pages_ok_no_irq_lock, flags);
	
			schedule_task(&free_pages_ok_no_irq_task);
			return;
		}
		
		lru_cache_del(page);
	}

	if (page->buffers)
		BUG();
	if (page->mapping)
		BUG();
	if (!VALID_PAGE(page))
		BUG();
	if (PageLocked(page))
		BUG();
	if (PageActive(page))
		BUG();

	ClearPageReferenced(page);
	ClearPageDirty(page);

	/* de-reference all the pages for this order */
	for (page_idx = 1; page_idx < (1 << order); page_idx++)
		set_page_count(&page[page_idx], 0);

	if (current->flags & PF_FREE_PAGES)
		goto local_freelist;
 back_local_freelist:

	zone = page_zone(page);

	mask = (~0UL) << order;
	base = zone->zone_mem_map;
	page_idx = page - base;
	if (page_idx & ~mask)
		BUG();
	index = page_idx >> (1 + order);

	area = zone->free_area + order;

	spin_lock_irqsave(&zone->lock, flags);

	zone->free_pages -= mask;

	while (mask + (1 << (MAX_ORDER-1))) {
		struct page *buddy1, *buddy2;

		if (area >= zone->free_area + MAX_ORDER)
			BUG();
		if (!__test_and_change_bit(index, area->map))
			/*
			 * the buddy page is still allocated.
			 */
			break;
		/*
		 * Move the buddy up one level.
		 * This code is taking advantage of the identity:
		 * 	-mask = 1+~mask
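		 *	e.g. when the current order is 2, mask == ~3UL and so
		 *	-mask == 4; page_idx ^ -mask just toggles bit 2 and
		 *	yields the index of this block's buddy.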
		 */
		buddy1 = base + (page_idx ^ -mask);
		buddy2 = base + page_idx;
		if (BAD_RANGE(zone,buddy1))
			BUG();
		if (BAD_RANGE(zone,buddy2))
			BUG();

		list_del(&buddy1->list);
		mask <<= 1;
		area++;
		index >>= 1;
		page_idx &= mask;
	}
	list_add(&(base + page_idx)->list, &area->free_list);

	spin_unlock_irqrestore(&zone->lock, flags);
	return;

 local_freelist:
	if (current->nr_local_pages)
		goto back_local_freelist;
	if (in_interrupt())
		goto back_local_freelist;		

	list_add(&page->list, &current->local_pages);
	page->index = order;
	current->nr_local_pages++;
}
Example #21
static void __free_pages_ok (struct page *page, unsigned int order)
{
    unsigned long index, page_idx, mask, flags;
    free_area_t *area;
    struct page *base;
    zone_t *zone;

    if (PageLRU(page))
        lru_cache_del(page);

    if (page->buffers)
        BUG();
    if (page->mapping)
        BUG();
    if (!VALID_PAGE(page))
        BUG();
    if (PageSwapCache(page))
        BUG();
    if (PageLocked(page))
        BUG();
    if (PageLRU(page))
        BUG();
    if (PageActive(page))
        BUG();

    TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_FREE, order);

    page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));

    if (current->flags & PF_FREE_PAGES)
        goto local_freelist;
back_local_freelist:

    zone = page->zone;

    mask = (~0UL) << order;
    base = zone->zone_mem_map;
    page_idx = page - base;
    if (page_idx & ~mask)
        BUG();
    index = page_idx >> (1 + order);

    area = zone->free_area + order;

    spin_lock_irqsave(&zone->lock, flags);

    zone->free_pages -= mask;

    while (mask + (1 << (MAX_ORDER-1))) {
        struct page *buddy1, *buddy2;

        if (area >= zone->free_area + MAX_ORDER)
            BUG();
        if (!__test_and_change_bit(index, area->map))
            /*
             * the buddy page is still allocated.
             */
            break;
        /*
         * Move the buddy up one level.
         */
        buddy1 = base + (page_idx ^ -mask);
        buddy2 = base + page_idx;
        if (BAD_RANGE(zone,buddy1))
            BUG();
        if (BAD_RANGE(zone,buddy2))
            BUG();

        memlist_del(&buddy1->list);
        mask <<= 1;
        area++;
        index >>= 1;
        page_idx &= mask;
    }
    memlist_add_head(&(base + page_idx)->list, &area->free_list);

    spin_unlock_irqrestore(&zone->lock, flags);
    return;

local_freelist:
    if (current->nr_local_pages)
        goto back_local_freelist;
    if (in_interrupt())
        goto back_local_freelist;

    list_add(&page->list, &current->local_pages);
    page->index = order;
    current->nr_local_pages++;
}
Example #22
int rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
{
    void           *pvLast = (uint8_t *)pv + cb - 1;
    size_t const    cPages = cb >> PAGE_SHIFT;
    PRTR0MEMOBJLNX  pMemLnx;
    bool            fLinearMapping;
    int             rc;
    uint8_t        *pbPage;
    size_t          iPage;
    NOREF(fAccess);

    /*
     * Classify the memory and check that we can deal with it.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    fLinearMapping = virt_addr_valid(pvLast)          && virt_addr_valid(pv);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0)
    fLinearMapping = VALID_PAGE(virt_to_page(pvLast)) && VALID_PAGE(virt_to_page(pv));
#else
# error "not supported"
#endif
    if (!fLinearMapping)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 19)
        if (   !RTR0MemKernelIsValidAddr(pv)
            || !RTR0MemKernelIsValidAddr(pv + cb))
#endif
            return VERR_INVALID_PARAMETER;
    }

    /*
     * Allocate the memory object.
     */
    pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, pv, cb);
    if (!pMemLnx)
        return VERR_NO_MEMORY;

    /*
     * Gather the pages.
     * We ASSUME all kernel pages are non-swappable.
     */
    rc     = VINF_SUCCESS;
    pbPage = (uint8_t *)pvLast;
    iPage  = cPages;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 19)
    if (!fLinearMapping)
    {
        while (iPage-- > 0)
        {
            struct page *pPage = vmalloc_to_page(pbPage);
            if (RT_UNLIKELY(!pPage))
            {
                rc = VERR_LOCK_FAILED;
                break;
            }
            pMemLnx->apPages[iPage] = pPage;
            pbPage -= PAGE_SIZE;
        }
    }
    else
#endif
    {
        while (iPage-- > 0)
        {
            pMemLnx->apPages[iPage] = virt_to_page(pbPage);
            pbPage -= PAGE_SIZE;
        }
    }
    if (RT_SUCCESS(rc))
    {
        /*
         * Complete the memory object and return.
         */
        pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
        pMemLnx->cPages = cPages;
        Assert(!pMemLnx->fMappedToRing0);
        *ppMem = &pMemLnx->Core;

        return VINF_SUCCESS;
    }

    rtR0MemObjDelete(&pMemLnx->Core);
    return rc;
}
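For context, a hedged usage sketch of the IPRT front end that ends up in this native worker; the buffer variables are made up, while RTR0MemObjLockKernel()/RTR0MemObjFree() are the public iprt/memobj.h entry points:

RTR0MEMOBJ hMemObj;
int rc = RTR0MemObjLockKernel(&hMemObj, pvBuf, cbBuf,
                              RTMEM_PROT_READ | RTMEM_PROT_WRITE);
if (RT_SUCCESS(rc))
{
    /* ... access the locked kernel pages ... */
    RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
}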
Example #23
/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * NOTE! If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
	pte_t pte;
	swp_entry_t entry;
	struct page * page;
	int onlist;

	pte = *page_table;
	if (!pte_present(pte))
		goto out_failed;
	page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		goto out_failed;

	if (mm->swap_cnt)
		mm->swap_cnt--;

	onlist = PageActive(page);
	/* Don't look at this pte if it's been accessed recently. */
	if (ptep_test_and_clear_young(page_table)) {
		age_page_up(page);
		goto out_failed;
	}
	if (!onlist)
		/* The page is still mapped, so it can't be freeable... */
		age_page_down_ageonly(page);

	/*
	 * If the page is in active use by us, or if the page
	 * is in active use by others, don't unmap it or
	 * (worse) start unneeded IO.
	 */
	if (page->age > 0)
		goto out_failed;

	if (TryLockPage(page))
		goto out_failed;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	pte = ptep_get_and_clear(page_table);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 *
	 * Return 0, as we didn't actually free any real
	 * memory, and we should just continue our scan.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		if (pte_dirty(pte))
			set_page_dirty(page);
set_swap_pte:
		swap_duplicate(entry);
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		UnlockPage(page);
		mm->rss--;
		flush_tlb_page(vma, address);
		deactivate_page(page);
		page_cache_release(page);
out_failed:
		return 0;
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	flush_cache_page(vma, address);
	if (!pte_dirty(pte))
		goto drop_pte;

	/*
	 * Ok, it's really dirty. That means that
	 * we should either create a new swap cache
	 * entry for it, or we should write it back
	 * to its own backing store.
	 */
	if (page->mapping) {
		set_page_dirty(page);
		goto drop_pte;
	}

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock_restore; /* No swap space left */

	/* Add it to the swap cache and mark it dirty */
	add_to_swap_cache(page, entry);
	set_page_dirty(page);
	goto set_swap_pte;

out_unlock_restore:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}