static void mmu_spte_walk(struct vmmr0_vcpu *vcpu, inspect_spte_fn fn)
{
    int i;
    struct vmmr0_mmu_page *sp;

    if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
        return;

    if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
        hpa_t root = vcpu->arch.mmu.root_hpa;

        sp = page_header(root);
        __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
        return;
    }

    for (i = 0; i < 4; ++i) {
        hpa_t root = vcpu->arch.mmu.pae_root[i];

        if (root && VALID_PAGE(root)) {
            root &= PT64_BASE_ADDR_MASK;
            sp = page_header(root);
            __mmu_spte_walk(vcpu, sp, fn, 2);
        }
    }
}
static struct page *lookup_page_table(const struct mm_struct *mm, unsigned int address, int clear)
{
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte;
    struct page *page = NOPAGE_SIGBUS;      /* default: no page */

    pgd = pgd_offset(mm, address);
    if (!pgd_none(*pgd)) {
        /* Go for a PMD lookup */
        pmd = pmd_offset(pgd, address);
        if (!pmd_none(*pmd)) {
            pte = pte_offset(pmd, address);  /* get the PTE entry */
            if (pte_present(*pte)) {
                page = pte_page(*pte);       /* get the page from the entry */
                if (clear && VALID_PAGE(page) && !PageReserved(page)) {
                    pte_t x = ptep_get_and_clear(pte);  /* clear the PTE */
                    (void)x;
#ifdef DEBUG_NOT_NOW
                    printk(KERN_ALERT "Non-contiguous page getting cleared off the list (%lx):\n",
                           PAGE_VIRTUAL(page));
#endif
                    __free_page(page);       /* free the page */
                    page = (struct page *)1;
                }
            }
        }
    }
    return page;
}
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We hold the mm semaphore and the page_table_lock on entry and exit
 * with the page_table_lock released.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
    unsigned long address, pte_t *page_table, pte_t pte)
{
    struct page *old_page, *new_page;

    old_page = pte_page(pte);
    if (!VALID_PAGE(old_page))
        goto bad_wp_page;

    if (!TryLockPage(old_page)) {
        int reuse = can_share_swap_page(old_page);
        unlock_page(old_page);
        if (reuse) {
#ifndef CONFIG_SUPERH
            /* Not needed for VIPT cache */
            flush_cache_page(vma, address);
#endif
            establish_pte(vma, address, page_table,
                          pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
            spin_unlock(&mm->page_table_lock);
            return 1;   /* Minor fault */
        }
    }

    /*
     * Ok, we need to copy. Oh, well..
     */
    page_cache_get(old_page);
    spin_unlock(&mm->page_table_lock);

    new_page = alloc_page(GFP_HIGHUSER);
    if (!new_page)
        goto no_mem;
    copy_cow_page(old_page, new_page, address);

    /*
     * Re-check the pte - we dropped the lock
     */
    spin_lock(&mm->page_table_lock);
    if (pte_same(*page_table, pte)) {
        if (PageReserved(old_page))
            ++mm->rss;
        break_cow(vma, new_page, address, page_table);
        lru_cache_add(new_page);

        /* Free the old page.. */
        new_page = old_page;
    }
    spin_unlock(&mm->page_table_lock);
    page_cache_release(new_page);
    page_cache_release(old_page);
    return 1;   /* Minor fault */

bad_wp_page:
    spin_unlock(&mm->page_table_lock);
    printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",
           address, (unsigned long)old_page);
    return -1;
no_mem:
    page_cache_release(old_page);
    return -1;
}
/*
 * Called by TLB shootdown
 */
void __free_pte(pte_t pte)
{
    struct page *page = pte_page(pte);

    if ((!VALID_PAGE(page)) || PageReserved(page))
        return;
    if (pte_dirty(pte))
        set_page_dirty(page);
    free_page_and_swap_cache(page);
}
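/*
 * Illustrative sketch only: the VALID_PAGE() test used throughout these
 * routines is, on 2.4-era flat-mem_map architectures, a bounds check of a
 * struct page pointer against mem_map[] (the classic i386 form was roughly
 * ((page) - mem_map < max_mapnr)); the exact definition is per-architecture.
 * The stand-alone user-space model below mimics that check with a fake
 * mem_map; the fake_* names are made up for the demo, not kernel symbols.
 */
#include <stdio.h>
#include <stddef.h>

struct fake_page { int flags; };

#define FAKE_MAX_MAPNR 1024UL
static struct fake_page fake_mem_map[FAKE_MAX_MAPNR];

/* model of the flat-mem_map VALID_PAGE() bounds check */
static int fake_valid_page(const struct fake_page *page)
{
    return (size_t)(page - fake_mem_map) < FAKE_MAX_MAPNR;
}

int main(void)
{
    struct fake_page *inside  = &fake_mem_map[10];
    struct fake_page *outside = fake_mem_map + FAKE_MAX_MAPNR;  /* one past the end */

    printf("inside  -> %d\n", fake_valid_page(inside));   /* prints 1 */
    printf("outside -> %d\n", fake_valid_page(outside));  /* prints 0 */
    return 0;
}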
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
    struct page *page = pte_page(pte);

    if (VALID_PAGE(page) && page->mapping &&
        test_bit(PG_dcache_dirty, &page->flags)) {
        flush_kernel_dcache_page(page_address(page));
        clear_bit(PG_dcache_dirty, &page->flags);
    }
}
static inline void forget_pte(pte_t page)
{
    if (pte_none(page))
        return;
    if (pte_present(page)) {
        struct page *ptpage = pte_page(page);
        if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
            return;
        page_cache_release(ptpage);
        return;
    }
    swap_free(pte_to_swp_entry(page));
}
static inline void free_pte(pte_t page)
{
    if (pte_present(page)) {
        struct page *ptpage = pte_page(page);
        if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
            return;
        __free_page(ptpage);
        if (current->mm->rss <= 0)
            return;
        current->mm->rss--;
        return;
    }
    swap_free(pte_to_swp_entry(page));
}
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
    pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t *classzone)
{
    pte_t *pte;
    unsigned long pmd_end;
    DEFINE_LOCK_COUNT();

    if (pmd_none(*dir))
        return count;
    if (pmd_bad(*dir)) {
        pmd_ERROR(*dir);
        pmd_clear(dir);
        return count;
    }

    pte = pte_offset(dir, address);

    pmd_end = (address + PMD_SIZE) & PMD_MASK;
    if (end > pmd_end)
        end = pmd_end;

    do {
        if (pte_present(*pte)) {
            struct page *page = pte_page(*pte);

            if (VALID_PAGE(page) && !PageReserved(page)) {
                count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
                if (!count) {
                    address += PAGE_SIZE;
                    break;
                }
                /* we reach this with a lock depth of 1 or 2 */
#if 0
                if (TEST_LOCK_COUNT(4)) {
                    if (conditional_schedule_needed())
                        return count;
                    RESET_LOCK_COUNT();
                }
#endif
            }
        }
        address += PAGE_SIZE;
        pte++;
    } while (address && (address < end));
    mm->swap_address = address;
    return count;
}
static inline void forget_pte(pte_t page)
{
    if (pte_none(page))
        return;
    if (pte_present(page)) {
        struct page *ptpage = pte_page(page);
        if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
            return;
        /*
         * free_page() used to be able to clear swap cache
         * entries. We may now have to do it manually.
         */
        free_page_and_swap_cache(ptpage);
        return;
    }
    swap_free(pte_to_swp_entry(page));
}
/*
 * Return indicates whether a page was freed so caller can adjust rss
 */
static inline int free_pte(pte_t pte)
{
    if (pte_present(pte)) {
        struct page *page = pte_page(pte);
        if ((!VALID_PAGE(page)) || PageReserved(page))
            return 0;
        /*
         * free_page() used to be able to clear swap cache
         * entries. We may now have to do it manually.
         */
        if (pte_dirty(pte) && page->mapping)
            set_page_dirty(page);
        free_page_and_swap_cache(page);
        return 1;
    }
    swap_free(pte_to_swp_entry(pte));
    return 0;
}
struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
{
    unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
    unsigned long kaddr;
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *ptep, pte;
    struct page *page = NULL;

    /* address is user VA; convert to kernel VA of desired page */
    kaddr = (address - vma->vm_start) + offset;
    kaddr = VMALLOC_VMADDR(kaddr);

    spin_lock(&init_mm.page_table_lock);

    /* Lookup page structure for kernel VA */
    pgd = pgd_offset(&init_mm, kaddr);
    if (pgd_none(*pgd) || pgd_bad(*pgd))
        goto out;
    pmd = pmd_offset(pgd, kaddr);
    if (pmd_none(*pmd) || pmd_bad(*pmd))
        goto out;
    ptep = pte_offset(pmd, kaddr);
    if (!ptep)
        goto out;
    pte = *ptep;
    if (!pte_present(pte))
        goto out;
    if (write && !pte_write(pte))
        goto out;
    page = pte_page(pte);
    if (!VALID_PAGE(page)) {
        page = NULL;
        goto out;
    }

    /* Increment reference count on page */
    get_page(page);

out:
    spin_unlock(&init_mm.page_table_lock);
    return page;
}
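/*
 * Illustrative sketch only: kmem_vm_nopage() above walks init_mm's page
 * tables by hand (pgd -> pmd -> pte), with different bit fields of the
 * address indexing each level.  The user-space model below replays a
 * two-level walk; the shift widths (22/12), the table size, and all demo_*
 * names are arbitrary classic-x86-like choices for the demo, not the real
 * kernel macros.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define DEMO_PGDIR_SHIFT 22          /* top 10 bits index the "pgd" */
#define DEMO_PAGE_SHIFT  12          /* low 12 bits are the page offset */
#define DEMO_PTRS        1024        /* entries per table level */

typedef struct { uint64_t *entries; } demo_table;

/* walk one address through two levels, returning the leaf entry or 0 */
static uint64_t demo_walk(demo_table *pgd, uint32_t addr)
{
    unsigned pgd_idx = addr >> DEMO_PGDIR_SHIFT;
    unsigned pte_idx = (addr >> DEMO_PAGE_SHIFT) & (DEMO_PTRS - 1);

    demo_table *pte_table = (demo_table *)(uintptr_t)pgd->entries[pgd_idx];
    if (!pte_table)                          /* the "pgd_none" case */
        return 0;
    return pte_table->entries[pte_idx];      /* the "pte" leaf */
}

int main(void)
{
    demo_table pgd  = { calloc(DEMO_PTRS, sizeof(uint64_t)) };
    demo_table ptes = { calloc(DEMO_PTRS, sizeof(uint64_t)) };
    uint32_t addr = (3u << DEMO_PGDIR_SHIFT) | (7u << DEMO_PAGE_SHIFT) | 0x123;

    pgd.entries[3]  = (uint64_t)(uintptr_t)&ptes;  /* populate one pgd slot */
    ptes.entries[7] = 0xABCD;                      /* and one pte slot */

    printf("leaf entry = 0x%llx\n", (unsigned long long)demo_walk(&pgd, addr));
    return 0;
}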
void __update_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
    unsigned long addr;
    struct page *page;

    if (!cpu_has_dc_aliases)
        return;

    page = pte_page(pte);
    if (VALID_PAGE(page) && page->mapping &&
        (page->flags & (1UL << PG_dcache_dirty))) {
        if (pages_do_alias((unsigned long)page_address(page),
                           address & PAGE_MASK)) {
            addr = (unsigned long)page_address(page);
            flush_data_cache_page(addr);
        }
        ClearPageDcacheDirty(page);
    }
}
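/*
 * Illustrative sketch only: the pages_do_alias() test in __update_cache()
 * above asks whether the kernel mapping and the new user mapping land on
 * different "cache colours" of a virtually indexed cache; on MIPS it is
 * essentially an XOR-and-mask of the two virtual addresses.  The mask below
 * assumes a 16 KiB cache way and 4 KiB pages (alias bits 12-13, mask 0x3000);
 * that value and the demo_* names are assumptions for the demo.
 */
#include <stdio.h>

#define DEMO_ALIAS_MASK 0x3000UL   /* assumed: 16 KiB way size, 4 KiB pages */

static unsigned long demo_pages_do_alias(unsigned long addr1, unsigned long addr2)
{
    /* different colour bits -> the two mappings can alias in the cache */
    return (addr1 ^ addr2) & DEMO_ALIAS_MASK;
}

int main(void)
{
    unsigned long kva  = 0x80041000UL;  /* kernel-side mapping (example) */
    unsigned long uva1 = 0x00401000UL;  /* same colour bits as kva */
    unsigned long uva2 = 0x00402000UL;  /* different colour bits */

    printf("uva1 aliases kva: %s\n", demo_pages_do_alias(kva, uva1) ? "yes" : "no");
    printf("uva2 aliases kva: %s\n", demo_pages_do_alias(kva, uva2) ? "yes" : "no");
    return 0;
}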
/*
 * maps a range of physical memory into the requested pages. the old
 * mappings are removed. any references to nonexistent pages result
 * in null mappings (currently treated as "copy-on-access")
 */
static inline void remap_pte_range(pte_t *pte, unsigned long address, unsigned long size,
    unsigned long phys_addr, pgprot_t prot)
{
    unsigned long end;

    address &= ~PMD_MASK;
    end = address + size;
    if (end > PMD_SIZE)
        end = PMD_SIZE;
    do {
        struct page *page;
        pte_t oldpage;

        oldpage = ptep_get_and_clear(pte);

        page = virt_to_page(__va(phys_addr));
        if ((!VALID_PAGE(page)) || PageReserved(page))
            set_pte(pte, mk_pte_phys(phys_addr, prot));
        forget_pte(oldpage);
        address += PAGE_SIZE;
        phys_addr += PAGE_SIZE;
        pte++;
    } while (address && (address < end));
}
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
    pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t *classzone)
{
    pte_t *pte;
    unsigned long pmd_end;

    if (pmd_none(*dir))
        return count;
    if (pmd_bad(*dir)) {
        pmd_ERROR(*dir);
        pmd_clear(dir);
        return count;
    }

    pte = pte_offset(dir, address);

    pmd_end = (address + PMD_SIZE) & PMD_MASK;
    if (end > pmd_end)
        end = pmd_end;

    do {
        if (pte_present(*pte)) {
            struct page *page = pte_page(*pte);

            if (VALID_PAGE(page) && !PageReserved(page)) {
                count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
                if (!count) {
                    address += PAGE_SIZE;
                    break;
                }
            }
        }
        address += PAGE_SIZE;
        pte++;
    } while (address && (address < end));
    mm->swap_address = address;
    return count;
}
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t *pmd, unsigned long address, unsigned long size)
{
    unsigned long offset;
    pte_t *ptep;
    int freed = 0;

    if (pmd_none(*pmd))
        return 0;
    if (pmd_bad(*pmd)) {
        pmd_ERROR(*pmd);
        pmd_clear(pmd);
        return 0;
    }
    ptep = pte_offset(pmd, address);
    offset = address & ~PMD_MASK;
    if (offset + size > PMD_SIZE)
        size = PMD_SIZE - offset;
    size &= PAGE_MASK;
    for (offset = 0; offset < size; ptep++, offset += PAGE_SIZE) {
        pte_t pte = *ptep;

        if (pte_none(pte))
            continue;
        if (pte_present(pte)) {
            struct page *page = pte_page(pte);

            if (VALID_PAGE(page) && !PageReserved(page))
                freed++;
            /* This will eventually call __free_pte on the pte. */
            tlb_remove_page(tlb, ptep, address + offset);
        } else {
            free_swap_and_cache(pte_to_swp_entry(pte));
            pte_clear(ptep);
        }
    }

    return freed;
}
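/*
 * Illustrative sketch only: the clamping arithmetic at the top of
 * zap_pte_range() above (and of remap_pte_range() earlier) keeps the loop
 * from walking past the region mapped by the current PMD entry.  The model
 * below replays that clamp in user space; PMD_SIZE is assumed to be 4 MiB
 * (classic two-level x86) and the DEMO_* names are made up for the demo.
 */
#include <stdio.h>

#define DEMO_PAGE_SIZE 0x1000UL
#define DEMO_PAGE_MASK (~(DEMO_PAGE_SIZE - 1))
#define DEMO_PMD_SIZE  0x400000UL              /* 4 MiB, assumed */
#define DEMO_PMD_MASK  (~(DEMO_PMD_SIZE - 1))

int main(void)
{
    unsigned long address = 0x3ff000UL;        /* 4 KiB before a PMD boundary */
    unsigned long size    = 0x10000UL;         /* caller asked for 64 KiB */
    unsigned long offset  = address & ~DEMO_PMD_MASK;

    /* never walk past the end of the current PMD-mapped region */
    if (offset + size > DEMO_PMD_SIZE)
        size = DEMO_PMD_SIZE - offset;
    size &= DEMO_PAGE_MASK;

    printf("offset in pmd = 0x%lx, clamped size = 0x%lx (%lu page(s))\n",
           offset, size, size / DEMO_PAGE_SIZE);
    return 0;
}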
static void __free_pages_ok(struct page *page, unsigned int order)
{
    unsigned long index, page_idx, mask, flags;
    free_area_t *area;
    struct page *base;
    zone_t *zone;

    /*
     * Yes, think what happens when other parts of the kernel take
     * a reference to a page in order to pin it for io. -ben
     */
    if (PageLRU(page)) {
        if (unlikely(in_interrupt()))
            BUG();
        lru_cache_del(page);
    }

    if (page->buffers)
        BUG();
    if (page->mapping)
        BUG();
    if (!VALID_PAGE(page))
        BUG();
    if (PageLocked(page))
        BUG();
    if (PageActive(page))
        BUG();
    page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));

    if (current->flags & PF_FREE_PAGES)
        goto local_freelist;
back_local_freelist:

    zone = page_zone(page);

    mask = (~0UL) << order;
    base = zone->zone_mem_map;
    page_idx = page - base;
    if (page_idx & ~mask)
        BUG();
    index = page_idx >> (1 + order);

    area = zone->free_area + order;

    spin_lock_irqsave(&zone->lock, flags);

    zone->free_pages -= mask;

    while (mask + (1 << (MAX_ORDER-1))) {
        struct page *buddy1, *buddy2;

        if (area >= zone->free_area + MAX_ORDER)
            BUG();
        if (!__test_and_change_bit(index, area->map))
            /*
             * the buddy page is still allocated.
             */
            break;
        /*
         * Move the buddy up one level.
         * This code is taking advantage of the identity:
         *     -mask = 1+~mask
         */
        buddy1 = base + (page_idx ^ -mask);
        buddy2 = base + page_idx;
        if (BAD_RANGE(zone, buddy1))
            BUG();
        if (BAD_RANGE(zone, buddy2))
            BUG();

        list_del(&buddy1->list);
        mask <<= 1;
        area++;
        index >>= 1;
        page_idx &= mask;
    }
    list_add(&(base + page_idx)->list, &area->free_list);

    spin_unlock_irqrestore(&zone->lock, flags);
    return;

local_freelist:
    if (current->nr_local_pages)
        goto back_local_freelist;
    if (in_interrupt())
        goto back_local_freelist;

    list_add(&page->list, &current->local_pages);
    page->index = order;
    current->nr_local_pages++;
}
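/*
 * Illustrative sketch only: the coalescing loop above finds a block's buddy
 * by flipping one bit of the page index.  With mask = (~0UL) << order,
 * -mask equals (1UL << order) (the identity -mask = 1 + ~mask noted in the
 * comment), so page_idx ^ -mask flips bit 'order' to locate the buddy, and
 * masking with the widened mask gives the start of the merged (order+1)
 * block.  The user-space demo below replays that arithmetic.
 */
#include <stdio.h>

static void demo_buddy(unsigned long page_idx, unsigned int order)
{
    unsigned long mask   = (~0UL) << order;
    unsigned long buddy  = page_idx ^ -mask;         /* flip bit 'order' */
    unsigned long merged = page_idx & (mask << 1);   /* start of combined block */

    printf("idx %3lu order %u: -mask=%lu buddy=%3lu merged block starts at %3lu\n",
           page_idx, order, -mask, buddy, merged);
}

int main(void)
{
    demo_buddy(12, 2);   /* buddy of pages 12..15 at order 2 is 8..11 */
    demo_buddy(8,  2);   /* and vice versa */
    demo_buddy(8,  3);   /* at order 3 the buddy of 8..15 is 0..7 */
    return 0;
}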
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We enter with the page table read-lock held, and need to exit without
 * it.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
    unsigned long address, pte_t *page_table, pte_t pte)
{
    struct page *old_page, *new_page;

    old_page = pte_page(pte);
    if (!VALID_PAGE(old_page))
        goto bad_wp_page;

    /*
     * We can avoid the copy if:
     * - we're the only user (count == 1)
     * - the only other user is the swap cache,
     *   and the only swap cache user is itself,
     *   in which case we can just continue to
     *   use the same swap cache (it will be
     *   marked dirty).
     */
    switch (page_count(old_page)) {
    case 2:
        /*
         * Lock the page so that no one can look it up from
         * the swap cache, grab a reference and start using it.
         * Can not do lock_page, holding page_table_lock.
         */
        if (!PageSwapCache(old_page) || TryLockPage(old_page))
            break;
        if (is_page_shared(old_page)) {
            UnlockPage(old_page);
            break;
        }
        UnlockPage(old_page);
        /* FallThrough */
    case 1:
        flush_cache_page(vma, address);
        establish_pte(vma, address, page_table,
                      pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
        spin_unlock(&mm->page_table_lock);
        return 1;   /* Minor fault */
    }

    /*
     * Ok, we need to copy. Oh, well..
     */
    spin_unlock(&mm->page_table_lock);
    new_page = page_cache_alloc();
    if (!new_page)
        return -1;
    spin_lock(&mm->page_table_lock);

    /*
     * Re-check the pte - we dropped the lock
     */
    if (pte_same(*page_table, pte)) {
        if (PageReserved(old_page))
            ++mm->rss;
        break_cow(vma, old_page, new_page, address, page_table);

        /* Free the old page.. */
        new_page = old_page;
    }
    spin_unlock(&mm->page_table_lock);
    page_cache_release(new_page);
    return 1;   /* Minor fault */

bad_wp_page:
    spin_unlock(&mm->page_table_lock);
    printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",
           address, (unsigned long)old_page);
    return -1;
}
static inline struct page *get_page_map(struct page *page)
{
    if (!VALID_PAGE(page))
        return 0;
    return page;
}
/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * 08Jan98 Merged into one routine from several inline routines to reduce
 *         variable count and make things faster. -jj
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
    struct vm_area_struct *vma)
{
    pgd_t *src_pgd, *dst_pgd;
    unsigned long address = vma->vm_start;
    unsigned long end = vma->vm_end;
    unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

    src_pgd = pgd_offset(src, address) - 1;
    dst_pgd = pgd_offset(dst, address) - 1;

    for (;;) {
        pmd_t *src_pmd, *dst_pmd;

        src_pgd++; dst_pgd++;

        /* copy_pmd_range */

        if (pgd_none(*src_pgd))
            goto skip_copy_pmd_range;
        if (pgd_bad(*src_pgd)) {
            pgd_ERROR(*src_pgd);
            pgd_clear(src_pgd);
skip_copy_pmd_range:
            address = (address + PGDIR_SIZE) & PGDIR_MASK;
            if (!address || (address >= end))
                goto out;
            continue;
        }
        if (pgd_none(*dst_pgd)) {
            if (!pmd_alloc(dst_pgd, 0))
                goto nomem;
        }

        src_pmd = pmd_offset(src_pgd, address);
        dst_pmd = pmd_offset(dst_pgd, address);

        do {
            pte_t *src_pte, *dst_pte;

            /* copy_pte_range */

            if (pmd_none(*src_pmd))
                goto skip_copy_pte_range;
            if (pmd_bad(*src_pmd)) {
                pmd_ERROR(*src_pmd);
                pmd_clear(src_pmd);
skip_copy_pte_range:
                address = (address + PMD_SIZE) & PMD_MASK;
                if (address >= end)
                    goto out;
                goto cont_copy_pmd_range;
            }
            if (pmd_none(*dst_pmd)) {
                if (!pte_alloc(dst_pmd, 0))
                    goto nomem;
            }

            src_pte = pte_offset(src_pmd, address);
            dst_pte = pte_offset(dst_pmd, address);

            do {
                pte_t pte = *src_pte;
                struct page *ptepage;

                /* copy_one_pte */

                if (pte_none(pte))
                    goto cont_copy_pte_range_noset;
                if (!pte_present(pte)) {
                    swap_duplicate(pte_to_swp_entry(pte));
                    goto cont_copy_pte_range;
                }
                ptepage = pte_page(pte);
                if ((!VALID_PAGE(ptepage)) || PageReserved(ptepage))
                    goto cont_copy_pte_range;

                /* If it's a COW mapping, write protect it both in the parent and the child */
                if (cow) {
                    ptep_set_wrprotect(src_pte);
                    pte = *src_pte;
                }

                /* If it's a shared mapping, mark it clean in the child */
                if (vma->vm_flags & VM_SHARED)
                    pte = pte_mkclean(pte);
                pte = pte_mkold(pte);
                get_page(ptepage);

cont_copy_pte_range:
                set_pte(dst_pte, pte);
cont_copy_pte_range_noset:
                address += PAGE_SIZE;
                if (address >= end)
                    goto out;
                src_pte++; dst_pte++;
            } while ((unsigned long)src_pte & PTE_TABLE_MASK);

cont_copy_pmd_range:
            src_pmd++; dst_pmd++;
        } while ((unsigned long)src_pmd & PMD_TABLE_MASK);
    }
out:
    return 0;
nomem:
    return -ENOMEM;
}
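/*
 * Illustrative sketch only: the COW write-protect done by copy_page_range()
 * above (and later resolved by do_wp_page()) has a simple user-visible
 * effect that can be shown from user space with a MAP_PRIVATE mapping across
 * fork(): the child's first write faults, the kernel copies the page, and
 * the parent keeps seeing its original data.
 */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>

int main(void)
{
    char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }

    strcpy(p, "parent data");

    pid_t pid = fork();
    if (pid == 0) {                 /* child: this write triggers the COW fault */
        strcpy(p, "child data");
        printf("child  sees: %s\n", p);
        _exit(0);
    }
    waitpid(pid, NULL, 0);
    printf("parent sees: %s\n", p); /* still "parent data" */
    munmap(p, 4096);
    return 0;
}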
static void __free_pages_ok(struct page *page, unsigned int order)
{
    unsigned long index, page_idx, mask, flags;
    free_area_t *area;
    struct page *base;
    zone_t *zone;

    /*
     * Subtle. We do not want to test this in the inlined part of
     * __free_page() - it's a rare condition and just increases
     * cache footprint unnecessarily. So we do an 'incorrect'
     * decrement on page->count for reserved pages, but this part
     * makes it safe.
     */
    if (PageReserved(page))
        return;
    /*
     * Yes, think what happens when other parts of the kernel take
     * a reference to a page in order to pin it for io. -ben
     */
    if (PageLRU(page)) {
        if (unlikely(in_interrupt())) {
            unsigned long flags;

            spin_lock_irqsave(&free_pages_ok_no_irq_lock, flags);
            page->next_hash = free_pages_ok_no_irq_head;
            free_pages_ok_no_irq_head = page;
            page->index = order;
            spin_unlock_irqrestore(&free_pages_ok_no_irq_lock, flags);

            schedule_task(&free_pages_ok_no_irq_task);
            return;
        }
        lru_cache_del(page);
    }

    if (page->buffers)
        BUG();
    if (page->mapping)
        BUG();
    if (!VALID_PAGE(page))
        BUG();
    if (PageLocked(page))
        BUG();
    if (PageActive(page))
        BUG();
    ClearPageReferenced(page);
    ClearPageDirty(page);

    /* de-reference all the pages for this order */
    for (page_idx = 1; page_idx < (1 << order); page_idx++)
        set_page_count(&page[page_idx], 0);

    if (current->flags & PF_FREE_PAGES)
        goto local_freelist;
back_local_freelist:

    zone = page_zone(page);

    mask = (~0UL) << order;
    base = zone->zone_mem_map;
    page_idx = page - base;
    if (page_idx & ~mask)
        BUG();
    index = page_idx >> (1 + order);

    area = zone->free_area + order;

    spin_lock_irqsave(&zone->lock, flags);

    zone->free_pages -= mask;

    while (mask + (1 << (MAX_ORDER-1))) {
        struct page *buddy1, *buddy2;

        if (area >= zone->free_area + MAX_ORDER)
            BUG();
        if (!__test_and_change_bit(index, area->map))
            /*
             * the buddy page is still allocated.
             */
            break;
        /*
         * Move the buddy up one level.
         * This code is taking advantage of the identity:
         *     -mask = 1+~mask
         */
        buddy1 = base + (page_idx ^ -mask);
        buddy2 = base + page_idx;
        if (BAD_RANGE(zone, buddy1))
            BUG();
        if (BAD_RANGE(zone, buddy2))
            BUG();

        list_del(&buddy1->list);
        mask <<= 1;
        area++;
        index >>= 1;
        page_idx &= mask;
    }
    list_add(&(base + page_idx)->list, &area->free_list);

    spin_unlock_irqrestore(&zone->lock, flags);
    return;

local_freelist:
    if (current->nr_local_pages)
        goto back_local_freelist;
    if (in_interrupt())
        goto back_local_freelist;

    list_add(&page->list, &current->local_pages);
    page->index = order;
    current->nr_local_pages++;
}
static void __free_pages_ok(struct page *page, unsigned int order)
{
    unsigned long index, page_idx, mask, flags;
    free_area_t *area;
    struct page *base;
    zone_t *zone;

    if (PageLRU(page))
        lru_cache_del(page);

    if (page->buffers)
        BUG();
    if (page->mapping)
        BUG();
    if (!VALID_PAGE(page))
        BUG();
    if (PageSwapCache(page))
        BUG();
    if (PageLocked(page))
        BUG();
    if (PageLRU(page))
        BUG();
    if (PageActive(page))
        BUG();
    TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_FREE, order);
    page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));

    if (current->flags & PF_FREE_PAGES)
        goto local_freelist;
back_local_freelist:

    zone = page->zone;

    mask = (~0UL) << order;
    base = zone->zone_mem_map;
    page_idx = page - base;
    if (page_idx & ~mask)
        BUG();
    index = page_idx >> (1 + order);

    area = zone->free_area + order;

    spin_lock_irqsave(&zone->lock, flags);

    zone->free_pages -= mask;

    while (mask + (1 << (MAX_ORDER-1))) {
        struct page *buddy1, *buddy2;

        if (area >= zone->free_area + MAX_ORDER)
            BUG();
        if (!__test_and_change_bit(index, area->map))
            /*
             * the buddy page is still allocated.
             */
            break;
        /*
         * Move the buddy up one level.
         */
        buddy1 = base + (page_idx ^ -mask);
        buddy2 = base + page_idx;
        if (BAD_RANGE(zone, buddy1))
            BUG();
        if (BAD_RANGE(zone, buddy2))
            BUG();

        memlist_del(&buddy1->list);
        mask <<= 1;
        area++;
        index >>= 1;
        page_idx &= mask;
    }
    memlist_add_head(&(base + page_idx)->list, &area->free_list);

    spin_unlock_irqrestore(&zone->lock, flags);
    return;

local_freelist:
    if (current->nr_local_pages)
        goto back_local_freelist;
    if (in_interrupt())
        goto back_local_freelist;

    list_add(&page->list, &current->local_pages);
    page->index = order;
    current->nr_local_pages++;
}
int rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
{
    void           *pvLast = (uint8_t *)pv + cb - 1;
    size_t const    cPages = cb >> PAGE_SHIFT;
    PRTR0MEMOBJLNX  pMemLnx;
    bool            fLinearMapping;
    int             rc;
    uint8_t        *pbPage;
    size_t          iPage;
    NOREF(fAccess);

    /*
     * Classify the memory and check that we can deal with it.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    fLinearMapping = virt_addr_valid(pvLast) && virt_addr_valid(pv);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0)
    fLinearMapping = VALID_PAGE(virt_to_page(pvLast)) && VALID_PAGE(virt_to_page(pv));
#else
# error "not supported"
#endif
    if (!fLinearMapping)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 19)
        if (   !RTR0MemKernelIsValidAddr(pv)
            || !RTR0MemKernelIsValidAddr(pv + cb))
#endif
            return VERR_INVALID_PARAMETER;
    }

    /*
     * Allocate the memory object.
     */
    pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]),
                                            RTR0MEMOBJTYPE_LOCK, pv, cb);
    if (!pMemLnx)
        return VERR_NO_MEMORY;

    /*
     * Gather the pages.
     * We ASSUME all kernel pages are non-swappable.
     */
    rc     = VINF_SUCCESS;
    pbPage = (uint8_t *)pvLast;
    iPage  = cPages;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 19)
    if (!fLinearMapping)
    {
        while (iPage-- > 0)
        {
            struct page *pPage = vmalloc_to_page(pbPage);
            if (RT_UNLIKELY(!pPage))
            {
                rc = VERR_LOCK_FAILED;
                break;
            }
            pMemLnx->apPages[iPage] = pPage;
            pbPage -= PAGE_SIZE;
        }
    }
    else
#endif
    {
        while (iPage-- > 0)
        {
            pMemLnx->apPages[iPage] = virt_to_page(pbPage);
            pbPage -= PAGE_SIZE;
        }
    }
    if (RT_SUCCESS(rc))
    {
        /*
         * Complete the memory object and return.
         */
        pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
        pMemLnx->cPages                = cPages;
        Assert(!pMemLnx->fMappedToRing0);
        *ppMem = &pMemLnx->Core;
        return VINF_SUCCESS;
    }

    rtR0MemObjDelete(&pMemLnx->Core);
    return rc;
}
/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * NOTE! If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 */
static int try_to_swap_out(struct mm_struct *mm, struct vm_area_struct *vma,
    unsigned long address, pte_t *page_table, int gfp_mask)
{
    pte_t pte;
    swp_entry_t entry;
    struct page *page;
    int onlist;

    pte = *page_table;
    if (!pte_present(pte))
        goto out_failed;
    page = pte_page(pte);
    if ((!VALID_PAGE(page)) || PageReserved(page))
        goto out_failed;

    if (mm->swap_cnt)
        mm->swap_cnt--;

    onlist = PageActive(page);
    /* Don't look at this pte if it's been accessed recently. */
    if (ptep_test_and_clear_young(page_table)) {
        age_page_up(page);
        goto out_failed;
    }
    if (!onlist)
        /* The page is still mapped, so it can't be freeable... */
        age_page_down_ageonly(page);

    /*
     * If the page is in active use by us, or if the page
     * is in active use by others, don't unmap it or
     * (worse) start unneeded IO.
     */
    if (page->age > 0)
        goto out_failed;

    if (TryLockPage(page))
        goto out_failed;

    /* From this point on, the odds are that we're going to
     * nuke this pte, so read and clear the pte. This hook
     * is needed on CPUs which update the accessed and dirty
     * bits in hardware.
     */
    pte = ptep_get_and_clear(page_table);

    /*
     * Is the page already in the swap cache? If so, then
     * we can just drop our reference to it without doing
     * any IO - it's already up-to-date on disk.
     *
     * Return 0, as we didn't actually free any real
     * memory, and we should just continue our scan.
     */
    if (PageSwapCache(page)) {
        entry.val = page->index;
        if (pte_dirty(pte))
            set_page_dirty(page);
set_swap_pte:
        swap_duplicate(entry);
        set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
        UnlockPage(page);
        mm->rss--;
        flush_tlb_page(vma, address);
        deactivate_page(page);
        page_cache_release(page);
out_failed:
        return 0;
    }

    /*
     * Is it a clean page? Then it must be recoverable
     * by just paging it in again, and we can just drop
     * it..
     *
     * However, this won't actually free any real
     * memory, as the page will just be in the page cache
     * somewhere, and as such we should just continue
     * our scan.
     *
     * Basically, this just makes it possible for us to do
     * some real work in the future in "refill_inactive()".
     */
    flush_cache_page(vma, address);
    if (!pte_dirty(pte))
        goto drop_pte;

    /*
     * Ok, it's really dirty. That means that
     * we should either create a new swap cache
     * entry for it, or we should write it back
     * to its own backing store.
     */
    if (page->mapping) {
        set_page_dirty(page);
        goto drop_pte;
    }

    /*
     * This is a dirty, swappable page. First of all,
     * get a suitable swap entry for it, and make sure
     * we have the swap cache set up to associate the
     * page with that swap entry.
     */
    entry = get_swap_page();
    if (!entry.val)
        goto out_unlock_restore;    /* No swap space left */

    /* Add it to the swap cache and mark it dirty */
    add_to_swap_cache(page, entry);
    set_page_dirty(page);
    goto set_swap_pte;

out_unlock_restore:
    set_pte(page_table, pte);
    UnlockPage(page);
    return 0;
}