/*
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 */
void swapin_readahead(swp_entry_t entry)
{
	int i, num;
	struct page *new_page;
	unsigned long offset;

	/*
	 * Get the number of handles we should do readahead io to. Also,
	 * grab temporary references on them, releasing them as io completes.
	 */
	num = valid_swaphandles(entry, &offset);
	for (i = 0; i < num; offset++, i++) {
		/* Don't block on I/O for read-ahead */
		if (atomic_read(&nr_async_pages) >=
				pager_daemon.swap_cluster * (1 << page_cluster)) {
			while (i++ < num)
				swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
			break;
		}
		/* Ok, do the async read-ahead now */
		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
		if (new_page != NULL)
			page_cache_release(new_page);
		swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
	}
	return;
}
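/*
 * Illustration (not from the source above): a minimal user-space model of
 * the aligned-window computation that valid_swaphandles() is described as
 * performing. The real function also clips against the swap map and skips
 * bad slots; this sketch, with the assumed constant PAGE_CLUSTER and the
 * hypothetical cluster_window(), shows only the alignment step.
 */
#include <stdio.h>

#define PAGE_CLUSTER 3	/* stands in for the kernel's page_cluster tunable */

/*
 * Return the start of the aligned (1 << PAGE_CLUSTER)-entry block that
 * contains 'target', clipped to [1, max): slot 0 holds the swap header
 * and is never handed out.
 */
static unsigned long cluster_window(unsigned long target, unsigned long max,
				    unsigned long *num)
{
	unsigned long mask = (1UL << PAGE_CLUSTER) - 1;
	unsigned long start = target & ~mask;
	unsigned long end = start + mask + 1;

	if (start == 0)
		start = 1;
	if (end > max)
		end = max;
	*num = end - start;
	return start;
}

int main(void)
{
	unsigned long num;
	unsigned long start = cluster_window(42, 1024, &num);

	printf("readahead window: [%lu, %lu)\n", start, start + num);
	return 0;
}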
/*
 * page not present ... go through shm_pages
 */
static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	pte_t pte;
	struct shmid_kernel *shp;
	unsigned int id, idx;

	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;

#ifdef DEBUG_SHM
	if (id > max_shmid) {
		printk ("shm_nopage: id=%d too big. proc mem corrupted\n", id);
		return 0;
	}
#endif
	shp = shm_segs[id];

#ifdef DEBUG_SHM
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		printk ("shm_nopage: id=%d invalid. Race.\n", id);
		return 0;
	}
#endif
	/* This can occur on a remap */
	if (idx >= shp->shm_npages) {
		return 0;
	}

	pte = __pte(shp->shm_pages[idx]);
	if (!pte_present(pte)) {
		unsigned long page = get_free_page(GFP_USER);
		if (!page)
			return -1;
		pte = __pte(shp->shm_pages[idx]);
		if (pte_present(pte)) {
			free_page (page); /* doesn't sleep */
			goto done;
		}
		if (!pte_none(pte)) {
			rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
			pte = __pte(shp->shm_pages[idx]);
			if (pte_present(pte))  {
				free_page (page); /* doesn't sleep */
				goto done;
			}
			swap_free(pte_val(pte));
			shm_swp--;
		}
		shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte_val(pte);
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
	current->min_flt++;
	atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
	return pte_page(pte);
}
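/*
 * Illustration (not from the source above): the check/sleep/recheck idiom
 * shm_nopage() relies on, modelled in user space. Every operation that can
 * sleep (page allocation, swap I/O) may let a racing fault install the page
 * first, so the slot is re-read after each one. All names here are
 * hypothetical stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>

static void *slot;			/* models shp->shm_pages[idx] */
static char racing_page[4096];		/* page a racing fault would install */

/* Models a sleeping operation during which a racing fault may run. */
static void might_sleep(void)
{
	if (rand() & 1)
		slot = racing_page;
}

int main(void)
{
	void *page = malloc(4096);	/* models get_free_page(), may sleep */

	might_sleep();
	if (slot) {			/* recheck after the sleep */
		free(page);		/* lost the race: drop our copy */
		printf("raced: reusing existing page %p\n", slot);
	} else {
		slot = page;
		printf("installed our page %p\n", slot);
	}
	return 0;
}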
/*
 * lpage_destroy: deallocates a logical page. Releases any RAM or swap
 * pages involved.
 *
 * Synchronization: Someone might be in the process of evicting the
 * page if it's resident, so it might be pinned. So lock and pin
 * together.
 *
 * We assume that lpages are not shared between address spaces and
 * address spaces are not shared between threads.
 */
void
lpage_destroy(struct lpage *lp)
{
	paddr_t pa;

	KASSERT(lp != NULL);

	lpage_lock_and_pin(lp);

	pa = lp->lp_paddr & PAGE_FRAME;
	if (pa != INVALID_PADDR) {
		DEBUG(DB_VM, "lpage_destroy: freeing paddr 0x%x\n", pa);
		lp->lp_paddr = INVALID_PADDR;
		lpage_unlock(lp);
		coremap_free(pa, false /* iskern */);
		coremap_unpin(pa);
	}
	else {
		lpage_unlock(lp);
	}

	if (lp->lp_swapaddr != INVALID_SWAPADDR) {
		DEBUG(DB_VM, "lpage_destroy: freeing swap addr 0x%llx\n",
		      lp->lp_swapaddr);
		swap_free(lp->lp_swapaddr);
	}

	spinlock_cleanup(&lp->lp_spinlock);
	kfree(lp);
}
/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares (but this is /really/
 * inefficient, ugh).
 *
 * We return 1 after having slept, which makes the caller start over
 * from the beginning for this process.
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		unsigned long page_nr = MAP_NR(pte_page(pte));
		if (page_nr >= MAP_NR(high_memory))
			return 0;
		if (!in_swap_cache(page_nr))
			return 0;
		if (SWP_TYPE(in_swap_cache(page_nr)) != type)
			return 0;
		delete_from_swap_cache(page_nr);
		set_pte(dir, pte_mkdirty(pte));
		return 0;
	}
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	read_swap_page(pte_val(pte), (char *) page);
#if 0 /* Is this really needed here, hasn't it been solved elsewhere? */
	flush_page_to_ram(page);
#endif
	if (pte_val(*dir) != pte_val(pte)) {
		free_page(page);
		return 1;
	}
	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	flush_tlb_page(vma, address);
	++vma->vm_mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
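/*
 * Illustration (not from the source above): the restart idiom the comment
 * describes, in user space. Each step returns 1 if it may have slept
 * (invalidating the page-table walk), and the caller then starts the scan
 * over from the beginning. do_step() is a hypothetical stand-in.
 */
#include <stdio.h>

static int done[5];

/* Returns 1 the first time it runs ("I slept, state may have changed"),
 * 0 once its work is already complete. */
static int do_step(int i)
{
	if (!done[i]) {
		done[i] = 1;
		return 1;
	}
	return 0;
}

int main(void)
{
	int i, restarts = 0;

again:
	for (i = 0; i < 5; i++) {
		if (do_step(i)) {
			restarts++;
			goto again;	/* start over, as unuse_pte's callers do */
		}
	}
	printf("scan finished after %d restarts\n", restarts);
	return 0;
}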
static void free_pagedir_entries(void)
{
	int i;

	for (i = 0; i < swsusp_info.pagedir_pages; i++)
		swap_free(swsusp_info.pagedir[i]);
}
/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	spinlock_t *ptl;
	pte_t *pte;
	int ret = 1;

	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
		ret = -ENOMEM;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
		if (ret > 0)
			mem_cgroup_uncharge_page(page);
		ret = 0;
		goto out;
	}

	inc_mm_counter(vma->vm_mm, anon_rss);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	page_add_anon_rmap(page, vma, addr);
	swap_free(entry);
	/*
	 * Move the page to the active list so it is not
	 * immediately swapped out again after swapon.
	 */
	activate_page(page);
out:
	pte_unmap_unlock(pte, ptl);
	return ret;
}
/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache. Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		if (!swap_duplicate(entry))
			break;

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-EEXIST) if there is already a page associated
		 * with this entry in the swap cache: added by a racing
		 * read_swap_cache_async, or add_to_swap or shmem_writepage
		 * re-using the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
		if (likely(!err)) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		swap_free(entry);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
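/*
 * Illustration (not from the source above): the lookup/allocate/insert
 * protocol above, modelled in user space with a trivial direct-mapped
 * "cache". -EEXIST sends us back to the lookup, -ENOMEM gives up, and the
 * allocated page is kept across retries. All names here are hypothetical.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define NSLOTS 8
static void *cache[NSLOTS];		/* entry -> page, the "swap cache" */

static void *cache_lookup(int entry)
{
	return cache[entry % NSLOTS];
}

static int cache_insert(int entry, void *page)
{
	if (cache[entry % NSLOTS])
		return -EEXIST;		/* a racing insert got there first */
	cache[entry % NSLOTS] = page;
	return 0;
}

static void *read_cached(int entry)
{
	void *found, *new_page = NULL;
	int err;

	do {
		found = cache_lookup(entry);	/* recheck the cache first */
		if (found)
			break;
		if (!new_page) {
			new_page = malloc(64);	/* models alloc_page_vma() */
			if (!new_page)
				break;		/* out of memory */
		}
		err = cache_insert(entry, new_page);
		if (!err)
			return new_page;	/* we own the slot: start I/O */
	} while (err != -ENOMEM);

	free(new_page);				/* lost the race or failed */
	return found;
}

int main(void)
{
	printf("first:  %p\n", read_cached(3));
	printf("second: %p\n", read_cached(3));
	return 0;
}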
static void free_pagedir_entries(void)
{
	int num = pmdisk_info.pagedir_pages;
	int i;

	for (i = 0; i < num; i++)
		swap_free(pmdisk_info.pagedir[i]);
}
/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page * page, gfp_t gfp_mask)
{
	swp_entry_t entry;
	int err;

	if (!PageLocked(page))
		BUG();

	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			return 0;

		/*
		 * Radix-tree node allocations from PF_MEMALLOC contexts could
		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
		 * stops emergency reserves from being allocated.
		 *
		 * TODO: this could cause a theoretical memory reclaim
		 * deadlock in the swap out path.
		 */
		/*
		 * Add it to the swap cache and mark it dirty
		 */
		err = __add_to_swap_cache(page, entry,
				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);

		switch (err) {
		case 0:				/* Success */
			SetPageUptodate(page);
			SetPageDirty(page);
			INC_CACHE_INFO(add_total);
			return 1;
		case -EEXIST:
			/* Raced with "speculative" read_swap_cache_async */
			INC_CACHE_INFO(exist_race);
			swap_free(entry);
			continue;
		default:
			/* -ENOMEM radix-tree allocation failure */
			swap_free(entry);
			return 0;
		}
	}
}
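/*
 * Illustration (not from the source above): the allocate/insert/retry loop
 * above, modelled in user space. On -EEXIST the just-allocated entry is
 * handed back (swap_free() in the kernel) and a fresh one is tried; any
 * other failure also frees the entry and gives up. Hypothetical names.
 */
#include <errno.h>
#include <stdio.h>

static int next_entry = 1;
static int get_entry(void)	{ return next_entry++; }
static void put_entry(int e)	{ printf("swap_free(%d)\n", e); }

static int inserts;
/* Fail once with -EEXIST to model a racing speculative read, then succeed. */
static int cache_insert(int entry)
{
	return inserts++ ? 0 : -EEXIST;
}

static int add_to_swap_model(void)
{
	for (;;) {
		int entry = get_entry();

		if (!entry)
			return 0;	/* swap space exhausted */
		switch (cache_insert(entry)) {
		case 0:
			printf("cached under entry %d\n", entry);
			return 1;
		case -EEXIST:
			put_entry(entry);	/* raced: retry with a new entry */
			continue;
		default:
			put_entry(entry);	/* allocation failure: give up */
			return 0;
		}
	}
}

int main(void)
{
	return !add_to_swap_model();
}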
int shm_swap (int prio, int gfp_mask)
{
	pte_t page;
	struct shmid_kernel *shp;
	unsigned long swap_nr;
	unsigned long id, idx;
	int loop = 0;
	int counter;

	counter = shm_rss >> prio;
	if (!counter || !(swap_nr = get_swap_page()))
		return 0;

 check_id:
	shp = shm_segs[swap_id];
	if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED ) {
 next_id:
		swap_idx = 0;
		if (++swap_id > max_shmid) {
			swap_id = 0;
			if (loop)
				goto failed;
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

 check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	page = __pte(shp->shm_pages[idx]);
	if (!pte_present(page))
		goto check_table;
	if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))]))
		goto check_table;
	swap_attempts++;

	if (--counter < 0) { /* failed */
 failed:
		swap_free (swap_nr);
		return 0;
	}
	if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)
		goto check_table;
	shp->shm_pages[idx] = swap_nr;
	rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
	free_page(pte_page(page));
	swap_successes++;
	shm_swp++;
	shm_rss--;
	return 1;
}
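/*
 * Illustration (not from the source above): the cursor-plus-wrap-flag scan
 * shm_swap() uses. The cursor persists across calls (like swap_id), so each
 * call resumes where the previous one stopped, and the flag detects one
 * complete wrap so the scan fails instead of spinning. Hypothetical names.
 */
#include <stdio.h>

#define MAX_ID 4
static int cursor;			/* persists across calls, like swap_id */

static int usable(int id)
{
	return id == 2;			/* pretend only segment 2 qualifies */
}

static int find_victim(void)
{
	int looped = 0;

	for (;;) {
		if (usable(cursor))
			return cursor;
		if (++cursor > MAX_ID) {
			cursor = 0;
			if (looped)
				return -1;	/* full wrap: nothing found */
			looped = 1;
		}
	}
}

int main(void)
{
	printf("victim: %d\n", find_victim());
	return 0;
}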
struct page * read_swap_cache_async(unsigned long entry, int wait)
{
	struct page *found_page = 0, *new_page;
	unsigned long new_page_addr;

#ifdef DEBUG_SWAP
	printk("DebugVM: read_swap_cache_async entry %08lx%s\n",
	       entry, wait ? ", wait" : "");
#endif
	/*
	 * Make sure the swap entry is still in use.
	 */
	if (!swap_duplicate(entry))	/* Account for the swap cache */
		goto out;
	/*
	 * Look for the page in the swap cache.
	 */
	found_page = lookup_swap_cache(entry);
	if (found_page)
		goto out_free_swap;

	new_page_addr = __get_free_page(GFP_USER);
	if (!new_page_addr)
		goto out_free_swap;	/* Out of memory */
	new_page = mem_map + MAP_NR(new_page_addr);

	/*
	 * Check the swap cache again, in case we stalled above.
	 */
	found_page = lookup_swap_cache(entry);
	if (found_page)
		goto out_free_page;
	/*
	 * Add it to the swap cache and read its contents.
	 */
	if (!add_to_swap_cache(new_page, entry))
		goto out_free_page;

	set_bit(PG_locked, &new_page->flags);
	rw_swap_page(READ, entry, (char *) new_page_addr, wait);
#ifdef DEBUG_SWAP
	printk("DebugVM: read_swap_cache_async created "
	       "entry %08lx at %p\n",
	       entry, (char *) page_address(new_page));
#endif
	return new_page;

out_free_page:
	__free_page(new_page);
out_free_swap:
	swap_free(entry);
out:
	return found_page;
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	entry.val = page->index;

#ifdef SWAP_CACHE_INFO
	swap_cache_del_total++;
#endif
	remove_from_swap_cache(page);
	swap_free(entry);
}
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		struct page *ptpage = pte_page(page);
		if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
			return;
		page_cache_release(ptpage);
		return;
	}
	swap_free(pte_to_swp_entry(page));
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	entry.val = page_private(page);

	write_lock_irq(&swapper_space.tree_lock);
	__delete_from_swap_cache(page);
	write_unlock_irq(&swapper_space.tree_lock);

	swap_free(entry);
	page_cache_release(page);
}
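/*
 * Illustration (not from the source above): the ordering above, in user
 * space. Only the index structure is manipulated under its lock; the swap
 * entry and the page reference are released after the lock is dropped,
 * since neither release needs it. Hypothetical names throughout.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static void *tree[16];			/* models the swapper_space tree */

static void delete_from_cache_model(int entry)
{
	pthread_mutex_lock(&tree_lock);
	tree[entry] = NULL;		/* models __delete_from_swap_cache() */
	pthread_mutex_unlock(&tree_lock);

	/* swap_free(entry) and page_cache_release(page) would go here,
	 * outside the lock */
	printf("freed entry %d outside the lock\n", entry);
}

int main(void)
{
	static int dummy_page;

	tree[3] = &dummy_page;
	delete_from_cache_model(3);
	return 0;
}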
unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
{
	unsigned long offset;

	offset = swp_offset(get_swap_page_of_type(swap));
	if (offset) {
		if (bitmap_set(bitmap, offset)) {
			swap_free(swp_entry(swap, offset));
			offset = 0;
		}
	}
	return offset;
}
sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
{
	unsigned long offset;

	offset = swp_offset(get_swap_page_of_type(swap));
	if (offset) {
		if (bitmap_set(bitmap, offset))
			swap_free(swp_entry(swap, offset));
		else
			return swapdev_block(swap, offset);
	}
	return 0;
}
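/*
 * Illustration (not from the source above): the bookkeeping contract both
 * allocators above depend on. bitmap_set() fails when the offset does not
 * fit in the bitmap, and the caller must then return the freshly allocated
 * slot with swap_free(). This model uses hypothetical names.
 */
#include <stdio.h>

#define NBITS 64UL
static unsigned long bits[NBITS / (8 * sizeof(unsigned long))];

/* Set bit 'n'; return -1 if it lies outside the bitmap. */
static int bitmap_set_model(unsigned long n)
{
	if (n >= NBITS)
		return -1;
	bits[n / (8 * sizeof(unsigned long))] |=
		1UL << (n % (8 * sizeof(unsigned long)));
	return 0;
}

int main(void)
{
	printf("track offset 10: %d (slot kept)\n", bitmap_set_model(10));
	printf("track offset 99: %d (caller must swap_free the slot)\n",
	       bitmap_set_model(99));
	return 0;
}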
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   unsigned long page, unsigned long entry)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	shp->shm_pages[idx] = pte_val(pte);
	atomic_inc(&mem_map[MAP_NR(page)].count);
	shm_rss++;

	swap_free(entry);
	shm_swp--;
}
static void free_data(void)
{
	swp_entry_t entry;
	int i;

	for (i = 0; i < pmdisk_pages; i++) {
		entry = (pm_pagedir_nosave + i)->swap_address;
		if (entry.val)
			swap_free(entry);
		else
			break;
		(pm_pagedir_nosave + i)->swap_address = (swp_entry_t){0};
	}
}
static void data_free(void)
{
	swp_entry_t entry;
	int i;

	for (i = 0; i < nr_copy_pages; i++) {
		entry = (pagedir_nosave + i)->swap_address;
		if (entry.val)
			swap_free(entry);
		else
			break;
		(pagedir_nosave + i)->swap_address = (swp_entry_t){0};
	}
}
static inline void free_pte(pte_t page)
{
	if (pte_present(page)) {
		struct page *ptpage = pte_page(page);
		if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
			return;
		__free_page(ptpage);
		if (current->mm->rss <= 0)
			return;
		current->mm->rss--;
		return;
	}
	swap_free(pte_to_swp_entry(page));
}
static inline void free_pte(pte_t page)
{
	if (pte_present(page)) {
		unsigned long addr = pte_page(page);
		if (addr >= high_memory || PageReserved(mem_map+MAP_NR(addr)))
			return;
		free_page(addr);
		if (current->mm->rss <= 0)
			return;
		current->mm->rss--;
		return;
	}
	swap_free(pte_val(page));
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	entry.val = page_private(page);

	spin_lock_irq(&swapper_space.tree_lock);
	__delete_from_swap_cache(page);
	spin_unlock_irq(&swapper_space.tree_lock);

	mem_cgroup_uncharge_swapcache(page, entry);
	swap_free(entry);
	page_cache_release(page);
}
/* mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, swp_entry_t entry, struct page* page)
{
	pte_t pte = *dir;

	if (likely(pte_to_swp_entry(pte).val != entry.val))
		return;
	if (unlikely(pte_none(pte) || pte_present(pte)))
		return;
	get_page(page);
	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
	swap_free(entry);
	++vma->vm_mm->rss;
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void delete_from_swap_cache(struct page *page)
{
	long entry = page->offset;

#ifdef SWAP_CACHE_INFO
	swap_cache_del_total++;
#endif
#ifdef DEBUG_SWAP
	printk("DebugVM: delete_from_swap_cache(%08lx count %d, "
	       "entry %08lx)\n",
	       page_address(page), atomic_read(&page->count), entry);
#endif
	remove_from_swap_cache (page);
	swap_free (entry);
}
/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	inc_mm_counter(vma->vm_mm, anon_rss);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	page_add_anon_rmap(page, vma, addr);
	swap_free(entry);
	/*
	 * Move the page to the active list so it is not
	 * immediately swapped out again after swapon.
	 */
	activate_page(page);
}
void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
{
	unsigned int bit, n;
	unsigned long test;

	bit = 0;
	while (bitmap) {
		for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
			for (test = 1UL; test; test <<= 1) {
				if (bitmap->chunks[n] & test)
					swap_free(swp_entry(swap, bit));

				bit++;
			}
		bitmap = bitmap->next;
	}
}
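/*
 * Illustration (not from the source above): the chunked bit-walk above, in
 * user space. 'test' sweeps a single bit across each chunk while 'bit'
 * counts the absolute position, so every set bit maps back to the swap
 * offset that would be freed.
 */
#include <stdio.h>

#define CHUNKS 2

int main(void)
{
	/* set bits: 0 and 2 in chunk 0, bit 1 in chunk 1 */
	unsigned long chunks[CHUNKS] = { 0x5UL, 0x2UL };
	unsigned long test;
	unsigned int bit = 0, n;

	for (n = 0; n < CHUNKS; n++)
		for (test = 1UL; test; test <<= 1) {
			if (chunks[n] & test)
				printf("would swap_free offset %u\n", bit);
			bit++;
		}
	return 0;
}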
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		unsigned long addr = pte_page(page);
		if (MAP_NR(addr) >= max_mapnr || PageReserved(mem_map+MAP_NR(addr)))
			return;
		/*
		 * free_page() used to be able to clear swap cache
		 * entries. We may now have to do it manually.
		 */
		free_page_and_swap_cache(addr);
		return;
	}
	swap_free(pte_val(page));
}
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		struct page *ptpage = pte_page(page);
		if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
			return;
		/*
		 * free_page() used to be able to clear swap cache
		 * entries. We may now have to do it manually.
		 */
		free_page_and_swap_cache(ptpage);
		return;
	}
	swap_free(pte_to_swp_entry(page));
}
/*
 * Return indicates whether a page was freed so caller can adjust rss
 */
static inline int free_pte(pte_t pte)
{
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);
		if ((!VALID_PAGE(page)) || PageReserved(page))
			return 0;
		/*
		 * free_page() used to be able to clear swap cache
		 * entries. We may now have to do it manually.
		 */
		if (pte_dirty(pte) && page->mapping)
			set_page_dirty(page);
		free_page_and_swap_cache(page);
		return 1;
	}
	swap_free(pte_to_swp_entry(pte));
	return 0;
}
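/*
 * Illustration (not from the source above): the present-vs-swapped teardown
 * decision shared by the free_pte()/forget_pte() variants here, and why
 * this one returns whether a resident page was freed: the caller batches
 * the rss adjustment. fake_pte and free_pte_model are hypothetical.
 */
#include <stdio.h>

struct fake_pte {
	int present;
	int swap_entry;			/* nonzero if the pte holds a swap slot */
};

/* Returns 1 when a resident page was released, so the caller adjusts rss. */
static int free_pte_model(struct fake_pte pte)
{
	if (pte.present) {
		printf("release resident page\n");
		return 1;
	}
	if (pte.swap_entry)
		printf("swap_free(%d)\n", pte.swap_entry);
	return 0;
}

int main(void)
{
	struct fake_pte table[] = { { 1, 0 }, { 0, 7 }, { 0, 0 } };
	unsigned int i;
	int freed = 0;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		freed += free_pte_model(table[i]);
	printf("rss -= %d\n", freed);
	return 0;
}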
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	if (!PageLocked(page))
		BUG();

	block_flushpage(page, 0);

	entry.val = page->index;

	spin_lock(&pagecache_lock);
	__delete_from_swap_cache(page);
	spin_unlock(&pagecache_lock);

	swap_free(entry);
	page_cache_release(page);
}