/* * "Store" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and * offset, the frontswap implementation may either overwrite the data and * return success or invalidate the page from frontswap and return failure. */ int __frontswap_store(struct page *page) { int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; ret = frontswap_ops.store(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); } else { /* failed dup always results in automatic invalidate of the (older) page from frontswap */ inc_frontswap_failed_stores(); if (dup) __frontswap_clear(sis, offset); } if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ ret = -1; return ret; }
/*
 * Convert a swap entry into a pte.
 *
 * The pte value is produced by mk_swap_pte() from the entry's swap offset
 * (the swap type is not passed to mk_swap_pte() here) and round-tripped
 * through a swp_entry_t.  The result is checked with pte_file() under
 * __BUG_ON() before being returned.
 */
pte_t swp_entry_to_pte(swp_entry_t entry)
{
	swp_entry_t arch_entry = { mk_swap_pte(swp_offset(entry)).pte };
	pte_t pte = { arch_entry.val };

	__BUG_ON((unsigned long) pte_file(pte));
	return pte;
}
/* * "Put" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and * offset, the frontswap implmentation may either overwrite the data * and return success or flush the page from frontswap and return failure */ int __frontswap_put_page(struct page *page) { int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); if (frontswap_test(sis, offset)) dup = 1; ret = (*frontswap_ops.put_page)(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); frontswap_succ_puts++; if (!dup) sis->frontswap_pages++; } else if (dup) { /* failed dup always results in automatic flush of the (older) page from frontswap */ frontswap_clear(sis, offset); sis->frontswap_pages--; frontswap_failed_puts++; } else frontswap_failed_puts++; return ret; }
/*
 * Queue reads for the aligned block of (1 << page_cluster) swap entries
 * that contains @entry, then return the page for @entry itself.
 *
 * Offset 0 is never read as part of the window.  Pages obtained for the
 * window are released immediately; only the final lookup of @entry is
 * returned to the caller (it may be NULL if read_swap_cache_async() fails).
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			      struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long target = swp_offset(entry);
	unsigned long cluster_mask = (1UL << page_cluster) - 1;
	unsigned long first = target & ~cluster_mask;
	unsigned long last = target | cluster_mask;
	unsigned long cur;

	if (first == 0)
		first = 1;

	for (cur = first; cur <= last; cur++) {
		struct page *ra_page;

		ra_page = read_swap_cache_async(swp_entry(swp_type(entry), cur),
						gfp_mask, vma, addr);
		if (ra_page)
			page_cache_release(ra_page);
	}

	lru_add_drain();
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
/*
 * Validate a swap entry and return its swap_info_struct.
 *
 * On success swap_lock is taken and left held; the caller must release it.
 * Returns NULL without taking the lock when the entry is zero (silently),
 * or when its type, device, offset or map slot is invalid (after logging
 * the specific reason).
 */
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *si;
	unsigned long type, offset;
	const char *problem;

	if (!entry.val)
		return NULL;

	type = swp_type(entry);
	if (type >= nr_swapfiles) {
		problem = Bad_file;
	} else {
		si = &swap_info[type];
		if (!(si->flags & SWP_USED)) {
			problem = Unused_file;
		} else {
			offset = swp_offset(entry);
			if (offset >= si->max) {
				problem = Bad_offset;
			} else if (!si->swap_map[offset]) {
				problem = Unused_offset;
			} else {
				/* Entry is valid: hand it back locked. */
				spin_lock(&swap_lock);
				return si;
			}
		}
	}

	printk(KERN_ERR "swap_free: %s%08lx\n", problem, entry.val);
	return NULL;
}
/*
 * Drop one reference on a swap entry and, if the swap cache looks like the
 * last user, try to remove the page from the swap cache as well.
 *
 * Migration entries are ignored.
 */
void free_swap_and_cache(swp_entry_t entry)
{
	struct swap_info_struct *si;
	struct page *page = NULL;
	int sole_user;

	if (is_migration_entry(entry))
		return;

	si = swap_info_get(entry);
	if (si) {
		if (swap_entry_free(si, swp_offset(entry)) == 1) {
			page = find_get_page(&swapper_space, entry.val);
			/* Could not lock the page: give up on freeing it. */
			if (page && unlikely(TestSetPageLocked(page))) {
				page_cache_release(page);
				page = NULL;
			}
		}
		spin_unlock(&swap_lock);
	}

	if (!page)
		return;

	BUG_ON(PagePrivate(page));
	sole_user = (page_count(page) == 2);

	/*
	 * Free the cache page if only the cache (plus our reference) uses
	 * it, or if swap space is full.  PageSwapCache is rechecked now
	 * that the page is locked.
	 */
	if (PageSwapCache(page) && !PageWriteback(page) &&
	    (sole_user || vm_swap_full())) {
		delete_from_swap_cache(page);
		SetPageDirty(page);
	}
	unlock_page(page);
	page_cache_release(page);
}
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive readahead: an aligned block of (1 << page_cluster) entries
 * around @entry is read, chosen because it costs no extra seek time, and
 * the original request is queued together with the readahead ones.  When
 * is_swap_fast() is true for @entry the window collapses to the entry
 * itself.
 *
 * The NUMA policy of the mm triggering the readahead is used.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			      struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long offset = swp_offset(entry);
	unsigned long window_mask = 0;
	unsigned long cur, last;
	struct page *ra_page;

	if (!is_swap_fast(entry))
		window_mask = (1UL << page_cluster) - 1;

	/* Aligned window of entries surrounding the requested offset. */
	cur = offset & ~window_mask;
	last = offset | window_mask;

	/* Slot 0 holds the swap header and is never read. */
	if (cur == 0)
		cur = 1;

	while (cur <= last) {
		/* Kick off the asynchronous read for this slot. */
		ra_page = read_swap_cache_async(swp_entry(swp_type(entry), cur),
						gfp_mask, vma, addr);
		if (ra_page)
			page_cache_release(ra_page);
		cur++;
	}

	/* Push any new pages onto the LRU now. */
	lru_add_drain();
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * With CONFIG_SWAP_ENABLE_READAHEAD, an aligned block of
 * (1 << page_cluster) entries around @entry is read first, under a block
 * plug; this window is chosen because it costs no extra seek time.
 * Without that option only @entry itself is read.
 *
 * The NUMA policy of the mm triggering the readahead is used.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			      struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_SWAP_ENABLE_READAHEAD
	unsigned long offset = swp_offset(entry);
	unsigned long window_mask = (1UL << page_cluster) - 1;
	unsigned long first = offset & ~window_mask;
	unsigned long last = offset | window_mask;
	unsigned long cur;
	struct page *ra_page;
	struct blk_plug plug;

	/* Slot 0 holds the swap header and is never read. */
	if (first == 0)
		first = 1;

	blk_start_plug(&plug);
	for (cur = first; cur <= last; cur++) {
		/* Kick off the asynchronous read for this slot. */
		ra_page = read_swap_cache_async(swp_entry(swp_type(entry), cur),
						gfp_mask, vma, addr);
		if (ra_page)
			page_cache_release(ra_page);
	}
	blk_finish_plug(&plug);

	/* Push any new pages onto the LRU now. */
	lru_add_drain();
#endif /* CONFIG_SWAP_ENABLE_READAHEAD */
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
/*
 * Allocate the page directory and fill it from swap.
 *
 * pmdisk_info.pagedir_pages pages are allocated as one block (order stored
 * in pagedir_order) and published through pm_pagedir_nosave.  Each page is
 * then read from the swap offset recorded in pmdisk_info.pagedir[]; a zero
 * offset is treated as -EFAULT.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the first read
 * error; on a read error the allocated block is freed.
 */
static int __init read_pagedir(void)
{
	int nr_pages = pmdisk_info.pagedir_pages;
	unsigned long dest;
	int idx;
	int error = 0;

	pagedir_order = get_bitmask_order(nr_pages);

	dest = __get_free_pages(GFP_ATOMIC, pagedir_order);
	if (!dest)
		return -ENOMEM;
	pm_pagedir_nosave = (struct pbe *)dest;

	pr_debug("pmdisk: Reading pagedir (%d Pages)\n", nr_pages);

	for (idx = 0; idx < nr_pages && !error; idx++, dest += PAGE_SIZE) {
		unsigned long offset = swp_offset(pmdisk_info.pagedir[idx]);

		error = offset ? read_page(offset, (void *)dest) : -EFAULT;
	}

	if (error)
		free_pages((unsigned long)pm_pagedir_nosave, pagedir_order);
	return error;
}
/*
 * Probe for the swap_in event.
 *
 * Hands trace_mark_tp() three values for the "mm"/"swap_in" marker: the
 * page's PFN, the swap_file of the swap_info_struct looked up from the
 * entry's swap type, and the entry's swap offset.  @_data is not used.
 */
void probe_swap_in(void *_data, struct page *page, swp_entry_t entry)
{
	trace_mark_tp(mm, swap_in, swap_in, probe_swap_in,
		"pfn %lu filp %p offset %lu",
		page_to_pfn(page),
		get_swap_info_struct(swp_type(entry))->swap_file,
		swp_offset(entry));
}
void end_swap_bio_read(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; if (!uptodate) { SetPageError(page); ClearPageUptodate(page); printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", imajor(bio->bi_bdev->bd_inode), iminor(bio->bi_bdev->bd_inode), (unsigned long long)bio->bi_sector); goto out; } SetPageUptodate(page); /* * There is no guarantee that the page is in swap cache - the software * suspend code (at least) uses end_swap_bio_read() against a non- * swapcache page. So we must check PG_swapcache before proceeding with * this optimization. */ if (likely(PageSwapCache(page))) { /* * The swap subsystem performs lazy swap slot freeing, * expecting that the page will be swapped out again. * So we can avoid an unnecessary write if the page * isn't redirtied. * This is good for real swap storage because we can * reduce unnecessary I/O and enhance wear-leveling * if an SSD is used as the as swap device. * But if in-memory swap device (eg zram) is used, * this causes a duplicated copy between uncompressed * data in VM-owned memory and compressed data in * zram-owned memory. So let's free zram-owned memory * and make the VM-owned decompressed page *dirty*, * so the page should be swapped out somewhere again if * we again wish to reclaim it. */ struct gendisk *disk = bio->bi_bdev->bd_disk; if (disk->fops->swap_slot_free_notify) { swp_entry_t entry; unsigned long offset; entry.val = page_private(page); offset = swp_offset(entry); SetPageDirty(page); disk->fops->swap_slot_free_notify(bio->bi_bdev, offset); } } out: unlock_page(page); bio_put(bio); }
/*
 * Prepare a swap map handle for reading, starting at swap entry @start.
 *
 * A zeroed page is allocated for handle->cur and filled from the swap
 * offset of @start; handle->k is reset to 0.
 *
 * Returns 0 on success, -EINVAL if @start has offset 0, -ENOMEM if the
 * page cannot be allocated, or the error from bio_read_page() (in which
 * case the handle is released).
 */
static int get_swap_reader(struct swap_map_handle *handle, swp_entry_t start)
{
	int error;

	if (swp_offset(start) == 0)
		return -EINVAL;

	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
	if (handle->cur == NULL)
		return -ENOMEM;

	error = bio_read_page(swp_offset(start), handle->cur);
	if (!error) {
		handle->k = 0;
		return 0;
	}

	release_swap_reader(handle);
	return error;
}
/*
 * Probe for the swap_out event.
 *
 * Hands trace_mark_tp() three values for the "mm"/"swap_out" marker: the
 * page's PFN, the swap_file of the swap_info_struct looked up from the
 * swap type of page_swp_entry(page), and that entry's swap offset.
 */
void probe_swap_out(struct page *page)
{
	trace_mark_tp(mm, swap_out, swap_out, probe_swap_out,
		"pfn %lu filp %p offset %lu",
		page_to_pfn(page),
		get_swap_info_struct(swp_type(
			page_swp_entry(page)))->swap_file,
		swp_offset(page_swp_entry(page)));
}
/*
 * Drop one reference on a swap entry.
 *
 * The caller has made sure that the swap device corresponding to the
 * entry is still around and has not been recycled.  Entries rejected by
 * swap_info_get() are ignored.
 */
void swap_free(swp_entry_t entry)
{
	struct swap_info_struct *si = swap_info_get(entry);

	if (!si)
		return;

	swap_entry_free(si, swp_offset(entry));
	/* swap_info_get() returned with swap_lock held. */
	spin_unlock(&swap_lock);
}
/*
 * Allocate a swap page of type @swap, record its offset in @bitmap and
 * return the result of swapdev_block() for it.
 *
 * Returns 0 if no swap page could be obtained or if recording the offset
 * in the bitmap fails (the swap page is freed again in that case).
 */
sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
{
	unsigned long slot = swp_offset(get_swap_page_of_type(swap));

	if (!slot)
		return 0;

	if (!bitmap_set(bitmap, slot))
		return swapdev_block(swap, slot);

	/* Could not track the slot: give it back. */
	swap_free(swp_entry(swap, slot));
	return 0;
}
/*
 * Allocate a swap page of type @swap and record its offset in @bitmap.
 *
 * Returns the swap offset, or 0 if no swap page could be obtained or if
 * recording the offset in the bitmap fails (the swap page is freed again
 * in that case).
 */
unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
{
	unsigned long slot = swp_offset(get_swap_page_of_type(swap));

	if (slot && bitmap_set(bitmap, slot)) {
		/* Could not track the slot: give it back. */
		swap_free(swp_entry(swap, slot));
		return 0;
	}
	return slot;
}
/*
 * Return how many references to the page are currently swapped out: the
 * swap_map count for the page's swap entry, minus the one held by the
 * swap cache itself.  Returns 0 if swap_info_get() rejects the entry.
 */
static inline int page_swapcount(struct page *page)
{
	struct swap_info_struct *si;
	swp_entry_t entry;
	int refs;

	entry.val = page_private(page);
	si = swap_info_get(entry);
	if (!si)
		return 0;

	/* Do not count the swap cache's own reference. */
	refs = si->swap_map[swp_offset(entry)] - 1;
	spin_unlock(&swap_lock);
	return refs;
}
/* * "Get" data from frontswap associated with swaptype and offset that were * specified when the data was put to frontswap and use it to fill the * specified page with data. Page must be locked and in the swap cache */ int __frontswap_get_page(struct page *page) { int ret = -1; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); if (frontswap_test(sis, offset)) ret = (*frontswap_ops.get_page)(type, offset, page); if (ret == 0) frontswap_gets++; return ret; }
/*
 * Look up a swap entry in the swap cache.
 *
 * A page that is found is returned unlocked with its refcount raised; we
 * rely on the kernel lock keeping page table operations atomic even
 * though the page lock is dropped before returning.  Returns NULL when
 * the entry is not cached.
 *
 * Lookup statistics are updated, and a hit on a non-compound page that
 * still carries the readahead flag is counted as a readahead hit.
 */
struct page *lookup_swap_cache(swp_entry_t entry)
{
	struct page *found;

	found = find_get_page(swap_address_space(entry), swp_offset(entry));

	if (found && likely(!PageTransCompound(found))) {
		INC_CACHE_INFO(find_success);
		if (TestClearPageReadahead(found))
			atomic_inc(&swapin_readahead_hits);
	}

	INC_CACHE_INFO(find_total);
	return found;
}
/*
 * Locate the swap_cgroup record for a swap entry.
 *
 * The per-type control structure is selected by the entry's swap type and,
 * if @ctrlp is non-NULL, also stored through it.  The record is found in
 * the map page indexed by offset / SC_PER_PAGE, at position
 * offset % SC_PER_PAGE within that page.
 */
static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
					      struct swap_cgroup_ctrl **ctrlp)
{
	pgoff_t offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[swp_type(ent)];
	struct swap_cgroup *records;

	if (ctrlp)
		*ctrlp = ctrl;

	records = page_address(ctrl->map[offset / SC_PER_PAGE]);
	return &records[offset % SC_PER_PAGE];
}
/*
 * Read the image pages described by the page directory back from swap.
 *
 * Walks pmdisk_pages entries of pm_pagedir_nosave, reading each page from
 * its recorded swap offset into its recorded address, and stops at the
 * first error.  A progress dot is printed every 100 pages.
 *
 * Returns 0 on success or the first error from read_page().
 */
static int __init read_image_data(void)
{
	struct pbe *pbe;
	int error = 0;
	int done = 0;

	printk( "Reading image data (%d pages): ", pmdisk_pages );

	pbe = pm_pagedir_nosave;
	while (done < pmdisk_pages && !error) {
		if (done % 100 == 0)
			printk( "." );
		error = read_page(swp_offset(pbe->swap_address),
				  (void *)pbe->address);
		done++;
		pbe++;
	}

	printk(" %d done.\n", done);
	return error;
}
/*
 * Read the suspend info block from swap and verify it.
 *
 * On success nr_copy_pages and pagedir_order are set from the image
 * description.
 *
 * Returns 0 on success, the error from bio_read_page(), or -EPERM when
 * sanity_check() reports a mismatch.
 */
static int __init check_header(void)
{
	const char *reason;
	int error;

	error = bio_read_page(swp_offset(swsusp_header.swsusp_info),
			      &swsusp_info);
	if (error)
		return error;

	/* Is this the same machine? */
	reason = sanity_check();
	if (reason) {
		printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason);
		return -EPERM;
	}

	nr_copy_pages = swsusp_info.image_pages;
	pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
	return 0;
}
/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 *
 * The page must be locked, swap-backed and not yet in the swap cache
 * (all three are asserted).  For a page made of nr subpages
 * (nr = hpage_nr_pages(page)), subpage i is inserted at index
 * swp_offset(entry) + i with its private field set to entry.val + i.
 *
 * Returns 0 on success, or the error from radix_tree_insert(); on error
 * every change made here is rolled back before returning.
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error, i, nr = hpage_nr_pages(page);
	struct address_space *address_space;
	pgoff_t idx = swp_offset(entry);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	/* Take nr references and flag the page before it becomes visible. */
	page_ref_add(page, nr);
	SetPageSwapCache(page);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	for (i = 0; i < nr; i++) {
		set_page_private(page + i, entry.val + i);
		error = radix_tree_insert(&address_space->page_tree,
					  idx + i, page + i);
		/* On failure, i is left at the subpage that was not inserted. */
		if (unlikely(error))
			break;
	}
	if (likely(!error)) {
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
	} else {
		/*
		 * Only the context which has set the SWAP_HAS_CACHE flag
		 * calls add_to_swap_cache(), so the insert is not expected
		 * to fail with -EEXIST.
		 */
		VM_BUG_ON(error == -EEXIST);
		/* Undo the failing subpage, then the inserted ones in reverse. */
		set_page_private(page + i, 0UL);
		while (i--) {
			radix_tree_delete(&address_space->page_tree, idx + i);
			set_page_private(page + i, 0UL);
		}
		ClearPageSwapCache(page);
		page_ref_sub(page, nr);
	}
	spin_unlock_irq(&address_space->tree_lock);

	return error;
}
/**
 * lookup_swap_cgroup - look up the id recorded for a swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the id stored in the swap_cgroup record of @ent.  According to
 * the original description this is the CSS ID of the mem_cgroup, with 0
 * being an invalid ID.
 */
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[type];
	struct swap_cgroup *records;

	/* Map page holding the record, then the record within that page. */
	records = page_address(ctrl->map[offset / SC_PER_PAGE]);
	return records[offset & SC_POS_MASK].id;
}
/* * This must be called only on pages that have * been verified to be in the swap cache. */ void __delete_from_swap_cache(struct page *page) { swp_entry_t entry; struct address_space *address_space; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(PageWriteback(page), page); entry.val = page_private(page); address_space = swap_address_space(entry); radix_tree_delete(&address_space->page_tree, swp_offset(entry)); set_page_private(page, 0); ClearPageSwapCache(page); address_space->nrpages--; __dec_node_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); }
/*
 * Write the page at kernel address @addr to a freshly allocated swap page
 * and store the swap entry in *@loc.
 *
 * Returns 0 on success, -ENOSPC if no swap page was obtained or it does
 * not belong to a swap file marked SWAPFILE_SUSPEND, or the write error.
 * A write result of -EIO is deliberately treated as success.
 *
 * NOTE(review): the swap entry is not released in this function on the
 * -ENOSPC (wrong swap file) or write-error paths; confirm that callers
 * reclaim it.
 */
static int write_swap_page(unsigned long addr, swp_entry_t * loc)
{
	swp_entry_t entry = get_swap_page();
	int error;

	if (!swp_offset(entry) ||
	    swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
		return -ENOSPC;

	error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
	if (error == -EIO)
		error = 0;
	if (error)
		return error;

	*loc = entry;
	return 0;
}
/*
 * Initialise the header, read the pmdisk info block from swap and verify
 * it.  On success pmdisk_pages is set from the image description.
 *
 * Returns 0 on success, the error from read_page(), or -EPERM when
 * sanity_check() reports a mismatch.
 */
static int __init check_header(void)
{
	const char *reason;
	int error;

	init_header();

	error = read_page(swp_offset(pmdisk_header.pmdisk_info), &pmdisk_info);
	if (error)
		return error;

	/* Is this the same machine? */
	reason = sanity_check();
	if (reason) {
		printk(KERN_ERR "pmdisk: Resume mismatch: %s\n",reason);
		return -EPERM;
	}

	pmdisk_pages = pmdisk_info.image_pages;
	return 0;
}
/* * Work out if there are any other processes sharing this * swap cache page. Free it if you can. Return success. */ int remove_exclusive_swap_page(struct page *page) { int retval; struct swap_info_struct * p; swp_entry_t entry; BUG_ON(PagePrivate(page)); BUG_ON(!PageLocked(page)); if (!PageSwapCache(page)) return 0; if (PageWriteback(page)) return 0; if (page_count(page) != 2) /* 2: us + cache */ return 0; entry.val = page_private(page); p = swap_info_get(entry); if (!p) return 0; /* Is the only swap cache user the cache itself? */ retval = 0; if (p->swap_map[swp_offset(entry)] == 1) { /* Recheck the page count with the swapcache lock held.. */ write_lock_irq(&swapper_space.tree_lock); if ((page_count(page) == 2) && !PageWriteback(page)) { __delete_from_swap_cache(page); SetPageDirty(page); retval = 1; } write_unlock_irq(&swapper_space.tree_lock); } spin_unlock(&swap_lock); if (retval) { swap_free(entry); page_cache_release(page); } return retval; }
/*
 * /proc/kpageswapn - an array exposing page swap counts
 *
 * Each entry is a u64 holding the swap count of one swap entry.  The file
 * position selects the entry: *ppos / KPMSIZE is decoded into a swap type
 * and offset in the same layout as the corresponding pagemap value.
 *
 * Exactly one entry is returned per call, regardless of how large @count
 * is.
 *
 * Returns KPMSIZE on success (and advances *ppos by KPMSIZE), 0 for a
 * zero-length read, -EINVAL if the position or count is not a multiple of
 * KPMSIZE, and -EFAULT if the swap entry is rejected by swap_info_get()
 * or the user buffer cannot be written.
 */
static ssize_t kpageswapn_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	u64 __user *out = (u64 __user *)buf;
	unsigned long src = *ppos, dst;
	swp_entry_t swap_entry;
	struct swap_info_struct *p;
	u64 swapcount;

	if (src & KPMMASK || count & KPMMASK) {
		pr_err("kpageswapn_read return EINVAL\n");
		return -EINVAL;
	}

	/*
	 * A zero-length read must not touch the user buffer; without this
	 * check one u64 would be written past the end of a 0-byte buffer.
	 */
	if (count < KPMSIZE)
		return 0;

	/* Format the swap entry from the corresponding pagemap value. */
	dst = src / KPMSIZE;
	swap_entry = swp_entry(dst >> (SWP_TYPE_SHIFT(swap_entry) +
				       RADIX_TREE_EXCEPTIONAL_SHIFT),
			       dst & SWP_OFFSET_MASK(swap_entry));

	p = swap_info_get(swap_entry);
	if (!p) {
		pr_err("kpageswapn_read swap_info_get failed\n");
		return -EFAULT;
	}

	/*
	 * Sample the count under the lock taken by swap_info_get(), then
	 * drop the lock before copying to user space: put_user() may fault
	 * and sleep, which is not allowed while that lock is held.
	 */
	swapcount = swap_count(p->swap_map[swp_offset(swap_entry)]);
	swap_info_unlock(p);

	if (put_user(swapcount, out)) {
		pr_err("pageswapn_read put user failed\n");
		return -EFAULT;
	}

	*ppos += KPMSIZE;
	return KPMSIZE;
}
/**
 * swap_cgroup_record - record an id for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: id to be recorded
 *
 * Stores @id in the swap_cgroup record of @ent and returns the value that
 * was stored there before (which can be 0).
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[type];
	struct swap_cgroup *slot;
	unsigned short prev;

	/* Map page holding the record, then the record within that page. */
	slot = page_address(ctrl->map[offset / SC_PER_PAGE]);
	slot += offset & SC_POS_MASK;

	prev = slot->id;
	slot->id = id;
	return prev;
}