void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
{
	swp_entry_t entry;

	down_read(&swap_unplug_sem);
	entry.val = page_private(page);
	if (PageSwapCache(page)) {
		struct block_device *bdev = swap_info[swp_type(entry)].bdev;
		struct backing_dev_info *bdi;

		/*
		 * If the page is removed from swapcache from under us (with a
		 * racy try_to_unuse/swapoff) we need an additional reference
		 * count to avoid reading garbage from page_private(page) above.
		 * If the WARN_ON triggers during a swapoff it may be the race
		 * condition and it's harmless. However, if it triggers without
		 * swapoff it signals a problem.
		 */
		WARN_ON(page_count(page) <= 1);

		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
		blk_run_backing_dev(bdi, page);
	}
	up_read(&swap_unplug_sem);
}
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *p;
	unsigned long offset, type;

	if (!entry.val)
		goto out;
	type = swp_type(entry);
	if (type >= nr_swapfiles)
		goto bad_nofile;
	p = &swap_info[type];
	if (!(p->flags & SWP_USED))
		goto bad_device;
	offset = swp_offset(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_free;
	spin_lock(&swap_lock);
	return p;

bad_free:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
	goto out;
bad_offset:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
	goto out;
bad_device:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
	goto out;
bad_nofile:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
out:
	return NULL;
}
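/*
 * The swp_type()/swp_offset() helpers used throughout these functions split
 * one unsigned long into a swap-device index ("type") and a page offset
 * within that device. Below is a minimal userspace sketch of that packing,
 * assuming the generic layout in which the type occupies the top
 * MAX_SWAPFILES_SHIFT bits of the value; the real macros live in
 * include/linux/swapops.h and the exact split is configuration dependent.
 * All demo_* names are illustrative stand-ins, not kernel API.
 */
#include <assert.h>
#include <limits.h>

#define DEMO_MAX_SWAPFILES_SHIFT	5
#define DEMO_TYPE_SHIFT	(sizeof(unsigned long) * CHAR_BIT - DEMO_MAX_SWAPFILES_SHIFT)
#define DEMO_OFFSET_MASK	((1UL << DEMO_TYPE_SHIFT) - 1)

static unsigned long demo_swp_entry(unsigned long type, unsigned long offset)
{
	return (type << DEMO_TYPE_SHIFT) | (offset & DEMO_OFFSET_MASK);
}

static unsigned long demo_swp_type(unsigned long val)
{
	return val >> DEMO_TYPE_SHIFT;
}

static unsigned long demo_swp_offset(unsigned long val)
{
	return val & DEMO_OFFSET_MASK;
}

int main(void)
{
	unsigned long e = demo_swp_entry(2, 12345);	/* device 2, slot 12345 */

	assert(demo_swp_type(e) == 2);
	assert(demo_swp_offset(e) == 12345);
	return 0;
}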
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_SWAP_ENABLE_READAHEAD
	struct page *page;
	unsigned long offset = swp_offset(entry);
	unsigned long start_offset, end_offset;
	unsigned long mask = (1UL << page_cluster) - 1;
	struct blk_plug plug;

	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset; offset++) {
		/* Ok, do the async read-ahead now */
		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
						gfp_mask, vma, addr);
		if (!page)
			continue;
		page_cache_release(page);
	}
	blk_finish_plug(&plug);

	lru_add_drain();	/* Push any new pages onto the LRU now */
#endif /* CONFIG_SWAP_ENABLE_READAHEAD */
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
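/*
 * The cluster computation above is plain bit masking: with page_cluster = 3
 * (a common default), mask = 0b111, so start_offset/end_offset bracket the
 * aligned 8-entry window containing the faulting offset, and slot 0 is
 * skipped because it holds the swap header. A standalone sketch of the same
 * arithmetic, with demo_* names standing in for the kernel's:
 */
#include <assert.h>

static void demo_readahead_window(unsigned long offset, unsigned int page_cluster,
				  unsigned long *start, unsigned long *end)
{
	unsigned long mask = (1UL << page_cluster) - 1;

	*start = offset & ~mask;	/* round down to the cluster boundary */
	*end = offset | mask;		/* round up to the cluster's last slot */
	if (!*start)			/* slot 0 holds the swap header */
		(*start)++;
}

int main(void)
{
	unsigned long start, end;

	demo_readahead_window(13, 3, &start, &end);
	assert(start == 8 && end == 15);	/* 13 lies in cluster [8, 15] */

	demo_readahead_window(5, 3, &start, &end);
	assert(start == 1 && end == 7);		/* cluster [0, 7], minus header slot */
	return 0;
}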
/* * "Store" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and * offset, the frontswap implementation may either overwrite the data and * return success or invalidate the page from frontswap and return failure. */ int __frontswap_store(struct page *page) { int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; ret = frontswap_ops.store(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); } else { /* failed dup always results in automatic invalidate of the (older) page from frontswap */ inc_frontswap_failed_stores(); if (dup) __frontswap_clear(sis, offset); } if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ ret = -1; return ret; }
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	unsigned long offset = swp_offset(entry);
	unsigned long start_offset, end_offset;
	unsigned long mask = is_swap_fast(entry) ? 0 :
				(1UL << page_cluster) - 1;

	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;

	for (offset = start_offset; offset <= end_offset; offset++) {
		/* Ok, do the async read-ahead now */
		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
						gfp_mask, vma, addr);
		if (!page)
			continue;
		page_cache_release(page);
	}

	lru_add_drain();	/* Push any new pages onto the LRU now */
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	int nr_pages;
	struct page *page;
	unsigned long offset;
	unsigned long end_offset;

	/*
	 * Get starting offset for readaround, and number of pages to read.
	 * Adjust starting address by readbehind (for NUMA interleave case)?
	 * No, it's very unlikely that swap layout would follow vma layout,
	 * more likely that neighbouring swap pages came from the same node:
	 * so use the same "addr" to choose the same node for each swap read.
	 */
	nr_pages = valid_swaphandles(entry, &offset);
	for (end_offset = offset + nr_pages; offset < end_offset; offset++) {
		/* Ok, do the async read-ahead now */
		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
						gfp_mask, vma, addr);
		if (!page)
			break;
		page_cache_release(page);
	}
	lru_add_drain();	/* Push any new pages onto the LRU now */
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
/* * "Put" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and * offset, the frontswap implmentation may either overwrite the data * and return success or flush the page from frontswap and return failure */ int __frontswap_put_page(struct page *page) { int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); if (frontswap_test(sis, offset)) dup = 1; ret = (*frontswap_ops.put_page)(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); frontswap_succ_puts++; if (!dup) sis->frontswap_pages++; } else if (dup) { /* failed dup always results in automatic flush of the (older) page from frontswap */ frontswap_clear(sis, offset); sis->frontswap_pages--; frontswap_failed_puts++; } else frontswap_failed_puts++; return ret; }
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	unsigned long offset = swp_offset(entry);
	unsigned long start_offset, end_offset;
	unsigned long mask = (1UL << page_cluster) - 1;

	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;

	for (offset = start_offset; offset <= end_offset; offset++) {
		/* Queue the async read-ahead for each entry in the cluster. */
		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
						gfp_mask, vma, addr);
		if (!page)
			continue;
		page_cache_release(page);
	}

	lru_add_drain();	/* Push any new pages onto the LRU now. */
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
void probe_swap_in(void *_data, struct page *page, swp_entry_t entry)
{
	trace_mark_tp(mm, swap_in, swap_in, probe_swap_in,
		"pfn %lu filp %p offset %lu",
		page_to_pfn(page),
		get_swap_info_struct(swp_type(entry))->swap_file,
		swp_offset(entry));
}
void probe_swap_out(struct page *page)
{
	trace_mark_tp(mm, swap_out, swap_out, probe_swap_out,
		"pfn %lu filp %p offset %lu",
		page_to_pfn(page),
		get_swap_info_struct(swp_type(page_swp_entry(page)))->swap_file,
		swp_offset(page_swp_entry(page)));
}
/* * "Get" data from frontswap associated with swaptype and offset that were * specified when the data was put to frontswap and use it to fill the * specified page with data. Page must be locked and in the swap cache */ int __frontswap_get_page(struct page *page) { int ret = -1; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); if (frontswap_test(sis, offset)) ret = (*frontswap_ops.get_page)(type, offset, page); if (ret == 0) frontswap_gets++; return ret; }
static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
					struct swap_cgroup_ctrl **ctrlp)
{
	pgoff_t offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;

	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
	if (ctrlp)
		*ctrlp = ctrl;
	mappage = ctrl->map[offset / SC_PER_PAGE];
	sc = page_address(mappage);
	return sc + offset % SC_PER_PAGE;
}
/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the CSS ID of the mem_cgroup on success, 0 on failure
 * (0 is an invalid ID).
 */
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned short ret;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	ret = sc->id;
	return ret;
}
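/*
 * The idx/pos split above is plain fixed-size-record arithmetic: each map
 * page holds SC_PER_PAGE records, so a swap offset maps to map page idx and
 * slot pos within that page. A userspace sketch of the same math, assuming
 * 4096-byte pages and 2-byte records (a swap_cgroup holding one unsigned
 * short id); the actual sizes are kernel-configuration dependent and the
 * DEMO_* names are stand-ins.
 */
#include <assert.h>

#define DEMO_PAGE_SIZE		4096UL
#define DEMO_SC_SIZE		sizeof(unsigned short)		/* one CSS id */
#define DEMO_SC_PER_PAGE	(DEMO_PAGE_SIZE / DEMO_SC_SIZE)	/* 2048 */
#define DEMO_SC_POS_MASK	(DEMO_SC_PER_PAGE - 1)		/* power of two */

int main(void)
{
	unsigned long offset = 5000;	/* arbitrary swap slot */
	unsigned long idx = offset / DEMO_SC_PER_PAGE;
	unsigned long pos = offset & DEMO_SC_POS_MASK;

	assert(idx == 2 && pos == 904);	/* 5000 = 2 * 2048 + 904 */
	/* the mask trick matches a modulo because SC_PER_PAGE is 2^n */
	assert(pos == offset % DEMO_SC_PER_PAGE);
	return 0;
}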
static int write_swap_page(unsigned long addr, swp_entry_t *loc)
{
	swp_entry_t entry;
	int error = 0;

	entry = get_swap_page();
	if (swp_offset(entry) &&
	    swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
		error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
		if (error == -EIO)
			error = 0;
		if (!error)
			*loc = entry;
	} else
		error = -ENOSPC;
	return error;
}
/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: mem_cgroup ID to be recorded
 *
 * Returns the old value on success, 0 on failure.
 * (Of course, the old value can be 0.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned short old;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	old = sc->id;
	sc->id = id;

	return old;
}
/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the pointer to the mem_cgroup on success, NULL on failure.
 */
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	struct mem_cgroup *ret;

	if (!do_swap_account)
		return NULL;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	ret = sc->val;
	return ret;
}
/* * "Get" data from frontswap associated with swaptype and offset that were * specified when the data was put to frontswap and use it to fill the * specified page with data. Page must be locked and in the swap cache. */ int __frontswap_load(struct page *page) { int ret = -1; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) ret = frontswap_ops.load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { SetPageDirty(page); frontswap_clear(sis, offset); } } return ret; }
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct blk_plug plug;
	bool do_poll = true;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	do_poll = false;
	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset; offset++) {
		/* Ok, do the async read-ahead now */
		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
						gfp_mask, vma, addr, false);
		if (!page)
			continue;
		if (offset != entry_offset &&
		    likely(!PageTransCompound(page)))
			SetPageReadahead(page);
		put_page(page);
	}
	blk_finish_plug(&plug);

	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}
	return -ENOMEM;
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns the old id on success, 0 on failure.
 * (There is no mem_cgroup using 0 as its id.)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
				unsigned short old, unsigned short new)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned long flags;
	unsigned short retval;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	spin_lock_irqsave(&ctrl->lock, flags);
	/* Compare-and-swap under the per-control lock: replace only on match. */
	retval = sc->id;
	if (retval == old)
		sc->id = new;
	else
		retval = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	return retval;
}
/**
 * write_suspend_image - Write entire image to disk.
 *
 * After writing the suspend signature to the disk, suspend may no
 * longer fail: we have a ready-to-run image in swap, and rollback
 * would happen on next reboot -- corrupting data.
 *
 * Note: The buffer we allocate to use to write the suspend header is
 * not freed; it's not needed since the system is going down anyway
 * (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
 */
static int write_suspend_image(void)
{
	int i;
	swp_entry_t entry, prev = { 0 };
	int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
	union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
	unsigned long address;
	struct page *page;

	if (!buffer)
		return -ENOMEM;

	printk("Writing data to swap (%d pages): ", nr_copy_pages);
	for (i = 0; i < nr_copy_pages; i++) {
		if (!(i % 100))
			printk(".");
		if (!(entry = get_swap_page()).val)
			panic("\nNot enough swapspace when writing data");
		if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
			panic("\nPage %d: not enough swapspace on suspend device", i);
		address = (pagedir_nosave + i)->address;
		page = virt_to_page(address);
		rw_swap_page_sync(WRITE, entry, page);
		(pagedir_nosave + i)->swap_address = entry;
	}
	printk("|\n");

	printk("Writing pagedir (%d pages): ", nr_pgdir_pages);
	for (i = 0; i < nr_pgdir_pages; i++) {
		cur = (union diskpage *)((char *)pagedir_nosave) + i;
		BUG_ON((char *)cur != (((char *)pagedir_nosave) + i * PAGE_SIZE));
		printk(".");
		if (!(entry = get_swap_page()).val) {
			printk(KERN_CRIT "Not enough swapspace when writing pgdir\n");
			panic("Don't know how to recover");
			free_page((unsigned long)buffer);
			return -ENOSPC;
		}
		if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
			panic("\nNot enough swapspace for pagedir on suspend device");

		BUG_ON(sizeof(swp_entry_t) != sizeof(long));
		BUG_ON(PAGE_SIZE % sizeof(struct pbe));

		cur->link.next = prev;
		page = virt_to_page((unsigned long)cur);
		rw_swap_page_sync(WRITE, entry, page);
		prev = entry;
	}
	printk("H");

	BUG_ON(sizeof(struct suspend_header) > PAGE_SIZE - sizeof(swp_entry_t));
	BUG_ON(sizeof(union diskpage) != PAGE_SIZE);
	if (!(entry = get_swap_page()).val)
		panic("\nNot enough swapspace when writing header");
	if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
		panic("\nNot enough swapspace for header on suspend device");

	cur = (void *)buffer;
	if (fill_suspend_header(&cur->sh))
		panic("\nOut of memory while writing header");

	cur->link.next = prev;
	page = virt_to_page((unsigned long)cur);
	rw_swap_page_sync(WRITE, entry, page);
	prev = entry;

	printk("S");
	mark_swapfiles(prev, MARK_SWAP_SUSPEND);
	printk("|\n");

	MDELAY(1000);
	return 0;
}
/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place. After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	zpool_unmap_handle(pool, handle);
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		page_cache_release(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		dlen = PAGE_SIZE;
		src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
				ZPOOL_MM_RO) + sizeof(struct zswap_header);
		dst = kmap_atomic(page);
		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				entry->length, dst, &dlen);
		kunmap_atomic(dst);
		zpool_unmap_handle(zswap_pool, entry->handle);
		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	page_cache_release(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because invalidate happened during writeback
	 * Search the tree and free the entry if we find it.
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;

	/*
	 * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be
	 * happening concurrently. It is safe and okay to not free the
	 * entry; it is also okay to return !0 if the following put
	 * ends up freeing it.
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	return ret;
}