Example #1
0
/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
	struct page *page;

	page = find_get_page(&swapper_space, entry.val);

	if (page)
		INC_CACHE_INFO(find_success);

	INC_CACHE_INFO(find_total);
	return page;
}
Example #2
0
int btrfs_write_marked_extents(struct btrfs_root *root,
                               struct extent_io_tree *dirty_pages, int mark)
{
    int ret;
    int err = 0;
    int werr = 0;
    struct page *page;
    struct inode *btree_inode = root->fs_info->btree_inode;
    u64 start = 0;
    u64 end;
    unsigned long index;

    while (1) {
        ret = find_first_extent_bit(dirty_pages, start, &start, &end,
                                    mark);
        if (ret)
            break;
        while (start <= end) {
            cond_resched();

            index = start >> PAGE_CACHE_SHIFT;
            start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
            page = find_get_page(btree_inode->i_mapping, index);
            if (!page)
                continue;

            btree_lock_page_hook(page);
            if (!page->mapping) {
                unlock_page(page);
                page_cache_release(page);
                continue;
            }

            if (PageWriteback(page)) {
                if (PageDirty(page))
                    wait_on_page_writeback(page);
                else {
                    unlock_page(page);
                    page_cache_release(page);
                    continue;
                }
            }
            err = write_one_page(page, 0);
            if (err)
                werr = err;
            page_cache_release(page);
        }
    }
    if (err)
        werr = err;
    return werr;
}
/*
 * Use the cached Readdirplus results in order to avoid a LOOKUP call
 * whenever we believe that the parent directory has not changed.
 *
 * We assume that any file creation/rename changes the directory mtime.
 * As this results in a page cache invalidation whenever it occurs,
 * we don't require any other tests for cache coherency.
 */
static
int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
			struct nfs_fh *fh, struct nfs_fattr *fattr)
{
	nfs_readdir_descriptor_t desc;
	struct nfs_server *server;
	struct nfs_entry entry;
	struct page *page;
	unsigned long timestamp;
	int res;

	if (!NFS_USE_READDIRPLUS(dir))
		return -ENOENT;
	server = NFS_SERVER(dir);
	/* Don't use readdirplus unless the cache is stable */
	if ((server->flags & NFS_MOUNT_NOAC) != 0
			|| nfs_caches_unstable(dir)
			|| nfs_attribute_timeout(dir))
		return -ENOENT;
	if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
		return -ENOENT;
	timestamp = NFS_I(dir)->readdir_timestamp;

	entry.fh = fh;
	entry.fattr = fattr;

	desc.decode = NFS_PROTO(dir)->decode_dirent;
	desc.entry = &entry;
	desc.page_index = 0;
	desc.plus = 1;

	for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {

		res = -EIO;
		if (PageUptodate(page)) {
			void * kaddr = kmap_atomic(page, KM_USER0);
			desc.ptr = kaddr;
			res = find_dirent_name(&desc, page, dentry);
			kunmap_atomic(kaddr, KM_USER0);
		}
		page_cache_release(page);

		if (res == 0)
			goto out_found;
		if (res != -EAGAIN)
			break;
	}
	return -ENOENT;
 out_found:
	fattr->timestamp = timestamp;
	return 0;
}
Example #4
0
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		
		}

		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
		if (err)
			break;

		err = swapcache_prepare(entry);
		if (err == -EEXIST) {	
			radix_tree_preload_end();
			continue;
		}
		if (err) {		
			radix_tree_preload_end();
			break;
		}

		
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			swap_readpage(new_page);
			return new_page;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
Example #5
0
/* 
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-ENOENT) if swap entry has been freed since
		 * our caller observed it.  May fail (-EEXIST) if there
		 * is already a page associated with this entry in the
		 * swap cache: added by a racing read_swap_cache_async,
		 * or by try_to_swap_out (or shmem_writepage) re-using
		 * the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		err = add_to_swap_cache(new_page, entry);
		if (!err) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_active(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
	} while (err != -ENOENT && err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
Example #6
0
/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
	struct page *page;

	page = find_get_page(swap_address_space(entry), swp_offset(entry));

	if (page && likely(!PageTransCompound(page))) {
		INC_CACHE_INFO(find_success);
		if (TestClearPageReadahead(page))
			atomic_inc(&swapin_readahead_hits);
	}

	INC_CACHE_INFO(find_total);
	return page;
}
Example #7
0
/*
 * Later we can get more picky about what "in core" means precisely.
 * For now, simply check to see if the page is in the page cache,
 * and is up to date; i.e. that no page-in operation would be required
 * at this time if an application were to map and access this page.
 */
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
{
	unsigned char present = 0;
	struct page *page;

	/*
	 * When tmpfs swaps out a page from a file, any process mapping that
	 * file will not get a swp_entry_t in its pte, but rather it is like
	 * any other file mapping (ie. marked !present and faulted in with
	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
<<<<<<< HEAD
=======
<<<<<<< HEAD
>>>>>>> ae1773bb70f3d7cf73324ce8fba787e01d8fa9f2
	 */
	page = find_get_page(mapping, pgoff);
#ifdef CONFIG_SWAP
	/* shmem/tmpfs may return swap: account for swapcache page too. */
	if (radix_tree_exceptional_entry(page)) {
		swp_entry_t swap = radix_to_swp_entry(page);
		page = find_get_page(&swapper_space, swap.val);
	}
#endif
<<<<<<< HEAD
Example #8
0
/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
    struct page *page;

    page = find_get_page(swap_address_space(entry), entry.val);

    if (page) {
        INC_CACHE_INFO(find_success);
        if (TestClearPageReadahead(page))
            atomic_inc(&swapin_readahead_hits);
    }

    INC_CACHE_INFO(find_total);
    return page;
}
Example #9
0
/*
 * Later we can get more picky about what "in core" means precisely.
 * For now, simply check to see if the page is in the page cache,
 * and is up to date; i.e. that no page-in operation would be required
 * at this time if an application were to map and access this page.
 */
static unsigned char mincore_page(struct vm_area_struct * vma,
	unsigned long pgoff)
{
	unsigned char present = 0;
	struct address_space * as = vma->vm_file->f_mapping;
	struct page * page;

	page = find_get_page(as, pgoff);
	if (page) {
		present = PageUptodate(page);
		page_cache_release(page);
	}

	return present;
}
Example #10
0
void do_generic_file_read(struct file *filp,unsigned int *ppos,
			  read_descriptor_t *desc,int dumm){
  struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
  struct inode *inode = mapping->host;
  unsigned long index = *ppos >> PAGE_CACHE_SHIFT;
  unsigned long offset = *ppos & ~PAGE_CACHE_SHIFT;
  for(;;){
    struct page *page = find_get_page(mapping,index);
    unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
    if(index > end_index) break;
    unsigned long nr,ret;
    nr = PAGE_CACHE_SIZE;
    nr = nr - offset;
    mapping->a_ops->readpage(filp,page);
  }
}
Example #11
0
/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
	struct page *found;

	found = find_get_page(&swapper_space, entry.val);
	/*
	 * Unsafe to assert PageSwapCache and mapping on page found:
	 * if SMP nothing prevents swapoff from deleting this page from
	 * the swap cache at this moment.  find_lock_page would prevent
	 * that, but no need to change: we _have_ got the right page.
	 */
	INC_CACHE_INFO(find_total);
	if (found)
		INC_CACHE_INFO(find_success);
	return found;
}
Example #12
0
/*
 * Later we can get more picky about what "in core" means precisely.
 * For now, simply check to see if the page is in the page cache,
 * and is up to date; i.e. that no page-in operation would be required
 * at this time if an application were to map and access this page.
 */
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
{
	unsigned char present = 0;
	struct page *page;

	/*
	 * When tmpfs swaps out a page from a file, any process mapping that
	 * file will not get a swp_entry_t in its pte, but rather it is like
	 * any other file mapping (ie. marked !present and faulted in with
	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
	 *
	 * However when tmpfs moves the page from pagecache and into swapcache,
	 * it is still in core, but the find_get_page below won't find it.
	 * No big deal, but make a note of it.
	 */
	page = find_get_page(mapping, pgoff);
	if (page) {
		present = PageUptodate(page);
		page_cache_release(page);
	}

	return present;
}
Example #13
0
struct page *find_data_page(struct inode *inode, pgoff_t index)
{
    struct address_space *mapping = inode->i_mapping;
    struct page *page;

    page = find_get_page(mapping, index);
    if (page && PageUptodate(page))
        return page;
    f2fs_put_page(page, 0);

    page = get_read_data_page(inode, index, READ_SYNC);
    if (IS_ERR(page))
        return page;

    if (PageUptodate(page))
        return page;

    wait_on_page_locked(page);
    if (unlikely(!PageUptodate(page))) {
        f2fs_put_page(page, 0);
        return ERR_PTR(-EIO);
    }
    return page;
}
Example #14
0
/* 
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {	/* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) {		/* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			swap_readpage(new_page);
			return new_page;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
Example #15
0
struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                                     struct vm_area_struct *vma, unsigned long addr,
                                     bool *new_page_allocated)
{
    struct page *found_page, *new_page = NULL;
    struct address_space *swapper_space = swap_address_space(entry);
    int err;
    *new_page_allocated = false;

    do {
        /*
         * First check the swap cache.  Since this is normally
         * called after lookup_swap_cache() failed, re-calling
         * that would confuse statistics.
         */
        found_page = find_get_page(swapper_space, entry.val);
        if (found_page)
            break;

        /*
         * Get a new page to read into from swap.
         */
        if (!new_page) {
            new_page = alloc_page_vma(gfp_mask, vma, addr);
            if (!new_page)
                break;		/* Out of memory */
        }

        /*
         * call radix_tree_preload() while we can wait.
         */
        err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
        if (err)
            break;

        /*
         * Swap entry may have been freed since our caller observed it.
         */
        err = swapcache_prepare(entry);
        if (err == -EEXIST) {
            radix_tree_preload_end();
            /*
             * We might race against get_swap_page() and stumble
             * across a SWAP_HAS_CACHE swap_map entry whose page
             * has not been brought into the swapcache yet, while
             * the other end is scheduled away waiting on discard
             * I/O completion at scan_swap_map().
             *
             * In order to avoid turning this transitory state
             * into a permanent loop around this -EEXIST case
             * if !CONFIG_PREEMPT and the I/O completion happens
             * to be waiting on the CPU waitqueue where we are now
             * busy looping, we just conditionally invoke the
             * scheduler here, if there are some more important
             * tasks to run.
             */
            cond_resched();
            continue;
        }
        if (err) {		/* swp entry is obsolete ? */
            radix_tree_preload_end();
            break;
        }

        /* May fail (-ENOMEM) if radix-tree node allocation failed. */
        __SetPageLocked(new_page);
        __SetPageSwapBacked(new_page);
        err = __add_to_swap_cache(new_page, entry);
        if (likely(!err)) {
            radix_tree_preload_end();
            /*
             * Initiate read into locked page and return.
             */
            lru_cache_add_anon(new_page);
            *new_page_allocated = true;
            return new_page;
        }
        radix_tree_preload_end();
        __ClearPageLocked(new_page);
        /*
         * add_to_swap_cache() doesn't return -EEXIST, so we can safely
         * clear SWAP_HAS_CACHE flag.
         */
        swapcache_free(entry);
    } while (err != -ENOMEM);

    if (new_page)
        put_page(new_page);
    return found_page;
}
Example #16
0
static int tux3_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct sb *sb = tux_sb(inode->i_sb);
	struct page *clone, *page = vmf->page;
	void *ptr;
	int ret;

	sb_start_pagefault(inode->i_sb);

retry:
	down_read(&tux_inode(inode)->truncate_lock);
	lock_page(page);
	if (page->mapping != mapping(inode)) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	/*
	 * page fault can be happened while holding change_begin/end()
	 * (e.g. copy of user data between ->write_begin and
	 * ->write_end for write(2)).
	 *
	 * So, we use nested version here.
	 */
	change_begin_atomic_nested(sb, &ptr);

	/*
	 * FIXME: Caller releases vmf->page (old_page) unconditionally.
	 * So, this takes additional refcount to workaround it.
	 */
	if (vmf->page == page)
		page_cache_get(page);

	clone = pagefork_for_blockdirty(page, tux3_get_current_delta());
	if (IS_ERR(clone)) {
		/* Someone did page fork */
		pgoff_t index = page->index;

		change_end_atomic_nested(sb, ptr);
		unlock_page(page);
		page_cache_release(page);
		up_read(&tux_inode(inode)->truncate_lock);

		switch (PTR_ERR(clone)) {
		case -EAGAIN:
			page = find_get_page(inode->i_mapping, index);
			assert(page);
			goto retry;
		case -ENOMEM:
			ret = VM_FAULT_OOM;
			break;
		default:
			ret = VM_FAULT_SIGBUS;
			break;
		}

		goto out;
	}

	file_update_time(vma->vm_file);

	/* Assign buffers to dirty */
	if (!page_has_buffers(clone))
		create_empty_buffers(clone, sb->blocksize, 0);

	/*
	 * We mark the page dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty page and writeprotect it again.
	 */
	tux3_set_page_dirty(clone);
#if 1
	/* FIXME: Caller doesn't see the changed vmf->page */
	vmf->page = clone;

	change_end_atomic_nested(sb, ptr);
	/* FIXME: caller doesn't know about pagefork */
	unlock_page(clone);
	page_cache_release(clone);
	ret = 0;
//	ret = VM_FAULT_LOCKED;
#endif
out:
	up_read(&tux_inode(inode)->truncate_lock);
	sb_end_pagefault(inode->i_sb);

	return ret;
}
Example #17
0
File: dax.c Project: 020gzh/linux
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

#define NO_SECTOR -1
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))

static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
					type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree.  This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation.  If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen.  We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked.  These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed. This is required for
 *	required by write faults for filesystems that will return unwritten
 *	extent mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks. If the fs does
 *	not support unwritten extents, the it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			put_page(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		put_page(page);
	}
	goto out;
}
/*
 * zswap_get_swap_cache_page
 *
 * This is an adaption of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * If success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;

	*retpage = NULL;
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_KERNEL);
			if (!new_page)
				break; /* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			*retpage = new_page;
			return ZSWAP_SWAPCACHE_NEW;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	if (!found_page)
		return ZSWAP_SWAPCACHE_FAIL;
	*retpage = found_page;
	return ZSWAP_SWAPCACHE_EXIST;
}
Example #19
0
static int
__generic_file_splice_read(struct file *in, loff_t *ppos,
			   struct pipe_inode_info *pipe, size_t len,
			   unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	unsigned int loff, nr_pages;
	struct page *pages[PIPE_BUFFERS];
	struct partial_page partial[PIPE_BUFFERS];
	struct page *page;
	pgoff_t index, end_index;
	loff_t isize;
	size_t total_len;
	int error, page_nr;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
	};

	index = *ppos >> PAGE_CACHE_SHIFT;
	loff = *ppos & ~PAGE_CACHE_MASK;
	nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	if (nr_pages > PIPE_BUFFERS)
		nr_pages = PIPE_BUFFERS;

	/*
	 * Initiate read-ahead on this page range. however, don't call into
	 * read-ahead if this is a non-zero offset (we are likely doing small
	 * chunk splice and the page is already there) for a single page.
	 */
	if (!loff || nr_pages > 1)
		page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);

	/*
	 * Now fill in the holes:
	 */
	error = 0;
	total_len = 0;

	/*
	 * Lookup the (hopefully) full range of pages we need.
	 */
	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);

	/*
	 * If find_get_pages_contig() returned fewer pages than we needed,
	 * allocate the rest.
	 */
	index += spd.nr_pages;
	while (spd.nr_pages < nr_pages) {
		/*
		 * Page could be there, find_get_pages_contig() breaks on
		 * the first hole.
		 */
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * Make sure the read-ahead engine is notified
			 * about this failure.
			 */
			handle_ra_miss(mapping, &in->f_ra, index);

			/*
			 * page didn't exist, allocate one.
			 */
			page = page_cache_alloc_cold(mapping);
			if (!page)
				break;

			error = add_to_page_cache_lru(page, mapping, index,
					      GFP_KERNEL);
			if (unlikely(error)) {
				page_cache_release(page);
				if (error == -EEXIST)
					continue;
				break;
			}
			/*
			 * add_to_page_cache() locks the page, unlock it
			 * to avoid convoluting the logic below even more.
			 */
			unlock_page(page);
		}

		pages[spd.nr_pages++] = page;
		index++;
	}

	/*
	 * Now loop over the map and see if we need to start IO on any
	 * pages, fill in the partial map, etc.
	 */
	index = *ppos >> PAGE_CACHE_SHIFT;
	nr_pages = spd.nr_pages;
	spd.nr_pages = 0;
	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
		unsigned int this_len;

		if (!len)
			break;

		/*
		 * this_len is the max we'll use from this page
		 */
		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
		page = pages[page_nr];

		/*
		 * If the page isn't uptodate, we may need to start io on it
		 */
		if (!PageUptodate(page)) {
			/*
			 * If in nonblock mode then dont block on waiting
			 * for an in-flight io page
			 */
			if (flags & SPLICE_F_NONBLOCK)
				break;

			lock_page(page);

			/*
			 * page was truncated, stop here. if this isn't the
			 * first page, we'll just complete what we already
			 * added
			 */
			if (!page->mapping) {
				unlock_page(page);
				break;
			}
			/*
			 * page was already under io and is now done, great
			 */
			if (PageUptodate(page)) {
				unlock_page(page);
				goto fill_it;
			}

			/*
			 * need to read in the page
			 */
			error = mapping->a_ops->readpage(in, page);
			if (unlikely(error)) {
				/*
				 * We really should re-lookup the page here,
				 * but it complicates things a lot. Instead
				 * lets just do what we already stored, and
				 * we'll get it the next time we are called.
				 */
				if (error == AOP_TRUNCATED_PAGE)
					error = 0;

				break;
			}

			/*
			 * i_size must be checked after ->readpage().
			 */
			isize = i_size_read(mapping->host);
			end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
			if (unlikely(!isize || index > end_index))
				break;

			/*
			 * if this is the last page, see if we need to shrink
			 * the length and stop
			 */
			if (end_index == index) {
				loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
				if (total_len + loff > isize)
					break;
				/*
				 * force quit after adding this page
				 */
				len = this_len;
				this_len = min(this_len, loff);
				loff = 0;
			}
		}
fill_it:
		partial[page_nr].offset = loff;
		partial[page_nr].len = this_len;
		len -= this_len;
		total_len += this_len;
		loff = 0;
		spd.nr_pages++;
		index++;
	}

	/*
	 * Release any pages at the end, if we quit early. 'i' is how far
	 * we got, 'nr_pages' is how many pages are in the map.
	 */
	while (page_nr < nr_pages)
		page_cache_release(pages[page_nr++]);

	if (spd.nr_pages)
		return splice_to_pipe(pipe, &spd);

	return error;
}
Example #20
0
#ifdef CONFIG_SWAP
	/* shmem/tmpfs may return swap: account for swapcache page too. */
	if (radix_tree_exceptional_entry(page)) {
		swp_entry_t swap = radix_to_swp_entry(page);
		page = find_get_page(&swapper_space, swap.val);
	}
#endif
<<<<<<< HEAD
=======
=======
	 *
	 * However when tmpfs moves the page from pagecache and into swapcache,
	 * it is still in core, but the find_get_page below won't find it.
	 * No big deal, but make a note of it.
	 */
	page = find_get_page(mapping, pgoff);
>>>>>>> 58a75b6a81be54a8b491263ca1af243e9d8617b9
>>>>>>> ae1773bb70f3d7cf73324ce8fba787e01d8fa9f2
	if (page) {
		present = PageUptodate(page);
		page_cache_release(page);
	}

	return present;
}

static void mincore_unmapped_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end,
				unsigned char *vec)
{
	unsigned long nr = (end - addr) >> PAGE_SHIFT;
Example #21
0
struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;
	*new_page_allocated = false;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, swp_offset(entry));
		if (found_page)
			break;

		/*
		 * Just skip read ahead for unused swap slot.
		 * During swap_off when swap_slot_cache is disabled,
		 * we have to handle the race between putting
		 * swap entry in swap cache and marking swap slot
		 * as SWAP_HAS_CACHE.  That's done in later part of code or
		 * else swap_off will be aborted if we return NULL.
		 */
		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {
			radix_tree_preload_end();
			/*
			 * We might race against get_swap_page() and stumble
			 * across a SWAP_HAS_CACHE swap_map entry whose page
			 * has not been brought into the swapcache yet.
			 */
			cond_resched();
			continue;
		}
		if (err) {		/* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__SetPageLocked(new_page);
		__SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			*new_page_allocated = true;
			return new_page;
		}
		radix_tree_preload_end();
		__ClearPageLocked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		put_swap_page(new_page, entry);
	} while (err != -ENOMEM);

	if (new_page)
		put_page(new_page);
	return found_page;
}
Example #22
0
/*
 * zcache_get_swap_cache_page
 *
 * This is an adaption of read_swap_cache_async()
 *
 * If success, page is returned in retpage
 * Returns 0 if page was already in the swap cache, page is not locked
 * Returns 1 if the new page needs to be populated, page is locked
 */
static int zcache_get_swap_cache_page(int type, pgoff_t offset,
				struct page *new_page)
{
	struct page *found_page;
	swp_entry_t entry = swp_entry(type, offset);
	int err;

	BUG_ON(new_page == NULL);
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			return 0;

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			return 1;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
		/* FIXME: is it possible to get here without err==-ENOMEM?
		 * If not, we can dispense with the do loop, use goto retry */
	} while (err != -ENOMEM);

	return -ENOMEM;
}