Beispiel #1
0
/**
 * pagecache_isize_extended - update pagecache after extension of i_size
 * @inode:	inode for which i_size was extended
 * @from:	original inode size
 * @to:		new inode size
 *
 * Handle extension of inode size either caused by extending truncate or by
 * write starting after current i_size. We mark the page straddling current
 * i_size RO so that page_mkwrite() is called on the nearest write access to
 * the page.  This way filesystem can be sure that page_mkwrite() is called on
 * the page before user writes to the page via mmap after the i_size has been
 * changed.
 *
 * The function must be called after i_size is updated so that page fault
 * coming after we unlock the page will already see the new i_size.
 * The function must be called while we still hold i_mutex - this not only
 * makes sure i_size is stable but also that userspace cannot observe new
 * i_size value before we are prepared to store mmap writes at new inode size.
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
    int bsize = 1 << inode->i_blkbits;
    loff_t rounded_from;
    struct page *page;
    pgoff_t index;

    WARN_ON(to > inode->i_size);

    if (from >= to || bsize == PAGE_CACHE_SIZE)
        return;
    /* Page straddling @from will not have any hole block created? */
    rounded_from = round_up(from, bsize);
    if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
        return;

    index = from >> PAGE_CACHE_SHIFT;
    page = find_lock_page(inode->i_mapping, index);
    /* Page not cached? Nothing to do */
    if (!page)
        return;
    /*
     * See clear_page_dirty_for_io() for details why set_page_dirty()
     * is needed.
     */
    if (page_mkclean(page))
        set_page_dirty(page);
    unlock_page(page);
    page_cache_release(page);
}
Beispiel #2
0
/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must no be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec);
repeat:
	n = pagevec_lookup(&pvec, smap, &index);
	if (!n)
		return;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* override existing page on the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			put_page(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			spin_lock_irq(&smap->tree_lock);
			page2 = radix_tree_delete(&smap->page_tree, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			spin_unlock_irq(&smap->tree_lock);

			spin_lock_irq(&dmap->tree_lock);
			err = radix_tree_insert(&dmap->page_tree, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				put_page(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->page_tree,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			spin_unlock_irq(&dmap->tree_lock);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}
Beispiel #3
0
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	struct bio *bio;
	struct page *page;
	struct request_queue *q = bdev_get_queue(sb->s_bdev);
	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
	int i;

	if (max_pages > BIO_MAX_PAGES)
		max_pages = BIO_MAX_PAGES;
	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_idx = 0;
			bio->bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = writeseg_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		page = find_lock_page(mapping, index + i);
		BUG_ON(!page);
		bio->bi_io_vec[i].bv_page = page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;

		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
	}
Beispiel #4
0
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
	struct page *page, *opage;
	struct buffer_head *bh, *obh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;
	}
	if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
		/*
		 * Shadow page cache uses assoc_mapping to point its original
		 * page cache.  The following code tries the original cache
		 * if the given cache is a shadow and it didn't hit.
		 */
		opage = find_lock_page(mapping->assoc_mapping, index);
		if (!opage)
			return bh;

		obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
					     b_state);
		if (buffer_uptodate(obh)) {
			nilfs_copy_buffer(bh, obh);
			if (buffer_dirty(obh)) {
				nilfs_mark_buffer_dirty(bh);
				if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
					nilfs_mdt_mark_dirty(inode);
			}
		}
		brelse(obh);
		unlock_page(opage);
		page_cache_release(opage);
	}
	return bh;
}
Beispiel #5
0
struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
	struct address_space *mapping = gl->gl_aspace->i_mapping;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct page *page;
	struct buffer_head *bh;
	unsigned int shift;
	unsigned long index;
	unsigned int bufnum;

	shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	index = blkno >> shift;             /* convert block to page */
	bufnum = blkno - (index << shift);  /* block buf index within page */

	if (create) {
		for (;;) {
			page = grab_cache_page(mapping, index);
			if (page)
				break;
			yield();
		}
	} else {
		page = find_lock_page(mapping, index);
		if (!page)
			return NULL;
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);

	/* Locate header for our buffer within our page */
	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
		/* Do nothing */;
	get_bh(bh);

	if (!buffer_mapped(bh))
		map_bh(bh, sdp->sd_vfs, blkno);

	unlock_page(page);
	mark_page_accessed(page);
	page_cache_release(page);

	return bh;
}
Beispiel #6
0
struct page * lookup_swap_cache(swp_entry_t entry)
{
	struct page *found;

#ifdef SWAP_CACHE_INFO
	swap_cache_find_total++;
#endif
	while (1) {
		/*
		 * Right now the pagecache is 32-bit only.  But it's a 32 bit index. =)
		 */
repeat:
		found = find_lock_page(&swapper_space, entry.val);
		if (!found)
			return 0;
		/*
		 * Though the "found" page was in the swap cache an instant
		 * earlier, it might have been removed by refill_inactive etc.
		 * Re search ... Since find_lock_page grabs a reference on
		 * the page, it can not be reused for anything else, namely
		 * it can not be associated with another swaphandle, so it
		 * is enough to check whether the page is still in the scache.
		 */
		if (!PageSwapCache(found)) {
			UnlockPage(found);
			page_cache_release(found);
			goto repeat;
		}
		if (found->mapping != &swapper_space)
			goto out_bad;
#ifdef SWAP_CACHE_INFO
		swap_cache_find_success++;
#endif
		UnlockPage(found);
		return found;
	}

out_bad:
	printk (KERN_ERR "VM: Found a non-swapper swap page!\n");
	UnlockPage(found);
	page_cache_release(found);
	return 0;
}
Beispiel #7
0
static struct dentry *
__dcache_find_get_entry(struct dentry *parent, u64 idx,
			struct ceph_readdir_cache_control *cache_ctl)
{
	struct inode *dir = d_inode(parent);
	struct dentry *dentry;
	unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1;
	loff_t ptr_pos = idx * sizeof(struct dentry *);
	pgoff_t ptr_pgoff = ptr_pos >> PAGE_SHIFT;

	if (ptr_pos >= i_size_read(dir))
		return NULL;

	if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
		ceph_readdir_cache_release(cache_ctl);
		cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
		if (!cache_ctl->page) {
			dout(" page %lu not found\n", ptr_pgoff);
			return ERR_PTR(-EAGAIN);
		}
		/* reading/filling the cache are serialized by
		   i_mutex, no need to use page lock */
		unlock_page(cache_ctl->page);
		cache_ctl->dentries = kmap(cache_ctl->page);
	}

	cache_ctl->index = idx & idx_mask;

	rcu_read_lock();
	spin_lock(&parent->d_lock);
	/* check i_size again here, because empty directory can be
	 * marked as complete while not holding the i_mutex. */
	if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir))
		dentry = cache_ctl->dentries[cache_ctl->index];
	else
		dentry = NULL;
	spin_unlock(&parent->d_lock);
	if (dentry && !lockref_get_not_dead(&dentry->d_lockref))
		dentry = NULL;
	rcu_read_unlock();
	return dentry ? : ERR_PTR(-EAGAIN);
}
static void write_wbuf(struct super_block *sb, struct logfs_area *area,
		void *wbuf)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	u64 ofs;
	pgoff_t index;
	int page_ofs;
	struct page *page;

	ofs = dev_ofs(sb, area->a_segno,
			area->a_used_bytes & ~(super->s_writesize - 1));
	index = ofs >> PAGE_SHIFT;
	page_ofs = ofs & (PAGE_SIZE - 1);

	page = find_lock_page(mapping, index);
	BUG_ON(!page);
	memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
	unlock_page(page);
}
Beispiel #9
0
struct page *ufs_get_locked_page(struct address_space *mapping,
				 pgoff_t index)
{
	struct page *page;

	page = find_lock_page(mapping, index);
	if (!page) {
		page = read_mapping_page(mapping, index, NULL);

		if (IS_ERR(page)) {
			printk(KERN_ERR "ufs_change_blocknr: "
			       "read_mapping_page error: ino %lu, index: %lu\n",
			       mapping->host->i_ino, index);
			goto out;
		}

		lock_page(page);

		if (unlikely(page->mapping == NULL)) {
			/* Truncate got there first */
			unlock_page(page);
			put_page(page);
			page = NULL;
			goto out;
		}

		if (!PageUptodate(page) || PageError(page)) {
			unlock_page(page);
			put_page(page);

			printk(KERN_ERR "ufs_change_blocknr: "
			       "can not read page: ino %lu, index: %lu\n",
			       mapping->host->i_ino, index);

			page = ERR_PTR(-EIO);
		}
	}
out:
	return page;
}
Beispiel #10
0
Datei: dir.c Projekt: 274914765/C
/*
 * Read a directory, using filldir to fill the dirent memory.
 * smb_proc_readdir does the actual reading from the smb server.
 *
 * The cache code is almost directly taken from ncpfs
 */
static int 
smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
    struct dentry *dentry = filp->f_path.dentry;
    struct inode *dir = dentry->d_inode;
    struct smb_sb_info *server = server_from_dentry(dentry);
    union  smb_dir_cache *cache = NULL;
    struct smb_cache_control ctl;
    struct page *page = NULL;
    int result;

    ctl.page  = NULL;
    ctl.cache = NULL;

    VERBOSE("reading %s/%s, f_pos=%d\n",
        DENTRY_PATH(dentry),  (int) filp->f_pos);

    result = 0;

    lock_kernel();

    switch ((unsigned int) filp->f_pos) {
    case 0:
        if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
            goto out;
        filp->f_pos = 1;
        /* fallthrough */
    case 1:
        if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
            goto out;
        filp->f_pos = 2;
    }

    /*
     * Make sure our inode is up-to-date.
     */
    result = smb_revalidate_inode(dentry);
    if (result)
        goto out;


    page = grab_cache_page(&dir->i_data, 0);
    if (!page)
        goto read_really;

    ctl.cache = cache = kmap(page);
    ctl.head  = cache->head;

    if (!PageUptodate(page) || !ctl.head.eof) {
        VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
             DENTRY_PATH(dentry), PageUptodate(page),ctl.head.eof);
        goto init_cache;
    }

    if (filp->f_pos == 2) {
        if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
            goto init_cache;

        /*
         * N.B. ncpfs checks mtime of dentry too here, we don't.
         *   1. common smb servers do not update mtime on dir changes
         *   2. it requires an extra smb request
         *      (revalidate has the same timeout as ctl.head.time)
         *
         * Instead smbfs invalidates its own cache on local changes
         * and remote changes are not seen until timeout.
         */
    }

    if (filp->f_pos > ctl.head.end)
        goto finished;

    ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
    ctl.ofs  = ctl.fpos / SMB_DIRCACHE_SIZE;
    ctl.idx  = ctl.fpos % SMB_DIRCACHE_SIZE;

    for (;;) {
        if (ctl.ofs != 0) {
            ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
            if (!ctl.page)
                goto invalid_cache;
            ctl.cache = kmap(ctl.page);
            if (!PageUptodate(ctl.page))
                goto invalid_cache;
        }
        while (ctl.idx < SMB_DIRCACHE_SIZE) {
            struct dentry *dent;
            int res;

            dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
                         dentry, filp->f_pos);
            if (!dent)
                goto invalid_cache;

            res = filldir(dirent, dent->d_name.name,
                      dent->d_name.len, filp->f_pos,
                      dent->d_inode->i_ino, DT_UNKNOWN);
            dput(dent);
            if (res)
                goto finished;
            filp->f_pos += 1;
            ctl.idx += 1;
            if (filp->f_pos > ctl.head.end)
                goto finished;
        }
        if (ctl.page) {
            kunmap(ctl.page);
            SetPageUptodate(ctl.page);
            unlock_page(ctl.page);
            page_cache_release(ctl.page);
            ctl.page = NULL;
        }
        ctl.idx  = 0;
        ctl.ofs += 1;
    }
invalid_cache:
    if (ctl.page) {
        kunmap(ctl.page);
        unlock_page(ctl.page);
        page_cache_release(ctl.page);
        ctl.page = NULL;
    }
    ctl.cache = cache;
init_cache:
    smb_invalidate_dircache_entries(dentry);
    ctl.head.time = jiffies;
    ctl.head.eof = 0;
    ctl.fpos = 2;
    ctl.ofs = 0;
    ctl.idx = SMB_DIRCACHE_START;
    ctl.filled = 0;
    ctl.valid  = 1;
read_really:
    result = server->ops->readdir(filp, dirent, filldir, &ctl);
    if (result == -ERESTARTSYS && page)
        ClearPageUptodate(page);
    if (ctl.idx == -1)
        goto invalid_cache;    /* retry */
    ctl.head.end = ctl.fpos - 1;
    ctl.head.eof = ctl.valid;
finished:
    if (page) {
        cache->head = ctl.head;
        kunmap(page);
        if (result != -ERESTARTSYS)
            SetPageUptodate(page);
        unlock_page(page);
        page_cache_release(page);
    }
    if (ctl.page) {
        kunmap(ctl.page);
        SetPageUptodate(ctl.page);
        unlock_page(ctl.page);
        page_cache_release(ctl.page);
    }
out:
    unlock_kernel();
    return result;
}
Beispiel #11
0
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
                                struct list_head *pages, struct page *page,
                                unsigned nr_pages)
{
    struct bio *bio = NULL;
    unsigned page_idx;
    sector_t last_block_in_bio = 0;
    struct inode *inode = mapping->host;
    const unsigned blkbits = inode->i_blkbits;
    const unsigned blocksize = 1 << blkbits;
    sector_t block_in_file;
    sector_t last_block;
    sector_t last_block_in_file;
    sector_t block_nr;
    struct block_device *bdev = inode->i_sb->s_bdev;
    struct f2fs_map_blocks map;

    map.m_pblk = 0;
    map.m_lblk = 0;
    map.m_len = 0;
    map.m_flags = 0;

    for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {

        prefetchw(&page->flags);
        if (pages) {
            page = list_entry(pages->prev, struct page, lru);
            list_del(&page->lru);
            if (add_to_page_cache_lru(page, mapping,
                                      page->index, GFP_KERNEL))
                goto next_page;
        }

        block_in_file = (sector_t)page->index;
        last_block = block_in_file + nr_pages;
        last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
                             blkbits;
        if (last_block > last_block_in_file)
            last_block = last_block_in_file;

        /*
         * Map blocks using the previous result first.
         */
        if ((map.m_flags & F2FS_MAP_MAPPED) &&
                block_in_file > map.m_lblk &&
                block_in_file < (map.m_lblk + map.m_len))
            goto got_it;

        /*
         * Then do more f2fs_map_blocks() calls until we are
         * done with this page.
         */
        map.m_flags = 0;

        if (block_in_file < last_block) {
            map.m_lblk = block_in_file;
            map.m_len = last_block - block_in_file;

            if (f2fs_map_blocks(inode, &map, 0, false))
                goto set_error_page;
        }
got_it:
        if ((map.m_flags & F2FS_MAP_MAPPED)) {
            block_nr = map.m_pblk + block_in_file - map.m_lblk;
            SetPageMappedToDisk(page);

            if (!PageUptodate(page) && !cleancache_get_page(page)) {
                SetPageUptodate(page);
                goto confused;
            }
        } else {
            zero_user_segment(page, 0, PAGE_CACHE_SIZE);
            SetPageUptodate(page);
            unlock_page(page);
            goto next_page;
        }

        /*
         * This page will go to BIO.  Do we need to send this
         * BIO off first?
         */
        if (bio && (last_block_in_bio != block_nr - 1)) {
submit_and_realloc:
            submit_bio(READ, bio);
            bio = NULL;
        }
        if (bio == NULL) {
            struct f2fs_crypto_ctx *ctx = NULL;

            if (f2fs_encrypted_inode(inode) &&
                    S_ISREG(inode->i_mode)) {
                struct page *cpage;

                ctx = f2fs_get_crypto_ctx(inode);
                if (IS_ERR(ctx))
                    goto set_error_page;

                /* wait the page to be moved by cleaning */
                cpage = find_lock_page(
                            META_MAPPING(F2FS_I_SB(inode)),
                            block_nr);
                if (cpage) {
                    f2fs_wait_on_page_writeback(cpage,
                                                DATA);
                    f2fs_put_page(cpage, 1);
                }
            }

            bio = bio_alloc(GFP_KERNEL,
                            min_t(int, nr_pages, BIO_MAX_PAGES));
            if (!bio) {
                if (ctx)
                    f2fs_release_crypto_ctx(ctx);
                goto set_error_page;
            }
            bio->bi_bdev = bdev;
            bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
            bio->bi_end_io = f2fs_read_end_io;
            bio->bi_private = ctx;
        }

        if (bio_add_page(bio, page, blocksize, 0) < blocksize)
            goto submit_and_realloc;

        last_block_in_bio = block_nr;
        goto next_page;
set_error_page:
        SetPageError(page);
        zero_user_segment(page, 0, PAGE_CACHE_SIZE);
        unlock_page(page);
        goto next_page;
confused:
        if (bio) {
            submit_bio(READ, bio);
            bio = NULL;
        }
        unlock_page(page);
next_page:
        if (pages)
            page_cache_release(page);
    }
    BUG_ON(pages && !list_empty(pages));
    if (bio)
        submit_bio(READ, bio);
    return 0;
}
Beispiel #12
0
/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Truncate the page cache at a set offset, removing the pages that are beyond
 * that offset (and zeroing out partial pages).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Called under (and serialised by) inode->i_sem.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t next;
	int i;

	if (mapping->nrpages == 0)
		return;

	pagevec_init(&pvec, 0);
	next = start;
	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;

			if (page_index > next)
				next = page_index;
			next++;
			if (TestSetPageLocked(page))
				continue;
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (partial) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			wait_on_page_writeback(page);
			truncate_partial_page(page, partial);
			unlock_page(page);
			page_cache_release(page);
		}
	}

	next = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			wait_on_page_writeback(page);
			if (page->index > next)
				next = page->index;
			next++;
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}
}
Beispiel #13
0
/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts range to invalidate
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned properly.
 */
void truncate_inode_pages_range(struct address_space *mapping,
                                loff_t lstart, loff_t lend)
{
    pgoff_t		start;		/* inclusive */
    pgoff_t		end;		/* exclusive */
    unsigned int	partial_start;	/* inclusive */
    unsigned int	partial_end;	/* exclusive */
    struct pagevec	pvec;
    pgoff_t		indices[PAGEVEC_SIZE];
    pgoff_t		index;
    int		i;

    cleancache_invalidate_inode(mapping);
    if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
        return;

    /* Offsets within partial pages */
    partial_start = lstart & (PAGE_CACHE_SIZE - 1);
    partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);

    /*
     * 'start' and 'end' always covers the range of pages to be fully
     * truncated. Partial pages are covered with 'partial_start' at the
     * start of the range and 'partial_end' at the end of the range.
     * Note that 'end' is exclusive while 'lend' is inclusive.
     */
    start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
    if (lend == -1)
        /*
         * lend == -1 indicates end-of-file so we have to set 'end'
         * to the highest possible pgoff_t and since the type is
         * unsigned we're using -1.
         */
        end = -1;
    else
        end = (lend + 1) >> PAGE_CACHE_SHIFT;

    pagevec_init(&pvec, 0);
    index = start;
    while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE),
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            if (!trylock_page(page))
                continue;
            WARN_ON(page->index != index);
            if (PageWriteback(page)) {
                unlock_page(page);
                continue;
            }
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        index++;
    }

    if (partial_start) {
        struct page *page = find_lock_page(mapping, start - 1);
        if (page) {
            unsigned int top = PAGE_CACHE_SIZE;
            if (start > end) {
                /* Truncation within a single page */
                top = partial_end;
                partial_end = 0;
            }
            wait_on_page_writeback(page);
            zero_user_segment(page, partial_start, top);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, partial_start,
                                  top - partial_start);
            unlock_page(page);
            page_cache_release(page);
        }
    }
    if (partial_end) {
        struct page *page = find_lock_page(mapping, end);
        if (page) {
            wait_on_page_writeback(page);
            zero_user_segment(page, 0, partial_end);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, 0,
                                  partial_end);
            unlock_page(page);
            page_cache_release(page);
        }
    }
    /*
     * If the truncation happened within a single page no pages
     * will be released, just zeroed, so we can bail out now.
     */
    if (start >= end)
        return;

    index = start;
    for ( ; ; ) {
        cond_resched();
        if (!pagevec_lookup_entries(&pvec, mapping, index,
                                    min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
            /* If all gone from start onwards, we're done */
            if (index == start)
                break;
            /* Otherwise restart to make sure all gone */
            index = start;
            continue;
        }
        if (index == start && indices[0] >= end) {
            /* All gone out of hole to be punched, we're done */
            pagevec_remove_exceptionals(&pvec);
            pagevec_release(&pvec);
            break;
        }
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end) {
                /* Restart punch to make sure all gone */
                index = start - 1;
                break;
            }

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            lock_page(page);
            WARN_ON(page->index != index);
            wait_on_page_writeback(page);
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        index++;
    }
    cleancache_invalidate_inode(mapping);
}
Beispiel #14
0
/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *	- Destination page already exists in the address space and there
 *	  are users of it. For that case we have no other option that
 *	  copying the data. Tough luck.
 *	- Destination page already exists in the address space, but there
 *	  are no users of it. Make sure it's uptodate, then drop it. Fall
 *	  through to last case.
 *	- Destination page does not exist, we can add the pipe page to
 *	  the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	struct file *file = sd->file;
	struct address_space *mapping = file->f_mapping;
	unsigned int offset, this_len;
	struct page *page;
	pgoff_t index;
	int ret;

	/*
	 * make sure the data in this buffer is uptodate
	 */
	ret = buf->ops->pin(pipe, buf);
	if (unlikely(ret))
		return ret;

	index = sd->pos >> PAGE_CACHE_SHIFT;
	offset = sd->pos & ~PAGE_CACHE_MASK;

	this_len = sd->len;
	if (this_len + offset > PAGE_CACHE_SIZE)
		this_len = PAGE_CACHE_SIZE - offset;

	/*
	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
	 * page.
	 */
	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
		/*
		 * If steal succeeds, buf->page is now pruned from the
		 * pagecache and we can reuse it. The page will also be
		 * locked on successful return.
		 */
		if (buf->ops->steal(pipe, buf))
			goto find_page;

		page = buf->page;
		if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) {
			unlock_page(page);
			goto find_page;
		}

		page_cache_get(page);

		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
			lru_cache_add(page);
	} else {
find_page:
		page = find_lock_page(mapping, index);
		if (!page) {
			ret = -ENOMEM;
			page = page_cache_alloc_cold(mapping);
			if (unlikely(!page))
				goto out_ret;

			/*
			 * This will also lock the page
			 */
			ret = add_to_page_cache_lru(page, mapping, index,
						    GFP_KERNEL);
			if (unlikely(ret))
				goto out;
		}

		/*
		 * We get here with the page locked. If the page is also
		 * uptodate, we don't need to do more. If it isn't, we
		 * may need to bring it in if we are not going to overwrite
		 * the full page.
		 */
		if (!PageUptodate(page)) {
			if (this_len < PAGE_CACHE_SIZE) {
				ret = mapping->a_ops->readpage(file, page);
				if (unlikely(ret))
					goto out;

				lock_page(page);

				if (!PageUptodate(page)) {
					/*
					 * Page got invalidated, repeat.
					 */
					if (!page->mapping) {
						unlock_page(page);
						page_cache_release(page);
						goto find_page;
					}
					ret = -EIO;
					goto out;
				}
			} else
				SetPageUptodate(page);
		}
	}

	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
	if (unlikely(ret)) {
		loff_t isize = i_size_read(mapping->host);

		if (ret != AOP_TRUNCATED_PAGE)
			unlock_page(page);
		page_cache_release(page);
		if (ret == AOP_TRUNCATED_PAGE)
			goto find_page;

		/*
		 * prepare_write() may have instantiated a few blocks
		 * outside i_size.  Trim these off again.
		 */
		if (sd->pos + this_len > isize)
			vmtruncate(mapping->host, isize);

		goto out_ret;
	}

	if (buf->page != page) {
		/*
		 * Careful, ->map() uses KM_USER0!
		 */
		char *src = buf->ops->map(pipe, buf, 1);
		char *dst = kmap_atomic(page, KM_USER1);

		memcpy(dst + offset, src + buf->offset, this_len);
		flush_dcache_page(page);
		kunmap_atomic(dst, KM_USER1);
		buf->ops->unmap(pipe, buf, src);
	}

	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
	if (!ret) {
		/*
		 * Return the number of bytes written and mark page as
		 * accessed, we are now done!
		 */
		ret = this_len;
		mark_page_accessed(page);
		balance_dirty_pages_ratelimited(mapping);
	} else if (ret == AOP_TRUNCATED_PAGE) {
		page_cache_release(page);
		goto find_page;
	}
out:
	page_cache_release(page);
	unlock_page(page);
out_ret:
	return ret;
}
Beispiel #15
0
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

#define NO_SECTOR -1
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))

static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
					type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree.  This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation.  If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen.  We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked.  These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_SHIFT;
	end_index = wbc->range_end >> PAGE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed. This is required for
 *	required by write faults for filesystems that will return unwritten
 *	extent mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks. If the fs does
 *	not support unwritten extents, the it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			put_page(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		put_page(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		put_page(page);
	}
	goto out;
}
Beispiel #16
0
 * negative error code is returned.
 *
 * %-EBUSY - page has an active buffer.
 *
 * %-ENOENT - page cache has no page addressed by the offset.
 */
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
	pgoff_t index = (pgoff_t)block >>
		(PAGE_CACHE_SHIFT - inode->i_blkbits);
	struct page *page;
	unsigned long first_block;
	int ret = 0;
	int still_dirty;

	page = find_lock_page(inode->i_mapping, index);
	if (!page)
		return -ENOENT;

	wait_on_page_writeback(page);

	first_block = (unsigned long)index <<
		(PAGE_CACHE_SHIFT - inode->i_blkbits);
	if (page_has_buffers(page)) {
		struct buffer_head *bh;

		bh = nilfs_page_get_nth_block(page, block - first_block);
		nilfs_forget_buffer(bh);
	}
	still_dirty = PageDirty(page);
	unlock_page(page);
Beispiel #17
0
/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *	- Destination page already exists in the address space and there
 *	  are users of it. For that case we have no other option that
 *	  copying the data. Tough luck.
 *	- Destination page already exists in the address space, but there
 *	  are no users of it. Make sure it's uptodate, then drop it. Fall
 *	  through to last case.
 *	- Destination page does not exist, we can add the pipe page to
 *	  the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	struct file *file = sd->file;
	struct address_space *mapping = file->f_mapping;
	unsigned int offset, this_len;
	struct page *page;
	pgoff_t index;
	int ret;

	/*
	 * make sure the data in this buffer is uptodate
	 */
	ret = buf->ops->pin(pipe, buf);
	if (unlikely(ret))
		return ret;

	index = sd->pos >> PAGE_CACHE_SHIFT;
	offset = sd->pos & ~PAGE_CACHE_MASK;

	this_len = sd->len;
	if (this_len + offset > PAGE_CACHE_SIZE)
		this_len = PAGE_CACHE_SIZE - offset;

find_page:
	page = find_lock_page(mapping, index);
	if (!page) {
		ret = -ENOMEM;
		page = page_cache_alloc_cold(mapping);
		if (unlikely(!page))
			goto out_ret;

		/*
		 * This will also lock the page
		 */
		ret = add_to_page_cache_lru(page, mapping, index,
					    GFP_KERNEL);
		if (unlikely(ret))
			goto out;
	}

	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
	if (unlikely(ret)) {
		loff_t isize = i_size_read(mapping->host);

		if (ret != AOP_TRUNCATED_PAGE)
			unlock_page(page);
		page_cache_release(page);
		if (ret == AOP_TRUNCATED_PAGE)
			goto find_page;

		/*
		 * prepare_write() may have instantiated a few blocks
		 * outside i_size.  Trim these off again.
		 */
		if (sd->pos + this_len > isize)
			vmtruncate(mapping->host, isize);

		goto out_ret;
	}

	if (buf->page != page) {
		/*
		 * Careful, ->map() uses KM_USER0!
		 */
		char *src = buf->ops->map(pipe, buf, 1);
		char *dst = kmap_atomic(page, KM_USER1);

		memcpy(dst + offset, src + buf->offset, this_len);
		flush_dcache_page(page);
		kunmap_atomic(dst, KM_USER1);
		buf->ops->unmap(pipe, buf, src);
	}

	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
	if (ret) {
		if (ret == AOP_TRUNCATED_PAGE) {
			page_cache_release(page);
			goto find_page;
		}
		if (ret < 0)
			goto out;
		/*
		 * Partial write has happened, so 'ret' already initialized by
		 * number of bytes written, Where is nothing we have to do here.
		 */
	} else
		ret = this_len;
	/*
	 * Return the number of bytes written and mark page as
	 * accessed, we are now done!
	 */
	mark_page_accessed(page);
	balance_dirty_pages_ratelimited(mapping);
out:
	page_cache_release(page);
	unlock_page(page);
out_ret:
	return ret;
}