/*
 * If the page can not be invalidated, it is moved to the
 * inactive list to speed up its reclaim.  It is moved to the
 * head of the list, rather than the tail, to give the flusher
 * threads some time to write it out, as this is much more
 * effective than the single-page writeout from reclaim.
 *
 * If the page is not mapped and is dirty or under writeback, PG_reclaim
 * is set so that it can be reclaimed as soon as writeback completes.
 *
 * 1. active, mapped page -> none
 * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
 * 3. inactive, mapped page -> none
 * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
 * 5. inactive, clean -> inactive, tail
 * 6. Others -> none
 *
 * In case 4, the page is moved to the head of the inactive list because
 * the VM expects the flusher threads to write it out, as this is much
 * more effective than the single-page writeout from reclaim.
 */
static void lru_deactivate_fn(struct page *page, void *arg)
{
	int lru, file;
	bool active;
	struct zone *zone = page_zone(page);

	if (!PageLRU(page))
		return;

	if (PageUnevictable(page))
		return;

	/* Some processes are using the page */
	if (page_mapped(page))
		return;

	active = PageActive(page);

	file = page_is_file_cache(page);
	lru = page_lru_base_type(page);
	del_page_from_lru_list(zone, page, lru + active);
	ClearPageActive(page);
	ClearPageReferenced(page);
	add_page_to_lru_list(zone, page, lru);

	if (PageWriteback(page) || PageDirty(page)) {
		/*
		 * PG_reclaim can race with end_page_writeback(), which may
		 * confuse readahead, but the race window is _really_ small
		 * and the problem is non-critical.
		 */
		SetPageReclaim(page);
	} else {
		/*
		 * The page's writeback ended while it was in the pagevec,
		 * so move the page to the tail of the inactive list.
		 */
		list_move_tail(&page->lru, &zone->lru[lru].list);
		mem_cgroup_rotate_reclaimable_page(page);
		__count_vm_event(PGROTATED);
	}

	if (active)
		__count_vm_event(PGDEACTIVATE);
	update_page_reclaim_stat(zone, page, file, 0);
}
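
For context, lru_deactivate_fn() is not called on single pages directly; it is normally drained from a per-CPU pagevec. A minimal sketch of that kind of caller, modeled on the deactivate_page() helper from the same era of mm/swap.c (the per-CPU variable name is an assumption):

static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);

/*
 * Queue @page for deactivation; once the per-CPU pagevec fills up,
 * drain it through lru_deactivate_fn() under the zone's LRU lock.
 */
void deactivate_page(struct page *page)
{
	if (PageUnevictable(page))
		return;

	if (likely(get_page_unless_zero(page))) {
		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);

		if (!pagevec_add(pvec, page))
			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
		put_cpu_var(lru_deactivate_pvecs);
	}
}
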
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	struct bio *bio;
	struct page *page;
	unsigned int max_pages;
	int i;

	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));

	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_idx = 0;
			bio->bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = writeseg_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		page = find_lock_page(mapping, index + i);
		BUG_ON(!page);
		bio->bi_io_vec[i].bv_page = page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;

		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
	}
	/* Submit whatever is left in the final bio. */
	bio->bi_vcnt = nr_pages;
	bio->bi_idx = 0;
	bio->bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = writeseg_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}
Example #3
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
    swp_entry_t entry;
    struct address_space *address_space;

    VM_BUG_ON_PAGE(!PageLocked(page), page);
    VM_BUG_ON_PAGE(!PageSwapCache(page), page);
    VM_BUG_ON_PAGE(PageWriteback(page), page);

    entry.val = page_private(page);
    address_space = swap_address_space(entry);
    radix_tree_delete(&address_space->page_tree, page_private(page));
    set_page_private(page, 0);
    ClearPageSwapCache(page);
    address_space->nrpages--;
    __dec_zone_page_state(page, NR_FILE_PAGES);
    INC_CACHE_INFO(del_total);
}
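
The function above leaves locking of the radix tree to its caller. A hedged sketch of the usual locked wrapper, modeled on delete_from_swap_cache() from roughly the same kernel generation (the swapcache_free() signature varies between versions):

/*
 * Lock the swap address space, unhook the page, then drop the swap
 * entry reference and the page reference the swap cache held.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;
	struct address_space *address_space;

	entry.val = page_private(page);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	__delete_from_swap_cache(page);
	spin_unlock_irq(&address_space->tree_lock);

	swapcache_free(entry);	/* older kernels take (entry, page) here */
	page_cache_release(page);
}
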
Example #4
/**
 * Implements Linux VM address_space::invalidatepage() method. This method is
 * called when the page is truncated from a file, either as a result of
 * explicit truncate, or when inode is removed from memory (as a result of
 * final iput(), umount, or memory pressure induced icache shrinking).
 *
 * [0, offset] bytes of the page remain valid (this is for the case of a
 * non-page-aligned truncate). Lustre leaves the partially truncated page in
 * the cache, relying on struct inode::i_size to limit further accesses.
 */
static void ll_invalidatepage(struct page *vmpage,
#ifdef HAVE_INVALIDATE_RANGE
                              unsigned int offset, unsigned int length
#else
                              unsigned long offset
#endif
                             )
{
    struct inode     *inode;
    struct lu_env    *env;
    struct cl_page   *page;
    struct cl_object *obj;

    LASSERT(PageLocked(vmpage));
    LASSERT(!PageWriteback(vmpage));

    /*
     * It is safe to not check anything in invalidatepage/releasepage
     * below because they are run with page locked and all our io is
     * happening with locked page too
     */
#ifdef HAVE_INVALIDATE_RANGE
    if (offset == 0 && length == PAGE_SIZE) {
#else
    if (offset == 0) {
#endif
        /* See the comment in ll_releasepage() */
        env = cl_env_percpu_get();
        LASSERT(!IS_ERR(env));

        inode = vmpage->mapping->host;
        obj = ll_i2info(inode)->lli_clob;
        if (obj != NULL) {
            page = cl_vmpage_page(vmpage, obj);
            if (page != NULL) {
                cl_page_delete(env, page);
                cl_page_put(env, page);
            }
        } else
            LASSERT(vmpage->private == 0);

        cl_env_percpu_put(env);
    }
}
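
A hook like this only runs once it is installed in the file's address_space_operations. A minimal, hypothetical wiring sketch (the table name is made up and the other methods are omitted; this is not Lustre's real aops table):

static const struct address_space_operations example_aops = {
	/* Other methods (readpage, writepage, releasepage, ...) omitted. */
	.invalidatepage	= ll_invalidatepage,
};
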
Example #5
/*
 * write out a page to a file
 */
static int write_page(struct bitmap *bitmap, struct page *page, int wait)
{
    int ret = -ENOMEM;

    if (bitmap->file == NULL)
        return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);

    flush_dcache_page(page); /* make sure visible to anyone reading the file */

    if (wait)
        lock_page(page);
    else {
        if (TestSetPageLocked(page))
            return -EAGAIN; /* already locked */
        if (PageWriteback(page)) {
            unlock_page(page);
            return -EAGAIN;
        }
    }

    ret = page->mapping->a_ops->prepare_write(bitmap->file, page, 0, PAGE_SIZE);
    if (!ret)
        ret = page->mapping->a_ops->commit_write(bitmap->file, page, 0,
                PAGE_SIZE);
    if (ret) {
        unlock_page(page);
        return ret;
    }

    set_page_dirty(page); /* force it to be written out */

    if (!wait) {
        /* add to list to be waited for by daemon */
        struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO);
        item->page = page;
        get_page(page);
        spin_lock(&bitmap->write_lock);
        list_add(&item->list, &bitmap->complete_pages);
        spin_unlock(&bitmap->write_lock);
        md_wakeup_thread(bitmap->writeback_daemon);
    }
    return write_one_page(page, wait);
}
Example #6
/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The page-level
 * dirty flag must be handled by the caller.  Neither page may be under I/O,
 * and both src and dst must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= BIT(BH_Dirty);

	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}
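
A hedged usage sketch, not taken from nilfs itself, that simply spells out the locking contract the comment above describes (the helper name is made up):

static void example_copy_locked(struct page *dst, struct page *src)
{
	/* Both pages must be locked and not under I/O for the copy. */
	lock_page(src);
	lock_page(dst);
	nilfs_copy_page(dst, src, 1);	/* 1: also propagate BH_Dirty */
	unlock_page(dst);
	unlock_page(src);
}
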
Example #7
/*
 * invalidate part or all of a page
 */
static void afs_file_invalidatepage(struct page *page, unsigned long offset)
{
	_enter("{%lu},%lu", page->index, offset);

	BUG_ON(!PageLocked(page));

	if (PagePrivate(page)) {
		/* We release buffers only if the entire page is being
		 * invalidated.
		 * The get_block cached value has been unconditionally
		 * invalidated, so real IO is not possible anymore.
		 */
		if (offset == 0 && !PageWriteback(page))
			page->mapping->a_ops->releasepage(page, 0);
	}

	_leave("");

} /* end afs_file_invalidatepage() */
Example #8
int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));
	ASSERT(!(current->flags & PF_NOFS));

	/*
	 * Annotate this call path with a flag that indicates that it is
	 * unsafe to use KM_SLEEP during memory allocations due to the
	 * potential for a deadlock.  KM_PUSHPAGE should be used instead.
	 */
	current->flags |= PF_NOFS;
	(void) zfs_putpage(mapping->host, pp, wbc);
	current->flags &= ~PF_NOFS;

	return (0);
}
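
zpl_putpage() matches the writepage_t callback signature, so it is meant to be handed to write_cache_pages() with the mapping as the opaque data argument. A simplified, hedged sketch of such a ->writepages implementation (the real ZFS zpl_writepages() adds sync-mode handling that is left out here):

static int example_zpl_writepages(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	/* Walk every dirty page and push it through zpl_putpage(). */
	return write_cache_pages(mapping, wbc, zpl_putpage, mapping);
}
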
Example #9
/**
 * Implements Linux VM address_space::invalidatepage() method. This method is
 * called when the page is truncated from a file, either as a result of
 * explicit truncate, or when inode is removed from memory (as a result of
 * final iput(), umount, or memory pressure induced icache shrinking).
 *
 * [0, offset] bytes of the page remain valid (this is for the case of a
 * non-page-aligned truncate). Lustre leaves the partially truncated page in
 * the cache, relying on struct inode::i_size to limit further accesses.
 */
static int cl_invalidatepage(struct page *vmpage, unsigned long offset)
{
        struct inode     *inode;
        struct lu_env    *env;
        struct cl_page   *page;
        struct cl_object *obj;

        int result;
        int refcheck;

        LASSERT(PageLocked(vmpage));
        LASSERT(!PageWriteback(vmpage));

        /*
         * It is safe to not check anything in invalidatepage/releasepage
         * below because they are run with page locked and all our io is
         * happening with locked page too
         */
        result = 0;
        if (offset == 0) {
                env = cl_env_get(&refcheck);
                if (!IS_ERR(env)) {
                        inode = vmpage->mapping->host;
                        obj = ll_i2info(inode)->lli_clob;
                        if (obj != NULL) {
                                page = cl_vmpage_page(vmpage, obj);
                                if (page != NULL) {
                                        lu_ref_add(&page->cp_reference,
                                                   "delete", vmpage);
                                        cl_page_delete(env, page);
                                        result = 1;
                                        lu_ref_del(&page->cp_reference,
                                                   "delete", vmpage);
                                        cl_page_put(env, page);
                                }
                        } else
                                LASSERT(vmpage->private == 0);
                        cl_env_put(env, &refcheck);
                }
        }
        return result;
}
Example #10
static int bc_io_show(struct seq_file *f, void *v)
{
	struct list_head *lh;
	struct page_beancounter *pb;
	struct page *pg;

	lh = (struct list_head *)v;
	if (lh == &pb_io_list) {
		seq_printf(f, "Races: anon %lu missed %lu\n",
				anon_pages, not_released);

		seq_printf(f, "%-*s %-1s %-*s %-4s %*s %*s "
				"%-*s %-*s %-1s %-*s %-*s\n",
				PTR_SIZE, "pb", "",
				PTR_SIZE, "page", "flg",
				INT_SIZE, "cnt", INT_SIZE, "mcnt",
				PTR_SIZE, "pb_list",
				PTR_SIZE, "page_pb", "",
				PTR_SIZE, "mapping",
				INT_SIZE, "ub");
		return 0;
	}

	pb = list_entry(lh, struct page_beancounter, io_list);
	pg = pb->page;
	seq_printf(f, "%p %c %p %c%c%c%c %*d %*d %p %p %c %p %d\n",
			pb, pb->io_debug ? 'e' : 'm', pg,
			PageDirty(pg) ? 'D' : 'd',
			PageAnon(pg) ? 'A' : 'a',
			PageWriteback(pg) ? 'W' : 'w',
			PageLocked(pg) ? 'L' : 'l',
			INT_SIZE, page_count(pg),
			INT_SIZE, page_mapcount(pg),
			pb->page_pb_list, page_pbc(pg),
			iopb_to_pb(page_pbc(pg)) == pb ? ' ' : '!',
			pg->mapping, pb->ub->ub_uid);
	return 0;
}
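
bc_io_show() is a seq_file ->show callback; by itself it does nothing until it is plugged into a seq_operations table whose iterator walks pb_io_list. A hedged, partial wiring sketch (the iterator callbacks are intentionally omitted):

static const struct seq_operations bc_io_seq_ops = {
	/* .start/.next/.stop would walk pb_io_list under a lock; omitted. */
	.show	= bc_io_show,
};
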
Example #11
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
	struct address_space *address_space;
	int i, nr = hpage_nr_pages(page);
	swp_entry_t entry;
	pgoff_t idx;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	entry.val = page_private(page);
	address_space = swap_address_space(entry);
	idx = swp_offset(entry);
	for (i = 0; i < nr; i++) {
		radix_tree_delete(&address_space->page_tree, idx + i);
		set_page_private(page + i, 0);
	}
	ClearPageSwapCache(page);
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	ADD_CACHE_INFO(del_total, nr);
}
Example #12
/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function removes only the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
				pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next = start;
	unsigned long ret = 0;
	int i;

	pagevec_init(&pvec, 0);
	while (next <= end &&
			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			if (TestSetPageLocked(page)) {
				next++;
				continue;
			}
			if (page->index > next)
				next = page->index;
			next++;
			if (PageDirty(page) || PageWriteback(page))
				goto unlock;
			if (page_mapped(page))
				goto unlock;
			ret += invalidate_complete_page(mapping, page);
unlock:
			unlock_page(page);
			if (next > end)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return ret;
}
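
A hedged usage sketch (the helper name is illustrative): drop every clean, unlocked, unmapped page of an inode, which is exactly the best-effort behaviour documented above.

static void example_drop_clean_cache(struct inode *inode)
{
	unsigned long nr;

	/* Dirty, locked, mapped or writeback pages are silently skipped. */
	nr = invalidate_mapping_pages(inode->i_mapping, 0, (pgoff_t)-1);
	pr_debug("invalidated %lu clean pages\n", nr);
}
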
Example #13
/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Truncate the page cache at a set offset, removing the pages that are beyond
 * that offset (and zeroing out partial pages).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Called under (and serialised by) inode->i_sem.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t next;
	int i;

	if (mapping->nrpages == 0)
		return;

	pagevec_init(&pvec, 0);
	next = start;
	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;

			if (page_index > next)
				next = page_index;
			next++;
			if (TestSetPageLocked(page))
				continue;
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (partial) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			wait_on_page_writeback(page);
			truncate_partial_page(page, partial);
			unlock_page(page);
			page_cache_release(page);
		}
	}

	next = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			wait_on_page_writeback(page);
			if (page->index > next)
				next = page->index;
			next++;
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}
}
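
A hedged sketch of the typical call site (the helper name is illustrative): an inode-teardown path drops the inode's entire page cache before releasing its on-disk resources.

static void example_evict_inode(struct inode *inode)
{
	/* Remove every cached page from offset 0 to EOF. */
	truncate_inode_pages(&inode->i_data, 0);

	/* ...filesystem-specific block/extent release would follow here... */
}
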
Example #14
static int ll_write_begin(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned flags,
                          struct page **pagep, void **fsdata)
{
    struct ll_cl_context *lcc;
    const struct lu_env  *env = NULL;
    struct cl_io   *io;
    struct cl_page *page = NULL;

    struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
    pgoff_t index = pos >> PAGE_SHIFT;
    struct page *vmpage = NULL;
    unsigned from = pos & (PAGE_SIZE - 1);
    unsigned to = from + len;
    int result = 0;
    ENTRY;

    CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);

    lcc = ll_cl_find(file);
    if (lcc == NULL) {
        io = NULL;
        GOTO(out, result = -EIO);
    }

    env = lcc->lcc_env;
    io  = lcc->lcc_io;

    /* To avoid deadlock, try to lock page first. */
    vmpage = grab_cache_page_nowait(mapping, index);

    if (unlikely(vmpage == NULL ||
                 PageDirty(vmpage) || PageWriteback(vmpage))) {
        struct vvp_io *vio = vvp_env_io(env);
        struct cl_page_list *plist = &vio->u.write.vui_queue;

        /* If the page is already in the dirty cache, we have to commit
         * the pages right now; otherwise it may cause a deadlock
         * because it holds the page lock of a dirty page and requests
         * more grants.  It's okay for the dirty page to be the first
         * one in the commit page list, though. */
        if (vmpage != NULL && plist->pl_nr > 0) {
            unlock_page(vmpage);
            put_page(vmpage);
            vmpage = NULL;
        }

        /* commit pages and then wait for page lock */
        result = vvp_io_write_commit(env, io);
        if (result < 0)
            GOTO(out, result);

        if (vmpage == NULL) {
            vmpage = grab_cache_page_write_begin(mapping, index,
                                                 flags);
            if (vmpage == NULL)
                GOTO(out, result = -ENOMEM);
        }
    }

    page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
    if (IS_ERR(page))
        GOTO(out, result = PTR_ERR(page));

    lcc->lcc_page = page;
    lu_ref_add(&page->cp_reference, "cl_io", io);

    cl_page_assume(env, io, page);
    if (!PageUptodate(vmpage)) {
        /*
         * We're completely overwriting an existing page,
         * so _don't_ set it up to date until commit_write
         */
        if (from == 0 && to == PAGE_SIZE) {
            CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
            POISON_PAGE(vmpage, 0x11);
        } else {
            /* TODO: can be optimized at OSC layer to check if it
             * is a lockless IO. In that case, it's not necessary
             * to read the data. */
            result = ll_prepare_partial_page(env, io, page);
            if (result == 0)
                SetPageUptodate(vmpage);
        }
    }
    if (result < 0)
        cl_page_unassume(env, io, page);
    EXIT;
out:
    if (result < 0) {
        if (vmpage != NULL) {
            unlock_page(vmpage);
            put_page(vmpage);
        }
        if (!IS_ERR_OR_NULL(page)) {
            lu_ref_del(&page->cp_reference, "cl_io", io);
            cl_page_put(env, page);
        }
        if (io)
            io->ci_result = result;
    } else {
        *pagep = vmpage;
        *fsdata = lcc;
    }
    RETURN(result);
}
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	int scanned = 0;
	int range_whole = 0;
	long nr_to_write = wbc->nr_to_write;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		scanned = 1;
	}
retry:
	while (!done && (index <= end) &&
	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
					      PAGECACHE_TAG_DIRTY,
					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
		unsigned i;

		scanned = 1;
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			if (!wbc->range_cyclic && page->index > end) {
				done = 1;
				unlock_page(page);
				continue;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
			    !clear_page_dirty_for_io(page)) {
				unlock_page(page);
				continue;
			}

			ret = (*writepage)(page, wbc, data);

			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
				unlock_page(page);
				ret = 0;
			}
			if (ret || (--nr_to_write <= 0))
				done = 1;
			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}
	if (!wbc->no_nrwrite_index_update) {
		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
			mapping->writeback_index = index;
		wbc->nr_to_write = nr_to_write;
	}

	return ret;
}
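
A hedged sketch of how a filesystem typically consumes write_cache_pages(): its own ->writepage is passed through a small adapter, roughly what generic_writepages() does (the names here are illustrative, not a specific filesystem's code).

static int example_writepage_cb(struct page *page,
				struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;
	int ret;

	/* Delegate to the filesystem's regular ->writepage method. */
	ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

static int example_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	return write_cache_pages(mapping, wbc, example_writepage_cb, mapping);
}
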
Example #16
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(dn->inode),
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.page = page,
		.encrypted_page = NULL,
	};
	int dirty, err;

	if (!f2fs_exist_data(dn->inode))
		goto clear_out;

	err = f2fs_reserve_block(dn, 0);
	if (err)
		return err;

	f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));

	read_inline_data(page, dn->inode_page);
	set_page_dirty(page);

	/* clear dirty state */
	dirty = clear_page_dirty_for_io(page);

	/* write data page to try to make data consistent */
	set_page_writeback(page);
	fio.old_blkaddr = dn->data_blkaddr;
	set_inode_flag(dn->inode, FI_HOT_DATA);
	write_data_page(dn, &fio);
	f2fs_wait_on_page_writeback(page, DATA, true);
	if (dirty) {
		inode_dec_dirty_pages(dn->inode);
		remove_dirty_inode(dn->inode);
	}

	/* this converted inline_data should be recovered. */
	set_inode_flag(dn->inode, FI_APPEND_WRITE);

	/* clear inline data and flag after data writeback */
	truncate_inline_inode(dn->inode, dn->inode_page, 0);
	clear_inline_node(dn->inode_page);
clear_out:
	stat_dec_inline_inode(dn->inode);
	clear_inode_flag(dn->inode, FI_INLINE_DATA);
	f2fs_put_dnode(dn);
	return 0;
}

int f2fs_convert_inline_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct page *ipage, *page;
	int err = 0;

	if (!f2fs_has_inline_data(inode))
		return 0;

	page = f2fs_grab_cache_page(inode->i_mapping, 0, false);
	if (!page)
		return -ENOMEM;

	f2fs_lock_op(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode))
		err = f2fs_convert_inline_page(&dn, page);

	f2fs_put_dnode(&dn);
out:
	f2fs_unlock_op(sbi);

	f2fs_put_page(page, 1);

	f2fs_balance_fs(sbi, dn.node_changed);

	return err;
}

int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
	void *src_addr, *dst_addr;
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	if (!f2fs_has_inline_data(inode)) {
		f2fs_put_dnode(&dn);
		return -EAGAIN;
	}

	f2fs_bug_on(F2FS_I_SB(inode), page->index);

	f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
	src_addr = kmap_atomic(page);
	dst_addr = inline_data_addr(dn.inode_page);
	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
	kunmap_atomic(src_addr);
	set_page_dirty(dn.inode_page);

	set_inode_flag(inode, FI_APPEND_WRITE);
	set_inode_flag(inode, FI_DATA_EXIST);

	clear_inline_node(dn.inode_page);
	f2fs_put_dnode(&dn);
	return 0;
}
Example #17
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
		      void *data)
{
	struct mpage_data *mpd = data;
	struct bio *bio = mpd->bio;
	struct address_space *mapping = page->mapping;
	struct inode *inode = page->mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	unsigned long end_index;
	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
	sector_t last_block;
	sector_t block_in_file;
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	unsigned first_unmapped = blocks_per_page;
	struct block_device *bdev = NULL;
	int boundary = 0;
	sector_t boundary_block = 0;
	struct block_device *boundary_bdev = NULL;
	int length;
	struct buffer_head map_bh;
	loff_t i_size = i_size_read(inode);
	int ret = 0;
	int wr = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);

	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		/* If they're all mapped and dirty, do it */
		page_block = 0;
		do {
			BUG_ON(buffer_locked(bh));
			if (!buffer_mapped(bh)) {
				/*
				 * unmapped dirty buffers are created by
				 * __set_page_dirty_buffers -> mmapped data
				 */
				if (buffer_dirty(bh))
					goto confused;
				if (first_unmapped == blocks_per_page)
					first_unmapped = page_block;
				continue;
			}

			if (first_unmapped != blocks_per_page)
				goto confused;	/* hole -> non-hole */

			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
				goto confused;
			if (page_block) {
				if (bh->b_blocknr != blocks[page_block-1] + 1)
					goto confused;
			}
			blocks[page_block++] = bh->b_blocknr;
			boundary = buffer_boundary(bh);
			if (boundary) {
				boundary_block = bh->b_blocknr;
				boundary_bdev = bh->b_bdev;
			}
			bdev = bh->b_bdev;
		} while ((bh = bh->b_this_page) != head);

		if (first_unmapped)
			goto page_is_mapped;

		/*
		 * Page has buffers, but they are all unmapped. The page was
		 * created by pagein or read over a hole which was handled by
		 * block_read_full_page().  If this address_space is also
		 * using mpage_readpages then this can rarely happen.
		 */
		goto confused;
	}

	/*
	 * The page has no buffers: map it to disk
	 */
	BUG_ON(!PageUptodate(page));
	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
	last_block = (i_size - 1) >> blkbits;
	map_bh.b_page = page;
	for (page_block = 0; page_block < blocks_per_page; ) {

		map_bh.b_state = 0;
		map_bh.b_size = 1 << blkbits;
		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
			goto confused;
		if (buffer_new(&map_bh))
			unmap_underlying_metadata(map_bh.b_bdev,
						map_bh.b_blocknr);
		if (buffer_boundary(&map_bh)) {
			boundary_block = map_bh.b_blocknr;
			boundary_bdev = map_bh.b_bdev;
		}
		if (page_block) {
			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
				goto confused;
		}
		blocks[page_block++] = map_bh.b_blocknr;
		boundary = buffer_boundary(&map_bh);
		bdev = map_bh.b_bdev;
		if (block_in_file == last_block)
			break;
		block_in_file++;
	}
	BUG_ON(page_block == 0);

	first_unmapped = page_block;

page_is_mapped:
	end_index = i_size >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		/*
		 * The page straddles i_size.  It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size.  For a file
		 * that is not a multiple of the page size, the remaining memory
		 * is zeroed when mapped, and writes to that region are not
		 * written out to the file."
		 */
		unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);

		if (page->index > end_index || !offset)
			goto confused;
		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	}

	/*
	 * This page will go to BIO.  Do we need to send this BIO off first?
	 */
	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
		bio = mpage_bio_submit(wr, bio);

alloc_new:
	if (bio == NULL) {
		if (first_unmapped == blocks_per_page) {
			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
								page, wbc)) {
				clean_buffers(page, first_unmapped);
				goto out;
			}
		}
		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
				BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
		if (bio == NULL)
			goto confused;

		wbc_init_bio(wbc, bio);
	}

	/*
	 * Must try to add the page before marking the buffer clean or
	 * the confused fail path above (OOM) will be very confused when
	 * it finds all bh marked clean (i.e. it will not write anything)
	 */
	wbc_account_io(wbc, page, PAGE_SIZE);
	length = first_unmapped << blkbits;
	if (bio_add_page(bio, page, length, 0) < length) {
		bio = mpage_bio_submit(wr, bio);
		goto alloc_new;
	}

	clean_buffers(page, first_unmapped);

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);
	if (boundary || (first_unmapped != blocks_per_page)) {
		bio = mpage_bio_submit(wr, bio);
		if (boundary_block) {
			write_boundary_block(boundary_bdev,
					boundary_block, 1 << blkbits);
		}
	} else {
		mpd->last_block_in_bio = blocks[blocks_per_page - 1];
	}
	goto out;

confused:
	if (bio)
		bio = mpage_bio_submit(wr, bio);

	if (mpd->use_writepage) {
		ret = mapping->a_ops->writepage(page, wbc);
	} else {
		ret = -EAGAIN;
		goto out;
	}
	/*
	 * The caller has a ref on the inode, so *mapping is stable
	 */
	mapping_set_error(mapping, ret);
out:
	mpd->bio = bio;
	return ret;
}
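
__mpage_writepage() is only the per-page callback; the mpage_writepages() entry point essentially wraps it in write_cache_pages() and then submits any leftover bio. A simplified, hedged sketch modeled on fs/mpage.c (block-layer plugging and some details are omitted):

int example_mpage_writepages(struct address_space *mapping,
			     struct writeback_control *wbc,
			     get_block_t get_block)
{
	struct mpage_data mpd = {
		.bio = NULL,
		.last_block_in_bio = 0,
		.get_block = get_block,
		.use_writepage = 1,
	};
	int ret;

	ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
	if (mpd.bio) {
		/* Flush the final, partially filled bio. */
		int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);

		mpage_bio_submit(wr, mpd.bio);
	}
	return ret;
}
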
Example #18
int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	atomic_set(&io_page->p_count, 1);
	get_page(page);
	set_page_writeback(page);
	ClearPageError(page);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {

		block_end = block_start + blocksize;
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		clear_buffer_dirty(bh);
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	/*
	 * If the page was truncated before we could do the writeback,
	 * or we had a memory allocation error while trying to write
	 * the first buffer head, we won't have submitted any pages for
	 * I/O.  In that case we need to make sure we've cleared the
	 * PageWriteback bit from the page to prevent the system from
	 * wedging later on.
	 */
	put_io_page(io_page);
	return ret;
}
Example #19
int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	atomic_set(&io_page->p_count, 1);
	get_page(page);
	set_page_writeback(page);
	ClearPageError(page);

	/*
	 * Comments copied from block_write_full_page_endio:
	 *
	 * The page straddles i_size.  It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped.  "A file is mapped
	 * in multiples of the page size.  For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {

		block_end = block_start + blocksize;
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		clear_buffer_dirty(bh);
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	/*
	 * If the page was truncated before we could do the writeback,
	 * or we had a memory allocation error while trying to write
	 * the first buffer head, we won't have submitted any pages for
	 * I/O.  In that case we need to make sure we've cleared the
	 * PageWriteback bit from the page to prevent the system from
	 * wedging later on.
	 */
	put_io_page(io_page);
	return ret;
}
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
retry:
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
			      PAGECACHE_TAG_DIRTY,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index + 1;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);

			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done = 1;
					break;
				}
 			}

			if (wbc->nr_to_write > 0) {
				wbc->nr_to_write--;
				if (wbc->nr_to_write == 0 &&
				    wbc->sync_mode == WB_SYNC_NONE) {
					/*
					 * We stop writing back only if we are
					 * not doing integrity sync. In case of
					 * integrity sync we have to keep going
					 * because someone may be concurrently
					 * dirtying pages, and we might have
					 * synced a lot of newly appeared dirty
					 * pages, but have not synced all of the
					 * old dirty pages.
					 */
					done = 1;
					break;
				}
			}

			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
Example #21
/*
 * Try to free buffers if "page" has them.
 */
static int
remap_preparepage(struct page *page, int fastmode)
{
    struct address_space *mapping;
    int waitcnt = fastmode ? 0 : 10;

    BUG_ON(!PageLocked(page));

    mapping = page_mapping(page);

    if (PageWriteback(page) && !PagePrivate(page) && !PageSwapCache(page)) {
        printk("remap: mapping %p page %p\n", page->mapping, page);
        return -REMAPPREP_WB;
    }

    if (PageWriteback(page))
        wait_on_page_writeback(page);

    if (PagePrivate(page)) {
#ifdef DEBUG_MSG
        printk("rmap: process page with buffers...\n");
#endif
        /* XXX copied from shrink_list() */
        if (PageDirty(page) &&
                is_page_cache_freeable(page) &&
                mapping != NULL &&
                mapping->a_ops->writepage != NULL) {
            spin_lock_irq(&mapping->tree_lock);
            if (clear_page_dirty_for_io(page)) {
                int res;
                struct writeback_control wbc = {
                    .sync_mode = WB_SYNC_NONE,
                    .nr_to_write = SWAP_CLUSTER_MAX,
                    .nonblocking = 1,
                    .for_reclaim = 1,
                };

                spin_unlock_irq(&mapping->tree_lock);

                SetPageReclaim(page);
                res = mapping->a_ops->writepage(page, &wbc);

                if (res < 0)
                    /* not implemented. help */
                    BUG();
                if (res == WRITEPAGE_ACTIVATE) {
                    ClearPageReclaim(page);
                    return -REMAPPREP_WB;
                }
                if (!PageWriteback(page)) {
                    /* synchronous write or broken a_ops? */
                    ClearPageReclaim(page);
                }
                lock_page(page);
                if (!PagePrivate(page))
                    return 0;
            } else
                spin_unlock_irq(&mapping->tree_lock);
        }

        while (1) {
            if (try_to_release_page(page, GFP_KERNEL))
                break;
            if (!waitcnt)
                return -REMAPPREP_BUFFER;
            msleep(10);
            waitcnt--;
            if (!waitcnt)
                print_buffer(page);
        }
    }
    return 0;
}
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end,
				    pgoff_t *done_index)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	int i;
	int ret;

	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for(i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		/*
		 * At this point, the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or
		 * even swizzled back from swapper_space to tmpfs file
		 * mapping. However, page->index will not change
		 * because we have a reference on the page.
		 */
		if (page->index > end) {
			/*
			 * can't be range_cyclic (1st pass) because
			 * end == -1 in that case.
			 */
			ret = 1;
			break;
		}

		*done_index = page->index;

		lock_page(page);

		if (unlikely(page->mapping != mapping)) {
continue_unlock:
			unlock_page(page);
			continue;
		}

		if (!PageDirty(page)) {
			/* someone wrote it for us */
			goto continue_unlock;
		}

		if (PageWriteback(page)) {
			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);
			else
				goto continue_unlock;
		}

		BUG_ON(PageWriteback(page));
		if (!clear_page_dirty_for_io(page))
			goto continue_unlock;

		trace_wbc_writepage(wbc, mapping->backing_dev_info);

		ret = __gfs2_jdata_writepage(page, wbc);
		if (unlikely(ret)) {
			if (ret == AOP_WRITEPAGE_ACTIVATE) {
				unlock_page(page);
				ret = 0;
			} else {

				/*
				 * done_index is set past this page,
				 * so media errors will not choke
				 * background writeout for the entire
				 * file. This has consequences for
				 * range_cyclic semantics (ie. it may
				 * not be suitable for data integrity
				 * writeout).
				 */
				*done_index = page->index + 1;
				ret = 1;
				break;
			}
		}

		/*
		 * We stop writing back only if we are not doing
		 * integrity sync. In case of integrity sync we have to
		 * keep going until we have written all the pages
		 * we tagged for writeback prior to entering this loop.
		 */
		if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
			ret = 1;
			break;
		}

	}
	gfs2_trans_end(sdp);
	return ret;
}
Example #23
void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct stack_trace trace = {
		.nr_entries = 0,
		.max_entries = ARRAY_SIZE(page_ext->trace_entries),
		.entries = &page_ext->trace_entries[0],
		.skip = 3,
	};

	save_stack_trace(&trace);

	page_ext->order = order;
	page_ext->gfp_mask = gfp_mask;
	page_ext->nr_entries = trace.nr_entries;

	__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
}

static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_ext *page_ext)
{
	int ret;
	int pageblock_mt, page_mt;
	char *kbuf;
	struct stack_trace trace = {
		.nr_entries = page_ext->nr_entries,
		.entries = &page_ext->trace_entries[0],
	};

	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = snprintf(kbuf, count,
			"Page allocated via order %u, mask 0x%x\n",
			page_ext->order, page_ext->gfp_mask);

	if (ret >= count)
		goto err;

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pfnblock_migratetype(page, pfn);
	page_mt  = gfpflags_to_migratetype(page_ext->gfp_mask);
	ret += snprintf(kbuf + ret, count - ret,
			"PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n",
			pfn,
			pfn >> pageblock_order,
			pageblock_mt,
			pageblock_mt != page_mt ? "Fallback" : "        ",
			PageLocked(page)	? "K" : " ",
			PageError(page)		? "E" : " ",
			PageReferenced(page)	? "R" : " ",
			PageUptodate(page)	? "U" : " ",
			PageDirty(page)		? "D" : " ",
			PageLRU(page)		? "L" : " ",
			PageActive(page)	? "A" : " ",
			PageSlab(page)		? "S" : " ",
			PageWriteback(page)	? "W" : " ",
			PageCompound(page)	? "C" : " ",
			PageSwapCache(page)	? "B" : " ",
			PageMappedToDisk(page)	? "M" : " ");

	if (ret >= count)
		goto err;

	ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
	if (ret >= count)
		goto err;

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;

	if (!page_owner_inited)
		return -EINVAL;

	page = NULL;
	pfn = min_low_pfn + *ppos;

	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	drain_all_pages(NULL);

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		/* Check for holes within a MAX_ORDER area */
		if (!pfn_valid_within(pfn))
			continue;

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = page_order_unsafe(page);

			if (freepage_order < MAX_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = lookup_page_ext(page);

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			continue;

		/* Record the next PFN to read in the file offset */
		*ppos = (pfn - min_low_pfn) + 1;

		return print_page_owner(buf, count, pfn, page, page_ext);
	}

	return 0;
}
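
read_page_owner() is written as a raw ->read handler, so it is exposed via a debugfs file rather than a seq_file. A hedged registration sketch modeled on mm/page_owner.c (treat the exact file name and mode as assumptions):

static const struct file_operations proc_page_owner_operations = {
	.read = read_page_owner,
};

static int __init pageowner_init(void)
{
	if (!page_owner_inited)
		return 0;

	/* Shows up as <debugfs>/page_owner when the feature is enabled. */
	debugfs_create_file("page_owner", S_IRUSR, NULL, NULL,
			    &proc_page_owner_operations);
	return 0;
}
late_initcall(pageowner_init)
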
Example #24
/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts range to invalidate
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned properly.
 */
void truncate_inode_pages_range(struct address_space *mapping,
                                loff_t lstart, loff_t lend)
{
    pgoff_t		start;		/* inclusive */
    pgoff_t		end;		/* exclusive */
    unsigned int	partial_start;	/* inclusive */
    unsigned int	partial_end;	/* exclusive */
    struct pagevec	pvec;
    pgoff_t		indices[PAGEVEC_SIZE];
    pgoff_t		index;
    int		i;

    cleancache_invalidate_inode(mapping);
    if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
        return;

    /* Offsets within partial pages */
    partial_start = lstart & (PAGE_CACHE_SIZE - 1);
    partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);

    /*
     * 'start' and 'end' always covers the range of pages to be fully
     * truncated. Partial pages are covered with 'partial_start' at the
     * start of the range and 'partial_end' at the end of the range.
     * Note that 'end' is exclusive while 'lend' is inclusive.
     */
    start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
    if (lend == -1)
        /*
         * lend == -1 indicates end-of-file so we have to set 'end'
         * to the highest possible pgoff_t and since the type is
         * unsigned we're using -1.
         */
        end = -1;
    else
        end = (lend + 1) >> PAGE_CACHE_SHIFT;

    pagevec_init(&pvec, 0);
    index = start;
    while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE),
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            if (!trylock_page(page))
                continue;
            WARN_ON(page->index != index);
            if (PageWriteback(page)) {
                unlock_page(page);
                continue;
            }
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        index++;
    }

    if (partial_start) {
        struct page *page = find_lock_page(mapping, start - 1);
        if (page) {
            unsigned int top = PAGE_CACHE_SIZE;
            if (start > end) {
                /* Truncation within a single page */
                top = partial_end;
                partial_end = 0;
            }
            wait_on_page_writeback(page);
            zero_user_segment(page, partial_start, top);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, partial_start,
                                  top - partial_start);
            unlock_page(page);
            page_cache_release(page);
        }
    }
    if (partial_end) {
        struct page *page = find_lock_page(mapping, end);
        if (page) {
            wait_on_page_writeback(page);
            zero_user_segment(page, 0, partial_end);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, 0,
                                  partial_end);
            unlock_page(page);
            page_cache_release(page);
        }
    }
    /*
     * If the truncation happened within a single page no pages
     * will be released, just zeroed, so we can bail out now.
     */
    if (start >= end)
        return;

    index = start;
    for ( ; ; ) {
        cond_resched();
        if (!pagevec_lookup_entries(&pvec, mapping, index,
                                    min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
            /* If all gone from start onwards, we're done */
            if (index == start)
                break;
            /* Otherwise restart to make sure all gone */
            index = start;
            continue;
        }
        if (index == start && indices[0] >= end) {
            /* All gone out of hole to be punched, we're done */
            pagevec_remove_exceptionals(&pvec);
            pagevec_release(&pvec);
            break;
        }
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end) {
                /* Restart punch to make sure all gone */
                index = start - 1;
                break;
            }

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            lock_page(page);
            WARN_ON(page->index != index);
            wait_on_page_writeback(page);
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        index++;
    }
    cleancache_invalidate_inode(mapping);
}
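
A hedged usage sketch (the helper name is illustrative): shrink an inode's page cache after its size has been reduced, which is roughly how the generic truncation paths use this primitive.

static void example_shrink_pagecache(struct inode *inode, loff_t newsize)
{
	/* Drop (and partially zero) everything cached beyond the new size. */
	truncate_inode_pages_range(inode->i_mapping, newsize, (loff_t)-1);
}
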
Example #25
/*
 * Inspired by write_cache_pages from /mm/page-writeback.c
 */
static int ecryptfs_writepages(struct address_space *mapping,
			       struct writeback_control *wbc)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;
	struct page **pgs;
	int pgidx;
	
	/* printk("[g-ecryptfs] Info: call writepages\n"); */

	pgs = kmalloc(sizeof(struct page*)*PAGEVEC_SIZE, GFP_KERNEL);
	if (!pgs) {
		printk("[g-ecryptfs] Error: pgs alloc failed!\n");
		return -EFAULT;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;
		struct page *page;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;
		pgidx = 0;

		for (i = 0; i < nr_pages; i++) {
			page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index + 1;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			pgs[pgidx++] = page;
		}

		/*trace_wbc_writepage(wbc, mapping->backing_dev_info);*/
		ret = ecryptfs_encrypt_pages(pgs, pgidx);
		//printk("[g-ecryptfs] Info: enc %d pages in writepages\n", pgidx);
		mapping_set_error(mapping, ret);

		for (i = 0; i < nr_pages; i++) {
			page = pvec.pages[i];

			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					if (PageLocked(page))
						unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	kfree(pgs);

	return ret;
}
/**ltl
 * Purpose: walk the dirty pages of the given address space and write them out.
 * Parameters:
 * Return value:
 * Notes:
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;		/* index of the next page to flush */
	pgoff_t end;		/* Inclusive: index of the last page to flush; -1 means cyclic writeback */
	pgoff_t done_index;
	int cycled;		/* for a wrapped (two-leg) cyclic pass: 1 means the first leg has finished */
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {	/* cyclic writeback over the whole mapping? */
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	
	while (!done && (index <= end)) {
		int i;
		/*
		 * Look up pages in this address space that have the requested
		 * tag set (e.g. PAGECACHE_TAG_DIRTY) and collect them in the
		 * pagevec.
		 */
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;
		/* process each page we found */
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index + 1;
			/* lock the page before we touch it */
			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			/*
			 * Another task may have modified the page while we
			 * were taking the lock, so re-check its state below.
			 */
			if (unlikely(page->mapping != mapping)) {	/* page was invalidated */
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {	/* already written back; dirty flag cleared */
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {	/* under writeback; policy depends on sync_mode */
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);	/* wait for the in-flight writeback to finish */
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, mapping->backing_dev_info);
			/* write out the dirty page */
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			/* page submitted; decrement the remaining write budget */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
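As a usage note, a filesystem normally reaches this iterator from its ->writepages method, handing in a small callback that wraps its own ->writepage. The sketch below is illustrative only: the myfs_ names are invented, and just write_cache_pages() and mapping_set_error() are taken from the surrounding code.

/*
 * Illustrative only: a hypothetical ->writepages built on write_cache_pages().
 * The myfs_ names are placeholders, not part of the surrounding code.
 */
static int myfs_writepage_cb(struct page *page, struct writeback_control *wbc,
			     void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);

	mapping_set_error(mapping, ret);
	return ret;
}

static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	/* Nothing to do for mappings without a ->writepage method. */
	if (!mapping->a_ops->writepage)
		return 0;
	return write_cache_pages(mapping, wbc, myfs_writepage_cb, mapping);
}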
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		while (start <= end) {
			cond_resched();

			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;

			btree_lock_page_hook(page);
			if (!page->mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			if (PageWriteback(page)) {
				if (PageDirty(page))
					wait_on_page_writeback(page);
				else {
					unlock_page(page);
					page_cache_release(page);
					continue;
				}
			}
			err = write_one_page(page, 0);
			if (err)
				werr = err;
			page_cache_release(page);
		}
	}
	while (1) {
		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
		while (start <= end) {
			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;
			if (PageDirty(page)) {
				btree_lock_page_hook(page);
				wait_on_page_writeback(page);
				err = write_one_page(page, 0);
				if (err)
					werr = err;
			}
			wait_on_page_writeback(page);
			page_cache_release(page);
			cond_resched();
		}
	}
	if (err)
		werr = err;
	return werr;
}
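For context, the transaction commit path is the usual caller of this helper, passing the transaction's dirty_pages extent_io tree. The hedged sketch below shows that caller's shape as it looked in btrfs of the same vintage; check names and fields against your kernel version.

/*
 * Hedged sketch of the commit-path caller; field and helper names follow
 * btrfs of this era but may differ between kernel versions.
 */
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		/* No open transaction: just flush the btree inode's mapping. */
		struct inode *btree_inode = root->fs_info->btree_inode;

		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					&trans->transaction->dirty_pages);
}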
Exemple #28
static int f2fs_write_data_page(struct page *page,
                                struct writeback_control *wbc)
{
    struct inode *inode = page->mapping->host;
    struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    loff_t i_size = i_size_read(inode);
    const pgoff_t end_index = ((unsigned long long) i_size)
                              >> PAGE_CACHE_SHIFT;
    unsigned offset = 0;
    bool need_balance_fs = false;
    int err = 0;
    struct f2fs_io_info fio = {
        .sbi = sbi,
        .type = DATA,
        .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
        .page = page,
        .encrypted_page = NULL,
    };

    trace_f2fs_writepage(page, DATA);

    if (page->index < end_index)
        goto write;

    /*
     * If the offset is out-of-range of file size,
     * this page does not have to be written to disk.
     */
    offset = i_size & (PAGE_CACHE_SIZE - 1);
    if ((page->index >= end_index + 1) || !offset)
        goto out;

    zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
    if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
        goto redirty_out;
    if (f2fs_is_drop_cache(inode))
        goto out;
    if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
            available_free_memory(sbi, BASE_CHECK))
        goto redirty_out;

    /* Dentry blocks are controlled by checkpoint */
    if (S_ISDIR(inode->i_mode)) {
        if (unlikely(f2fs_cp_error(sbi)))
            goto redirty_out;
        err = do_write_data_page(&fio);
        goto done;
    }

    /* we should bypass data pages so the kworker jobs can proceed */
    if (unlikely(f2fs_cp_error(sbi))) {
        SetPageError(page);
        goto out;
    }

    if (!wbc->for_reclaim)
        need_balance_fs = true;
    else if (has_not_enough_free_secs(sbi, 0))
        goto redirty_out;

    err = -EAGAIN;
    f2fs_lock_op(sbi);
    if (f2fs_has_inline_data(inode))
        err = f2fs_write_inline_data(inode, page);
    if (err == -EAGAIN)
        err = do_write_data_page(&fio);
    f2fs_unlock_op(sbi);
done:
    if (err && err != -ENOENT)
        goto redirty_out;

    clear_cold_data(page);
out:
    inode_dec_dirty_pages(inode);
    if (err)
        ClearPageUptodate(page);
    unlock_page(page);
    if (need_balance_fs)
        f2fs_balance_fs(sbi);
    if (wbc->for_reclaim)
        f2fs_submit_merged_bio(sbi, DATA, WRITE);
    return 0;

redirty_out:
    redirty_page_for_writepage(wbc, page);
    return AOP_WRITEPAGE_ACTIVATE;
}

static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
                            void *data)
{
    struct address_space *mapping = data;
    int ret = mapping->a_ops->writepage(page, wbc);
    mapping_set_error(mapping, ret);
    return ret;
}

/*
 * This function was copied from write_cache_pages in mm/page-writeback.c.
 * The major change is that cold data pages are written out in a separate
 * pass from warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
                                  struct writeback_control *wbc, writepage_t writepage,
                                  void *data)
{
    int ret = 0;
    int done = 0;
    struct pagevec pvec;
    int nr_pages;
    pgoff_t uninitialized_var(writeback_index);
    pgoff_t index;
    pgoff_t end;		/* Inclusive */
    pgoff_t done_index;
    int cycled;
    int range_whole = 0;
    int tag;
    int step = 0;

    pagevec_init(&pvec, 0);
next:
    if (wbc->range_cyclic) {
        writeback_index = mapping->writeback_index; /* prev offset */
        index = writeback_index;
        if (index == 0)
            cycled = 1;
        else
            cycled = 0;
        end = -1;
    } else {
        index = wbc->range_start >> PAGE_CACHE_SHIFT;
        end = wbc->range_end >> PAGE_CACHE_SHIFT;
        if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
            range_whole = 1;
        cycled = 1; /* ignore range_cyclic tests */
    }
    if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
        tag = PAGECACHE_TAG_TOWRITE;
    else
        tag = PAGECACHE_TAG_DIRTY;
retry:
    if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
        tag_pages_for_writeback(mapping, index, end);
    done_index = index;
    while (!done && (index <= end)) {
        int i;

        nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                                      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
        if (nr_pages == 0)
            break;

        for (i = 0; i < nr_pages; i++) {
            struct page *page = pvec.pages[i];

            if (page->index > end) {
                done = 1;
                break;
            }

            done_index = page->index;

            lock_page(page);

            if (unlikely(page->mapping != mapping)) {
continue_unlock:
                unlock_page(page);
                continue;
            }

            if (!PageDirty(page)) {
                /* someone wrote it for us */
                goto continue_unlock;
            }

            if (step == is_cold_data(page))
                goto continue_unlock;

            if (PageWriteback(page)) {
                if (wbc->sync_mode != WB_SYNC_NONE)
                    f2fs_wait_on_page_writeback(page, DATA);
                else
                    goto continue_unlock;
            }

            BUG_ON(PageWriteback(page));
            if (!clear_page_dirty_for_io(page))
                goto continue_unlock;

            ret = (*writepage)(page, wbc, data);
            if (unlikely(ret)) {
                if (ret == AOP_WRITEPAGE_ACTIVATE) {
                    unlock_page(page);
                    ret = 0;
                } else {
                    done_index = page->index + 1;
                    done = 1;
                    break;
                }
            }

            if (--wbc->nr_to_write <= 0 &&
                    wbc->sync_mode == WB_SYNC_NONE) {
                done = 1;
                break;
            }
        }
        pagevec_release(&pvec);
        cond_resched();
    }

    if (step < 1) {
        step++;
        goto next;
    }

    if (!cycled && !done) {
        cycled = 1;
        index = 0;
        end = writeback_index - 1;
        goto retry;
    }
    if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
        mapping->writeback_index = done_index;

    return ret;
}
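In f2fs this iterator is driven from ->writepages; a simplified, hedged sketch of that caller is shown below. The real f2fs_write_data_pages also bails out early in several low-memory and checkpoint cases, so treat this as an outline rather than the exact implementation. All names used here (F2FS_I_SB, __f2fs_writepage, f2fs_submit_merged_bio) appear in the surrounding code.

/*
 * Simplified sketch of the ->writepages caller; the real function performs
 * additional skip checks before walking the dirty pages.
 */
static int f2fs_write_data_pages(struct address_space *mapping,
				 struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int ret;

	/* Two-pass walk: warm/hot data first, cold data second (see step). */
	ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);

	/* Flush whatever the DATA bio merging has accumulated. */
	f2fs_submit_merged_bio(sbi, DATA, WRITE);
	return ret;
}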
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int i;
	int ret;

	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for(i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		lock_page(page);

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			continue;
		}

		if (!wbc->range_cyclic && page->index > end) {
			ret = 1;
			unlock_page(page);
			continue;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
		    !clear_page_dirty_for_io(page)) {
			unlock_page(page);
			continue;
		}

		/* Is the page fully outside i_size? (truncate in progress) */
		if (page->index > end_index || (page->index == end_index && !offset)) {
			page->mapping->a_ops->invalidatepage(page, 0);
			unlock_page(page);
			continue;
		}

		ret = __gfs2_jdata_writepage(page, wbc);

		if (ret || (--(wbc->nr_to_write) <= 0))
			ret = 1;
		if (wbc->nonblocking && bdi_write_congested(bdi)) {
			wbc->encountered_congestion = 1;
			ret = 1;
		}

	}
	gfs2_trans_end(sdp);
	return ret;
}
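The return convention here (non-zero means stop) only makes sense together with the caller's pagevec loop. Below is a hedged sketch of such a loop, modelled on gfs2_write_cache_jdata(); the example_ name and the exact loop bounds are assumptions, while the lookup and release helpers are the same ones used throughout this file.

/*
 * Hedged sketch of the caller loop: look up dirty pages a pagevec at a time
 * and stop when the pagevec helper above asks us to.
 */
static int example_write_cache_jdata(struct address_space *mapping,
				     struct writeback_control *wbc,
				     pgoff_t index, pgoff_t end)
{
	struct pagevec pvec;
	int nr_pages;
	int done = 0;
	int ret = 0;

	pagevec_init(&pvec, 0);
	while (!done && (index <= end)) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;

		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
		if (ret)
			done = 1;
		if (ret > 0)
			ret = 0;	/* "stop" is not an error */

		pagevec_release(&pvec);
		cond_resched();
	}
	return ret;
}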
Exemple #30
int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	atomic_set(&io_page->p_count, 1);
	get_page(page);
	set_page_writeback(page);
	ClearPageError(page);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {

		block_end = block_start + blocksize;
		if (block_start >= len) {
			/*
			 * Comments copied from block_write_full_page_endio:
			 *
			 * The page straddles i_size.  It must be zeroed out on
			 * each and every writepage invocation because it may
			 * be mmapped.  "A file is mapped in multiples of the
			 * page size.  For a file that is not a multiple of
			 * the  page size, the remaining memory is zeroed when
			 * mapped, and writes to that region are not written
			 * out to the file."
			 */
			zero_user_segment(page, block_start, block_end);
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		clear_buffer_dirty(bh);
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	put_io_page(io_page);
	return ret;
}
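As a usage note, callers of this helper are expected to initialise an ext4_io_submit context, hand pages to it one at a time, and then flush it. A hedged sketch follows; ext4_io_submit_init() and ext4_io_submit() come from fs/ext4/page-io.c of this era, but treat their exact signatures as assumptions, and example_write_one_page() is an invented name.

/*
 * Hedged sketch of a single-page caller; example_write_one_page() is an
 * invented name, the ext4_io_* helpers follow fs/ext4/page-io.c of this era.
 */
static int example_write_one_page(struct page *page, int len,
				  struct writeback_control *wbc)
{
	struct ext4_io_submit io_submit;
	int ret;

	ext4_io_submit_init(&io_submit, wbc);
	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
	ext4_io_submit(&io_submit);	/* flush the accumulated bio, if any */
	return ret;
}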