Example #1
0
/*
 * init_inode_metadata - prepare @inode's node page before its dentry is added
 * @inode: inode being linked into @dir
 * @dir:   parent directory
 * @name:  name under which @inode is linked
 *
 * If @inode carries FI_NEW_INODE a new inode page is created and then, in
 * this order, the empty directory block (directories only), the ACL and the
 * security attributes are initialised.  Otherwise the existing node page is
 * looked up and marked cold.  In both cases @name is then stored in the page
 * through init_dent_inode().
 *
 * Returns the inode page on success, or an ERR_PTR() value on failure.
 */
static struct page *init_inode_metadata(struct inode *inode,
		struct inode *dir, const struct qstr *name)
{
	struct page *page;
	int err;

	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
		page = new_inode_page(inode, name);
		if (IS_ERR(page))
			return page;

		if (S_ISDIR(inode->i_mode)) {
			err = make_empty_dir(inode, dir, page);
			/*
			 * NOTE(review): unlike the failures below, this path
			 * does not call f2fs_put_page(); presumably
			 * make_empty_dir() has already released the page when
			 * it fails -- confirm against its implementation.
			 */
			if (err)
				goto error;
		}

		err = f2fs_init_acl(inode, dir, page);
		if (err)
			goto put_error;

		err = f2fs_init_security(inode, dir, name, page);
		if (err)
			goto put_error;

		wait_on_page_writeback(page);
	} else {
		/* Existing inode: reuse its node page. */
		page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
		if (IS_ERR(page))
			return page;

		wait_on_page_writeback(page);
		set_cold_node(inode, page);
	}

	/* Record the dentry name inside the inode page. */
	init_dent_inode(name, page);

	/*
	 * This file should be checkpointed during fsync.
	 * We lost i_pino from now on.
	 */
	if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
		file_lost_pino(inode);
		inc_nlink(inode);
	}
	return page;

put_error:
	f2fs_put_page(page, 1);
error:
	/* Undo the inode page creation and report the failure. */
	remove_inode_page(inode);
	return ERR_PTR(err);
}
Example #2
0
/*
 * Remove @dentry from the dentry block held in @page.
 *
 * It only removes the dentry from the dentry page; the corresponding name
 * entry in the name page does not need to be touched during deletion.
 *
 * @dentry: entry to delete; must point into the dentry array of @page
 * @page:   dentry page (its mapping's host is the directory inode)
 * @inode:  inode the entry referred to, or NULL if there is none to update
 *
 * The function locks @page itself and releases it with f2fs_put_page(page, 1)
 * before returning.
 */
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
						struct inode *inode)
{
	struct	f2fs_dentry_block *dentry_blk;
	unsigned int bit_pos;
	struct address_space *mapping = page->mapping;
	struct inode *dir = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
	void *kaddr = page_address(page);
	int i;

	lock_page(page);
	wait_on_page_writeback(page);

	/* Slot index of @dentry inside the block, then free all its slots. */
	dentry_blk = (struct f2fs_dentry_block *)kaddr;
	bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
	for (i = 0; i < slots; i++)
		test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);

	/* Let's check and deallocate this dentry page */
	/* bit_pos == NR_DENTRY_IN_BLOCK afterwards means no slot is in use. */
	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
			NR_DENTRY_IN_BLOCK,
			0);
	kunmap(page); /* kunmap - pair of f2fs_find_entry */
	set_page_dirty(page);

	dir->i_ctime = dir->i_mtime = CURRENT_TIME;

	/* Deleting a sub-directory also drops one link on the parent. */
	if (inode && S_ISDIR(inode->i_mode)) {
		drop_nlink(dir);
		update_inode_page(dir);
	} else {
		mark_inode_dirty(dir);
	}

	if (inode) {
		inode->i_ctime = CURRENT_TIME;
		drop_nlink(inode);
		/* A directory loses a second link and is emptied. */
		if (S_ISDIR(inode->i_mode)) {
			drop_nlink(inode);
			i_size_write(inode, 0);
		}
		update_inode_page(inode);

		/* No links left: register as orphan, else give the slot back. */
		if (inode->i_nlink == 0)
			add_orphan_inode(sbi, inode->i_ino);
		else
			release_orphan_inode(sbi);
	}

	/* The block became empty: punch it out and undo the dirty accounting. */
	if (bit_pos == NR_DENTRY_IN_BLOCK) {
		truncate_hole(dir, page->index, page->index + 1);
		clear_page_dirty_for_io(page);
		ClearPageUptodate(page);
		dec_page_count(sbi, F2FS_DIRTY_DENTS);
		inode_dec_dirty_dents(dir);
	}
	f2fs_put_page(page, 1);
}
Example #3
0
/**
 * invalidate_inode_pages2 - remove all unmapped pages from an address_space
 * @mapping: the address_space
 *
 * invalidate_inode_pages2() is like truncate_inode_pages(), except for the case
 * where the page is seen to be mapped into process pagetables.  In that case,
 * the page is marked clean but is left attached to its address_space.
 *
 * The page is also marked not uptodate so that a subsequent pagefault will
 * perform I/O to bring the page's contents back into sync with its backing
 * store.
 *
 * The same clean + not-uptodate treatment is applied to an unmapped page when
 * invalidate_complete_page() fails to remove it.
 *
 * FIXME: invalidate_inode_pages2() is probably trivially livelockable.
 */
void invalidate_inode_pages2(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t next = 0;
	int i;

	pagevec_init(&pvec, 0);
	/* Walk the mapping one pagevec at a time, resuming at 'next'. */
	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			if (page->mapping == mapping) {	/* truncate race? */
				wait_on_page_writeback(page);
				/* Only pages still in the mapping advance the cursor. */
				next = page->index + 1;
				if (page_mapped(page)) {
					clear_page_dirty(page);
					ClearPageUptodate(page);
				} else {
					if (!invalidate_complete_page(mapping,
								      page)) {
						clear_page_dirty(page);
						ClearPageUptodate(page);
					}
				}
			}
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}
/*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
 * a vm helper function, it's already simplified quite a bit by the
 * addition of remove_mapping(). If success is returned, the caller may
 * attempt to reuse this page for another destination.
 *
 * @pipe: pipe the buffer belongs to (unused here)
 * @buf:  pipe buffer whose page should be stolen
 *
 * Returns 0 when the page was removed from its mapping; the page is then
 * left locked and PIPE_BUF_FLAG_LRU is set in @buf.  Returns 1, with the
 * page unlocked, when the page could not be stolen.
 */
static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
				     struct pipe_buffer *buf)
{
	struct page *page = buf->page;
	struct address_space *mapping;

	lock_page(page);

	/*
	 * The mapping must be looked up with the page locked: before that a
	 * concurrent truncate may detach the page, and a NULL mapping must
	 * not be handed to mapping_gfp_mask() or remove_mapping().
	 */
	mapping = page_mapping(page);
	if (!mapping)
		goto out_unlock;

	WARN_ON(!PageUptodate(page));

	/*
	 * At least for ext2 with nobh option, we need to wait on writeback
	 * completing on this page, since we'll remove it from the pagecache.
	 * Otherwise truncate wont wait on the page, allowing the disk
	 * blocks to be reused by someone else before we actually wrote our
	 * data to them. fs corruption ensues.
	 */
	wait_on_page_writeback(page);

	if (PagePrivate(page))
		try_to_release_page(page, mapping_gfp_mask(mapping));

	if (remove_mapping(mapping, page)) {
		buf->flags |= PIPE_BUF_FLAG_LRU;
		return 0;
	}

	/* Raced with truncate or failed to remove the page: report failure. */
out_unlock:
	unlock_page(page);
	return 1;
}
Example #5
0
/*
 * gfs2_write_jdata_pagevec - write the pages of one pagevec in a transaction
 * @mapping:  mapping the pages are expected to belong to
 * @wbc:      writeback control; nr_to_write is decremented per written page
 * @pvec:     pagevec holding the candidate pages
 * @nr_pages: number of valid entries in @pvec
 * @end:      last page index to write (ignored when wbc->range_cyclic is set)
 *
 * One transaction sized for @nr_pages worth of blocks wraps the whole loop.
 *
 * Returns the negative error of gfs2_trans_begin() if the transaction cannot
 * be started.  Otherwise returns the value left in 'ret' by the last page
 * that set it: 1 when a page was past @end, when __gfs2_jdata_writepage()
 * returned non-zero or when nr_to_write ran out, else 0.  Note that the loop
 * is not terminated early in any of these cases.
 */
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	int i;
	int ret;

	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for(i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		lock_page(page);

		/* Page left this mapping while we were not holding its lock. */
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			continue;
		}

		/* Beyond the requested range (only meaningful for ranged writeback). */
		if (!wbc->range_cyclic && page->index > end) {
			ret = 1;
			unlock_page(page);
			continue;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		/* Still under writeback, or not dirty any more: nothing to do. */
		if (PageWriteback(page) ||
		    !clear_page_dirty_for_io(page)) {
			unlock_page(page);
			continue;
		}

		/* Is the page fully outside i_size? (truncate in progress) */
		if (page->index > end_index || (page->index == end_index && !offset)) {
			page->mapping->a_ops->invalidatepage(page, 0,
							     PAGE_CACHE_SIZE);
			unlock_page(page);
			continue;
		}

		ret = __gfs2_jdata_writepage(page, wbc);

		/* Any failure, or an exhausted write budget, is reported as 1. */
		if (ret || (--(wbc->nr_to_write) <= 0))
			ret = 1;
	}
	gfs2_trans_end(sdp);
	return ret;
}
/*
 * nilfs_page_mkwrite - make a file page writable through a shared mapping
 * @vma: vma the fault occurred in
 * @vmf: fault description; vmf->page is the page to be made writable
 *
 * Returns VM_FAULT_SIGBUS when the disk is nearly full or the transaction
 * cannot be started, VM_FAULT_NOPAGE when the page no longer belongs to the
 * file (or lies beyond i_size, or is not uptodate), the result of
 * block_page_mkwrite() when that is not VM_FAULT_LOCKED, and VM_FAULT_LOCKED
 * otherwise.
 */
static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	struct nilfs_transaction_info ti;
	int ret;

	if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
		return VM_FAULT_SIGBUS; 

	lock_page(page);
	/* Re-validate the page now that it is locked. */
	if (page->mapping != inode->i_mapping ||
	    page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
		unlock_page(page);
		return VM_FAULT_NOPAGE; 
	}

	if (PageMappedToDisk(page))
		goto mapped;

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int fully_mapped = 1;

		/* Every buffer mapped means the whole page is mapped to disk. */
		bh = head = page_buffers(page);
		do {
			if (!buffer_mapped(bh)) {
				fully_mapped = 0;
				break;
			}
		} while (bh = bh->b_this_page, bh != head);

		if (fully_mapped) {
			SetPageMappedToDisk(page);
			goto mapped;
		}
	}
	/* Blocks must be allocated: drop the lock and do it in a transaction. */
	unlock_page(page);

	ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
	
	if (unlikely(ret))
		return VM_FAULT_SIGBUS;

	ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
	if (ret != VM_FAULT_LOCKED) {
		nilfs_transaction_abort(inode->i_sb);
		return ret;
	}
	nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
	nilfs_transaction_commit(inode->i_sb);

 mapped:
	/* Both paths reaching here report the page as locked to the caller. */
	wait_on_page_writeback(page);
	return VM_FAULT_LOCKED;
}
/* added: begin address space operations definitions */
/*
 * wrapfs_writepage - write one upper page through to the lower file
 * @page: locked, uptodate upper page
 * @wbc:  writeback control forwarded to the lower ->writepage
 *
 * The upper page is copied into the page at the same index of the lower
 * inode's mapping, which is then marked uptodate and dirty.  Unless the call
 * is made for reclaim, the lower ->writepage is invoked on it.
 *
 * The upper page is always unlocked before returning.
 *
 * Returns 0 on success (including the cases where there is no lower inode
 * information, where no lower page could be obtained and the upper page was
 * re-dirtied, and where the call was made for reclaim), or the negative
 * error returned by the lower ->writepage.
 */
static int wrapfs_writepage(struct page *page, struct writeback_control *wbc){
    int err = -EIO;
    struct inode *inode;
    struct inode *lower_inode;
    struct page *lower_page;
    struct address_space *lower_mapping; /* lower inode mapping */
    gfp_t mask;

    BUG_ON(!PageUptodate(page));
    inode = page->mapping->host;
    /* Nothing to write through to. */
    if (!inode || !WRAPFS_I(inode)){
        err = 0;
        goto out;
    }
    lower_inode = wrapfs_lower_inode(inode);
    lower_mapping = lower_inode->i_mapping;
    /* Mask out __GFP_FS for the lower page allocation. */
    mask = mapping_gfp_mask(lower_mapping) & ~(__GFP_FS);
    lower_page = find_or_create_page(lower_mapping, page->index, mask);

    /* No lower page available: keep the data by re-dirtying our page. */
    if (!lower_page) {
        err = 0;
        set_page_dirty(page);
        goto out;
    }

    copy_highpage(lower_page, page);
    flush_dcache_page(lower_page);
    SetPageUptodate(lower_page);
    set_page_dirty(lower_page);

    /*
     * For reclaim the copy into the (now dirty) lower page is all that is
     * done here; the lower ->writepage is not called.  That is a success,
     * so do not leak the initial -EIO to the caller.
     */
    if (wbc->for_reclaim) {
        err = 0;
        unlock_page(lower_page);
        goto out_release;
    }

    BUG_ON(!lower_mapping->a_ops->writepage);
    wait_on_page_writeback(lower_page); /* prevent multiple writers */
    clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
    err = lower_mapping->a_ops->writepage(lower_page, wbc);
    if (err < 0)
        goto out_release;

    /* The lower fs declined to write and left its page locked. */
    if (err == AOP_WRITEPAGE_ACTIVATE) {
         err = 0;
         unlock_page(lower_page);
    }

    fsstack_copy_attr_times(inode, lower_inode);

out_release:
    page_cache_release(lower_page);

out:
    unlock_page(page);
    return err;
}
Example #8
0
/*
 * Wait until @page is no longer under writeback.
 *
 * The page must be locked on entry and is locked again on return.  The lock
 * is dropped around each wait, and the writeback state is re-checked once the
 * lock has been re-taken; the wait is repeated for as long as the page is
 * found under writeback at that point.
 */
void reiser4_wait_page_writeback(struct page *page)
{
	assert("zam-783", PageLocked(page));

	for (;;) {
		unlock_page(page);
		wait_on_page_writeback(page);
		lock_page(page);

		if (!PageWriteback(page))
			break;
	}
}
Example #9
0
/*
 * btrfs_wait_marked_extents - wait for writeback of all ranges carrying @mark
 * @root:        root whose fs_info supplies the btree inode
 * @dirty_pages: extent io tree recording the marked ranges
 * @mark:        extent bit(s) identifying the ranges to process
 *
 * Each marked range is cleared from @dirty_pages and then walked page by
 * page.  Pages found in the btree inode's mapping that are still dirty are
 * written with write_one_page(); every page found is waited on.
 *
 * Returns 0, or the error of a failed write_one_page() call.
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
                              struct extent_io_tree *dirty_pages, int mark)
{
    int ret;
    int err = 0;
    int werr = 0;
    struct page *page;
    struct inode *btree_inode = root->fs_info->btree_inode;
    u64 start = 0;
    u64 end;
    unsigned long index;

    while (1) {
        /* A non-zero result ends the scan (presumably: no range left -- confirm). */
        ret = find_first_extent_bit(dirty_pages, start, &start, &end,
                                    mark);
        if (ret)
            break;

        clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
        while (start <= end) {
            index = start >> PAGE_CACHE_SHIFT;
            /* Advance to the start of the following page up front. */
            start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
            page = find_get_page(btree_inode->i_mapping, index);
            if (!page)
                continue;
            if (PageDirty(page)) {
                btree_lock_page_hook(page);
                wait_on_page_writeback(page);
                err = write_one_page(page, 0);
                if (err)
                    werr = err;
            }
            wait_on_page_writeback(page);
            page_cache_release(page);
            cond_resched();
        }
    }
    /* 'err' still holds the result of the most recent write_one_page(). */
    if (err)
        werr = err;
    return werr;
}
Example #10
0
/*
 * Redirect an existing directory entry to another inode.
 *
 * @dir:   directory that owns the entry
 * @de:    entry to update, located inside @page
 * @page:  dentry page; locked here and released via f2fs_put_page(page, 1)
 * @inode: inode the entry should refer to from now on
 */
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
		struct page *page, struct inode *inode)
{
	lock_page(page);
	wait_on_page_writeback(page);

	/* Rewrite inode number and file type of the entry. */
	de->ino = cpu_to_le32(inode->i_ino);
	set_de_type(de, inode);

	kunmap(page);
	set_page_dirty(page);

	/* The directory was modified: stamp both times with one value. */
	dir->i_ctime = CURRENT_TIME;
	dir->i_mtime = dir->i_ctime;
	mark_inode_dirty(dir);

	f2fs_put_page(page, 1);
}
Example #11
0
File: dir.c Project: mbgg/linux
/*
 * Store the dentry name in an inode page.
 *
 * @name:  name to record (length and bytes are copied)
 * @ipage: inode page to update; an ERR_PTR() value is silently ignored
 *
 * Waits for writeback on @ipage, writes the name length and name bytes into
 * the inode part of the node block, and marks the page dirty.
 */
void init_dent_inode(const struct qstr *name, struct page *ipage)
{
	struct f2fs_node *node;

	if (!IS_ERR(ipage)) {
		wait_on_page_writeback(ipage);

		/* copy name info. to this inode page */
		node = (struct f2fs_node *)page_address(ipage);
		node->i.i_namelen = cpu_to_le32(name->len);
		memcpy(node->i.i_name, name->name, name->len);

		set_page_dirty(ipage);
	}
}
Example #12
0
/*
 * Redirect an existing directory entry to another inode and record the new
 * parent in that inode.
 *
 * @dir:   directory that owns the entry
 * @de:    entry to update, located inside @page
 * @page:  dentry page; locked here and released via f2fs_put_page(page, 1)
 * @inode: inode the entry should refer to from now on
 */
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
		struct page *page, struct inode *inode)
{
	lock_page(page);
	wait_on_page_writeback(page);

	/* Rewrite inode number and file type of the entry. */
	de->ino = cpu_to_le32(inode->i_ino);
	set_de_type(de, inode);

	kunmap(page);
	set_page_dirty(page);

	/* The directory was modified: stamp both times with one value. */
	dir->i_ctime = CURRENT_TIME;
	dir->i_mtime = dir->i_ctime;
	mark_inode_dirty(dir);

	/* update parent inode number before releasing dentry page */
	F2FS_I(inode)->i_pino = dir->i_ino;

	f2fs_put_page(page, 1);
}
Example #13
0
/*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
 * a vm helper function, it's already simplified quite a bit by the
 * addition of remove_mapping(). If success is returned, the caller may
 * attempt to reuse this page for another destination.
 *
 * @pipe: pipe the buffer belongs to (unused here)
 * @buf:  pipe buffer whose page should be stolen
 *
 * Returns 0 when the page was removed from its mapping; the page is then
 * left locked and PIPE_BUF_FLAG_LRU is set in @buf.  Returns 1, with the
 * page unlocked, when the page could not be stolen.
 */
static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
				     struct pipe_buffer *buf)
{
	struct page *page = buf->page;
	struct address_space *mapping;

	lock_page(page);

	mapping = page_mapping(page);
	if (mapping) {
		WARN_ON(!PageUptodate(page));

		/*
		 * At least for ext2 with nobh option, we need to wait on
		 * writeback completing on this page, since we'll remove it
		 * from the pagecache.  Otherwise truncate wont wait on the
		 * page, allowing the disk blocks to be reused by someone else
		 * before we actually wrote our data to them. fs corruption
		 * ensues.
		 */
		wait_on_page_writeback(page);

		/*
		 * try_to_release_page() returns non-zero when the private
		 * data was released; only a failed release (zero) prevents
		 * the page from being stolen.
		 */
		if (PagePrivate(page)
		    && !try_to_release_page(page, GFP_KERNEL))
			goto out_unlock;

		/*
		 * If we succeeded in removing the mapping, set LRU flag
		 * and return good.
		 */
		if (remove_mapping(mapping, page)) {
			buf->flags |= PIPE_BUF_FLAG_LRU;
			return 0;
		}
	}

	/*
	 * Raced with truncate or failed to remove page from current
	 * address space, unlock and return failure.
	 */
out_unlock:
	unlock_page(page);
	return 1;
}
/*
 * Forget a btree node buffer and, if its page ends up clean, invalidate that
 * page in its mapping.
 *
 * @bh: buffer head of the node to delete
 *
 * A temporary page reference is held and the page is locked while the buffer
 * is forgotten.  The dirty state and mapping are sampled under the page lock
 * and acted upon after it has been dropped.
 */
void nilfs_btnode_delete(struct buffer_head *bh)
{
	struct page *pg = bh->b_page;
	pgoff_t idx = page_index(pg);
	struct address_space *as;
	int dirty;

	page_cache_get(pg);
	lock_page(pg);
	wait_on_page_writeback(pg);

	nilfs_forget_buffer(bh);
	dirty = PageDirty(pg);
	as = pg->mapping;

	unlock_page(pg);
	page_cache_release(pg);

	/* Keep the page when it is still dirty or already detached. */
	if (dirty || !as)
		return;

	invalidate_inode_pages2_range(as, idx, idx);
}
Example #15
0
/*
 * Notification that a PTE pointing to an NFS page is about to be made
 * writable, implying that someone is about to modify the page through a
 * shared-writable mapping
 *
 * @vma: vma the fault occurred in
 * @vmf: fault description; vmf->page is the page being made writable
 *
 * Returns VM_FAULT_LOCKED with the page still locked when the page was
 * successfully prepared for writing.  Returns VM_FAULT_NOPAGE (page
 * unlocked) when the page no longer belongs to the file's mapping or has
 * zero length, and VM_FAULT_SIGBUS (page unlocked) when flushing or
 * updating the page failed.
 */
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct file *filp = vma->vm_file;
	struct inode *inode = file_inode(filp);
	unsigned pagelen;
	int ret = VM_FAULT_NOPAGE;
	struct address_space *mapping;

	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
		filp, filp->f_mapping->host->i_ino,
		(long long)page_offset(page));

	/* make sure the cache has finished storing the page */
	nfs_fscache_wait_on_page_write(NFS_I(inode), page);

	/* Wait for NFS_INO_INVALIDATING to clear; the result is not checked. */
	wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
			nfs_wait_bit_killable, TASK_KILLABLE);

	lock_page(page);
	/* Re-check ownership now that the page is locked. */
	mapping = page_file_mapping(page);
	if (mapping != inode->i_mapping)
		goto out_unlock;

	wait_on_page_writeback(page);

	pagelen = nfs_page_length(page);
	if (pagelen == 0)
		goto out_unlock;

	/* Success path returns with the page locked. */
	ret = VM_FAULT_LOCKED;
	if (nfs_flush_incompatible(filp, page) == 0 &&
	    nfs_updatepage(filp, page, 0, pagelen) == 0)
		goto out;

	ret = VM_FAULT_SIGBUS;
out_unlock:
	unlock_page(page);
out:
	return ret;
}
Example #16
0
/*
 * Notification that a PTE pointing to an NFS page is about to be made
 * writable, implying that someone is about to modify the page through a
 * shared-writable mapping
 *
 * @vma: vma the fault occurred in
 * @vmf: fault description; vmf->page is the page being made writable
 *
 * Returns VM_FAULT_LOCKED with the page still locked when the page was
 * successfully prepared for writing.  Returns VM_FAULT_NOPAGE (page
 * unlocked) when the page no longer belongs to the file's mapping or has
 * zero length, and VM_FAULT_SIGBUS (page unlocked) when flushing or
 * updating the page failed.
 */
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct file *filp = vma->vm_file;
	struct dentry *dentry = filp->f_path.dentry;
	unsigned pagelen;
	int ret = VM_FAULT_NOPAGE;
	struct address_space *mapping;

	/* i_ino is an unsigned long, so it is printed with %lu. */
	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%lu), offset %lld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		filp->f_mapping->host->i_ino,
		(long long)page_offset(page));

	/* make sure the cache has finished storing the page */
	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);

	lock_page(page);
	/* Re-check ownership now that the page is locked. */
	mapping = page_file_mapping(page);
	if (mapping != dentry->d_inode->i_mapping)
		goto out_unlock;

	wait_on_page_writeback(page);

	pagelen = nfs_page_length(page);
	if (pagelen == 0)
		goto out_unlock;

	/* Success path returns with the page locked. */
	ret = VM_FAULT_LOCKED;
	if (nfs_flush_incompatible(filp, page) == 0 &&
	    nfs_updatepage(filp, page, 0, pagelen) == 0)
		goto out;

	ret = VM_FAULT_SIGBUS;
out_unlock:
	unlock_page(page);
out:
	return ret;
}
Example #17
0
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * Returns 0, or the first value returned by @writepage that is neither 0 nor
 * AOP_WRITEPAGE_ACTIVATE; such a value also stops the walk.  A non-blocking
 * call on a congested backing device returns 0 without writing anything.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		/* Starting at 0 already covers the file in a single pass. */
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
retry:
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		/* Fetch the next batch of dirty-tagged pages, capped at 'end'. */
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
			      PAGECACHE_TAG_DIRTY,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index + 1;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* Under I/O: wait for integrity writeback, skip otherwise. */
			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);

			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					/* Not an error: unlock here and carry on. */
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done = 1;
					break;
				}
 			}

			if (wbc->nr_to_write > 0) {
				wbc->nr_to_write--;
				if (wbc->nr_to_write == 0 &&
				    wbc->sync_mode == WB_SYNC_NONE) {
					/*
					 * We stop writing back only if we are
					 * not doing integrity sync. In case of
					 * integrity sync we have to keep going
					 * because someone may be concurrently
					 * dirtying pages, and we might have
					 * synced a lot of newly appeared dirty
					 * pages, but have not synced all of the
					 * old dirty pages.
					 */
					done = 1;
					break;
				}
			}

			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	/* Remember where to resume the next cyclic / whole-file pass. */
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
Example #18
0
/**
 * write_one_page - start writeout of a single page, optionally waiting for it
 * @page: the page to write; must be locked by the caller
 * @wait: if true, wait for writeback both before and after the write
 *
 * The page is unlocked upon return: directly here when it turns out not to
 * be dirty, otherwise by the ->writepage implementation.
 *
 * Returns 0 when nothing had to be written or the write succeeded, the error
 * returned by ->writepage, or -EIO when @wait is set and the page carries the
 * error flag once writeback has finished.
 */
int write_one_page(struct page *page, int wait)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};
	struct address_space *mapping = page->mapping;
	int err;

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	/* Clean page: nothing to submit, just drop the lock. */
	if (!clear_page_dirty_for_io(page)) {
		unlock_page(page);
		return 0;
	}

	/* Hold an extra reference across the write. */
	page_cache_get(page);
	err = mapping->a_ops->writepage(page, &wbc);
	if (!err && wait) {
		wait_on_page_writeback(page);
		if (PageError(page))
			err = -EIO;
	}
	page_cache_release(page);

	return err;
}
EXPORT_SYMBOL(write_one_page);

/*
 * set_page_dirty variant for address_spaces which use neither buffers nor
 * writeback: only the page's dirty flag is set.  Always returns 0.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (PageDirty(page))
		return 0;

	SetPageDirty(page);
	return 0;
}

/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * Most callers have locked the page, which pins the address_space in memory.
 * But zap_pte_range() does not lock the page, however in that case the
 * mapping is pinned by the vma's ->vm_file reference.
 *
 * We take care to handle the case where the page was truncated from the
 * mapping by re-checking page_mapping() inside tree_lock.
 *
 * Returns 1 if the page's dirty flag was clear before the call (even when
 * the page turns out to have no mapping), 0 if it was already dirty.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		struct address_space *mapping2;

		/* No mapping: the flag is set, but there is nothing to tag. */
		if (!mapping)
			return 1;

		spin_lock_irq(&mapping->tree_lock);
		mapping2 = page_mapping(page);
		if (mapping2) { /* Race with truncate? */
			BUG_ON(mapping2 != mapping);
			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
			/* Dirty accounting only for mappings that ask for it. */
			if (mapping_cap_account_dirty(mapping)) {
				__inc_zone_page_state(page, NR_FILE_DIRTY);
				__inc_bdi_stat(mapping->backing_dev_info,
						BDI_RECLAIMABLE);
				task_io_account_write(PAGE_CACHE_SIZE);
			}
			radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
		}
		spin_unlock_irq(&mapping->tree_lock);
		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * Helper for writepage implementations that decide not to write a page:
 * the locked page is redirtied through redirty_page_for_writepage(), after
 * which the caller should unlock the page and return 0.
 *
 * Counts the page in wbc->pages_skipped and returns the result of
 * __set_page_dirty_nobuffers().
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	wbc->pages_skipped += 1;
	ret = __set_page_dirty_nobuffers(page);

	return ret;
}
/*
 * gfs2_write_jdata_pagevec - write the pages of one pagevec in a transaction
 * @mapping:    mapping the pages are expected to belong to
 * @wbc:        writeback control; nr_to_write is decremented per written page
 * @pvec:       pagevec holding the candidate pages
 * @nr_pages:   number of valid entries in @pvec
 * @end:        last page index to write (inclusive)
 * @done_index: updated with the index of each page considered; set one past
 *              the page when __gfs2_jdata_writepage() fails on it
 *
 * One transaction sized for @nr_pages worth of blocks wraps the whole loop.
 *
 * Returns the negative error of gfs2_trans_begin() if the transaction cannot
 * be started, 1 when the walk stopped early (page beyond @end, write error,
 * or write budget used up in non-integrity mode), and otherwise the result of
 * the last __gfs2_jdata_writepage() call.
 * NOTE(review): 'ret' is not reassigned after gfs2_trans_begin() if every
 * page is skipped, so a positive value from it would be passed through --
 * confirm it only returns 0 or a negative error.
 */
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end,
				    pgoff_t *done_index)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	int i;
	int ret;

	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for(i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		/*
		 * At this point, the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or
		 * even swizzled back from swapper_space to tmpfs file
		 * mapping. However, page->index will not change
		 * because we have a reference on the page.
		 */
		if (page->index > end) {
			/*
			 * can't be range_cyclic (1st pass) because
			 * end == -1 in that case.
			 */
			ret = 1;
			break;
		}

		*done_index = page->index;

		lock_page(page);

		/* Page left this mapping while we were not holding its lock. */
		if (unlikely(page->mapping != mapping)) {
continue_unlock:
			unlock_page(page);
			continue;
		}

		if (!PageDirty(page)) {
			/* someone wrote it for us */
			goto continue_unlock;
		}

		/* Under I/O: wait for integrity writeback, skip otherwise. */
		if (PageWriteback(page)) {
			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);
			else
				goto continue_unlock;
		}

		BUG_ON(PageWriteback(page));
		if (!clear_page_dirty_for_io(page))
			goto continue_unlock;

		trace_wbc_writepage(wbc, mapping->backing_dev_info);

		ret = __gfs2_jdata_writepage(page, wbc);
		if (unlikely(ret)) {
			if (ret == AOP_WRITEPAGE_ACTIVATE) {
				/* Not an error: unlock here and carry on. */
				unlock_page(page);
				ret = 0;
			} else {

				/*
				 * done_index is set past this page,
				 * so media errors will not choke
				 * background writeout for the entire
				 * file. This has consequences for
				 * range_cyclic semantics (ie. it may
				 * not be suitable for data integrity
				 * writeout).
				 */
				*done_index = page->index + 1;
				ret = 1;
				break;
			}
		}

		/*
		 * We stop writing back only if we are not doing
		 * integrity sync. In case of integrity sync we have to
		 * keep going until we have written all the pages
		 * we tagged for writeback prior to entering this loop.
		 */
		if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
			ret = 1;
			break;
		}

	}
	gfs2_trans_end(sdp);
	return ret;
}
/**
 * write_one_page - start writeout of a single page, optionally waiting for it
 * @page: the page to write; must be locked by the caller
 * @wait: if true, wait for writeback both before and after the write
 *
 * The page is unlocked upon return: directly here when it turns out not to
 * be dirty, otherwise by the ->writepage implementation.
 *
 * Returns 0 when nothing had to be written or the write succeeded, the error
 * returned by ->writepage, or -EIO when @wait is set and the page carries the
 * error flag once writeback has finished.
 */
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};
	int ret = 0;

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	if (!clear_page_dirty_for_io(page))
		goto out_unlock;

	/* Hold an extra reference across the write. */
	page_cache_get(page);
	ret = mapping->a_ops->writepage(page, &wbc);
	if (wait && ret == 0) {
		wait_on_page_writeback(page);
		ret = PageError(page) ? -EIO : 0;
	}
	page_cache_release(page);
	return ret;

out_unlock:
	/* Clean page: nothing was submitted, so drop the lock ourselves. */
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * set_page_dirty variant for address_spaces which use neither buffers nor
 * writeback.
 *
 * Returns 1 if this call changed the page from clean to dirty, 0 if the page
 * was already dirty.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	/* Cheap non-atomic test first; already dirty means nothing to do. */
	if (PageDirty(page))
		return 0;

	return !TestSetPageDirty(page);
}

/*
 * Dirty-page accounting helper for the set_page_dirty family.
 * NOTE: This relies on being atomic wrt interrupts.
 *
 * Does nothing unless @mapping accounts dirty pages; otherwise bumps the
 * zone, bdi and per-task dirty/write counters for @page.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	if (!mapping_cap_account_dirty(mapping))
		return;

	__inc_zone_page_state(page, NR_FILE_DIRTY);
	__inc_zone_page_state(page, NR_DIRTIED);
	__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
	task_dirty_inc(current);
	task_io_account_write(PAGE_CACHE_SIZE);
}
EXPORT_SYMBOL(account_page_dirtied);

/*
 * Helper function for set_page_writeback family.
 * NOTE: Unlike account_page_dirtied this does not rely on being atomic
 * wrt interrupts.
 *
 * Increments the NR_WRITEBACK and NR_WRITTEN zone counters for @page.
 */
void account_page_writeback(struct page *page)
{
	inc_zone_page_state(page, NR_WRITEBACK);
	inc_zone_page_state(page, NR_WRITTEN);
}
/**
 * write_cache_pages - walk the dirty pages of an address space and write them
 * @mapping: address space whose pages are written
 * @wbc: writeback control; nr_to_write is decremented for every page written
 * @writepage: function called for each page
 * @data: data passed to @writepage
 *
 * For WB_SYNC_ALL the pages in the range are first tagged with
 * PAGECACHE_TAG_TOWRITE and only tagged pages are written; otherwise the
 * PAGECACHE_TAG_DIRTY tag is used directly.
 *
 * Returns 0, or the first value returned by @writepage that is neither 0 nor
 * AOP_WRITEPAGE_ACTIVATE; such a value also stops the walk.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;/* index of the next page to flush */
	pgoff_t end;		/* Inclusive *//* index of the last page to flush; -1 means cyclic flushing */
	pgoff_t done_index;
	int cycled;/* used when a wrap-around splits the flush into two segments; 1 means the first segment is already done */
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic)
	{// cyclic flushing requested?
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	}
	else 
	{
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	
	while (!done && (index <= end)) 
	{
		int i;
		/* look up the pages in the address space that carry 'tag' and store them in the pagevec */
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;
		// process the pages that were found
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index + 1;
			// lock the page
			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			 /* another task may have changed the page while we waited for the lock, hence the checks below */
			if (unlikely(page->mapping != mapping)) {// page no longer belongs to this mapping
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {// page already written back; its dirty flag has been cleared
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {// page is under writeback; what to do depends on sync_mode
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);// wait for the in-flight writeback to finish before continuing
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, mapping->backing_dev_info);
			// start writing back the dirty page
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			/* the page was handed to writepage: decrement the counter */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
Example #22
0
/*
 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 */
int __f2fs_add_link(struct inode *dir, const struct qstr *name,
						struct inode *inode)
{
	unsigned int bit_pos;
	unsigned int level;
	unsigned int current_depth;
	unsigned long bidx, block;
	f2fs_hash_t dentry_hash;
	struct f2fs_dir_entry *de;
	unsigned int nbucket, nblock;
	size_t namelen = name->len;
	struct page *dentry_page = NULL;
	struct f2fs_dentry_block *dentry_blk = NULL;
	int slots = GET_DENTRY_SLOTS(namelen);
	struct page *page;
	int err = 0;
	int i;

	dentry_hash = f2fs_dentry_hash(name->name, name->len);
	level = 0;
	current_depth = F2FS_I(dir)->i_current_depth;
	if (F2FS_I(dir)->chash == dentry_hash) {
		level = F2FS_I(dir)->clevel;
		F2FS_I(dir)->chash = 0;
	}

start:
	if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
		return -ENOSPC;

	/* Increase the depth, if required */
	if (level == current_depth)
		++current_depth;

	nbucket = dir_buckets(level);
	nblock = bucket_blocks(level);

	bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));

	for (block = bidx; block <= (bidx + nblock - 1); block++) {
		dentry_page = get_new_data_page(dir, NULL, block, true);
		if (IS_ERR(dentry_page))
			return PTR_ERR(dentry_page);

		dentry_blk = kmap(dentry_page);
		bit_pos = room_for_filename(dentry_blk, slots);
		if (bit_pos < NR_DENTRY_IN_BLOCK)
			goto add_dentry;

		kunmap(dentry_page);
		f2fs_put_page(dentry_page, 1);
	}

	/* Move to next level to find the empty slot for new dentry */
	++level;
	goto start;
add_dentry:
	wait_on_page_writeback(dentry_page);

	page = init_inode_metadata(inode, dir, name);
	if (IS_ERR(page)) {
		err = PTR_ERR(page);
		goto fail;
	}
	de = &dentry_blk->dentry[bit_pos];
	de->hash_code = dentry_hash;
	de->name_len = cpu_to_le16(namelen);
	memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
	de->ino = cpu_to_le32(inode->i_ino);
	set_de_type(de, inode);
	for (i = 0; i < slots; i++)
		test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
	set_page_dirty(dentry_page);

	/* we don't need to mark_inode_dirty now */
	F2FS_I(inode)->i_pino = dir->i_ino;
	update_inode(inode, page);
	f2fs_put_page(page, 1);

	update_parent_metadata(dir, inode, current_depth);
fail:
	clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
	kunmap(dentry_page);
	f2fs_put_page(dentry_page, 1);
	return err;
}
Example #23
0
/*
 * wrapfs_writepage - write an upper (wrapfs) page through to the lower inode
 * @page: locked, up-to-date upper page to write
 * @wbc:  writeback control passed down by the caller
 *
 * Copies the upper page's contents into the page at the same index of the
 * lower inode's mapping, marks that lower page dirty and, unless the call is
 * made for reclaim, invokes the lower ->writepage on it.
 *
 * The upper page is always unlocked before returning.  Returns 0 on success
 * (including the "nothing to do", "could not get a lower page" and reclaim
 * cases) or the negative error from the lower ->writepage.
 * AOP_WRITEPAGE_ACTIVATE is never propagated to the caller.
 */
static int wrapfs_writepage(struct page *page, struct writeback_control *wbc)
{
        int err = -EIO;
        struct inode *inode;
        struct inode *lower_inode;
        struct page *lower_page;
        struct address_space *lower_mapping; /* lower inode mapping */
        gfp_t mask;

        BUG_ON(!PageUptodate(page));
        inode = page->mapping->host;
        /*
         * If there is no lower inode, there is nothing to do.  (The check
         * used to be inverted: it bailed out whenever a lower inode existed
         * and went on to dereference a NULL lower inode otherwise.)
         */
        if (!inode || !WRAPFS_I(inode) || !WRAPFS_I(inode)->lower_inode) {
                err = 0;
                goto out;
        }
        lower_inode = wrapfs_lower_inode(inode);
        lower_mapping = lower_inode->i_mapping;

        /*
         * find lower page (returns a locked page)
         *
         * We turn off __GFP_FS while we look for or create a new lower
         * page.  This prevents a recursion into the file system code, which
         * under memory pressure conditions could lead to a deadlock.  This
         * is similar to how the loop driver behaves (see loop_set_fd in
         * drivers/block/loop.c).  If we can't find the lower page, we
         * redirty our page and return "success" so that the VM will call us
         * again in the (hopefully near) future.
         */
        mask = mapping_gfp_mask(lower_mapping) & ~(__GFP_FS);
        lower_page = find_or_create_page(lower_mapping, page->index, mask);
        if (!lower_page) {
                err = 0;
                set_page_dirty(page);
                goto out;
        }

        /* copy page data from our upper page to the lower page */
        copy_highpage(lower_page, page);
        flush_dcache_page(lower_page);
        SetPageUptodate(lower_page);
        set_page_dirty(lower_page);

        /*
         * Call lower writepage (expects locked page).  However, if we are
         * called with wbc->for_reclaim, then the VFS/VM just wants to
         * reclaim our page.  Therefore, we don't need to call the lower
         * ->writepage: just copy our data to the lower page (already done
         * above), then mark the lower page dirty and unlock it, and return
         * success.
         */
        if (wbc->for_reclaim) {
                unlock_page(lower_page);
                /* report success; err still held its initial -EIO here */
                err = 0;
                goto out_release;
        }

        BUG_ON(!lower_mapping->a_ops->writepage);
        wait_on_page_writeback(lower_page); /* prevent multiple writers */
        clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
        err = lower_mapping->a_ops->writepage(lower_page, wbc);
        if (err < 0)
                goto out_release;

        /*
         * Lower file systems such as ramfs and tmpfs, may return
         * AOP_WRITEPAGE_ACTIVATE so that the VM won't try to (pointlessly)
         * write the page again for a while.  But those lower file systems
         * also set the page dirty bit back again.  Since we successfully
         * copied our page data to the lower page, then the VM will come
         * back to the lower page (directly) and try to flush it.  So we can
         * save the VM the hassle of coming back to our page and trying to
         * flush too.  Therefore, we don't re-dirty our own page, and we
         * never return AOP_WRITEPAGE_ACTIVATE back to the VM (we consider
         * this a success).
         *
         * We also unlock the lower page if the lower ->writepage returned
         * AOP_WRITEPAGE_ACTIVATE.  (This "anomalous" behaviour may be
         * addressed in future shmem/VM code.)
         */
        if (err == AOP_WRITEPAGE_ACTIVATE) {
                err = 0;
                unlock_page(lower_page);
        }

        /*
         * All is well.  NOTE: size and timestamps of the lower inode are
         * not copied back to the upper inode here.
         */

out_release:
        /* b/c find_or_create_page increased refcnt */
        page_cache_release(lower_page);
out:
        /*
         * We unlock our page unconditionally, because we never return
         * AOP_WRITEPAGE_ACTIVATE.
         */
        unlock_page(page);
        return err;
}
Example #24
0
struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
{
	struct f2fs_iget_args args = {
		.ino = ino,
		.on_free = 0
	};
	struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);

	if (inode)
		return inode;
	if (!args.on_free)
		return f2fs_iget(sb, ino);
	return ERR_PTR(-ENOENT);
}

/*
 * Fill the in-memory inode (VFS fields and the f2fs-private info) from the
 * raw inode stored in its node page.
 *
 * Returns 0 on success or the error encoded in the pointer returned by
 * get_node_page().
 */
static int do_read_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_inode *raw;
	struct page *ipage;

	/* Check if ino is within scope */
	check_nid_range(sbi, inode->i_ino);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	raw = &((struct f2fs_node *)page_address(ipage))->i;

	/* ownership, mode, link count and sizes */
	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_uid_write(inode, le32_to_cpu(raw->i_uid));
	i_gid_write(inode, le32_to_cpu(raw->i_gid));
	set_nlink(inode, le32_to_cpu(raw->i_links));
	inode->i_size = le64_to_cpu(raw->i_size);
	inode->i_blocks = le64_to_cpu(raw->i_blocks);

	/* timestamps: seconds and nanoseconds are stored separately */
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_generation = le32_to_cpu(raw->i_generation);

	/* f2fs-private state */
	fi->i_current_depth = le32_to_cpu(raw->i_current_depth);
	fi->i_xattr_nid = le32_to_cpu(raw->i_xattr_nid);
	fi->i_flags = le32_to_cpu(raw->i_flags);
	fi->i_pino = le32_to_cpu(raw->i_pino);
	fi->i_advise = raw->i_advise;
	fi->flags = 0;
	/* start one version behind the current checkpoint */
	fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
	get_extent_info(&fi->ext, raw->i_ext);

	f2fs_put_page(ipage, 1);
	return 0;
}

/*
 * Obtain the inode numbered @ino on @sb.
 *
 * An inode that iget_locked() returns without I_NEW set is handed back as
 * is.  A new one is read from disk with do_read_inode() -- except for the
 * node and meta inodes, which are not read -- and then wired up with the
 * operation tables matching its kind before being unlocked.
 *
 * Returns the inode, or an ERR_PTR: -ENOMEM when iget_locked() fails, the
 * do_read_inode() error, -ENOENT for an inode with zero links while
 * sbi->por_doing is clear, or -EIO for an unrecognised file type.
 */
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct inode *inode = iget_locked(sb, ino);
	int ret;

	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	/* The node and meta inodes are set up without reading from disk. */
	if (ino != F2FS_NODE_INO(sbi) && ino != F2FS_META_INO(sbi)) {
		ret = do_read_inode(inode);
		if (ret)
			goto bad_inode;

		if (!sbi->por_doing && inode->i_nlink == 0) {
			ret = -ENOENT;
			goto bad_inode;
		}
	}

	/* Attach the operation tables that match the inode's kind. */
	if (ino == F2FS_NODE_INO(sbi)) {
		inode->i_mapping->a_ops = &f2fs_node_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
	} else if (ino == F2FS_META_INO(sbi)) {
		inode->i_mapping->a_ops = &f2fs_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
	} else if (S_ISREG(inode->i_mode)) {
		inode->i_op = &f2fs_file_inode_operations;
		inode->i_fop = &f2fs_file_operations;
		inode->i_mapping->a_ops = &f2fs_dblock_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &f2fs_dir_inode_operations;
		inode->i_fop = &f2fs_dir_operations;
		inode->i_mapping->a_ops = &f2fs_dblock_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE |
				__GFP_ZERO);
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &f2fs_symlink_inode_operations;
		inode->i_mapping->a_ops = &f2fs_dblock_aops;
	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
			S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		inode->i_op = &f2fs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
	} else {
		ret = -EIO;
		goto bad_inode;
	}

	unlock_new_inode(inode);
	return inode;

bad_inode:
	iget_failed(inode);
	return ERR_PTR(ret);
}

void update_inode(struct inode *inode, struct page *node_page)
{
	struct f2fs_node *rn;
	struct f2fs_inode *ri;

	wait_on_page_writeback(node_page);

	rn = page_address(node_page);
	ri = &(rn->i);

	ri->i_mode = cpu_to_le16(inode->i_mode);
	ri->i_advise = F2FS_I(inode)->i_advise;
	ri->i_uid = cpu_to_le32(i_uid_read(inode));
	ri->i_gid = cpu_to_le32(i_gid_read(inode));
	ri->i_links = cpu_to_le32(inode->i_nlink);
	ri->i_size = cpu_to_le64(i_size_read(inode));
	ri->i_blocks = cpu_to_le64(inode->i_blocks);
	set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);

	ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
	ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
	ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
	ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
	ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
	ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
	ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
	ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
	ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
	ri->i_generation = cpu_to_le32(inode->i_generation);
	set_cold_node(inode, node_page);
	set_page_dirty(node_page);
}

/*
 * Write the in-memory state of @inode into its node page.
 *
 * Nothing is done for the node and meta inodes.  When @wbc is non-NULL,
 * f2fs_balance_fs() is called first.  If the node page is found clean, it is
 * released and fetched again with sbi->write_inode held, and that mutex
 * stays held across the update.
 *
 * Returns 0 on success or the error from get_node_page().
 */
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *ipage;
	bool locked = false;
	int err = 0;

	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
			inode->i_ino == F2FS_META_INO(sbi))
		return 0;

	if (wbc)
		f2fs_balance_fs(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	if (!PageDirty(ipage)) {
		/* Clean page: retake it under the write_inode mutex. */
		f2fs_put_page(ipage, 1);
		mutex_lock(&sbi->write_inode);
		locked = true;

		ipage = get_node_page(sbi, inode->i_ino);
		if (IS_ERR(ipage)) {
			err = PTR_ERR(ipage);
			goto unlock;
		}
	}

	update_inode(inode, ipage);
	f2fs_put_page(ipage, 1);
unlock:
	if (locked)
		mutex_unlock(&sbi->write_inode);
	return err;
}

/*
 * Called at the last iput() if i_nlink is zero
 */
void f2fs_evict_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

	truncate_inode_pages(&inode->i_data, 0);

	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
			inode->i_ino == F2FS_META_INO(sbi))
		goto no_delete;

	BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
	remove_dirty_dir_inode(inode);

	if (inode->i_nlink || is_bad_inode(inode))
		goto no_delete;

	set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
	i_size_write(inode, 0);

	if (F2FS_HAS_BLOCKS(inode))
		f2fs_truncate(inode);

	remove_inode_page(inode);
no_delete:
	clear_inode(inode);
}
Example #25
0
/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts range to invalidate
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned properly.
 */
void truncate_inode_pages_range(struct address_space *mapping,
                                loff_t lstart, loff_t lend)
{
    pgoff_t		start;		/* inclusive */
    pgoff_t		end;		/* exclusive */
    unsigned int	partial_start;	/* inclusive */
    unsigned int	partial_end;	/* exclusive */
    struct pagevec	pvec;
    pgoff_t		indices[PAGEVEC_SIZE];
    pgoff_t		index;
    int		i;

    cleancache_invalidate_inode(mapping);
    /* Nothing cached at all (neither pages nor exceptional entries). */
    if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
        return;

    /* Offsets within partial pages */
    partial_start = lstart & (PAGE_CACHE_SIZE - 1);
    partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);

    /*
     * 'start' and 'end' always covers the range of pages to be fully
     * truncated. Partial pages are covered with 'partial_start' at the
     * start of the range and 'partial_end' at the end of the range.
     * Note that 'end' is exclusive while 'lend' is inclusive.
     */
    start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
    if (lend == -1)
        /*
         * lend == -1 indicates end-of-file so we have to set 'end'
         * to the highest possible pgoff_t and since the type is
         * unsigned we're using -1.
         */
        end = -1;
    else
        end = (lend + 1) >> PAGE_CACHE_SHIFT;

    /*
     * First pass: never block.  Pages whose lock cannot be taken right
     * away, or which are under writeback, are skipped here and left for
     * the second pass below.
     */
    pagevec_init(&pvec, 0);
    index = start;
    while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE),
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            if (!trylock_page(page))
                continue;
            WARN_ON(page->index != index);
            if (PageWriteback(page)) {
                unlock_page(page);
                continue;
            }
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        /* Continue the lookup just past the last entry examined. */
        index++;
    }

    /*
     * Unaligned head: 'start' was rounded up, so the page holding lstart
     * is start - 1.  It is zeroed from partial_start on rather than
     * removed.
     */
    if (partial_start) {
        struct page *page = find_lock_page(mapping, start - 1);
        if (page) {
            unsigned int top = PAGE_CACHE_SIZE;
            if (start > end) {
                /* Truncation within a single page */
                top = partial_end;
                /* Tail already covered by 'top'; skip the block below. */
                partial_end = 0;
            }
            wait_on_page_writeback(page);
            zero_user_segment(page, partial_start, top);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, partial_start,
                                  top - partial_start);
            unlock_page(page);
            page_cache_release(page);
        }
    }
    /* Unaligned tail: zero the first partial_end bytes of page 'end'. */
    if (partial_end) {
        struct page *page = find_lock_page(mapping, end);
        if (page) {
            wait_on_page_writeback(page);
            zero_user_segment(page, 0, partial_end);
            cleancache_invalidate_page(mapping, page);
            if (page_has_private(page))
                do_invalidatepage(page, 0,
                                  partial_end);
            unlock_page(page);
            page_cache_release(page);
        }
    }
    /*
     * If the truncation happened within a single page no pages
     * will be released, just zeroed, so we can bail out now.
     */
    if (start >= end)
        return;

    /*
     * Second pass: block on page locks and on writeback.  The scan is
     * restarted from 'start' until a lookup beginning there finds nothing
     * left inside [start, end).
     */
    index = start;
    for ( ; ; ) {
        cond_resched();
        if (!pagevec_lookup_entries(&pvec, mapping, index,
                                    min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
            /* If all gone from start onwards, we're done */
            if (index == start)
                break;
            /* Otherwise restart to make sure all gone */
            index = start;
            continue;
        }
        if (index == start && indices[0] >= end) {
            /* All gone out of hole to be punched, we're done */
            pagevec_remove_exceptionals(&pvec);
            pagevec_release(&pvec);
            break;
        }
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index >= end) {
                /*
                 * Restart punch to make sure all gone
                 * (start - 1 because of the index++ after this loop).
                 */
                index = start - 1;
                break;
            }

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            lock_page(page);
            WARN_ON(page->index != index);
            wait_on_page_writeback(page);
            truncate_inode_page(mapping, page);
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        index++;
    }
    cleancache_invalidate_inode(mapping);
}
Example #26
0
/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Each page is locked and any writeback on it is waited for before it is
 * laundered via do_launder_page() and invalidated.
 *
 * Returns -EBUSY if any pages could not be invalidated.  A negative value
 * from do_launder_page() is reported as well; when several pages fail, the
 * most recent failure is the one returned.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
                                  pgoff_t start, pgoff_t end)
{
    pgoff_t indices[PAGEVEC_SIZE];
    struct pagevec pvec;
    pgoff_t index;
    int i;
    int ret = 0;
    int ret2 = 0;
    int did_range_unmap = 0;

    cleancache_invalidate_inode(mapping);
    pagevec_init(&pvec, 0);
    index = start;
    /*
     * 'end' is inclusive, so up to end - index + 1 entries are wanted,
     * capped at PAGEVEC_SIZE.
     * NOTE(review): the "- 1 ... + 1" form presumably avoids overflow when
     * end - index is the maximum pgoff_t -- confirm.
     */
    while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
            min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
            indices)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            struct page *page = pvec.pages[i];

            /* We rely upon deletion not changing page->index */
            index = indices[i];
            if (index > end)
                break;

            if (radix_tree_exceptional_entry(page)) {
                clear_exceptional_entry(mapping, index, page);
                continue;
            }

            lock_page(page);
            WARN_ON(page->index != index);
            /* Page no longer belongs to this mapping: leave it alone. */
            if (page->mapping != mapping) {
                unlock_page(page);
                continue;
            }
            wait_on_page_writeback(page);
            if (page_mapped(page)) {
                if (!did_range_unmap) {
                    /*
                     * Zap the rest of the file in one hit.
                     */
                    unmap_mapping_range(mapping,
                                        (loff_t)index << PAGE_CACHE_SHIFT,
                                        (loff_t)(1 + end - index)
                                        << PAGE_CACHE_SHIFT,
                                        0);
                    did_range_unmap = 1;
                } else {
                    /*
                     * Just zap this page
                     */
                    unmap_mapping_range(mapping,
                                        (loff_t)index << PAGE_CACHE_SHIFT,
                                        PAGE_CACHE_SIZE, 0);
                }
            }
            BUG_ON(page_mapped(page));
            ret2 = do_launder_page(mapping, page);
            if (ret2 == 0) {
                if (!invalidate_complete_page2(mapping, page))
                    ret2 = -EBUSY;
            }
            /* Remember the failure but keep processing the range. */
            if (ret2 < 0)
                ret = ret2;
            unlock_page(page);
        }
        pagevec_remove_exceptionals(&pvec);
        pagevec_release(&pvec);
        cond_resched();
        /* Continue the lookup just past the last entry examined. */
        index++;
    }
    cleancache_invalidate_inode(mapping);
    return ret;
}
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
    struct page *page = vmf->page;
    struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    unsigned long last_index;
    u64 pos = page->index << PAGE_CACHE_SHIFT;
    unsigned int data_blocks, ind_blocks, rblocks;
    struct gfs2_holder gh;
    struct gfs2_qadata *qa;
    loff_t size;
    int ret;

    vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

    gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
    ret = gfs2_glock_nq(&gh);
    if (ret)
        goto out;

    set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
    set_bit(GIF_SW_PAGED, &ip->i_flags);

    if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
        lock_page(page);
        if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
            ret = -EAGAIN;
            unlock_page(page);
        }
        goto out_unlock;
    }

    ret = -ENOMEM;
    qa = gfs2_qadata_get(ip);
    if (qa == NULL)
        goto out_unlock;

    ret = gfs2_quota_lock_check(ip);
    if (ret)
        goto out_alloc_put;
    gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
    ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
    if (ret)
        goto out_quota_unlock;

    rblocks = RES_DINODE + ind_blocks;
    if (gfs2_is_jdata(ip))
        rblocks += data_blocks ? data_blocks : 1;
    if (ind_blocks || data_blocks) {
        rblocks += RES_STATFS + RES_QUOTA;
        rblocks += gfs2_rg_blocks(ip);
    }
    ret = gfs2_trans_begin(sdp, rblocks, 0);
    if (ret)
        goto out_trans_fail;

    lock_page(page);
    ret = -EINVAL;
    size = i_size_read(inode);
    last_index = (size - 1) >> PAGE_CACHE_SHIFT;

    if (size == 0 || (page->index > last_index))
        goto out_trans_end;

    ret = -EAGAIN;
    if (!PageUptodate(page) || page->mapping != inode->i_mapping)
        goto out_trans_end;


    ret = 0;
    if (gfs2_is_stuffed(ip))
        ret = gfs2_unstuff_dinode(ip, page);
    if (ret == 0)
        ret = gfs2_allocate_page_backing(page);

out_trans_end:
    if (ret)
        unlock_page(page);
    gfs2_trans_end(sdp);
out_trans_fail:
    gfs2_inplace_release(ip);
out_quota_unlock:
    gfs2_quota_unlock(ip);
out_alloc_put:
    gfs2_qadata_put(ip);
out_unlock:
    gfs2_glock_dq(&gh);
out:
    gfs2_holder_uninit(&gh);
    if (ret == 0) {
        set_page_dirty(page);

        if (inode->i_sb->s_frozen == SB_UNFROZEN) {
            wait_on_page_writeback(page);
        } else {
            ret = -EAGAIN;
            unlock_page(page);
        }
    }
    return block_page_mkwrite_return(ret);
}
Example #28
0
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	unsigned int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering, which clusters forked mms
	 * together, child after parent.  If we race with dup_mmap(), we
	 * prefer to resolve parent before child, lest we miss entries
	 * duplicated after we scanned child: using last mm would invert
	 * that.  Though it's only a serious concern when an overflowed
	 * swap count is reset from SWAP_MAP_MAX, preventing a rescan.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * there are races when an instance of an entry might be missed.
	 */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/* 
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it. 
		 */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry, NULL, 0);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page_locked" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * Remove all references to entry.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_mm(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (*swap_map > 1 && !retval &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				if (!atomic_inc_not_zero(&mm->mm_users))
					continue;
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (swcount <= 1)
					;
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_mm(mm, entry, page);
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (retval) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			spin_lock(&swap_lock);
			*swap_map = 1;
			spin_unlock(&swap_lock);
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_unmap could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 *
		 * Note shmem_unuse already deleted a swappage from
		 * the swap cache, unless the move to filepage failed:
		 * in which case it left swappage in cache, lowered its
		 * swap count to pass quickly through the loops above,
		 * and now we must reincrement count to try again later.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}
		if (PageSwapCache(page)) {
			if (shmem)
				swap_duplicate(entry);
			else
				delete_from_swap_cache(page);
		}

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so shrink_page_list will preserve it.
		 */
		SetPageDirty(page);
		unlock_page(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.
		 */
		cond_resched();
	}
Example #29
0
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned long last_index;
	u64 pos = page->index << PAGE_CACHE_SHIFT;
	unsigned int data_blocks, ind_blocks, rblocks;
	struct gfs2_holder gh;
	loff_t size;
	int ret;

	sb_start_pagefault(inode->i_sb);

	/* Update file times before taking page lock */
	file_update_time(vma->vm_file);

	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out;

	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
		goto out_unlock;
	}

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_unlock;

	ret = gfs2_quota_lock_check(ip);
	if (ret)
		goto out_unlock;
	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
	ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks) {
		rblocks += RES_STATFS + RES_QUOTA;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
	}
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
	ret = -EINVAL;
	size = i_size_read(inode);
	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
	/* Check page index against inode size */
	if (size == 0 || (page->index > last_index))
		goto out_trans_end;

	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
	ret = 0;
	if (gfs2_is_stuffed(ip))
		ret = gfs2_unstuff_dinode(ip, page);
	if (ret == 0)
		ret = gfs2_allocate_page_backing(page);

out_trans_end:
	if (ret)
		unlock_page(page);
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out:
	gfs2_holder_uninit(&gh);
	if (ret == 0) {
		set_page_dirty(page);
		wait_on_page_writeback(page);
	}
	sb_end_pagefault(inode->i_sb);
	return block_page_mkwrite_return(ret);
}
Example #30
0
/**
 * truncate_inode_pages - drop every page cache page at or beyond an offset
 * @mapping: address space whose pages are being truncated
 * @lstart: byte offset from which to truncate
 *
 * Removes the pages lying beyond @lstart from the page cache and, when
 * @lstart is not page aligned, hands the page straddling it to
 * truncate_partial_page().
 *
 * The work is done in two sweeps.  The first sweep never sleeps on a page:
 * pages that are already locked or under writeback are simply skipped.
 * The second sweep locks each remaining page and waits for its writeback,
 * and keeps rescanning from the start until a lookup at the start index
 * finds nothing.  Doing the cheap sweep first keeps the amount of IO waited
 * on in the affected region low, and leaves little for the second sweep to
 * search through.
 *
 * page->index is sampled into a local when it is examined without the page
 * lock held, since it may change at any time in that situation.
 *
 * The cache-hot hint is passed down to the page freeing code: even for a
 * large mapping the final pages are probably the most recently touched,
 * and freeing proceeds in ascending file offset order.
 *
 * Called under (and serialised by) inode->i_sem.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	struct pagevec pvec;
	pgoff_t index;
	int n;

	if (!mapping->nrpages)
		return;

	pagevec_init(&pvec, 0);

	/* Sweep 1: take whatever can be had without blocking. */
	index = start;
	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
		for (n = 0; n < pagevec_count(&pvec); n++) {
			struct page *victim = pvec.pages[n];
			pgoff_t seen = victim->index;

			/* Resume the next lookup just past this page. */
			index = (seen > index ? seen : index) + 1;

			if (TestSetPageLocked(victim))
				continue;
			if (!PageWriteback(victim))
				truncate_complete_page(mapping, victim);
			unlock_page(victim);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/* The page containing lstart itself is only partially truncated. */
	if (partial) {
		struct page *tail = find_lock_page(mapping, start - 1);

		if (tail) {
			wait_on_page_writeback(tail);
			truncate_partial_page(tail, partial);
			unlock_page(tail);
			page_cache_release(tail);
		}
	}

	/* Sweep 2: blocking; repeat until a scan from start comes up empty. */
	index = start;
	for (;;) {
		cond_resched();

		if (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
			for (n = 0; n < pagevec_count(&pvec); n++) {
				struct page *victim = pvec.pages[n];
				pgoff_t seen;

				lock_page(victim);
				wait_on_page_writeback(victim);
				seen = victim->index;
				index = (seen > index ? seen : index) + 1;
				truncate_complete_page(mapping, victim);
				unlock_page(victim);
			}
			pagevec_release(&pvec);
			continue;
		}

		/* Nothing found: done if we were already scanning from start. */
		if (index == start)
			break;
		index = start;
	}
}