int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	atomic_set(&io_page->p_count, 1);
	get_page(page);
	set_page_writeback(page);
	ClearPageError(page);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {

		block_end = block_start + blocksize;
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		clear_buffer_dirty(bh);
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	/*
	 * If the page was truncated before we could do the writeback,
	 * or we had a memory allocation error while trying to write
	 * the first buffer head, we won't have submitted any pages for
	 * I/O.  In that case we need to make sure we've cleared the
	 * PageWriteback bit from the page to prevent the system from
	 * wedging later on.
	 */
	put_io_page(io_page);
	return ret;
}
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
static void balance_dirty_pages(struct address_space *mapping,
				unsigned long write_chunk)
{
	long nr_reclaimable, bdi_nr_reclaimable;
	long nr_writeback, bdi_nr_writeback;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long pages_written = 0;
	unsigned long pause = 1;

	struct backing_dev_info *bdi = mapping->backing_dev_info;

	for (;;) {
		struct writeback_control wbc = {
			.sync_mode	= WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write	= write_chunk,
			.range_cyclic	= 1,
		};

		get_dirty_limits(&background_thresh, &dirty_thresh,
				&bdi_thresh, bdi);

		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_writeback = global_page_state(NR_WRITEBACK);

		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;

		/*
		 * Throttle it only when the background writeback cannot
		 * catch up. This avoids (excessively) small writeouts
		 * when the bdi limits are ramping up.
		 */
		if (nr_reclaimable + nr_writeback <
				(background_thresh + dirty_thresh) / 2)
			break;

		if (!bdi->dirty_exceeded)
			bdi->dirty_exceeded = 1;

		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
		 * Unstable writes are a feature of certain networked
		 * filesystems (e.g. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 * Only move pages to writeback if this bdi is over its
		 * threshold; otherwise wait until the disk writes catch
		 * up.
		 */
		if (bdi_nr_reclaimable > bdi_thresh) {
			writeback_inodes_wb(&bdi->wb, &wbc);
			pages_written += write_chunk - wbc.nr_to_write;
			get_dirty_limits(&background_thresh, &dirty_thresh,
				       &bdi_thresh, bdi);
		}

		/*
		 * In order to avoid the stacked BDI deadlock we need
		 * to ensure we accurately count the 'dirty' pages when
		 * the threshold is low.
		 *
		 * Otherwise it would be possible to get thresh+n pages
		 * reported dirty, even though there are thresh-m pages
		 * actually dirty; with m+n sitting in the percpu
		 * deltas.
		 */
		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
		} else if (bdi_nr_reclaimable) {
			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
		}

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;
		if (pages_written >= write_chunk)
			break;		/* We've done our duty */

		__set_current_state(TASK_UNINTERRUPTIBLE);
		io_schedule_timeout(pause);

		/*
		 * Increase the delay for each loop, up to our previous
		 * default of taking a 100ms nap.
		 */
		pause <<= 1;
		if (pause > HZ / 10)
			pause = HZ / 10;
	}

	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
			bdi->dirty_exceeded)
		bdi->dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if ((laptop_mode && pages_written) ||
	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
			       + global_page_state(NR_UNSTABLE_NFS))
					  > background_thresh)))
		bdi_start_background_writeback(bdi);
}
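
/*
 * Worked example of the backoff above (illustrative, not from the source):
 * with HZ = 1000, the per-iteration sleep grows as 1, 2, 4, ... 64 jiffies
 * (1 ms ... 64 ms) and is then clamped to HZ / 10 = 100 jiffies (the
 * "100ms nap" mentioned in the comment) for every further iteration,
 * until the bdi drops under its threshold or write_chunk pages have been
 * written.
 */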

void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
	if (set_page_dirty(page) || page_mkwrite) {
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			balance_dirty_pages_ratelimited(mapping);
	}
}

static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;

/**
 * balance_dirty_pages_ratelimited_nr - balance dirty memory state
 * @mapping: address_space which was dirtied
 * @nr_pages_dirtied: number of pages which the caller has just dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, get_writeback_state is expensive, so try to avoid
 * calling it too often (ratelimiting).  But once we're over the dirty memory
 * limit we decrease the ratelimiting by a lot, to prevent individual processes
 * from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
					unsigned long nr_pages_dirtied)
{
	unsigned long ratelimit;
	unsigned long *p;

	ratelimit = ratelimit_pages;
	if (mapping->backing_dev_info->dirty_exceeded)
		ratelimit = 8;

	/*
	 * Check the rate limiting. Also, we do not want to throttle real-time
	 * tasks in balance_dirty_pages(). Period.
	 */
	preempt_disable();
	p = &__get_cpu_var(bdp_ratelimits);
	*p += nr_pages_dirtied;
	if (unlikely(*p >= ratelimit)) {
		ratelimit = sync_writeback_pages(*p);
		*p = 0;
		preempt_enable();
		balance_dirty_pages(mapping, ratelimit);
		return;
	}
	preempt_enable();
}
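
/*
 * Hedged usage sketch (not part of the source above): a write path that
 * has just dirtied a batch of pages would report each page so that the
 * ratelimiting above can kick in.  mark_example_page_dirty() is a
 * hypothetical stand-in for whatever actually dirties the page.
 */
static void example_report_dirtied(struct address_space *mapping,
				   struct page **pages, unsigned long nr)
{
	unsigned long i;

	for (i = 0; i < nr; i++)
		mark_example_page_dirty(pages[i]);	/* hypothetical helper */

	/*
	 * One call per batch; balance_dirty_pages() is only entered once
	 * the per-CPU bdp_ratelimits counter crosses the ratelimit.
	 */
	balance_dirty_pages_ratelimited_nr(mapping, nr);
}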
Example #3
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage)
{
	struct address_space *swap_space;
	struct file *swap_storage;
	struct page *from_page;
	struct page *to_page;
	void *from_virtual;
	void *to_virtual;
	int i;
	int ret = -ENOMEM;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	/*
	 * For user buffers, just unpin the pages, as there should be
	 * vma references.
	 */

	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
		ttm_tt_free_user_pages(ttm);
		ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
		ttm->swap_storage = NULL;
		return 0;
	}

	if (!persistant_swap_storage) {
		swap_storage = shmem_file_setup("ttm swap",
						ttm->num_pages << PAGE_SHIFT,
						0);
		if (unlikely(IS_ERR(swap_storage))) {
			printk(KERN_ERR "Failed allocating swap storage.\n");
			return PTR_ERR(swap_storage);
		}
	} else
		swap_storage = persistant_swap_storage;

	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;

	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = read_mapping_page(swap_space, i, NULL);
		if (unlikely(IS_ERR(to_page))) {
			ret = PTR_ERR(to_page);
			goto out_err;
		}
		preempt_disable();
		from_virtual = kmap_atomic(from_page, KM_USER0);
		to_virtual = kmap_atomic(to_page, KM_USER1);
		memcpy(to_virtual, from_virtual, PAGE_SIZE);
		kunmap_atomic(to_virtual, KM_USER1);
		kunmap_atomic(from_virtual, KM_USER0);
		preempt_enable();
		set_page_dirty(to_page);
		mark_page_accessed(to_page);
		page_cache_release(to_page);
	}

	ttm_tt_free_alloced_pages(ttm);
	ttm->swap_storage = swap_storage;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistant_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP;

	return 0;
out_err:
	if (!persistant_swap_storage)
		fput(swap_storage);

	return ret;
}
Example #4
static int do_read_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct page *node_page;
	struct f2fs_inode *ri;

	/* Check if ino is within scope */
	if (check_nid_range(sbi, inode->i_ino)) {
		f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
			 (unsigned long) inode->i_ino);
		WARN_ON(1);
		return -EINVAL;
	}

	node_page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	ri = F2FS_INODE(node_page);

	inode->i_mode = le16_to_cpu(ri->i_mode);
	i_uid_write(inode, le32_to_cpu(ri->i_uid));
	i_gid_write(inode, le32_to_cpu(ri->i_gid));
	set_nlink(inode, le32_to_cpu(ri->i_links));
	inode->i_size = le64_to_cpu(ri->i_size);
	inode->i_blocks = le64_to_cpu(ri->i_blocks);

	inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
	inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	inode->i_generation = le32_to_cpu(ri->i_generation);

	fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
	fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
	fi->i_flags = le32_to_cpu(ri->i_flags);
	fi->flags = 0;
	fi->i_advise = ri->i_advise;
	fi->i_pino = le32_to_cpu(ri->i_pino);
	fi->i_dir_level = ri->i_dir_level;

	if (f2fs_init_extent_tree(inode, &ri->i_ext))
		set_page_dirty(node_page);

	get_inline_info(fi, ri);

	/* check data exist */
	if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
		__recover_inline_status(inode, node_page);

	/* get rdev by using inline_info */
	__get_inode_rdev(inode, ri);

	if (__written_first_block(ri))
		set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);

	f2fs_put_page(node_page, 1);

	stat_inc_inline_xattr(inode);
	stat_inc_inline_inode(inode);
	stat_inc_inline_dir(inode);

	return 0;
}
Example #5
/*
 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 */
int __f2fs_add_link(struct inode *dir, const struct qstr *name,
						struct inode *inode)
{
	unsigned int bit_pos;
	unsigned int level;
	unsigned int current_depth;
	unsigned long bidx, block;
	f2fs_hash_t dentry_hash;
	struct f2fs_dir_entry *de;
	unsigned int nbucket, nblock;
	size_t namelen = name->len;
	struct page *dentry_page = NULL;
	struct f2fs_dentry_block *dentry_blk = NULL;
	int slots = GET_DENTRY_SLOTS(namelen);
	struct page *page;
	int err = 0;
	int i;

	dentry_hash = f2fs_dentry_hash(name);
	level = 0;
	current_depth = F2FS_I(dir)->i_current_depth;
	if (F2FS_I(dir)->chash == dentry_hash) {
		level = F2FS_I(dir)->clevel;
		F2FS_I(dir)->chash = 0;
	}

start:
	if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
		return -ENOSPC;

	/* Increase the depth, if required */
	if (level == current_depth)
		++current_depth;

	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
	nblock = bucket_blocks(level);

	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
				(le32_to_cpu(dentry_hash) % nbucket));

	for (block = bidx; block <= (bidx + nblock - 1); block++) {
		dentry_page = get_new_data_page(dir, NULL, block, true);
		if (IS_ERR(dentry_page))
			return PTR_ERR(dentry_page);

		dentry_blk = kmap(dentry_page);
		bit_pos = room_for_filename(dentry_blk, slots);
		if (bit_pos < NR_DENTRY_IN_BLOCK)
			goto add_dentry;

		kunmap(dentry_page);
		f2fs_put_page(dentry_page, 1);
	}

	/* Move to next level to find the empty slot for new dentry */
	++level;
	goto start;
add_dentry:
	f2fs_wait_on_page_writeback(dentry_page, DATA);

	down_write(&F2FS_I(inode)->i_sem);
	page = init_inode_metadata(inode, dir, name);
	if (IS_ERR(page)) {
		err = PTR_ERR(page);
		goto fail;
	}
	de = &dentry_blk->dentry[bit_pos];
	de->hash_code = dentry_hash;
	de->name_len = cpu_to_le16(namelen);
	memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
	de->ino = cpu_to_le32(inode->i_ino);
	set_de_type(de, inode);
	for (i = 0; i < slots; i++)
		test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
	set_page_dirty(dentry_page);

	/* we don't need to mark_inode_dirty now */
	F2FS_I(inode)->i_pino = dir->i_ino;
	update_inode(inode, page);
	f2fs_put_page(page, 1);

	update_parent_metadata(dir, inode, current_depth);
fail:
	up_write(&F2FS_I(inode)->i_sem);

	if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
		update_inode_page(dir);
		clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
	}
	kunmap(dentry_page);
	f2fs_put_page(dentry_page, 1);
	return err;
}
Example #6
int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
					const void *value, size_t value_len)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_xattr_header *header = NULL;
	struct f2fs_xattr_entry *here, *last;
	struct page *page;
	void *base_addr;
	int error, found, free, name_len, newsize;
	char *pval;

	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);

	if (value == NULL)
		value_len = 0;

	if (name_len > 255 || value_len > MAX_VALUE_LEN)
		return -ERANGE;

	mutex_lock_op(sbi, NODE_NEW);
	if (!fi->i_xattr_nid) {
		/* Allocate new attribute block */
		struct dnode_of_data dn;

		if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
			mutex_unlock_op(sbi, NODE_NEW);
			return -ENOSPC;
		}
		set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
		mark_inode_dirty(inode);

		page = new_node_page(&dn, XATTR_NODE_OFFSET);
		if (IS_ERR(page)) {
			alloc_nid_failed(sbi, fi->i_xattr_nid);
			fi->i_xattr_nid = 0;
			mutex_unlock_op(sbi, NODE_NEW);
			return PTR_ERR(page);
		}

		alloc_nid_done(sbi, fi->i_xattr_nid);
		base_addr = page_address(page);
		header = XATTR_HDR(base_addr);
		header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
		header->h_refcount = cpu_to_le32(1);
	} else {
		/* The inode already has an extended attribute block. */
		page = get_node_page(sbi, fi->i_xattr_nid);
		if (IS_ERR(page)) {
			mutex_unlock_op(sbi, NODE_NEW);
			return PTR_ERR(page);
		}

		base_addr = page_address(page);
		header = XATTR_HDR(base_addr);
	}

	if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
		error = -EIO;
		goto cleanup;
	}

	/* find entry with wanted name. */
	found = 0;
	list_for_each_xattr(here, base_addr) {
		if (here->e_name_index != name_index)
			continue;
		if (here->e_name_len != name_len)
			continue;
		if (!memcmp(here->e_name, name, name_len)) {
			found = 1;
			break;
		}
	}

	last = here;

	while (!IS_XATTR_LAST_ENTRY(last))
		last = XATTR_NEXT_ENTRY(last);

	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
			name_len + value_len);

	/* 1. Check space */
	if (value) {
		/* If value is NULL, this is a remove operation.
		 * In case of an update operation, we calculate the free space.
		 */
		free = MIN_OFFSET - ((char *)last - (char *)header);
		if (found)
			free = free - ENTRY_SIZE(here);

		if (free < newsize) {
			error = -ENOSPC;
			goto cleanup;
		}
	}

	/* 2. Remove old entry */
	if (found) {
		/* If the entry is found, remove the old entry.
		 * If it is not found, no removal is needed.
		 */
		struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
		int oldsize = ENTRY_SIZE(here);

		memmove(here, next, (char *)last - (char *)next);
		last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
		memset(last, 0, oldsize);
	}

	/* 3. Write new entry */
	if (value) {
		/* Before we get here, the old entry has been removed.
		 * We just write the new entry. */
		memset(last, 0, newsize);
		last->e_name_index = name_index;
		last->e_name_len = name_len;
		memcpy(last->e_name, name, name_len);
		pval = last->e_name + name_len;
		memcpy(pval, value, value_len);
		last->e_value_size = cpu_to_le16(value_len);
	}

	set_page_dirty(page);
	f2fs_put_page(page, 1);

	if (is_inode_flag_set(fi, FI_ACL_MODE)) {
		inode->i_mode = fi->i_acl_mode;
		inode->i_ctime = CURRENT_TIME_SEC;
		clear_inode_flag(fi, FI_ACL_MODE);
	}
	f2fs_write_inode(inode, NULL);
	mutex_unlock_op(sbi, NODE_NEW);

	return 0;
cleanup:
	f2fs_put_page(page, 1);
	mutex_unlock_op(sbi, NODE_NEW);
	return error;
}
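
/*
 * Hedged userspace sketch (not part of the kernel code above): the usual
 * way into f2fs_setxattr() is the setxattr(2) family of system calls,
 * roughly as below.  "user.comment" is just an example attribute name.
 */
#include <stdio.h>
#include <sys/xattr.h>

int example_set_xattr(const char *path)
{
	const char value[] = "hello";

	/* name_index/name/value/value_len above map onto this call */
	if (setxattr(path, "user.comment", value, sizeof(value) - 1, 0)) {
		perror("setxattr");
		return -1;
	}
	return 0;
}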
Example #7
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(dn->inode),
		.type = DATA,
		.rw = WRITE_SYNC | REQ_PRIO,
		.page = page,
		.encrypted_page = NULL,
	};
	int dirty, err;

	if (!f2fs_exist_data(dn->inode))
		goto clear_out;

	err = f2fs_reserve_block(dn, 0);
	if (err)
		return err;

	f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));

	read_inline_data(page, dn->inode_page);
	set_page_dirty(page);

	/* clear dirty state */
	dirty = clear_page_dirty_for_io(page);

	/* write data page to try to make data consistent */
	set_page_writeback(page);
	fio.old_blkaddr = dn->data_blkaddr;
	write_data_page(dn, &fio);
	f2fs_wait_on_page_writeback(page, DATA, true);
	if (dirty)
		inode_dec_dirty_pages(dn->inode);

	/* this converted inline_data should be recovered. */
	set_inode_flag(dn->inode, FI_APPEND_WRITE);

	/* clear inline data and flag after data writeback */
	truncate_inline_inode(dn->inode_page, 0);
	clear_inline_node(dn->inode_page);
clear_out:
	stat_dec_inline_inode(dn->inode);
	f2fs_clear_inline_inode(dn->inode);
	f2fs_put_dnode(dn);
	return 0;
}

int f2fs_convert_inline_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct page *ipage, *page;
	int err = 0;

	if (!f2fs_has_inline_data(inode))
		return 0;

	page = f2fs_grab_cache_page(inode->i_mapping, 0, false);
	if (!page)
		return -ENOMEM;

	f2fs_lock_op(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode))
		err = f2fs_convert_inline_page(&dn, page);

	f2fs_put_dnode(&dn);
out:
	f2fs_unlock_op(sbi);

	f2fs_put_page(page, 1);

	f2fs_balance_fs(sbi, dn.node_changed);

	return err;
}

int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
	void *src_addr, *dst_addr;
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	if (!f2fs_has_inline_data(inode)) {
		f2fs_put_dnode(&dn);
		return -EAGAIN;
	}

	f2fs_bug_on(F2FS_I_SB(inode), page->index);

	f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
	src_addr = kmap_atomic(page);
	dst_addr = inline_data_addr(dn.inode_page);
	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
	kunmap_atomic(src_addr);
	set_page_dirty(dn.inode_page);

	set_inode_flag(inode, FI_APPEND_WRITE);
	set_inode_flag(inode, FI_DATA_EXIST);

	clear_inline_node(dn.inode_page);
	f2fs_put_dnode(&dn);
	return 0;
}
static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
						struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int err;

	f2fs_balance_fs(sbi);

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));

	/* block allocation */
	f2fs_lock_op(sbi);
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_reserve_block(&dn, page->index);
	if (err) {
		f2fs_unlock_op(sbi);
		goto out;
	}
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);

	file_update_time(vma->vm_file);
	lock_page(page);
	if (unlikely(page->mapping != inode->i_mapping ||
			page_offset(page) > i_size_read(inode) ||
			!PageUptodate(page))) {
		unlock_page(page);
		err = -EFAULT;
		goto out;
	}

	/*
	 * check to see if the page is mapped already (no holes)
	 */
	if (PageMappedToDisk(page))
		goto mapped;

	/* page is wholly or partially inside EOF */
	if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) >
						i_size_read(inode)) {
		unsigned offset;
		offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	}
	set_page_dirty(page);
	SetPageUptodate(page);

	trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
	/* fill the page */
	f2fs_wait_on_page_writeback(page, DATA);

	/* wait for GCed encrypted page writeback */
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);

	/* if gced page is attached, don't write to cold segment */
	clear_cold_data(page);
out:
	return block_page_mkwrite_return(err);
}
int truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blocksize = inode->i_sb->s_blocksize;
	struct dnode_of_data dn;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct page *ipage;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);

	if (lock)
		f2fs_lock_op(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		if (truncate_inline_inode(ipage, from))
			set_page_dirty(ipage);
		f2fs_put_page(ipage, 1);
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
	if (err) {
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);

	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);

	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned long last_index;
	u64 pos = page->index << PAGE_CACHE_SHIFT;
	unsigned int data_blocks, ind_blocks, rblocks;
	struct gfs2_holder gh;
	struct gfs2_qadata *qa;
	loff_t size;
	int ret;

	/* Wait if fs is frozen. This is racy so we check again later on
	 * and retry if the fs has been frozen after the page lock has
	 * been acquired
	 */
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out;

	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
		goto out_unlock;
	}

	ret = -ENOMEM;
	qa = gfs2_qadata_get(ip);
	if (qa == NULL)
		goto out_unlock;

	ret = gfs2_quota_lock_check(ip);
	if (ret)
		goto out_alloc_put;
	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
	ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks) {
		rblocks += RES_STATFS + RES_QUOTA;
		rblocks += gfs2_rg_blocks(ip);
	}
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
	ret = -EINVAL;
	size = i_size_read(inode);
	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
	/* Check page index against inode size */
	if (size == 0 || (page->index > last_index))
		goto out_trans_end;

	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
	ret = 0;
	if (gfs2_is_stuffed(ip))
		ret = gfs2_unstuff_dinode(ip, page);
	if (ret == 0)
		ret = gfs2_allocate_page_backing(page);

out_trans_end:
	if (ret)
		unlock_page(page);
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_alloc_put:
	gfs2_qadata_put(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out:
	gfs2_holder_uninit(&gh);
	if (ret == 0) {
		set_page_dirty(page);
		/* This check must be post dropping of transaction lock */
		if (inode->i_sb->s_frozen == SB_UNFROZEN) {
			wait_on_page_writeback(page);
		} else {
			ret = -EAGAIN;
			unlock_page(page);
		}
	}
	return block_page_mkwrite_return(ret);
}
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	set_page_dirty(page);
	return 0;
}
static void
free_pagelist(PAGELIST_T *pagelist, int actual)
{
	unsigned long *need_release;
	struct page **pages;
	unsigned int num_pages, i;

	vchiq_log_trace(vchiq_arm_log_level,
		"free_pagelist - %x, %d", (unsigned int)pagelist, actual);

	num_pages =
		(pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
		PAGE_SIZE;

	need_release = (unsigned long *)(pagelist->addrs + num_pages);
	pages = (struct page **)(pagelist->addrs + num_pages + 1);

	/* Deal with any partial cache lines (fragments) */
	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
		FRAGMENTS_T *fragments = g_fragments_base +
			(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
		int head_bytes, tail_bytes;
		head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
			(CACHE_LINE_SIZE - 1);
		tail_bytes = (pagelist->offset + actual) &
			(CACHE_LINE_SIZE - 1);

		if ((actual >= 0) && (head_bytes != 0)) {
			if (head_bytes > actual)
				head_bytes = actual;

			memcpy((char *)page_address(pages[0]) +
				pagelist->offset,
				fragments->headbuf,
				head_bytes);
		}
		if ((actual >= 0) && (head_bytes < actual) &&
			(tail_bytes != 0)) {
			memcpy((char *)page_address(pages[num_pages - 1]) +
				((pagelist->offset + actual) &
				(PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
				fragments->tailbuf, tail_bytes);
		}

		down(&g_free_fragments_mutex);
		*(FRAGMENTS_T **) fragments = g_free_fragments;
		g_free_fragments = fragments;
		up(&g_free_fragments_mutex);
		up(&g_free_fragments_sema);
	}

	if (*need_release) {
		for (i = 0; i < num_pages; i++) {
			if (pagelist->type != PAGELIST_WRITE)
				set_page_dirty(pages[i]);

			page_cache_release(pages[i]);
		}
	}

	kfree(pagelist);
}
int intel_scu_ipc_write_umip(u8 *data, int len, int offset)
{
	int i, ret = 0, offset_align;
	int remainder, len_align = 0;
	u32 dptr, sptr, cmd;
	u8 cs, tbl_cs = 0, *buf = NULL;
	Sector sect;
	struct block_device *bdev;
	char *buffer = NULL;
	int *holderId = NULL;
	int sect_no;
	u8 checksum;

	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_CLOVERVIEW) {

		/* Opening the mmcblk0boot0 */
		bdev = get_emmc_bdev();
		if (bdev == NULL) {
			pr_err("%s: get_emmc failed!\n", __func__);
			return -ENODEV;
		}

		/* make sure the block device is open rw */
		ret = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, holderId);
		if (ret < 0) {
			pr_err("%s: blk_dev_get failed!\n", __func__);
			return -ret;
		}

		/* get memmap of the UMIP header */
		sect_no = offset / SECTOR_SIZE;
		remainder = offset % SECTOR_SIZE;
		buffer = read_dev_sector(bdev, sect_no +
					UMIP_HEADER_HEADROOM_SECTOR, &sect);

		/* Shouldn't need to access UMIP sector 0/1 */
		if (sect_no < UMIP_TOTAL_HEADER_SECTOR_NO) {
			pr_err("invalid umip offset\n");
			ret = -EINVAL;
			goto bd_put;
		} else if (data == NULL || buffer == NULL) {
			pr_err("buffer is empty\n");
			ret = -ENODEV;
			goto bd_put;
		} else if (len > (SECTOR_SIZE - remainder)) {
			pr_err("too much data to write\n");
			ret = -EINVAL;
			goto bd_put;
		}

		lock_page(sect.v);
		memcpy(buffer + remainder, data, len);
		checksum = calc_checksum(buffer, SECTOR_SIZE);

		set_page_dirty(sect.v);
		unlock_page(sect.v);
		sync_blockdev(bdev);
		put_dev_sector(sect);

		/*
		 * Update the checksum. In sector 0 (starting at UMIP
		 * offset 0x08) we maintain 4 bytes to track each sector's
		 * changes individually. For example, the dword at offset
		 * 0x08 checksums the data integrity of sector number 2,
		 * and so on. Note that only the first byte of each 4-byte
		 * entry stores the checksum.
		 * For details, see the CTP FAS UMIP header definition.
		 */

		buffer = read_dev_sector(bdev, UMIP_HEADER_SECTOR +
					UMIP_HEADER_HEADROOM_SECTOR, &sect);

		if (buffer == NULL) {
			pr_err("buffer is empty\n");
			ret = -ENODEV;
			goto bd_put;
		}

		lock_page(sect.v);
		memcpy(buffer + 4 * (sect_no - UMIP_TOTAL_HEADER_SECTOR_NO) +
			UMIP_START_CHKSUM_ADDR, &checksum, 1/* one byte */);

		/* Change UMIP prologue chksum to zero */
		*(buffer + UMIP_HEADER_CHKSUM_ADDR) = 0;

		for (i = 0; i < UMIP_TOTAL_CHKSUM_ENTRY; i++) {
			tbl_cs ^= *(u8 *)(buffer + 4 * i +
					UMIP_START_CHKSUM_ADDR);
		}

		/* Finish up by re-calculating the UMIP prologue checksum */
		cs = dword_to_byte_chksum(xorblock((u32 *)buffer,
							SECTOR_SIZE));

		*(buffer + UMIP_HEADER_CHKSUM_ADDR) = tbl_cs ^ cs;

		set_page_dirty(sect.v);
		unlock_page(sect.v);
		sync_blockdev(bdev);
bd_put:
		if (buffer)
			put_dev_sector(sect);

		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
		return ret;
	} else {

		if (!intel_mip_base)
			return -ENODEV;

		if (offset + len > IPC_MIP_MAX_ADDR)
			return -EINVAL;

		rpmsg_global_lock();

		offset_align = offset & (~0x3);
		len_align = (len + (offset - offset_align) + 3) & (~0x3);

		if (len != len_align) {
			buf = kzalloc(len_align, GFP_KERNEL);
			if (!buf) {
				pr_err("Alloc memory failed\n");
				ret = -ENOMEM;
				goto fail;
			}
			ret = read_mip(buf, len_align, offset_align, 0);
			if (ret)
				goto fail;
			memcpy(buf + offset - offset_align, data, len);
		} else {
			buf = data;
		}

		dptr = offset_align;
		sptr = len_align / 4;
		cmd = IPC_CMD_UMIP_WR << 12 | IPCMSG_MIP_ACCESS;

		memcpy(intel_mip_base, buf, len_align);

		ret = rpmsg_send_raw_command(mip_instance, cmd, 0, NULL,
			NULL, 0, 0, sptr, dptr);

fail:
		if (buf && len_align != len)
			kfree(buf);

		rpmsg_global_unlock();

		return ret;
	}
}
Example #14
static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
					struct file *filp,
					struct f2fs_defragment *range)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
	struct extent_info ei;
	pgoff_t pg_start, pg_end;
	unsigned int blk_per_seg = sbi->blocks_per_seg;
	unsigned int total = 0, sec_num;
	unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
	block_t blk_end = 0;
	bool fragmented = false;
	int err;

	/* if in-place-update policy is enabled, don't waste time here */
	if (need_inplace_update(inode))
		return -EINVAL;

	pg_start = range->start >> PAGE_CACHE_SHIFT;
	pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;

	f2fs_balance_fs(sbi, true);

	inode_lock(inode);

	/* writeback all dirty pages in the range */
	err = filemap_write_and_wait_range(inode->i_mapping, range->start,
						range->start + range->len - 1);
	if (err)
		goto out;

	/*
	 * lookup mapping info in extent cache, skip defragmenting if physical
	 * block addresses are continuous.
	 */
	if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
		if (ei.fofs + ei.len >= pg_end)
			goto out;
	}

	map.m_lblk = pg_start;

	/*
	 * lookup mapping info in dnode page cache, skip defragmenting if all
	 * physical block addresses are continuous even if there are hole(s)
	 * in logical blocks.
	 */
	while (map.m_lblk < pg_end) {
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
		if (err)
			goto out;

		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk++;
			continue;
		}

		if (blk_end && blk_end != map.m_pblk) {
			fragmented = true;
			break;
		}
		blk_end = map.m_pblk + map.m_len;

		map.m_lblk += map.m_len;
	}

	if (!fragmented)
		goto out;

	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;

	sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;

	/*
	 * Make sure there are enough free sections for LFS allocation; this
	 * avoids the defragmenter running in SSR mode when free sections are
	 * being allocated intensively.
	 */
	if (has_not_enough_free_secs(sbi, sec_num)) {
		err = -EAGAIN;
		goto out;
	}

	while (map.m_lblk < pg_end) {
		pgoff_t idx;
		int cnt = 0;

do_map:
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
		if (err)
			goto clear_out;

		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk++;
			continue;
		}

		set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);

		idx = map.m_lblk;
		while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
			struct page *page;

			page = get_lock_data_page(inode, idx, true);
			if (IS_ERR(page)) {
				err = PTR_ERR(page);
				goto clear_out;
			}

			set_page_dirty(page);
			f2fs_put_page(page, 1);

			idx++;
			cnt++;
			total++;
		}

		map.m_lblk = idx;

		if (idx < pg_end && cnt < blk_per_seg)
			goto do_map;

		clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);

		err = filemap_fdatawrite(inode->i_mapping);
		if (err)
			goto out;
	}
clear_out:
	clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
out:
	inode_unlock(inode);
	if (!err)
		range->len = (u64)total << PAGE_CACHE_SHIFT;
	return err;
}
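
/*
 * Hedged userspace sketch (not part of the kernel code above): the range
 * defragmenter is reached through the F2FS_IOC_DEFRAGMENT ioctl.  The
 * struct layout and ioctl encoding below are assumptions based on the
 * f2fs headers of this era; check your kernel's f2fs.h before relying
 * on them.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

struct f2fs_defragment {		/* assumed layout: byte offsets */
	uint64_t start;
	uint64_t len;
};

#define F2FS_IOCTL_MAGIC	0xf5	/* assumed */
#define F2FS_IOC_DEFRAGMENT	_IOWR(F2FS_IOCTL_MAGIC, 8, struct f2fs_defragment)

int example_defragment(int fd, uint64_t start, uint64_t len)
{
	struct f2fs_defragment range = { .start = start, .len = len };

	/* on success the kernel updates range.len (see the code above) */
	if (ioctl(fd, F2FS_IOC_DEFRAGMENT, &range) < 0) {
		perror("F2FS_IOC_DEFRAGMENT");
		return -1;
	}
	return 0;
}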
Example #15
static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
				void *txattr_addr, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	size_t inline_size = 0;
	void *xattr_addr;
	struct page *xpage;
	nid_t new_nid = 0;
	int err;

	inline_size = inline_xattr_size(inode);

	if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
		if (!alloc_nid(sbi, &new_nid))
			return -ENOSPC;

	/* write to inline xattr */
	if (inline_size) {
		struct page *page = NULL;
		void *inline_addr;

		if (ipage) {
			inline_addr = inline_xattr_addr(ipage);
			f2fs_wait_on_page_writeback(ipage, NODE, true);
		} else {
			page = get_node_page(sbi, inode->i_ino);
			if (IS_ERR(page)) {
				alloc_nid_failed(sbi, new_nid);
				return PTR_ERR(page);
			}
			inline_addr = inline_xattr_addr(page);
			f2fs_wait_on_page_writeback(page, NODE, true);
		}
		memcpy(inline_addr, txattr_addr, inline_size);
		f2fs_put_page(page, 1);

		/* no need to use xattr node block */
		if (hsize <= inline_size) {
			err = truncate_xattr_node(inode, ipage);
			alloc_nid_failed(sbi, new_nid);
			return err;
		}
	}

	/* write to xattr node block */
	if (F2FS_I(inode)->i_xattr_nid) {
		xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
		if (IS_ERR(xpage)) {
			alloc_nid_failed(sbi, new_nid);
			return PTR_ERR(xpage);
		}
		f2fs_bug_on(sbi, new_nid);
		f2fs_wait_on_page_writeback(xpage, NODE, true);
	} else {
		struct dnode_of_data dn;
		set_new_dnode(&dn, inode, NULL, NULL, new_nid);
		xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
		if (IS_ERR(xpage)) {
			alloc_nid_failed(sbi, new_nid);
			return PTR_ERR(xpage);
		}
		alloc_nid_done(sbi, new_nid);
	}

	xattr_addr = page_address(xpage);
	memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
						sizeof(struct node_footer));
	set_page_dirty(xpage);
	f2fs_put_page(xpage, 1);

	/* need to checkpoint during fsync */
	F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
	return 0;
}
static int __exchange_data_block(struct inode *inode, pgoff_t src,
					pgoff_t dst, bool full)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	block_t new_addr;
	bool do_replace = false;
	int ret;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA);
	if (ret && ret != -ENOENT) {
		return ret;
	} else if (ret == -ENOENT) {
		new_addr = NULL_ADDR;
	} else {
		new_addr = dn.data_blkaddr;
		if (!is_checkpointed_data(sbi, new_addr)) {
			dn.data_blkaddr = NULL_ADDR;
			/* do not invalidate this block address */
			set_data_blkaddr(&dn);
			f2fs_update_extent_cache(&dn);
			do_replace = true;
		}
		f2fs_put_dnode(&dn);
	}

	if (new_addr == NULL_ADDR)
		return full ? truncate_hole(inode, dst, dst + 1) : 0;

	if (do_replace) {
		struct page *ipage = get_node_page(sbi, inode->i_ino);
		struct node_info ni;

		if (IS_ERR(ipage)) {
			ret = PTR_ERR(ipage);
			goto err_out;
		}

		set_new_dnode(&dn, inode, ipage, NULL, 0);
		ret = f2fs_reserve_block(&dn, dst);
		if (ret)
			goto err_out;

		truncate_data_blocks_range(&dn, 1);

		get_node_info(sbi, dn.nid, &ni);
		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
				ni.version, true);
		f2fs_put_dnode(&dn);
	} else {
		struct page *psrc, *pdst;

		psrc = get_lock_data_page(inode, src, true);
		if (IS_ERR(psrc))
			return PTR_ERR(psrc);
		pdst = get_new_data_page(inode, NULL, dst, false);
		if (IS_ERR(pdst)) {
			f2fs_put_page(psrc, 1);
			return PTR_ERR(pdst);
		}
		f2fs_copy_page(psrc, pdst);
		set_page_dirty(pdst);
		f2fs_put_page(pdst, 1);
		f2fs_put_page(psrc, 1);

		return truncate_hole(inode, src, src + 1);
	}
	return 0;

err_out:
	if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) {
		dn.data_blkaddr = new_addr;
		set_data_blkaddr(&dn);
		f2fs_update_extent_cache(&dn);
		f2fs_put_dnode(&dn);
	}
	return ret;
}
Example #17
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned long last_index;
	u64 pos = page->index << PAGE_CACHE_SHIFT;
	unsigned int data_blocks, ind_blocks, rblocks;
	struct gfs2_holder gh;
	loff_t size;
	int ret;

	sb_start_pagefault(inode->i_sb);

	/* Update file times before taking page lock */
	file_update_time(vma->vm_file);

	ret = get_write_access(inode);
	if (ret)
		goto out;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		goto out_write_access;

	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
		goto out_unlock;
	}

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_unlock;

	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
	ap.target = data_blocks + ind_blocks;
	ret = gfs2_quota_lock_check(ip, &ap);
	if (ret)
		goto out_unlock;
	ret = gfs2_inplace_reserve(ip, &ap);
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks) {
		rblocks += RES_STATFS + RES_QUOTA;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
	}
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
	ret = -EINVAL;
	size = i_size_read(inode);
	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
	/* Check page index against inode size */
	if (size == 0 || (page->index > last_index))
		goto out_trans_end;

	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
	ret = 0;
	if (gfs2_is_stuffed(ip))
		ret = gfs2_unstuff_dinode(ip, page);
	if (ret == 0)
		ret = gfs2_allocate_page_backing(page);

out_trans_end:
	if (ret)
		unlock_page(page);
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	if (ret == 0) {
		set_page_dirty(page);
		wait_for_stable_page(page);
	}
out_write_access:
	put_write_access(inode);
out:
	sb_end_pagefault(inode->i_sb);
	return block_page_mkwrite_return(ret);
}

static const struct vm_operations_struct gfs2_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = gfs2_page_mkwrite,
};

/**
 * gfs2_mmap -
 * @file: The file to map
 * @vma: The VMA which described the mapping
 *
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);

	if (!(file->f_flags & O_NOATIME) &&
	    !IS_NOATIME(&ip->i_inode)) {
		struct gfs2_holder i_gh;
		int error;

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
			return error;
		/* grab lock to update inode */
		gfs2_glock_dq_uninit(&i_gh);
		file_accessed(file);
	}
	vma->vm_ops = &gfs2_vm_ops;

	return 0;
}
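
/*
 * Hedged userspace sketch (not part of the kernel code above): writing
 * through a shared, writable mapping is what ends up in the
 * gfs2_page_mkwrite() handler installed via gfs2_vm_ops.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int example_mmap_write(int fd, size_t len)
{
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	/* the first store into each page faults into ->page_mkwrite */
	memcpy(p, "hello", 5);

	msync(p, len, MS_SYNC);
	munmap(p, len);
	return 0;
}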

/**
 * gfs2_open_common - This is common to open and atomic_open
 * @inode: The inode being opened
 * @file: The file being opened
 *
 * This may be called under a glock or not, depending on how it has
 * been reached. We must always be called under a glock for regular
 * files, however. For other file types, it does not matter whether
 * we hold the glock or not.
 *
 * Returns: Error code or 0 for success
 */

int gfs2_open_common(struct inode *inode, struct file *file)
{
	struct gfs2_file *fp;
	int ret;

	if (S_ISREG(inode->i_mode)) {
		ret = generic_file_open(inode, file);
		if (ret)
			return ret;
	}

	fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
	if (!fp)
		return -ENOMEM;

	mutex_init(&fp->f_fl_mutex);

	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
	file->private_data = fp;
	return 0;
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * After atomic_open, this function is only used for opening files
 * which are already cached. We must still get the glock for regular
 * files to ensure that we have the file size uptodate for the large
 * file check which is in the common code. That is only an issue for
 * regular files though.
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	int error;
	bool need_unlock = false;

	if (S_ISREG(ip->i_inode.i_mode)) {
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
			return error;
		need_unlock = true;
	}

	error = gfs2_open_common(inode, file);

	if (need_unlock)
		gfs2_glock_dq_uninit(&i_gh);

	return error;
}

/**
 * gfs2_release - called to close a struct file
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

static int gfs2_release(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	kfree(file->private_data);
	file->private_data = NULL;

	if (!(file->f_mode & FMODE_WRITE))
		return 0;

	gfs2_rs_delete(ip, &inode->i_writecount);
	return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
 * @datasync: set if we can ignore timestamp changes
 *
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails for any reason other than -EIO, we will
 * continue with the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	int sync_state = inode->i_state & I_DIRTY_ALL;
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret = 0, ret1 = 0;

	if (mapping->nrpages) {
		ret1 = filemap_fdatawrite_range(mapping, start, end);
		if (ret1 == -EIO)
			return ret1;
	}

	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
	if (datasync)
		sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);

	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			return ret;
		if (gfs2_is_jdata(ip))
			filemap_write_and_wait(mapping);
		gfs2_ail_flush(ip->i_gl, 1);
	}

	if (mapping->nrpages)
		ret = filemap_fdatawait_range(mapping, start, end);

	return ret ? ret : ret1;
}

/**
 * gfs2_file_write_iter - Perform a write to a file
 * @iocb: The io context
 * @from: The data to write
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can land up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct gfs2_inode *ip = GFS2_I(file_inode(file));
	int ret;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));

	if (iocb->ki_flags & IOCB_APPEND) {
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	return generic_file_write_iter(iocb, from);
}

static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *dibh;
	int error;
	unsigned int nr_blks;
	sector_t lblock = offset >> inode->i_blkbits;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (unlikely(error))
			goto out;
	}

	while (len) {
		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
		bh_map.b_size = len;
		set_buffer_zeronew(&bh_map);

		error = gfs2_block_map(inode, lblock, &bh_map, 1);
		if (unlikely(error))
			goto out;
		len -= bh_map.b_size;
		nr_blks = bh_map.b_size >> inode->i_blkbits;
		lblock += nr_blks;
		if (!buffer_new(&bh_map))
			continue;
		if (unlikely(!buffer_zeronew(&bh_map))) {
			error = -EIO;
			goto out;
		}
	}
out:
	brelse(dibh);
	return error;
}
/**
 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 *                     blocks, determine how many bytes can be written.
 * @ip:          The inode in question.
 * @len:         Max cap of bytes. What we return in *len must be <= this.
 * @data_blocks: Compute and return the number of data blocks needed
 * @ind_blocks:  Compute and return the number of indirect blocks needed
 * @max_blocks:  The total blocks available to work with.
 *
 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks,
			    unsigned int max_blocks)
{
	loff_t max = *len;
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);

	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
		max_data -= tmp;
	}

	*data_blocks = max_data;
	*ind_blocks = max_blocks - max_data;
	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
	if (*len > max) {
		*len = max;
		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
	}
}

static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	loff_t bytes, max_bytes, max_blks = UINT_MAX;
	int error;
	const loff_t pos = offset;
	const loff_t count = len;
	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
	loff_t max_chunk_size = UINT_MAX & bsize_mask;

	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

	offset &= bsize_mask;

	len = next - offset;
	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!bytes)
		bytes = UINT_MAX;
	bytes &= bsize_mask;
	if (bytes == 0)
		bytes = sdp->sd_sb.sb_bsize;

	gfs2_size_hint(file, offset, len);

	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
	ap.min_target = data_blocks + ind_blocks;

	while (len > 0) {
		if (len < bytes)
			bytes = len;
		if (!gfs2_write_alloc_required(ip, offset, bytes)) {
			len -= bytes;
			offset += bytes;
			continue;
		}

		/* We need to determine how many bytes we can actually
		 * fallocate without exceeding quota or going over the
		 * end of the fs. We start off optimistically by assuming
		 * we can write max_bytes */
		max_bytes = (len > max_chunk_size) ? max_chunk_size : len;

		/* Since max_bytes is most likely a theoretical max, we
		 * calculate a more realistic 'bytes' to serve as a good
		 * starting point for the number of bytes we may be able
		 * to write */
		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
		ap.target = data_blocks + ind_blocks;

		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;
		/* ap.allowed tells us how many blocks quota will allow
		 * us to write. Check if this reduces max_blks */
		if (ap.allowed && ap.allowed < max_blks)
			max_blks = ap.allowed;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto out_qunlock;

		/* check if the selected rgrp limits our max_blks further */
		if (ap.allowed && ap.allowed < max_blks)
			max_blks = ap.allowed;

		/* Almost done. Calculate bytes that can be written using
		 * max_blks. We also recompute max_bytes, data_blocks and
		 * ind_blocks */
		calc_max_reserv(ip, &max_bytes, &data_blocks,
				&ind_blocks, max_blks);

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks ? data_blocks : 1;

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
		if (error)
			goto out_trans_fail;

		error = fallocate_chunk(inode, offset, max_bytes, mode);
		gfs2_trans_end(sdp);

		if (error)
			goto out_trans_fail;

		len -= max_bytes;
		offset += max_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
		i_size_write(inode, pos + count);
		/* Marks the inode as dirty */
		file_update_time(file);
	}

	return generic_write_sync(file, pos, count);

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return error;
}

static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
		return -EOPNOTSUPP;

	mutex_lock(&inode->i_mutex);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
	    (offset + len) > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out_unlock;
	}

	ret = get_write_access(inode);
	if (ret)
		goto out_unlock;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		goto out_putw;

	ret = __gfs2_fallocate(file, mode, offset, len);
	if (ret)
		gfs2_rs_deltree(ip->i_res);
out_putw:
	put_write_access(inode);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	mutex_unlock(&inode->i_mutex);
	return ret;
}

static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	int error;
	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);

	error = gfs2_rs_alloc(ip);
	if (error)
		return (ssize_t)error;

	gfs2_size_hint(out, *ppos, len);

	return iter_file_splice_write(pipe, out, ppos, len, flags);
}

#ifdef CONFIG_GFS2_FS_LOCKING_DLM

/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (!(fl->fl_flags & FL_POSIX))
		return -ENOLCK;
	if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
		return -ENOLCK;

	if (cmd == F_CANCELLK) {
		/* Hack: */
		cmd = F_SETLK;
		fl->fl_type = F_UNLCK;
	}
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
		if (fl->fl_type == F_UNLCK)
			locks_lock_file_wait(file, fl);
		return -EIO;
	}
	if (IS_GETLK(cmd))
		return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
	else if (fl->fl_type == F_UNLCK)
		return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
	else
		return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}

static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_file *fp = file->private_data;
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
	struct gfs2_inode *ip = GFS2_I(file_inode(file));
	struct gfs2_glock *gl;
	unsigned int state;
	int flags;
	int error = 0;
	int sleeptime;

	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;

	mutex_lock(&fp->f_fl_mutex);

	gl = fl_gh->gh_gl;
	if (gl) {
		if (fl_gh->gh_state == state)
			goto out;
		locks_lock_file_wait(file,
				     &(struct file_lock){.fl_type = F_UNLCK});
		gfs2_glock_dq(fl_gh);
		gfs2_holder_reinit(state, flags, fl_gh);
	} else {
Example #18
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
    struct inode *inode = file->f_mapping->host;
    struct f2fs_inode_info *fi = F2FS_I(inode);
    struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    nid_t ino = inode->i_ino;
    int ret = 0;
    bool need_cp = false;
    struct writeback_control wbc = {
        .sync_mode = WB_SYNC_ALL,
        .nr_to_write = LONG_MAX,
        .for_reclaim = 0,
    };

    if (unlikely(f2fs_readonly(inode->i_sb)))
        return 0;

    trace_f2fs_sync_file_enter(inode);

    /* if fdatasync is triggered, let's do in-place-update */
    if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
        set_inode_flag(fi, FI_NEED_IPU);
    ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
    clear_inode_flag(fi, FI_NEED_IPU);

    if (ret) {
        trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
        return ret;
    }

    /* if the inode is dirty, let's recover all the time */
    if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
        update_inode_page(inode);
        goto go_write;
    }

    /*
     * If there is no written data, don't waste time writing recovery info.
     */
    if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
            !exist_written_data(sbi, ino, APPEND_INO)) {

        /* it may call write_inode just prior to fsync */
        if (need_inode_page_update(sbi, ino))
            goto go_write;

        if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
                exist_written_data(sbi, ino, UPDATE_INO))
            goto flush_out;
        goto out;
    }
go_write:
    /* guarantee free sections for fsync */
    f2fs_balance_fs(sbi);

    /*
     * Both of fdatasync() and fsync() are able to be recovered from
     * sudden-power-off.
     */
    down_read(&fi->i_sem);
    need_cp = need_do_checkpoint(inode);
    up_read(&fi->i_sem);

    if (need_cp) {
        /* all the dirty node pages should be flushed for POR */
        ret = f2fs_sync_fs(inode->i_sb, 1);

        /*
         * We've secured consistency through sync_fs. Following pino
         * will be used only for fsynced inodes after checkpoint.
         */
        try_to_fix_pino(inode);
        clear_inode_flag(fi, FI_APPEND_WRITE);
        clear_inode_flag(fi, FI_UPDATE_WRITE);
        goto out;
    }
sync_nodes:
    sync_node_pages(sbi, ino, &wbc);

    /* if cp_error was enabled, we should avoid an infinite loop */
    if (unlikely(f2fs_cp_error(sbi)))
        goto out;

    if (need_inode_block_update(sbi, ino)) {
        mark_inode_dirty_sync(inode);
        f2fs_write_inode(inode, NULL);
        goto sync_nodes;
    }

    ret = wait_on_node_pages_writeback(sbi, ino);
    if (ret)
        goto out;

    /* once the recovery info is written, we don't need to track this */
    remove_dirty_inode(sbi, ino, APPEND_INO);
    clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
    remove_dirty_inode(sbi, ino, UPDATE_INO);
    clear_inode_flag(fi, FI_UPDATE_WRITE);
    ret = f2fs_issue_flush(sbi);
out:
    trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
    f2fs_trace_ios(NULL, 1);
    return ret;
}
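
/*
 * Illustrative userspace sketch (not part of the kernel sources):
 * fdatasync() enters f2fs_sync_file() with datasync == 1, letting a
 * clean inode skip the update_inode_page() step, while fsync()
 * (datasync == 0) always considers inode metadata as well.  The path is
 * a hypothetical example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char rec[] = "transaction record\n";
	int fd = open("/mnt/f2fs/journal.log",
		      O_WRONLY | O_CREAT | O_APPEND, 0644);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	if (write(fd, rec, strlen(rec)) < 0) {
		perror("write");
		close(fd);
		return EXIT_FAILURE;
	}

	/* Data-only barrier: sufficient when only file contents must survive. */
	if (fdatasync(fd) < 0)
		perror("fdatasync");

	/* Full barrier: also flushes inode metadata such as timestamps. */
	if (fsync(fd) < 0)
		perror("fsync");

	close(fd);
	return EXIT_SUCCESS;
}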

static pgoff_t __get_first_dirty_index(struct address_space *mapping,
                                       pgoff_t pgofs, int whence)
{
    struct pagevec pvec;
    int nr_pages;

    if (whence != SEEK_DATA)
        return 0;

    /* find first dirty page index */
    pagevec_init(&pvec, 0);
    nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
                                  PAGECACHE_TAG_DIRTY, 1);
    pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
    pagevec_release(&pvec);
    return pgofs;
}

static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
                           int whence)
{
    switch (whence) {
    case SEEK_DATA:
        if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
                (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
            return true;
        break;
    case SEEK_HOLE:
        if (blkaddr == NULL_ADDR)
            return true;
        break;
    }
    return false;
}

static inline int unsigned_offsets(struct file *file)
{
    return file->f_mode & FMODE_UNSIGNED_OFFSET;
}

static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
    if (offset < 0 && !unsigned_offsets(file))
        return -EINVAL;
    if (offset > maxsize)
        return -EINVAL;

    if (offset != file->f_pos) {
        file->f_pos = offset;
        file->f_version = 0;
    }
    return offset;
}
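
/*
 * Note: unsigned_offsets() and vfs_setpos() above appear to be local
 * copies of the generic VFS helpers from fs/read_write.c, inlined so the
 * example stays self-contained; vfs_setpos() validates the new offset
 * and updates ->f_pos and ->f_version.
 */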

static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
    struct inode *inode = file->f_mapping->host;
    loff_t maxbytes = inode->i_sb->s_maxbytes;
    struct dnode_of_data dn;
    pgoff_t pgofs, end_offset, dirty;
    loff_t data_ofs = offset;
    loff_t isize;
    int err = 0;

    mutex_lock(&inode->i_mutex);

    isize = i_size_read(inode);
    if (offset >= isize)
        goto fail;

    /* handle inline data case */
    if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
        if (whence == SEEK_HOLE)
            data_ofs = isize;
        goto found;
    }

    pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);

    dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);

    for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
        if (err && err != -ENOENT) {
            goto fail;
        } else if (err == -ENOENT) {
            /* direct node does not exist */
            if (whence == SEEK_DATA) {
                pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
                                            F2FS_I(inode));
                continue;
            } else {
                goto found;
            }
        }

        end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

        /* find data/hole in dnode block */
        for (; dn.ofs_in_node < end_offset;
                dn.ofs_in_node++, pgofs++,
                data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
            block_t blkaddr;
            blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

            if (__found_offset(blkaddr, dirty, pgofs, whence)) {
                f2fs_put_dnode(&dn);
                goto found;
            }
        }
        f2fs_put_dnode(&dn);
    }

    if (whence == SEEK_DATA)
        goto fail;
found:
    if (whence == SEEK_HOLE && data_ofs > isize)
        data_ofs = isize;
    mutex_unlock(&inode->i_mutex);
    return vfs_setpos(file, data_ofs, maxbytes);
fail:
    mutex_unlock(&inode->i_mutex);
    return -ENXIO;
}

static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
    struct inode *inode = file->f_mapping->host;
    loff_t maxbytes = inode->i_sb->s_maxbytes;

    switch (whence) {
    case SEEK_SET:
    case SEEK_CUR:
    case SEEK_END:
        return generic_file_llseek_size(file, offset, whence,
                                        maxbytes);
    case SEEK_DATA:
    case SEEK_HOLE:
        if (offset < 0)
            return -ENXIO;
        return f2fs_seek_block(file, offset, whence);
    }

    return -EINVAL;
}
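
/*
 * Illustrative userspace sketch (not part of the kernel sources):
 * walking the data extents of a sparse file with SEEK_DATA/SEEK_HOLE,
 * which f2fs_llseek() above hands off to f2fs_seek_block().  The path
 * is a hypothetical example.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/f2fs/sparse.img", O_RDONLY);
	off_t data, hole = 0;

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* Enumerate (data, hole) pairs until SEEK_DATA runs past the data. */
	while ((data = lseek(fd, hole, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			break;
		printf("data extent: [%lld, %lld)\n",
		       (long long)data, (long long)hole);
	}
	/* lseek() fails with errno == ENXIO once offset is past the last data. */

	close(fd);
	return EXIT_SUCCESS;
}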

static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
    struct inode *inode = file_inode(file);

    /* we don't need to use inline_data strictly */
    if (f2fs_has_inline_data(inode)) {
        int err = f2fs_convert_inline_inode(inode);
        if (err)
            return err;
    }

    file_accessed(file);
    vma->vm_ops = &f2fs_file_vm_ops;
    return 0;
}
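
/*
 * Illustrative userspace sketch (not part of the kernel sources):
 * mmap() on an f2fs file goes through f2fs_file_mmap() above, which
 * first converts inline-stored data into a regular data block so page
 * faults have a real page to serve.  The path is a hypothetical example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 4096;
	int fd = open("/mnt/f2fs/small.txt", O_RDWR | O_CREAT, 0644);
	char *map;

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}
	if (ftruncate(fd, len) < 0) {	/* make sure the mapping is backed */
		perror("ftruncate");
		close(fd);
		return EXIT_FAILURE;
	}

	map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return EXIT_FAILURE;
	}

	strcpy(map, "written through the page cache\n");
	msync(map, len, MS_SYNC);	/* flush the dirty page to disk */

	munmap(map, len);
	close(fd);
	return EXIT_SUCCESS;
}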

int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
    int nr_free = 0, ofs = dn->ofs_in_node;
    struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
    struct f2fs_node *raw_node;
    __le32 *addr;

    raw_node = F2FS_NODE(dn->node_page);
    addr = blkaddr_in_node(raw_node) + ofs;

    for (; count > 0; count--, addr++, dn->ofs_in_node++) {
        block_t blkaddr = le32_to_cpu(*addr);
        if (blkaddr == NULL_ADDR)
            continue;

        dn->data_blkaddr = NULL_ADDR;
        set_data_blkaddr(dn);
        f2fs_update_extent_cache(dn);
        invalidate_blocks(sbi, blkaddr);
        if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
            clear_inode_flag(F2FS_I(dn->inode),
                             FI_FIRST_BLOCK_WRITTEN);
        nr_free++;
    }
    if (nr_free) {
        dec_valid_block_count(sbi, dn->inode, nr_free);
        set_page_dirty(dn->node_page);
        sync_inode_page(dn);
    }
    dn->ofs_in_node = ofs;

    trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
                                          dn->ofs_in_node, nr_free);
    return nr_free;
}
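
/*
 * Illustrative userspace sketch (not part of the kernel sources): one
 * common way block addresses get invalidated through helpers like
 * truncate_data_blocks_range() is hole punching with fallocate().  The
 * path below is a hypothetical example.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/f2fs/big.dat", O_RDWR);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* Deallocate 1MiB starting at offset 4KiB; the file size is kept. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      4096, 1024 * 1024) < 0)
		perror("fallocate(PUNCH_HOLE)");

	close(fd);
	return EXIT_SUCCESS;
}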