Ejemplo n.º 1
0
/*
 * Walk the clusters backing the range that starts at pos and spans
 * count bytes, looking for holes (no physical cluster mapped) and for
 * extents flagged OCFS2_EXT_UNWRITTEN.
 *
 * Returns 1 as soon as a hole or unwritten extent is found, a negative
 * error code if ocfs2_get_clusters() fails, and otherwise the status of
 * the last lookup (0 when the range covers no clusters at all).
 */
static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
				       size_t count)
{
	struct super_block *sb = inode->i_sb;
	unsigned int flags;
	u32 cur, remaining, run, phys;
	int status = 0;

	cur = pos >> OCFS2_SB(sb)->s_clustersize_bits;
	remaining = ocfs2_clusters_for_bytes(sb, pos + count) - cur;

	for (; remaining; remaining -= run, cur += run) {
		status = ocfs2_get_clusters(inode, cur, &phys, &run, &flags);
		if (status < 0) {
			mlog_errno(status);
			return status;
		}

		/* An unmapped or unwritten cluster ends the scan. */
		if (!phys || (flags & OCFS2_EXT_UNWRITTEN))
			return 1;

		/* Never step past the end of the requested range. */
		if (run > remaining)
			run = remaining;
	}

	return status;
}
static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
				struct ocfs2_move_extents_context *context)
{
	int ret = 0, flags, do_defrag, skip = 0;
	u32 cpos, phys_cpos, move_start, len_to_move, alloc_size;
	u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0;

	struct inode *inode = context->inode;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_move_extents *range = context->range;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if ((inode->i_size == 0) || (range->me_len == 0))
		return 0;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		return 0;

	context->refcount_loc = le64_to_cpu(di->i_refcount_loc);

	ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh);
	ocfs2_init_dealloc_ctxt(&context->dealloc);

	/*
	 * TO-DO XXX:
	 *
	 * - xattr extents.
	 */

	do_defrag = context->auto_defrag;

	/*
	 * extents moving happens in unit of clusters, for the sake
	 * of simplicity, we may ignore two clusters where 'byte_start'
	 * and 'byte_start + len' were within.
	 */
	move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start);
	len_to_move = (range->me_start + range->me_len) >>
						osb->s_clustersize_bits;
	if (len_to_move >= move_start)
		len_to_move -= move_start;
	else
		len_to_move = 0;

	if (do_defrag) {
		defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
		if (defrag_thresh <= 1)
			goto done;
	} else
Ejemplo n.º 3
0
/*
 * Extend inode to new_i_size, allocating and zeroing as required.
 *
 * A tail_to_skip value > 0 indicates that we're being called from the
 * write path (ocfs2_prepare_inode_for_write() passes the length of the
 * write). This has the following implications:
 *
 * - we don't want to update i_size
 * - di_bh will be NULL, which is fine because it's only used in the
 *   case where we want to update i_size.
 * - ocfs2_zero_extend() will then only be filling the hole created
 *   between i_size and the start of the write.
 *
 * On file systems with sparse allocation nothing is allocated or zeroed
 * here; only i_size is updated (and tail_to_skip must be 0).
 *
 * Returns 0 on success or when there is nothing to do, otherwise the
 * negative error code from ocfs2_data_lock(), ocfs2_extend_allocation(),
 * ocfs2_zero_extend() or ocfs2_simple_size_update().
 */
static int ocfs2_extend_file(struct inode *inode,
			     struct buffer_head *di_bh,
			     u64 new_i_size,
			     size_t tail_to_skip)
{
	int ret = 0;
	u32 clusters_needed, clusters_have;
	u32 clusters_to_add = 0;

	BUG_ON(!tail_to_skip && !di_bh);

	/* setattr sometimes calls us like this. */
	if (new_i_size == 0)
		goto out;

	if (i_size_read(inode) == new_i_size)
		goto out;
	BUG_ON(new_i_size < i_size_read(inode));

	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
		BUG_ON(tail_to_skip != 0);
		goto out_update_size;
	}

	/*
	 * Both counts are unsigned, so a plain subtraction would wrap to
	 * a huge value if the inode already owns more clusters than the
	 * new size requires. Only ask for the shortfall, if there is one.
	 */
	clusters_needed = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
	clusters_have = OCFS2_I(inode)->ip_clusters;
	if (clusters_needed > clusters_have)
		clusters_to_add = clusters_needed - clusters_have;

	/*
	 * protect the pages that ocfs2_zero_extend is going to be
	 * pulling into the page cache.. we do this before the
	 * metadata extend so that we don't get into the situation
	 * where we've extended the metadata but can't get the data
	 * lock to zero.
	 */
	ret = ocfs2_data_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	if (clusters_to_add) {
		ret = ocfs2_extend_allocation(inode, clusters_to_add);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_unlock;
		}
	}

	/*
	 * Call this even if we don't add any clusters to the tree. We
	 * still need to zero the area between the old i_size and the
	 * new i_size.
	 */
	ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

out_update_size:
	if (!tail_to_skip) {
		/* We're being called from ocfs2_setattr() which wants
		 * us to update i_size */
		ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
		if (ret < 0)
			mlog_errno(ret);
	}

out_unlock:
	/*
	 * The data lock is only taken on the non-sparse path above, so
	 * that is the only case in which it has to be dropped.
	 */
	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		ocfs2_data_unlock(inode, 1);

out:
	return ret;
}
Ejemplo n.º 4
0
/*
 * Get the inode ready for a write of count bytes.
 *
 * The meta lock is taken (read level for plain writes, write level when
 * appending), upgraded to write level if the write turns out to need it,
 * and dropped again before returning.
 *
 * - When appending, *ppos is set to the i_size sampled under the lock.
 * - On sparse file systems, *direct_io (if non-NULL and set) is cleared
 *   when the write would go past i_size or when the range contains holes
 *   or unwritten extents, so the caller falls back to buffered I/O.
 * - On non-sparse file systems the file is extended up front if the
 *   write needs more clusters.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
					 loff_t *ppos,
					 size_t count,
					 int appending,
					 int *direct_io)
{
	struct inode *inode = dentry->d_inode;
	int status = 0;
	int lock_level = appending;
	loff_t write_pos, write_end;
	u32 extra_clusters;

	/*
	 * i_size is sampled under a read level meta lock first. If the
	 * write turns out to extend the file (or must clear suid/sgid),
	 * we drop the lock and go around again at write level.
	 */
	while (1) {
		status = ocfs2_meta_lock(inode, NULL, lock_level);
		if (status < 0) {
			lock_level = -1;
			mlog_errno(status);
			goto out;
		}

		/*
		 * Clear suid / sgid here rather than later in the write
		 * path: remove_suid() calls ->setattr with no hint that
		 * cluster locks may already be held, and since
		 * ocfs2_setattr() must take cluster locks to proceed,
		 * that would lock the inode recursively. The dinode
		 * i_size state could also be lost via setattr during
		 * extending writes (inode->i_size is set at the end of a
		 * write).
		 */
		if (should_remove_suid(dentry)) {
			if (lock_level == 0) {
				ocfs2_meta_unlock(inode, lock_level);
				lock_level = 1;
				continue;
			}

			status = ocfs2_write_remove_suid(inode);
			if (status < 0) {
				mlog_errno(status);
				goto out_unlock;
			}
		}

		/*
		 * Work on a private copy of the position until we know
		 * it will not have to be recomputed after relocking.
		 */
		if (!appending) {
			write_pos = *ppos;
		} else {
			write_pos = i_size_read(inode);
			mlog(0, "O_APPEND: inode->i_size=%llu\n", write_pos);
		}
		write_end = write_pos + count;

		if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
			/* Only O_DIRECT writes need the checks below. */
			if (direct_io && *direct_io) {
				if (write_end > i_size_read(inode)) {
					/*
					 * Concurrent direct writes would
					 * leave i_size changes
					 * unsynchronized, so one node could
					 * end up truncating another node's
					 * writes.
					 */
					*direct_io = 0;
				} else {
					/*
					 * Direct I/O does not fill holes.
					 * If there are any, the caller has
					 * to retake some cluster locks and
					 * issue the I/O as buffered.
					 */
					status = ocfs2_check_range_for_holes(inode,
									     write_pos,
									     count);
					if (status < 0) {
						mlog_errno(status);
					} else if (status == 1) {
						*direct_io = 0;
						status = 0;
					}
				}
			}
			break;
		}

		/*
		 * Everything below deals with legacy file systems that
		 * do not support sparse files.
		 */
		mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
		     (long long) write_pos, (long long) write_end,
		     (long long) i_size_read(inode));

		/* Staying within i_size needs no stronger lock. */
		if (write_end <= i_size_read(inode))
			break;

		if (lock_level == 0) {
			ocfs2_meta_unlock(inode, lock_level);
			lock_level = 1;
			continue;
		}

		spin_lock(&OCFS2_I(inode)->ip_lock);
		extra_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
							  write_end) -
				 OCFS2_I(inode)->ip_clusters;
		spin_unlock(&OCFS2_I(inode)->ip_lock);

		mlog(0, "Writing at EOF, may need more allocation: "
		     "i_size = %lld, newsize = %lld, need %u clusters\n",
		     (long long) i_size_read(inode), (long long) write_end,
		     extra_clusters);

		/* Only extend when more allocation is actually required. */
		if (extra_clusters) {
			status = ocfs2_extend_file(inode, NULL, write_end,
						   count);
			if (status < 0) {
				if (status != -ENOSPC)
					mlog_errno(status);
				goto out_unlock;
			}
		}
		break;
	}

	if (appending)
		*ppos = write_pos;

out_unlock:
	ocfs2_meta_unlock(inode, lock_level);

out:
	return status;
}