Exemple #1
0
/*
 * Read the inode's dinode block and return a pointer to the fast symlink
 * target stored inside it.
 *
 * The buffer head obtained from the read is passed back through @bh and
 * is not released here; the returned string points into that buffer's
 * data.  If the read fails, an ERR_PTR() carrying the status is returned.
 */
static char *ocfs2_fast_symlink_getlink(struct inode *inode,
					struct buffer_head **bh)
{
	int status;
	char *link;
	struct ocfs2_dinode *fe;

	mlog_entry_void();

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  bh,
				  OCFS2_BH_CACHED,
				  inode);
	if (status >= 0) {
		/* The link text lives directly in the dinode. */
		fe = (struct ocfs2_dinode *) (*bh)->b_data;
		link = (char *) fe->id2.i_symlink;
	} else {
		mlog_errno(status);
		link = ERR_PTR(status);
	}

	mlog_exit(status);

	return link;
}
Exemple #2
0
/*
 * Try to allocate bits from one specific block group.
 *
 * Reads the group descriptor at @gd_blkno, runs the allocation context's
 * group search on it and, when a run of bits is found, updates the
 * allocator dinode counts and sets the bits in the group.
 *
 * @bit_off and @num_bits receive the start and length of the run found;
 * @bits_left receives the group's free bit count afterwards.
 *
 * Returns 0 on success or a negative error code.  -EIO is returned for an
 * invalid group descriptor; -ENOSPC from the group search is passed
 * through without being logged here.
 */
static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
				  handle_t *handle,
				  u32 bits_wanted,
				  u32 min_bits,
				  u16 *bit_off,
				  unsigned int *num_bits,
				  u64 gd_blkno,
				  u16 *bits_left)
{
	struct inode *alloc_inode = ac->ac_inode;
	struct buffer_head *group_bh = NULL;
	struct ocfs2_group_desc *gd;
	u16 bits_found;
	int ret;

	ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
			       &group_bh, OCFS2_BH_CACHED, alloc_inode);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	gd = (struct ocfs2_group_desc *) group_bh->b_data;
	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
		ret = -EIO;
		goto release;
	}

	ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
				  bit_off, &bits_found);
	if (ret < 0) {
		/* Running out of space is an expected outcome. */
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto release;
	}

	*num_bits = bits_found;

	ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
					       *num_bits,
					       le16_to_cpu(gd->bg_chain));
	if (ret < 0) {
		mlog_errno(ret);
		goto release;
	}

	ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
					 *bit_off, *num_bits);
	if (ret < 0)
		mlog_errno(ret);

	/* Reported even when setting the bits failed. */
	*bits_left = le16_to_cpu(gd->bg_free_bits_count);

release:
	brelse(group_bh);

	return ret;
}
/*
 * Allocate the in-memory slot info for @osb, mark every slot invalid,
 * look up the slot map system inode and read its first block.
 *
 * On success the slot info (holding the inode and buffer head) is stored
 * in osb->slot_info and 0 is returned.  On failure a negative error code
 * is returned and everything acquired here is released.
 */
int ocfs2_init_slot_info(struct ocfs2_super *osb)
{
	int status, i;
	u64 blkno;
	struct inode *inode = NULL;
	struct buffer_head *bh = NULL;
	struct ocfs2_slot_info *si;

	si = kzalloc(sizeof(*si), GFP_KERNEL);
	if (!si) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	spin_lock_init(&si->si_lock);
	si->si_num_slots = osb->max_slots;
	si->si_size = OCFS2_MAX_SLOTS;

	for (i = 0; i < si->si_num_slots; i++)
		si->si_global_node_nums[i] = OCFS2_INVALID_SLOT;

	inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
					    OCFS2_INVALID_SLOT);
	if (!inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &blkno, NULL);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_read_block(osb, blkno, &bh, 0, inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Ownership of inode and bh moves to the slot info from here on. */
	si->si_inode = inode;
	si->si_bh = bh;
	osb->slot_info = si;
bail:
	if (status < 0) {
		/*
		 * si->si_inode is only assigned on success, so the slot info
		 * teardown cannot know about the inode; drop our reference
		 * here or it would leak.
		 */
		if (inode)
			iput(inode);
		if (si)
			ocfs2_free_slot_info(si);
	}

	return status;
}
Exemple #4
0
/*
 * Clear the setuid bit (and setgid, when group-execute is also set) from
 * @inode and journal the new mode into the on-disk dinode.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_write_remove_suid(struct inode *inode)
{
	int ret;
	struct buffer_head *bh = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di;

	mlog_entry("(Inode %llu, mode 0%o)\n",
		   (unsigned long long)oi->ip_blkno, inode->i_mode);

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	/*
	 * Other callers in this code base test the result with IS_ERR(), so
	 * a failed start is an error pointer rather than NULL.  Handle both
	 * so an error pointer is never used as a live handle.
	 */
	if (!handle || IS_ERR(handle)) {
		ret = handle ? PTR_ERR(handle) : -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_trans;
	}

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_bh;
	}

	inode->i_mode &= ~S_ISUID;
	/* setgid is only dropped when the file is also group-executable */
	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
		inode->i_mode &= ~S_ISGID;

	di = (struct ocfs2_dinode *) bh->b_data;
	di->i_mode = cpu_to_le16(inode->i_mode);

	ret = ocfs2_journal_dirty(handle, bh);
	if (ret < 0)
		mlog_errno(ret);
out_bh:
	brelse(bh);
out_trans:
	ocfs2_commit_trans(osb, handle);
out:
	mlog_exit(ret);
	return ret;
}
Exemple #5
0
/*
 * Map logical block @block of @inode to its physical block and read it.
 *
 * Returns the buffer head and sets *err to 0 on success.  On a mapping or
 * read failure NULL is returned and *err is set to -EIO.  A block at or
 * past i_size is only tolerated for readahead (@reada non-zero): NULL is
 * returned and *err is left untouched.
 *
 * TODO: this should probably be merged into ocfs2_get_block
 *
 * However, you now need to pay attention to the cont_prepare_write()
 * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much
 * expects never to extend).
 */
struct buffer_head *ocfs2_bread(struct inode *inode,
				int block, int *err, int reada)
{
	struct buffer_head *bh = NULL;
	u64 p_blkno;
	int rc;
	int readflags = reada ? (OCFS2_BH_CACHED | OCFS2_BH_READAHEAD)
			      : OCFS2_BH_CACHED;

	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!reada);
		return NULL;
	}

	/* The extent lookup is done under the allocation semaphore. */
	down_read(&OCFS2_I(inode)->ip_alloc_sem);
	rc = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
					 NULL);
	up_read(&OCFS2_I(inode)->ip_alloc_sem);
	if (rc < 0) {
		mlog_errno(rc);
		goto fail;
	}

	rc = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh,
			      readflags, inode);
	if (rc >= 0) {
		*err = 0;
		return bh;
	}

fail:
	if (bh)
		brelse(bh);
	*err = -EIO;
	return NULL;
}
Exemple #6
0
/*
 * Fill @page from the inline data of @inode.
 *
 * The page must be locked on entry and the inode must carry the inline
 * data flag.  The page is unlocked before returning, whether or not the
 * read succeeded.  Returns 0 on success or a negative error code.
 */
static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *di_bh = NULL;
	int ret;

	BUG_ON(!PageLocked(page));
	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));

	ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
			       OCFS2_BH_CACHED, inode);
	if (ret)
		mlog_errno(ret);
	else
		ret = ocfs2_read_inline_data(inode, page, di_bh);

	unlock_page(page);

	brelse(di_bh);
	return ret;
}
Exemple #7
0
/*
 * Read the group descriptor at @gd_blkno and check it against its parent
 * allocator dinode @di.
 *
 * *bh may already point at a buffer head on entry; it is only written
 * when it was NULL.  If the parent check fails, the buffer is released.
 * Returns 0 on success or the error from the read or the validation.
 */
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
				u64 gd_blkno, struct buffer_head **bh)
{
	struct buffer_head *tmp = *bh;
	int rc;

	rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
			      ocfs2_validate_group_descriptor);
	if (rc)
		return rc;

	rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
	if (rc) {
		brelse(tmp);
		return rc;
	}

	/* If ocfs2_read_block() got us a new bh, pass it up. */
	if (*bh == NULL)
		*bh = tmp;

	return rc;
}
Exemple #8
0
/*
 * get_block callback for symlinks whose target is not stored inline
 * (fast symlinks must never reach this function).
 *
 * Maps logical block @iblock of the symlink onto disk using the first
 * extent record of the dinode.  For a newly created inode whose result
 * buffer is not yet up to date, the data is copied over from the buffer
 * cache first.
 *
 * Returns 0 on success and -EIO on any failure.
 */
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu\n",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  &bh, OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
		     fe->i_signature);
		goto bail;
	}

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	/* We don't use the page cache to create symlink data, so if
	 * need be, copy it over from the buffer cache. */
	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		/* we haven't locked out transactions, so a commit
		 * could've happened. Since we've got a reference on
		 * the bh, even if it commits while we're doing the
		 * copy, the data is still good. */
		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
			if (!kaddr) {
				mlog(ML_ERROR, "couldn't kmap!\n");
				/* don't leak the sb_getblk() reference */
				brelse(buffer_cache_bh);
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh_result);
		}
		brelse(buffer_cache_bh);
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	if (bh)
		brelse(bh);

	mlog_exit(err);
	return err;
}
Exemple #9
0
/*
 * Fill in a freshly obtained @inode from the dinode block named by
 * args->fi_blkno.
 *
 * Initializes the inode's meta and open lock resources, takes the open
 * and meta cluster locks when that is allowed (see the long comment
 * below), reads and validates the dinode block and populates the inode
 * from it.  The meta lock is dropped again before returning.
 *
 * Returns 0 on success or a negative error code.  Except when @inode
 * itself (or its superblock) is NULL, every failure leaves the inode
 * marked bad.
 */
static int ocfs2_read_locked_inode(struct inode *inode,
				   struct ocfs2_find_inode_args *args)
{
	struct super_block *sb;
	struct ocfs2_super *osb;
	struct ocfs2_dinode *fe;
	struct buffer_head *bh = NULL;
	int status, can_lock;
	u32 generation = 0;

	mlog_entry("(0x%p, 0x%p)\n", inode, args);

	status = -EINVAL;
	if (inode == NULL || inode->i_sb == NULL) {
		mlog(ML_ERROR, "bad inode\n");
		return status;
	}
	sb = inode->i_sb;
	osb = OCFS2_SB(sb);

	if (!args) {
		mlog(ML_ERROR, "bad inode args\n");
		make_bad_inode(inode);
		return status;
	}

	/*
	 * To improve performance of cold-cache inode stats, we take
	 * the cluster lock here if possible.
	 *
	 * Generally, OCFS2 never trusts the contents of an inode
	 * unless it's holding a cluster lock, so taking it here isn't
	 * a correctness issue as much as it is a performance
	 * improvement.
	 *
	 * There are three times when taking the lock is not a good idea:
	 *
	 * 1) During startup, before we have initialized the DLM.
	 *
	 * 2) If we are reading certain system files which never get
	 *    cluster locks (local alloc, truncate log).
	 *
	 * 3) If the process doing the iget() is responsible for
	 *    orphan dir recovery. We're holding the orphan dir lock and
	 *    can get into a deadlock with another process on another
	 *    node in ->delete_inode().
	 *
	 * #1 and #2 can be simply solved by never taking the lock
	 * here for system files (which are the only type we read
	 * during mount). It's a heavier approach, but our main
	 * concern is user-accessible files anyway.
	 *
	 * #3 works itself out because we'll eventually take the
	 * cluster lock before trusting anything anyway.
	 */
	can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
		&& !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
		&& !ocfs2_mount_local(osb);

	/*
	 * To maintain backwards compatibility with older versions of
	 * ocfs2-tools, we still store the generation value for system
	 * files. The only ones that actually matter to userspace are
	 * the journals, but it's easier and inexpensive to just flag
	 * all system files similarly.
	 */
	if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
		generation = osb->fs_generation;

	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
				  OCFS2_LOCK_TYPE_META,
				  generation, inode);

	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
				  OCFS2_LOCK_TYPE_OPEN,
				  0, inode);

	if (can_lock) {
		status = ocfs2_open_lock(inode);
		if (status) {
			make_bad_inode(inode);
			mlog_errno(status);
			return status;
		}
		status = ocfs2_meta_lock(inode, NULL, 0);
		if (status) {
			make_bad_inode(inode);
			mlog_errno(status);
			return status;
		}
	}

	/* Orphan recovery skipped the locks above; try the open lock now. */
	if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
		status = ocfs2_try_open_lock(inode, 0);
		if (status) {
			make_bad_inode(inode);	
			return status;
		}
	}

	status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
				  can_lock ? inode : NULL);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Default error for the validation and populate steps below. */
	status = -EINVAL;
	fe = (struct ocfs2_dinode *) bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
		     (unsigned long long)args->fi_blkno, 7,
		     fe->i_signature);
		goto bail;
	}

	/*
	 * This is a code bug. Right now the caller needs to
	 * understand whether it is asking for a system file inode or
	 * not so the proper lock names can be built.
	 */
	mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
			!!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
			"Inode %llu: system file state is ambigous\n",
			(unsigned long long)args->fi_blkno);

	if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
	    S_ISBLK(le16_to_cpu(fe->i_mode)))
    		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));

	if (ocfs2_populate_inode(inode, fe, 0) < 0)
		goto bail;

	BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));

	status = 0;

bail:
	/* Only the meta lock is dropped here; the open lock is not. */
	if (can_lock)
		ocfs2_meta_unlock(inode, 0);

	if (status < 0)
		make_bad_inode(inode);

	if (args && bh)
		brelse(bh);

	mlog_exit(status);
	return status;
}
Exemple #10
0
/*
 * Extend the filesystem to the new number of clusters specified.  This entry
 * point is only used to extend the current filesystem to the end of the last
 * existing group.
 *
 * Returns 0 on success (or when @new_clusters is 0), -EROFS on a
 * read-only filesystem, -EINVAL for a negative cluster count or when the
 * new clusters would not fit in the last group, and otherwise the negative
 * error code of the step that failed.
 */
int ocfs2_group_extend(struct inode * inode, int new_clusters)
{
	int ret;
	handle_t *handle;
	struct buffer_head *main_bm_bh = NULL;
	struct buffer_head *group_bh = NULL;
	struct inode *main_bm_inode = NULL;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_group_desc *group = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u16 cl_bpc;
	u32 first_new_cluster;
	u64 lgd_blkno;

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
		return -EROFS;

	if (new_clusters < 0)
		return -EINVAL;
	else if (new_clusters == 0)
		return 0;

	main_bm_inode = ocfs2_get_system_file_inode(osb,
						    GLOBAL_BITMAP_SYSTEM_INODE,
						    OCFS2_INVALID_SLOT);
	if (!main_bm_inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto out;
	}

	mutex_lock(&main_bm_inode->i_mutex);

	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;

	/* Validate the dinode before trusting any of its fields. */
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
		ret = -EIO;
		goto out_unlock;
	}

	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
				 ocfs2_group_bitmap_size(osb->sb) * 8) {
		mlog(ML_ERROR, "The disk is too old and small. "
		     "Force to do offline resize.\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	/* The last existing group is the one holding the final cluster. */
	first_new_cluster = le32_to_cpu(fe->i_clusters);
	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
					      first_new_cluster - 1);

	ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	group = (struct ocfs2_group_desc *)group_bh->b_data;

	ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
	if (ret) {
		mlog_errno(ret);
		goto out_unlock;
	}

	/* Refuse to grow the group beyond the clusters-per-group limit. */
	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
	if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
		le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	mlog(0, "extend the last group at %llu, new clusters = %d\n",
	     (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);

	handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
	if (IS_ERR(handle)) {
		/* Report why the transaction failed, not a generic -EINVAL. */
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	/* update the last group descriptor and inode. */
	ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode,
						main_bm_bh, group_bh,
						first_new_cluster,
						new_clusters);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_update_super_and_backups(main_bm_inode, new_clusters);

out_commit:
	ocfs2_commit_trans(osb, handle);
out_unlock:
	brelse(group_bh);
	brelse(main_bm_bh);

	ocfs2_inode_unlock(main_bm_inode, 1);

out_mutex:
	mutex_unlock(&main_bm_inode->i_mutex);
	iput(main_bm_inode);

out:
	mlog_exit_void();
	return ret;
}
/*
 * We want to free the bitmap bits outside of any recovery context as
 * we'll need a cluster lock to do so, but we must clear the local
 * alloc before giving up the recovered node's journal. To solve this,
 * we kmalloc a copy of the local alloc before it is changed, for the
 * caller to process with ocfs2_complete_local_alloc_recovery.
 *
 * On success *alloc_copy points at the copy and 0 is returned.  On
 * failure *alloc_copy is NULL and a negative error code is returned.
 */
int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
				     int slot_num,
				     struct ocfs2_dinode **alloc_copy)
{
	int status;
	struct inode *inode;
	struct buffer_head *alloc_bh = NULL;
	struct ocfs2_dinode *alloc;

	mlog_entry("(slot_num = %d)\n", slot_num);

	*alloc_copy = NULL;

	inode = ocfs2_get_system_file_inode(osb,
					    LOCAL_ALLOC_SYSTEM_INODE,
					    slot_num);
	if (inode == NULL) {
		status = -EINVAL;
		mlog_errno(status);
		goto out;
	}

	mutex_lock(&inode->i_mutex);

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
				  &alloc_bh, 0, inode);
	if (status < 0) {
		mlog_errno(status);
		goto out_release;
	}

	*alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
	if (*alloc_copy == NULL) {
		status = -ENOMEM;
		goto out_release;
	}
	memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);

	/* The copy is taken; now wipe the on-disk local alloc. */
	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
	ocfs2_clear_local_alloc(alloc);

	status = ocfs2_write_block(osb, alloc_bh, inode);
	if (status < 0) {
		mlog_errno(status);
		/* The caller must not see a copy when we failed. */
		kfree(*alloc_copy);
		*alloc_copy = NULL;
	}

out_release:
	if (alloc_bh)
		brelse(alloc_bh);

	mutex_unlock(&inode->i_mutex);
	iput(inode);

out:
	mlog_exit(status);
	return status;
}
Exemple #12
0
/*
 * Add @clusters_to_add clusters of allocation to @inode.
 *
 * Reads the inode's dinode, then repeatedly locks the allocators, starts a
 * transaction and calls ocfs2_do_extend_allocation().  That call reports
 * through "why" whether the work must be restarted: RESTART_TRANS extends
 * the current transaction and retries, RESTART_META releases everything
 * and starts over from restart_all.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_extend_allocation(struct inode *inode,
				   u32 clusters_to_add)
{
	int status = 0;
	int restart_func = 0;
	int drop_alloc_sem = 0;
	int credits;
	u32 prev_clusters, logical_start;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *fe = NULL;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);

	/*
	 * This function only exists for file systems which don't
	 * support holes.
	 */
	BUG_ON(ocfs2_sparse_alloc(osb));

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
				  OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	fe = (struct ocfs2_dinode *) bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto leave;
	}

	logical_start = OCFS2_I(inode)->ip_clusters;

restart_all:
	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);

	/* blocks people in read/write from reading our allocation
	 * until we're done changing it. We depend on i_mutex to block
	 * other extend/truncate calls while we're here. Ordering wrt
	 * start_trans is important here -- always do it before! */
	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	drop_alloc_sem = 1;

	status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
				       &meta_ac);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* NULL the handle so the cleanup below doesn't commit it */
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	/* reserve a write to the file entry early on - that way if we
	 * run out of credits in the allocation path, we can still
	 * update i_size. */
	status = ocfs2_journal_access(handle, inode, bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	prev_clusters = OCFS2_I(inode)->ip_clusters;

	status = ocfs2_do_extend_allocation(osb,
					    inode,
					    &logical_start,
					    clusters_to_add,
					    bh,
					    handle,
					    data_ac,
					    meta_ac,
					    &why);
	/* -EAGAIN is not fatal here; the restart reason is in "why" */
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* account for whatever this pass managed to allocate */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    fe,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
	     le32_to_cpu(fe->i_clusters),
	     (unsigned long long)le64_to_cpu(fe->i_size));
	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));

leave:
	/*
	 * Cleanup runs both on exit and before a full restart, so every
	 * resource is reset to its initial state once released.
	 */
	if (drop_alloc_sem) {
		up_write(&OCFS2_I(inode)->ip_alloc_sem);
		drop_alloc_sem = 0;
	}
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}
	/* bh is kept across restarts and only released on final exit */
	if (bh) {
		brelse(bh);
		bh = NULL;
	}

	mlog_exit(status);
	return status;
}
Exemple #13
0
/*
 * Give @count bits starting at @start_bit back to the block group at
 * @bg_blkno, then credit them to the group's chain record and subtract
 * them from the allocator dinode's used count.
 *
 * Expects the suballoc inode to already be locked.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_free_suballoc_bits(handle_t *handle,
				    struct inode *alloc_inode,
				    struct buffer_head *alloc_bh,
				    unsigned int start_bit,
				    u64 bg_blkno,
				    unsigned int count)
{
	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
	struct buffer_head *group_bh = NULL;
	struct ocfs2_group_desc *group;
	u32 used_before;
	int status = 0;

	mlog_entry_void();

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}
	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));

	mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
	     (unsigned long long)bg_blkno, start_bit);

	status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
				  alloc_inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	group = (struct ocfs2_group_desc *) group_bh->b_data;
	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
	if (status) {
		mlog_errno(status);
		goto bail;
	}
	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));

	/* First the group bitmap ... */
	status = ocfs2_block_group_clear_bits(handle, alloc_inode,
					      group, group_bh,
					      start_bit, count);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* ... then the bookkeeping in the allocator dinode. */
	status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
		     count);
	used_before = le32_to_cpu(fe->id1.bitmap1.i_used);
	fe->id1.bitmap1.i_used = cpu_to_le32(used_before - count);

	status = ocfs2_journal_dirty(handle, alloc_bh);
	if (status < 0)
		mlog_errno(status);

bail:
	if (group_bh)
		brelse(group_bh);

	mlog_exit(status);
	return status;
}
Exemple #14
0
/*
 * Walk the block groups of chain ac->ac_chain and allocate bits from the
 * first group in which the context's group search finds room.
 *
 * On success @bit_off and @num_bits describe the run that was claimed,
 * @bg_blkno is the block number of the group it came from and @bits_left
 * is that group's remaining free bit count.
 *
 * Returns 0 on success or a negative error code; -ENOSPC means no group
 * in the chain could satisfy the request.
 */
static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
			      handle_t *handle,
			      u32 bits_wanted,
			      u32 min_bits,
			      u16 *bit_off,
			      unsigned int *num_bits,
			      u64 *bg_blkno,
			      u16 *bits_left)
{
	int status;
	u16 chain, tmp_bits;
	u32 tmp_used;
	u64 next_group;
	struct inode *alloc_inode = ac->ac_inode;
	struct buffer_head *group_bh = NULL;
	struct buffer_head *prev_group_bh = NULL;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
	struct ocfs2_group_desc *bg;

	chain = ac->ac_chain;
	mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
	     bits_wanted, chain,
	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);

	/* start with the group at the head of the chain */
	status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
				  le64_to_cpu(cl->cl_recs[chain].c_blkno),
				  &group_bh, OCFS2_BH_CACHED, alloc_inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	bg = (struct ocfs2_group_desc *) group_bh->b_data;
	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/* (this value is overwritten by the loop condition right below) */
	status = -ENOSPC;
	/* for now, the chain search is a bit simplistic. We just use
	 * the 1st group with any empty bits. */
	while ((status = ac->ac_group_search(alloc_inode, group_bh,
					     bits_wanted, min_bits, bit_off,
					     &tmp_bits)) == -ENOSPC) {
		/* end of chain: leave with status still -ENOSPC */
		if (!bg->bg_next_group)
			break;

		if (prev_group_bh) {
			brelse(prev_group_bh);
			prev_group_bh = NULL;
		}
		/* keep the group we just searched as "previous" for relinking */
		next_group = le64_to_cpu(bg->bg_next_group);
		prev_group_bh = group_bh;
		group_bh = NULL;
		status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
					  next_group, &group_bh,
					  OCFS2_BH_CACHED, alloc_inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		bg = (struct ocfs2_group_desc *) group_bh->b_data;
		status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
	}
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
	     tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));

	*num_bits = tmp_bits;

	BUG_ON(*num_bits == 0);

	/*
	 * Keep track of previous block descriptor read. When
	 * we find a target, if we have read more than X
	 * number of descriptors, and the target is reasonably
	 * empty, relink him to top of his chain.
	 *
	 * We've read 0 extra blocks and only send one more to
	 * the transaction, yet the next guy to search has a
	 * much easier time.
	 *
	 * Do this *after* figuring out how many bits we're taking out
	 * of our target group.
	 */
	if (ac->ac_allow_chain_relink &&
	    (prev_group_bh) &&
	    (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
		status = ocfs2_relink_block_group(handle, alloc_inode,
						  ac->ac_bh, group_bh,
						  prev_group_bh, chain);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Ok, claim our bits now: set the info on dinode, chainlist
	 * and then the group */
	status = ocfs2_journal_access(handle,
				      alloc_inode,
				      ac->ac_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
	fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
	le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));

	status = ocfs2_journal_dirty(handle,
				     ac->ac_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_block_group_set_bits(handle,
					    alloc_inode,
					    bg,
					    group_bh,
					    *bit_off,
					    *num_bits);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
	     (unsigned long long)le64_to_cpu(fe->i_blkno));

	*bg_blkno = le64_to_cpu(bg->bg_blkno);
	*bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
	if (group_bh)
		brelse(group_bh);
	if (prev_group_bh)
		brelse(prev_group_bh);

	mlog_exit(status);
	return status;
}
Exemple #15
0
/*
 * Find the leaf containing the interval we want.  While we're on our
 * way down the tree, fill in every record we see at any depth, because
 * we might want it later.
 *
 * Note that this code is run without ip_lock.  That's because it
 * sleeps while reading.  If someone is also filling the extent list at
 * the same time we are, we might have to restart.
 *
 * @el is the extent list to start from; [@cpos, @cpos + @clusters) is the
 * interval being looked up.
 *
 * Returns 0 on success, -EBADR when the tree is inconsistent (a record
 * past ip_clusters, duplicate or missing records), -ESRCH when the
 * interval straddles a record, -EIO for an invalid extent block, or the
 * error of a failed read or map insert.
 */
static int ocfs2_extent_map_find_leaf(struct inode *inode,
				      u32 cpos, u32 clusters,
				      struct ocfs2_extent_list *el)
{
	int i, ret;
	struct buffer_head *eb_bh = NULL;
	u64 blkno;
	u32 rec_end;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;

	/*
	 * The bh data containing the el cannot change here, because
	 * we hold alloc_sem.  So we can do this without other
	 * locks.
	 */
	while (el->l_tree_depth)
	{
		blkno = 0;
		for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
			rec = &el->l_recs[i];
			rec_end = (le32_to_cpu(rec->e_cpos) +
				   le32_to_cpu(rec->e_clusters));

			ret = -EBADR;
			if (rec_end > OCFS2_I(inode)->ip_clusters) {
				mlog_errno(ret);
				ocfs2_error(inode->i_sb,
					    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
					    i,
					    (unsigned long long)le64_to_cpu(rec->e_blkno),
					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
					    OCFS2_I(inode)->ip_clusters);
				goto out_free;
			}

			/*
			 * Records entirely before or entirely after our
			 * interval are just remembered in the map.
			 */
			if (rec_end <= cpos ||
			    (cpos + clusters) <= le32_to_cpu(rec->e_cpos)) {
				ret = ocfs2_extent_map_insert(inode, rec,
						le16_to_cpu(el->l_tree_depth));
				if (ret && (ret != -EEXIST)) {
					mlog_errno(ret);
					goto out_free;
				}
				continue;
			}

			/*
			 * We've found a record that matches our
			 * interval.  We don't insert it because we're
			 * about to traverse it.
			 */

			/* Check to see if we're straddling */
			ret = -ESRCH;
			if (!ocfs2_extent_rec_contains_clusters(rec,
							        cpos,
								clusters)) {
				mlog_errno(ret);
				goto out_free;
			}

			/*
			 * If we've already found a record, the el has
			 * two records covering the same interval.
			 * EEEK!
			 */
			ret = -EBADR;
			if (blkno) {
				mlog_errno(ret);
				ocfs2_error(inode->i_sb,
					    "Multiple extents for (cpos = %u, clusters = %u) on inode %llu; e_blkno %llu and rec %d at e_blkno %llu\n",
					    cpos, clusters,
					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
					    (unsigned long long)blkno, i,
					    (unsigned long long)le64_to_cpu(rec->e_blkno));
				goto out_free;
			}

			blkno = le64_to_cpu(rec->e_blkno);
		}

		/*
		 * We don't support holes, and we're still up
		 * in the branches, so we'd better have found someone
		 */
		ret = -EBADR;
		if (!blkno) {
			ocfs2_error(inode->i_sb,
				    "No record found for (cpos = %u, clusters = %u) on inode %llu\n",
				    cpos, clusters,
				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
			mlog_errno(ret);
			goto out_free;
		}

		/* Done with the previous level's block; descend one level. */
		if (eb_bh) {
			brelse(eb_bh);
			eb_bh = NULL;
		}
		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       blkno, &eb_bh, OCFS2_BH_CACHED,
				       inode);
		if (ret) {
			mlog_errno(ret);
			goto out_free;
		}
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			ret = -EIO;
			goto out_free;
		}
		el = &eb->h_list;
	}

	BUG_ON(el->l_tree_depth);

	/* We are at the leaf: remember every record it holds. */
	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
		    OCFS2_I(inode)->ip_clusters) {
			ret = -EBADR;
			mlog_errno(ret);
			ocfs2_error(inode->i_sb,
				    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
				    i,
				    (unsigned long long)le64_to_cpu(rec->e_blkno),
				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
				    OCFS2_I(inode)->ip_clusters);
			/* go through out_free so eb_bh is not leaked */
			goto out_free;
		}

		ret = ocfs2_extent_map_insert(inode, rec,
					      le16_to_cpu(el->l_tree_depth));
		if (ret && (ret != -EEXIST)) {
			mlog_errno(ret);
			goto out_free;
		}
	}

	ret = 0;

out_free:
	if (eb_bh)
		brelse(eb_bh);

	return ret;
}
Exemple #16
0
/*
 * This lookup actually will read from disk.  It has one invariant:
 * It will never re-traverse blocks.  This means that all inserts should
 * be new regions or more granular regions (both allowed by insert).
 *
 * If the extent map already holds a leaf entry for the interval, it is
 * returned straight away.  Otherwise the search starts from the extent
 * block of the cached non-leaf entry or, when nothing is cached, from the
 * inode's dinode, and the map is filled in on the way down.
 *
 * On success *ret_ent is set and 0 is returned.  Returns -EIO for an
 * invalid block, -ESRCH when no entry exists after the tree walk, or the
 * error of a failed read or leaf search.
 */
static int ocfs2_extent_map_lookup_read(struct inode *inode,
					u32 cpos,
					u32 clusters,
					struct ocfs2_extent_map_entry **ret_ent)
{
	int ret;
	u64 blkno;
	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
	struct ocfs2_extent_map_entry *ent;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_list *el;

	spin_lock(&OCFS2_I(inode)->ip_lock);
	ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
	if (ent) {
		if (!ent->e_tree_depth) {
			spin_unlock(&OCFS2_I(inode)->ip_lock);
			*ret_ent = ent;
			return 0;
		}
		/* grab the block number before dropping the lock */
		blkno = le64_to_cpu(ent->e_rec.e_blkno);
		spin_unlock(&OCFS2_I(inode)->ip_lock);

		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh,
				       OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			if (bh)
				brelse(bh);
			return ret;
		}
		eb = (struct ocfs2_extent_block *)bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			brelse(bh);
			return -EIO;
		}
		el = &eb->h_list;
	} else {
		spin_unlock(&OCFS2_I(inode)->ip_lock);

		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       OCFS2_I(inode)->ip_blkno, &bh,
				       OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			if (bh)
				brelse(bh);
			return ret;
		}
		di = (struct ocfs2_dinode *)bh->b_data;
		if (!OCFS2_IS_VALID_DINODE(di)) {
			/*
			 * di points into bh's data, so report the error
			 * before the buffer reference is dropped.
			 */
			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
			brelse(bh);
			return -EIO;
		}
		el = &di->id2.i_list;
	}

	ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
	brelse(bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* The walk above should have inserted the leaf entry we need. */
	ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
	if (!ent) {
		ret = -ESRCH;
		mlog_errno(ret);
		return ret;
	}

	/* FIXME: Make sure this isn't a corruption */
	BUG_ON(ent->e_tree_depth);

	*ret_ent = ent;

	return 0;
}
/*
 * Read this node's local alloc inode from disk, sanity-check it and
 * enable the local allocator.
 *
 * On success the dinode's buffer head is kept in osb->local_alloc_bh and
 * the state is set to OCFS2_LA_ENABLED.  A local alloc that still shows
 * used bits is only reported in the log; loading continues.
 *
 * Returns 0 on success or a negative error code.
 */
int ocfs2_load_local_alloc(struct ocfs2_super *osb)
{
	int status = 0;
	struct ocfs2_dinode *alloc = NULL;
	struct buffer_head *alloc_bh = NULL;
	u32 num_used;
	struct inode *inode = NULL;
	struct ocfs2_local_alloc *la;

	mlog_entry_void();

	/* read the alloc off disk */
	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
					    osb->slot_num);
	if (!inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
				  &alloc_bh, 0, inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	/* NOTE(review): passes if either flag is set - confirm both aren't required */
	if (!(le32_to_cpu(alloc->i_flags) &
	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
		mlog(ML_ERROR, "Invalid local alloc inode, %"MLFu64"\n",
		     OCFS2_I(inode)->ip_blkno);
		status = -EINVAL;
		goto bail;
	}

	if ((la->la_size == 0) ||
	    (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
		mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
		     le16_to_cpu(la->la_size));
		status = -EINVAL;
		goto bail;
	}

	/* do a little verification. */
	num_used = ocfs2_local_alloc_count_bits(alloc);

	/* hopefully the local alloc has always been recovered before
	 * we load it. */
	if (num_used
	    || alloc->id1.bitmap1.i_used
	    || alloc->id1.bitmap1.i_total
	    || la->la_bm_off)
		/* convert la_bm_off like the other on-disk fields */
		mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
		     "found = %u, set = %u, taken = %u, off = %u\n",
		     num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
		     le32_to_cpu(alloc->id1.bitmap1.i_total),
		     le32_to_cpu(la->la_bm_off));

	/* the buffer head is retained in the superblock on success */
	osb->local_alloc_bh = alloc_bh;
	osb->local_alloc_state = OCFS2_LA_ENABLED;

bail:
	if (status < 0)
		if (alloc_bh)
			brelse(alloc_bh);
	if (inode)
		iput(inode);

	mlog_exit(status);
	return status;
}