Exemple #1
0
static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
				       struct inode *inode_alloc, u64 blkno,
				       struct ocfs2_info_freeinode *fi,
				       u32 slot)
{
	int status = 0, unlock = 0;

	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *dinode_alloc = NULL;

	if (inode_alloc)
		mutex_lock(&inode_alloc->i_mutex);

	if (o2info_coherent(&fi->ifi_req)) {
		status = ocfs2_inode_lock(inode_alloc, &bh, 0);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		unlock = 1;
	} else {
		status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	dinode_alloc = (struct ocfs2_dinode *)bh->b_data;

	fi->ifi_stat[slot].lfi_total =
		le32_to_cpu(dinode_alloc->id1.bitmap1.i_total);
	fi->ifi_stat[slot].lfi_free =
		le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) -
		le32_to_cpu(dinode_alloc->id1.bitmap1.i_used);

bail:
	if (unlock)
		ocfs2_inode_unlock(inode_alloc, 0);

	if (inode_alloc)
		mutex_unlock(&inode_alloc->i_mutex);

	brelse(bh);

	return status;
}
static void ocfs2_update_super_and_backups(struct inode *inode,
					   int new_clusters)
{
	int ret;
	u32 clusters = 0;
	struct buffer_head *super_bh = NULL;
	struct ocfs2_dinode *super_di = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	/*
	 * update the superblock last.
	 * It doesn't matter if the write failed.
	 */
	ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
				     &super_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	super_di = (struct ocfs2_dinode *)super_bh->b_data;
	le32_add_cpu(&super_di->i_clusters, new_clusters);
	clusters = le32_to_cpu(super_di->i_clusters);

	ret = ocfs2_write_super_or_backup(osb, super_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB))
		ret = update_backups(inode, clusters, super_bh->b_data);

out:
	brelse(super_bh);
	if (ret)
		printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s"
			" during fs resize. This condition is not fatal,"
			" but fsck.ocfs2 should be run to fix it\n",
			osb->dev_str);
	return;
}
static int update_backups(struct inode * inode, u32 clusters, char *data)
{
	int i, ret = 0;
	u32 cluster;
	u64 blkno;
	struct buffer_head *backup = NULL;
	struct ocfs2_dinode *backup_di = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	/* calculate the real backups we need to update. */
	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
		if (cluster > clusters)
			break;

		ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
		if (ret < 0) {
			mlog_errno(ret);
			break;
		}

		memcpy(backup->b_data, data, inode->i_sb->s_blocksize);

		backup_di = (struct ocfs2_dinode *)backup->b_data;
		backup_di->i_blkno = cpu_to_le64(blkno);

		ret = ocfs2_write_super_or_backup(osb, backup);
		brelse(backup);
		backup = NULL;
		if (ret < 0) {
			mlog_errno(ret);
			break;
		}
	}

	return ret;
}
/* Add a new group descriptor to global_bitmap. */
int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
{
	int ret;
	handle_t *handle;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode = NULL;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *group_bh = NULL;
	struct ocfs2_group_desc *group = NULL;
	struct ocfs2_chain_list *cl;
	struct ocfs2_chain_rec *cr;
	u16 cl_bpc;

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
		return -EROFS;

	main_bm_inode = ocfs2_get_system_file_inode(osb,
						    GLOBAL_BITMAP_SYSTEM_INODE,
						    OCFS2_INVALID_SLOT);
	if (!main_bm_inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto out;
	}

	mutex_lock(&main_bm_inode->i_mutex);

	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;

	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
		ocfs2_group_bitmap_size(osb->sb, 0,
					osb->s_feature_incompat) * 8) {
		mlog(ML_ERROR, "The disk is too old and small."
		     " Force to do offline resize.");
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
	if (ret < 0) {
		mlog(ML_ERROR, "Can't read the group descriptor # %llu "
		     "from the device.", (unsigned long long)input->group);
		goto out_unlock;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);

	ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_unlock;
	}

	trace_ocfs2_group_add((unsigned long long)input->group,
			       input->chain, input->clusters, input->frees);

	handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
	if (IS_ERR(handle)) {
		mlog_errno(PTR_ERR(handle));
		ret = -EINVAL;
		goto out_unlock;
	}

	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
	cl = &fe->id2.i_chain;
	cr = &cl->cl_recs[input->chain];

	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	group = (struct ocfs2_group_desc *)group_bh->b_data;
	group->bg_next_group = cr->c_blkno;
	ocfs2_journal_dirty(handle, group_bh);

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
				      main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) {
		le16_add_cpu(&cl->cl_next_free_rec, 1);
		memset(cr, 0, sizeof(struct ocfs2_chain_rec));
	}

	cr->c_blkno = cpu_to_le64(input->group);
	le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
	le32_add_cpu(&cr->c_free, input->frees * cl_bpc);

	le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc);
	le32_add_cpu(&fe->id1.bitmap1.i_used,
		     (input->clusters - input->frees) * cl_bpc);
	le32_add_cpu(&fe->i_clusters, input->clusters);

	ocfs2_journal_dirty(handle, main_bm_bh);

	spin_lock(&OCFS2_I(main_bm_inode)->ip_lock);
	OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
	le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits);
	spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock);
	i_size_write(main_bm_inode, le64_to_cpu(fe->i_size));

	ocfs2_update_super_and_backups(main_bm_inode, input->clusters);

out_commit:
	ocfs2_commit_trans(osb, handle);
out_unlock:
	brelse(group_bh);
	brelse(main_bm_bh);

	ocfs2_inode_unlock(main_bm_inode, 1);

out_mutex:
	mutex_unlock(&main_bm_inode->i_mutex);
	iput(main_bm_inode);

out:
	return ret;
}
Exemple #5
0
static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
					   struct inode *gb_inode, u64 blkno,
					   struct ocfs2_info_freefrag *ffg)
{
	u32 chunks_in_group;
	int status = 0, unlock = 0, i;

	struct buffer_head *bh = NULL;
	struct ocfs2_chain_list *cl = NULL;
	struct ocfs2_chain_rec *rec = NULL;
	struct ocfs2_dinode *gb_dinode = NULL;

	if (gb_inode)
		mutex_lock(&gb_inode->i_mutex);

	if (o2info_coherent(&ffg->iff_req)) {
		status = ocfs2_inode_lock(gb_inode, &bh, 0);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		unlock = 1;
	} else {
		status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	gb_dinode = (struct ocfs2_dinode *)bh->b_data;
	cl = &(gb_dinode->id2.i_chain);

	/*
	 * Chunksize(in) clusters from userspace should be
	 * less than clusters in a group.
	 */
	if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) {
		status = -EINVAL;
		goto bail;
	}

	memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats));

	ffg->iff_ffs.ffs_min = ~0U;
	ffg->iff_ffs.ffs_clusters =
			le32_to_cpu(gb_dinode->id1.bitmap1.i_total);
	ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters -
			le32_to_cpu(gb_dinode->id1.bitmap1.i_used);

	chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1;

	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
		rec = &(cl->cl_recs[i]);
		status = ocfs2_info_freefrag_scan_chain(osb, gb_inode,
							gb_dinode,
							rec, ffg,
							chunks_in_group);
		if (status)
			goto bail;
	}

	if (ffg->iff_ffs.ffs_free_chunks_real)
		ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg /
					ffg->iff_ffs.ffs_free_chunks_real);
bail:
	if (unlock)
		ocfs2_inode_unlock(gb_inode, 0);

	if (gb_inode)
		mutex_unlock(&gb_inode->i_mutex);

	if (gb_inode)
		iput(gb_inode);

	brelse(bh);

	return status;
}
Exemple #6
0
static int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb,
					  struct inode *gb_inode,
					  struct ocfs2_dinode *gb_dinode,
					  struct ocfs2_chain_rec *rec,
					  struct ocfs2_info_freefrag *ffg,
					  u32 chunks_in_group)
{
	int status = 0, used;
	u64 blkno;

	struct buffer_head *bh = NULL;
	struct ocfs2_group_desc *bg = NULL;

	unsigned int max_bits, num_clusters;
	unsigned int offset = 0, cluster, chunk;
	unsigned int chunk_free, last_chunksize = 0;

	if (!le32_to_cpu(rec->c_free))
		goto bail;

	do {
		if (!bg)
			blkno = le64_to_cpu(rec->c_blkno);
		else
			blkno = le64_to_cpu(bg->bg_next_group);

		if (bh) {
			brelse(bh);
			bh = NULL;
		}

		if (o2info_coherent(&ffg->iff_req))
			status = ocfs2_read_group_descriptor(gb_inode,
							     gb_dinode,
							     blkno, &bh);
		else
			status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);

		if (status < 0) {
			mlog(ML_ERROR, "Can't read the group descriptor # "
			     "%llu from device.", (unsigned long long)blkno);
			status = -EIO;
			goto bail;
		}

		bg = (struct ocfs2_group_desc *)bh->b_data;

		if (!le16_to_cpu(bg->bg_free_bits_count))
			continue;

		max_bits = le16_to_cpu(bg->bg_bits);
		offset = 0;

		for (chunk = 0; chunk < chunks_in_group; chunk++) {
			/*
			 * last chunk may be not an entire one.
			 */
			if ((offset + ffg->iff_chunksize) > max_bits)
				num_clusters = max_bits - offset;
			else
				num_clusters = ffg->iff_chunksize;

			chunk_free = 0;
			for (cluster = 0; cluster < num_clusters; cluster++) {
				used = ocfs2_test_bit(offset,
						(unsigned long *)bg->bg_bitmap);
				/*
				 * - chunk_free counts free clusters in #N chunk.
				 * - last_chunksize records the size(in) clusters
				 *   for the last real free chunk being counted.
				 */
				if (!used) {
					last_chunksize++;
					chunk_free++;
				}

				if (used && last_chunksize) {
					ocfs2_info_update_ffg(ffg,
							      last_chunksize);
					last_chunksize = 0;
				}

				offset++;
			}

			if (chunk_free == ffg->iff_chunksize)
				ffg->iff_ffs.ffs_free_chunks++;
		}

		/*
		 * need to update the info for last free chunk.
		 */
		if (last_chunksize)
			ocfs2_info_update_ffg(ffg, last_chunksize);

	} while (le64_to_cpu(bg->bg_next_group));

bail:
	brelse(bh);

	return status;
}
Exemple #7
0
static int ocfs2_read_locked_inode(struct inode *inode,
				   struct ocfs2_find_inode_args *args)
{
	struct super_block *sb;
	struct ocfs2_super *osb;
	struct ocfs2_dinode *fe;
	struct buffer_head *bh = NULL;
	int status, can_lock;
	u32 generation = 0;

	status = -EINVAL;
	if (inode == NULL || inode->i_sb == NULL) {
		mlog(ML_ERROR, "bad inode\n");
		return status;
	}
	sb = inode->i_sb;
	osb = OCFS2_SB(sb);

	if (!args) {
		mlog(ML_ERROR, "bad inode args\n");
		make_bad_inode(inode);
		return status;
	}

	/*
	 * To improve performance of cold-cache inode stats, we take
	 * the cluster lock here if possible.
	 *
	 * Generally, OCFS2 never trusts the contents of an inode
	 * unless it's holding a cluster lock, so taking it here isn't
	 * a correctness issue as much as it is a performance
	 * improvement.
	 *
	 * There are three times when taking the lock is not a good idea:
	 *
	 * 1) During startup, before we have initialized the DLM.
	 *
	 * 2) If we are reading certain system files which never get
	 *    cluster locks (local alloc, truncate log).
	 *
	 * 3) If the process doing the iget() is responsible for
	 *    orphan dir recovery. We're holding the orphan dir lock and
	 *    can get into a deadlock with another process on another
	 *    node in ->delete_inode().
	 *
	 * #1 and #2 can be simply solved by never taking the lock
	 * here for system files (which are the only type we read
	 * during mount). It's a heavier approach, but our main
	 * concern is user-accessible files anyway.
	 *
	 * #3 works itself out because we'll eventually take the
	 * cluster lock before trusting anything anyway.
	 */
	can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
		&& !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
		&& !ocfs2_mount_local(osb);

	trace_ocfs2_read_locked_inode(
		(unsigned long long)OCFS2_I(inode)->ip_blkno, can_lock);

	/*
	 * To maintain backwards compatibility with older versions of
	 * ocfs2-tools, we still store the generation value for system
	 * files. The only ones that actually matter to userspace are
	 * the journals, but it's easier and inexpensive to just flag
	 * all system files similarly.
	 */
	if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
		generation = osb->fs_generation;

	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres,
				  OCFS2_LOCK_TYPE_META,
				  generation, inode);

	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
				  OCFS2_LOCK_TYPE_OPEN,
				  0, inode);

	if (can_lock) {
		status = ocfs2_open_lock(inode);
		if (status) {
			make_bad_inode(inode);
			mlog_errno(status);
			return status;
		}
		status = ocfs2_inode_lock(inode, NULL, 0);
		if (status) {
			make_bad_inode(inode);
			mlog_errno(status);
			return status;
		}
	}

	if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
		status = ocfs2_try_open_lock(inode, 0);
		if (status) {
			make_bad_inode(inode);
			return status;
		}
	}

	if (can_lock) {
		status = ocfs2_read_inode_block_full(inode, &bh,
						     OCFS2_BH_IGNORE_CACHE);
	} else {
		status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
		/*
		 * If buffer is in jbd, then its checksum may not have been
		 * computed as yet.
		 */
		if (!status && !buffer_jbd(bh))
			status = ocfs2_validate_inode_block(osb->sb, bh);
	}
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = -EINVAL;
	fe = (struct ocfs2_dinode *) bh->b_data;

	/*
	 * This is a code bug. Right now the caller needs to
	 * understand whether it is asking for a system file inode or
	 * not so the proper lock names can be built.
	 */
	mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
			!!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
			"Inode %llu: system file state is ambigous\n",
			(unsigned long long)args->fi_blkno);

	if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
	    S_ISBLK(le16_to_cpu(fe->i_mode)))
		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));

	ocfs2_populate_inode(inode, fe, 0);

	BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));

	status = 0;

bail:
	if (can_lock)
		ocfs2_inode_unlock(inode, 0);

	if (status < 0)
		make_bad_inode(inode);

	if (args && bh)
		brelse(bh);

	return status;
}
Exemple #8
0
/*
 * find the victim alloc group, where #blkno fits.
 */
static int ocfs2_find_victim_alloc_group(struct inode *inode,
					 u64 vict_blkno,
					 int type, int slot,
					 int *vict_bit,
					 struct buffer_head **ret_bh)
{
	int ret, i, bits_per_unit = 0;
	u64 blkno;
	char namebuf[40];

	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *ac_bh = NULL, *gd_bh = NULL;
	struct ocfs2_chain_list *cl;
	struct ocfs2_chain_rec *rec;
	struct ocfs2_dinode *ac_dinode;
	struct ocfs2_group_desc *bg;

	ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
	ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
					 strlen(namebuf), &blkno);
	if (ret) {
		ret = -ENOENT;
		goto out;
	}

	ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data;
	cl = &(ac_dinode->id2.i_chain);
	rec = &(cl->cl_recs[0]);

	if (type == GLOBAL_BITMAP_SYSTEM_INODE)
		bits_per_unit = osb->s_clustersize_bits -
					inode->i_sb->s_blocksize_bits;
	/*
	 * 'vict_blkno' was out of the valid range.
	 */
	if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
	    (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
				bits_per_unit))) {
		ret = -EINVAL;
		goto out;
	}

	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {

		rec = &(cl->cl_recs[i]);
		if (!rec)
			continue;

		bg = NULL;

		do {
			if (!bg)
				blkno = le64_to_cpu(rec->c_blkno);
			else
				blkno = le64_to_cpu(bg->bg_next_group);

			if (gd_bh) {
				brelse(gd_bh);
				gd_bh = NULL;
			}

			ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			bg = (struct ocfs2_group_desc *)gd_bh->b_data;

			if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
						le16_to_cpu(bg->bg_bits))) {

				*ret_bh = gd_bh;
				*vict_bit = (vict_blkno - blkno) >>
							bits_per_unit;
				mlog(0, "find the victim group: #%llu, "
				     "total_bits: %u, vict_bit: %u\n",
				     blkno, le16_to_cpu(bg->bg_bits),
				     *vict_bit);
				goto out;
			}

		} while (le64_to_cpu(bg->bg_next_group));
	}

	ret = -EINVAL;
out:
	brelse(ac_bh);

	/*
	 * caller has to release the gd_bh properly.
	 */
	return ret;
}