Example #1
File: mmp.c  Project: AiWinters/linux
/*
 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 * faster.
 */
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
	struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);

	/*
	 * We protect against freezing so that we don't create dirty buffers
	 * on frozen filesystem.
	 */
	sb_start_write(sb);
	ext4_mmp_csum_set(sb, mmp);
	mark_buffer_dirty(bh);
	lock_buffer(bh);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(WRITE_SYNC, bh);
	wait_on_buffer(bh);
	sb_end_write(sb);
	if (unlikely(!buffer_uptodate(bh)))
		return 1;

	return 0;
}
Example #2
/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 */
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
			  ext4_fsblk_t mmp_block)
{
	struct mmp_struct *mmp;

	if (*bh)
		clear_buffer_uptodate(*bh);

	/* This would be sb_bread(sb, mmp_block), except we need to be sure
	 * that the MD RAID device cache has been bypassed, and that the read
	 * is not blocked in the elevator. */
	if (!*bh)
		*bh = sb_getblk(sb, mmp_block);
	if (!*bh)
		return -ENOMEM;
	if (*bh) {
		get_bh(*bh);
		lock_buffer(*bh);
		(*bh)->b_end_io = end_buffer_read_sync;
		submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
		wait_on_buffer(*bh);
		if (!buffer_uptodate(*bh)) {
			brelse(*bh);
			*bh = NULL;
		}
	}
	if (unlikely(!*bh)) {
		ext4_warning(sb, "Error while reading MMP block %llu",
			     mmp_block);
		return -EIO;
	}

	mmp = (struct mmp_struct *)((*bh)->b_data);
	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
	    !ext4_mmp_csum_verify(sb, mmp))
		return -EINVAL;

	return 0;
}
Example #3
//// Read one page (4 buffer blocks) of data from the device into the specified memory address.
void
bread_page( unsigned long address, int dev, int b[4] )
{
	struct buffer_head	*bh[4];
	int					i;

// Loop 4 times to read the contents of one page.
	for( i = 0; i < 4; i++ )
	{
		if( b[i] )
		{
// Get the buffer for the given device and block number from the buffer cache; if its data is not valid, issue a device read request.
			if( bh[i] = getblk( dev, b[i] ) )
			{
				if( !bh[i]->b_uptodate )
				{
					ll_rw_block( READ, bh[i] );
				}
			}
		}else
		{
			bh[i] = NULL;
		}
	}
// Copy the contents of the 4 buffers, in order, to the specified address.
	for( i = 0; i < 4; i++, address += BLOCK_SIZE )
	{
		if( bh[i] )
		{
			wait_on_buffer( bh[i] );    // Wait for the buffer to be unlocked (if it is locked).
			if( bh[i]->b_uptodate )     // Copy only if the buffer's data is valid.
			{
				COPYBLK( (unsigned long)bh[i]->b_data, address );
			}
			brelse( bh[i] );            // Release the buffer.
		}
	}
}
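
For orientation, here is a minimal hypothetical caller of the bread_page() above. It is not taken from the original source and simply assumes the page is backed by four consecutive 1 KB blocks; a 0 entry in the array would mean a hole, which bread_page() leaves untouched.

// Sketch only: build the block-number array and let bread_page() do the
// synchronous reads and the copy into the destination page.
static void
load_page_from_disk( unsigned long page, int dev, int first_block )
{
	int nr[4];
	int i;

	for( i = 0; i < 4; i++ )        // one page = 4 consecutive 1 KB blocks (assumed layout)
	{
		nr[i] = first_block + i;
	}
	bread_page( page, dev, nr );    // reads the blocks, waits for each, copies into the page
}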
Example #4
/*
 * floppy-change is never called from an interrupt, so we can relax a bit
 * here, sleep etc. Note that floppy-on tries to set current_DOR to point
 * to the desired drive, but it will probably not survive the sleep if
 * several floppies are used at the same time: thus the loop.
 */
int floppy_change(struct buffer_head * bh)
{
    unsigned int mask = 1 << (bh->b_dev & 0x03);

    if (MAJOR(bh->b_dev) != MAJOR_NR) {
        printk("floppy_changed: not a floppy\n");
        return 0;
    }
    if (fake_change & mask) {
        buffer_track = -1;
        fake_change &= ~mask;
        /* omitting the next line breaks formatting in a horrible way ... */
        changed_floppies &= ~mask;
        return 1;
    }
    if (changed_floppies & mask) {
        buffer_track = -1;
        changed_floppies &= ~mask;
        recalibrate = 1;
        return 1;
    }
    if (!bh)
        return 0;
    if (bh->b_dirt)
        ll_rw_block(WRITE, 1, &bh);
    else {
        buffer_track = -1;
        bh->b_uptodate = 0;
        ll_rw_block(READ, 1, &bh);
    }
    wait_on_buffer(bh);
    if (changed_floppies & mask) {
        changed_floppies &= ~mask;
        recalibrate = 1;
        return 1;
    }
    return 0;
}
Example #5
File: buffer.c  Project: jjhlzn/haribote
// Read the specified data block from the device and return a buffer containing its data. Return NULL if the block cannot be read.
struct buffer_head * bread(int dev,int block)
{
	//debug("bread1");
	//debug("dev = %d, block = %d",dev, block);
	struct buffer_head * bh;

	// First get a buffer block from the buffer cache. If the return value is NULL, the kernel is in
	// trouble, so panic. Then check whether the buffer already holds usable data: if its data is valid
	// (up to date) it can be used directly, so return it.
	if (!(bh=getblk(dev,block)))
		panic("bread: getblk returned NULL\n");
	if (bh->b_uptodate)
		return bh;
	//debug("bread2");
	ll_rw_block(READ,bh);
	//debug("about to wait on buffer");
	wait_on_buffer(bh);
	//debug("wait end");
	//debug("bh->b_uptodate = %d",bh->b_uptodate);
	if (bh->b_uptodate)
		return bh;
	brelse(bh);
	return NULL;
}
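
As a point of reference, a minimal hypothetical caller of the bread() above (not from the jjhlzn/haribote project): read one block synchronously, copy its contents out while holding the reference, and release it with brelse(). BLOCK_SIZE and memcpy are assumed to be available, as in the classic Linux 0.1x buffer-cache code this project is based on.

static int read_block_into(int dev, int block, char *dst)
{
	struct buffer_head * bh;

	if (!(bh = bread(dev, block)))
		return -1;			/* read failed; bread() already released the buffer */
	memcpy(dst, bh->b_data, BLOCK_SIZE);	/* use the cached data while we hold a reference */
	brelse(bh);				/* drop our reference so the cache block can be reused */
	return 0;
}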
Example #6
/*
 * Writing the super block and backups doesn't need to collaborate with the
 * journal, so we don't need to lock ip_io_mutex and ci doesn't need to be
 * passed into this function.
 */
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
				struct buffer_head *bh)
{
	int ret = 0;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;

	BUG_ON(buffer_jbd(bh));
	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	}

	lock_buffer(bh);
	set_buffer_uptodate(bh);

	/* remove from dirty list before I/O. */
	clear_buffer_dirty(bh);

	get_bh(bh); /* for end_buffer_write_sync() */
	bh->b_end_io = end_buffer_write_sync;
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
	submit_bh(REQ_OP_WRITE, 0, bh);

	wait_on_buffer(bh);

	if (!buffer_uptodate(bh)) {
		ret = -EIO;
		mlog_errno(ret);
	}

out:
	return ret;
}
Example #7
struct buffer_head *
get_hash_table( int dev, int block )
{
	struct buffer_head *bh;

	for(;; )
	{
// Look in the buffer cache for the buffer with the given device and block number; if it is not found, return NULL.
		if( !( bh = find_buffer( dev, block ) ) )
		{
			return NULL;
		}
// Increment the buffer's reference count and wait for it to be unlocked (if it is locked).
		bh->b_count++;
		wait_on_buffer( bh );
// Since we may have slept, re-verify that the buffer still matches, then return the buffer head pointer.
		if( bh->b_dev == dev && bh->b_blocknr == block )
		{
			return bh;
		}
// If the buffer's device or block number changed while we slept, drop the reference and search again.
		bh->b_count--;
	}
}
Example #8
/*
 * Writing the super block and backups doesn't need to collaborate with the
 * journal, so we don't need to lock ip_io_mutex and inode doesn't need to
 * be passed into this function.
 */
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
				struct buffer_head *bh)
{
	int ret = 0;

	mlog_entry_void();

	BUG_ON(buffer_jbd(bh));
	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
		ret = -EROFS;
		goto out;
	}

	lock_buffer(bh);
	set_buffer_uptodate(bh);

	/* remove from dirty list before I/O. */
	clear_buffer_dirty(bh);

	get_bh(bh); /* for end_buffer_write_sync() */
	bh->b_end_io = end_buffer_write_sync;
	submit_bh(WRITE, bh);

	wait_on_buffer(bh);

	if (!buffer_uptodate(bh)) {
		ret = -EIO;
		put_bh(bh);
	}

out:
	mlog_exit(ret);
	return ret;
}
Example #9
static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
    struct journal_head *jh;
    struct buffer_head *bh;
    tid_t this_tid;
    int released = 0;
    int ret = 0;

    this_tid = transaction->t_tid;
restart:

    if (journal->j_checkpoint_transactions != transaction ||
            transaction->t_tid != this_tid)
        return ret;
    while (!released && transaction->t_checkpoint_io_list) {
        jh = transaction->t_checkpoint_io_list;
        bh = jh2bh(jh);
        get_bh(bh);
        if (buffer_locked(bh)) {
            spin_unlock(&journal->j_list_lock);
            wait_on_buffer(bh);

            BUFFER_TRACE(bh, "brelse");
            __brelse(bh);
            spin_lock(&journal->j_list_lock);
            goto restart;
        }
        if (unlikely(buffer_write_io_error(bh)))
            ret = -EIO;

        released = __jbd2_journal_remove_checkpoint(jh);
        __brelse(bh);
    }

    return ret;
}
Example #10
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
		      struct buffer_head *bhs[], int flags,
		      int (*validate)(struct super_block *sb,
				      struct buffer_head *bh))
{
	int status = 0;
	int i, ignore_cache = 0;
	struct buffer_head *bh;

	mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
		   inode, (unsigned long long)block, nr, flags);

	BUG_ON(!inode);
	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
	       (flags & OCFS2_BH_IGNORE_CACHE));

	if (bhs == NULL) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	if (nr < 0) {
		mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	if (nr == 0) {
		mlog(ML_BH_IO, "No buffers will be read!\n");
		status = 0;
		goto bail;
	}

	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
	for (i = 0 ; i < nr ; i++) {
		if (bhs[i] == NULL) {
			bhs[i] = sb_getblk(inode->i_sb, block++);
			if (bhs[i] == NULL) {
				mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
		}
		bh = bhs[i];
		ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);

		/* There are three read-ahead cases here which we need to
		 * be concerned with. All three assume a buffer has
		 * previously been submitted with OCFS2_BH_READAHEAD
		 * and it hasn't yet completed I/O.
		 *
		 * 1) The current request is sync to disk. This rarely
		 *    happens these days, and never when performance
		 *    matters - the code can just wait on the buffer
		 *    lock and re-submit.
		 *
		 * 2) The current request is cached, but not
		 *    readahead. ocfs2_buffer_uptodate() will return
		 *    false anyway, so we'll wind up waiting on the
		 *    buffer lock to do I/O. We re-check the request
		 *    after getting the lock to avoid a re-submit.
		 *
		 * 3) The current request is readahead (and so must
		 *    also be a caching one). We short circuit if the
		 *    buffer is locked (under I/O) and if it's in the
		 *    uptodate cache. The re-check from #2 catches the
		 *    case that the previous read-ahead completes just
		 *    before our is-it-in-flight check.
		 */

		if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
			mlog(ML_UPTODATE,
			     "bh (%llu), inode %llu not uptodate\n",
			     (unsigned long long)bh->b_blocknr,
			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
			/* We're using ignore_cache here to say
			 * "go to disk" */
			ignore_cache = 1;
		}

		if (buffer_jbd(bh)) {
			if (ignore_cache)
				mlog(ML_BH_IO, "trying to sync read a jbd "
					       "managed bh (blocknr = %llu)\n",
				     (unsigned long long)bh->b_blocknr);
			continue;
		}

		if (ignore_cache) {
			if (buffer_dirty(bh)) {
				/* This should probably be a BUG, or
				 * at least return an error. */
				mlog(ML_BH_IO, "asking me to sync read a dirty "
					       "buffer! (blocknr = %llu)\n",
				     (unsigned long long)bh->b_blocknr);
				continue;
			}

			/* A read-ahead request was made - if the
			 * buffer is already under read-ahead from a
			 * previously submitted request then we are
			 * done here. */
			if ((flags & OCFS2_BH_READAHEAD)
			    && ocfs2_buffer_read_ahead(inode, bh))
				continue;

			lock_buffer(bh);
			if (buffer_jbd(bh)) {
#ifdef CATCH_BH_JBD_RACES
				mlog(ML_ERROR, "block %llu had the JBD bit set "
					       "while I was in lock_buffer!",
				     (unsigned long long)bh->b_blocknr);
				BUG();
#else
				unlock_buffer(bh);
				continue;
#endif
			}

			/* Re-check ocfs2_buffer_uptodate() as a
			 * previously read-ahead buffer may have
			 * completed I/O while we were waiting for the
			 * buffer lock. */
			if (!(flags & OCFS2_BH_IGNORE_CACHE)
			    && !(flags & OCFS2_BH_READAHEAD)
			    && ocfs2_buffer_uptodate(inode, bh)) {
				unlock_buffer(bh);
				continue;
			}

			clear_buffer_uptodate(bh);
			get_bh(bh); /* for end_buffer_read_sync() */
			if (validate)
				set_buffer_needs_validate(bh);
			bh->b_end_io = end_buffer_read_sync;
			submit_bh(READ, bh);
			continue;
		}
	}

	status = 0;

	for (i = (nr - 1); i >= 0; i--) {
		bh = bhs[i];

		if (!(flags & OCFS2_BH_READAHEAD)) {
			/* We know this can't have changed as we hold the
			 * inode sem. Avoid doing any work on the bh if the
			 * journal has it. */
			if (!buffer_jbd(bh))
				wait_on_buffer(bh);

			if (!buffer_uptodate(bh)) {
				/* Status won't be cleared from here on out,
				 * so we can safely record this and loop back
				 * to cleanup the other buffers. Don't need to
				 * remove the clustered uptodate information
				 * for this bh as it's not marked locally
				 * uptodate. */
				status = -EIO;
				put_bh(bh);
				bhs[i] = NULL;
				continue;
			}

			if (buffer_needs_validate(bh)) {
				/* We never set NeedsValidate if the
				 * buffer was held by the journal, so
				 * that better not have changed */
				BUG_ON(buffer_jbd(bh));
				clear_buffer_needs_validate(bh);
				status = validate(inode->i_sb, bh);
				if (status) {
					put_bh(bh);
					bhs[i] = NULL;
					continue;
				}
			}
		}

		/* Always set the buffer in the cache, even if it was
		 * a forced read, or read-ahead which hasn't yet
		 * completed. */
		ocfs2_set_buffer_uptodate(inode, bh);
	}
	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);

	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
	     (unsigned long long)block, nr,
	     ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
	     flags);

bail:

	mlog_exit(status);
	return status;
}
Example #11
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
			   unsigned int nr, struct buffer_head *bhs[])
{
	int status = 0;
	unsigned int i;
	struct buffer_head *bh;

	if (!nr) {
		mlog(ML_BH_IO, "No buffers will be read!\n");
		goto bail;
	}

	for (i = 0 ; i < nr ; i++) {
		if (bhs[i] == NULL) {
			bhs[i] = sb_getblk(osb->sb, block++);
			if (bhs[i] == NULL) {
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
		}
		bh = bhs[i];

		if (buffer_jbd(bh)) {
			mlog(ML_BH_IO,
			     "trying to sync read a jbd "
			     "managed bh (blocknr = %llu), skipping\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		if (buffer_dirty(bh)) {
			/* This should probably be a BUG, or
			 * at least return an error. */
			mlog(ML_ERROR,
			     "trying to sync read a dirty "
			     "buffer! (blocknr = %llu), skipping\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		lock_buffer(bh);
		if (buffer_jbd(bh)) {
			mlog(ML_ERROR,
			     "block %llu had the JBD bit set "
			     "while I was in lock_buffer!",
			     (unsigned long long)bh->b_blocknr);
			BUG();
		}

		clear_buffer_uptodate(bh);
		get_bh(bh); /* for end_buffer_read_sync() */
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
	}

	for (i = nr; i > 0; i--) {
		bh = bhs[i - 1];

		/* No need to wait on the buffer if it's managed by JBD. */
		if (!buffer_jbd(bh))
			wait_on_buffer(bh);

		if (!buffer_uptodate(bh)) {
			/* Status won't be cleared from here on out,
			 * so we can safely record this and loop back
			 * to cleanup the other buffers. */
			status = -EIO;
			put_bh(bh);
			bhs[i - 1] = NULL;
		}
	}

bail:
	return status;
}
Example #12
/*
 * journal_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
 */
void journal_commit_transaction(journal_t *journal)
{
	transaction_t *commit_transaction;
	struct journal_head *jh, *new_jh, *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned int blocknr;
	ktime_t start_time;
	u64 commit_time;
	char *tagp = NULL;
	journal_header_t *header;
	journal_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
	int i;
	struct blk_plug plug;

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

	/* Do we need to erase the effects of a prior journal_flush? */
	if (journal->j_flags & JFS_FLUSHED) {
		jbd_debug(3, "super block updated\n");
		journal_update_superblock(journal, 1);
	} else {
		jbd_debug(3, "superblock not updated\n");
	}

	J_ASSERT(journal->j_running_transaction != NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);

	commit_transaction = journal->j_running_transaction;
	J_ASSERT(commit_transaction->t_state == T_RUNNING);

	trace_jbd_start_commit(journal, commit_transaction);
	jbd_debug(1, "JBD: starting commit of transaction %d\n",
			commit_transaction->t_tid);

	spin_lock(&journal->j_state_lock);
	commit_transaction->t_state = T_LOCKED;

	trace_jbd_commit_locking(journal, commit_transaction);
	spin_lock(&commit_transaction->t_handle_lock);
	while (commit_transaction->t_updates) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_updates, &wait,
					TASK_UNINTERRUPTIBLE);
		if (commit_transaction->t_updates) {
			spin_unlock(&commit_transaction->t_handle_lock);
			spin_unlock(&journal->j_state_lock);
			schedule();
			spin_lock(&journal->j_state_lock);
			spin_lock(&commit_transaction->t_handle_lock);
		}
		finish_wait(&journal->j_wait_updates, &wait);
	}
	spin_unlock(&commit_transaction->t_handle_lock);

	J_ASSERT (commit_transaction->t_outstanding_credits <=
			journal->j_max_transaction_buffers);

	/*
	 * First thing we are allowed to do is to discard any remaining
	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a journal_restart() while
	 * there are still BJ_Reserved buffers outstanding.  These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved.  This is consistent with the existing behaviour
	 * that multiple journal_get_write_access() calls to the same
	 * buffer are perfectly permissible.
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A journal_get_undo_access()+journal_release_buffer() may
		 * leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = jh2bh(jh);

			jbd_lock_bh_state(bh);
			jbd_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			jbd_unlock_bh_state(bh);
		}
		journal_refile_buffer(journal, jh);
	}

	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists.  We do this *before* commit because it potentially
	 * frees some memory
	 */
	spin_lock(&journal->j_list_lock);
	__journal_clean_checkpoint_list(journal);
	spin_unlock(&journal->j_list_lock);

	jbd_debug (3, "JBD: commit phase 1\n");

	/*
	 * Clear revoked flag to reflect there is no revoked buffers
	 * in the next transaction which is going to be started.
	 */
	journal_clear_buffer_revoked_flags(journal);

	/*
	 * Switch to a new revoke table.
	 */
	journal_switch_revoke_table(journal);

	trace_jbd_commit_flushing(journal, commit_transaction);
	commit_transaction->t_state = T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	start_time = ktime_get();
	commit_transaction->t_log_start = journal->j_head;
	wake_up(&journal->j_wait_transaction_locked);
	spin_unlock(&journal->j_state_lock);

	jbd_debug (3, "JBD: commit phase 2\n");

	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists.  Data blocks go first.
	 */
	blk_start_plug(&plug);
	err = journal_submit_data_buffers(journal, commit_transaction,
					  WRITE_SYNC);
	blk_finish_plug(&plug);

	/*
	 * Wait for all previously submitted IO to complete.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_locked_list) {
		struct buffer_head *bh;

		jh = commit_transaction->t_locked_list->b_tprev;
		bh = jh2bh(jh);
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			spin_lock(&journal->j_list_lock);
		}
		if (unlikely(!buffer_uptodate(bh))) {
			if (!trylock_page(bh->b_page)) {
				spin_unlock(&journal->j_list_lock);
				lock_page(bh->b_page);
				spin_lock(&journal->j_list_lock);
			}
			if (bh->b_page->mapping)
				set_bit(AS_EIO, &bh->b_page->mapping->flags);

			unlock_page(bh->b_page);
			SetPageError(bh->b_page);
			err = -EIO;
		}
		if (!inverted_lock(journal, bh)) {
			put_bh(bh);
			spin_lock(&journal->j_list_lock);
			continue;
		}
		if (buffer_jbd(bh) && bh2jh(bh) == jh &&
		    jh->b_transaction == commit_transaction &&
		    jh->b_jlist == BJ_Locked)
			__journal_unfile_buffer(jh);
		jbd_unlock_bh_state(bh);
		release_data_buffer(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (err) {
		char b[BDEVNAME_SIZE];

		printk(KERN_WARNING
			"JBD: Detected IO errors while flushing file data "
			"on %s\n", bdevname(journal->j_fs_dev, b));
		if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
			journal_abort(journal, err);
		err = 0;
	}

	blk_start_plug(&plug);

	journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);

	/*
	 * If we found any dirty or locked buffers, then we should have
	 * looped back up to the write_out_data label.  If there weren't
	 * any then journal_clean_data_list should have wiped the list
	 * clean by now, so check that it is in fact empty.
	 */
	J_ASSERT (commit_transaction->t_sync_datalist == NULL);

	jbd_debug (3, "JBD: commit phase 3\n");

	/*
	 * Way to go: we have now written out all of the data for a
	 * transaction!  Now comes the tricky part: we need to write out
	 * metadata.  Loop over the transaction's entire buffer list:
	 */
	spin_lock(&journal->j_state_lock);
	commit_transaction->t_state = T_COMMIT;
	spin_unlock(&journal->j_state_lock);

	trace_jbd_commit_logging(journal, commit_transaction);
	J_ASSERT(commit_transaction->t_nr_buffers <=
		 commit_transaction->t_outstanding_credits);

	descriptor = NULL;
	bufs = 0;
	while (commit_transaction->t_buffers) {

		/* Find the next buffer to be journaled... */

		jh = commit_transaction->t_buffers;

		/* If we're in abort mode, we just un-journal the buffer and
		   release it. */

		if (is_journal_aborted(journal)) {
			clear_buffer_jbddirty(jh2bh(jh));
			JBUFFER_TRACE(jh, "journal is aborting: refile");
			journal_refile_buffer(journal, jh);
			/* If that was the last one, we need to clean up
			 * any descriptor buffers which may have been
			 * already allocated, even if we are now
			 * aborting. */
			if (!commit_transaction->t_buffers)
				goto start_journal_io;
			continue;
		}

		/* Make sure we have a descriptor block in which to
		   record the metadata buffer. */

		if (!descriptor) {
			struct buffer_head *bh;

			J_ASSERT (bufs == 0);

			jbd_debug(4, "JBD: get descriptor\n");

			descriptor = journal_get_descriptor_buffer(journal);
			if (!descriptor) {
				journal_abort(journal, -EIO);
				continue;
			}

			bh = jh2bh(descriptor);
			jbd_debug(4, "JBD: got buffer %llu (%p)\n",
				(unsigned long long)bh->b_blocknr, bh->b_data);
			header = (journal_header_t *)&bh->b_data[0];
			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);

			tagp = &bh->b_data[sizeof(journal_header_t)];
			space_left = bh->b_size - sizeof(journal_header_t);
			first_tag = 1;
			set_buffer_jwrite(bh);
			set_buffer_dirty(bh);
			wbuf[bufs++] = bh;

			/* Record it so that we can wait for IO
                           completion later */
			BUFFER_TRACE(bh, "ph3: file as descriptor");
			journal_file_buffer(descriptor, commit_transaction,
					BJ_LogCtl);
		}

		/* Where is the buffer to be written? */

		err = journal_next_log_block(journal, &blocknr);
		/* If the block mapping failed, just abandon the buffer
		   and repeat this loop: we'll fall into the
		   refile-on-abort condition above. */
		if (err) {
			journal_abort(journal, err);
			continue;
		}

		/*
		 * start_this_handle() uses t_outstanding_credits to determine
		 * the free space in the log, but this counter is changed
		 * by journal_next_log_block() also.
		 */
		commit_transaction->t_outstanding_credits--;

		/* Bump b_count to prevent truncate from stumbling over
                   the shadowed buffer!  @@@ This can go if we ever get
                   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
		get_bh(jh2bh(jh));

		/* Make a temporary IO buffer with which to write it out
                   (this will requeue both the metadata buffer and the
                   temporary IO buffer). new_bh goes on BJ_IO*/

		set_buffer_jwrite(jh2bh(jh));
		/*
		 * akpm: journal_write_metadata_buffer() sets
		 * new_bh->b_transaction to commit_transaction.
		 * We need to clean this up before we release new_bh
		 * (which is of type BJ_IO)
		 */
		JBUFFER_TRACE(jh, "ph3: write metadata");
		flags = journal_write_metadata_buffer(commit_transaction,
						      jh, &new_jh, blocknr);
		set_buffer_jwrite(jh2bh(new_jh));
		wbuf[bufs++] = jh2bh(new_jh);

		/* Record the new block's tag in the current descriptor
                   buffer */

		tag_flag = 0;
		if (flags & 1)
			tag_flag |= JFS_FLAG_ESCAPE;
		if (!first_tag)
			tag_flag |= JFS_FLAG_SAME_UUID;

		tag = (journal_block_tag_t *) tagp;
		tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
		tag->t_flags = cpu_to_be32(tag_flag);
		tagp += sizeof(journal_block_tag_t);
		space_left -= sizeof(journal_block_tag_t);

		if (first_tag) {
			memcpy (tagp, journal->j_uuid, 16);
			tagp += 16;
			space_left -= 16;
			first_tag = 0;
		}

		/* If there's no more to do, or if the descriptor is full,
		   let the IO rip! */

		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    space_left < sizeof(journal_block_tag_t) + 16) {

			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);

			/* Write an end-of-descriptor marker before
                           submitting the IOs.  "tag" still points to
                           the last tag we set up. */

			tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);

start_journal_io:
			for (i = 0; i < bufs; i++) {
				struct buffer_head *bh = wbuf[i];
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				bh->b_end_io = journal_end_buffer_io_sync;
				submit_bh(WRITE_SYNC, bh);
			}
			cond_resched();

			/* Force a new descriptor to be generated next
                           time round the loop. */
			descriptor = NULL;
			bufs = 0;
		}
	}

	blk_finish_plug(&plug);

	/* Lo and behold: we have just managed to send a transaction to
           the log.  Before we can commit it, wait for the IO so far to
           complete.  Control buffers being written are on the
           transaction's t_log_list queue, and metadata buffers are on
           the t_iobuf_list queue.

	   Wait for the buffers in reverse order.  That way we are
	   less likely to be woken up until all IOs have completed, and
	   so we incur less scheduling load.
	*/

	jbd_debug(3, "JBD: commit phase 4\n");

	/*
	 * akpm: these are BJ_IO, and j_list_lock is not needed.
	 * See __journal_try_to_free_buffer.
	 */
wait_for_iobuf:
	while (commit_transaction->t_iobuf_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_iobuf_list->b_tprev;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_iobuf;
		}
		if (cond_resched())
			goto wait_for_iobuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		clear_buffer_jwrite(bh);

		JBUFFER_TRACE(jh, "ph4: unfile after journal write");
		journal_unfile_buffer(journal, jh);

		/*
		 * ->t_iobuf_list should contain only dummy buffer_heads
		 * which were created by journal_write_metadata_buffer().
		 */
		BUFFER_TRACE(bh, "dumping temporary bh");
		journal_put_journal_head(jh);
		__brelse(bh);
		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
		free_buffer_head(bh);

		/* We also have to unlock and free the corresponding
                   shadowed buffer */
		jh = commit_transaction->t_shadow_list->b_tprev;
		bh = jh2bh(jh);
		clear_buffer_jwrite(bh);
		J_ASSERT_BH(bh, buffer_jbddirty(bh));

		/* The metadata is now released for reuse, but we need
                   to remember it against this transaction so that when
                   we finally commit, we can do any checkpointing
                   required. */
		JBUFFER_TRACE(jh, "file as BJ_Forget");
		journal_file_buffer(jh, commit_transaction, BJ_Forget);
		/*
		 * Wake up any transactions which were waiting for this
		 * IO to complete. The barrier must be here so that changes
		 * by journal_file_buffer() take effect before wake_up_bit()
		 * does the waitqueue check.
		 */
		smp_mb();
		wake_up_bit(&bh->b_state, BH_Unshadow);
		JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}

	J_ASSERT (commit_transaction->t_shadow_list == NULL);

	jbd_debug(3, "JBD: commit phase 5\n");

	/* Here we wait for the revoke record and descriptor record buffers */
 wait_for_ctlbuf:
	while (commit_transaction->t_log_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_log_list->b_tprev;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_ctlbuf;
		}
		if (cond_resched())
			goto wait_for_ctlbuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
		clear_buffer_jwrite(bh);
		journal_unfile_buffer(journal, jh);
		journal_put_journal_head(jh);
		__brelse(bh);		/* One for getblk */
		/* AKPM: bforget here */
	}

	if (err)
		journal_abort(journal, err);

	jbd_debug(3, "JBD: commit phase 6\n");

	/* All metadata is written, now write commit record and do cleanup */
	spin_lock(&journal->j_state_lock);
	J_ASSERT(commit_transaction->t_state == T_COMMIT);
	commit_transaction->t_state = T_COMMIT_RECORD;
	spin_unlock(&journal->j_state_lock);

	if (journal_write_commit_record(journal, commit_transaction))
		err = -EIO;

	if (err)
		journal_abort(journal, err);

	/* End of a transaction!  Finally, we can do checkpoint
           processing: any buffers committed as a result of this
           transaction can be removed from any checkpoint list it was on
           before. */

	jbd_debug(3, "JBD: commit phase 7\n");

	J_ASSERT(commit_transaction->t_sync_datalist == NULL);
	J_ASSERT(commit_transaction->t_buffers == NULL);
	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
	J_ASSERT(commit_transaction->t_iobuf_list == NULL);
	J_ASSERT(commit_transaction->t_shadow_list == NULL);
	J_ASSERT(commit_transaction->t_log_list == NULL);

restart_loop:
	/*
	 * As there are other places (journal_unmap_buffer()) adding buffers
	 * to this list we have to be careful and hold the j_list_lock.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_forget) {
		transaction_t *cp_transaction;
		struct buffer_head *bh;
		int try_to_free = 0;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = jh2bh(jh);
		/*
		 * Get a reference so that bh cannot be freed before we are
		 * done with it.
		 */
		get_bh(bh);
		jbd_lock_bh_state(bh);
		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
			jh->b_transaction == journal->j_running_transaction);

		/*
		 * If there is undo-protected committed data against
		 * this buffer, then we can remove it now.  If it is a
		 * buffer needing such protection, the old frozen_data
		 * field now points to a committed version of the
		 * buffer, so rotate that field to the new committed
		 * data.
		 *
		 * Otherwise, we can just throw away the frozen data now.
		 */
		if (jh->b_committed_data) {
			jbd_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			if (jh->b_frozen_data) {
				jh->b_committed_data = jh->b_frozen_data;
				jh->b_frozen_data = NULL;
			}
		} else if (jh->b_frozen_data) {
			jbd_free(jh->b_frozen_data, bh->b_size);
			jh->b_frozen_data = NULL;
		}

		spin_lock(&journal->j_list_lock);
		cp_transaction = jh->b_cp_transaction;
		if (cp_transaction) {
			JBUFFER_TRACE(jh, "remove from old cp transaction");
			__journal_remove_checkpoint(jh);
		}

		/* Only re-checkpoint the buffer_head if it is marked
		 * dirty.  If the buffer was added to the BJ_Forget list
		 * by journal_forget, it may no longer be dirty and
		 * there's no point in keeping a checkpoint record for
		 * it. */

		/* A buffer which has been freed while still being
		 * journaled by a previous transaction may end up still
		 * being dirty here, but we want to avoid writing back
		 * that buffer in the future after the "add to orphan"
		 * operation has been committed. That's not only a performance
		 * gain, it also stops aliasing problems if the buffer is
		 * left behind for writeback and gets reallocated for another
		 * use in a different page. */
		if (buffer_freed(bh) && !jh->b_next_transaction) {
			clear_buffer_freed(bh);
			clear_buffer_jbddirty(bh);
		}

		if (buffer_jbddirty(bh)) {
			JBUFFER_TRACE(jh, "add to new checkpointing trans");
			__journal_insert_checkpoint(jh, commit_transaction);
			if (is_journal_aborted(journal))
				clear_buffer_jbddirty(bh);
		} else {
			J_ASSERT_BH(bh, !buffer_dirty(bh));
			/*
			 * The buffer on BJ_Forget list and not jbddirty means
			 * it has been freed by this transaction and hence it
			 * could not have been reallocated until this
			 * transaction has committed. *BUT* it could be
			 * reallocated once we have written all the data to
			 * disk and before we process the buffer on BJ_Forget
			 * list.
			 */
			if (!jh->b_next_transaction)
				try_to_free = 1;
		}
		JBUFFER_TRACE(jh, "refile or unfile freed buffer");
		__journal_refile_buffer(jh);
		jbd_unlock_bh_state(bh);
		if (try_to_free)
			release_buffer_page(bh);
		else
			__brelse(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * This is a bit sleazy.  We use j_list_lock to protect transition
	 * of a transaction into T_FINISHED state and calling
	 * __journal_drop_transaction(). Otherwise we could race with
	 * other checkpointing code processing the transaction...
	 */
	spin_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	/*
	 * Now recheck if some buffers did not get attached to the transaction
	 * while the lock was dropped...
	 */
	if (commit_transaction->t_forget) {
		spin_unlock(&journal->j_list_lock);
		spin_unlock(&journal->j_state_lock);
		goto restart_loop;
	}

	/* Done with this transaction! */

	jbd_debug(3, "JBD: commit phase 8\n");

	J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);

	commit_transaction->t_state = T_FINISHED;
	J_ASSERT(commit_transaction == journal->j_committing_transaction);
	journal->j_commit_sequence = commit_transaction->t_tid;
	journal->j_committing_transaction = NULL;
	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));

	/*
	 * weight the commit time higher than the average time so we don't
	 * react too strongly to vast changes in commit time
	 */
	if (likely(journal->j_average_commit_time))
		journal->j_average_commit_time = (commit_time*3 +
				journal->j_average_commit_time) / 4;
	else
		journal->j_average_commit_time = commit_time;

	spin_unlock(&journal->j_state_lock);

	if (commit_transaction->t_checkpoint_list == NULL &&
	    commit_transaction->t_checkpoint_io_list == NULL) {
		__journal_drop_transaction(journal, commit_transaction);
	} else {
		if (journal->j_checkpoint_transactions == NULL) {
			journal->j_checkpoint_transactions = commit_transaction;
			commit_transaction->t_cpnext = commit_transaction;
			commit_transaction->t_cpprev = commit_transaction;
		} else {
			commit_transaction->t_cpnext =
				journal->j_checkpoint_transactions;
			commit_transaction->t_cpprev =
				commit_transaction->t_cpnext->t_cpprev;
			commit_transaction->t_cpnext->t_cpprev =
				commit_transaction;
			commit_transaction->t_cpprev->t_cpnext =
				commit_transaction;
		}
	}
	spin_unlock(&journal->j_list_lock);

	trace_jbd_end_commit(journal, commit_transaction);
	jbd_debug(1, "JBD: commit %d complete, head %d\n",
		  journal->j_commit_sequence, journal->j_tail_sequence);

	wake_up(&journal->j_wait_done_commit);
}
Example #13
File: buffer.c  Project: jjhlzn/haribote
// Check whether the buffer for the given device and block number is already in the buffer cache.
// If it is, return the corresponding buffer head; if not, set up a new cache entry for that device
// and block number. Either way, return the corresponding buffer head pointer.
struct buffer_head * getblk(int dev,int block)
{
	struct buffer_head * tmp, * bh;

repeat:
	if ( (bh = get_hash_table(dev,block)) )
		return bh;
	tmp = free_list;
	do {
		if (tmp->b_count)
			continue;
		if (!bh || BADNESS(tmp)<BADNESS(bh)) {
			bh = tmp;
			if (!BADNESS(tmp))
				break;
		}
/* and repeat until we find something good */
	} while ((tmp = tmp->b_next_free) != free_list);
	
	// If the loop found every buffer in use (every buffer head's reference count > 0), sleep until a
	// free buffer becomes available. This process is woken explicitly when one is freed, and we then
	// jump back to the start of the function to search for a free buffer again.
	if (!bh) {
		sleep_on(&buffer_wait);
		goto repeat;
	}
	
	// If we get here, a free buffer block has been found. First wait for it to be unlocked (if it is locked).
	wait_on_buffer(bh);
	if (bh->b_count)
		goto repeat;
	
	// If the buffer has been modified, write its data to disk and wait again for it to be unlocked.
	while (bh->b_dirt) {
		sync_dev(bh->b_dev);
		wait_on_buffer(bh);
		if (bh->b_count)
			goto repeat;
	}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
	// While this process slept waiting for the buffer, another process may already have added this block to the cache, so check for that too.
	if (find_buffer(dev,block))
		goto repeat;
/* OK, FINALLY we know that this buffer is the only one of its kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
	
	// At this point we finally know that this buffer is the only one for the requested block, and that
	// it is currently unused (b_count=0), unlocked (b_lock=0), and clean (b_dirt=0).
	// So claim it: set the reference count to 1 and clear the dirty and uptodate flags.
	bh->b_count=1;
	bh->b_dirt=0;
	bh->b_uptodate=0;
	// Remove the buffer head from the hash queue and free list so it can be used for the requested
	// device and block, then re-insert it into the free list and hash queue at its new positions based
	// on the new device and block number, and finally return the buffer head pointer.
	remove_from_queues(bh);
	bh->b_dev=dev;
	bh->b_blocknr=block;
	insert_into_queues(bh);
	return bh;
}
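
As a complementary sketch (hypothetical, not from the project): because getblk() itself performs no I/O, a caller that overwrites an entire block can skip the read, mark the buffer valid and dirty, and leave write-back to the cache.

static void write_whole_block(int dev, int block, const char * src)
{
	struct buffer_head * bh;

	bh = getblk(dev, block);		//get (or allocate) the cache block; no disk read happens here
	memcpy(bh->b_data, src, BLOCK_SIZE);	//overwrite the full block (a partial write would need bread() first)
	bh->b_uptodate = 1;			//the in-memory contents are now valid
	bh->b_dirt = 1;				//mark dirty so sync/write-back flushes it to disk later
	brelse(bh);				//release the reference; the dirty block stays cached
}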
Example #14
File: file.c  Project: rohsaini/mkunity
static int bext2_file_write (struct inode * inode, struct file * filp,
			    const char * buf, int count)
{
	const loff_t two_gb = 2147483647;
	loff_t pos;
	off_t pos2;
	long block;
	int offset;
	int written, c;
	struct buffer_head * bh, *bufferlist[NBUF];
	struct super_block * sb;
	int err;
	int i,buffercount,write_error;

	write_error = buffercount = 0;
	if (!inode) {
		printk("bext2_file_write: inode = NULL\n");
		return -EINVAL;
	}
	sb = inode->i_sb;
	if (sb->s_flags & MS_RDONLY)
		/*
		 * This fs has been automatically remounted ro because of errors
		 */
		return -ENOSPC;

	if (!S_ISREG(inode->i_mode)) {
		bext2_warning (sb, "bext2_file_write", "mode = %07o",
			      inode->i_mode);
		return -EINVAL;
	}
	if (filp->f_flags & O_APPEND)
		pos = inode->i_size;
	else
		pos = filp->f_pos;
	pos2 = (off_t) pos;
	/*
	 * If a file has been opened in synchronous mode, we have to ensure
	 * that meta-data will also be written synchronously.  Thus, we
	 * set the i_osync field.  This field is tested by the allocation
	 * routines.
	 */
	if (filp->f_flags & O_SYNC)
		inode->u.ext2_i.i_osync++;
	block = pos2 >> EXT2_BLOCK_SIZE_BITS(sb);
	offset = pos2 & (sb->s_blocksize - 1);
	c = sb->s_blocksize - offset;
	written = 0;
	while (count > 0) {
		if (pos > two_gb) {
			if (!written)
				written = -EFBIG;
			break;
		}
		bh = bext2_getblk (inode, block, 1, &err);
		if (!bh) {
			if (!written)
				written = err;
			break;
		}
		count -= c;
		if (count < 0)
			c += count;
		if (c != sb->s_blocksize && !buffer_uptodate(bh)) {
			ll_rw_block (READ, 1, &bh);
			wait_on_buffer (bh);
			if (!buffer_uptodate(bh)) {
				brelse (bh);
				if (!written)
					written = -EIO;
				break;
			}
		}
		memcpy_fromfs (bh->b_data + offset, buf, c);
		update_vm_cache(inode, pos, bh->b_data + offset, c);
		pos2 += c;
		pos += c;
		written += c;
		buf += c;
		mark_buffer_uptodate(bh, 1);
		mark_buffer_dirty(bh, 0);
		if (filp->f_flags & O_SYNC)
			bufferlist[buffercount++] = bh;
		else
			brelse(bh);
		if (buffercount == NBUF){
			ll_rw_block(WRITE, buffercount, bufferlist);
			for(i=0; i<buffercount; i++){
				wait_on_buffer(bufferlist[i]);
				if (!buffer_uptodate(bufferlist[i]))
					write_error=1;
				brelse(bufferlist[i]);
			}
			buffercount=0;
		}
		if(write_error)
			break;
		block++;
		offset = 0;
		c = sb->s_blocksize;
	}
	if ( buffercount ){
		ll_rw_block(WRITE, buffercount, bufferlist);
		for(i=0; i<buffercount; i++){
			wait_on_buffer(bufferlist[i]);
			if (!buffer_uptodate(bufferlist[i]))
				write_error=1;
			brelse(bufferlist[i]);
		}
	}		
	if (pos > inode->i_size)
		inode->i_size = pos;
	if (filp->f_flags & O_SYNC)
		inode->u.ext2_i.i_osync--;
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	filp->f_pos = pos;
	inode->i_dirt = 1;
	return written;
}
Example #15
/*
 * Read data of @inode from @block_start to @block_end and uncompress
 * to one zisofs block. Store the data in the @pages array with @pcount
 * entries. Start storing at offset @poffset of the first page.
 */
static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
				      loff_t block_end, int pcount,
				      struct page **pages, unsigned poffset,
				      int *errp)
{
	unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
	unsigned int bufsize = ISOFS_BUFFER_SIZE(inode);
	unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
	unsigned int bufmask = bufsize - 1;
	int i, block_size = block_end - block_start;
	z_stream stream = { .total_out = 0,
			    .avail_in = 0,
			    .avail_out = 0, };
	int zerr;
	int needblocks = (block_size + (block_start & bufmask) + bufmask)
				>> bufshift;
	int haveblocks;
	blkcnt_t blocknum;
	struct buffer_head *bhs[needblocks + 1];
	int curbh, curpage;

	if (block_size > deflateBound(1UL << zisofs_block_shift)) {
		*errp = -EIO;
		return 0;
	}
	/* Empty block? */
	if (block_size == 0) {
		for ( i = 0 ; i < pcount ; i++ ) {
			if (!pages[i])
				continue;
			memset(page_address(pages[i]), 0, PAGE_SIZE);
			flush_dcache_page(pages[i]);
			SetPageUptodate(pages[i]);
		}
		return ((loff_t)pcount) << PAGE_SHIFT;
	}

	/* Because zlib is not thread-safe, do all the I/O at the top. */
	blocknum = block_start >> bufshift;
	memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
	haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
	ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs);

	curbh = 0;
	curpage = 0;
	/*
	 * First block is special since it may be fractional.  We also wait for
	 * it before grabbing the zlib mutex; odds are that the subsequent
	 * blocks are going to come in in short order so we don't hold the zlib
	 * mutex longer than necessary.
	 */

	if (!bhs[0])
		goto b_eio;

	wait_on_buffer(bhs[0]);
	if (!buffer_uptodate(bhs[0])) {
		*errp = -EIO;
		goto b_eio;
	}

	stream.workspace = zisofs_zlib_workspace;
	mutex_lock(&zisofs_zlib_lock);
		
	zerr = zlib_inflateInit(&stream);
	if (zerr != Z_OK) {
		if (zerr == Z_MEM_ERROR)
			*errp = -ENOMEM;
		else
			*errp = -EIO;
		printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
			       zerr);
		goto z_eio;
	}

	while (curpage < pcount && curbh < haveblocks &&
	       zerr != Z_STREAM_END) {
		if (!stream.avail_out) {
			if (pages[curpage]) {
				stream.next_out = page_address(pages[curpage])
						+ poffset;
				stream.avail_out = PAGE_SIZE - poffset;
				poffset = 0;
			} else {
				stream.next_out = (void *)&zisofs_sink_page;
				stream.avail_out = PAGE_SIZE;
			}
		}
		if (!stream.avail_in) {
			wait_on_buffer(bhs[curbh]);
			if (!buffer_uptodate(bhs[curbh])) {
				*errp = -EIO;
				break;
			}
			stream.next_in  = bhs[curbh]->b_data +
						(block_start & bufmask);
			stream.avail_in = min_t(unsigned, bufsize -
						(block_start & bufmask),
						block_size);
			block_size -= stream.avail_in;
			block_start = 0;
		}

		while (stream.avail_out && stream.avail_in) {
			zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
			if (zerr == Z_BUF_ERROR && stream.avail_in == 0)
				break;
			if (zerr == Z_STREAM_END)
				break;
			if (zerr != Z_OK) {
				/* EOF, error, or trying to read beyond end of input */
				if (zerr == Z_MEM_ERROR)
					*errp = -ENOMEM;
				else {
					printk(KERN_DEBUG
					       "zisofs: zisofs_inflate returned"
					       " %d, inode = %lu,"
					       " page idx = %d, bh idx = %d,"
					       " avail_in = %ld,"
					       " avail_out = %ld\n",
					       zerr, inode->i_ino, curpage,
					       curbh, stream.avail_in,
					       stream.avail_out);
					*errp = -EIO;
				}
				goto inflate_out;
			}
		}

		if (!stream.avail_out) {
			/* This page completed */
			if (pages[curpage]) {
				flush_dcache_page(pages[curpage]);
				SetPageUptodate(pages[curpage]);
			}
			curpage++;
		}
		if (!stream.avail_in)
			curbh++;
	}
inflate_out:
	zlib_inflateEnd(&stream);

z_eio:
	mutex_unlock(&zisofs_zlib_lock);

b_eio:
	for (i = 0; i < haveblocks; i++)
		brelse(bhs[i]);
	return stream.total_out;
}
Example #16
/*
 * NOTE! When we get the inode, we're the only people
 * that have access to it, and as such there are no
 * race conditions we have to worry about. The inode
 * is not on the hash-lists, and it cannot be reached
 * through the filesystem because the directory entry
 * has been deleted earlier.
 *
 * HOWEVER: we must make sure that we get no aliases,
 * which means that we have to call "clear_inode()"
 * _before_ we mark the inode not in use in the inode
 * bitmaps. Otherwise a newly created file might use
 * the same inode number (not actually the same pointer
 * though), and then we'd have two inodes sharing the
 * same inode number and space on the harddisk.
 */
void ext2_free_inode (struct inode * inode)
{
	struct super_block * sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head * bh;
	struct buffer_head * bh2;
	unsigned long block_group;
	unsigned long bit;
	struct ext2_group_desc * desc;
	struct ext2_super_block * es;

	ino = inode->i_ino;
	ext2_debug ("freeing inode %lu\n", ino);

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
	if (!is_bad_inode(inode)) {
		/* Quota is already initialized in iput() */
	    	DQUOT_FREE_INODE(inode);
		DQUOT_DROP(inode);
	}

	lock_super (sb);
	es = sb->u.ext2_sb.s_es;
	is_directory = S_ISDIR(inode->i_mode);

	/* Do this BEFORE marking the inode not in use or returning an error */
	clear_inode (inode);

	if (ino < EXT2_FIRST_INO(sb) ||
	    ino > le32_to_cpu(es->s_inodes_count)) {
		ext2_error (sb, "ext2_free_inode",
			    "reserved or nonexistent inode %lu", ino);
		goto error_return;
	}
	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
	bh = load_inode_bitmap (sb, block_group);
	if (IS_ERR(bh))
		goto error_return;

	/* Ok, now we can actually update the inode bitmaps.. */
	if (!ext2_clear_bit (bit, bh->b_data))
		ext2_error (sb, "ext2_free_inode",
			      "bit already cleared for inode %lu", ino);
	else {
		desc = ext2_get_group_desc (sb, block_group, &bh2);
		if (desc) {
			desc->bg_free_inodes_count =
				cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
			if (is_directory)
				desc->bg_used_dirs_count =
					cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
		}
		mark_buffer_dirty(bh2);
		es->s_free_inodes_count =
			cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
		mark_buffer_dirty(sb->u.ext2_sb.s_sbh);
	}
	mark_buffer_dirty(bh);
	if (sb->s_flags & MS_SYNCHRONOUS) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
	}
	sb->s_dirt = 1;
error_return:
	unlock_super (sb);
}
Example #17
/*
 * Returns a pointer to a buffer containing at least LEN bytes of
 * filesystem starting at byte offset OFFSET into the filesystem.
 */
static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len)
{
	struct buffer_head * bh_array[BLKS_PER_BUF];
	struct buffer_head * read_array[BLKS_PER_BUF];
	unsigned i, blocknr, buffer, unread;
	unsigned long devsize;
	int major, minor;

	char *data;

	if (!len)
		return NULL;
	blocknr = offset >> PAGE_CACHE_SHIFT;
	offset &= PAGE_CACHE_SIZE - 1;

	/* Check if an existing buffer already has the data.. */
	for (i = 0; i < READ_BUFFERS; i++) {
		unsigned int blk_offset;

		if (buffer_dev[i] != sb)
			continue;
		if (blocknr < buffer_blocknr[i])
			continue;
		blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT;
		blk_offset += offset;
		if (blk_offset + len > BUFFER_SIZE)
			continue;
		return read_buffers[i] + blk_offset;
	}

	devsize = ~0UL;
	major = MAJOR(sb->s_dev);
	minor = MINOR(sb->s_dev);

	if (blk_size[major])
		devsize = blk_size[major][minor] >> 2;

	/* Ok, read in BLKS_PER_BUF pages completely first. */
	unread = 0;
	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct buffer_head *bh;

		bh = NULL;
		if (blocknr + i < devsize) {
			bh = sb_getblk(sb, blocknr + i);
			if (!buffer_uptodate(bh))
				read_array[unread++] = bh;
		}
		bh_array[i] = bh;
	}

	if (unread) {
		ll_rw_block(READ, unread, read_array);
		do {
			unread--;
			wait_on_buffer(read_array[unread]);
		} while (unread);
	}

	/* Ok, copy them to the staging area without sleeping. */
	buffer = next_buffer;
	next_buffer = NEXT_BUFFER(buffer);
	buffer_blocknr[buffer] = blocknr;
	buffer_dev[buffer] = sb;

	data = read_buffers[buffer];
	for (i = 0; i < BLKS_PER_BUF; i++) {
		struct buffer_head * bh = bh_array[i];
		if (bh) {
			memcpy(data, bh->b_data, PAGE_CACHE_SIZE);
			brelse(bh);
		} else
			memset(data, 0, PAGE_CACHE_SIZE);
		data += PAGE_CACHE_SIZE;
	}
	return read_buffers[buffer] + offset;
}
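
A hypothetical caller sketch for cramfs_read() (struct cramfs_super and this use of it are assumptions for illustration, not part of the listing above): copy the on-disk superblock out of the staging buffer the function returns.

static int read_cramfs_super(struct super_block *sb, struct cramfs_super *out)
{
	void *data = cramfs_read(sb, 0, sizeof(*out));

	if (!data)				/* cramfs_read() returns NULL only for a zero-length request */
		return -EINVAL;
	memcpy(out, data, sizeof(*out));	/* the staging buffer may be recycled later, so copy now */
	return 0;
}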
Example #18
/*
 * Modify the inode page cache in the following way:
 * have - blocks with b_blocknr equal to oldb...oldb+count-1
 * get  - blocks with b_blocknr equal to newb...newb+count-1
 * We also assume that blocks oldb...oldb+count-1 are
 * situated at the end of the file.
 *
 * We can get here from ufs_writepage or ufs_prepare_write;
 * locked_page is an argument of those functions, so it is already locked.
 */
static void ufs_change_blocknr(struct inode *inode, sector_t beg,
			       unsigned int count, sector_t oldb,
			       sector_t newb, struct page *locked_page)
{
	const unsigned blks_per_page =
		1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	const unsigned mask = blks_per_page - 1;
	struct address_space * const mapping = inode->i_mapping;
	pgoff_t index, cur_index, last_index;
	unsigned pos, j, lblock;
	sector_t end, i;
	struct page *page;
	struct buffer_head *head, *bh;

	UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n",
	      inode->i_ino, count,
	     (unsigned long long)oldb, (unsigned long long)newb);

	BUG_ON(!locked_page);
	BUG_ON(!PageLocked(locked_page));

	cur_index = locked_page->index;
	end = count + beg;
	last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
	for (i = beg; i < end; i = (i | mask) + 1) {
		index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);

		if (likely(cur_index != index)) {
			page = ufs_get_locked_page(mapping, index);
			if (!page)/* it was truncated */
				continue;
			if (IS_ERR(page)) {/* or EIO */
				ufs_error(inode->i_sb, __func__,
					  "read of page %llu failed\n",
					  (unsigned long long)index);
				continue;
			}
		} else
			page = locked_page;

		head = page_buffers(page);
		bh = head;
		pos = i & mask;
		for (j = 0; j < pos; ++j)
			bh = bh->b_this_page;


		if (unlikely(index == last_index))
			lblock = end & mask;
		else
			lblock = blks_per_page;

		do {
			if (j >= lblock)
				break;
			pos = (i - beg) + j;

			if (!buffer_mapped(bh))
					map_bh(bh, inode->i_sb, oldb + pos);
			if (!buffer_uptodate(bh)) {
				ll_rw_block(READ, 1, &bh);
				wait_on_buffer(bh);
				if (!buffer_uptodate(bh)) {
					ufs_error(inode->i_sb, __func__,
						  "read of block failed\n");
					break;
				}
			}

			UFSD(" change from %llu to %llu, pos %u\n",
			     (unsigned long long)(pos + oldb),
			     (unsigned long long)(pos + newb), pos);

			bh->b_blocknr = newb + pos;
			unmap_underlying_metadata(bh->b_bdev,
						  bh->b_blocknr);
			mark_buffer_dirty(bh);
			++j;
			bh = bh->b_this_page;
		} while (bh != head);

		if (likely(cur_index != index))
			ufs_put_locked_page(page);
 	}
	UFSD("EXIT\n");
}
Example #19
File: buffer.c  Project: foolsh/elks
void lock_buffer(register struct buffer_head *bh)
{
    wait_on_buffer(bh);
    bh->b_lock = 1;
    map_buffer(bh);
}
Example #20
int __microfs_read_blks(struct super_block* sb,
	struct address_space* mapping, void* data,
	microfs_read_blks_recycler recycler,
	microfs_read_blks_consumer consumer,
	__u32 offset, __u32 length)
{
	__u32 i;
	__u32 n;
	__u32 dev_blks;
	
	int err = 0;
	
	__u32 blk_nr;
	__u32 blk_offset;
	
	__u32 nbhs;
	struct buffer_head** bhs;
	
	if (recycler(sb, data, offset, length, consumer) == 0)
		goto out_cachehit;
	
	blk_offset = offset - (offset & PAGE_MASK);
	
	nbhs = i_blks(blk_offset + length, PAGE_SIZE);
	bhs = kmalloc(nbhs * sizeof(void*), GFP_KERNEL);
	if (!bhs) {
		pr_err("__microfs_read_blks: failed to allocate bhs (%u slots)\n", nbhs);
		err = -ENOMEM;
		goto err_mem;
	}
	
	blk_nr = offset >> PAGE_SHIFT;
	dev_blks = sb->s_bdev->bd_inode->i_size >> PAGE_SHIFT;
	
	pr_spam("__microfs_read_blks: offset=0x%x, blk_offset=%u, length=%u\n",
		offset, blk_offset, length);
	pr_spam("__microfs_read_blks: nbhs=%u, blk_nr=%u, dev_blks=%u\n",
		nbhs, blk_nr, dev_blks);
	
	for (i = 0, n = 0; i < nbhs; ++i) {
		if (likely(blk_nr + i < dev_blks)) {
			bhs[n++] = sb_getblk(sb, blk_nr + i);
			if (unlikely(bhs[n - 1] == NULL)) {
				pr_err("__microfs_read_blks: failed to get a bh for block %u\n",
					blk_nr + i);
				err = -EIO;
				goto err_bhs;
			} else {
				pr_spam("__microfs_read_blks: got bh 0x%p for block %u\n",
					bhs[n - 1], blk_nr + i);
			}
		} else {
			/* It is not possible to fill the entire read buffer this
			 * time. "Welcome to the end of the image."
			 */
			bhs[i] = NULL;
		}
	}
	
	ll_rw_block(REQ_OP_READ, 0, n, bhs);
	
	pr_spam("__microfs_read_blks: bhs submitted for reading\n");
	
	for (i = 0; i < n; ++i) {
		wait_on_buffer(bhs[i]);
		if (unlikely(!buffer_uptodate(bhs[i]))) {
			pr_err("__microfs_read_blks: bh 0x%p (#%u) is not up-to-date\n", bhs[i], i);
			err = -EIO;
			goto err_bhs;
		}
	}
	
	pr_spam("__microfs_read_blks: reading complete\n");
	
	err = consumer(sb, data, bhs, n, offset, length);
	
	pr_spam("__microfs_read_blks: processing complete\n");
	
err_bhs:
	for (i = 0; i < n; ++i) {
		pr_spam("__microfs_read_blks: releasing bh 0x%p\n", bhs[i]);
		put_bh(bhs[i]);
	}
	kfree(bhs);
err_mem:
out_cachehit:
	return err;
}
Example #21
static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
				  struct buffer_head **out_bh,
				  void (*init_block)(struct inode *,
						     struct buffer_head *,
						     void *))
{
	struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
	struct super_block *sb = inode->i_sb;
	struct nilfs_transaction_info ti;
	struct buffer_head *bh;
	int err;

	if (!sb) {
		/*
		 * Make sure this function is not called from any
		 * read-only context.
		 */
		if (!nilfs->ns_writer) {
			WARN_ON(1);
			err = -EROFS;
			goto out;
		}
		sb = nilfs->ns_writer->s_super;
	}

	nilfs_transaction_begin(sb, &ti, 0);

	err = -ENOMEM;
	bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
	if (unlikely(!bh))
		goto failed_unlock;

	err = -EEXIST;
	if (buffer_uptodate(bh) || buffer_mapped(bh))
		goto failed_bh;
#if 0
	/* The uptodate flag is not protected by the page lock, but
	   the mapped flag is.  Thus, we don't have to wait for the buffer. */
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		goto failed_bh;
#endif

	bh->b_bdev = nilfs->ns_bdev;
	err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
	if (likely(!err)) {
		get_bh(bh);
		*out_bh = bh;
	}

 failed_bh:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	brelse(bh);

 failed_unlock:
	if (likely(!err))
		err = nilfs_transaction_commit(sb);
	else
		nilfs_transaction_abort(sb);
 out:
	return err;
}
Example #22
File: file.c  Project: binsys/doc-linux
static int ext_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
	int read,left,chars;
	int block, blocks, offset;
	struct buffer_head ** bhb, ** bhe;
	struct buffer_head * buflist[NBUF];

	if (!inode) {
		printk("ext_file_read: inode = NULL\n");
		return -EINVAL;
	}
	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
		printk("ext_file_read: mode = %07o\n",inode->i_mode);
		return -EINVAL;
	}
	if (filp->f_pos > inode->i_size)
		left = 0;
	else
		left = inode->i_size - filp->f_pos;
	if (left > count)
		left = count;
	if (left <= 0)
		return 0;
	read = 0;
	block = filp->f_pos >> BLOCK_SIZE_BITS;
	offset = filp->f_pos & (BLOCK_SIZE-1);
	blocks = (left + offset + BLOCK_SIZE - 1) / BLOCK_SIZE;
	bhb = bhe = buflist;
	do {
		if (blocks) {
			--blocks;
			*bhb = ext_getblk(inode,block++,0);
			if (*bhb && !(*bhb)->b_uptodate)
				ll_rw_block(READ,*bhb);

			if (++bhb == &buflist[NBUF])
				bhb = buflist;

			if (bhb != bhe)
				continue;
		}
		if (*bhe) {
			wait_on_buffer(*bhe);
			if (!(*bhe)->b_uptodate) {
				do {
					brelse(*bhe);
					if (++bhe == &buflist[NBUF])
						bhe = buflist;
				} while (bhe != bhb);
				break;
			}
		}

		if (left < BLOCK_SIZE - offset)
			chars = left;
		else
			chars = BLOCK_SIZE - offset;
		filp->f_pos += chars;
		left -= chars;
		read += chars;
		if (*bhe) {
			memcpy_tofs(buf,offset+(*bhe)->b_data,chars);
			brelse(*bhe);
			buf += chars;
		} else {
			while (chars-->0)
				put_fs_byte(0,buf++);
		}
		offset = 0;
		if (++bhe == &buflist[NBUF])
			bhe = buflist;
	} while (left > 0);
	if (!read)
		return -EIO;
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	return read;
}
Example #23
static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
	struct buffer_head **bh, int b, int offset, int length, int srclength,
	int pages)
{
	int zlib_err = 0, zlib_init = 0;
	int avail, bytes, k = 0, page = 0;
	z_stream *stream = msblk->stream;

	mutex_lock(&msblk->read_data_mutex);

	stream->avail_out = 0;
	stream->avail_in = 0;

	bytes = length;
	do {
		if (stream->avail_in == 0 && k < b) {
			avail = min(bytes, msblk->devblksize - offset);
			bytes -= avail;
			wait_on_buffer(bh[k]);
			if (!buffer_uptodate(bh[k]))
				goto release_mutex;

			if (avail == 0) {
				offset = 0;
				put_bh(bh[k++]);
				continue;
			}

			stream->next_in = bh[k]->b_data + offset;
			stream->avail_in = avail;
			offset = 0;
		}

		if (stream->avail_out == 0 && page < pages) {
			stream->next_out = buffer[page++];
			stream->avail_out = PAGE_CACHE_SIZE;
		}

		if (!zlib_init) {
			zlib_err = zlib_inflateInit(stream);
			if (zlib_err != Z_OK) {
				ERROR("zlib_inflateInit returned unexpected "
					"result 0x%x, srclength %d\n",
					zlib_err, srclength);
				goto release_mutex;
			}
			zlib_init = 1;
		}

		zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH);

		if (stream->avail_in == 0 && k < b)
			put_bh(bh[k++]);
	} while (zlib_err == Z_OK);

	if (zlib_err != Z_STREAM_END) {
		ERROR("zlib_inflate error, data probably corrupt\n");
		goto release_mutex;
	}

	zlib_err = zlib_inflateEnd(stream);
	if (zlib_err != Z_OK) {
		ERROR("zlib_inflate error, data probably corrupt\n");
		goto release_mutex;
	}

	length = stream->total_out;
	mutex_unlock(&msblk->read_data_mutex);
	return length;

release_mutex:
	mutex_unlock(&msblk->read_data_mutex);

	for (; k < b; k++)
		put_bh(bh[k]);

	return -EIO;
}
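The zlib loop above interleaves decompression with draining the bh array; the same walk also appears without compression. The following is a simplified, hypothetical copy-out variant: demo_copy_from_bhs is illustrative, not a squashfs function, and it assumes the caller has already submitted the reads and passes its device block size (as squashfs does via msblk->devblksize).

#include <linux/buffer_head.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Illustrative only -- not a squashfs function. Copies "length" bytes that
 * start "offset" bytes into bh[0], waiting for each read to complete and
 * releasing each buffer_head as soon as it has been drained. */
static int demo_copy_from_bhs(char *dst, struct buffer_head **bh, int b,
	int offset, int length, int devblksize)
{
	int bytes = length;
	int k;

	for (k = 0; k < b && bytes > 0; k++) {
		int avail = min(bytes, devblksize - offset);

		wait_on_buffer(bh[k]);		/* reads were submitted by the caller */
		if (!buffer_uptodate(bh[k]))
			goto release;

		memcpy(dst, bh[k]->b_data + offset, avail);
		dst += avail;
		bytes -= avail;
		offset = 0;			/* only the first block is offset */

		put_bh(bh[k]);			/* done with this device block */
	}
	return length - bytes;

release:
	for (; k < b; k++)
		put_bh(bh[k]);
	return -EIO;
}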
Example #24
File: file.c Project: binsys/doc-linux
static int ext_file_write(struct inode * inode, struct file * filp, char * buf, int count)
{
	off_t pos;
	int written,c;
	struct buffer_head * bh;
	char * p;

	if (!inode) {
		printk("ext_file_write: inode = NULL\n");
		return -EINVAL;
	}
	if (!S_ISREG(inode->i_mode)) {
		printk("ext_file_write: mode = %07o\n",inode->i_mode);
		return -EINVAL;
	}
/*
 * ok, append may not work when many processes are writing at the same time
 * but so what. That way leads to madness anyway.
 */
	if (filp->f_flags & O_APPEND)
		pos = inode->i_size;
	else
		pos = filp->f_pos;
	written = 0;
	while (written<count) {
		bh = ext_getblk(inode,pos/BLOCK_SIZE,1);
		if (!bh) {
			if (!written)
				written = -ENOSPC;
			break;
		}
		c = BLOCK_SIZE - (pos % BLOCK_SIZE);
		if (c > count-written)
			c = count-written;
		if (c != BLOCK_SIZE && !bh->b_uptodate) {
			ll_rw_block(READ,bh);
			wait_on_buffer(bh);
			if (!bh->b_uptodate) {
				brelse(bh);
				if (!written)
					written = -EIO;
				break;
			}
		}
		p = (pos % BLOCK_SIZE) + bh->b_data;
		pos += c;
		if (pos > inode->i_size) {
			inode->i_size = pos;
			inode->i_dirt = 1;
		}
		written += c;
		memcpy_fromfs(p,buf,c);
		buf += c;
		bh->b_uptodate = 1;
		bh->b_dirt = 1;
		brelse(bh);
	}
	inode->i_mtime = CURRENT_TIME;
	inode->i_ctime = CURRENT_TIME;
	filp->f_pos = pos;
	inode->i_dirt = 1;
	return written;
}
Example #25
File: buffer.c Project: binsys/doc-linux
struct buffer_head * getblk(int dev, int block, int size)
{
	struct buffer_head * bh, * tmp;
	int buffers;

repeat:
	if (bh = get_hash_table(dev, block, size))
		return bh;

	if (nr_free_pages > 30)
		grow_buffers(size);

	buffers = nr_buffers;
	bh = NULL;

	for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
		if (tmp->b_count || tmp->b_size != size)
			continue;
		if (!bh || BADNESS(tmp)<BADNESS(bh)) {
			bh = tmp;
			if (!BADNESS(tmp))
				break;
		}
#if 0
		if (tmp->b_dirt)
			ll_rw_block(WRITEA,tmp);
#endif
	}

	if (!bh && nr_free_pages > 5) {
		grow_buffers(size);
		goto repeat;
	}
	
/* and repeat until we find something good */
	if (!bh) {
		sleep_on(&buffer_wait);
		goto repeat;
	}
	wait_on_buffer(bh);
	if (bh->b_count || bh->b_size != size)
		goto repeat;
	if (bh->b_dirt) {
		sync_buffers(bh->b_dev);
		goto repeat;
	}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
	if (find_buffer(dev,block,size))
		goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
	bh->b_count=1;
	bh->b_dirt=0;
	bh->b_uptodate=0;
	remove_from_queues(bh);
	bh->b_dev=dev;
	bh->b_blocknr=block;
	insert_into_queues(bh);
	return bh;
}
Example #26
File: checkpoint.c Project: 020gzh/linux
/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	struct journal_head	*jh;
	struct buffer_head	*bh;
	transaction_t		*transaction;
	tid_t			this_tid;
	int			result, batch_count = 0;

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Take one transaction
	 * and write it.
	 */
	result = 0;
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	/* checkpoint all of the transaction's buffers */
	while (transaction->t_checkpoint_list) {
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);

		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			get_bh(bh);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto retry;
		}
		if (jh->b_transaction != NULL) {
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			transaction->t_chp_stats.cs_forced_to_close++;
			spin_unlock(&journal->j_list_lock);
			if (unlikely(journal->j_flags & JBD2_UNMOUNT))
				/*
				 * The journal thread is dead; so
				 * starting and waiting for a commit
				 * to finish will cause us to wait for
				 * a _very_ long time.
				 */
				printk(KERN_ERR
		"JBD2: %s: Waiting for Godot: block %llu\n",
		journal->j_devname, (unsigned long long) bh->b_blocknr);

			jbd2_log_start_commit(journal, tid);
			jbd2_log_wait_commit(journal, tid);
			goto retry;
		}
		if (!buffer_dirty(bh)) {
			if (unlikely(buffer_write_io_error(bh)) && !result)
				result = -EIO;
			BUFFER_TRACE(bh, "remove from checkpoint");
			if (__jbd2_journal_remove_checkpoint(jh))
				/* The transaction was released; we're done */
				goto out;
			continue;
		}
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal
		 * lock.  We cannot afford to let the transaction
		 * logic start messing around with this buffer before
		 * we write it to disk, as that would break
		 * recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		journal->j_chkpt_bhs[batch_count++] = bh;
		__buffer_relink_io(jh);
		transaction->t_chp_stats.cs_written++;
		if ((batch_count == JBD2_NR_BATCH) ||
		    need_resched() ||
		    spin_needbreak(&journal->j_list_lock))
			goto unlock_and_flush;
	}

	if (batch_count) {
		unlock_and_flush:
			spin_unlock(&journal->j_list_lock);
		retry:
			if (batch_count)
				__flush_batch(journal, &batch_count);
			spin_lock(&journal->j_list_lock);
			goto restart;
	}

	/*
	 * Now we issued all of the transaction's buffers, let's deal
	 * with the buffers that are out for I/O.
	 */
restart2:
	/* Did somebody clean up the transaction in the meanwhile? */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	while (transaction->t_checkpoint_io_list) {
		jh = transaction->t_checkpoint_io_list;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			get_bh(bh);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			spin_lock(&journal->j_list_lock);
			goto restart2;
		}
		if (unlikely(buffer_write_io_error(bh)) && !result)
			result = -EIO;

		/*
		 * Now in whatever state the buffer currently is, we
		 * know that it has been written out and so we can
		 * drop it from the list
		 */
		if (__jbd2_journal_remove_checkpoint(jh))
			break;
	}
out:
	spin_unlock(&journal->j_list_lock);
	if (result < 0)
		jbd2_journal_abort(journal, result);
	else
		result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
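__flush_batch(), referenced in the retry path above, is not part of this listing. The sketch below is an approximation of what such a batch flush plausibly does, not the upstream jbd2 helper: submit every queued checkpoint buffer under one block plug so the writes can merge, then drop the references that were taken when the buffers were queued.

#include <linux/jbd2.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>

/* Approximate sketch only -- not the upstream __flush_batch(). */
static void demo_flush_batch(journal_t *journal, int *batch_count)
{
	struct blk_plug plug;
	int i;

	/* Submit every queued buffer under one plug so the writes can merge. */
	blk_start_plug(&plug);
	for (i = 0; i < *batch_count; i++)
		write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC);
	blk_finish_plug(&plug);

	/* Drop the references taken when the buffers were queued above. */
	for (i = 0; i < *batch_count; i++)
		__brelse(journal->j_chkpt_bhs[i]);

	*batch_count = 0;
}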
Example #27
static ssize_t minix_file_write(struct file * filp, const char * buf,
				size_t count, loff_t *ppos)
{
	struct inode * inode = filp->f_dentry->d_inode;
	off_t pos;
	ssize_t written, c;
	struct buffer_head * bh;
	char * p;

	if (!inode) {
		printk("minix_file_write: inode = NULL\n");
		return -EINVAL;
	}
	if (!S_ISREG(inode->i_mode)) {
		printk("minix_file_write: mode = %07o\n",inode->i_mode);
		return -EINVAL;
	}
	if (filp->f_flags & O_APPEND)
		pos = inode->i_size;
	else
		pos = *ppos;
	written = 0;
	while (written < count) {
		bh = minix_getblk(inode,pos/BLOCK_SIZE,1);
		if (!bh) {
			if (!written)
				written = -ENOSPC;
			break;
		}
		c = BLOCK_SIZE - (pos % BLOCK_SIZE);
		if (c > count-written)
			c = count-written;
		if (c != BLOCK_SIZE && !buffer_uptodate(bh)) {
			ll_rw_block(READ, 1, &bh);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh)) {
				brelse(bh);
				if (!written)
					written = -EIO;
				break;
			}
		}
		p = (pos % BLOCK_SIZE) + bh->b_data;
		c -= copy_from_user(p,buf,c);
		if (!c) {
			brelse(bh);
			if (!written)
				written = -EFAULT;
			break;
		}
		update_vm_cache(inode, pos, p, c);
		mark_buffer_uptodate(bh, 1);
		mark_buffer_dirty(bh, 0);
		brelse(bh);
		pos += c;
		written += c;
		buf += c;
	}
	if (pos > inode->i_size)
		inode->i_size = pos;
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	*ppos = pos;
	mark_inode_dirty(inode);
	return written;
}
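Both this function and the older ext_file_write above apply the same read-modify-write rule before copying user data into a block: only read the block from disk when the write does not cover it completely and the cached copy is stale. A minimal illustrative helper follows; demo_prepare_block_for_write is not from the minix driver and assumes the same 2.x-era in-kernel environment as the snippet.

#include <linux/fs.h>
#include <linux/locks.h>

/* Illustrative helper, not from the minix driver: read a block from disk only
 * when the pending write does not cover it completely and the cached copy is
 * not already valid. Returns 0 when the buffer may be written into. */
static int demo_prepare_block_for_write(struct buffer_head * bh, size_t chars)
{
	if (chars != BLOCK_SIZE && !buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);	/* fill the buffer from disk */
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			return -EIO;		/* the read failed */
	}
	return 0;
}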
Example #28
File: block_dev.c Project: Mellvik/elks
static int blk_rw(struct inode *inode, register struct file *filp,
		  char *buf, size_t count, int wr)
{
    register struct buffer_head *bh;
    size_t chars, offset;
    int written = 0;

    while (count > 0) {
    /*
     *      Offset to block/offset
     */
	offset = ((size_t)filp->f_pos) & (BLOCK_SIZE - 1);
	chars = BLOCK_SIZE - offset;
	if (chars > count)
	    chars = count;
	/*
	 *      Read the block in - use getblk on a write
	 *      of a whole block to avoid a read of the data.
	 */
	bh = getblk(inode->i_rdev, (block_t)(filp->f_pos >> BLOCK_SIZE_BITS));
	if ((wr == BLOCK_READ) || (chars != BLOCK_SIZE)) {
	    if (!readbuf(bh)) {
		if (!written) written = -EIO;
		break;
	    }
	}

	map_buffer(bh);
	if (wr == BLOCK_WRITE) {
	    /*
	     *      Alter buffer, mark dirty
	     */
	    memcpy_fromfs(bh->b_data + offset, buf, chars);
	    bh->b_uptodate = bh->b_dirty = 1;
	    /*
	     *      Writing: queue physical I/O
	     */
	    ll_rw_blk(WRITE, bh);
	    wait_on_buffer(bh);
	    if (!bh->b_uptodate) { /* Write error. */
		unmap_brelse(bh);
		if (!written) written = -EIO;
		break;
	    }
	} else {
	    /*
	     *      Empty buffer data. Buffer unchanged
	     */
	    memcpy_tofs(buf, bh->b_data + offset, chars);
	}
	/*
	 *      Move on and release buffer
	 */

	unmap_brelse(bh);

	buf += chars;
	filp->f_pos += chars;
	written += chars;
	count -= chars;
    }
    return written;
}
Example #29
struct inode * ext2_new_inode (const struct inode * dir, int mode)
{
	struct super_block * sb;
	struct buffer_head * bh;
	struct buffer_head * bh2;
	int group, i;
	ino_t ino;
	struct inode * inode;
	struct ext2_group_desc * desc;
	struct ext2_super_block * es;
	int err;

	sb = dir->i_sb;
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	lock_super (sb);
	es = sb->u.ext2_sb.s_es;
repeat:
	if (S_ISDIR(mode))
		group = find_group_dir(sb, dir->u.ext2_i.i_block_group);
	else 
		group = find_group_other(sb, dir->u.ext2_i.i_block_group);

	err = -ENOSPC;
	if (group == -1)
		goto fail;

	err = -EIO;
	bh = load_inode_bitmap (sb, group);
	if (IS_ERR(bh))
		goto fail2;

	i = ext2_find_first_zero_bit ((unsigned long *) bh->b_data,
				      EXT2_INODES_PER_GROUP(sb));
	if (i >= EXT2_INODES_PER_GROUP(sb))
		goto bad_count;
	ext2_set_bit (i, bh->b_data);

	mark_buffer_dirty(bh);
	if (sb->s_flags & MS_SYNCHRONOUS) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
	}

	ino = group * EXT2_INODES_PER_GROUP(sb) + i + 1;
	if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext2_error (sb, "ext2_new_inode",
			    "reserved inode or inode > inodes count - "
			    "block_group = %d,inode=%ld", group, ino);
		err = -EIO;
		goto fail2;
	}

	es->s_free_inodes_count =
		cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
	mark_buffer_dirty(sb->u.ext2_sb.s_sbh);
	sb->s_dirt = 1;
	inode->i_uid = current->fsuid;
	if (test_opt (sb, GRPID))
		inode->i_gid = dir->i_gid;
	else if (dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current->fsgid;
	inode->i_mode = mode;

	inode->i_ino = ino;
	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->u.ext2_i.i_new_inode = 1;
	inode->u.ext2_i.i_flags = dir->u.ext2_i.i_flags & ~EXT2_BTREE_FL;
	if (S_ISLNK(mode))
		inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
	inode->u.ext2_i.i_block_group = group;
	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL)
		inode->i_flags |= S_SYNC;
	insert_inode_hash(inode);
	inode->i_generation = event++;
	mark_inode_dirty(inode);

	unlock_super (sb);
	if(DQUOT_ALLOC_INODE(inode)) {
		DQUOT_DROP(inode);
		inode->i_flags |= S_NOQUOTA;
		inode->i_nlink = 0;
		iput(inode);
		return ERR_PTR(-EDQUOT);
	}
	ext2_debug ("allocating inode %lu\n", inode->i_ino);
	return inode;

fail2:
	desc = ext2_get_group_desc (sb, group, &bh2);
	desc->bg_free_inodes_count =
		cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
	if (S_ISDIR(mode))
		desc->bg_used_dirs_count =
			cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
	mark_buffer_dirty(bh2);
fail:
	unlock_super(sb);
	make_bad_inode(inode);
	iput(inode);
	return ERR_PTR(err);

bad_count:
	ext2_error (sb, "ext2_new_inode",
		    "Free inodes count corrupted in group %d",
		    group);
	/* Is it really ENOSPC? */
	err = -ENOSPC;
	if (sb->s_flags & MS_RDONLY)
		goto fail;

	desc = ext2_get_group_desc (sb, group, &bh2);
	desc->bg_free_inodes_count = 0;
	mark_buffer_dirty(bh2);
	goto repeat;
}
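The inode bitmap update above shows the usual synchronous-mount pattern: dirty the bitmap buffer and, only when the filesystem is mounted with MS_SYNCHRONOUS, push it to disk immediately and wait for completion. A small helper distilling that pattern is sketched below; demo_dirty_bitmap is hypothetical and assumes the same 2.4-era ext2 environment as the snippet.

#include <linux/fs.h>
#include <linux/locks.h>

/* Illustrative helper, not from ext2: mark a bitmap buffer dirty and, on a
 * synchronously mounted filesystem, write it out and wait before going on. */
static void demo_dirty_bitmap (struct super_block * sb, struct buffer_head * bh)
{
	mark_buffer_dirty(bh);
	if (sb->s_flags & MS_SYNCHRONOUS) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
	}
}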
Example #30
//// Get the specified buffer from the buffer cache.
// Check whether the requested buffer is already cached; if not, a new entry
// has to be set up for it. Returns the corresponding buffer head pointer.
struct buffer_head *
getblk( int dev, int block )
{
	struct buffer_head *tmp, *bh;

repeat:
// Search the hash table; if the block is already in the cache, return its
// buffer head and we are done.
	if( bh = get_hash_table( dev, block ) )
	{
		return bh;
	}
// Scan the free list looking for an unused buffer.
// Start with tmp pointing at the first buffer head on the free list.
	tmp = free_list;
	do
	{
// Skip buffers that are in use (reference count is non-zero).
		if( tmp->b_count )
		{
			continue;
		}
// If bh is still NULL, or tmp's combined (dirty, locked) badness is lower than
// that of the current candidate, make tmp the candidate. If tmp is neither
// dirty nor locked, a suitable buffer has been found, so stop scanning.
		if( !bh || BADNESS( tmp ) < BADNESS( bh ) )
		{
			bh = tmp;
			if( !BADNESS( tmp ) )
			{
				break;
			}
		}
/* and repeat until we find something good */
	}
	while( ( tmp = tmp->b_next_free ) != free_list );
// If every buffer is in use (all reference counts > 0), sleep until one is freed.
	if( !bh )
	{
		sleep_on( &buffer_wait );
		goto repeat;
	}
// Wait for the buffer to be unlocked (if it is currently locked).
	wait_on_buffer( bh );
// If some other task grabbed the buffer while we slept, start over.
	if( bh->b_count )
	{
		goto repeat;
	}
// If the buffer is dirty, write it out and wait for it to be unlocked again.
// If another task grabbed it in the meantime, start over once more.
	while( bh->b_dirt )
	{
		sync_dev( bh->b_dev );
		wait_on_buffer( bh );
		if( bh->b_count )
		{
			goto repeat;
		}
	}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
// Check the hash table for a buffer matching this device and block; if one
// has appeared while we slept, repeat the whole procedure.
	if( find_buffer( dev, block ) )
	{
		goto repeat;
	}
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
// Claim the buffer: set its reference count to 1 and clear the dirty and
// uptodate flags.
	bh->b_count	   = 1;
	bh->b_dirt	   = 0;
	bh->b_uptodate = 0;
// Remove the buffer head from the hash queue and free list, then assign it to
// the requested device and block.
	remove_from_queues( bh );
	bh->b_dev	   = dev;
	bh->b_blocknr  = block;
// Re-insert it into the free list and hash queue at the positions given by the
// new device and block number, and finally return the buffer head pointer.
	insert_into_queues( bh );
	return bh;
}
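For completeness, this is how such a getblk() is typically consumed: a bread()-style wrapper that requests the block and sleeps in wait_on_buffer() until the driver has filled it. The sketch below is illustrative, not part of the listing; demo_bread is a hypothetical name, and since wait_on_buffer() is usually a file-local helper in kernels of this vintage, the sketch is written as if it lives in the same buffer.c.

//// Illustrative sketch (not part of the listing above): a bread()-style
//// caller of getblk(). If the cached copy is not valid, request the block
//// and sleep in wait_on_buffer() until the low-level driver has filled it.
struct buffer_head *
demo_bread( int dev, int block )
{
	struct buffer_head	*bh;

	if( !( bh = getblk( dev, block ) ) )
	{
		return NULL;
	}
	if( bh->b_uptodate )            // data already valid, no I/O needed.
	{
		return bh;
	}
	ll_rw_block( READ, bh );        // queue the read request.
	wait_on_buffer( bh );           // sleep until the buffer is unlocked.
	if( bh->b_uptodate )
	{
		return bh;
	}
	brelse( bh );                   // the read failed; drop the buffer.
	return NULL;
}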