int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; tid_t tid; int need_to_start, ret; if (is_handle_aborted(handle)) return 0; J_ASSERT(atomic_read(&transaction->t_updates) > 0); J_ASSERT(journal_current_handle() == handle); read_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); atomic_sub(handle->h_buffer_credits, &transaction->t_outstanding_credits); if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) jbd2_log_start_commit(journal, tid); lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; ret = start_this_handle(journal, handle, gfp_mask); return ret; }
int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; tid_t commit_tid; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, dentry, datasync); if (inode->i_sb->s_flags & MS_RDONLY) return 0; ret = flush_aio_dio_completed_IO(inode); if (ret < 0) return ret; if (!journal) return simple_fsync(file, dentry, datasync); /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. * Metadata is in the journal, we wait for proper transaction to * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). * ext4_force_commit will write the file data into the journal and * will wait on that. * filemap_fdatawait() will encounter a ton of newly-dirtied pages * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ if (ext4_should_journal_data(inode)) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; if (jbd2_log_start_commit(journal, commit_tid)) { /* * When the journal is on a different device than the * fs data disk, we need to issue the barrier in * writeback mode. (In ordered mode, the jbd2 layer * will take care of issuing the barrier. In * data=journal, all of the data blocks are written to * the journal device.) */ if (ext4_should_writeback_data(inode) && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); ret = jbd2_log_wait_commit(journal, commit_tid); } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; }
int ext4_sync_file(struct file *file, int datasync) { struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; tid_t commit_tid; bool needs_barrier = false; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, datasync); if (inode->i_sb->s_flags & MS_RDONLY) return 0; ret = flush_completed_IO(inode); if (ret < 0) return ret; if (!journal) { ret = generic_file_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) ext4_sync_parent(inode); return ret; } /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. * Metadata is in the journal, we wait for proper transaction to * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). * ext4_force_commit will write the file data into the journal and * will wait on that. * filemap_fdatawait() will encounter a ton of newly-dirtied pages * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ if (ext4_should_journal_data(inode)) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; if (journal->j_flags & JBD2_BARRIER && !jbd2_trans_will_send_data_barrier(journal, commit_tid)) needs_barrier = true; jbd2_log_start_commit(journal, commit_tid); ret = jbd2_log_wait_commit(journal, commit_tid); if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); return ret; }
static int __process_buffer(journal_t *journal, struct journal_head *jh, int *batch_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); BUFFER_TRACE(bh, "brelse"); __brelse(bh); ret = 1; } else if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); if (unlikely(journal->j_flags & JBD2_UNMOUNT)) printk(KERN_ERR "JBD2: %s: " "Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; get_bh(bh); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); __brelse(bh); } else { BUFFER_TRACE(bh, "queue"); get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); journal->j_chkpt_bhs[*batch_count] = bh; __buffer_relink_io(jh); transaction->t_chp_stats.cs_written++; (*batch_count)++; if (*batch_count == JBD2_NR_BATCH) { spin_unlock(&journal->j_list_lock); __flush_batch(journal, batch_count); ret = 1; } } return ret; }
/* * Perform an actual checkpoint. We take the first transaction on the * list of transactions to be checkpointed and send all its buffers * to disk. We submit larger chunks of data at once. * * The journal should be locked before calling this function. * Called with j_checkpoint_mutex held. */ int jbd2_log_do_checkpoint(journal_t *journal) { struct journal_head *jh; struct buffer_head *bh; transaction_t *transaction; tid_t this_tid; int result, batch_count = 0; jbd_debug(1, "Start checkpoint\n"); /* * First thing: if there are any transactions in the log which * don't need checkpointing, just eliminate them from the * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); trace_jbd2_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; /* * OK, we need to start writing disk blocks. Take one transaction * and write it. */ result = 0; spin_lock(&journal->j_list_lock); if (!journal->j_checkpoint_transactions) goto out; transaction = journal->j_checkpoint_transactions; if (transaction->t_chp_stats.cs_chp_time == 0) transaction->t_chp_stats.cs_chp_time = jiffies; this_tid = transaction->t_tid; restart: /* * If someone cleaned up this transaction while we slept, we're * done (maybe it's a new transaction, but it fell at the same * address). */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) goto out; /* checkpoint all of the transaction's buffers */ while (transaction->t_checkpoint_list) { jh = transaction->t_checkpoint_list; bh = jh2bh(jh); if (buffer_locked(bh)) { spin_unlock(&journal->j_list_lock); get_bh(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); goto retry; } if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); if (unlikely(journal->j_flags & JBD2_UNMOUNT)) /* * The journal thread is dead; so * starting and waiting for a commit * to finish will cause us to wait for * a _very_ long time. */ printk(KERN_ERR "JBD2: %s: Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); goto retry; } if (!buffer_dirty(bh)) { if (unlikely(buffer_write_io_error(bh)) && !result) result = -EIO; BUFFER_TRACE(bh, "remove from checkpoint"); if (__jbd2_journal_remove_checkpoint(jh)) /* The transaction was released; we're done */ goto out; continue; } /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal * lock. We cannot afford to let the transaction * logic start messing around with this buffer before * we write it to disk, as that would break * recoverability. */ BUFFER_TRACE(bh, "queue"); get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); journal->j_chkpt_bhs[batch_count++] = bh; __buffer_relink_io(jh); transaction->t_chp_stats.cs_written++; if ((batch_count == JBD2_NR_BATCH) || need_resched() || spin_needbreak(&journal->j_list_lock)) goto unlock_and_flush; } if (batch_count) { unlock_and_flush: spin_unlock(&journal->j_list_lock); retry: if (batch_count) __flush_batch(journal, &batch_count); spin_lock(&journal->j_list_lock); goto restart; } /* * Now we issued all of the transaction's buffers, let's deal * with the buffers that are out for I/O. */ restart2: /* Did somebody clean up the transaction in the meanwhile? */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) goto out; while (transaction->t_checkpoint_io_list) { jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); if (buffer_locked(bh)) { spin_unlock(&journal->j_list_lock); get_bh(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); spin_lock(&journal->j_list_lock); goto restart2; } if (unlikely(buffer_write_io_error(bh)) && !result) result = -EIO; /* * Now in whatever state the buffer currently is, we * know that it has been written out and so we can * drop it from the list */ if (__jbd2_journal_remove_checkpoint(jh)) break; } out: spin_unlock(&journal->j_list_lock); if (result < 0) jbd2_journal_abort(journal, result); else result = jbd2_cleanup_journal_tail(journal); return (result < 0) ? result : 0; }
int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; int err, wait_for_commit = 0; tid_t tid; pid_t pid; J_ASSERT(journal_current_handle() == handle); if (is_handle_aborted(handle)) err = -EIO; else { J_ASSERT(atomic_read(&transaction->t_updates) > 0); err = 0; } if (--handle->h_ref > 0) { jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, handle->h_ref); return err; } jbd_debug(4, "Handle %p going down\n", handle); pid = current->pid; if (handle->h_sync && journal->j_last_sync_writer != pid) { u64 commit_time, trans_time; journal->j_last_sync_writer = pid; read_lock(&journal->j_state_lock); commit_time = journal->j_average_commit_time; read_unlock(&journal->j_state_lock); trans_time = ktime_to_ns(ktime_sub(ktime_get(), transaction->t_start_time)); commit_time = max_t(u64, commit_time, 1000*journal->j_min_batch_time); commit_time = min_t(u64, commit_time, 1000*journal->j_max_batch_time); if (trans_time < commit_time) { ktime_t expires = ktime_add_ns(ktime_get(), commit_time); set_current_state(TASK_UNINTERRUPTIBLE); schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); } } if (handle->h_sync) transaction->t_synchronous_commit = 1; current->journal_info = NULL; atomic_sub(handle->h_buffer_credits, &transaction->t_outstanding_credits); if (handle->h_sync || (atomic_read(&transaction->t_outstanding_credits) > journal->j_max_transaction_buffers) || time_after_eq(jiffies, transaction->t_expires)) { jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); jbd2_log_start_commit(journal, transaction->t_tid); if (handle->h_sync && !(current->flags & PF_MEMALLOC)) wait_for_commit = 1; } tid = transaction->t_tid; if (atomic_dec_and_test(&transaction->t_updates)) { wake_up(&journal->j_wait_updates); if (journal->j_barrier_count) wake_up(&journal->j_wait_transaction_locked); } if (wait_for_commit) err = jbd2_log_wait_commit(journal, tid); lock_map_release(&handle->h_lockdep_map); jbd2_free_handle(handle); return err; }
static int start_this_handle(journal_t *journal, handle_t *handle, gfp_t gfp_mask) { transaction_t *transaction, *new_transaction = NULL; tid_t tid; int needed, need_to_start; int nblocks = handle->h_buffer_credits; unsigned long ts = jiffies; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", current->comm, nblocks, journal->j_max_transaction_buffers); return -ENOSPC; } alloc_transaction: if (!journal->j_running_transaction) { new_transaction = kmem_cache_alloc(transaction_cache, gfp_mask | __GFP_ZERO); if (!new_transaction) { if ((gfp_mask & __GFP_FS) == 0) { congestion_wait(BLK_RW_ASYNC, HZ/50); goto alloc_transaction; } return -ENOMEM; } } jbd_debug(3, "New handle %p going live.\n", handle); repeat: read_lock(&journal->j_state_lock); BUG_ON(journal->j_flags & JBD2_UNMOUNT); if (is_journal_aborted(journal) || (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { read_unlock(&journal->j_state_lock); jbd2_journal_free_transaction(new_transaction); return -EROFS; } if (journal->j_barrier_count) { read_unlock(&journal->j_state_lock); wait_event(journal->j_wait_transaction_locked, journal->j_barrier_count == 0); goto repeat; } if (!journal->j_running_transaction) { read_unlock(&journal->j_state_lock); if (!new_transaction) goto alloc_transaction; write_lock(&journal->j_state_lock); if (journal->j_barrier_count) { printk(KERN_WARNING "JBD: %s: wait for transaction barrier\n", __func__); write_unlock(&journal->j_state_lock); goto repeat; } if (!journal->j_running_transaction) { jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } write_unlock(&journal->j_state_lock); goto repeat; } transaction = journal->j_running_transaction; if (transaction->t_state == T_LOCKED) { DEFINE_WAIT(wait); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); read_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; } needed = atomic_add_return(nblocks, &transaction->t_outstanding_credits); if (needed > journal->j_max_transaction_buffers) { DEFINE_WAIT(wait); jbd_debug(2, "Handle %p starting new commit...\n", handle); atomic_sub(nblocks, &transaction->t_outstanding_credits); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) jbd2_log_start_commit(journal, tid); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; } if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); atomic_sub(nblocks, &transaction->t_outstanding_credits); read_unlock(&journal->j_state_lock); write_lock(&journal->j_state_lock); if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) __jbd2_log_wait_for_space(journal); write_unlock(&journal->j_state_lock); goto repeat; } update_t_max_wait(transaction, ts); handle->h_transaction = transaction; atomic_inc(&transaction->t_updates); atomic_inc(&transaction->t_handle_count); jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", handle, nblocks, atomic_read(&transaction->t_outstanding_credits), __jbd2_log_space_left(journal)); read_unlock(&journal->j_state_lock); lock_map_acquire(&handle->h_lockdep_map); jbd2_journal_free_transaction(new_transaction); return 0; }
/* * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. Return <0 if the buffer has failed to * be written out. * * Called with j_list_lock held and drops it if 1 is returned */ static int __process_buffer(journal_t *journal, struct journal_head *jh, int *batch_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); ret = 1; } else if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); if (unlikely(journal->j_flags & JBD2_UNMOUNT)) /* * The journal thread is dead; so starting and * waiting for a commit to finish will cause * us to wait for a _very_ long time. */ printk(KERN_ERR "JBD2: %s: " "Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; get_bh(bh); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); __brelse(bh); } else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. * We cannot afford to let the transaction logic start * messing around with this buffer before we write it to * disk, as that would break recoverability. */ BUFFER_TRACE(bh, "queue"); get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); journal->j_chkpt_bhs[*batch_count] = bh; __buffer_relink_io(jh); transaction->t_chp_stats.cs_written++; (*batch_count)++; if (*batch_count == JBD2_NR_BATCH) { spin_unlock(&journal->j_list_lock); __flush_batch(journal, batch_count); ret = 1; } } return ret; }