/* * __jbd2_log_wait_for_space: wait until there is space in the journal. * * Called under j-state_lock *only*. It will be unlocked if we have to wait * for a checkpoint to free up some space in the log. */ void __jbd2_log_wait_for_space(journal_t *journal) { int nblocks, space_left; /* assert_spin_locked(&journal->j_state_lock); */ nblocks = jbd_space_needed(journal); while (__jbd2_log_space_left(journal) < nblocks) { if (journal->j_flags & JBD2_ABORT) return; write_unlock(&journal->j_state_lock); mutex_lock(&journal->j_checkpoint_mutex); /* * Test again, another process may have checkpointed while we * were waiting for the checkpoint lock. If there are no * transactions ready to be checkpointed, try to recover * journal space by calling cleanup_journal_tail(), and if * that doesn't work, by waiting for the currently committing * transaction to complete. If there is absolutely no way * to make progress, this is either a BUG or corrupted * filesystem, so abort the journal and leave a stack * trace for forensic evidence. */ write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); space_left = __jbd2_log_space_left(journal); if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; tid_t tid = 0; if (journal->j_committing_transaction) tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); write_unlock(&journal->j_state_lock); if (chkpt) { jbd2_log_do_checkpoint(journal); } else if (jbd2_cleanup_journal_tail(journal) == 0) { /* We were able to recover space; yay! */ ; } else if (tid) { jbd2_log_wait_commit(journal, tid); } else { printk(KERN_ERR "%s: needed %d blocks and " "only had %d space available\n", __func__, nblocks, space_left); printk(KERN_ERR "%s: no way to get more " "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); jbd2_journal_abort(journal, 0); } write_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); } mutex_unlock(&journal->j_checkpoint_mutex); } }
void __jbd2_log_wait_for_space(journal_t *journal) { int nblocks, space_left; /* */ nblocks = jbd_space_needed(journal); while (__jbd2_log_space_left(journal) < nblocks) { if (journal->j_flags & JBD2_ABORT) return; write_unlock(&journal->j_state_lock); mutex_lock(&journal->j_checkpoint_mutex); /* */ write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); space_left = __jbd2_log_space_left(journal); if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; tid_t tid = 0; if (journal->j_committing_transaction) tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); write_unlock(&journal->j_state_lock); if (chkpt) { jbd2_log_do_checkpoint(journal); } else if (jbd2_cleanup_journal_tail(journal) == 0) { /* */ ; } else if (tid) { jbd2_log_wait_commit(journal, tid); } else { printk(KERN_ERR "%s: needed %d blocks and " "only had %d space available\n", __func__, nblocks, space_left); printk(KERN_ERR "%s: no way to get more " "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); jbd2_journal_abort(journal, 0); } write_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); } mutex_unlock(&journal->j_checkpoint_mutex); } }
int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; tid_t commit_tid; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, dentry, datasync); if (inode->i_sb->s_flags & MS_RDONLY) return 0; ret = flush_aio_dio_completed_IO(inode); if (ret < 0) return ret; if (!journal) return simple_fsync(file, dentry, datasync); /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. * Metadata is in the journal, we wait for proper transaction to * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). * ext4_force_commit will write the file data into the journal and * will wait on that. * filemap_fdatawait() will encounter a ton of newly-dirtied pages * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ if (ext4_should_journal_data(inode)) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; if (jbd2_log_start_commit(journal, commit_tid)) { /* * When the journal is on a different device than the * fs data disk, we need to issue the barrier in * writeback mode. (In ordered mode, the jbd2 layer * will take care of issuing the barrier. In * data=journal, all of the data blocks are written to * the journal device.) */ if (ext4_should_writeback_data(inode) && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); ret = jbd2_log_wait_commit(journal, commit_tid); } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; }
int ext4_sync_file(struct file *file, int datasync) { struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; tid_t commit_tid; bool needs_barrier = false; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, datasync); if (inode->i_sb->s_flags & MS_RDONLY) return 0; ret = flush_completed_IO(inode); if (ret < 0) return ret; if (!journal) { ret = generic_file_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) ext4_sync_parent(inode); return ret; } /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. * Metadata is in the journal, we wait for proper transaction to * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). * ext4_force_commit will write the file data into the journal and * will wait on that. * filemap_fdatawait() will encounter a ton of newly-dirtied pages * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ if (ext4_should_journal_data(inode)) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; if (journal->j_flags & JBD2_BARRIER && !jbd2_trans_will_send_data_barrier(journal, commit_tid)) needs_barrier = true; jbd2_log_start_commit(journal, commit_tid); ret = jbd2_log_wait_commit(journal, commit_tid); if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); return ret; }
static int __process_buffer(journal_t *journal, struct journal_head *jh, int *batch_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); BUFFER_TRACE(bh, "brelse"); __brelse(bh); ret = 1; } else if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); if (unlikely(journal->j_flags & JBD2_UNMOUNT)) printk(KERN_ERR "JBD2: %s: " "Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; get_bh(bh); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); __brelse(bh); } else { BUFFER_TRACE(bh, "queue"); get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); journal->j_chkpt_bhs[*batch_count] = bh; __buffer_relink_io(jh); transaction->t_chp_stats.cs_written++; (*batch_count)++; if (*batch_count == JBD2_NR_BATCH) { spin_unlock(&journal->j_list_lock); __flush_batch(journal, batch_count); ret = 1; } } return ret; }
/* * Perform an actual checkpoint. We take the first transaction on the * list of transactions to be checkpointed and send all its buffers * to disk. We submit larger chunks of data at once. * * The journal should be locked before calling this function. * Called with j_checkpoint_mutex held. */ int jbd2_log_do_checkpoint(journal_t *journal) { struct journal_head *jh; struct buffer_head *bh; transaction_t *transaction; tid_t this_tid; int result, batch_count = 0; jbd_debug(1, "Start checkpoint\n"); /* * First thing: if there are any transactions in the log which * don't need checkpointing, just eliminate them from the * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); trace_jbd2_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; /* * OK, we need to start writing disk blocks. Take one transaction * and write it. */ result = 0; spin_lock(&journal->j_list_lock); if (!journal->j_checkpoint_transactions) goto out; transaction = journal->j_checkpoint_transactions; if (transaction->t_chp_stats.cs_chp_time == 0) transaction->t_chp_stats.cs_chp_time = jiffies; this_tid = transaction->t_tid; restart: /* * If someone cleaned up this transaction while we slept, we're * done (maybe it's a new transaction, but it fell at the same * address). */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) goto out; /* checkpoint all of the transaction's buffers */ while (transaction->t_checkpoint_list) { jh = transaction->t_checkpoint_list; bh = jh2bh(jh); if (buffer_locked(bh)) { spin_unlock(&journal->j_list_lock); get_bh(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); goto retry; } if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); if (unlikely(journal->j_flags & JBD2_UNMOUNT)) /* * The journal thread is dead; so * starting and waiting for a commit * to finish will cause us to wait for * a _very_ long time. */ printk(KERN_ERR "JBD2: %s: Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); goto retry; } if (!buffer_dirty(bh)) { if (unlikely(buffer_write_io_error(bh)) && !result) result = -EIO; BUFFER_TRACE(bh, "remove from checkpoint"); if (__jbd2_journal_remove_checkpoint(jh)) /* The transaction was released; we're done */ goto out; continue; } /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal * lock. We cannot afford to let the transaction * logic start messing around with this buffer before * we write it to disk, as that would break * recoverability. */ BUFFER_TRACE(bh, "queue"); get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); journal->j_chkpt_bhs[batch_count++] = bh; __buffer_relink_io(jh); transaction->t_chp_stats.cs_written++; if ((batch_count == JBD2_NR_BATCH) || need_resched() || spin_needbreak(&journal->j_list_lock)) goto unlock_and_flush; } if (batch_count) { unlock_and_flush: spin_unlock(&journal->j_list_lock); retry: if (batch_count) __flush_batch(journal, &batch_count); spin_lock(&journal->j_list_lock); goto restart; } /* * Now we issued all of the transaction's buffers, let's deal * with the buffers that are out for I/O. */ restart2: /* Did somebody clean up the transaction in the meanwhile? */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) goto out; while (transaction->t_checkpoint_io_list) { jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); if (buffer_locked(bh)) { spin_unlock(&journal->j_list_lock); get_bh(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); spin_lock(&journal->j_list_lock); goto restart2; } if (unlikely(buffer_write_io_error(bh)) && !result) result = -EIO; /* * Now in whatever state the buffer currently is, we * know that it has been written out and so we can * drop it from the list */ if (__jbd2_journal_remove_checkpoint(jh)) break; } out: spin_unlock(&journal->j_list_lock); if (result < 0) jbd2_journal_abort(journal, result); else result = jbd2_cleanup_journal_tail(journal); return (result < 0) ? result : 0; }
int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; int err, wait_for_commit = 0; tid_t tid; pid_t pid; J_ASSERT(journal_current_handle() == handle); if (is_handle_aborted(handle)) err = -EIO; else { J_ASSERT(atomic_read(&transaction->t_updates) > 0); err = 0; } if (--handle->h_ref > 0) { jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, handle->h_ref); return err; } jbd_debug(4, "Handle %p going down\n", handle); pid = current->pid; if (handle->h_sync && journal->j_last_sync_writer != pid) { u64 commit_time, trans_time; journal->j_last_sync_writer = pid; read_lock(&journal->j_state_lock); commit_time = journal->j_average_commit_time; read_unlock(&journal->j_state_lock); trans_time = ktime_to_ns(ktime_sub(ktime_get(), transaction->t_start_time)); commit_time = max_t(u64, commit_time, 1000*journal->j_min_batch_time); commit_time = min_t(u64, commit_time, 1000*journal->j_max_batch_time); if (trans_time < commit_time) { ktime_t expires = ktime_add_ns(ktime_get(), commit_time); set_current_state(TASK_UNINTERRUPTIBLE); schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); } } if (handle->h_sync) transaction->t_synchronous_commit = 1; current->journal_info = NULL; atomic_sub(handle->h_buffer_credits, &transaction->t_outstanding_credits); if (handle->h_sync || (atomic_read(&transaction->t_outstanding_credits) > journal->j_max_transaction_buffers) || time_after_eq(jiffies, transaction->t_expires)) { jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); jbd2_log_start_commit(journal, transaction->t_tid); if (handle->h_sync && !(current->flags & PF_MEMALLOC)) wait_for_commit = 1; } tid = transaction->t_tid; if (atomic_dec_and_test(&transaction->t_updates)) { wake_up(&journal->j_wait_updates); if (journal->j_barrier_count) wake_up(&journal->j_wait_transaction_locked); } if (wait_for_commit) err = jbd2_log_wait_commit(journal, tid); lock_map_release(&handle->h_lockdep_map); jbd2_free_handle(handle); return err; }
/* * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. Return <0 if the buffer has failed to * be written out. * * Called with j_list_lock held and drops it if 1 is returned */ static int __process_buffer(journal_t *journal, struct journal_head *jh, int *batch_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; if (buffer_locked(bh)) { get_bh(bh); spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); ret = 1; } else if (jh->b_transaction != NULL) { transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); if (unlikely(journal->j_flags & JBD2_UNMOUNT)) /* * The journal thread is dead; so starting and * waiting for a commit to finish will cause * us to wait for a _very_ long time. */ printk(KERN_ERR "JBD2: %s: " "Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; get_bh(bh); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); __brelse(bh); } else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. * We cannot afford to let the transaction logic start * messing around with this buffer before we write it to * disk, as that would break recoverability. */ BUFFER_TRACE(bh, "queue"); get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); journal->j_chkpt_bhs[*batch_count] = bh; __buffer_relink_io(jh); transaction->t_chp_stats.cs_written++; (*batch_count)++; if (*batch_count == JBD2_NR_BATCH) { spin_unlock(&journal->j_list_lock); __flush_batch(journal, batch_count); ret = 1; } } return ret; }