void journal_sync_buffer(struct buffer_head *bh)
{
	transaction_t *transaction;
	journal_t *journal;
	long sequence;

	/* If the buffer isn't journaled, this is easy: just sync it to
	 * disk. */
	if (bh->b_transaction == NULL) {
		/* If the buffer has already been journaled, then this
		 * is a noop. */
		if (bh->b_cp_transaction == NULL)
			return;
		ll_rw_block(WRITE, 1, &bh);
		wait_on_buffer(bh);
		return;
	}

	/* Otherwise, just wait until the transaction is synced to disk. */
	transaction = bh->b_transaction;
	journal = transaction->t_journal;
	sequence = transaction->t_tid;

	jfs_debug(2, "requesting commit for bh %p\n", bh);
	log_start_commit(journal, transaction);

	while (tid_gt(sequence, journal->j_commit_sequence)) {
		wake_up(&journal->j_wait_done_commit);
		sleep_on(&journal->j_wait_done_commit);
	}
}
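
/*
 * The wait loop above compares transaction IDs with tid_gt() rather than a
 * plain ">" because tids are sequence numbers that may wrap.  A minimal
 * sketch of that wrapping comparison, essentially as jbd defines it:
 */
static inline int tid_gt(tid_t x, tid_t y)
{
	/* Signed subtraction makes the comparison wrap-safe. */
	int difference = (int)(x - y);
	return difference > 0;
}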
int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct ext3_inode_info *ei = EXT3_I(inode);
	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
	int ret, needs_barrier = 0;
	tid_t commit_tid;

	trace_ext3_sync_file_enter(file, datasync);

	if (inode->i_sb->s_flags & MS_RDONLY)
		return 0;

	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (ret)
		goto out;

	J_ASSERT(ext3_journal_current_handle() == NULL);

	/*
	 * data=writeback,ordered:
	 *  The caller's filemap_fdatawrite()/wait will sync the data.
	 *  Metadata is in the journal, we wait for a proper transaction
	 *  to commit here.
	 *
	 * data=journal:
	 *  filemap_fdatawrite won't do anything (the buffers are clean).
	 *  ext3_force_commit will write the file data into the journal and
	 *  will wait on that.
	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
	 *  (they were dirtied by commit).  But that's OK - the blocks are
	 *  safe in-journal, which is all fsync() needs to ensure.
	 */
	if (ext3_should_journal_data(inode)) {
		ret = ext3_force_commit(inode->i_sb);
		goto out;
	}

	if (datasync)
		commit_tid = atomic_read(&ei->i_datasync_tid);
	else
		commit_tid = atomic_read(&ei->i_sync_tid);

	if (test_opt(inode->i_sb, BARRIER) &&
	    !journal_trans_will_send_data_barrier(journal, commit_tid))
		needs_barrier = 1;
	log_start_commit(journal, commit_tid);
	ret = log_wait_commit(journal, commit_tid);

	/*
	 * In case we didn't commit a transaction, we have to flush
	 * disk caches manually so that data really is on persistent
	 * storage
	 */
	if (needs_barrier)
		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
out:
	trace_ext3_sync_file_exit(inode, ret);
	return ret;
}
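
/*
 * For context, the i_sync_tid/i_datasync_tid values read above are stamped
 * on the update side with the tid of the transaction that carried each
 * change.  A sketch of that bookkeeping, modeled on ext3's
 * ext3_update_inode_fsync_trans() helper; treat the details as an
 * approximation rather than a verbatim copy:
 */
static inline void ext3_update_inode_fsync_trans(handle_t *handle,
						 struct inode *inode,
						 int datasync)
{
	struct ext3_inode_info *ei = EXT3_I(inode);

	if (ext3_journal_current_handle()) {
		/* Every change moves the metadata sync target forward... */
		atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
		/* ...but only data-bearing changes move the fdatasync one. */
		if (datasync)
			atomic_set(&ei->i_datasync_tid,
				   handle->h_transaction->t_tid);
	}
}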
static int mlowerfs_ext3_commit_async(struct inode *inode, void *h,
				      void **wait_handle)
{
	unsigned long tid = 0;
	transaction_t *transaction = NULL;
	handle_t *handle = h;
	journal_t *journal = NULL;
	int ret = 0;
	MENTRY();

	MASSERT(current->journal_info == handle);

	transaction = handle->h_transaction;
	journal = transaction->t_journal;
	tid = transaction->t_tid;

	/* we don't want to be blocked */
	handle->h_sync = 0;
	ret = _mlowerfs_ext3_journal_stop(handle);
	if (ret) {
		MERROR("error while stopping transaction: %d\n", ret);
		goto out;
	}
	log_start_commit(journal, tid);

	*wait_handle = (void *)tid;
out:
	MRETURN(ret);
}
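
/*
 * A hypothetical sketch of the matching wait side: the tid smuggled out
 * through *wait_handle above is cast back and handed to log_wait_commit().
 * The function name and error handling here are illustrative, not taken
 * from the source.
 */
static int mlowerfs_ext3_commit_wait(struct inode *inode, void *wait_handle)
{
	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
	tid_t tid = (tid_t)(unsigned long)wait_handle;

	if (unlikely(is_journal_aborted(journal)))
		return -EIO;

	/* Block until the commit requested by commit_async has hit disk. */
	log_wait_commit(journal, tid);

	return is_journal_aborted(journal) ? -EIO : 0;
}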
/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list.
 *
 * Called with j_list_lock held and drops it if 1 is returned
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
			    struct buffer_head **bhs, int *batch_count)
{
	struct buffer_head *bh = jh2bh(jh);
	int ret = 0;

	if (buffer_locked(bh)) {
		atomic_inc(&bh->b_count);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		wait_on_buffer(bh);
		/* the journal_head may have gone by now */
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		ret = 1;
	} else if (jh->b_transaction != NULL) {
		transaction_t *t = jh->b_transaction;
		tid_t tid = t->t_tid;

		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		log_start_commit(journal, tid);
		log_wait_commit(journal, tid);
		ret = 1;
	} else if (!buffer_dirty(bh)) {
		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
		BUFFER_TRACE(bh, "remove from checkpoint");
		__journal_remove_checkpoint(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		journal_remove_journal_head(bh);
		__brelse(bh);
		ret = 1;
	} else {
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal lock.
		 * We cannot afford to let the transaction logic start
		 * messing around with this buffer before we write it to
		 * disk, as that would break recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		set_buffer_jwrite(bh);
		bhs[*batch_count] = bh;
		__buffer_relink_io(jh);
		jbd_unlock_bh_state(bh);
		(*batch_count)++;
		if (*batch_count == NR_BATCH) {
			spin_unlock(&journal->j_list_lock);
			__flush_batch(journal, bhs, batch_count);
			ret = 1;
		}
	}
	return ret;
}
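
/*
 * For context, a sketch of the batch-submission helper called above.  It
 * follows the shape of jbd's __flush_batch(): submit everything queued in
 * bhs[], then drop the jwrite bits and the references that
 * __process_buffer() took.  Treat it as an approximation of the real
 * helper rather than a verbatim copy.
 */
static void __flush_batch(journal_t *journal, struct buffer_head **bhs,
			  int *batch_count)
{
	int i;

	/* Submit all queued buffers in one go. */
	ll_rw_block(SWRITE, *batch_count, bhs);
	for (i = 0; i < *batch_count; i++) {
		struct buffer_head *bh = bhs[i];

		clear_buffer_jwrite(bh);
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	*batch_count = 0;
}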
int journal_stop(handle_t *handle)
{
	transaction_t *transaction;
	journal_t *journal;
	int force_sync;

	if (!handle)
		return 0;

	transaction = handle->h_transaction;
	journal = transaction->t_journal;

	J_ASSERT(transaction->t_updates > 0);
	J_ASSERT(current->j_handle == handle);

	if (--handle->h_ref > 0)
		return 0;

	jfs_debug(4, "Handle %p going down\n", handle);

	current->j_handle = NULL;
	// current->fs_locks--;
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;
	if (!transaction->t_updates) {
		wake_up(&journal->j_wait_updates);
		if (journal->j_barrier_count)
			wake_up(&journal->j_wait_transaction_locked);
	}

	/*
	 * If the journal is marked SYNC, we need to set another commit
	 * going!  We also want to force a commit if the current
	 * transaction is occupying too much of the log, or if the
	 * transaction is too old now.
	 */
	force_sync = (journal->j_flags & JFS_SYNC) || handle->h_sync;

	if (force_sync ||
	    transaction->t_outstanding_credits > journal->j_max_transaction_buffers ||
	    time_after_eq(jiffies, transaction->t_expires)) {
		tid_t tid = transaction->t_tid;

		jfs_debug(2, "transaction too old, requesting commit for handle %p\n",
			  handle);
		log_start_commit(journal, transaction);

		/*
		 * Special case: JFS_SYNC synchronous updates require us
		 * to wait for the commit to complete.
		 */
		if (force_sync)
			log_wait_commit(journal, tid);
	}

	kfree(handle);
	return 0;
}
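
/*
 * A hypothetical caller sketch showing the usual pairing: start a handle
 * with a credit estimate, mark it h_sync for synchronous inodes so that
 * journal_stop() above forces and waits on the commit, then stop it.  The
 * surrounding function and the ERR_PTR error convention for this era's
 * journal_start() are assumptions made for illustration.
 */
static int example_journaled_update(journal_t *journal, struct inode *inode,
				    int credits)
{
	handle_t *handle;

	handle = journal_start(journal, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* ... modify metadata under this handle ... */

	if (IS_SYNC(inode))
		handle->h_sync = 1;	/* make journal_stop() wait for commit */

	return journal_stop(handle);
}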
int ext3_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;
	struct ext3_inode_info *ei = EXT3_I(inode);
	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
	int ret = 0;
	tid_t commit_tid;

	if (inode->i_sb->s_flags & MS_RDONLY)
		return 0;

	J_ASSERT(ext3_journal_current_handle() == NULL);

	/*
	 * data=writeback,ordered:
	 *  The caller's filemap_fdatawrite()/wait will sync the data.
	 *  Metadata is in the journal, we wait for a proper transaction
	 *  to commit here.
	 *
	 * data=journal:
	 *  filemap_fdatawrite won't do anything (the buffers are clean).
	 *  ext3_force_commit will write the file data into the journal and
	 *  will wait on that.
	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
	 *  (they were dirtied by commit).  But that's OK - the blocks are
	 *  safe in-journal, which is all fsync() needs to ensure.
	 */
	if (ext3_should_journal_data(inode)) {
		ret = ext3_force_commit(inode->i_sb);
		goto out;
	}

	if (datasync)
		commit_tid = atomic_read(&ei->i_datasync_tid);
	else
		commit_tid = atomic_read(&ei->i_sync_tid);

	if (log_start_commit(journal, commit_tid)) {
		log_wait_commit(journal, commit_tid);
		goto out;
	}

	/*
	 * In case we didn't commit a transaction, we have to flush
	 * disk caches manually so that data really is on persistent
	 * storage
	 */
	if (test_opt(inode->i_sb, BARRIER))
		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
				   BLKDEV_IFL_WAIT);
out:
	return ret;
}
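
/*
 * The branch above relies on log_start_commit() returning nonzero only
 * when it actually requested a new commit for the tid (so a commit, with
 * its own barrier, is on the way); a zero return means the tid is already
 * committed or being committed, hence the manual flush.  A sketch of that
 * contract, modeled on jbd's __log_start_commit(); treat the details as
 * an approximation:
 */
static int __log_start_commit(journal_t *journal, tid_t target)
{
	/* Has a commit of this tid already been requested or done? */
	if (!tid_geq(journal->j_commit_request, target)) {
		/* No: record the request and wake the commit thread. */
		journal->j_commit_request = target;
		wake_up(&journal->j_wait_commit);
		return 1;
	}
	return 0;
}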
int journal_restart(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;

	/* First unlink the handle from its current transaction, and
	 * start the commit on that. */
	J_ASSERT(transaction->t_updates > 0);
	J_ASSERT(current->j_handle == handle);

	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;
	if (!transaction->t_updates)
		wake_up(&journal->j_wait_updates);

	jfs_debug(2, "restarting handle %p\n", handle);
	log_start_commit(journal, transaction);

	handle->h_buffer_credits = nblocks;
	return start_this_handle(journal, handle);
}
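
/*
 * A hypothetical sketch of the pattern journal_restart() exists for: a
 * long-running operation (a large truncate, say) that periodically gives
 * its credits back so the current transaction can commit, then re-attaches
 * to a fresh one.  The function and its parameters are illustrative only.
 */
static int example_long_operation(handle_t *handle, int nchunks,
				  int credits_per_chunk)
{
	int i, err = 0;

	for (i = 0; i < nchunks; i++) {
		if (handle->h_buffer_credits < credits_per_chunk) {
			/* Detach from the old transaction and attach to a
			 * new one with a fresh credit reservation. */
			err = journal_restart(handle, credits_per_chunk);
			if (err)
				break;
		}
		/* ... journal one chunk of updates under "handle" ... */
	}
	return err;
}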
static int start_this_handle(journal_t *journal, handle_t *handle)
{
	transaction_t *transaction;
	int needed;
	int nblocks = handle->h_buffer_credits;

	jfs_debug(4, "New handle %p going live.\n", handle);

repeat:
	lock_journal(journal);

	if ((journal->j_flags & JFS_ABORT) ||
	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
		unlock_journal(journal);
		return -EROFS;
	}

	/* Wait on the journal's transaction barrier if necessary */
	if (journal->j_barrier_count) {
		unlock_journal(journal);
		sleep_on(&journal->j_wait_transaction_locked);
		goto repeat;
	}

repeat_locked:
	if (!journal->j_running_transaction)
		get_transaction(journal);	/* @@@ Error? */
	J_ASSERT(journal->j_running_transaction);

	transaction = journal->j_running_transaction;

	/* If the current transaction is locked down for commit, wait
	 * for the lock to be released. */
	if (transaction->t_state == T_LOCKED) {
		unlock_journal(journal);
		jfs_debug(3, "Handle %p stalling...\n", handle);
		sleep_on(&journal->j_wait_transaction_locked);
		goto repeat;
	}

	/* If there is not enough space left in the log to write all
	 * potential buffers requested by this operation, we need to
	 * stall pending a log checkpoint to free some more log
	 * space. */
	needed = transaction->t_outstanding_credits + nblocks;

	if (needed > journal->j_max_transaction_buffers) {
		/* If the current transaction is already too large, then
		 * start to commit it: we can then go back and attach
		 * this handle to a new transaction. */
		jfs_debug(2, "Handle %p starting new commit...\n", handle);
		log_start_commit(journal, transaction);
		unlock_journal(journal);
		sleep_on(&journal->j_wait_transaction_locked);
		lock_journal(journal);
		goto repeat_locked;
	}

	/*
	 * The commit code assumes that it can get enough log space
	 * without forcing a checkpoint.  This is *critical* for
	 * correctness: a checkpoint of a buffer which is also
	 * associated with a committing transaction creates a deadlock,
	 * so commit simply cannot force through checkpoints.
	 *
	 * We must therefore ensure the necessary space in the journal
	 * *before* starting to dirty potentially checkpointed buffers
	 * in the new transaction.
	 *
	 * The worst part is, any transaction currently committing can
	 * reduce the free space arbitrarily.  Be careful to account for
	 * those buffers when checkpointing.
	 */
	needed = journal->j_max_transaction_buffers;
	if (journal->j_committing_transaction)
		needed += journal->j_committing_transaction->t_outstanding_credits;

	if (log_space_left(journal) < needed) {
		jfs_debug(2, "Handle %p waiting for checkpoint...\n", handle);
		log_wait_for_space(journal, needed);
		goto repeat_locked;
	}

	/* OK, account for the buffers that this operation expects to
	 * use and add the handle to the running transaction. */
	handle->h_transaction = transaction;
	transaction->t_outstanding_credits += nblocks;
	transaction->t_updates++;
	jfs_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
		  handle, nblocks, transaction->t_outstanding_credits,
		  log_space_left(journal));

	unlock_journal(journal);
	return 0;
}
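
/*
 * For context, a sketch of the public entry point that feeds
 * start_this_handle(): allocate a handle carrying the credit request, try
 * to attach it, and free it on failure.  This follows the general shape of
 * this era's journal_start(); the allocation and bookkeeping here are
 * simplified assumptions, not the exact original.
 */
handle_t *journal_start(journal_t *journal, int nblocks)
{
	handle_t *handle;
	int err;

	if (!journal)
		return ERR_PTR(-EROFS);

	handle = kmalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return ERR_PTR(-ENOMEM);
	memset(handle, 0, sizeof(*handle));
	handle->h_buffer_credits = nblocks;
	handle->h_ref = 1;

	err = start_this_handle(journal, handle);
	if (err < 0) {
		kfree(handle);
		return ERR_PTR(err);
	}
	current->j_handle = handle;	/* matches the asserts in journal_stop() */
	return handle;
}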
/*
 * Called at inode eviction from icache
 */
void ext3_evict_inode(struct inode *inode)
{
	struct ext3_inode_info *ei = EXT3_I(inode);
	struct ext3_block_alloc_info *rsv;
	handle_t *handle;
	int want_delete = 0;

	trace_ext3_evict_inode(inode);
	if (!inode->i_nlink && !is_bad_inode(inode)) {
		dquot_initialize(inode);
		want_delete = 1;
	}

	/*
	 * When journalling data dirty buffers are tracked only in the journal.
	 * So although mm thinks everything is clean and ready for reaping the
	 * inode might still have some pages to write in the running
	 * transaction or waiting to be checkpointed.  Thus calling
	 * journal_invalidatepage() (via truncate_inode_pages()) to discard
	 * these buffers can cause data loss.  Also even if we did not discard
	 * these buffers, we would have no way to find them after the inode
	 * is reaped and thus user could see stale data if he tries to read
	 * them before the transaction is checkpointed.  So be careful and
	 * force everything to disk here...  We use ei->i_datasync_tid to
	 * store the newest transaction containing inode's data.
	 *
	 * Note that directories do not have this problem because they don't
	 * use page cache.
	 *
	 * The s_journal check handles the case when ext3_get_journal() fails
	 * and puts the journal inode.
	 */
	if (inode->i_nlink && ext3_should_journal_data(inode) &&
	    EXT3_SB(inode->i_sb)->s_journal &&
	    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
	    inode->i_ino != EXT3_JOURNAL_INO) {
		tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
		journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;

		log_start_commit(journal, commit_tid);
		log_wait_commit(journal, commit_tid);
		filemap_write_and_wait(&inode->i_data);
	}
	truncate_inode_pages(&inode->i_data, 0);

	ext3_discard_reservation(inode);
	rsv = ei->i_block_alloc_info;
	ei->i_block_alloc_info = NULL;
	if (unlikely(rsv))
		kfree(rsv);

	if (!want_delete)
		goto no_delete;

	handle = start_transaction(inode);
	if (IS_ERR(handle)) {
		/*
		 * If we're going to skip the normal cleanup, we still need to
		 * make sure that the in-core orphan linked list is properly
		 * cleaned up.
		 */
		ext3_orphan_del(NULL, inode);
		goto no_delete;
	}

	if (IS_SYNC(inode))
		handle->h_sync = 1;
	inode->i_size = 0;
	if (inode->i_blocks)
		ext3_truncate(inode);
	/*
	 * Kill off the orphan record created when the inode lost the last
	 * link.  Note that ext3_orphan_del() has to be able to cope with the
	 * deletion of a non-existent orphan - ext3_truncate() could
	 * have removed the record.
	 */
	ext3_orphan_del(handle, inode);
	ei->i_dtime = get_seconds();

	/*
	 * One subtle ordering requirement: if anything has gone wrong
	 * (transaction abort, IO errors, whatever), then we can still
	 * do these next steps (the fs will already have been marked as
	 * having errors), but we can't free the inode if the mark_dirty
	 * fails.
	 */
	if (ext3_mark_inode_dirty(handle, inode)) {
		/* If that failed, just dquot_drop() and be done with that */
		dquot_drop(inode);
		clear_inode(inode);
	} else {
		ext3_xattr_delete_inode(handle, inode);
		dquot_free_inode(inode);
		dquot_drop(inode);
		clear_inode(inode);
		ext3_free_inode(handle, inode);
	}
	ext3_journal_stop(handle);
	return;
no_delete:
	clear_inode(inode);
	dquot_drop(inode);
}
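
/*
 * A sketch of the start_transaction() helper used above, following the
 * shape of ext3's version: reserve enough credits for the truncate and
 * record any failure as a filesystem error.  blocks_for_truncate() is
 * ext3's worst-case credit estimate for deleting the inode; treat the
 * details as an approximation.
 */
static handle_t *start_transaction(struct inode *inode)
{
	handle_t *result;

	result = ext3_journal_start(inode, blocks_for_truncate(inode));
	if (!IS_ERR(result))
		return result;

	/* Failure to start a handle is a filesystem-level error. */
	ext3_std_error(inode->i_sb, PTR_ERR(result));
	return result;
}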
/*
 * Clean up a transaction's checkpoint list.
 *
 * We wait for any pending IO to complete and make sure any clean
 * buffers are removed from the transaction.
 *
 * Return 1 if we performed any actions which might have destroyed the
 * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
 * the last checkpoint buffer is cleansed)
 *
 * Called with the journal locked.
 * Called with journal_datalist_lock held.
 */
static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
{
	struct journal_head *jh, *next_jh, *last_jh;
	struct buffer_head *bh;
	int ret = 0;

	assert_spin_locked(&journal_datalist_lock);
	jh = transaction->t_checkpoint_list;
	if (!jh)
		return 0;

	last_jh = jh->b_cpprev;
	next_jh = jh;
	do {
		jh = next_jh;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			atomic_inc(&bh->b_count);
			spin_unlock(&journal_datalist_lock);
			unlock_journal(journal);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto out_return_1;
		}

		if (jh->b_transaction != NULL) {
			/* don't shadow this function's "transaction" argument */
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			spin_unlock(&journal_datalist_lock);
			log_start_commit(journal, t);
			unlock_journal(journal);
			log_wait_commit(journal, tid);
			goto out_return_1;
		}

		/*
		 * We used to test for (jh->b_list != BUF_CLEAN) here.
		 * But unmap_underlying_metadata() can place buffer onto
		 * BUF_CLEAN.  Since refile_buffer() no longer takes buffers
		 * off checkpoint lists, we cope with it here
		 */
		/*
		 * AKPM: I think the buffer_jdirty test is redundant - it
		 * shouldn't have NULL b_transaction?
		 */
		next_jh = jh->b_cpnext;
		if (!buffer_dirty(bh) && !buffer_jdirty(bh)) {
			BUFFER_TRACE(bh, "remove from checkpoint");
			__journal_remove_checkpoint(jh);
			__journal_remove_journal_head(bh);
			refile_buffer(bh);
			__brelse(bh);
			ret = 1;
		}
	} while (jh != last_jh);

	return ret;
out_return_1:
	lock_journal(journal);
	spin_lock(&journal_datalist_lock);
	return 1;
}
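
/*
 * A hypothetical sketch of how a checkpoint pass might drive
 * __cleanup_transaction(): keep retrying from the oldest checkpoint
 * transaction while the helper reports that it changed something (it
 * returns 1 with both locks re-acquired).  The loop shape and function
 * name are illustrative, not the exact caller.
 */
static void example_checkpoint_pass(journal_t *journal)
{
	transaction_t *transaction;

	lock_journal(journal);
	spin_lock(&journal_datalist_lock);

	/* Restart the scan whenever the list may have changed under us. */
	while ((transaction = journal->j_checkpoint_transactions) != NULL) {
		if (!__cleanup_transaction(journal, transaction))
			break;	/* nothing cleaned; oldest tid still pinned */
	}

	spin_unlock(&journal_datalist_lock);
	unlock_journal(journal);
}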
/*
 * Clean up a transaction's checkpoint list.
 *
 * We wait for any pending IO to complete and make sure any clean
 * buffers are removed from the transaction.
 *
 * Return 1 if we performed any actions which might have destroyed the
 * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
 * the last checkpoint buffer is cleansed)
 *
 * Called with j_list_lock held.
 */
static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
{
	struct journal_head *jh, *next_jh, *last_jh;
	struct buffer_head *bh;
	int ret = 0;

	assert_spin_locked(&journal->j_list_lock);
	jh = transaction->t_checkpoint_list;
	if (!jh)
		return 0;

	last_jh = jh->b_cpprev;
	next_jh = jh;
	do {
		jh = next_jh;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			atomic_inc(&bh->b_count);
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto out_return_1;
		}

		/*
		 * This is foul
		 */
		if (!jbd_trylock_bh_state(bh)) {
			jbd_sync_bh(journal, bh);
			goto out_return_1;
		}

		if (jh->b_transaction != NULL) {
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			log_start_commit(journal, tid);
			log_wait_commit(journal, tid);
			goto out_return_1;
		}

		/*
		 * AKPM: I think the buffer_jbddirty test is redundant - it
		 * shouldn't have NULL b_transaction?
		 */
		next_jh = jh->b_cpnext;
		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
			BUFFER_TRACE(bh, "remove from checkpoint");
			__journal_remove_checkpoint(jh);
			jbd_unlock_bh_state(bh);
			journal_remove_journal_head(bh);
			__brelse(bh);
			ret = 1;
		} else {
			jbd_unlock_bh_state(bh);
		}
	} while (jh != last_jh);

	return ret;
out_return_1:
	spin_lock(&journal->j_list_lock);
	return 1;
}
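
/*
 * A sketch of the jbd_sync_bh() fallback used above when the bh-state lock
 * is contended: pin the buffer so it cannot go away, drop j_list_lock, and
 * ride out any in-flight IO by taking and releasing the buffer lock.  This
 * follows the shape of jbd's helper; treat the details as an approximation.
 */
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
{
	get_bh(bh);			/* keep the buffer pinned */
	spin_unlock(&journal->j_list_lock);
	lock_buffer(bh);		/* waits for any IO to complete */
	unlock_buffer(bh);
	put_bh(bh);
}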