void __jbd2_journal_refile_buffer(struct journal_head *jh) { int was_dirty, jlist; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); if (jh->b_transaction) assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); if (jh->b_next_transaction == NULL) { __jbd2_journal_unfile_buffer(jh); return; } was_dirty = test_clear_buffer_jbddirty(bh); __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) jlist = BJ_Metadata; else jlist = BJ_Reserved; __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) set_buffer_jbddirty(bh); }
void __journal_refile_buffer(struct journal_head *jh) { int was_dirty, jlist; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); if (jh->b_transaction) assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); /* If the buffer is now unused, just drop it. */ if (jh->b_next_transaction == NULL) { __journal_unfile_buffer(jh); return; } /* * It has been modified by a later transaction: add it to the new * transaction's metadata list. */ was_dirty = test_clear_buffer_jbddirty(bh); __journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) jlist = BJ_Metadata; else jlist = BJ_Reserved; __journal_file_buffer(jh, jh->b_transaction, jlist); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) set_buffer_jbddirty(bh); }
int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; struct journal_head *jh = bh2jh(bh); jbd_debug(5, "journal_head %p\n", jh); JBUFFER_TRACE(jh, "entry"); if (is_handle_aborted(handle)) goto out; jbd_lock_bh_state(bh); if (jh->b_modified == 0) { /* * This buffer's got modified and becoming part * of the transaction. This needs to be done * once a transaction -bzzz */ jh->b_modified = 1; J_ASSERT_JH(jh, handle->h_buffer_credits > 0); handle->h_buffer_credits--; } /* * fastpath, to avoid expensive locking. If this buffer is already * on the running transaction's metadata list there is nothing to do. * Nobody can take it off again because there is a handle open. * I _think_ we're OK here with SMP barriers - a mistaken decision will * result in this test being false, so we go in and take the locks. */ if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { JBUFFER_TRACE(jh, "fastpath"); J_ASSERT_JH(jh, jh->b_transaction == journal->j_running_transaction); goto out_unlock_bh; } set_buffer_jbddirty(bh); /* * Metadata already on the current transaction list doesn't * need to be filed. Metadata on another transaction's list must * be committing, and will be refiled once the commit completes: * leave it alone for now. */ if (jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "already on other transaction"); J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction); J_ASSERT_JH(jh, jh->b_next_transaction == transaction); /* And this case is illegal: we can't reuse another * transaction's data buffer, ever. */ goto out_unlock_bh; } /* That test should have eliminated the following case: */ J_ASSERT_JH(jh, jh->b_frozen_data == NULL); JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); out: JBUFFER_TRACE(jh, "exit"); return 0; }
static int do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy) { struct buffer_head *bh; transaction_t *transaction; journal_t *journal; int error; char *frozen_buffer = NULL; int need_copy = 0; if (is_handle_aborted(handle)) return -EROFS; transaction = handle->h_transaction; journal = transaction->t_journal; jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); JBUFFER_TRACE(jh, "entry"); repeat: bh = jh2bh(jh); /* @@@ Need to check for errors here at some point. */ lock_buffer(bh); jbd_lock_bh_state(bh); /* We now hold the buffer lock so it is safe to query the buffer * state. Is the buffer dirty? * * If so, there are two possibilities. The buffer may be * non-journaled, and undergoing a quite legitimate writeback. * Otherwise, it is journaled, and we don't expect dirty buffers * in that state (the buffers should be marked JBD_Dirty * instead.) So either the IO is being done under our own * control and this is a bug, or it's a third party IO such as * dump(8) (which may leave the buffer scheduled for read --- * ie. locked but not dirty) or tune2fs (which may actually have * the buffer dirtied, ugh.) */ if (buffer_dirty(bh)) { /* * First question: is this buffer already part of the current * transaction or the existing committing transaction? */ if (jh->b_transaction) { J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == journal->j_committing_transaction); if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); warn_dirty_buffer(bh); } /* * In any case we need to clean the dirty flag and we must * do it under the buffer lock to be sure we don't race * with running write-out. */ JBUFFER_TRACE(jh, "Journalling dirty buffer"); clear_buffer_dirty(bh); set_buffer_jbddirty(bh); } unlock_buffer(bh); error = -EROFS; if (is_handle_aborted(handle)) { jbd_unlock_bh_state(bh); goto out; } error = 0; /* * The buffer is already part of this transaction if b_transaction or * b_next_transaction points to it */ if (jh->b_transaction == transaction || jh->b_next_transaction == transaction) goto done; /* * this is the first time this transaction is touching this buffer, * reset the modified flag */ jh->b_modified = 0; /* * If there is already a copy-out version of this buffer, then we don't * need to make another one */ if (jh->b_frozen_data) { JBUFFER_TRACE(jh, "has frozen data"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); jh->b_next_transaction = transaction; goto done; } /* Is there data here we need to preserve? */ if (jh->b_transaction && jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "owned by older transaction"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction); /* There is one case we have to be very careful about. * If the committing transaction is currently writing * this buffer out to disk and has NOT made a copy-out, * then we cannot modify the buffer contents at all * right now. The essence of copy-out is that it is the * extra copy, not the primary copy, which gets * journaled. If the primary copy is already going to * disk then we cannot do copy-out here. */ if (jh->b_jlist == BJ_Shadow) { DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); wait_queue_head_t *wqh; wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); /* commit wakes up all shadow buffers after IO */ for ( ; ; ) { prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); if (jh->b_jlist != BJ_Shadow) break; schedule(); } finish_wait(wqh, &wait.wait); goto repeat; } /* Only do the copy if the currently-owning transaction * still needs it. If it is on the Forget list, the * committing transaction is past that stage. The * buffer had better remain locked during the kmalloc, * but that should be true --- we hold the journal lock * still and the buffer is already on the BUF_JOURNAL * list so won't be flushed. * * Subtle point, though: if this is a get_undo_access, * then we will be relying on the frozen_data to contain * the new value of the committed_data record after the * transaction, so we HAVE to force the frozen_data copy * in that case. */ if (jh->b_jlist != BJ_Forget || force_copy) { JBUFFER_TRACE(jh, "generate frozen data"); if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", __func__); JBUFFER_TRACE(jh, "oom!"); error = -ENOMEM; jbd_lock_bh_state(bh); goto done; } goto repeat; } jh->b_frozen_data = frozen_buffer; frozen_buffer = NULL; need_copy = 1; } jh->b_next_transaction = transaction; } /* * Finally, if the buffer is not journaled right now, we need to make * sure it doesn't get written to disk before the caller actually * commits the new data */ if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __journal_file_buffer(jh, transaction, BJ_Reserved); spin_unlock(&journal->j_list_lock); } done: if (need_copy) { struct page *page; int offset; char *source; J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), "Possible IO failure.\n"); page = jh2bh(jh)->b_page; offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; source = kmap_atomic(page, KM_USER0); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); kunmap_atomic(source, KM_USER0); } jbd_unlock_bh_state(bh); /* * If we are about to journal a buffer, then any revoke pending on it is * no longer valid */ journal_cancel_revoke(handle, jh); out: if (unlikely(frozen_buffer)) /* It's usually NULL */ jbd_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; }
void __journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { struct journal_head **list = NULL; int was_dirty = 0; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); assert_spin_locked(&transaction->t_journal->j_list_lock); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == NULL); if (jh->b_transaction && jh->b_jlist == jlist) return; if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { /* * For metadata buffers, we track dirty bit in buffer_jbddirty * instead of buffer_dirty. We should not see a dirty bit set * here because we clear it in do_get_write_access but e.g. * tune2fs can modify the sb and set the dirty bit at any time * so we try to gracefully handle that. */ if (buffer_dirty(bh)) warn_dirty_buffer(bh); if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) was_dirty = 1; } if (jh->b_transaction) __journal_temp_unlink_buffer(jh); jh->b_transaction = transaction; switch (jlist) { case BJ_None: J_ASSERT_JH(jh, !jh->b_committed_data); J_ASSERT_JH(jh, !jh->b_frozen_data); return; case BJ_SyncData: list = &transaction->t_sync_datalist; break; case BJ_Metadata: transaction->t_nr_buffers++; list = &transaction->t_buffers; break; case BJ_Forget: list = &transaction->t_forget; break; case BJ_IO: list = &transaction->t_iobuf_list; break; case BJ_Shadow: list = &transaction->t_shadow_list; break; case BJ_LogCtl: list = &transaction->t_log_list; break; case BJ_Reserved: list = &transaction->t_reserved_list; break; case BJ_Locked: list = &transaction->t_locked_list; break; } __blist_add_buffer(list, jh); jh->b_jlist = jlist; if (was_dirty) set_buffer_jbddirty(bh); }
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; struct journal_head *jh = bh2jh(bh); int ret = 0; jbd_debug(5, "journal_head %p\n", jh); JBUFFER_TRACE(jh, "entry"); if (is_handle_aborted(handle)) goto out; if (!buffer_jbd(bh)) { ret = -EUCLEAN; goto out; } jbd_lock_bh_state(bh); if (jh->b_modified == 0) { jh->b_modified = 1; J_ASSERT_JH(jh, handle->h_buffer_credits > 0); handle->h_buffer_credits--; } if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { JBUFFER_TRACE(jh, "fastpath"); if (unlikely(jh->b_transaction != journal->j_running_transaction)) { printk(KERN_EMERG "JBD: %s: " "jh->b_transaction (%llu, %p, %u) != " "journal->j_running_transaction (%p, %u)", journal->j_devname, (unsigned long long) bh->b_blocknr, jh->b_transaction, jh->b_transaction ? jh->b_transaction->t_tid : 0, journal->j_running_transaction, journal->j_running_transaction ? journal->j_running_transaction->t_tid : 0); ret = -EINVAL; } goto out_unlock_bh; } set_buffer_jbddirty(bh); if (jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "already on other transaction"); if (unlikely(jh->b_transaction != journal->j_committing_transaction)) { printk(KERN_EMERG "JBD: %s: " "jh->b_transaction (%llu, %p, %u) != " "journal->j_committing_transaction (%p, %u)", journal->j_devname, (unsigned long long) bh->b_blocknr, jh->b_transaction, jh->b_transaction ? jh->b_transaction->t_tid : 0, journal->j_committing_transaction, journal->j_committing_transaction ? journal->j_committing_transaction->t_tid : 0); ret = -EINVAL; } if (unlikely(jh->b_next_transaction != transaction)) { printk(KERN_EMERG "JBD: %s: " "jh->b_next_transaction (%llu, %p, %u) != " "transaction (%p, %u)", journal->j_devname, (unsigned long long) bh->b_blocknr, jh->b_next_transaction, jh->b_next_transaction ? jh->b_next_transaction->t_tid : 0, transaction, transaction->t_tid); ret = -EINVAL; } goto out_unlock_bh; } J_ASSERT_JH(jh, jh->b_frozen_data == NULL); JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); out: JBUFFER_TRACE(jh, "exit"); WARN_ON(ret); return ret; }
static int do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy) { struct buffer_head *bh; transaction_t *transaction; journal_t *journal; int error; char *frozen_buffer = NULL; int need_copy = 0; if (is_handle_aborted(handle)) return -EROFS; transaction = handle->h_transaction; journal = transaction->t_journal; jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); JBUFFER_TRACE(jh, "entry"); repeat: bh = jh2bh(jh); lock_buffer(bh); jbd_lock_bh_state(bh); if (buffer_dirty(bh)) { if (jh->b_transaction) { J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == journal->j_committing_transaction); if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); warn_dirty_buffer(bh); } JBUFFER_TRACE(jh, "Journalling dirty buffer"); clear_buffer_dirty(bh); set_buffer_jbddirty(bh); } unlock_buffer(bh); error = -EROFS; if (is_handle_aborted(handle)) { jbd_unlock_bh_state(bh); goto out; } error = 0; if (jh->b_transaction == transaction || jh->b_next_transaction == transaction) goto done; jh->b_modified = 0; if (jh->b_frozen_data) { JBUFFER_TRACE(jh, "has frozen data"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); jh->b_next_transaction = transaction; goto done; } if (jh->b_transaction && jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "owned by older transaction"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction); if (jh->b_jlist == BJ_Shadow) { DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); wait_queue_head_t *wqh; wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); for ( ; ; ) { prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); if (jh->b_jlist != BJ_Shadow) break; schedule(); } finish_wait(wqh, &wait.wait); goto repeat; } if (jh->b_jlist != BJ_Forget || force_copy) { JBUFFER_TRACE(jh, "generate frozen data"); if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", __func__); JBUFFER_TRACE(jh, "oom!"); error = -ENOMEM; jbd_lock_bh_state(bh); goto done; } goto repeat; } jh->b_frozen_data = frozen_buffer; frozen_buffer = NULL; need_copy = 1; } jh->b_next_transaction = transaction; } if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); spin_unlock(&journal->j_list_lock); } done: if (need_copy) { struct page *page; int offset; char *source; J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), "Possible IO failure.\n"); page = jh2bh(jh)->b_page; offset = offset_in_page(jh2bh(jh)->b_data); source = kmap_atomic(page); jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); kunmap_atomic(source); jh->b_frozen_triggers = jh->b_triggers; } jbd_unlock_bh_state(bh); jbd2_journal_cancel_revoke(handle, jh); out: if (unlikely(frozen_buffer)) jbd2_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; }
void __jbd2_journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { struct journal_head **list = NULL; int was_dirty = 0; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); assert_spin_locked(&transaction->t_journal->j_list_lock); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == NULL); if (jh->b_transaction && jh->b_jlist == jlist) return; if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { if (buffer_dirty(bh)) warn_dirty_buffer(bh); if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) was_dirty = 1; } if (jh->b_transaction) __jbd2_journal_temp_unlink_buffer(jh); else jbd2_journal_grab_journal_head(bh); jh->b_transaction = transaction; switch (jlist) { case BJ_None: J_ASSERT_JH(jh, !jh->b_committed_data); J_ASSERT_JH(jh, !jh->b_frozen_data); return; case BJ_Metadata: transaction->t_nr_buffers++; list = &transaction->t_buffers; break; case BJ_Forget: list = &transaction->t_forget; break; case BJ_IO: list = &transaction->t_iobuf_list; break; case BJ_Shadow: list = &transaction->t_shadow_list; break; case BJ_LogCtl: list = &transaction->t_log_list; break; case BJ_Reserved: list = &transaction->t_reserved_list; break; } __blist_add_buffer(list, jh); jh->b_jlist = jlist; if (was_dirty) set_buffer_jbddirty(bh); }
/* * File a buffer on the given transaction list. */ void __journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { struct journal_head **list = NULL; int was_dirty = 0; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); assert_jbd_locked(&transaction->t_journal->j_list_lock); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == NULL); if (jh->b_transaction && jh->b_jlist == (unsigned) jlist) return; /* The following list of buffer states needs to be consistent * with __jbd_unexpected_dirty_buffer()'s handling of dirty * state. */ if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) was_dirty = 1; } if (jh->b_transaction) __journal_temp_unlink_buffer(jh); jh->b_transaction = transaction; switch (jlist) { case BJ_None: J_ASSERT_JH(jh, !jh->b_committed_data); J_ASSERT_JH(jh, !jh->b_frozen_data); return; case BJ_SyncData: list = &transaction->t_sync_datalist; break; case BJ_Metadata: transaction->t_nr_buffers++; list = &transaction->t_buffers; break; case BJ_Forget: list = &transaction->t_forget; break; case BJ_IO: list = &transaction->t_iobuf_list; break; case BJ_Shadow: list = &transaction->t_shadow_list; break; case BJ_LogCtl: list = &transaction->t_log_list; break; case BJ_Reserved: list = &transaction->t_reserved_list; break; case BJ_Locked: list = &transaction->t_locked_list; break; } __blist_add_buffer(list, jh); jh->b_jlist = jlist; if (was_dirty) set_buffer_jbddirty(bh); }