/**
 * journal_get_write_access() - request write access to a journalled buffer
 * @handle: handle on whose behalf the buffer will be modified
 * @bh:     buffer that is about to be modified
 *
 * Takes a journal_head for @bh with journal_add_journal_head(), forwards the
 * request to do_get_write_access() with its third argument set to 0, and
 * releases the journal_head again with journal_put_journal_head().
 *
 * Return: the value returned by do_get_write_access().
 */
int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
{
	struct journal_head *jh;
	int err;

	jh = journal_add_journal_head(bh);

	/*
	 * The log thread manipulates some of the same fields we do, so we
	 * must not be caught touching them concurrently.  Make sure any
	 * outstanding IO on the buffer has completed before going on.
	 */
	err = do_get_write_access(handle, jh, 0);

	journal_put_journal_head(jh);

	return err;
}
/**
 * journal_get_undo_access() - get write access and preserve a committed copy
 * @handle: handle on whose behalf the buffer will be modified
 * @bh:     buffer that is about to be modified
 *
 * Obtains write access through do_get_write_access() (third argument 1) and
 * then makes sure jh->b_committed_data holds a private copy of the buffer's
 * current contents.  The copy is allocated outside the bh state lock and
 * installed under it; if another path installed one first, our unused
 * allocation is freed on the way out.
 *
 * Return: 0 on success, the error from do_get_write_access(), or -ENOMEM if
 * the copy could not be allocated.
 */
int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
{
	struct journal_head *jh = journal_add_journal_head(bh);
	char *spare = NULL;	/* allocated copy not yet handed to jh */
	int err;

	JBUFFER_TRACE(jh, "entry");

	/*
	 * Do this first --- it can drop the journal lock, so we want to
	 * make sure that obtaining the committed_data is done
	 * atomically wrt. completion of any outstanding commits.
	 */
	err = do_get_write_access(handle, jh, 1);
	if (err)
		goto out;

	for (;;) {
		/* Allocate without the state lock held. */
		if (!jh->b_committed_data) {
			spare = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
			if (!spare) {
				printk(KERN_EMERG "%s: No memory for committed data\n",
				       __func__);
				err = -ENOMEM;
				goto out;
			}
		}

		jbd_lock_bh_state(bh);
		if (jh->b_committed_data)
			break;		/* a copy is already in place */

		/*
		 * Copy out the current buffer contents into the
		 * preserved, committed copy.
		 */
		JBUFFER_TRACE(jh, "generate b_committed data");
		if (spare) {
			jh->b_committed_data = spare;
			spare = NULL;
			memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
			break;
		}

		/*
		 * The copy vanished between the unlocked check and taking
		 * the lock, and we have nothing to install: drop the lock
		 * and go round again to allocate.
		 */
		jbd_unlock_bh_state(bh);
	}
	jbd_unlock_bh_state(bh);

out:
	journal_put_journal_head(jh);
	if (unlikely(spare))
		jbd_free(spare, bh->b_size);
	return err;
}
/*
 * We play buffer_head aliasing tricks to write data/metadata blocks to
 * the journal without copying their contents, but for journal
 * descriptor blocks we do need to generate bona fide buffers.
 *
 * After the caller of journal_get_descriptor_buffer() has finished modifying
 * the buffer's contents they really should run flush_dcache_page(bh->b_page).
 * But we don't bother doing that, so there will be coherency problems with
 * mmaps of blockdevs which hold live JBD-controlled filesystems.
 *
 * Returns a journal_head attached to a zero-filled, uptodate buffer for the
 * next log block, or NULL if no log block could be obtained or the buffer
 * could not be looked up.
 */
struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
{
	struct buffer_head *bh;
	unsigned long blocknr;
	int err;

	err = journal_next_log_block(journal, &blocknr);
	if (err)
		return NULL;

	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
	/*
	 * __getblk() may fail; report that the same way as a failure to get
	 * a log block instead of dereferencing a NULL buffer_head below.
	 */
	if (!bh)
		return NULL;

	lock_buffer(bh);
	memset(bh->b_data, 0, journal->j_blocksize);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
	BUFFER_TRACE(bh, "return this buffer");

	return journal_add_journal_head(bh);
}
/**
 * journal_dirty_data() - file a data buffer against the handle's transaction
 * @handle: handle whose transaction the data should be ordered against
 * @bh:     buffer holding the dirty data
 *
 * Files @bh on the BJ_SyncData list of handle->h_transaction, unless it is
 * unmapped or must be left with the committing transaction (see the comments
 * in the body).
 *
 * Return: 0 on success, and also when the handle is aborted or the buffer is
 * skipped; -EIO if the buffer belongs to an older transaction and is not
 * uptodate.
 */
int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
{
	journal_t *journal = handle->h_transaction->t_journal;
	int need_brelse = 0;
	struct journal_head *jh;
	int ret = 0;

	if (is_handle_aborted(handle))
		return ret;

	jh = journal_add_journal_head(bh);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * The buffer could *already* be dirty.  Writeout can start
	 * at any time.
	 */
	jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);

	/*
	 * What if the buffer is already part of a running transaction?
	 *
	 * There are two cases:
	 * 1) It is part of the current running transaction.  Refile it,
	 *    just in case we have allocated it as metadata, deallocated
	 *    it, then reallocated it as data.
	 * 2) It is part of the previous, still-committing transaction.
	 *    If all we want to do is to guarantee that the buffer will be
	 *    written to disk before this new transaction commits, then
	 *    being sure that the *previous* transaction has this same
	 *    property is sufficient for us!  Just leave it on its old
	 *    transaction.
	 *
	 * In case (2), the buffer must not already exist as metadata
	 * --- that would violate write ordering (a transaction is free
	 * to write its data at any point, even before the previous
	 * committing transaction has committed).  The caller must
	 * never, ever allow this to happen: there's nothing we can do
	 * about it in this layer.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	/* Now that we have bh_state locked, are we really still mapped? */
	if (!buffer_mapped(bh)) {
		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
		goto no_journal;
	}

	if (jh->b_transaction) {
		JBUFFER_TRACE(jh, "has transaction");
		if (jh->b_transaction != handle->h_transaction) {
			JBUFFER_TRACE(jh, "belongs to older transaction");
			J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

			/* @@@ IS THIS TRUE  ? */
			/*
			 * Not any more.  Scenario: someone does a write()
			 * in data=journal mode.  The buffer's transaction has
			 * moved into commit.  Then someone does another
			 * write() to the file.  We do the frozen data copyout
			 * and set b_next_transaction to point to j_running_t.
			 * And while we're in that state, someone does a
			 * writepage() in an attempt to pageout the same area
			 * of the file via a shared mapping.  At present that
			 * calls journal_dirty_data(), and we get right here.
			 * It may be too late to journal the data.  Simply
			 * falling through to the next test will suffice: the
			 * data will be dirty and will be checkpointed.  The
			 * ordering comments in the next comment block still
			 * apply.
			 *
			 * (For that reason we do not assert here that
			 * jh->b_next_transaction is NULL.)
			 */

			/*
			 * If we're journalling data, and this buffer was
			 * subject to a write(), it could be metadata, forget
			 * or shadow against the committing transaction.  Now,
			 * someone has dirtied the same darn page via a mapping
			 * and it is being writepage()'d.
			 * We *could* just steal the page from commit, with some
			 * fancy locking there.  Instead, we just skip it -
			 * don't tie the page's buffers to the new transaction
			 * at all.
			 * Implication: if we crash before the writepage() data
			 * is written into the filesystem, recovery will replay
			 * the write() data.
			 */
			if (jh->b_jlist != BJ_None &&
			    jh->b_jlist != BJ_SyncData &&
			    jh->b_jlist != BJ_Locked) {
				JBUFFER_TRACE(jh, "Not stealing");
				goto no_journal;
			}

			/*
			 * This buffer may be undergoing writeout in commit.  We
			 * can't return from here and let the caller dirty it
			 * again because that can cause the write-out loop in
			 * commit to never terminate.
			 */
			if (buffer_dirty(bh)) {
				/* Hold a reference across the unlocked wait. */
				get_bh(bh);
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				need_brelse = 1;
				sync_dirty_buffer(bh);
				jbd_lock_bh_state(bh);
				spin_lock(&journal->j_list_lock);
				/* Since we dropped the lock... */
				if (!buffer_mapped(bh)) {
					JBUFFER_TRACE(jh, "buffer got unmapped");
					goto no_journal;
				}
				/*
				 * The buffer may become locked again at any
				 * time if it is redirtied
				 */
			}

			/*
			 * We cannot remove the buffer with io error from the
			 * committing transaction, because otherwise it would
			 * miss the error and the commit would not abort.
			 */
			if (unlikely(!buffer_uptodate(bh))) {
				ret = -EIO;
				goto no_journal;
			}

			/*
			 * We might have slept above with both locks dropped,
			 * so the buffer's owner may have changed.  Only unfile
			 * it here if it still belongs to some transaction
			 * other than ours; a buffer already on our transaction
			 * is handled by the list check below.
			 */
			if (jh->b_transaction != NULL &&
			    jh->b_transaction != handle->h_transaction) {
				JBUFFER_TRACE(jh, "unfile from commit");
				__journal_temp_unlink_buffer(jh);
				/*
				 * It still points to the committing
				 * transaction; move it to this one so
				 * that the refile assert checks are
				 * happy.
				 */
				jh->b_transaction = handle->h_transaction;
			}
			/* The buffer will be refiled below */
		}

		/*
		 * Special case --- the buffer might actually have been
		 * allocated and then immediately deallocated in the previous,
		 * committing transaction, so might still be left on that
		 * transaction's metadata lists.
		 */
		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
			JBUFFER_TRACE(jh, "not on correct data list: unfile");
			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
			__journal_temp_unlink_buffer(jh);
			jh->b_transaction = handle->h_transaction;
			JBUFFER_TRACE(jh, "file as data");
			__journal_file_buffer(jh, handle->h_transaction,
						BJ_SyncData);
		}
	} else {
		JBUFFER_TRACE(jh, "not on a transaction");
		__journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
	}

no_journal:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	if (need_brelse) {
		/* Drop the reference taken before sync_dirty_buffer(). */
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	JBUFFER_TRACE(jh, "exit");
	journal_put_journal_head(jh);
	return ret;
}
/**
 * journal_get_create_access() - prepare a newly created buffer for journalling
 * @handle: handle on whose behalf the buffer is being created
 * @bh:     the new buffer
 *
 * Files @bh against the handle's transaction (BJ_Reserved), or records that
 * transaction as its next transaction if it is still on the committing
 * transaction's list, and then cancels any revoke on the buffer.
 *
 * Return: 0 on success, -EROFS if the handle has been aborted.
 */
int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = journal_add_journal_head(bh);
	int err;

	jbd_debug(5, "journal_head %p\n", jh);
	err = -EROFS;
	if (is_handle_aborted(handle))
		goto out;
	err = 0;

	JBUFFER_TRACE(jh, "entry");
	/*
	 * The buffer may already belong to this transaction due to pre-zeroing
	 * in the filesystem's new_block code.  It may also be on the previous,
	 * committing transaction's lists, but it HAS to be in Forget state in
	 * that case: the transaction must have deleted the buffer for it to be
	 * reused here.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);
	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
		jh->b_transaction == NULL ||
		(jh->b_transaction == journal->j_committing_transaction &&
			  jh->b_jlist == BJ_Forget)));

	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));

	if (jh->b_transaction == NULL) {
		/*
		 * Previous journal_forget() could have left the buffer
		 * with jbddirty bit set because it was being committed. When
		 * the commit finished, we've filed the buffer for
		 * checkpointing and marked it dirty. Now we are reallocating
		 * the buffer so the transaction freeing it must have
		 * committed and so it's safe to clear the dirty bit.
		 */
		clear_buffer_dirty(jh2bh(jh));
		jh->b_transaction = transaction;

		/* first access by this transaction */
		jh->b_modified = 0;

		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		__journal_file_buffer(jh, transaction, BJ_Reserved);
	} else if (jh->b_transaction == journal->j_committing_transaction) {
		/* first access by this transaction */
		jh->b_modified = 0;

		JBUFFER_TRACE(jh, "set next transaction");
		jh->b_next_transaction = transaction;
	}
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);

	/*
	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
	 * blocks which contain freed but then revoked metadata.  We need
	 * to cancel the revoke in case we end up freeing it yet again
	 * and the reallocating as data - this would cause a second revoke,
	 * which hits an assertion error.
	 */
	JBUFFER_TRACE(jh, "cancelling revoke");
	journal_cancel_revoke(handle, jh);
out:
	/*
	 * Drop the reference taken by journal_add_journal_head() on every
	 * path, including the aborted-handle exit, so it is not leaked.
	 */
	journal_put_journal_head(jh);
	return err;
}
/**
 * journal_write_metadata_buffer() - build the buffer used to log a metadata block
 * @transaction: transaction the buffer is being written for
 * @jh_in:       journal_head of the buffer whose contents are to be logged
 * @jh_out:      set to the journal_head of the newly created IO buffer
 * @blocknr:     journal block number the new buffer is mapped to
 *
 * Creates a new buffer_head that aliases either @jh_in's page or its frozen
 * copy, files it on the transaction's BJ_IO list and files @jh_in on
 * BJ_Shadow.  If the data starts with JFS_MAGIC_NUMBER, a private frozen copy
 * is made (unless one exists) and its first word is zeroed ("escaped").
 *
 * Return: a bit mask; bit 0 is set when the block was escaped, bit 1 is set
 * when the new buffer points at frozen (copied-out) data.
 */
int journal_write_metadata_buffer(transaction_t *transaction,
				  struct journal_head *jh_in,
				  struct journal_head **jh_out,
				  unsigned int blocknr)
{
	int need_copy_out = 0;
	int done_copy_out = 0;
	int do_escape = 0;
	char *mapped_data;
	struct buffer_head *new_bh;
	struct journal_head *new_jh;
	struct page *new_page;
	unsigned int new_offset;
	struct buffer_head *bh_in = jh2bh(jh_in);
	journal_t *journal = transaction->t_journal;

	/*
	 * The buffer really shouldn't be locked: only the current committing
	 * transaction is allowed to write it, so nobody else is allowed
	 * to do any IO.
	 *
	 * akpm: except if we're journalling data, and write() output is
	 * also part of a shared mapping, and another thread has
	 * decided to launch a writepage() against this buffer.
	 */
	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));

	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
	/* keep subsequent assertions sane */
	new_bh->b_state = 0;
	init_buffer(new_bh, NULL, NULL);
	atomic_set(&new_bh->b_count, 1);
	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */

	/*
	 * If a new transaction has already done a buffer copy-out, then
	 * we use that version of the data for the commit.
	 */
	jbd_lock_bh_state(bh_in);
repeat:
	if (jh_in->b_frozen_data) {
		done_copy_out = 1;
		new_page = virt_to_page(jh_in->b_frozen_data);
		new_offset = offset_in_page(jh_in->b_frozen_data);
	} else {
		new_page = jh2bh(jh_in)->b_page;
		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
	}

	mapped_data = kmap_atomic(new_page);
	/*
	 * Check for escaping
	 */
	if (*((__be32 *)(mapped_data + new_offset)) ==
				cpu_to_be32(JFS_MAGIC_NUMBER)) {
		need_copy_out = 1;
		do_escape = 1;
	}
	kunmap_atomic(mapped_data);

	/*
	 * Do we need to do a data copy?
	 */
	if (need_copy_out && !done_copy_out) {
		char *tmp;

		/* Allocate with the state lock dropped. */
		jbd_unlock_bh_state(bh_in);
		/*
		 * This function has no way to report an allocation failure
		 * to its caller, and a NULL here would be installed as
		 * b_frozen_data and passed to memcpy() below.  Ask the
		 * allocator not to fail, as alloc_buffer_head() above
		 * already does.
		 * NOTE(review): assumes jbd_alloc() passes its gfp flags
		 * through to the page allocator -- confirm.
		 */
		tmp = jbd_alloc(bh_in->b_size, GFP_NOFS|__GFP_NOFAIL);
		jbd_lock_bh_state(bh_in);
		if (jh_in->b_frozen_data) {
			/* Someone froze the data while we were unlocked. */
			jbd_free(tmp, bh_in->b_size);
			goto repeat;
		}

		jh_in->b_frozen_data = tmp;
		mapped_data = kmap_atomic(new_page);
		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
		kunmap_atomic(mapped_data);

		new_page = virt_to_page(tmp);
		new_offset = offset_in_page(tmp);
		done_copy_out = 1;
	}

	/*
	 * Did we need to do an escaping?  Now we've done all the
	 * copying, we can finally do so.
	 */
	if (do_escape) {
		mapped_data = kmap_atomic(new_page);
		*((unsigned int *)(mapped_data + new_offset)) = 0;
		kunmap_atomic(mapped_data);
	}

	set_bh_page(new_bh, new_page, new_offset);
	new_jh->b_transaction = NULL;
	new_bh->b_size = jh2bh(jh_in)->b_size;
	new_bh->b_bdev = transaction->t_journal->j_dev;
	new_bh->b_blocknr = blocknr;
	set_buffer_mapped(new_bh);
	set_buffer_dirty(new_bh);

	*jh_out = new_jh;

	/*
	 * The to-be-written buffer needs to get moved to the io queue,
	 * and the original buffer whose contents we are shadowing or
	 * copying is moved to the transaction's shadow queue.
	 */
	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
	spin_lock(&journal->j_list_lock);
	__journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh_in);

	JBUFFER_TRACE(new_jh, "file as BJ_IO");
	journal_file_buffer(new_jh, transaction, BJ_IO);

	return do_escape | (done_copy_out << 1);
}