/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list.
 *
 * Called with j_list_lock held and drops it if 1 is returned
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
			struct buffer_head **bhs, int *batch_count)
{
	struct buffer_head *bh = jh2bh(jh);
	int ret = 0;

	if (buffer_locked(bh)) {
		/*
		 * Buffer is under I/O: pin it with an extra reference,
		 * drop both locks, then sleep until the I/O completes.
		 */
		atomic_inc(&bh->b_count);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		wait_on_buffer(bh);
		/* the journal_head may have gone by now */
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		ret = 1;
	} else if (jh->b_transaction != NULL) {
		/*
		 * Buffer still belongs to a running/committing transaction:
		 * rather than writing it ourselves, force that commit and
		 * wait for it.  The tid is sampled before the locks are
		 * dropped, since jh->b_transaction may change afterwards.
		 */
		transaction_t *t = jh->b_transaction;
		tid_t tid = t->t_tid;

		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		log_start_commit(journal, tid);
		log_wait_commit(journal, tid);
		ret = 1;
	} else if (!buffer_dirty(bh)) {
		/*
		 * Already clean: nothing to write.  Unhook it from the
		 * checkpoint list and release the journal_head and our
		 * buffer reference.
		 */
		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
		BUFFER_TRACE(bh, "remove from checkpoint");
		__journal_remove_checkpoint(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		journal_remove_journal_head(bh);
		__brelse(bh);
		ret = 1;
	} else {
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal lock.
		 * We cannot afford to let the transaction logic start
		 * messing around with this buffer before we write it to
		 * disk, as that would break recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		set_buffer_jwrite(bh);
		bhs[*batch_count] = bh;
		__buffer_relink_io(jh);
		jbd_unlock_bh_state(bh);
		(*batch_count)++;
		/*
		 * Batch is full: submit it now.  j_list_lock is dropped,
		 * so the caller must restart its scan (ret == 1).
		 */
		if (*batch_count == NR_BATCH) {
			spin_unlock(&journal->j_list_lock);
			__flush_batch(journal, bhs, batch_count);
			ret = 1;
		}
	}
	return ret;
}
/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list, or -EIO if the buffer had seen a write
 * I/O error.  j_list_lock is held on entry and is dropped on every path
 * that returns non-zero; on a plain "queued into the batch" path (ret 0)
 * the lock stays held.
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
			    int *batch_count, transaction_t *transaction)
{
	struct buffer_head *bh = jh2bh(jh);
	int ret = 0;

	if (buffer_locked(bh)) {
		/* Buffer is under I/O: pin it, drop the lock, and wait. */
		get_bh(bh);
		spin_unlock(&journal->j_list_lock);
		wait_on_buffer(bh);
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		ret = 1;
	} else if (jh->b_transaction != NULL) {
		/*
		 * Buffer still belongs to a transaction: force that
		 * commit and wait for it instead of writing here.  The
		 * tid is sampled before the lock is dropped.
		 */
		transaction_t *t = jh->b_transaction;
		tid_t tid = t->t_tid;

		transaction->t_chp_stats.cs_forced_to_close++;
		spin_unlock(&journal->j_list_lock);
		if (unlikely(journal->j_flags & JBD2_UNMOUNT))
			/*
			 * With JBD2_UNMOUNT set the commit we are about
			 * to wait on may never happen, hence the loud
			 * warning before blocking.
			 */
			printk(KERN_ERR "JBD2: %s: "
			       "Waiting for Godot: block %llu\n",
			       journal->j_devname,
			       (unsigned long long) bh->b_blocknr);
		jbd2_log_start_commit(journal, tid);
		jbd2_log_wait_commit(journal, tid);
		ret = 1;
	} else if (!buffer_dirty(bh)) {
		/*
		 * Already clean: drop it from the checkpoint list.  A
		 * previous write error is propagated as -EIO, but the
		 * buffer is removed either way.
		 */
		ret = 1;
		if (unlikely(buffer_write_io_error(bh)))
			ret = -EIO;
		get_bh(bh);
		BUFFER_TRACE(bh, "remove from checkpoint");
		__jbd2_journal_remove_checkpoint(jh);
		spin_unlock(&journal->j_list_lock);
		__brelse(bh);
	} else {
		/*
		 * Dirty and unattached: queue it into the journal-wide
		 * batch and move it to the checkpoint io list.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		journal->j_chkpt_bhs[*batch_count] = bh;
		__buffer_relink_io(jh);
		transaction->t_chp_stats.cs_written++;
		(*batch_count)++;
		/*
		 * Batch is full: submit it.  j_list_lock is dropped, so
		 * the caller must restart its scan (ret == 1).
		 */
		if (*batch_count == JBD2_NR_BATCH) {
			spin_unlock(&journal->j_list_lock);
			__flush_batch(journal, batch_count);
			ret = 1;
		}
	}
	return ret;
}
/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list.
 *
 * Called with j_list_lock held.
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __flush_buffer(journal_t *journal, struct journal_head *jh,
			struct buffer_head **bhs, int *batch_count,
			int *drop_count)
{
	struct buffer_head *bh = jh2bh(jh);
	int ret = 0;

	/*
	 * Only buffers that are dirty, not under I/O, and on no journal
	 * list are written here; everything else goes down the cleanup
	 * path below.
	 */
	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
		J_ASSERT_JH(jh, jh->b_transaction == NULL);

		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal lock.
		 * We cannot afford to let the transaction logic start
		 * messing around with this buffer before we write it to
		 * disk, as that would break recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		set_buffer_jwrite(bh);
		bhs[*batch_count] = bh;
		jbd_unlock_bh_state(bh);
		(*batch_count)++;
		/* Batch full: submit and make the caller rescan. */
		if (*batch_count == NR_BATCH) {
			__flush_batch(journal, bhs, batch_count);
			ret = 1;
		}
	} else {
		int last_buffer = 0;

		if (jh->b_cpnext == jh) {
			/* We may be about to drop the transaction.  Tell the
			 * caller that the lists have changed.
			 */
			last_buffer = 1;
		}
		/*
		 * Freeing the buffer bumps *drop_count; only abort the
		 * caller's scan when this was the sole buffer left on
		 * the transaction's checkpoint list.
		 */
		if (__try_to_free_cp_buf(jh)) {
			(*drop_count)++;
			ret = last_buffer;
		}
	}
	return ret;
}
/*
 * Perform a checkpoint pass over the oldest checkpoint transaction:
 * first trim already-checkpointed transactions off the journal tail,
 * then push the transaction's checkpoint-list buffers to disk via
 * __process_buffer()/__flush_batch(), and finally wait for the
 * resulting I/O with __wait_cp_io().
 *
 * Returns 0 on success, or a negative error (e.g. -EIO propagated from
 * __process_buffer()), in which case the journal is aborted.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	transaction_t *transaction;
	tid_t this_tid;
	int result;

	jbd_debug(1, "Start checkpoint\n");

	/* Drop transactions that no longer need checkpointing. */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	result = 0;
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	/* Stamp the start of checkpointing for this transaction once. */
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * Only proceed if the transaction we targeted is still the head
	 * of the checkpoint list with the same tid; otherwise someone
	 * cleaned it up while the lock was dropped and we are done.
	 */
	if (journal->j_checkpoint_transactions == transaction &&
			transaction->t_tid == this_tid) {
		int batch_count = 0;
		struct journal_head *jh;
		int retry = 0, err;

		while (!retry && transaction->t_checkpoint_list) {
			jh = transaction->t_checkpoint_list;
			retry = __process_buffer(journal, jh, &batch_count,
						 transaction);
			/* Remember the first I/O error; keep scanning. */
			if (retry < 0 && !result)
				result = retry;
			/*
			 * Be nice to the scheduler/lock waiters: back off
			 * and restart rather than hogging j_list_lock.
			 */
			if (!retry && (need_resched() ||
				spin_needbreak(&journal->j_list_lock))) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
				break;
			}
		}

		if (batch_count) {
			/*
			 * A non-zero retry already dropped j_list_lock;
			 * drop it here for the remaining case so the
			 * batch is always flushed unlocked.
			 */
			if (!retry) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
			}
			__flush_batch(journal, &batch_count);
		}

		if (retry) {
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		/* Wait for the I/O queued above to complete. */
		err = __wait_cp_io(journal, transaction);
		if (!result)
			result = err;
	}
out:
	spin_unlock(&journal->j_list_lock);
	if (result < 0)
		jbd2_journal_abort(journal, result);
	else
		result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	transaction_t *transaction;
	tid_t this_tid;
	int result, batch_count = 0;

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Take one transaction
	 * and write it.
	 */
	result = 0;
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	/* Stamp the start of checkpointing for this transaction once. */
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	/* checkpoint all of the transaction's buffers */
	while (transaction->t_checkpoint_list) {
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);

		if (buffer_locked(bh)) {
			/*
			 * Under I/O: pin the buffer, drop the lock, wait
			 * for completion, then rescan via the retry path.
			 */
			spin_unlock(&journal->j_list_lock);
			get_bh(bh);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto retry;
		}
		if (jh->b_transaction != NULL) {
			/*
			 * Still owned by a transaction: force that commit
			 * and wait, instead of writing the buffer here.
			 * tid is sampled before the lock is dropped.
			 */
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			transaction->t_chp_stats.cs_forced_to_close++;
			spin_unlock(&journal->j_list_lock);
			if (unlikely(journal->j_flags & JBD2_UNMOUNT))
				/*
				 * The journal thread is dead; so
				 * starting and waiting for a commit
				 * to finish will cause us to wait for
				 * a _very_ long time.
				 */
				printk(KERN_ERR
				       "JBD2: %s: Waiting for Godot: block %llu\n",
				       journal->j_devname, (unsigned long long) bh->b_blocknr);
			jbd2_log_start_commit(journal, tid);
			jbd2_log_wait_commit(journal, tid);
			goto retry;
		}
		if (!buffer_dirty(bh)) {
			/* Clean already: record any write error and drop it. */
			if (unlikely(buffer_write_io_error(bh)) && !result)
				result = -EIO;
			BUFFER_TRACE(bh, "remove from checkpoint");
			if (__jbd2_journal_remove_checkpoint(jh))
				/* The transaction was released; we're done */
				goto out;
			continue;
		}
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal
		 * lock.  We cannot afford to let the transaction
		 * logic start messing around with this buffer before
		 * we write it to disk, as that would break
		 * recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		journal->j_chkpt_bhs[batch_count++] = bh;
		__buffer_relink_io(jh);
		transaction->t_chp_stats.cs_written++;
		/* Flush early when the batch fills or the lock is contended. */
		if ((batch_count == JBD2_NR_BATCH) ||
		    need_resched() ||
		    spin_needbreak(&journal->j_list_lock))
			goto unlock_and_flush;
	}

	if (batch_count) {
		/*
		 * unlock_and_flush is entered with j_list_lock held;
		 * retry is entered with it already dropped.  Both paths
		 * submit any pending batch, retake the lock, and restart
		 * the scan from the top.
		 */
		unlock_and_flush:
			spin_unlock(&journal->j_list_lock);
		retry:
			if (batch_count)
				__flush_batch(journal, &batch_count);
			spin_lock(&journal->j_list_lock);
			goto restart;
	}

	/*
	 * Now we issued all of the transaction's buffers, let's deal
	 * with the buffers that are out for I/O.
	 */
restart2:
	/* Did somebody clean up the transaction in the meanwhile? */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	while (transaction->t_checkpoint_io_list) {
		jh = transaction->t_checkpoint_io_list;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			/* Wait for the in-flight write, then rescan. */
			spin_unlock(&journal->j_list_lock);
			get_bh(bh);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			spin_lock(&journal->j_list_lock);
			goto restart2;
		}
		if (unlikely(buffer_write_io_error(bh)) && !result)
			result = -EIO;

		/*
		 * Now in whatever state the buffer currently is, we
		 * know that it has been written out and so we can
		 * drop it from the list
		 */
		if (__jbd2_journal_remove_checkpoint(jh))
			break;
	}
out:
	spin_unlock(&journal->j_list_lock);
	if (result < 0)
		jbd2_journal_abort(journal, result);
	else
		result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
/* @@@ `nblocks' is unused.  Should it be used? */
/*
 * Walk every transaction on the checkpoint list, flushing each
 * transaction's buffers through __flush_buffer()/__flush_batch() and
 * cleaning up transactions with __cleanup_transaction().  Runs under
 * journal_datalist_lock; restarts the whole walk whenever a batch is
 * flushed or a buffer forced a rescan.
 *
 * Returns 0 on success or the negative error from
 * cleanup_journal_tail().
 */
int log_do_checkpoint (journal_t *journal, int nblocks)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	int result;
	int target;
	int batch_count = 0;
	struct buffer_head *bhs[NR_BATCH];

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = cleanup_journal_tail(journal);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Try to free up a
	 * quarter of the log in a single checkpoint if we can.
	 */
	/*
	 * AKPM: check this code.  I had a feeling a while back that it
	 * degenerates into a busy loop at unmount time.
	 */
	/*
	 * NOTE(review): `target` is computed here but never read again in
	 * this function -- presumably related to the unused `nblocks`
	 * noted above; confirm before removing.
	 */
	target = (journal->j_last - journal->j_first) / 4;

	spin_lock(&journal_datalist_lock);
repeat:
	transaction = journal->j_checkpoint_transactions;
	if (transaction == NULL)
		goto done;
	last_transaction = transaction->t_cpprev;
	next_transaction = transaction;

	do {
		struct journal_head *jh, *last_jh, *next_jh;
		int drop_count = 0;
		int cleanup_ret, retry = 0;

		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		jh = transaction->t_checkpoint_list;
		last_jh = jh->b_cpprev;
		next_jh = jh;
		/*
		 * Walk this transaction's circular checkpoint list.
		 * next_jh is saved first because __flush_buffer() may
		 * drop jh from the list.
		 */
		do {
			jh = next_jh;
			next_jh = jh->b_cpnext;
			retry = __flush_buffer(journal, jh, bhs,
					       &batch_count, &drop_count);
		} while (jh != last_jh && !retry);
		/* Flushing a batch invalidates the scan: start over. */
		if (batch_count) {
			__flush_batch(bhs, &batch_count);
			goto repeat;
		}
		if (retry)
			goto repeat;
		/*
		 * We have walked the whole transaction list without
		 * finding anything to write to disk.  We had better be
		 * able to make some progress or we are in trouble.
		 */
		cleanup_ret = __cleanup_transaction(journal, transaction);
		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
		goto repeat;	/* __cleanup may have dropped lock */
	} while (transaction != last_transaction);

done:
	spin_unlock(&journal_datalist_lock);
	result = cleanup_journal_tail(journal);
	if (result < 0)
		return result;

	return 0;
}
/*
 * Perform an actual checkpoint.  We don't write out only enough to
 * satisfy the current blocked requests: rather we submit a reasonably
 * sized chunk of the outstanding data to disk at once for
 * efficiency.  __log_wait_for_space() will retry if we didn't free enough.
 *
 * However, we _do_ take into account the amount requested so that once
 * the IO has been queued, we can return as soon as enough of it has
 * completed to disk.
 *
 * The journal should be locked before calling this function.
 */
int log_do_checkpoint(journal_t *journal)
{
	int result;
	int batch_count = 0;
	struct buffer_head *bhs[NR_BATCH];

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = cleanup_journal_tail(journal);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Try to free up a
	 * quarter of the log in a single checkpoint if we can.
	 */
	/*
	 * AKPM: check this code.  I had a feeling a while back that it
	 * degenerates into a busy loop at unmount time.
	 */
	spin_lock(&journal->j_list_lock);
	while (journal->j_checkpoint_transactions) {
		transaction_t *transaction;
		struct journal_head *jh, *last_jh, *next_jh;
		int drop_count = 0;
		int cleanup_ret, retry = 0;
		tid_t this_tid;

		/* Always work on the head transaction; remember its tid. */
		transaction = journal->j_checkpoint_transactions;
		this_tid = transaction->t_tid;
		jh = transaction->t_checkpoint_list;
		last_jh = jh->b_cpprev;
		next_jh = jh;
		/*
		 * Walk the circular checkpoint list; next_jh is saved
		 * first because __flush_buffer() may drop jh from it.
		 */
		do {
			struct buffer_head *bh;

			jh = next_jh;
			next_jh = jh->b_cpnext;
			bh = jh2bh(jh);
			if (!jbd_trylock_bh_state(bh)) {
				/*
				 * bh state lock contended: let jbd_sync_bh
				 * deal with the buffer and rescan.
				 * NOTE(review): jbd_sync_bh appears to
				 * drop j_list_lock, hence the re-lock
				 * here -- confirm against its definition.
				 */
				jbd_sync_bh(journal, bh);
				spin_lock(&journal->j_list_lock);
				retry = 1;
				break;
			}
			retry = __flush_buffer(journal, jh, bhs,
					       &batch_count, &drop_count);
			/* Yield the lock/CPU if needed, then rescan. */
			if (cond_resched_lock(&journal->j_list_lock)) {
				retry = 1;
				break;
			}
		} while (jh != last_jh && !retry);

		if (batch_count) {
			__flush_batch(journal, bhs, &batch_count);
			retry = 1;
		}

		/*
		 * If someone cleaned up this transaction while we slept, we're
		 * done
		 */
		if (journal->j_checkpoint_transactions != transaction)
			break;
		if (retry)
			continue;
		/*
		 * Maybe it's a new transaction, but it fell at the same
		 * address
		 */
		if (transaction->t_tid != this_tid)
			continue;
		/*
		 * We have walked the whole transaction list without
		 * finding anything to write to disk.  We had better be
		 * able to make some progress or we are in trouble.
		 */
		cleanup_ret = __cleanup_transaction(journal, transaction);
		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
		/* __cleanup_transaction may have released the transaction. */
		if (journal->j_checkpoint_transactions != transaction)
			break;
	}
	spin_unlock(&journal->j_list_lock);
	result = cleanup_journal_tail(journal);
	if (result < 0)
		return result;
	return 0;
}
/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 *
 * Returns 0 on success or a negative error (e.g. -EIO propagated from
 * __process_buffer()), in which case the journal is aborted.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	transaction_t *transaction;
	tid_t this_tid;
	int result;

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Take one transaction
	 * and write it.
	 */
	result = 0;
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	/* Stamp the start of checkpointing for this transaction once. */
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions == transaction &&
			transaction->t_tid == this_tid) {
		int batch_count = 0;
		struct journal_head *jh;
		int retry = 0, err;

		while (!retry && transaction->t_checkpoint_list) {
			jh = transaction->t_checkpoint_list;
			retry = __process_buffer(journal, jh, &batch_count,
						 transaction);
			/* Remember the first I/O error; keep scanning. */
			if (retry < 0 && !result)
				result = retry;
			/*
			 * Back off when rescheduling is due or the lock
			 * is contended, then restart the scan.
			 */
			if (!retry && (need_resched() ||
				spin_needbreak(&journal->j_list_lock))) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
				break;
			}
		}

		if (batch_count) {
			/*
			 * A non-zero retry already dropped j_list_lock;
			 * drop it here otherwise so the batch is always
			 * flushed unlocked.
			 */
			if (!retry) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
			}
			__flush_batch(journal, &batch_count);
		}

		if (retry) {
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		/*
		 * Now we have cleaned up the first transaction's checkpoint
		 * list.  Let's clean up the second one
		 */
		err = __wait_cp_io(journal, transaction);
		if (!result)
			result = err;
	}
out:
	spin_unlock(&journal->j_list_lock);
	if (result < 0)
		jbd2_journal_abort(journal, result);
	else
		result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list.  Return <0 if the buffer has failed to
 * be written out.
 *
 * Called with j_list_lock held and drops it if 1 is returned
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
			    int *batch_count, transaction_t *transaction)
{
	struct buffer_head *bh = jh2bh(jh);
	int ret = 0;

	if (buffer_locked(bh)) {
		/* Buffer is under I/O: pin it, drop the lock, and wait. */
		get_bh(bh);
		spin_unlock(&journal->j_list_lock);
		wait_on_buffer(bh);
		/* the journal_head may have gone by now */
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		ret = 1;
	} else if (jh->b_transaction != NULL) {
		/*
		 * Buffer still belongs to a transaction: force that
		 * commit and wait for it instead of writing here.  The
		 * tid is sampled before the lock is dropped.
		 */
		transaction_t *t = jh->b_transaction;
		tid_t tid = t->t_tid;

		transaction->t_chp_stats.cs_forced_to_close++;
		spin_unlock(&journal->j_list_lock);
		if (unlikely(journal->j_flags & JBD2_UNMOUNT))
			/*
			 * The journal thread is dead; so starting and
			 * waiting for a commit to finish will cause
			 * us to wait for a _very_ long time.
			 */
			printk(KERN_ERR "JBD2: %s: "
			       "Waiting for Godot: block %llu\n",
			       journal->j_devname,
			       (unsigned long long) bh->b_blocknr);
		jbd2_log_start_commit(journal, tid);
		jbd2_log_wait_commit(journal, tid);
		ret = 1;
	} else if (!buffer_dirty(bh)) {
		/*
		 * Already clean: remove it from the checkpoint list.  A
		 * past write error turns the return into -EIO, but the
		 * buffer is dropped either way.
		 */
		ret = 1;
		if (unlikely(buffer_write_io_error(bh)))
			ret = -EIO;
		get_bh(bh);
		BUFFER_TRACE(bh, "remove from checkpoint");
		__jbd2_journal_remove_checkpoint(jh);
		spin_unlock(&journal->j_list_lock);
		__brelse(bh);
	} else {
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal lock.
		 * We cannot afford to let the transaction logic start
		 * messing around with this buffer before we write it to
		 * disk, as that would break recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		journal->j_chkpt_bhs[*batch_count] = bh;
		__buffer_relink_io(jh);
		transaction->t_chp_stats.cs_written++;
		(*batch_count)++;
		/*
		 * Batch is full: submit it.  j_list_lock is dropped, so
		 * the caller must restart its scan (ret == 1).
		 */
		if (*batch_count == JBD2_NR_BATCH) {
			spin_unlock(&journal->j_list_lock);
			__flush_batch(journal, batch_count);
			ret = 1;
		}
	}
	return ret;
}
/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 */
int log_do_checkpoint(journal_t *journal)
{
	transaction_t *transaction;
	tid_t this_tid;
	int result;

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = cleanup_journal_tail(journal);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Take one transaction
	 * and write it.
	 */
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions == transaction &&
			transaction->t_tid == this_tid) {
		int batch_count = 0;
		struct buffer_head *bhs[NR_BATCH];
		struct journal_head *jh;
		int retry = 0;

		while (!retry && transaction->t_checkpoint_list) {
			struct buffer_head *bh;

			jh = transaction->t_checkpoint_list;
			bh = jh2bh(jh);
			if (!jbd_trylock_bh_state(bh)) {
				/*
				 * bh state lock contended: let jbd_sync_bh
				 * handle the buffer and restart the scan.
				 * NOTE(review): unlike the sibling variant
				 * above, no re-lock of j_list_lock happens
				 * here before the retry path retakes it --
				 * verify jbd_sync_bh's locking contract.
				 */
				jbd_sync_bh(journal, bh);
				retry = 1;
				break;
			}
			retry = __process_buffer(journal, jh, bhs,
						 &batch_count);
			/* Yield when the lock is contended, then restart. */
			if (!retry &&
			    lock_need_resched(&journal->j_list_lock)) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
				break;
			}
		}

		if (batch_count) {
			/*
			 * A non-zero retry already dropped j_list_lock;
			 * drop it here otherwise so the batch is always
			 * flushed unlocked.
			 */
			if (!retry) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
			}
			__flush_batch(journal, bhs, &batch_count);
		}

		if (retry) {
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		/*
		 * Now we have cleaned up the first transaction's checkpoint
		 * list.  Let's clean up the second one
		 */
		__wait_cp_io(journal, transaction);
	}
out:
	spin_unlock(&journal->j_list_lock);
	result = cleanup_journal_tail(journal);
	if (result < 0)
		return result;
	return 0;
}