/*
 * Read the journal superblock from disk, validate it, and seed the
 * in-memory journal_t from it.
 *
 * Returns 0 on success.  Otherwise returns:
 *   - the buffer's b_err if reading the superblock failed;
 *   - whatever e2fsck_journal_fix_bad_inode() returns if the block does
 *     not carry the JFS magic number;
 *   - EXT2_ET_JOURNAL_UNSUPP_VERSION for an unknown superblock type or a
 *     V2 journal that still claims more than one user;
 *   - EXT2_ET_UNSUPP_FEATURE / EXT2_ET_RO_UNSUPP_FEATURE for unknown
 *     incompat / ro_compat feature bits;
 *   - EXT2_ET_CORRUPT_SUPERBLOCK for every other validation failure.
 */
static errcode_t e2fsck_journal_load(journal_t *journal)
{
	struct buffer_head *sb_bh = journal->j_sb_buffer;
	e2fsck_t ctx = journal->j_dev->k_ctx;
	struct problem_context pctx;
	journal_superblock_t *sb;

	clear_problem_context(&pctx);

	ll_rw_block(READ, 1, &sb_bh);
	if (sb_bh->b_err) {
		com_err(ctx->device_name, sb_bh->b_err, "%s",
			_("reading journal superblock\n"));
		return sb_bh->b_err;
	}

	sb = journal->j_superblock;

	/* No JFS magic at all: most likely this is not the journal inode. */
	if (sb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER))
		return e2fsck_journal_fix_bad_inode(ctx, &pctx);

	switch (ntohl(sb->s_header.h_blocktype)) {
	/*
	 * Descriptor, commit and revoke blocks must never sit where the
	 * journal superblock lives; seeing one means heavy corruption.
	 */
	case JFS_DESCRIPTOR_BLOCK:
	case JFS_COMMIT_BLOCK:
	case JFS_REVOKE_BLOCK:
		return EXT2_ET_CORRUPT_SUPERBLOCK;

	case JFS_SUPERBLOCK_V2:
		journal->j_format_version = 2;
		if (ntohl(sb->s_nr_users) > 1 &&
		    uuid_is_null(ctx->fs->super->s_journal_uuid))
			clear_v2_journal_fields(journal);
		/*
		 * s_nr_users is read again on purpose rather than cached:
		 * clear_v2_journal_fields() may have altered the superblock
		 * (presumably -- its body is not visible from here).
		 */
		if (ntohl(sb->s_nr_users) > 1) {
			fix_problem(ctx, PR_0_JOURNAL_UNSUPP_MULTIFS, &pctx);
			return EXT2_ET_JOURNAL_UNSUPP_VERSION;
		}
		break;

	case JFS_SUPERBLOCK_V1:
		journal->j_format_version = 1;
		/* A V1 superblock should not have any of the V2 fields set. */
		if (sb->s_feature_compat ||
		    sb->s_feature_incompat ||
		    sb->s_feature_ro_compat ||
		    sb->s_nr_users)
			clear_v2_journal_fields(journal);
		break;

	/*
	 * Magic number is right but the major type is unknown: probably a
	 * newer format than this code understands, so leave it untouched.
	 */
	default:
		return EXT2_ET_JOURNAL_UNSUPP_VERSION;
	}

	if (JFS_HAS_INCOMPAT_FEATURE(journal, ~JFS_KNOWN_INCOMPAT_FEATURES))
		return EXT2_ET_UNSUPP_FEATURE;

	if (JFS_HAS_RO_COMPAT_FEATURE(journal, ~JFS_KNOWN_ROCOMPAT_FEATURES))
		return EXT2_ET_RO_UNSUPP_FEATURE;

	/*
	 * Checksum v1-3 are mutually exclusive features, and the superblock
	 * checksum type and value must verify.  The tests short-circuit in
	 * this order; any failure means a corrupt superblock.
	 */
	if ((JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2) &&
	     JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V3)) ||
	    (journal_has_csum_v2or3(journal) &&
	     JFS_HAS_COMPAT_FEATURE(journal, JFS_FEATURE_COMPAT_CHECKSUM)) ||
	    !e2fsck_journal_verify_csum_type(journal, sb) ||
	    !e2fsck_journal_sb_csum_verify(journal, sb))
		return EXT2_ET_CORRUPT_SUPERBLOCK;

	/* With checksum v2/v3 the seed is derived from the journal UUID. */
	if (journal_has_csum_v2or3(journal))
		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
						   sizeof(sb->s_uuid));

	/*
	 * From here on we know enough about the journal format to proceed
	 * safely, so any other checks that fail we should attempt to
	 * recover from.
	 */
	if (sb->s_blocksize != htonl(journal->j_blocksize)) {
		com_err(ctx->program_name, EXT2_ET_CORRUPT_SUPERBLOCK,
			_("%s: no valid journal superblock found\n"),
			ctx->device_name);
		return EXT2_ET_CORRUPT_SUPERBLOCK;
	}

	/* The superblock may not claim more blocks than the journal has... */
	if (ntohl(sb->s_maxlen) > journal->j_maxlen) {
		com_err(ctx->program_name, EXT2_ET_CORRUPT_SUPERBLOCK,
			_("%s: journal too short\n"),
			ctx->device_name);
		return EXT2_ET_CORRUPT_SUPERBLOCK;
	}
	/* ...but it may claim fewer, in which case we shrink to match. */
	if (ntohl(sb->s_maxlen) < journal->j_maxlen)
		journal->j_maxlen = ntohl(sb->s_maxlen);

	journal->j_first = ntohl(sb->s_first);
	journal->j_last = ntohl(sb->s_maxlen);
	journal->j_tail = ntohl(sb->s_start);
	journal->j_tail_sequence = ntohl(sb->s_sequence);
	journal->j_transaction_sequence = journal->j_tail_sequence;

	return 0;
}
/*
 * do_one_pass() - walk the journal log once on behalf of one recovery pass.
 *
 * @journal: journal being recovered; its superblock supplies the starting
 *           transaction ID (s_sequence) and log block (s_start).
 * @info:    recovery state shared between passes.  PASS_SCAN fills in
 *           start_transaction and end_transaction; later passes stop once
 *           next_commit_ID reaches end_transaction.  nr_replays and
 *           nr_revoke_hits are incremented during PASS_REPLAY.
 * @pass:    PASS_SCAN, PASS_REVOKE or PASS_REPLAY.
 *
 * Per block type:
 *   - descriptor block: replayed in PASS_REPLAY (each logged block is copied
 *     into a buffer obtained with __getblk() on j_fs_dev and marked dirty;
 *     no write is issued here); checksummed in PASS_SCAN when the COMPAT
 *     checksum feature is on; otherwise its data blocks are skipped.
 *   - commit block: in PASS_SCAN with checksums, the running CRC is compared
 *     with the one stored in the commit header; then next_commit_ID advances.
 *   - revoke block: handed to scan_revoke_records() in PASS_REVOKE only.
 *
 * Returns 0 on success.  A failed jread() of the next log block, a failed
 * scan_revoke_records(), or a failed __getblk() (-ENOMEM) aborts the pass
 * and returns that error.  A failed read of an individual logged data block
 * during replay does not abort; the error is remembered and returned at the
 * end.  -EIO is returned if a non-scan pass ends at a different transaction
 * than PASS_SCAN recorded (unless an earlier error is already pending).
 */
static int do_one_pass(journal_t *journal, struct recovery_info *info,
		       enum passtype pass)
{
	unsigned int		first_commit_ID, next_commit_ID;
	unsigned long long	next_log_block;
	int			err, success = 0;
	journal_superblock_t *	sb;
	journal_header_t *	tmp;
	struct buffer_head *	bh;
	unsigned int		sequence;
	int			blocktype;
	int			tag_bytes = journal_tag_bytes(journal);
	__u32			crc32_sum = ~0; /* Transactional Checksums */

	/*
	 * First thing is to establish what we expect to find in the log
	 * (in terms of transaction IDs), and where (in terms of log
	 * block offsets): query the superblock.
	 */
	sb = journal->j_superblock;
	next_commit_ID = be32_to_cpu(sb->s_sequence);
	next_log_block = be32_to_cpu(sb->s_start);

	first_commit_ID = next_commit_ID;
	if (pass == PASS_SCAN)
		info->start_transaction = first_commit_ID;

	jbd_debug(1, "Starting recovery pass %d\n", pass);

	/*
	 * Now we walk through the log, transaction by transaction,
	 * making sure that each transaction has a commit block in the
	 * expected place.  Each complete transaction gets replayed back
	 * into the main filesystem.
	 */
	while (1) {
		int			flags;
		char *			tagp;
		journal_block_tag_t *	tag;
		struct buffer_head *	obh;
		struct buffer_head *	nbh;

		cond_resched();

		/* If we already know where to stop the log traversal,
		 * check right now that we haven't gone past the end of
		 * the log. */
		if (pass != PASS_SCAN)
			if (tid_geq(next_commit_ID, info->end_transaction))
				break;

		/* next_log_block is unsigned long long, hence %llu. */
		jbd_debug(2, "Scanning for sequence ID %u at %llu/%lu\n",
			  next_commit_ID, next_log_block, journal->j_last);

		/* Skip over each chunk of the transaction looking for
		 * either the next descriptor block or the final commit
		 * record. */
		jbd_debug(3, "JBD: checking block %llu\n", next_log_block);
		err = jread(&bh, journal, next_log_block);
		if (err)
			goto failed;

		next_log_block++;
		wrap(journal, next_log_block);

		/* What kind of buffer is it?
		 *
		 * If it is a descriptor block, check that it has the
		 * expected sequence number.  Otherwise, we're all done
		 * here. */
		tmp = (journal_header_t *)bh->b_data;

		if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
			brelse(bh);
			break;
		}

		blocktype = be32_to_cpu(tmp->h_blocktype);
		sequence = be32_to_cpu(tmp->h_sequence);
		jbd_debug(3, "Found magic %d, sequence %d\n",
			  blocktype, sequence);

		if (sequence != next_commit_ID) {
			brelse(bh);
			break;
		}

		/* OK, we have a valid descriptor block which matches
		 * all of the sequence number checks.  What are we going
		 * to do with it?  That depends on the pass...
		 *
		 * NOTE: the switch is the last statement of the loop body,
		 * so a `break` that leaves the switch (as opposed to the
		 * inner tag loop) simply starts the next loop iteration; it
		 * does NOT terminate the scan.  Only `goto done`, `goto
		 * failed` and the breaks above the switch end the walk. */
		switch(blocktype) {
		case JFS_DESCRIPTOR_BLOCK:
			/* If it is a valid descriptor block, replay it
			 * in pass REPLAY; if journal_checksums enabled, then
			 * calculate checksums in PASS_SCAN, otherwise,
			 * just skip over the blocks it describes. */
			if (pass != PASS_REPLAY) {
				if (pass == PASS_SCAN &&
				    JFS_HAS_COMPAT_FEATURE(journal,
					    JFS_FEATURE_COMPAT_CHECKSUM) &&
				    !info->end_transaction) {
					if (calc_chksums(journal, bh,
							 &next_log_block,
							 &crc32_sum)) {
						brelse(bh);
						/* leaves the switch only */
						break;
					}
					brelse(bh);
					continue;
				}
				next_log_block += count_tags(journal, bh);
				wrap(journal, next_log_block);
				brelse(bh);
				continue;
			}

			/* A descriptor block: we can now write all of
			 * the data blocks.  Yay, useful work is finally
			 * getting done here! */

			tagp = &bh->b_data[sizeof(journal_header_t)];
			while ((tagp - bh->b_data + tag_bytes)
			       <= journal->j_blocksize) {
				unsigned long long io_block;

				tag = (journal_block_tag_t *) tagp;
				flags = be32_to_cpu(tag->t_flags);

				io_block = next_log_block++;
				wrap(journal, next_log_block);
				err = jread(&obh, journal, io_block);
				if (err) {
					/* Recover what we can, but
					 * report failure at the end. */
					success = err;
					printk (KERN_ERR
						"JBD: IO error %d recovering "
						"block %llu in log\n",
						err, io_block);
				} else {
					unsigned long long blocknr;

					J_ASSERT(obh != NULL);
					blocknr = read_tag_block(tag_bytes,
								 tag);

					/* If the block has been
					 * revoked, then we're all done
					 * here. */
					if (journal_test_revoke
					    (journal, blocknr,
					     next_commit_ID)) {
						brelse(obh);
						++info->nr_revoke_hits;
						goto skip_write;
					}

					/* Find a buffer for the new
					 * data being restored */
					nbh = __getblk(journal->j_fs_dev,
						       blocknr,
						       journal->j_blocksize);
					if (nbh == NULL) {
						printk(KERN_ERR
						       "JBD: Out of memory "
						       "during recovery.\n");
						err = -ENOMEM;
						brelse(bh);
						brelse(obh);
						goto failed;
					}

					lock_buffer(nbh);
					memcpy(nbh->b_data, obh->b_data,
					       journal->j_blocksize);
					/* An escaped block had its leading
					 * magic number altered in the log;
					 * put the magic back in the copy. */
					if (flags & JFS_FLAG_ESCAPE) {
						journal_header_t *header;

						header = (journal_header_t *)
							&nbh->b_data[0];
						header->h_magic =
							cpu_to_be32(JFS_MAGIC_NUMBER);
					}

					BUFFER_TRACE(nbh, "marking dirty");
					set_buffer_uptodate(nbh);
					mark_buffer_dirty(nbh);
					BUFFER_TRACE(nbh, "marking uptodate");
					++info->nr_replays;
					/* The buffer is only marked dirty;
					 * no write is issued from here. */
					unlock_buffer(nbh);
					brelse(obh);
					brelse(nbh);
				}

			skip_write:
				/* Advance to the next tag; a tag without
				 * SAME_UUID is followed by 16 extra bytes. */
				tagp += tag_bytes;
				if (!(flags & JFS_FLAG_SAME_UUID))
					tagp += 16;

				if (flags & JFS_FLAG_LAST_TAG)
					break;
			}

			brelse(bh);
			continue;

		case JFS_COMMIT_BLOCK:
			jbd_debug(3, "Commit block for #%u found\n",
				  next_commit_ID);
			/*     How to differentiate between interrupted commit
			 *               and journal corruption ?
			 *
			 * {nth transaction}
			 *        Checksum Verification Failed
			 *                       |
			 *               ____________________
			 *              |                    |
			 *      async_commit             sync_commit
			 *              |                    |
			 *              | GO TO NEXT    "Journal Corruption"
			 *              | TRANSACTION
			 *              |
			 * {(n+1)th transaction}
			 *              |
			 *       _______|______________
			 *      |                     |
			 * Commit block found   Commit block not found
			 *      |                     |
			 * "Journal Corruption"       |
			 *               _____________|_________
			 *              |                       |
			 *      nth trans corrupt       OR   nth trans
			 *      and (n+1)th interrupted     interrupted
			 *      before commit block
			 *      could reach the disk.
			 *      (Cannot find the difference in above
			 *       mentioned conditions. Hence assume
			 *       "Interrupted Commit".)
			 */

			/* Found an expected commit block: if checksums
			 * are present verify them in PASS_SCAN; else not
			 * much to do other than move on to the next sequence
			 * number. */
			if (pass == PASS_SCAN &&
			    JFS_HAS_COMPAT_FEATURE(journal,
				    JFS_FEATURE_COMPAT_CHECKSUM)) {
				int chksum_err, chksum_seen;
				struct commit_header *cbh =
					(struct commit_header *)bh->b_data;
				unsigned found_chksum =
					be32_to_cpu(cbh->h_chksum[0]);

				chksum_err = chksum_seen = 0;

				jbd_debug(3, "Checksums %x %x\n",
					  crc32_sum, found_chksum);
				if (info->end_transaction) {
					journal->j_failed_commit =
						info->end_transaction;
					brelse(bh);
					/* leaves the switch only */
					break;
				}

				if (crc32_sum == found_chksum &&
				    cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
				    cbh->h_chksum_size ==
						JBD2_CRC32_CHKSUM_SIZE)
					chksum_seen = 1;
				else if (!(cbh->h_chksum_type == 0 &&
					   cbh->h_chksum_size == 0 &&
					   found_chksum == 0 &&
					   !chksum_seen))
				/*
				 * If fs is mounted using an old kernel and then
				 * kernel with journal_chksum is used then we
				 * get a situation where the journal flag has
				 * checksum flag set but checksums are not
				 * present i.e chksum = 0, in the individual
				 * commit blocks.
				 * Hence to avoid checksum failures, in this
				 * situation, this extra check is added.
				 */
					chksum_err = 1;

				if (chksum_err) {
					info->end_transaction = next_commit_ID;
					jbd_debug(1, "Checksum_err %x %x\n",
						  crc32_sum, found_chksum);
					if (!JFS_HAS_INCOMPAT_FEATURE(journal,
					    JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)){
						journal->j_failed_commit =
							next_commit_ID;
						brelse(bh);
						/* leaves the switch only */
						break;
					}
				}
				/* Restart the running CRC for the next
				 * transaction. */
				crc32_sum = ~0;
			}
			brelse(bh);
			next_commit_ID++;
			continue;

		case JFS_REVOKE_BLOCK:
			/* If we aren't in the REVOKE pass, then we can
			 * just skip over this block. */
			if (pass != PASS_REVOKE) {
				brelse(bh);
				continue;
			}

			err = scan_revoke_records(journal, bh,
						  next_commit_ID, info);
			brelse(bh);
			if (err)
				goto failed;
			continue;

		default:
			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
				  blocktype);
			brelse(bh);
			goto done;
		}
	}

 done:
	/*
	 * We broke out of the log scan loop: either we came to the
	 * known end of the log or we found an unexpected block in the
	 * log.  If the latter happened, then we know that the "current"
	 * transaction marks the end of the valid log.
	 */

	if (pass == PASS_SCAN) {
		if (!info->end_transaction)
			info->end_transaction = next_commit_ID;
	} else {
		/* It's really bad news if different passes end up at
		 * different places (but possible due to IO errors). */
		if (info->end_transaction != next_commit_ID) {
			printk (KERN_ERR "JBD: recovery pass %d ended at "
				"transaction %u, expected %u\n",
				pass, next_commit_ID, info->end_transaction);
			if (!success)
				success = -EIO;
		}
	}

	return success;

 failed:
	return err;
}
/*
 * Close an open transaction by writing its commit block to the journal.
 *
 * The commit header is built in a scratch buffer.  When the journal has
 * the COMPAT checksum feature, a CRC32 is accumulated over every journal
 * block from trans->start up to (but not including) trans->block and
 * stored in the header; otherwise the checksum fields are zeroed.  The
 * block is then mapped at trans->block and written.
 *
 * On success the transaction is flagged committed and no longer open,
 * trans->block is advanced past the commit block, and the filesystem
 * superblock gets EXT3_FEATURE_INCOMPAT_RECOVER and is marked dirty.
 *
 * Returns 0 on success, EXT2_ET_INVALID_ARGUMENT if the transaction is
 * not open or is already committed, ENOMEM if a buffer cannot be
 * obtained, or the error from journal_bmap() / block I/O.
 */
static errcode_t journal_commit_trans(journal_transaction_t *trans)
{
	struct buffer_head *commit_bh, *data_bh = NULL;
	struct commit_header *hdr;
#ifdef HAVE_SYS_TIME_H
	struct timeval now;
#endif
	errcode_t err;

	JOURNAL_CHECK_TRANS_MAGIC(trans);

	/* Only a transaction that is open and not yet committed qualifies. */
	if (trans->flags & J_TRANS_COMMITTED)
		return EXT2_ET_INVALID_ARGUMENT;
	if (!(trans->flags & J_TRANS_OPEN))
		return EXT2_ET_INVALID_ARGUMENT;

	commit_bh = getblk(trans->journal->j_dev, 0,
			   trans->journal->j_blocksize);
	if (!commit_bh)
		return ENOMEM;

	/* Fill in the commit block header. */
	hdr = (struct commit_header *)commit_bh->b_data;
	hdr->h_magic = ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER);
	hdr->h_blocktype = ext2fs_cpu_to_be32(JFS_COMMIT_BLOCK);
	hdr->h_sequence = ext2fs_cpu_to_be32(trans->tid);

	if (JFS_HAS_COMPAT_FEATURE(trans->journal,
				   JFS_FEATURE_COMPAT_CHECKSUM)) {
		__u32 crc = ~0;
		blk64_t blk;

		/* Second scratch buffer, reused to read each logged block. */
		data_bh = getblk(trans->journal->j_dev, 0,
				 trans->journal->j_blocksize);
		if (!data_bh) {
			err = ENOMEM;
			goto out;
		}

		blk = trans->start;
		while (blk < trans->block) {
			err = journal_bmap(trans->journal, blk,
					   &data_bh->b_blocknr);
			if (err)
				goto out;

			/* Drop the uptodate flag before re-reading the
			 * buffer at its new block number. */
			mark_buffer_uptodate(data_bh, 0);
			ll_rw_block(READ, 1, &data_bh);
			err = data_bh->b_err;
			if (err)
				goto out;

			crc = ext2fs_crc32_be(crc,
					(unsigned char const *)data_bh->b_data,
					data_bh->b_size);
			blk++;
		}

		hdr->h_chksum_type = JFS_CRC32_CHKSUM;
		hdr->h_chksum_size = JFS_CRC32_CHKSUM_SIZE;
		hdr->h_chksum[0] = ext2fs_cpu_to_be32(crc);
	} else {
		hdr->h_chksum_type = 0;
		hdr->h_chksum_size = 0;
		hdr->h_chksum[0] = 0;
	}

	/* Stamp the commit time; zero when no time source is compiled in. */
#ifdef HAVE_SYS_TIME_H
	gettimeofday(&now, NULL);
	hdr->h_commit_sec = ext2fs_cpu_to_be32(now.tv_sec);
	hdr->h_commit_nsec = ext2fs_cpu_to_be32(now.tv_usec * 1000);
#else
	hdr->h_commit_sec = 0;
	hdr->h_commit_nsec = 0;
#endif

	/* Checksum the finished commit block, then map and write it. */
	jbd2_commit_block_csum_set(trans->journal, commit_bh);
	err = journal_bmap(trans->journal, trans->block,
			   &commit_bh->b_blocknr);
	if (err)
		goto out;

	dbg_printf("Writing commit block at %llu:%llu\n", trans->block,
		   commit_bh->b_blocknr);
	mark_buffer_dirty(commit_bh);
	ll_rw_block(WRITE, 1, &commit_bh);
	err = commit_bh->b_err;
	if (err)
		goto out;

	/* The commit block is on disk: update the transaction state. */
	trans->flags |= J_TRANS_COMMITTED;
	trans->flags &= ~J_TRANS_OPEN;
	trans->block++;

	trans->fs->super->s_feature_incompat |= EXT3_FEATURE_INCOMPAT_RECOVER;
	ext2fs_mark_super_dirty(trans->fs);

	/* Success falls through with err == 0. */
out:
	if (data_bh)
		brelse(data_bh);
	brelse(commit_bh);
	return err;
}