/* tells whether `offset' falls inside a block that does not match its verity
   hash; `data' is the content of that block as read by the caller. Offsets
   at or beyond `f->data_size' are never reported as erasures. */
static inline bool is_erasure(fec_handle *f, uint64_t offset,
        const uint8_t *data)
{
    const bool past_end = offset >= f->data_size;

    if (unlikely(past_end)) {
        return false;
    }

    /* the check works at block granularity: we cannot tell which bytes of
       the block are damaged, only whether the block as a whole validates;
       this is still useful because corruption frequently affects the
       entire block */
    const uint64_t block = offset / FEC_BLOCKSIZE;
    const uint8_t *expected = f->verity.hash + block * SHA256_DIGEST_LENGTH;

    const bool block_ok = verity_check_block(f, expected, data);
    return !block_ok;
}
/* reads the verity hash tree, validates it against the root hash in `root',
   corrects errors if necessary, and copies valid data blocks for later use
   to `f->verity.hash'

   The tree is walked from the root block downwards: each hash stored in a
   block of one level is compared with the corresponding block of the next
   level. Blocks that do not validate are re-read through ecc_read_hashes
   and, when the handle was opened with O_RDWR, the repaired bytes are
   written back to the file.

   On success returns 0 and replaces `f->verity.hash' with a newly allocated
   copy of the lowest-level hash blocks (any previous buffer is deleted).
   Returns -1 if a read, write, validation or allocation step fails; errno
   is set to ENOMEM for the allocation failure. */
static int verify_tree(fec_handle *f, const uint8_t *root)
{
    uint8_t data[FEC_BLOCKSIZE];
    uint8_t hash[SHA256_DIGEST_LENGTH];

    check(f);
    check(root);

    verity_info *v = &f->verity;
    uint32_t levels = 0;

    /* calculate the size and the number of levels in the hash tree */
    v->hash_size =
        verity_get_size(v->data_blocks * FEC_BLOCKSIZE, &levels, NULL);

    check(v->hash_start < UINT64_MAX - v->hash_size);
    check(v->hash_start + v->hash_size <= f->data_size);

    uint64_t hash_offset = v->hash_start;
    uint64_t data_offset = hash_offset + FEC_BLOCKSIZE;

    v->hash_data_offset = data_offset;

    /* validate the root hash */
    if (!raw_pread(f, data, FEC_BLOCKSIZE, hash_offset) ||
            !verity_check_block(f, root, data)) {
        /* try to correct */
        if (!ecc_read_hashes(f, 0, NULL, hash_offset, data) ||
                !verity_check_block(f, root, data)) {
            error("root hash invalid");
            return -1;
        } else if (f->mode & O_RDWR &&
                    !raw_pwrite(f, data, FEC_BLOCKSIZE, hash_offset)) {
            error("failed to rewrite the root block: %s", strerror(errno));
            return -1;
        }
    }

    debug("root hash valid");

    /* calculate the number of hashes on each level */
    uint32_t hashes[levels];

    verity_get_size(v->data_blocks * FEC_BLOCKSIZE, NULL, hashes);

    /* calculate the size and offset for the data hashes; after the loop
       `hash_data_offset' and `hash_data_blocks' describe the last level
       visited, i.e. hashes[1] */
    for (uint32_t i = 1; i < levels; ++i) {
        uint32_t blocks = hashes[levels - i];
        debug("%u hash blocks on level %u", blocks, levels - i);

        v->hash_data_offset = data_offset;
        v->hash_data_blocks = blocks;

        /* widen before multiplying so that a large level cannot wrap a
           32-bit product */
        data_offset += static_cast<uint64_t>(blocks) * FEC_BLOCKSIZE;
    }

    /* size of the data hash level in bytes, computed in 64 bits */
    const uint64_t hash_data_size =
        static_cast<uint64_t>(v->hash_data_blocks) * FEC_BLOCKSIZE;

    check(v->hash_data_blocks);
    check(v->hash_data_blocks <= v->hash_size / FEC_BLOCKSIZE);

    check(v->hash_data_offset);
    check(v->hash_data_offset <= UINT64_MAX - hash_data_size);
    check(v->hash_data_offset < f->data_size);
    check(v->hash_data_offset + hash_data_size <= f->data_size);

    /* the in-memory copy must be addressable; a truncated length would
       yield a buffer smaller than what the loop below copies into it */
    const size_t hash_data_bytes = static_cast<size_t>(hash_data_size);
    check(hash_data_bytes == hash_data_size);

    /* copy data hashes to memory in case they are corrupted, so we don't
       have to correct them every time they are needed */
    std::unique_ptr<uint8_t[]> data_hashes(
        new (std::nothrow) uint8_t[hash_data_bytes]);

    if (!data_hashes) {
        errno = ENOMEM;
        return -1;
    }

    /* validate the rest of the hash tree */
    data_offset = hash_offset + FEC_BLOCKSIZE;

    for (uint32_t i = 1; i < levels; ++i) {
        uint32_t blocks = hashes[levels - i];

        for (uint32_t j = 0; j < blocks; ++j) {
            /* position of the j-th hash on the parent level and of the
               block it covers on this level, both in 64-bit arithmetic */
            const uint64_t hash_pos =
                hash_offset + static_cast<uint64_t>(j) * SHA256_DIGEST_LENGTH;
            const uint64_t data_pos =
                data_offset + static_cast<uint64_t>(j) * FEC_BLOCKSIZE;

            /* ecc reads are very I/O intensive, so read raw hash tree and do
               error correcting only if it doesn't validate */
            if (!raw_pread(f, hash, SHA256_DIGEST_LENGTH, hash_pos) ||
                    !raw_pread(f, data, FEC_BLOCKSIZE, data_pos)) {
                error("failed to read hashes: %s", strerror(errno));
                return -1;
            }

            if (!verity_check_block(f, hash, data)) {
                /* try to correct */
                if (!ecc_read_hashes(f, hash_pos, hash, data_pos, data) ||
                        !verity_check_block(f, hash, data)) {
                    error("invalid hash tree: hash_offset %" PRIu64 ", "
                        "data_offset %" PRIu64 ", block %u",
                        hash_offset, data_offset, j);
                    return -1;
                }

                /* update the corrected blocks to the file if we are in r/w
                   mode */
                if (f->mode & O_RDWR) {
                    if (!raw_pwrite(f, hash, SHA256_DIGEST_LENGTH, hash_pos) ||
                            !raw_pwrite(f, data, FEC_BLOCKSIZE, data_pos)) {
                        error("failed to write hashes: %s", strerror(errno));
                        return -1;
                    }
                }
            }

            /* only the data hash level is kept in memory */
            if (blocks == v->hash_data_blocks) {
                memcpy(data_hashes.get() +
                            static_cast<size_t>(j) * FEC_BLOCKSIZE,
                        data, FEC_BLOCKSIZE);
            }
        }

        /* the level just validated provides the hashes for the next one */
        hash_offset = data_offset;
        data_offset += static_cast<uint64_t>(blocks) * FEC_BLOCKSIZE;
    }

    debug("valid");

    if (v->hash) {
        delete[] v->hash;
        v->hash = NULL;
    }

    v->hash = data_hashes.release();
    return 0;
}
/* reads `count' bytes from `offset', corrects possible errors with erasure detection, and verifies the integrity of read data using verity hash tree; returns the number of corrections in `errors' */ static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count, uint64_t offset, size_t *errors) { check(f); check(dest); check(offset < f->data_size); check(offset + count <= f->data_size); check(f->verity.hash); check(errors); debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); rs_unique_ptr rs(NULL, free_rs_char); std::unique_ptr<uint8_t[]> ecc_data; if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) { return -1; } uint64_t curr = offset / FEC_BLOCKSIZE; size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); size_t left = count; uint8_t data[FEC_BLOCKSIZE]; uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH; while (left > 0) { check(curr <= max_hash_block); uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH]; uint64_t curr_offset = curr * FEC_BLOCKSIZE; bool expect_zeros = is_zero(f, curr_offset); /* if we are in read-only mode and expect to read a zero block, skip reading and just return zeros */ if (f->mode & O_RDONLY && expect_zeros) { memset(data, 0, FEC_BLOCKSIZE); goto valid; } /* copy raw data without error correction */ if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) { error("failed to read: %s", strerror(errno)); return -1; } if (likely(verity_check_block(f, hash, data))) { goto valid; } /* we know the block is supposed to contain zeros, so return zeros instead of trying to correct it */ if (expect_zeros) { memset(data, 0, FEC_BLOCKSIZE); goto corrected; } if (!f->ecc.start) { /* fatal error without ecc */ error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, offset, offset + count, curr); return -1; } else { debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, offset, offset + count, curr); } /* try to correct without erasures first, because 
checking for erasure locations is slower */ if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(), errors) == FEC_BLOCKSIZE && verity_check_block(f, hash, data)) { goto corrected; } /* try to correct with erasures */ if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(), errors) == FEC_BLOCKSIZE && verity_check_block(f, hash, data)) { goto corrected; } error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64 " (offset %" PRIu64 ") cannot be recovered", offset, offset + count, curr, curr_offset); dump("decoded block", curr, data, FEC_BLOCKSIZE); errno = EIO; return -1; corrected: /* update the corrected block to the file if we are in r/w mode */ if (f->mode & O_RDWR && !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) { error("failed to write: %s", strerror(errno)); return -1; } valid: size_t copy = FEC_BLOCKSIZE - coff; if (copy > left) { copy = left; } memcpy(dest, &data[coff], copy); dest += copy; left -= copy; coff = 0; ++curr; } return count; }