/* checks if `offset' is within a corrupted block */
static inline bool is_erasure(fec_handle *f, uint64_t offset,
                              const uint8_t *data)
{
    if (unlikely(offset >= f->data_size)) {
        return false;
    }

    /* ideally, we would like to know if a specific byte on this block has
       been corrupted, but knowing whether any of them is can be useful as
       well, because often the entire block is corrupted */

    uint64_t n = offset / FEC_BLOCKSIZE;

    return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH],
                               data);
}
Exemple #2
0
/* reads the verity hash tree, validates it against the root hash in `root',
   corrects errors if necessary, and copies valid data blocks for later use
   to `f->verity.hash' */
static int verify_tree(fec_handle *f, const uint8_t *root)
{
    uint8_t data[FEC_BLOCKSIZE];
    uint8_t hash[SHA256_DIGEST_LENGTH];

    check(f);
    check(root);

    verity_info *v = &f->verity;
    uint32_t levels = 0;

    /* calculate the size and the number of levels in the hash tree */
    v->hash_size =
        verity_get_size(v->data_blocks * FEC_BLOCKSIZE, &levels, NULL);

    check(v->hash_start < UINT64_MAX - v->hash_size);
    check(v->hash_start + v->hash_size <= f->data_size);

    uint64_t hash_offset = v->hash_start;
    uint64_t data_offset = hash_offset + FEC_BLOCKSIZE;

    v->hash_data_offset = data_offset;

    /* validate the root hash */
    if (!raw_pread(f, data, FEC_BLOCKSIZE, hash_offset) ||
            !verity_check_block(f, root, data)) {
        /* try to correct */
        if (!ecc_read_hashes(f, 0, NULL, hash_offset, data) ||
                !verity_check_block(f, root, data)) {
            error("root hash invalid");
            return -1;
        } else if (f->mode & O_RDWR &&
                    !raw_pwrite(f, data, FEC_BLOCKSIZE, hash_offset)) {
            error("failed to rewrite the root block: %s", strerror(errno));
            return -1;
        }
    }

    debug("root hash valid");

    /* calculate the number of hashes on each level */
    uint32_t hashes[levels];

    verity_get_size(v->data_blocks * FEC_BLOCKSIZE, NULL, hashes);

    /* calculate the size and offset for the data hashes */
    for (uint32_t i = 1; i < levels; ++i) {
        uint32_t blocks = hashes[levels - i];
        debug("%u hash blocks on level %u", blocks, levels - i);

        v->hash_data_offset = data_offset;
        v->hash_data_blocks = blocks;

        data_offset += blocks * FEC_BLOCKSIZE;
    }

    check(v->hash_data_blocks);
    check(v->hash_data_blocks <= v->hash_size / FEC_BLOCKSIZE);

    check(v->hash_data_offset);
    check(v->hash_data_offset <=
        UINT64_MAX - (v->hash_data_blocks * FEC_BLOCKSIZE));
    check(v->hash_data_offset < f->data_size);
    check(v->hash_data_offset + v->hash_data_blocks * FEC_BLOCKSIZE <=
        f->data_size);

    /* copy data hashes to memory in case they are corrupted, so we don't
       have to correct them every time they are needed */
    std::unique_ptr<uint8_t[]> data_hashes(
       new (std::nothrow) uint8_t[f->verity.hash_data_blocks * FEC_BLOCKSIZE]);

    if (!data_hashes) {
        errno = ENOMEM;
        return -1;
    }

    /* validate the rest of the hash tree */
    data_offset = hash_offset + FEC_BLOCKSIZE;

    for (uint32_t i = 1; i < levels; ++i) {
        uint32_t blocks = hashes[levels - i];

        for (uint32_t j = 0; j < blocks; ++j) {
            /* ecc reads are very I/O intensive, so read raw hash tree and do
               error correcting only if it doesn't validate */
            if (!raw_pread(f, hash, SHA256_DIGEST_LENGTH,
                    hash_offset + j * SHA256_DIGEST_LENGTH) ||
                !raw_pread(f, data, FEC_BLOCKSIZE,
                    data_offset + j * FEC_BLOCKSIZE)) {
                error("failed to read hashes: %s", strerror(errno));
                return -1;
            }

            if (!verity_check_block(f, hash, data)) {
                /* try to correct */
                if (!ecc_read_hashes(f,
                        hash_offset + j * SHA256_DIGEST_LENGTH, hash,
                        data_offset + j * FEC_BLOCKSIZE, data) ||
                    !verity_check_block(f, hash, data)) {
                    error("invalid hash tree: hash_offset %" PRIu64 ", "
                        "data_offset %" PRIu64 ", block %u",
                        hash_offset, data_offset, j);
                    return -1;
                }

                /* update the corrected blocks to the file if we are in r/w
                   mode */
                if (f->mode & O_RDWR) {
                    if (!raw_pwrite(f, hash, SHA256_DIGEST_LENGTH,
                            hash_offset + j * SHA256_DIGEST_LENGTH) ||
                        !raw_pwrite(f, data, FEC_BLOCKSIZE,
                            data_offset + j * FEC_BLOCKSIZE)) {
                        error("failed to write hashes: %s", strerror(errno));
                        return -1;
                    }
                }
            }

            if (blocks == v->hash_data_blocks) {
                memcpy(data_hashes.get() + j * FEC_BLOCKSIZE, data,
                    FEC_BLOCKSIZE);
            }
        }

        hash_offset = data_offset;
        data_offset += blocks * FEC_BLOCKSIZE;
    }

    debug("valid");

    if (v->hash) {
        delete[] v->hash;
        v->hash = NULL;
    }

    v->hash = data_hashes.release();
    return 0;
}
/* reads `count' bytes from `offset', corrects possible errors with
   erasure detection, and verifies the integrity of read data using
   verity hash tree; returns the number of corrections in `errors' */
static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count,
                           uint64_t offset, size_t *errors)
{
    check(f);
    check(dest);
    check(offset < f->data_size);
    check(offset + count <= f->data_size);
    check(f->verity.hash);
    check(errors);

    debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);

    rs_unique_ptr rs(NULL, free_rs_char);
    std::unique_ptr<uint8_t[]> ecc_data;

    if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) {
        return -1;
    }

    uint64_t curr = offset / FEC_BLOCKSIZE;
    size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
    size_t left = count;
    uint8_t data[FEC_BLOCKSIZE];

    uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE -
                               SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH;

    while (left > 0) {
        check(curr <= max_hash_block);

        uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH];
        uint64_t curr_offset = curr * FEC_BLOCKSIZE;

        bool expect_zeros = is_zero(f, curr_offset);

        /* if we are in read-only mode and expect to read a zero block,
           skip reading and just return zeros */
        if (f->mode & O_RDONLY && expect_zeros) {
            memset(data, 0, FEC_BLOCKSIZE);
            goto valid;
        }

        /* copy raw data without error correction */
        if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) {
            error("failed to read: %s", strerror(errno));
            return -1;
        }

        if (likely(verity_check_block(f, hash, data))) {
            goto valid;
        }

        /* we know the block is supposed to contain zeros, so return zeros
           instead of trying to correct it */
        if (expect_zeros) {
            memset(data, 0, FEC_BLOCKSIZE);
            goto corrected;
        }

        if (!f->ecc.start) {
            /* fatal error without ecc */
            error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
                  offset, offset + count, curr);
            return -1;
        } else {
            debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
                  offset, offset + count, curr);
        }

        /* try to correct without erasures first, because checking for
           erasure locations is slower */
        if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(),
                       errors) == FEC_BLOCKSIZE &&
                verity_check_block(f, hash, data)) {
            goto corrected;
        }

        /* try to correct with erasures */
        if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(),
                       errors) == FEC_BLOCKSIZE &&
                verity_check_block(f, hash, data)) {
            goto corrected;
        }

        error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64
              " (offset %" PRIu64 ") cannot be recovered",
              offset, offset + count, curr, curr_offset);
        dump("decoded block", curr, data, FEC_BLOCKSIZE);

        errno = EIO;
        return -1;

corrected:
        /* update the corrected block to the file if we are in r/w mode */
        if (f->mode & O_RDWR &&
                !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) {
            error("failed to write: %s", strerror(errno));
            return -1;
        }

valid:
        size_t copy = FEC_BLOCKSIZE - coff;

        if (copy > left) {
            copy = left;
        }

        memcpy(dest, &data[coff], copy);

        dest += copy;
        left -= copy;
        coff = 0;
        ++curr;
    }

    return count;
}