/// \brief Parse the Index(es) from the given .xz file /// /// \param xfi Pointer to structure where the decoded information /// is stored. /// \param pair Input file /// /// \return On success, false is returned. On error, true is returned. /// // TODO: This function is pretty big. liblzma should have a function that // takes a callback function to parse the Index(es) from a .xz file to make // it easy for applications. static bool parse_indexes(xz_file_info *xfi, file_pair *pair) { if (pair->src_st.st_size <= 0) { message_error(_("%s: File is empty"), pair->src_name); return true; } if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { message_error(_("%s: Too small to be a valid .xz file"), pair->src_name); return true; } io_buf buf; lzma_stream_flags header_flags; lzma_stream_flags footer_flags; lzma_ret ret; // lzma_stream for the Index decoder lzma_stream strm = LZMA_STREAM_INIT; // All Indexes decoded so far lzma_index *combined_index = NULL; // The Index currently being decoded lzma_index *this_index = NULL; // Current position in the file. We parse the file backwards so // initialize it to point to the end of the file. off_t pos = pair->src_st.st_size; // Each loop iteration decodes one Index. do { // Check that there is enough data left to contain at least // the Stream Header and Stream Footer. This check cannot // fail in the first pass of this loop. if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } pos -= LZMA_STREAM_HEADER_SIZE; lzma_vli stream_padding = 0; // Locate the Stream Footer. There may be Stream Padding which // we must skip when reading backwards. while (true) { if (pos < LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm( LZMA_DATA_ERROR)); goto error; } if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; // Stream Padding is always a multiple of four bytes. int i = 2; if (buf.u32[i] != 0) break; // To avoid calling io_pread() for every four bytes // of Stream Padding, take advantage that we read // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and // check them too before calling io_pread() again. do { stream_padding += 4; pos -= 4; --i; } while (i >= 0 && buf.u32[i] == 0); } // Decode the Stream Footer. ret = lzma_stream_footer_decode(&footer_flags, buf.u8); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } // Check that the Stream Footer doesn't specify something // that we don't support. This can only happen if the xz // version is older than liblzma and liblzma supports // something new. // // It is enough to check Stream Footer. Stream Header must // match when it is compared against Stream Footer with // lzma_stream_flags_compare(). if (footer_flags.version != 0) { message_error("%s: %s", pair->src_name, message_strm(LZMA_OPTIONS_ERROR)); goto error; } // Check that the size of the Index field looks sane. lzma_vli index_size = footer_flags.backward_size; if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } // Set pos to the beginning of the Index. pos -= index_size; // See how much memory we can use for decoding this Index. uint64_t memlimit = hardware_memlimit_get(MODE_LIST); uint64_t memused = 0; if (combined_index != NULL) { memused = lzma_index_memused(combined_index); if (memused > memlimit) message_bug(); memlimit -= memused; } // Decode the Index. ret = lzma_index_decoder(&strm, &this_index, memlimit); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } do { // Don't give the decoder more input than the // Index size. strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); if (io_pread(pair, &buf, strm.avail_in, pos)) goto error; pos += strm.avail_in; index_size -= strm.avail_in; strm.next_in = buf.u8; ret = lzma_code(&strm, LZMA_RUN); } while (ret == LZMA_OK); // If the decoding seems to be successful, check also that // the Index decoder consumed as much input as indicated // by the Backward Size field. if (ret == LZMA_STREAM_END) if (index_size != 0 || strm.avail_in != 0) ret = LZMA_DATA_ERROR; if (ret != LZMA_STREAM_END) { // LZMA_BUFFER_ERROR means that the Index decoder // would have liked more input than what the Index // size should be according to Stream Footer. // The message for LZMA_DATA_ERROR makes more // sense in that case. if (ret == LZMA_BUF_ERROR) ret = LZMA_DATA_ERROR; message_error("%s: %s", pair->src_name, message_strm(ret)); // If the error was too low memory usage limit, // show also how much memory would have been needed. if (ret == LZMA_MEMLIMIT_ERROR) { uint64_t needed = lzma_memusage(&strm); if (UINT64_MAX - needed < memused) needed = UINT64_MAX; else needed += memused; message_mem_needed(V_ERROR, needed); } goto error; } // Decode the Stream Header and check that its Stream Flags // match the Stream Footer. pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } pos -= lzma_index_total_size(this_index); if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; ret = lzma_stream_header_decode(&header_flags, buf.u8); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } ret = lzma_stream_flags_compare(&header_flags, &footer_flags); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } // Store the decoded Stream Flags into this_index. This is // needed so that we can print which Check is used in each // Stream. ret = lzma_index_stream_flags(this_index, &footer_flags); if (ret != LZMA_OK) message_bug(); // Store also the size of the Stream Padding field. It is // needed to show the offsets of the Streams correctly. ret = lzma_index_stream_padding(this_index, stream_padding); if (ret != LZMA_OK) message_bug(); if (combined_index != NULL) { // Append the earlier decoded Indexes // after this_index. ret = lzma_index_cat( this_index, combined_index, NULL); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } } combined_index = this_index; this_index = NULL; xfi->stream_padding += stream_padding; } while (pos > 0); lzma_end(&strm); // All OK. Make combined_index available to the caller. xfi->idx = combined_index; return false; error: // Something went wrong, free the allocated memory. lzma_end(&strm); lzma_index_end(combined_index, NULL); lzma_index_end(this_index, NULL); return true; }
/// \brief Parse the Block Header /// /// The result is stored into *bhi. The caller takes care of initializing it. /// /// \return False on success, true on error. static bool parse_block_header(file_pair *pair, const lzma_index_iter *iter, block_header_info *bhi, xz_file_info *xfi) { #if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX # error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX #endif // Get the whole Block Header with one read, but don't read past // the end of the Block (or even its Check field). const uint32_t size = my_min(iter->block.total_size - lzma_check_size(iter->stream.flags->check), LZMA_BLOCK_HEADER_SIZE_MAX); io_buf buf; if (io_pread(pair, &buf, size, iter->block.compressed_file_offset)) return true; // Zero would mean Index Indicator and thus not a valid Block. if (buf.u8[0] == 0) goto data_error; // Initialize the block structure and decode Block Header Size. lzma_filter filters[LZMA_FILTERS_MAX + 1]; lzma_block block; block.version = 0; block.check = iter->stream.flags->check; block.filters = filters; block.header_size = lzma_block_header_size_decode(buf.u8[0]); if (block.header_size > size) goto data_error; // Decode the Block Header. switch (lzma_block_header_decode(&block, NULL, buf.u8)) { case LZMA_OK: break; case LZMA_OPTIONS_ERROR: message_error("%s: %s", pair->src_name, message_strm(LZMA_OPTIONS_ERROR)); return true; case LZMA_DATA_ERROR: goto data_error; default: message_bug(); } // Check the Block Flags. These must be done before calling // lzma_block_compressed_size(), because it overwrites // block.compressed_size. bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN ? 'c' : '-'; bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN ? 'u' : '-'; bhi->flags[2] = '\0'; // Collect information if all Blocks have both Compressed Size // and Uncompressed Size fields. They can be useful e.g. for // multi-threaded decompression so it can be useful to know it. xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN && block.uncompressed_size != LZMA_VLI_UNKNOWN; // Validate or set block.compressed_size. switch (lzma_block_compressed_size(&block, iter->block.unpadded_size)) { case LZMA_OK: // Validate also block.uncompressed_size if it is present. // If it isn't present, there's no need to set it since // we aren't going to actually decompress the Block; if // we were decompressing, then we should set it so that // the Block decoder could validate the Uncompressed Size // that was stored in the Index. if (block.uncompressed_size == LZMA_VLI_UNKNOWN || block.uncompressed_size == iter->block.uncompressed_size) break; // If the above fails, the file is corrupt so // LZMA_DATA_ERROR is a good error code. // Fall through case LZMA_DATA_ERROR: // Free the memory allocated by lzma_block_header_decode(). for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free(filters[i].options); goto data_error; default: message_bug(); } // Copy the known sizes. bhi->header_size = block.header_size; bhi->compressed_size = block.compressed_size; // Calculate the decoder memory usage and update the maximum // memory usage of this Block. bhi->memusage = lzma_raw_decoder_memusage(filters); if (xfi->memusage_max < bhi->memusage) xfi->memusage_max = bhi->memusage; // Determine the minimum XZ Utils version that supports this Block. // // Currently the only thing that 5.0.0 doesn't support is empty // LZMA2 Block. This decoder bug was fixed in 5.0.2. { size_t i = 0; while (filters[i + 1].id != LZMA_VLI_UNKNOWN) ++i; if (filters[i].id == LZMA_FILTER_LZMA2 && iter->block.uncompressed_size == 0 && xfi->min_version < 50000022U) xfi->min_version = 50000022U; } // Convert the filter chain to human readable form. message_filters_to_str(bhi->filter_chain, filters, false); // Free the memory allocated by lzma_block_header_decode(). for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free(filters[i].options); return false; data_error: // Show the error message. message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); return true; }
/// \brief Parse the Block Header /// /// The result is stored into *bhi. The caller takes care of initializing it. /// /// \return False on success, true on error. static bool parse_block_header(file_pair *pair, const lzma_index_iter *iter, block_header_info *bhi, xz_file_info *xfi) { #if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX # error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX #endif // Get the whole Block Header with one read, but don't read past // the end of the Block (or even its Check field). const uint32_t size = my_min(iter->block.total_size - lzma_check_size(iter->stream.flags->check), LZMA_BLOCK_HEADER_SIZE_MAX); io_buf buf; if (io_pread(pair, &buf, size, iter->block.compressed_file_offset)) return true; // Zero would mean Index Indicator and thus not a valid Block. if (buf.u8[0] == 0) goto data_error; lzma_block block; lzma_filter filters[LZMA_FILTERS_MAX + 1]; // Initialize the pointers so that they can be passed to free(). for (size_t i = 0; i < ARRAY_SIZE(filters); ++i) filters[i].options = NULL; // Initialize the block structure and decode Block Header Size. block.version = 0; block.check = iter->stream.flags->check; block.filters = filters; block.header_size = lzma_block_header_size_decode(buf.u8[0]); if (block.header_size > size) goto data_error; // Decode the Block Header. switch (lzma_block_header_decode(&block, NULL, buf.u8)) { case LZMA_OK: break; case LZMA_OPTIONS_ERROR: message_error("%s: %s", pair->src_name, message_strm(LZMA_OPTIONS_ERROR)); return true; case LZMA_DATA_ERROR: goto data_error; default: message_bug(); } // Check the Block Flags. These must be done before calling // lzma_block_compressed_size(), because it overwrites // block.compressed_size. bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN ? 'c' : '-'; bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN ? 'u' : '-'; bhi->flags[2] = '\0'; // Collect information if all Blocks have both Compressed Size // and Uncompressed Size fields. They can be useful e.g. for // multi-threaded decompression so it can be useful to know it. xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN && block.uncompressed_size != LZMA_VLI_UNKNOWN; // Validate or set block.compressed_size. switch (lzma_block_compressed_size(&block, iter->block.unpadded_size)) { case LZMA_OK: break; case LZMA_DATA_ERROR: goto data_error; default: message_bug(); } // Copy the known sizes. bhi->header_size = block.header_size; bhi->compressed_size = block.compressed_size; // Calculate the decoder memory usage and update the maximum // memory usage of this Block. bhi->memusage = lzma_raw_decoder_memusage(filters); if (xfi->memusage_max < bhi->memusage) xfi->memusage_max = bhi->memusage; // Convert the filter chain to human readable form. message_filters_to_str(bhi->filter_chain, filters, false); // Free the memory allocated by lzma_block_header_decode(). for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free(filters[i].options); return false; data_error: // Show the error message. message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); // Free the memory allocated by lzma_block_header_decode(). // This is truly needed only if we get here after a succcessful // call to lzma_block_header_decode() but it doesn't hurt to // always do it. for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free(filters[i].options); return true; }